1//===-- clang-linker-wrapper/ClangLinkerWrapper.cpp - wrapper over linker-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This tool works as a wrapper over a linking job. This tool is used to create
10// linked device images for offloading. It scans the linker's input for embedded
11// device offloading data stored in sections `.llvm.offloading` and extracts it
12// as a temporary file. The extracted device files will then be passed to a
13// device linking job to create a final device image.
14//
15//===---------------------------------------------------------------------===//
16
17#include "clang/Basic/TargetID.h"
18#include "clang/Basic/Version.h"
19#include "llvm/ADT/MapVector.h"
20#include "llvm/BinaryFormat/Magic.h"
21#include "llvm/Bitcode/BitcodeWriter.h"
22#include "llvm/CodeGen/CommandFlags.h"
23#include "llvm/Frontend/Offloading/OffloadWrapper.h"
24#include "llvm/Frontend/Offloading/Utility.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/DiagnosticPrinter.h"
27#include "llvm/IR/Module.h"
28#include "llvm/IRReader/IRReader.h"
29#include "llvm/LTO/LTO.h"
30#include "llvm/MC/TargetRegistry.h"
31#include "llvm/Object/Archive.h"
32#include "llvm/Object/ArchiveWriter.h"
33#include "llvm/Object/Binary.h"
34#include "llvm/Object/ELFObjectFile.h"
35#include "llvm/Object/IRObjectFile.h"
36#include "llvm/Object/ObjectFile.h"
37#include "llvm/Object/OffloadBinary.h"
38#include "llvm/Option/ArgList.h"
39#include "llvm/Option/OptTable.h"
40#include "llvm/Option/Option.h"
41#include "llvm/Plugins/PassPlugin.h"
42#include "llvm/Remarks/HotnessThresholdParser.h"
43#include "llvm/Support/CommandLine.h"
44#include "llvm/Support/Errc.h"
45#include "llvm/Support/FileOutputBuffer.h"
46#include "llvm/Support/FileSystem.h"
47#include "llvm/Support/InitLLVM.h"
48#include "llvm/Support/MemoryBuffer.h"
49#include "llvm/Support/Parallel.h"
50#include "llvm/Support/Path.h"
51#include "llvm/Support/Program.h"
52#include "llvm/Support/Signals.h"
53#include "llvm/Support/SourceMgr.h"
54#include "llvm/Support/StringSaver.h"
55#include "llvm/Support/TargetSelect.h"
56#include "llvm/Support/TimeProfiler.h"
57#include "llvm/Support/WithColor.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Target/TargetMachine.h"
60#include "llvm/TargetParser/Host.h"
61#include <atomic>
62#include <optional>
63
64using namespace llvm;
65using namespace llvm::opt;
66using namespace llvm::object;
67
68// Various tools (e.g., llc and opt) duplicate this series of declarations for
69// options related to passes and remarks.
70
71static cl::opt<bool> RemarksWithHotness(
72 "pass-remarks-with-hotness",
73 cl::desc("With PGO, include profile count in optimization remarks"),
74 cl::Hidden);
75
76static cl::opt<std::optional<uint64_t>, false, remarks::HotnessThresholdParser>
77 RemarksHotnessThreshold(
78 "pass-remarks-hotness-threshold",
79 cl::desc("Minimum profile count required for "
80 "an optimization remark to be output. "
81 "Use 'auto' to apply the threshold from profile summary."),
82 cl::value_desc("N or 'auto'"), cl::init(Val: 0), cl::Hidden);
83
84static cl::opt<std::string>
85 RemarksFilename("pass-remarks-output",
86 cl::desc("Output filename for pass remarks"),
87 cl::value_desc("filename"));
88
89static cl::opt<std::string>
90 RemarksPasses("pass-remarks-filter",
91 cl::desc("Only record optimization remarks from passes whose "
92 "names match the given regular expression"),
93 cl::value_desc("regex"));
94
95static cl::opt<std::string> RemarksFormat(
96 "pass-remarks-format",
97 cl::desc("The format used for serializing remarks (default: YAML)"),
98 cl::value_desc("format"), cl::init(Val: "yaml"));
99
100static cl::list<std::string>
101 PassPlugins("load-pass-plugin",
102 cl::desc("Load passes from plugin library"));
103
104static cl::opt<std::string> PassPipeline(
105 "passes",
106 cl::desc(
107 "A textual description of the pass pipeline. To have analysis passes "
108 "available before a certain pass, add 'require<foo-analysis>'. "
109 "'-passes' overrides the pass pipeline (but not all effects) from "
110 "specifying '--opt-level=O?' (O2 is the default) to "
111 "clang-linker-wrapper. Be sure to include the corresponding "
112 "'default<O?>' in '-passes'."));
113static cl::alias PassPipeline2("p", cl::aliasopt(PassPipeline),
114 cl::desc("Alias for -passes"));
115
116/// Path of the current binary.
117static const char *LinkerExecutable;
118
119/// Ssave intermediary results.
120static bool SaveTemps = false;
121
122/// Print arguments without executing.
123static bool DryRun = false;
124
125/// Print verbose output.
126static bool Verbose = false;
127
128/// Filename of the executable being created.
129static StringRef ExecutableName;
130
131/// Binary path for the CUDA installation.
132static std::string CudaBinaryPath;
133
134/// Mutex lock to protect writes to shared TempFiles in parallel.
135static std::mutex TempFilesMutex;
136
137/// Temporary files created by the linker wrapper.
138static std::list<SmallString<128>> TempFiles;
139
140/// Codegen flags for LTO backend.
141static codegen::RegisterCodeGenFlags CodeGenFlags;
142
143using OffloadingImage = OffloadBinary::OffloadingImage;
144
145namespace llvm {
146// Provide DenseMapInfo so that OffloadKind can be used in a DenseMap.
147template <> struct DenseMapInfo<OffloadKind> {
148 static inline OffloadKind getEmptyKey() { return OFK_LAST; }
149 static inline OffloadKind getTombstoneKey() {
150 return static_cast<OffloadKind>(OFK_LAST + 1);
151 }
152 static unsigned getHashValue(const OffloadKind &Val) { return Val; }
153
154 static bool isEqual(const OffloadKind &LHS, const OffloadKind &RHS) {
155 return LHS == RHS;
156 }
157};
158} // namespace llvm
159
160namespace {
161using std::error_code;
162
/// Option flags specific to the linker wrapper. The bit positions must not
/// overlap with llvm::opt::DriverFlag.
enum WrapperFlags {
  // Options only used by the linker wrapper.
  WrapperOnlyOption = 1 << 4,
  // Options only used for device linking.
  DeviceOnlyOption = 1 << 5,
};
168
169enum ID {
170 OPT_INVALID = 0, // This is not an option ID.
171#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
172#include "LinkerWrapperOpts.inc"
173 LastOption
174#undef OPTION
175};
176
177#define OPTTABLE_STR_TABLE_CODE
178#include "LinkerWrapperOpts.inc"
179#undef OPTTABLE_STR_TABLE_CODE
180
181#define OPTTABLE_PREFIXES_TABLE_CODE
182#include "LinkerWrapperOpts.inc"
183#undef OPTTABLE_PREFIXES_TABLE_CODE
184
185static constexpr OptTable::Info InfoTable[] = {
186#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
187#include "LinkerWrapperOpts.inc"
188#undef OPTION
189};
190
191class WrapperOptTable : public opt::GenericOptTable {
192public:
193 WrapperOptTable()
194 : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
195};
196
197const OptTable &getOptTable() {
198 static const WrapperOptTable *Table = []() {
199 auto Result = std::make_unique<WrapperOptTable>();
200 return Result.release();
201 }();
202 return *Table;
203}
204
205void printCommands(ArrayRef<StringRef> CmdArgs) {
206 if (CmdArgs.empty())
207 return;
208
209 llvm::errs() << " \"" << CmdArgs.front() << "\" ";
210 for (auto IC = std::next(x: CmdArgs.begin()), IE = CmdArgs.end(); IC != IE; ++IC)
211 llvm::errs() << *IC << (std::next(x: IC) != IE ? " " : "\n");
212}
213
214[[noreturn]] void reportError(Error E) {
215 outs().flush();
216 logAllUnhandledErrors(E: std::move(E),
217 OS&: WithColor::error(OS&: errs(), Prefix: LinkerExecutable));
218 exit(EXIT_FAILURE);
219}
220
221std::string getMainExecutable(const char *Name) {
222 void *Ptr = (void *)(intptr_t)&getMainExecutable;
223 auto COWPath = sys::fs::getMainExecutable(argv0: Name, MainExecAddr: Ptr);
224 return sys::path::parent_path(path: COWPath).str();
225}
226
227/// Get a temporary filename suitable for output.
228Expected<StringRef> createOutputFile(const Twine &Prefix, StringRef Extension) {
229 std::scoped_lock<decltype(TempFilesMutex)> Lock(TempFilesMutex);
230 SmallString<128> OutputFile;
231 std::string PrefixStr = clang::sanitizeTargetIDInFileName(TargetID: Prefix.str());
232
233 if (SaveTemps) {
234 (PrefixStr + "." + Extension).toNullTerminatedStringRef(Out&: OutputFile);
235 } else {
236 if (std::error_code EC =
237 sys::fs::createTemporaryFile(Prefix: PrefixStr, Suffix: Extension, ResultPath&: OutputFile))
238 return createFileError(F: OutputFile, EC);
239 }
240
241 TempFiles.emplace_back(args: std::move(OutputFile));
242 return TempFiles.back();
243}
244
245/// Execute the command \p ExecutablePath with the arguments \p Args.
246Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) {
247 if (Verbose || DryRun)
248 printCommands(CmdArgs: Args);
249
250 if (!DryRun)
251 if (sys::ExecuteAndWait(Program: ExecutablePath, Args))
252 return createStringError(
253 Fmt: "'%s' failed", Vals: sys::path::filename(path: ExecutablePath).str().c_str());
254 return Error::success();
255}
256
257Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
258
259 ErrorOr<std::string> Path = sys::findProgramByName(Name, Paths);
260 if (!Path)
261 Path = sys::findProgramByName(Name);
262 if (!Path && DryRun)
263 return Name.str();
264 if (!Path)
265 return createStringError(EC: Path.getError(),
266 S: "Unable to find '" + Name + "' in path");
267 return *Path;
268}
269
270bool linkerSupportsLTO(const ArgList &Args) {
271 llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
272 return Triple.isNVPTX() || Triple.isAMDGPU() ||
273 (!Triple.isGPU() &&
274 Args.getLastArgValue(Id: OPT_linker_path_EQ).ends_with(Suffix: "lld"));
275}
276
277/// Returns the hashed value for a constant string.
278std::string getHash(StringRef Str) {
279 llvm::MD5 Hasher;
280 llvm::MD5::MD5Result Hash;
281 Hasher.update(Str);
282 Hasher.final(Result&: Hash);
283 return llvm::utohexstr(X: Hash.low(), /*LowerCase=*/true);
284}
285
286/// Renames offloading entry sections in a relocatable link so they do not
287/// conflict with a later link job.
288Error relocateOffloadSection(const ArgList &Args, StringRef Output) {
289 llvm::Triple Triple(
290 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
291 if (Triple.isOSWindows())
292 return createStringError(
293 Fmt: "Relocatable linking is not supported on COFF targets");
294
295 Expected<std::string> ObjcopyPath =
296 findProgram(Name: "llvm-objcopy", Paths: {getMainExecutable(Name: "llvm-objcopy")});
297 if (!ObjcopyPath)
298 return ObjcopyPath.takeError();
299
300 // Use the linker output file to get a unique hash. This creates a unique
301 // identifier to rename the sections to that is deterministic to the contents.
302 auto BufferOrErr = DryRun ? MemoryBuffer::getMemBuffer(InputData: "")
303 : MemoryBuffer::getFileOrSTDIN(Filename: Output);
304 if (!BufferOrErr)
305 return createStringError(Fmt: "Failed to open %s", Vals: Output.str().c_str());
306 std::string Suffix = "_" + getHash(Str: (*BufferOrErr)->getBuffer());
307
308 SmallVector<StringRef> ObjcopyArgs = {
309 *ObjcopyPath,
310 Output,
311 };
312
313 // Remove the old .llvm.offloading section to prevent further linking.
314 ObjcopyArgs.emplace_back(Args: "--remove-section");
315 ObjcopyArgs.emplace_back(Args: ".llvm.offloading");
316 StringRef Prefix = "llvm";
317 auto Section = (Prefix + "_offload_entries").str();
318 // Rename the offloading entires to make them private to this link unit.
319 ObjcopyArgs.emplace_back(Args: "--rename-section");
320 ObjcopyArgs.emplace_back(
321 Args: Args.MakeArgString(Str: Section + "=" + Section + Suffix));
322
323 // Rename the __start_ / __stop_ symbols appropriately to iterate over the
324 // newly renamed section containing the offloading entries.
325 ObjcopyArgs.emplace_back(Args: "--redefine-sym");
326 ObjcopyArgs.emplace_back(Args: Args.MakeArgString(Str: "__start_" + Section + "=" +
327 "__start_" + Section + Suffix));
328 ObjcopyArgs.emplace_back(Args: "--redefine-sym");
329 ObjcopyArgs.emplace_back(Args: Args.MakeArgString(Str: "__stop_" + Section + "=" +
330 "__stop_" + Section + Suffix));
331
332 if (Error Err = executeCommands(ExecutablePath: *ObjcopyPath, Args: ObjcopyArgs))
333 return Err;
334
335 return Error::success();
336}
337
338/// Runs the wrapped linker job with the newly created input.
339Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) {
340 llvm::TimeTraceScope TimeScope("Execute host linker");
341
342 // Render the linker arguments and add the newly created image. We add it
343 // after the output file to ensure it is linked with the correct libraries.
344 StringRef LinkerPath = Args.getLastArgValue(Id: OPT_linker_path_EQ);
345 if (LinkerPath.empty())
346 return createStringError(Fmt: "linker path missing, must pass 'linker-path'");
347 ArgStringList NewLinkerArgs;
348 for (const opt::Arg *Arg : Args) {
349 // Do not forward arguments only intended for the linker wrapper.
350 if (Arg->getOption().hasFlag(Val: WrapperOnlyOption))
351 continue;
352
353 Arg->render(Args, Output&: NewLinkerArgs);
354 if (Arg->getOption().matches(ID: OPT_o) || Arg->getOption().matches(ID: OPT_out))
355 llvm::transform(Range&: Files, d_first: std::back_inserter(x&: NewLinkerArgs),
356 F: [&](StringRef Arg) { return Args.MakeArgString(Str: Arg); });
357 }
358
359 SmallVector<StringRef> LinkerArgs({LinkerPath});
360 for (StringRef Arg : NewLinkerArgs)
361 LinkerArgs.push_back(Elt: Arg);
362 if (Error Err = executeCommands(ExecutablePath: LinkerPath, Args: LinkerArgs))
363 return Err;
364
365 if (Args.hasArg(Ids: OPT_relocatable))
366 return relocateOffloadSection(Args, Output: ExecutableName);
367
368 return Error::success();
369}
370
371void printVersion(raw_ostream &OS) {
372 OS << clang::getClangToolFullVersion(ToolName: "clang-linker-wrapper") << '\n';
373}
374
375namespace nvptx {
376Expected<StringRef>
377fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
378 const ArgList &Args) {
379 llvm::TimeTraceScope TimeScope("NVPTX fatbinary");
380 // NVPTX uses the fatbinary program to bundle the linked images.
381 Expected<std::string> FatBinaryPath =
382 findProgram(Name: "fatbinary", Paths: {CudaBinaryPath + "/bin"});
383 if (!FatBinaryPath)
384 return FatBinaryPath.takeError();
385
386 llvm::Triple Triple(
387 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
388
389 // Create a new file to write the linked device image to.
390 auto TempFileOrErr =
391 createOutputFile(Prefix: sys::path::filename(path: ExecutableName), Extension: "fatbin");
392 if (!TempFileOrErr)
393 return TempFileOrErr.takeError();
394
395 SmallVector<StringRef, 16> CmdArgs;
396 CmdArgs.push_back(Elt: *FatBinaryPath);
397 CmdArgs.push_back(Elt: Triple.isArch64Bit() ? "-64" : "-32");
398 CmdArgs.push_back(Elt: "--create");
399 CmdArgs.push_back(Elt: *TempFileOrErr);
400 for (const auto &[File, Arch] : InputFiles)
401 CmdArgs.push_back(Elt: Args.MakeArgString(
402 Str: "--image3=kind=elf,sm=" + Arch.drop_front(N: 3) + ",file=" + File));
403
404 if (Error Err = executeCommands(ExecutablePath: *FatBinaryPath, Args: CmdArgs))
405 return std::move(Err);
406
407 return *TempFileOrErr;
408}
409} // namespace nvptx
410
411namespace amdgcn {
412
413// Constructs a triple string for clang offload bundler.
414// NOTE: copied from HIPUtility.cpp.
415static std::string normalizeForBundler(const llvm::Triple &T,
416 bool HasTargetID) {
417 return HasTargetID ? (T.getArchName() + "-" + T.getVendorName() + "-" +
418 T.getOSName() + "-" + T.getEnvironmentName())
419 .str()
420 : T.normalize(Form: llvm::Triple::CanonicalForm::FOUR_IDENT);
421}
422
423Expected<StringRef>
424fatbinary(ArrayRef<std::tuple<StringRef, StringRef, StringRef>> InputFiles,
425 const ArgList &Args) {
426 llvm::TimeTraceScope TimeScope("AMDGPU Fatbinary");
427
428 // AMDGPU uses the clang-offload-bundler to bundle the linked images.
429 Expected<std::string> OffloadBundlerPath = findProgram(
430 Name: "clang-offload-bundler", Paths: {getMainExecutable(Name: "clang-offload-bundler")});
431 if (!OffloadBundlerPath)
432 return OffloadBundlerPath.takeError();
433
434 // Create a new file to write the linked device image to.
435 auto TempFileOrErr =
436 createOutputFile(Prefix: sys::path::filename(path: ExecutableName), Extension: "hipfb");
437 if (!TempFileOrErr)
438 return TempFileOrErr.takeError();
439
440 BumpPtrAllocator Alloc;
441 StringSaver Saver(Alloc);
442
443 SmallVector<StringRef, 16> CmdArgs;
444 CmdArgs.push_back(Elt: *OffloadBundlerPath);
445 CmdArgs.push_back(Elt: "-type=o");
446 CmdArgs.push_back(Elt: "-bundle-align=4096");
447
448 if (Args.hasArg(Ids: OPT_compress))
449 CmdArgs.push_back(Elt: "-compress");
450 if (auto *Arg = Args.getLastArg(Ids: OPT_compression_level_eq))
451 CmdArgs.push_back(
452 Elt: Args.MakeArgString(Str: Twine("-compression-level=") + Arg->getValue()));
453
454 SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux-gnu"};
455 for (const auto &[File, TripleRef, Arch] : InputFiles) {
456 std::string NormalizedTriple =
457 normalizeForBundler(T: Triple(TripleRef), HasTargetID: !Arch.empty());
458 Targets.push_back(Elt: Saver.save(S: "hip-" + NormalizedTriple + "-" + Arch));
459 }
460 CmdArgs.push_back(Elt: Saver.save(S: llvm::join(R&: Targets, Separator: ",")));
461
462#ifdef _WIN32
463 CmdArgs.push_back("-input=NUL");
464#else
465 CmdArgs.push_back(Elt: "-input=/dev/null");
466#endif
467 for (const auto &[File, Triple, Arch] : InputFiles)
468 CmdArgs.push_back(Elt: Saver.save(S: "-input=" + File));
469
470 CmdArgs.push_back(Elt: Saver.save(S: "-output=" + *TempFileOrErr));
471
472 if (Error Err = executeCommands(ExecutablePath: *OffloadBundlerPath, Args: CmdArgs))
473 return std::move(Err);
474
475 return *TempFileOrErr;
476}
477} // namespace amdgcn
478
479namespace generic {
480Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
481 uint16_t ActiveOffloadKindMask) {
482 llvm::TimeTraceScope TimeScope("Clang");
483 // Use `clang` to invoke the appropriate device tools.
484 Expected<std::string> ClangPath =
485 findProgram(Name: "clang", Paths: {getMainExecutable(Name: "clang")});
486 if (!ClangPath)
487 return ClangPath.takeError();
488
489 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
490 StringRef Arch = Args.getLastArgValue(Id: OPT_arch_EQ);
491 // Create a new file to write the linked device image to. Assume that the
492 // input filename already has the device and architecture.
493 std::string OutputFileBase =
494 "." + Triple.getArchName().str() + "." + Arch.str();
495 auto TempFileOrErr = createOutputFile(
496 Prefix: sys::path::filename(path: ExecutableName) + OutputFileBase, Extension: "img");
497 if (!TempFileOrErr)
498 return TempFileOrErr.takeError();
499
500 SmallVector<StringRef, 16> CmdArgs{
501 *ClangPath,
502 "--no-default-config",
503 "-o",
504 *TempFileOrErr,
505 // Without -dumpdir, Clang will place auxiliary output files in the
506 // temporary directory of TempFileOrErr, where they will not easily be
507 // found by the user and might eventually be automatically removed. Tell
508 // Clang to instead place them alongside the final executable.
509 "-dumpdir",
510 Args.MakeArgString(Str: ExecutableName + OutputFileBase + ".img."),
511 Args.MakeArgString(Str: "--target=" + Triple.getTriple()),
512 };
513
514 if (!Arch.empty())
515 Triple.isAMDGPU() ? CmdArgs.push_back(Elt: Args.MakeArgString(Str: "-mcpu=" + Arch))
516 : CmdArgs.push_back(Elt: Args.MakeArgString(Str: "-march=" + Arch));
517
518 // AMDGPU is always in LTO mode currently.
519 if (Triple.isAMDGPU())
520 CmdArgs.push_back(Elt: "-flto");
521
522 // Forward all of the `--offload-opt` and similar options to the device.
523 for (auto &Arg : Args.filtered(Ids: OPT_offload_opt_eq_minus, Ids: OPT_mllvm))
524 CmdArgs.append(
525 IL: {"-Xlinker",
526 Args.MakeArgString(Str: "--plugin-opt=" + StringRef(Arg->getValue()))});
527
528 if (!Triple.isNVPTX() && !Triple.isSPIRV())
529 CmdArgs.push_back(Elt: "-Wl,--no-undefined");
530
531 for (StringRef InputFile : InputFiles)
532 CmdArgs.push_back(Elt: InputFile);
533
534 // If this is CPU offloading we copy the input libraries.
535 if (!Triple.isGPU()) {
536 CmdArgs.push_back(Elt: "-Wl,-Bsymbolic");
537 CmdArgs.push_back(Elt: "-shared");
538 ArgStringList LinkerArgs;
539 for (const opt::Arg *Arg :
540 Args.filtered(Ids: OPT_INPUT, Ids: OPT_library, Ids: OPT_library_path, Ids: OPT_rpath,
541 Ids: OPT_whole_archive, Ids: OPT_no_whole_archive)) {
542 // Sometimes needed libraries are passed by name, such as when using
543 // sanitizers. We need to check the file magic for any libraries.
544 if (Arg->getOption().matches(ID: OPT_INPUT)) {
545 if (!sys::fs::exists(Path: Arg->getValue()) ||
546 sys::fs::is_directory(Path: Arg->getValue()))
547 continue;
548
549 file_magic Magic;
550 if (auto EC = identify_magic(path: Arg->getValue(), result&: Magic))
551 return createStringError(Fmt: "Failed to open %s", Vals: Arg->getValue());
552 if (Magic != file_magic::archive &&
553 Magic != file_magic::elf_shared_object)
554 continue;
555 }
556 if (Arg->getOption().matches(ID: OPT_whole_archive))
557 LinkerArgs.push_back(Elt: Args.MakeArgString(Str: "-Wl,--whole-archive"));
558 else if (Arg->getOption().matches(ID: OPT_no_whole_archive))
559 LinkerArgs.push_back(Elt: Args.MakeArgString(Str: "-Wl,--no-whole-archive"));
560 else
561 Arg->render(Args, Output&: LinkerArgs);
562 }
563 llvm::append_range(C&: CmdArgs, R&: LinkerArgs);
564 }
565
566 // Pass on -mllvm options to the linker invocation.
567 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_mllvm))
568 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(
569 Str: "-mllvm=" + StringRef(Arg->getValue()))});
570
571 if (SaveTemps && linkerSupportsLTO(Args))
572 CmdArgs.push_back(Elt: "-Wl,--save-temps");
573
574 if (Args.hasArg(Ids: OPT_embed_bitcode))
575 CmdArgs.push_back(Elt: "-Wl,--lto-emit-llvm");
576
577 // For linking device code with the SYCL offload kind, special handling is
578 // required. Passing --sycl-link to clang results in a call to
579 // clang-sycl-linker. Additional linker flags required by clang-sycl-linker
580 // will be communicated via the -Xlinker option.
581 if (ActiveOffloadKindMask & OFK_SYCL) {
582 CmdArgs.push_back(Elt: "--sycl-link");
583 CmdArgs.append(
584 IL: {"-Xlinker", Args.MakeArgString(Str: "-triple=" + Triple.getTriple())});
585 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(Str: "-arch=" + Arch)});
586 }
587
588 for (StringRef Arg : Args.getAllArgValues(Id: OPT_linker_arg_EQ))
589 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(Str: Arg)});
590 for (StringRef Arg : Args.getAllArgValues(Id: OPT_compiler_arg_EQ))
591 CmdArgs.push_back(Elt: Args.MakeArgString(Str: Arg));
592
593 if (Error Err = executeCommands(ExecutablePath: *ClangPath, Args: CmdArgs))
594 return std::move(Err);
595
596 return *TempFileOrErr;
597}
598} // namespace generic
599
600Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
601 const ArgList &Args,
602 uint16_t ActiveOffloadKindMask) {
603 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
604 switch (Triple.getArch()) {
605 case Triple::nvptx:
606 case Triple::nvptx64:
607 case Triple::amdgcn:
608 case Triple::x86:
609 case Triple::x86_64:
610 case Triple::aarch64:
611 case Triple::aarch64_be:
612 case Triple::ppc64:
613 case Triple::ppc64le:
614 case Triple::spirv64:
615 case Triple::systemz:
616 case Triple::loongarch64:
617 return generic::clang(InputFiles, Args, ActiveOffloadKindMask);
618 default:
619 return createStringError(S: Triple.getArchName() +
620 " linking is not supported");
621 }
622}
623
624Error containerizeRawImage(std::unique_ptr<MemoryBuffer> &Img, OffloadKind Kind,
625 const ArgList &Args) {
626 llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
627 if (Kind == OFK_OpenMP && Triple.isSPIRV() &&
628 Triple.getVendor() == llvm::Triple::Intel)
629 return offloading::intel::containerizeOpenMPSPIRVImage(Binary&: Img);
630 return Error::success();
631}
632
633Expected<StringRef> writeOffloadFile(const OffloadFile &File) {
634 const OffloadBinary &Binary = *File.getBinary();
635
636 StringRef Prefix =
637 sys::path::stem(path: Binary.getMemoryBufferRef().getBufferIdentifier());
638 SmallString<128> Filename;
639 (Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch())
640 .toVector(Out&: Filename);
641 auto TempFileOrErr = createOutputFile(Prefix: Filename, Extension: "o");
642 if (!TempFileOrErr)
643 return TempFileOrErr.takeError();
644
645 Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
646 FileOutputBuffer::create(FilePath: *TempFileOrErr, Size: Binary.getImage().size());
647 if (!OutputOrErr)
648 return OutputOrErr.takeError();
649 std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
650 llvm::copy(Range: Binary.getImage(), Out: Output->getBufferStart());
651 if (Error E = Output->commit())
652 return std::move(E);
653
654 return *TempFileOrErr;
655}
656
657// Compile the module to an object file using the appropriate target machine for
658// the host triple.
659Expected<StringRef> compileModule(Module &M, OffloadKind Kind) {
660 llvm::TimeTraceScope TimeScope("Compile module");
661 std::string Msg;
662 const Target *T = TargetRegistry::lookupTarget(TheTriple: M.getTargetTriple(), Error&: Msg);
663 if (!T)
664 return createStringError(S: Msg);
665
666 auto Options =
667 codegen::InitTargetOptionsFromCodeGenFlags(TheTriple: M.getTargetTriple());
668 StringRef CPU = "";
669 StringRef Features = "";
670 std::unique_ptr<TargetMachine> TM(
671 T->createTargetMachine(TT: M.getTargetTriple(), CPU, Features, Options,
672 RM: Reloc::PIC_, CM: M.getCodeModel()));
673
674 if (M.getDataLayout().isDefault())
675 M.setDataLayout(TM->createDataLayout());
676
677 int FD = -1;
678 auto TempFileOrErr =
679 createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." +
680 getOffloadKindName(Name: Kind) + ".image.wrapper",
681 Extension: "o");
682 if (!TempFileOrErr)
683 return TempFileOrErr.takeError();
684 if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD))
685 return errorCodeToError(EC);
686
687 auto OS = std::make_unique<llvm::raw_fd_ostream>(args&: FD, args: true);
688
689 legacy::PassManager CodeGenPasses;
690 TargetLibraryInfoImpl TLII(M.getTargetTriple());
691 CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII));
692 if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr,
693 CodeGenFileType::ObjectFile))
694 return createStringError(Fmt: "Failed to execute host backend");
695 CodeGenPasses.run(M);
696
697 return *TempFileOrErr;
698}
699
700/// Creates the object file containing the device image and runtime
701/// registration code from the device images stored in \p Images.
702Expected<StringRef>
703wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers,
704 const ArgList &Args, OffloadKind Kind) {
705 llvm::TimeTraceScope TimeScope("Wrap bundled images");
706
707 SmallVector<ArrayRef<char>, 4> BuffersToWrap;
708 for (const auto &Buffer : Buffers)
709 BuffersToWrap.emplace_back(
710 Args: ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
711
712 LLVMContext Context;
713 Module M("offload.wrapper.module", Context);
714 M.setTargetTriple(Triple(
715 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple())));
716
717 switch (Kind) {
718 case OFK_OpenMP:
719 if (Error Err = offloading::wrapOpenMPBinaries(
720 M, Images: BuffersToWrap, EntryArray: offloading::getOffloadEntryArray(M),
721 /*Suffix=*/"", /*Relocatable=*/Args.hasArg(Ids: OPT_relocatable)))
722 return std::move(Err);
723 break;
724 case OFK_Cuda:
725 if (Error Err = offloading::wrapCudaBinary(
726 M, Images: BuffersToWrap.front(), EntryArray: offloading::getOffloadEntryArray(M),
727 /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
728 return std::move(Err);
729 break;
730 case OFK_HIP:
731 if (Error Err = offloading::wrapHIPBinary(
732 M, Images: BuffersToWrap.front(), EntryArray: offloading::getOffloadEntryArray(M)))
733 return std::move(Err);
734 break;
735 case OFK_SYCL: {
736 // TODO: fill these options once the Driver supports them.
737 offloading::SYCLJITOptions Options;
738 if (Error Err =
739 offloading::wrapSYCLBinaries(M, Buffer: BuffersToWrap.front(), Options))
740 return std::move(Err);
741 break;
742 }
743 default:
744 return createStringError(S: getOffloadKindName(Name: Kind) +
745 " wrapping is not supported");
746 }
747
748 if (Args.hasArg(Ids: OPT_print_wrapped_module))
749 errs() << M;
750 if (Args.hasArg(Ids: OPT_save_temps)) {
751 int FD = -1;
752 auto TempFileOrErr =
753 createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." +
754 getOffloadKindName(Name: Kind) + ".image.wrapper",
755 Extension: "bc");
756 if (!TempFileOrErr)
757 return TempFileOrErr.takeError();
758 if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD))
759 return errorCodeToError(EC);
760 llvm::raw_fd_ostream OS(FD, true);
761 WriteBitcodeToFile(M, Out&: OS);
762 }
763
764 auto FileOrErr = compileModule(M, Kind);
765 if (!FileOrErr)
766 return FileOrErr.takeError();
767 return *FileOrErr;
768}
769
770Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
771bundleOpenMP(ArrayRef<OffloadingImage> Images) {
772 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
773 for (const OffloadingImage &Image : Images)
774 Buffers.emplace_back(
775 Args: MemoryBuffer::getMemBufferCopy(InputData: OffloadBinary::write(OffloadingData: Image)));
776
777 return std::move(Buffers);
778}
779
780Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
781bundleSYCL(ArrayRef<OffloadingImage> Images) {
782 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
783 if (DryRun) {
784 // In dry-run mode there is an empty input which is insufficient for the
785 // testing. Therefore, we return here a stub image.
786 OffloadingImage Image;
787 Image.TheImageKind = IMG_None;
788 Image.TheOffloadKind = OffloadKind::OFK_SYCL;
789 Image.StringData["symbols"] = "stub";
790 Image.Image = MemoryBuffer::getMemBufferCopy(InputData: "");
791 SmallString<0> SerializedImage = OffloadBinary::write(OffloadingData: Image);
792 Buffers.emplace_back(Args: MemoryBuffer::getMemBufferCopy(InputData: SerializedImage));
793 return std::move(Buffers);
794 }
795
796 for (const OffloadingImage &Image : Images) {
797 // clang-sycl-linker packs outputs into one binary blob. Therefore, it is
798 // passed to Offload Wrapper as is.
799 StringRef S(Image.Image->getBufferStart(), Image.Image->getBufferSize());
800 Buffers.emplace_back(Args: MemoryBuffer::getMemBufferCopy(InputData: S));
801 }
802
803 return std::move(Buffers);
804}
805
806Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
807bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
808 SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
809 for (const OffloadingImage &Image : Images)
810 InputFiles.emplace_back(Args: std::make_pair(x: Image.Image->getBufferIdentifier(),
811 y: Image.StringData.lookup(Key: "arch")));
812
813 auto FileOrErr = nvptx::fatbinary(InputFiles, Args);
814 if (!FileOrErr)
815 return FileOrErr.takeError();
816
817 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
818 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *FileOrErr);
819
820 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
821 if (std::error_code EC = ImageOrError.getError())
822 return createFileError(F: *FileOrErr, EC);
823 Buffers.emplace_back(Args: std::move(*ImageOrError));
824
825 return std::move(Buffers);
826}
827
828Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
829bundleHIP(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
830 SmallVector<std::tuple<StringRef, StringRef, StringRef>, 4> InputFiles;
831 for (const OffloadingImage &Image : Images)
832 InputFiles.emplace_back(Args: std::make_tuple(args: Image.Image->getBufferIdentifier(),
833 args: Image.StringData.lookup(Key: "triple"),
834 args: Image.StringData.lookup(Key: "arch")));
835
836 auto FileOrErr = amdgcn::fatbinary(InputFiles, Args);
837 if (!FileOrErr)
838 return FileOrErr.takeError();
839
840 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
841 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *FileOrErr);
842
843 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
844 if (std::error_code EC = ImageOrError.getError())
845 return createFileError(F: *FileOrErr, EC);
846 Buffers.emplace_back(Args: std::move(*ImageOrError));
847
848 return std::move(Buffers);
849}
850
851/// Transforms the input \p Images into the binary format the runtime expects
852/// for the given \p Kind.
853Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
854bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args,
855 OffloadKind Kind) {
856 llvm::TimeTraceScope TimeScope("Bundle linked output");
857 switch (Kind) {
858 case OFK_OpenMP:
859 return bundleOpenMP(Images);
860 case OFK_SYCL:
861 return bundleSYCL(Images);
862 case OFK_Cuda:
863 return bundleCuda(Images, Args);
864 case OFK_HIP:
865 return bundleHIP(Images, Args);
866 default:
867 return createStringError(S: getOffloadKindName(Name: Kind) +
868 " bundling is not supported");
869 }
870}
871
872/// Returns a new ArgList containg arguments used for the device linking phase.
873DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
874 const InputArgList &Args) {
875 DerivedArgList DAL = DerivedArgList(DerivedArgList(Args));
876 for (Arg *A : Args)
877 DAL.append(A);
878
879 // Set the subarchitecture and target triple for this compilation.
880 const OptTable &Tbl = getOptTable();
881 StringRef Arch = Args.MakeArgString(Str: Input.front().getBinary()->getArch());
882 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_arch_EQ),
883 Value: Arch == "generic" ? "" : Arch);
884 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_triple_EQ),
885 Value: Args.MakeArgString(Str: Input.front().getBinary()->getTriple()));
886
887 // If every input file is bitcode we have whole program visibility as we
888 // do only support static linking with bitcode.
889 auto ContainsBitcode = [](const OffloadFile &F) {
890 return identify_magic(magic: F.getBinary()->getImage()) == file_magic::bitcode;
891 };
892 if (llvm::all_of(Range&: Input, P: ContainsBitcode))
893 DAL.AddFlagArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_whole_program));
894
895 // Forward '-Xoffload-linker' options to the appropriate backend.
896 for (StringRef Arg : Args.getAllArgValues(Id: OPT_device_linker_args_EQ)) {
897 auto [Triple, Value] = Arg.split(Separator: '=');
898 llvm::Triple TT(Triple);
899 // If this isn't a recognized triple then it's an `arg=value` option.
900 if (TT.getArch() == Triple::ArchType::UnknownArch)
901 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
902 Value: Args.MakeArgString(Str: Arg));
903 else if (Value.empty())
904 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
905 Value: Args.MakeArgString(Str: Triple));
906 else if (Triple == DAL.getLastArgValue(Id: OPT_triple_EQ))
907 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
908 Value: Args.MakeArgString(Str: Value));
909 }
910
911 // Forward '-Xoffload-compiler' options to the appropriate backend.
912 for (StringRef Arg : Args.getAllArgValues(Id: OPT_device_compiler_args_EQ)) {
913 auto [Triple, Value] = Arg.split(Separator: '=');
914 llvm::Triple TT(Triple);
915 // If this isn't a recognized triple then it's an `arg=value` option.
916 if (TT.getArch() == Triple::ArchType::UnknownArch)
917 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
918 Value: Args.MakeArgString(Str: Arg));
919 else if (Value.empty())
920 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
921 Value: Args.MakeArgString(Str: Triple));
922 else if (Triple == DAL.getLastArgValue(Id: OPT_triple_EQ))
923 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
924 Value: Args.MakeArgString(Str: Value));
925 }
926
927 return DAL;
928}
929
930Error handleOverrideImages(
931 const InputArgList &Args,
932 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> &Images) {
933 for (StringRef Arg : Args.getAllArgValues(Id: OPT_override_image)) {
934 OffloadKind Kind = getOffloadKind(Name: Arg.split(Separator: "=").first);
935 StringRef Filename = Arg.split(Separator: "=").second;
936
937 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
938 MemoryBuffer::getFileOrSTDIN(Filename);
939 if (std::error_code EC = BufferOrErr.getError())
940 return createFileError(F: Filename, EC);
941
942 Expected<std::unique_ptr<ObjectFile>> ElfOrErr =
943 ObjectFile::createELFObjectFile(Object: **BufferOrErr,
944 /*InitContent=*/false);
945 if (!ElfOrErr)
946 return ElfOrErr.takeError();
947 ObjectFile &Elf = **ElfOrErr;
948
949 OffloadingImage TheImage{};
950 TheImage.TheImageKind = IMG_Object;
951 TheImage.TheOffloadKind = Kind;
952 TheImage.StringData["triple"] =
953 Args.MakeArgString(Str: Elf.makeTriple().getTriple());
954 if (std::optional<StringRef> CPU = Elf.tryGetCPUName())
955 TheImage.StringData["arch"] = Args.MakeArgString(Str: *CPU);
956 TheImage.Image = std::move(*BufferOrErr);
957
958 Images[Kind].emplace_back(Args: std::move(TheImage));
959 }
960 return Error::success();
961}
962
963/// Transforms all the extracted offloading input files into an image that can
964/// be registered by the runtime. If NeedsWrapping is false, writes bundled
965/// output directly without wrapping or host linking.
966Expected<SmallVector<StringRef>>
967linkAndWrapDeviceFiles(ArrayRef<SmallVector<OffloadFile>> LinkerInputFiles,
968 const InputArgList &Args, char **Argv, int Argc,
969 bool NeedsWrapping) {
970 llvm::TimeTraceScope TimeScope("Handle all device input");
971
972 std::mutex ImageMtx;
973 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> Images;
974
975 // Initialize the images with any overriding inputs.
976 if (Args.hasArg(Ids: OPT_override_image))
977 if (Error Err = handleOverrideImages(Args, Images))
978 return std::move(Err);
979
980 auto Err = parallelForEachError(R&: LinkerInputFiles, Fn: [&](auto &Input) -> Error {
981 llvm::TimeTraceScope TimeScope("Link device input");
982
983 // Each thread needs its own copy of the base arguments to maintain
984 // per-device argument storage of synthetic strings.
985 const OptTable &Tbl = getOptTable();
986 BumpPtrAllocator Alloc;
987 StringSaver Saver(Alloc);
988 auto BaseArgs =
989 Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [](StringRef Err) {
990 reportError(E: createStringError(S: Err));
991 });
992 auto LinkerArgs = getLinkerArgs(Input, BaseArgs);
993
994 uint16_t ActiveOffloadKindMask = 0u;
995 for (const auto &File : Input)
996 ActiveOffloadKindMask |= File.getBinary()->getOffloadKind();
997
998 // Linking images of SYCL offload kind with images of other kind is not
999 // supported.
1000 // TODO: Remove the above limitation.
1001 if ((ActiveOffloadKindMask & OFK_SYCL) &&
1002 ((ActiveOffloadKindMask ^ OFK_SYCL) != 0))
1003 return createStringError(Fmt: "Linking images of SYCL offload kind with "
1004 "images of any other kind is not supported");
1005
1006 // Write any remaining device inputs to an output file.
1007 SmallVector<StringRef> InputFiles;
1008 for (const OffloadFile &File : Input) {
1009 auto FileNameOrErr = writeOffloadFile(File);
1010 if (!FileNameOrErr)
1011 return FileNameOrErr.takeError();
1012 InputFiles.emplace_back(Args&: *FileNameOrErr);
1013 }
1014
1015 // Link the remaining device files using the device linker.
1016 auto OutputOrErr =
1017 linkDevice(InputFiles, LinkerArgs, ActiveOffloadKindMask);
1018 if (!OutputOrErr)
1019 return OutputOrErr.takeError();
1020
1021 // Store the offloading image for each linked output file.
1022 for (OffloadKind Kind = OFK_OpenMP; Kind != OFK_LAST;
1023 Kind = static_cast<OffloadKind>((uint16_t)(Kind) << 1)) {
1024 if ((ActiveOffloadKindMask & Kind) == 0)
1025 continue;
1026 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
1027 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *OutputOrErr);
1028 if (std::error_code EC = FileOrErr.getError()) {
1029 if (DryRun)
1030 FileOrErr = MemoryBuffer::getMemBuffer(InputData: "");
1031 else
1032 return createFileError(*OutputOrErr, EC);
1033 }
1034
1035 // Manually containerize offloading images not in ELF format.
1036 if (Error E = containerizeRawImage(*FileOrErr, Kind, LinkerArgs))
1037 return E;
1038
1039 std::scoped_lock<decltype(ImageMtx)> Guard(ImageMtx);
1040 OffloadingImage TheImage{};
1041 TheImage.TheImageKind =
1042 Args.hasArg(Ids: OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object;
1043 TheImage.TheOffloadKind = Kind;
1044 TheImage.StringData["triple"] =
1045 Args.MakeArgString(Str: LinkerArgs.getLastArgValue(OPT_triple_EQ));
1046 TheImage.StringData["arch"] =
1047 Args.MakeArgString(Str: LinkerArgs.getLastArgValue(OPT_arch_EQ));
1048 TheImage.Image = std::move(*FileOrErr);
1049
1050 Images[Kind].emplace_back(Args: std::move(TheImage));
1051 }
1052 return Error::success();
1053 });
1054 if (Err)
1055 return std::move(Err);
1056
1057 // Create a binary image of each offloading image and either embed it into a
1058 // new object file, or if all inputs were direct offload binaries, emit the
1059 // fat binary directly (e.g. .hipfb / .fatbin).
1060 SmallVector<StringRef> WrappedOutput;
1061 for (auto &[Kind, Input] : Images) {
1062 // We sort the entries before bundling so they appear in a deterministic
1063 // order in the final binary.
1064 llvm::sort(C&: Input, Comp: [](OffloadingImage &A, OffloadingImage &B) {
1065 return A.StringData["triple"] > B.StringData["triple"] ||
1066 A.StringData["arch"] > B.StringData["arch"] ||
1067 A.TheOffloadKind < B.TheOffloadKind;
1068 });
1069 auto BundledImagesOrErr = bundleLinkedOutput(Images: Input, Args, Kind);
1070 if (!BundledImagesOrErr)
1071 return BundledImagesOrErr.takeError();
1072
1073 if (!NeedsWrapping) {
1074 if (BundledImagesOrErr->size() != 1)
1075 return createStringError(
1076 Fmt: "Expected a single bundled image for direct fat binary output");
1077
1078 Expected<std::unique_ptr<FileOutputBuffer>> FOBOrErr =
1079 FileOutputBuffer::create(
1080 FilePath: ExecutableName, Size: BundledImagesOrErr->front()->getBufferSize());
1081 if (!FOBOrErr)
1082 return FOBOrErr.takeError();
1083 std::unique_ptr<FileOutputBuffer> FOB = std::move(*FOBOrErr);
1084 llvm::copy(Range: BundledImagesOrErr->front()->getBuffer(),
1085 Out: FOB->getBufferStart());
1086 if (Error E = FOB->commit())
1087 return std::move(E);
1088
1089 continue;
1090 }
1091
1092 auto OutputOrErr = wrapDeviceImages(Buffers: *BundledImagesOrErr, Args, Kind);
1093 if (!OutputOrErr)
1094 return OutputOrErr.takeError();
1095 WrappedOutput.push_back(Elt: *OutputOrErr);
1096 }
1097
1098 return WrappedOutput;
1099}
1100
1101std::optional<std::string> findFile(StringRef Dir, StringRef Root,
1102 const Twine &Name) {
1103 SmallString<128> Path;
1104 if (Dir.starts_with(Prefix: "="))
1105 sys::path::append(path&: Path, a: Root, b: Dir.substr(Start: 1), c: Name);
1106 else
1107 sys::path::append(path&: Path, a: Dir, b: Name);
1108
1109 if (sys::fs::exists(Path))
1110 return static_cast<std::string>(Path);
1111 return std::nullopt;
1112}
1113
1114std::optional<std::string>
1115findFromSearchPaths(StringRef Name, StringRef Root,
1116 ArrayRef<StringRef> SearchPaths) {
1117 for (StringRef Dir : SearchPaths)
1118 if (std::optional<std::string> File = findFile(Dir, Root, Name))
1119 return File;
1120 return std::nullopt;
1121}
1122
1123std::optional<std::string>
1124searchLibraryBaseName(StringRef Name, StringRef Root,
1125 ArrayRef<StringRef> SearchPaths) {
1126 for (StringRef Dir : SearchPaths) {
1127 if (std::optional<std::string> File =
1128 findFile(Dir, Root, Name: "lib" + Name + ".so"))
1129 return File;
1130 if (std::optional<std::string> File =
1131 findFile(Dir, Root, Name: "lib" + Name + ".a"))
1132 return File;
1133 }
1134 return std::nullopt;
1135}
1136
1137/// Search for static libraries in the linker's library path given input like
1138/// `-lfoo` or `-l:libfoo.a`.
1139std::optional<std::string> searchLibrary(StringRef Input, StringRef Root,
1140 ArrayRef<StringRef> SearchPaths) {
1141 if (Input.starts_with(Prefix: ":") || Input.ends_with(Suffix: ".lib"))
1142 return findFromSearchPaths(Name: Input.drop_front(), Root, SearchPaths);
1143 return searchLibraryBaseName(Name: Input, Root, SearchPaths);
1144}
1145
1146/// Search the input files and libraries for embedded device offloading code
1147/// and add it to the list of files to be linked. Files coming from static
1148/// libraries are only added to the input if they are used by an existing
1149/// input file. Returns a list of input files intended for a single linking job.
1150Expected<SmallVector<SmallVector<OffloadFile>>>
1151getDeviceInput(const ArgList &Args) {
1152 llvm::TimeTraceScope TimeScope("ExtractDeviceCode");
1153
1154 // Skip all the input if the user is overriding the output.
1155 if (Args.hasArg(Ids: OPT_override_image))
1156 return SmallVector<SmallVector<OffloadFile>>();
1157
1158 StringRef Root = Args.getLastArgValue(Id: OPT_sysroot_EQ);
1159 SmallVector<StringRef> LibraryPaths;
1160 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_library_path, Ids: OPT_libpath))
1161 LibraryPaths.push_back(Elt: Arg->getValue());
1162
1163 BumpPtrAllocator Alloc;
1164 StringSaver Saver(Alloc);
1165
1166 // Try to extract device code from the linker input files.
1167 bool WholeArchive = Args.hasArg(Ids: OPT_wholearchive_flag) ? true : false;
1168 SmallVector<OffloadFile> ObjectFilesToExtract;
1169 SmallVector<OffloadFile> ArchiveFilesToExtract;
1170 for (const opt::Arg *Arg : Args.filtered(
1171 Ids: OPT_INPUT, Ids: OPT_library, Ids: OPT_whole_archive, Ids: OPT_no_whole_archive)) {
1172 if (Arg->getOption().matches(ID: OPT_whole_archive) ||
1173 Arg->getOption().matches(ID: OPT_no_whole_archive)) {
1174 WholeArchive = Arg->getOption().matches(ID: OPT_whole_archive);
1175 continue;
1176 }
1177
1178 std::optional<std::string> Filename =
1179 Arg->getOption().matches(ID: OPT_library)
1180 ? searchLibrary(Input: Arg->getValue(), Root, SearchPaths: LibraryPaths)
1181 : std::string(Arg->getValue());
1182
1183 if (!Filename && Arg->getOption().matches(ID: OPT_library))
1184 reportError(
1185 E: createStringError(Fmt: "unable to find library -l%s", Vals: Arg->getValue()));
1186
1187 if (!Filename || !sys::fs::exists(Path: *Filename) ||
1188 sys::fs::is_directory(Path: *Filename))
1189 continue;
1190
1191 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1192 MemoryBuffer::getFileOrSTDIN(Filename: *Filename);
1193 if (std::error_code EC = BufferOrErr.getError())
1194 return createFileError(F: *Filename, EC);
1195
1196 MemoryBufferRef Buffer = **BufferOrErr;
1197 if (identify_magic(magic: Buffer.getBuffer()) == file_magic::elf_shared_object)
1198 continue;
1199
1200 SmallVector<OffloadFile> Binaries;
1201 if (Error Err = extractOffloadBinaries(Buffer, Binaries))
1202 return std::move(Err);
1203
1204 for (auto &OffloadFile : Binaries) {
1205 if (identify_magic(magic: Buffer.getBuffer()) == file_magic::archive &&
1206 !WholeArchive)
1207 ArchiveFilesToExtract.emplace_back(Args: std::move(OffloadFile));
1208 else
1209 ObjectFilesToExtract.emplace_back(Args: std::move(OffloadFile));
1210 }
1211 }
1212
1213 // Link all standard input files and update the list of symbols.
1214 MapVector<OffloadFile::TargetID, SmallVector<OffloadFile, 0>> InputFiles;
1215 for (OffloadFile &Binary : ObjectFilesToExtract) {
1216 if (!Binary.getBinary())
1217 continue;
1218
1219 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1220 for (const auto &[ID, Input] : InputFiles)
1221 if (object::areTargetsCompatible(LHS: Binary, RHS: ID))
1222 CompatibleTargets.emplace_back(Args: ID);
1223
1224 for (const auto &[Index, ID] : llvm::enumerate(First&: CompatibleTargets)) {
1225 // If another target needs this binary it must be copied instead.
1226 if (Index == CompatibleTargets.size() - 1)
1227 InputFiles[ID].emplace_back(Args: std::move(Binary));
1228 else
1229 InputFiles[ID].emplace_back(Args: Binary.copy());
1230 }
1231 }
1232
1233 llvm::DenseSet<StringRef> ShouldExtract;
1234 for (auto &Arg : Args.getAllArgValues(Id: OPT_should_extract))
1235 ShouldExtract.insert(V: Arg);
1236
1237 // We only extract archive members from the fat binary if we find a used or
1238 // requested target. Unlike normal static archive handling, we just extract
1239 // every object file contained in the archive.
1240 for (OffloadFile &Binary : ArchiveFilesToExtract) {
1241 if (!Binary.getBinary())
1242 continue;
1243
1244 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1245 for (const auto &[ID, Input] : InputFiles)
1246 if (object::areTargetsCompatible(LHS: Binary, RHS: ID))
1247 CompatibleTargets.emplace_back(Args: ID);
1248
1249 for (const auto &[Index, ID] : llvm::enumerate(First&: CompatibleTargets)) {
1250 // Only extract an if we have an an object matching this target or it
1251 // was specifically requested.
1252 if (!InputFiles.count(Key: ID) && !ShouldExtract.contains(V: ID.second))
1253 continue;
1254
1255 // If another target needs this binary it must be copied instead.
1256 if (Index == CompatibleTargets.size() - 1)
1257 InputFiles[ID].emplace_back(Args: std::move(Binary));
1258 else
1259 InputFiles[ID].emplace_back(Args: Binary.copy());
1260 }
1261 }
1262
1263 SmallVector<SmallVector<OffloadFile>> InputsForTarget;
1264 for (auto &[ID, Input] : InputFiles)
1265 InputsForTarget.emplace_back(Args: std::move(Input));
1266
1267 return std::move(InputsForTarget);
1268}
1269
1270} // namespace
1271
1272int main(int Argc, char **Argv) {
1273 InitLLVM X(Argc, Argv);
1274 InitializeAllTargetInfos();
1275 InitializeAllTargets();
1276 InitializeAllTargetMCs();
1277 InitializeAllAsmParsers();
1278 InitializeAllAsmPrinters();
1279
1280 LinkerExecutable = Argv[0];
1281 sys::PrintStackTraceOnErrorSignal(Argv0: Argv[0]);
1282
1283 const OptTable &Tbl = getOptTable();
1284 BumpPtrAllocator Alloc;
1285 StringSaver Saver(Alloc);
1286 auto Args = Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [&](StringRef Err) {
1287 reportError(E: createStringError(S: Err));
1288 });
1289
1290 if (Args.hasArg(Ids: OPT_help) || Args.hasArg(Ids: OPT_help_hidden)) {
1291 Tbl.printHelp(
1292 OS&: outs(),
1293 Usage: "clang-linker-wrapper [options] -- <options to passed to the linker>",
1294 Title: "\nA wrapper utility over the host linker. It scans the input files\n"
1295 "for sections that require additional processing prior to linking.\n"
1296 "The will then transparently pass all arguments and input to the\n"
1297 "specified host linker to create the final binary.\n",
1298 ShowHidden: Args.hasArg(Ids: OPT_help_hidden), ShowAllAliases: Args.hasArg(Ids: OPT_help_hidden));
1299 return EXIT_SUCCESS;
1300 }
1301 if (Args.hasArg(Ids: OPT_v)) {
1302 printVersion(OS&: outs());
1303 return EXIT_SUCCESS;
1304 }
1305
1306 // This forwards '-mllvm' arguments to LLVM if present.
1307 SmallVector<const char *> NewArgv = {Argv[0]};
1308 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_mllvm))
1309 NewArgv.push_back(Elt: Arg->getValue());
1310 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_offload_opt_eq_minus))
1311 NewArgv.push_back(Elt: Arg->getValue());
1312 SmallVector<PassPlugin, 1> PluginList;
1313 PassPlugins.setCallback([&](const std::string &PluginPath) {
1314 auto Plugin = PassPlugin::Load(Filename: PluginPath);
1315 if (!Plugin)
1316 reportFatalUsageError(Err: Plugin.takeError());
1317 PluginList.emplace_back(Args&: Plugin.get());
1318 });
1319 cl::ParseCommandLineOptions(argc: NewArgv.size(), argv: &NewArgv[0]);
1320
1321 Verbose = Args.hasArg(Ids: OPT_verbose);
1322 DryRun = Args.hasArg(Ids: OPT_dry_run);
1323 SaveTemps = Args.hasArg(Ids: OPT_save_temps);
1324 CudaBinaryPath = Args.getLastArgValue(Id: OPT_cuda_path_EQ).str();
1325
1326 llvm::Triple Triple(
1327 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
1328 if (Args.hasArg(Ids: OPT_o))
1329 ExecutableName = Args.getLastArgValue(Id: OPT_o, Default: "a.out");
1330 else if (Args.hasArg(Ids: OPT_out))
1331 ExecutableName = Args.getLastArgValue(Id: OPT_out, Default: "a.exe");
1332 else
1333 ExecutableName = Triple.isOSWindows() ? "a.exe" : "a.out";
1334
1335 parallel::strategy = hardware_concurrency(ThreadCount: 1);
1336 if (auto *Arg = Args.getLastArg(Ids: OPT_wrapper_jobs)) {
1337 StringRef Val = Arg->getValue();
1338 if (Val.equals_insensitive(RHS: "jobserver"))
1339 parallel::strategy = jobserver_concurrency();
1340 else {
1341 unsigned Threads = 0;
1342 if (!llvm::to_integer(S: Val, Num&: Threads) || Threads == 0)
1343 reportError(E: createStringError(
1344 Fmt: "%s: expected a positive integer or 'jobserver', got '%s'",
1345 Vals: Arg->getSpelling().data(), Vals: Val.data()));
1346 else
1347 parallel::strategy = hardware_concurrency(ThreadCount: Threads);
1348 }
1349 }
1350
1351 if (Args.hasArg(Ids: OPT_wrapper_time_trace_eq)) {
1352 unsigned Granularity;
1353 Args.getLastArgValue(Id: OPT_wrapper_time_trace_granularity, Default: "500")
1354 .getAsInteger(Radix: 10, Result&: Granularity);
1355 timeTraceProfilerInitialize(TimeTraceGranularity: Granularity, ProcName: Argv[0]);
1356 }
1357
1358 {
1359 llvm::TimeTraceScope TimeScope("Execute linker wrapper");
1360
1361 // Extract the device input files stored in the host fat binary.
1362 auto DeviceInputFiles = getDeviceInput(Args);
1363 if (!DeviceInputFiles)
1364 reportError(E: DeviceInputFiles.takeError());
1365
1366 // Check if we should emit fat binary directly without wrapping or host
1367 // linking.
1368 bool EmitFatbinOnly = Args.hasArg(Ids: OPT_emit_fatbin_only);
1369
1370 // Link and process the device images. The function may emit a direct fat
1371 // binary if --emit-fatbin-only is specified.
1372 auto FilesOrErr = linkAndWrapDeviceFiles(LinkerInputFiles: *DeviceInputFiles, Args, Argv,
1373 Argc, NeedsWrapping: !EmitFatbinOnly);
1374 if (!FilesOrErr)
1375 reportError(E: FilesOrErr.takeError());
1376
1377 // Run the host linking job with the rendered arguments.
1378 if (!EmitFatbinOnly) {
1379 if (Error Err = runLinker(Files: *FilesOrErr, Args))
1380 reportError(E: std::move(Err));
1381 }
1382 }
1383
1384 if (const opt::Arg *Arg = Args.getLastArg(Ids: OPT_wrapper_time_trace_eq)) {
1385 if (Error Err = timeTraceProfilerWrite(PreferredFileName: Arg->getValue(), FallbackFileName: ExecutableName))
1386 reportError(E: std::move(Err));
1387 timeTraceProfilerCleanup();
1388 }
1389
1390 // Remove the temporary files created.
1391 if (!SaveTemps)
1392 for (const auto &TempFile : TempFiles)
1393 if (std::error_code EC = sys::fs::remove(path: TempFile))
1394 reportError(E: createFileError(F: TempFile, EC));
1395
1396 return EXIT_SUCCESS;
1397}
1398