1//===-- clang-linker-wrapper/ClangLinkerWrapper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This tool works as a wrapper over a linking job. This tool is used to create
10// linked device images for offloading. It scans the linker's input for embedded
11// device offloading data stored in sections `.llvm.offloading` and extracts it
12// as a temporary file. The extracted device files will then be passed to a
13// device linking job to create a final device image.
14//
15//===----------------------------------------------------------------------===//
16
17#include "clang/Basic/TargetID.h"
18#include "clang/Basic/Version.h"
19#include "llvm/ADT/MapVector.h"
20#include "llvm/BinaryFormat/Magic.h"
21#include "llvm/Bitcode/BitcodeWriter.h"
22#include "llvm/CodeGen/CommandFlags.h"
23#include "llvm/Frontend/Offloading/OffloadWrapper.h"
24#include "llvm/Frontend/Offloading/Utility.h"
25#include "llvm/IR/DiagnosticPrinter.h"
26#include "llvm/IR/Module.h"
27#include "llvm/IRReader/IRReader.h"
28#include "llvm/LTO/LTO.h"
29#include "llvm/MC/TargetRegistry.h"
30#include "llvm/Object/Binary.h"
31#include "llvm/Object/IRObjectFile.h"
32#include "llvm/Object/ObjectFile.h"
33#include "llvm/Object/OffloadBinary.h"
34#include "llvm/Option/ArgList.h"
35#include "llvm/Option/OptTable.h"
36#include "llvm/Option/Option.h"
37#include "llvm/Plugins/PassPlugin.h"
38#include "llvm/Remarks/HotnessThresholdParser.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/FileOutputBuffer.h"
41#include "llvm/Support/FileSystem.h"
42#include "llvm/Support/InitLLVM.h"
43#include "llvm/Support/MemoryBuffer.h"
44#include "llvm/Support/Parallel.h"
45#include "llvm/Support/Path.h"
46#include "llvm/Support/Program.h"
47#include "llvm/Support/Signals.h"
48#include "llvm/Support/SourceMgr.h"
49#include "llvm/Support/StringSaver.h"
50#include "llvm/Support/TargetSelect.h"
51#include "llvm/Support/TimeProfiler.h"
52#include "llvm/Support/WithColor.h"
53#include "llvm/Support/raw_ostream.h"
54#include "llvm/Target/TargetMachine.h"
55#include "llvm/TargetParser/Host.h"
56#include <optional>
57
58using namespace llvm;
59using namespace llvm::opt;
60using namespace llvm::object;
61
62// Various tools (e.g., llc and opt) duplicate this series of declarations for
63// options related to passes and remarks.
64
65static cl::opt<bool> RemarksWithHotness(
66 "pass-remarks-with-hotness",
67 cl::desc("With PGO, include profile count in optimization remarks"),
68 cl::Hidden);
69
70static cl::opt<std::optional<uint64_t>, false, remarks::HotnessThresholdParser>
71 RemarksHotnessThreshold(
72 "pass-remarks-hotness-threshold",
73 cl::desc("Minimum profile count required for "
74 "an optimization remark to be output. "
75 "Use 'auto' to apply the threshold from profile summary."),
76 cl::value_desc("N or 'auto'"), cl::init(Val: 0), cl::Hidden);
77
78static cl::opt<std::string>
79 RemarksFilename("pass-remarks-output",
80 cl::desc("Output filename for pass remarks"),
81 cl::value_desc("filename"));
82
83static cl::opt<std::string>
84 RemarksPasses("pass-remarks-filter",
85 cl::desc("Only record optimization remarks from passes whose "
86 "names match the given regular expression"),
87 cl::value_desc("regex"));
88
89static cl::opt<std::string> RemarksFormat(
90 "pass-remarks-format",
91 cl::desc("The format used for serializing remarks (default: YAML)"),
92 cl::value_desc("format"), cl::init(Val: "yaml"));
93
94static cl::list<std::string>
95 PassPlugins("load-pass-plugin",
96 cl::desc("Load passes from plugin library"));
97
98static cl::opt<std::string> PassPipeline(
99 "passes",
100 cl::desc(
101 "A textual description of the pass pipeline. To have analysis passes "
102 "available before a certain pass, add 'require<foo-analysis>'. "
103 "'-passes' overrides the pass pipeline (but not all effects) from "
104 "specifying '--opt-level=O?' (O2 is the default) to "
105 "clang-linker-wrapper. Be sure to include the corresponding "
106 "'default<O?>' in '-passes'."));
107static cl::alias PassPipeline2("p", cl::aliasopt(PassPipeline),
108 cl::desc("Alias for -passes"));
109
110/// Path of the current binary.
111static const char *LinkerExecutable;
112
113/// Save intermediary results.
114static bool SaveTemps = false;
115
116/// Print arguments without executing.
117static bool DryRun = false;
118
119/// Print verbose output.
120static bool Verbose = false;
121
122/// Filename of the executable being created.
123static StringRef ExecutableName;
124
125/// Binary path for the CUDA installation.
126static std::string CudaBinaryPath;
127
128/// Mutex lock to protect writes to shared TempFiles in parallel.
129static std::mutex TempFilesMutex;
130
131/// Temporary files created by the linker wrapper.
132static std::list<SmallString<128>> TempFiles;
133
134/// Codegen flags for LTO backend.
135static codegen::RegisterCodeGenFlags CodeGenFlags;
136
137/// Whether or not to look through symlinks when resolving binaries.
138static bool CanonicalPrefixes = true;
139
140using OffloadingImage = OffloadBinary::OffloadingImage;
141
142namespace llvm {
143// Provide DenseMapInfo so that OffloadKind can be used in a DenseMap.
144template <> struct DenseMapInfo<OffloadKind> {
145 static inline OffloadKind getEmptyKey() { return OFK_LAST; }
146 static inline OffloadKind getTombstoneKey() {
147 return static_cast<OffloadKind>(OFK_LAST + 1);
148 }
149 static unsigned getHashValue(const OffloadKind &Val) { return Val; }
150
151 static bool isEqual(const OffloadKind &LHS, const OffloadKind &RHS) {
152 return LHS == RHS;
153 }
154};
155} // namespace llvm
156
157namespace {
158using std::error_code;
159
/// Option flags specific to the linker wrapper. The values must not overlap
/// with llvm::opt::DriverFlag.
enum WrapperFlags {
  /// Options only used by the linker wrapper.
  WrapperOnlyOption = (1 << 4),
  /// Options only used for device linking.
  DeviceOnlyOption = (1 << 5),
};
165
166enum ID {
167 OPT_INVALID = 0, // This is not an option ID.
168#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
169#include "LinkerWrapperOpts.inc"
170 LastOption
171#undef OPTION
172};
173
174#define OPTTABLE_STR_TABLE_CODE
175#include "LinkerWrapperOpts.inc"
176#undef OPTTABLE_STR_TABLE_CODE
177
178#define OPTTABLE_PREFIXES_TABLE_CODE
179#include "LinkerWrapperOpts.inc"
180#undef OPTTABLE_PREFIXES_TABLE_CODE
181
182static constexpr OptTable::Info InfoTable[] = {
183#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
184#include "LinkerWrapperOpts.inc"
185#undef OPTION
186};
187
188class WrapperOptTable : public opt::GenericOptTable {
189public:
190 WrapperOptTable()
191 : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
192};
193
194const OptTable &getOptTable() {
195 static const WrapperOptTable Table;
196 return Table;
197}
198
199void printCommands(ArrayRef<StringRef> CmdArgs) {
200 if (CmdArgs.empty())
201 return;
202
203 llvm::errs() << " \"" << CmdArgs.front() << "\" ";
204 for (auto IC = std::next(x: CmdArgs.begin()), IE = CmdArgs.end(); IC != IE; ++IC)
205 llvm::errs() << *IC << (std::next(x: IC) != IE ? " " : "\n");
206}
207
208[[noreturn]] void reportError(Error E) {
209 outs().flush();
210 logAllUnhandledErrors(E: std::move(E),
211 OS&: WithColor::error(OS&: errs(), Prefix: LinkerExecutable));
212 exit(EXIT_FAILURE);
213}
214
215std::string getExecutableDir(const char *Name) {
216 if (!CanonicalPrefixes)
217 return sys::path::parent_path(path: LinkerExecutable).str();
218 void *Ptr = reinterpret_cast<void *>(&getExecutableDir);
219 return sys::path::parent_path(path: sys::fs::getMainExecutable(argv0: Name, MainExecAddr: Ptr)).str();
220}
221
222/// Get a temporary filename suitable for output.
223Expected<StringRef> createOutputFile(const Twine &Prefix, StringRef Extension) {
224 std::scoped_lock<decltype(TempFilesMutex)> Lock(TempFilesMutex);
225 SmallString<128> OutputFile;
226 std::string PrefixStr = clang::sanitizeTargetIDInFileName(TargetID: Prefix.str());
227
228 if (SaveTemps) {
229 (PrefixStr + "." + Extension).toNullTerminatedStringRef(Out&: OutputFile);
230 } else {
231 if (std::error_code EC =
232 sys::fs::createTemporaryFile(Prefix: PrefixStr, Suffix: Extension, ResultPath&: OutputFile))
233 return createFileError(F: OutputFile, EC);
234 }
235
236 TempFiles.emplace_back(args: std::move(OutputFile));
237 return TempFiles.back();
238}
239
240/// Execute the command \p ExecutablePath with the arguments \p Args.
241Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) {
242 if (Verbose || DryRun)
243 printCommands(CmdArgs: Args);
244
245 if (DryRun)
246 return Error::success();
247
248 // If the command line fits within system limits, execute directly.
249 if (sys::commandLineFitsWithinSystemLimits(Program: ExecutablePath, Args)) {
250 if (sys::ExecuteAndWait(Program: ExecutablePath, Args))
251 return createStringError(
252 Fmt: "'%s' failed", Vals: sys::path::filename(path: ExecutablePath).str().c_str());
253 return Error::success();
254 }
255
256 // Write the arguments to a response file and pass that instead.
257 auto TempFileOrErr = createOutputFile(Prefix: "response", Extension: "rsp");
258 if (!TempFileOrErr)
259 return TempFileOrErr.takeError();
260
261 SmallString<256> Contents;
262 raw_svector_ostream OS(Contents);
263 for (StringRef Arg : llvm::drop_begin(RangeOrContainer&: Args)) {
264 sys::printArg(OS, Arg, /*Quote=*/true);
265 OS << " ";
266 }
267
268 if (std::error_code EC = sys::writeFileWithEncoding(FileName: *TempFileOrErr, Contents))
269 return createStringError(Fmt: "failed to write response file: %s",
270 Vals: EC.message().c_str());
271
272 std::string ResponseFile = ("@" + *TempFileOrErr).str();
273 SmallVector<StringRef, 2> NewArgs = {Args.front(), ResponseFile};
274 if (sys::ExecuteAndWait(Program: ExecutablePath, Args: NewArgs))
275 return createStringError(Fmt: "'%s' failed",
276 Vals: sys::path::filename(path: ExecutablePath).str().c_str());
277 return Error::success();
278}
279
280Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
281
282 ErrorOr<std::string> Path = sys::findProgramByName(Name, Paths);
283 if (!Path)
284 Path = sys::findProgramByName(Name);
285 if (!Path && DryRun)
286 return Name.str();
287 if (!Path)
288 return createStringError(EC: Path.getError(),
289 S: "Unable to find '" + Name + "' in path");
290 return *Path;
291}
292
293bool linkerSupportsLTO(const ArgList &Args) {
294 llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
295 return Triple.isNVPTX() || Triple.isAMDGPU() ||
296 (!Triple.isGPU() &&
297 Args.getLastArgValue(Id: OPT_linker_path_EQ).ends_with(Suffix: "lld"));
298}
299
300/// Returns the hashed value for a constant string.
301std::string getHash(StringRef Str) {
302 llvm::MD5 Hasher;
303 llvm::MD5::MD5Result Hash;
304 Hasher.update(Str);
305 Hasher.final(Result&: Hash);
306 return llvm::utohexstr(X: Hash.low(), /*LowerCase=*/true);
307}
308
309/// Renames offloading entry sections in a relocatable link so they do not
310/// conflict with a later link job.
311Error relocateOffloadSection(const ArgList &Args, StringRef Output) {
312 llvm::Triple Triple(
313 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
314 if (Triple.isOSWindows())
315 return createStringError(
316 Fmt: "Relocatable linking is not supported on COFF targets");
317
318 Expected<std::string> ObjcopyPath =
319 findProgram(Name: "llvm-objcopy", Paths: {getExecutableDir(Name: "llvm-objcopy")});
320 if (!ObjcopyPath)
321 return ObjcopyPath.takeError();
322
323 // Use the linker output file to get a unique hash. This creates a unique
324 // identifier to rename the sections to that is deterministic to the contents.
325 auto BufferOrErr = DryRun ? MemoryBuffer::getMemBuffer(InputData: "")
326 : MemoryBuffer::getFileOrSTDIN(Filename: Output);
327 if (!BufferOrErr)
328 return createStringError(Fmt: "Failed to open %s", Vals: Output.str().c_str());
329 std::string Suffix = "_" + getHash(Str: (*BufferOrErr)->getBuffer());
330
331 SmallVector<StringRef> ObjcopyArgs = {
332 *ObjcopyPath,
333 Output,
334 };
335
336 // Remove the old .llvm.offloading section to prevent further linking.
337 ObjcopyArgs.emplace_back(Args: "--remove-section");
338 ObjcopyArgs.emplace_back(Args: ".llvm.offloading");
339 StringRef Prefix = "llvm";
340 auto Section = (Prefix + "_offload_entries").str();
341 // Rename the offloading entries to make them private to this link unit.
342 ObjcopyArgs.emplace_back(Args: "--rename-section");
343 ObjcopyArgs.emplace_back(
344 Args: Args.MakeArgString(Str: Section + "=" + Section + Suffix));
345
346 // Rename the __start_ / __stop_ symbols appropriately to iterate over the
347 // newly renamed section containing the offloading entries.
348 ObjcopyArgs.emplace_back(Args: "--redefine-sym");
349 ObjcopyArgs.emplace_back(Args: Args.MakeArgString(Str: "__start_" + Section + "=" +
350 "__start_" + Section + Suffix));
351 ObjcopyArgs.emplace_back(Args: "--redefine-sym");
352 ObjcopyArgs.emplace_back(Args: Args.MakeArgString(Str: "__stop_" + Section + "=" +
353 "__stop_" + Section + Suffix));
354
355 if (Error Err = executeCommands(ExecutablePath: *ObjcopyPath, Args: ObjcopyArgs))
356 return Err;
357
358 return Error::success();
359}
360
361/// Runs the wrapped linker job with the newly created input.
362Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) {
363 llvm::TimeTraceScope TimeScope("Execute host linker");
364
365 // Render the linker arguments and add the newly created image. We add it
366 // after the output file to ensure it is linked with the correct libraries.
367 StringRef LinkerPath = Args.getLastArgValue(Id: OPT_linker_path_EQ);
368 if (LinkerPath.empty())
369 return createStringError(Fmt: "linker path missing, must pass 'linker-path'");
370 ArgStringList NewLinkerArgs;
371 for (const opt::Arg *Arg : Args) {
372 // Do not forward arguments only intended for the linker wrapper.
373 if (Arg->getOption().hasFlag(Val: WrapperOnlyOption))
374 continue;
375
376 Arg->render(Args, Output&: NewLinkerArgs);
377 if (Arg->getOption().matches(ID: OPT_o) || Arg->getOption().matches(ID: OPT_out))
378 llvm::transform(Range&: Files, d_first: std::back_inserter(x&: NewLinkerArgs),
379 F: [&](StringRef A) { return Args.MakeArgString(Str: A); });
380 }
381
382 SmallVector<StringRef> LinkerArgs({LinkerPath});
383 for (StringRef Arg : NewLinkerArgs)
384 LinkerArgs.push_back(Elt: Arg);
385 if (Error Err = executeCommands(ExecutablePath: LinkerPath, Args: LinkerArgs))
386 return Err;
387
388 if (Args.hasArg(Ids: OPT_relocatable))
389 return relocateOffloadSection(Args, Output: ExecutableName);
390
391 return Error::success();
392}
393
394void printVersion(raw_ostream &OS) {
395 OS << clang::getClangToolFullVersion(ToolName: "clang-linker-wrapper") << '\n';
396}
397
398namespace nvptx {
399Expected<StringRef>
400fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
401 const ArgList &Args) {
402 llvm::TimeTraceScope TimeScope("NVPTX fatbinary");
403 // NVPTX uses the fatbinary program to bundle the linked images.
404 Expected<std::string> FatBinaryPath =
405 findProgram(Name: "fatbinary", Paths: {CudaBinaryPath + "/bin"});
406 if (!FatBinaryPath)
407 return FatBinaryPath.takeError();
408
409 llvm::Triple Triple(
410 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
411
412 // Create a new file to write the linked device image to.
413 auto TempFileOrErr =
414 createOutputFile(Prefix: sys::path::filename(path: ExecutableName), Extension: "fatbin");
415 if (!TempFileOrErr)
416 return TempFileOrErr.takeError();
417
418 SmallVector<StringRef, 16> CmdArgs;
419 CmdArgs.push_back(Elt: *FatBinaryPath);
420 CmdArgs.push_back(Elt: Triple.isArch64Bit() ? "-64" : "-32");
421 CmdArgs.push_back(Elt: "--create");
422 CmdArgs.push_back(Elt: *TempFileOrErr);
423 for (const auto &[File, Arch] : InputFiles)
424 CmdArgs.push_back(Elt: Args.MakeArgString(
425 Str: "--image3=kind=elf,sm=" + Arch.drop_front(N: 3) + ",file=" + File));
426
427 if (Error Err = executeCommands(ExecutablePath: *FatBinaryPath, Args: CmdArgs))
428 return std::move(Err);
429
430 return *TempFileOrErr;
431}
432} // namespace nvptx
433
434namespace amdgcn {
435
436// Constructs a triple string for clang offload bundler.
437// NOTE: copied from HIPUtility.cpp.
438static std::string normalizeForBundler(const llvm::Triple &T,
439 bool HasTargetID) {
440 return HasTargetID ? (T.getArchName() + "-" + T.getVendorName() + "-" +
441 T.getOSName() + "-" + T.getEnvironmentName())
442 .str()
443 : T.normalize(Form: llvm::Triple::CanonicalForm::FOUR_IDENT);
444}
445
446Expected<StringRef>
447fatbinary(ArrayRef<std::tuple<StringRef, StringRef, StringRef>> InputFiles,
448 const ArgList &Args) {
449 llvm::TimeTraceScope TimeScope("AMDGPU Fatbinary");
450
451 // AMDGPU uses the clang-offload-bundler to bundle the linked images.
452 Expected<std::string> OffloadBundlerPath = findProgram(
453 Name: "clang-offload-bundler", Paths: {getExecutableDir(Name: "clang-offload-bundler")});
454 if (!OffloadBundlerPath)
455 return OffloadBundlerPath.takeError();
456
457 // Create a new file to write the linked device image to.
458 auto TempFileOrErr =
459 createOutputFile(Prefix: sys::path::filename(path: ExecutableName), Extension: "hipfb");
460 if (!TempFileOrErr)
461 return TempFileOrErr.takeError();
462
463 BumpPtrAllocator Alloc;
464 StringSaver Saver(Alloc);
465
466 SmallVector<StringRef, 16> CmdArgs;
467 CmdArgs.push_back(Elt: *OffloadBundlerPath);
468 CmdArgs.push_back(Elt: "-type=o");
469 CmdArgs.push_back(Elt: "-bundle-align=4096");
470
471 if (Args.hasArg(Ids: OPT_compress))
472 CmdArgs.push_back(Elt: "-compress");
473 if (auto *Arg = Args.getLastArg(Ids: OPT_compression_level_eq))
474 CmdArgs.push_back(
475 Elt: Args.MakeArgString(Str: Twine("-compression-level=") + Arg->getValue()));
476
477 llvm::Triple HostTriple(
478 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
479 SmallVector<StringRef> Targets = {
480 Saver.save(S: "-targets=host-" + HostTriple.normalize())};
481 for (const auto &[File, TripleRef, Arch] : InputFiles) {
482 std::string NormalizedTriple =
483 normalizeForBundler(T: Triple(TripleRef), HasTargetID: !Arch.empty());
484 Targets.push_back(Elt: Saver.save(S: "hip-" + NormalizedTriple + "-" + Arch));
485 }
486 CmdArgs.push_back(Elt: Saver.save(S: llvm::join(R&: Targets, Separator: ",")));
487
488#ifdef _WIN32
489 CmdArgs.push_back("-input=NUL");
490#else
491 CmdArgs.push_back(Elt: "-input=/dev/null");
492#endif
493 for (const auto &[File, Triple, Arch] : InputFiles)
494 CmdArgs.push_back(Elt: Saver.save(S: "-input=" + File));
495
496 CmdArgs.push_back(Elt: Saver.save(S: "-output=" + *TempFileOrErr));
497
498 if (Error Err = executeCommands(ExecutablePath: *OffloadBundlerPath, Args: CmdArgs))
499 return std::move(Err);
500
501 return *TempFileOrErr;
502}
503} // namespace amdgcn
504
505namespace generic {
506Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
507 uint16_t ActiveOffloadKindMask) {
508 llvm::TimeTraceScope TimeScope("Clang");
509 // Use `clang` to invoke the appropriate device tools.
510 Expected<std::string> ClangPath =
511 findProgram(Name: "clang", Paths: {getExecutableDir(Name: "clang")});
512 if (!ClangPath)
513 return ClangPath.takeError();
514
515 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
516 StringRef Arch = Args.getLastArgValue(Id: OPT_arch_EQ);
517 // Create a new file to write the linked device image to. Assume that the
518 // input filename already has the device and architecture.
519 std::string OutputFileBase =
520 "." + Triple.getArchName().str() + "." + Arch.str();
521 auto TempFileOrErr = createOutputFile(
522 Prefix: sys::path::filename(path: ExecutableName) + OutputFileBase, Extension: "img");
523 if (!TempFileOrErr)
524 return TempFileOrErr.takeError();
525
526 SmallVector<StringRef, 16> CmdArgs{
527 *ClangPath,
528 "--no-default-config",
529 "-o",
530 *TempFileOrErr,
531 // Without -dumpdir, Clang will place auxiliary output files in the
532 // temporary directory of TempFileOrErr, where they will not easily be
533 // found by the user and might eventually be automatically removed. Tell
534 // Clang to instead place them alongside the final executable.
535 "-dumpdir",
536 Args.MakeArgString(Str: ExecutableName + OutputFileBase + ".img."),
537 Args.MakeArgString(Str: "--target=" + Triple.getTriple()),
538 };
539
540 if (!Arch.empty())
541 Triple.isAMDGPU() ? CmdArgs.push_back(Elt: Args.MakeArgString(Str: "-mcpu=" + Arch))
542 : CmdArgs.push_back(Elt: Args.MakeArgString(Str: "-march=" + Arch));
543
544 // AMDGPU is always in LTO mode currently.
545 if (Triple.isAMDGPU())
546 CmdArgs.push_back(Elt: "-flto");
547
548 // Forward all of the `--offload-opt` and `-mllvm` options to the device.
549 for (auto &Arg : Args.filtered(Ids: OPT_offload_opt_eq_minus, Ids: OPT_mllvm))
550 CmdArgs.append(
551 IL: {"-Xlinker",
552 Args.MakeArgString(Str: "--plugin-opt=" + StringRef(Arg->getValue()))});
553
554 if (!Triple.isNVPTX() && !Triple.isSPIRV())
555 CmdArgs.push_back(Elt: "-Wl,--no-undefined");
556
557 for (StringRef InputFile : InputFiles)
558 CmdArgs.push_back(Elt: InputFile);
559
560 // If this is CPU offloading we copy the input libraries.
561 if (!Triple.isGPU()) {
562 CmdArgs.push_back(Elt: "-Wl,-Bsymbolic");
563 CmdArgs.push_back(Elt: "-shared");
564 ArgStringList LinkerArgs;
565 for (const opt::Arg *Arg :
566 Args.filtered(Ids: OPT_INPUT, Ids: OPT_library, Ids: OPT_library_path, Ids: OPT_rpath,
567 Ids: OPT_whole_archive, Ids: OPT_no_whole_archive)) {
568 // Sometimes needed libraries are passed by name, such as when using
569 // sanitizers. We need to check the file magic for any libraries.
570 if (Arg->getOption().matches(ID: OPT_INPUT)) {
571 if (!sys::fs::exists(Path: Arg->getValue()) ||
572 sys::fs::is_directory(Path: Arg->getValue()))
573 continue;
574
575 file_magic Magic;
576 if (auto EC = identify_magic(path: Arg->getValue(), result&: Magic))
577 return createStringError(Fmt: "Failed to open %s", Vals: Arg->getValue());
578 if (Magic != file_magic::archive &&
579 Magic != file_magic::elf_shared_object)
580 continue;
581 }
582 if (Arg->getOption().matches(ID: OPT_whole_archive))
583 LinkerArgs.push_back(Elt: Args.MakeArgString(Str: "-Wl,--whole-archive"));
584 else if (Arg->getOption().matches(ID: OPT_no_whole_archive))
585 LinkerArgs.push_back(Elt: Args.MakeArgString(Str: "-Wl,--no-whole-archive"));
586 else
587 Arg->render(Args, Output&: LinkerArgs);
588 }
589 llvm::append_range(C&: CmdArgs, R&: LinkerArgs);
590 }
591
592 // Pass on -mllvm options to the linker invocation.
593 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_mllvm))
594 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(
595 Str: "-mllvm=" + StringRef(Arg->getValue()))});
596
597 if (SaveTemps && linkerSupportsLTO(Args))
598 CmdArgs.push_back(Elt: "-Wl,--save-temps");
599
600 if (Args.hasArg(Ids: OPT_embed_bitcode))
601 CmdArgs.push_back(Elt: "-Wl,--lto-emit-llvm");
602
603 // For linking device code with the SYCL offload kind, special handling is
604 // required. Passing --sycl-link to clang results in a call to
605 // clang-sycl-linker. Additional linker flags required by clang-sycl-linker
606 // will be communicated via the -Xlinker option.
607 if (ActiveOffloadKindMask & OFK_SYCL) {
608 CmdArgs.push_back(Elt: "--sycl-link");
609 CmdArgs.append(
610 IL: {"-Xlinker", Args.MakeArgString(Str: "-triple=" + Triple.getTriple())});
611 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(Str: "-arch=" + Arch)});
612 }
613
614 for (StringRef Arg : Args.getAllArgValues(Id: OPT_linker_arg_EQ))
615 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(Str: Arg)});
616 for (StringRef Arg : Args.getAllArgValues(Id: OPT_compiler_arg_EQ))
617 CmdArgs.push_back(Elt: Args.MakeArgString(Str: Arg));
618
619 if (Error Err = executeCommands(ExecutablePath: *ClangPath, Args: CmdArgs))
620 return std::move(Err);
621
622 return *TempFileOrErr;
623}
624} // namespace generic
625
626Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
627 const ArgList &Args,
628 uint16_t ActiveOffloadKindMask) {
629 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
630 switch (Triple.getArch()) {
631 case Triple::nvptx:
632 case Triple::nvptx64:
633 case Triple::amdgcn:
634 case Triple::x86:
635 case Triple::x86_64:
636 case Triple::aarch64:
637 case Triple::aarch64_be:
638 case Triple::ppc64:
639 case Triple::ppc64le:
640 case Triple::spirv64:
641 case Triple::systemz:
642 case Triple::loongarch64:
643 return generic::clang(InputFiles, Args, ActiveOffloadKindMask);
644 default:
645 return createStringError(S: Triple.getArchName() +
646 " linking is not supported");
647 }
648}
649
650Error containerizeRawImage(std::unique_ptr<MemoryBuffer> &Img, OffloadKind Kind,
651 const ArgList &Args) {
652 llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
653 if (Kind == OFK_OpenMP && Triple.isSPIRV() &&
654 Triple.getVendor() == llvm::Triple::Intel)
655 return offloading::intel::containerizeOpenMPSPIRVImage(Binary&: Img, Triple);
656 return Error::success();
657}
658
659Expected<StringRef> writeOffloadFile(const OffloadFile &File) {
660 const OffloadBinary &Binary = *File.getBinary();
661
662 StringRef Prefix =
663 sys::path::stem(path: Binary.getMemoryBufferRef().getBufferIdentifier());
664 SmallString<128> Filename;
665 (Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch())
666 .toVector(Out&: Filename);
667 auto TempFileOrErr = createOutputFile(Prefix: Filename, Extension: "o");
668 if (!TempFileOrErr)
669 return TempFileOrErr.takeError();
670
671 Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
672 FileOutputBuffer::create(FilePath: *TempFileOrErr, Size: Binary.getImage().size());
673 if (!OutputOrErr)
674 return OutputOrErr.takeError();
675 std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
676 llvm::copy(Range: Binary.getImage(), Out: Output->getBufferStart());
677 if (Error E = Output->commit())
678 return std::move(E);
679
680 return *TempFileOrErr;
681}
682
683// Compile the module to an object file using the appropriate target machine for
684// the host triple.
685Expected<StringRef> compileModule(Module &M, OffloadKind Kind) {
686 llvm::TimeTraceScope TimeScope("Compile module");
687 std::string Msg;
688 const Target *T = TargetRegistry::lookupTarget(TheTriple: M.getTargetTriple(), Error&: Msg);
689 if (!T)
690 return createStringError(S: Msg);
691
692 auto Options =
693 codegen::InitTargetOptionsFromCodeGenFlags(TheTriple: M.getTargetTriple());
694 StringRef CPU = "";
695 StringRef Features = "";
696 std::unique_ptr<TargetMachine> TM(
697 T->createTargetMachine(TT: M.getTargetTriple(), CPU, Features, Options,
698 RM: Reloc::PIC_, CM: M.getCodeModel()));
699
700 if (M.getDataLayout().isDefault())
701 M.setDataLayout(TM->createDataLayout());
702
703 int FD = -1;
704 auto TempFileOrErr =
705 createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." +
706 getOffloadKindName(Name: Kind) + ".image.wrapper",
707 Extension: "o");
708 if (!TempFileOrErr)
709 return TempFileOrErr.takeError();
710 if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD))
711 return errorCodeToError(EC);
712
713 auto OS = std::make_unique<llvm::raw_fd_ostream>(args&: FD, args: true);
714
715 legacy::PassManager CodeGenPasses;
716 TargetLibraryInfoImpl TLII(M.getTargetTriple());
717 CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII));
718 if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr,
719 CodeGenFileType::ObjectFile))
720 return createStringError(Fmt: "Failed to execute host backend");
721 CodeGenPasses.run(M);
722
723 return *TempFileOrErr;
724}
725
726/// Creates the object file containing the device image and runtime
727/// registration code from the device images stored in \p Images.
728Expected<StringRef>
729wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers,
730 const ArgList &Args, OffloadKind Kind) {
731 llvm::TimeTraceScope TimeScope("Wrap bundled images");
732
733 SmallVector<ArrayRef<char>, 4> BuffersToWrap;
734 for (const auto &Buffer : Buffers)
735 BuffersToWrap.emplace_back(
736 Args: ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
737
738 LLVMContext Context;
739 Module M("offload.wrapper.module", Context);
740 M.setTargetTriple(Triple(
741 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple())));
742
743 switch (Kind) {
744 case OFK_OpenMP:
745 if (Error Err = offloading::wrapOpenMPBinaries(
746 M, Images: BuffersToWrap, EntryArray: offloading::getOffloadEntryArray(M),
747 /*Suffix=*/"", /*Relocatable=*/Args.hasArg(Ids: OPT_relocatable)))
748 return std::move(Err);
749 break;
750 case OFK_Cuda:
751 if (Error Err = offloading::wrapCudaBinary(
752 M, Images: BuffersToWrap.front(), EntryArray: offloading::getOffloadEntryArray(M),
753 /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
754 return std::move(Err);
755 break;
756 case OFK_HIP:
757 if (Error Err = offloading::wrapHIPBinary(
758 M, Images: BuffersToWrap.front(), EntryArray: offloading::getOffloadEntryArray(M)))
759 return std::move(Err);
760 break;
761 case OFK_SYCL: {
762 // TODO: fill these options once the Driver supports them.
763 offloading::SYCLJITOptions Options;
764 if (Error Err =
765 offloading::wrapSYCLBinaries(M, Buffer: BuffersToWrap.front(), Options))
766 return std::move(Err);
767 break;
768 }
769 default:
770 return createStringError(S: getOffloadKindName(Name: Kind) +
771 " wrapping is not supported");
772 }
773
774 if (Args.hasArg(Ids: OPT_print_wrapped_module))
775 errs() << M;
776 if (Args.hasArg(Ids: OPT_save_temps)) {
777 int FD = -1;
778 auto TempFileOrErr =
779 createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." +
780 getOffloadKindName(Name: Kind) + ".image.wrapper",
781 Extension: "bc");
782 if (!TempFileOrErr)
783 return TempFileOrErr.takeError();
784 if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD))
785 return errorCodeToError(EC);
786 llvm::raw_fd_ostream OS(FD, true);
787 WriteBitcodeToFile(M, Out&: OS);
788 }
789
790 auto FileOrErr = compileModule(M, Kind);
791 if (!FileOrErr)
792 return FileOrErr.takeError();
793 return *FileOrErr;
794}
795
796Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
797bundleOpenMP(ArrayRef<OffloadingImage> Images) {
798 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
799 for (const OffloadingImage &Image : Images)
800 Buffers.emplace_back(
801 Args: MemoryBuffer::getMemBufferCopy(InputData: OffloadBinary::write(OffloadingData: Image)));
802
803 return std::move(Buffers);
804}
805
806Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
807bundleSYCL(ArrayRef<OffloadingImage> Images) {
808 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
809 if (DryRun) {
810 // In dry-run mode there is an empty input which is insufficient for the
811 // testing. Therefore, we return here a stub image.
812 OffloadingImage Image;
813 Image.TheImageKind = IMG_None;
814 Image.TheOffloadKind = OffloadKind::OFK_SYCL;
815 Image.StringData["symbols"] = "stub";
816 Image.Image = MemoryBuffer::getMemBufferCopy(InputData: "");
817 SmallString<0> SerializedImage = OffloadBinary::write(OffloadingData: Image);
818 Buffers.emplace_back(Args: MemoryBuffer::getMemBufferCopy(InputData: SerializedImage));
819 return std::move(Buffers);
820 }
821
822 for (const OffloadingImage &Image : Images) {
823 // clang-sycl-linker packs outputs into one binary blob. Therefore, it is
824 // passed to Offload Wrapper as is.
825 StringRef S(Image.Image->getBufferStart(), Image.Image->getBufferSize());
826 Buffers.emplace_back(Args: MemoryBuffer::getMemBufferCopy(InputData: S));
827 }
828
829 return std::move(Buffers);
830}
831
832Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
833bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
834 SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
835 for (const OffloadingImage &Image : Images)
836 InputFiles.emplace_back(Args: std::make_pair(x: Image.Image->getBufferIdentifier(),
837 y: Image.StringData.lookup(Key: "arch")));
838
839 auto FileOrErr = nvptx::fatbinary(InputFiles, Args);
840 if (!FileOrErr)
841 return FileOrErr.takeError();
842
843 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
844 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *FileOrErr);
845
846 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
847 if (std::error_code EC = ImageOrError.getError())
848 return createFileError(F: *FileOrErr, EC);
849 Buffers.emplace_back(Args: std::move(*ImageOrError));
850
851 return std::move(Buffers);
852}
853
854Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
855bundleHIP(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
856 SmallVector<std::tuple<StringRef, StringRef, StringRef>, 4> InputFiles;
857 for (const OffloadingImage &Image : Images)
858 InputFiles.emplace_back(Args: std::make_tuple(args: Image.Image->getBufferIdentifier(),
859 args: Image.StringData.lookup(Key: "triple"),
860 args: Image.StringData.lookup(Key: "arch")));
861
862 auto FileOrErr = amdgcn::fatbinary(InputFiles, Args);
863 if (!FileOrErr)
864 return FileOrErr.takeError();
865
866 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
867 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *FileOrErr);
868
869 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
870 if (std::error_code EC = ImageOrError.getError())
871 return createFileError(F: *FileOrErr, EC);
872 Buffers.emplace_back(Args: std::move(*ImageOrError));
873
874 return std::move(Buffers);
875}
876
877/// Transforms the input \p Images into the binary format the runtime expects
878/// for the given \p Kind.
879Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
880bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args,
881 OffloadKind Kind) {
882 llvm::TimeTraceScope TimeScope("Bundle linked output");
883 switch (Kind) {
884 case OFK_OpenMP:
885 return bundleOpenMP(Images);
886 case OFK_SYCL:
887 return bundleSYCL(Images);
888 case OFK_Cuda:
889 return bundleCuda(Images, Args);
890 case OFK_HIP:
891 return bundleHIP(Images, Args);
892 default:
893 return createStringError(S: getOffloadKindName(Name: Kind) +
894 " bundling is not supported");
895 }
896}
897
898/// Returns a new ArgList containing arguments used for the device linking
899/// phase.
900DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
901 const InputArgList &Args) {
902 DerivedArgList DAL(Args);
903 for (Arg *A : Args)
904 DAL.append(A);
905
906 // Set the subarchitecture and target triple for this compilation.
907 const OptTable &Tbl = getOptTable();
908 StringRef Arch = Args.MakeArgString(Str: Input.front().getBinary()->getArch());
909 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_arch_EQ),
910 Value: Arch == "generic" ? "" : Arch);
911 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_triple_EQ),
912 Value: Args.MakeArgString(Str: Input.front().getBinary()->getTriple()));
913
914 // If every input file is bitcode we have whole program visibility as we
915 // do only support static linking with bitcode.
916 auto ContainsBitcode = [](const OffloadFile &F) {
917 return identify_magic(magic: F.getBinary()->getImage()) == file_magic::bitcode;
918 };
919 if (llvm::all_of(Range&: Input, P: ContainsBitcode))
920 DAL.AddFlagArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_whole_program));
921
922 // Forward '-Xoffload-linker' options to the appropriate backend.
923 for (StringRef Arg : Args.getAllArgValues(Id: OPT_device_linker_args_EQ)) {
924 auto [Triple, Value] = Arg.split(Separator: '=');
925 llvm::Triple TT(Triple);
926 // If this isn't a recognized triple then it's an `arg=value` option.
927 if (TT.getArch() == Triple::ArchType::UnknownArch)
928 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
929 Value: Args.MakeArgString(Str: Arg));
930 else if (Value.empty())
931 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
932 Value: Args.MakeArgString(Str: Triple));
933 else if (Triple == DAL.getLastArgValue(Id: OPT_triple_EQ))
934 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
935 Value: Args.MakeArgString(Str: Value));
936 }
937
938 // Forward '-Xoffload-compiler' options to the appropriate backend.
939 for (StringRef Arg : Args.getAllArgValues(Id: OPT_device_compiler_args_EQ)) {
940 auto [Triple, Value] = Arg.split(Separator: '=');
941 llvm::Triple TT(Triple);
942 // If this isn't a recognized triple then it's an `arg=value` option.
943 if (TT.getArch() == Triple::ArchType::UnknownArch)
944 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
945 Value: Args.MakeArgString(Str: Arg));
946 else if (Value.empty())
947 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
948 Value: Args.MakeArgString(Str: Triple));
949 else if (Triple == DAL.getLastArgValue(Id: OPT_triple_EQ))
950 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
951 Value: Args.MakeArgString(Str: Value));
952 }
953
954 return DAL;
955}
956
957Error handleOverrideImages(
958 const InputArgList &Args,
959 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> &Images) {
960 for (StringRef Arg : Args.getAllArgValues(Id: OPT_override_image)) {
961 OffloadKind Kind = getOffloadKind(Name: Arg.split(Separator: "=").first);
962 StringRef Filename = Arg.split(Separator: "=").second;
963
964 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
965 MemoryBuffer::getFileOrSTDIN(Filename);
966 if (std::error_code EC = BufferOrErr.getError())
967 return createFileError(F: Filename, EC);
968
969 Expected<std::unique_ptr<ObjectFile>> ElfOrErr =
970 ObjectFile::createELFObjectFile(Object: **BufferOrErr,
971 /*InitContent=*/false);
972 if (!ElfOrErr)
973 return ElfOrErr.takeError();
974 ObjectFile &Elf = **ElfOrErr;
975
976 OffloadingImage TheImage{};
977 TheImage.TheImageKind = IMG_Object;
978 TheImage.TheOffloadKind = Kind;
979 TheImage.StringData["triple"] =
980 Args.MakeArgString(Str: Elf.makeTriple().getTriple());
981 if (std::optional<StringRef> CPU = Elf.tryGetCPUName())
982 TheImage.StringData["arch"] = Args.MakeArgString(Str: *CPU);
983 TheImage.Image = std::move(*BufferOrErr);
984
985 Images[Kind].emplace_back(Args: std::move(TheImage));
986 }
987 return Error::success();
988}
989
990/// Transforms all the extracted offloading input files into an image that can
991/// be registered by the runtime. If NeedsWrapping is false, writes bundled
992/// output directly without wrapping or host linking.
993Expected<SmallVector<StringRef>>
994linkAndWrapDeviceFiles(ArrayRef<SmallVector<OffloadFile>> LinkerInputFiles,
995 const InputArgList &Args, char **Argv, int Argc,
996 bool NeedsWrapping) {
997 llvm::TimeTraceScope TimeScope("Handle all device input");
998
999 std::mutex ImageMtx;
1000 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> Images;
1001
1002 // Initialize the images with any overriding inputs.
1003 if (Args.hasArg(Ids: OPT_override_image))
1004 if (Error Err = handleOverrideImages(Args, Images))
1005 return std::move(Err);
1006
1007 auto Err = parallelForEachError(R&: LinkerInputFiles, Fn: [&](auto &Input) -> Error {
1008 llvm::TimeTraceScope TimeScope("Link device input");
1009
1010 // Each thread needs its own copy of the base arguments to maintain
1011 // per-device argument storage of synthetic strings.
1012 const OptTable &Tbl = getOptTable();
1013 BumpPtrAllocator Alloc;
1014 StringSaver Saver(Alloc);
1015 auto BaseArgs =
1016 Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [](StringRef Err) {
1017 reportError(E: createStringError(S: Err));
1018 });
1019 auto LinkerArgs = getLinkerArgs(Input, BaseArgs);
1020
1021 uint16_t ActiveOffloadKindMask = 0u;
1022 for (const auto &File : Input)
1023 ActiveOffloadKindMask |= File.getBinary()->getOffloadKind();
1024
1025 // Linking images of SYCL offload kind with images of other kind is not
1026 // supported.
1027 // TODO: Remove the above limitation.
1028 if ((ActiveOffloadKindMask & OFK_SYCL) &&
1029 ((ActiveOffloadKindMask ^ OFK_SYCL) != 0))
1030 return createStringError(Fmt: "Linking images of SYCL offload kind with "
1031 "images of any other kind is not supported");
1032
1033 // Write any remaining device inputs to an output file.
1034 SmallVector<StringRef> InputFiles;
1035 for (const OffloadFile &File : Input) {
1036 auto FileNameOrErr = writeOffloadFile(File);
1037 if (!FileNameOrErr)
1038 return FileNameOrErr.takeError();
1039 InputFiles.emplace_back(Args&: *FileNameOrErr);
1040 }
1041
1042 // Link the remaining device files using the device linker.
1043 auto OutputOrErr =
1044 linkDevice(InputFiles, LinkerArgs, ActiveOffloadKindMask);
1045 if (!OutputOrErr)
1046 return OutputOrErr.takeError();
1047
1048 // Store the offloading image for each linked output file.
1049 for (OffloadKind Kind = OFK_OpenMP; Kind != OFK_LAST;
1050 Kind = static_cast<OffloadKind>((uint16_t)(Kind) << 1)) {
1051 if ((ActiveOffloadKindMask & Kind) == 0)
1052 continue;
1053 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
1054 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *OutputOrErr);
1055 if (std::error_code EC = FileOrErr.getError()) {
1056 if (DryRun)
1057 FileOrErr = MemoryBuffer::getMemBuffer(InputData: "");
1058 else
1059 return createFileError(*OutputOrErr, EC);
1060 }
1061
1062 // Manually containerize offloading images not in ELF format.
1063 if (Error E = containerizeRawImage(*FileOrErr, Kind, LinkerArgs))
1064 return E;
1065
1066 std::scoped_lock<decltype(ImageMtx)> Guard(ImageMtx);
1067 OffloadingImage TheImage{};
1068 TheImage.TheImageKind =
1069 Args.hasArg(Ids: OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object;
1070 TheImage.TheOffloadKind = Kind;
1071 TheImage.StringData["triple"] =
1072 Args.MakeArgString(Str: LinkerArgs.getLastArgValue(OPT_triple_EQ));
1073 TheImage.StringData["arch"] =
1074 Args.MakeArgString(Str: LinkerArgs.getLastArgValue(OPT_arch_EQ));
1075 TheImage.Image = std::move(*FileOrErr);
1076
1077 Images[Kind].emplace_back(Args: std::move(TheImage));
1078 }
1079 return Error::success();
1080 });
1081 if (Err)
1082 return std::move(Err);
1083
1084 // Create a binary image of each offloading image and either embed it into a
1085 // new object file, or if all inputs were direct offload binaries, emit the
1086 // fat binary directly (e.g. .hipfb / .fatbin).
1087 SmallVector<StringRef> WrappedOutput;
1088 for (auto &[Kind, Input] : Images) {
1089 // We sort the entries before bundling so they appear in a deterministic
1090 // order in the final binary.
1091 llvm::sort(C&: Input, Comp: [](OffloadingImage &A, OffloadingImage &B) {
1092 StringRef TripleA = A.StringData.lookup(Key: "triple");
1093 StringRef TripleB = B.StringData.lookup(Key: "triple");
1094 StringRef ArchA = A.StringData.lookup(Key: "arch");
1095 StringRef ArchB = B.StringData.lookup(Key: "arch");
1096 if (TripleA != TripleB)
1097 return TripleA > TripleB;
1098 if (ArchA != ArchB)
1099 return ArchA > ArchB;
1100 return A.TheOffloadKind < B.TheOffloadKind;
1101 });
1102 auto BundledImagesOrErr = bundleLinkedOutput(Images: Input, Args, Kind);
1103 if (!BundledImagesOrErr)
1104 return BundledImagesOrErr.takeError();
1105
1106 if (!NeedsWrapping) {
1107 if (BundledImagesOrErr->size() != 1)
1108 return createStringError(
1109 Fmt: "Expected a single bundled image for direct fat binary output");
1110
1111 Expected<std::unique_ptr<FileOutputBuffer>> FOBOrErr =
1112 FileOutputBuffer::create(
1113 FilePath: ExecutableName, Size: BundledImagesOrErr->front()->getBufferSize());
1114 if (!FOBOrErr)
1115 return FOBOrErr.takeError();
1116 std::unique_ptr<FileOutputBuffer> FOB = std::move(*FOBOrErr);
1117 llvm::copy(Range: BundledImagesOrErr->front()->getBuffer(),
1118 Out: FOB->getBufferStart());
1119 if (Error E = FOB->commit())
1120 return std::move(E);
1121
1122 continue;
1123 }
1124
1125 auto OutputOrErr = wrapDeviceImages(Buffers: *BundledImagesOrErr, Args, Kind);
1126 if (!OutputOrErr)
1127 return OutputOrErr.takeError();
1128 WrappedOutput.push_back(Elt: *OutputOrErr);
1129 }
1130
1131 return WrappedOutput;
1132}
1133
1134std::optional<std::string> findFile(StringRef Dir, StringRef Root,
1135 const Twine &Name) {
1136 SmallString<128> Path;
1137 if (Dir.starts_with(Prefix: "="))
1138 sys::path::append(path&: Path, a: Root, b: Dir.substr(Start: 1), c: Name);
1139 else
1140 sys::path::append(path&: Path, a: Dir, b: Name);
1141
1142 if (sys::fs::exists(Path))
1143 return static_cast<std::string>(Path);
1144 return std::nullopt;
1145}
1146
1147std::optional<std::string>
1148findFromSearchPaths(StringRef Name, StringRef Root,
1149 ArrayRef<StringRef> SearchPaths) {
1150 for (StringRef Dir : SearchPaths)
1151 if (std::optional<std::string> File = findFile(Dir, Root, Name))
1152 return File;
1153 return std::nullopt;
1154}
1155
1156std::optional<std::string>
1157searchLibraryBaseName(StringRef Name, StringRef Root,
1158 ArrayRef<StringRef> SearchPaths) {
1159 for (StringRef Dir : SearchPaths) {
1160 if (std::optional<std::string> File =
1161 findFile(Dir, Root, Name: "lib" + Name + ".so"))
1162 return File;
1163 if (std::optional<std::string> File =
1164 findFile(Dir, Root, Name: "lib" + Name + ".a"))
1165 return File;
1166 }
1167 return std::nullopt;
1168}
1169
1170/// Search for static libraries in the linker's library path given input like
1171/// `-lfoo` or `-l:libfoo.a`.
1172std::optional<std::string> searchLibrary(StringRef Input, StringRef Root,
1173 ArrayRef<StringRef> SearchPaths) {
1174 if (Input.starts_with(Prefix: ":"))
1175 return findFromSearchPaths(Name: Input.drop_front(), Root, SearchPaths);
1176 if (Input.ends_with(Suffix: ".lib"))
1177 return findFromSearchPaths(Name: Input, Root, SearchPaths);
1178 return searchLibraryBaseName(Name: Input, Root, SearchPaths);
1179}
1180
1181/// Search the input files and libraries for embedded device offloading code
1182/// and add it to the list of files to be linked. Files coming from static
1183/// libraries are only added to the input if they are used by an existing
1184/// input file. Returns a list of input files intended for a single linking job.
1185Expected<SmallVector<SmallVector<OffloadFile>>>
1186getDeviceInput(const ArgList &Args) {
1187 llvm::TimeTraceScope TimeScope("ExtractDeviceCode");
1188
1189 // Skip all the input if the user is overriding the output.
1190 if (Args.hasArg(Ids: OPT_override_image))
1191 return SmallVector<SmallVector<OffloadFile>>();
1192
1193 StringRef Root = Args.getLastArgValue(Id: OPT_sysroot_EQ);
1194 SmallVector<StringRef> LibraryPaths;
1195 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_library_path, Ids: OPT_libpath))
1196 LibraryPaths.push_back(Elt: Arg->getValue());
1197
1198 BumpPtrAllocator Alloc;
1199 StringSaver Saver(Alloc);
1200
1201 // Try to extract device code from the linker input files.
1202 bool WholeArchive = Args.hasArg(Ids: OPT_wholearchive_flag);
1203 SmallVector<OffloadFile> ObjectFilesToExtract;
1204 SmallVector<OffloadFile> ArchiveFilesToExtract;
1205 for (const opt::Arg *Arg : Args.filtered(
1206 Ids: OPT_INPUT, Ids: OPT_library, Ids: OPT_whole_archive, Ids: OPT_no_whole_archive)) {
1207 if (Arg->getOption().matches(ID: OPT_whole_archive) ||
1208 Arg->getOption().matches(ID: OPT_no_whole_archive)) {
1209 WholeArchive = Arg->getOption().matches(ID: OPT_whole_archive);
1210 continue;
1211 }
1212
1213 std::optional<std::string> Filename =
1214 Arg->getOption().matches(ID: OPT_library)
1215 ? searchLibrary(Input: Arg->getValue(), Root, SearchPaths: LibraryPaths)
1216 : std::string(Arg->getValue());
1217
1218 if (!Filename && Arg->getOption().matches(ID: OPT_library))
1219 return createStringError(Fmt: "unable to find library -l%s", Vals: Arg->getValue());
1220
1221 if (!Filename || !sys::fs::exists(Path: *Filename) ||
1222 sys::fs::is_directory(Path: *Filename))
1223 continue;
1224
1225 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1226 MemoryBuffer::getFileOrSTDIN(Filename: *Filename);
1227 if (std::error_code EC = BufferOrErr.getError())
1228 return createFileError(F: *Filename, EC);
1229
1230 MemoryBufferRef Buffer = **BufferOrErr;
1231 if (identify_magic(magic: Buffer.getBuffer()) == file_magic::elf_shared_object)
1232 continue;
1233
1234 SmallVector<OffloadFile> Binaries;
1235 if (Error Err = extractOffloadBinaries(Buffer, Binaries))
1236 return std::move(Err);
1237
1238 for (auto &Binary : Binaries) {
1239 if (identify_magic(magic: Buffer.getBuffer()) == file_magic::archive &&
1240 !WholeArchive)
1241 ArchiveFilesToExtract.emplace_back(Args: std::move(Binary));
1242 else
1243 ObjectFilesToExtract.emplace_back(Args: std::move(Binary));
1244 }
1245 }
1246
1247 // Link all standard input files and update the list of symbols.
1248 MapVector<OffloadFile::TargetID, SmallVector<OffloadFile, 0>> InputFiles;
1249 for (OffloadFile &Binary : ObjectFilesToExtract) {
1250 if (!Binary.getBinary())
1251 continue;
1252
1253 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1254 for (const auto &[ID, Input] : InputFiles)
1255 if (object::areTargetsCompatible(LHS: Binary, RHS: ID))
1256 CompatibleTargets.emplace_back(Args: ID);
1257
1258 for (const auto &[Index, ID] : llvm::enumerate(First&: CompatibleTargets)) {
1259 // If another target needs this binary it must be copied instead.
1260 if (Index == CompatibleTargets.size() - 1)
1261 InputFiles[ID].emplace_back(Args: std::move(Binary));
1262 else
1263 InputFiles[ID].emplace_back(Args: Binary.copy());
1264 }
1265 }
1266
1267 llvm::DenseSet<StringRef> ShouldExtract;
1268 for (auto &Arg : Args.getAllArgValues(Id: OPT_should_extract))
1269 ShouldExtract.insert(V: Arg);
1270
1271 // We only extract archive members from the fat binary if we find a used or
1272 // requested target. Unlike normal static archive handling, we just extract
1273 // every object file contained in the archive.
1274 for (OffloadFile &Binary : ArchiveFilesToExtract) {
1275 if (!Binary.getBinary())
1276 continue;
1277
1278 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1279 for (const auto &[ID, Input] : InputFiles)
1280 if (object::areTargetsCompatible(LHS: Binary, RHS: ID))
1281 CompatibleTargets.emplace_back(Args: ID);
1282
1283 for (const auto &[Index, ID] : llvm::enumerate(First&: CompatibleTargets)) {
1284 // Only extract if we have an object matching this target or it
1285 // was specifically requested.
1286 if (!InputFiles.count(Key: ID) && !ShouldExtract.contains(V: ID.second))
1287 continue;
1288
1289 // If another target needs this binary it must be copied instead.
1290 if (Index == CompatibleTargets.size() - 1)
1291 InputFiles[ID].emplace_back(Args: std::move(Binary));
1292 else
1293 InputFiles[ID].emplace_back(Args: Binary.copy());
1294 }
1295 }
1296
1297 SmallVector<SmallVector<OffloadFile>> InputsForTarget;
1298 for (auto &[ID, Input] : InputFiles)
1299 InputsForTarget.emplace_back(Args: std::move(Input));
1300
1301 return std::move(InputsForTarget);
1302}
1303
1304} // namespace
1305
1306int main(int Argc, char **Argv) {
1307 InitLLVM X(Argc, Argv);
1308 InitializeAllTargetInfos();
1309 InitializeAllTargets();
1310 InitializeAllTargetMCs();
1311 InitializeAllAsmParsers();
1312 InitializeAllAsmPrinters();
1313
1314 LinkerExecutable = Argv[0];
1315 sys::PrintStackTraceOnErrorSignal(Argv0: Argv[0]);
1316
1317 const OptTable &Tbl = getOptTable();
1318 BumpPtrAllocator Alloc;
1319 StringSaver Saver(Alloc);
1320 auto Args = Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [&](StringRef Err) {
1321 reportError(E: createStringError(S: Err));
1322 });
1323
1324 if (Args.hasArg(Ids: OPT_help) || Args.hasArg(Ids: OPT_help_hidden)) {
1325 Tbl.printHelp(
1326 OS&: outs(),
1327 Usage: "clang-linker-wrapper [options] -- <options to pass to the linker>",
1328 Title: "\nA wrapper utility over the host linker. It scans the input files\n"
1329 "for sections that require additional processing prior to linking.\n"
1330 "It will then transparently pass all arguments and input to the\n"
1331 "specified host linker to create the final binary.\n",
1332 ShowHidden: Args.hasArg(Ids: OPT_help_hidden), ShowAllAliases: Args.hasArg(Ids: OPT_help_hidden));
1333 return EXIT_SUCCESS;
1334 }
1335 if (Args.hasArg(Ids: OPT_v)) {
1336 printVersion(OS&: outs());
1337 return EXIT_SUCCESS;
1338 }
1339
1340 // This forwards '-mllvm' arguments to LLVM if present.
1341 SmallVector<const char *> NewArgv = {Argv[0]};
1342 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_mllvm))
1343 NewArgv.push_back(Elt: Arg->getValue());
1344 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_offload_opt_eq_minus))
1345 NewArgv.push_back(Elt: Arg->getValue());
1346 SmallVector<PassPlugin, 1> PluginList;
1347 PassPlugins.setCallback([&](const std::string &PluginPath) {
1348 auto Plugin = PassPlugin::Load(Filename: PluginPath);
1349 if (!Plugin)
1350 reportFatalUsageError(Err: Plugin.takeError());
1351 PluginList.emplace_back(Args&: Plugin.get());
1352 });
1353 cl::ParseCommandLineOptions(argc: NewArgv.size(), argv: &NewArgv[0]);
1354
1355 Verbose = Args.hasArg(Ids: OPT_verbose);
1356 DryRun = Args.hasArg(Ids: OPT_dry_run);
1357 SaveTemps = Args.hasArg(Ids: OPT_save_temps);
1358 CudaBinaryPath = Args.getLastArgValue(Id: OPT_cuda_path_EQ).str();
1359 CanonicalPrefixes = !Args.hasArg(Ids: OPT_no_canonical_prefixes);
1360
1361 llvm::Triple Triple(
1362 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
1363 if (Args.hasArg(Ids: OPT_o))
1364 ExecutableName = Args.getLastArgValue(Id: OPT_o, Default: "a.out");
1365 else if (Args.hasArg(Ids: OPT_out))
1366 ExecutableName = Args.getLastArgValue(Id: OPT_out, Default: "a.exe");
1367 else
1368 ExecutableName = Triple.isOSWindows() ? "a.exe" : "a.out";
1369
1370 parallel::strategy = hardware_concurrency(ThreadCount: 1);
1371 if (auto *Arg = Args.getLastArg(Ids: OPT_wrapper_jobs)) {
1372 StringRef Val = Arg->getValue();
1373 if (Val.equals_insensitive(RHS: "jobserver"))
1374 parallel::strategy = jobserver_concurrency();
1375 else {
1376 unsigned Threads = 0;
1377 if (!llvm::to_integer(S: Val, Num&: Threads) || Threads == 0)
1378 reportError(E: createStringError(
1379 Fmt: "%s: expected a positive integer or 'jobserver', got '%s'",
1380 Vals: Arg->getSpelling().data(), Vals: Val.data()));
1381 else
1382 parallel::strategy = hardware_concurrency(ThreadCount: Threads);
1383 }
1384 }
1385
1386 if (Args.hasArg(Ids: OPT_wrapper_time_trace_eq)) {
1387 unsigned Granularity;
1388 if (Args.getLastArgValue(Id: OPT_wrapper_time_trace_granularity, Default: "500")
1389 .getAsInteger(Radix: 10, Result&: Granularity))
1390 reportError(
1391 E: createStringError(Fmt: "invalid value for time trace granularity"));
1392 timeTraceProfilerInitialize(TimeTraceGranularity: Granularity, ProcName: Argv[0]);
1393 }
1394
1395 {
1396 llvm::TimeTraceScope TimeScope("Execute linker wrapper");
1397
1398 // Extract the device input files stored in the host fat binary.
1399 auto DeviceInputFiles = getDeviceInput(Args);
1400 if (!DeviceInputFiles)
1401 reportError(E: DeviceInputFiles.takeError());
1402
1403 // Check if we should emit fat binary directly without wrapping or host
1404 // linking.
1405 bool EmitFatbinOnly = Args.hasArg(Ids: OPT_emit_fatbin_only);
1406
1407 // Link and process the device images. The function may emit a direct fat
1408 // binary if --emit-fatbin-only is specified.
1409 auto FilesOrErr = linkAndWrapDeviceFiles(LinkerInputFiles: *DeviceInputFiles, Args, Argv,
1410 Argc, NeedsWrapping: !EmitFatbinOnly);
1411 if (!FilesOrErr)
1412 reportError(E: FilesOrErr.takeError());
1413
1414 // Run the host linking job with the rendered arguments.
1415 if (!EmitFatbinOnly) {
1416 if (Error Err = runLinker(Files: *FilesOrErr, Args))
1417 reportError(E: std::move(Err));
1418 }
1419 }
1420
1421 if (const opt::Arg *Arg = Args.getLastArg(Ids: OPT_wrapper_time_trace_eq)) {
1422 if (Error Err = timeTraceProfilerWrite(PreferredFileName: Arg->getValue(), FallbackFileName: ExecutableName))
1423 reportError(E: std::move(Err));
1424 timeTraceProfilerCleanup();
1425 }
1426
1427 // Remove the temporary files created.
1428 if (!SaveTemps)
1429 for (const auto &TempFile : TempFiles)
1430 if (std::error_code EC = sys::fs::remove(path: TempFile))
1431 reportError(E: createFileError(F: TempFile, EC));
1432
1433 return EXIT_SUCCESS;
1434}
1435