1//===-- clang-linker-wrapper/ClangLinkerWrapper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This tool works as a wrapper over a linking job. This tool is used to create
10// linked device images for offloading. It scans the linker's input for embedded
11// device offloading data stored in sections `.llvm.offloading` and extracts it
12// as a temporary file. The extracted device files will then be passed to a
13// device linking job to create a final device image.
14//
15//===----------------------------------------------------------------------===//
16
17#include "clang/Basic/TargetID.h"
18#include "clang/Basic/Version.h"
19#include "llvm/ADT/MapVector.h"
20#include "llvm/BinaryFormat/Magic.h"
21#include "llvm/Bitcode/BitcodeWriter.h"
22#include "llvm/CodeGen/CommandFlags.h"
23#include "llvm/Frontend/Offloading/OffloadWrapper.h"
24#include "llvm/Frontend/Offloading/Utility.h"
25#include "llvm/IR/DiagnosticPrinter.h"
26#include "llvm/IR/Module.h"
27#include "llvm/IRReader/IRReader.h"
28#include "llvm/LTO/LTO.h"
29#include "llvm/MC/TargetRegistry.h"
30#include "llvm/Object/Binary.h"
31#include "llvm/Object/IRObjectFile.h"
32#include "llvm/Object/ObjectFile.h"
33#include "llvm/Object/OffloadBinary.h"
34#include "llvm/Option/ArgList.h"
35#include "llvm/Option/OptTable.h"
36#include "llvm/Option/Option.h"
37#include "llvm/Plugins/PassPlugin.h"
38#include "llvm/Remarks/HotnessThresholdParser.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/FileOutputBuffer.h"
41#include "llvm/Support/FileSystem.h"
42#include "llvm/Support/InitLLVM.h"
43#include "llvm/Support/MemoryBuffer.h"
44#include "llvm/Support/Parallel.h"
45#include "llvm/Support/Path.h"
46#include "llvm/Support/Program.h"
47#include "llvm/Support/Signals.h"
48#include "llvm/Support/SourceMgr.h"
49#include "llvm/Support/StringSaver.h"
50#include "llvm/Support/TargetSelect.h"
51#include "llvm/Support/TimeProfiler.h"
52#include "llvm/Support/WithColor.h"
53#include "llvm/Support/raw_ostream.h"
54#include "llvm/Target/TargetMachine.h"
55#include "llvm/TargetParser/Host.h"
56#include <optional>
57
58using namespace llvm;
59using namespace llvm::opt;
60using namespace llvm::object;
61
62// Various tools (e.g., llc and opt) duplicate this series of declarations for
63// options related to passes and remarks.
64
65static cl::opt<bool> RemarksWithHotness(
66 "pass-remarks-with-hotness",
67 cl::desc("With PGO, include profile count in optimization remarks"),
68 cl::Hidden);
69
70static cl::opt<std::optional<uint64_t>, false, remarks::HotnessThresholdParser>
71 RemarksHotnessThreshold(
72 "pass-remarks-hotness-threshold",
73 cl::desc("Minimum profile count required for "
74 "an optimization remark to be output. "
75 "Use 'auto' to apply the threshold from profile summary."),
76 cl::value_desc("N or 'auto'"), cl::init(Val: 0), cl::Hidden);
77
78static cl::opt<std::string>
79 RemarksFilename("pass-remarks-output",
80 cl::desc("Output filename for pass remarks"),
81 cl::value_desc("filename"));
82
83static cl::opt<std::string>
84 RemarksPasses("pass-remarks-filter",
85 cl::desc("Only record optimization remarks from passes whose "
86 "names match the given regular expression"),
87 cl::value_desc("regex"));
88
89static cl::opt<std::string> RemarksFormat(
90 "pass-remarks-format",
91 cl::desc("The format used for serializing remarks (default: YAML)"),
92 cl::value_desc("format"), cl::init(Val: "yaml"));
93
94static cl::list<std::string>
95 PassPlugins("load-pass-plugin",
96 cl::desc("Load passes from plugin library"));
97
98static cl::opt<std::string> PassPipeline(
99 "passes",
100 cl::desc(
101 "A textual description of the pass pipeline. To have analysis passes "
102 "available before a certain pass, add 'require<foo-analysis>'. "
103 "'-passes' overrides the pass pipeline (but not all effects) from "
104 "specifying '--opt-level=O?' (O2 is the default) to "
105 "clang-linker-wrapper. Be sure to include the corresponding "
106 "'default<O?>' in '-passes'."));
107static cl::alias PassPipeline2("p", cl::aliasopt(PassPipeline),
108 cl::desc("Alias for -passes"));
109
/// Path of the current binary. Used as the prefix of reported errors and, when
/// CanonicalPrefixes is false, as the source of the executable directory.
static const char *LinkerExecutable;

/// Save intermediary results. When set, createOutputFile() produces names of
/// the form `<prefix>.<extension>` instead of system temporary files.
static bool SaveTemps = false;

/// Print arguments without executing. Commands are echoed by
/// executeCommands() and then skipped.
static bool DryRun = false;

/// Print verbose output. Commands are echoed before they are executed.
static bool Verbose = false;

/// Filename of the executable being created. Also used as the prefix of the
/// intermediate files created for it.
static StringRef ExecutableName;

/// Binary path for the CUDA installation. Its `bin` subdirectory is searched
/// for `fatbinary`.
static std::string CudaBinaryPath;

/// Mutex lock to protect writes to shared TempFiles in parallel.
static std::mutex TempFilesMutex;

/// Temporary files created by the linker wrapper. createOutputFile() returns
/// StringRefs that refer to the strings stored in this list.
static std::list<SmallString<128>> TempFiles;

/// Codegen flags for LTO backend.
static codegen::RegisterCodeGenFlags CodeGenFlags;

/// Whether or not to look through symlinks when resolving binaries.
static bool CanonicalPrefixes = true;

/// Shorthand for the image description type declared by OffloadBinary.
using OffloadingImage = OffloadBinary::OffloadingImage;
141
142namespace llvm {
143// Provide DenseMapInfo so that OffloadKind can be used in a DenseMap.
144template <> struct DenseMapInfo<OffloadKind> {
145 static unsigned getHashValue(const OffloadKind &Val) { return Val; }
146
147 static bool isEqual(const OffloadKind &LHS, const OffloadKind &RHS) {
148 return LHS == RHS;
149 }
150};
151} // namespace llvm
152
153namespace {
154using std::error_code;
155
/// Option flags private to the linker wrapper.
/// Must not overlap with llvm::opt::DriverFlag.
enum WrapperFlags {
  /// Options only used by the linker wrapper.
  WrapperOnlyOption = 1 << 4,
  /// Options only used for device linking.
  DeviceOnlyOption = 1 << 5,
};
161
/// Option identifiers. One enumerator is produced per OPTION(...) entry of
/// LinkerWrapperOpts.inc by expanding it through LLVM_MAKE_OPT_ID; LastOption
/// follows the generated enumerators.
enum ID {
  OPT_INVALID = 0, // This is not an option ID.
#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
#include "LinkerWrapperOpts.inc"
  LastOption
#undef OPTION
};
169
// Emit the generated string table code from LinkerWrapperOpts.inc.
// NOTE(review): presumably this defines OptionStrTable used below - confirm
// against the generated file.
#define OPTTABLE_STR_TABLE_CODE
#include "LinkerWrapperOpts.inc"
#undef OPTTABLE_STR_TABLE_CODE

// Emit the generated prefixes table code from LinkerWrapperOpts.inc.
// NOTE(review): presumably this defines OptionPrefixesTable used below -
// confirm against the generated file.
#define OPTTABLE_PREFIXES_TABLE_CODE
#include "LinkerWrapperOpts.inc"
#undef OPTTABLE_PREFIXES_TABLE_CODE

/// One OptTable::Info record per OPTION(...) entry of LinkerWrapperOpts.inc,
/// built through LLVM_CONSTRUCT_OPT_INFO.
static constexpr OptTable::Info InfoTable[] = {
#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
#include "LinkerWrapperOpts.inc"
#undef OPTION
};
183
184class WrapperOptTable : public opt::GenericOptTable {
185public:
186 WrapperOptTable()
187 : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
188};
189
190const OptTable &getOptTable() {
191 static const WrapperOptTable Table;
192 return Table;
193}
194
195void printCommands(ArrayRef<StringRef> CmdArgs) {
196 if (CmdArgs.empty())
197 return;
198
199 llvm::errs() << " \"" << CmdArgs.front() << "\" ";
200 for (auto IC = std::next(x: CmdArgs.begin()), IE = CmdArgs.end(); IC != IE; ++IC)
201 llvm::errs() << *IC << (std::next(x: IC) != IE ? " " : "\n");
202}
203
204[[noreturn]] void reportError(Error E) {
205 outs().flush();
206 logAllUnhandledErrors(E: std::move(E),
207 OS&: WithColor::error(OS&: errs(), Prefix: LinkerExecutable));
208 exit(EXIT_FAILURE);
209}
210
211std::string getExecutableDir(const char *Name) {
212 if (!CanonicalPrefixes)
213 return sys::path::parent_path(path: LinkerExecutable).str();
214 void *Ptr = reinterpret_cast<void *>(&getExecutableDir);
215 return sys::path::parent_path(path: sys::fs::getMainExecutable(argv0: Name, MainExecAddr: Ptr)).str();
216}
217
218/// Get a temporary filename suitable for output.
219Expected<StringRef> createOutputFile(const Twine &Prefix, StringRef Extension) {
220 std::scoped_lock<decltype(TempFilesMutex)> Lock(TempFilesMutex);
221 SmallString<128> OutputFile;
222 std::string PrefixStr = clang::sanitizeTargetIDInFileName(TargetID: Prefix.str());
223
224 if (SaveTemps) {
225 (PrefixStr + "." + Extension).toNullTerminatedStringRef(Out&: OutputFile);
226 } else {
227 if (std::error_code EC = sys::fs::createTemporaryFile(
228 Prefix: sys::path::filename(path: PrefixStr), Suffix: Extension, ResultPath&: OutputFile))
229 return createFileError(F: OutputFile, EC);
230 }
231
232 TempFiles.emplace_back(args: std::move(OutputFile));
233 return TempFiles.back();
234}
235
236/// Execute the command \p ExecutablePath with the arguments \p Args.
237Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) {
238 if (Verbose || DryRun)
239 printCommands(CmdArgs: Args);
240
241 if (DryRun)
242 return Error::success();
243
244 // If the command line fits within system limits, execute directly.
245 if (sys::commandLineFitsWithinSystemLimits(Program: ExecutablePath, Args)) {
246 if (sys::ExecuteAndWait(Program: ExecutablePath, Args))
247 return createStringError(
248 Fmt: "'%s' failed", Vals: sys::path::filename(path: ExecutablePath).str().c_str());
249 return Error::success();
250 }
251
252 // Write the arguments to a response file and pass that instead.
253 auto TempFileOrErr = createOutputFile(Prefix: "response", Extension: "rsp");
254 if (!TempFileOrErr)
255 return TempFileOrErr.takeError();
256
257 SmallString<256> Contents;
258 raw_svector_ostream OS(Contents);
259 for (StringRef Arg : llvm::drop_begin(RangeOrContainer&: Args)) {
260 sys::printArg(OS, Arg, /*Quote=*/true);
261 OS << " ";
262 }
263
264 if (std::error_code EC = sys::writeFileWithEncoding(FileName: *TempFileOrErr, Contents))
265 return createStringError(Fmt: "failed to write response file: %s",
266 Vals: EC.message().c_str());
267
268 std::string ResponseFile = ("@" + *TempFileOrErr).str();
269 SmallVector<StringRef, 2> NewArgs = {Args.front(), ResponseFile};
270 if (sys::ExecuteAndWait(Program: ExecutablePath, Args: NewArgs))
271 return createStringError(Fmt: "'%s' failed",
272 Vals: sys::path::filename(path: ExecutablePath).str().c_str());
273 return Error::success();
274}
275
276Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
277
278 ErrorOr<std::string> Path = sys::findProgramByName(Name, Paths);
279 if (!Path)
280 Path = sys::findProgramByName(Name);
281 if (!Path && DryRun)
282 return Name.str();
283 if (!Path)
284 return createStringError(EC: Path.getError(),
285 S: "Unable to find '" + Name + "' in path");
286 return *Path;
287}
288
289bool linkerSupportsLTO(const ArgList &Args) {
290 llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
291 return Triple.isNVPTX() || Triple.isAMDGPU() ||
292 (!Triple.isGPU() &&
293 Args.getLastArgValue(Id: OPT_linker_path_EQ).ends_with(Suffix: "lld"));
294}
295
296/// Returns the hashed value for a constant string.
297std::string getHash(StringRef Str) {
298 llvm::MD5 Hasher;
299 llvm::MD5::MD5Result Hash;
300 Hasher.update(Str);
301 Hasher.final(Result&: Hash);
302 return llvm::utohexstr(X: Hash.low(), /*LowerCase=*/true);
303}
304
305/// Renames offloading entry sections in a relocatable link so they do not
306/// conflict with a later link job.
307Error relocateOffloadSection(const ArgList &Args, StringRef Output) {
308 llvm::Triple Triple(
309 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
310 if (Triple.isOSWindows())
311 return createStringError(
312 Fmt: "Relocatable linking is not supported on COFF targets");
313
314 Expected<std::string> ObjcopyPath =
315 findProgram(Name: "llvm-objcopy", Paths: {getExecutableDir(Name: "llvm-objcopy")});
316 if (!ObjcopyPath)
317 return ObjcopyPath.takeError();
318
319 // Use the linker output file to get a unique hash. This creates a unique
320 // identifier to rename the sections to that is deterministic to the contents.
321 auto BufferOrErr = DryRun ? MemoryBuffer::getMemBuffer(InputData: "")
322 : MemoryBuffer::getFileOrSTDIN(Filename: Output);
323 if (!BufferOrErr)
324 return createStringError(Fmt: "Failed to open %s", Vals: Output.str().c_str());
325 std::string Suffix = "_" + getHash(Str: (*BufferOrErr)->getBuffer());
326
327 SmallVector<StringRef> ObjcopyArgs = {
328 *ObjcopyPath,
329 Output,
330 };
331
332 // Remove the old .llvm.offloading section to prevent further linking.
333 ObjcopyArgs.emplace_back(Args: "--remove-section");
334 ObjcopyArgs.emplace_back(Args: ".llvm.offloading");
335 StringRef Prefix = "llvm";
336 auto Section = (Prefix + "_offload_entries").str();
337 // Rename the offloading entries to make them private to this link unit.
338 ObjcopyArgs.emplace_back(Args: "--rename-section");
339 ObjcopyArgs.emplace_back(
340 Args: Args.MakeArgString(Str: Section + "=" + Section + Suffix));
341
342 // Rename the __start_ / __stop_ symbols appropriately to iterate over the
343 // newly renamed section containing the offloading entries.
344 ObjcopyArgs.emplace_back(Args: "--redefine-sym");
345 ObjcopyArgs.emplace_back(Args: Args.MakeArgString(Str: "__start_" + Section + "=" +
346 "__start_" + Section + Suffix));
347 ObjcopyArgs.emplace_back(Args: "--redefine-sym");
348 ObjcopyArgs.emplace_back(Args: Args.MakeArgString(Str: "__stop_" + Section + "=" +
349 "__stop_" + Section + Suffix));
350
351 if (Error Err = executeCommands(ExecutablePath: *ObjcopyPath, Args: ObjcopyArgs))
352 return Err;
353
354 return Error::success();
355}
356
357/// Runs the wrapped linker job with the newly created input.
358Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) {
359 llvm::TimeTraceScope TimeScope("Execute host linker");
360
361 // Render the linker arguments and add the newly created image. We add it
362 // after the output file to ensure it is linked with the correct libraries.
363 StringRef LinkerPath = Args.getLastArgValue(Id: OPT_linker_path_EQ);
364 if (LinkerPath.empty())
365 return createStringError(Fmt: "linker path missing, must pass 'linker-path'");
366 ArgStringList NewLinkerArgs;
367 for (const opt::Arg *Arg : Args) {
368 // Do not forward arguments only intended for the linker wrapper.
369 if (Arg->getOption().hasFlag(Val: WrapperOnlyOption))
370 continue;
371
372 Arg->render(Args, Output&: NewLinkerArgs);
373 if (Arg->getOption().matches(ID: OPT_o) || Arg->getOption().matches(ID: OPT_out))
374 llvm::transform(Range&: Files, d_first: std::back_inserter(x&: NewLinkerArgs),
375 F: [&](StringRef A) { return Args.MakeArgString(Str: A); });
376 }
377
378 SmallVector<StringRef> LinkerArgs({LinkerPath});
379 for (StringRef Arg : NewLinkerArgs)
380 LinkerArgs.push_back(Elt: Arg);
381 if (Error Err = executeCommands(ExecutablePath: LinkerPath, Args: LinkerArgs))
382 return Err;
383
384 if (Args.hasArg(Ids: OPT_relocatable))
385 return relocateOffloadSection(Args, Output: ExecutableName);
386
387 return Error::success();
388}
389
390void printVersion(raw_ostream &OS) {
391 OS << clang::getClangToolFullVersion(ToolName: "clang-linker-wrapper") << '\n';
392}
393
394namespace nvptx {
395Expected<StringRef>
396fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
397 const ArgList &Args) {
398 llvm::TimeTraceScope TimeScope("NVPTX fatbinary");
399 // NVPTX uses the fatbinary program to bundle the linked images.
400 Expected<std::string> FatBinaryPath =
401 findProgram(Name: "fatbinary", Paths: {CudaBinaryPath + "/bin"});
402 if (!FatBinaryPath)
403 return FatBinaryPath.takeError();
404
405 llvm::Triple Triple(
406 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
407
408 // Create a new file to write the linked device image to.
409 auto TempFileOrErr = createOutputFile(Prefix: ExecutableName, Extension: "fatbin");
410 if (!TempFileOrErr)
411 return TempFileOrErr.takeError();
412
413 SmallVector<StringRef, 16> CmdArgs;
414 CmdArgs.push_back(Elt: *FatBinaryPath);
415 CmdArgs.push_back(Elt: Triple.isArch64Bit() ? "-64" : "-32");
416 CmdArgs.push_back(Elt: "--create");
417 CmdArgs.push_back(Elt: *TempFileOrErr);
418 for (const auto &[File, Arch] : InputFiles)
419 CmdArgs.push_back(Elt: Args.MakeArgString(
420 Str: "--image3=kind=elf,sm=" + Arch.drop_front(N: 3) + ",file=" + File));
421
422 if (Error Err = executeCommands(ExecutablePath: *FatBinaryPath, Args: CmdArgs))
423 return std::move(Err);
424
425 return *TempFileOrErr;
426}
427} // namespace nvptx
428
429namespace amdgcn {
430
431// Constructs a triple string for clang offload bundler.
432// NOTE: copied from HIPUtility.cpp.
433static std::string normalizeForBundler(const llvm::Triple &T,
434 bool HasTargetID) {
435 return HasTargetID ? (T.getArchName() + "-" + T.getVendorName() + "-" +
436 T.getOSName() + "-" + T.getEnvironmentName())
437 .str()
438 : T.normalize(Form: llvm::Triple::CanonicalForm::FOUR_IDENT);
439}
440
441Expected<StringRef>
442fatbinary(ArrayRef<std::tuple<StringRef, StringRef, StringRef>> InputFiles,
443 const ArgList &Args) {
444 llvm::TimeTraceScope TimeScope("AMDGPU Fatbinary");
445
446 // AMDGPU uses the clang-offload-bundler to bundle the linked images.
447 Expected<std::string> OffloadBundlerPath = findProgram(
448 Name: "clang-offload-bundler", Paths: {getExecutableDir(Name: "clang-offload-bundler")});
449 if (!OffloadBundlerPath)
450 return OffloadBundlerPath.takeError();
451
452 // Create a new file to write the linked device image to.
453 auto TempFileOrErr = createOutputFile(Prefix: ExecutableName, Extension: "hipfb");
454 if (!TempFileOrErr)
455 return TempFileOrErr.takeError();
456
457 BumpPtrAllocator Alloc;
458 StringSaver Saver(Alloc);
459
460 SmallVector<StringRef, 16> CmdArgs;
461 CmdArgs.push_back(Elt: *OffloadBundlerPath);
462 CmdArgs.push_back(Elt: "-type=o");
463 CmdArgs.push_back(Elt: "-bundle-align=4096");
464
465 if (Args.hasArg(Ids: OPT_compress))
466 CmdArgs.push_back(Elt: "-compress");
467 if (auto *Arg = Args.getLastArg(Ids: OPT_compression_level_eq))
468 CmdArgs.push_back(
469 Elt: Args.MakeArgString(Str: Twine("-compression-level=") + Arg->getValue()));
470
471 llvm::Triple HostTriple(
472 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
473 SmallVector<StringRef> Targets = {
474 Saver.save(S: "-targets=host-" + HostTriple.normalize())};
475 for (const auto &[File, TripleRef, Arch] : InputFiles) {
476 std::string NormalizedTriple =
477 normalizeForBundler(T: Triple(TripleRef), HasTargetID: !Arch.empty());
478 Targets.push_back(Elt: Saver.save(S: "hip-" + NormalizedTriple + "-" + Arch));
479 }
480 CmdArgs.push_back(Elt: Saver.save(S: llvm::join(R&: Targets, Separator: ",")));
481
482#ifdef _WIN32
483 CmdArgs.push_back("-input=NUL");
484#else
485 CmdArgs.push_back(Elt: "-input=/dev/null");
486#endif
487 for (const auto &[File, Triple, Arch] : InputFiles)
488 CmdArgs.push_back(Elt: Saver.save(S: "-input=" + File));
489
490 CmdArgs.push_back(Elt: Saver.save(S: "-output=" + *TempFileOrErr));
491
492 if (Error Err = executeCommands(ExecutablePath: *OffloadBundlerPath, Args: CmdArgs))
493 return std::move(Err);
494
495 return *TempFileOrErr;
496}
497} // namespace amdgcn
498
499namespace generic {
500Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
501 uint16_t ActiveOffloadKindMask) {
502 llvm::TimeTraceScope TimeScope("Clang");
503 // Use `clang` to invoke the appropriate device tools.
504 Expected<std::string> ClangPath =
505 findProgram(Name: "clang", Paths: {getExecutableDir(Name: "clang")});
506 if (!ClangPath)
507 return ClangPath.takeError();
508
509 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
510 StringRef Arch = Args.getLastArgValue(Id: OPT_arch_EQ);
511 // Create a new file to write the linked device image to. Assume that the
512 // input filename already has the device and architecture.
513 std::string OutputFileBase =
514 "." + Triple.getArchName().str() + "." + Arch.str();
515 auto TempFileOrErr = createOutputFile(Prefix: ExecutableName + OutputFileBase, Extension: "img");
516 if (!TempFileOrErr)
517 return TempFileOrErr.takeError();
518
519 SmallVector<StringRef, 16> CmdArgs{
520 *ClangPath,
521 "--no-default-config",
522 "-o",
523 *TempFileOrErr,
524 // Without -dumpdir, Clang will place auxiliary output files in the
525 // temporary directory of TempFileOrErr, where they will not easily be
526 // found by the user and might eventually be automatically removed. Tell
527 // Clang to instead place them alongside the final executable.
528 "-dumpdir",
529 Args.MakeArgString(Str: ExecutableName + OutputFileBase + ".img."),
530 Args.MakeArgString(Str: "--target=" + Triple.getTriple()),
531 };
532
533 if (!Arch.empty())
534 Triple.isAMDGPU() ? CmdArgs.push_back(Elt: Args.MakeArgString(Str: "-mcpu=" + Arch))
535 : CmdArgs.push_back(Elt: Args.MakeArgString(Str: "-march=" + Arch));
536
537 // Forward all of the `--offload-opt` and `-mllvm` options to the device.
538 for (auto &Arg : Args.filtered(Ids: OPT_offload_opt_eq_minus, Ids: OPT_mllvm))
539 CmdArgs.append(
540 IL: {"-Xlinker",
541 Args.MakeArgString(Str: "--plugin-opt=" + StringRef(Arg->getValue()))});
542
543 if (!Triple.isNVPTX() && !Triple.isSPIRV())
544 CmdArgs.push_back(Elt: "-Wl,--no-undefined");
545
546 // The device inputs are bitcode stored in files with an object extension.
547 // Force the IR input language so Clang runs the compile and backend phases
548 // instead of treating them as linker inputs, which would defer codegen to
549 // the LTO link and defeat the non-LTO pipeline.
550 // FIXME: This is a stop-gap for non-RDC. Longer term, RDC and non-RDC should
551 // share a unified interface.
552 // SPIR-V has no non-LTO pipeline so a --no-lto leaked from a concrete arch in
553 // a multi-target compile is ignored. Which is a workaround to remove.
554 if (Args.hasArg(Ids: OPT_no_lto) && !Triple.isSPIRV())
555 CmdArgs.append(IL: {"-x", "ir"});
556 for (StringRef InputFile : InputFiles)
557 CmdArgs.push_back(Elt: InputFile);
558
559 // If this is CPU offloading we copy the input libraries.
560 if (!Triple.isGPU()) {
561 CmdArgs.push_back(Elt: "-Wl,-Bsymbolic");
562 CmdArgs.push_back(Elt: "-shared");
563 ArgStringList LinkerArgs;
564 for (const opt::Arg *Arg :
565 Args.filtered(Ids: OPT_INPUT, Ids: OPT_library, Ids: OPT_library_path, Ids: OPT_rpath,
566 Ids: OPT_whole_archive, Ids: OPT_no_whole_archive)) {
567 // Sometimes needed libraries are passed by name, such as when using
568 // sanitizers. We need to check the file magic for any libraries.
569 if (Arg->getOption().matches(ID: OPT_INPUT)) {
570 if (!sys::fs::exists(Path: Arg->getValue()) ||
571 sys::fs::is_directory(Path: Arg->getValue()))
572 continue;
573
574 file_magic Magic;
575 if (auto EC = identify_magic(path: Arg->getValue(), result&: Magic))
576 return createStringError(Fmt: "Failed to open %s", Vals: Arg->getValue());
577 if (Magic != file_magic::archive &&
578 Magic != file_magic::elf_shared_object)
579 continue;
580 }
581 if (Arg->getOption().matches(ID: OPT_whole_archive))
582 LinkerArgs.push_back(Elt: Args.MakeArgString(Str: "-Wl,--whole-archive"));
583 else if (Arg->getOption().matches(ID: OPT_no_whole_archive))
584 LinkerArgs.push_back(Elt: Args.MakeArgString(Str: "-Wl,--no-whole-archive"));
585 else
586 Arg->render(Args, Output&: LinkerArgs);
587 }
588 llvm::append_range(C&: CmdArgs, R&: LinkerArgs);
589 }
590
591 // Pass on -mllvm options to the linker invocation.
592 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_mllvm))
593 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(
594 Str: "-mllvm=" + StringRef(Arg->getValue()))});
595
596 if (SaveTemps && linkerSupportsLTO(Args))
597 CmdArgs.push_back(Elt: "-Wl,--save-temps");
598
599 if (Args.hasArg(Ids: OPT_embed_bitcode))
600 CmdArgs.push_back(Elt: "-Wl,--lto-emit-llvm");
601
602 // For linking device code with the SYCL offload kind, special handling is
603 // required. Passing --sycl-link to clang results in a call to
604 // clang-sycl-linker. Additional linker flags required by clang-sycl-linker
605 // will be communicated via the -Xlinker option.
606 if (ActiveOffloadKindMask & OFK_SYCL) {
607 CmdArgs.push_back(Elt: "--sycl-link");
608 CmdArgs.append(
609 IL: {"-Xlinker", Args.MakeArgString(Str: "-triple=" + Triple.getTriple())});
610 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(Str: "-arch=" + Arch)});
611 }
612
613 for (StringRef Arg : Args.getAllArgValues(Id: OPT_linker_arg_EQ))
614 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(Str: Arg)});
615 for (StringRef Arg : Args.getAllArgValues(Id: OPT_compiler_arg_EQ))
616 CmdArgs.push_back(Elt: Args.MakeArgString(Str: Arg));
617
618 if (Args.hasArg(Ids: OPT_no_lto) && !Triple.isSPIRV())
619 CmdArgs.append(IL: {"-flto=none", "-Wno-unused-command-line-argument"});
620
621 if (Error Err = executeCommands(ExecutablePath: *ClangPath, Args: CmdArgs))
622 return std::move(Err);
623
624 return *TempFileOrErr;
625}
626} // namespace generic
627
628Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
629 const ArgList &Args,
630 uint16_t ActiveOffloadKindMask) {
631 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
632 switch (Triple.getArch()) {
633 case Triple::nvptx:
634 case Triple::nvptx64:
635 case Triple::amdgcn:
636 case Triple::x86:
637 case Triple::x86_64:
638 case Triple::aarch64:
639 case Triple::aarch64_be:
640 case Triple::ppc64:
641 case Triple::ppc64le:
642 case Triple::spirv64:
643 case Triple::systemz:
644 case Triple::loongarch64:
645 return generic::clang(InputFiles, Args, ActiveOffloadKindMask);
646 default:
647 return createStringError(S: Triple.getArchName() +
648 " linking is not supported");
649 }
650}
651
652Error containerizeRawImage(std::unique_ptr<MemoryBuffer> &Img, OffloadKind Kind,
653 const ArgList &Args) {
654 llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
655 if (Kind == OFK_OpenMP && Triple.isSPIRV() &&
656 Triple.getVendor() == llvm::Triple::Intel)
657 return offloading::intel::containerizeOpenMPSPIRVImage(Binary&: Img, Triple);
658 return Error::success();
659}
660
661Expected<StringRef> writeOffloadFile(const OffloadFile &File) {
662 const OffloadBinary &Binary = *File.getBinary();
663
664 StringRef Prefix =
665 sys::path::stem(path: Binary.getMemoryBufferRef().getBufferIdentifier());
666 SmallString<128> Filename;
667 (Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch())
668 .toVector(Out&: Filename);
669 auto TempFileOrErr = createOutputFile(Prefix: Filename, Extension: "o");
670 if (!TempFileOrErr)
671 return TempFileOrErr.takeError();
672
673 Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
674 FileOutputBuffer::create(FilePath: *TempFileOrErr, Size: Binary.getImage().size());
675 if (!OutputOrErr)
676 return OutputOrErr.takeError();
677 std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
678 llvm::copy(Range: Binary.getImage(), Out: Output->getBufferStart());
679 if (Error E = Output->commit())
680 return std::move(E);
681
682 return *TempFileOrErr;
683}
684
685// Compile the module to an object file using the appropriate target machine for
686// the host triple.
687Expected<StringRef> compileModule(Module &M, OffloadKind Kind) {
688 llvm::TimeTraceScope TimeScope("Compile module");
689 std::string Msg;
690 const Target *T = TargetRegistry::lookupTarget(TheTriple: M.getTargetTriple(), Error&: Msg);
691 if (!T)
692 return createStringError(S: Msg);
693
694 auto Options =
695 codegen::InitTargetOptionsFromCodeGenFlags(TheTriple: M.getTargetTriple());
696 StringRef CPU = "";
697 StringRef Features = "";
698 std::unique_ptr<TargetMachine> TM(
699 T->createTargetMachine(TT: M.getTargetTriple(), CPU, Features, Options,
700 RM: Reloc::PIC_, CM: M.getCodeModel()));
701
702 if (M.getDataLayout().isDefault())
703 M.setDataLayout(TM->createDataLayout());
704
705 int FD = -1;
706 auto TempFileOrErr = createOutputFile(
707 Prefix: ExecutableName + "." + getOffloadKindName(Name: Kind) + ".image.wrapper", Extension: "o");
708 if (!TempFileOrErr)
709 return TempFileOrErr.takeError();
710 if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD))
711 return errorCodeToError(EC);
712
713 auto OS = std::make_unique<llvm::raw_fd_ostream>(args&: FD, args: true);
714
715 legacy::PassManager CodeGenPasses;
716 TargetLibraryInfoImpl TLII(M.getTargetTriple());
717 CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII));
718 if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr,
719 CodeGenFileType::ObjectFile))
720 return createStringError(Fmt: "Failed to execute host backend");
721 CodeGenPasses.run(M);
722
723 return *TempFileOrErr;
724}
725
726/// Creates the object file containing the device image and runtime
727/// registration code from the device images stored in \p Images.
728Expected<StringRef>
729wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers,
730 const ArgList &Args, OffloadKind Kind) {
731 llvm::TimeTraceScope TimeScope("Wrap bundled images");
732
733 SmallVector<ArrayRef<char>, 4> BuffersToWrap;
734 for (const auto &Buffer : Buffers)
735 BuffersToWrap.emplace_back(
736 Args: ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
737
738 LLVMContext Context;
739 Module M("offload.wrapper.module", Context);
740 M.setTargetTriple(Triple(
741 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple())));
742
743 switch (Kind) {
744 case OFK_OpenMP:
745 if (Error Err = offloading::wrapOpenMPBinaries(
746 M, Images: BuffersToWrap, EntryArray: offloading::getOffloadEntryArray(M),
747 /*Suffix=*/"", /*Relocatable=*/Args.hasArg(Ids: OPT_relocatable)))
748 return std::move(Err);
749 break;
750 case OFK_Cuda:
751 if (Error Err = offloading::wrapCudaBinary(
752 M, Images: BuffersToWrap.front(), EntryArray: offloading::getOffloadEntryArray(M),
753 /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
754 return std::move(Err);
755 break;
756 case OFK_HIP:
757 if (Error Err = offloading::wrapHIPBinary(
758 M, Images: BuffersToWrap.front(), EntryArray: offloading::getOffloadEntryArray(M)))
759 return std::move(Err);
760 break;
761 case OFK_SYCL: {
762 // TODO: fill these options once the Driver supports them.
763 offloading::SYCLJITOptions Options;
764 if (Error Err =
765 offloading::wrapSYCLBinaries(M, Buffer: BuffersToWrap.front(), Options))
766 return std::move(Err);
767 break;
768 }
769 default:
770 return createStringError(S: getOffloadKindName(Name: Kind) +
771 " wrapping is not supported");
772 }
773
774 if (Args.hasArg(Ids: OPT_print_wrapped_module))
775 errs() << M;
776 if (Args.hasArg(Ids: OPT_save_temps)) {
777 int FD = -1;
778 auto TempFileOrErr = createOutputFile(
779 Prefix: ExecutableName + "." + getOffloadKindName(Name: Kind) + ".image.wrapper",
780 Extension: "bc");
781 if (!TempFileOrErr)
782 return TempFileOrErr.takeError();
783 if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD))
784 return errorCodeToError(EC);
785 llvm::raw_fd_ostream OS(FD, true);
786 WriteBitcodeToFile(M, Out&: OS);
787 }
788
789 auto FileOrErr = compileModule(M, Kind);
790 if (!FileOrErr)
791 return FileOrErr.takeError();
792 return *FileOrErr;
793}
794
795Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
796bundleOpenMP(ArrayRef<OffloadingImage> Images) {
797 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
798 for (const OffloadingImage &Image : Images)
799 Buffers.emplace_back(
800 Args: MemoryBuffer::getMemBufferCopy(InputData: OffloadBinary::write(OffloadingData: Image)));
801
802 return std::move(Buffers);
803}
804
805Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
806bundleSYCL(ArrayRef<OffloadingImage> Images) {
807 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
808 for (const OffloadingImage &Image : Images) {
809 // clang-sycl-linker packs outputs into one binary blob. Therefore, it is
810 // passed to Offload Wrapper as is.
811 StringRef S(Image.Image->getBufferStart(), Image.Image->getBufferSize());
812 Buffers.emplace_back(Args: MemoryBuffer::getMemBufferCopy(InputData: S));
813 }
814
815 return std::move(Buffers);
816}
817
818Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
819bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
820 SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
821 for (const OffloadingImage &Image : Images)
822 InputFiles.emplace_back(Args: std::make_pair(x: Image.Image->getBufferIdentifier(),
823 y: Image.StringData.lookup(Key: "arch")));
824
825 auto FileOrErr = nvptx::fatbinary(InputFiles, Args);
826 if (!FileOrErr)
827 return FileOrErr.takeError();
828
829 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
830 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *FileOrErr);
831
832 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
833 if (std::error_code EC = ImageOrError.getError())
834 return createFileError(F: *FileOrErr, EC);
835 Buffers.emplace_back(Args: std::move(*ImageOrError));
836
837 return std::move(Buffers);
838}
839
840Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
841bundleHIP(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
842 SmallVector<std::tuple<StringRef, StringRef, StringRef>, 4> InputFiles;
843 for (const OffloadingImage &Image : Images)
844 InputFiles.emplace_back(Args: std::make_tuple(args: Image.Image->getBufferIdentifier(),
845 args: Image.StringData.lookup(Key: "triple"),
846 args: Image.StringData.lookup(Key: "arch")));
847
848 auto FileOrErr = amdgcn::fatbinary(InputFiles, Args);
849 if (!FileOrErr)
850 return FileOrErr.takeError();
851
852 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
853 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *FileOrErr);
854
855 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
856 if (std::error_code EC = ImageOrError.getError())
857 return createFileError(F: *FileOrErr, EC);
858 Buffers.emplace_back(Args: std::move(*ImageOrError));
859
860 return std::move(Buffers);
861}
862
863/// Transforms the input \p Images into the binary format the runtime expects
864/// for the given \p Kind.
865Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
866bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args,
867 OffloadKind Kind) {
868 llvm::TimeTraceScope TimeScope("Bundle linked output");
869 switch (Kind) {
870 case OFK_OpenMP:
871 return bundleOpenMP(Images);
872 case OFK_SYCL:
873 return bundleSYCL(Images);
874 case OFK_Cuda:
875 return bundleCuda(Images, Args);
876 case OFK_HIP:
877 return bundleHIP(Images, Args);
878 default:
879 return createStringError(S: getOffloadKindName(Name: Kind) +
880 " bundling is not supported");
881 }
882}
883
884/// Returns a new ArgList containing arguments used for the device linking
885/// phase.
886DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
887 const InputArgList &Args) {
888 DerivedArgList DAL(Args);
889 for (Arg *A : Args)
890 DAL.append(A);
891
892 // Set the subarchitecture and target triple for this compilation.
893 const OptTable &Tbl = getOptTable();
894 StringRef Arch = Args.MakeArgString(Str: Input.front().getBinary()->getArch());
895 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_arch_EQ),
896 Value: Arch == "generic" ? "" : Arch);
897 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_triple_EQ),
898 Value: Args.MakeArgString(Str: Input.front().getBinary()->getTriple()));
899
900 // If every input file is bitcode we have whole program visibility as we
901 // do only support static linking with bitcode.
902 auto ContainsBitcode = [](const OffloadFile &F) {
903 return identify_magic(magic: F.getBinary()->getImage()) == file_magic::bitcode;
904 };
905 if (llvm::all_of(Range&: Input, P: ContainsBitcode))
906 DAL.AddFlagArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_whole_program));
907
908 // Forward '-Xoffload-linker' options to the appropriate backend.
909 for (StringRef Arg : Args.getAllArgValues(Id: OPT_device_linker_args_EQ)) {
910 auto [Triple, Value] = Arg.split(Separator: '=');
911 llvm::Triple TT(Triple);
912 // If this isn't a recognized triple then it's an `arg=value` option.
913 if (TT.getArch() == Triple::ArchType::UnknownArch)
914 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
915 Value: Args.MakeArgString(Str: Arg));
916 else if (Value.empty())
917 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
918 Value: Args.MakeArgString(Str: Triple));
919 else if (Triple == DAL.getLastArgValue(Id: OPT_triple_EQ))
920 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
921 Value: Args.MakeArgString(Str: Value));
922 }
923
924 // Forward '-Xoffload-compiler' options to the appropriate backend.
925 for (StringRef Arg : Args.getAllArgValues(Id: OPT_device_compiler_args_EQ)) {
926 auto [Triple, Value] = Arg.split(Separator: '=');
927 llvm::Triple TT(Triple);
928 // If this isn't a recognized triple then it's an `arg=value` option.
929 if (TT.getArch() == Triple::ArchType::UnknownArch)
930 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
931 Value: Args.MakeArgString(Str: Arg));
932 else if (Value.empty())
933 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
934 Value: Args.MakeArgString(Str: Triple));
935 else if (Triple == DAL.getLastArgValue(Id: OPT_triple_EQ))
936 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
937 Value: Args.MakeArgString(Str: Value));
938 }
939
940 return DAL;
941}
942
943Error handleOverrideImages(
944 const InputArgList &Args,
945 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> &Images) {
946 for (StringRef Arg : Args.getAllArgValues(Id: OPT_override_image)) {
947 OffloadKind Kind = getOffloadKind(Name: Arg.split(Separator: "=").first);
948 StringRef Filename = Arg.split(Separator: "=").second;
949
950 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
951 MemoryBuffer::getFileOrSTDIN(Filename);
952 if (std::error_code EC = BufferOrErr.getError())
953 return createFileError(F: Filename, EC);
954
955 Expected<std::unique_ptr<ObjectFile>> ElfOrErr =
956 ObjectFile::createELFObjectFile(Object: **BufferOrErr,
957 /*InitContent=*/false);
958 if (!ElfOrErr)
959 return ElfOrErr.takeError();
960 ObjectFile &Elf = **ElfOrErr;
961
962 OffloadingImage TheImage{};
963 TheImage.TheImageKind = IMG_Object;
964 TheImage.TheOffloadKind = Kind;
965 TheImage.StringData["triple"] =
966 Args.MakeArgString(Str: Elf.makeTriple().getTriple());
967 if (std::optional<StringRef> CPU = Elf.tryGetCPUName())
968 TheImage.StringData["arch"] = Args.MakeArgString(Str: *CPU);
969 TheImage.Image = std::move(*BufferOrErr);
970
971 Images[Kind].emplace_back(Args: std::move(TheImage));
972 }
973 return Error::success();
974}
975
976/// Transforms all the extracted offloading input files into an image that can
977/// be registered by the runtime. If NeedsWrapping is false, writes bundled
978/// output directly without wrapping or host linking.
979Expected<SmallVector<StringRef>>
980linkAndWrapDeviceFiles(ArrayRef<SmallVector<OffloadFile>> LinkerInputFiles,
981 const InputArgList &Args, char **Argv, int Argc,
982 bool NeedsWrapping) {
983 llvm::TimeTraceScope TimeScope("Handle all device input");
984
985 std::mutex ImageMtx;
986 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> Images;
987
988 // Initialize the images with any overriding inputs.
989 if (Args.hasArg(Ids: OPT_override_image))
990 if (Error Err = handleOverrideImages(Args, Images))
991 return std::move(Err);
992
993 auto Err = parallelForEachError(R&: LinkerInputFiles, Fn: [&](auto &Input) -> Error {
994 llvm::TimeTraceScope TimeScope("Link device input");
995
996 // Each thread needs its own copy of the base arguments to maintain
997 // per-device argument storage of synthetic strings.
998 const OptTable &Tbl = getOptTable();
999 BumpPtrAllocator Alloc;
1000 StringSaver Saver(Alloc);
1001 auto BaseArgs =
1002 Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [](StringRef Err) {
1003 reportError(E: createStringError(S: Err));
1004 });
1005 auto LinkerArgs = getLinkerArgs(Input, BaseArgs);
1006
1007 uint16_t ActiveOffloadKindMask = 0u;
1008 for (const auto &File : Input)
1009 ActiveOffloadKindMask |= File.getBinary()->getOffloadKind();
1010
1011 // Linking images of SYCL offload kind with images of other kind is not
1012 // supported.
1013 // TODO: Remove the above limitation.
1014 if ((ActiveOffloadKindMask & OFK_SYCL) &&
1015 ((ActiveOffloadKindMask ^ OFK_SYCL) != 0))
1016 return createStringError(Fmt: "Linking images of SYCL offload kind with "
1017 "images of any other kind is not supported");
1018
1019 // Write any remaining device inputs to an output file.
1020 SmallVector<StringRef> InputFiles;
1021 for (const OffloadFile &File : Input) {
1022 auto FileNameOrErr = writeOffloadFile(File);
1023 if (!FileNameOrErr)
1024 return FileNameOrErr.takeError();
1025 InputFiles.emplace_back(Args&: *FileNameOrErr);
1026 }
1027
1028 // Link the remaining device files using the device linker.
1029 auto OutputOrErr =
1030 linkDevice(InputFiles, LinkerArgs, ActiveOffloadKindMask);
1031 if (!OutputOrErr)
1032 return OutputOrErr.takeError();
1033
1034 // Store the offloading image for each linked output file.
1035 for (OffloadKind Kind = OFK_OpenMP; Kind != OFK_LAST;
1036 Kind = static_cast<OffloadKind>((uint16_t)(Kind) << 1)) {
1037 if ((ActiveOffloadKindMask & Kind) == 0)
1038 continue;
1039 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
1040 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *OutputOrErr);
1041 if (std::error_code EC = FileOrErr.getError()) {
1042 if (DryRun)
1043 FileOrErr = MemoryBuffer::getMemBuffer(InputData: "");
1044 else
1045 return createFileError(*OutputOrErr, EC);
1046 }
1047
1048 // Manually containerize offloading images not in ELF format.
1049 if (Error E = containerizeRawImage(*FileOrErr, Kind, LinkerArgs))
1050 return E;
1051
1052 std::scoped_lock<decltype(ImageMtx)> Guard(ImageMtx);
1053 OffloadingImage TheImage{};
1054 TheImage.TheImageKind =
1055 Args.hasArg(Ids: OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object;
1056 TheImage.TheOffloadKind = Kind;
1057 TheImage.StringData["triple"] =
1058 Args.MakeArgString(Str: LinkerArgs.getLastArgValue(OPT_triple_EQ));
1059 TheImage.StringData["arch"] =
1060 Args.MakeArgString(Str: LinkerArgs.getLastArgValue(OPT_arch_EQ));
1061 TheImage.Image = std::move(*FileOrErr);
1062
1063 Images[Kind].emplace_back(Args: std::move(TheImage));
1064 }
1065 return Error::success();
1066 });
1067 if (Err)
1068 return std::move(Err);
1069
1070 // Create a binary image of each offloading image and either embed it into a
1071 // new object file, or if all inputs were direct offload binaries, emit the
1072 // fat binary directly (e.g. .hipfb / .fatbin).
1073 SmallVector<StringRef> WrappedOutput;
1074 for (auto &[Kind, Input] : Images) {
1075 // We sort the entries before bundling so they appear in a deterministic
1076 // order in the final binary.
1077 llvm::sort(C&: Input, Comp: [](OffloadingImage &A, OffloadingImage &B) {
1078 StringRef TripleA = A.StringData.lookup(Key: "triple");
1079 StringRef TripleB = B.StringData.lookup(Key: "triple");
1080 StringRef ArchA = A.StringData.lookup(Key: "arch");
1081 StringRef ArchB = B.StringData.lookup(Key: "arch");
1082 if (TripleA != TripleB)
1083 return TripleA > TripleB;
1084 if (ArchA != ArchB)
1085 return ArchA > ArchB;
1086 return A.TheOffloadKind < B.TheOffloadKind;
1087 });
1088 auto BundledImagesOrErr = bundleLinkedOutput(Images: Input, Args, Kind);
1089 if (!BundledImagesOrErr)
1090 return BundledImagesOrErr.takeError();
1091
1092 if (!NeedsWrapping) {
1093 if (BundledImagesOrErr->size() != 1)
1094 return createStringError(
1095 Fmt: "Expected a single bundled image for direct fat binary output");
1096
1097 Expected<std::unique_ptr<FileOutputBuffer>> FOBOrErr =
1098 FileOutputBuffer::create(
1099 FilePath: ExecutableName, Size: BundledImagesOrErr->front()->getBufferSize());
1100 if (!FOBOrErr)
1101 return FOBOrErr.takeError();
1102 std::unique_ptr<FileOutputBuffer> FOB = std::move(*FOBOrErr);
1103 llvm::copy(Range: BundledImagesOrErr->front()->getBuffer(),
1104 Out: FOB->getBufferStart());
1105 if (Error E = FOB->commit())
1106 return std::move(E);
1107
1108 continue;
1109 }
1110
1111 auto OutputOrErr = wrapDeviceImages(Buffers: *BundledImagesOrErr, Args, Kind);
1112 if (!OutputOrErr)
1113 return OutputOrErr.takeError();
1114 WrappedOutput.push_back(Elt: *OutputOrErr);
1115 }
1116
1117 return WrappedOutput;
1118}
1119
1120std::optional<std::string> findFile(StringRef Dir, StringRef Root,
1121 const Twine &Name) {
1122 SmallString<128> Path;
1123 if (Dir.starts_with(Prefix: "="))
1124 sys::path::append(path&: Path, a: Root, b: Dir.substr(Start: 1), c: Name);
1125 else
1126 sys::path::append(path&: Path, a: Dir, b: Name);
1127
1128 if (sys::fs::exists(Path))
1129 return static_cast<std::string>(Path);
1130 return std::nullopt;
1131}
1132
1133std::optional<std::string>
1134findFromSearchPaths(StringRef Name, StringRef Root,
1135 ArrayRef<StringRef> SearchPaths) {
1136 for (StringRef Dir : SearchPaths)
1137 if (std::optional<std::string> File = findFile(Dir, Root, Name))
1138 return File;
1139 return std::nullopt;
1140}
1141
1142std::optional<std::string>
1143searchLibraryBaseName(StringRef Name, StringRef Root,
1144 ArrayRef<StringRef> SearchPaths) {
1145 for (StringRef Dir : SearchPaths) {
1146 if (std::optional<std::string> File =
1147 findFile(Dir, Root, Name: "lib" + Name + ".so"))
1148 return File;
1149 if (std::optional<std::string> File =
1150 findFile(Dir, Root, Name: "lib" + Name + ".a"))
1151 return File;
1152 }
1153 return std::nullopt;
1154}
1155
1156/// Search for static libraries in the linker's library path given input like
1157/// `-lfoo` or `-l:libfoo.a`.
1158std::optional<std::string> searchLibrary(StringRef Input, StringRef Root,
1159 ArrayRef<StringRef> SearchPaths) {
1160 if (Input.starts_with(Prefix: ":"))
1161 return findFromSearchPaths(Name: Input.drop_front(), Root, SearchPaths);
1162 if (Input.ends_with(Suffix: ".lib"))
1163 return findFromSearchPaths(Name: Input, Root, SearchPaths);
1164 return searchLibraryBaseName(Name: Input, Root, SearchPaths);
1165}
1166
1167/// Search the input files and libraries for embedded device offloading code
1168/// and add it to the list of files to be linked. Files coming from static
1169/// libraries are only added to the input if they are used by an existing
1170/// input file. Returns a list of input files intended for a single linking job.
1171Expected<SmallVector<SmallVector<OffloadFile>>>
1172getDeviceInput(const ArgList &Args) {
1173 llvm::TimeTraceScope TimeScope("ExtractDeviceCode");
1174
1175 // Skip all the input if the user is overriding the output.
1176 if (Args.hasArg(Ids: OPT_override_image))
1177 return SmallVector<SmallVector<OffloadFile>>();
1178
1179 StringRef Root = Args.getLastArgValue(Id: OPT_sysroot_EQ);
1180 SmallVector<StringRef> LibraryPaths;
1181 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_library_path, Ids: OPT_libpath))
1182 LibraryPaths.push_back(Elt: Arg->getValue());
1183
1184 BumpPtrAllocator Alloc;
1185 StringSaver Saver(Alloc);
1186
1187 // Try to extract device code from the linker input files.
1188 bool WholeArchive = Args.hasArg(Ids: OPT_wholearchive_flag);
1189 SmallVector<OffloadFile> ObjectFilesToExtract;
1190 SmallVector<OffloadFile> ArchiveFilesToExtract;
1191 for (const opt::Arg *Arg : Args.filtered(
1192 Ids: OPT_INPUT, Ids: OPT_library, Ids: OPT_whole_archive, Ids: OPT_no_whole_archive)) {
1193 if (Arg->getOption().matches(ID: OPT_whole_archive) ||
1194 Arg->getOption().matches(ID: OPT_no_whole_archive)) {
1195 WholeArchive = Arg->getOption().matches(ID: OPT_whole_archive);
1196 continue;
1197 }
1198
1199 std::optional<std::string> Filename =
1200 Arg->getOption().matches(ID: OPT_library)
1201 ? searchLibrary(Input: Arg->getValue(), Root, SearchPaths: LibraryPaths)
1202 : std::string(Arg->getValue());
1203
1204 if (!Filename && Arg->getOption().matches(ID: OPT_library))
1205 return createStringError(Fmt: "unable to find library -l%s", Vals: Arg->getValue());
1206
1207 if (!Filename || !sys::fs::exists(Path: *Filename) ||
1208 sys::fs::is_directory(Path: *Filename))
1209 continue;
1210
1211 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1212 MemoryBuffer::getFileOrSTDIN(Filename: *Filename);
1213 if (std::error_code EC = BufferOrErr.getError())
1214 return createFileError(F: *Filename, EC);
1215
1216 MemoryBufferRef Buffer = **BufferOrErr;
1217 if (identify_magic(magic: Buffer.getBuffer()) == file_magic::elf_shared_object)
1218 continue;
1219
1220 SmallVector<OffloadFile> Binaries;
1221 if (Error Err = extractOffloadBinaries(Buffer, Binaries))
1222 return std::move(Err);
1223
1224 for (auto &Binary : Binaries) {
1225 if (identify_magic(magic: Buffer.getBuffer()) == file_magic::archive &&
1226 !WholeArchive)
1227 ArchiveFilesToExtract.emplace_back(Args: std::move(Binary));
1228 else
1229 ObjectFilesToExtract.emplace_back(Args: std::move(Binary));
1230 }
1231 }
1232
1233 // Link all standard input files and update the list of symbols.
1234 MapVector<OffloadFile::TargetID, SmallVector<OffloadFile, 0>> InputFiles;
1235 for (OffloadFile &Binary : ObjectFilesToExtract) {
1236 if (!Binary.getBinary())
1237 continue;
1238
1239 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1240 for (const auto &[ID, Input] : InputFiles)
1241 if (object::areTargetsCompatible(LHS: Binary, RHS: ID))
1242 CompatibleTargets.emplace_back(Args: ID);
1243
1244 for (const auto &[Index, ID] : llvm::enumerate(First&: CompatibleTargets)) {
1245 // If another target needs this binary it must be copied instead.
1246 if (Index == CompatibleTargets.size() - 1)
1247 InputFiles[ID].emplace_back(Args: std::move(Binary));
1248 else
1249 InputFiles[ID].emplace_back(Args: Binary.copy());
1250 }
1251 }
1252
1253 llvm::DenseSet<StringRef> ShouldExtract;
1254 for (auto &Arg : Args.getAllArgValues(Id: OPT_should_extract))
1255 ShouldExtract.insert(V: Arg);
1256
1257 // We only extract archive members from the fat binary if we find a used or
1258 // requested target. Unlike normal static archive handling, we just extract
1259 // every object file contained in the archive.
1260 for (OffloadFile &Binary : ArchiveFilesToExtract) {
1261 if (!Binary.getBinary())
1262 continue;
1263
1264 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1265 for (const auto &[ID, Input] : InputFiles)
1266 if (object::areTargetsCompatible(LHS: Binary, RHS: ID))
1267 CompatibleTargets.emplace_back(Args: ID);
1268
1269 for (const auto &[Index, ID] : llvm::enumerate(First&: CompatibleTargets)) {
1270 // Only extract if we have an object matching this target or it
1271 // was specifically requested.
1272 if (!InputFiles.count(Key: ID) && !ShouldExtract.contains(V: ID.second))
1273 continue;
1274
1275 // If another target needs this binary it must be copied instead.
1276 if (Index == CompatibleTargets.size() - 1)
1277 InputFiles[ID].emplace_back(Args: std::move(Binary));
1278 else
1279 InputFiles[ID].emplace_back(Args: Binary.copy());
1280 }
1281 }
1282
1283 SmallVector<SmallVector<OffloadFile>> InputsForTarget;
1284 for (auto &[ID, Input] : InputFiles)
1285 InputsForTarget.emplace_back(Args: std::move(Input));
1286
1287 return std::move(InputsForTarget);
1288}
1289
1290} // namespace
1291
1292int main(int Argc, char **Argv) {
1293 InitLLVM X(Argc, Argv);
1294 InitializeAllTargetInfos();
1295 InitializeAllTargets();
1296 InitializeAllTargetMCs();
1297 InitializeAllAsmParsers();
1298 InitializeAllAsmPrinters();
1299
1300 LinkerExecutable = Argv[0];
1301 sys::PrintStackTraceOnErrorSignal(Argv0: Argv[0]);
1302
1303 const OptTable &Tbl = getOptTable();
1304 BumpPtrAllocator Alloc;
1305 StringSaver Saver(Alloc);
1306 auto Args = Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [&](StringRef Err) {
1307 reportError(E: createStringError(S: Err));
1308 });
1309
1310 if (Args.hasArg(Ids: OPT_help) || Args.hasArg(Ids: OPT_help_hidden)) {
1311 Tbl.printHelp(
1312 OS&: outs(),
1313 Usage: "clang-linker-wrapper [options] -- <options to pass to the linker>",
1314 Title: "\nA wrapper utility over the host linker. It scans the input files\n"
1315 "for sections that require additional processing prior to linking.\n"
1316 "It will then transparently pass all arguments and input to the\n"
1317 "specified host linker to create the final binary.\n",
1318 ShowHidden: Args.hasArg(Ids: OPT_help_hidden), ShowAllAliases: Args.hasArg(Ids: OPT_help_hidden));
1319 return EXIT_SUCCESS;
1320 }
1321 if (Args.hasArg(Ids: OPT_version)) {
1322 printVersion(OS&: outs());
1323 return EXIT_SUCCESS;
1324 }
1325
1326 // This forwards '-mllvm' arguments to LLVM if present.
1327 SmallVector<const char *> NewArgv = {Argv[0]};
1328 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_mllvm))
1329 NewArgv.push_back(Elt: Arg->getValue());
1330 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_offload_opt_eq_minus))
1331 NewArgv.push_back(Elt: Arg->getValue());
1332 SmallVector<PassPlugin, 1> PluginList;
1333 PassPlugins.setCallback([&](const std::string &PluginPath) {
1334 auto Plugin = PassPlugin::Load(Filename: PluginPath);
1335 if (!Plugin)
1336 reportFatalUsageError(Err: Plugin.takeError());
1337 PluginList.emplace_back(Args&: Plugin.get());
1338 });
1339 cl::ParseCommandLineOptions(argc: NewArgv.size(), argv: &NewArgv[0]);
1340
1341 Verbose = Args.hasArg(Ids: OPT_verbose);
1342 DryRun = Args.hasArg(Ids: OPT_dry_run);
1343 SaveTemps = Args.hasArg(Ids: OPT_save_temps);
1344 CudaBinaryPath = Args.getLastArgValue(Id: OPT_cuda_path_EQ).str();
1345 CanonicalPrefixes = !Args.hasArg(Ids: OPT_no_canonical_prefixes);
1346
1347 llvm::Triple Triple(
1348 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
1349 if (Args.hasArg(Ids: OPT_o))
1350 ExecutableName = Args.getLastArgValue(Id: OPT_o, Default: "a.out");
1351 else if (Args.hasArg(Ids: OPT_out))
1352 ExecutableName = Args.getLastArgValue(Id: OPT_out, Default: "a.exe");
1353 else
1354 ExecutableName = Triple.isOSWindows() ? "a.exe" : "a.out";
1355
1356 parallel::strategy = hardware_concurrency(ThreadCount: 1);
1357 if (auto *Arg = Args.getLastArg(Ids: OPT_wrapper_jobs)) {
1358 StringRef Val = Arg->getValue();
1359 if (Val.equals_insensitive(RHS: "jobserver"))
1360 parallel::strategy = jobserver_concurrency();
1361 else {
1362 unsigned Threads = 0;
1363 if (!llvm::to_integer(S: Val, Num&: Threads) || Threads == 0)
1364 reportError(E: createStringError(
1365 Fmt: "%s: expected a positive integer or 'jobserver', got '%s'",
1366 Vals: Arg->getSpelling().data(), Vals: Val.data()));
1367 else
1368 parallel::strategy = hardware_concurrency(ThreadCount: Threads);
1369 }
1370 }
1371
1372 if (Args.hasArg(Ids: OPT_wrapper_time_trace_eq)) {
1373 unsigned Granularity;
1374 if (Args.getLastArgValue(Id: OPT_wrapper_time_trace_granularity, Default: "500")
1375 .getAsInteger(Radix: 10, Result&: Granularity))
1376 reportError(
1377 E: createStringError(Fmt: "invalid value for time trace granularity"));
1378 timeTraceProfilerInitialize(TimeTraceGranularity: Granularity, ProcName: Argv[0]);
1379 }
1380
1381 {
1382 llvm::TimeTraceScope TimeScope("Execute linker wrapper");
1383
1384 // Extract the device input files stored in the host fat binary.
1385 auto DeviceInputFiles = getDeviceInput(Args);
1386 if (!DeviceInputFiles)
1387 reportError(E: DeviceInputFiles.takeError());
1388
1389 // Check if we should emit fat binary directly without wrapping or host
1390 // linking.
1391 bool EmitFatbinOnly = Args.hasArg(Ids: OPT_emit_fatbin_only);
1392
1393 // Link and process the device images. The function may emit a direct fat
1394 // binary if --emit-fatbin-only is specified.
1395 auto FilesOrErr = linkAndWrapDeviceFiles(LinkerInputFiles: *DeviceInputFiles, Args, Argv,
1396 Argc, NeedsWrapping: !EmitFatbinOnly);
1397 if (!FilesOrErr)
1398 reportError(E: FilesOrErr.takeError());
1399
1400 // Run the host linking job with the rendered arguments.
1401 if (!EmitFatbinOnly) {
1402 if (Error Err = runLinker(Files: *FilesOrErr, Args))
1403 reportError(E: std::move(Err));
1404 }
1405 }
1406
1407 if (const opt::Arg *Arg = Args.getLastArg(Ids: OPT_wrapper_time_trace_eq)) {
1408 if (Error Err = timeTraceProfilerWrite(PreferredFileName: Arg->getValue(), FallbackFileName: ExecutableName))
1409 reportError(E: std::move(Err));
1410 timeTraceProfilerCleanup();
1411 }
1412
1413 // Remove the temporary files created.
1414 if (!SaveTemps)
1415 for (const auto &TempFile : TempFiles)
1416 if (std::error_code EC = sys::fs::remove(path: TempFile))
1417 reportError(E: createFileError(F: TempFile, EC));
1418
1419 return EXIT_SUCCESS;
1420}
1421