1 | //===-- clang-nvlink-wrapper/ClangNVLinkWrapper.cpp - NVIDIA linker util --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===---------------------------------------------------------------------===// |
8 | // |
9 | // This tool wraps around the NVIDIA linker called 'nvlink'. The NVIDIA linker |
10 | // is required to create NVPTX applications, but does not support common |
11 | // features like LTO or archives. This utility wraps around the tool to cover |
12 | // its deficiencies. This tool can be removed once NVIDIA improves their linker |
13 | // or ports it to `ld.lld`. |
14 | // |
15 | //===---------------------------------------------------------------------===// |
16 | |
17 | #include "clang/Basic/Version.h" |
18 | |
19 | #include "llvm/ADT/StringExtras.h" |
20 | #include "llvm/BinaryFormat/Magic.h" |
21 | #include "llvm/Bitcode/BitcodeWriter.h" |
22 | #include "llvm/CodeGen/CommandFlags.h" |
23 | #include "llvm/IR/DiagnosticPrinter.h" |
24 | #include "llvm/LTO/LTO.h" |
25 | #include "llvm/Object/Archive.h" |
26 | #include "llvm/Object/ArchiveWriter.h" |
27 | #include "llvm/Object/Binary.h" |
28 | #include "llvm/Object/ELFObjectFile.h" |
29 | #include "llvm/Object/IRObjectFile.h" |
30 | #include "llvm/Object/ObjectFile.h" |
31 | #include "llvm/Object/OffloadBinary.h" |
32 | #include "llvm/Option/ArgList.h" |
33 | #include "llvm/Option/OptTable.h" |
34 | #include "llvm/Option/Option.h" |
35 | #include "llvm/Remarks/HotnessThresholdParser.h" |
36 | #include "llvm/Support/CommandLine.h" |
37 | #include "llvm/Support/FileOutputBuffer.h" |
38 | #include "llvm/Support/FileSystem.h" |
39 | #include "llvm/Support/InitLLVM.h" |
40 | #include "llvm/Support/MemoryBuffer.h" |
41 | #include "llvm/Support/Path.h" |
42 | #include "llvm/Support/Program.h" |
43 | #include "llvm/Support/Signals.h" |
44 | #include "llvm/Support/StringSaver.h" |
45 | #include "llvm/Support/TargetSelect.h" |
46 | #include "llvm/Support/WithColor.h" |
47 | |
48 | using namespace llvm; |
49 | using namespace llvm::opt; |
50 | using namespace llvm::object; |
51 | |
52 | // Various tools (e.g., llc and opt) duplicate this series of declarations for |
53 | // options related to passes and remarks. |
54 | static cl::opt<bool> ( |
55 | "pass-remarks-with-hotness" , |
56 | cl::desc("With PGO, include profile count in optimization remarks" ), |
57 | cl::Hidden); |
58 | |
59 | static cl::opt<std::optional<uint64_t>, false, remarks::HotnessThresholdParser> |
60 | ( |
61 | "pass-remarks-hotness-threshold" , |
62 | cl::desc("Minimum profile count required for " |
63 | "an optimization remark to be output. " |
64 | "Use 'auto' to apply the threshold from profile summary." ), |
65 | cl::value_desc("N or 'auto'" ), cl::init(Val: 0), cl::Hidden); |
66 | |
67 | static cl::opt<std::string> |
68 | ("pass-remarks-output" , |
69 | cl::desc("Output filename for pass remarks" ), |
70 | cl::value_desc("filename" )); |
71 | |
72 | static cl::opt<std::string> |
73 | ("pass-remarks-filter" , |
74 | cl::desc("Only record optimization remarks from passes whose " |
75 | "names match the given regular expression" ), |
76 | cl::value_desc("regex" )); |
77 | |
78 | static cl::opt<std::string> ( |
79 | "pass-remarks-format" , |
80 | cl::desc("The format used for serializing remarks (default: YAML)" ), |
81 | cl::value_desc("format" ), cl::init(Val: "yaml" )); |
82 | |
83 | static cl::list<std::string> |
84 | PassPlugins("load-pass-plugin" , |
85 | cl::desc("Load passes from plugin library" )); |
86 | |
87 | static void printVersion(raw_ostream &OS) { |
88 | OS << clang::getClangToolFullVersion(ToolName: "clang-nvlink-wrapper" ) << '\n'; |
89 | } |
90 | |
91 | /// The value of `argv[0]` when run. |
92 | static const char *Executable; |
93 | |
94 | /// Temporary files to be cleaned up. |
95 | static SmallVector<SmallString<128>> TempFiles; |
96 | |
97 | /// Codegen flags for LTO backend. |
98 | static codegen::RegisterCodeGenFlags CodeGenFlags; |
99 | |
100 | namespace { |
// Flag bits private to this wrapper's option table. Options carrying
// `WrapperOnlyOption` are not forwarded to 'nvlink'. The value must stay
// disjoint from the bits used by llvm::opt::DriverFlag.
enum WrapperFlags {
  WrapperOnlyOption = 1 << 4,
};
103 | |
104 | enum ID { |
105 | OPT_INVALID = 0, // This is not an option ID. |
106 | #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), |
107 | #include "NVLinkOpts.inc" |
108 | LastOption |
109 | #undef OPTION |
110 | }; |
111 | |
112 | #define OPTTABLE_STR_TABLE_CODE |
113 | #include "NVLinkOpts.inc" |
114 | #undef OPTTABLE_STR_TABLE_CODE |
115 | |
116 | #define OPTTABLE_PREFIXES_TABLE_CODE |
117 | #include "NVLinkOpts.inc" |
118 | #undef OPTTABLE_PREFIXES_TABLE_CODE |
119 | |
120 | static constexpr OptTable::Info InfoTable[] = { |
121 | #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), |
122 | #include "NVLinkOpts.inc" |
123 | #undef OPTION |
124 | }; |
125 | |
126 | class WrapperOptTable : public opt::GenericOptTable { |
127 | public: |
128 | WrapperOptTable() |
129 | : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} |
130 | }; |
131 | |
132 | const OptTable &getOptTable() { |
133 | static const WrapperOptTable *Table = []() { |
134 | auto Result = std::make_unique<WrapperOptTable>(); |
135 | return Result.release(); |
136 | }(); |
137 | return *Table; |
138 | } |
139 | |
140 | [[noreturn]] void reportError(Error E) { |
141 | outs().flush(); |
142 | logAllUnhandledErrors(E: std::move(E), OS&: WithColor::error(OS&: errs(), Prefix: Executable)); |
143 | exit(EXIT_FAILURE); |
144 | } |
145 | |
146 | void diagnosticHandler(const DiagnosticInfo &DI) { |
147 | std::string ErrStorage; |
148 | raw_string_ostream OS(ErrStorage); |
149 | DiagnosticPrinterRawOStream DP(OS); |
150 | DI.print(DP); |
151 | |
152 | switch (DI.getSeverity()) { |
153 | case DS_Error: |
154 | WithColor::error(OS&: errs(), Prefix: Executable) << ErrStorage << "\n" ; |
155 | break; |
156 | case DS_Warning: |
157 | WithColor::warning(OS&: errs(), Prefix: Executable) << ErrStorage << "\n" ; |
158 | break; |
159 | case DS_Note: |
160 | WithColor::note(OS&: errs(), Prefix: Executable) << ErrStorage << "\n" ; |
161 | break; |
162 | case DS_Remark: |
163 | WithColor::remark(OS&: errs()) << ErrStorage << "\n" ; |
164 | break; |
165 | } |
166 | } |
167 | |
168 | Expected<StringRef> createTempFile(const ArgList &Args, const Twine &Prefix, |
169 | StringRef Extension) { |
170 | SmallString<128> OutputFile; |
171 | if (Args.hasArg(Ids: OPT_save_temps)) { |
172 | (Prefix + "." + Extension).toNullTerminatedStringRef(Out&: OutputFile); |
173 | } else { |
174 | if (std::error_code EC = |
175 | sys::fs::createTemporaryFile(Prefix, Suffix: Extension, ResultPath&: OutputFile)) |
176 | return createFileError(F: OutputFile, EC); |
177 | } |
178 | |
179 | TempFiles.emplace_back(Args: std::move(OutputFile)); |
180 | return TempFiles.back(); |
181 | } |
182 | |
183 | Expected<std::string> findProgram(const ArgList &Args, StringRef Name, |
184 | ArrayRef<StringRef> Paths) { |
185 | if (Args.hasArg(Ids: OPT_dry_run)) |
186 | return Name.str(); |
187 | ErrorOr<std::string> Path = sys::findProgramByName(Name, Paths); |
188 | if (!Path) |
189 | Path = sys::findProgramByName(Name); |
190 | if (!Path) |
191 | return createStringError(EC: Path.getError(), |
192 | S: "Unable to find '" + Name + "' in path" ); |
193 | return *Path; |
194 | } |
195 | |
196 | std::optional<std::string> findFile(StringRef Dir, StringRef Root, |
197 | const Twine &Name) { |
198 | SmallString<128> Path; |
199 | if (Dir.starts_with(Prefix: "=" )) |
200 | sys::path::append(path&: Path, a: Root, b: Dir.substr(Start: 1), c: Name); |
201 | else |
202 | sys::path::append(path&: Path, a: Dir, b: Name); |
203 | |
204 | if (sys::fs::exists(Path)) |
205 | return static_cast<std::string>(Path); |
206 | return std::nullopt; |
207 | } |
208 | |
209 | std::optional<std::string> |
210 | findFromSearchPaths(StringRef Name, StringRef Root, |
211 | ArrayRef<StringRef> SearchPaths) { |
212 | for (StringRef Dir : SearchPaths) |
213 | if (std::optional<std::string> File = findFile(Dir, Root, Name)) |
214 | return File; |
215 | return std::nullopt; |
216 | } |
217 | |
218 | std::optional<std::string> |
219 | searchLibraryBaseName(StringRef Name, StringRef Root, |
220 | ArrayRef<StringRef> SearchPaths) { |
221 | for (StringRef Dir : SearchPaths) |
222 | if (std::optional<std::string> File = |
223 | findFile(Dir, Root, Name: "lib" + Name + ".a" )) |
224 | return File; |
225 | return std::nullopt; |
226 | } |
227 | |
228 | /// Search for static libraries in the linker's library path given input like |
229 | /// `-lfoo` or `-l:libfoo.a`. |
230 | std::optional<std::string> searchLibrary(StringRef Input, StringRef Root, |
231 | ArrayRef<StringRef> SearchPaths) { |
232 | if (Input.starts_with(Prefix: ":" )) |
233 | return findFromSearchPaths(Name: Input.drop_front(), Root, SearchPaths); |
234 | return searchLibraryBaseName(Name: Input, Root, SearchPaths); |
235 | } |
236 | |
237 | void printCommands(ArrayRef<StringRef> CmdArgs) { |
238 | if (CmdArgs.empty()) |
239 | return; |
240 | |
241 | errs() << " \"" << CmdArgs.front() << "\" " ; |
242 | errs() << join(Begin: std::next(x: CmdArgs.begin()), End: CmdArgs.end(), Separator: " " ) << "\n" ; |
243 | } |
244 | |
245 | /// A minimum symbol interface that provides the necessary information to |
246 | /// extract archive members and resolve LTO symbols. |
247 | struct Symbol { |
248 | enum Flags { |
249 | None = 0, |
250 | Undefined = 1 << 0, |
251 | Weak = 1 << 1, |
252 | }; |
253 | |
254 | Symbol() : File(), Flags(None), UsedInRegularObj(false) {} |
255 | Symbol(Symbol::Flags Flags) : File(), Flags(Flags), UsedInRegularObj(true) {} |
256 | |
257 | Symbol(MemoryBufferRef File, const irsymtab::Reader::SymbolRef Sym) |
258 | : File(File), Flags(0), UsedInRegularObj(false) { |
259 | if (Sym.isUndefined()) |
260 | Flags |= Undefined; |
261 | if (Sym.isWeak()) |
262 | Flags |= Weak; |
263 | } |
264 | |
265 | Symbol(MemoryBufferRef File, const SymbolRef Sym) |
266 | : File(File), Flags(0), UsedInRegularObj(false) { |
267 | auto FlagsOrErr = Sym.getFlags(); |
268 | if (!FlagsOrErr) |
269 | reportError(E: FlagsOrErr.takeError()); |
270 | if (*FlagsOrErr & SymbolRef::SF_Undefined) |
271 | Flags |= Undefined; |
272 | if (*FlagsOrErr & SymbolRef::SF_Weak) |
273 | Flags |= Weak; |
274 | |
275 | auto NameOrErr = Sym.getName(); |
276 | if (!NameOrErr) |
277 | reportError(E: NameOrErr.takeError()); |
278 | } |
279 | |
280 | bool isWeak() const { return Flags & Weak; } |
281 | bool isUndefined() const { return Flags & Undefined; } |
282 | |
283 | MemoryBufferRef File; |
284 | uint32_t Flags; |
285 | bool UsedInRegularObj; |
286 | }; |
287 | |
288 | Expected<StringRef> runPTXAs(StringRef File, const ArgList &Args) { |
289 | std::string CudaPath = Args.getLastArgValue(Id: OPT_cuda_path_EQ).str(); |
290 | std::string GivenPath = Args.getLastArgValue(Id: OPT_ptxas_path_EQ).str(); |
291 | Expected<std::string> PTXAsPath = |
292 | findProgram(Args, Name: "ptxas" , Paths: {CudaPath + "/bin" , GivenPath}); |
293 | if (!PTXAsPath) |
294 | return PTXAsPath.takeError(); |
295 | if (!Args.hasArg(Ids: OPT_arch)) |
296 | return createStringError( |
297 | Fmt: "must pass in an explicit nvptx64 gpu architecture to 'ptxas'" ); |
298 | |
299 | auto TempFileOrErr = createTempFile( |
300 | Args, Prefix: sys::path::stem(path: Args.getLastArgValue(Id: OPT_o, Default: "a.out" )), Extension: "cubin" ); |
301 | if (!TempFileOrErr) |
302 | return TempFileOrErr.takeError(); |
303 | |
304 | SmallVector<StringRef> AssemblerArgs({*PTXAsPath, "-m64" , "-c" , File}); |
305 | if (Args.hasArg(Ids: OPT_verbose)) |
306 | AssemblerArgs.push_back(Elt: "-v" ); |
307 | if (Args.hasArg(Ids: OPT_g)) { |
308 | if (Args.hasArg(Ids: OPT_O)) |
309 | WithColor::warning(OS&: errs(), Prefix: Executable) |
310 | << "Optimized debugging not supported, overriding to '-O0'\n" ; |
311 | AssemblerArgs.push_back(Elt: "-O0" ); |
312 | } else |
313 | AssemblerArgs.push_back( |
314 | Elt: Args.MakeArgString(Str: "-O" + Args.getLastArgValue(Id: OPT_O, Default: "3" ))); |
315 | AssemblerArgs.append(IL: {"-arch" , Args.getLastArgValue(Id: OPT_arch)}); |
316 | AssemblerArgs.append(IL: {"-o" , *TempFileOrErr}); |
317 | |
318 | if (Args.hasArg(Ids: OPT_dry_run) || Args.hasArg(Ids: OPT_verbose)) |
319 | printCommands(CmdArgs: AssemblerArgs); |
320 | if (Args.hasArg(Ids: OPT_dry_run)) |
321 | return Args.MakeArgString(Str: *TempFileOrErr); |
322 | if (sys::ExecuteAndWait(Program: *PTXAsPath, Args: AssemblerArgs)) |
323 | return createStringError(S: "'" + sys::path::filename(path: *PTXAsPath) + "'" + |
324 | " failed" ); |
325 | return Args.MakeArgString(Str: *TempFileOrErr); |
326 | } |
327 | |
328 | Expected<std::unique_ptr<lto::LTO>> createLTO(const ArgList &Args) { |
329 | const llvm::Triple Triple("nvptx64-nvidia-cuda" ); |
330 | lto::Config Conf; |
331 | lto::ThinBackend Backend; |
332 | unsigned Jobs = 0; |
333 | if (auto *Arg = Args.getLastArg(Ids: OPT_jobs)) |
334 | if (!to_integer(S: Arg->getValue(), Num&: Jobs) || Jobs == 0) |
335 | reportError(E: createStringError(Fmt: "%s: expected a positive integer, got '%s'" , |
336 | Vals: Arg->getSpelling().data(), |
337 | Vals: Arg->getValue())); |
338 | Backend = |
339 | lto::createInProcessThinBackend(Parallelism: heavyweight_hardware_concurrency(ThreadCount: Jobs)); |
340 | |
341 | Conf.CPU = Args.getLastArgValue(Id: OPT_arch); |
342 | Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple: Triple); |
343 | |
344 | Conf.RemarksFilename = |
345 | Args.getLastArgValue(Id: OPT_opt_remarks_filename, Default: RemarksFilename); |
346 | Conf.RemarksPasses = |
347 | Args.getLastArgValue(Id: OPT_opt_remarks_filter, Default: RemarksPasses); |
348 | Conf.RemarksFormat = |
349 | Args.getLastArgValue(Id: OPT_opt_remarks_format, Default: RemarksFormat); |
350 | |
351 | Conf.RemarksWithHotness = |
352 | Args.hasArg(Ids: OPT_opt_remarks_with_hotness) || RemarksWithHotness; |
353 | Conf.RemarksHotnessThreshold = RemarksHotnessThreshold; |
354 | |
355 | Conf.MAttrs = llvm::codegen::getMAttrs(); |
356 | std::optional<CodeGenOptLevel> CGOptLevelOrNone = |
357 | CodeGenOpt::parseLevel(C: Args.getLastArgValue(Id: OPT_O, Default: "2" )[0]); |
358 | assert(CGOptLevelOrNone && "Invalid optimization level" ); |
359 | Conf.CGOptLevel = *CGOptLevelOrNone; |
360 | Conf.OptLevel = Args.getLastArgValue(Id: OPT_O, Default: "2" )[0] - '0'; |
361 | Conf.DefaultTriple = Triple.getTriple(); |
362 | |
363 | Conf.OptPipeline = Args.getLastArgValue(Id: OPT_lto_newpm_passes, Default: "" ); |
364 | Conf.PassPlugins = PassPlugins; |
365 | Conf.DebugPassManager = Args.hasArg(Ids: OPT_lto_debug_pass_manager); |
366 | |
367 | Conf.DiagHandler = diagnosticHandler; |
368 | Conf.CGFileType = CodeGenFileType::AssemblyFile; |
369 | |
370 | if (Args.hasArg(Ids: OPT_lto_emit_llvm)) { |
371 | Conf.PreCodeGenModuleHook = [&](size_t, const Module &M) { |
372 | std::error_code EC; |
373 | raw_fd_ostream LinkedBitcode(Args.getLastArgValue(Id: OPT_o, Default: "a.out" ), EC); |
374 | if (EC) |
375 | reportError(E: errorCodeToError(EC)); |
376 | WriteBitcodeToFile(M, Out&: LinkedBitcode); |
377 | return false; |
378 | }; |
379 | } |
380 | |
381 | if (Args.hasArg(Ids: OPT_save_temps)) |
382 | if (Error Err = Conf.addSaveTemps( |
383 | OutputFileName: (Args.getLastArgValue(Id: OPT_o, Default: "a.out" ) + "." ).str())) |
384 | return Err; |
385 | |
386 | unsigned Partitions = 1; |
387 | if (auto *Arg = Args.getLastArg(Ids: OPT_lto_partitions)) |
388 | if (!to_integer(S: Arg->getValue(), Num&: Partitions) || Partitions == 0) |
389 | reportError(E: createStringError(Fmt: "%s: expected a positive integer, got '%s'" , |
390 | Vals: Arg->getSpelling().data(), |
391 | Vals: Arg->getValue())); |
392 | lto::LTO::LTOKind Kind = Args.hasArg(Ids: OPT_thinlto) ? lto::LTO::LTOK_UnifiedThin |
393 | : lto::LTO::LTOK_Default; |
394 | return std::make_unique<lto::LTO>(args: std::move(Conf), args&: Backend, args&: Partitions, args&: Kind); |
395 | } |
396 | |
397 | Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, |
398 | StringMap<Symbol> &SymTab, bool IsLazy) { |
399 | Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(MBRef: Buffer); |
400 | if (!IRSymtabOrErr) |
401 | return IRSymtabOrErr.takeError(); |
402 | bool = !IsLazy; |
403 | StringMap<Symbol> PendingSymbols; |
404 | for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) { |
405 | for (const auto &IRSym : IRSymtabOrErr->TheReader.module_symbols(I)) { |
406 | if (IRSym.isFormatSpecific() || !IRSym.isGlobal()) |
407 | continue; |
408 | |
409 | Symbol &OldSym = !SymTab.count(Key: IRSym.getName()) && IsLazy |
410 | ? PendingSymbols[IRSym.getName()] |
411 | : SymTab[IRSym.getName()]; |
412 | Symbol Sym = Symbol(Buffer, IRSym); |
413 | if (OldSym.File.getBuffer().empty()) |
414 | OldSym = Sym; |
415 | |
416 | bool ResolvesReference = |
417 | !Sym.isUndefined() && |
418 | (OldSym.isUndefined() || (OldSym.isWeak() && !Sym.isWeak())) && |
419 | !(OldSym.isWeak() && OldSym.isUndefined() && IsLazy); |
420 | Extracted |= ResolvesReference; |
421 | |
422 | Sym.UsedInRegularObj = OldSym.UsedInRegularObj; |
423 | if (ResolvesReference) |
424 | OldSym = Sym; |
425 | } |
426 | } |
427 | if (Extracted) |
428 | for (const auto &[Name, Symbol] : PendingSymbols) |
429 | SymTab[Name] = Symbol; |
430 | return Extracted; |
431 | } |
432 | |
433 | Expected<bool> getSymbolsFromObject(ObjectFile &ObjFile, |
434 | StringMap<Symbol> &SymTab, bool IsLazy) { |
435 | bool = !IsLazy; |
436 | StringMap<Symbol> PendingSymbols; |
437 | for (SymbolRef ObjSym : ObjFile.symbols()) { |
438 | auto NameOrErr = ObjSym.getName(); |
439 | if (!NameOrErr) |
440 | return NameOrErr.takeError(); |
441 | |
442 | Symbol &OldSym = !SymTab.count(Key: *NameOrErr) && IsLazy |
443 | ? PendingSymbols[*NameOrErr] |
444 | : SymTab[*NameOrErr]; |
445 | Symbol Sym = Symbol(ObjFile.getMemoryBufferRef(), ObjSym); |
446 | if (OldSym.File.getBuffer().empty()) |
447 | OldSym = Sym; |
448 | |
449 | bool ResolvesReference = OldSym.isUndefined() && !Sym.isUndefined() && |
450 | (!OldSym.isWeak() || !IsLazy); |
451 | Extracted |= ResolvesReference; |
452 | |
453 | if (ResolvesReference) |
454 | OldSym = Sym; |
455 | OldSym.UsedInRegularObj = true; |
456 | } |
457 | if (Extracted) |
458 | for (const auto &[Name, Symbol] : PendingSymbols) |
459 | SymTab[Name] = Symbol; |
460 | return Extracted; |
461 | } |
462 | |
463 | Expected<bool> getSymbols(MemoryBufferRef Buffer, StringMap<Symbol> &SymTab, |
464 | bool IsLazy) { |
465 | switch (identify_magic(magic: Buffer.getBuffer())) { |
466 | case file_magic::bitcode: { |
467 | return getSymbolsFromBitcode(Buffer, SymTab, IsLazy); |
468 | } |
469 | case file_magic::elf_relocatable: { |
470 | Expected<std::unique_ptr<ObjectFile>> ObjFile = |
471 | ObjectFile::createObjectFile(Object: Buffer); |
472 | if (!ObjFile) |
473 | return ObjFile.takeError(); |
474 | return getSymbolsFromObject(ObjFile&: **ObjFile, SymTab, IsLazy); |
475 | } |
476 | default: |
477 | return createStringError(Fmt: "Unsupported file type" ); |
478 | } |
479 | } |
480 | |
481 | Expected<SmallVector<StringRef>> getInput(const ArgList &Args) { |
482 | SmallVector<StringRef> LibraryPaths; |
483 | for (const opt::Arg *Arg : Args.filtered(Ids: OPT_library_path)) |
484 | LibraryPaths.push_back(Elt: Arg->getValue()); |
485 | |
486 | bool WholeArchive = false; |
487 | SmallVector<std::pair<std::unique_ptr<MemoryBuffer>, bool>> InputFiles; |
488 | for (const opt::Arg *Arg : Args.filtered( |
489 | Ids: OPT_INPUT, Ids: OPT_library, Ids: OPT_whole_archive, Ids: OPT_no_whole_archive)) { |
490 | if (Arg->getOption().matches(ID: OPT_whole_archive) || |
491 | Arg->getOption().matches(ID: OPT_no_whole_archive)) { |
492 | WholeArchive = Arg->getOption().matches(ID: OPT_whole_archive); |
493 | continue; |
494 | } |
495 | |
496 | std::optional<std::string> Filename = |
497 | Arg->getOption().matches(ID: OPT_library) |
498 | ? searchLibrary(Input: Arg->getValue(), /*Root=*/"" , SearchPaths: LibraryPaths) |
499 | : std::string(Arg->getValue()); |
500 | |
501 | if (!Filename && Arg->getOption().matches(ID: OPT_library)) |
502 | return createStringError(Fmt: "unable to find library -l%s" , Vals: Arg->getValue()); |
503 | |
504 | if (!Filename || !sys::fs::exists(Path: *Filename) || |
505 | sys::fs::is_directory(Path: *Filename)) |
506 | continue; |
507 | |
508 | ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = |
509 | MemoryBuffer::getFileOrSTDIN(Filename: *Filename); |
510 | if (std::error_code EC = BufferOrErr.getError()) |
511 | return createFileError(F: *Filename, EC); |
512 | |
513 | MemoryBufferRef Buffer = **BufferOrErr; |
514 | switch (identify_magic(magic: Buffer.getBuffer())) { |
515 | case file_magic::bitcode: |
516 | case file_magic::elf_relocatable: |
517 | InputFiles.emplace_back(Args: std::move(*BufferOrErr), /*IsLazy=*/Args: false); |
518 | break; |
519 | case file_magic::archive: { |
520 | Expected<std::unique_ptr<object::Archive>> LibFile = |
521 | object::Archive::create(Source: Buffer); |
522 | if (!LibFile) |
523 | return LibFile.takeError(); |
524 | Error Err = Error::success(); |
525 | for (auto Child : (*LibFile)->children(Err)) { |
526 | auto ChildBufferOrErr = Child.getMemoryBufferRef(); |
527 | if (!ChildBufferOrErr) |
528 | return ChildBufferOrErr.takeError(); |
529 | std::unique_ptr<MemoryBuffer> ChildBuffer = |
530 | MemoryBuffer::getMemBufferCopy( |
531 | InputData: ChildBufferOrErr->getBuffer(), |
532 | BufferName: ChildBufferOrErr->getBufferIdentifier()); |
533 | InputFiles.emplace_back(Args: std::move(ChildBuffer), Args: !WholeArchive); |
534 | } |
535 | if (Err) |
536 | return Err; |
537 | break; |
538 | } |
539 | default: |
540 | return createStringError(Fmt: "Unsupported file type" ); |
541 | } |
542 | } |
543 | |
544 | bool = true; |
545 | StringMap<Symbol> SymTab; |
546 | for (auto &Sym : Args.getAllArgValues(Id: OPT_u)) |
547 | SymTab[Sym] = Symbol(Symbol::Undefined); |
548 | SmallVector<std::unique_ptr<MemoryBuffer>> LinkerInput; |
549 | while (Extracted) { |
550 | Extracted = false; |
551 | for (auto &[Input, IsLazy] : InputFiles) { |
552 | if (!Input) |
553 | continue; |
554 | |
555 | // Archive members only extract if they define needed symbols. We will |
556 | // re-scan all the inputs if any files were extracted for the link job. |
557 | Expected<bool> = getSymbols(Buffer: *Input, SymTab, IsLazy); |
558 | if (!ExtractOrErr) |
559 | return ExtractOrErr.takeError(); |
560 | |
561 | Extracted |= *ExtractOrErr; |
562 | if (!*ExtractOrErr) |
563 | continue; |
564 | |
565 | LinkerInput.emplace_back(Args: std::move(Input)); |
566 | } |
567 | } |
568 | InputFiles.clear(); |
569 | |
570 | // Extract any bitcode files to be passed to the LTO pipeline. |
571 | SmallVector<std::unique_ptr<MemoryBuffer>> BitcodeFiles; |
572 | for (auto &Input : LinkerInput) |
573 | if (identify_magic(magic: Input->getBuffer()) == file_magic::bitcode) |
574 | BitcodeFiles.emplace_back(Args: std::move(Input)); |
575 | erase_if(C&: LinkerInput, P: [](const auto &F) { return !F; }); |
576 | |
577 | // Run the LTO pipeline on the extracted inputs. |
578 | SmallVector<StringRef> Files; |
579 | if (!BitcodeFiles.empty()) { |
580 | auto LTOBackendOrErr = createLTO(Args); |
581 | if (!LTOBackendOrErr) |
582 | return LTOBackendOrErr.takeError(); |
583 | lto::LTO <OBackend = **LTOBackendOrErr; |
584 | for (auto &BitcodeFile : BitcodeFiles) { |
585 | Expected<std::unique_ptr<lto::InputFile>> BitcodeFileOrErr = |
586 | lto::InputFile::create(Object: *BitcodeFile); |
587 | if (!BitcodeFileOrErr) |
588 | return BitcodeFileOrErr.takeError(); |
589 | |
590 | const auto Symbols = (*BitcodeFileOrErr)->symbols(); |
591 | SmallVector<lto::SymbolResolution, 16> Resolutions(Symbols.size()); |
592 | size_t Idx = 0; |
593 | for (auto &Sym : Symbols) { |
594 | lto::SymbolResolution &Res = Resolutions[Idx++]; |
595 | Symbol ObjSym = SymTab[Sym.getName()]; |
596 | // We will use this as the prevailing symbol in LTO if it is not |
597 | // undefined and it is from the file that contained the canonical |
598 | // definition. |
599 | Res.Prevailing = !Sym.isUndefined() && ObjSym.File == *BitcodeFile; |
600 | |
601 | // We need LTO to preseve the following global symbols: |
602 | // 1) All symbols during a relocatable link. |
603 | // 2) Symbols used in regular objects. |
604 | // 3) Prevailing symbols that are needed visible to the gpu runtime. |
605 | Res.VisibleToRegularObj = |
606 | Args.hasArg(Ids: OPT_relocatable) || ObjSym.UsedInRegularObj || |
607 | (Res.Prevailing && |
608 | (Sym.getVisibility() != GlobalValue::HiddenVisibility && |
609 | !Sym.canBeOmittedFromSymbolTable())); |
610 | |
611 | // Identify symbols that must be exported dynamically and can be |
612 | // referenced by other files, (i.e. the runtime). |
613 | Res.ExportDynamic = |
614 | Sym.getVisibility() != GlobalValue::HiddenVisibility && |
615 | !Sym.canBeOmittedFromSymbolTable(); |
616 | |
617 | // The NVIDIA platform does not support any symbol preemption. |
618 | Res.FinalDefinitionInLinkageUnit = true; |
619 | |
620 | // We do not support linker redefined symbols (e.g. --wrap) for device |
621 | // image linking, so the symbols will not be changed after LTO. |
622 | Res.LinkerRedefined = false; |
623 | } |
624 | |
625 | // Add the bitcode file with its resolved symbols to the LTO job. |
626 | if (Error Err = LTOBackend.add(Obj: std::move(*BitcodeFileOrErr), Res: Resolutions)) |
627 | return Err; |
628 | } |
629 | |
630 | // Run the LTO job to compile the bitcode. |
631 | size_t MaxTasks = LTOBackend.getMaxTasks(); |
632 | SmallVector<StringRef> LTOFiles(MaxTasks); |
633 | auto AddStream = |
634 | [&](size_t Task, |
635 | const Twine &ModuleName) -> std::unique_ptr<CachedFileStream> { |
636 | int FD = -1; |
637 | auto &TempFile = LTOFiles[Task]; |
638 | if (Args.hasArg(Ids: OPT_lto_emit_asm)) |
639 | TempFile = Args.getLastArgValue(Id: OPT_o, Default: "a.out" ); |
640 | else { |
641 | auto TempFileOrErr = createTempFile( |
642 | Args, Prefix: sys::path::stem(path: Args.getLastArgValue(Id: OPT_o, Default: "a.out" )), Extension: "s" ); |
643 | if (!TempFileOrErr) |
644 | reportError(E: TempFileOrErr.takeError()); |
645 | TempFile = Args.MakeArgString(Str: *TempFileOrErr); |
646 | } |
647 | if (std::error_code EC = sys::fs::openFileForWrite(Name: TempFile, ResultFD&: FD)) |
648 | reportError(E: errorCodeToError(EC)); |
649 | return std::make_unique<CachedFileStream>( |
650 | args: std::make_unique<raw_fd_ostream>(args&: FD, args: true)); |
651 | }; |
652 | |
653 | if (Error Err = LTOBackend.run(AddStream)) |
654 | return Err; |
655 | |
656 | if (Args.hasArg(Ids: OPT_lto_emit_llvm) || Args.hasArg(Ids: OPT_lto_emit_asm)) |
657 | return Files; |
658 | |
659 | for (StringRef LTOFile : LTOFiles) { |
660 | auto FileOrErr = runPTXAs(File: LTOFile, Args); |
661 | if (!FileOrErr) |
662 | return FileOrErr.takeError(); |
663 | Files.emplace_back(Args&: *FileOrErr); |
664 | } |
665 | } |
666 | |
667 | // Create a copy for each file to a new file ending in `.cubin`. The 'nvlink' |
668 | // linker requires all NVPTX inputs to have this extension for some reason. |
669 | // We don't use a symbolic link because it's not supported on Windows and some |
670 | // of this input files could be extracted from an archive. |
671 | for (auto &Input : LinkerInput) { |
672 | auto TempFileOrErr = createTempFile( |
673 | Args, Prefix: sys::path::stem(path: Input->getBufferIdentifier()), Extension: "cubin" ); |
674 | if (!TempFileOrErr) |
675 | return TempFileOrErr.takeError(); |
676 | Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr = |
677 | FileOutputBuffer::create(FilePath: *TempFileOrErr, Size: Input->getBuffer().size()); |
678 | if (!OutputOrErr) |
679 | return OutputOrErr.takeError(); |
680 | std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr); |
681 | copy(Range: Input->getBuffer(), Out: Output->getBufferStart()); |
682 | if (Error E = Output->commit()) |
683 | return E; |
684 | Files.emplace_back(Args: Args.MakeArgString(Str: *TempFileOrErr)); |
685 | } |
686 | |
687 | return Files; |
688 | } |
689 | |
690 | Error runNVLink(ArrayRef<StringRef> Files, const ArgList &Args) { |
691 | if (Args.hasArg(Ids: OPT_lto_emit_asm) || Args.hasArg(Ids: OPT_lto_emit_llvm)) |
692 | return Error::success(); |
693 | |
694 | std::string CudaPath = Args.getLastArgValue(Id: OPT_cuda_path_EQ).str(); |
695 | Expected<std::string> NVLinkPath = |
696 | findProgram(Args, Name: "nvlink" , Paths: {CudaPath + "/bin" }); |
697 | if (!NVLinkPath) |
698 | return NVLinkPath.takeError(); |
699 | |
700 | if (!Args.hasArg(Ids: OPT_arch)) |
701 | return createStringError( |
702 | Fmt: "must pass in an explicit nvptx64 gpu architecture to 'nvlink'" ); |
703 | |
704 | ArgStringList NewLinkerArgs; |
705 | for (const opt::Arg *Arg : Args) { |
706 | // Do not forward arguments only intended for the linker wrapper. |
707 | if (Arg->getOption().hasFlag(Val: WrapperOnlyOption)) |
708 | continue; |
709 | |
710 | // Do not forward any inputs that we have processed. |
711 | if (Arg->getOption().matches(ID: OPT_INPUT) || |
712 | Arg->getOption().matches(ID: OPT_library)) |
713 | continue; |
714 | |
715 | Arg->render(Args, Output&: NewLinkerArgs); |
716 | } |
717 | |
718 | transform(Range&: Files, d_first: std::back_inserter(x&: NewLinkerArgs), |
719 | F: [&](StringRef Arg) { return Args.MakeArgString(Str: Arg); }); |
720 | |
721 | SmallVector<StringRef> LinkerArgs({*NVLinkPath}); |
722 | if (!Args.hasArg(Ids: OPT_o)) |
723 | LinkerArgs.append(IL: {"-o" , "a.out" }); |
724 | for (StringRef Arg : NewLinkerArgs) |
725 | LinkerArgs.push_back(Elt: Arg); |
726 | |
727 | if (Args.hasArg(Ids: OPT_dry_run) || Args.hasArg(Ids: OPT_verbose)) |
728 | printCommands(CmdArgs: LinkerArgs); |
729 | if (Args.hasArg(Ids: OPT_dry_run)) |
730 | return Error::success(); |
731 | if (sys::ExecuteAndWait(Program: *NVLinkPath, Args: LinkerArgs)) |
732 | return createStringError(S: "'" + sys::path::filename(path: *NVLinkPath) + "'" + |
733 | " failed" ); |
734 | return Error::success(); |
735 | } |
736 | |
737 | } // namespace |
738 | |
739 | int main(int argc, char **argv) { |
740 | InitLLVM X(argc, argv); |
741 | InitializeAllTargetInfos(); |
742 | InitializeAllTargets(); |
743 | InitializeAllTargetMCs(); |
744 | InitializeAllAsmParsers(); |
745 | InitializeAllAsmPrinters(); |
746 | |
747 | Executable = argv[0]; |
748 | sys::PrintStackTraceOnErrorSignal(Argv0: argv[0]); |
749 | |
750 | const OptTable &Tbl = getOptTable(); |
751 | BumpPtrAllocator Alloc; |
752 | StringSaver Saver(Alloc); |
753 | auto Args = Tbl.parseArgs(Argc: argc, Argv: argv, Unknown: OPT_INVALID, Saver, ErrorFn: [&](StringRef Err) { |
754 | reportError(E: createStringError(EC: inconvertibleErrorCode(), S: Err)); |
755 | }); |
756 | |
757 | if (Args.hasArg(Ids: OPT_help) || Args.hasArg(Ids: OPT_help_hidden)) { |
758 | Tbl.printHelp( |
759 | OS&: outs(), Usage: "clang-nvlink-wrapper [options] <options to passed to nvlink>" , |
760 | Title: "A utility that wraps around the NVIDIA 'nvlink' linker.\n" |
761 | "This enables static linking and LTO handling for NVPTX targets." , |
762 | ShowHidden: Args.hasArg(Ids: OPT_help_hidden), ShowAllAliases: Args.hasArg(Ids: OPT_help_hidden)); |
763 | return EXIT_SUCCESS; |
764 | } |
765 | |
766 | if (Args.hasArg(Ids: OPT_version)) |
767 | printVersion(OS&: outs()); |
768 | |
769 | // This forwards '-mllvm' arguments to LLVM if present. |
770 | SmallVector<const char *> NewArgv = {argv[0]}; |
771 | for (const opt::Arg *Arg : Args.filtered(Ids: OPT_mllvm)) |
772 | NewArgv.push_back(Elt: Arg->getValue()); |
773 | for (const opt::Arg *Arg : Args.filtered(Ids: OPT_plugin_opt)) |
774 | NewArgv.push_back(Elt: Arg->getValue()); |
775 | cl::ParseCommandLineOptions(argc: NewArgv.size(), argv: &NewArgv[0]); |
776 | |
777 | // Get the input files to pass to 'nvlink'. |
778 | auto FilesOrErr = getInput(Args); |
779 | if (!FilesOrErr) |
780 | reportError(E: FilesOrErr.takeError()); |
781 | |
782 | // Run 'nvlink' on the generated inputs. |
783 | if (Error Err = runNVLink(Files: *FilesOrErr, Args)) |
784 | reportError(E: std::move(Err)); |
785 | |
786 | // Remove the temporary files created. |
787 | if (!Args.hasArg(Ids: OPT_save_temps)) |
788 | for (const auto &TempFile : TempFiles) |
789 | if (std::error_code EC = sys::fs::remove(path: TempFile)) |
790 | reportError(E: createFileError(F: TempFile, EC)); |
791 | |
792 | return EXIT_SUCCESS; |
793 | } |
794 | |