1//===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the "backend" phase of LTO, i.e. it performs
10// optimization and code generation on a loaded module. It is generally used
11// internally by the LTO class but can also be used independently, for example
12// to implement a standalone ThinLTO backend.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/LTO/LTOBackend.h"
17#include "llvm/Analysis/AliasAnalysis.h"
18#include "llvm/Analysis/CGSCCPassManager.h"
19#include "llvm/Analysis/ModuleSummaryAnalysis.h"
20#include "llvm/Analysis/RuntimeLibcallInfo.h"
21#include "llvm/Analysis/TargetLibraryInfo.h"
22#include "llvm/Bitcode/BitcodeReader.h"
23#include "llvm/Bitcode/BitcodeWriter.h"
24#include "llvm/CGData/CodeGenData.h"
25#include "llvm/IR/LLVMRemarkStreamer.h"
26#include "llvm/IR/LegacyPassManager.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/IR/Verifier.h"
29#include "llvm/LTO/LTO.h"
30#include "llvm/MC/TargetRegistry.h"
31#include "llvm/Object/ModuleSymbolTable.h"
32#include "llvm/Passes/PassBuilder.h"
33#include "llvm/Passes/StandardInstrumentations.h"
34#include "llvm/Plugins/PassPlugin.h"
35#include "llvm/Support/Error.h"
36#include "llvm/Support/FileSystem.h"
37#include "llvm/Support/MemoryBuffer.h"
38#include "llvm/Support/Path.h"
39#include "llvm/Support/ThreadPool.h"
40#include "llvm/Support/ToolOutputFile.h"
41#include "llvm/Support/VirtualFileSystem.h"
42#include "llvm/Support/raw_ostream.h"
43#include "llvm/Target/TargetMachine.h"
44#include "llvm/TargetParser/SubtargetFeature.h"
45#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
46#include "llvm/Transforms/Utils/FunctionImportUtils.h"
47#include "llvm/Transforms/Utils/SplitModule.h"
48#include <optional>
49
50using namespace llvm;
51using namespace lto;
52
53#define DEBUG_TYPE "lto-backend"
54
/// Selects whether LLVM bitcode is embedded in the object files produced by
/// LTO and, if so, at which point of the pipeline it is captured.
enum class LTOBitcodeEmbedding {
  /// Do not embed bitcode (value 0).
  DoNotEmbed,
  /// Embed the bitcode as it is after all optimization passes (value 1).
  EmbedOptimized,
  /// Embed the bitcode post merge, but before optimizations (value 2).
  EmbedPostMergePreOptimized,
};
60
// Command-line option "lto-embed-bitcode". It selects one of the
// LTOBitcodeEmbedding modes by name ("none", "optimized" or
// "post-merge-pre-opt") and defaults to DoNotEmbed. In this file it is read by
// lto::opt() (post-merge embedding) and by codegen() (optimized embedding).
61static cl::opt<LTOBitcodeEmbedding> EmbedBitcode(
62 "lto-embed-bitcode", cl::init(Val: LTOBitcodeEmbedding::DoNotEmbed),
63 cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none",
64 "Do not embed"),
65 clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized",
66 "Embed after all optimization passes"),
67 clEnumValN(LTOBitcodeEmbedding::EmbedPostMergePreOptimized,
68 "post-merge-pre-opt",
69 "Embed post merge, but before optimizations")),
70 cl::desc("Embed LLVM bitcode in object files produced by LTO"));
71
// Command-line option "thinlto-assume-merged" (default: false). When set,
// thinBackend() goes straight to optimization and code generation,
// findThinLTOModule() accepts a lone module without checking its LTO info, and
// initImportList() leaves the import list untouched.
72static cl::opt<bool> ThinLTOAssumeMerged(
73 "thinlto-assume-merged", cl::init(Val: false),
74 cl::desc("Assume the input has already undergone ThinLTO function "
75 "importing and the other pre-optimization pipeline changes."));
76
// Hidden, comma-separated command-line list "filter-save-modules". When it is
// non-empty, the save-temps hooks installed by Config::addSaveTemps() only
// write bitcode for modules whose file name (without directory) is in the list.
77static cl::list<std::string>
78 SaveModulesList("filter-save-modules", cl::value_desc("module names"),
79 cl::desc("Only save bitcode for module whose name without "
80 "path matches this for -save-temps options"),
81 cl::CommaSeparated, cl::Hidden);
82
// Option object defined outside this file. runNewPMPasses() assigns it from
// Config::PGOWarnMismatch when a context-sensitive IR profile is in use.
83namespace llvm {
84extern cl::opt<bool> NoPGOWarnMismatch;
85}
86
87[[noreturn]] static void reportOpenError(StringRef Path, Twine Msg) {
88 errs() << "failed to open " << Path << ": " << Msg << '\n';
89 errs().flush();
90 exit(status: 1);
91}
92
93Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath,
94 const DenseSet<StringRef> &SaveTempsArgs) {
95 ShouldDiscardValueNames = false;
96
97 std::error_code EC;
98 if (SaveTempsArgs.empty() || SaveTempsArgs.contains(V: "resolution")) {
99 ResolutionFile =
100 std::make_unique<raw_fd_ostream>(args: OutputFileName + "resolution.txt", args&: EC,
101 args: sys::fs::OpenFlags::OF_TextWithCRLF);
102 if (EC) {
103 ResolutionFile.reset();
104 return errorCodeToError(EC);
105 }
106 }
107
108 auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) {
109 // Keep track of the hook provided by the linker, which also needs to run.
110 ModuleHookFn LinkerHook = Hook;
111 Hook = [=, SaveModNames = llvm::SmallVector<std::string, 1>(
112 SaveModulesList.begin(), SaveModulesList.end())](
113 unsigned Task, const Module &M) {
114 // If SaveModulesList is not empty, only do save-temps if the module's
115 // filename (without path) matches a name in the list.
116 if (!SaveModNames.empty() &&
117 !llvm::is_contained(
118 Range: SaveModNames,
119 Element: std::string(llvm::sys::path::filename(path: M.getName()))))
120 return false;
121
122 // If the linker's hook returned false, we need to pass that result
123 // through.
124 if (LinkerHook && !LinkerHook(Task, M))
125 return false;
126
127 std::string PathPrefix;
128 // If this is the combined module (not a ThinLTO backend compile) or the
129 // user hasn't requested using the input module's path, emit to a file
130 // named from the provided OutputFileName with the Task ID appended.
131 if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
132 PathPrefix = OutputFileName;
133 if (Task != (unsigned)-1)
134 PathPrefix += utostr(X: Task) + ".";
135 } else
136 PathPrefix = M.getModuleIdentifier() + ".";
137 std::string Path = PathPrefix + PathSuffix + ".bc";
138 std::error_code EC;
139 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
140 // Because -save-temps is a debugging feature, we report the error
141 // directly and exit.
142 if (EC)
143 reportOpenError(Path, Msg: EC.message());
144 WriteBitcodeToFile(M, Out&: OS, /*ShouldPreserveUseListOrder=*/false);
145 return true;
146 };
147 };
148
149 auto SaveCombinedIndex =
150 [=](const ModuleSummaryIndex &Index,
151 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
152 std::string Path = OutputFileName + "index.bc";
153 std::error_code EC;
154 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
155 // Because -save-temps is a debugging feature, we report the error
156 // directly and exit.
157 if (EC)
158 reportOpenError(Path, Msg: EC.message());
159 writeIndexToFile(Index, Out&: OS);
160
161 Path = OutputFileName + "index.dot";
162 raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_Text);
163 if (EC)
164 reportOpenError(Path, Msg: EC.message());
165 Index.exportToDot(OS&: OSDot, GUIDPreservedSymbols);
166 return true;
167 };
168
169 if (SaveTempsArgs.empty()) {
170 setHook("0.preopt", PreOptModuleHook);
171 setHook("1.promote", PostPromoteModuleHook);
172 setHook("2.internalize", PostInternalizeModuleHook);
173 setHook("3.import", PostImportModuleHook);
174 setHook("4.opt", PostOptModuleHook);
175 setHook("5.precodegen", PreCodeGenModuleHook);
176 CombinedIndexHook = SaveCombinedIndex;
177 } else {
178 if (SaveTempsArgs.contains(V: "preopt"))
179 setHook("0.preopt", PreOptModuleHook);
180 if (SaveTempsArgs.contains(V: "promote"))
181 setHook("1.promote", PostPromoteModuleHook);
182 if (SaveTempsArgs.contains(V: "internalize"))
183 setHook("2.internalize", PostInternalizeModuleHook);
184 if (SaveTempsArgs.contains(V: "import"))
185 setHook("3.import", PostImportModuleHook);
186 if (SaveTempsArgs.contains(V: "opt"))
187 setHook("4.opt", PostOptModuleHook);
188 if (SaveTempsArgs.contains(V: "precodegen"))
189 setHook("5.precodegen", PreCodeGenModuleHook);
190 if (SaveTempsArgs.contains(V: "combinedindex"))
191 CombinedIndexHook = SaveCombinedIndex;
192 }
193
194 return Error::success();
195}
196
// Expand Extension.def with HANDLE_EXTENSION defined so that every extension
// Ext listed there yields a declaration of get<Ext>PluginInfo(). These
// functions are called from RegisterPassPlugins() below.
197#define HANDLE_EXTENSION(Ext) \
198 llvm::PassPluginLibraryInfo get##Ext##PluginInfo();
199#include "llvm/Support/Extension.def"
200#undef HANDLE_EXTENSION
201
202static void RegisterPassPlugins(const Config &Conf, PassBuilder &PB) {
203#define HANDLE_EXTENSION(Ext) \
204 get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB);
205#include "llvm/Support/Extension.def"
206#undef HANDLE_EXTENSION
207
208 // Load requested pass plugins and let them register pass builder callbacks
209 for (auto &PluginFN : Conf.PassPluginFilenames) {
210 auto PassPlugin = PassPlugin::Load(Filename: PluginFN);
211 if (!PassPlugin)
212 reportFatalUsageError(Err: PassPlugin.takeError());
213 PassPlugin->registerPassBuilderCallbacks(PB);
214 }
215
216 // Register already loaded plugins
217 for (auto *LoadedPlugin : Conf.LoadedPassPlugins)
218 LoadedPlugin->registerPassBuilderCallbacks(PB);
219}
220
221static std::unique_ptr<TargetMachine>
222createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) {
223 const Triple &TheTriple = M.getTargetTriple();
224 SubtargetFeatures Features;
225 Features.getDefaultSubtargetFeatures(Triple: TheTriple);
226 for (const std::string &A : Conf.MAttrs)
227 Features.AddFeature(String: A);
228
229 std::optional<Reloc::Model> RelocModel;
230 if (Conf.RelocModel)
231 RelocModel = *Conf.RelocModel;
232 else if (M.getModuleFlag(Key: "PIC Level"))
233 RelocModel =
234 M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
235
236 std::optional<CodeModel::Model> CodeModel;
237 if (Conf.CodeModel)
238 CodeModel = *Conf.CodeModel;
239 else
240 CodeModel = M.getCodeModel();
241
242 TargetOptions TargetOpts = Conf.Options;
243 if (TargetOpts.MCOptions.ABIName.empty()) {
244 TargetOpts.MCOptions.ABIName = M.getTargetABIFromMD();
245 }
246
247 std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine(
248 TT: TheTriple, CPU: Conf.CPU, Features: Features.getString(), Options: TargetOpts, RM: RelocModel,
249 CM: CodeModel, OL: Conf.CGOptLevel));
250
251 assert(TM && "Failed to create target machine");
252
253 if (std::optional<uint64_t> LargeDataThreshold = M.getLargeDataThreshold())
254 TM->setLargeDataThreshold(*LargeDataThreshold);
255
256 return TM;
257}
258
259static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
260 unsigned OptLevel, bool IsThinLTO,
261 ModuleSummaryIndex *ExportSummary,
262 const ModuleSummaryIndex *ImportSummary,
263 const DenseSet<StringRef> &BitcodeLibFuncs) {
264 std::optional<PGOOptions> PGOOpt;
265 if (!Conf.SampleProfile.empty())
266 PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping,
267 /*MemoryProfile=*/"", PGOOptions::SampleUse,
268 PGOOptions::NoCSAction,
269 PGOOptions::ColdFuncOpt::Default, true);
270 else if (Conf.RunCSIRInstr) {
271 PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping,
272 /*MemoryProfile=*/"", PGOOptions::IRUse,
273 PGOOptions::CSIRInstr, PGOOptions::ColdFuncOpt::Default,
274 Conf.AddFSDiscriminator);
275 } else if (!Conf.CSIRProfile.empty()) {
276 PGOOpt =
277 PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping,
278 /*MemoryProfile=*/"", PGOOptions::IRUse, PGOOptions::CSIRUse,
279 PGOOptions::ColdFuncOpt::Default, Conf.AddFSDiscriminator);
280 NoPGOWarnMismatch = !Conf.PGOWarnMismatch;
281 } else if (Conf.AddFSDiscriminator) {
282 PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", PGOOptions::NoAction,
283 PGOOptions::NoCSAction,
284 PGOOptions::ColdFuncOpt::Default, true);
285 }
286 TM->setPGOOption(PGOOpt);
287
288 LoopAnalysisManager LAM;
289 FunctionAnalysisManager FAM;
290 CGSCCAnalysisManager CGAM;
291 ModuleAnalysisManager MAM;
292
293 PassInstrumentationCallbacks PIC;
294 StandardInstrumentations SI(Mod.getContext(), Conf.DebugPassManager,
295 Conf.VerifyEach);
296 SI.registerCallbacks(PIC, MAM: &MAM);
297 PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC);
298
299 RegisterPassPlugins(Conf, PB);
300
301 std::unique_ptr<TargetLibraryInfoImpl> TLII(
302 new TargetLibraryInfoImpl(TM->getTargetTriple(), TM->Options.VecLib));
303 if (Conf.Freestanding)
304 TLII->disableAllFunctions();
305
306 // Determine whether or not its safe to emit calls to each libfunc. Libfuncs
307 // that might have been present in the current LTO unit, but are not, have
308 // lost their only opportunity to be defined, and calls must not be emitted to
309 // them.
310 // FIXME: BitcodeLibFuncs isn't yet set for distributed ThinLTO.
311 TargetLibraryInfo TLI(*TLII);
312 for (unsigned I = 0, E = static_cast<unsigned>(LibFunc::NumLibFuncs); I != E;
313 ++I) {
314 LibFunc F = static_cast<LibFunc>(I);
315 if (BitcodeLibFuncs.contains(V: TLI.getName(F)))
316 TLII->setUnavailable(F);
317 }
318
319 FAM.registerPass(PassBuilder: [&] { return TargetLibraryAnalysis(*TLII); });
320
321 // Parse a custom AA pipeline if asked to.
322 if (!Conf.AAPipeline.empty()) {
323 AAManager AA;
324 if (auto Err = PB.parseAAPipeline(AA, PipelineText: Conf.AAPipeline)) {
325 report_fatal_error(reason: Twine("unable to parse AA pipeline description '") +
326 Conf.AAPipeline + "': " + toString(E: std::move(Err)));
327 }
328 // Register the AA manager first so that our version is the one used.
329 FAM.registerPass(PassBuilder: [&] { return std::move(AA); });
330 }
331
332 // Register all the basic analyses with the managers.
333 PB.registerModuleAnalyses(MAM);
334 PB.registerCGSCCAnalyses(CGAM);
335 PB.registerFunctionAnalyses(FAM);
336 PB.registerLoopAnalyses(LAM);
337 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
338
339 ModulePassManager MPM;
340
341 if (!Conf.DisableVerify)
342 MPM.addPass(Pass: VerifierPass());
343
344 OptimizationLevel OL;
345
346 switch (OptLevel) {
347 default:
348 llvm_unreachable("Invalid optimization level");
349 case 0:
350 OL = OptimizationLevel::O0;
351 break;
352 case 1:
353 OL = OptimizationLevel::O1;
354 break;
355 case 2:
356 OL = OptimizationLevel::O2;
357 break;
358 case 3:
359 OL = OptimizationLevel::O3;
360 break;
361 }
362
363 // Parse a custom pipeline if asked to.
364 if (!Conf.OptPipeline.empty()) {
365 if (auto Err = PB.parsePassPipeline(MPM, PipelineText: Conf.OptPipeline)) {
366 report_fatal_error(reason: Twine("unable to parse pass pipeline description '") +
367 Conf.OptPipeline + "': " + toString(E: std::move(Err)));
368 }
369 } else if (IsThinLTO) {
370 MPM.addPass(Pass: PB.buildThinLTODefaultPipeline(Level: OL, ImportSummary));
371 } else {
372 MPM.addPass(Pass: PB.buildLTODefaultPipeline(Level: OL, ExportSummary));
373 }
374
375 if (!Conf.DisableVerify)
376 MPM.addPass(Pass: VerifierPass());
377
378 if (PrintPipelinePasses) {
379 std::string PipelineStr;
380 raw_string_ostream OS(PipelineStr);
381 MPM.printPipeline(OS, MapClassName2PassName: [&PIC](StringRef ClassName) {
382 auto PassName = PIC.getPassNameForClassName(ClassName);
383 return PassName.empty() ? ClassName : PassName;
384 });
385 outs() << "pipeline-passes: " << PipelineStr << '\n';
386 }
387
388 MPM.run(IR&: Mod, AM&: MAM);
389}
390
391static bool isEmptyModule(const Module &Mod) {
392 // Module is empty if it has no functions, no globals, no inline asm and no
393 // named metadata (aliases and ifuncs require functions or globals so we
394 // don't need to check those explicitly).
395 return Mod.empty() && Mod.global_empty() && Mod.named_metadata_empty() &&
396 Mod.getModuleInlineAsm().empty();
397}
398
399bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
400 bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
401 const ModuleSummaryIndex *ImportSummary,
402 const std::vector<uint8_t> &CmdArgs,
403 ArrayRef<StringRef> BitcodeLibFuncs) {
404 llvm::TimeTraceScope timeScope("opt");
405 if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) {
406 // FIXME: the motivation for capturing post-merge bitcode and command line
407 // is replicating the compilation environment from bitcode, without needing
408 // to understand the dependencies (the functions to be imported). This
409 // assumes a clang - based invocation, case in which we have the command
410 // line.
411 // It's not very clear how the above motivation would map in the
412 // linker-based case, so we currently don't plumb the command line args in
413 // that case.
414 if (CmdArgs.empty())
415 LLVM_DEBUG(
416 dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but "
417 "command line arguments are not available");
418 llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(),
419 /*EmbedBitcode*/ true, /*EmbedCmdline*/ true,
420 /*Cmdline*/ CmdArgs);
421 }
422 // No need to run any opt passes if the module is empty.
423 // In theory these passes should take almost no time for an empty
424 // module, however, this guards against doing any unnecessary summary-based
425 // analysis in the case of a ThinLTO build where this might be an empty
426 // regular LTO combined module, with a large combined index from ThinLTO.
427 if (!isEmptyModule(Mod)) {
428 DenseSet<StringRef> BitcodeLibFuncsSet(BitcodeLibFuncs.begin(),
429 BitcodeLibFuncs.end());
430 // FIXME: Plumb the combined index into the new pass manager.
431 runNewPMPasses(Conf, Mod, TM, OptLevel: Conf.OptLevel, IsThinLTO, ExportSummary,
432 ImportSummary, BitcodeLibFuncs: BitcodeLibFuncsSet);
433 }
434 return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
435}
436
437static void codegen(const Config &Conf, TargetMachine *TM,
438 AddStreamFn AddStream, unsigned Task, Module &Mod,
439 const ModuleSummaryIndex &CombinedIndex) {
440 llvm::TimeTraceScope timeScope("codegen");
441 if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
442 return;
443
444 if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized)
445 llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(),
446 /*EmbedBitcode*/ true,
447 /*EmbedCmdline*/ false,
448 /*CmdArgs*/ std::vector<uint8_t>());
449
450 std::unique_ptr<ToolOutputFile> DwoOut;
451 SmallString<1024> DwoFile(Conf.SplitDwarfOutput);
452 if (!Conf.DwoDir.empty()) {
453 std::error_code EC;
454 if (auto EC = llvm::sys::fs::create_directories(path: Conf.DwoDir))
455 report_fatal_error(reason: Twine("Failed to create directory ") + Conf.DwoDir +
456 ": " + EC.message());
457
458 DwoFile = Conf.DwoDir;
459 sys::path::append(path&: DwoFile, a: std::to_string(val: Task) + ".dwo");
460 TM->Options.MCOptions.SplitDwarfFile = std::string(DwoFile);
461 } else
462 TM->Options.MCOptions.SplitDwarfFile = Conf.SplitDwarfFile;
463
464 if (!DwoFile.empty()) {
465 std::error_code EC;
466 DwoOut = std::make_unique<ToolOutputFile>(args&: DwoFile, args&: EC, args: sys::fs::OF_None);
467 if (EC)
468 report_fatal_error(reason: Twine("Failed to open ") + DwoFile + ": " +
469 EC.message());
470 }
471
472 Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
473 AddStream(Task, Mod.getModuleIdentifier());
474 if (Error Err = StreamOrErr.takeError())
475 report_fatal_error(Err: std::move(Err));
476 std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;
477 TM->Options.ObjectFilenameForDebug = Stream->ObjectPathName;
478
479 // Create the codegen pipeline in its own scope so it gets deleted before
480 // Stream->commit() is called. The commit function of CacheStream deletes
481 // the raw stream, which is too early as streamers (e.g. MCAsmStreamer)
482 // keep the pointer and may use it until their destruction. See #138194.
483 {
484 legacy::PassManager CodeGenPasses;
485 TargetLibraryInfoImpl TLII(Mod.getTargetTriple(), TM->Options.VecLib);
486 CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII));
487 CodeGenPasses.add(P: new RuntimeLibraryInfoWrapper(
488 Mod.getTargetTriple(), TM->Options.ExceptionModel,
489 TM->Options.FloatABIType, TM->Options.EABIVersion,
490 TM->Options.MCOptions.ABIName, TM->Options.VecLib));
491
492 // No need to make index available if the module is empty.
493 // In theory these passes should not use the index for an empty
494 // module, however, this guards against doing any unnecessary summary-based
495 // analysis in the case of a ThinLTO build where this might be an empty
496 // regular LTO combined module, with a large combined index from ThinLTO.
497 if (!isEmptyModule(Mod))
498 CodeGenPasses.add(
499 P: createImmutableModuleSummaryIndexWrapperPass(Index: &CombinedIndex));
500 if (Conf.PreCodeGenPassesHook)
501 Conf.PreCodeGenPassesHook(CodeGenPasses);
502 if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS,
503 DwoOut ? &DwoOut->os() : nullptr,
504 Conf.CGFileType))
505 report_fatal_error(reason: "Failed to setup codegen");
506 CodeGenPasses.run(M&: Mod);
507
508 if (DwoOut)
509 DwoOut->keep();
510 }
511
512 if (Error Err = Stream->commit())
513 report_fatal_error(Err: std::move(Err));
514}
515
516static void splitCodeGen(const Config &C, TargetMachine *TM,
517 AddStreamFn AddStream,
518 unsigned ParallelCodeGenParallelismLevel, Module &Mod,
519 const ModuleSummaryIndex &CombinedIndex) {
520 DefaultThreadPool CodegenThreadPool(
521 heavyweight_hardware_concurrency(ThreadCount: ParallelCodeGenParallelismLevel));
522 unsigned ThreadCount = 0;
523 const Target *T = &TM->getTarget();
524
525 const auto HandleModulePartition =
526 [&](std::unique_ptr<Module> MPart) {
527 // We want to clone the module in a new context to multi-thread the
528 // codegen. We do it by serializing partition modules to bitcode
529 // (while still on the main thread, in order to avoid data races) and
530 // spinning up new threads which deserialize the partitions into
531 // separate contexts.
532 // FIXME: Provide a more direct way to do this in LLVM.
533 SmallString<0> BC;
534 raw_svector_ostream BCOS(BC);
535 WriteBitcodeToFile(M: *MPart, Out&: BCOS);
536
537 // Enqueue the task
538 CodegenThreadPool.async(
539 F: [&](const SmallString<0> &BC, unsigned ThreadId) {
540 LTOLLVMContext Ctx(C);
541 Expected<std::unique_ptr<Module>> MOrErr =
542 parseBitcodeFile(Buffer: MemoryBufferRef(BC.str(), "ld-temp.o"), Context&: Ctx);
543 if (!MOrErr)
544 report_fatal_error(reason: "Failed to read bitcode");
545 std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
546
547 std::unique_ptr<TargetMachine> TM =
548 createTargetMachine(Conf: C, TheTarget: T, M&: *MPartInCtx);
549
550 codegen(Conf: C, TM: TM.get(), AddStream, Task: ThreadId, Mod&: *MPartInCtx,
551 CombinedIndex);
552 },
553 // Pass BC using std::move to ensure that it get moved rather than
554 // copied into the thread's context.
555 ArgList: std::move(BC), ArgList: ThreadCount++);
556 };
557
558 // Try target-specific module splitting first, then fallback to the default.
559 if (!TM->splitModule(M&: Mod, NumParts: ParallelCodeGenParallelismLevel,
560 ModuleCallback: HandleModulePartition)) {
561 SplitModule(M&: Mod, N: ParallelCodeGenParallelismLevel, ModuleCallback: HandleModulePartition,
562 PreserveLocals: false);
563 }
564
565 // Because the inner lambda (which runs in a worker thread) captures our local
566 // variables, we need to wait for the worker threads to terminate before we
567 // can leave the function scope.
568 CodegenThreadPool.wait();
569}
570
571static Expected<const Target *> initAndLookupTarget(const Config &C,
572 Module &Mod) {
573 if (!C.OverrideTriple.empty())
574 Mod.setTargetTriple(Triple(C.OverrideTriple));
575 else if (Mod.getTargetTriple().empty())
576 Mod.setTargetTriple(Triple(C.DefaultTriple));
577
578 std::string Msg;
579 const Target *T = TargetRegistry::lookupTarget(TheTriple: Mod.getTargetTriple(), Error&: Msg);
580 if (!T)
581 return make_error<StringError>(Args&: Msg, Args: inconvertibleErrorCode());
582 return T;
583}
584
585Error lto::finalizeOptimizationRemarks(LLVMRemarkFileHandle DiagOutputFile) {
586 // Make sure we flush the diagnostic remarks file in case the linker doesn't
587 // call the global destructors before exiting.
588 if (!DiagOutputFile)
589 return Error::success();
590 DiagOutputFile.finalize();
591 DiagOutputFile->keep();
592 DiagOutputFile->os().flush();
593 return Error::success();
594}
595
596Error lto::backend(const Config &C, AddStreamFn AddStream,
597 unsigned ParallelCodeGenParallelismLevel, Module &Mod,
598 ModuleSummaryIndex &CombinedIndex,
599 ArrayRef<StringRef> BitcodeLibFuncs) {
600 llvm::TimeTraceScope timeScope("LTO backend");
601 Expected<const Target *> TOrErr = initAndLookupTarget(C, Mod);
602 if (!TOrErr)
603 return TOrErr.takeError();
604
605 std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf: C, TheTarget: *TOrErr, M&: Mod);
606
607 LLVM_DEBUG(dbgs() << "Running regular LTO\n");
608 if (!C.CodeGenOnly) {
609 if (!opt(Conf: C, TM: TM.get(), Task: 0, Mod, /*IsThinLTO=*/false,
610 /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr,
611 /*CmdArgs*/ std::vector<uint8_t>(), BitcodeLibFuncs))
612 return Error::success();
613 }
614
615 if (ParallelCodeGenParallelismLevel == 1) {
616 codegen(Conf: C, TM: TM.get(), AddStream, Task: 0, Mod, CombinedIndex);
617 } else {
618 splitCodeGen(C, TM: TM.get(), AddStream, ParallelCodeGenParallelismLevel, Mod,
619 CombinedIndex);
620 }
621 return Error::success();
622}
623
624static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals,
625 const ModuleSummaryIndex &Index) {
626 llvm::TimeTraceScope timeScope("Drop dead symbols");
627 std::vector<GlobalValue*> DeadGVs;
628 for (auto &GV : Mod.global_values())
629 if (GlobalValueSummary *GVS = DefinedGlobals.lookup(Val: GV.getGUID()))
630 if (!Index.isGlobalValueLive(GVS)) {
631 DeadGVs.push_back(x: &GV);
632 convertToDeclaration(GV);
633 }
634
635 // Now that all dead bodies have been dropped, delete the actual objects
636 // themselves when possible.
637 for (GlobalValue *GV : DeadGVs) {
638 GV->removeDeadConstantUsers();
639 // Might reference something defined in native object (i.e. dropped a
640 // non-prevailing IR def, but we need to keep the declaration).
641 if (GV->use_empty())
642 GV->eraseFromParent();
643 }
644}
645
646Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
647 Module &Mod, const ModuleSummaryIndex &CombinedIndex,
648 const FunctionImporter::ImportMapTy &ImportList,
649 const GVSummaryMapTy &DefinedGlobals,
650 MapVector<StringRef, BitcodeModule> *ModuleMap,
651 bool CodeGenOnly, ArrayRef<StringRef> BitcodeLibFuncs,
652 AddStreamFn IRAddStream,
653 const std::vector<uint8_t> &CmdArgs) {
654 llvm::TimeTraceScope timeScope("Thin backend", Mod.getModuleIdentifier());
655 Expected<const Target *> TOrErr = initAndLookupTarget(C: Conf, Mod);
656 if (!TOrErr)
657 return TOrErr.takeError();
658
659 std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, TheTarget: *TOrErr, M&: Mod);
660
661 // Setup optimization remarks.
662 auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
663 Context&: Mod.getContext(), RemarksFilename: Conf.RemarksFilename, RemarksPasses: Conf.RemarksPasses,
664 RemarksFormat: Conf.RemarksFormat, RemarksWithHotness: Conf.RemarksWithHotness, RemarksHotnessThreshold: Conf.RemarksHotnessThreshold,
665 Count: Task);
666 if (!DiagFileOrErr)
667 return DiagFileOrErr.takeError();
668 auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
669
670 // Set the partial sample profile ratio in the profile summary module flag of
671 // the module, if applicable.
672 Mod.setPartialSampleProfileRatio(CombinedIndex);
673
674 LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
675 if (CodeGenOnly) {
676 // If CodeGenOnly is set, we only perform code generation and skip
677 // optimization. This value may differ from Conf.CodeGenOnly.
678 codegen(Conf, TM: TM.get(), AddStream, Task, Mod, CombinedIndex);
679 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
680 }
681
682 if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
683 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
684
685 auto OptimizeAndCodegen =
686 [&](Module &Mod, TargetMachine *TM,
687 LLVMRemarkFileHandle DiagnosticOutputFile) {
688 // Perform optimization and code generation for ThinLTO.
689 if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
690 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
691 CmdArgs, BitcodeLibFuncs))
692 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
693
694 // Save the current module before the first codegen round.
695 // Note that the second codegen round runs only `codegen()` without
696 // running `opt()`. We're not reaching here as it's bailed out earlier
697 // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
698 if (IRAddStream)
699 cgdata::saveModuleForTwoRounds(TheModule: Mod, Task, AddStream: IRAddStream);
700
701 codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
702 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
703 };
704
705 if (ThinLTOAssumeMerged)
706 return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile));
707
708 // When linking an ELF shared object, dso_local should be dropped. We
709 // conservatively do this for -fpic.
710 bool ClearDSOLocalOnDeclarations =
711 TM->getTargetTriple().isOSBinFormatELF() &&
712 TM->getRelocationModel() != Reloc::Static &&
713 Mod.getPIELevel() == PIELevel::Default;
714 renameModuleForThinLTO(M&: Mod, Index: CombinedIndex, ClearDSOLocalOnDeclarations);
715
716 dropDeadSymbols(Mod, DefinedGlobals, Index: CombinedIndex);
717
718 thinLTOFinalizeInModule(TheModule&: Mod, DefinedGlobals, /*PropagateAttrs=*/true);
719
720 if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
721 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
722
723 if (!DefinedGlobals.empty())
724 thinLTOInternalizeModule(TheModule&: Mod, DefinedGlobals);
725
726 if (Conf.PostInternalizeModuleHook &&
727 !Conf.PostInternalizeModuleHook(Task, Mod))
728 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
729
730 auto ModuleLoader = [&](StringRef Identifier) {
731 llvm::TimeTraceScope moduleLoaderScope("Module loader", Identifier);
732 assert(Mod.getContext().isODRUniquingDebugTypes() &&
733 "ODR Type uniquing should be enabled on the context");
734 if (ModuleMap) {
735 auto I = ModuleMap->find(Key: Identifier);
736 assert(I != ModuleMap->end());
737 return I->second.getLazyModule(Context&: Mod.getContext(),
738 /*ShouldLazyLoadMetadata=*/true,
739 /*IsImporting*/ true);
740 }
741
742 ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MBOrErr =
743 llvm::MemoryBuffer::getFile(Filename: Identifier);
744 if (!MBOrErr)
745 return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>(
746 Args: Twine("Error loading imported file ") + Identifier + " : ",
747 Args: MBOrErr.getError()));
748
749 Expected<BitcodeModule> BMOrErr = findThinLTOModule(MBRef: **MBOrErr);
750 if (!BMOrErr)
751 return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>(
752 Args: Twine("Error loading imported file ") + Identifier + " : " +
753 toString(E: BMOrErr.takeError()),
754 Args: inconvertibleErrorCode()));
755
756 Expected<std::unique_ptr<Module>> MOrErr =
757 BMOrErr->getLazyModule(Context&: Mod.getContext(),
758 /*ShouldLazyLoadMetadata=*/true,
759 /*IsImporting*/ true);
760 if (MOrErr)
761 (*MOrErr)->setOwnedMemoryBuffer(std::move(*MBOrErr));
762 return MOrErr;
763 };
764
765 {
766 llvm::TimeTraceScope importScope("Import functions");
767 FunctionImporter Importer(CombinedIndex, ModuleLoader,
768 ClearDSOLocalOnDeclarations);
769 if (Error Err = Importer.importFunctions(M&: Mod, ImportList).takeError())
770 return Err;
771 }
772
773 // Do this after any importing so that imported code is updated.
774 updatePublicTypeTestCalls(M&: Mod, WholeProgramVisibilityEnabledInLTO: CombinedIndex.withWholeProgramVisibility());
775
776 if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
777 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
778
779 return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile));
780}
781
782BitcodeModule *lto::findThinLTOModule(MutableArrayRef<BitcodeModule> BMs) {
783 if (ThinLTOAssumeMerged && BMs.size() == 1)
784 return BMs.begin();
785
786 for (BitcodeModule &BM : BMs) {
787 Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
788 if (LTOInfo && LTOInfo->IsThinLTO)
789 return &BM;
790 }
791 return nullptr;
792}
793
794Expected<BitcodeModule> lto::findThinLTOModule(MemoryBufferRef MBRef) {
795 Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(Buffer: MBRef);
796 if (!BMsOrErr)
797 return BMsOrErr.takeError();
798
799 // The bitcode file may contain multiple modules, we want the one that is
800 // marked as being the ThinLTO module.
801 if (const BitcodeModule *Bm = lto::findThinLTOModule(BMs: *BMsOrErr))
802 return *Bm;
803
804 return make_error<StringError>(Args: "Could not find module summary",
805 Args: inconvertibleErrorCode());
806}
807
808bool lto::initImportList(const Module &M,
809 const ModuleSummaryIndex &CombinedIndex,
810 FunctionImporter::ImportMapTy &ImportList) {
811 if (ThinLTOAssumeMerged)
812 return true;
813 // We can simply import the values mentioned in the combined index, since
814 // we should only invoke this using the individual indexes written out
815 // via a WriteIndexesThinBackend.
816 for (const auto &GlobalList : CombinedIndex) {
817 // Ignore entries for undefined references.
818 if (GlobalList.second.getSummaryList().empty())
819 continue;
820
821 auto GUID = GlobalList.first;
822 for (const auto &Summary : GlobalList.second.getSummaryList()) {
823 // Skip the summaries for the importing module. These are included to
824 // e.g. record required linkage changes.
825 if (Summary->modulePath() == M.getModuleIdentifier())
826 continue;
827 // Add an entry to provoke importing by thinBackend.
828 ImportList.addGUID(FromModule: Summary->modulePath(), GUID, ImportKind: Summary->importType());
829 }
830 }
831 return true;
832}
833