1 | //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the "backend" phase of LTO, i.e. it performs |
10 | // optimization and code generation on a loaded module. It is generally used |
11 | // internally by the LTO class but can also be used independently, for example |
12 | // to implement a standalone ThinLTO backend. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "llvm/LTO/LTOBackend.h" |
17 | #include "llvm/Analysis/AliasAnalysis.h" |
18 | #include "llvm/Analysis/CGSCCPassManager.h" |
19 | #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
20 | #include "llvm/Analysis/TargetLibraryInfo.h" |
21 | #include "llvm/Bitcode/BitcodeReader.h" |
22 | #include "llvm/Bitcode/BitcodeWriter.h" |
23 | #include "llvm/IR/LLVMRemarkStreamer.h" |
24 | #include "llvm/IR/LegacyPassManager.h" |
25 | #include "llvm/IR/PassManager.h" |
26 | #include "llvm/IR/Verifier.h" |
27 | #include "llvm/LTO/LTO.h" |
28 | #include "llvm/MC/TargetRegistry.h" |
29 | #include "llvm/Object/ModuleSymbolTable.h" |
30 | #include "llvm/Passes/PassBuilder.h" |
31 | #include "llvm/Passes/PassPlugin.h" |
32 | #include "llvm/Passes/StandardInstrumentations.h" |
33 | #include "llvm/Support/Error.h" |
34 | #include "llvm/Support/FileSystem.h" |
35 | #include "llvm/Support/MemoryBuffer.h" |
36 | #include "llvm/Support/Path.h" |
37 | #include "llvm/Support/Program.h" |
38 | #include "llvm/Support/ThreadPool.h" |
39 | #include "llvm/Support/ToolOutputFile.h" |
40 | #include "llvm/Support/VirtualFileSystem.h" |
41 | #include "llvm/Support/raw_ostream.h" |
42 | #include "llvm/Target/TargetMachine.h" |
43 | #include "llvm/TargetParser/SubtargetFeature.h" |
44 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
45 | #include "llvm/Transforms/Scalar/LoopPassManager.h" |
46 | #include "llvm/Transforms/Utils/FunctionImportUtils.h" |
47 | #include "llvm/Transforms/Utils/SplitModule.h" |
48 | #include <optional> |
49 | |
50 | using namespace llvm; |
51 | using namespace lto; |
52 | |
53 | #define DEBUG_TYPE "lto-backend" |
54 | |
/// Selects whether, and at which point of the LTO pipeline, bitcode is
/// embedded into the module being compiled.
enum class LTOBitcodeEmbedding {
  /// Leave the module untouched.
  DoNotEmbed = 0,
  /// Embed right before code generation (checked in codegen()).
  EmbedOptimized = 1,
  /// Embed before the optimization pipeline runs (checked in opt()).
  EmbedPostMergePreOptimized = 2,
};
60 | |
61 | static cl::opt<LTOBitcodeEmbedding> EmbedBitcode( |
62 | "lto-embed-bitcode" , cl::init(Val: LTOBitcodeEmbedding::DoNotEmbed), |
63 | cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none" , |
64 | "Do not embed" ), |
65 | clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized" , |
66 | "Embed after all optimization passes" ), |
67 | clEnumValN(LTOBitcodeEmbedding::EmbedPostMergePreOptimized, |
68 | "post-merge-pre-opt" , |
69 | "Embed post merge, but before optimizations" )), |
70 | cl::desc("Embed LLVM bitcode in object files produced by LTO" )); |
71 | |
72 | static cl::opt<bool> ThinLTOAssumeMerged( |
73 | "thinlto-assume-merged" , cl::init(Val: false), |
74 | cl::desc("Assume the input has already undergone ThinLTO function " |
75 | "importing and the other pre-optimization pipeline changes." )); |
76 | |
77 | namespace llvm { |
78 | extern cl::opt<bool> NoPGOWarnMismatch; |
79 | } |
80 | |
81 | [[noreturn]] static void reportOpenError(StringRef Path, Twine Msg) { |
82 | errs() << "failed to open " << Path << ": " << Msg << '\n'; |
83 | errs().flush(); |
84 | exit(status: 1); |
85 | } |
86 | |
87 | Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath, |
88 | const DenseSet<StringRef> &SaveTempsArgs) { |
89 | ShouldDiscardValueNames = false; |
90 | |
91 | std::error_code EC; |
92 | if (SaveTempsArgs.empty() || SaveTempsArgs.contains(V: "resolution" )) { |
93 | ResolutionFile = |
94 | std::make_unique<raw_fd_ostream>(args: OutputFileName + "resolution.txt" , args&: EC, |
95 | args: sys::fs::OpenFlags::OF_TextWithCRLF); |
96 | if (EC) { |
97 | ResolutionFile.reset(); |
98 | return errorCodeToError(EC); |
99 | } |
100 | } |
101 | |
102 | auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { |
103 | // Keep track of the hook provided by the linker, which also needs to run. |
104 | ModuleHookFn LinkerHook = Hook; |
105 | Hook = [=](unsigned Task, const Module &M) { |
106 | // If the linker's hook returned false, we need to pass that result |
107 | // through. |
108 | if (LinkerHook && !LinkerHook(Task, M)) |
109 | return false; |
110 | |
111 | std::string PathPrefix; |
112 | // If this is the combined module (not a ThinLTO backend compile) or the |
113 | // user hasn't requested using the input module's path, emit to a file |
114 | // named from the provided OutputFileName with the Task ID appended. |
115 | if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) { |
116 | PathPrefix = OutputFileName; |
117 | if (Task != (unsigned)-1) |
118 | PathPrefix += utostr(X: Task) + "." ; |
119 | } else |
120 | PathPrefix = M.getModuleIdentifier() + "." ; |
121 | std::string Path = PathPrefix + PathSuffix + ".bc" ; |
122 | std::error_code EC; |
123 | raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); |
124 | // Because -save-temps is a debugging feature, we report the error |
125 | // directly and exit. |
126 | if (EC) |
127 | reportOpenError(Path, Msg: EC.message()); |
128 | WriteBitcodeToFile(M, Out&: OS, /*ShouldPreserveUseListOrder=*/false); |
129 | return true; |
130 | }; |
131 | }; |
132 | |
133 | auto SaveCombinedIndex = |
134 | [=](const ModuleSummaryIndex &Index, |
135 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { |
136 | std::string Path = OutputFileName + "index.bc" ; |
137 | std::error_code EC; |
138 | raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); |
139 | // Because -save-temps is a debugging feature, we report the error |
140 | // directly and exit. |
141 | if (EC) |
142 | reportOpenError(Path, Msg: EC.message()); |
143 | writeIndexToFile(Index, Out&: OS); |
144 | |
145 | Path = OutputFileName + "index.dot" ; |
146 | raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_Text); |
147 | if (EC) |
148 | reportOpenError(Path, Msg: EC.message()); |
149 | Index.exportToDot(OS&: OSDot, GUIDPreservedSymbols); |
150 | return true; |
151 | }; |
152 | |
153 | if (SaveTempsArgs.empty()) { |
154 | setHook("0.preopt" , PreOptModuleHook); |
155 | setHook("1.promote" , PostPromoteModuleHook); |
156 | setHook("2.internalize" , PostInternalizeModuleHook); |
157 | setHook("3.import" , PostImportModuleHook); |
158 | setHook("4.opt" , PostOptModuleHook); |
159 | setHook("5.precodegen" , PreCodeGenModuleHook); |
160 | CombinedIndexHook = SaveCombinedIndex; |
161 | } else { |
162 | if (SaveTempsArgs.contains(V: "preopt" )) |
163 | setHook("0.preopt" , PreOptModuleHook); |
164 | if (SaveTempsArgs.contains(V: "promote" )) |
165 | setHook("1.promote" , PostPromoteModuleHook); |
166 | if (SaveTempsArgs.contains(V: "internalize" )) |
167 | setHook("2.internalize" , PostInternalizeModuleHook); |
168 | if (SaveTempsArgs.contains(V: "import" )) |
169 | setHook("3.import" , PostImportModuleHook); |
170 | if (SaveTempsArgs.contains(V: "opt" )) |
171 | setHook("4.opt" , PostOptModuleHook); |
172 | if (SaveTempsArgs.contains(V: "precodegen" )) |
173 | setHook("5.precodegen" , PreCodeGenModuleHook); |
174 | if (SaveTempsArgs.contains(V: "combinedindex" )) |
175 | CombinedIndexHook = SaveCombinedIndex; |
176 | } |
177 | |
178 | return Error::success(); |
179 | } |
180 | |
181 | #define HANDLE_EXTENSION(Ext) \ |
182 | llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); |
183 | #include "llvm/Support/Extension.def" |
184 | |
185 | static void RegisterPassPlugins(ArrayRef<std::string> PassPlugins, |
186 | PassBuilder &PB) { |
187 | #define HANDLE_EXTENSION(Ext) \ |
188 | get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); |
189 | #include "llvm/Support/Extension.def" |
190 | |
191 | // Load requested pass plugins and let them register pass builder callbacks |
192 | for (auto &PluginFN : PassPlugins) { |
193 | auto PassPlugin = PassPlugin::Load(Filename: PluginFN); |
194 | if (!PassPlugin) |
195 | report_fatal_error(Err: PassPlugin.takeError(), /*gen_crash_diag=*/false); |
196 | PassPlugin->registerPassBuilderCallbacks(PB); |
197 | } |
198 | } |
199 | |
200 | static std::unique_ptr<TargetMachine> |
201 | createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) { |
202 | StringRef TheTriple = M.getTargetTriple(); |
203 | SubtargetFeatures Features; |
204 | Features.getDefaultSubtargetFeatures(Triple: Triple(TheTriple)); |
205 | for (const std::string &A : Conf.MAttrs) |
206 | Features.AddFeature(String: A); |
207 | |
208 | std::optional<Reloc::Model> RelocModel; |
209 | if (Conf.RelocModel) |
210 | RelocModel = *Conf.RelocModel; |
211 | else if (M.getModuleFlag(Key: "PIC Level" )) |
212 | RelocModel = |
213 | M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_; |
214 | |
215 | std::optional<CodeModel::Model> CodeModel; |
216 | if (Conf.CodeModel) |
217 | CodeModel = *Conf.CodeModel; |
218 | else |
219 | CodeModel = M.getCodeModel(); |
220 | |
221 | std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine( |
222 | TT: TheTriple, CPU: Conf.CPU, Features: Features.getString(), Options: Conf.Options, RM: RelocModel, |
223 | CM: CodeModel, OL: Conf.CGOptLevel)); |
224 | |
225 | assert(TM && "Failed to create target machine" ); |
226 | |
227 | if (std::optional<uint64_t> LargeDataThreshold = M.getLargeDataThreshold()) |
228 | TM->setLargeDataThreshold(*LargeDataThreshold); |
229 | |
230 | return TM; |
231 | } |
232 | |
233 | static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, |
234 | unsigned OptLevel, bool IsThinLTO, |
235 | ModuleSummaryIndex *ExportSummary, |
236 | const ModuleSummaryIndex *ImportSummary) { |
237 | auto FS = vfs::getRealFileSystem(); |
238 | std::optional<PGOOptions> PGOOpt; |
239 | if (!Conf.SampleProfile.empty()) |
240 | PGOOpt = PGOOptions(Conf.SampleProfile, "" , Conf.ProfileRemapping, |
241 | /*MemoryProfile=*/"" , FS, PGOOptions::SampleUse, |
242 | PGOOptions::NoCSAction, |
243 | PGOOptions::ColdFuncOpt::Default, true); |
244 | else if (Conf.RunCSIRInstr) { |
245 | PGOOpt = PGOOptions("" , Conf.CSIRProfile, Conf.ProfileRemapping, |
246 | /*MemoryProfile=*/"" , FS, PGOOptions::IRUse, |
247 | PGOOptions::CSIRInstr, PGOOptions::ColdFuncOpt::Default, |
248 | Conf.AddFSDiscriminator); |
249 | } else if (!Conf.CSIRProfile.empty()) { |
250 | PGOOpt = PGOOptions(Conf.CSIRProfile, "" , Conf.ProfileRemapping, |
251 | /*MemoryProfile=*/"" , FS, PGOOptions::IRUse, |
252 | PGOOptions::CSIRUse, PGOOptions::ColdFuncOpt::Default, |
253 | Conf.AddFSDiscriminator); |
254 | NoPGOWarnMismatch = !Conf.PGOWarnMismatch; |
255 | } else if (Conf.AddFSDiscriminator) { |
256 | PGOOpt = PGOOptions("" , "" , "" , /*MemoryProfile=*/"" , nullptr, |
257 | PGOOptions::NoAction, PGOOptions::NoCSAction, |
258 | PGOOptions::ColdFuncOpt::Default, true); |
259 | } |
260 | TM->setPGOOption(PGOOpt); |
261 | |
262 | LoopAnalysisManager LAM; |
263 | FunctionAnalysisManager FAM; |
264 | CGSCCAnalysisManager CGAM; |
265 | ModuleAnalysisManager MAM; |
266 | |
267 | PassInstrumentationCallbacks PIC; |
268 | StandardInstrumentations SI(Mod.getContext(), Conf.DebugPassManager, |
269 | Conf.VerifyEach); |
270 | SI.registerCallbacks(PIC, MAM: &MAM); |
271 | PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC); |
272 | |
273 | RegisterPassPlugins(PassPlugins: Conf.PassPlugins, PB); |
274 | |
275 | std::unique_ptr<TargetLibraryInfoImpl> TLII( |
276 | new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()))); |
277 | if (Conf.Freestanding) |
278 | TLII->disableAllFunctions(); |
279 | FAM.registerPass(PassBuilder: [&] { return TargetLibraryAnalysis(*TLII); }); |
280 | |
281 | // Parse a custom AA pipeline if asked to. |
282 | if (!Conf.AAPipeline.empty()) { |
283 | AAManager AA; |
284 | if (auto Err = PB.parseAAPipeline(AA, PipelineText: Conf.AAPipeline)) { |
285 | report_fatal_error(reason: Twine("unable to parse AA pipeline description '" ) + |
286 | Conf.AAPipeline + "': " + toString(E: std::move(Err))); |
287 | } |
288 | // Register the AA manager first so that our version is the one used. |
289 | FAM.registerPass(PassBuilder: [&] { return std::move(AA); }); |
290 | } |
291 | |
292 | // Register all the basic analyses with the managers. |
293 | PB.registerModuleAnalyses(MAM); |
294 | PB.registerCGSCCAnalyses(CGAM); |
295 | PB.registerFunctionAnalyses(FAM); |
296 | PB.registerLoopAnalyses(LAM); |
297 | PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); |
298 | |
299 | ModulePassManager MPM; |
300 | |
301 | if (!Conf.DisableVerify) |
302 | MPM.addPass(Pass: VerifierPass()); |
303 | |
304 | OptimizationLevel OL; |
305 | |
306 | switch (OptLevel) { |
307 | default: |
308 | llvm_unreachable("Invalid optimization level" ); |
309 | case 0: |
310 | OL = OptimizationLevel::O0; |
311 | break; |
312 | case 1: |
313 | OL = OptimizationLevel::O1; |
314 | break; |
315 | case 2: |
316 | OL = OptimizationLevel::O2; |
317 | break; |
318 | case 3: |
319 | OL = OptimizationLevel::O3; |
320 | break; |
321 | } |
322 | |
323 | // Parse a custom pipeline if asked to. |
324 | if (!Conf.OptPipeline.empty()) { |
325 | if (auto Err = PB.parsePassPipeline(MPM, PipelineText: Conf.OptPipeline)) { |
326 | report_fatal_error(reason: Twine("unable to parse pass pipeline description '" ) + |
327 | Conf.OptPipeline + "': " + toString(E: std::move(Err))); |
328 | } |
329 | } else if (IsThinLTO) { |
330 | MPM.addPass(Pass: PB.buildThinLTODefaultPipeline(Level: OL, ImportSummary)); |
331 | } else { |
332 | MPM.addPass(Pass: PB.buildLTODefaultPipeline(Level: OL, ExportSummary)); |
333 | } |
334 | |
335 | if (!Conf.DisableVerify) |
336 | MPM.addPass(Pass: VerifierPass()); |
337 | |
338 | MPM.run(IR&: Mod, AM&: MAM); |
339 | } |
340 | |
341 | bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, |
342 | bool IsThinLTO, ModuleSummaryIndex *ExportSummary, |
343 | const ModuleSummaryIndex *ImportSummary, |
344 | const std::vector<uint8_t> &CmdArgs) { |
345 | if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) { |
346 | // FIXME: the motivation for capturing post-merge bitcode and command line |
347 | // is replicating the compilation environment from bitcode, without needing |
348 | // to understand the dependencies (the functions to be imported). This |
349 | // assumes a clang - based invocation, case in which we have the command |
350 | // line. |
351 | // It's not very clear how the above motivation would map in the |
352 | // linker-based case, so we currently don't plumb the command line args in |
353 | // that case. |
354 | if (CmdArgs.empty()) |
355 | LLVM_DEBUG( |
356 | dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but " |
357 | "command line arguments are not available" ); |
358 | llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(), |
359 | /*EmbedBitcode*/ true, /*EmbedCmdline*/ true, |
360 | /*Cmdline*/ CmdArgs); |
361 | } |
362 | // FIXME: Plumb the combined index into the new pass manager. |
363 | runNewPMPasses(Conf, Mod, TM, OptLevel: Conf.OptLevel, IsThinLTO, ExportSummary, |
364 | ImportSummary); |
365 | return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); |
366 | } |
367 | |
368 | static void codegen(const Config &Conf, TargetMachine *TM, |
369 | AddStreamFn AddStream, unsigned Task, Module &Mod, |
370 | const ModuleSummaryIndex &CombinedIndex) { |
371 | if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) |
372 | return; |
373 | |
374 | if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized) |
375 | llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(), |
376 | /*EmbedBitcode*/ true, |
377 | /*EmbedCmdline*/ false, |
378 | /*CmdArgs*/ std::vector<uint8_t>()); |
379 | |
380 | std::unique_ptr<ToolOutputFile> DwoOut; |
381 | SmallString<1024> DwoFile(Conf.SplitDwarfOutput); |
382 | if (!Conf.DwoDir.empty()) { |
383 | std::error_code EC; |
384 | if (auto EC = llvm::sys::fs::create_directories(path: Conf.DwoDir)) |
385 | report_fatal_error(reason: Twine("Failed to create directory " ) + Conf.DwoDir + |
386 | ": " + EC.message()); |
387 | |
388 | DwoFile = Conf.DwoDir; |
389 | sys::path::append(path&: DwoFile, a: std::to_string(val: Task) + ".dwo" ); |
390 | TM->Options.MCOptions.SplitDwarfFile = std::string(DwoFile); |
391 | } else |
392 | TM->Options.MCOptions.SplitDwarfFile = Conf.SplitDwarfFile; |
393 | |
394 | if (!DwoFile.empty()) { |
395 | std::error_code EC; |
396 | DwoOut = std::make_unique<ToolOutputFile>(args&: DwoFile, args&: EC, args: sys::fs::OF_None); |
397 | if (EC) |
398 | report_fatal_error(reason: Twine("Failed to open " ) + DwoFile + ": " + |
399 | EC.message()); |
400 | } |
401 | |
402 | Expected<std::unique_ptr<CachedFileStream>> StreamOrErr = |
403 | AddStream(Task, Mod.getModuleIdentifier()); |
404 | if (Error Err = StreamOrErr.takeError()) |
405 | report_fatal_error(Err: std::move(Err)); |
406 | std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr; |
407 | TM->Options.ObjectFilenameForDebug = Stream->ObjectPathName; |
408 | |
409 | legacy::PassManager CodeGenPasses; |
410 | TargetLibraryInfoImpl TLII(Triple(Mod.getTargetTriple())); |
411 | CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII)); |
412 | CodeGenPasses.add( |
413 | P: createImmutableModuleSummaryIndexWrapperPass(Index: &CombinedIndex)); |
414 | if (Conf.PreCodeGenPassesHook) |
415 | Conf.PreCodeGenPassesHook(CodeGenPasses); |
416 | if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, |
417 | DwoOut ? &DwoOut->os() : nullptr, |
418 | Conf.CGFileType)) |
419 | report_fatal_error(reason: "Failed to setup codegen" ); |
420 | CodeGenPasses.run(M&: Mod); |
421 | |
422 | if (DwoOut) |
423 | DwoOut->keep(); |
424 | } |
425 | |
426 | static void splitCodeGen(const Config &C, TargetMachine *TM, |
427 | AddStreamFn AddStream, |
428 | unsigned ParallelCodeGenParallelismLevel, Module &Mod, |
429 | const ModuleSummaryIndex &CombinedIndex) { |
430 | DefaultThreadPool CodegenThreadPool( |
431 | heavyweight_hardware_concurrency(ThreadCount: ParallelCodeGenParallelismLevel)); |
432 | unsigned ThreadCount = 0; |
433 | const Target *T = &TM->getTarget(); |
434 | |
435 | const auto HandleModulePartition = |
436 | [&](std::unique_ptr<Module> MPart) { |
437 | // We want to clone the module in a new context to multi-thread the |
438 | // codegen. We do it by serializing partition modules to bitcode |
439 | // (while still on the main thread, in order to avoid data races) and |
440 | // spinning up new threads which deserialize the partitions into |
441 | // separate contexts. |
442 | // FIXME: Provide a more direct way to do this in LLVM. |
443 | SmallString<0> BC; |
444 | raw_svector_ostream BCOS(BC); |
445 | WriteBitcodeToFile(M: *MPart, Out&: BCOS); |
446 | |
447 | // Enqueue the task |
448 | CodegenThreadPool.async( |
449 | F: [&](const SmallString<0> &BC, unsigned ThreadId) { |
450 | LTOLLVMContext Ctx(C); |
451 | Expected<std::unique_ptr<Module>> MOrErr = |
452 | parseBitcodeFile(Buffer: MemoryBufferRef(BC.str(), "ld-temp.o" ), Context&: Ctx); |
453 | if (!MOrErr) |
454 | report_fatal_error(reason: "Failed to read bitcode" ); |
455 | std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); |
456 | |
457 | std::unique_ptr<TargetMachine> TM = |
458 | createTargetMachine(Conf: C, TheTarget: T, M&: *MPartInCtx); |
459 | |
460 | codegen(Conf: C, TM: TM.get(), AddStream, Task: ThreadId, Mod&: *MPartInCtx, |
461 | CombinedIndex); |
462 | }, |
463 | // Pass BC using std::move to ensure that it get moved rather than |
464 | // copied into the thread's context. |
465 | ArgList: std::move(BC), ArgList: ThreadCount++); |
466 | }; |
467 | |
468 | // Try target-specific module splitting first, then fallback to the default. |
469 | if (!TM->splitModule(M&: Mod, NumParts: ParallelCodeGenParallelismLevel, |
470 | ModuleCallback: HandleModulePartition)) { |
471 | SplitModule(M&: Mod, N: ParallelCodeGenParallelismLevel, ModuleCallback: HandleModulePartition, |
472 | PreserveLocals: false); |
473 | } |
474 | |
475 | // Because the inner lambda (which runs in a worker thread) captures our local |
476 | // variables, we need to wait for the worker threads to terminate before we |
477 | // can leave the function scope. |
478 | CodegenThreadPool.wait(); |
479 | } |
480 | |
481 | static Expected<const Target *> initAndLookupTarget(const Config &C, |
482 | Module &Mod) { |
483 | if (!C.OverrideTriple.empty()) |
484 | Mod.setTargetTriple(C.OverrideTriple); |
485 | else if (Mod.getTargetTriple().empty()) |
486 | Mod.setTargetTriple(C.DefaultTriple); |
487 | |
488 | std::string Msg; |
489 | const Target *T = TargetRegistry::lookupTarget(Triple: Mod.getTargetTriple(), Error&: Msg); |
490 | if (!T) |
491 | return make_error<StringError>(Args&: Msg, Args: inconvertibleErrorCode()); |
492 | return T; |
493 | } |
494 | |
495 | Error lto::( |
496 | std::unique_ptr<ToolOutputFile> DiagOutputFile) { |
497 | // Make sure we flush the diagnostic remarks file in case the linker doesn't |
498 | // call the global destructors before exiting. |
499 | if (!DiagOutputFile) |
500 | return Error::success(); |
501 | DiagOutputFile->keep(); |
502 | DiagOutputFile->os().flush(); |
503 | return Error::success(); |
504 | } |
505 | |
506 | Error lto::backend(const Config &C, AddStreamFn AddStream, |
507 | unsigned ParallelCodeGenParallelismLevel, Module &Mod, |
508 | ModuleSummaryIndex &CombinedIndex) { |
509 | Expected<const Target *> TOrErr = initAndLookupTarget(C, Mod); |
510 | if (!TOrErr) |
511 | return TOrErr.takeError(); |
512 | |
513 | std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf: C, TheTarget: *TOrErr, M&: Mod); |
514 | |
515 | LLVM_DEBUG(dbgs() << "Running regular LTO\n" ); |
516 | if (!C.CodeGenOnly) { |
517 | if (!opt(Conf: C, TM: TM.get(), Task: 0, Mod, /*IsThinLTO=*/false, |
518 | /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr, |
519 | /*CmdArgs*/ std::vector<uint8_t>())) |
520 | return Error::success(); |
521 | } |
522 | |
523 | if (ParallelCodeGenParallelismLevel == 1) { |
524 | codegen(Conf: C, TM: TM.get(), AddStream, Task: 0, Mod, CombinedIndex); |
525 | } else { |
526 | splitCodeGen(C, TM: TM.get(), AddStream, ParallelCodeGenParallelismLevel, Mod, |
527 | CombinedIndex); |
528 | } |
529 | return Error::success(); |
530 | } |
531 | |
532 | static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals, |
533 | const ModuleSummaryIndex &Index) { |
534 | std::vector<GlobalValue*> DeadGVs; |
535 | for (auto &GV : Mod.global_values()) |
536 | if (GlobalValueSummary *GVS = DefinedGlobals.lookup(Val: GV.getGUID())) |
537 | if (!Index.isGlobalValueLive(GVS)) { |
538 | DeadGVs.push_back(x: &GV); |
539 | convertToDeclaration(GV); |
540 | } |
541 | |
542 | // Now that all dead bodies have been dropped, delete the actual objects |
543 | // themselves when possible. |
544 | for (GlobalValue *GV : DeadGVs) { |
545 | GV->removeDeadConstantUsers(); |
546 | // Might reference something defined in native object (i.e. dropped a |
547 | // non-prevailing IR def, but we need to keep the declaration). |
548 | if (GV->use_empty()) |
549 | GV->eraseFromParent(); |
550 | } |
551 | } |
552 | |
553 | Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, |
554 | Module &Mod, const ModuleSummaryIndex &CombinedIndex, |
555 | const FunctionImporter::ImportMapTy &ImportList, |
556 | const GVSummaryMapTy &DefinedGlobals, |
557 | MapVector<StringRef, BitcodeModule> *ModuleMap, |
558 | const std::vector<uint8_t> &CmdArgs) { |
559 | Expected<const Target *> TOrErr = initAndLookupTarget(C: Conf, Mod); |
560 | if (!TOrErr) |
561 | return TOrErr.takeError(); |
562 | |
563 | std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, TheTarget: *TOrErr, M&: Mod); |
564 | |
565 | // Setup optimization remarks. |
566 | auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( |
567 | Context&: Mod.getContext(), RemarksFilename: Conf.RemarksFilename, RemarksPasses: Conf.RemarksPasses, |
568 | RemarksFormat: Conf.RemarksFormat, RemarksWithHotness: Conf.RemarksWithHotness, RemarksHotnessThreshold: Conf.RemarksHotnessThreshold, |
569 | Count: Task); |
570 | if (!DiagFileOrErr) |
571 | return DiagFileOrErr.takeError(); |
572 | auto DiagnosticOutputFile = std::move(*DiagFileOrErr); |
573 | |
574 | // Set the partial sample profile ratio in the profile summary module flag of |
575 | // the module, if applicable. |
576 | Mod.setPartialSampleProfileRatio(CombinedIndex); |
577 | |
578 | LLVM_DEBUG(dbgs() << "Running ThinLTO\n" ); |
579 | if (Conf.CodeGenOnly) { |
580 | codegen(Conf, TM: TM.get(), AddStream, Task, Mod, CombinedIndex); |
581 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
582 | } |
583 | |
584 | if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod)) |
585 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
586 | |
587 | auto OptimizeAndCodegen = |
588 | [&](Module &Mod, TargetMachine *TM, |
589 | std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) { |
590 | if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true, |
591 | /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex, |
592 | CmdArgs)) |
593 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
594 | |
595 | codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex); |
596 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
597 | }; |
598 | |
599 | if (ThinLTOAssumeMerged) |
600 | return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); |
601 | |
602 | // When linking an ELF shared object, dso_local should be dropped. We |
603 | // conservatively do this for -fpic. |
604 | bool ClearDSOLocalOnDeclarations = |
605 | TM->getTargetTriple().isOSBinFormatELF() && |
606 | TM->getRelocationModel() != Reloc::Static && |
607 | Mod.getPIELevel() == PIELevel::Default; |
608 | renameModuleForThinLTO(M&: Mod, Index: CombinedIndex, ClearDSOLocalOnDeclarations); |
609 | |
610 | dropDeadSymbols(Mod, DefinedGlobals, Index: CombinedIndex); |
611 | |
612 | thinLTOFinalizeInModule(TheModule&: Mod, DefinedGlobals, /*PropagateAttrs=*/true); |
613 | |
614 | if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) |
615 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
616 | |
617 | if (!DefinedGlobals.empty()) |
618 | thinLTOInternalizeModule(TheModule&: Mod, DefinedGlobals); |
619 | |
620 | if (Conf.PostInternalizeModuleHook && |
621 | !Conf.PostInternalizeModuleHook(Task, Mod)) |
622 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
623 | |
624 | auto ModuleLoader = [&](StringRef Identifier) { |
625 | assert(Mod.getContext().isODRUniquingDebugTypes() && |
626 | "ODR Type uniquing should be enabled on the context" ); |
627 | if (ModuleMap) { |
628 | auto I = ModuleMap->find(Key: Identifier); |
629 | assert(I != ModuleMap->end()); |
630 | return I->second.getLazyModule(Context&: Mod.getContext(), |
631 | /*ShouldLazyLoadMetadata=*/true, |
632 | /*IsImporting*/ true); |
633 | } |
634 | |
635 | ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MBOrErr = |
636 | llvm::MemoryBuffer::getFile(Filename: Identifier); |
637 | if (!MBOrErr) |
638 | return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>( |
639 | Args: Twine("Error loading imported file " ) + Identifier + " : " , |
640 | Args: MBOrErr.getError())); |
641 | |
642 | Expected<BitcodeModule> BMOrErr = findThinLTOModule(MBRef: **MBOrErr); |
643 | if (!BMOrErr) |
644 | return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>( |
645 | Args: Twine("Error loading imported file " ) + Identifier + " : " + |
646 | toString(E: BMOrErr.takeError()), |
647 | Args: inconvertibleErrorCode())); |
648 | |
649 | Expected<std::unique_ptr<Module>> MOrErr = |
650 | BMOrErr->getLazyModule(Context&: Mod.getContext(), |
651 | /*ShouldLazyLoadMetadata=*/true, |
652 | /*IsImporting*/ true); |
653 | if (MOrErr) |
654 | (*MOrErr)->setOwnedMemoryBuffer(std::move(*MBOrErr)); |
655 | return MOrErr; |
656 | }; |
657 | |
658 | FunctionImporter Importer(CombinedIndex, ModuleLoader, |
659 | ClearDSOLocalOnDeclarations); |
660 | if (Error Err = Importer.importFunctions(M&: Mod, ImportList).takeError()) |
661 | return Err; |
662 | |
663 | // Do this after any importing so that imported code is updated. |
664 | updateMemProfAttributes(Mod, Index: CombinedIndex); |
665 | updatePublicTypeTestCalls(M&: Mod, WholeProgramVisibilityEnabledInLTO: CombinedIndex.withWholeProgramVisibility()); |
666 | |
667 | if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) |
668 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
669 | |
670 | return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); |
671 | } |
672 | |
673 | BitcodeModule *lto::findThinLTOModule(MutableArrayRef<BitcodeModule> BMs) { |
674 | if (ThinLTOAssumeMerged && BMs.size() == 1) |
675 | return BMs.begin(); |
676 | |
677 | for (BitcodeModule &BM : BMs) { |
678 | Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo(); |
679 | if (LTOInfo && LTOInfo->IsThinLTO) |
680 | return &BM; |
681 | } |
682 | return nullptr; |
683 | } |
684 | |
685 | Expected<BitcodeModule> lto::findThinLTOModule(MemoryBufferRef MBRef) { |
686 | Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(Buffer: MBRef); |
687 | if (!BMsOrErr) |
688 | return BMsOrErr.takeError(); |
689 | |
690 | // The bitcode file may contain multiple modules, we want the one that is |
691 | // marked as being the ThinLTO module. |
692 | if (const BitcodeModule *Bm = lto::findThinLTOModule(BMs: *BMsOrErr)) |
693 | return *Bm; |
694 | |
695 | return make_error<StringError>(Args: "Could not find module summary" , |
696 | Args: inconvertibleErrorCode()); |
697 | } |
698 | |
699 | bool lto::initImportList(const Module &M, |
700 | const ModuleSummaryIndex &CombinedIndex, |
701 | FunctionImporter::ImportMapTy &ImportList) { |
702 | if (ThinLTOAssumeMerged) |
703 | return true; |
704 | // We can simply import the values mentioned in the combined index, since |
705 | // we should only invoke this using the individual indexes written out |
706 | // via a WriteIndexesThinBackend. |
707 | for (const auto &GlobalList : CombinedIndex) { |
708 | // Ignore entries for undefined references. |
709 | if (GlobalList.second.SummaryList.empty()) |
710 | continue; |
711 | |
712 | auto GUID = GlobalList.first; |
713 | for (const auto &Summary : GlobalList.second.SummaryList) { |
714 | // Skip the summaries for the importing module. These are included to |
715 | // e.g. record required linkage changes. |
716 | if (Summary->modulePath() == M.getModuleIdentifier()) |
717 | continue; |
718 | // Add an entry to provoke importing by thinBackend. |
719 | // Try emplace the entry first. If an entry with the same key already |
720 | // exists, set the value to 'std::min(existing-value, new-value)' to make |
721 | // sure a definition takes precedence over a declaration. |
722 | auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace( |
723 | k: GUID, args: Summary->importType()); |
724 | |
725 | if (!Inserted) |
726 | Iter->second = std::min(a: Iter->second, b: Summary->importType()); |
727 | } |
728 | } |
729 | return true; |
730 | } |
731 | |