1 | //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the "backend" phase of LTO, i.e. it performs |
10 | // optimization and code generation on a loaded module. It is generally used |
11 | // internally by the LTO class but can also be used independently, for example |
12 | // to implement a standalone ThinLTO backend. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "llvm/LTO/LTOBackend.h" |
17 | #include "llvm/Analysis/AliasAnalysis.h" |
18 | #include "llvm/Analysis/CGSCCPassManager.h" |
19 | #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
20 | #include "llvm/Analysis/TargetLibraryInfo.h" |
21 | #include "llvm/Bitcode/BitcodeReader.h" |
22 | #include "llvm/Bitcode/BitcodeWriter.h" |
23 | #include "llvm/CGData/CodeGenData.h" |
24 | #include "llvm/IR/LLVMRemarkStreamer.h" |
25 | #include "llvm/IR/LegacyPassManager.h" |
26 | #include "llvm/IR/PassManager.h" |
27 | #include "llvm/IR/Verifier.h" |
28 | #include "llvm/LTO/LTO.h" |
29 | #include "llvm/MC/TargetRegistry.h" |
30 | #include "llvm/Object/ModuleSymbolTable.h" |
31 | #include "llvm/Passes/PassBuilder.h" |
32 | #include "llvm/Passes/PassPlugin.h" |
33 | #include "llvm/Passes/StandardInstrumentations.h" |
34 | #include "llvm/Support/Error.h" |
35 | #include "llvm/Support/FileSystem.h" |
36 | #include "llvm/Support/MemoryBuffer.h" |
37 | #include "llvm/Support/Path.h" |
38 | #include "llvm/Support/ThreadPool.h" |
39 | #include "llvm/Support/ToolOutputFile.h" |
40 | #include "llvm/Support/VirtualFileSystem.h" |
41 | #include "llvm/Support/raw_ostream.h" |
42 | #include "llvm/Target/TargetMachine.h" |
43 | #include "llvm/TargetParser/SubtargetFeature.h" |
44 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
45 | #include "llvm/Transforms/Utils/FunctionImportUtils.h" |
46 | #include "llvm/Transforms/Utils/SplitModule.h" |
47 | #include <optional> |
48 | |
49 | using namespace llvm; |
50 | using namespace lto; |
51 | |
52 | #define DEBUG_TYPE "lto-backend" |
53 | |
/// Selects whether, and at which point of the LTO backend, LLVM bitcode is
/// embedded into the module being compiled. The value is chosen with the
/// -lto-embed-bitcode command line option.
enum class LTOBitcodeEmbedding {
  /// Do not embed.
  DoNotEmbed,
  /// Embed after all optimization passes.
  EmbedOptimized,
  /// Embed post merge, but before optimizations.
  EmbedPostMergePreOptimized,
};
59 | |
60 | static cl::opt<LTOBitcodeEmbedding> EmbedBitcode( |
61 | "lto-embed-bitcode" , cl::init(Val: LTOBitcodeEmbedding::DoNotEmbed), |
62 | cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none" , |
63 | "Do not embed" ), |
64 | clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized" , |
65 | "Embed after all optimization passes" ), |
66 | clEnumValN(LTOBitcodeEmbedding::EmbedPostMergePreOptimized, |
67 | "post-merge-pre-opt" , |
68 | "Embed post merge, but before optimizations" )), |
69 | cl::desc("Embed LLVM bitcode in object files produced by LTO" )); |
70 | |
71 | static cl::opt<bool> ThinLTOAssumeMerged( |
72 | "thinlto-assume-merged" , cl::init(Val: false), |
73 | cl::desc("Assume the input has already undergone ThinLTO function " |
74 | "importing and the other pre-optimization pipeline changes." )); |
75 | |
76 | namespace llvm { |
77 | extern cl::opt<bool> NoPGOWarnMismatch; |
78 | } |
79 | |
80 | [[noreturn]] static void reportOpenError(StringRef Path, Twine Msg) { |
81 | errs() << "failed to open " << Path << ": " << Msg << '\n'; |
82 | errs().flush(); |
83 | exit(status: 1); |
84 | } |
85 | |
86 | Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath, |
87 | const DenseSet<StringRef> &SaveTempsArgs) { |
88 | ShouldDiscardValueNames = false; |
89 | |
90 | std::error_code EC; |
91 | if (SaveTempsArgs.empty() || SaveTempsArgs.contains(V: "resolution" )) { |
92 | ResolutionFile = |
93 | std::make_unique<raw_fd_ostream>(args: OutputFileName + "resolution.txt" , args&: EC, |
94 | args: sys::fs::OpenFlags::OF_TextWithCRLF); |
95 | if (EC) { |
96 | ResolutionFile.reset(); |
97 | return errorCodeToError(EC); |
98 | } |
99 | } |
100 | |
101 | auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { |
102 | // Keep track of the hook provided by the linker, which also needs to run. |
103 | ModuleHookFn LinkerHook = Hook; |
104 | Hook = [=](unsigned Task, const Module &M) { |
105 | // If the linker's hook returned false, we need to pass that result |
106 | // through. |
107 | if (LinkerHook && !LinkerHook(Task, M)) |
108 | return false; |
109 | |
110 | std::string PathPrefix; |
111 | // If this is the combined module (not a ThinLTO backend compile) or the |
112 | // user hasn't requested using the input module's path, emit to a file |
113 | // named from the provided OutputFileName with the Task ID appended. |
114 | if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) { |
115 | PathPrefix = OutputFileName; |
116 | if (Task != (unsigned)-1) |
117 | PathPrefix += utostr(X: Task) + "." ; |
118 | } else |
119 | PathPrefix = M.getModuleIdentifier() + "." ; |
120 | std::string Path = PathPrefix + PathSuffix + ".bc" ; |
121 | std::error_code EC; |
122 | raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); |
123 | // Because -save-temps is a debugging feature, we report the error |
124 | // directly and exit. |
125 | if (EC) |
126 | reportOpenError(Path, Msg: EC.message()); |
127 | WriteBitcodeToFile(M, Out&: OS, /*ShouldPreserveUseListOrder=*/false); |
128 | return true; |
129 | }; |
130 | }; |
131 | |
132 | auto SaveCombinedIndex = |
133 | [=](const ModuleSummaryIndex &Index, |
134 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { |
135 | std::string Path = OutputFileName + "index.bc" ; |
136 | std::error_code EC; |
137 | raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); |
138 | // Because -save-temps is a debugging feature, we report the error |
139 | // directly and exit. |
140 | if (EC) |
141 | reportOpenError(Path, Msg: EC.message()); |
142 | writeIndexToFile(Index, Out&: OS); |
143 | |
144 | Path = OutputFileName + "index.dot" ; |
145 | raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_Text); |
146 | if (EC) |
147 | reportOpenError(Path, Msg: EC.message()); |
148 | Index.exportToDot(OS&: OSDot, GUIDPreservedSymbols); |
149 | return true; |
150 | }; |
151 | |
152 | if (SaveTempsArgs.empty()) { |
153 | setHook("0.preopt" , PreOptModuleHook); |
154 | setHook("1.promote" , PostPromoteModuleHook); |
155 | setHook("2.internalize" , PostInternalizeModuleHook); |
156 | setHook("3.import" , PostImportModuleHook); |
157 | setHook("4.opt" , PostOptModuleHook); |
158 | setHook("5.precodegen" , PreCodeGenModuleHook); |
159 | CombinedIndexHook = SaveCombinedIndex; |
160 | } else { |
161 | if (SaveTempsArgs.contains(V: "preopt" )) |
162 | setHook("0.preopt" , PreOptModuleHook); |
163 | if (SaveTempsArgs.contains(V: "promote" )) |
164 | setHook("1.promote" , PostPromoteModuleHook); |
165 | if (SaveTempsArgs.contains(V: "internalize" )) |
166 | setHook("2.internalize" , PostInternalizeModuleHook); |
167 | if (SaveTempsArgs.contains(V: "import" )) |
168 | setHook("3.import" , PostImportModuleHook); |
169 | if (SaveTempsArgs.contains(V: "opt" )) |
170 | setHook("4.opt" , PostOptModuleHook); |
171 | if (SaveTempsArgs.contains(V: "precodegen" )) |
172 | setHook("5.precodegen" , PreCodeGenModuleHook); |
173 | if (SaveTempsArgs.contains(V: "combinedindex" )) |
174 | CombinedIndexHook = SaveCombinedIndex; |
175 | } |
176 | |
177 | return Error::success(); |
178 | } |
179 | |
180 | #define HANDLE_EXTENSION(Ext) \ |
181 | llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); |
182 | #include "llvm/Support/Extension.def" |
183 | #undef HANDLE_EXTENSION |
184 | |
185 | static void RegisterPassPlugins(ArrayRef<std::string> PassPlugins, |
186 | PassBuilder &PB) { |
187 | #define HANDLE_EXTENSION(Ext) \ |
188 | get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); |
189 | #include "llvm/Support/Extension.def" |
190 | #undef HANDLE_EXTENSION |
191 | |
192 | // Load requested pass plugins and let them register pass builder callbacks |
193 | for (auto &PluginFN : PassPlugins) { |
194 | auto PassPlugin = PassPlugin::Load(Filename: PluginFN); |
195 | if (!PassPlugin) |
196 | reportFatalUsageError(Err: PassPlugin.takeError()); |
197 | PassPlugin->registerPassBuilderCallbacks(PB); |
198 | } |
199 | } |
200 | |
201 | static std::unique_ptr<TargetMachine> |
202 | createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) { |
203 | const Triple &TheTriple = M.getTargetTriple(); |
204 | SubtargetFeatures Features; |
205 | Features.getDefaultSubtargetFeatures(Triple: TheTriple); |
206 | for (const std::string &A : Conf.MAttrs) |
207 | Features.AddFeature(String: A); |
208 | |
209 | std::optional<Reloc::Model> RelocModel; |
210 | if (Conf.RelocModel) |
211 | RelocModel = *Conf.RelocModel; |
212 | else if (M.getModuleFlag(Key: "PIC Level" )) |
213 | RelocModel = |
214 | M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_; |
215 | |
216 | std::optional<CodeModel::Model> CodeModel; |
217 | if (Conf.CodeModel) |
218 | CodeModel = *Conf.CodeModel; |
219 | else |
220 | CodeModel = M.getCodeModel(); |
221 | |
222 | TargetOptions TargetOpts = Conf.Options; |
223 | if (TargetOpts.MCOptions.ABIName.empty()) { |
224 | TargetOpts.MCOptions.ABIName = M.getTargetABIFromMD(); |
225 | } |
226 | |
227 | std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine( |
228 | TT: TheTriple, CPU: Conf.CPU, Features: Features.getString(), Options: TargetOpts, RM: RelocModel, |
229 | CM: CodeModel, OL: Conf.CGOptLevel)); |
230 | |
231 | assert(TM && "Failed to create target machine" ); |
232 | |
233 | if (std::optional<uint64_t> LargeDataThreshold = M.getLargeDataThreshold()) |
234 | TM->setLargeDataThreshold(*LargeDataThreshold); |
235 | |
236 | return TM; |
237 | } |
238 | |
239 | static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, |
240 | unsigned OptLevel, bool IsThinLTO, |
241 | ModuleSummaryIndex *ExportSummary, |
242 | const ModuleSummaryIndex *ImportSummary) { |
243 | auto FS = vfs::getRealFileSystem(); |
244 | std::optional<PGOOptions> PGOOpt; |
245 | if (!Conf.SampleProfile.empty()) |
246 | PGOOpt = PGOOptions(Conf.SampleProfile, "" , Conf.ProfileRemapping, |
247 | /*MemoryProfile=*/"" , FS, PGOOptions::SampleUse, |
248 | PGOOptions::NoCSAction, |
249 | PGOOptions::ColdFuncOpt::Default, true); |
250 | else if (Conf.RunCSIRInstr) { |
251 | PGOOpt = PGOOptions("" , Conf.CSIRProfile, Conf.ProfileRemapping, |
252 | /*MemoryProfile=*/"" , FS, PGOOptions::IRUse, |
253 | PGOOptions::CSIRInstr, PGOOptions::ColdFuncOpt::Default, |
254 | Conf.AddFSDiscriminator); |
255 | } else if (!Conf.CSIRProfile.empty()) { |
256 | PGOOpt = PGOOptions(Conf.CSIRProfile, "" , Conf.ProfileRemapping, |
257 | /*MemoryProfile=*/"" , FS, PGOOptions::IRUse, |
258 | PGOOptions::CSIRUse, PGOOptions::ColdFuncOpt::Default, |
259 | Conf.AddFSDiscriminator); |
260 | NoPGOWarnMismatch = !Conf.PGOWarnMismatch; |
261 | } else if (Conf.AddFSDiscriminator) { |
262 | PGOOpt = PGOOptions("" , "" , "" , /*MemoryProfile=*/"" , nullptr, |
263 | PGOOptions::NoAction, PGOOptions::NoCSAction, |
264 | PGOOptions::ColdFuncOpt::Default, true); |
265 | } |
266 | TM->setPGOOption(PGOOpt); |
267 | |
268 | LoopAnalysisManager LAM; |
269 | FunctionAnalysisManager FAM; |
270 | CGSCCAnalysisManager CGAM; |
271 | ModuleAnalysisManager MAM; |
272 | |
273 | PassInstrumentationCallbacks PIC; |
274 | StandardInstrumentations SI(Mod.getContext(), Conf.DebugPassManager, |
275 | Conf.VerifyEach); |
276 | SI.registerCallbacks(PIC, MAM: &MAM); |
277 | PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC); |
278 | |
279 | RegisterPassPlugins(PassPlugins: Conf.PassPlugins, PB); |
280 | |
281 | std::unique_ptr<TargetLibraryInfoImpl> TLII( |
282 | new TargetLibraryInfoImpl(TM->getTargetTriple())); |
283 | if (Conf.Freestanding) |
284 | TLII->disableAllFunctions(); |
285 | FAM.registerPass(PassBuilder: [&] { return TargetLibraryAnalysis(*TLII); }); |
286 | |
287 | // Parse a custom AA pipeline if asked to. |
288 | if (!Conf.AAPipeline.empty()) { |
289 | AAManager AA; |
290 | if (auto Err = PB.parseAAPipeline(AA, PipelineText: Conf.AAPipeline)) { |
291 | report_fatal_error(reason: Twine("unable to parse AA pipeline description '" ) + |
292 | Conf.AAPipeline + "': " + toString(E: std::move(Err))); |
293 | } |
294 | // Register the AA manager first so that our version is the one used. |
295 | FAM.registerPass(PassBuilder: [&] { return std::move(AA); }); |
296 | } |
297 | |
298 | // Register all the basic analyses with the managers. |
299 | PB.registerModuleAnalyses(MAM); |
300 | PB.registerCGSCCAnalyses(CGAM); |
301 | PB.registerFunctionAnalyses(FAM); |
302 | PB.registerLoopAnalyses(LAM); |
303 | PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); |
304 | |
305 | ModulePassManager MPM; |
306 | |
307 | if (!Conf.DisableVerify) |
308 | MPM.addPass(Pass: VerifierPass()); |
309 | |
310 | OptimizationLevel OL; |
311 | |
312 | switch (OptLevel) { |
313 | default: |
314 | llvm_unreachable("Invalid optimization level" ); |
315 | case 0: |
316 | OL = OptimizationLevel::O0; |
317 | break; |
318 | case 1: |
319 | OL = OptimizationLevel::O1; |
320 | break; |
321 | case 2: |
322 | OL = OptimizationLevel::O2; |
323 | break; |
324 | case 3: |
325 | OL = OptimizationLevel::O3; |
326 | break; |
327 | } |
328 | |
329 | // Parse a custom pipeline if asked to. |
330 | if (!Conf.OptPipeline.empty()) { |
331 | if (auto Err = PB.parsePassPipeline(MPM, PipelineText: Conf.OptPipeline)) { |
332 | report_fatal_error(reason: Twine("unable to parse pass pipeline description '" ) + |
333 | Conf.OptPipeline + "': " + toString(E: std::move(Err))); |
334 | } |
335 | } else if (IsThinLTO) { |
336 | MPM.addPass(Pass: PB.buildThinLTODefaultPipeline(Level: OL, ImportSummary)); |
337 | } else { |
338 | MPM.addPass(Pass: PB.buildLTODefaultPipeline(Level: OL, ExportSummary)); |
339 | } |
340 | |
341 | if (!Conf.DisableVerify) |
342 | MPM.addPass(Pass: VerifierPass()); |
343 | |
344 | if (PrintPipelinePasses) { |
345 | std::string PipelineStr; |
346 | raw_string_ostream OS(PipelineStr); |
347 | MPM.printPipeline(OS, MapClassName2PassName: [&PIC](StringRef ClassName) { |
348 | auto PassName = PIC.getPassNameForClassName(ClassName); |
349 | return PassName.empty() ? ClassName : PassName; |
350 | }); |
351 | outs() << "pipeline-passes: " << PipelineStr << '\n'; |
352 | } |
353 | |
354 | MPM.run(IR&: Mod, AM&: MAM); |
355 | } |
356 | |
357 | static bool isEmptyModule(const Module &Mod) { |
358 | // Module is empty if it has no functions, no globals, no inline asm and no |
359 | // named metadata (aliases and ifuncs require functions or globals so we |
360 | // don't need to check those explicitly). |
361 | return Mod.empty() && Mod.global_empty() && Mod.named_metadata_empty() && |
362 | Mod.getModuleInlineAsm().empty(); |
363 | } |
364 | |
365 | bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, |
366 | bool IsThinLTO, ModuleSummaryIndex *ExportSummary, |
367 | const ModuleSummaryIndex *ImportSummary, |
368 | const std::vector<uint8_t> &CmdArgs) { |
369 | if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) { |
370 | // FIXME: the motivation for capturing post-merge bitcode and command line |
371 | // is replicating the compilation environment from bitcode, without needing |
372 | // to understand the dependencies (the functions to be imported). This |
373 | // assumes a clang - based invocation, case in which we have the command |
374 | // line. |
375 | // It's not very clear how the above motivation would map in the |
376 | // linker-based case, so we currently don't plumb the command line args in |
377 | // that case. |
378 | if (CmdArgs.empty()) |
379 | LLVM_DEBUG( |
380 | dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but " |
381 | "command line arguments are not available" ); |
382 | llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(), |
383 | /*EmbedBitcode*/ true, /*EmbedCmdline*/ true, |
384 | /*Cmdline*/ CmdArgs); |
385 | } |
386 | // No need to run any opt passes if the module is empty. |
387 | // In theory these passes should take almost no time for an empty |
388 | // module, however, this guards against doing any unnecessary summary-based |
389 | // analysis in the case of a ThinLTO build where this might be an empty |
390 | // regular LTO combined module, with a large combined index from ThinLTO. |
391 | if (!isEmptyModule(Mod)) { |
392 | // FIXME: Plumb the combined index into the new pass manager. |
393 | runNewPMPasses(Conf, Mod, TM, OptLevel: Conf.OptLevel, IsThinLTO, ExportSummary, |
394 | ImportSummary); |
395 | } |
396 | return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); |
397 | } |
398 | |
399 | static void codegen(const Config &Conf, TargetMachine *TM, |
400 | AddStreamFn AddStream, unsigned Task, Module &Mod, |
401 | const ModuleSummaryIndex &CombinedIndex) { |
402 | if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) |
403 | return; |
404 | |
405 | if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized) |
406 | llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(), |
407 | /*EmbedBitcode*/ true, |
408 | /*EmbedCmdline*/ false, |
409 | /*CmdArgs*/ std::vector<uint8_t>()); |
410 | |
411 | std::unique_ptr<ToolOutputFile> DwoOut; |
412 | SmallString<1024> DwoFile(Conf.SplitDwarfOutput); |
413 | if (!Conf.DwoDir.empty()) { |
414 | std::error_code EC; |
415 | if (auto EC = llvm::sys::fs::create_directories(path: Conf.DwoDir)) |
416 | report_fatal_error(reason: Twine("Failed to create directory " ) + Conf.DwoDir + |
417 | ": " + EC.message()); |
418 | |
419 | DwoFile = Conf.DwoDir; |
420 | sys::path::append(path&: DwoFile, a: std::to_string(val: Task) + ".dwo" ); |
421 | TM->Options.MCOptions.SplitDwarfFile = std::string(DwoFile); |
422 | } else |
423 | TM->Options.MCOptions.SplitDwarfFile = Conf.SplitDwarfFile; |
424 | |
425 | if (!DwoFile.empty()) { |
426 | std::error_code EC; |
427 | DwoOut = std::make_unique<ToolOutputFile>(args&: DwoFile, args&: EC, args: sys::fs::OF_None); |
428 | if (EC) |
429 | report_fatal_error(reason: Twine("Failed to open " ) + DwoFile + ": " + |
430 | EC.message()); |
431 | } |
432 | |
433 | Expected<std::unique_ptr<CachedFileStream>> StreamOrErr = |
434 | AddStream(Task, Mod.getModuleIdentifier()); |
435 | if (Error Err = StreamOrErr.takeError()) |
436 | report_fatal_error(Err: std::move(Err)); |
437 | std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr; |
438 | TM->Options.ObjectFilenameForDebug = Stream->ObjectPathName; |
439 | |
440 | // Create the codegen pipeline in its own scope so it gets deleted before |
441 | // Stream->commit() is called. The commit function of CacheStream deletes |
442 | // the raw stream, which is too early as streamers (e.g. MCAsmStreamer) |
443 | // keep the pointer and may use it until their destruction. See #138194. |
444 | { |
445 | legacy::PassManager CodeGenPasses; |
446 | TargetLibraryInfoImpl TLII(Mod.getTargetTriple()); |
447 | CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII)); |
448 | // No need to make index available if the module is empty. |
449 | // In theory these passes should not use the index for an empty |
450 | // module, however, this guards against doing any unnecessary summary-based |
451 | // analysis in the case of a ThinLTO build where this might be an empty |
452 | // regular LTO combined module, with a large combined index from ThinLTO. |
453 | if (!isEmptyModule(Mod)) |
454 | CodeGenPasses.add( |
455 | P: createImmutableModuleSummaryIndexWrapperPass(Index: &CombinedIndex)); |
456 | if (Conf.PreCodeGenPassesHook) |
457 | Conf.PreCodeGenPassesHook(CodeGenPasses); |
458 | if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, |
459 | DwoOut ? &DwoOut->os() : nullptr, |
460 | Conf.CGFileType)) |
461 | report_fatal_error(reason: "Failed to setup codegen" ); |
462 | CodeGenPasses.run(M&: Mod); |
463 | |
464 | if (DwoOut) |
465 | DwoOut->keep(); |
466 | } |
467 | |
468 | if (Error Err = Stream->commit()) |
469 | report_fatal_error(Err: std::move(Err)); |
470 | } |
471 | |
472 | static void splitCodeGen(const Config &C, TargetMachine *TM, |
473 | AddStreamFn AddStream, |
474 | unsigned ParallelCodeGenParallelismLevel, Module &Mod, |
475 | const ModuleSummaryIndex &CombinedIndex) { |
476 | DefaultThreadPool CodegenThreadPool( |
477 | heavyweight_hardware_concurrency(ThreadCount: ParallelCodeGenParallelismLevel)); |
478 | unsigned ThreadCount = 0; |
479 | const Target *T = &TM->getTarget(); |
480 | |
481 | const auto HandleModulePartition = |
482 | [&](std::unique_ptr<Module> MPart) { |
483 | // We want to clone the module in a new context to multi-thread the |
484 | // codegen. We do it by serializing partition modules to bitcode |
485 | // (while still on the main thread, in order to avoid data races) and |
486 | // spinning up new threads which deserialize the partitions into |
487 | // separate contexts. |
488 | // FIXME: Provide a more direct way to do this in LLVM. |
489 | SmallString<0> BC; |
490 | raw_svector_ostream BCOS(BC); |
491 | WriteBitcodeToFile(M: *MPart, Out&: BCOS); |
492 | |
493 | // Enqueue the task |
494 | CodegenThreadPool.async( |
495 | F: [&](const SmallString<0> &BC, unsigned ThreadId) { |
496 | LTOLLVMContext Ctx(C); |
497 | Expected<std::unique_ptr<Module>> MOrErr = |
498 | parseBitcodeFile(Buffer: MemoryBufferRef(BC.str(), "ld-temp.o" ), Context&: Ctx); |
499 | if (!MOrErr) |
500 | report_fatal_error(reason: "Failed to read bitcode" ); |
501 | std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); |
502 | |
503 | std::unique_ptr<TargetMachine> TM = |
504 | createTargetMachine(Conf: C, TheTarget: T, M&: *MPartInCtx); |
505 | |
506 | codegen(Conf: C, TM: TM.get(), AddStream, Task: ThreadId, Mod&: *MPartInCtx, |
507 | CombinedIndex); |
508 | }, |
509 | // Pass BC using std::move to ensure that it get moved rather than |
510 | // copied into the thread's context. |
511 | ArgList: std::move(BC), ArgList: ThreadCount++); |
512 | }; |
513 | |
514 | // Try target-specific module splitting first, then fallback to the default. |
515 | if (!TM->splitModule(M&: Mod, NumParts: ParallelCodeGenParallelismLevel, |
516 | ModuleCallback: HandleModulePartition)) { |
517 | SplitModule(M&: Mod, N: ParallelCodeGenParallelismLevel, ModuleCallback: HandleModulePartition, |
518 | PreserveLocals: false); |
519 | } |
520 | |
521 | // Because the inner lambda (which runs in a worker thread) captures our local |
522 | // variables, we need to wait for the worker threads to terminate before we |
523 | // can leave the function scope. |
524 | CodegenThreadPool.wait(); |
525 | } |
526 | |
527 | static Expected<const Target *> initAndLookupTarget(const Config &C, |
528 | Module &Mod) { |
529 | if (!C.OverrideTriple.empty()) |
530 | Mod.setTargetTriple(Triple(C.OverrideTriple)); |
531 | else if (Mod.getTargetTriple().empty()) |
532 | Mod.setTargetTriple(Triple(C.DefaultTriple)); |
533 | |
534 | std::string Msg; |
535 | const Target *T = TargetRegistry::lookupTarget(TheTriple: Mod.getTargetTriple(), Error&: Msg); |
536 | if (!T) |
537 | return make_error<StringError>(Args&: Msg, Args: inconvertibleErrorCode()); |
538 | return T; |
539 | } |
540 | |
541 | Error lto::( |
542 | std::unique_ptr<ToolOutputFile> DiagOutputFile) { |
543 | // Make sure we flush the diagnostic remarks file in case the linker doesn't |
544 | // call the global destructors before exiting. |
545 | if (!DiagOutputFile) |
546 | return Error::success(); |
547 | DiagOutputFile->keep(); |
548 | DiagOutputFile->os().flush(); |
549 | return Error::success(); |
550 | } |
551 | |
552 | Error lto::backend(const Config &C, AddStreamFn AddStream, |
553 | unsigned ParallelCodeGenParallelismLevel, Module &Mod, |
554 | ModuleSummaryIndex &CombinedIndex) { |
555 | Expected<const Target *> TOrErr = initAndLookupTarget(C, Mod); |
556 | if (!TOrErr) |
557 | return TOrErr.takeError(); |
558 | |
559 | std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf: C, TheTarget: *TOrErr, M&: Mod); |
560 | |
561 | LLVM_DEBUG(dbgs() << "Running regular LTO\n" ); |
562 | if (!C.CodeGenOnly) { |
563 | if (!opt(Conf: C, TM: TM.get(), Task: 0, Mod, /*IsThinLTO=*/false, |
564 | /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr, |
565 | /*CmdArgs*/ std::vector<uint8_t>())) |
566 | return Error::success(); |
567 | } |
568 | |
569 | if (ParallelCodeGenParallelismLevel == 1) { |
570 | codegen(Conf: C, TM: TM.get(), AddStream, Task: 0, Mod, CombinedIndex); |
571 | } else { |
572 | splitCodeGen(C, TM: TM.get(), AddStream, ParallelCodeGenParallelismLevel, Mod, |
573 | CombinedIndex); |
574 | } |
575 | return Error::success(); |
576 | } |
577 | |
578 | static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals, |
579 | const ModuleSummaryIndex &Index) { |
580 | std::vector<GlobalValue*> DeadGVs; |
581 | for (auto &GV : Mod.global_values()) |
582 | if (GlobalValueSummary *GVS = DefinedGlobals.lookup(Val: GV.getGUID())) |
583 | if (!Index.isGlobalValueLive(GVS)) { |
584 | DeadGVs.push_back(x: &GV); |
585 | convertToDeclaration(GV); |
586 | } |
587 | |
588 | // Now that all dead bodies have been dropped, delete the actual objects |
589 | // themselves when possible. |
590 | for (GlobalValue *GV : DeadGVs) { |
591 | GV->removeDeadConstantUsers(); |
592 | // Might reference something defined in native object (i.e. dropped a |
593 | // non-prevailing IR def, but we need to keep the declaration). |
594 | if (GV->use_empty()) |
595 | GV->eraseFromParent(); |
596 | } |
597 | } |
598 | |
599 | Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, |
600 | Module &Mod, const ModuleSummaryIndex &CombinedIndex, |
601 | const FunctionImporter::ImportMapTy &ImportList, |
602 | const GVSummaryMapTy &DefinedGlobals, |
603 | MapVector<StringRef, BitcodeModule> *ModuleMap, |
604 | bool CodeGenOnly, AddStreamFn IRAddStream, |
605 | const std::vector<uint8_t> &CmdArgs) { |
606 | Expected<const Target *> TOrErr = initAndLookupTarget(C: Conf, Mod); |
607 | if (!TOrErr) |
608 | return TOrErr.takeError(); |
609 | |
610 | std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, TheTarget: *TOrErr, M&: Mod); |
611 | |
612 | // Setup optimization remarks. |
613 | auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( |
614 | Context&: Mod.getContext(), RemarksFilename: Conf.RemarksFilename, RemarksPasses: Conf.RemarksPasses, |
615 | RemarksFormat: Conf.RemarksFormat, RemarksWithHotness: Conf.RemarksWithHotness, RemarksHotnessThreshold: Conf.RemarksHotnessThreshold, |
616 | Count: Task); |
617 | if (!DiagFileOrErr) |
618 | return DiagFileOrErr.takeError(); |
619 | auto DiagnosticOutputFile = std::move(*DiagFileOrErr); |
620 | |
621 | // Set the partial sample profile ratio in the profile summary module flag of |
622 | // the module, if applicable. |
623 | Mod.setPartialSampleProfileRatio(CombinedIndex); |
624 | |
625 | LLVM_DEBUG(dbgs() << "Running ThinLTO\n" ); |
626 | if (CodeGenOnly) { |
627 | // If CodeGenOnly is set, we only perform code generation and skip |
628 | // optimization. This value may differ from Conf.CodeGenOnly. |
629 | codegen(Conf, TM: TM.get(), AddStream, Task, Mod, CombinedIndex); |
630 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
631 | } |
632 | |
633 | if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod)) |
634 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
635 | |
636 | auto OptimizeAndCodegen = |
637 | [&](Module &Mod, TargetMachine *TM, |
638 | std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) { |
639 | // Perform optimization and code generation for ThinLTO. |
640 | if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true, |
641 | /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex, |
642 | CmdArgs)) |
643 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
644 | |
645 | // Save the current module before the first codegen round. |
646 | // Note that the second codegen round runs only `codegen()` without |
647 | // running `opt()`. We're not reaching here as it's bailed out earlier |
648 | // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`. |
649 | if (IRAddStream) |
650 | cgdata::saveModuleForTwoRounds(TheModule: Mod, Task, AddStream: IRAddStream); |
651 | |
652 | codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex); |
653 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
654 | }; |
655 | |
656 | if (ThinLTOAssumeMerged) |
657 | return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); |
658 | |
659 | // When linking an ELF shared object, dso_local should be dropped. We |
660 | // conservatively do this for -fpic. |
661 | bool ClearDSOLocalOnDeclarations = |
662 | TM->getTargetTriple().isOSBinFormatELF() && |
663 | TM->getRelocationModel() != Reloc::Static && |
664 | Mod.getPIELevel() == PIELevel::Default; |
665 | renameModuleForThinLTO(M&: Mod, Index: CombinedIndex, ClearDSOLocalOnDeclarations); |
666 | |
667 | dropDeadSymbols(Mod, DefinedGlobals, Index: CombinedIndex); |
668 | |
669 | thinLTOFinalizeInModule(TheModule&: Mod, DefinedGlobals, /*PropagateAttrs=*/true); |
670 | |
671 | if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) |
672 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
673 | |
674 | if (!DefinedGlobals.empty()) |
675 | thinLTOInternalizeModule(TheModule&: Mod, DefinedGlobals); |
676 | |
677 | if (Conf.PostInternalizeModuleHook && |
678 | !Conf.PostInternalizeModuleHook(Task, Mod)) |
679 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
680 | |
681 | auto ModuleLoader = [&](StringRef Identifier) { |
682 | assert(Mod.getContext().isODRUniquingDebugTypes() && |
683 | "ODR Type uniquing should be enabled on the context" ); |
684 | if (ModuleMap) { |
685 | auto I = ModuleMap->find(Key: Identifier); |
686 | assert(I != ModuleMap->end()); |
687 | return I->second.getLazyModule(Context&: Mod.getContext(), |
688 | /*ShouldLazyLoadMetadata=*/true, |
689 | /*IsImporting*/ true); |
690 | } |
691 | |
692 | ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MBOrErr = |
693 | llvm::MemoryBuffer::getFile(Filename: Identifier); |
694 | if (!MBOrErr) |
695 | return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>( |
696 | Args: Twine("Error loading imported file " ) + Identifier + " : " , |
697 | Args: MBOrErr.getError())); |
698 | |
699 | Expected<BitcodeModule> BMOrErr = findThinLTOModule(MBRef: **MBOrErr); |
700 | if (!BMOrErr) |
701 | return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>( |
702 | Args: Twine("Error loading imported file " ) + Identifier + " : " + |
703 | toString(E: BMOrErr.takeError()), |
704 | Args: inconvertibleErrorCode())); |
705 | |
706 | Expected<std::unique_ptr<Module>> MOrErr = |
707 | BMOrErr->getLazyModule(Context&: Mod.getContext(), |
708 | /*ShouldLazyLoadMetadata=*/true, |
709 | /*IsImporting*/ true); |
710 | if (MOrErr) |
711 | (*MOrErr)->setOwnedMemoryBuffer(std::move(*MBOrErr)); |
712 | return MOrErr; |
713 | }; |
714 | |
715 | FunctionImporter Importer(CombinedIndex, ModuleLoader, |
716 | ClearDSOLocalOnDeclarations); |
717 | if (Error Err = Importer.importFunctions(M&: Mod, ImportList).takeError()) |
718 | return Err; |
719 | |
720 | // Do this after any importing so that imported code is updated. |
721 | updateMemProfAttributes(Mod, Index: CombinedIndex); |
722 | updatePublicTypeTestCalls(M&: Mod, WholeProgramVisibilityEnabledInLTO: CombinedIndex.withWholeProgramVisibility()); |
723 | |
724 | if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) |
725 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
726 | |
727 | return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); |
728 | } |
729 | |
730 | BitcodeModule *lto::findThinLTOModule(MutableArrayRef<BitcodeModule> BMs) { |
731 | if (ThinLTOAssumeMerged && BMs.size() == 1) |
732 | return BMs.begin(); |
733 | |
734 | for (BitcodeModule &BM : BMs) { |
735 | Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo(); |
736 | if (LTOInfo && LTOInfo->IsThinLTO) |
737 | return &BM; |
738 | } |
739 | return nullptr; |
740 | } |
741 | |
742 | Expected<BitcodeModule> lto::findThinLTOModule(MemoryBufferRef MBRef) { |
743 | Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(Buffer: MBRef); |
744 | if (!BMsOrErr) |
745 | return BMsOrErr.takeError(); |
746 | |
747 | // The bitcode file may contain multiple modules, we want the one that is |
748 | // marked as being the ThinLTO module. |
749 | if (const BitcodeModule *Bm = lto::findThinLTOModule(BMs: *BMsOrErr)) |
750 | return *Bm; |
751 | |
752 | return make_error<StringError>(Args: "Could not find module summary" , |
753 | Args: inconvertibleErrorCode()); |
754 | } |
755 | |
756 | bool lto::initImportList(const Module &M, |
757 | const ModuleSummaryIndex &CombinedIndex, |
758 | FunctionImporter::ImportMapTy &ImportList) { |
759 | if (ThinLTOAssumeMerged) |
760 | return true; |
761 | // We can simply import the values mentioned in the combined index, since |
762 | // we should only invoke this using the individual indexes written out |
763 | // via a WriteIndexesThinBackend. |
764 | for (const auto &GlobalList : CombinedIndex) { |
765 | // Ignore entries for undefined references. |
766 | if (GlobalList.second.SummaryList.empty()) |
767 | continue; |
768 | |
769 | auto GUID = GlobalList.first; |
770 | for (const auto &Summary : GlobalList.second.SummaryList) { |
771 | // Skip the summaries for the importing module. These are included to |
772 | // e.g. record required linkage changes. |
773 | if (Summary->modulePath() == M.getModuleIdentifier()) |
774 | continue; |
775 | // Add an entry to provoke importing by thinBackend. |
776 | ImportList.addGUID(FromModule: Summary->modulePath(), GUID, ImportKind: Summary->importType()); |
777 | } |
778 | } |
779 | return true; |
780 | } |
781 | |