1 | //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This file implements the Thin Link Time Optimization library. This library is
// intended to be used by the linker to optimize code at link time.
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h" |
15 | #include "llvm/Support/CommandLine.h" |
16 | |
17 | #include "llvm/ADT/ScopeExit.h" |
18 | #include "llvm/ADT/Statistic.h" |
19 | #include "llvm/ADT/StringExtras.h" |
20 | #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
21 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
22 | #include "llvm/Analysis/TargetLibraryInfo.h" |
23 | #include "llvm/Bitcode/BitcodeReader.h" |
24 | #include "llvm/Bitcode/BitcodeWriter.h" |
25 | #include "llvm/Bitcode/BitcodeWriterPass.h" |
26 | #include "llvm/Config/llvm-config.h" |
27 | #include "llvm/IR/DebugInfo.h" |
28 | #include "llvm/IR/DiagnosticPrinter.h" |
29 | #include "llvm/IR/LLVMContext.h" |
30 | #include "llvm/IR/LLVMRemarkStreamer.h" |
31 | #include "llvm/IR/LegacyPassManager.h" |
32 | #include "llvm/IR/Mangler.h" |
33 | #include "llvm/IR/PassTimingInfo.h" |
34 | #include "llvm/IR/Verifier.h" |
35 | #include "llvm/IRReader/IRReader.h" |
36 | #include "llvm/LTO/LTO.h" |
37 | #include "llvm/MC/TargetRegistry.h" |
38 | #include "llvm/Object/IRObjectFile.h" |
39 | #include "llvm/Passes/PassBuilder.h" |
40 | #include "llvm/Passes/StandardInstrumentations.h" |
41 | #include "llvm/Remarks/HotnessThresholdParser.h" |
42 | #include "llvm/Support/CachePruning.h" |
43 | #include "llvm/Support/Debug.h" |
44 | #include "llvm/Support/Error.h" |
45 | #include "llvm/Support/FileSystem.h" |
46 | #include "llvm/Support/FormatVariadic.h" |
47 | #include "llvm/Support/Path.h" |
48 | #include "llvm/Support/SHA1.h" |
49 | #include "llvm/Support/SmallVectorMemoryBuffer.h" |
50 | #include "llvm/Support/ThreadPool.h" |
51 | #include "llvm/Support/Threading.h" |
52 | #include "llvm/Support/ToolOutputFile.h" |
53 | #include "llvm/Support/raw_ostream.h" |
54 | #include "llvm/Target/TargetMachine.h" |
55 | #include "llvm/TargetParser/SubtargetFeature.h" |
56 | #include "llvm/Transforms/IPO/FunctionAttrs.h" |
57 | #include "llvm/Transforms/IPO/FunctionImport.h" |
58 | #include "llvm/Transforms/IPO/Internalize.h" |
59 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
60 | #include "llvm/Transforms/Utils/FunctionImportUtils.h" |
61 | |
62 | #if !defined(_MSC_VER) && !defined(__MINGW32__) |
63 | #include <unistd.h> |
64 | #else |
65 | #include <io.h> |
66 | #endif |
67 | |
68 | using namespace llvm; |
69 | using namespace ThinLTOCodeGeneratorImpl; |
70 | |
71 | #define DEBUG_TYPE "thinlto" |
72 | |
73 | namespace llvm { |
74 | // Flags -discard-value-names, defined in LTOCodeGenerator.cpp |
75 | extern cl::opt<bool> LTODiscardValueNames; |
76 | extern cl::opt<std::string> ; |
77 | extern cl::opt<std::string> ; |
78 | extern cl::opt<bool> ; |
79 | extern cl::opt<std::optional<uint64_t>, false, remarks::HotnessThresholdParser> |
80 | ; |
81 | extern cl::opt<std::string> ; |
82 | } |
83 | |
84 | // Default to using all available threads in the system, but using only one |
85 | // thred per core, as indicated by the usage of |
86 | // heavyweight_hardware_concurrency() below. |
87 | static cl::opt<int> ThreadCount("threads" , cl::init(Val: 0)); |
88 | |
89 | // Simple helper to save temporary files for debug. |
90 | static void saveTempBitcode(const Module &TheModule, StringRef TempDir, |
91 | unsigned count, StringRef Suffix) { |
92 | if (TempDir.empty()) |
93 | return; |
94 | // User asked to save temps, let dump the bitcode file after import. |
95 | std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str(); |
96 | std::error_code EC; |
97 | raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None); |
98 | if (EC) |
99 | report_fatal_error(reason: Twine("Failed to open " ) + SaveTempPath + |
100 | " to save optimized bitcode\n" ); |
101 | WriteBitcodeToFile(M: TheModule, Out&: OS, /* ShouldPreserveUseListOrder */ true); |
102 | } |
103 | |
104 | static const GlobalValueSummary * |
105 | getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) { |
106 | // If there is any strong definition anywhere, get it. |
107 | auto StrongDefForLinker = llvm::find_if( |
108 | Range: GVSummaryList, P: [](const std::unique_ptr<GlobalValueSummary> &Summary) { |
109 | auto Linkage = Summary->linkage(); |
110 | return !GlobalValue::isAvailableExternallyLinkage(Linkage) && |
111 | !GlobalValue::isWeakForLinker(Linkage); |
112 | }); |
113 | if (StrongDefForLinker != GVSummaryList.end()) |
114 | return StrongDefForLinker->get(); |
115 | // Get the first *linker visible* definition for this global in the summary |
116 | // list. |
117 | auto FirstDefForLinker = llvm::find_if( |
118 | Range: GVSummaryList, P: [](const std::unique_ptr<GlobalValueSummary> &Summary) { |
119 | auto Linkage = Summary->linkage(); |
120 | return !GlobalValue::isAvailableExternallyLinkage(Linkage); |
121 | }); |
122 | // Extern templates can be emitted as available_externally. |
123 | if (FirstDefForLinker == GVSummaryList.end()) |
124 | return nullptr; |
125 | return FirstDefForLinker->get(); |
126 | } |
127 | |
128 | // Populate map of GUID to the prevailing copy for any multiply defined |
129 | // symbols. Currently assume first copy is prevailing, or any strong |
130 | // definition. Can be refined with Linker information in the future. |
131 | static void computePrevailingCopies( |
132 | const ModuleSummaryIndex &Index, |
133 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy) { |
134 | auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) { |
135 | return GVSummaryList.size() > 1; |
136 | }; |
137 | |
138 | for (auto &I : Index) { |
139 | if (HasMultipleCopies(I.second.SummaryList)) |
140 | PrevailingCopy[I.first] = |
141 | getFirstDefinitionForLinker(GVSummaryList: I.second.SummaryList); |
142 | } |
143 | } |
144 | |
145 | static StringMap<lto::InputFile *> |
146 | generateModuleMap(std::vector<std::unique_ptr<lto::InputFile>> &Modules) { |
147 | StringMap<lto::InputFile *> ModuleMap; |
148 | for (auto &M : Modules) { |
149 | LLVM_DEBUG(dbgs() << "Adding module " << M->getName() << " to ModuleMap\n" ); |
150 | assert(!ModuleMap.contains(M->getName()) && |
151 | "Expect unique Buffer Identifier" ); |
152 | ModuleMap[M->getName()] = M.get(); |
153 | } |
154 | return ModuleMap; |
155 | } |
156 | |
157 | static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index, |
158 | bool ClearDSOLocalOnDeclarations) { |
159 | renameModuleForThinLTO(M&: TheModule, Index, ClearDSOLocalOnDeclarations); |
160 | } |
161 | |
162 | namespace { |
163 | class ThinLTODiagnosticInfo : public DiagnosticInfo { |
164 | const Twine &Msg; |
165 | public: |
166 | ThinLTODiagnosticInfo(const Twine &DiagMsg LLVM_LIFETIME_BOUND, |
167 | DiagnosticSeverity Severity = DS_Error) |
168 | : DiagnosticInfo(DK_Linker, Severity), Msg(DiagMsg) {} |
169 | void print(DiagnosticPrinter &DP) const override { DP << Msg; } |
170 | }; |
171 | } |
172 | |
173 | /// Verify the module and strip broken debug info. |
174 | static void verifyLoadedModule(Module &TheModule) { |
175 | bool BrokenDebugInfo = false; |
176 | if (verifyModule(M: TheModule, OS: &dbgs(), BrokenDebugInfo: &BrokenDebugInfo)) |
177 | report_fatal_error(reason: "Broken module found, compilation aborted!" ); |
178 | if (BrokenDebugInfo) { |
179 | TheModule.getContext().diagnose(DI: ThinLTODiagnosticInfo( |
180 | "Invalid debug info found, debug info will be stripped" , DS_Warning)); |
181 | StripDebugInfo(M&: TheModule); |
182 | } |
183 | } |
184 | |
185 | static std::unique_ptr<Module> loadModuleFromInput(lto::InputFile *Input, |
186 | LLVMContext &Context, |
187 | bool Lazy, |
188 | bool IsImporting) { |
189 | auto &Mod = Input->getSingleBitcodeModule(); |
190 | SMDiagnostic Err; |
191 | Expected<std::unique_ptr<Module>> ModuleOrErr = |
192 | Lazy ? Mod.getLazyModule(Context, |
193 | /* ShouldLazyLoadMetadata */ true, IsImporting) |
194 | : Mod.parseModule(Context); |
195 | if (!ModuleOrErr) { |
196 | handleAllErrors(E: ModuleOrErr.takeError(), Handlers: [&](ErrorInfoBase &EIB) { |
197 | SMDiagnostic Err = SMDiagnostic(Mod.getModuleIdentifier(), |
198 | SourceMgr::DK_Error, EIB.message()); |
199 | Err.print(ProgName: "ThinLTO" , S&: errs()); |
200 | }); |
201 | report_fatal_error(reason: "Can't load module, abort." ); |
202 | } |
203 | if (!Lazy) |
204 | verifyLoadedModule(TheModule&: *ModuleOrErr.get()); |
205 | return std::move(*ModuleOrErr); |
206 | } |
207 | |
208 | static void |
209 | crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, |
210 | StringMap<lto::InputFile *> &ModuleMap, |
211 | const FunctionImporter::ImportMapTy &ImportList, |
212 | bool ClearDSOLocalOnDeclarations) { |
213 | auto Loader = [&](StringRef Identifier) { |
214 | auto &Input = ModuleMap[Identifier]; |
215 | return loadModuleFromInput(Input, Context&: TheModule.getContext(), |
216 | /*Lazy=*/true, /*IsImporting*/ true); |
217 | }; |
218 | |
219 | FunctionImporter Importer(Index, Loader, ClearDSOLocalOnDeclarations); |
220 | Expected<bool> Result = Importer.importFunctions(M&: TheModule, ImportList); |
221 | if (!Result) { |
222 | handleAllErrors(E: Result.takeError(), Handlers: [&](ErrorInfoBase &EIB) { |
223 | SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(), |
224 | SourceMgr::DK_Error, EIB.message()); |
225 | Err.print(ProgName: "ThinLTO" , S&: errs()); |
226 | }); |
227 | report_fatal_error(reason: "importFunctions failed" ); |
228 | } |
229 | // Verify again after cross-importing. |
230 | verifyLoadedModule(TheModule); |
231 | } |
232 | |
233 | static void optimizeModule(Module &TheModule, TargetMachine &TM, |
234 | unsigned OptLevel, bool Freestanding, |
235 | bool DebugPassManager, ModuleSummaryIndex *Index) { |
236 | std::optional<PGOOptions> PGOOpt; |
237 | LoopAnalysisManager LAM; |
238 | FunctionAnalysisManager FAM; |
239 | CGSCCAnalysisManager CGAM; |
240 | ModuleAnalysisManager MAM; |
241 | |
242 | PassInstrumentationCallbacks PIC; |
243 | StandardInstrumentations SI(TheModule.getContext(), DebugPassManager); |
244 | SI.registerCallbacks(PIC, MAM: &MAM); |
245 | PipelineTuningOptions PTO; |
246 | PTO.LoopVectorization = true; |
247 | PTO.SLPVectorization = true; |
248 | PassBuilder PB(&TM, PTO, PGOOpt, &PIC); |
249 | |
250 | std::unique_ptr<TargetLibraryInfoImpl> TLII( |
251 | new TargetLibraryInfoImpl(TM.getTargetTriple())); |
252 | if (Freestanding) |
253 | TLII->disableAllFunctions(); |
254 | FAM.registerPass(PassBuilder: [&] { return TargetLibraryAnalysis(*TLII); }); |
255 | |
256 | // Register all the basic analyses with the managers. |
257 | PB.registerModuleAnalyses(MAM); |
258 | PB.registerCGSCCAnalyses(CGAM); |
259 | PB.registerFunctionAnalyses(FAM); |
260 | PB.registerLoopAnalyses(LAM); |
261 | PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); |
262 | |
263 | ModulePassManager MPM; |
264 | |
265 | OptimizationLevel OL; |
266 | |
267 | switch (OptLevel) { |
268 | default: |
269 | llvm_unreachable("Invalid optimization level" ); |
270 | case 0: |
271 | OL = OptimizationLevel::O0; |
272 | break; |
273 | case 1: |
274 | OL = OptimizationLevel::O1; |
275 | break; |
276 | case 2: |
277 | OL = OptimizationLevel::O2; |
278 | break; |
279 | case 3: |
280 | OL = OptimizationLevel::O3; |
281 | break; |
282 | } |
283 | |
284 | MPM.addPass(Pass: PB.buildThinLTODefaultPipeline(Level: OL, ImportSummary: Index)); |
285 | |
286 | MPM.run(IR&: TheModule, AM&: MAM); |
287 | } |
288 | |
289 | static void |
290 | addUsedSymbolToPreservedGUID(const lto::InputFile &File, |
291 | DenseSet<GlobalValue::GUID> &PreservedGUID) { |
292 | for (const auto &Sym : File.symbols()) { |
293 | if (Sym.isUsed()) |
294 | PreservedGUID.insert( |
295 | V: GlobalValue::getGUIDAssumingExternalLinkage(GlobalName: Sym.getIRName())); |
296 | } |
297 | } |
298 | |
299 | // Convert the PreservedSymbols map from "Name" based to "GUID" based. |
300 | static void computeGUIDPreservedSymbols(const lto::InputFile &File, |
301 | const StringSet<> &PreservedSymbols, |
302 | const Triple &TheTriple, |
303 | DenseSet<GlobalValue::GUID> &GUIDs) { |
304 | // Iterate the symbols in the input file and if the input has preserved symbol |
305 | // compute the GUID for the symbol. |
306 | for (const auto &Sym : File.symbols()) { |
307 | if (PreservedSymbols.count(Key: Sym.getName()) && !Sym.getIRName().empty()) |
308 | GUIDs.insert(V: GlobalValue::getGUIDAssumingExternalLinkage( |
309 | GlobalName: GlobalValue::getGlobalIdentifier(Name: Sym.getIRName(), |
310 | Linkage: GlobalValue::ExternalLinkage, FileName: "" ))); |
311 | } |
312 | } |
313 | |
314 | static DenseSet<GlobalValue::GUID> |
315 | computeGUIDPreservedSymbols(const lto::InputFile &File, |
316 | const StringSet<> &PreservedSymbols, |
317 | const Triple &TheTriple) { |
318 | DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size()); |
319 | computeGUIDPreservedSymbols(File, PreservedSymbols, TheTriple, |
320 | GUIDs&: GUIDPreservedSymbols); |
321 | return GUIDPreservedSymbols; |
322 | } |
323 | |
324 | static std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule, |
325 | TargetMachine &TM) { |
326 | SmallVector<char, 128> OutputBuffer; |
327 | |
328 | // CodeGen |
329 | { |
330 | raw_svector_ostream OS(OutputBuffer); |
331 | legacy::PassManager PM; |
332 | |
333 | // Setup the codegen now. |
334 | if (TM.addPassesToEmitFile(PM, OS, nullptr, CodeGenFileType::ObjectFile, |
335 | /* DisableVerify */ true)) |
336 | report_fatal_error(reason: "Failed to setup codegen" ); |
337 | |
338 | // Run codegen now. resulting binary is in OutputBuffer. |
339 | PM.run(M&: TheModule); |
340 | } |
341 | return std::make_unique<SmallVectorMemoryBuffer>( |
342 | args: std::move(OutputBuffer), /*RequiresNullTerminator=*/args: false); |
343 | } |
344 | |
345 | namespace { |
346 | /// Manage caching for a single Module. |
347 | class ModuleCacheEntry { |
348 | SmallString<128> EntryPath; |
349 | |
350 | public: |
351 | // Create a cache entry. This compute a unique hash for the Module considering |
352 | // the current list of export/import, and offer an interface to query to |
353 | // access the content in the cache. |
354 | ModuleCacheEntry( |
355 | StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID, |
356 | const FunctionImporter::ImportMapTy &ImportList, |
357 | const FunctionImporter::ExportSetTy &ExportList, |
358 | const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, |
359 | const GVSummaryMapTy &DefinedGVSummaries, unsigned OptLevel, |
360 | bool Freestanding, const TargetMachineBuilder &TMBuilder) { |
361 | if (CachePath.empty()) |
362 | return; |
363 | |
364 | if (!Index.modulePaths().count(Key: ModuleID)) |
365 | // The module does not have an entry, it can't have a hash at all |
366 | return; |
367 | |
368 | if (all_of(Range: Index.getModuleHash(ModPath: ModuleID), |
369 | P: [](uint32_t V) { return V == 0; })) |
370 | // No hash entry, no caching! |
371 | return; |
372 | |
373 | llvm::lto::Config Conf; |
374 | Conf.OptLevel = OptLevel; |
375 | Conf.Options = TMBuilder.Options; |
376 | Conf.CPU = TMBuilder.MCpu; |
377 | Conf.MAttrs.push_back(x: TMBuilder.MAttr); |
378 | Conf.RelocModel = TMBuilder.RelocModel; |
379 | Conf.CGOptLevel = TMBuilder.CGOptLevel; |
380 | Conf.Freestanding = Freestanding; |
381 | std::string Key = |
382 | computeLTOCacheKey(Conf, Index, ModuleID, ImportList, ExportList, |
383 | ResolvedODR, DefinedGlobals: DefinedGVSummaries); |
384 | |
385 | // This choice of file name allows the cache to be pruned (see pruneCache() |
386 | // in include/llvm/Support/CachePruning.h). |
387 | sys::path::append(path&: EntryPath, a: CachePath, b: Twine("llvmcache-" , Key)); |
388 | } |
389 | |
390 | // Access the path to this entry in the cache. |
391 | StringRef getEntryPath() { return EntryPath; } |
392 | |
393 | // Try loading the buffer for this cache entry. |
394 | ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() { |
395 | if (EntryPath.empty()) |
396 | return std::error_code(); |
397 | SmallString<64> ResultPath; |
398 | Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead( |
399 | Name: Twine(EntryPath), Flags: sys::fs::OF_UpdateAtime, RealPath: &ResultPath); |
400 | if (!FDOrErr) |
401 | return errorToErrorCode(Err: FDOrErr.takeError()); |
402 | ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getOpenFile( |
403 | FD: *FDOrErr, Filename: EntryPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false); |
404 | sys::fs::closeFile(F&: *FDOrErr); |
405 | return MBOrErr; |
406 | } |
407 | |
408 | // Cache the Produced object file |
409 | void write(const MemoryBuffer &OutputBuffer) { |
410 | if (EntryPath.empty()) |
411 | return; |
412 | |
413 | if (auto Err = llvm::writeToOutput( |
414 | OutputFileName: EntryPath, Write: [&OutputBuffer](llvm::raw_ostream &OS) -> llvm::Error { |
415 | OS << OutputBuffer.getBuffer(); |
416 | return llvm::Error::success(); |
417 | })) |
418 | report_fatal_error(reason: llvm::formatv(Fmt: "ThinLTO: Can't write file {0}: {1}" , |
419 | Vals&: EntryPath, |
420 | Vals: toString(E: std::move(Err)).c_str())); |
421 | } |
422 | }; |
423 | } // end anonymous namespace |
424 | |
425 | static std::unique_ptr<MemoryBuffer> |
426 | ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, |
427 | StringMap<lto::InputFile *> &ModuleMap, TargetMachine &TM, |
428 | const FunctionImporter::ImportMapTy &ImportList, |
429 | const FunctionImporter::ExportSetTy &ExportList, |
430 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, |
431 | const GVSummaryMapTy &DefinedGlobals, |
432 | const ThinLTOCodeGenerator::CachingOptions &CacheOptions, |
433 | bool DisableCodeGen, StringRef SaveTempsDir, |
434 | bool Freestanding, unsigned OptLevel, unsigned count, |
435 | bool DebugPassManager) { |
436 | // "Benchmark"-like optimization: single-source case |
437 | bool SingleModule = (ModuleMap.size() == 1); |
438 | |
439 | // When linking an ELF shared object, dso_local should be dropped. We |
440 | // conservatively do this for -fpic. |
441 | bool ClearDSOLocalOnDeclarations = |
442 | TM.getTargetTriple().isOSBinFormatELF() && |
443 | TM.getRelocationModel() != Reloc::Static && |
444 | TheModule.getPIELevel() == PIELevel::Default; |
445 | |
446 | if (!SingleModule) { |
447 | promoteModule(TheModule, Index, ClearDSOLocalOnDeclarations); |
448 | |
449 | // Apply summary-based prevailing-symbol resolution decisions. |
450 | thinLTOFinalizeInModule(TheModule, DefinedGlobals, /*PropagateAttrs=*/true); |
451 | |
452 | // Save temps: after promotion. |
453 | saveTempBitcode(TheModule, TempDir: SaveTempsDir, count, Suffix: ".1.promoted.bc" ); |
454 | } |
455 | |
456 | // Be friendly and don't nuke totally the module when the client didn't |
457 | // supply anything to preserve. |
458 | if (!ExportList.empty() || !GUIDPreservedSymbols.empty()) { |
459 | // Apply summary-based internalization decisions. |
460 | thinLTOInternalizeModule(TheModule, DefinedGlobals); |
461 | } |
462 | |
463 | // Save internalized bitcode |
464 | saveTempBitcode(TheModule, TempDir: SaveTempsDir, count, Suffix: ".2.internalized.bc" ); |
465 | |
466 | if (!SingleModule) |
467 | crossImportIntoModule(TheModule, Index, ModuleMap, ImportList, |
468 | ClearDSOLocalOnDeclarations); |
469 | |
470 | // Do this after any importing so that imported code is updated. |
471 | // See comment at call to updateVCallVisibilityInIndex() for why |
472 | // WholeProgramVisibilityEnabledInLTO is false. |
473 | updatePublicTypeTestCalls(M&: TheModule, |
474 | /* WholeProgramVisibilityEnabledInLTO */ false); |
475 | |
476 | // Save temps: after cross-module import. |
477 | saveTempBitcode(TheModule, TempDir: SaveTempsDir, count, Suffix: ".3.imported.bc" ); |
478 | |
479 | optimizeModule(TheModule, TM, OptLevel, Freestanding, DebugPassManager, |
480 | Index: &Index); |
481 | |
482 | saveTempBitcode(TheModule, TempDir: SaveTempsDir, count, Suffix: ".4.opt.bc" ); |
483 | |
484 | if (DisableCodeGen) { |
485 | // Configured to stop before CodeGen, serialize the bitcode and return. |
486 | SmallVector<char, 128> OutputBuffer; |
487 | { |
488 | raw_svector_ostream OS(OutputBuffer); |
489 | ProfileSummaryInfo PSI(TheModule); |
490 | auto Index = buildModuleSummaryIndex(M: TheModule, GetBFICallback: nullptr, PSI: &PSI); |
491 | WriteBitcodeToFile(M: TheModule, Out&: OS, ShouldPreserveUseListOrder: true, Index: &Index); |
492 | } |
493 | return std::make_unique<SmallVectorMemoryBuffer>( |
494 | args: std::move(OutputBuffer), /*RequiresNullTerminator=*/args: false); |
495 | } |
496 | |
497 | return codegenModule(TheModule, TM); |
498 | } |
499 | |
500 | /// Resolve prevailing symbols. Record resolutions in the \p ResolvedODR map |
501 | /// for caching, and in the \p Index for application during the ThinLTO |
502 | /// backends. This is needed for correctness for exported symbols (ensure |
503 | /// at least one copy kept) and a compile-time optimization (to drop duplicate |
504 | /// copies when possible). |
505 | static void resolvePrevailingInIndex( |
506 | ModuleSummaryIndex &Index, |
507 | StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> |
508 | &ResolvedODR, |
509 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, |
510 | const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> |
511 | &PrevailingCopy) { |
512 | |
513 | auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { |
514 | const auto &Prevailing = PrevailingCopy.find(Val: GUID); |
515 | // Not in map means that there was only one copy, which must be prevailing. |
516 | if (Prevailing == PrevailingCopy.end()) |
517 | return true; |
518 | return Prevailing->second == S; |
519 | }; |
520 | |
521 | auto recordNewLinkage = [&](StringRef ModuleIdentifier, |
522 | GlobalValue::GUID GUID, |
523 | GlobalValue::LinkageTypes NewLinkage) { |
524 | ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; |
525 | }; |
526 | |
527 | // TODO Conf.VisibilityScheme can be lto::Config::ELF for ELF. |
528 | lto::Config Conf; |
529 | thinLTOResolvePrevailingInIndex(C: Conf, Index, isPrevailing, recordNewLinkage, |
530 | GUIDPreservedSymbols); |
531 | } |
532 | |
533 | // Initialize the TargetMachine builder for a given Triple |
534 | static void initTMBuilder(TargetMachineBuilder &TMBuilder, |
535 | const Triple &TheTriple) { |
536 | if (TMBuilder.MCpu.empty()) |
537 | TMBuilder.MCpu = lto::getThinLTODefaultCPU(TheTriple); |
538 | TMBuilder.TheTriple = std::move(TheTriple); |
539 | } |
540 | |
541 | void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { |
542 | MemoryBufferRef Buffer(Data, Identifier); |
543 | |
544 | auto InputOrError = lto::InputFile::create(Object: Buffer); |
545 | if (!InputOrError) |
546 | report_fatal_error(reason: Twine("ThinLTO cannot create input file: " ) + |
547 | toString(E: InputOrError.takeError())); |
548 | |
549 | auto TripleStr = (*InputOrError)->getTargetTriple(); |
550 | Triple TheTriple(TripleStr); |
551 | |
552 | if (Modules.empty()) |
553 | initTMBuilder(TMBuilder, TheTriple: Triple(TheTriple)); |
554 | else if (TMBuilder.TheTriple != TheTriple) { |
555 | if (!TMBuilder.TheTriple.isCompatibleWith(Other: TheTriple)) |
556 | report_fatal_error(reason: "ThinLTO modules with incompatible triples not " |
557 | "supported" ); |
558 | initTMBuilder(TMBuilder, TheTriple: Triple(TMBuilder.TheTriple.merge(Other: TheTriple))); |
559 | } |
560 | |
561 | Modules.emplace_back(args: std::move(*InputOrError)); |
562 | } |
563 | |
564 | void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) { |
565 | PreservedSymbols.insert(key: Name); |
566 | } |
567 | |
568 | void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) { |
569 | // FIXME: At the moment, we don't take advantage of this extra information, |
570 | // we're conservatively considering cross-references as preserved. |
571 | // CrossReferencedSymbols.insert(Name); |
572 | PreservedSymbols.insert(key: Name); |
573 | } |
574 | |
575 | // TargetMachine factory |
576 | std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const { |
577 | std::string ErrMsg; |
578 | const Target *TheTarget = TargetRegistry::lookupTarget(TheTriple, Error&: ErrMsg); |
579 | if (!TheTarget) { |
580 | report_fatal_error(reason: Twine("Can't load target for this Triple: " ) + ErrMsg); |
581 | } |
582 | |
583 | // Use MAttr as the default set of features. |
584 | SubtargetFeatures Features(MAttr); |
585 | Features.getDefaultSubtargetFeatures(Triple: TheTriple); |
586 | std::string FeatureStr = Features.getString(); |
587 | |
588 | std::unique_ptr<TargetMachine> TM( |
589 | TheTarget->createTargetMachine(TT: TheTriple, CPU: MCpu, Features: FeatureStr, Options, |
590 | RM: RelocModel, CM: std::nullopt, OL: CGOptLevel)); |
591 | assert(TM && "Cannot create target machine" ); |
592 | |
593 | return TM; |
594 | } |
595 | |
596 | /** |
597 | * Produce the combined summary index from all the bitcode files: |
598 | * "thin-link". |
599 | */ |
600 | std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() { |
601 | std::unique_ptr<ModuleSummaryIndex> CombinedIndex = |
602 | std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/args: false); |
603 | for (auto &Mod : Modules) { |
604 | auto &M = Mod->getSingleBitcodeModule(); |
605 | if (Error Err = M.readSummary(CombinedIndex&: *CombinedIndex, ModulePath: Mod->getName())) { |
606 | // FIXME diagnose |
607 | logAllUnhandledErrors( |
608 | E: std::move(Err), OS&: errs(), |
609 | ErrorBanner: "error: can't create module summary index for buffer: " ); |
610 | return nullptr; |
611 | } |
612 | } |
613 | return CombinedIndex; |
614 | } |
615 | |
616 | namespace { |
617 | struct IsExported { |
618 | const DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists; |
619 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols; |
620 | |
621 | IsExported( |
622 | const DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists, |
623 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) |
624 | : ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {} |
625 | |
626 | bool operator()(StringRef ModuleIdentifier, ValueInfo VI) const { |
627 | const auto &ExportList = ExportLists.find(Val: ModuleIdentifier); |
628 | return (ExportList != ExportLists.end() && ExportList->second.count(V: VI)) || |
629 | GUIDPreservedSymbols.count(V: VI.getGUID()); |
630 | } |
631 | }; |
632 | |
633 | struct IsPrevailing { |
634 | const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy; |
635 | IsPrevailing(const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> |
636 | &PrevailingCopy) |
637 | : PrevailingCopy(PrevailingCopy) {} |
638 | |
639 | bool operator()(GlobalValue::GUID GUID, const GlobalValueSummary *S) const { |
640 | const auto &Prevailing = PrevailingCopy.find(Val: GUID); |
641 | // Not in map means that there was only one copy, which must be prevailing. |
642 | if (Prevailing == PrevailingCopy.end()) |
643 | return true; |
644 | return Prevailing->second == S; |
645 | }; |
646 | }; |
647 | } // namespace |
648 | |
649 | static void computeDeadSymbolsInIndex( |
650 | ModuleSummaryIndex &Index, |
651 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { |
652 | // We have no symbols resolution available. And can't do any better now in the |
653 | // case where the prevailing symbol is in a native object. It can be refined |
654 | // with linker information in the future. |
655 | auto isPrevailing = [&](GlobalValue::GUID G) { |
656 | return PrevailingType::Unknown; |
657 | }; |
658 | computeDeadSymbolsWithConstProp(Index, GUIDPreservedSymbols, isPrevailing, |
659 | /* ImportEnabled = */ true); |
660 | } |
661 | |
662 | /** |
663 | * Perform promotion and renaming of exported internal functions. |
664 | * Index is updated to reflect linkage changes from weak resolution. |
665 | */ |
666 | void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index, |
667 | const lto::InputFile &File) { |
668 | auto ModuleCount = Index.modulePaths().size(); |
669 | auto ModuleIdentifier = TheModule.getModuleIdentifier(); |
670 | |
671 | // Collect for each module the list of function it defines (GUID -> Summary). |
672 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries; |
673 | Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
674 | |
675 | // Convert the preserved symbols set from string to GUID |
676 | auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( |
677 | File, PreservedSymbols, TheTriple: TheModule.getTargetTriple()); |
678 | |
679 | // Add used symbol to the preserved symbols. |
680 | addUsedSymbolToPreservedGUID(File, PreservedGUID&: GUIDPreservedSymbols); |
681 | |
682 | // Compute "dead" symbols, we don't want to import/export these! |
683 | computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); |
684 | |
685 | // Compute prevailing symbols |
686 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
687 | computePrevailingCopies(Index, PrevailingCopy); |
688 | |
689 | // Generate import/export list |
690 | FunctionImporter::ImportListsTy ImportLists(ModuleCount); |
691 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
692 | ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, |
693 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
694 | ExportLists); |
695 | |
696 | // Resolve prevailing symbols |
697 | StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; |
698 | resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols, |
699 | PrevailingCopy); |
700 | |
701 | thinLTOFinalizeInModule(TheModule, |
702 | DefinedGlobals: ModuleToDefinedGVSummaries[ModuleIdentifier], |
703 | /*PropagateAttrs=*/false); |
704 | |
705 | // Promote the exported values in the index, so that they are promoted |
706 | // in the module. |
707 | thinLTOInternalizeAndPromoteInIndex( |
708 | Index, isExported: IsExported(ExportLists, GUIDPreservedSymbols), |
709 | isPrevailing: IsPrevailing(PrevailingCopy)); |
710 | |
711 | // FIXME Set ClearDSOLocalOnDeclarations. |
712 | promoteModule(TheModule, Index, /*ClearDSOLocalOnDeclarations=*/false); |
713 | } |
714 | |
715 | /** |
716 | * Perform cross-module importing for the module identified by ModuleIdentifier. |
717 | */ |
718 | void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, |
719 | ModuleSummaryIndex &Index, |
720 | const lto::InputFile &File) { |
721 | auto ModuleMap = generateModuleMap(Modules); |
722 | auto ModuleCount = Index.modulePaths().size(); |
723 | |
724 | // Collect for each module the list of function it defines (GUID -> Summary). |
725 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); |
726 | Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
727 | |
728 | // Convert the preserved symbols set from string to GUID |
729 | auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( |
730 | File, PreservedSymbols, TheTriple: TheModule.getTargetTriple()); |
731 | |
732 | addUsedSymbolToPreservedGUID(File, PreservedGUID&: GUIDPreservedSymbols); |
733 | |
734 | // Compute "dead" symbols, we don't want to import/export these! |
735 | computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); |
736 | |
737 | // Compute prevailing symbols |
738 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
739 | computePrevailingCopies(Index, PrevailingCopy); |
740 | |
741 | // Generate import/export list |
742 | FunctionImporter::ImportListsTy ImportLists(ModuleCount); |
743 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
744 | ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, |
745 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
746 | ExportLists); |
747 | auto &ImportList = ImportLists[TheModule.getModuleIdentifier()]; |
748 | |
749 | // FIXME Set ClearDSOLocalOnDeclarations. |
750 | crossImportIntoModule(TheModule, Index, ModuleMap, ImportList, |
751 | /*ClearDSOLocalOnDeclarations=*/false); |
752 | } |
753 | |
754 | /** |
755 | * Compute the list of summaries needed for importing into module. |
756 | */ |
757 | void ThinLTOCodeGenerator::gatherImportedSummariesForModule( |
758 | Module &TheModule, ModuleSummaryIndex &Index, |
759 | ModuleToSummariesForIndexTy &ModuleToSummariesForIndex, |
760 | GVSummaryPtrSet &DecSummaries, const lto::InputFile &File) { |
761 | auto ModuleCount = Index.modulePaths().size(); |
762 | auto ModuleIdentifier = TheModule.getModuleIdentifier(); |
763 | |
764 | // Collect for each module the list of function it defines (GUID -> Summary). |
765 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); |
766 | Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
767 | |
768 | // Convert the preserved symbols set from string to GUID |
769 | auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( |
770 | File, PreservedSymbols, TheTriple: TheModule.getTargetTriple()); |
771 | |
772 | addUsedSymbolToPreservedGUID(File, PreservedGUID&: GUIDPreservedSymbols); |
773 | |
774 | // Compute "dead" symbols, we don't want to import/export these! |
775 | computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); |
776 | |
777 | // Compute prevailing symbols |
778 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
779 | computePrevailingCopies(Index, PrevailingCopy); |
780 | |
781 | // Generate import/export list |
782 | FunctionImporter::ImportListsTy ImportLists(ModuleCount); |
783 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
784 | ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, |
785 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
786 | ExportLists); |
787 | |
788 | llvm::gatherImportedSummariesForModule( |
789 | ModulePath: ModuleIdentifier, ModuleToDefinedGVSummaries, |
790 | ImportList: ImportLists[ModuleIdentifier], ModuleToSummariesForIndex, DecSummaries); |
791 | } |
792 | |
793 | /** |
794 | * Emit the list of files needed for importing into module. |
795 | */ |
796 | void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName, |
797 | ModuleSummaryIndex &Index, |
798 | const lto::InputFile &File) { |
799 | auto ModuleCount = Index.modulePaths().size(); |
800 | auto ModuleIdentifier = TheModule.getModuleIdentifier(); |
801 | |
802 | // Collect for each module the list of function it defines (GUID -> Summary). |
803 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); |
804 | Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
805 | |
806 | // Convert the preserved symbols set from string to GUID |
807 | auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( |
808 | File, PreservedSymbols, TheTriple: TheModule.getTargetTriple()); |
809 | |
810 | addUsedSymbolToPreservedGUID(File, PreservedGUID&: GUIDPreservedSymbols); |
811 | |
812 | // Compute "dead" symbols, we don't want to import/export these! |
813 | computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); |
814 | |
815 | // Compute prevailing symbols |
816 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
817 | computePrevailingCopies(Index, PrevailingCopy); |
818 | |
819 | // Generate import/export list |
820 | FunctionImporter::ImportListsTy ImportLists(ModuleCount); |
821 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
822 | ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, |
823 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
824 | ExportLists); |
825 | |
826 | // 'EmitImportsFiles' emits the list of modules from which to import from, and |
827 | // the set of keys in `ModuleToSummariesForIndex` should be a superset of keys |
828 | // in `DecSummaries`, so no need to use `DecSummaries` in `EmitImportsFiles`. |
829 | GVSummaryPtrSet DecSummaries; |
830 | ModuleToSummariesForIndexTy ModuleToSummariesForIndex; |
831 | llvm::gatherImportedSummariesForModule( |
832 | ModulePath: ModuleIdentifier, ModuleToDefinedGVSummaries, |
833 | ImportList: ImportLists[ModuleIdentifier], ModuleToSummariesForIndex, DecSummaries); |
834 | |
835 | if (Error EC = EmitImportsFiles(ModulePath: ModuleIdentifier, OutputFilename: OutputName, |
836 | ModuleToSummariesForIndex)) |
837 | report_fatal_error(reason: Twine("Failed to open " ) + OutputName + |
838 | " to save imports lists\n" ); |
839 | } |
840 | |
841 | /** |
842 | * Perform internalization. Runs promote and internalization together. |
843 | * Index is updated to reflect linkage changes. |
844 | */ |
845 | void ThinLTOCodeGenerator::internalize(Module &TheModule, |
846 | ModuleSummaryIndex &Index, |
847 | const lto::InputFile &File) { |
848 | initTMBuilder(TMBuilder, TheTriple: TheModule.getTargetTriple()); |
849 | auto ModuleCount = Index.modulePaths().size(); |
850 | auto ModuleIdentifier = TheModule.getModuleIdentifier(); |
851 | |
852 | // Convert the preserved symbols set from string to GUID |
853 | auto GUIDPreservedSymbols = |
854 | computeGUIDPreservedSymbols(File, PreservedSymbols, TheTriple: TMBuilder.TheTriple); |
855 | |
856 | addUsedSymbolToPreservedGUID(File, PreservedGUID&: GUIDPreservedSymbols); |
857 | |
858 | // Collect for each module the list of function it defines (GUID -> Summary). |
859 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); |
860 | Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
861 | |
862 | // Compute "dead" symbols, we don't want to import/export these! |
863 | computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); |
864 | |
865 | // Compute prevailing symbols |
866 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
867 | computePrevailingCopies(Index, PrevailingCopy); |
868 | |
869 | // Generate import/export list |
870 | FunctionImporter::ImportListsTy ImportLists(ModuleCount); |
871 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
872 | ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, |
873 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
874 | ExportLists); |
875 | auto &ExportList = ExportLists[ModuleIdentifier]; |
876 | |
877 | // Be friendly and don't nuke totally the module when the client didn't |
878 | // supply anything to preserve. |
879 | if (ExportList.empty() && GUIDPreservedSymbols.empty()) |
880 | return; |
881 | |
882 | // Resolve prevailing symbols |
883 | StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; |
884 | resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols, |
885 | PrevailingCopy); |
886 | |
887 | // Promote the exported values in the index, so that they are promoted |
888 | // in the module. |
889 | thinLTOInternalizeAndPromoteInIndex( |
890 | Index, isExported: IsExported(ExportLists, GUIDPreservedSymbols), |
891 | isPrevailing: IsPrevailing(PrevailingCopy)); |
892 | |
893 | // FIXME Set ClearDSOLocalOnDeclarations. |
894 | promoteModule(TheModule, Index, /*ClearDSOLocalOnDeclarations=*/false); |
895 | |
896 | // Internalization |
897 | thinLTOFinalizeInModule(TheModule, |
898 | DefinedGlobals: ModuleToDefinedGVSummaries[ModuleIdentifier], |
899 | /*PropagateAttrs=*/false); |
900 | |
901 | thinLTOInternalizeModule(TheModule, |
902 | DefinedGlobals: ModuleToDefinedGVSummaries[ModuleIdentifier]); |
903 | } |
904 | |
905 | /** |
906 | * Perform post-importing ThinLTO optimizations. |
907 | */ |
908 | void ThinLTOCodeGenerator::optimize(Module &TheModule) { |
909 | initTMBuilder(TMBuilder, TheTriple: TheModule.getTargetTriple()); |
910 | |
911 | // Optimize now |
912 | optimizeModule(TheModule, TM&: *TMBuilder.create(), OptLevel, Freestanding, |
913 | DebugPassManager, Index: nullptr); |
914 | } |
915 | |
916 | /// Write out the generated object file, either from CacheEntryPath or from |
917 | /// OutputBuffer, preferring hard-link when possible. |
918 | /// Returns the path to the generated file in SavedObjectsDirectoryPath. |
919 | std::string |
920 | ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath, |
921 | const MemoryBuffer &OutputBuffer) { |
922 | auto ArchName = TMBuilder.TheTriple.getArchName(); |
923 | SmallString<128> OutputPath(SavedObjectsDirectoryPath); |
924 | llvm::sys::path::append(path&: OutputPath, |
925 | a: Twine(count) + "." + ArchName + ".thinlto.o" ); |
926 | OutputPath.c_str(); // Ensure the string is null terminated. |
927 | if (sys::fs::exists(Path: OutputPath)) |
928 | sys::fs::remove(path: OutputPath); |
929 | |
930 | // We don't return a memory buffer to the linker, just a list of files. |
931 | if (!CacheEntryPath.empty()) { |
932 | // Cache is enabled, hard-link the entry (or copy if hard-link fails). |
933 | auto Err = sys::fs::create_hard_link(to: CacheEntryPath, from: OutputPath); |
934 | if (!Err) |
935 | return std::string(OutputPath); |
936 | // Hard linking failed, try to copy. |
937 | Err = sys::fs::copy_file(From: CacheEntryPath, To: OutputPath); |
938 | if (!Err) |
939 | return std::string(OutputPath); |
940 | // Copy failed (could be because the CacheEntry was removed from the cache |
941 | // in the meantime by another process), fall back and try to write down the |
942 | // buffer to the output. |
943 | errs() << "remark: can't link or copy from cached entry '" << CacheEntryPath |
944 | << "' to '" << OutputPath << "'\n" ; |
945 | } |
946 | // No cache entry, just write out the buffer. |
947 | std::error_code Err; |
948 | raw_fd_ostream OS(OutputPath, Err, sys::fs::OF_None); |
949 | if (Err) |
950 | report_fatal_error(reason: Twine("Can't open output '" ) + OutputPath + "'\n" ); |
951 | OS << OutputBuffer.getBuffer(); |
952 | return std::string(OutputPath); |
953 | } |
954 | |
955 | // Main entry point for the ThinLTO processing |
956 | void ThinLTOCodeGenerator::run() { |
957 | timeTraceProfilerBegin(Name: "ThinLink" , Detail: StringRef("" )); |
958 | auto TimeTraceScopeExit = llvm::make_scope_exit(F: []() { |
959 | if (llvm::timeTraceProfilerEnabled()) |
960 | llvm::timeTraceProfilerEnd(); |
961 | }); |
962 | // Prepare the resulting object vector |
963 | assert(ProducedBinaries.empty() && "The generator should not be reused" ); |
964 | if (SavedObjectsDirectoryPath.empty()) |
965 | ProducedBinaries.resize(new_size: Modules.size()); |
966 | else { |
967 | sys::fs::create_directories(path: SavedObjectsDirectoryPath); |
968 | bool IsDir; |
969 | sys::fs::is_directory(path: SavedObjectsDirectoryPath, result&: IsDir); |
970 | if (!IsDir) |
971 | report_fatal_error(reason: Twine("Unexistent dir: '" ) + SavedObjectsDirectoryPath + "'" ); |
972 | ProducedBinaryFiles.resize(new_size: Modules.size()); |
973 | } |
974 | |
975 | if (CodeGenOnly) { |
976 | // Perform only parallel codegen and return. |
977 | DefaultThreadPool Pool; |
978 | int count = 0; |
979 | for (auto &Mod : Modules) { |
980 | Pool.async(F: [&](int count) { |
981 | LLVMContext Context; |
982 | Context.setDiscardValueNames(LTODiscardValueNames); |
983 | |
984 | // Parse module now |
985 | auto TheModule = loadModuleFromInput(Input: Mod.get(), Context, Lazy: false, |
986 | /*IsImporting*/ false); |
987 | |
988 | // CodeGen |
989 | auto OutputBuffer = codegenModule(TheModule&: *TheModule, TM&: *TMBuilder.create()); |
990 | if (SavedObjectsDirectoryPath.empty()) |
991 | ProducedBinaries[count] = std::move(OutputBuffer); |
992 | else |
993 | ProducedBinaryFiles[count] = |
994 | writeGeneratedObject(count, CacheEntryPath: "" , OutputBuffer: *OutputBuffer); |
995 | }, ArgList: count++); |
996 | } |
997 | |
998 | return; |
999 | } |
1000 | |
1001 | // Sequential linking phase |
1002 | auto Index = linkCombinedIndex(); |
1003 | |
1004 | // Save temps: index. |
1005 | if (!SaveTempsDir.empty()) { |
1006 | auto SaveTempPath = SaveTempsDir + "index.bc" ; |
1007 | std::error_code EC; |
1008 | raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None); |
1009 | if (EC) |
1010 | report_fatal_error(reason: Twine("Failed to open " ) + SaveTempPath + |
1011 | " to save optimized bitcode\n" ); |
1012 | writeIndexToFile(Index: *Index, Out&: OS); |
1013 | } |
1014 | |
1015 | |
1016 | // Prepare the module map. |
1017 | auto ModuleMap = generateModuleMap(Modules); |
1018 | auto ModuleCount = Modules.size(); |
1019 | |
1020 | // Collect for each module the list of function it defines (GUID -> Summary). |
1021 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); |
1022 | Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
1023 | |
1024 | // Convert the preserved symbols set from string to GUID, this is needed for |
1025 | // computing the caching hash and the internalization. |
1026 | DenseSet<GlobalValue::GUID> GUIDPreservedSymbols; |
1027 | for (const auto &M : Modules) |
1028 | computeGUIDPreservedSymbols(File: *M, PreservedSymbols, TheTriple: TMBuilder.TheTriple, |
1029 | GUIDs&: GUIDPreservedSymbols); |
1030 | |
1031 | // Add used symbol from inputs to the preserved symbols. |
1032 | for (const auto &M : Modules) |
1033 | addUsedSymbolToPreservedGUID(File: *M, PreservedGUID&: GUIDPreservedSymbols); |
1034 | |
1035 | // Compute "dead" symbols, we don't want to import/export these! |
1036 | computeDeadSymbolsInIndex(Index&: *Index, GUIDPreservedSymbols); |
1037 | |
1038 | // Currently there is no support for enabling whole program visibility via a |
1039 | // linker option in the old LTO API, but this call allows it to be specified |
1040 | // via the internal option. Must be done before WPD below. |
1041 | if (hasWholeProgramVisibility(/* WholeProgramVisibilityEnabledInLTO */ false)) |
1042 | Index->setWithWholeProgramVisibility(); |
1043 | |
1044 | // FIXME: This needs linker information via a TBD new interface |
1045 | updateVCallVisibilityInIndex(Index&: *Index, |
1046 | /*WholeProgramVisibilityEnabledInLTO=*/false, |
1047 | // FIXME: These need linker information via a |
1048 | // TBD new interface. |
1049 | /*DynamicExportSymbols=*/{}, |
1050 | /*VisibleToRegularObjSymbols=*/{}); |
1051 | |
1052 | // Perform index-based WPD. This will return immediately if there are |
1053 | // no index entries in the typeIdMetadata map (e.g. if we are instead |
1054 | // performing IR-based WPD in hybrid regular/thin LTO mode). |
1055 | std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap; |
1056 | std::set<GlobalValue::GUID> ExportedGUIDs; |
1057 | runWholeProgramDevirtOnIndex(Summary&: *Index, ExportedGUIDs, LocalWPDTargetsMap); |
1058 | GUIDPreservedSymbols.insert_range(R&: ExportedGUIDs); |
1059 | |
1060 | // Compute prevailing symbols |
1061 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
1062 | computePrevailingCopies(Index: *Index, PrevailingCopy); |
1063 | |
1064 | // Collect the import/export lists for all modules from the call-graph in the |
1065 | // combined index. |
1066 | FunctionImporter::ImportListsTy ImportLists(ModuleCount); |
1067 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
1068 | ComputeCrossModuleImport(Index: *Index, ModuleToDefinedGVSummaries, |
1069 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
1070 | ExportLists); |
1071 | |
1072 | // We use a std::map here to be able to have a defined ordering when |
1073 | // producing a hash for the cache entry. |
1074 | // FIXME: we should be able to compute the caching hash for the entry based |
1075 | // on the index, and nuke this map. |
1076 | StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; |
1077 | |
1078 | // Resolve prevailing symbols, this has to be computed early because it |
1079 | // impacts the caching. |
1080 | resolvePrevailingInIndex(Index&: *Index, ResolvedODR, GUIDPreservedSymbols, |
1081 | PrevailingCopy); |
1082 | |
1083 | // Use global summary-based analysis to identify symbols that can be |
1084 | // internalized (because they aren't exported or preserved as per callback). |
1085 | // Changes are made in the index, consumed in the ThinLTO backends. |
1086 | updateIndexWPDForExports(Summary&: *Index, |
1087 | isExported: IsExported(ExportLists, GUIDPreservedSymbols), |
1088 | LocalWPDTargetsMap); |
1089 | thinLTOInternalizeAndPromoteInIndex( |
1090 | Index&: *Index, isExported: IsExported(ExportLists, GUIDPreservedSymbols), |
1091 | isPrevailing: IsPrevailing(PrevailingCopy)); |
1092 | |
1093 | thinLTOPropagateFunctionAttrs(Index&: *Index, isPrevailing: IsPrevailing(PrevailingCopy)); |
1094 | |
1095 | // Make sure that every module has an entry in the ExportLists, ImportList, |
1096 | // GVSummary and ResolvedODR maps to enable threaded access to these maps |
1097 | // below. |
1098 | for (auto &Module : Modules) { |
1099 | auto ModuleIdentifier = Module->getName(); |
1100 | ExportLists[ModuleIdentifier]; |
1101 | ImportLists[ModuleIdentifier]; |
1102 | ResolvedODR[ModuleIdentifier]; |
1103 | ModuleToDefinedGVSummaries[ModuleIdentifier]; |
1104 | } |
1105 | |
1106 | std::vector<BitcodeModule *> ModulesVec; |
1107 | ModulesVec.reserve(n: Modules.size()); |
1108 | for (auto &Mod : Modules) |
1109 | ModulesVec.push_back(x: &Mod->getSingleBitcodeModule()); |
1110 | std::vector<int> ModulesOrdering = lto::generateModulesOrdering(R: ModulesVec); |
1111 | |
1112 | if (llvm::timeTraceProfilerEnabled()) |
1113 | llvm::timeTraceProfilerEnd(); |
1114 | |
1115 | TimeTraceScopeExit.release(); |
1116 | |
1117 | // Parallel optimizer + codegen |
1118 | { |
1119 | DefaultThreadPool Pool(heavyweight_hardware_concurrency(ThreadCount)); |
1120 | for (auto IndexCount : ModulesOrdering) { |
1121 | auto &Mod = Modules[IndexCount]; |
1122 | Pool.async(F: [&](int count) { |
1123 | auto ModuleIdentifier = Mod->getName(); |
1124 | auto &ExportList = ExportLists[ModuleIdentifier]; |
1125 | |
1126 | auto &DefinedGVSummaries = ModuleToDefinedGVSummaries[ModuleIdentifier]; |
1127 | |
1128 | // The module may be cached, this helps handling it. |
1129 | ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier, |
1130 | ImportLists[ModuleIdentifier], ExportList, |
1131 | ResolvedODR[ModuleIdentifier], |
1132 | DefinedGVSummaries, OptLevel, Freestanding, |
1133 | TMBuilder); |
1134 | auto CacheEntryPath = CacheEntry.getEntryPath(); |
1135 | |
1136 | { |
1137 | auto ErrOrBuffer = CacheEntry.tryLoadingBuffer(); |
1138 | LLVM_DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss" ) |
1139 | << " '" << CacheEntryPath << "' for buffer " |
1140 | << count << " " << ModuleIdentifier << "\n" ); |
1141 | |
1142 | if (ErrOrBuffer) { |
1143 | // Cache Hit! |
1144 | if (SavedObjectsDirectoryPath.empty()) |
1145 | ProducedBinaries[count] = std::move(ErrOrBuffer.get()); |
1146 | else |
1147 | ProducedBinaryFiles[count] = writeGeneratedObject( |
1148 | count, CacheEntryPath, OutputBuffer: *ErrOrBuffer.get()); |
1149 | return; |
1150 | } |
1151 | } |
1152 | |
1153 | LLVMContext Context; |
1154 | Context.setDiscardValueNames(LTODiscardValueNames); |
1155 | Context.enableDebugTypeODRUniquing(); |
1156 | auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( |
1157 | Context, RemarksFilename, RemarksPasses, RemarksFormat, |
1158 | RemarksWithHotness, RemarksHotnessThreshold, Count: count); |
1159 | if (!DiagFileOrErr) { |
1160 | errs() << "Error: " << toString(E: DiagFileOrErr.takeError()) << "\n" ; |
1161 | report_fatal_error(reason: "ThinLTO: Can't get an output file for the " |
1162 | "remarks" ); |
1163 | } |
1164 | |
1165 | // Parse module now |
1166 | auto TheModule = loadModuleFromInput(Input: Mod.get(), Context, Lazy: false, |
1167 | /*IsImporting*/ false); |
1168 | |
1169 | // Save temps: original file. |
1170 | saveTempBitcode(TheModule: *TheModule, TempDir: SaveTempsDir, count, Suffix: ".0.original.bc" ); |
1171 | |
1172 | auto &ImportList = ImportLists[ModuleIdentifier]; |
1173 | // Run the main process now, and generates a binary |
1174 | auto OutputBuffer = ProcessThinLTOModule( |
1175 | TheModule&: *TheModule, Index&: *Index, ModuleMap, TM&: *TMBuilder.create(), ImportList, |
1176 | ExportList, GUIDPreservedSymbols, |
1177 | DefinedGlobals: ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions, |
1178 | DisableCodeGen, SaveTempsDir, Freestanding, OptLevel, count, |
1179 | DebugPassManager); |
1180 | |
1181 | // Commit to the cache (if enabled) |
1182 | CacheEntry.write(OutputBuffer: *OutputBuffer); |
1183 | |
1184 | if (SavedObjectsDirectoryPath.empty()) { |
1185 | // We need to generated a memory buffer for the linker. |
1186 | if (!CacheEntryPath.empty()) { |
1187 | // When cache is enabled, reload from the cache if possible. |
1188 | // Releasing the buffer from the heap and reloading it from the |
1189 | // cache file with mmap helps us to lower memory pressure. |
1190 | // The freed memory can be used for the next input file. |
1191 | // The final binary link will read from the VFS cache (hopefully!) |
1192 | // or from disk (if the memory pressure was too high). |
1193 | auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer(); |
1194 | if (auto EC = ReloadedBufferOrErr.getError()) { |
1195 | // On error, keep the preexisting buffer and print a diagnostic. |
1196 | errs() << "remark: can't reload cached file '" << CacheEntryPath |
1197 | << "': " << EC.message() << "\n" ; |
1198 | } else { |
1199 | OutputBuffer = std::move(*ReloadedBufferOrErr); |
1200 | } |
1201 | } |
1202 | ProducedBinaries[count] = std::move(OutputBuffer); |
1203 | return; |
1204 | } |
1205 | ProducedBinaryFiles[count] = writeGeneratedObject( |
1206 | count, CacheEntryPath, OutputBuffer: *OutputBuffer); |
1207 | }, ArgList&: IndexCount); |
1208 | } |
1209 | } |
1210 | |
1211 | pruneCache(Path: CacheOptions.Path, Policy: CacheOptions.Policy, Files: ProducedBinaries); |
1212 | |
1213 | // If statistics were requested, print them out now. |
1214 | if (llvm::AreStatisticsEnabled()) |
1215 | llvm::PrintStatistics(); |
1216 | reportAndResetTimings(); |
1217 | } |
1218 | |