1//===-LTO.cpp - LLVM Link Time Optimizer ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements functions and classes used to support LTO.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/LTO/LTO.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/ScopeExit.h"
16#include "llvm/ADT/SmallSet.h"
17#include "llvm/ADT/StableHashing.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/Analysis/OptimizationRemarkEmitter.h"
21#include "llvm/Analysis/StackSafetyAnalysis.h"
22#include "llvm/Analysis/TargetTransformInfo.h"
23#include "llvm/Bitcode/BitcodeReader.h"
24#include "llvm/Bitcode/BitcodeWriter.h"
25#include "llvm/CGData/CodeGenData.h"
26#include "llvm/CodeGen/Analysis.h"
27#include "llvm/Config/llvm-config.h"
28#include "llvm/IR/AutoUpgrade.h"
29#include "llvm/IR/DiagnosticPrinter.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/LLVMRemarkStreamer.h"
32#include "llvm/IR/LegacyPassManager.h"
33#include "llvm/IR/Mangler.h"
34#include "llvm/IR/Metadata.h"
35#include "llvm/IR/RuntimeLibcalls.h"
36#include "llvm/LTO/LTOBackend.h"
37#include "llvm/Linker/IRMover.h"
38#include "llvm/MC/TargetRegistry.h"
39#include "llvm/Object/IRObjectFile.h"
40#include "llvm/Support/Caching.h"
41#include "llvm/Support/CommandLine.h"
42#include "llvm/Support/Compiler.h"
43#include "llvm/Support/Error.h"
44#include "llvm/Support/FileSystem.h"
45#include "llvm/Support/JSON.h"
46#include "llvm/Support/MemoryBuffer.h"
47#include "llvm/Support/Path.h"
48#include "llvm/Support/Process.h"
49#include "llvm/Support/SHA1.h"
50#include "llvm/Support/Signals.h"
51#include "llvm/Support/SourceMgr.h"
52#include "llvm/Support/ThreadPool.h"
53#include "llvm/Support/Threading.h"
54#include "llvm/Support/TimeProfiler.h"
55#include "llvm/Support/ToolOutputFile.h"
56#include "llvm/Support/VCSRevision.h"
57#include "llvm/Support/raw_ostream.h"
58#include "llvm/Target/TargetOptions.h"
59#include "llvm/Transforms/IPO.h"
60#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
61#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
62#include "llvm/Transforms/Utils/FunctionImportUtils.h"
63#include "llvm/Transforms/Utils/SplitModule.h"
64
65#include <optional>
66#include <set>
67
68using namespace llvm;
69using namespace lto;
70using namespace object;
71
72#define DEBUG_TYPE "lto"
73
74Error LTO::setupOptimizationRemarks() {
75 // Setup the remark streamer according to the provided configuration.
76 auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
77 Context&: RegularLTO.Ctx, RemarksFilename: Conf.RemarksFilename, RemarksPasses: Conf.RemarksPasses,
78 RemarksFormat: Conf.RemarksFormat, RemarksWithHotness: Conf.RemarksWithHotness,
79 RemarksHotnessThreshold: Conf.RemarksHotnessThreshold);
80 if (!DiagFileOrErr)
81 return DiagFileOrErr.takeError();
82
83 DiagnosticOutputFile = std::move(*DiagFileOrErr);
84
85 // Create a dummy function to serve as a context for LTO-link remarks.
86 // This is required because OptimizationRemark requires a valid Function,
87 // and in ThinLTO we may not have any IR functions available during the
88 // thin link. Host it in a private module to avoid interfering with the LTO
89 // process.
90 if (!LinkerRemarkFunction) {
91 DummyModule = std::make_unique<Module>(args: "remark_dummy", args&: RegularLTO.Ctx);
92 LinkerRemarkFunction = Function::Create(
93 Ty: FunctionType::get(Result: Type::getVoidTy(C&: RegularLTO.Ctx), isVarArg: false),
94 Linkage: GlobalValue::ExternalLinkage, N: "thinlto_remark_dummy",
95 M: DummyModule.get());
96 }
97
98 return Error::success();
99}
100
101void LTO::emitRemark(OptimizationRemark &Remark) {
102 const Function &F = Remark.getFunction();
103 OptimizationRemarkEmitter ORE(const_cast<Function *>(&F));
104 ORE.emit(OptDiag&: Remark);
105}
106
107static cl::opt<bool>
108 DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(Val: false), cl::Hidden,
109 cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
110namespace llvm {
111extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;
112extern cl::opt<bool> ForceImportAll;
113extern cl::opt<bool> AlwaysRenamePromotedLocals;
114} // end namespace llvm
115
116namespace llvm {
117/// Enable global value internalization in LTO.
118cl::opt<bool> EnableLTOInternalization(
119 "enable-lto-internalization", cl::init(Val: true), cl::Hidden,
120 cl::desc("Enable global value internalization in LTO"));
121
122static cl::opt<bool>
123 LTOKeepSymbolCopies("lto-keep-symbol-copies", cl::init(Val: false), cl::Hidden,
124 cl::desc("Keep copies of symbols in LTO indexing"));
125
126/// Indicate we are linking with an allocator that supports hot/cold operator
127/// new interfaces.
128extern cl::opt<bool> SupportsHotColdNew;
129
130/// Enable MemProf context disambiguation for thin link.
131extern cl::opt<bool> EnableMemProfContextDisambiguation;
132} // namespace llvm
133
134// Computes a unique hash for the Module considering the current list of
135// export/import and other global analysis results.
136// Returns the hash in its hexadecimal representation.
137std::string llvm::computeLTOCacheKey(
138 const Config &Conf, const ModuleSummaryIndex &Index, StringRef ModuleID,
139 const FunctionImporter::ImportMapTy &ImportList,
140 const FunctionImporter::ExportSetTy &ExportList,
141 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
142 const GVSummaryMapTy &DefinedGlobals,
143 const DenseSet<GlobalValue::GUID> &CfiFunctionDefs,
144 const DenseSet<GlobalValue::GUID> &CfiFunctionDecls) {
145 // Compute the unique hash for this entry.
146 // This is based on the current compiler version, the module itself, the
147 // export list, the hash for every single module in the import list, the
148 // list of ResolvedODR for the module, and the list of preserved symbols.
149 SHA1 Hasher;
150
151 // Start with the compiler revision
152 Hasher.update(LLVM_VERSION_STRING);
153#ifdef LLVM_REVISION
154 Hasher.update(LLVM_REVISION);
155#endif
156
157 // Include the parts of the LTO configuration that affect code generation.
158 auto AddString = [&](StringRef Str) {
159 Hasher.update(Str);
160 Hasher.update(Data: ArrayRef<uint8_t>{0});
161 };
162 auto AddUnsigned = [&](unsigned I) {
163 uint8_t Data[4];
164 support::endian::write32le(P: Data, V: I);
165 Hasher.update(Data);
166 };
167 auto AddUint64 = [&](uint64_t I) {
168 uint8_t Data[8];
169 support::endian::write64le(P: Data, V: I);
170 Hasher.update(Data);
171 };
172 auto AddUint8 = [&](const uint8_t I) {
173 Hasher.update(Data: ArrayRef<uint8_t>(&I, 1));
174 };
175 AddString(Conf.CPU);
176 // FIXME: Hash more of Options. For now all clients initialize Options from
177 // command-line flags (which is unsupported in production), but may set
178 // X86RelaxRelocations. The clang driver can also pass FunctionSections,
179 // DataSections and DebuggerTuning via command line flags.
180 AddUnsigned(Conf.Options.MCOptions.X86RelaxRelocations);
181 AddUnsigned(Conf.Options.FunctionSections);
182 AddUnsigned(Conf.Options.DataSections);
183 AddUnsigned((unsigned)Conf.Options.DebuggerTuning);
184 for (auto &A : Conf.MAttrs)
185 AddString(A);
186 if (Conf.RelocModel)
187 AddUnsigned(*Conf.RelocModel);
188 else
189 AddUnsigned(-1);
190 if (Conf.CodeModel)
191 AddUnsigned(*Conf.CodeModel);
192 else
193 AddUnsigned(-1);
194 for (const auto &S : Conf.MllvmArgs)
195 AddString(S);
196 AddUnsigned(static_cast<int>(Conf.CGOptLevel));
197 AddUnsigned(static_cast<int>(Conf.CGFileType));
198 AddUnsigned(Conf.OptLevel);
199 AddUnsigned(Conf.Freestanding);
200 AddString(Conf.OptPipeline);
201 AddString(Conf.AAPipeline);
202 AddString(Conf.OverrideTriple);
203 AddString(Conf.DefaultTriple);
204 AddString(Conf.DwoDir);
205 AddUint8(Conf.Dtlto);
206
207 // Include the hash for the current module
208 auto ModHash = Index.getModuleHash(ModPath: ModuleID);
209 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
210
211 // TODO: `ExportList` is determined by `ImportList`. Since `ImportList` is
212 // used to compute cache key, we could omit hashing `ExportList` here.
213 std::vector<uint64_t> ExportsGUID;
214 ExportsGUID.reserve(n: ExportList.size());
215 for (const auto &VI : ExportList)
216 ExportsGUID.push_back(x: VI.getGUID());
217
218 // Sort the export list elements GUIDs.
219 llvm::sort(C&: ExportsGUID);
220 for (auto GUID : ExportsGUID)
221 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
222
223 // Order using module hash, to be both independent of module name and
224 // module order.
225 auto Comp = [&](const std::pair<StringRef, GlobalValue::GUID> &L,
226 const std::pair<StringRef, GlobalValue::GUID> &R) {
227 return std::make_pair(x: Index.getModule(ModPath: L.first)->second, y: L.second) <
228 std::make_pair(x: Index.getModule(ModPath: R.first)->second, y: R.second);
229 };
230 FunctionImporter::SortedImportList SortedImportList(ImportList, Comp);
231
232 // Count the number of imports for each source module.
233 DenseMap<StringRef, unsigned> ModuleToNumImports;
234 for (const auto &[FromModule, GUID, Type] : SortedImportList)
235 ++ModuleToNumImports[FromModule];
236
237 std::optional<StringRef> LastModule;
238 for (const auto &[FromModule, GUID, Type] : SortedImportList) {
239 if (LastModule != FromModule) {
240 // Include the hash for every module we import functions from. The set of
241 // imported symbols for each module may affect code generation and is
242 // sensitive to link order, so include that as well.
243 LastModule = FromModule;
244 auto ModHash = Index.getModule(ModPath: FromModule)->second;
245 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
246 AddUint64(ModuleToNumImports[FromModule]);
247 }
248 AddUint64(GUID);
249 AddUint8(Type);
250 }
251
252 // Include the hash for the resolved ODR.
253 for (auto &Entry : ResolvedODR) {
254 Hasher.update(Data: ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
255 sizeof(GlobalValue::GUID)));
256 Hasher.update(Data: ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
257 sizeof(GlobalValue::LinkageTypes)));
258 }
259
260 // Members of CfiFunctionDefs and CfiFunctionDecls that are referenced or
261 // defined in this module.
262 std::set<GlobalValue::GUID> UsedCfiDefs;
263 std::set<GlobalValue::GUID> UsedCfiDecls;
264
265 // Typeids used in this module.
266 std::set<GlobalValue::GUID> UsedTypeIds;
267
268 auto AddUsedCfiGlobal = [&](GlobalValue::GUID ValueGUID) {
269 if (CfiFunctionDefs.contains(V: ValueGUID))
270 UsedCfiDefs.insert(x: ValueGUID);
271 if (CfiFunctionDecls.contains(V: ValueGUID))
272 UsedCfiDecls.insert(x: ValueGUID);
273 };
274
275 auto AddUsedThings = [&](GlobalValueSummary *GS) {
276 if (!GS) return;
277 AddUnsigned(GS->getVisibility());
278 AddUnsigned(GS->isLive());
279 AddUnsigned(GS->canAutoHide());
280 for (const ValueInfo &VI : GS->refs()) {
281 AddUnsigned(VI.isDSOLocal(WithDSOLocalPropagation: Index.withDSOLocalPropagation()));
282 AddUsedCfiGlobal(VI.getGUID());
283 }
284 if (auto *GVS = dyn_cast<GlobalVarSummary>(Val: GS)) {
285 AddUnsigned(GVS->maybeReadOnly());
286 AddUnsigned(GVS->maybeWriteOnly());
287 }
288 if (auto *FS = dyn_cast<FunctionSummary>(Val: GS)) {
289 for (auto &TT : FS->type_tests())
290 UsedTypeIds.insert(x: TT);
291 for (auto &TT : FS->type_test_assume_vcalls())
292 UsedTypeIds.insert(x: TT.GUID);
293 for (auto &TT : FS->type_checked_load_vcalls())
294 UsedTypeIds.insert(x: TT.GUID);
295 for (auto &TT : FS->type_test_assume_const_vcalls())
296 UsedTypeIds.insert(x: TT.VFunc.GUID);
297 for (auto &TT : FS->type_checked_load_const_vcalls())
298 UsedTypeIds.insert(x: TT.VFunc.GUID);
299 for (auto &ET : FS->calls()) {
300 AddUnsigned(ET.first.isDSOLocal(WithDSOLocalPropagation: Index.withDSOLocalPropagation()));
301 AddUsedCfiGlobal(ET.first.getGUID());
302 }
303 }
304 };
305
306 // Include the hash for the linkage type to reflect internalization and weak
307 // resolution, and collect any used type identifier resolutions.
308 for (auto &GS : DefinedGlobals) {
309 GlobalValue::LinkageTypes Linkage = GS.second->linkage();
310 Hasher.update(
311 Data: ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
312 AddUsedCfiGlobal(GS.first);
313 AddUsedThings(GS.second);
314 }
315
316 // Imported functions may introduce new uses of type identifier resolutions,
317 // so we need to collect their used resolutions as well.
318 for (const auto &[FromModule, GUID, Type] : SortedImportList) {
319 GlobalValueSummary *S = Index.findSummaryInModule(ValueGUID: GUID, ModuleId: FromModule);
320 AddUsedThings(S);
321 // If this is an alias, we also care about any types/etc. that the aliasee
322 // may reference.
323 if (auto *AS = dyn_cast_or_null<AliasSummary>(Val: S))
324 AddUsedThings(AS->getBaseObject());
325 }
326
327 auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
328 AddString(TId);
329
330 AddUnsigned(S.TTRes.TheKind);
331 AddUnsigned(S.TTRes.SizeM1BitWidth);
332
333 AddUint64(S.TTRes.AlignLog2);
334 AddUint64(S.TTRes.SizeM1);
335 AddUint64(S.TTRes.BitMask);
336 AddUint64(S.TTRes.InlineBits);
337
338 AddUint64(S.WPDRes.size());
339 for (auto &WPD : S.WPDRes) {
340 AddUnsigned(WPD.first);
341 AddUnsigned(WPD.second.TheKind);
342 AddString(WPD.second.SingleImplName);
343
344 AddUint64(WPD.second.ResByArg.size());
345 for (auto &ByArg : WPD.second.ResByArg) {
346 AddUint64(ByArg.first.size());
347 for (uint64_t Arg : ByArg.first)
348 AddUint64(Arg);
349 AddUnsigned(ByArg.second.TheKind);
350 AddUint64(ByArg.second.Info);
351 AddUnsigned(ByArg.second.Byte);
352 AddUnsigned(ByArg.second.Bit);
353 }
354 }
355 };
356
357 // Include the hash for all type identifiers used by this module.
358 for (GlobalValue::GUID TId : UsedTypeIds) {
359 auto TidIter = Index.typeIds().equal_range(x: TId);
360 for (const auto &I : make_range(p: TidIter))
361 AddTypeIdSummary(I.second.first, I.second.second);
362 }
363
364 AddUnsigned(UsedCfiDefs.size());
365 for (auto &V : UsedCfiDefs)
366 AddUint64(V);
367
368 AddUnsigned(UsedCfiDecls.size());
369 for (auto &V : UsedCfiDecls)
370 AddUint64(V);
371
372 if (!Conf.SampleProfile.empty()) {
373 auto FileOrErr = MemoryBuffer::getFile(Filename: Conf.SampleProfile);
374 if (FileOrErr) {
375 Hasher.update(Str: FileOrErr.get()->getBuffer());
376
377 if (!Conf.ProfileRemapping.empty()) {
378 FileOrErr = MemoryBuffer::getFile(Filename: Conf.ProfileRemapping);
379 if (FileOrErr)
380 Hasher.update(Str: FileOrErr.get()->getBuffer());
381 }
382 }
383 }
384
385 return toHex(Input: Hasher.result());
386}
387
388std::string llvm::recomputeLTOCacheKey(const std::string &Key,
389 StringRef ExtraID) {
390 SHA1 Hasher;
391
392 auto AddString = [&](StringRef Str) {
393 Hasher.update(Str);
394 Hasher.update(Data: ArrayRef<uint8_t>{0});
395 };
396 AddString(Key);
397 AddString(ExtraID);
398
399 return toHex(Input: Hasher.result());
400}
401
402static void thinLTOResolvePrevailingGUID(
403 const Config &C, ValueInfo VI,
404 DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
405 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
406 isPrevailing,
407 function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
408 recordNewLinkage,
409 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
410 GlobalValue::VisibilityTypes Visibility =
411 C.VisibilityScheme == Config::ELF ? VI.getELFVisibility()
412 : GlobalValue::DefaultVisibility;
413 for (auto &S : VI.getSummaryList()) {
414 GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
415 // Ignore local and appending linkage values since the linker
416 // doesn't resolve them.
417 if (GlobalValue::isLocalLinkage(Linkage: OriginalLinkage) ||
418 GlobalValue::isAppendingLinkage(Linkage: S->linkage()))
419 continue;
420 // We need to emit only one of these. The prevailing module will keep it,
421 // but turned into a weak, while the others will drop it when possible.
422 // This is both a compile-time optimization and a correctness
423 // transformation. This is necessary for correctness when we have exported
424 // a reference - we need to convert the linkonce to weak to
425 // ensure a copy is kept to satisfy the exported reference.
426 // FIXME: We may want to split the compile time and correctness
427 // aspects into separate routines.
428 if (isPrevailing(VI.getGUID(), S.get())) {
429 assert(!S->wasPromoted() &&
430 "promoted symbols used to be internal linkage and shouldn't have "
431 "a prevailing variant");
432 if (GlobalValue::isLinkOnceLinkage(Linkage: OriginalLinkage)) {
433 S->setLinkage(GlobalValue::getWeakLinkage(
434 ODR: GlobalValue::isLinkOnceODRLinkage(Linkage: OriginalLinkage)));
435 // The kept copy is eligible for auto-hiding (hidden visibility) if all
436 // copies were (i.e. they were all linkonce_odr global unnamed addr).
437 // If any copy is not (e.g. it was originally weak_odr), then the symbol
438 // must remain externally available (e.g. a weak_odr from an explicitly
439 // instantiated template). Additionally, if it is in the
440 // GUIDPreservedSymbols set, that means that it is visibile outside
441 // the summary (e.g. in a native object or a bitcode file without
442 // summary), and in that case we cannot hide it as it isn't possible to
443 // check all copies.
444 S->setCanAutoHide(VI.canAutoHide() &&
445 !GUIDPreservedSymbols.count(V: VI.getGUID()));
446 }
447 if (C.VisibilityScheme == Config::FromPrevailing)
448 Visibility = S->getVisibility();
449 }
450 // Alias and aliasee can't be turned into available_externally.
451 // When force-import-all is used, it indicates that object linking is not
452 // supported by the target. In this case, we can't change the linkage as
453 // well in case the global is converted to declaration.
454 // Also, if the symbol was promoted, it wouldn't have a prevailing variant,
455 // but also its linkage is set correctly (to External) already.
456 else if (!isa<AliasSummary>(Val: S.get()) &&
457 !GlobalInvolvedWithAlias.count(V: S.get()) && !ForceImportAll &&
458 !S->wasPromoted())
459 S->setLinkage(GlobalValue::AvailableExternallyLinkage);
460
461 // For ELF, set visibility to the computed visibility from summaries. We
462 // don't track visibility from declarations so this may be more relaxed than
463 // the most constraining one.
464 if (C.VisibilityScheme == Config::ELF)
465 S->setVisibility(Visibility);
466
467 if (S->linkage() != OriginalLinkage)
468 recordNewLinkage(S->modulePath(), VI.getGUID(), S->linkage());
469 }
470
471 if (C.VisibilityScheme == Config::FromPrevailing) {
472 for (auto &S : VI.getSummaryList()) {
473 GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
474 if (GlobalValue::isLocalLinkage(Linkage: OriginalLinkage) ||
475 GlobalValue::isAppendingLinkage(Linkage: S->linkage()))
476 continue;
477 S->setVisibility(Visibility);
478 }
479 }
480}
481
482/// Resolve linkage for prevailing symbols in the \p Index.
483//
484// We'd like to drop these functions if they are no longer referenced in the
485// current module. However there is a chance that another module is still
486// referencing them because of the import. We make sure we always emit at least
487// one copy.
488void llvm::thinLTOResolvePrevailingInIndex(
489 const Config &C, ModuleSummaryIndex &Index,
490 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
491 isPrevailing,
492 function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
493 recordNewLinkage,
494 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
495 // We won't optimize the globals that are referenced by an alias for now
496 // Ideally we should turn the alias into a global and duplicate the definition
497 // when needed.
498 DenseSet<GlobalValueSummary *> GlobalInvolvedWithAlias;
499 for (auto &I : Index)
500 for (auto &S : I.second.getSummaryList())
501 if (auto AS = dyn_cast<AliasSummary>(Val: S.get()))
502 GlobalInvolvedWithAlias.insert(V: &AS->getAliasee());
503
504 for (auto &I : Index)
505 thinLTOResolvePrevailingGUID(C, VI: Index.getValueInfo(R: I),
506 GlobalInvolvedWithAlias, isPrevailing,
507 recordNewLinkage, GUIDPreservedSymbols);
508}
509
510static void thinLTOInternalizeAndPromoteGUID(
511 ValueInfo VI, function_ref<bool(StringRef, ValueInfo)> isExported,
512 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
513 isPrevailing,
514 DenseSet<StringRef> *ExternallyVisibleSymbolNamesPtr) {
515 // Before performing index-based internalization and promotion for this GUID,
516 // the local flag should be consistent with the summary list linkage types.
517 VI.verifyLocal();
518
519 const bool SingleExternallyVisibleCopy =
520 VI.getSummaryList().size() == 1 &&
521 !GlobalValue::isLocalLinkage(Linkage: VI.getSummaryList().front()->linkage());
522
523 bool NameRecorded = false;
524 for (auto &S : VI.getSummaryList()) {
525 // First see if we need to promote an internal value because it is not
526 // exported.
527 if (isExported(S->modulePath(), VI)) {
528 if (GlobalValue::isLocalLinkage(Linkage: S->linkage())) {
529 // Only the first local GlobalValue in a list of summaries does not
530 // need renaming. In rare cases if there exist more than one summaries
531 // in the list, the rest of them must have renaming (through promotion)
532 // to avoid conflict.
533 if (ExternallyVisibleSymbolNamesPtr && !NameRecorded) {
534 NameRecorded = true;
535 if (ExternallyVisibleSymbolNamesPtr->insert(V: VI.name()).second)
536 S->setNoRenameOnPromotion(true);
537 }
538
539 S->promote();
540 }
541 continue;
542 }
543
544 // Otherwise, see if we can internalize.
545 if (!EnableLTOInternalization)
546 continue;
547
548 // Non-exported values with external linkage can be internalized.
549 if (GlobalValue::isExternalLinkage(Linkage: S->linkage())) {
550 S->setLinkage(GlobalValue::InternalLinkage);
551 continue;
552 }
553
554 // Non-exported function and variable definitions with a weak-for-linker
555 // linkage can be internalized in certain cases. The minimum legality
556 // requirements would be that they are not address taken to ensure that we
557 // don't break pointer equality checks, and that variables are either read-
558 // or write-only. For functions, this is the case if either all copies are
559 // [local_]unnamed_addr, or we can propagate reference edge attributes
560 // (which is how this is guaranteed for variables, when analyzing whether
561 // they are read or write-only).
562 //
563 // However, we only get to this code for weak-for-linkage values in one of
564 // two cases:
565 // 1) The prevailing copy is not in IR (it is in native code).
566 // 2) The prevailing copy in IR is not exported from its module.
567 // Additionally, at least for the new LTO API, case 2 will only happen if
568 // there is exactly one definition of the value (i.e. in exactly one
569 // module), as duplicate defs are result in the value being marked exported.
570 // Likely, users of the legacy LTO API are similar, however, currently there
571 // are llvm-lto based tests of the legacy LTO API that do not mark
572 // duplicate linkonce_odr copies as exported via the tool, so we need
573 // to handle that case below by checking the number of copies.
574 //
575 // Generally, we only want to internalize a weak-for-linker value in case
576 // 2, because in case 1 we cannot see how the value is used to know if it
577 // is read or write-only. We also don't want to bloat the binary with
578 // multiple internalized copies of non-prevailing linkonce/weak functions.
579 // Note if we don't internalize, we will convert non-prevailing copies to
580 // available_externally anyway, so that we drop them after inlining. The
581 // only reason to internalize such a function is if we indeed have a single
582 // copy, because internalizing it won't increase binary size, and enables
583 // use of inliner heuristics that are more aggressive in the face of a
584 // single call to a static (local). For variables, internalizing a read or
585 // write only variable can enable more aggressive optimization. However, we
586 // already perform this elsewhere in the ThinLTO backend handling for
587 // read or write-only variables (processGlobalForThinLTO).
588 //
589 // Therefore, only internalize linkonce/weak if there is a single copy, that
590 // is prevailing in this IR module. We can do so aggressively, without
591 // requiring the address to be insignificant, or that a variable be read or
592 // write-only.
593 if (!GlobalValue::isWeakForLinker(Linkage: S->linkage()) ||
594 GlobalValue::isExternalWeakLinkage(Linkage: S->linkage()))
595 continue;
596
597 // We may have a single summary copy that is externally visible but not
598 // prevailing if the prevailing copy is in a native object.
599 if (SingleExternallyVisibleCopy && isPrevailing(VI.getGUID(), S.get()))
600 S->setLinkage(GlobalValue::InternalLinkage);
601 }
602}
603
604// Update the linkages in the given \p Index to mark exported values
605// as external and non-exported values as internal.
606void llvm::thinLTOInternalizeAndPromoteInIndex(
607 ModuleSummaryIndex &Index,
608 function_ref<bool(StringRef, ValueInfo)> isExported,
609 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
610 isPrevailing,
611 DenseSet<StringRef> *ExternallyVisibleSymbolNamesPtr) {
612 assert(!Index.withInternalizeAndPromote());
613
614 for (auto &I : Index)
615 thinLTOInternalizeAndPromoteGUID(VI: Index.getValueInfo(R: I), isExported,
616 isPrevailing,
617 ExternallyVisibleSymbolNamesPtr);
618 Index.setWithInternalizeAndPromote();
619}
620
621// Requires a destructor for std::vector<InputModule>.
622InputFile::~InputFile() = default;
623
624Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
625 std::unique_ptr<InputFile> File(new InputFile);
626
627 Expected<IRSymtabFile> FOrErr = readIRSymtab(MBRef: Object);
628 if (!FOrErr)
629 return FOrErr.takeError();
630
631 File->TargetTriple = FOrErr->TheReader.getTargetTriple();
632 File->SourceFileName = FOrErr->TheReader.getSourceFileName();
633 File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
634 File->DependentLibraries = FOrErr->TheReader.getDependentLibraries();
635 File->ComdatTable = FOrErr->TheReader.getComdatTable();
636 File->MbRef =
637 Object; // Save a memory buffer reference to an input file object.
638
639 for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
640 size_t Begin = File->Symbols.size();
641 for (const irsymtab::Reader::SymbolRef &Sym :
642 FOrErr->TheReader.module_symbols(I))
643 // Skip symbols that are irrelevant to LTO. Note that this condition needs
644 // to match the one in Skip() in LTO::addRegularLTO().
645 if (Sym.isGlobal() && !Sym.isFormatSpecific())
646 File->Symbols.push_back(x: Sym);
647 File->ModuleSymIndices.push_back(x: {Begin, File->Symbols.size()});
648 }
649
650 File->Mods = FOrErr->Mods;
651 File->Strtab = std::move(FOrErr->Strtab);
652 return std::move(File);
653}
654
655bool InputFile::Symbol::isLibcall(
656 const RTLIB::RuntimeLibcallsInfo &Libcalls) const {
657 return Libcalls.getSupportedLibcallImpl(FuncName: IRName) != RTLIB::Unsupported;
658}
659
660StringRef InputFile::getName() const {
661 return Mods[0].getModuleIdentifier();
662}
663
664BitcodeModule &InputFile::getSingleBitcodeModule() {
665 assert(Mods.size() == 1 && "Expect only one bitcode module");
666 return Mods[0];
667}
668
669BitcodeModule &InputFile::getPrimaryBitcodeModule() { return Mods[0]; }
670
671LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
672 const Config &Conf)
673 : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel),
674 Ctx(Conf), CombinedModule(std::make_unique<Module>(args: "ld-temp.o", args&: Ctx)),
675 Mover(std::make_unique<IRMover>(args&: *CombinedModule)) {}
676
677LTO::ThinLTOState::ThinLTOState(ThinBackend BackendParam)
678 : Backend(std::move(BackendParam)), CombinedIndex(/*HaveGVs*/ false) {
679 if (!Backend.isValid())
680 Backend =
681 createInProcessThinBackend(Parallelism: llvm::heavyweight_hardware_concurrency());
682}
683
684LTO::LTO(Config Conf, ThinBackend Backend,
685 unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode)
686 : Conf(std::move(Conf)),
687 RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
688 ThinLTO(std::move(Backend)),
689 GlobalResolutions(
690 std::make_unique<DenseMap<StringRef, GlobalResolution>>()),
691 LTOMode(LTOMode) {
692 if (Conf.KeepSymbolNameCopies || LTOKeepSymbolCopies) {
693 Alloc = std::make_unique<BumpPtrAllocator>();
694 GlobalResolutionSymbolSaver = std::make_unique<llvm::StringSaver>(args&: *Alloc);
695 }
696}
697
698// Requires a destructor for MapVector<BitcodeModule>.
699LTO::~LTO() = default;
700
701void LTO::cleanup() {
702 DummyModule.reset();
703 LinkerRemarkFunction = nullptr;
704 consumeError(Err: finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)));
705}
706
707// Add the symbols in the given module to the GlobalResolutions map, and resolve
708// their partitions.
709void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
710 ArrayRef<SymbolResolution> Res,
711 unsigned Partition, bool InSummary,
712 const Triple &TT) {
713 llvm::TimeTraceScope timeScope("LTO add module to global resolution");
714 auto *ResI = Res.begin();
715 auto *ResE = Res.end();
716 (void)ResE;
717 RTLIB::RuntimeLibcallsInfo Libcalls(TT);
718 for (const InputFile::Symbol &Sym : Syms) {
719 assert(ResI != ResE);
720 SymbolResolution Res = *ResI++;
721
722 StringRef SymbolName = Sym.getName();
723 // Keep copies of symbols if the client of LTO says so.
724 if (GlobalResolutionSymbolSaver && !GlobalResolutions->contains(Val: SymbolName))
725 SymbolName = GlobalResolutionSymbolSaver->save(S: SymbolName);
726
727 auto &GlobalRes = (*GlobalResolutions)[SymbolName];
728 GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
729 if (Res.Prevailing) {
730 assert(!GlobalRes.Prevailing &&
731 "Multiple prevailing defs are not allowed");
732 GlobalRes.Prevailing = true;
733 GlobalRes.IRName = std::string(Sym.getIRName());
734 } else if (!GlobalRes.Prevailing && GlobalRes.IRName.empty()) {
735 // Sometimes it can be two copies of symbol in a module and prevailing
736 // symbol can have no IR name. That might happen if symbol is defined in
737 // module level inline asm block. In case we have multiple modules with
738 // the same symbol we want to use IR name of the prevailing symbol.
739 // Otherwise, if we haven't seen a prevailing symbol, set the name so that
740 // we can later use it to check if there is any prevailing copy in IR.
741 GlobalRes.IRName = std::string(Sym.getIRName());
742 }
743
744 // In rare occasion, the symbol used to initialize GlobalRes has a different
745 // IRName from the inspected Symbol. This can happen on macOS + iOS, when a
746 // symbol is referenced through its mangled name, say @"\01_symbol" while
747 // the IRName is @symbol (the prefix underscore comes from MachO mangling).
748 // In that case, we have the same actual Symbol that can get two different
749 // GUID, leading to some invalid internalization. Workaround this by marking
750 // the GlobalRes external.
751
752 // FIXME: instead of this check, it would be desirable to compute GUIDs
753 // based on mangled name, but this requires an access to the Target Triple
754 // and would be relatively invasive on the codebase.
755 if (GlobalRes.IRName != Sym.getIRName()) {
756 GlobalRes.Partition = GlobalResolution::External;
757 GlobalRes.VisibleOutsideSummary = true;
758 }
759
760 bool IsLibcall = Sym.isLibcall(Libcalls);
761
762 // Set the partition to external if we know it is re-defined by the linker
763 // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a
764 // regular object, is referenced from llvm.compiler.used/llvm.used, or was
765 // already recorded as being referenced from a different partition.
766 if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() ||
767 IsLibcall ||
768 (GlobalRes.Partition != GlobalResolution::Unknown &&
769 GlobalRes.Partition != Partition)) {
770 GlobalRes.Partition = GlobalResolution::External;
771 } else
772 // First recorded reference, save the current partition.
773 GlobalRes.Partition = Partition;
774
775 // Flag as visible outside of summary if visible from a regular object or
776 // from a module that does not have a summary.
777 GlobalRes.VisibleOutsideSummary |=
778 (Res.VisibleToRegularObj || Sym.isUsed() || IsLibcall || !InSummary);
779
780 GlobalRes.ExportDynamic |= Res.ExportDynamic;
781 }
782}
783
784void LTO::releaseGlobalResolutionsMemory() {
785 // Release GlobalResolutions dense-map itself.
786 GlobalResolutions.reset();
787 // Release the string saver memory.
788 GlobalResolutionSymbolSaver.reset();
789 Alloc.reset();
790}
791
792static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
793 ArrayRef<SymbolResolution> Res) {
794 StringRef Path = Input->getName();
795 OS << Path << '\n';
796 auto ResI = Res.begin();
797 for (const InputFile::Symbol &Sym : Input->symbols()) {
798 assert(ResI != Res.end());
799 SymbolResolution Res = *ResI++;
800
801 OS << "-r=" << Path << ',' << Sym.getName() << ',';
802 if (Res.Prevailing)
803 OS << 'p';
804 if (Res.FinalDefinitionInLinkageUnit)
805 OS << 'l';
806 if (Res.VisibleToRegularObj)
807 OS << 'x';
808 if (Res.LinkerRedefined)
809 OS << 'r';
810 OS << '\n';
811 }
812 OS.flush();
813 assert(ResI == Res.end());
814}
815
816Error LTO::add(std::unique_ptr<InputFile> InputPtr,
817 ArrayRef<SymbolResolution> Res) {
818 llvm::TimeTraceScope timeScope("LTO add input", InputPtr->getName());
819 assert(!CalledGetMaxTasks);
820
821 Expected<std::shared_ptr<InputFile>> InputOrErr =
822 addInput(InputPtr: std::move(InputPtr));
823 if (!InputOrErr)
824 return InputOrErr.takeError();
825 InputFile *Input = (*InputOrErr).get();
826
827 if (Conf.ResolutionFile)
828 writeToResolutionFile(OS&: *Conf.ResolutionFile, Input, Res);
829
830 if (RegularLTO.CombinedModule->getTargetTriple().empty()) {
831 Triple InputTriple(Input->getTargetTriple());
832 RegularLTO.CombinedModule->setTargetTriple(InputTriple);
833 if (InputTriple.isOSBinFormatELF())
834 Conf.VisibilityScheme = Config::ELF;
835 }
836
837 ArrayRef<SymbolResolution> InputRes = Res;
838 for (unsigned I = 0; I != Input->Mods.size(); ++I) {
839 if (auto Err = addModule(Input&: *Input, InputRes, ModI: I, Res).moveInto(Value&: Res))
840 return Err;
841 }
842
843 assert(Res.empty());
844 return Error::success();
845}
846
847Expected<ArrayRef<SymbolResolution>>
848LTO::addModule(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
849 unsigned ModI, ArrayRef<SymbolResolution> Res) {
850 llvm::TimeTraceScope timeScope("LTO add module", Input.getName());
851 Expected<BitcodeLTOInfo> LTOInfo = Input.Mods[ModI].getLTOInfo();
852 if (!LTOInfo)
853 return LTOInfo.takeError();
854
855 if (EnableSplitLTOUnit) {
856 // If only some modules were split, flag this in the index so that
857 // we can skip or error on optimizations that need consistently split
858 // modules (whole program devirt and lower type tests).
859 if (*EnableSplitLTOUnit != LTOInfo->EnableSplitLTOUnit)
860 ThinLTO.CombinedIndex.setPartiallySplitLTOUnits();
861 } else
862 EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;
863
864 BitcodeModule BM = Input.Mods[ModI];
865
866 if ((LTOMode == LTOK_UnifiedRegular || LTOMode == LTOK_UnifiedThin) &&
867 !LTOInfo->UnifiedLTO)
868 return make_error<StringError>(
869 Args: "unified LTO compilation must use "
870 "compatible bitcode modules (use -funified-lto)",
871 Args: inconvertibleErrorCode());
872
873 if (LTOInfo->UnifiedLTO && LTOMode == LTOK_Default)
874 LTOMode = LTOK_UnifiedThin;
875
876 bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular);
877 // If any of the modules inside of a input bitcode file was compiled with
878 // ThinLTO, we assume that the whole input file also was compiled with
879 // ThinLTO.
880 Input.IsThinLTO |= IsThinLTO;
881
882 auto ModSyms = Input.module_symbols(I: ModI);
883 addModuleToGlobalRes(Syms: ModSyms, Res,
884 Partition: IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0,
885 InSummary: LTOInfo->HasSummary, TT: Triple(Input.getTargetTriple()));
886
887 if (IsThinLTO)
888 return addThinLTO(BM, Syms: ModSyms, Res);
889
890 RegularLTO.EmptyCombinedModule = false;
891 auto ModOrErr = addRegularLTO(Input, InputRes, BM, Syms: ModSyms, Res);
892 if (!ModOrErr)
893 return ModOrErr.takeError();
894 Res = ModOrErr->second;
895
896 if (!LTOInfo->HasSummary) {
897 if (Error Err = linkRegularLTO(Mod: std::move(ModOrErr->first),
898 /*LivenessFromIndex=*/false))
899 return Err;
900 return Res;
901 }
902
903 // Regular LTO module summaries are added to a dummy module that represents
904 // the combined regular LTO module.
905 if (Error Err = BM.readSummary(CombinedIndex&: ThinLTO.CombinedIndex, ModulePath: ""))
906 return Err;
907 RegularLTO.ModsWithSummaries.push_back(x: std::move(ModOrErr->first));
908 return Res;
909}
910
911// Checks whether the given global value is in a non-prevailing comdat
912// (comdat containing values the linker indicated were not prevailing,
913// which we then dropped to available_externally), and if so, removes
914// it from the comdat. This is called for all global values to ensure the
915// comdat is empty rather than leaving an incomplete comdat. It is needed for
916// regular LTO modules, in case we are in a mixed-LTO mode (both regular
917// and thin LTO modules) compilation. Since the regular LTO module will be
918// linked first in the final native link, we want to make sure the linker
919// doesn't select any of these incomplete comdats that would be left
920// in the regular LTO module without this cleanup.
921static void
922handleNonPrevailingComdat(GlobalValue &GV,
923 std::set<const Comdat *> &NonPrevailingComdats) {
924 Comdat *C = GV.getComdat();
925 if (!C)
926 return;
927
928 if (!NonPrevailingComdats.count(x: C))
929 return;
930
931 // Additionally need to drop all global values from the comdat to
932 // available_externally, to satisfy the COMDAT requirement that all members
933 // are discarded as a unit. The non-local linkage global values avoid
934 // duplicate definition linker errors.
935 GV.setLinkage(GlobalValue::AvailableExternallyLinkage);
936
937 if (auto GO = dyn_cast<GlobalObject>(Val: &GV))
938 GO->setComdat(nullptr);
939}
940
941// Add a regular LTO object to the link.
942// The resulting module needs to be linked into the combined LTO module with
943// linkRegularLTO.
944Expected<
945 std::pair<LTO::RegularLTOState::AddedModule, ArrayRef<SymbolResolution>>>
946LTO::addRegularLTO(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
947 BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
948 ArrayRef<SymbolResolution> Res) {
949 llvm::TimeTraceScope timeScope("LTO add regular LTO");
950 RegularLTOState::AddedModule Mod;
951 Expected<std::unique_ptr<Module>> MOrErr =
952 BM.getLazyModule(Context&: RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true,
953 /*IsImporting*/ false);
954 if (!MOrErr)
955 return MOrErr.takeError();
956 Module &M = **MOrErr;
957 Mod.M = std::move(*MOrErr);
958
959 if (Error Err = M.materializeMetadata())
960 return std::move(Err);
961
962 if (LTOMode == LTOK_UnifiedRegular) {
963 // cfi.functions metadata is intended to be used with ThinLTO and may
964 // trigger invalid IR transformations if they are present when doing regular
965 // LTO, so delete it.
966 if (NamedMDNode *CfiFunctionsMD = M.getNamedMetadata(Name: "cfi.functions"))
967 M.eraseNamedMetadata(NMD: CfiFunctionsMD);
968 } else if (NamedMDNode *AliasesMD = M.getNamedMetadata(Name: "aliases")) {
969 // Delete aliases entries for non-prevailing symbols on the ThinLTO side of
970 // this input file.
971 DenseSet<StringRef> Prevailing;
972 for (auto [I, R] : zip(t: Input.symbols(), u&: InputRes))
973 if (R.Prevailing && !I.getIRName().empty())
974 Prevailing.insert(V: I.getIRName());
975 std::vector<MDNode *> AliasGroups;
976 for (MDNode *AliasGroup : AliasesMD->operands()) {
977 std::vector<Metadata *> Aliases;
978 for (Metadata *Alias : AliasGroup->operands()) {
979 if (isa<MDString>(Val: Alias) &&
980 Prevailing.count(V: cast<MDString>(Val: Alias)->getString()))
981 Aliases.push_back(x: Alias);
982 }
983 if (Aliases.size() > 1)
984 AliasGroups.push_back(x: MDTuple::get(Context&: RegularLTO.Ctx, MDs: Aliases));
985 }
986 AliasesMD->clearOperands();
987 for (MDNode *G : AliasGroups)
988 AliasesMD->addOperand(M: G);
989 }
990
991 UpgradeDebugInfo(M);
992
993 ModuleSymbolTable SymTab;
994 SymTab.addModule(M: &M);
995
996 for (GlobalVariable &GV : M.globals())
997 if (GV.hasAppendingLinkage())
998 Mod.Keep.push_back(x: &GV);
999
1000 DenseSet<GlobalObject *> AliasedGlobals;
1001 for (auto &GA : M.aliases())
1002 if (GlobalObject *GO = GA.getAliaseeObject())
1003 AliasedGlobals.insert(V: GO);
1004
1005 // In this function we need IR GlobalValues matching the symbols in Syms
1006 // (which is not backed by a module), so we need to enumerate them in the same
1007 // order. The symbol enumeration order of a ModuleSymbolTable intentionally
1008 // matches the order of an irsymtab, but when we read the irsymtab in
1009 // InputFile::create we omit some symbols that are irrelevant to LTO. The
1010 // Skip() function skips the same symbols from the module as InputFile does
1011 // from the symbol table.
1012 auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end();
1013 auto Skip = [&]() {
1014 while (MsymI != MsymE) {
1015 auto Flags = SymTab.getSymbolFlags(S: *MsymI);
1016 if ((Flags & object::BasicSymbolRef::SF_Global) &&
1017 !(Flags & object::BasicSymbolRef::SF_FormatSpecific))
1018 return;
1019 ++MsymI;
1020 }
1021 };
1022 Skip();
1023
1024 std::set<const Comdat *> NonPrevailingComdats;
1025 SmallSet<StringRef, 2> NonPrevailingAsmSymbols;
1026 for (const InputFile::Symbol &Sym : Syms) {
1027 assert(!Res.empty());
1028 const SymbolResolution &R = Res.consume_front();
1029
1030 assert(MsymI != MsymE);
1031 ModuleSymbolTable::Symbol Msym = *MsymI++;
1032 Skip();
1033
1034 if (GlobalValue *GV = dyn_cast_if_present<GlobalValue *>(Val&: Msym)) {
1035 if (R.Prevailing) {
1036 if (Sym.isUndefined())
1037 continue;
1038 Mod.Keep.push_back(x: GV);
1039 // For symbols re-defined with linker -wrap and -defsym options,
1040 // set the linkage to weak to inhibit IPO. The linkage will be
1041 // restored by the linker.
1042 if (R.LinkerRedefined)
1043 GV->setLinkage(GlobalValue::WeakAnyLinkage);
1044
1045 GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage();
1046 if (GlobalValue::isLinkOnceLinkage(Linkage: OriginalLinkage))
1047 GV->setLinkage(GlobalValue::getWeakLinkage(
1048 ODR: GlobalValue::isLinkOnceODRLinkage(Linkage: OriginalLinkage)));
1049 } else if (isa<GlobalObject>(Val: GV) &&
1050 (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() ||
1051 GV->hasAvailableExternallyLinkage()) &&
1052 !AliasedGlobals.count(V: cast<GlobalObject>(Val: GV))) {
1053 // Any of the above three types of linkage indicates that the
1054 // chosen prevailing symbol will have the same semantics as this copy of
1055 // the symbol, so we may be able to link it with available_externally
1056 // linkage. We will decide later whether to do that when we link this
1057 // module (in linkRegularLTO), based on whether it is undefined.
1058 Mod.Keep.push_back(x: GV);
1059 GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
1060 if (GV->hasComdat())
1061 NonPrevailingComdats.insert(x: GV->getComdat());
1062 cast<GlobalObject>(Val: GV)->setComdat(nullptr);
1063 }
1064
1065 // Set the 'local' flag based on the linker resolution for this symbol.
1066 if (R.FinalDefinitionInLinkageUnit) {
1067 GV->setDSOLocal(true);
1068 if (GV->hasDLLImportStorageClass())
1069 GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
1070 DefaultStorageClass);
1071 }
1072 } else if (auto *AS =
1073 dyn_cast_if_present<ModuleSymbolTable::AsmSymbol *>(Val&: Msym)) {
1074 // Collect non-prevailing symbols.
1075 if (!R.Prevailing)
1076 NonPrevailingAsmSymbols.insert(V: AS->first);
1077 } else {
1078 llvm_unreachable("unknown symbol type");
1079 }
1080
1081 // Common resolution: collect the maximum size/alignment over all commons.
1082 // We also record if we see an instance of a common as prevailing, so that
1083 // if none is prevailing we can ignore it later.
1084 if (Sym.isCommon()) {
1085 // FIXME: We should figure out what to do about commons defined by asm.
1086 // For now they aren't reported correctly by ModuleSymbolTable.
1087 auto &CommonRes = RegularLTO.Commons[std::string(Sym.getIRName())];
1088 CommonRes.Size = std::max(a: CommonRes.Size, b: Sym.getCommonSize());
1089 if (uint32_t SymAlignValue = Sym.getCommonAlignment()) {
1090 CommonRes.Alignment =
1091 std::max(a: Align(SymAlignValue), b: CommonRes.Alignment);
1092 }
1093 CommonRes.Prevailing |= R.Prevailing;
1094 }
1095 }
1096
1097 if (!M.getComdatSymbolTable().empty())
1098 for (GlobalValue &GV : M.global_values())
1099 handleNonPrevailingComdat(GV, NonPrevailingComdats);
1100
1101 // Prepend ".lto_discard <sym>, <sym>*" directive to each module inline asm
1102 // block.
1103 if (!M.getModuleInlineAsm().empty()) {
1104 std::string NewIA = ".lto_discard";
1105 if (!NonPrevailingAsmSymbols.empty()) {
1106 // Don't dicard a symbol if there is a live .symver for it.
1107 ModuleSymbolTable::CollectAsmSymvers(
1108 M, AsmSymver: [&](StringRef Name, StringRef Alias) {
1109 if (!NonPrevailingAsmSymbols.count(V: Alias))
1110 NonPrevailingAsmSymbols.erase(V: Name);
1111 });
1112 NewIA += " " + llvm::join(R&: NonPrevailingAsmSymbols, Separator: ", ");
1113 }
1114 NewIA += "\n";
1115 M.setModuleInlineAsm(NewIA + M.getModuleInlineAsm());
1116 }
1117
1118 assert(MsymI == MsymE);
1119 return std::make_pair(x: std::move(Mod), y&: Res);
1120}
1121
1122Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
1123 bool LivenessFromIndex) {
1124 llvm::TimeTraceScope timeScope("LTO link regular LTO");
1125 std::vector<GlobalValue *> Keep;
1126 for (GlobalValue *GV : Mod.Keep) {
1127 if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GUID: GV->getGUID())) {
1128 if (Function *F = dyn_cast<Function>(Val: GV)) {
1129 if (DiagnosticOutputFile) {
1130 if (Error Err = F->materialize())
1131 return Err;
1132 auto R = OptimizationRemark(DEBUG_TYPE, "deadfunction", F);
1133 R << ore::NV("Function", F) << " not added to the combined module ";
1134 emitRemark(Remark&: R);
1135 }
1136 }
1137 continue;
1138 }
1139
1140 if (!GV->hasAvailableExternallyLinkage()) {
1141 Keep.push_back(x: GV);
1142 continue;
1143 }
1144
1145 // Only link available_externally definitions if we don't already have a
1146 // definition.
1147 GlobalValue *CombinedGV =
1148 RegularLTO.CombinedModule->getNamedValue(Name: GV->getName());
1149 if (CombinedGV && !CombinedGV->isDeclaration())
1150 continue;
1151
1152 Keep.push_back(x: GV);
1153 }
1154
1155 return RegularLTO.Mover->move(Src: std::move(Mod.M), ValuesToLink: Keep, AddLazyFor: nullptr,
1156 /* IsPerformingImport */ false);
1157}
1158
1159// Add a ThinLTO module to the link.
1160Expected<ArrayRef<SymbolResolution>>
1161LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
1162 ArrayRef<SymbolResolution> Res) {
1163 llvm::TimeTraceScope timeScope("LTO add thin LTO");
1164 const auto BMID = BM.getModuleIdentifier();
1165 ArrayRef<SymbolResolution> ResTmp = Res;
1166 for (const InputFile::Symbol &Sym : Syms) {
1167 assert(!ResTmp.empty());
1168 const SymbolResolution &R = ResTmp.consume_front();
1169
1170 if (!Sym.getIRName().empty() && R.Prevailing) {
1171 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1172 GlobalName: GlobalValue::getGlobalIdentifier(Name: Sym.getIRName(),
1173 Linkage: GlobalValue::ExternalLinkage, FileName: ""));
1174 ThinLTO.setPrevailingModuleForGUID(GUID, Module: BMID);
1175 }
1176 }
1177
1178 if (Error Err = BM.readSummary(
1179 CombinedIndex&: ThinLTO.CombinedIndex, ModulePath: BMID, IsPrevailing: [&](GlobalValue::GUID GUID) {
1180 return ThinLTO.isPrevailingModuleForGUID(GUID, Module: BMID);
1181 }))
1182 return Err;
1183 LLVM_DEBUG(dbgs() << "Module " << BMID << "\n");
1184
1185 for (const InputFile::Symbol &Sym : Syms) {
1186 assert(!Res.empty());
1187 const SymbolResolution &R = Res.consume_front();
1188
1189 if (!Sym.getIRName().empty() &&
1190 (R.Prevailing || R.FinalDefinitionInLinkageUnit)) {
1191 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1192 GlobalName: GlobalValue::getGlobalIdentifier(Name: Sym.getIRName(),
1193 Linkage: GlobalValue::ExternalLinkage, FileName: ""));
1194 if (R.Prevailing) {
1195 assert(ThinLTO.isPrevailingModuleForGUID(GUID, BMID));
1196
1197 // For linker redefined symbols (via --wrap or --defsym) we want to
1198 // switch the linkage to `weak` to prevent IPOs from happening.
1199 // Find the summary in the module for this very GV and record the new
1200 // linkage so that we can switch it when we import the GV.
1201 if (R.LinkerRedefined)
1202 if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(ValueGUID: GUID, ModuleId: BMID))
1203 S->setLinkage(GlobalValue::WeakAnyLinkage);
1204 }
1205
1206 // If the linker resolved the symbol to a local definition then mark it
1207 // as local in the summary for the module we are adding.
1208 if (R.FinalDefinitionInLinkageUnit) {
1209 if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(ValueGUID: GUID, ModuleId: BMID)) {
1210 S->setDSOLocal(true);
1211 }
1212 }
1213 }
1214 }
1215
1216 if (!ThinLTO.ModuleMap.insert(KV: {BMID, BM}).second)
1217 return make_error<StringError>(
1218 Args: "Expected at most one ThinLTO module per bitcode file",
1219 Args: inconvertibleErrorCode());
1220
1221 if (!Conf.ThinLTOModulesToCompile.empty()) {
1222 if (!ThinLTO.ModulesToCompile)
1223 ThinLTO.ModulesToCompile = ModuleMapType();
1224 // This is a fuzzy name matching where only modules with name containing the
1225 // specified switch values are going to be compiled.
1226 for (const std::string &Name : Conf.ThinLTOModulesToCompile) {
1227 if (BMID.contains(Other: Name)) {
1228 ThinLTO.ModulesToCompile->insert(KV: {BMID, BM});
1229 LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BMID << " to compile\n");
1230 break;
1231 }
1232 }
1233 }
1234
1235 return Res;
1236}
1237
1238unsigned LTO::getMaxTasks() const {
1239 CalledGetMaxTasks = true;
1240 auto ModuleCount = ThinLTO.ModulesToCompile ? ThinLTO.ModulesToCompile->size()
1241 : ThinLTO.ModuleMap.size();
1242 return RegularLTO.ParallelCodeGenParallelismLevel + ModuleCount;
1243}
1244
1245// If only some of the modules were split, we cannot correctly handle
1246// code that contains type tests or type checked loads.
1247Error LTO::checkPartiallySplit() {
1248 if (!ThinLTO.CombinedIndex.partiallySplitLTOUnits())
1249 return Error::success();
1250
1251 const Module *Combined = RegularLTO.CombinedModule.get();
1252 Function *TypeTestFunc =
1253 Intrinsic::getDeclarationIfExists(M: Combined, id: Intrinsic::type_test);
1254 Function *TypeCheckedLoadFunc =
1255 Intrinsic::getDeclarationIfExists(M: Combined, id: Intrinsic::type_checked_load);
1256 Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
1257 M: Combined, id: Intrinsic::type_checked_load_relative);
1258
1259 // First check if there are type tests / type checked loads in the
1260 // merged regular LTO module IR.
1261 if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
1262 (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()) ||
1263 (TypeCheckedLoadRelativeFunc &&
1264 !TypeCheckedLoadRelativeFunc->use_empty()))
1265 return make_error<StringError>(
1266 Args: "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
1267 Args: inconvertibleErrorCode());
1268
1269 // Otherwise check if there are any recorded in the combined summary from the
1270 // ThinLTO modules.
1271 for (auto &P : ThinLTO.CombinedIndex) {
1272 for (auto &S : P.second.getSummaryList()) {
1273 auto *FS = dyn_cast<FunctionSummary>(Val: S.get());
1274 if (!FS)
1275 continue;
1276 if (!FS->type_test_assume_vcalls().empty() ||
1277 !FS->type_checked_load_vcalls().empty() ||
1278 !FS->type_test_assume_const_vcalls().empty() ||
1279 !FS->type_checked_load_const_vcalls().empty() ||
1280 !FS->type_tests().empty())
1281 return make_error<StringError>(
1282 Args: "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
1283 Args: inconvertibleErrorCode());
1284 }
1285 }
1286 return Error::success();
1287}
1288
1289Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
1290 llvm::scope_exit CleanUp([this]() { cleanup(); });
1291
1292 if (Error EC = serializeInputsForDistribution())
1293 return EC;
1294
1295 // Compute "dead" symbols, we don't want to import/export these!
1296 DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
1297 DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
1298 for (auto &Res : *GlobalResolutions) {
1299 // Normally resolution have IR name of symbol. We can do nothing here
1300 // otherwise. See comments in GlobalResolution struct for more details.
1301 if (Res.second.IRName.empty())
1302 continue;
1303
1304 GlobalValue::GUID GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1305 GlobalName: GlobalValue::dropLLVMManglingEscape(Name: Res.second.IRName));
1306
1307 if (Res.second.VisibleOutsideSummary && Res.second.Prevailing)
1308 GUIDPreservedSymbols.insert(V: GUID);
1309
1310 if (Res.second.ExportDynamic)
1311 DynamicExportSymbols.insert(V: GUID);
1312
1313 GUIDPrevailingResolutions[GUID] =
1314 Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No;
1315 }
1316
1317 auto isPrevailing = [&](GlobalValue::GUID G) {
1318 auto It = GUIDPrevailingResolutions.find(Val: G);
1319 if (It == GUIDPrevailingResolutions.end())
1320 return PrevailingType::Unknown;
1321 return It->second;
1322 };
1323 computeDeadSymbolsWithConstProp(Index&: ThinLTO.CombinedIndex, GUIDPreservedSymbols,
1324 isPrevailing, ImportEnabled: Conf.OptLevel > 0);
1325
1326 // Setup output file to emit statistics.
1327 auto StatsFileOrErr = setupStatsFile(Conf.StatsFile);
1328 if (!StatsFileOrErr)
1329 return StatsFileOrErr.takeError();
1330 std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get());
1331
1332 if (Error Err = setupOptimizationRemarks())
1333 return Err;
1334
1335 // TODO: Ideally this would be controlled automatically by detecting that we
1336 // are linking with an allocator that supports these interfaces, rather than
1337 // an internal option (which would still be needed for tests, however). For
1338 // example, if the library exported a symbol like __malloc_hot_cold the linker
1339 // could recognize that and set a flag in the lto::Config.
1340 if (SupportsHotColdNew)
1341 ThinLTO.CombinedIndex.setWithSupportsHotColdNew();
1342
1343 Error Result = runRegularLTO(AddStream);
1344 if (!Result)
1345 // This will reset the GlobalResolutions optional once done with it to
1346 // reduce peak memory before importing.
1347 Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
1348
1349 if (StatsFile)
1350 PrintStatisticsJSON(OS&: StatsFile->os());
1351
1352 return Result;
1353}
1354
1355Error LTO::runRegularLTO(AddStreamFn AddStream) {
1356 llvm::TimeTraceScope timeScope("Run regular LTO");
1357 LLVM_DEBUG(dbgs() << "Running regular LTO\n");
1358
1359 // Finalize linking of regular LTO modules containing summaries now that
1360 // we have computed liveness information.
1361 {
1362 llvm::TimeTraceScope timeScope("Link regular LTO");
1363 for (auto &M : RegularLTO.ModsWithSummaries)
1364 if (Error Err = linkRegularLTO(Mod: std::move(M), /*LivenessFromIndex=*/true))
1365 return Err;
1366 }
1367
1368 // Ensure we don't have inconsistently split LTO units with type tests.
1369 // FIXME: this checks both LTO and ThinLTO. It happens to work as we take
1370 // this path both cases but eventually this should be split into two and
1371 // do the ThinLTO checks in `runThinLTO`.
1372 if (Error Err = checkPartiallySplit())
1373 return Err;
1374
1375 // Make sure commons have the right size/alignment: we kept the largest from
1376 // all the prevailing when adding the inputs, and we apply it here.
1377 const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
1378 for (auto &I : RegularLTO.Commons) {
1379 if (!I.second.Prevailing)
1380 // Don't do anything if no instance of this common was prevailing.
1381 continue;
1382 GlobalVariable *OldGV = RegularLTO.CombinedModule->getNamedGlobal(Name: I.first);
1383 if (OldGV && OldGV->getGlobalSize(DL) == I.second.Size) {
1384 // Don't create a new global if the type is already correct, just make
1385 // sure the alignment is correct.
1386 OldGV->setAlignment(I.second.Alignment);
1387 continue;
1388 }
1389 ArrayType *Ty =
1390 ArrayType::get(ElementType: Type::getInt8Ty(C&: RegularLTO.Ctx), NumElements: I.second.Size);
1391 auto *GV = new GlobalVariable(*RegularLTO.CombinedModule, Ty, false,
1392 GlobalValue::CommonLinkage,
1393 ConstantAggregateZero::get(Ty), "");
1394 GV->setAlignment(I.second.Alignment);
1395 if (OldGV) {
1396 OldGV->replaceAllUsesWith(V: GV);
1397 GV->takeName(V: OldGV);
1398 OldGV->eraseFromParent();
1399 } else {
1400 GV->setName(I.first);
1401 }
1402 }
1403
1404 bool WholeProgramVisibilityEnabledInLTO =
1405 Conf.HasWholeProgramVisibility &&
1406 // If validation is enabled, upgrade visibility only when all vtables
1407 // have typeinfos.
1408 (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
1409
1410 // This returns true when the name is local or not defined. Locals are
1411 // expected to be handled separately.
1412 auto IsVisibleToRegularObj = [&](StringRef name) {
1413 auto It = GlobalResolutions->find(Val: name);
1414 return (It == GlobalResolutions->end() ||
1415 It->second.VisibleOutsideSummary || !It->second.Prevailing);
1416 };
1417
1418 // If allowed, upgrade public vcall visibility metadata to linkage unit
1419 // visibility before whole program devirtualization in the optimizer.
1420 updateVCallVisibilityInModule(
1421 M&: *RegularLTO.CombinedModule, WholeProgramVisibilityEnabledInLTO,
1422 DynamicExportSymbols, ValidateAllVtablesHaveTypeInfos: Conf.ValidateAllVtablesHaveTypeInfos,
1423 IsVisibleToRegularObj);
1424 updatePublicTypeTestCalls(M&: *RegularLTO.CombinedModule,
1425 WholeProgramVisibilityEnabledInLTO);
1426
1427 if (Conf.PreOptModuleHook &&
1428 !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
1429 return Error::success();
1430
1431 if (!Conf.CodeGenOnly) {
1432 for (const auto &R : *GlobalResolutions) {
1433 GlobalValue *GV =
1434 RegularLTO.CombinedModule->getNamedValue(Name: R.second.IRName);
1435 if (!R.second.isPrevailingIRSymbol())
1436 continue;
1437 if (R.second.Partition != 0 &&
1438 R.second.Partition != GlobalResolution::External)
1439 continue;
1440
1441 // Ignore symbols defined in other partitions.
1442 // Also skip declarations, which are not allowed to have internal linkage.
1443 if (!GV || GV->hasLocalLinkage() || GV->isDeclaration())
1444 continue;
1445
1446 // Symbols that are marked DLLImport or DLLExport should not be
1447 // internalized, as they are either externally visible or referencing
1448 // external symbols. Symbols that have AvailableExternally or Appending
1449 // linkage might be used by future passes and should be kept as is.
1450 // These linkages are seen in Unified regular LTO, because the process
1451 // of creating split LTO units introduces symbols with that linkage into
1452 // one of the created modules. Normally, only the ThinLTO backend would
1453 // compile this module, but Unified Regular LTO processes both
1454 // modules created by the splitting process as regular LTO modules.
1455 if ((LTOMode == LTOKind::LTOK_UnifiedRegular) &&
1456 ((GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) ||
1457 GV->hasAvailableExternallyLinkage() || GV->hasAppendingLinkage()))
1458 continue;
1459
1460 GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
1461 : GlobalValue::UnnamedAddr::None);
1462 if (EnableLTOInternalization && R.second.Partition == 0)
1463 GV->setLinkage(GlobalValue::InternalLinkage);
1464 }
1465
1466 if (Conf.PostInternalizeModuleHook &&
1467 !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
1468 return Error::success();
1469 }
1470
1471 if (!RegularLTO.EmptyCombinedModule || Conf.AlwaysEmitRegularLTOObj) {
1472 if (Error Err =
1473 backend(C: Conf, AddStream, ParallelCodeGenParallelismLevel: RegularLTO.ParallelCodeGenParallelismLevel,
1474 M&: *RegularLTO.CombinedModule, CombinedIndex&: ThinLTO.CombinedIndex))
1475 return Err;
1476 }
1477
1478 return Error::success();
1479}
1480
1481SmallVector<const char *> LTO::getRuntimeLibcallSymbols(const Triple &TT) {
1482 RTLIB::RuntimeLibcallsInfo Libcalls(TT);
1483 SmallVector<const char *> LibcallSymbols;
1484 LibcallSymbols.reserve(N: Libcalls.getNumAvailableLibcallImpls());
1485
1486 for (RTLIB::LibcallImpl Impl : RTLIB::libcall_impls()) {
1487 if (Libcalls.isAvailable(Impl))
1488 LibcallSymbols.push_back(Elt: Libcalls.getLibcallImplName(CallImpl: Impl).data());
1489 }
1490
1491 return LibcallSymbols;
1492}
1493
1494Error ThinBackendProc::emitFiles(
1495 const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
1496 const std::string &NewModulePath) const {
1497 return emitFiles(ImportList, ModulePath, NewModulePath,
1498 SummaryPath: NewModulePath + ".thinlto.bc",
1499 /*ImportsFiles=*/std::nullopt);
1500}
1501
1502Error ThinBackendProc::emitFiles(
1503 const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
1504 const std::string &NewModulePath, StringRef SummaryPath,
1505 std::optional<std::reference_wrapper<ImportsFilesContainer>> ImportsFiles)
1506 const {
1507 ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
1508 GVSummaryPtrSet DeclarationSummaries;
1509
1510 std::error_code EC;
1511 gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
1512 ImportList, ModuleToSummariesForIndex,
1513 DecSummaries&: DeclarationSummaries);
1514
1515 raw_fd_ostream OS(SummaryPath, EC, sys::fs::OpenFlags::OF_None);
1516 if (EC)
1517 return createFileError(F: "cannot open " + Twine(SummaryPath), EC);
1518
1519 writeIndexToFile(Index: CombinedIndex, Out&: OS, ModuleToSummariesForIndex: &ModuleToSummariesForIndex,
1520 DecSummaries: &DeclarationSummaries);
1521
1522 if (ShouldEmitImportsFiles) {
1523 Error ImportsFilesError = EmitImportsFiles(
1524 ModulePath, OutputFilename: NewModulePath + ".imports", ModuleToSummariesForIndex);
1525 if (ImportsFilesError)
1526 return ImportsFilesError;
1527 }
1528
1529 // Optionally, store the imports files.
1530 if (ImportsFiles)
1531 processImportsFiles(
1532 ModulePath, ModuleToSummariesForIndex,
1533 F: [&](StringRef M) { ImportsFiles->get().push_back(Elt: M.str()); });
1534
1535 return Error::success();
1536}
1537
1538namespace {
1539/// Base class for ThinLTO backends that perform code generation and insert the
1540/// generated files back into the link.
1541class CGThinBackend : public ThinBackendProc {
1542protected:
1543 AddStreamFn AddStream;
1544 DenseSet<GlobalValue::GUID> CfiFunctionDefs;
1545 DenseSet<GlobalValue::GUID> CfiFunctionDecls;
1546 bool ShouldEmitIndexFiles;
1547
1548public:
1549 CGThinBackend(
1550 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1551 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1552 AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
1553 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
1554 ThreadPoolStrategy ThinLTOParallelism)
1555 : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1556 OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
1557 AddStream(std::move(AddStream)),
1558 ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
1559 auto &Defs = CombinedIndex.cfiFunctionDefs();
1560 CfiFunctionDefs.insert_range(R: Defs.guids());
1561 auto &Decls = CombinedIndex.cfiFunctionDecls();
1562 CfiFunctionDecls.insert_range(R: Decls.guids());
1563 }
1564};
1565
1566/// This backend performs code generation by scheduling a job to run on
1567/// an in-process thread when invoked for each task.
1568class InProcessThinBackend : public CGThinBackend {
1569protected:
1570 FileCache Cache;
1571
1572public:
1573 InProcessThinBackend(
1574 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1575 ThreadPoolStrategy ThinLTOParallelism,
1576 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1577 AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
1578 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
1579 : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1580 AddStream, OnWrite, ShouldEmitIndexFiles,
1581 ShouldEmitImportsFiles, ThinLTOParallelism),
1582 Cache(std::move(Cache)) {}
1583
1584 virtual Error runThinLTOBackendThread(
1585 AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
1586 ModuleSummaryIndex &CombinedIndex,
1587 const FunctionImporter::ImportMapTy &ImportList,
1588 const FunctionImporter::ExportSetTy &ExportList,
1589 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1590 const GVSummaryMapTy &DefinedGlobals,
1591 MapVector<StringRef, BitcodeModule> &ModuleMap) {
1592 auto ModuleID = BM.getModuleIdentifier();
1593 llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (in-process)",
1594 ModuleID);
1595 auto RunThinBackend = [&](AddStreamFn AddStream) {
1596 LTOLLVMContext BackendContext(Conf);
1597 Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(Context&: BackendContext);
1598 if (!MOrErr)
1599 return MOrErr.takeError();
1600
1601 return thinBackend(C: Conf, Task, AddStream, M&: **MOrErr, CombinedIndex,
1602 ImportList, DefinedGlobals, ModuleMap: &ModuleMap,
1603 CodeGenOnly: Conf.CodeGenOnly);
1604 };
1605 if (ShouldEmitIndexFiles) {
1606 if (auto E = emitFiles(ImportList, ModulePath: ModuleID, NewModulePath: ModuleID.str()))
1607 return E;
1608 }
1609
1610 if (!Cache.isValid() || !CombinedIndex.modulePaths().count(Key: ModuleID) ||
1611 all_of(Range: CombinedIndex.getModuleHash(ModPath: ModuleID),
1612 P: [](uint32_t V) { return V == 0; }))
1613 // Cache disabled or no entry for this module in the combined index or
1614 // no module hash.
1615 return RunThinBackend(AddStream);
1616
1617 // The module may be cached, this helps handling it.
1618 std::string Key = computeLTOCacheKey(
1619 Conf, Index: CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1620 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1621 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID);
1622 if (Error Err = CacheAddStreamOrErr.takeError())
1623 return Err;
1624 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
1625 if (CacheAddStream)
1626 return RunThinBackend(CacheAddStream);
1627
1628 return Error::success();
1629 }
1630
1631 Error start(
1632 unsigned Task, BitcodeModule BM,
1633 const FunctionImporter::ImportMapTy &ImportList,
1634 const FunctionImporter::ExportSetTy &ExportList,
1635 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1636 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1637 StringRef ModulePath = BM.getModuleIdentifier();
1638 assert(ModuleToDefinedGVSummaries.count(ModulePath));
1639 const GVSummaryMapTy &DefinedGlobals =
1640 ModuleToDefinedGVSummaries.find(Val: ModulePath)->second;
1641 BackendThreadPool.async(
1642 F: [=](BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
1643 const FunctionImporter::ImportMapTy &ImportList,
1644 const FunctionImporter::ExportSetTy &ExportList,
1645 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
1646 &ResolvedODR,
1647 const GVSummaryMapTy &DefinedGlobals,
1648 MapVector<StringRef, BitcodeModule> &ModuleMap) {
1649 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
1650 timeTraceProfilerInitialize(TimeTraceGranularity: Conf.TimeTraceGranularity,
1651 ProcName: "thin backend");
1652 Error E = runThinLTOBackendThread(
1653 AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList,
1654 ResolvedODR, DefinedGlobals, ModuleMap);
1655 if (E) {
1656 std::unique_lock<std::mutex> L(ErrMu);
1657 if (Err)
1658 Err = joinErrors(E1: std::move(*Err), E2: std::move(E));
1659 else
1660 Err = std::move(E);
1661 }
1662 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
1663 timeTraceProfilerFinishThread();
1664 },
1665 ArgList&: BM, ArgList: std::ref(t&: CombinedIndex), ArgList: std::ref(t: ImportList), ArgList: std::ref(t: ExportList),
1666 ArgList: std::ref(t: ResolvedODR), ArgList: std::ref(t: DefinedGlobals), ArgList: std::ref(t&: ModuleMap));
1667
1668 if (OnWrite)
1669 OnWrite(std::string(ModulePath));
1670 return Error::success();
1671 }
1672};
1673
1674/// This backend is utilized in the first round of a two-codegen round process.
1675/// It first saves optimized bitcode files to disk before the codegen process
1676/// begins. After codegen, it stores the resulting object files in a scratch
1677/// buffer. Note the codegen data stored in the scratch buffer will be extracted
1678/// and merged in the subsequent step.
1679class FirstRoundThinBackend : public InProcessThinBackend {
1680 AddStreamFn IRAddStream;
1681 FileCache IRCache;
1682
1683public:
1684 FirstRoundThinBackend(
1685 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1686 ThreadPoolStrategy ThinLTOParallelism,
1687 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1688 AddStreamFn CGAddStream, FileCache CGCache, AddStreamFn IRAddStream,
1689 FileCache IRCache)
1690 : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
1691 ModuleToDefinedGVSummaries, std::move(CGAddStream),
1692 std::move(CGCache), /*OnWrite=*/nullptr,
1693 /*ShouldEmitIndexFiles=*/false,
1694 /*ShouldEmitImportsFiles=*/false),
1695 IRAddStream(std::move(IRAddStream)), IRCache(std::move(IRCache)) {}
1696
1697 Error runThinLTOBackendThread(
1698 AddStreamFn CGAddStream, FileCache CGCache, unsigned Task,
1699 BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
1700 const FunctionImporter::ImportMapTy &ImportList,
1701 const FunctionImporter::ExportSetTy &ExportList,
1702 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1703 const GVSummaryMapTy &DefinedGlobals,
1704 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1705 auto ModuleID = BM.getModuleIdentifier();
1706 llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (first round)",
1707 ModuleID);
1708 auto RunThinBackend = [&](AddStreamFn CGAddStream,
1709 AddStreamFn IRAddStream) {
1710 LTOLLVMContext BackendContext(Conf);
1711 Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(Context&: BackendContext);
1712 if (!MOrErr)
1713 return MOrErr.takeError();
1714
1715 return thinBackend(C: Conf, Task, AddStream: CGAddStream, M&: **MOrErr, CombinedIndex,
1716 ImportList, DefinedGlobals, ModuleMap: &ModuleMap,
1717 CodeGenOnly: Conf.CodeGenOnly, IRAddStream);
1718 };
1719 // Like InProcessThinBackend, we produce index files as needed for
1720 // FirstRoundThinBackend. However, these files are not generated for
1721 // SecondRoundThinBackend.
1722 if (ShouldEmitIndexFiles) {
1723 if (auto E = emitFiles(ImportList, ModulePath: ModuleID, NewModulePath: ModuleID.str()))
1724 return E;
1725 }
1726
1727 assert((CGCache.isValid() == IRCache.isValid()) &&
1728 "Both caches for CG and IR should have matching availability");
1729 if (!CGCache.isValid() || !CombinedIndex.modulePaths().count(Key: ModuleID) ||
1730 all_of(Range: CombinedIndex.getModuleHash(ModPath: ModuleID),
1731 P: [](uint32_t V) { return V == 0; }))
1732 // Cache disabled or no entry for this module in the combined index or
1733 // no module hash.
1734 return RunThinBackend(CGAddStream, IRAddStream);
1735
1736 // Get CGKey for caching object in CGCache.
1737 std::string CGKey = computeLTOCacheKey(
1738 Conf, Index: CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1739 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1740 Expected<AddStreamFn> CacheCGAddStreamOrErr =
1741 CGCache(Task, CGKey, ModuleID);
1742 if (Error Err = CacheCGAddStreamOrErr.takeError())
1743 return Err;
1744 AddStreamFn &CacheCGAddStream = *CacheCGAddStreamOrErr;
1745
1746 // Get IRKey for caching (optimized) IR in IRCache with an extra ID.
1747 std::string IRKey = recomputeLTOCacheKey(Key: CGKey, /*ExtraID=*/"IR");
1748 Expected<AddStreamFn> CacheIRAddStreamOrErr =
1749 IRCache(Task, IRKey, ModuleID);
1750 if (Error Err = CacheIRAddStreamOrErr.takeError())
1751 return Err;
1752 AddStreamFn &CacheIRAddStream = *CacheIRAddStreamOrErr;
1753
1754 // Ideally, both CG and IR caching should be synchronized. However, in
1755 // practice, their availability may differ due to different expiration
1756 // times. Therefore, if either cache is missing, the backend process is
1757 // triggered.
1758 if (CacheCGAddStream || CacheIRAddStream) {
1759 LLVM_DEBUG(dbgs() << "[FirstRound] Cache Miss for "
1760 << BM.getModuleIdentifier() << "\n");
1761 return RunThinBackend(CacheCGAddStream ? CacheCGAddStream : CGAddStream,
1762 CacheIRAddStream ? CacheIRAddStream : IRAddStream);
1763 }
1764
1765 return Error::success();
1766 }
1767};
1768
1769/// This backend operates in the second round of a two-codegen round process.
1770/// It starts by reading the optimized bitcode files that were saved during the
1771/// first round. The backend then executes the codegen only to further optimize
1772/// the code, utilizing the codegen data merged from the first round. Finally,
1773/// it writes the resulting object files as usual.
1774class SecondRoundThinBackend : public InProcessThinBackend {
1775 std::unique_ptr<SmallVector<StringRef>> IRFiles;
1776 stable_hash CombinedCGDataHash;
1777
1778public:
1779 SecondRoundThinBackend(
1780 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1781 ThreadPoolStrategy ThinLTOParallelism,
1782 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1783 AddStreamFn AddStream, FileCache Cache,
1784 std::unique_ptr<SmallVector<StringRef>> IRFiles,
1785 stable_hash CombinedCGDataHash)
1786 : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
1787 ModuleToDefinedGVSummaries, std::move(AddStream),
1788 std::move(Cache),
1789 /*OnWrite=*/nullptr,
1790 /*ShouldEmitIndexFiles=*/false,
1791 /*ShouldEmitImportsFiles=*/false),
1792 IRFiles(std::move(IRFiles)), CombinedCGDataHash(CombinedCGDataHash) {}
1793
1794 Error runThinLTOBackendThread(
1795 AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
1796 ModuleSummaryIndex &CombinedIndex,
1797 const FunctionImporter::ImportMapTy &ImportList,
1798 const FunctionImporter::ExportSetTy &ExportList,
1799 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1800 const GVSummaryMapTy &DefinedGlobals,
1801 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1802 auto ModuleID = BM.getModuleIdentifier();
1803 llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (second round)",
1804 ModuleID);
1805 auto RunThinBackend = [&](AddStreamFn AddStream) {
1806 LTOLLVMContext BackendContext(Conf);
1807 std::unique_ptr<Module> LoadedModule =
1808 cgdata::loadModuleForTwoRounds(OrigModule&: BM, Task, Context&: BackendContext, IRFiles: *IRFiles);
1809
1810 return thinBackend(C: Conf, Task, AddStream, M&: *LoadedModule, CombinedIndex,
1811 ImportList, DefinedGlobals, ModuleMap: &ModuleMap,
1812 /*CodeGenOnly=*/true);
1813 };
1814 if (!Cache.isValid() || !CombinedIndex.modulePaths().count(Key: ModuleID) ||
1815 all_of(Range: CombinedIndex.getModuleHash(ModPath: ModuleID),
1816 P: [](uint32_t V) { return V == 0; }))
1817 // Cache disabled or no entry for this module in the combined index or
1818 // no module hash.
1819 return RunThinBackend(AddStream);
1820
1821 // Get Key for caching the final object file in Cache with the combined
1822 // CGData hash.
1823 std::string Key = computeLTOCacheKey(
1824 Conf, Index: CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1825 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1826 Key = recomputeLTOCacheKey(Key,
1827 /*ExtraID=*/std::to_string(val: CombinedCGDataHash));
1828 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID);
1829 if (Error Err = CacheAddStreamOrErr.takeError())
1830 return Err;
1831 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
1832
1833 if (CacheAddStream) {
1834 LLVM_DEBUG(dbgs() << "[SecondRound] Cache Miss for "
1835 << BM.getModuleIdentifier() << "\n");
1836 return RunThinBackend(CacheAddStream);
1837 }
1838
1839 return Error::success();
1840 }
1841};
1842} // end anonymous namespace
1843
1844ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
1845 lto::IndexWriteCallback OnWrite,
1846 bool ShouldEmitIndexFiles,
1847 bool ShouldEmitImportsFiles) {
1848 auto Func =
1849 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1850 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1851 AddStreamFn AddStream, FileCache Cache) {
1852 return std::make_unique<InProcessThinBackend>(
1853 args: Conf, args&: CombinedIndex, args: Parallelism, args: ModuleToDefinedGVSummaries,
1854 args&: AddStream, args&: Cache, args: OnWrite, args: ShouldEmitIndexFiles,
1855 args: ShouldEmitImportsFiles);
1856 };
1857 return ThinBackend(Func, Parallelism);
1858}
1859
1860StringLiteral lto::getThinLTODefaultCPU(const Triple &TheTriple) {
1861 if (!TheTriple.isOSDarwin())
1862 return "";
1863 if (TheTriple.getArch() == Triple::x86_64)
1864 return "core2";
1865 if (TheTriple.getArch() == Triple::x86)
1866 return "yonah";
1867 if (TheTriple.isArm64e())
1868 return "apple-a12";
1869 if (TheTriple.getArch() == Triple::aarch64 ||
1870 TheTriple.getArch() == Triple::aarch64_32)
1871 return "cyclone";
1872 return "";
1873}
1874
1875// Given the original \p Path to an output file, replace any path
1876// prefix matching \p OldPrefix with \p NewPrefix. Also, create the
1877// resulting directory if it does not yet exist.
1878std::string lto::getThinLTOOutputFile(StringRef Path, StringRef OldPrefix,
1879 StringRef NewPrefix) {
1880 if (OldPrefix.empty() && NewPrefix.empty())
1881 return std::string(Path);
1882 SmallString<128> NewPath(Path);
1883 llvm::sys::path::replace_path_prefix(Path&: NewPath, OldPrefix, NewPrefix);
1884 StringRef ParentPath = llvm::sys::path::parent_path(path: NewPath.str());
1885 if (!ParentPath.empty()) {
1886 // Make sure the new directory exists, creating it if necessary.
1887 if (std::error_code EC = llvm::sys::fs::create_directories(path: ParentPath))
1888 llvm::errs() << "warning: could not create directory '" << ParentPath
1889 << "': " << EC.message() << '\n';
1890 }
1891 return std::string(NewPath);
1892}
1893
1894namespace {
1895class WriteIndexesThinBackend : public ThinBackendProc {
1896 std::string OldPrefix, NewPrefix, NativeObjectPrefix;
1897 raw_fd_ostream *LinkedObjectsFile;
1898
1899public:
1900 WriteIndexesThinBackend(
1901 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1902 ThreadPoolStrategy ThinLTOParallelism,
1903 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1904 std::string OldPrefix, std::string NewPrefix,
1905 std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
1906 raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite)
1907 : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1908 OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
1909 OldPrefix(OldPrefix), NewPrefix(NewPrefix),
1910 NativeObjectPrefix(NativeObjectPrefix),
1911 LinkedObjectsFile(LinkedObjectsFile) {}
1912
1913 Error start(
1914 unsigned Task, BitcodeModule BM,
1915 const FunctionImporter::ImportMapTy &ImportList,
1916 const FunctionImporter::ExportSetTy &ExportList,
1917 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1918 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1919 StringRef ModulePath = BM.getModuleIdentifier();
1920
1921 // The contents of this file may be used as input to a native link, and must
1922 // therefore contain the processed modules in a determinstic order that
1923 // match the order they are provided on the command line. For that reason,
1924 // we cannot include this in the asynchronously executed lambda below.
1925 if (LinkedObjectsFile) {
1926 std::string ObjectPrefix =
1927 NativeObjectPrefix.empty() ? NewPrefix : NativeObjectPrefix;
1928 std::string LinkedObjectsFilePath =
1929 getThinLTOOutputFile(Path: ModulePath, OldPrefix, NewPrefix: ObjectPrefix);
1930 *LinkedObjectsFile << LinkedObjectsFilePath << '\n';
1931 }
1932
1933 BackendThreadPool.async(
1934 F: [this](const StringRef ModulePath,
1935 const FunctionImporter::ImportMapTy &ImportList,
1936 const std::string &OldPrefix, const std::string &NewPrefix) {
1937 std::string NewModulePath =
1938 getThinLTOOutputFile(Path: ModulePath, OldPrefix, NewPrefix);
1939 auto E = emitFiles(ImportList, ModulePath, NewModulePath);
1940 if (E) {
1941 std::unique_lock<std::mutex> L(ErrMu);
1942 if (Err)
1943 Err = joinErrors(E1: std::move(*Err), E2: std::move(E));
1944 else
1945 Err = std::move(E);
1946 return;
1947 }
1948 },
1949 ArgList&: ModulePath, ArgList: ImportList, ArgList&: OldPrefix, ArgList&: NewPrefix);
1950
1951 if (OnWrite)
1952 OnWrite(std::string(ModulePath));
1953 return Error::success();
1954 }
1955
1956 bool isSensitiveToInputOrder() override {
1957 // The order which modules are written to LinkedObjectsFile should be
1958 // deterministic and match the order they are passed on the command line.
1959 return true;
1960 }
1961};
1962} // end anonymous namespace
1963
1964ThinBackend lto::createWriteIndexesThinBackend(
1965 ThreadPoolStrategy Parallelism, std::string OldPrefix,
1966 std::string NewPrefix, std::string NativeObjectPrefix,
1967 bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile,
1968 IndexWriteCallback OnWrite) {
1969 auto Func =
1970 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1971 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1972 AddStreamFn AddStream, FileCache Cache) {
1973 return std::make_unique<WriteIndexesThinBackend>(
1974 args: Conf, args&: CombinedIndex, args: Parallelism, args: ModuleToDefinedGVSummaries,
1975 args: OldPrefix, args: NewPrefix, args: NativeObjectPrefix, args: ShouldEmitImportsFiles,
1976 args: LinkedObjectsFile, args: OnWrite);
1977 };
1978 return ThinBackend(Func, Parallelism);
1979}
1980
1981Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
1982 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
1983 llvm::TimeTraceScope timeScope("Run ThinLTO");
1984 LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
1985 ThinLTO.CombinedIndex.releaseTemporaryMemory();
1986 timeTraceProfilerBegin(Name: "ThinLink", Detail: StringRef(""));
1987 llvm::scope_exit TimeTraceScopeExit([]() {
1988 if (llvm::timeTraceProfilerEnabled())
1989 llvm::timeTraceProfilerEnd();
1990 });
1991 if (ThinLTO.ModuleMap.empty())
1992 return Error::success();
1993
1994 if (ThinLTO.ModulesToCompile && ThinLTO.ModulesToCompile->empty()) {
1995 llvm::errs() << "warning: [ThinLTO] No module compiled\n";
1996 return Error::success();
1997 }
1998
1999 if (Conf.CombinedIndexHook &&
2000 !Conf.CombinedIndexHook(ThinLTO.CombinedIndex, GUIDPreservedSymbols))
2001 return Error::success();
2002
2003 // Collect for each module the list of function it defines (GUID ->
2004 // Summary).
2005 DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(
2006 ThinLTO.ModuleMap.size());
2007 ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule(
2008 ModuleToDefinedGVSummaries);
2009 // Create entries for any modules that didn't have any GV summaries
2010 // (either they didn't have any GVs to start with, or we suppressed
2011 // generation of the summaries because they e.g. had inline assembly
2012 // uses that couldn't be promoted/renamed on export). This is so
2013 // InProcessThinBackend::start can still launch a backend thread, which
2014 // is passed the map of summaries for the module, without any special
2015 // handling for this case.
2016 for (auto &Mod : ThinLTO.ModuleMap)
2017 if (!ModuleToDefinedGVSummaries.count(Val: Mod.first))
2018 ModuleToDefinedGVSummaries.try_emplace(Key: Mod.first);
2019
2020 FunctionImporter::ImportListsTy ImportLists(ThinLTO.ModuleMap.size());
2021 DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(
2022 ThinLTO.ModuleMap.size());
2023 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
2024
2025 if (DumpThinCGSCCs)
2026 ThinLTO.CombinedIndex.dumpSCCs(OS&: outs());
2027
2028 std::set<GlobalValue::GUID> ExportedGUIDs;
2029
2030 bool WholeProgramVisibilityEnabledInLTO =
2031 Conf.HasWholeProgramVisibility &&
2032 // If validation is enabled, upgrade visibility only when all vtables
2033 // have typeinfos.
2034 (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
2035 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
2036 ThinLTO.CombinedIndex.setWithWholeProgramVisibility();
2037
2038 // If we're validating, get the vtable symbols that should not be
2039 // upgraded because they correspond to typeIDs outside of index-based
2040 // WPD info.
2041 DenseSet<GlobalValue::GUID> VisibleToRegularObjSymbols;
2042 if (WholeProgramVisibilityEnabledInLTO &&
2043 Conf.ValidateAllVtablesHaveTypeInfos) {
2044 // This returns true when the name is local or not defined. Locals are
2045 // expected to be handled separately.
2046 auto IsVisibleToRegularObj = [&](StringRef name) {
2047 auto It = GlobalResolutions->find(Val: name);
2048 return (It == GlobalResolutions->end() ||
2049 It->second.VisibleOutsideSummary || !It->second.Prevailing);
2050 };
2051
2052 getVisibleToRegularObjVtableGUIDs(Index&: ThinLTO.CombinedIndex,
2053 VisibleToRegularObjSymbols,
2054 IsVisibleToRegularObj);
2055 }
2056
2057 // If allowed, upgrade public vcall visibility to linkage unit visibility in
2058 // the summaries before whole program devirtualization below.
2059 updateVCallVisibilityInIndex(
2060 Index&: ThinLTO.CombinedIndex, WholeProgramVisibilityEnabledInLTO,
2061 DynamicExportSymbols, VisibleToRegularObjSymbols);
2062
2063 // Perform index-based WPD. This will return immediately if there are
2064 // no index entries in the typeIdMetadata map (e.g. if we are instead
2065 // performing IR-based WPD in hybrid regular/thin LTO mode).
2066 std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
2067 DenseSet<StringRef> ExternallyVisibleSymbolNames;
2068
2069 // Used by the promotion-time renaming logic. When non-null, this set
2070 // identifies symbols that should not be renamed during promotion.
2071 // It is non-null only when whole-program visibility is enabled and
2072 // renaming is not forced. Otherwise, the default renaming behavior applies.
2073 DenseSet<StringRef> *ExternallyVisibleSymbolNamesPtr =
2074 (WholeProgramVisibilityEnabledInLTO && !AlwaysRenamePromotedLocals)
2075 ? &ExternallyVisibleSymbolNames
2076 : nullptr;
2077 runWholeProgramDevirtOnIndex(Summary&: ThinLTO.CombinedIndex, ExportedGUIDs,
2078 LocalWPDTargetsMap,
2079 ExternallyVisibleSymbolNamesPtr);
2080
2081 auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
2082 return ThinLTO.isPrevailingModuleForGUID(GUID, Module: S->modulePath());
2083 };
2084 if (EnableMemProfContextDisambiguation) {
2085 MemProfContextDisambiguation ContextDisambiguation;
2086 ContextDisambiguation.run(
2087 Index&: ThinLTO.CombinedIndex, isPrevailing, Ctx&: RegularLTO.Ctx,
2088 EmitRemark: [&](StringRef PassName, StringRef RemarkName, const Twine &Msg) {
2089 auto R = OptimizationRemark(PassName.data(), RemarkName,
2090 LinkerRemarkFunction);
2091 R << Msg.str();
2092 emitRemark(Remark&: R);
2093 });
2094 }
2095
2096 // Figure out which symbols need to be internalized. This also needs to happen
2097 // at -O0 because summary-based DCE is implemented using internalization, and
2098 // we must apply DCE consistently with the full LTO module in order to avoid
2099 // undefined references during the final link.
2100 for (auto &Res : *GlobalResolutions) {
2101 // If the symbol does not have external references or it is not prevailing,
2102 // then not need to mark it as exported from a ThinLTO partition.
2103 if (Res.second.Partition != GlobalResolution::External ||
2104 !Res.second.isPrevailingIRSymbol())
2105 continue;
2106 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
2107 GlobalName: GlobalValue::dropLLVMManglingEscape(Name: Res.second.IRName));
2108 // Mark exported unless index-based analysis determined it to be dead.
2109 if (ThinLTO.CombinedIndex.isGUIDLive(GUID))
2110 ExportedGUIDs.insert(x: GUID);
2111 }
2112
2113 // Reset the GlobalResolutions to deallocate the associated memory, as there
2114 // are no further accesses. We specifically want to do this before computing
2115 // cross module importing, which adds to peak memory via the computed import
2116 // and export lists.
2117 releaseGlobalResolutionsMemory();
2118
2119 if (Conf.OptLevel > 0)
2120 ComputeCrossModuleImport(Index: ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
2121 isPrevailing, ImportLists, ExportLists);
2122
2123 // Any functions referenced by the jump table in the regular LTO object must
2124 // be exported.
2125 auto &Defs = ThinLTO.CombinedIndex.cfiFunctionDefs();
2126 ExportedGUIDs.insert(first: Defs.guid_begin(), last: Defs.guid_end());
2127 auto &Decls = ThinLTO.CombinedIndex.cfiFunctionDecls();
2128 ExportedGUIDs.insert(first: Decls.guid_begin(), last: Decls.guid_end());
2129
2130 auto isExported = [&](StringRef ModuleIdentifier, ValueInfo VI) {
2131 const auto &ExportList = ExportLists.find(Val: ModuleIdentifier);
2132 return (ExportList != ExportLists.end() && ExportList->second.count(V: VI)) ||
2133 ExportedGUIDs.count(x: VI.getGUID());
2134 };
2135
2136 // Update local devirtualized targets that were exported by cross-module
2137 // importing or by other devirtualizations marked in the ExportedGUIDs set.
2138 updateIndexWPDForExports(Summary&: ThinLTO.CombinedIndex, isExported,
2139 LocalWPDTargetsMap, ExternallyVisibleSymbolNamesPtr);
2140
2141 if (ExternallyVisibleSymbolNamesPtr) {
2142 // Add to ExternallyVisibleSymbolNames the set of unique names used by all
2143 // externally visible symbols in the index.
2144 for (auto &I : ThinLTO.CombinedIndex) {
2145 ValueInfo VI = ThinLTO.CombinedIndex.getValueInfo(R: I);
2146 for (const auto &Summary : VI.getSummaryList()) {
2147 const GlobalValueSummary *Base = Summary->getBaseObject();
2148 if (GlobalValue::isLocalLinkage(Linkage: Base->linkage()))
2149 continue;
2150
2151 ExternallyVisibleSymbolNamesPtr->insert(V: VI.name());
2152 break;
2153 }
2154 }
2155 }
2156
2157 thinLTOInternalizeAndPromoteInIndex(Index&: ThinLTO.CombinedIndex, isExported,
2158 isPrevailing,
2159 ExternallyVisibleSymbolNamesPtr);
2160
2161 auto recordNewLinkage = [&](StringRef ModuleIdentifier,
2162 GlobalValue::GUID GUID,
2163 GlobalValue::LinkageTypes NewLinkage) {
2164 ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
2165 };
2166 thinLTOResolvePrevailingInIndex(C: Conf, Index&: ThinLTO.CombinedIndex, isPrevailing,
2167 recordNewLinkage, GUIDPreservedSymbols);
2168
2169 thinLTOPropagateFunctionAttrs(Index&: ThinLTO.CombinedIndex, isPrevailing);
2170
2171 generateParamAccessSummary(Index&: ThinLTO.CombinedIndex);
2172
2173 if (llvm::timeTraceProfilerEnabled())
2174 llvm::timeTraceProfilerEnd();
2175
2176 TimeTraceScopeExit.release();
2177
2178 auto &ModuleMap =
2179 ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
2180
2181 auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error {
2182 auto ProcessOneModule = [&](int I) -> Error {
2183 auto &Mod = *(ModuleMap.begin() + I);
2184 // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for
2185 // combined module and parallel code generation partitions.
2186 return BackendProcess->start(
2187 Task: RegularLTO.ParallelCodeGenParallelismLevel + I, BM: Mod.second,
2188 ImportList: ImportLists[Mod.first], ExportList: ExportLists[Mod.first],
2189 ResolvedODR: ResolvedODR[Mod.first], ModuleMap&: ThinLTO.ModuleMap);
2190 };
2191
2192 BackendProcess->setup(ThinLTONumTasks: ModuleMap.size(),
2193 ThinLTOTaskOffset: RegularLTO.ParallelCodeGenParallelismLevel,
2194 Triple: RegularLTO.CombinedModule->getTargetTriple());
2195
2196 if (BackendProcess->getThreadCount() == 1 ||
2197 BackendProcess->isSensitiveToInputOrder()) {
2198 // Process the modules in the order they were provided on the
2199 // command-line. It is important for this codepath to be used for
2200 // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists
2201 // ThinLTO objects in the same order as the inputs, which otherwise would
2202 // affect the final link order.
2203 for (int I = 0, E = ModuleMap.size(); I != E; ++I)
2204 if (Error E = ProcessOneModule(I))
2205 return E;
2206 } else {
2207 // When executing in parallel, process largest bitsize modules first to
2208 // improve parallelism, and avoid starving the thread pool near the end.
2209 // This saves about 15 sec on a 36-core machine while link `clang.exe`
2210 // (out of 100 sec).
2211 std::vector<BitcodeModule *> ModulesVec;
2212 ModulesVec.reserve(n: ModuleMap.size());
2213 for (auto &Mod : ModuleMap)
2214 ModulesVec.push_back(x: &Mod.second);
2215 for (int I : generateModulesOrdering(R: ModulesVec))
2216 if (Error E = ProcessOneModule(I))
2217 return E;
2218 }
2219 return BackendProcess->wait();
2220 };
2221
2222 if (!CodeGenDataThinLTOTwoRounds) {
2223 std::unique_ptr<ThinBackendProc> BackendProc =
2224 ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
2225 AddStream, Cache);
2226 return RunBackends(BackendProc.get());
2227 }
2228
2229 // Perform two rounds of code generation for ThinLTO:
2230 // 1. First round: Perform optimization and code generation, outputting to
2231 // temporary scratch objects.
2232 // 2. Merge code generation data extracted from the temporary scratch objects.
2233 // 3. Second round: Execute code generation again using the merged data.
2234 LLVM_DEBUG(dbgs() << "[TwoRounds] Initializing ThinLTO two-codegen rounds\n");
2235
2236 unsigned MaxTasks = getMaxTasks();
2237 auto Parallelism = ThinLTO.Backend.getParallelism();
2238 // Set up two additional streams and caches for storing temporary scratch
2239 // objects and optimized IRs, using the same cache directory as the original.
2240 cgdata::StreamCacheData CG(MaxTasks, Cache, "CG"), IR(MaxTasks, Cache, "IR");
2241
2242 // First round: Execute optimization and code generation, outputting to
2243 // temporary scratch objects. Serialize the optimized IRs before initiating
2244 // code generation.
2245 LLVM_DEBUG(dbgs() << "[TwoRounds] Running the first round of codegen\n");
2246 auto FirstRoundLTO = std::make_unique<FirstRoundThinBackend>(
2247 args&: Conf, args&: ThinLTO.CombinedIndex, args&: Parallelism, args&: ModuleToDefinedGVSummaries,
2248 args&: CG.AddStream, args&: CG.Cache, args&: IR.AddStream, args&: IR.Cache);
2249 if (Error E = RunBackends(FirstRoundLTO.get()))
2250 return E;
2251
2252 LLVM_DEBUG(dbgs() << "[TwoRounds] Merging codegen data\n");
2253 auto CombinedHashOrErr = cgdata::mergeCodeGenData(ObjectFiles: *CG.getResult());
2254 if (Error E = CombinedHashOrErr.takeError())
2255 return E;
2256 auto CombinedHash = *CombinedHashOrErr;
2257 LLVM_DEBUG(dbgs() << "[TwoRounds] CGData hash: " << CombinedHash << "\n");
2258
2259 // Second round: Read the optimized IRs and execute code generation using the
2260 // merged data.
2261 LLVM_DEBUG(dbgs() << "[TwoRounds] Running the second round of codegen\n");
2262 auto SecondRoundLTO = std::make_unique<SecondRoundThinBackend>(
2263 args&: Conf, args&: ThinLTO.CombinedIndex, args&: Parallelism, args&: ModuleToDefinedGVSummaries,
2264 args&: AddStream, args&: Cache, args: IR.getResult(), args&: CombinedHash);
2265 return RunBackends(SecondRoundLTO.get());
2266}
2267
2268Expected<LLVMRemarkFileHandle> lto::setupLLVMOptimizationRemarks(
2269 LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses,
2270 StringRef RemarksFormat, bool RemarksWithHotness,
2271 std::optional<uint64_t> RemarksHotnessThreshold, int Count) {
2272 std::string Filename = std::string(RemarksFilename);
2273 // For ThinLTO, file.opt.<format> becomes
2274 // file.opt.<format>.thin.<num>.<format>.
2275 if (!Filename.empty() && Count != -1)
2276 Filename =
2277 (Twine(Filename) + ".thin." + llvm::utostr(X: Count) + "." + RemarksFormat)
2278 .str();
2279
2280 auto ResultOrErr = llvm::setupLLVMOptimizationRemarks(
2281 Context, RemarksFilename: Filename, RemarksPasses, RemarksFormat, RemarksWithHotness,
2282 RemarksHotnessThreshold);
2283 if (Error E = ResultOrErr.takeError())
2284 return std::move(E);
2285
2286 if (*ResultOrErr)
2287 (*ResultOrErr)->keep();
2288
2289 return ResultOrErr;
2290}
2291
2292Expected<std::unique_ptr<ToolOutputFile>>
2293lto::setupStatsFile(StringRef StatsFilename) {
2294 // Setup output file to emit statistics.
2295 if (StatsFilename.empty())
2296 return nullptr;
2297
2298 llvm::EnableStatistics(DoPrintOnExit: false);
2299 std::error_code EC;
2300 auto StatsFile =
2301 std::make_unique<ToolOutputFile>(args&: StatsFilename, args&: EC, args: sys::fs::OF_None);
2302 if (EC)
2303 return errorCodeToError(EC);
2304
2305 StatsFile->keep();
2306 return std::move(StatsFile);
2307}
2308
2309// Compute the ordering we will process the inputs: the rough heuristic here
2310// is to sort them per size so that the largest module get schedule as soon as
2311// possible. This is purely a compile-time optimization.
2312std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
2313 auto Seq = llvm::seq<int>(Begin: 0, End: R.size());
2314 std::vector<int> ModulesOrdering(Seq.begin(), Seq.end());
2315 llvm::sort(C&: ModulesOrdering, Comp: [&](int LeftIndex, int RightIndex) {
2316 auto LSize = R[LeftIndex]->getBuffer().size();
2317 auto RSize = R[RightIndex]->getBuffer().size();
2318 return LSize > RSize;
2319 });
2320 return ModulesOrdering;
2321}
2322
2323namespace {
2324/// This out-of-process backend does not perform code generation when invoked
2325/// for each task. Instead, it generates the necessary information (e.g., the
2326/// summary index shard, import list, etc.) to enable code generation to be
2327/// performed externally, similar to WriteIndexesThinBackend. The backend's
2328/// `wait` function then invokes an external distributor process to carry out
2329/// the backend compilations.
2330class OutOfProcessThinBackend : public CGThinBackend {
2331 using SString = SmallString<128>;
2332
2333 BumpPtrAllocator Alloc;
2334 StringSaver Saver{Alloc};
2335
2336 SString LinkerOutputFile;
2337
2338 SString DistributorPath;
2339 ArrayRef<StringRef> DistributorArgs;
2340
2341 SString RemoteCompiler;
2342 ArrayRef<StringRef> RemoteCompilerPrependArgs;
2343 ArrayRef<StringRef> RemoteCompilerArgs;
2344
2345 bool SaveTemps;
2346
2347 SmallVector<StringRef, 0> CodegenOptions;
2348 DenseSet<StringRef> CommonInputs;
2349 // Number of the object files that have been already cached.
2350 std::atomic<size_t> CachedJobs{0};
2351 // Information specific to individual backend compilation job.
2352 struct Job {
2353 unsigned Task;
2354 StringRef ModuleID;
2355 StringRef NativeObjectPath;
2356 StringRef SummaryIndexPath;
2357 ImportsFilesContainer ImportsFiles;
2358 std::string CacheKey;
2359 AddStreamFn CacheAddStream;
2360 bool Cached = false;
2361 };
2362 // The set of backend compilations jobs.
2363 SmallVector<Job> Jobs;
2364
2365 // A unique string to identify the current link.
2366 SmallString<8> UID;
2367
2368 // The offset to the first ThinLTO task.
2369 unsigned ThinLTOTaskOffset;
2370
2371 // The target triple to supply for backend compilations.
2372 llvm::Triple Triple;
2373
2374 // Cache
2375 FileCache Cache;
2376
2377public:
2378 OutOfProcessThinBackend(
2379 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
2380 ThreadPoolStrategy ThinLTOParallelism,
2381 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
2382 AddStreamFn AddStream, FileCache CacheFn, lto::IndexWriteCallback OnWrite,
2383 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
2384 StringRef LinkerOutputFile, StringRef Distributor,
2385 ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
2386 ArrayRef<StringRef> RemoteCompilerPrependArgs,
2387 ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps)
2388 : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
2389 AddStream, OnWrite, ShouldEmitIndexFiles,
2390 ShouldEmitImportsFiles, ThinLTOParallelism),
2391 LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor),
2392 DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler),
2393 RemoteCompilerPrependArgs(RemoteCompilerPrependArgs),
2394 RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps),
2395 Cache(std::move(CacheFn)) {}
2396
2397 void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset,
2398 llvm::Triple Triple) override {
2399 UID = itostr(X: sys::Process::getProcessId());
2400 Jobs.resize(N: (size_t)ThinLTONumTasks);
2401 this->ThinLTOTaskOffset = ThinLTOTaskOffset;
2402 this->Triple = std::move(Triple);
2403 this->Conf.Dtlto = 1;
2404 }
2405
2406 virtual Error runThinLTOBackendThread(
2407 Job &J, const FunctionImporter::ImportMapTy &ImportList,
2408 const FunctionImporter::ExportSetTy &ExportList,
2409 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
2410 &ResolvedODR) {
2411 {
2412 TimeTraceScope TimeScope("Emit individual index for DTLTO",
2413 J.SummaryIndexPath);
2414 if (auto E = emitFiles(ImportList, ModulePath: J.ModuleID, NewModulePath: J.ModuleID.str(),
2415 SummaryPath: J.SummaryIndexPath, ImportsFiles: J.ImportsFiles))
2416 return E;
2417 }
2418
2419 if (!Cache.isValid() || !CombinedIndex.modulePaths().count(Key: J.ModuleID) ||
2420 all_of(Range: CombinedIndex.getModuleHash(ModPath: J.ModuleID),
2421 P: [](uint32_t V) { return V == 0; }))
2422 // Cache disabled or no entry for this module in the combined index or
2423 // no module hash.
2424 return Error::success();
2425
2426 TimeTraceScope TimeScope("Check cache for DTLTO", J.SummaryIndexPath);
2427 const GVSummaryMapTy &DefinedGlobals =
2428 ModuleToDefinedGVSummaries.find(Val: J.ModuleID)->second;
2429
2430 // The module may be cached, this helps handling it.
2431 J.CacheKey = computeLTOCacheKey(Conf, Index: CombinedIndex, ModuleID: J.ModuleID, ImportList,
2432 ExportList, ResolvedODR, DefinedGlobals,
2433 CfiFunctionDefs, CfiFunctionDecls);
2434
2435 // The module may be cached, this helps handling it.
2436 auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
2437 if (Error Err = CacheAddStreamExp.takeError())
2438 return Err;
2439 AddStreamFn &CacheAddStream = *CacheAddStreamExp;
2440 // If CacheAddStream is null, we have a cache hit and at this point
2441 // object file is already passed back to the linker.
2442 if (!CacheAddStream) {
2443 J.Cached = true; // Cache hit, mark the job as cached.
2444 CachedJobs.fetch_add(i: 1);
2445 } else {
2446 // If CacheAddStream is not null, we have a cache miss and we need to
2447 // run the backend for codegen. Save cache 'add stream'
2448 // function for a later use.
2449 J.CacheAddStream = std::move(CacheAddStream);
2450 }
2451 return Error::success();
2452 }
2453
2454 Error start(
2455 unsigned Task, BitcodeModule BM,
2456 const FunctionImporter::ImportMapTy &ImportList,
2457 const FunctionImporter::ExportSetTy &ExportList,
2458 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
2459 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
2460
2461 StringRef ModulePath = BM.getModuleIdentifier();
2462
2463 SString ObjFilePath = sys::path::parent_path(path: LinkerOutputFile);
2464 sys::path::append(path&: ObjFilePath, a: sys::path::stem(path: ModulePath) + "." +
2465 itostr(X: Task) + "." + UID + ".native.o");
2466
2467 Job &J = Jobs[Task - ThinLTOTaskOffset];
2468 J = {.Task: Task,
2469 .ModuleID: ModulePath,
2470 .NativeObjectPath: Saver.save(S: ObjFilePath.str()),
2471 .SummaryIndexPath: Saver.save(S: ObjFilePath.str() + ".thinlto.bc"),
2472 .ImportsFiles: {}, // Filled in by emitFiles below.
2473 .CacheKey: "", /*CacheKey=*/
2474 .CacheAddStream: nullptr,
2475 .Cached: false};
2476
2477 // Cleanup per-job temporary files on abnormal process exit.
2478 if (!SaveTemps) {
2479 llvm::sys::RemoveFileOnSignal(Filename: J.NativeObjectPath);
2480 if (!ShouldEmitIndexFiles)
2481 llvm::sys::RemoveFileOnSignal(Filename: J.SummaryIndexPath);
2482 }
2483
2484 assert(ModuleToDefinedGVSummaries.count(ModulePath));
2485
2486 // The BackendThreadPool is only used here to write the sharded index files
2487 // (similar to WriteIndexesThinBackend).
2488 BackendThreadPool.async(
2489 F: [=](Job &J, const FunctionImporter::ImportMapTy &ImportList,
2490 const FunctionImporter::ExportSetTy &ExportList,
2491 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
2492 &ResolvedODR) {
2493 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
2494 timeTraceProfilerInitialize(
2495 TimeTraceGranularity: Conf.TimeTraceGranularity,
2496 ProcName: "Emit individual index and check cache for DTLTO");
2497 Error E =
2498 runThinLTOBackendThread(J, ImportList, ExportList, ResolvedODR);
2499 if (E) {
2500 std::unique_lock<std::mutex> L(ErrMu);
2501 if (Err)
2502 Err = joinErrors(E1: std::move(*Err), E2: std::move(E));
2503 else
2504 Err = std::move(E);
2505 }
2506 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
2507 timeTraceProfilerFinishThread();
2508 },
2509 ArgList: std::ref(t&: J), ArgList: std::ref(t: ImportList), ArgList: std::ref(t: ExportList),
2510 ArgList: std::ref(t: ResolvedODR));
2511
2512 return Error::success();
2513 }
2514
2515 // Derive a set of Clang options that will be shared/common for all DTLTO
2516 // backend compilations. We are intentionally minimal here as these options
2517 // must remain synchronized with the behavior of Clang. DTLTO does not support
2518 // all the features available with in-process LTO. More features are expected
2519 // to be added over time. Users can specify Clang options directly if a
2520 // feature is not supported. Note that explicitly specified options that imply
2521 // additional input or output file dependencies must be communicated to the
2522 // distribution system, potentially by setting extra options on the
2523 // distributor program.
2524 void buildCommonRemoteCompilerOptions() {
2525 const lto::Config &C = Conf;
2526 auto &Ops = CodegenOptions;
2527
2528 Ops.push_back(Elt: Saver.save(S: "-O" + Twine(C.OptLevel)));
2529
2530 if (C.Options.EmitAddrsig)
2531 Ops.push_back(Elt: "-faddrsig");
2532 if (C.Options.FunctionSections)
2533 Ops.push_back(Elt: "-ffunction-sections");
2534 if (C.Options.DataSections)
2535 Ops.push_back(Elt: "-fdata-sections");
2536
2537 if (C.RelocModel == Reloc::PIC_)
2538 // Clang doesn't have -fpic for all triples.
2539 if (!Triple.isOSBinFormatCOFF())
2540 Ops.push_back(Elt: "-fpic");
2541
2542 // Turn on/off warnings about profile cfg mismatch (default on)
2543 // --lto-pgo-warn-mismatch.
2544 if (!C.PGOWarnMismatch) {
2545 Ops.push_back(Elt: "-mllvm");
2546 Ops.push_back(Elt: "-no-pgo-warn-mismatch");
2547 }
2548
2549 // Enable sample-based profile guided optimizations.
2550 // Sample profile file path --lto-sample-profile=<value>.
2551 if (!C.SampleProfile.empty()) {
2552 Ops.push_back(
2553 Elt: Saver.save(S: "-fprofile-sample-use=" + Twine(C.SampleProfile)));
2554 CommonInputs.insert(V: C.SampleProfile);
2555 }
2556
2557 // We don't know which of options will be used by Clang.
2558 Ops.push_back(Elt: "-Wno-unused-command-line-argument");
2559
2560 // Forward any supplied options.
2561 if (!RemoteCompilerArgs.empty())
2562 for (auto &a : RemoteCompilerArgs)
2563 Ops.push_back(Elt: a);
2564 }
2565
2566 // Generates a JSON file describing the backend compilations, for the
2567 // distributor.
2568 bool emitDistributorJson(StringRef DistributorJson) {
2569 using json::Array;
2570 std::error_code EC;
2571 raw_fd_ostream OS(DistributorJson, EC);
2572 if (EC)
2573 return false;
2574
2575 json::OStream JOS(OS);
2576 JOS.object(Contents: [&]() {
2577 // Information common to all jobs.
2578 JOS.attributeObject(Key: "common", Contents: [&]() {
2579 JOS.attribute(Key: "linker_output", Contents: LinkerOutputFile);
2580
2581 JOS.attributeArray(Key: "args", Contents: [&]() {
2582 JOS.value(V: RemoteCompiler);
2583
2584 // Forward any supplied prepend options.
2585 if (!RemoteCompilerPrependArgs.empty())
2586 for (auto &A : RemoteCompilerPrependArgs)
2587 JOS.value(V: A);
2588
2589 JOS.value(V: "-c");
2590
2591 JOS.value(V: Saver.save(S: "--target=" + Triple.str()));
2592
2593 for (const auto &A : CodegenOptions)
2594 JOS.value(V: A);
2595 });
2596
2597 JOS.attribute(Key: "inputs", Contents: Array(CommonInputs));
2598 });
2599
2600 // Per-compilation-job information.
2601 JOS.attributeArray(Key: "jobs", Contents: [&]() {
2602 for (const auto &J : Jobs) {
2603 assert(J.Task != 0);
2604 if (J.Cached) {
2605 assert(!Cache.getCacheDirectoryPath().empty());
2606 continue;
2607 }
2608
2609 SmallVector<StringRef, 2> Inputs;
2610 SmallVector<StringRef, 1> Outputs;
2611
2612 JOS.object(Contents: [&]() {
2613 JOS.attributeArray(Key: "args", Contents: [&]() {
2614 JOS.value(V: J.ModuleID);
2615 Inputs.push_back(Elt: J.ModuleID);
2616
2617 JOS.value(
2618 V: Saver.save(S: "-fthinlto-index=" + Twine(J.SummaryIndexPath)));
2619 Inputs.push_back(Elt: J.SummaryIndexPath);
2620
2621 JOS.value(V: "-o");
2622 JOS.value(V: J.NativeObjectPath);
2623 Outputs.push_back(Elt: J.NativeObjectPath);
2624 });
2625
2626 // Add the bitcode files from which imports will be made. These do
2627 // not explicitly appear on the backend compilation command lines
2628 // but are recorded in the summary index shards.
2629 llvm::append_range(C&: Inputs, R: J.ImportsFiles);
2630 JOS.attribute(Key: "inputs", Contents: Array(Inputs));
2631
2632 JOS.attribute(Key: "outputs", Contents: Array(Outputs));
2633 });
2634 }
2635 });
2636 });
2637
2638 return true;
2639 }
2640
2641 void removeFile(StringRef FileName) {
2642 std::error_code EC = sys::fs::remove(path: FileName, IgnoreNonExisting: true);
2643 if (EC && EC != std::make_error_code(e: std::errc::no_such_file_or_directory))
2644 errs() << "warning: could not remove the file '" << FileName
2645 << "': " << EC.message() << "\n";
2646 }
2647
2648 Error wait() override {
2649 // Wait for the information on the required backend compilations to be
2650 // gathered.
2651 BackendThreadPool.wait();
2652 if (Err)
2653 return std::move(*Err);
2654
2655 llvm::scope_exit CleanPerJobFiles([&] {
2656 llvm::TimeTraceScope TimeScope("Remove DTLTO temporary files");
2657 if (!SaveTemps)
2658 for (auto &Job : Jobs) {
2659 removeFile(FileName: Job.NativeObjectPath);
2660 if (!ShouldEmitIndexFiles)
2661 removeFile(FileName: Job.SummaryIndexPath);
2662 }
2663 });
2664
2665 const StringRef BCError = "DTLTO backend compilation: ";
2666
2667 buildCommonRemoteCompilerOptions();
2668
2669 SString JsonFile = sys::path::parent_path(path: LinkerOutputFile);
2670 {
2671 llvm::TimeTraceScope TimeScope("Emit DTLTO JSON");
2672 sys::path::append(path&: JsonFile, a: sys::path::stem(path: LinkerOutputFile) + "." +
2673 UID + ".dist-file.json");
2674 // Cleanup DTLTO JSON file on abnormal process exit.
2675 if (!SaveTemps)
2676 llvm::sys::RemoveFileOnSignal(Filename: JsonFile);
2677 if (!emitDistributorJson(DistributorJson: JsonFile))
2678 return make_error<StringError>(
2679 Args: BCError + "failed to generate distributor JSON script: " + JsonFile,
2680 Args: inconvertibleErrorCode());
2681 }
2682 llvm::scope_exit CleanJson([&] {
2683 if (!SaveTemps)
2684 removeFile(FileName: JsonFile);
2685 });
2686
2687 {
2688 llvm::TimeTraceScope TimeScope("Execute DTLTO distributor",
2689 DistributorPath);
2690 // Checks if we have any jobs that don't have corresponding cache entries.
2691 if (CachedJobs.load() < Jobs.size()) {
2692 SmallVector<StringRef, 3> Args = {DistributorPath};
2693 llvm::append_range(C&: Args, R&: DistributorArgs);
2694 Args.push_back(Elt: JsonFile);
2695 std::string ErrMsg;
2696 if (sys::ExecuteAndWait(Program: Args[0], Args,
2697 /*Env=*/std::nullopt, /*Redirects=*/{},
2698 /*SecondsToWait=*/0, /*MemoryLimit=*/0,
2699 ErrMsg: &ErrMsg)) {
2700 return make_error<StringError>(
2701 Args: BCError + "distributor execution failed" +
2702 (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
2703 Args: inconvertibleErrorCode());
2704 }
2705 }
2706 }
2707
2708 {
2709 llvm::TimeTraceScope FilesScope("Add DTLTO files to the link");
2710 for (auto &Job : Jobs) {
2711 if (!Job.CacheKey.empty() && Job.Cached) {
2712 assert(Cache.isValid());
2713 continue;
2714 }
2715 // Load the native object from a file into a memory buffer
2716 // and store its contents in the output buffer.
2717 auto ObjFileMbOrErr =
2718 MemoryBuffer::getFile(Filename: Job.NativeObjectPath, /*IsText=*/false,
2719 /*RequiresNullTerminator=*/false);
2720 if (std::error_code EC = ObjFileMbOrErr.getError())
2721 return make_error<StringError>(
2722 Args: BCError + "cannot open native object file: " +
2723 Job.NativeObjectPath + ": " + EC.message(),
2724 Args: inconvertibleErrorCode());
2725
2726 MemoryBufferRef ObjFileMbRef = ObjFileMbOrErr->get()->getMemBufferRef();
2727 if (Cache.isValid()) {
2728 // Cache hits are taken care of earlier. At this point, we could only
2729 // have cache misses.
2730 assert(Job.CacheAddStream);
2731 // Obtain a file stream for a storing a cache entry.
2732 auto CachedFileStreamOrErr =
2733 Job.CacheAddStream(Job.Task, Job.ModuleID);
2734 if (!CachedFileStreamOrErr)
2735 return joinErrors(
2736 E1: CachedFileStreamOrErr.takeError(),
2737 E2: createStringError(EC: inconvertibleErrorCode(),
2738 Fmt: "Cannot get a cache file stream: %s",
2739 Vals: Job.NativeObjectPath.data()));
2740 // Store a file buffer into the cache stream.
2741 auto &CacheStream = *(CachedFileStreamOrErr->get());
2742 *(CacheStream.OS) << ObjFileMbRef.getBuffer();
2743 if (Error Err = CacheStream.commit())
2744 return Err;
2745 } else {
2746 auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
2747 if (Error Err = StreamOrErr.takeError())
2748 report_fatal_error(Err: std::move(Err));
2749 auto &Stream = *StreamOrErr->get();
2750 *Stream.OS << ObjFileMbRef.getBuffer();
2751 if (Error Err = Stream.commit())
2752 report_fatal_error(Err: std::move(Err));
2753 }
2754 }
2755 }
2756 return Error::success();
2757 }
2758};
2759} // end anonymous namespace
2760
2761ThinBackend lto::createOutOfProcessThinBackend(
2762 ThreadPoolStrategy Parallelism, lto::IndexWriteCallback OnWrite,
2763 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
2764 StringRef LinkerOutputFile, StringRef Distributor,
2765 ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
2766 ArrayRef<StringRef> RemoteCompilerPrependArgs,
2767 ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps) {
2768 auto Func =
2769 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
2770 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
2771 AddStreamFn AddStream, FileCache Cache) {
2772 return std::make_unique<OutOfProcessThinBackend>(
2773 args: Conf, args&: CombinedIndex, args: Parallelism, args: ModuleToDefinedGVSummaries,
2774 args&: AddStream, args&: Cache, args: OnWrite, args: ShouldEmitIndexFiles,
2775 args: ShouldEmitImportsFiles, args: LinkerOutputFile, args: Distributor,
2776 args: DistributorArgs, args: RemoteCompiler, args: RemoteCompilerPrependArgs,
2777 args: RemoteCompilerArgs, args: SaveTemps);
2778 };
2779 return ThinBackend(Func, Parallelism);
2780}
2781