1//===-LTO.cpp - LLVM Link Time Optimizer ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements functions and classes used to support LTO.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/LTO/LTO.h"
14#include "llvm/ADT/ScopeExit.h"
15#include "llvm/ADT/SmallSet.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/Analysis/OptimizationRemarkEmitter.h"
19#include "llvm/Analysis/StackSafetyAnalysis.h"
20#include "llvm/Analysis/TargetLibraryInfo.h"
21#include "llvm/Analysis/TargetTransformInfo.h"
22#include "llvm/Bitcode/BitcodeReader.h"
23#include "llvm/Bitcode/BitcodeWriter.h"
24#include "llvm/CodeGen/Analysis.h"
25#include "llvm/Config/llvm-config.h"
26#include "llvm/IR/AutoUpgrade.h"
27#include "llvm/IR/DiagnosticPrinter.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/LLVMRemarkStreamer.h"
30#include "llvm/IR/LegacyPassManager.h"
31#include "llvm/IR/Mangler.h"
32#include "llvm/IR/Metadata.h"
33#include "llvm/IR/RuntimeLibcalls.h"
34#include "llvm/LTO/LTOBackend.h"
35#include "llvm/LTO/SummaryBasedOptimizations.h"
36#include "llvm/Linker/IRMover.h"
37#include "llvm/MC/TargetRegistry.h"
38#include "llvm/Object/IRObjectFile.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/Error.h"
41#include "llvm/Support/FileSystem.h"
42#include "llvm/Support/ManagedStatic.h"
43#include "llvm/Support/MemoryBuffer.h"
44#include "llvm/Support/Path.h"
45#include "llvm/Support/SHA1.h"
46#include "llvm/Support/SourceMgr.h"
47#include "llvm/Support/ThreadPool.h"
48#include "llvm/Support/Threading.h"
49#include "llvm/Support/TimeProfiler.h"
50#include "llvm/Support/ToolOutputFile.h"
51#include "llvm/Support/VCSRevision.h"
52#include "llvm/Support/raw_ostream.h"
53#include "llvm/Target/TargetOptions.h"
54#include "llvm/Transforms/IPO.h"
55#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
56#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
57#include "llvm/Transforms/Utils/FunctionImportUtils.h"
58#include "llvm/Transforms/Utils/SplitModule.h"
59
60#include <optional>
61#include <set>
62
63using namespace llvm;
64using namespace lto;
65using namespace object;
66
67#define DEBUG_TYPE "lto"
68
69extern cl::opt<bool> UseNewDbgInfoFormat;
70
71static cl::opt<bool>
72 DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(Val: false), cl::Hidden,
73 cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
74
75namespace llvm {
76/// Enable global value internalization in LTO.
77cl::opt<bool> EnableLTOInternalization(
78 "enable-lto-internalization", cl::init(Val: true), cl::Hidden,
79 cl::desc("Enable global value internalization in LTO"));
80
81/// Indicate we are linking with an allocator that supports hot/cold operator
82/// new interfaces.
83extern cl::opt<bool> SupportsHotColdNew;
84
85/// Enable MemProf context disambiguation for thin link.
86extern cl::opt<bool> EnableMemProfContextDisambiguation;
87} // namespace llvm
88
89// Computes a unique hash for the Module considering the current list of
90// export/import and other global analysis results.
91// The hash is produced in \p Key.
92void llvm::computeLTOCacheKey(
93 SmallString<40> &Key, const Config &Conf, const ModuleSummaryIndex &Index,
94 StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList,
95 const FunctionImporter::ExportSetTy &ExportList,
96 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
97 const GVSummaryMapTy &DefinedGlobals,
98 const std::set<GlobalValue::GUID> &CfiFunctionDefs,
99 const std::set<GlobalValue::GUID> &CfiFunctionDecls) {
100 // Compute the unique hash for this entry.
101 // This is based on the current compiler version, the module itself, the
102 // export list, the hash for every single module in the import list, the
103 // list of ResolvedODR for the module, and the list of preserved symbols.
104 SHA1 Hasher;
105
106 // Start with the compiler revision
107 Hasher.update(LLVM_VERSION_STRING);
108#ifdef LLVM_REVISION
109 Hasher.update(LLVM_REVISION);
110#endif
111
112 // Include the parts of the LTO configuration that affect code generation.
113 auto AddString = [&](StringRef Str) {
114 Hasher.update(Str);
115 Hasher.update(Data: ArrayRef<uint8_t>{0});
116 };
117 auto AddUnsigned = [&](unsigned I) {
118 uint8_t Data[4];
119 support::endian::write32le(P: Data, V: I);
120 Hasher.update(Data);
121 };
122 auto AddUint64 = [&](uint64_t I) {
123 uint8_t Data[8];
124 support::endian::write64le(P: Data, V: I);
125 Hasher.update(Data);
126 };
127 auto AddUint8 = [&](const uint8_t I) {
128 Hasher.update(Data: ArrayRef<uint8_t>((const uint8_t *)&I, 1));
129 };
130 AddString(Conf.CPU);
131 // FIXME: Hash more of Options. For now all clients initialize Options from
132 // command-line flags (which is unsupported in production), but may set
133 // X86RelaxRelocations. The clang driver can also pass FunctionSections,
134 // DataSections and DebuggerTuning via command line flags.
135 AddUnsigned(Conf.Options.MCOptions.X86RelaxRelocations);
136 AddUnsigned(Conf.Options.FunctionSections);
137 AddUnsigned(Conf.Options.DataSections);
138 AddUnsigned((unsigned)Conf.Options.DebuggerTuning);
139 for (auto &A : Conf.MAttrs)
140 AddString(A);
141 if (Conf.RelocModel)
142 AddUnsigned(*Conf.RelocModel);
143 else
144 AddUnsigned(-1);
145 if (Conf.CodeModel)
146 AddUnsigned(*Conf.CodeModel);
147 else
148 AddUnsigned(-1);
149 for (const auto &S : Conf.MllvmArgs)
150 AddString(S);
151 AddUnsigned(static_cast<int>(Conf.CGOptLevel));
152 AddUnsigned(static_cast<int>(Conf.CGFileType));
153 AddUnsigned(Conf.OptLevel);
154 AddUnsigned(Conf.Freestanding);
155 AddString(Conf.OptPipeline);
156 AddString(Conf.AAPipeline);
157 AddString(Conf.OverrideTriple);
158 AddString(Conf.DefaultTriple);
159 AddString(Conf.DwoDir);
160
161 // Include the hash for the current module
162 auto ModHash = Index.getModuleHash(ModPath: ModuleID);
163 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
164
165 // TODO: `ExportList` is determined by `ImportList`. Since `ImportList` is
166 // used to compute cache key, we could omit hashing `ExportList` here.
167 std::vector<uint64_t> ExportsGUID;
168 ExportsGUID.reserve(n: ExportList.size());
169 for (const auto &VI : ExportList)
170 ExportsGUID.push_back(x: VI.getGUID());
171
172 // Sort the export list elements GUIDs.
173 llvm::sort(C&: ExportsGUID);
174 for (auto GUID : ExportsGUID)
175 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
176
177 // Include the hash for every module we import functions from. The set of
178 // imported symbols for each module may affect code generation and is
179 // sensitive to link order, so include that as well.
180 using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator;
181 struct ImportModule {
182 ImportMapIteratorTy ModIt;
183 const ModuleSummaryIndex::ModuleInfo *ModInfo;
184
185 StringRef getIdentifier() const { return ModIt->getFirst(); }
186 const FunctionImporter::FunctionsToImportTy &getFunctions() const {
187 return ModIt->second;
188 }
189
190 const ModuleHash &getHash() const { return ModInfo->second; }
191 };
192
193 std::vector<ImportModule> ImportModulesVector;
194 ImportModulesVector.reserve(n: ImportList.size());
195
196 for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end();
197 ++It) {
198 ImportModulesVector.push_back(x: {.ModIt: It, .ModInfo: Index.getModule(ModPath: It->getFirst())});
199 }
200 // Order using module hash, to be both independent of module name and
201 // module order.
202 llvm::sort(C&: ImportModulesVector,
203 Comp: [](const ImportModule &Lhs, const ImportModule &Rhs) -> bool {
204 return Lhs.getHash() < Rhs.getHash();
205 });
206 std::vector<std::pair<uint64_t, uint8_t>> ImportedGUIDs;
207 for (const ImportModule &Entry : ImportModulesVector) {
208 auto ModHash = Entry.getHash();
209 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
210
211 AddUint64(Entry.getFunctions().size());
212
213 ImportedGUIDs.clear();
214 for (auto &[Fn, ImportType] : Entry.getFunctions())
215 ImportedGUIDs.push_back(x: std::make_pair(x: Fn, y: ImportType));
216 llvm::sort(C&: ImportedGUIDs);
217 for (auto &[GUID, Type] : ImportedGUIDs) {
218 AddUint64(GUID);
219 AddUint8(Type);
220 }
221 }
222
223 // Include the hash for the resolved ODR.
224 for (auto &Entry : ResolvedODR) {
225 Hasher.update(Data: ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
226 sizeof(GlobalValue::GUID)));
227 Hasher.update(Data: ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
228 sizeof(GlobalValue::LinkageTypes)));
229 }
230
231 // Members of CfiFunctionDefs and CfiFunctionDecls that are referenced or
232 // defined in this module.
233 std::set<GlobalValue::GUID> UsedCfiDefs;
234 std::set<GlobalValue::GUID> UsedCfiDecls;
235
236 // Typeids used in this module.
237 std::set<GlobalValue::GUID> UsedTypeIds;
238
239 auto AddUsedCfiGlobal = [&](GlobalValue::GUID ValueGUID) {
240 if (CfiFunctionDefs.count(x: ValueGUID))
241 UsedCfiDefs.insert(x: ValueGUID);
242 if (CfiFunctionDecls.count(x: ValueGUID))
243 UsedCfiDecls.insert(x: ValueGUID);
244 };
245
246 auto AddUsedThings = [&](GlobalValueSummary *GS) {
247 if (!GS) return;
248 AddUnsigned(GS->getVisibility());
249 AddUnsigned(GS->isLive());
250 AddUnsigned(GS->canAutoHide());
251 for (const ValueInfo &VI : GS->refs()) {
252 AddUnsigned(VI.isDSOLocal(WithDSOLocalPropagation: Index.withDSOLocalPropagation()));
253 AddUsedCfiGlobal(VI.getGUID());
254 }
255 if (auto *GVS = dyn_cast<GlobalVarSummary>(Val: GS)) {
256 AddUnsigned(GVS->maybeReadOnly());
257 AddUnsigned(GVS->maybeWriteOnly());
258 }
259 if (auto *FS = dyn_cast<FunctionSummary>(Val: GS)) {
260 for (auto &TT : FS->type_tests())
261 UsedTypeIds.insert(x: TT);
262 for (auto &TT : FS->type_test_assume_vcalls())
263 UsedTypeIds.insert(x: TT.GUID);
264 for (auto &TT : FS->type_checked_load_vcalls())
265 UsedTypeIds.insert(x: TT.GUID);
266 for (auto &TT : FS->type_test_assume_const_vcalls())
267 UsedTypeIds.insert(x: TT.VFunc.GUID);
268 for (auto &TT : FS->type_checked_load_const_vcalls())
269 UsedTypeIds.insert(x: TT.VFunc.GUID);
270 for (auto &ET : FS->calls()) {
271 AddUnsigned(ET.first.isDSOLocal(WithDSOLocalPropagation: Index.withDSOLocalPropagation()));
272 AddUsedCfiGlobal(ET.first.getGUID());
273 }
274 }
275 };
276
277 // Include the hash for the linkage type to reflect internalization and weak
278 // resolution, and collect any used type identifier resolutions.
279 for (auto &GS : DefinedGlobals) {
280 GlobalValue::LinkageTypes Linkage = GS.second->linkage();
281 Hasher.update(
282 Data: ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
283 AddUsedCfiGlobal(GS.first);
284 AddUsedThings(GS.second);
285 }
286
287 // Imported functions may introduce new uses of type identifier resolutions,
288 // so we need to collect their used resolutions as well.
289 for (const ImportModule &ImpM : ImportModulesVector)
290 for (auto &[GUID, UnusedImportType] : ImpM.getFunctions()) {
291 GlobalValueSummary *S =
292 Index.findSummaryInModule(ValueGUID: GUID, ModuleId: ImpM.getIdentifier());
293 AddUsedThings(S);
294 // If this is an alias, we also care about any types/etc. that the aliasee
295 // may reference.
296 if (auto *AS = dyn_cast_or_null<AliasSummary>(Val: S))
297 AddUsedThings(AS->getBaseObject());
298 }
299
300 auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
301 AddString(TId);
302
303 AddUnsigned(S.TTRes.TheKind);
304 AddUnsigned(S.TTRes.SizeM1BitWidth);
305
306 AddUint64(S.TTRes.AlignLog2);
307 AddUint64(S.TTRes.SizeM1);
308 AddUint64(S.TTRes.BitMask);
309 AddUint64(S.TTRes.InlineBits);
310
311 AddUint64(S.WPDRes.size());
312 for (auto &WPD : S.WPDRes) {
313 AddUnsigned(WPD.first);
314 AddUnsigned(WPD.second.TheKind);
315 AddString(WPD.second.SingleImplName);
316
317 AddUint64(WPD.second.ResByArg.size());
318 for (auto &ByArg : WPD.second.ResByArg) {
319 AddUint64(ByArg.first.size());
320 for (uint64_t Arg : ByArg.first)
321 AddUint64(Arg);
322 AddUnsigned(ByArg.second.TheKind);
323 AddUint64(ByArg.second.Info);
324 AddUnsigned(ByArg.second.Byte);
325 AddUnsigned(ByArg.second.Bit);
326 }
327 }
328 };
329
330 // Include the hash for all type identifiers used by this module.
331 for (GlobalValue::GUID TId : UsedTypeIds) {
332 auto TidIter = Index.typeIds().equal_range(x: TId);
333 for (auto It = TidIter.first; It != TidIter.second; ++It)
334 AddTypeIdSummary(It->second.first, It->second.second);
335 }
336
337 AddUnsigned(UsedCfiDefs.size());
338 for (auto &V : UsedCfiDefs)
339 AddUint64(V);
340
341 AddUnsigned(UsedCfiDecls.size());
342 for (auto &V : UsedCfiDecls)
343 AddUint64(V);
344
345 if (!Conf.SampleProfile.empty()) {
346 auto FileOrErr = MemoryBuffer::getFile(Filename: Conf.SampleProfile);
347 if (FileOrErr) {
348 Hasher.update(Str: FileOrErr.get()->getBuffer());
349
350 if (!Conf.ProfileRemapping.empty()) {
351 FileOrErr = MemoryBuffer::getFile(Filename: Conf.ProfileRemapping);
352 if (FileOrErr)
353 Hasher.update(Str: FileOrErr.get()->getBuffer());
354 }
355 }
356 }
357
358 Key = toHex(Input: Hasher.result());
359}
360
361static void thinLTOResolvePrevailingGUID(
362 const Config &C, ValueInfo VI,
363 DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
364 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
365 isPrevailing,
366 function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
367 recordNewLinkage,
368 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
369 GlobalValue::VisibilityTypes Visibility =
370 C.VisibilityScheme == Config::ELF ? VI.getELFVisibility()
371 : GlobalValue::DefaultVisibility;
372 for (auto &S : VI.getSummaryList()) {
373 GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
374 // Ignore local and appending linkage values since the linker
375 // doesn't resolve them.
376 if (GlobalValue::isLocalLinkage(Linkage: OriginalLinkage) ||
377 GlobalValue::isAppendingLinkage(Linkage: S->linkage()))
378 continue;
379 // We need to emit only one of these. The prevailing module will keep it,
380 // but turned into a weak, while the others will drop it when possible.
381 // This is both a compile-time optimization and a correctness
382 // transformation. This is necessary for correctness when we have exported
383 // a reference - we need to convert the linkonce to weak to
384 // ensure a copy is kept to satisfy the exported reference.
385 // FIXME: We may want to split the compile time and correctness
386 // aspects into separate routines.
387 if (isPrevailing(VI.getGUID(), S.get())) {
388 if (GlobalValue::isLinkOnceLinkage(Linkage: OriginalLinkage)) {
389 S->setLinkage(GlobalValue::getWeakLinkage(
390 ODR: GlobalValue::isLinkOnceODRLinkage(Linkage: OriginalLinkage)));
391 // The kept copy is eligible for auto-hiding (hidden visibility) if all
392 // copies were (i.e. they were all linkonce_odr global unnamed addr).
393 // If any copy is not (e.g. it was originally weak_odr), then the symbol
394 // must remain externally available (e.g. a weak_odr from an explicitly
395 // instantiated template). Additionally, if it is in the
396 // GUIDPreservedSymbols set, that means that it is visibile outside
397 // the summary (e.g. in a native object or a bitcode file without
398 // summary), and in that case we cannot hide it as it isn't possible to
399 // check all copies.
400 S->setCanAutoHide(VI.canAutoHide() &&
401 !GUIDPreservedSymbols.count(V: VI.getGUID()));
402 }
403 if (C.VisibilityScheme == Config::FromPrevailing)
404 Visibility = S->getVisibility();
405 }
406 // Alias and aliasee can't be turned into available_externally.
407 else if (!isa<AliasSummary>(Val: S.get()) &&
408 !GlobalInvolvedWithAlias.count(V: S.get()))
409 S->setLinkage(GlobalValue::AvailableExternallyLinkage);
410
411 // For ELF, set visibility to the computed visibility from summaries. We
412 // don't track visibility from declarations so this may be more relaxed than
413 // the most constraining one.
414 if (C.VisibilityScheme == Config::ELF)
415 S->setVisibility(Visibility);
416
417 if (S->linkage() != OriginalLinkage)
418 recordNewLinkage(S->modulePath(), VI.getGUID(), S->linkage());
419 }
420
421 if (C.VisibilityScheme == Config::FromPrevailing) {
422 for (auto &S : VI.getSummaryList()) {
423 GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
424 if (GlobalValue::isLocalLinkage(Linkage: OriginalLinkage) ||
425 GlobalValue::isAppendingLinkage(Linkage: S->linkage()))
426 continue;
427 S->setVisibility(Visibility);
428 }
429 }
430}
431
432/// Resolve linkage for prevailing symbols in the \p Index.
433//
434// We'd like to drop these functions if they are no longer referenced in the
435// current module. However there is a chance that another module is still
436// referencing them because of the import. We make sure we always emit at least
437// one copy.
438void llvm::thinLTOResolvePrevailingInIndex(
439 const Config &C, ModuleSummaryIndex &Index,
440 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
441 isPrevailing,
442 function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
443 recordNewLinkage,
444 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
445 // We won't optimize the globals that are referenced by an alias for now
446 // Ideally we should turn the alias into a global and duplicate the definition
447 // when needed.
448 DenseSet<GlobalValueSummary *> GlobalInvolvedWithAlias;
449 for (auto &I : Index)
450 for (auto &S : I.second.SummaryList)
451 if (auto AS = dyn_cast<AliasSummary>(Val: S.get()))
452 GlobalInvolvedWithAlias.insert(V: &AS->getAliasee());
453
454 for (auto &I : Index)
455 thinLTOResolvePrevailingGUID(C, VI: Index.getValueInfo(R: I),
456 GlobalInvolvedWithAlias, isPrevailing,
457 recordNewLinkage, GUIDPreservedSymbols);
458}
459
460static void thinLTOInternalizeAndPromoteGUID(
461 ValueInfo VI, function_ref<bool(StringRef, ValueInfo)> isExported,
462 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
463 isPrevailing) {
464 auto ExternallyVisibleCopies =
465 llvm::count_if(Range: VI.getSummaryList(),
466 P: [](const std::unique_ptr<GlobalValueSummary> &Summary) {
467 return !GlobalValue::isLocalLinkage(Linkage: Summary->linkage());
468 });
469
470 for (auto &S : VI.getSummaryList()) {
471 // First see if we need to promote an internal value because it is not
472 // exported.
473 if (isExported(S->modulePath(), VI)) {
474 if (GlobalValue::isLocalLinkage(Linkage: S->linkage()))
475 S->setLinkage(GlobalValue::ExternalLinkage);
476 continue;
477 }
478
479 // Otherwise, see if we can internalize.
480 if (!EnableLTOInternalization)
481 continue;
482
483 // Non-exported values with external linkage can be internalized.
484 if (GlobalValue::isExternalLinkage(Linkage: S->linkage())) {
485 S->setLinkage(GlobalValue::InternalLinkage);
486 continue;
487 }
488
489 // Non-exported function and variable definitions with a weak-for-linker
490 // linkage can be internalized in certain cases. The minimum legality
491 // requirements would be that they are not address taken to ensure that we
492 // don't break pointer equality checks, and that variables are either read-
493 // or write-only. For functions, this is the case if either all copies are
494 // [local_]unnamed_addr, or we can propagate reference edge attributes
495 // (which is how this is guaranteed for variables, when analyzing whether
496 // they are read or write-only).
497 //
498 // However, we only get to this code for weak-for-linkage values in one of
499 // two cases:
500 // 1) The prevailing copy is not in IR (it is in native code).
501 // 2) The prevailing copy in IR is not exported from its module.
502 // Additionally, at least for the new LTO API, case 2 will only happen if
503 // there is exactly one definition of the value (i.e. in exactly one
504 // module), as duplicate defs are result in the value being marked exported.
505 // Likely, users of the legacy LTO API are similar, however, currently there
506 // are llvm-lto based tests of the legacy LTO API that do not mark
507 // duplicate linkonce_odr copies as exported via the tool, so we need
508 // to handle that case below by checking the number of copies.
509 //
510 // Generally, we only want to internalize a weak-for-linker value in case
511 // 2, because in case 1 we cannot see how the value is used to know if it
512 // is read or write-only. We also don't want to bloat the binary with
513 // multiple internalized copies of non-prevailing linkonce/weak functions.
514 // Note if we don't internalize, we will convert non-prevailing copies to
515 // available_externally anyway, so that we drop them after inlining. The
516 // only reason to internalize such a function is if we indeed have a single
517 // copy, because internalizing it won't increase binary size, and enables
518 // use of inliner heuristics that are more aggressive in the face of a
519 // single call to a static (local). For variables, internalizing a read or
520 // write only variable can enable more aggressive optimization. However, we
521 // already perform this elsewhere in the ThinLTO backend handling for
522 // read or write-only variables (processGlobalForThinLTO).
523 //
524 // Therefore, only internalize linkonce/weak if there is a single copy, that
525 // is prevailing in this IR module. We can do so aggressively, without
526 // requiring the address to be insignificant, or that a variable be read or
527 // write-only.
528 if (!GlobalValue::isWeakForLinker(Linkage: S->linkage()) ||
529 GlobalValue::isExternalWeakLinkage(Linkage: S->linkage()))
530 continue;
531
532 if (isPrevailing(VI.getGUID(), S.get()) && ExternallyVisibleCopies == 1)
533 S->setLinkage(GlobalValue::InternalLinkage);
534 }
535}
536
537// Update the linkages in the given \p Index to mark exported values
538// as external and non-exported values as internal.
539void llvm::thinLTOInternalizeAndPromoteInIndex(
540 ModuleSummaryIndex &Index,
541 function_ref<bool(StringRef, ValueInfo)> isExported,
542 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
543 isPrevailing) {
544 for (auto &I : Index)
545 thinLTOInternalizeAndPromoteGUID(VI: Index.getValueInfo(R: I), isExported,
546 isPrevailing);
547}
548
549// Requires a destructor for std::vector<InputModule>.
550InputFile::~InputFile() = default;
551
552Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
553 std::unique_ptr<InputFile> File(new InputFile);
554
555 Expected<IRSymtabFile> FOrErr = readIRSymtab(MBRef: Object);
556 if (!FOrErr)
557 return FOrErr.takeError();
558
559 File->TargetTriple = FOrErr->TheReader.getTargetTriple();
560 File->SourceFileName = FOrErr->TheReader.getSourceFileName();
561 File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
562 File->DependentLibraries = FOrErr->TheReader.getDependentLibraries();
563 File->ComdatTable = FOrErr->TheReader.getComdatTable();
564
565 for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
566 size_t Begin = File->Symbols.size();
567 for (const irsymtab::Reader::SymbolRef &Sym :
568 FOrErr->TheReader.module_symbols(I))
569 // Skip symbols that are irrelevant to LTO. Note that this condition needs
570 // to match the one in Skip() in LTO::addRegularLTO().
571 if (Sym.isGlobal() && !Sym.isFormatSpecific())
572 File->Symbols.push_back(x: Sym);
573 File->ModuleSymIndices.push_back(x: {Begin, File->Symbols.size()});
574 }
575
576 File->Mods = FOrErr->Mods;
577 File->Strtab = std::move(FOrErr->Strtab);
578 return std::move(File);
579}
580
581StringRef InputFile::getName() const {
582 return Mods[0].getModuleIdentifier();
583}
584
585BitcodeModule &InputFile::getSingleBitcodeModule() {
586 assert(Mods.size() == 1 && "Expect only one bitcode module");
587 return Mods[0];
588}
589
590LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
591 const Config &Conf)
592 : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel),
593 Ctx(Conf), CombinedModule(std::make_unique<Module>(args: "ld-temp.o", args&: Ctx)),
594 Mover(std::make_unique<IRMover>(args&: *CombinedModule)) {
595 CombinedModule->IsNewDbgInfoFormat = UseNewDbgInfoFormat;
596}
597
598LTO::ThinLTOState::ThinLTOState(ThinBackend Backend)
599 : Backend(Backend), CombinedIndex(/*HaveGVs*/ false) {
600 if (!Backend)
601 this->Backend =
602 createInProcessThinBackend(Parallelism: llvm::heavyweight_hardware_concurrency());
603}
604
605LTO::LTO(Config Conf, ThinBackend Backend,
606 unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode)
607 : Conf(std::move(Conf)),
608 RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
609 ThinLTO(std::move(Backend)),
610 GlobalResolutions(std::make_optional<StringMap<GlobalResolution>>()),
611 LTOMode(LTOMode) {}
612
613// Requires a destructor for MapVector<BitcodeModule>.
614LTO::~LTO() = default;
615
616// Add the symbols in the given module to the GlobalResolutions map, and resolve
617// their partitions.
618void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
619 ArrayRef<SymbolResolution> Res,
620 unsigned Partition, bool InSummary) {
621 auto *ResI = Res.begin();
622 auto *ResE = Res.end();
623 (void)ResE;
624 const Triple TT(RegularLTO.CombinedModule->getTargetTriple());
625 for (const InputFile::Symbol &Sym : Syms) {
626 assert(ResI != ResE);
627 SymbolResolution Res = *ResI++;
628
629 auto &GlobalRes = (*GlobalResolutions)[Sym.getName()];
630 GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
631 if (Res.Prevailing) {
632 assert(!GlobalRes.Prevailing &&
633 "Multiple prevailing defs are not allowed");
634 GlobalRes.Prevailing = true;
635 GlobalRes.IRName = std::string(Sym.getIRName());
636 } else if (!GlobalRes.Prevailing && GlobalRes.IRName.empty()) {
637 // Sometimes it can be two copies of symbol in a module and prevailing
638 // symbol can have no IR name. That might happen if symbol is defined in
639 // module level inline asm block. In case we have multiple modules with
640 // the same symbol we want to use IR name of the prevailing symbol.
641 // Otherwise, if we haven't seen a prevailing symbol, set the name so that
642 // we can later use it to check if there is any prevailing copy in IR.
643 GlobalRes.IRName = std::string(Sym.getIRName());
644 }
645
646 // In rare occasion, the symbol used to initialize GlobalRes has a different
647 // IRName from the inspected Symbol. This can happen on macOS + iOS, when a
648 // symbol is referenced through its mangled name, say @"\01_symbol" while
649 // the IRName is @symbol (the prefix underscore comes from MachO mangling).
650 // In that case, we have the same actual Symbol that can get two different
651 // GUID, leading to some invalid internalization. Workaround this by marking
652 // the GlobalRes external.
653
654 // FIXME: instead of this check, it would be desirable to compute GUIDs
655 // based on mangled name, but this requires an access to the Target Triple
656 // and would be relatively invasive on the codebase.
657 if (GlobalRes.IRName != Sym.getIRName()) {
658 GlobalRes.Partition = GlobalResolution::External;
659 GlobalRes.VisibleOutsideSummary = true;
660 }
661
662 // Set the partition to external if we know it is re-defined by the linker
663 // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a
664 // regular object, is referenced from llvm.compiler.used/llvm.used, or was
665 // already recorded as being referenced from a different partition.
666 if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() ||
667 (GlobalRes.Partition != GlobalResolution::Unknown &&
668 GlobalRes.Partition != Partition)) {
669 GlobalRes.Partition = GlobalResolution::External;
670 } else
671 // First recorded reference, save the current partition.
672 GlobalRes.Partition = Partition;
673
674 // Flag as visible outside of summary if visible from a regular object or
675 // from a module that does not have a summary.
676 GlobalRes.VisibleOutsideSummary |=
677 (Res.VisibleToRegularObj || Sym.isUsed() || !InSummary);
678
679 GlobalRes.ExportDynamic |= Res.ExportDynamic;
680 }
681}
682
683static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
684 ArrayRef<SymbolResolution> Res) {
685 StringRef Path = Input->getName();
686 OS << Path << '\n';
687 auto ResI = Res.begin();
688 for (const InputFile::Symbol &Sym : Input->symbols()) {
689 assert(ResI != Res.end());
690 SymbolResolution Res = *ResI++;
691
692 OS << "-r=" << Path << ',' << Sym.getName() << ',';
693 if (Res.Prevailing)
694 OS << 'p';
695 if (Res.FinalDefinitionInLinkageUnit)
696 OS << 'l';
697 if (Res.VisibleToRegularObj)
698 OS << 'x';
699 if (Res.LinkerRedefined)
700 OS << 'r';
701 OS << '\n';
702 }
703 OS.flush();
704 assert(ResI == Res.end());
705}
706
707Error LTO::add(std::unique_ptr<InputFile> Input,
708 ArrayRef<SymbolResolution> Res) {
709 assert(!CalledGetMaxTasks);
710
711 if (Conf.ResolutionFile)
712 writeToResolutionFile(OS&: *Conf.ResolutionFile, Input: Input.get(), Res);
713
714 if (RegularLTO.CombinedModule->getTargetTriple().empty()) {
715 RegularLTO.CombinedModule->setTargetTriple(Input->getTargetTriple());
716 if (Triple(Input->getTargetTriple()).isOSBinFormatELF())
717 Conf.VisibilityScheme = Config::ELF;
718 }
719
720 const SymbolResolution *ResI = Res.begin();
721 for (unsigned I = 0; I != Input->Mods.size(); ++I)
722 if (Error Err = addModule(Input&: *Input, ModI: I, ResI, ResE: Res.end()))
723 return Err;
724
725 assert(ResI == Res.end());
726 return Error::success();
727}
728
729Error LTO::addModule(InputFile &Input, unsigned ModI,
730 const SymbolResolution *&ResI,
731 const SymbolResolution *ResE) {
732 Expected<BitcodeLTOInfo> LTOInfo = Input.Mods[ModI].getLTOInfo();
733 if (!LTOInfo)
734 return LTOInfo.takeError();
735
736 if (EnableSplitLTOUnit) {
737 // If only some modules were split, flag this in the index so that
738 // we can skip or error on optimizations that need consistently split
739 // modules (whole program devirt and lower type tests).
740 if (*EnableSplitLTOUnit != LTOInfo->EnableSplitLTOUnit)
741 ThinLTO.CombinedIndex.setPartiallySplitLTOUnits();
742 } else
743 EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;
744
745 BitcodeModule BM = Input.Mods[ModI];
746
747 if ((LTOMode == LTOK_UnifiedRegular || LTOMode == LTOK_UnifiedThin) &&
748 !LTOInfo->UnifiedLTO)
749 return make_error<StringError>(
750 Args: "unified LTO compilation must use "
751 "compatible bitcode modules (use -funified-lto)",
752 Args: inconvertibleErrorCode());
753
754 if (LTOInfo->UnifiedLTO && LTOMode == LTOK_Default)
755 LTOMode = LTOK_UnifiedThin;
756
757 bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular);
758
759 auto ModSyms = Input.module_symbols(I: ModI);
760 addModuleToGlobalRes(Syms: ModSyms, Res: {ResI, ResE},
761 Partition: IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0,
762 InSummary: LTOInfo->HasSummary);
763
764 if (IsThinLTO)
765 return addThinLTO(BM, Syms: ModSyms, ResI, ResE);
766
767 RegularLTO.EmptyCombinedModule = false;
768 Expected<RegularLTOState::AddedModule> ModOrErr =
769 addRegularLTO(BM, Syms: ModSyms, ResI, ResE);
770 if (!ModOrErr)
771 return ModOrErr.takeError();
772
773 if (!LTOInfo->HasSummary)
774 return linkRegularLTO(Mod: std::move(*ModOrErr), /*LivenessFromIndex=*/false);
775
776 // Regular LTO module summaries are added to a dummy module that represents
777 // the combined regular LTO module.
778 if (Error Err = BM.readSummary(CombinedIndex&: ThinLTO.CombinedIndex, ModulePath: ""))
779 return Err;
780 RegularLTO.ModsWithSummaries.push_back(x: std::move(*ModOrErr));
781 return Error::success();
782}
783
784// Checks whether the given global value is in a non-prevailing comdat
785// (comdat containing values the linker indicated were not prevailing,
786// which we then dropped to available_externally), and if so, removes
787// it from the comdat. This is called for all global values to ensure the
788// comdat is empty rather than leaving an incomplete comdat. It is needed for
789// regular LTO modules, in case we are in a mixed-LTO mode (both regular
790// and thin LTO modules) compilation. Since the regular LTO module will be
791// linked first in the final native link, we want to make sure the linker
792// doesn't select any of these incomplete comdats that would be left
793// in the regular LTO module without this cleanup.
794static void
795handleNonPrevailingComdat(GlobalValue &GV,
796 std::set<const Comdat *> &NonPrevailingComdats) {
797 Comdat *C = GV.getComdat();
798 if (!C)
799 return;
800
801 if (!NonPrevailingComdats.count(x: C))
802 return;
803
804 // Additionally need to drop all global values from the comdat to
805 // available_externally, to satisfy the COMDAT requirement that all members
806 // are discarded as a unit. The non-local linkage global values avoid
807 // duplicate definition linker errors.
808 GV.setLinkage(GlobalValue::AvailableExternallyLinkage);
809
810 if (auto GO = dyn_cast<GlobalObject>(Val: &GV))
811 GO->setComdat(nullptr);
812}
813
814 // Add a regular LTO object to the link.
815 // The resulting module needs to be linked into the combined LTO module with
816 // linkRegularLTO.
//
// BM is lazily loaded into RegularLTO.Ctx. Syms is the list of symbols read
// from the input file for this module; the resolutions in [ResI, ResE) are
// consumed one per symbol. ResI is taken by reference, so the caller's cursor
// is advanced past this module's resolutions.
//
// Returns the loaded module together with the list of global values to keep
// (Mod.Keep), or the error produced while loading the module or materializing
// its metadata.
817 Expected<LTO::RegularLTOState::AddedModule>
818 LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
819 const SymbolResolution *&ResI,
820 const SymbolResolution *ResE) {
821 RegularLTOState::AddedModule Mod;
822 Expected<std::unique_ptr<Module>> MOrErr =
823 BM.getLazyModule(Context&: RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true,
824 /*IsImporting*/ false);
825 if (!MOrErr)
826 return MOrErr.takeError();
// Grab a reference to the module before ownership is moved into Mod.M.
827 Module &M = **MOrErr;
828 Mod.M = std::move(*MOrErr);
829
830 if (Error Err = M.materializeMetadata())
831 return std::move(Err);
832
833 // If cfi.functions is present and we are in regular LTO mode, LowerTypeTests
834 // will rename local functions in the merged module as "<function name>.1".
835 // This causes linking errors, since other parts of the module expect the
836 // original function name.
837 if (LTOMode == LTOK_UnifiedRegular)
838 if (NamedMDNode *CfiFunctionsMD = M.getNamedMetadata(Name: "cfi.functions"))
839 M.eraseNamedMetadata(NMD: CfiFunctionsMD);
840
841 UpgradeDebugInfo(M);
842
843 ModuleSymbolTable SymTab;
844 SymTab.addModule(M: &M);
845
// Globals with appending linkage are always kept.
846 for (GlobalVariable &GV : M.globals())
847 if (GV.hasAppendingLinkage())
848 Mod.Keep.push_back(x: &GV);
849
// Record every global object that is the target of an alias; such objects are
// excluded from the available_externally demotion below.
850 DenseSet<GlobalObject *> AliasedGlobals;
851 for (auto &GA : M.aliases())
852 if (GlobalObject *GO = GA.getAliaseeObject())
853 AliasedGlobals.insert(V: GO);
854
855 // In this function we need IR GlobalValues matching the symbols in Syms
856 // (which is not backed by a module), so we need to enumerate them in the same
857 // order. The symbol enumeration order of a ModuleSymbolTable intentionally
858 // matches the order of an irsymtab, but when we read the irsymtab in
859 // InputFile::create we omit some symbols that are irrelevant to LTO. The
860 // Skip() function skips the same symbols from the module as InputFile does
861 // from the symbol table.
862 auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end();
863 auto Skip = [&]() {
864 while (MsymI != MsymE) {
865 auto Flags = SymTab.getSymbolFlags(S: *MsymI);
// Stop at the first symbol that is global and not format specific.
866 if ((Flags & object::BasicSymbolRef::SF_Global) &&
867 !(Flags & object::BasicSymbolRef::SF_FormatSpecific))
868 return;
869 ++MsymI;
870 }
871 };
872 Skip();
873
874 std::set<const Comdat *> NonPrevailingComdats;
875 SmallSet<StringRef, 2> NonPrevailingAsmSymbols;
// Walk Syms, the resolutions and the module symbol table in lockstep: each
// iteration consumes one resolution and one (non-skipped) module symbol.
876 for (const InputFile::Symbol &Sym : Syms) {
877 assert(ResI != ResE);
878 SymbolResolution Res = *ResI++;
879
880 assert(MsymI != MsymE);
881 ModuleSymbolTable::Symbol Msym = *MsymI++;
882 Skip();
883
884 if (GlobalValue *GV = dyn_cast_if_present<GlobalValue *>(Val&: Msym)) {
885 if (Res.Prevailing) {
// A prevailing but undefined symbol gets no further processing in this
// iteration (including the common handling at the bottom of the loop).
886 if (Sym.isUndefined())
887 continue;
888 Mod.Keep.push_back(x: GV);
889 // For symbols re-defined with linker -wrap and -defsym options,
890 // set the linkage to weak to inhibit IPO. The linkage will be
891 // restored by the linker.
892 if (Res.LinkerRedefined)
893 GV->setLinkage(GlobalValue::WeakAnyLinkage);
894
// Turn linkonce / linkonce_odr on a prevailing definition into the
// corresponding weak / weak_odr linkage.
895 GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage();
896 if (GlobalValue::isLinkOnceLinkage(Linkage: OriginalLinkage))
897 GV->setLinkage(GlobalValue::getWeakLinkage(
898 ODR: GlobalValue::isLinkOnceODRLinkage(Linkage: OriginalLinkage)));
899 } else if (isa<GlobalObject>(Val: GV) &&
900 (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() ||
901 GV->hasAvailableExternallyLinkage()) &&
902 !AliasedGlobals.count(V: cast<GlobalObject>(Val: GV))) {
903 // Any of the above three types of linkage indicates that the
904 // chosen prevailing symbol will have the same semantics as this copy of
905 // the symbol, so we may be able to link it with available_externally
906 // linkage. We will decide later whether to do that when we link this
907 // module (in linkRegularLTO), based on whether it is undefined.
908 Mod.Keep.push_back(x: GV);
909 GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
// Remember the comdat so its remaining members can be cleaned up after the
// loop, then detach this value from it.
910 if (GV->hasComdat())
911 NonPrevailingComdats.insert(x: GV->getComdat());
912 cast<GlobalObject>(Val: GV)->setComdat(nullptr);
913 }
914
915 // Set the 'local' flag based on the linker resolution for this symbol.
916 if (Res.FinalDefinitionInLinkageUnit) {
917 GV->setDSOLocal(true);
// A dllimport storage class is reset to the default one here.
918 if (GV->hasDLLImportStorageClass())
919 GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
920 DefaultStorageClass);
921 }
922 } else if (auto *AS =
923 dyn_cast_if_present<ModuleSymbolTable::AsmSymbol *>(Val&: Msym)) {
924 // Collect non-prevailing symbols.
925 if (!Res.Prevailing)
926 NonPrevailingAsmSymbols.insert(V: AS->first);
927 } else {
928 llvm_unreachable("unknown symbol type");
929 }
930
931 // Common resolution: collect the maximum size/alignment over all commons.
932 // We also record if we see an instance of a common as prevailing, so that
933 // if none is prevailing we can ignore it later.
934 if (Sym.isCommon()) {
935 // FIXME: We should figure out what to do about commons defined by asm.
936 // For now they aren't reported correctly by ModuleSymbolTable.
937 auto &CommonRes = RegularLTO.Commons[std::string(Sym.getIRName())];
938 CommonRes.Size = std::max(a: CommonRes.Size, b: Sym.getCommonSize());
// An alignment value of zero leaves the recorded alignment untouched.
939 if (uint32_t SymAlignValue = Sym.getCommonAlignment()) {
940 CommonRes.Alignment =
941 std::max(a: Align(SymAlignValue), b: CommonRes.Alignment);
942 }
943 CommonRes.Prevailing |= Res.Prevailing;
944 }
945 }
946
// Only scan the global values when the module has comdats at all.
947 if (!M.getComdatSymbolTable().empty())
948 for (GlobalValue &GV : M.global_values())
949 handleNonPrevailingComdat(GV, NonPrevailingComdats);
950
951 // Prepend ".lto_discard <sym>, <sym>*" directive to each module inline asm
952 // block.
953 if (!M.getModuleInlineAsm().empty()) {
954 std::string NewIA = ".lto_discard";
955 if (!NonPrevailingAsmSymbols.empty()) {
956 // Don't discard a symbol if there is a live .symver for it.
957 ModuleSymbolTable::CollectAsmSymvers(
958 M, AsmSymver: [&](StringRef Name, StringRef Alias) {
959 if (!NonPrevailingAsmSymbols.count(V: Alias))
960 NonPrevailingAsmSymbols.erase(V: Name);
961 });
962 NewIA += " " + llvm::join(R&: NonPrevailingAsmSymbols, Separator: ", ");
963 }
// The directive is emitted even when the symbol list is empty.
964 NewIA += "\n";
965 M.setModuleInlineAsm(NewIA + M.getModuleInlineAsm());
966 }
967
// Every non-skipped module symbol must have been matched with an entry of Syms.
968 assert(MsymI == MsymE);
969 return std::move(Mod);
970 }
971
972Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
973 bool LivenessFromIndex) {
974 std::vector<GlobalValue *> Keep;
975 for (GlobalValue *GV : Mod.Keep) {
976 if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GUID: GV->getGUID())) {
977 if (Function *F = dyn_cast<Function>(Val: GV)) {
978 if (DiagnosticOutputFile) {
979 if (Error Err = F->materialize())
980 return Err;
981 OptimizationRemarkEmitter ORE(F, nullptr);
982 ORE.emit(OptDiag&: OptimizationRemark(DEBUG_TYPE, "deadfunction", F)
983 << ore::NV("Function", F)
984 << " not added to the combined module ");
985 }
986 }
987 continue;
988 }
989
990 if (!GV->hasAvailableExternallyLinkage()) {
991 Keep.push_back(x: GV);
992 continue;
993 }
994
995 // Only link available_externally definitions if we don't already have a
996 // definition.
997 GlobalValue *CombinedGV =
998 RegularLTO.CombinedModule->getNamedValue(Name: GV->getName());
999 if (CombinedGV && !CombinedGV->isDeclaration())
1000 continue;
1001
1002 Keep.push_back(x: GV);
1003 }
1004
1005 return RegularLTO.Mover->move(Src: std::move(Mod.M), ValuesToLink: Keep, AddLazyFor: nullptr,
1006 /* IsPerformingImport */ false);
1007}
1008
1009// Add a ThinLTO module to the link.
1010Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
1011 const SymbolResolution *&ResI,
1012 const SymbolResolution *ResE) {
1013 const SymbolResolution *ResITmp = ResI;
1014 for (const InputFile::Symbol &Sym : Syms) {
1015 assert(ResITmp != ResE);
1016 SymbolResolution Res = *ResITmp++;
1017
1018 if (!Sym.getIRName().empty()) {
1019 auto GUID = GlobalValue::getGUID(GlobalName: GlobalValue::getGlobalIdentifier(
1020 Name: Sym.getIRName(), Linkage: GlobalValue::ExternalLinkage, FileName: ""));
1021 if (Res.Prevailing)
1022 ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier();
1023 }
1024 }
1025
1026 if (Error Err =
1027 BM.readSummary(CombinedIndex&: ThinLTO.CombinedIndex, ModulePath: BM.getModuleIdentifier(),
1028 IsPrevailing: [&](GlobalValue::GUID GUID) {
1029 return ThinLTO.PrevailingModuleForGUID[GUID] ==
1030 BM.getModuleIdentifier();
1031 }))
1032 return Err;
1033 LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n");
1034
1035 for (const InputFile::Symbol &Sym : Syms) {
1036 assert(ResI != ResE);
1037 SymbolResolution Res = *ResI++;
1038
1039 if (!Sym.getIRName().empty()) {
1040 auto GUID = GlobalValue::getGUID(GlobalName: GlobalValue::getGlobalIdentifier(
1041 Name: Sym.getIRName(), Linkage: GlobalValue::ExternalLinkage, FileName: ""));
1042 if (Res.Prevailing) {
1043 assert(ThinLTO.PrevailingModuleForGUID[GUID] ==
1044 BM.getModuleIdentifier());
1045
1046 // For linker redefined symbols (via --wrap or --defsym) we want to
1047 // switch the linkage to `weak` to prevent IPOs from happening.
1048 // Find the summary in the module for this very GV and record the new
1049 // linkage so that we can switch it when we import the GV.
1050 if (Res.LinkerRedefined)
1051 if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(
1052 ValueGUID: GUID, ModuleId: BM.getModuleIdentifier()))
1053 S->setLinkage(GlobalValue::WeakAnyLinkage);
1054 }
1055
1056 // If the linker resolved the symbol to a local definition then mark it
1057 // as local in the summary for the module we are adding.
1058 if (Res.FinalDefinitionInLinkageUnit) {
1059 if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(
1060 ValueGUID: GUID, ModuleId: BM.getModuleIdentifier())) {
1061 S->setDSOLocal(true);
1062 }
1063 }
1064 }
1065 }
1066
1067 if (!ThinLTO.ModuleMap.insert(KV: {BM.getModuleIdentifier(), BM}).second)
1068 return make_error<StringError>(
1069 Args: "Expected at most one ThinLTO module per bitcode file",
1070 Args: inconvertibleErrorCode());
1071
1072 if (!Conf.ThinLTOModulesToCompile.empty()) {
1073 if (!ThinLTO.ModulesToCompile)
1074 ThinLTO.ModulesToCompile = ModuleMapType();
1075 // This is a fuzzy name matching where only modules with name containing the
1076 // specified switch values are going to be compiled.
1077 for (const std::string &Name : Conf.ThinLTOModulesToCompile) {
1078 if (BM.getModuleIdentifier().contains(Other: Name)) {
1079 ThinLTO.ModulesToCompile->insert(KV: {BM.getModuleIdentifier(), BM});
1080 llvm::errs() << "[ThinLTO] Selecting " << BM.getModuleIdentifier()
1081 << " to compile\n";
1082 }
1083 }
1084 }
1085
1086 return Error::success();
1087}
1088
1089unsigned LTO::getMaxTasks() const {
1090 CalledGetMaxTasks = true;
1091 auto ModuleCount = ThinLTO.ModulesToCompile ? ThinLTO.ModulesToCompile->size()
1092 : ThinLTO.ModuleMap.size();
1093 return RegularLTO.ParallelCodeGenParallelismLevel + ModuleCount;
1094}
1095
1096// If only some of the modules were split, we cannot correctly handle
1097// code that contains type tests or type checked loads.
1098Error LTO::checkPartiallySplit() {
1099 if (!ThinLTO.CombinedIndex.partiallySplitLTOUnits())
1100 return Error::success();
1101
1102 Function *TypeTestFunc = RegularLTO.CombinedModule->getFunction(
1103 Name: Intrinsic::getName(id: Intrinsic::type_test));
1104 Function *TypeCheckedLoadFunc = RegularLTO.CombinedModule->getFunction(
1105 Name: Intrinsic::getName(id: Intrinsic::type_checked_load));
1106 Function *TypeCheckedLoadRelativeFunc =
1107 RegularLTO.CombinedModule->getFunction(
1108 Name: Intrinsic::getName(id: Intrinsic::type_checked_load_relative));
1109
1110 // First check if there are type tests / type checked loads in the
1111 // merged regular LTO module IR.
1112 if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
1113 (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()) ||
1114 (TypeCheckedLoadRelativeFunc &&
1115 !TypeCheckedLoadRelativeFunc->use_empty()))
1116 return make_error<StringError>(
1117 Args: "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
1118 Args: inconvertibleErrorCode());
1119
1120 // Otherwise check if there are any recorded in the combined summary from the
1121 // ThinLTO modules.
1122 for (auto &P : ThinLTO.CombinedIndex) {
1123 for (auto &S : P.second.SummaryList) {
1124 auto *FS = dyn_cast<FunctionSummary>(Val: S.get());
1125 if (!FS)
1126 continue;
1127 if (!FS->type_test_assume_vcalls().empty() ||
1128 !FS->type_checked_load_vcalls().empty() ||
1129 !FS->type_test_assume_const_vcalls().empty() ||
1130 !FS->type_checked_load_const_vcalls().empty() ||
1131 !FS->type_tests().empty())
1132 return make_error<StringError>(
1133 Args: "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
1134 Args: inconvertibleErrorCode());
1135 }
1136 }
1137 return Error::success();
1138}
1139
1140Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
1141 // Compute "dead" symbols, we don't want to import/export these!
1142 DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
1143 DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
1144 for (auto &Res : *GlobalResolutions) {
1145 // Normally resolution have IR name of symbol. We can do nothing here
1146 // otherwise. See comments in GlobalResolution struct for more details.
1147 if (Res.second.IRName.empty())
1148 continue;
1149
1150 GlobalValue::GUID GUID = GlobalValue::getGUID(
1151 GlobalName: GlobalValue::dropLLVMManglingEscape(Name: Res.second.IRName));
1152
1153 if (Res.second.VisibleOutsideSummary && Res.second.Prevailing)
1154 GUIDPreservedSymbols.insert(V: GUID);
1155
1156 if (Res.second.ExportDynamic)
1157 DynamicExportSymbols.insert(V: GUID);
1158
1159 GUIDPrevailingResolutions[GUID] =
1160 Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No;
1161 }
1162
1163 auto isPrevailing = [&](GlobalValue::GUID G) {
1164 auto It = GUIDPrevailingResolutions.find(Val: G);
1165 if (It == GUIDPrevailingResolutions.end())
1166 return PrevailingType::Unknown;
1167 return It->second;
1168 };
1169 computeDeadSymbolsWithConstProp(Index&: ThinLTO.CombinedIndex, GUIDPreservedSymbols,
1170 isPrevailing, ImportEnabled: Conf.OptLevel > 0);
1171
1172 // Setup output file to emit statistics.
1173 auto StatsFileOrErr = setupStatsFile(Conf.StatsFile);
1174 if (!StatsFileOrErr)
1175 return StatsFileOrErr.takeError();
1176 std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get());
1177
1178 // TODO: Ideally this would be controlled automatically by detecting that we
1179 // are linking with an allocator that supports these interfaces, rather than
1180 // an internal option (which would still be needed for tests, however). For
1181 // example, if the library exported a symbol like __malloc_hot_cold the linker
1182 // could recognize that and set a flag in the lto::Config.
1183 if (SupportsHotColdNew)
1184 ThinLTO.CombinedIndex.setWithSupportsHotColdNew();
1185
1186 Error Result = runRegularLTO(AddStream);
1187 if (!Result)
1188 // This will reset the GlobalResolutions optional once done with it to
1189 // reduce peak memory before importing.
1190 Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
1191
1192 if (StatsFile)
1193 PrintStatisticsJSON(OS&: StatsFile->os());
1194
1195 return Result;
1196}
1197
1198void lto::updateMemProfAttributes(Module &Mod,
1199 const ModuleSummaryIndex &Index) {
1200 if (Index.withSupportsHotColdNew())
1201 return;
1202
1203 // The profile matcher applies hotness attributes directly for allocations,
1204 // and those will cause us to generate calls to the hot/cold interfaces
1205 // unconditionally. If supports-hot-cold-new was not enabled in the LTO
1206 // link then assume we don't want these calls (e.g. not linking with
1207 // the appropriate library, or otherwise trying to disable this behavior).
1208 for (auto &F : Mod) {
1209 for (auto &BB : F) {
1210 for (auto &I : BB) {
1211 auto *CI = dyn_cast<CallBase>(Val: &I);
1212 if (!CI)
1213 continue;
1214 if (CI->hasFnAttr(Kind: "memprof"))
1215 CI->removeFnAttr(Kind: "memprof");
1216 // Strip off all memprof metadata as it is no longer needed.
1217 // Importantly, this avoids the addition of new memprof attributes
1218 // after inlining propagation.
1219 // TODO: If we support additional types of MemProf metadata beyond hot
1220 // and cold, we will need to update the metadata based on the allocator
1221 // APIs supported instead of completely stripping all.
1222 CI->setMetadata(KindID: LLVMContext::MD_memprof, Node: nullptr);
1223 CI->setMetadata(KindID: LLVMContext::MD_callsite, Node: nullptr);
1224 }
1225 }
1226 }
1227}
1228
// Runs the regular LTO part of the link on RegularLTO.CombinedModule:
// sets up optimization remarks, links the modules that were deferred because
// they carry summaries, fixes up common symbols, updates memprof and vcall
// visibility information, internalizes symbols (unless Conf.CodeGenOnly) and
// finally invokes the backend. Returns the first error encountered; on all
// other exits after remark setup the result of finalizeOptimizationRemarks
// is returned.
1229 Error LTO::runRegularLTO(AddStreamFn AddStream) {
1230 // Setup optimization remarks.
1231 auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
1232 Context&: RegularLTO.CombinedModule->getContext(), RemarksFilename: Conf.RemarksFilename,
1233 RemarksPasses: Conf.RemarksPasses, RemarksFormat: Conf.RemarksFormat, RemarksWithHotness: Conf.RemarksWithHotness,
1234 RemarksHotnessThreshold: Conf.RemarksHotnessThreshold);
1235 LLVM_DEBUG(dbgs() << "Running regular LTO\n");
1236 if (!DiagFileOrErr)
1237 return DiagFileOrErr.takeError();
1238 DiagnosticOutputFile = std::move(*DiagFileOrErr);
1239
1240 // Finalize linking of regular LTO modules containing summaries now that
1241 // we have computed liveness information.
1242 for (auto &M : RegularLTO.ModsWithSummaries)
1243 if (Error Err = linkRegularLTO(Mod: std::move(M),
1244 /*LivenessFromIndex=*/true))
1245 return Err;
1246
1247 // Ensure we don't have inconsistently split LTO units with type tests.
1248 // FIXME: this checks both LTO and ThinLTO. It happens to work as we take
1249 // this path in both cases, but eventually this should be split into two,
1250 // with the ThinLTO checks done in `runThinLTO`.
1251 if (Error Err = checkPartiallySplit())
1252 return Err;
1253
1254 // Make sure commons have the right size/alignment: we kept the largest from
1255 // all the prevailing when adding the inputs, and we apply it here.
1256 const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
1257 for (auto &I : RegularLTO.Commons) {
1258 if (!I.second.Prevailing)
1259 // Don't do anything if no instance of this common was prevailing.
1260 continue;
1261 GlobalVariable *OldGV = RegularLTO.CombinedModule->getNamedGlobal(Name: I.first);
1262 if (OldGV && DL.getTypeAllocSize(Ty: OldGV->getValueType()) == I.second.Size) {
1263 // Don't create a new global if the type is already correct, just make
1264 // sure the alignment is correct.
1265 OldGV->setAlignment(I.second.Alignment);
1266 continue;
1267 }
// Otherwise create a zero-initialized i8 array of the recorded size with
// common linkage; it either replaces the existing global or is newly named.
1268 ArrayType *Ty =
1269 ArrayType::get(ElementType: Type::getInt8Ty(C&: RegularLTO.Ctx), NumElements: I.second.Size);
1270 auto *GV = new GlobalVariable(*RegularLTO.CombinedModule, Ty, false,
1271 GlobalValue::CommonLinkage,
1272 ConstantAggregateZero::get(Ty), "");
1273 GV->setAlignment(I.second.Alignment);
1274 if (OldGV) {
1275 OldGV->replaceAllUsesWith(V: GV);
1276 GV->takeName(V: OldGV);
1277 OldGV->eraseFromParent();
1278 } else {
1279 GV->setName(I.first);
1280 }
1281 }
1282
1283 updateMemProfAttributes(Mod&: *RegularLTO.CombinedModule, Index: ThinLTO.CombinedIndex);
1284
1285 bool WholeProgramVisibilityEnabledInLTO =
1286 Conf.HasWholeProgramVisibility &&
1287 // If validation is enabled, upgrade visibility only when all vtables
1288 // have typeinfos.
1289 (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
1290
1291 // This returns true when the name is local or not defined. Locals are
1292 // expected to be handled separately.
1293 auto IsVisibleToRegularObj = [&](StringRef name) {
1294 auto It = GlobalResolutions->find(Key: name);
1295 return (It == GlobalResolutions->end() || It->second.VisibleOutsideSummary);
1296 };
1297
1298 // If allowed, upgrade public vcall visibility metadata to linkage unit
1299 // visibility before whole program devirtualization in the optimizer.
1300 updateVCallVisibilityInModule(
1301 M&: *RegularLTO.CombinedModule, WholeProgramVisibilityEnabledInLTO,
1302 DynamicExportSymbols, ValidateAllVtablesHaveTypeInfos: Conf.ValidateAllVtablesHaveTypeInfos,
1303 IsVisibleToRegularObj);
1304 updatePublicTypeTestCalls(M&: *RegularLTO.CombinedModule,
1305 WholeProgramVisibilityEnabledInLTO);
1306
// A pre-opt hook that returns false ends regular LTO here; remarks are still
// finalized.
1307 if (Conf.PreOptModuleHook &&
1308 !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
1309 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
1310
1311 if (!Conf.CodeGenOnly) {
// Update unnamed_addr and, where permitted, internalize the prevailing IR
// symbols that live in partition 0 or in the External partition.
1312 for (const auto &R : *GlobalResolutions) {
1313 GlobalValue *GV =
1314 RegularLTO.CombinedModule->getNamedValue(Name: R.second.IRName);
1315 if (!R.second.isPrevailingIRSymbol())
1316 continue;
1317 if (R.second.Partition != 0 &&
1318 R.second.Partition != GlobalResolution::External)
1319 continue;
1320
1321 // Ignore symbols defined in other partitions.
1322 // Also skip declarations, which are not allowed to have internal linkage.
1323 if (!GV || GV->hasLocalLinkage() || GV->isDeclaration())
1324 continue;
1325
1326 // Symbols that are marked DLLImport or DLLExport should not be
1327 // internalized, as they are either externally visible or referencing
1328 // external symbols. Symbols that have AvailableExternally or Appending
1329 // linkage might be used by future passes and should be kept as is.
1330 // These linkages are seen in Unified regular LTO, because the process
1331 // of creating split LTO units introduces symbols with that linkage into
1332 // one of the created modules. Normally, only the ThinLTO backend would
1333 // compile this module, but Unified Regular LTO processes both
1334 // modules created by the splitting process as regular LTO modules.
1335 if ((LTOMode == LTOKind::LTOK_UnifiedRegular) &&
1336 ((GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) ||
1337 GV->hasAvailableExternallyLinkage() || GV->hasAppendingLinkage()))
1338 continue;
1339
1340 GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
1341 : GlobalValue::UnnamedAddr::None);
// Only symbols in partition 0 are given internal linkage, and only when
// EnableLTOInternalization is set.
1342 if (EnableLTOInternalization && R.second.Partition == 0)
1343 GV->setLinkage(GlobalValue::InternalLinkage);
1344 }
1345
// As with the pre-opt hook, a false return stops here after finalizing remarks.
1346 if (Conf.PostInternalizeModuleHook &&
1347 !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
1348 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
1349 }
1350
// The backend is skipped for an empty combined module unless
// Conf.AlwaysEmitRegularLTOObj is set.
1351 if (!RegularLTO.EmptyCombinedModule || Conf.AlwaysEmitRegularLTOObj) {
1352 if (Error Err =
1353 backend(C: Conf, AddStream, ParallelCodeGenParallelismLevel: RegularLTO.ParallelCodeGenParallelismLevel,
1354 M&: *RegularLTO.CombinedModule, CombinedIndex&: ThinLTO.CombinedIndex))
1355 return Err;
1356 }
1357
1358 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
1359 }
1360
1361SmallVector<const char *> LTO::getRuntimeLibcallSymbols(const Triple &TT) {
1362 RTLIB::RuntimeLibcallsInfo Libcalls(TT);
1363 SmallVector<const char *> LibcallSymbols;
1364 copy_if(Range: Libcalls.getLibcallNames(), Out: std::back_inserter(x&: LibcallSymbols),
1365 P: [](const char *Name) { return Name; });
1366 return LibcallSymbols;
1367}
1368
1369/// This class defines the interface to the ThinLTO backend.
1370class lto::ThinBackendProc {
1371protected:
1372 const Config &Conf;
1373 ModuleSummaryIndex &CombinedIndex;
1374 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries;
1375 lto::IndexWriteCallback OnWrite;
1376 bool ShouldEmitImportsFiles;
1377
1378public:
1379 ThinBackendProc(
1380 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1381 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1382 lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles)
1383 : Conf(Conf), CombinedIndex(CombinedIndex),
1384 ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries),
1385 OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles) {}
1386
1387 virtual ~ThinBackendProc() = default;
1388 virtual Error start(
1389 unsigned Task, BitcodeModule BM,
1390 const FunctionImporter::ImportMapTy &ImportList,
1391 const FunctionImporter::ExportSetTy &ExportList,
1392 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1393 MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
1394 virtual Error wait() = 0;
1395 virtual unsigned getThreadCount() = 0;
1396
1397 // Write sharded indices and (optionally) imports to disk
1398 Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
1399 llvm::StringRef ModulePath,
1400 const std::string &NewModulePath) {
1401 std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
1402 GVSummaryPtrSet DeclarationSummaries;
1403
1404 std::error_code EC;
1405 gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
1406 ImportList, ModuleToSummariesForIndex,
1407 DecSummaries&: DeclarationSummaries);
1408
1409 raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
1410 sys::fs::OpenFlags::OF_None);
1411 if (EC)
1412 return errorCodeToError(EC);
1413
1414 writeIndexToFile(Index: CombinedIndex, Out&: OS, ModuleToSummariesForIndex: &ModuleToSummariesForIndex,
1415 DecSummaries: &DeclarationSummaries);
1416
1417 if (ShouldEmitImportsFiles) {
1418 EC = EmitImportsFiles(ModulePath, OutputFilename: NewModulePath + ".imports",
1419 ModuleToSummariesForIndex);
1420 if (EC)
1421 return errorCodeToError(EC);
1422 }
1423 return Error::success();
1424 }
1425};
1426
1427 namespace {
// ThinBackendProc implementation that runs each module's ThinLTO backend as a
// task on BackendThreadPool. Errors raised by the tasks are accumulated in Err
// (guarded by ErrMu) and handed back from wait().
1428 class InProcessThinBackend : public ThinBackendProc {
1429 DefaultThreadPool BackendThreadPool;
1430 AddStreamFn AddStream;
1431 FileCache Cache;
// GUIDs of the CFI function definitions / declarations recorded in the
// combined index; both sets are passed to computeLTOCacheKey.
1432 std::set<GlobalValue::GUID> CfiFunctionDefs;
1433 std::set<GlobalValue::GUID> CfiFunctionDecls;
1434
// Joined error of all failed backend tasks; access is serialized with ErrMu.
1435 std::optional<Error> Err;
1436 std::mutex ErrMu;
1437
1438 bool ShouldEmitIndexFiles;
1439
1440 public:
1441 InProcessThinBackend(
1442 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1443 ThreadPoolStrategy ThinLTOParallelism,
1444 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1445 AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
1446 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
1447 : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1448 OnWrite, ShouldEmitImportsFiles),
1449 BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)),
1450 Cache(std::move(Cache)), ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
// Precompute the GUIDs of the CFI function names (with the LLVM mangling
// escape dropped).
1451 for (auto &Name : CombinedIndex.cfiFunctionDefs())
1452 CfiFunctionDefs.insert(
1453 x: GlobalValue::getGUID(GlobalName: GlobalValue::dropLLVMManglingEscape(Name)));
1454 for (auto &Name : CombinedIndex.cfiFunctionDecls())
1455 CfiFunctionDecls.insert(
1456 x: GlobalValue::getGUID(GlobalName: GlobalValue::dropLLVMManglingEscape(Name)));
1457 }
1458
// Body of one backend task: optionally emits the index/imports files for BM,
// then either runs the backend directly (no cache, module not in the combined
// index, or all-zero module hash) or goes through the cache.
1459 Error runThinLTOBackendThread(
1460 AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
1461 ModuleSummaryIndex &CombinedIndex,
1462 const FunctionImporter::ImportMapTy &ImportList,
1463 const FunctionImporter::ExportSetTy &ExportList,
1464 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1465 const GVSummaryMapTy &DefinedGlobals,
1466 MapVector<StringRef, BitcodeModule> &ModuleMap) {
// Parses BM into a context created for this call and runs thinBackend on it,
// sending output to the given stream callback.
1467 auto RunThinBackend = [&](AddStreamFn AddStream) {
1468 LTOLLVMContext BackendContext(Conf);
1469 Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(Context&: BackendContext);
1470 if (!MOrErr)
1471 return MOrErr.takeError();
1472
1473 return thinBackend(C: Conf, Task, AddStream, M&: **MOrErr, CombinedIndex,
1474 ImportList, DefinedGlobals, ModuleMap: &ModuleMap);
1475 };
1476
1477 auto ModuleID = BM.getModuleIdentifier();
1478
1479 if (ShouldEmitIndexFiles) {
1480 if (auto E = emitFiles(ImportList, ModulePath: ModuleID, NewModulePath: ModuleID.str()))
1481 return E;
1482 }
1483
1484 if (!Cache || !CombinedIndex.modulePaths().count(Key: ModuleID) ||
1485 all_of(Range: CombinedIndex.getModuleHash(ModPath: ModuleID),
1486 P: [](uint32_t V) { return V == 0; }))
1487 // Cache disabled or no entry for this module in the combined index or
1488 // no module hash.
1489 return RunThinBackend(AddStream);
1490
1491 SmallString<40> Key;
1492 // The module may be cached, this helps handling it.
1493 computeLTOCacheKey(Key, Conf, Index: CombinedIndex, ModuleID, ImportList,
1494 ExportList, ResolvedODR, DefinedGlobals, CfiFunctionDefs,
1495 CfiFunctionDecls);
1496 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID);
1497 if (Error Err = CacheAddStreamOrErr.takeError())
1498 return Err;
// The backend only runs when the cache hands back a non-null stream callback.
// NOTE(review): a null callback presumably means a cache hit - confirm
// against the FileCache contract.
1499 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
1500 if (CacheAddStream)
1501 return RunThinBackend(CacheAddStream);
1502
1503 return Error::success();
1504 }
1505
// Queues the backend for BM on the thread pool and returns immediately with
// success; task failures are only reported through wait(). OnWrite, if set,
// is invoked with the module path once the task has been queued.
1506 Error start(
1507 unsigned Task, BitcodeModule BM,
1508 const FunctionImporter::ImportMapTy &ImportList,
1509 const FunctionImporter::ExportSetTy &ExportList,
1510 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1511 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1512 StringRef ModulePath = BM.getModuleIdentifier();
1513 assert(ModuleToDefinedGVSummaries.count(ModulePath));
1514 const GVSummaryMapTy &DefinedGlobals =
1515 ModuleToDefinedGVSummaries.find(Val: ModulePath)->second;
// Everything except BM is handed to the task through std::ref, so the task
// works on the caller's objects rather than on copies.
// NOTE(review): presumably the caller keeps those objects alive until wait()
// has returned - confirm against the call site.
1516 BackendThreadPool.async(
1517 F: [=](BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
1518 const FunctionImporter::ImportMapTy &ImportList,
1519 const FunctionImporter::ExportSetTy &ExportList,
1520 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
1521 &ResolvedODR,
1522 const GVSummaryMapTy &DefinedGlobals,
1523 MapVector<StringRef, BitcodeModule> &ModuleMap) {
1524 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
1525 timeTraceProfilerInitialize(TimeTraceGranularity: Conf.TimeTraceGranularity,
1526 ProcName: "thin backend");
1527 Error E = runThinLTOBackendThread(
1528 AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList,
1529 ResolvedODR, DefinedGlobals, ModuleMap);
// Fold this task's failure into the shared error under the lock.
1530 if (E) {
1531 std::unique_lock<std::mutex> L(ErrMu);
1532 if (Err)
1533 Err = joinErrors(E1: std::move(*Err), E2: std::move(E));
1534 else
1535 Err = std::move(E);
1536 }
1537 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
1538 timeTraceProfilerFinishThread();
1539 },
1540 ArgList&: BM, ArgList: std::ref(t&: CombinedIndex), ArgList: std::ref(t: ImportList), ArgList: std::ref(t: ExportList),
1541 ArgList: std::ref(t: ResolvedODR), ArgList: std::ref(t: DefinedGlobals), ArgList: std::ref(t&: ModuleMap));
1542
1543 if (OnWrite)
1544 OnWrite(std::string(ModulePath));
1545 return Error::success();
1546 }
1547
// Waits on the thread pool, then returns the accumulated task error, or
// success if no task failed.
1548 Error wait() override {
1549 BackendThreadPool.wait();
1550 if (Err)
1551 return std::move(*Err);
1552 else
1553 return Error::success();
1554 }
1555
// Reports the thread pool's maximum concurrency.
1556 unsigned getThreadCount() override {
1557 return BackendThreadPool.getMaxConcurrency();
1558 }
1559 };
1560 } // end anonymous namespace
1561
1562ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
1563 lto::IndexWriteCallback OnWrite,
1564 bool ShouldEmitIndexFiles,
1565 bool ShouldEmitImportsFiles) {
1566 return
1567 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1568 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1569 AddStreamFn AddStream, FileCache Cache) {
1570 return std::make_unique<InProcessThinBackend>(
1571 args: Conf, args&: CombinedIndex, args: Parallelism, args: ModuleToDefinedGVSummaries,
1572 args&: AddStream, args&: Cache, args: OnWrite, args: ShouldEmitIndexFiles,
1573 args: ShouldEmitImportsFiles);
1574 };
1575}
1576
1577StringLiteral lto::getThinLTODefaultCPU(const Triple &TheTriple) {
1578 if (!TheTriple.isOSDarwin())
1579 return "";
1580 if (TheTriple.getArch() == Triple::x86_64)
1581 return "core2";
1582 if (TheTriple.getArch() == Triple::x86)
1583 return "yonah";
1584 if (TheTriple.isArm64e())
1585 return "apple-a12";
1586 if (TheTriple.getArch() == Triple::aarch64 ||
1587 TheTriple.getArch() == Triple::aarch64_32)
1588 return "cyclone";
1589 return "";
1590}
1591
1592// Given the original \p Path to an output file, replace any path
1593// prefix matching \p OldPrefix with \p NewPrefix. Also, create the
1594// resulting directory if it does not yet exist.
1595std::string lto::getThinLTOOutputFile(StringRef Path, StringRef OldPrefix,
1596 StringRef NewPrefix) {
1597 if (OldPrefix.empty() && NewPrefix.empty())
1598 return std::string(Path);
1599 SmallString<128> NewPath(Path);
1600 llvm::sys::path::replace_path_prefix(Path&: NewPath, OldPrefix, NewPrefix);
1601 StringRef ParentPath = llvm::sys::path::parent_path(path: NewPath.str());
1602 if (!ParentPath.empty()) {
1603 // Make sure the new directory exists, creating it if necessary.
1604 if (std::error_code EC = llvm::sys::fs::create_directories(path: ParentPath))
1605 llvm::errs() << "warning: could not create directory '" << ParentPath
1606 << "': " << EC.message() << '\n';
1607 }
1608 return std::string(NewPath);
1609}
1610
1611namespace {
1612class WriteIndexesThinBackend : public ThinBackendProc {
1613 std::string OldPrefix, NewPrefix, NativeObjectPrefix;
1614 raw_fd_ostream *LinkedObjectsFile;
1615
1616public:
1617 WriteIndexesThinBackend(
1618 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1619 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1620 std::string OldPrefix, std::string NewPrefix,
1621 std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
1622 raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite)
1623 : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1624 OnWrite, ShouldEmitImportsFiles),
1625 OldPrefix(OldPrefix), NewPrefix(NewPrefix),
1626 NativeObjectPrefix(NativeObjectPrefix),
1627 LinkedObjectsFile(LinkedObjectsFile) {}
1628
1629 Error start(
1630 unsigned Task, BitcodeModule BM,
1631 const FunctionImporter::ImportMapTy &ImportList,
1632 const FunctionImporter::ExportSetTy &ExportList,
1633 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1634 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1635 StringRef ModulePath = BM.getModuleIdentifier();
1636 std::string NewModulePath =
1637 getThinLTOOutputFile(Path: ModulePath, OldPrefix, NewPrefix);
1638
1639 if (LinkedObjectsFile) {
1640 std::string ObjectPrefix =
1641 NativeObjectPrefix.empty() ? NewPrefix : NativeObjectPrefix;
1642 std::string LinkedObjectsFilePath =
1643 getThinLTOOutputFile(Path: ModulePath, OldPrefix, NewPrefix: ObjectPrefix);
1644 *LinkedObjectsFile << LinkedObjectsFilePath << '\n';
1645 }
1646
1647 if (auto E = emitFiles(ImportList, ModulePath, NewModulePath))
1648 return E;
1649
1650 if (OnWrite)
1651 OnWrite(std::string(ModulePath));
1652 return Error::success();
1653 }
1654
1655 Error wait() override { return Error::success(); }
1656
1657 // WriteIndexesThinBackend should always return 1 to prevent module
1658 // re-ordering and avoid non-determinism in the final link.
1659 unsigned getThreadCount() override { return 1; }
1660};
1661} // end anonymous namespace
1662
1663ThinBackend lto::createWriteIndexesThinBackend(
1664 std::string OldPrefix, std::string NewPrefix,
1665 std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
1666 raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite) {
1667 return
1668 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1669 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1670 AddStreamFn AddStream, FileCache Cache) {
1671 return std::make_unique<WriteIndexesThinBackend>(
1672 args: Conf, args&: CombinedIndex, args: ModuleToDefinedGVSummaries, args: OldPrefix,
1673 args: NewPrefix, args: NativeObjectPrefix, args: ShouldEmitImportsFiles,
1674 args: LinkedObjectsFile, args: OnWrite);
1675 };
1676}
1677
1678Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
1679 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
1680 LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
1681 ThinLTO.CombinedIndex.releaseTemporaryMemory();
1682 timeTraceProfilerBegin(Name: "ThinLink", Detail: StringRef(""));
1683 auto TimeTraceScopeExit = llvm::make_scope_exit(F: []() {
1684 if (llvm::timeTraceProfilerEnabled())
1685 llvm::timeTraceProfilerEnd();
1686 });
1687 if (ThinLTO.ModuleMap.empty())
1688 return Error::success();
1689
1690 if (ThinLTO.ModulesToCompile && ThinLTO.ModulesToCompile->empty()) {
1691 llvm::errs() << "warning: [ThinLTO] No module compiled\n";
1692 return Error::success();
1693 }
1694
1695 if (Conf.CombinedIndexHook &&
1696 !Conf.CombinedIndexHook(ThinLTO.CombinedIndex, GUIDPreservedSymbols))
1697 return Error::success();
1698
1699 // Collect for each module the list of function it defines (GUID ->
1700 // Summary).
1701 DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(
1702 ThinLTO.ModuleMap.size());
1703 ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule(
1704 ModuleToDefinedGVSummaries);
1705 // Create entries for any modules that didn't have any GV summaries
1706 // (either they didn't have any GVs to start with, or we suppressed
1707 // generation of the summaries because they e.g. had inline assembly
1708 // uses that couldn't be promoted/renamed on export). This is so
1709 // InProcessThinBackend::start can still launch a backend thread, which
1710 // is passed the map of summaries for the module, without any special
1711 // handling for this case.
1712 for (auto &Mod : ThinLTO.ModuleMap)
1713 if (!ModuleToDefinedGVSummaries.count(Val: Mod.first))
1714 ModuleToDefinedGVSummaries.try_emplace(Key: Mod.first);
1715
1716 // Synthesize entry counts for functions in the CombinedIndex.
1717 computeSyntheticCounts(Index&: ThinLTO.CombinedIndex);
1718
1719 DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(
1720 ThinLTO.ModuleMap.size());
1721 DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(
1722 ThinLTO.ModuleMap.size());
1723 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
1724
1725 if (DumpThinCGSCCs)
1726 ThinLTO.CombinedIndex.dumpSCCs(OS&: outs());
1727
1728 std::set<GlobalValue::GUID> ExportedGUIDs;
1729
1730 bool WholeProgramVisibilityEnabledInLTO =
1731 Conf.HasWholeProgramVisibility &&
1732 // If validation is enabled, upgrade visibility only when all vtables
1733 // have typeinfos.
1734 (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
1735 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
1736 ThinLTO.CombinedIndex.setWithWholeProgramVisibility();
1737
1738 // If we're validating, get the vtable symbols that should not be
1739 // upgraded because they correspond to typeIDs outside of index-based
1740 // WPD info.
1741 DenseSet<GlobalValue::GUID> VisibleToRegularObjSymbols;
1742 if (WholeProgramVisibilityEnabledInLTO &&
1743 Conf.ValidateAllVtablesHaveTypeInfos) {
1744 // This returns true when the name is local or not defined. Locals are
1745 // expected to be handled separately.
1746 auto IsVisibleToRegularObj = [&](StringRef name) {
1747 auto It = GlobalResolutions->find(Key: name);
1748 return (It == GlobalResolutions->end() ||
1749 It->second.VisibleOutsideSummary);
1750 };
1751
1752 getVisibleToRegularObjVtableGUIDs(Index&: ThinLTO.CombinedIndex,
1753 VisibleToRegularObjSymbols,
1754 IsVisibleToRegularObj);
1755 }
1756
1757 // If allowed, upgrade public vcall visibility to linkage unit visibility in
1758 // the summaries before whole program devirtualization below.
1759 updateVCallVisibilityInIndex(
1760 Index&: ThinLTO.CombinedIndex, WholeProgramVisibilityEnabledInLTO,
1761 DynamicExportSymbols, VisibleToRegularObjSymbols);
1762
1763 // Perform index-based WPD. This will return immediately if there are
1764 // no index entries in the typeIdMetadata map (e.g. if we are instead
1765 // performing IR-based WPD in hybrid regular/thin LTO mode).
1766 std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
1767 runWholeProgramDevirtOnIndex(Summary&: ThinLTO.CombinedIndex, ExportedGUIDs,
1768 LocalWPDTargetsMap);
1769
1770 auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
1771 return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
1772 };
1773 if (EnableMemProfContextDisambiguation) {
1774 MemProfContextDisambiguation ContextDisambiguation;
1775 ContextDisambiguation.run(Index&: ThinLTO.CombinedIndex, isPrevailing);
1776 }
1777
1778 // Figure out which symbols need to be internalized. This also needs to happen
1779 // at -O0 because summary-based DCE is implemented using internalization, and
1780 // we must apply DCE consistently with the full LTO module in order to avoid
1781 // undefined references during the final link.
1782 for (auto &Res : *GlobalResolutions) {
1783 // If the symbol does not have external references or it is not prevailing,
1784 // then not need to mark it as exported from a ThinLTO partition.
1785 if (Res.second.Partition != GlobalResolution::External ||
1786 !Res.second.isPrevailingIRSymbol())
1787 continue;
1788 auto GUID = GlobalValue::getGUID(
1789 GlobalName: GlobalValue::dropLLVMManglingEscape(Name: Res.second.IRName));
1790 // Mark exported unless index-based analysis determined it to be dead.
1791 if (ThinLTO.CombinedIndex.isGUIDLive(GUID))
1792 ExportedGUIDs.insert(x: GUID);
1793 }
1794
1795 // Reset the GlobalResolutions to deallocate the associated memory, as there
1796 // are no further accesses. We specifically want to do this before computing
1797 // cross module importing, which adds to peak memory via the computed import
1798 // and export lists.
1799 GlobalResolutions.reset();
1800
1801 if (Conf.OptLevel > 0)
1802 ComputeCrossModuleImport(Index: ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
1803 isPrevailing, ImportLists, ExportLists);
1804
1805 // Any functions referenced by the jump table in the regular LTO object must
1806 // be exported.
1807 for (auto &Def : ThinLTO.CombinedIndex.cfiFunctionDefs())
1808 ExportedGUIDs.insert(
1809 x: GlobalValue::getGUID(GlobalName: GlobalValue::dropLLVMManglingEscape(Name: Def)));
1810 for (auto &Decl : ThinLTO.CombinedIndex.cfiFunctionDecls())
1811 ExportedGUIDs.insert(
1812 x: GlobalValue::getGUID(GlobalName: GlobalValue::dropLLVMManglingEscape(Name: Decl)));
1813
1814 auto isExported = [&](StringRef ModuleIdentifier, ValueInfo VI) {
1815 const auto &ExportList = ExportLists.find(Val: ModuleIdentifier);
1816 return (ExportList != ExportLists.end() && ExportList->second.count(V: VI)) ||
1817 ExportedGUIDs.count(x: VI.getGUID());
1818 };
1819
1820 // Update local devirtualized targets that were exported by cross-module
1821 // importing or by other devirtualizations marked in the ExportedGUIDs set.
1822 updateIndexWPDForExports(Summary&: ThinLTO.CombinedIndex, isExported,
1823 LocalWPDTargetsMap);
1824
1825 thinLTOInternalizeAndPromoteInIndex(Index&: ThinLTO.CombinedIndex, isExported,
1826 isPrevailing);
1827
1828 auto recordNewLinkage = [&](StringRef ModuleIdentifier,
1829 GlobalValue::GUID GUID,
1830 GlobalValue::LinkageTypes NewLinkage) {
1831 ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
1832 };
1833 thinLTOResolvePrevailingInIndex(C: Conf, Index&: ThinLTO.CombinedIndex, isPrevailing,
1834 recordNewLinkage, GUIDPreservedSymbols);
1835
1836 thinLTOPropagateFunctionAttrs(Index&: ThinLTO.CombinedIndex, isPrevailing);
1837
1838 generateParamAccessSummary(Index&: ThinLTO.CombinedIndex);
1839
1840 if (llvm::timeTraceProfilerEnabled())
1841 llvm::timeTraceProfilerEnd();
1842
1843 TimeTraceScopeExit.release();
1844
1845 std::unique_ptr<ThinBackendProc> BackendProc =
1846 ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
1847 AddStream, Cache);
1848
1849 auto &ModuleMap =
1850 ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
1851
1852 auto ProcessOneModule = [&](int I) -> Error {
1853 auto &Mod = *(ModuleMap.begin() + I);
1854 // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for
1855 // combined module and parallel code generation partitions.
1856 return BackendProc->start(Task: RegularLTO.ParallelCodeGenParallelismLevel + I,
1857 BM: Mod.second, ImportList: ImportLists[Mod.first],
1858 ExportList: ExportLists[Mod.first], ResolvedODR: ResolvedODR[Mod.first],
1859 ModuleMap&: ThinLTO.ModuleMap);
1860 };
1861
1862 if (BackendProc->getThreadCount() == 1) {
1863 // Process the modules in the order they were provided on the command-line.
1864 // It is important for this codepath to be used for WriteIndexesThinBackend,
1865 // to ensure the emitted LinkedObjectsFile lists ThinLTO objects in the same
1866 // order as the inputs, which otherwise would affect the final link order.
1867 for (int I = 0, E = ModuleMap.size(); I != E; ++I)
1868 if (Error E = ProcessOneModule(I))
1869 return E;
1870 } else {
1871 // When executing in parallel, process largest bitsize modules first to
1872 // improve parallelism, and avoid starving the thread pool near the end.
1873 // This saves about 15 sec on a 36-core machine while link `clang.exe` (out
1874 // of 100 sec).
1875 std::vector<BitcodeModule *> ModulesVec;
1876 ModulesVec.reserve(n: ModuleMap.size());
1877 for (auto &Mod : ModuleMap)
1878 ModulesVec.push_back(x: &Mod.second);
1879 for (int I : generateModulesOrdering(R: ModulesVec))
1880 if (Error E = ProcessOneModule(I))
1881 return E;
1882 }
1883 return BackendProc->wait();
1884}
1885
1886Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(
1887 LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses,
1888 StringRef RemarksFormat, bool RemarksWithHotness,
1889 std::optional<uint64_t> RemarksHotnessThreshold, int Count) {
1890 std::string Filename = std::string(RemarksFilename);
1891 // For ThinLTO, file.opt.<format> becomes
1892 // file.opt.<format>.thin.<num>.<format>.
1893 if (!Filename.empty() && Count != -1)
1894 Filename =
1895 (Twine(Filename) + ".thin." + llvm::utostr(X: Count) + "." + RemarksFormat)
1896 .str();
1897
1898 auto ResultOrErr = llvm::setupLLVMOptimizationRemarks(
1899 Context, RemarksFilename: Filename, RemarksPasses, RemarksFormat, RemarksWithHotness,
1900 RemarksHotnessThreshold);
1901 if (Error E = ResultOrErr.takeError())
1902 return std::move(E);
1903
1904 if (*ResultOrErr)
1905 (*ResultOrErr)->keep();
1906
1907 return ResultOrErr;
1908}
1909
1910Expected<std::unique_ptr<ToolOutputFile>>
1911lto::setupStatsFile(StringRef StatsFilename) {
1912 // Setup output file to emit statistics.
1913 if (StatsFilename.empty())
1914 return nullptr;
1915
1916 llvm::EnableStatistics(DoPrintOnExit: false);
1917 std::error_code EC;
1918 auto StatsFile =
1919 std::make_unique<ToolOutputFile>(args&: StatsFilename, args&: EC, args: sys::fs::OF_None);
1920 if (EC)
1921 return errorCodeToError(EC);
1922
1923 StatsFile->keep();
1924 return std::move(StatsFile);
1925}
1926
1927// Compute the ordering we will process the inputs: the rough heuristic here
1928// is to sort them per size so that the largest module get schedule as soon as
1929// possible. This is purely a compile-time optimization.
1930std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
1931 auto Seq = llvm::seq<int>(Begin: 0, End: R.size());
1932 std::vector<int> ModulesOrdering(Seq.begin(), Seq.end());
1933 llvm::sort(C&: ModulesOrdering, Comp: [&](int LeftIndex, int RightIndex) {
1934 auto LSize = R[LeftIndex]->getBuffer().size();
1935 auto RSize = R[RightIndex]->getBuffer().size();
1936 return LSize > RSize;
1937 });
1938 return ModulesOrdering;
1939}
1940