1//===-LTO.cpp - LLVM Link Time Optimizer ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements functions and classes used to support LTO.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/LTO/LTO.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/ScopeExit.h"
16#include "llvm/ADT/SmallSet.h"
17#include "llvm/ADT/StableHashing.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/Analysis/OptimizationRemarkEmitter.h"
21#include "llvm/Analysis/StackSafetyAnalysis.h"
22#include "llvm/Analysis/TargetTransformInfo.h"
23#include "llvm/Bitcode/BitcodeReader.h"
24#include "llvm/Bitcode/BitcodeWriter.h"
25#include "llvm/CGData/CodeGenData.h"
26#include "llvm/CodeGen/Analysis.h"
27#include "llvm/Config/llvm-config.h"
28#include "llvm/IR/AutoUpgrade.h"
29#include "llvm/IR/DiagnosticPrinter.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/LLVMRemarkStreamer.h"
32#include "llvm/IR/LegacyPassManager.h"
33#include "llvm/IR/Mangler.h"
34#include "llvm/IR/Metadata.h"
35#include "llvm/IR/RuntimeLibcalls.h"
36#include "llvm/LTO/LTOBackend.h"
37#include "llvm/Linker/IRMover.h"
38#include "llvm/MC/TargetRegistry.h"
39#include "llvm/Object/IRObjectFile.h"
40#include "llvm/Support/Caching.h"
41#include "llvm/Support/CommandLine.h"
42#include "llvm/Support/Compiler.h"
43#include "llvm/Support/Error.h"
44#include "llvm/Support/FileSystem.h"
45#include "llvm/Support/JSON.h"
46#include "llvm/Support/MemoryBuffer.h"
47#include "llvm/Support/Path.h"
48#include "llvm/Support/Process.h"
49#include "llvm/Support/SHA1.h"
50#include "llvm/Support/Signals.h"
51#include "llvm/Support/SourceMgr.h"
52#include "llvm/Support/ThreadPool.h"
53#include "llvm/Support/Threading.h"
54#include "llvm/Support/TimeProfiler.h"
55#include "llvm/Support/ToolOutputFile.h"
56#include "llvm/Support/VCSRevision.h"
57#include "llvm/Support/raw_ostream.h"
58#include "llvm/Target/TargetOptions.h"
59#include "llvm/Transforms/IPO.h"
60#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
61#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
62#include "llvm/Transforms/Utils/FunctionImportUtils.h"
63#include "llvm/Transforms/Utils/SplitModule.h"
64
65#include <optional>
66#include <set>
67
68using namespace llvm;
69using namespace lto;
70using namespace object;
71
72#define DEBUG_TYPE "lto"
73
74static cl::opt<bool>
75 DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(Val: false), cl::Hidden,
76 cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
77
78namespace llvm {
79extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;
80extern cl::opt<bool> ForceImportAll;
81} // end namespace llvm
82
83namespace llvm {
84/// Enable global value internalization in LTO.
85cl::opt<bool> EnableLTOInternalization(
86 "enable-lto-internalization", cl::init(Val: true), cl::Hidden,
87 cl::desc("Enable global value internalization in LTO"));
88
89static cl::opt<bool>
90 LTOKeepSymbolCopies("lto-keep-symbol-copies", cl::init(Val: false), cl::Hidden,
91 cl::desc("Keep copies of symbols in LTO indexing"));
92
93/// Indicate we are linking with an allocator that supports hot/cold operator
94/// new interfaces.
95extern cl::opt<bool> SupportsHotColdNew;
96
97/// Enable MemProf context disambiguation for thin link.
98extern cl::opt<bool> EnableMemProfContextDisambiguation;
99} // namespace llvm
100
101// Computes a unique hash for the Module considering the current list of
102// export/import and other global analysis results.
103// Returns the hash in its hexadecimal representation.
104std::string llvm::computeLTOCacheKey(
105 const Config &Conf, const ModuleSummaryIndex &Index, StringRef ModuleID,
106 const FunctionImporter::ImportMapTy &ImportList,
107 const FunctionImporter::ExportSetTy &ExportList,
108 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
109 const GVSummaryMapTy &DefinedGlobals,
110 const DenseSet<GlobalValue::GUID> &CfiFunctionDefs,
111 const DenseSet<GlobalValue::GUID> &CfiFunctionDecls) {
112 // Compute the unique hash for this entry.
113 // This is based on the current compiler version, the module itself, the
114 // export list, the hash for every single module in the import list, the
115 // list of ResolvedODR for the module, and the list of preserved symbols.
116 SHA1 Hasher;
117
118 // Start with the compiler revision
119 Hasher.update(LLVM_VERSION_STRING);
120#ifdef LLVM_REVISION
121 Hasher.update(LLVM_REVISION);
122#endif
123
124 // Include the parts of the LTO configuration that affect code generation.
125 auto AddString = [&](StringRef Str) {
126 Hasher.update(Str);
127 Hasher.update(Data: ArrayRef<uint8_t>{0});
128 };
129 auto AddUnsigned = [&](unsigned I) {
130 uint8_t Data[4];
131 support::endian::write32le(P: Data, V: I);
132 Hasher.update(Data);
133 };
134 auto AddUint64 = [&](uint64_t I) {
135 uint8_t Data[8];
136 support::endian::write64le(P: Data, V: I);
137 Hasher.update(Data);
138 };
139 auto AddUint8 = [&](const uint8_t I) {
140 Hasher.update(Data: ArrayRef<uint8_t>(&I, 1));
141 };
142 AddString(Conf.CPU);
143 // FIXME: Hash more of Options. For now all clients initialize Options from
144 // command-line flags (which is unsupported in production), but may set
145 // X86RelaxRelocations. The clang driver can also pass FunctionSections,
146 // DataSections and DebuggerTuning via command line flags.
147 AddUnsigned(Conf.Options.MCOptions.X86RelaxRelocations);
148 AddUnsigned(Conf.Options.FunctionSections);
149 AddUnsigned(Conf.Options.DataSections);
150 AddUnsigned((unsigned)Conf.Options.DebuggerTuning);
151 for (auto &A : Conf.MAttrs)
152 AddString(A);
153 if (Conf.RelocModel)
154 AddUnsigned(*Conf.RelocModel);
155 else
156 AddUnsigned(-1);
157 if (Conf.CodeModel)
158 AddUnsigned(*Conf.CodeModel);
159 else
160 AddUnsigned(-1);
161 for (const auto &S : Conf.MllvmArgs)
162 AddString(S);
163 AddUnsigned(static_cast<int>(Conf.CGOptLevel));
164 AddUnsigned(static_cast<int>(Conf.CGFileType));
165 AddUnsigned(Conf.OptLevel);
166 AddUnsigned(Conf.Freestanding);
167 AddString(Conf.OptPipeline);
168 AddString(Conf.AAPipeline);
169 AddString(Conf.OverrideTriple);
170 AddString(Conf.DefaultTriple);
171 AddString(Conf.DwoDir);
172 AddUint8(Conf.Dtlto);
173
174 // Include the hash for the current module
175 auto ModHash = Index.getModuleHash(ModPath: ModuleID);
176 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
177
178 // TODO: `ExportList` is determined by `ImportList`. Since `ImportList` is
179 // used to compute cache key, we could omit hashing `ExportList` here.
180 std::vector<uint64_t> ExportsGUID;
181 ExportsGUID.reserve(n: ExportList.size());
182 for (const auto &VI : ExportList)
183 ExportsGUID.push_back(x: VI.getGUID());
184
185 // Sort the export list elements GUIDs.
186 llvm::sort(C&: ExportsGUID);
187 for (auto GUID : ExportsGUID)
188 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
189
190 // Order using module hash, to be both independent of module name and
191 // module order.
192 auto Comp = [&](const std::pair<StringRef, GlobalValue::GUID> &L,
193 const std::pair<StringRef, GlobalValue::GUID> &R) {
194 return std::make_pair(x: Index.getModule(ModPath: L.first)->second, y: L.second) <
195 std::make_pair(x: Index.getModule(ModPath: R.first)->second, y: R.second);
196 };
197 FunctionImporter::SortedImportList SortedImportList(ImportList, Comp);
198
199 // Count the number of imports for each source module.
200 DenseMap<StringRef, unsigned> ModuleToNumImports;
201 for (const auto &[FromModule, GUID, Type] : SortedImportList)
202 ++ModuleToNumImports[FromModule];
203
204 std::optional<StringRef> LastModule;
205 for (const auto &[FromModule, GUID, Type] : SortedImportList) {
206 if (LastModule != FromModule) {
207 // Include the hash for every module we import functions from. The set of
208 // imported symbols for each module may affect code generation and is
209 // sensitive to link order, so include that as well.
210 LastModule = FromModule;
211 auto ModHash = Index.getModule(ModPath: FromModule)->second;
212 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
213 AddUint64(ModuleToNumImports[FromModule]);
214 }
215 AddUint64(GUID);
216 AddUint8(Type);
217 }
218
219 // Include the hash for the resolved ODR.
220 for (auto &Entry : ResolvedODR) {
221 Hasher.update(Data: ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
222 sizeof(GlobalValue::GUID)));
223 Hasher.update(Data: ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
224 sizeof(GlobalValue::LinkageTypes)));
225 }
226
227 // Members of CfiFunctionDefs and CfiFunctionDecls that are referenced or
228 // defined in this module.
229 std::set<GlobalValue::GUID> UsedCfiDefs;
230 std::set<GlobalValue::GUID> UsedCfiDecls;
231
232 // Typeids used in this module.
233 std::set<GlobalValue::GUID> UsedTypeIds;
234
235 auto AddUsedCfiGlobal = [&](GlobalValue::GUID ValueGUID) {
236 if (CfiFunctionDefs.contains(V: ValueGUID))
237 UsedCfiDefs.insert(x: ValueGUID);
238 if (CfiFunctionDecls.contains(V: ValueGUID))
239 UsedCfiDecls.insert(x: ValueGUID);
240 };
241
242 auto AddUsedThings = [&](GlobalValueSummary *GS) {
243 if (!GS) return;
244 AddUnsigned(GS->getVisibility());
245 AddUnsigned(GS->isLive());
246 AddUnsigned(GS->canAutoHide());
247 for (const ValueInfo &VI : GS->refs()) {
248 AddUnsigned(VI.isDSOLocal(WithDSOLocalPropagation: Index.withDSOLocalPropagation()));
249 AddUsedCfiGlobal(VI.getGUID());
250 }
251 if (auto *GVS = dyn_cast<GlobalVarSummary>(Val: GS)) {
252 AddUnsigned(GVS->maybeReadOnly());
253 AddUnsigned(GVS->maybeWriteOnly());
254 }
255 if (auto *FS = dyn_cast<FunctionSummary>(Val: GS)) {
256 for (auto &TT : FS->type_tests())
257 UsedTypeIds.insert(x: TT);
258 for (auto &TT : FS->type_test_assume_vcalls())
259 UsedTypeIds.insert(x: TT.GUID);
260 for (auto &TT : FS->type_checked_load_vcalls())
261 UsedTypeIds.insert(x: TT.GUID);
262 for (auto &TT : FS->type_test_assume_const_vcalls())
263 UsedTypeIds.insert(x: TT.VFunc.GUID);
264 for (auto &TT : FS->type_checked_load_const_vcalls())
265 UsedTypeIds.insert(x: TT.VFunc.GUID);
266 for (auto &ET : FS->calls()) {
267 AddUnsigned(ET.first.isDSOLocal(WithDSOLocalPropagation: Index.withDSOLocalPropagation()));
268 AddUsedCfiGlobal(ET.first.getGUID());
269 }
270 }
271 };
272
273 // Include the hash for the linkage type to reflect internalization and weak
274 // resolution, and collect any used type identifier resolutions.
275 for (auto &GS : DefinedGlobals) {
276 GlobalValue::LinkageTypes Linkage = GS.second->linkage();
277 Hasher.update(
278 Data: ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
279 AddUsedCfiGlobal(GS.first);
280 AddUsedThings(GS.second);
281 }
282
283 // Imported functions may introduce new uses of type identifier resolutions,
284 // so we need to collect their used resolutions as well.
285 for (const auto &[FromModule, GUID, Type] : SortedImportList) {
286 GlobalValueSummary *S = Index.findSummaryInModule(ValueGUID: GUID, ModuleId: FromModule);
287 AddUsedThings(S);
288 // If this is an alias, we also care about any types/etc. that the aliasee
289 // may reference.
290 if (auto *AS = dyn_cast_or_null<AliasSummary>(Val: S))
291 AddUsedThings(AS->getBaseObject());
292 }
293
294 auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
295 AddString(TId);
296
297 AddUnsigned(S.TTRes.TheKind);
298 AddUnsigned(S.TTRes.SizeM1BitWidth);
299
300 AddUint64(S.TTRes.AlignLog2);
301 AddUint64(S.TTRes.SizeM1);
302 AddUint64(S.TTRes.BitMask);
303 AddUint64(S.TTRes.InlineBits);
304
305 AddUint64(S.WPDRes.size());
306 for (auto &WPD : S.WPDRes) {
307 AddUnsigned(WPD.first);
308 AddUnsigned(WPD.second.TheKind);
309 AddString(WPD.second.SingleImplName);
310
311 AddUint64(WPD.second.ResByArg.size());
312 for (auto &ByArg : WPD.second.ResByArg) {
313 AddUint64(ByArg.first.size());
314 for (uint64_t Arg : ByArg.first)
315 AddUint64(Arg);
316 AddUnsigned(ByArg.second.TheKind);
317 AddUint64(ByArg.second.Info);
318 AddUnsigned(ByArg.second.Byte);
319 AddUnsigned(ByArg.second.Bit);
320 }
321 }
322 };
323
324 // Include the hash for all type identifiers used by this module.
325 for (GlobalValue::GUID TId : UsedTypeIds) {
326 auto TidIter = Index.typeIds().equal_range(x: TId);
327 for (const auto &I : make_range(p: TidIter))
328 AddTypeIdSummary(I.second.first, I.second.second);
329 }
330
331 AddUnsigned(UsedCfiDefs.size());
332 for (auto &V : UsedCfiDefs)
333 AddUint64(V);
334
335 AddUnsigned(UsedCfiDecls.size());
336 for (auto &V : UsedCfiDecls)
337 AddUint64(V);
338
339 if (!Conf.SampleProfile.empty()) {
340 auto FileOrErr = MemoryBuffer::getFile(Filename: Conf.SampleProfile);
341 if (FileOrErr) {
342 Hasher.update(Str: FileOrErr.get()->getBuffer());
343
344 if (!Conf.ProfileRemapping.empty()) {
345 FileOrErr = MemoryBuffer::getFile(Filename: Conf.ProfileRemapping);
346 if (FileOrErr)
347 Hasher.update(Str: FileOrErr.get()->getBuffer());
348 }
349 }
350 }
351
352 return toHex(Input: Hasher.result());
353}
354
355std::string llvm::recomputeLTOCacheKey(const std::string &Key,
356 StringRef ExtraID) {
357 SHA1 Hasher;
358
359 auto AddString = [&](StringRef Str) {
360 Hasher.update(Str);
361 Hasher.update(Data: ArrayRef<uint8_t>{0});
362 };
363 AddString(Key);
364 AddString(ExtraID);
365
366 return toHex(Input: Hasher.result());
367}
368
369static void thinLTOResolvePrevailingGUID(
370 const Config &C, ValueInfo VI,
371 DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
372 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
373 isPrevailing,
374 function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
375 recordNewLinkage,
376 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
377 GlobalValue::VisibilityTypes Visibility =
378 C.VisibilityScheme == Config::ELF ? VI.getELFVisibility()
379 : GlobalValue::DefaultVisibility;
380 for (auto &S : VI.getSummaryList()) {
381 GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
382 // Ignore local and appending linkage values since the linker
383 // doesn't resolve them.
384 if (GlobalValue::isLocalLinkage(Linkage: OriginalLinkage) ||
385 GlobalValue::isAppendingLinkage(Linkage: S->linkage()))
386 continue;
387 // We need to emit only one of these. The prevailing module will keep it,
388 // but turned into a weak, while the others will drop it when possible.
389 // This is both a compile-time optimization and a correctness
390 // transformation. This is necessary for correctness when we have exported
391 // a reference - we need to convert the linkonce to weak to
392 // ensure a copy is kept to satisfy the exported reference.
393 // FIXME: We may want to split the compile time and correctness
394 // aspects into separate routines.
395 if (isPrevailing(VI.getGUID(), S.get())) {
396 if (GlobalValue::isLinkOnceLinkage(Linkage: OriginalLinkage)) {
397 S->setLinkage(GlobalValue::getWeakLinkage(
398 ODR: GlobalValue::isLinkOnceODRLinkage(Linkage: OriginalLinkage)));
399 // The kept copy is eligible for auto-hiding (hidden visibility) if all
400 // copies were (i.e. they were all linkonce_odr global unnamed addr).
401 // If any copy is not (e.g. it was originally weak_odr), then the symbol
402 // must remain externally available (e.g. a weak_odr from an explicitly
403 // instantiated template). Additionally, if it is in the
404 // GUIDPreservedSymbols set, that means that it is visibile outside
405 // the summary (e.g. in a native object or a bitcode file without
406 // summary), and in that case we cannot hide it as it isn't possible to
407 // check all copies.
408 S->setCanAutoHide(VI.canAutoHide() &&
409 !GUIDPreservedSymbols.count(V: VI.getGUID()));
410 }
411 if (C.VisibilityScheme == Config::FromPrevailing)
412 Visibility = S->getVisibility();
413 }
414 // Alias and aliasee can't be turned into available_externally.
415 // When force-import-all is used, it indicates that object linking is not
416 // supported by the target. In this case, we can't change the linkage as
417 // well in case the global is converted to declaration.
418 else if (!isa<AliasSummary>(Val: S.get()) &&
419 !GlobalInvolvedWithAlias.count(V: S.get()) && !ForceImportAll)
420 S->setLinkage(GlobalValue::AvailableExternallyLinkage);
421
422 // For ELF, set visibility to the computed visibility from summaries. We
423 // don't track visibility from declarations so this may be more relaxed than
424 // the most constraining one.
425 if (C.VisibilityScheme == Config::ELF)
426 S->setVisibility(Visibility);
427
428 if (S->linkage() != OriginalLinkage)
429 recordNewLinkage(S->modulePath(), VI.getGUID(), S->linkage());
430 }
431
432 if (C.VisibilityScheme == Config::FromPrevailing) {
433 for (auto &S : VI.getSummaryList()) {
434 GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
435 if (GlobalValue::isLocalLinkage(Linkage: OriginalLinkage) ||
436 GlobalValue::isAppendingLinkage(Linkage: S->linkage()))
437 continue;
438 S->setVisibility(Visibility);
439 }
440 }
441}
442
443/// Resolve linkage for prevailing symbols in the \p Index.
444//
445// We'd like to drop these functions if they are no longer referenced in the
446// current module. However there is a chance that another module is still
447// referencing them because of the import. We make sure we always emit at least
448// one copy.
449void llvm::thinLTOResolvePrevailingInIndex(
450 const Config &C, ModuleSummaryIndex &Index,
451 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
452 isPrevailing,
453 function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
454 recordNewLinkage,
455 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
456 // We won't optimize the globals that are referenced by an alias for now
457 // Ideally we should turn the alias into a global and duplicate the definition
458 // when needed.
459 DenseSet<GlobalValueSummary *> GlobalInvolvedWithAlias;
460 for (auto &I : Index)
461 for (auto &S : I.second.getSummaryList())
462 if (auto AS = dyn_cast<AliasSummary>(Val: S.get()))
463 GlobalInvolvedWithAlias.insert(V: &AS->getAliasee());
464
465 for (auto &I : Index)
466 thinLTOResolvePrevailingGUID(C, VI: Index.getValueInfo(R: I),
467 GlobalInvolvedWithAlias, isPrevailing,
468 recordNewLinkage, GUIDPreservedSymbols);
469}
470
471static void thinLTOInternalizeAndPromoteGUID(
472 ValueInfo VI, function_ref<bool(StringRef, ValueInfo)> isExported,
473 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
474 isPrevailing) {
475 // Before performing index-based internalization and promotion for this GUID,
476 // the local flag should be consistent with the summary list linkage types.
477 VI.verifyLocal();
478
479 const bool SingleExternallyVisibleCopy =
480 VI.getSummaryList().size() == 1 &&
481 !GlobalValue::isLocalLinkage(Linkage: VI.getSummaryList().front()->linkage());
482
483 for (auto &S : VI.getSummaryList()) {
484 // First see if we need to promote an internal value because it is not
485 // exported.
486 if (isExported(S->modulePath(), VI)) {
487 if (GlobalValue::isLocalLinkage(Linkage: S->linkage()))
488 S->setLinkage(GlobalValue::ExternalLinkage);
489 continue;
490 }
491
492 // Otherwise, see if we can internalize.
493 if (!EnableLTOInternalization)
494 continue;
495
496 // Non-exported values with external linkage can be internalized.
497 if (GlobalValue::isExternalLinkage(Linkage: S->linkage())) {
498 S->setLinkage(GlobalValue::InternalLinkage);
499 continue;
500 }
501
502 // Non-exported function and variable definitions with a weak-for-linker
503 // linkage can be internalized in certain cases. The minimum legality
504 // requirements would be that they are not address taken to ensure that we
505 // don't break pointer equality checks, and that variables are either read-
506 // or write-only. For functions, this is the case if either all copies are
507 // [local_]unnamed_addr, or we can propagate reference edge attributes
508 // (which is how this is guaranteed for variables, when analyzing whether
509 // they are read or write-only).
510 //
511 // However, we only get to this code for weak-for-linkage values in one of
512 // two cases:
513 // 1) The prevailing copy is not in IR (it is in native code).
514 // 2) The prevailing copy in IR is not exported from its module.
515 // Additionally, at least for the new LTO API, case 2 will only happen if
516 // there is exactly one definition of the value (i.e. in exactly one
517 // module), as duplicate defs are result in the value being marked exported.
518 // Likely, users of the legacy LTO API are similar, however, currently there
519 // are llvm-lto based tests of the legacy LTO API that do not mark
520 // duplicate linkonce_odr copies as exported via the tool, so we need
521 // to handle that case below by checking the number of copies.
522 //
523 // Generally, we only want to internalize a weak-for-linker value in case
524 // 2, because in case 1 we cannot see how the value is used to know if it
525 // is read or write-only. We also don't want to bloat the binary with
526 // multiple internalized copies of non-prevailing linkonce/weak functions.
527 // Note if we don't internalize, we will convert non-prevailing copies to
528 // available_externally anyway, so that we drop them after inlining. The
529 // only reason to internalize such a function is if we indeed have a single
530 // copy, because internalizing it won't increase binary size, and enables
531 // use of inliner heuristics that are more aggressive in the face of a
532 // single call to a static (local). For variables, internalizing a read or
533 // write only variable can enable more aggressive optimization. However, we
534 // already perform this elsewhere in the ThinLTO backend handling for
535 // read or write-only variables (processGlobalForThinLTO).
536 //
537 // Therefore, only internalize linkonce/weak if there is a single copy, that
538 // is prevailing in this IR module. We can do so aggressively, without
539 // requiring the address to be insignificant, or that a variable be read or
540 // write-only.
541 if (!GlobalValue::isWeakForLinker(Linkage: S->linkage()) ||
542 GlobalValue::isExternalWeakLinkage(Linkage: S->linkage()))
543 continue;
544
545 // We may have a single summary copy that is externally visible but not
546 // prevailing if the prevailing copy is in a native object.
547 if (SingleExternallyVisibleCopy && isPrevailing(VI.getGUID(), S.get()))
548 S->setLinkage(GlobalValue::InternalLinkage);
549 }
550}
551
552// Update the linkages in the given \p Index to mark exported values
553// as external and non-exported values as internal.
554void llvm::thinLTOInternalizeAndPromoteInIndex(
555 ModuleSummaryIndex &Index,
556 function_ref<bool(StringRef, ValueInfo)> isExported,
557 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
558 isPrevailing) {
559 assert(!Index.withInternalizeAndPromote());
560 for (auto &I : Index)
561 thinLTOInternalizeAndPromoteGUID(VI: Index.getValueInfo(R: I), isExported,
562 isPrevailing);
563 Index.setWithInternalizeAndPromote();
564}
565
566// Requires a destructor for std::vector<InputModule>.
567InputFile::~InputFile() = default;
568
569Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
570 std::unique_ptr<InputFile> File(new InputFile);
571
572 Expected<IRSymtabFile> FOrErr = readIRSymtab(MBRef: Object);
573 if (!FOrErr)
574 return FOrErr.takeError();
575
576 File->TargetTriple = FOrErr->TheReader.getTargetTriple();
577 File->SourceFileName = FOrErr->TheReader.getSourceFileName();
578 File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
579 File->DependentLibraries = FOrErr->TheReader.getDependentLibraries();
580 File->ComdatTable = FOrErr->TheReader.getComdatTable();
581 File->MbRef =
582 Object; // Save a memory buffer reference to an input file object.
583
584 for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
585 size_t Begin = File->Symbols.size();
586 for (const irsymtab::Reader::SymbolRef &Sym :
587 FOrErr->TheReader.module_symbols(I))
588 // Skip symbols that are irrelevant to LTO. Note that this condition needs
589 // to match the one in Skip() in LTO::addRegularLTO().
590 if (Sym.isGlobal() && !Sym.isFormatSpecific())
591 File->Symbols.push_back(x: Sym);
592 File->ModuleSymIndices.push_back(x: {Begin, File->Symbols.size()});
593 }
594
595 File->Mods = FOrErr->Mods;
596 File->Strtab = std::move(FOrErr->Strtab);
597 return std::move(File);
598}
599
600bool InputFile::Symbol::isLibcall(
601 const RTLIB::RuntimeLibcallsInfo &Libcalls) const {
602 return Libcalls.getSupportedLibcallImpl(FuncName: IRName) != RTLIB::Unsupported;
603}
604
605StringRef InputFile::getName() const {
606 return Mods[0].getModuleIdentifier();
607}
608
609BitcodeModule &InputFile::getSingleBitcodeModule() {
610 assert(Mods.size() == 1 && "Expect only one bitcode module");
611 return Mods[0];
612}
613
614BitcodeModule &InputFile::getPrimaryBitcodeModule() { return Mods[0]; }
615
616LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
617 const Config &Conf)
618 : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel),
619 Ctx(Conf), CombinedModule(std::make_unique<Module>(args: "ld-temp.o", args&: Ctx)),
620 Mover(std::make_unique<IRMover>(args&: *CombinedModule)) {}
621
622LTO::ThinLTOState::ThinLTOState(ThinBackend BackendParam)
623 : Backend(std::move(BackendParam)), CombinedIndex(/*HaveGVs*/ false) {
624 if (!Backend.isValid())
625 Backend =
626 createInProcessThinBackend(Parallelism: llvm::heavyweight_hardware_concurrency());
627}
628
629LTO::LTO(Config Conf, ThinBackend Backend,
630 unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode)
631 : Conf(std::move(Conf)),
632 RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
633 ThinLTO(std::move(Backend)),
634 GlobalResolutions(
635 std::make_unique<DenseMap<StringRef, GlobalResolution>>()),
636 LTOMode(LTOMode) {
637 if (Conf.KeepSymbolNameCopies || LTOKeepSymbolCopies) {
638 Alloc = std::make_unique<BumpPtrAllocator>();
639 GlobalResolutionSymbolSaver = std::make_unique<llvm::StringSaver>(args&: *Alloc);
640 }
641}
642
643// Requires a destructor for MapVector<BitcodeModule>.
644LTO::~LTO() = default;
645
646// Add the symbols in the given module to the GlobalResolutions map, and resolve
647// their partitions.
648void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
649 ArrayRef<SymbolResolution> Res,
650 unsigned Partition, bool InSummary,
651 const Triple &TT) {
652 llvm::TimeTraceScope timeScope("LTO add module to global resolution");
653 auto *ResI = Res.begin();
654 auto *ResE = Res.end();
655 (void)ResE;
656 RTLIB::RuntimeLibcallsInfo Libcalls(TT);
657 for (const InputFile::Symbol &Sym : Syms) {
658 assert(ResI != ResE);
659 SymbolResolution Res = *ResI++;
660
661 StringRef SymbolName = Sym.getName();
662 // Keep copies of symbols if the client of LTO says so.
663 if (GlobalResolutionSymbolSaver && !GlobalResolutions->contains(Val: SymbolName))
664 SymbolName = GlobalResolutionSymbolSaver->save(S: SymbolName);
665
666 auto &GlobalRes = (*GlobalResolutions)[SymbolName];
667 GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
668 if (Res.Prevailing) {
669 assert(!GlobalRes.Prevailing &&
670 "Multiple prevailing defs are not allowed");
671 GlobalRes.Prevailing = true;
672 GlobalRes.IRName = std::string(Sym.getIRName());
673 } else if (!GlobalRes.Prevailing && GlobalRes.IRName.empty()) {
674 // Sometimes it can be two copies of symbol in a module and prevailing
675 // symbol can have no IR name. That might happen if symbol is defined in
676 // module level inline asm block. In case we have multiple modules with
677 // the same symbol we want to use IR name of the prevailing symbol.
678 // Otherwise, if we haven't seen a prevailing symbol, set the name so that
679 // we can later use it to check if there is any prevailing copy in IR.
680 GlobalRes.IRName = std::string(Sym.getIRName());
681 }
682
683 // In rare occasion, the symbol used to initialize GlobalRes has a different
684 // IRName from the inspected Symbol. This can happen on macOS + iOS, when a
685 // symbol is referenced through its mangled name, say @"\01_symbol" while
686 // the IRName is @symbol (the prefix underscore comes from MachO mangling).
687 // In that case, we have the same actual Symbol that can get two different
688 // GUID, leading to some invalid internalization. Workaround this by marking
689 // the GlobalRes external.
690
691 // FIXME: instead of this check, it would be desirable to compute GUIDs
692 // based on mangled name, but this requires an access to the Target Triple
693 // and would be relatively invasive on the codebase.
694 if (GlobalRes.IRName != Sym.getIRName()) {
695 GlobalRes.Partition = GlobalResolution::External;
696 GlobalRes.VisibleOutsideSummary = true;
697 }
698
699 bool IsLibcall = Sym.isLibcall(Libcalls);
700
701 // Set the partition to external if we know it is re-defined by the linker
702 // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a
703 // regular object, is referenced from llvm.compiler.used/llvm.used, or was
704 // already recorded as being referenced from a different partition.
705 if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() ||
706 IsLibcall ||
707 (GlobalRes.Partition != GlobalResolution::Unknown &&
708 GlobalRes.Partition != Partition)) {
709 GlobalRes.Partition = GlobalResolution::External;
710 } else
711 // First recorded reference, save the current partition.
712 GlobalRes.Partition = Partition;
713
714 // Flag as visible outside of summary if visible from a regular object or
715 // from a module that does not have a summary.
716 GlobalRes.VisibleOutsideSummary |=
717 (Res.VisibleToRegularObj || Sym.isUsed() || IsLibcall || !InSummary);
718
719 GlobalRes.ExportDynamic |= Res.ExportDynamic;
720 }
721}
722
723void LTO::releaseGlobalResolutionsMemory() {
724 // Release GlobalResolutions dense-map itself.
725 GlobalResolutions.reset();
726 // Release the string saver memory.
727 GlobalResolutionSymbolSaver.reset();
728 Alloc.reset();
729}
730
731static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
732 ArrayRef<SymbolResolution> Res) {
733 StringRef Path = Input->getName();
734 OS << Path << '\n';
735 auto ResI = Res.begin();
736 for (const InputFile::Symbol &Sym : Input->symbols()) {
737 assert(ResI != Res.end());
738 SymbolResolution Res = *ResI++;
739
740 OS << "-r=" << Path << ',' << Sym.getName() << ',';
741 if (Res.Prevailing)
742 OS << 'p';
743 if (Res.FinalDefinitionInLinkageUnit)
744 OS << 'l';
745 if (Res.VisibleToRegularObj)
746 OS << 'x';
747 if (Res.LinkerRedefined)
748 OS << 'r';
749 OS << '\n';
750 }
751 OS.flush();
752 assert(ResI == Res.end());
753}
754
755Error LTO::add(std::unique_ptr<InputFile> InputPtr,
756 ArrayRef<SymbolResolution> Res) {
757 llvm::TimeTraceScope timeScope("LTO add input", InputPtr->getName());
758 assert(!CalledGetMaxTasks);
759
760 Expected<std::shared_ptr<InputFile>> InputOrErr =
761 addInput(InputPtr: std::move(InputPtr));
762 if (!InputOrErr)
763 return InputOrErr.takeError();
764 InputFile *Input = (*InputOrErr).get();
765
766 if (Conf.ResolutionFile)
767 writeToResolutionFile(OS&: *Conf.ResolutionFile, Input, Res);
768
769 if (RegularLTO.CombinedModule->getTargetTriple().empty()) {
770 Triple InputTriple(Input->getTargetTriple());
771 RegularLTO.CombinedModule->setTargetTriple(InputTriple);
772 if (InputTriple.isOSBinFormatELF())
773 Conf.VisibilityScheme = Config::ELF;
774 }
775
776 ArrayRef<SymbolResolution> InputRes = Res;
777 for (unsigned I = 0; I != Input->Mods.size(); ++I) {
778 if (auto Err = addModule(Input&: *Input, InputRes, ModI: I, Res).moveInto(Value&: Res))
779 return Err;
780 }
781
782 assert(Res.empty());
783 return Error::success();
784}
785
786Expected<ArrayRef<SymbolResolution>>
787LTO::addModule(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
788 unsigned ModI, ArrayRef<SymbolResolution> Res) {
789 llvm::TimeTraceScope timeScope("LTO add module", Input.getName());
790 Expected<BitcodeLTOInfo> LTOInfo = Input.Mods[ModI].getLTOInfo();
791 if (!LTOInfo)
792 return LTOInfo.takeError();
793
794 if (EnableSplitLTOUnit) {
795 // If only some modules were split, flag this in the index so that
796 // we can skip or error on optimizations that need consistently split
797 // modules (whole program devirt and lower type tests).
798 if (*EnableSplitLTOUnit != LTOInfo->EnableSplitLTOUnit)
799 ThinLTO.CombinedIndex.setPartiallySplitLTOUnits();
800 } else
801 EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;
802
803 BitcodeModule BM = Input.Mods[ModI];
804
805 if ((LTOMode == LTOK_UnifiedRegular || LTOMode == LTOK_UnifiedThin) &&
806 !LTOInfo->UnifiedLTO)
807 return make_error<StringError>(
808 Args: "unified LTO compilation must use "
809 "compatible bitcode modules (use -funified-lto)",
810 Args: inconvertibleErrorCode());
811
812 if (LTOInfo->UnifiedLTO && LTOMode == LTOK_Default)
813 LTOMode = LTOK_UnifiedThin;
814
815 bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular);
816 // If any of the modules inside of a input bitcode file was compiled with
817 // ThinLTO, we assume that the whole input file also was compiled with
818 // ThinLTO.
819 Input.IsThinLTO |= IsThinLTO;
820
821 auto ModSyms = Input.module_symbols(I: ModI);
822 addModuleToGlobalRes(Syms: ModSyms, Res,
823 Partition: IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0,
824 InSummary: LTOInfo->HasSummary, TT: Triple(Input.getTargetTriple()));
825
826 if (IsThinLTO)
827 return addThinLTO(BM, Syms: ModSyms, Res);
828
829 RegularLTO.EmptyCombinedModule = false;
830 auto ModOrErr = addRegularLTO(Input, InputRes, BM, Syms: ModSyms, Res);
831 if (!ModOrErr)
832 return ModOrErr.takeError();
833 Res = ModOrErr->second;
834
835 if (!LTOInfo->HasSummary) {
836 if (Error Err = linkRegularLTO(Mod: std::move(ModOrErr->first),
837 /*LivenessFromIndex=*/false))
838 return Err;
839 return Res;
840 }
841
842 // Regular LTO module summaries are added to a dummy module that represents
843 // the combined regular LTO module.
844 if (Error Err = BM.readSummary(CombinedIndex&: ThinLTO.CombinedIndex, ModulePath: ""))
845 return Err;
846 RegularLTO.ModsWithSummaries.push_back(x: std::move(ModOrErr->first));
847 return Res;
848}
849
850// Checks whether the given global value is in a non-prevailing comdat
851// (comdat containing values the linker indicated were not prevailing,
852// which we then dropped to available_externally), and if so, removes
853// it from the comdat. This is called for all global values to ensure the
854// comdat is empty rather than leaving an incomplete comdat. It is needed for
855// regular LTO modules, in case we are in a mixed-LTO mode (both regular
856// and thin LTO modules) compilation. Since the regular LTO module will be
857// linked first in the final native link, we want to make sure the linker
858// doesn't select any of these incomplete comdats that would be left
859// in the regular LTO module without this cleanup.
860static void
861handleNonPrevailingComdat(GlobalValue &GV,
862 std::set<const Comdat *> &NonPrevailingComdats) {
863 Comdat *C = GV.getComdat();
864 if (!C)
865 return;
866
867 if (!NonPrevailingComdats.count(x: C))
868 return;
869
870 // Additionally need to drop all global values from the comdat to
871 // available_externally, to satisfy the COMDAT requirement that all members
872 // are discarded as a unit. The non-local linkage global values avoid
873 // duplicate definition linker errors.
874 GV.setLinkage(GlobalValue::AvailableExternallyLinkage);
875
876 if (auto GO = dyn_cast<GlobalObject>(Val: &GV))
877 GO->setComdat(nullptr);
878}
879
880// Add a regular LTO object to the link.
881// The resulting module needs to be linked into the combined LTO module with
882// linkRegularLTO.
883Expected<
884 std::pair<LTO::RegularLTOState::AddedModule, ArrayRef<SymbolResolution>>>
885LTO::addRegularLTO(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
886 BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
887 ArrayRef<SymbolResolution> Res) {
888 llvm::TimeTraceScope timeScope("LTO add regular LTO");
889 RegularLTOState::AddedModule Mod;
890 Expected<std::unique_ptr<Module>> MOrErr =
891 BM.getLazyModule(Context&: RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true,
892 /*IsImporting*/ false);
893 if (!MOrErr)
894 return MOrErr.takeError();
895 Module &M = **MOrErr;
896 Mod.M = std::move(*MOrErr);
897
898 if (Error Err = M.materializeMetadata())
899 return std::move(Err);
900
901 if (LTOMode == LTOK_UnifiedRegular) {
902 // cfi.functions metadata is intended to be used with ThinLTO and may
903 // trigger invalid IR transformations if they are present when doing regular
904 // LTO, so delete it.
905 if (NamedMDNode *CfiFunctionsMD = M.getNamedMetadata(Name: "cfi.functions"))
906 M.eraseNamedMetadata(NMD: CfiFunctionsMD);
907 } else if (NamedMDNode *AliasesMD = M.getNamedMetadata(Name: "aliases")) {
908 // Delete aliases entries for non-prevailing symbols on the ThinLTO side of
909 // this input file.
910 DenseSet<StringRef> Prevailing;
911 for (auto [I, R] : zip(t: Input.symbols(), u&: InputRes))
912 if (R.Prevailing && !I.getIRName().empty())
913 Prevailing.insert(V: I.getIRName());
914 std::vector<MDNode *> AliasGroups;
915 for (MDNode *AliasGroup : AliasesMD->operands()) {
916 std::vector<Metadata *> Aliases;
917 for (Metadata *Alias : AliasGroup->operands()) {
918 if (isa<MDString>(Val: Alias) &&
919 Prevailing.count(V: cast<MDString>(Val: Alias)->getString()))
920 Aliases.push_back(x: Alias);
921 }
922 if (Aliases.size() > 1)
923 AliasGroups.push_back(x: MDTuple::get(Context&: RegularLTO.Ctx, MDs: Aliases));
924 }
925 AliasesMD->clearOperands();
926 for (MDNode *G : AliasGroups)
927 AliasesMD->addOperand(M: G);
928 }
929
930 UpgradeDebugInfo(M);
931
932 ModuleSymbolTable SymTab;
933 SymTab.addModule(M: &M);
934
935 for (GlobalVariable &GV : M.globals())
936 if (GV.hasAppendingLinkage())
937 Mod.Keep.push_back(x: &GV);
938
939 DenseSet<GlobalObject *> AliasedGlobals;
940 for (auto &GA : M.aliases())
941 if (GlobalObject *GO = GA.getAliaseeObject())
942 AliasedGlobals.insert(V: GO);
943
944 // In this function we need IR GlobalValues matching the symbols in Syms
945 // (which is not backed by a module), so we need to enumerate them in the same
946 // order. The symbol enumeration order of a ModuleSymbolTable intentionally
947 // matches the order of an irsymtab, but when we read the irsymtab in
948 // InputFile::create we omit some symbols that are irrelevant to LTO. The
949 // Skip() function skips the same symbols from the module as InputFile does
950 // from the symbol table.
951 auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end();
952 auto Skip = [&]() {
953 while (MsymI != MsymE) {
954 auto Flags = SymTab.getSymbolFlags(S: *MsymI);
955 if ((Flags & object::BasicSymbolRef::SF_Global) &&
956 !(Flags & object::BasicSymbolRef::SF_FormatSpecific))
957 return;
958 ++MsymI;
959 }
960 };
961 Skip();
962
963 std::set<const Comdat *> NonPrevailingComdats;
964 SmallSet<StringRef, 2> NonPrevailingAsmSymbols;
965 for (const InputFile::Symbol &Sym : Syms) {
966 assert(!Res.empty());
967 const SymbolResolution &R = Res.consume_front();
968
969 assert(MsymI != MsymE);
970 ModuleSymbolTable::Symbol Msym = *MsymI++;
971 Skip();
972
973 if (GlobalValue *GV = dyn_cast_if_present<GlobalValue *>(Val&: Msym)) {
974 if (R.Prevailing) {
975 if (Sym.isUndefined())
976 continue;
977 Mod.Keep.push_back(x: GV);
978 // For symbols re-defined with linker -wrap and -defsym options,
979 // set the linkage to weak to inhibit IPO. The linkage will be
980 // restored by the linker.
981 if (R.LinkerRedefined)
982 GV->setLinkage(GlobalValue::WeakAnyLinkage);
983
984 GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage();
985 if (GlobalValue::isLinkOnceLinkage(Linkage: OriginalLinkage))
986 GV->setLinkage(GlobalValue::getWeakLinkage(
987 ODR: GlobalValue::isLinkOnceODRLinkage(Linkage: OriginalLinkage)));
988 } else if (isa<GlobalObject>(Val: GV) &&
989 (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() ||
990 GV->hasAvailableExternallyLinkage()) &&
991 !AliasedGlobals.count(V: cast<GlobalObject>(Val: GV))) {
992 // Any of the above three types of linkage indicates that the
993 // chosen prevailing symbol will have the same semantics as this copy of
994 // the symbol, so we may be able to link it with available_externally
995 // linkage. We will decide later whether to do that when we link this
996 // module (in linkRegularLTO), based on whether it is undefined.
997 Mod.Keep.push_back(x: GV);
998 GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
999 if (GV->hasComdat())
1000 NonPrevailingComdats.insert(x: GV->getComdat());
1001 cast<GlobalObject>(Val: GV)->setComdat(nullptr);
1002 }
1003
1004 // Set the 'local' flag based on the linker resolution for this symbol.
1005 if (R.FinalDefinitionInLinkageUnit) {
1006 GV->setDSOLocal(true);
1007 if (GV->hasDLLImportStorageClass())
1008 GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
1009 DefaultStorageClass);
1010 }
1011 } else if (auto *AS =
1012 dyn_cast_if_present<ModuleSymbolTable::AsmSymbol *>(Val&: Msym)) {
1013 // Collect non-prevailing symbols.
1014 if (!R.Prevailing)
1015 NonPrevailingAsmSymbols.insert(V: AS->first);
1016 } else {
1017 llvm_unreachable("unknown symbol type");
1018 }
1019
1020 // Common resolution: collect the maximum size/alignment over all commons.
1021 // We also record if we see an instance of a common as prevailing, so that
1022 // if none is prevailing we can ignore it later.
1023 if (Sym.isCommon()) {
1024 // FIXME: We should figure out what to do about commons defined by asm.
1025 // For now they aren't reported correctly by ModuleSymbolTable.
1026 auto &CommonRes = RegularLTO.Commons[std::string(Sym.getIRName())];
1027 CommonRes.Size = std::max(a: CommonRes.Size, b: Sym.getCommonSize());
1028 if (uint32_t SymAlignValue = Sym.getCommonAlignment()) {
1029 CommonRes.Alignment =
1030 std::max(a: Align(SymAlignValue), b: CommonRes.Alignment);
1031 }
1032 CommonRes.Prevailing |= R.Prevailing;
1033 }
1034 }
1035
1036 if (!M.getComdatSymbolTable().empty())
1037 for (GlobalValue &GV : M.global_values())
1038 handleNonPrevailingComdat(GV, NonPrevailingComdats);
1039
1040 // Prepend ".lto_discard <sym>, <sym>*" directive to each module inline asm
1041 // block.
1042 if (!M.getModuleInlineAsm().empty()) {
1043 std::string NewIA = ".lto_discard";
1044 if (!NonPrevailingAsmSymbols.empty()) {
1045 // Don't dicard a symbol if there is a live .symver for it.
1046 ModuleSymbolTable::CollectAsmSymvers(
1047 M, AsmSymver: [&](StringRef Name, StringRef Alias) {
1048 if (!NonPrevailingAsmSymbols.count(V: Alias))
1049 NonPrevailingAsmSymbols.erase(V: Name);
1050 });
1051 NewIA += " " + llvm::join(R&: NonPrevailingAsmSymbols, Separator: ", ");
1052 }
1053 NewIA += "\n";
1054 M.setModuleInlineAsm(NewIA + M.getModuleInlineAsm());
1055 }
1056
1057 assert(MsymI == MsymE);
1058 return std::make_pair(x: std::move(Mod), y&: Res);
1059}
1060
1061Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
1062 bool LivenessFromIndex) {
1063 llvm::TimeTraceScope timeScope("LTO link regular LTO");
1064 std::vector<GlobalValue *> Keep;
1065 for (GlobalValue *GV : Mod.Keep) {
1066 if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GUID: GV->getGUID())) {
1067 if (Function *F = dyn_cast<Function>(Val: GV)) {
1068 if (DiagnosticOutputFile) {
1069 if (Error Err = F->materialize())
1070 return Err;
1071 OptimizationRemarkEmitter ORE(F, nullptr);
1072 ORE.emit(OptDiag: OptimizationRemark(DEBUG_TYPE, "deadfunction", F)
1073 << ore::NV("Function", F)
1074 << " not added to the combined module ");
1075 }
1076 }
1077 continue;
1078 }
1079
1080 if (!GV->hasAvailableExternallyLinkage()) {
1081 Keep.push_back(x: GV);
1082 continue;
1083 }
1084
1085 // Only link available_externally definitions if we don't already have a
1086 // definition.
1087 GlobalValue *CombinedGV =
1088 RegularLTO.CombinedModule->getNamedValue(Name: GV->getName());
1089 if (CombinedGV && !CombinedGV->isDeclaration())
1090 continue;
1091
1092 Keep.push_back(x: GV);
1093 }
1094
1095 return RegularLTO.Mover->move(Src: std::move(Mod.M), ValuesToLink: Keep, AddLazyFor: nullptr,
1096 /* IsPerformingImport */ false);
1097}
1098
1099// Add a ThinLTO module to the link.
1100Expected<ArrayRef<SymbolResolution>>
1101LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
1102 ArrayRef<SymbolResolution> Res) {
1103 llvm::TimeTraceScope timeScope("LTO add thin LTO");
1104 const auto BMID = BM.getModuleIdentifier();
1105 ArrayRef<SymbolResolution> ResTmp = Res;
1106 for (const InputFile::Symbol &Sym : Syms) {
1107 assert(!ResTmp.empty());
1108 const SymbolResolution &R = ResTmp.consume_front();
1109
1110 if (!Sym.getIRName().empty() && R.Prevailing) {
1111 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1112 GlobalName: GlobalValue::getGlobalIdentifier(Name: Sym.getIRName(),
1113 Linkage: GlobalValue::ExternalLinkage, FileName: ""));
1114 ThinLTO.setPrevailingModuleForGUID(GUID, Module: BMID);
1115 }
1116 }
1117
1118 if (Error Err = BM.readSummary(
1119 CombinedIndex&: ThinLTO.CombinedIndex, ModulePath: BMID, IsPrevailing: [&](GlobalValue::GUID GUID) {
1120 return ThinLTO.isPrevailingModuleForGUID(GUID, Module: BMID);
1121 }))
1122 return Err;
1123 LLVM_DEBUG(dbgs() << "Module " << BMID << "\n");
1124
1125 for (const InputFile::Symbol &Sym : Syms) {
1126 assert(!Res.empty());
1127 const SymbolResolution &R = Res.consume_front();
1128
1129 if (!Sym.getIRName().empty() &&
1130 (R.Prevailing || R.FinalDefinitionInLinkageUnit)) {
1131 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1132 GlobalName: GlobalValue::getGlobalIdentifier(Name: Sym.getIRName(),
1133 Linkage: GlobalValue::ExternalLinkage, FileName: ""));
1134 if (R.Prevailing) {
1135 assert(ThinLTO.isPrevailingModuleForGUID(GUID, BMID));
1136
1137 // For linker redefined symbols (via --wrap or --defsym) we want to
1138 // switch the linkage to `weak` to prevent IPOs from happening.
1139 // Find the summary in the module for this very GV and record the new
1140 // linkage so that we can switch it when we import the GV.
1141 if (R.LinkerRedefined)
1142 if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(ValueGUID: GUID, ModuleId: BMID))
1143 S->setLinkage(GlobalValue::WeakAnyLinkage);
1144 }
1145
1146 // If the linker resolved the symbol to a local definition then mark it
1147 // as local in the summary for the module we are adding.
1148 if (R.FinalDefinitionInLinkageUnit) {
1149 if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(ValueGUID: GUID, ModuleId: BMID)) {
1150 S->setDSOLocal(true);
1151 }
1152 }
1153 }
1154 }
1155
1156 if (!ThinLTO.ModuleMap.insert(KV: {BMID, BM}).second)
1157 return make_error<StringError>(
1158 Args: "Expected at most one ThinLTO module per bitcode file",
1159 Args: inconvertibleErrorCode());
1160
1161 if (!Conf.ThinLTOModulesToCompile.empty()) {
1162 if (!ThinLTO.ModulesToCompile)
1163 ThinLTO.ModulesToCompile = ModuleMapType();
1164 // This is a fuzzy name matching where only modules with name containing the
1165 // specified switch values are going to be compiled.
1166 for (const std::string &Name : Conf.ThinLTOModulesToCompile) {
1167 if (BMID.contains(Other: Name)) {
1168 ThinLTO.ModulesToCompile->insert(KV: {BMID, BM});
1169 LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BMID << " to compile\n");
1170 break;
1171 }
1172 }
1173 }
1174
1175 return Res;
1176}
1177
1178unsigned LTO::getMaxTasks() const {
1179 CalledGetMaxTasks = true;
1180 auto ModuleCount = ThinLTO.ModulesToCompile ? ThinLTO.ModulesToCompile->size()
1181 : ThinLTO.ModuleMap.size();
1182 return RegularLTO.ParallelCodeGenParallelismLevel + ModuleCount;
1183}
1184
1185// If only some of the modules were split, we cannot correctly handle
1186// code that contains type tests or type checked loads.
1187Error LTO::checkPartiallySplit() {
1188 if (!ThinLTO.CombinedIndex.partiallySplitLTOUnits())
1189 return Error::success();
1190
1191 const Module *Combined = RegularLTO.CombinedModule.get();
1192 Function *TypeTestFunc =
1193 Intrinsic::getDeclarationIfExists(M: Combined, id: Intrinsic::type_test);
1194 Function *TypeCheckedLoadFunc =
1195 Intrinsic::getDeclarationIfExists(M: Combined, id: Intrinsic::type_checked_load);
1196 Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
1197 M: Combined, id: Intrinsic::type_checked_load_relative);
1198
1199 // First check if there are type tests / type checked loads in the
1200 // merged regular LTO module IR.
1201 if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
1202 (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()) ||
1203 (TypeCheckedLoadRelativeFunc &&
1204 !TypeCheckedLoadRelativeFunc->use_empty()))
1205 return make_error<StringError>(
1206 Args: "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
1207 Args: inconvertibleErrorCode());
1208
1209 // Otherwise check if there are any recorded in the combined summary from the
1210 // ThinLTO modules.
1211 for (auto &P : ThinLTO.CombinedIndex) {
1212 for (auto &S : P.second.getSummaryList()) {
1213 auto *FS = dyn_cast<FunctionSummary>(Val: S.get());
1214 if (!FS)
1215 continue;
1216 if (!FS->type_test_assume_vcalls().empty() ||
1217 !FS->type_checked_load_vcalls().empty() ||
1218 !FS->type_test_assume_const_vcalls().empty() ||
1219 !FS->type_checked_load_const_vcalls().empty() ||
1220 !FS->type_tests().empty())
1221 return make_error<StringError>(
1222 Args: "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
1223 Args: inconvertibleErrorCode());
1224 }
1225 }
1226 return Error::success();
1227}
1228
1229Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
1230 llvm::scope_exit CleanUp([this]() { cleanup(); });
1231
1232 if (Error EC = handleArchiveInputs())
1233 return EC;
1234
1235 // Compute "dead" symbols, we don't want to import/export these!
1236 DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
1237 DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
1238 for (auto &Res : *GlobalResolutions) {
1239 // Normally resolution have IR name of symbol. We can do nothing here
1240 // otherwise. See comments in GlobalResolution struct for more details.
1241 if (Res.second.IRName.empty())
1242 continue;
1243
1244 GlobalValue::GUID GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1245 GlobalName: GlobalValue::dropLLVMManglingEscape(Name: Res.second.IRName));
1246
1247 if (Res.second.VisibleOutsideSummary && Res.second.Prevailing)
1248 GUIDPreservedSymbols.insert(V: GUID);
1249
1250 if (Res.second.ExportDynamic)
1251 DynamicExportSymbols.insert(V: GUID);
1252
1253 GUIDPrevailingResolutions[GUID] =
1254 Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No;
1255 }
1256
1257 auto isPrevailing = [&](GlobalValue::GUID G) {
1258 auto It = GUIDPrevailingResolutions.find(Val: G);
1259 if (It == GUIDPrevailingResolutions.end())
1260 return PrevailingType::Unknown;
1261 return It->second;
1262 };
1263 computeDeadSymbolsWithConstProp(Index&: ThinLTO.CombinedIndex, GUIDPreservedSymbols,
1264 isPrevailing, ImportEnabled: Conf.OptLevel > 0);
1265
1266 // Setup output file to emit statistics.
1267 auto StatsFileOrErr = setupStatsFile(Conf.StatsFile);
1268 if (!StatsFileOrErr)
1269 return StatsFileOrErr.takeError();
1270 std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get());
1271
1272 // TODO: Ideally this would be controlled automatically by detecting that we
1273 // are linking with an allocator that supports these interfaces, rather than
1274 // an internal option (which would still be needed for tests, however). For
1275 // example, if the library exported a symbol like __malloc_hot_cold the linker
1276 // could recognize that and set a flag in the lto::Config.
1277 if (SupportsHotColdNew)
1278 ThinLTO.CombinedIndex.setWithSupportsHotColdNew();
1279
1280 Error Result = runRegularLTO(AddStream);
1281 if (!Result)
1282 // This will reset the GlobalResolutions optional once done with it to
1283 // reduce peak memory before importing.
1284 Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
1285
1286 if (StatsFile)
1287 PrintStatisticsJSON(OS&: StatsFile->os());
1288
1289 return Result;
1290}
1291
1292Error LTO::runRegularLTO(AddStreamFn AddStream) {
1293 llvm::TimeTraceScope timeScope("Run regular LTO");
1294 LLVMContext &CombinedCtx = RegularLTO.CombinedModule->getContext();
1295 // Setup optimization remarks.
1296 auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
1297 Context&: CombinedCtx, RemarksFilename: Conf.RemarksFilename, RemarksPasses: Conf.RemarksPasses, RemarksFormat: Conf.RemarksFormat,
1298 RemarksWithHotness: Conf.RemarksWithHotness, RemarksHotnessThreshold: Conf.RemarksHotnessThreshold);
1299 LLVM_DEBUG(dbgs() << "Running regular LTO\n");
1300 if (!DiagFileOrErr)
1301 return DiagFileOrErr.takeError();
1302 DiagnosticOutputFile = std::move(*DiagFileOrErr);
1303
1304 // Finalize linking of regular LTO modules containing summaries now that
1305 // we have computed liveness information.
1306 {
1307 llvm::TimeTraceScope timeScope("Link regular LTO");
1308 for (auto &M : RegularLTO.ModsWithSummaries)
1309 if (Error Err = linkRegularLTO(Mod: std::move(M), /*LivenessFromIndex=*/true))
1310 return Err;
1311 }
1312
1313 // Ensure we don't have inconsistently split LTO units with type tests.
1314 // FIXME: this checks both LTO and ThinLTO. It happens to work as we take
1315 // this path both cases but eventually this should be split into two and
1316 // do the ThinLTO checks in `runThinLTO`.
1317 if (Error Err = checkPartiallySplit())
1318 return Err;
1319
1320 // Make sure commons have the right size/alignment: we kept the largest from
1321 // all the prevailing when adding the inputs, and we apply it here.
1322 const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
1323 for (auto &I : RegularLTO.Commons) {
1324 if (!I.second.Prevailing)
1325 // Don't do anything if no instance of this common was prevailing.
1326 continue;
1327 GlobalVariable *OldGV = RegularLTO.CombinedModule->getNamedGlobal(Name: I.first);
1328 if (OldGV && OldGV->getGlobalSize(DL) == I.second.Size) {
1329 // Don't create a new global if the type is already correct, just make
1330 // sure the alignment is correct.
1331 OldGV->setAlignment(I.second.Alignment);
1332 continue;
1333 }
1334 ArrayType *Ty =
1335 ArrayType::get(ElementType: Type::getInt8Ty(C&: RegularLTO.Ctx), NumElements: I.second.Size);
1336 auto *GV = new GlobalVariable(*RegularLTO.CombinedModule, Ty, false,
1337 GlobalValue::CommonLinkage,
1338 ConstantAggregateZero::get(Ty), "");
1339 GV->setAlignment(I.second.Alignment);
1340 if (OldGV) {
1341 OldGV->replaceAllUsesWith(V: GV);
1342 GV->takeName(V: OldGV);
1343 OldGV->eraseFromParent();
1344 } else {
1345 GV->setName(I.first);
1346 }
1347 }
1348
1349 bool WholeProgramVisibilityEnabledInLTO =
1350 Conf.HasWholeProgramVisibility &&
1351 // If validation is enabled, upgrade visibility only when all vtables
1352 // have typeinfos.
1353 (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
1354
1355 // This returns true when the name is local or not defined. Locals are
1356 // expected to be handled separately.
1357 auto IsVisibleToRegularObj = [&](StringRef name) {
1358 auto It = GlobalResolutions->find(Val: name);
1359 return (It == GlobalResolutions->end() ||
1360 It->second.VisibleOutsideSummary || !It->second.Prevailing);
1361 };
1362
1363 // If allowed, upgrade public vcall visibility metadata to linkage unit
1364 // visibility before whole program devirtualization in the optimizer.
1365 updateVCallVisibilityInModule(
1366 M&: *RegularLTO.CombinedModule, WholeProgramVisibilityEnabledInLTO,
1367 DynamicExportSymbols, ValidateAllVtablesHaveTypeInfos: Conf.ValidateAllVtablesHaveTypeInfos,
1368 IsVisibleToRegularObj);
1369 updatePublicTypeTestCalls(M&: *RegularLTO.CombinedModule,
1370 WholeProgramVisibilityEnabledInLTO);
1371
1372 if (Conf.PreOptModuleHook &&
1373 !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
1374 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
1375
1376 if (!Conf.CodeGenOnly) {
1377 for (const auto &R : *GlobalResolutions) {
1378 GlobalValue *GV =
1379 RegularLTO.CombinedModule->getNamedValue(Name: R.second.IRName);
1380 if (!R.second.isPrevailingIRSymbol())
1381 continue;
1382 if (R.second.Partition != 0 &&
1383 R.second.Partition != GlobalResolution::External)
1384 continue;
1385
1386 // Ignore symbols defined in other partitions.
1387 // Also skip declarations, which are not allowed to have internal linkage.
1388 if (!GV || GV->hasLocalLinkage() || GV->isDeclaration())
1389 continue;
1390
1391 // Symbols that are marked DLLImport or DLLExport should not be
1392 // internalized, as they are either externally visible or referencing
1393 // external symbols. Symbols that have AvailableExternally or Appending
1394 // linkage might be used by future passes and should be kept as is.
1395 // These linkages are seen in Unified regular LTO, because the process
1396 // of creating split LTO units introduces symbols with that linkage into
1397 // one of the created modules. Normally, only the ThinLTO backend would
1398 // compile this module, but Unified Regular LTO processes both
1399 // modules created by the splitting process as regular LTO modules.
1400 if ((LTOMode == LTOKind::LTOK_UnifiedRegular) &&
1401 ((GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) ||
1402 GV->hasAvailableExternallyLinkage() || GV->hasAppendingLinkage()))
1403 continue;
1404
1405 GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
1406 : GlobalValue::UnnamedAddr::None);
1407 if (EnableLTOInternalization && R.second.Partition == 0)
1408 GV->setLinkage(GlobalValue::InternalLinkage);
1409 }
1410
1411 if (Conf.PostInternalizeModuleHook &&
1412 !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
1413 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
1414 }
1415
1416 if (!RegularLTO.EmptyCombinedModule || Conf.AlwaysEmitRegularLTOObj) {
1417 if (Error Err =
1418 backend(C: Conf, AddStream, ParallelCodeGenParallelismLevel: RegularLTO.ParallelCodeGenParallelismLevel,
1419 M&: *RegularLTO.CombinedModule, CombinedIndex&: ThinLTO.CombinedIndex))
1420 return Err;
1421 }
1422
1423 return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile));
1424}
1425
1426SmallVector<const char *> LTO::getRuntimeLibcallSymbols(const Triple &TT) {
1427 RTLIB::RuntimeLibcallsInfo Libcalls(TT);
1428 SmallVector<const char *> LibcallSymbols;
1429 LibcallSymbols.reserve(N: Libcalls.getNumAvailableLibcallImpls());
1430
1431 for (RTLIB::LibcallImpl Impl : RTLIB::libcall_impls()) {
1432 if (Libcalls.isAvailable(Impl))
1433 LibcallSymbols.push_back(Elt: Libcalls.getLibcallImplName(CallImpl: Impl).data());
1434 }
1435
1436 return LibcallSymbols;
1437}
1438
1439Error ThinBackendProc::emitFiles(
1440 const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
1441 const std::string &NewModulePath) const {
1442 return emitFiles(ImportList, ModulePath, NewModulePath,
1443 SummaryPath: NewModulePath + ".thinlto.bc",
1444 /*ImportsFiles=*/std::nullopt);
1445}
1446
1447Error ThinBackendProc::emitFiles(
1448 const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
1449 const std::string &NewModulePath, StringRef SummaryPath,
1450 std::optional<std::reference_wrapper<ImportsFilesContainer>> ImportsFiles)
1451 const {
1452 ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
1453 GVSummaryPtrSet DeclarationSummaries;
1454
1455 std::error_code EC;
1456 gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
1457 ImportList, ModuleToSummariesForIndex,
1458 DecSummaries&: DeclarationSummaries);
1459
1460 raw_fd_ostream OS(SummaryPath, EC, sys::fs::OpenFlags::OF_None);
1461 if (EC)
1462 return createFileError(F: "cannot open " + Twine(SummaryPath), EC);
1463
1464 writeIndexToFile(Index: CombinedIndex, Out&: OS, ModuleToSummariesForIndex: &ModuleToSummariesForIndex,
1465 DecSummaries: &DeclarationSummaries);
1466
1467 if (ShouldEmitImportsFiles) {
1468 Error ImportsFilesError = EmitImportsFiles(
1469 ModulePath, OutputFilename: NewModulePath + ".imports", ModuleToSummariesForIndex);
1470 if (ImportsFilesError)
1471 return ImportsFilesError;
1472 }
1473
1474 // Optionally, store the imports files.
1475 if (ImportsFiles)
1476 processImportsFiles(
1477 ModulePath, ModuleToSummariesForIndex,
1478 F: [&](StringRef M) { ImportsFiles->get().push_back(Elt: M.str()); });
1479
1480 return Error::success();
1481}
1482
1483namespace {
1484/// Base class for ThinLTO backends that perform code generation and insert the
1485/// generated files back into the link.
1486class CGThinBackend : public ThinBackendProc {
1487protected:
1488 AddStreamFn AddStream;
1489 DenseSet<GlobalValue::GUID> CfiFunctionDefs;
1490 DenseSet<GlobalValue::GUID> CfiFunctionDecls;
1491 bool ShouldEmitIndexFiles;
1492
1493public:
1494 CGThinBackend(
1495 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1496 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1497 AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
1498 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
1499 ThreadPoolStrategy ThinLTOParallelism)
1500 : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1501 OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
1502 AddStream(std::move(AddStream)),
1503 ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
1504 auto &Defs = CombinedIndex.cfiFunctionDefs();
1505 CfiFunctionDefs.insert_range(R: Defs.guids());
1506 auto &Decls = CombinedIndex.cfiFunctionDecls();
1507 CfiFunctionDecls.insert_range(R: Decls.guids());
1508 }
1509};
1510
1511/// This backend performs code generation by scheduling a job to run on
1512/// an in-process thread when invoked for each task.
1513class InProcessThinBackend : public CGThinBackend {
1514protected:
1515 FileCache Cache;
1516
1517public:
  /// Forwards everything except \p Cache to the CGThinBackend base class and
  /// keeps \p Cache (moved) in the Cache member.
  InProcessThinBackend(
      const Config &Conf, ModuleSummaryIndex &CombinedIndex,
      ThreadPoolStrategy ThinLTOParallelism,
      const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
      AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
      bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
      : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
                      AddStream, OnWrite, ShouldEmitIndexFiles,
                      ShouldEmitImportsFiles, ThinLTOParallelism),
        Cache(std::move(Cache)) {}
1528
1529 virtual Error runThinLTOBackendThread(
1530 AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
1531 ModuleSummaryIndex &CombinedIndex,
1532 const FunctionImporter::ImportMapTy &ImportList,
1533 const FunctionImporter::ExportSetTy &ExportList,
1534 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1535 const GVSummaryMapTy &DefinedGlobals,
1536 MapVector<StringRef, BitcodeModule> &ModuleMap) {
1537 auto ModuleID = BM.getModuleIdentifier();
1538 llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (in-process)",
1539 ModuleID);
1540 auto RunThinBackend = [&](AddStreamFn AddStream) {
1541 LTOLLVMContext BackendContext(Conf);
1542 Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(Context&: BackendContext);
1543 if (!MOrErr)
1544 return MOrErr.takeError();
1545
1546 return thinBackend(C: Conf, Task, AddStream, M&: **MOrErr, CombinedIndex,
1547 ImportList, DefinedGlobals, ModuleMap: &ModuleMap,
1548 CodeGenOnly: Conf.CodeGenOnly);
1549 };
1550 if (ShouldEmitIndexFiles) {
1551 if (auto E = emitFiles(ImportList, ModulePath: ModuleID, NewModulePath: ModuleID.str()))
1552 return E;
1553 }
1554
1555 if (!Cache.isValid() || !CombinedIndex.modulePaths().count(Key: ModuleID) ||
1556 all_of(Range: CombinedIndex.getModuleHash(ModPath: ModuleID),
1557 P: [](uint32_t V) { return V == 0; }))
1558 // Cache disabled or no entry for this module in the combined index or
1559 // no module hash.
1560 return RunThinBackend(AddStream);
1561
1562 // The module may be cached, this helps handling it.
1563 std::string Key = computeLTOCacheKey(
1564 Conf, Index: CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1565 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1566 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID);
1567 if (Error Err = CacheAddStreamOrErr.takeError())
1568 return Err;
1569 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
1570 if (CacheAddStream)
1571 return RunThinBackend(CacheAddStream);
1572
1573 return Error::success();
1574 }
1575
1576 Error start(
1577 unsigned Task, BitcodeModule BM,
1578 const FunctionImporter::ImportMapTy &ImportList,
1579 const FunctionImporter::ExportSetTy &ExportList,
1580 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1581 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1582 StringRef ModulePath = BM.getModuleIdentifier();
1583 assert(ModuleToDefinedGVSummaries.count(ModulePath));
1584 const GVSummaryMapTy &DefinedGlobals =
1585 ModuleToDefinedGVSummaries.find(Val: ModulePath)->second;
1586 BackendThreadPool.async(
1587 F: [=](BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
1588 const FunctionImporter::ImportMapTy &ImportList,
1589 const FunctionImporter::ExportSetTy &ExportList,
1590 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
1591 &ResolvedODR,
1592 const GVSummaryMapTy &DefinedGlobals,
1593 MapVector<StringRef, BitcodeModule> &ModuleMap) {
1594 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
1595 timeTraceProfilerInitialize(TimeTraceGranularity: Conf.TimeTraceGranularity,
1596 ProcName: "thin backend");
1597 Error E = runThinLTOBackendThread(
1598 AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList,
1599 ResolvedODR, DefinedGlobals, ModuleMap);
1600 if (E) {
1601 std::unique_lock<std::mutex> L(ErrMu);
1602 if (Err)
1603 Err = joinErrors(E1: std::move(*Err), E2: std::move(E));
1604 else
1605 Err = std::move(E);
1606 }
1607 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
1608 timeTraceProfilerFinishThread();
1609 },
1610 ArgList&: BM, ArgList: std::ref(t&: CombinedIndex), ArgList: std::ref(t: ImportList), ArgList: std::ref(t: ExportList),
1611 ArgList: std::ref(t: ResolvedODR), ArgList: std::ref(t: DefinedGlobals), ArgList: std::ref(t&: ModuleMap));
1612
1613 if (OnWrite)
1614 OnWrite(std::string(ModulePath));
1615 return Error::success();
1616 }
1617};
1618
1619/// This backend is utilized in the first round of a two-codegen round process.
1620/// It first saves optimized bitcode files to disk before the codegen process
1621/// begins. After codegen, it stores the resulting object files in a scratch
1622/// buffer. Note the codegen data stored in the scratch buffer will be extracted
1623/// and merged in the subsequent step.
1624class FirstRoundThinBackend : public InProcessThinBackend {
1625 AddStreamFn IRAddStream;
1626 FileCache IRCache;
1627
1628public:
1629 FirstRoundThinBackend(
1630 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1631 ThreadPoolStrategy ThinLTOParallelism,
1632 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1633 AddStreamFn CGAddStream, FileCache CGCache, AddStreamFn IRAddStream,
1634 FileCache IRCache)
1635 : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
1636 ModuleToDefinedGVSummaries, std::move(CGAddStream),
1637 std::move(CGCache), /*OnWrite=*/nullptr,
1638 /*ShouldEmitIndexFiles=*/false,
1639 /*ShouldEmitImportsFiles=*/false),
1640 IRAddStream(std::move(IRAddStream)), IRCache(std::move(IRCache)) {}
1641
1642 Error runThinLTOBackendThread(
1643 AddStreamFn CGAddStream, FileCache CGCache, unsigned Task,
1644 BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
1645 const FunctionImporter::ImportMapTy &ImportList,
1646 const FunctionImporter::ExportSetTy &ExportList,
1647 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1648 const GVSummaryMapTy &DefinedGlobals,
1649 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1650 auto ModuleID = BM.getModuleIdentifier();
1651 llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (first round)",
1652 ModuleID);
1653 auto RunThinBackend = [&](AddStreamFn CGAddStream,
1654 AddStreamFn IRAddStream) {
1655 LTOLLVMContext BackendContext(Conf);
1656 Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(Context&: BackendContext);
1657 if (!MOrErr)
1658 return MOrErr.takeError();
1659
1660 return thinBackend(C: Conf, Task, AddStream: CGAddStream, M&: **MOrErr, CombinedIndex,
1661 ImportList, DefinedGlobals, ModuleMap: &ModuleMap,
1662 CodeGenOnly: Conf.CodeGenOnly, IRAddStream);
1663 };
1664 // Like InProcessThinBackend, we produce index files as needed for
1665 // FirstRoundThinBackend. However, these files are not generated for
1666 // SecondRoundThinBackend.
1667 if (ShouldEmitIndexFiles) {
1668 if (auto E = emitFiles(ImportList, ModulePath: ModuleID, NewModulePath: ModuleID.str()))
1669 return E;
1670 }
1671
1672 assert((CGCache.isValid() == IRCache.isValid()) &&
1673 "Both caches for CG and IR should have matching availability");
1674 if (!CGCache.isValid() || !CombinedIndex.modulePaths().count(Key: ModuleID) ||
1675 all_of(Range: CombinedIndex.getModuleHash(ModPath: ModuleID),
1676 P: [](uint32_t V) { return V == 0; }))
1677 // Cache disabled or no entry for this module in the combined index or
1678 // no module hash.
1679 return RunThinBackend(CGAddStream, IRAddStream);
1680
1681 // Get CGKey for caching object in CGCache.
1682 std::string CGKey = computeLTOCacheKey(
1683 Conf, Index: CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1684 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1685 Expected<AddStreamFn> CacheCGAddStreamOrErr =
1686 CGCache(Task, CGKey, ModuleID);
1687 if (Error Err = CacheCGAddStreamOrErr.takeError())
1688 return Err;
1689 AddStreamFn &CacheCGAddStream = *CacheCGAddStreamOrErr;
1690
1691 // Get IRKey for caching (optimized) IR in IRCache with an extra ID.
1692 std::string IRKey = recomputeLTOCacheKey(Key: CGKey, /*ExtraID=*/"IR");
1693 Expected<AddStreamFn> CacheIRAddStreamOrErr =
1694 IRCache(Task, IRKey, ModuleID);
1695 if (Error Err = CacheIRAddStreamOrErr.takeError())
1696 return Err;
1697 AddStreamFn &CacheIRAddStream = *CacheIRAddStreamOrErr;
1698
1699 // Ideally, both CG and IR caching should be synchronized. However, in
1700 // practice, their availability may differ due to different expiration
1701 // times. Therefore, if either cache is missing, the backend process is
1702 // triggered.
1703 if (CacheCGAddStream || CacheIRAddStream) {
1704 LLVM_DEBUG(dbgs() << "[FirstRound] Cache Miss for "
1705 << BM.getModuleIdentifier() << "\n");
1706 return RunThinBackend(CacheCGAddStream ? CacheCGAddStream : CGAddStream,
1707 CacheIRAddStream ? CacheIRAddStream : IRAddStream);
1708 }
1709
1710 return Error::success();
1711 }
1712};
1713
1714/// This backend operates in the second round of a two-codegen round process.
1715/// It starts by reading the optimized bitcode files that were saved during the
1716/// first round. The backend then executes the codegen only to further optimize
1717/// the code, utilizing the codegen data merged from the first round. Finally,
1718/// it writes the resulting object files as usual.
1719class SecondRoundThinBackend : public InProcessThinBackend {
1720 std::unique_ptr<SmallVector<StringRef>> IRFiles;
1721 stable_hash CombinedCGDataHash;
1722
1723public:
1724 SecondRoundThinBackend(
1725 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1726 ThreadPoolStrategy ThinLTOParallelism,
1727 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1728 AddStreamFn AddStream, FileCache Cache,
1729 std::unique_ptr<SmallVector<StringRef>> IRFiles,
1730 stable_hash CombinedCGDataHash)
1731 : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
1732 ModuleToDefinedGVSummaries, std::move(AddStream),
1733 std::move(Cache),
1734 /*OnWrite=*/nullptr,
1735 /*ShouldEmitIndexFiles=*/false,
1736 /*ShouldEmitImportsFiles=*/false),
1737 IRFiles(std::move(IRFiles)), CombinedCGDataHash(CombinedCGDataHash) {}
1738
1739 Error runThinLTOBackendThread(
1740 AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
1741 ModuleSummaryIndex &CombinedIndex,
1742 const FunctionImporter::ImportMapTy &ImportList,
1743 const FunctionImporter::ExportSetTy &ExportList,
1744 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1745 const GVSummaryMapTy &DefinedGlobals,
1746 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1747 auto ModuleID = BM.getModuleIdentifier();
1748 llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (second round)",
1749 ModuleID);
1750 auto RunThinBackend = [&](AddStreamFn AddStream) {
1751 LTOLLVMContext BackendContext(Conf);
1752 std::unique_ptr<Module> LoadedModule =
1753 cgdata::loadModuleForTwoRounds(OrigModule&: BM, Task, Context&: BackendContext, IRFiles: *IRFiles);
1754
1755 return thinBackend(C: Conf, Task, AddStream, M&: *LoadedModule, CombinedIndex,
1756 ImportList, DefinedGlobals, ModuleMap: &ModuleMap,
1757 /*CodeGenOnly=*/true);
1758 };
1759 if (!Cache.isValid() || !CombinedIndex.modulePaths().count(Key: ModuleID) ||
1760 all_of(Range: CombinedIndex.getModuleHash(ModPath: ModuleID),
1761 P: [](uint32_t V) { return V == 0; }))
1762 // Cache disabled or no entry for this module in the combined index or
1763 // no module hash.
1764 return RunThinBackend(AddStream);
1765
1766 // Get Key for caching the final object file in Cache with the combined
1767 // CGData hash.
1768 std::string Key = computeLTOCacheKey(
1769 Conf, Index: CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1770 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1771 Key = recomputeLTOCacheKey(Key,
1772 /*ExtraID=*/std::to_string(val: CombinedCGDataHash));
1773 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID);
1774 if (Error Err = CacheAddStreamOrErr.takeError())
1775 return Err;
1776 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
1777
1778 if (CacheAddStream) {
1779 LLVM_DEBUG(dbgs() << "[SecondRound] Cache Miss for "
1780 << BM.getModuleIdentifier() << "\n");
1781 return RunThinBackend(CacheAddStream);
1782 }
1783
1784 return Error::success();
1785 }
1786};
1787} // end anonymous namespace
1788
1789ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
1790 lto::IndexWriteCallback OnWrite,
1791 bool ShouldEmitIndexFiles,
1792 bool ShouldEmitImportsFiles) {
1793 auto Func =
1794 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1795 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1796 AddStreamFn AddStream, FileCache Cache) {
1797 return std::make_unique<InProcessThinBackend>(
1798 args: Conf, args&: CombinedIndex, args: Parallelism, args: ModuleToDefinedGVSummaries,
1799 args&: AddStream, args&: Cache, args: OnWrite, args: ShouldEmitIndexFiles,
1800 args: ShouldEmitImportsFiles);
1801 };
1802 return ThinBackend(Func, Parallelism);
1803}
1804
1805StringLiteral lto::getThinLTODefaultCPU(const Triple &TheTriple) {
1806 if (!TheTriple.isOSDarwin())
1807 return "";
1808 if (TheTriple.getArch() == Triple::x86_64)
1809 return "core2";
1810 if (TheTriple.getArch() == Triple::x86)
1811 return "yonah";
1812 if (TheTriple.isArm64e())
1813 return "apple-a12";
1814 if (TheTriple.getArch() == Triple::aarch64 ||
1815 TheTriple.getArch() == Triple::aarch64_32)
1816 return "cyclone";
1817 return "";
1818}
1819
1820// Given the original \p Path to an output file, replace any path
1821// prefix matching \p OldPrefix with \p NewPrefix. Also, create the
1822// resulting directory if it does not yet exist.
1823std::string lto::getThinLTOOutputFile(StringRef Path, StringRef OldPrefix,
1824 StringRef NewPrefix) {
1825 if (OldPrefix.empty() && NewPrefix.empty())
1826 return std::string(Path);
1827 SmallString<128> NewPath(Path);
1828 llvm::sys::path::replace_path_prefix(Path&: NewPath, OldPrefix, NewPrefix);
1829 StringRef ParentPath = llvm::sys::path::parent_path(path: NewPath.str());
1830 if (!ParentPath.empty()) {
1831 // Make sure the new directory exists, creating it if necessary.
1832 if (std::error_code EC = llvm::sys::fs::create_directories(path: ParentPath))
1833 llvm::errs() << "warning: could not create directory '" << ParentPath
1834 << "': " << EC.message() << '\n';
1835 }
1836 return std::string(NewPath);
1837}
1838
1839namespace {
1840class WriteIndexesThinBackend : public ThinBackendProc {
1841 std::string OldPrefix, NewPrefix, NativeObjectPrefix;
1842 raw_fd_ostream *LinkedObjectsFile;
1843
1844public:
1845 WriteIndexesThinBackend(
1846 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1847 ThreadPoolStrategy ThinLTOParallelism,
1848 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1849 std::string OldPrefix, std::string NewPrefix,
1850 std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
1851 raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite)
1852 : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1853 OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
1854 OldPrefix(OldPrefix), NewPrefix(NewPrefix),
1855 NativeObjectPrefix(NativeObjectPrefix),
1856 LinkedObjectsFile(LinkedObjectsFile) {}
1857
1858 Error start(
1859 unsigned Task, BitcodeModule BM,
1860 const FunctionImporter::ImportMapTy &ImportList,
1861 const FunctionImporter::ExportSetTy &ExportList,
1862 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1863 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1864 StringRef ModulePath = BM.getModuleIdentifier();
1865
1866 // The contents of this file may be used as input to a native link, and must
1867 // therefore contain the processed modules in a determinstic order that
1868 // match the order they are provided on the command line. For that reason,
1869 // we cannot include this in the asynchronously executed lambda below.
1870 if (LinkedObjectsFile) {
1871 std::string ObjectPrefix =
1872 NativeObjectPrefix.empty() ? NewPrefix : NativeObjectPrefix;
1873 std::string LinkedObjectsFilePath =
1874 getThinLTOOutputFile(Path: ModulePath, OldPrefix, NewPrefix: ObjectPrefix);
1875 *LinkedObjectsFile << LinkedObjectsFilePath << '\n';
1876 }
1877
1878 BackendThreadPool.async(
1879 F: [this](const StringRef ModulePath,
1880 const FunctionImporter::ImportMapTy &ImportList,
1881 const std::string &OldPrefix, const std::string &NewPrefix) {
1882 std::string NewModulePath =
1883 getThinLTOOutputFile(Path: ModulePath, OldPrefix, NewPrefix);
1884 auto E = emitFiles(ImportList, ModulePath, NewModulePath);
1885 if (E) {
1886 std::unique_lock<std::mutex> L(ErrMu);
1887 if (Err)
1888 Err = joinErrors(E1: std::move(*Err), E2: std::move(E));
1889 else
1890 Err = std::move(E);
1891 return;
1892 }
1893 },
1894 ArgList&: ModulePath, ArgList: ImportList, ArgList&: OldPrefix, ArgList&: NewPrefix);
1895
1896 if (OnWrite)
1897 OnWrite(std::string(ModulePath));
1898 return Error::success();
1899 }
1900
1901 bool isSensitiveToInputOrder() override {
1902 // The order which modules are written to LinkedObjectsFile should be
1903 // deterministic and match the order they are passed on the command line.
1904 return true;
1905 }
1906};
1907} // end anonymous namespace
1908
1909ThinBackend lto::createWriteIndexesThinBackend(
1910 ThreadPoolStrategy Parallelism, std::string OldPrefix,
1911 std::string NewPrefix, std::string NativeObjectPrefix,
1912 bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile,
1913 IndexWriteCallback OnWrite) {
1914 auto Func =
1915 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1916 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1917 AddStreamFn AddStream, FileCache Cache) {
1918 return std::make_unique<WriteIndexesThinBackend>(
1919 args: Conf, args&: CombinedIndex, args: Parallelism, args: ModuleToDefinedGVSummaries,
1920 args: OldPrefix, args: NewPrefix, args: NativeObjectPrefix, args: ShouldEmitImportsFiles,
1921 args: LinkedObjectsFile, args: OnWrite);
1922 };
1923 return ThinBackend(Func, Parallelism);
1924}
1925
1926Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
1927 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
1928 llvm::TimeTraceScope timeScope("Run ThinLTO");
1929 LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
1930 ThinLTO.CombinedIndex.releaseTemporaryMemory();
1931 timeTraceProfilerBegin(Name: "ThinLink", Detail: StringRef(""));
1932 llvm::scope_exit TimeTraceScopeExit([]() {
1933 if (llvm::timeTraceProfilerEnabled())
1934 llvm::timeTraceProfilerEnd();
1935 });
1936 if (ThinLTO.ModuleMap.empty())
1937 return Error::success();
1938
1939 if (ThinLTO.ModulesToCompile && ThinLTO.ModulesToCompile->empty()) {
1940 llvm::errs() << "warning: [ThinLTO] No module compiled\n";
1941 return Error::success();
1942 }
1943
1944 if (Conf.CombinedIndexHook &&
1945 !Conf.CombinedIndexHook(ThinLTO.CombinedIndex, GUIDPreservedSymbols))
1946 return Error::success();
1947
1948 // Collect for each module the list of function it defines (GUID ->
1949 // Summary).
1950 DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(
1951 ThinLTO.ModuleMap.size());
1952 ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule(
1953 ModuleToDefinedGVSummaries);
1954 // Create entries for any modules that didn't have any GV summaries
1955 // (either they didn't have any GVs to start with, or we suppressed
1956 // generation of the summaries because they e.g. had inline assembly
1957 // uses that couldn't be promoted/renamed on export). This is so
1958 // InProcessThinBackend::start can still launch a backend thread, which
1959 // is passed the map of summaries for the module, without any special
1960 // handling for this case.
1961 for (auto &Mod : ThinLTO.ModuleMap)
1962 if (!ModuleToDefinedGVSummaries.count(Val: Mod.first))
1963 ModuleToDefinedGVSummaries.try_emplace(Key: Mod.first);
1964
1965 FunctionImporter::ImportListsTy ImportLists(ThinLTO.ModuleMap.size());
1966 DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(
1967 ThinLTO.ModuleMap.size());
1968 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
1969
1970 if (DumpThinCGSCCs)
1971 ThinLTO.CombinedIndex.dumpSCCs(OS&: outs());
1972
1973 std::set<GlobalValue::GUID> ExportedGUIDs;
1974
1975 bool WholeProgramVisibilityEnabledInLTO =
1976 Conf.HasWholeProgramVisibility &&
1977 // If validation is enabled, upgrade visibility only when all vtables
1978 // have typeinfos.
1979 (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
1980 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
1981 ThinLTO.CombinedIndex.setWithWholeProgramVisibility();
1982
1983 // If we're validating, get the vtable symbols that should not be
1984 // upgraded because they correspond to typeIDs outside of index-based
1985 // WPD info.
1986 DenseSet<GlobalValue::GUID> VisibleToRegularObjSymbols;
1987 if (WholeProgramVisibilityEnabledInLTO &&
1988 Conf.ValidateAllVtablesHaveTypeInfos) {
1989 // This returns true when the name is local or not defined. Locals are
1990 // expected to be handled separately.
1991 auto IsVisibleToRegularObj = [&](StringRef name) {
1992 auto It = GlobalResolutions->find(Val: name);
1993 return (It == GlobalResolutions->end() ||
1994 It->second.VisibleOutsideSummary || !It->second.Prevailing);
1995 };
1996
1997 getVisibleToRegularObjVtableGUIDs(Index&: ThinLTO.CombinedIndex,
1998 VisibleToRegularObjSymbols,
1999 IsVisibleToRegularObj);
2000 }
2001
2002 // If allowed, upgrade public vcall visibility to linkage unit visibility in
2003 // the summaries before whole program devirtualization below.
2004 updateVCallVisibilityInIndex(
2005 Index&: ThinLTO.CombinedIndex, WholeProgramVisibilityEnabledInLTO,
2006 DynamicExportSymbols, VisibleToRegularObjSymbols);
2007
2008 // Perform index-based WPD. This will return immediately if there are
2009 // no index entries in the typeIdMetadata map (e.g. if we are instead
2010 // performing IR-based WPD in hybrid regular/thin LTO mode).
2011 std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
2012 runWholeProgramDevirtOnIndex(Summary&: ThinLTO.CombinedIndex, ExportedGUIDs,
2013 LocalWPDTargetsMap);
2014
2015 auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
2016 return ThinLTO.isPrevailingModuleForGUID(GUID, Module: S->modulePath());
2017 };
2018 if (EnableMemProfContextDisambiguation) {
2019 MemProfContextDisambiguation ContextDisambiguation;
2020 ContextDisambiguation.run(Index&: ThinLTO.CombinedIndex, isPrevailing);
2021 }
2022
2023 // Figure out which symbols need to be internalized. This also needs to happen
2024 // at -O0 because summary-based DCE is implemented using internalization, and
2025 // we must apply DCE consistently with the full LTO module in order to avoid
2026 // undefined references during the final link.
2027 for (auto &Res : *GlobalResolutions) {
2028 // If the symbol does not have external references or it is not prevailing,
2029 // then not need to mark it as exported from a ThinLTO partition.
2030 if (Res.second.Partition != GlobalResolution::External ||
2031 !Res.second.isPrevailingIRSymbol())
2032 continue;
2033 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
2034 GlobalName: GlobalValue::dropLLVMManglingEscape(Name: Res.second.IRName));
2035 // Mark exported unless index-based analysis determined it to be dead.
2036 if (ThinLTO.CombinedIndex.isGUIDLive(GUID))
2037 ExportedGUIDs.insert(x: GUID);
2038 }
2039
2040 // Reset the GlobalResolutions to deallocate the associated memory, as there
2041 // are no further accesses. We specifically want to do this before computing
2042 // cross module importing, which adds to peak memory via the computed import
2043 // and export lists.
2044 releaseGlobalResolutionsMemory();
2045
2046 if (Conf.OptLevel > 0)
2047 ComputeCrossModuleImport(Index: ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
2048 isPrevailing, ImportLists, ExportLists);
2049
2050 // Any functions referenced by the jump table in the regular LTO object must
2051 // be exported.
2052 auto &Defs = ThinLTO.CombinedIndex.cfiFunctionDefs();
2053 ExportedGUIDs.insert(first: Defs.guid_begin(), last: Defs.guid_end());
2054 auto &Decls = ThinLTO.CombinedIndex.cfiFunctionDecls();
2055 ExportedGUIDs.insert(first: Decls.guid_begin(), last: Decls.guid_end());
2056
2057 auto isExported = [&](StringRef ModuleIdentifier, ValueInfo VI) {
2058 const auto &ExportList = ExportLists.find(Val: ModuleIdentifier);
2059 return (ExportList != ExportLists.end() && ExportList->second.count(V: VI)) ||
2060 ExportedGUIDs.count(x: VI.getGUID());
2061 };
2062
2063 // Update local devirtualized targets that were exported by cross-module
2064 // importing or by other devirtualizations marked in the ExportedGUIDs set.
2065 updateIndexWPDForExports(Summary&: ThinLTO.CombinedIndex, isExported,
2066 LocalWPDTargetsMap);
2067
2068 thinLTOInternalizeAndPromoteInIndex(Index&: ThinLTO.CombinedIndex, isExported,
2069 isPrevailing);
2070
2071 auto recordNewLinkage = [&](StringRef ModuleIdentifier,
2072 GlobalValue::GUID GUID,
2073 GlobalValue::LinkageTypes NewLinkage) {
2074 ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
2075 };
2076 thinLTOResolvePrevailingInIndex(C: Conf, Index&: ThinLTO.CombinedIndex, isPrevailing,
2077 recordNewLinkage, GUIDPreservedSymbols);
2078
2079 thinLTOPropagateFunctionAttrs(Index&: ThinLTO.CombinedIndex, isPrevailing);
2080
2081 generateParamAccessSummary(Index&: ThinLTO.CombinedIndex);
2082
2083 if (llvm::timeTraceProfilerEnabled())
2084 llvm::timeTraceProfilerEnd();
2085
2086 TimeTraceScopeExit.release();
2087
2088 auto &ModuleMap =
2089 ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
2090
2091 auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error {
2092 auto ProcessOneModule = [&](int I) -> Error {
2093 auto &Mod = *(ModuleMap.begin() + I);
2094 // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for
2095 // combined module and parallel code generation partitions.
2096 return BackendProcess->start(
2097 Task: RegularLTO.ParallelCodeGenParallelismLevel + I, BM: Mod.second,
2098 ImportList: ImportLists[Mod.first], ExportList: ExportLists[Mod.first],
2099 ResolvedODR: ResolvedODR[Mod.first], ModuleMap&: ThinLTO.ModuleMap);
2100 };
2101
2102 BackendProcess->setup(ThinLTONumTasks: ModuleMap.size(),
2103 ThinLTOTaskOffset: RegularLTO.ParallelCodeGenParallelismLevel,
2104 Triple: RegularLTO.CombinedModule->getTargetTriple());
2105
2106 if (BackendProcess->getThreadCount() == 1 ||
2107 BackendProcess->isSensitiveToInputOrder()) {
2108 // Process the modules in the order they were provided on the
2109 // command-line. It is important for this codepath to be used for
2110 // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists
2111 // ThinLTO objects in the same order as the inputs, which otherwise would
2112 // affect the final link order.
2113 for (int I = 0, E = ModuleMap.size(); I != E; ++I)
2114 if (Error E = ProcessOneModule(I))
2115 return E;
2116 } else {
2117 // When executing in parallel, process largest bitsize modules first to
2118 // improve parallelism, and avoid starving the thread pool near the end.
2119 // This saves about 15 sec on a 36-core machine while link `clang.exe`
2120 // (out of 100 sec).
2121 std::vector<BitcodeModule *> ModulesVec;
2122 ModulesVec.reserve(n: ModuleMap.size());
2123 for (auto &Mod : ModuleMap)
2124 ModulesVec.push_back(x: &Mod.second);
2125 for (int I : generateModulesOrdering(R: ModulesVec))
2126 if (Error E = ProcessOneModule(I))
2127 return E;
2128 }
2129 return BackendProcess->wait();
2130 };
2131
2132 if (!CodeGenDataThinLTOTwoRounds) {
2133 std::unique_ptr<ThinBackendProc> BackendProc =
2134 ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
2135 AddStream, Cache);
2136 return RunBackends(BackendProc.get());
2137 }
2138
2139 // Perform two rounds of code generation for ThinLTO:
2140 // 1. First round: Perform optimization and code generation, outputting to
2141 // temporary scratch objects.
2142 // 2. Merge code generation data extracted from the temporary scratch objects.
2143 // 3. Second round: Execute code generation again using the merged data.
2144 LLVM_DEBUG(dbgs() << "[TwoRounds] Initializing ThinLTO two-codegen rounds\n");
2145
2146 unsigned MaxTasks = getMaxTasks();
2147 auto Parallelism = ThinLTO.Backend.getParallelism();
2148 // Set up two additional streams and caches for storing temporary scratch
2149 // objects and optimized IRs, using the same cache directory as the original.
2150 cgdata::StreamCacheData CG(MaxTasks, Cache, "CG"), IR(MaxTasks, Cache, "IR");
2151
2152 // First round: Execute optimization and code generation, outputting to
2153 // temporary scratch objects. Serialize the optimized IRs before initiating
2154 // code generation.
2155 LLVM_DEBUG(dbgs() << "[TwoRounds] Running the first round of codegen\n");
2156 auto FirstRoundLTO = std::make_unique<FirstRoundThinBackend>(
2157 args&: Conf, args&: ThinLTO.CombinedIndex, args&: Parallelism, args&: ModuleToDefinedGVSummaries,
2158 args&: CG.AddStream, args&: CG.Cache, args&: IR.AddStream, args&: IR.Cache);
2159 if (Error E = RunBackends(FirstRoundLTO.get()))
2160 return E;
2161
2162 LLVM_DEBUG(dbgs() << "[TwoRounds] Merging codegen data\n");
2163 auto CombinedHashOrErr = cgdata::mergeCodeGenData(ObjectFiles: *CG.getResult());
2164 if (Error E = CombinedHashOrErr.takeError())
2165 return E;
2166 auto CombinedHash = *CombinedHashOrErr;
2167 LLVM_DEBUG(dbgs() << "[TwoRounds] CGData hash: " << CombinedHash << "\n");
2168
2169 // Second round: Read the optimized IRs and execute code generation using the
2170 // merged data.
2171 LLVM_DEBUG(dbgs() << "[TwoRounds] Running the second round of codegen\n");
2172 auto SecondRoundLTO = std::make_unique<SecondRoundThinBackend>(
2173 args&: Conf, args&: ThinLTO.CombinedIndex, args&: Parallelism, args&: ModuleToDefinedGVSummaries,
2174 args&: AddStream, args&: Cache, args: IR.getResult(), args&: CombinedHash);
2175 return RunBackends(SecondRoundLTO.get());
2176}
2177
2178Expected<LLVMRemarkFileHandle> lto::setupLLVMOptimizationRemarks(
2179 LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses,
2180 StringRef RemarksFormat, bool RemarksWithHotness,
2181 std::optional<uint64_t> RemarksHotnessThreshold, int Count) {
2182 std::string Filename = std::string(RemarksFilename);
2183 // For ThinLTO, file.opt.<format> becomes
2184 // file.opt.<format>.thin.<num>.<format>.
2185 if (!Filename.empty() && Count != -1)
2186 Filename =
2187 (Twine(Filename) + ".thin." + llvm::utostr(X: Count) + "." + RemarksFormat)
2188 .str();
2189
2190 auto ResultOrErr = llvm::setupLLVMOptimizationRemarks(
2191 Context, RemarksFilename: Filename, RemarksPasses, RemarksFormat, RemarksWithHotness,
2192 RemarksHotnessThreshold);
2193 if (Error E = ResultOrErr.takeError())
2194 return std::move(E);
2195
2196 if (*ResultOrErr)
2197 (*ResultOrErr)->keep();
2198
2199 return ResultOrErr;
2200}
2201
2202Expected<std::unique_ptr<ToolOutputFile>>
2203lto::setupStatsFile(StringRef StatsFilename) {
2204 // Setup output file to emit statistics.
2205 if (StatsFilename.empty())
2206 return nullptr;
2207
2208 llvm::EnableStatistics(DoPrintOnExit: false);
2209 std::error_code EC;
2210 auto StatsFile =
2211 std::make_unique<ToolOutputFile>(args&: StatsFilename, args&: EC, args: sys::fs::OF_None);
2212 if (EC)
2213 return errorCodeToError(EC);
2214
2215 StatsFile->keep();
2216 return std::move(StatsFile);
2217}
2218
2219// Compute the ordering we will process the inputs: the rough heuristic here
2220// is to sort them per size so that the largest module get schedule as soon as
2221// possible. This is purely a compile-time optimization.
2222std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
2223 auto Seq = llvm::seq<int>(Begin: 0, End: R.size());
2224 std::vector<int> ModulesOrdering(Seq.begin(), Seq.end());
2225 llvm::sort(C&: ModulesOrdering, Comp: [&](int LeftIndex, int RightIndex) {
2226 auto LSize = R[LeftIndex]->getBuffer().size();
2227 auto RSize = R[RightIndex]->getBuffer().size();
2228 return LSize > RSize;
2229 });
2230 return ModulesOrdering;
2231}
2232
2233namespace {
2234/// This out-of-process backend does not perform code generation when invoked
2235/// for each task. Instead, it generates the necessary information (e.g., the
2236/// summary index shard, import list, etc.) to enable code generation to be
2237/// performed externally, similar to WriteIndexesThinBackend. The backend's
2238/// `wait` function then invokes an external distributor process to carry out
2239/// the backend compilations.
2240class OutOfProcessThinBackend : public CGThinBackend {
2241 using SString = SmallString<128>;
2242
2243 BumpPtrAllocator Alloc;
2244 StringSaver Saver{Alloc};
2245
2246 SString LinkerOutputFile;
2247
2248 SString DistributorPath;
2249 ArrayRef<StringRef> DistributorArgs;
2250
2251 SString RemoteCompiler;
2252 ArrayRef<StringRef> RemoteCompilerPrependArgs;
2253 ArrayRef<StringRef> RemoteCompilerArgs;
2254
2255 bool SaveTemps;
2256
2257 SmallVector<StringRef, 0> CodegenOptions;
2258 DenseSet<StringRef> CommonInputs;
2259 // Number of the object files that have been already cached.
2260 std::atomic<size_t> CachedJobs{0};
2261 // Information specific to individual backend compilation job.
2262 struct Job {
2263 unsigned Task;
2264 StringRef ModuleID;
2265 StringRef NativeObjectPath;
2266 StringRef SummaryIndexPath;
2267 ImportsFilesContainer ImportsFiles;
2268 std::string CacheKey;
2269 AddStreamFn CacheAddStream;
2270 bool Cached = false;
2271 };
2272 // The set of backend compilations jobs.
2273 SmallVector<Job> Jobs;
2274
2275 // A unique string to identify the current link.
2276 SmallString<8> UID;
2277
2278 // The offset to the first ThinLTO task.
2279 unsigned ThinLTOTaskOffset;
2280
2281 // The target triple to supply for backend compilations.
2282 llvm::Triple Triple;
2283
2284 // Cache
2285 FileCache Cache;
2286
2287public:
2288 OutOfProcessThinBackend(
2289 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
2290 ThreadPoolStrategy ThinLTOParallelism,
2291 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
2292 AddStreamFn AddStream, FileCache CacheFn, lto::IndexWriteCallback OnWrite,
2293 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
2294 StringRef LinkerOutputFile, StringRef Distributor,
2295 ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
2296 ArrayRef<StringRef> RemoteCompilerPrependArgs,
2297 ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps)
2298 : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
2299 AddStream, OnWrite, ShouldEmitIndexFiles,
2300 ShouldEmitImportsFiles, ThinLTOParallelism),
2301 LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor),
2302 DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler),
2303 RemoteCompilerPrependArgs(RemoteCompilerPrependArgs),
2304 RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps),
2305 Cache(std::move(CacheFn)) {}
2306
2307 void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset,
2308 llvm::Triple Triple) override {
2309 UID = itostr(X: sys::Process::getProcessId());
2310 Jobs.resize(N: (size_t)ThinLTONumTasks);
2311 this->ThinLTOTaskOffset = ThinLTOTaskOffset;
2312 this->Triple = Triple;
2313 this->Conf.Dtlto = 1;
2314 }
2315
2316 virtual Error runThinLTOBackendThread(
2317 Job &J, const FunctionImporter::ImportMapTy &ImportList,
2318 const FunctionImporter::ExportSetTy &ExportList,
2319 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
2320 &ResolvedODR) {
2321 {
2322 TimeTraceScope TimeScope("Emit individual index for DTLTO",
2323 J.SummaryIndexPath);
2324 if (auto E = emitFiles(ImportList, ModulePath: J.ModuleID, NewModulePath: J.ModuleID.str(),
2325 SummaryPath: J.SummaryIndexPath, ImportsFiles: J.ImportsFiles))
2326 return E;
2327 }
2328
2329 if (!Cache.isValid() || !CombinedIndex.modulePaths().count(Key: J.ModuleID) ||
2330 all_of(Range: CombinedIndex.getModuleHash(ModPath: J.ModuleID),
2331 P: [](uint32_t V) { return V == 0; }))
2332 // Cache disabled or no entry for this module in the combined index or
2333 // no module hash.
2334 return Error::success();
2335
2336 TimeTraceScope TimeScope("Check cache for DTLTO", J.SummaryIndexPath);
2337 const GVSummaryMapTy &DefinedGlobals =
2338 ModuleToDefinedGVSummaries.find(Val: J.ModuleID)->second;
2339
2340 // The module may be cached, this helps handling it.
2341 J.CacheKey = computeLTOCacheKey(Conf, Index: CombinedIndex, ModuleID: J.ModuleID, ImportList,
2342 ExportList, ResolvedODR, DefinedGlobals,
2343 CfiFunctionDefs, CfiFunctionDecls);
2344
2345 // The module may be cached, this helps handling it.
2346 auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
2347 if (Error Err = CacheAddStreamExp.takeError())
2348 return Err;
2349 AddStreamFn &CacheAddStream = *CacheAddStreamExp;
2350 // If CacheAddStream is null, we have a cache hit and at this point
2351 // object file is already passed back to the linker.
2352 if (!CacheAddStream) {
2353 J.Cached = true; // Cache hit, mark the job as cached.
2354 CachedJobs.fetch_add(i: 1);
2355 } else {
2356 // If CacheAddStream is not null, we have a cache miss and we need to
2357 // run the backend for codegen. Save cache 'add stream'
2358 // function for a later use.
2359 J.CacheAddStream = std::move(CacheAddStream);
2360 }
2361 return Error::success();
2362 }
2363
2364 Error start(
2365 unsigned Task, BitcodeModule BM,
2366 const FunctionImporter::ImportMapTy &ImportList,
2367 const FunctionImporter::ExportSetTy &ExportList,
2368 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
2369 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
2370
2371 StringRef ModulePath = BM.getModuleIdentifier();
2372
2373 SString ObjFilePath = sys::path::parent_path(path: LinkerOutputFile);
2374 sys::path::append(path&: ObjFilePath, a: sys::path::stem(path: ModulePath) + "." +
2375 itostr(X: Task) + "." + UID + ".native.o");
2376
2377 Job &J = Jobs[Task - ThinLTOTaskOffset];
2378 J = {.Task: Task,
2379 .ModuleID: ModulePath,
2380 .NativeObjectPath: Saver.save(S: ObjFilePath.str()),
2381 .SummaryIndexPath: Saver.save(S: ObjFilePath.str() + ".thinlto.bc"),
2382 .ImportsFiles: {}, // Filled in by emitFiles below.
2383 .CacheKey: "", /*CacheKey=*/
2384 .CacheAddStream: nullptr,
2385 .Cached: false};
2386
2387 // Cleanup per-job temporary files on abnormal process exit.
2388 if (!SaveTemps) {
2389 llvm::sys::RemoveFileOnSignal(Filename: J.NativeObjectPath);
2390 if (!ShouldEmitIndexFiles)
2391 llvm::sys::RemoveFileOnSignal(Filename: J.SummaryIndexPath);
2392 }
2393
2394 assert(ModuleToDefinedGVSummaries.count(ModulePath));
2395
2396 // The BackendThreadPool is only used here to write the sharded index files
2397 // (similar to WriteIndexesThinBackend).
2398 BackendThreadPool.async(
2399 F: [=](Job &J, const FunctionImporter::ImportMapTy &ImportList,
2400 const FunctionImporter::ExportSetTy &ExportList,
2401 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
2402 &ResolvedODR) {
2403 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
2404 timeTraceProfilerInitialize(
2405 TimeTraceGranularity: Conf.TimeTraceGranularity,
2406 ProcName: "Emit individual index and check cache for DTLTO");
2407 Error E =
2408 runThinLTOBackendThread(J, ImportList, ExportList, ResolvedODR);
2409 if (E) {
2410 std::unique_lock<std::mutex> L(ErrMu);
2411 if (Err)
2412 Err = joinErrors(E1: std::move(*Err), E2: std::move(E));
2413 else
2414 Err = std::move(E);
2415 }
2416 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
2417 timeTraceProfilerFinishThread();
2418 },
2419 ArgList: std::ref(t&: J), ArgList: std::ref(t: ImportList), ArgList: std::ref(t: ExportList),
2420 ArgList: std::ref(t: ResolvedODR));
2421
2422 return Error::success();
2423 }
2424
2425 // Derive a set of Clang options that will be shared/common for all DTLTO
2426 // backend compilations. We are intentionally minimal here as these options
2427 // must remain synchronized with the behavior of Clang. DTLTO does not support
2428 // all the features available with in-process LTO. More features are expected
2429 // to be added over time. Users can specify Clang options directly if a
2430 // feature is not supported. Note that explicitly specified options that imply
2431 // additional input or output file dependencies must be communicated to the
2432 // distribution system, potentially by setting extra options on the
2433 // distributor program.
2434 void buildCommonRemoteCompilerOptions() {
2435 const lto::Config &C = Conf;
2436 auto &Ops = CodegenOptions;
2437
2438 Ops.push_back(Elt: Saver.save(S: "-O" + Twine(C.OptLevel)));
2439
2440 if (C.Options.EmitAddrsig)
2441 Ops.push_back(Elt: "-faddrsig");
2442 if (C.Options.FunctionSections)
2443 Ops.push_back(Elt: "-ffunction-sections");
2444 if (C.Options.DataSections)
2445 Ops.push_back(Elt: "-fdata-sections");
2446
2447 if (C.RelocModel == Reloc::PIC_)
2448 // Clang doesn't have -fpic for all triples.
2449 if (!Triple.isOSBinFormatCOFF())
2450 Ops.push_back(Elt: "-fpic");
2451
2452 // Turn on/off warnings about profile cfg mismatch (default on)
2453 // --lto-pgo-warn-mismatch.
2454 if (!C.PGOWarnMismatch) {
2455 Ops.push_back(Elt: "-mllvm");
2456 Ops.push_back(Elt: "-no-pgo-warn-mismatch");
2457 }
2458
2459 // Enable sample-based profile guided optimizations.
2460 // Sample profile file path --lto-sample-profile=<value>.
2461 if (!C.SampleProfile.empty()) {
2462 Ops.push_back(
2463 Elt: Saver.save(S: "-fprofile-sample-use=" + Twine(C.SampleProfile)));
2464 CommonInputs.insert(V: C.SampleProfile);
2465 }
2466
2467 // We don't know which of options will be used by Clang.
2468 Ops.push_back(Elt: "-Wno-unused-command-line-argument");
2469
2470 // Forward any supplied options.
2471 if (!RemoteCompilerArgs.empty())
2472 for (auto &a : RemoteCompilerArgs)
2473 Ops.push_back(Elt: a);
2474 }
2475
2476 // Generates a JSON file describing the backend compilations, for the
2477 // distributor.
2478 bool emitDistributorJson(StringRef DistributorJson) {
2479 using json::Array;
2480 std::error_code EC;
2481 raw_fd_ostream OS(DistributorJson, EC);
2482 if (EC)
2483 return false;
2484
2485 json::OStream JOS(OS);
2486 JOS.object(Contents: [&]() {
2487 // Information common to all jobs.
2488 JOS.attributeObject(Key: "common", Contents: [&]() {
2489 JOS.attribute(Key: "linker_output", Contents: LinkerOutputFile);
2490
2491 JOS.attributeArray(Key: "args", Contents: [&]() {
2492 JOS.value(V: RemoteCompiler);
2493
2494 // Forward any supplied prepend options.
2495 if (!RemoteCompilerPrependArgs.empty())
2496 for (auto &A : RemoteCompilerPrependArgs)
2497 JOS.value(V: A);
2498
2499 JOS.value(V: "-c");
2500
2501 JOS.value(V: Saver.save(S: "--target=" + Triple.str()));
2502
2503 for (const auto &A : CodegenOptions)
2504 JOS.value(V: A);
2505 });
2506
2507 JOS.attribute(Key: "inputs", Contents: Array(CommonInputs));
2508 });
2509
2510 // Per-compilation-job information.
2511 JOS.attributeArray(Key: "jobs", Contents: [&]() {
2512 for (const auto &J : Jobs) {
2513 assert(J.Task != 0);
2514 if (J.Cached) {
2515 assert(!Cache.getCacheDirectoryPath().empty());
2516 continue;
2517 }
2518
2519 SmallVector<StringRef, 2> Inputs;
2520 SmallVector<StringRef, 1> Outputs;
2521
2522 JOS.object(Contents: [&]() {
2523 JOS.attributeArray(Key: "args", Contents: [&]() {
2524 JOS.value(V: J.ModuleID);
2525 Inputs.push_back(Elt: J.ModuleID);
2526
2527 JOS.value(
2528 V: Saver.save(S: "-fthinlto-index=" + Twine(J.SummaryIndexPath)));
2529 Inputs.push_back(Elt: J.SummaryIndexPath);
2530
2531 JOS.value(V: "-o");
2532 JOS.value(V: J.NativeObjectPath);
2533 Outputs.push_back(Elt: J.NativeObjectPath);
2534 });
2535
2536 // Add the bitcode files from which imports will be made. These do
2537 // not explicitly appear on the backend compilation command lines
2538 // but are recorded in the summary index shards.
2539 llvm::append_range(C&: Inputs, R: J.ImportsFiles);
2540 JOS.attribute(Key: "inputs", Contents: Array(Inputs));
2541
2542 JOS.attribute(Key: "outputs", Contents: Array(Outputs));
2543 });
2544 }
2545 });
2546 });
2547
2548 return true;
2549 }
2550
2551 void removeFile(StringRef FileName) {
2552 std::error_code EC = sys::fs::remove(path: FileName, IgnoreNonExisting: true);
2553 if (EC && EC != std::make_error_code(e: std::errc::no_such_file_or_directory))
2554 errs() << "warning: could not remove the file '" << FileName
2555 << "': " << EC.message() << "\n";
2556 }
2557
2558 Error wait() override {
2559 // Wait for the information on the required backend compilations to be
2560 // gathered.
2561 BackendThreadPool.wait();
2562 if (Err)
2563 return std::move(*Err);
2564
2565 llvm::scope_exit CleanPerJobFiles([&] {
2566 llvm::TimeTraceScope TimeScope("Remove DTLTO temporary files");
2567 if (!SaveTemps)
2568 for (auto &Job : Jobs) {
2569 removeFile(FileName: Job.NativeObjectPath);
2570 if (!ShouldEmitIndexFiles)
2571 removeFile(FileName: Job.SummaryIndexPath);
2572 }
2573 });
2574
2575 const StringRef BCError = "DTLTO backend compilation: ";
2576
2577 buildCommonRemoteCompilerOptions();
2578
2579 SString JsonFile = sys::path::parent_path(path: LinkerOutputFile);
2580 {
2581 llvm::TimeTraceScope TimeScope("Emit DTLTO JSON");
2582 sys::path::append(path&: JsonFile, a: sys::path::stem(path: LinkerOutputFile) + "." +
2583 UID + ".dist-file.json");
2584 // Cleanup DTLTO JSON file on abnormal process exit.
2585 if (!SaveTemps)
2586 llvm::sys::RemoveFileOnSignal(Filename: JsonFile);
2587 if (!emitDistributorJson(DistributorJson: JsonFile))
2588 return make_error<StringError>(
2589 Args: BCError + "failed to generate distributor JSON script: " + JsonFile,
2590 Args: inconvertibleErrorCode());
2591 }
2592 llvm::scope_exit CleanJson([&] {
2593 if (!SaveTemps)
2594 removeFile(FileName: JsonFile);
2595 });
2596
2597 {
2598 llvm::TimeTraceScope TimeScope("Execute DTLTO distributor",
2599 DistributorPath);
2600 // Checks if we have any jobs that don't have corresponding cache entries.
2601 if (CachedJobs.load() < Jobs.size()) {
2602 SmallVector<StringRef, 3> Args = {DistributorPath};
2603 llvm::append_range(C&: Args, R&: DistributorArgs);
2604 Args.push_back(Elt: JsonFile);
2605 std::string ErrMsg;
2606 if (sys::ExecuteAndWait(Program: Args[0], Args,
2607 /*Env=*/std::nullopt, /*Redirects=*/{},
2608 /*SecondsToWait=*/0, /*MemoryLimit=*/0,
2609 ErrMsg: &ErrMsg)) {
2610 return make_error<StringError>(
2611 Args: BCError + "distributor execution failed" +
2612 (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
2613 Args: inconvertibleErrorCode());
2614 }
2615 }
2616 }
2617
2618 {
2619 llvm::TimeTraceScope FilesScope("Add DTLTO files to the link");
2620 for (auto &Job : Jobs) {
2621 if (!Job.CacheKey.empty() && Job.Cached) {
2622 assert(Cache.isValid());
2623 continue;
2624 }
2625 // Load the native object from a file into a memory buffer
2626 // and store its contents in the output buffer.
2627 auto ObjFileMbOrErr =
2628 MemoryBuffer::getFile(Filename: Job.NativeObjectPath, /*IsText=*/false,
2629 /*RequiresNullTerminator=*/false);
2630 if (std::error_code EC = ObjFileMbOrErr.getError())
2631 return make_error<StringError>(
2632 Args: BCError + "cannot open native object file: " +
2633 Job.NativeObjectPath + ": " + EC.message(),
2634 Args: inconvertibleErrorCode());
2635
2636 MemoryBufferRef ObjFileMbRef = ObjFileMbOrErr->get()->getMemBufferRef();
2637 if (Cache.isValid()) {
2638 // Cache hits are taken care of earlier. At this point, we could only
2639 // have cache misses.
2640 assert(Job.CacheAddStream);
2641 // Obtain a file stream for a storing a cache entry.
2642 auto CachedFileStreamOrErr =
2643 Job.CacheAddStream(Job.Task, Job.ModuleID);
2644 if (!CachedFileStreamOrErr)
2645 return joinErrors(
2646 E1: CachedFileStreamOrErr.takeError(),
2647 E2: createStringError(EC: inconvertibleErrorCode(),
2648 Fmt: "Cannot get a cache file stream: %s",
2649 Vals: Job.NativeObjectPath.data()));
2650 // Store a file buffer into the cache stream.
2651 auto &CacheStream = *(CachedFileStreamOrErr->get());
2652 *(CacheStream.OS) << ObjFileMbRef.getBuffer();
2653 if (Error Err = CacheStream.commit())
2654 return Err;
2655 } else {
2656 auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
2657 if (Error Err = StreamOrErr.takeError())
2658 report_fatal_error(Err: std::move(Err));
2659 auto &Stream = *StreamOrErr->get();
2660 *Stream.OS << ObjFileMbRef.getBuffer();
2661 if (Error Err = Stream.commit())
2662 report_fatal_error(Err: std::move(Err));
2663 }
2664 }
2665 }
2666 return Error::success();
2667 }
2668};
2669} // end anonymous namespace
2670
2671ThinBackend lto::createOutOfProcessThinBackend(
2672 ThreadPoolStrategy Parallelism, lto::IndexWriteCallback OnWrite,
2673 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
2674 StringRef LinkerOutputFile, StringRef Distributor,
2675 ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
2676 ArrayRef<StringRef> RemoteCompilerPrependArgs,
2677 ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps) {
2678 auto Func =
2679 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
2680 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
2681 AddStreamFn AddStream, FileCache Cache) {
2682 return std::make_unique<OutOfProcessThinBackend>(
2683 args: Conf, args&: CombinedIndex, args: Parallelism, args: ModuleToDefinedGVSummaries,
2684 args&: AddStream, args&: Cache, args: OnWrite, args: ShouldEmitIndexFiles,
2685 args: ShouldEmitImportsFiles, args: LinkerOutputFile, args: Distributor,
2686 args: DistributorArgs, args: RemoteCompiler, args: RemoteCompilerPrependArgs,
2687 args: RemoteCompilerArgs, args: SaveTemps);
2688 };
2689 return ThinBackend(Func, Parallelism);
2690}
2691