1//===-LTO.cpp - LLVM Link Time Optimizer ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements functions and classes used to support LTO.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/LTO/LTO.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/ScopeExit.h"
16#include "llvm/ADT/SmallSet.h"
17#include "llvm/ADT/StableHashing.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/Analysis/OptimizationRemarkEmitter.h"
21#include "llvm/Analysis/StackSafetyAnalysis.h"
22#include "llvm/Analysis/TargetTransformInfo.h"
23#include "llvm/Bitcode/BitcodeReader.h"
24#include "llvm/Bitcode/BitcodeWriter.h"
25#include "llvm/CGData/CodeGenData.h"
26#include "llvm/CodeGen/Analysis.h"
27#include "llvm/Config/llvm-config.h"
28#include "llvm/IR/AutoUpgrade.h"
29#include "llvm/IR/DiagnosticPrinter.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/LLVMRemarkStreamer.h"
32#include "llvm/IR/LegacyPassManager.h"
33#include "llvm/IR/Mangler.h"
34#include "llvm/IR/Metadata.h"
35#include "llvm/IR/RuntimeLibcalls.h"
36#include "llvm/LTO/LTOBackend.h"
37#include "llvm/Linker/IRMover.h"
38#include "llvm/MC/TargetRegistry.h"
39#include "llvm/Object/IRObjectFile.h"
40#include "llvm/Support/Caching.h"
41#include "llvm/Support/CommandLine.h"
42#include "llvm/Support/Compiler.h"
43#include "llvm/Support/Error.h"
44#include "llvm/Support/FileSystem.h"
45#include "llvm/Support/JSON.h"
46#include "llvm/Support/MemoryBuffer.h"
47#include "llvm/Support/Path.h"
48#include "llvm/Support/Process.h"
49#include "llvm/Support/SHA1.h"
50#include "llvm/Support/Signals.h"
51#include "llvm/Support/SourceMgr.h"
52#include "llvm/Support/ThreadPool.h"
53#include "llvm/Support/Threading.h"
54#include "llvm/Support/TimeProfiler.h"
55#include "llvm/Support/ToolOutputFile.h"
56#include "llvm/Support/VCSRevision.h"
57#include "llvm/Support/raw_ostream.h"
58#include "llvm/Target/TargetOptions.h"
59#include "llvm/Transforms/IPO.h"
60#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
61#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
62#include "llvm/Transforms/Utils/FunctionImportUtils.h"
63#include "llvm/Transforms/Utils/SplitModule.h"
64
65#include <optional>
66#include <set>
67
68using namespace llvm;
69using namespace lto;
70using namespace object;
71
72#define DEBUG_TYPE "lto"
73
74Error LTO::setupOptimizationRemarks() {
75 // Setup the remark streamer according to the provided configuration.
76 auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
77 Context&: RegularLTO.Ctx, RemarksFilename: Conf.RemarksFilename, RemarksPasses: Conf.RemarksPasses,
78 RemarksFormat: Conf.RemarksFormat, RemarksWithHotness: Conf.RemarksWithHotness,
79 RemarksHotnessThreshold: Conf.RemarksHotnessThreshold);
80 if (!DiagFileOrErr)
81 return DiagFileOrErr.takeError();
82
83 DiagnosticOutputFile = std::move(*DiagFileOrErr);
84
85 // Create a dummy function to serve as a context for LTO-link remarks.
86 // This is required because OptimizationRemark requires a valid Function,
87 // and in ThinLTO we may not have any IR functions available during the
88 // thin link. Host it in a private module to avoid interfering with the LTO
89 // process.
90 if (!LinkerRemarkFunction) {
91 DummyModule = std::make_unique<Module>(args: "remark_dummy", args&: RegularLTO.Ctx);
92 LinkerRemarkFunction = Function::Create(
93 Ty: FunctionType::get(Result: Type::getVoidTy(C&: RegularLTO.Ctx), isVarArg: false),
94 Linkage: GlobalValue::ExternalLinkage, N: "thinlto_remark_dummy",
95 M: DummyModule.get());
96 }
97
98 return Error::success();
99}
100
101void LTO::emitRemark(OptimizationRemark &Remark) {
102 const Function &F = Remark.getFunction();
103 OptimizationRemarkEmitter ORE(const_cast<Function *>(&F));
104 ORE.emit(OptDiag&: Remark);
105}
106
107static cl::opt<bool>
108 DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(Val: false), cl::Hidden,
109 cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
110namespace llvm {
111extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;
112extern cl::opt<bool> ForceImportAll;
113} // end namespace llvm
114
115namespace llvm {
116/// Enable global value internalization in LTO.
117cl::opt<bool> EnableLTOInternalization(
118 "enable-lto-internalization", cl::init(Val: true), cl::Hidden,
119 cl::desc("Enable global value internalization in LTO"));
120
121static cl::opt<bool>
122 LTOKeepSymbolCopies("lto-keep-symbol-copies", cl::init(Val: false), cl::Hidden,
123 cl::desc("Keep copies of symbols in LTO indexing"));
124
125/// Indicate we are linking with an allocator that supports hot/cold operator
126/// new interfaces.
127extern cl::opt<bool> SupportsHotColdNew;
128
129/// Enable MemProf context disambiguation for thin link.
130extern cl::opt<bool> EnableMemProfContextDisambiguation;
131} // namespace llvm
132
133// Computes a unique hash for the Module considering the current list of
134// export/import and other global analysis results.
135// Returns the hash in its hexadecimal representation.
136std::string llvm::computeLTOCacheKey(
137 const Config &Conf, const ModuleSummaryIndex &Index, StringRef ModuleID,
138 const FunctionImporter::ImportMapTy &ImportList,
139 const FunctionImporter::ExportSetTy &ExportList,
140 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
141 const GVSummaryMapTy &DefinedGlobals,
142 const DenseSet<GlobalValue::GUID> &CfiFunctionDefs,
143 const DenseSet<GlobalValue::GUID> &CfiFunctionDecls) {
144 // Compute the unique hash for this entry.
145 // This is based on the current compiler version, the module itself, the
146 // export list, the hash for every single module in the import list, the
147 // list of ResolvedODR for the module, and the list of preserved symbols.
148 SHA1 Hasher;
149
150 // Start with the compiler revision
151 Hasher.update(LLVM_VERSION_STRING);
152#ifdef LLVM_REVISION
153 Hasher.update(LLVM_REVISION);
154#endif
155
156 // Include the parts of the LTO configuration that affect code generation.
157 auto AddString = [&](StringRef Str) {
158 Hasher.update(Str);
159 Hasher.update(Data: ArrayRef<uint8_t>{0});
160 };
161 auto AddUnsigned = [&](unsigned I) {
162 uint8_t Data[4];
163 support::endian::write32le(P: Data, V: I);
164 Hasher.update(Data);
165 };
166 auto AddUint64 = [&](uint64_t I) {
167 uint8_t Data[8];
168 support::endian::write64le(P: Data, V: I);
169 Hasher.update(Data);
170 };
171 auto AddUint8 = [&](const uint8_t I) {
172 Hasher.update(Data: ArrayRef<uint8_t>(&I, 1));
173 };
174 AddString(Conf.CPU);
175 // FIXME: Hash more of Options. For now all clients initialize Options from
176 // command-line flags (which is unsupported in production), but may set
177 // X86RelaxRelocations. The clang driver can also pass FunctionSections,
178 // DataSections and DebuggerTuning via command line flags.
179 AddUnsigned(Conf.Options.MCOptions.X86RelaxRelocations);
180 AddUnsigned(Conf.Options.FunctionSections);
181 AddUnsigned(Conf.Options.DataSections);
182 AddUnsigned((unsigned)Conf.Options.DebuggerTuning);
183 for (auto &A : Conf.MAttrs)
184 AddString(A);
185 if (Conf.RelocModel)
186 AddUnsigned(*Conf.RelocModel);
187 else
188 AddUnsigned(-1);
189 if (Conf.CodeModel)
190 AddUnsigned(*Conf.CodeModel);
191 else
192 AddUnsigned(-1);
193 for (const auto &S : Conf.MllvmArgs)
194 AddString(S);
195 AddUnsigned(static_cast<int>(Conf.CGOptLevel));
196 AddUnsigned(static_cast<int>(Conf.CGFileType));
197 AddUnsigned(Conf.OptLevel);
198 AddUnsigned(Conf.Freestanding);
199 AddString(Conf.OptPipeline);
200 AddString(Conf.AAPipeline);
201 AddString(Conf.OverrideTriple);
202 AddString(Conf.DefaultTriple);
203 AddString(Conf.DwoDir);
204 AddUint8(Conf.Dtlto);
205
206 // Include the hash for the current module
207 auto ModHash = Index.getModuleHash(ModPath: ModuleID);
208 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
209
210 // TODO: `ExportList` is determined by `ImportList`. Since `ImportList` is
211 // used to compute cache key, we could omit hashing `ExportList` here.
212 std::vector<uint64_t> ExportsGUID;
213 ExportsGUID.reserve(n: ExportList.size());
214 for (const auto &VI : ExportList)
215 ExportsGUID.push_back(x: VI.getGUID());
216
217 // Sort the export list elements GUIDs.
218 llvm::sort(C&: ExportsGUID);
219 for (auto GUID : ExportsGUID)
220 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
221
222 // Order using module hash, to be both independent of module name and
223 // module order.
224 auto Comp = [&](const std::pair<StringRef, GlobalValue::GUID> &L,
225 const std::pair<StringRef, GlobalValue::GUID> &R) {
226 return std::make_pair(x: Index.getModule(ModPath: L.first)->second, y: L.second) <
227 std::make_pair(x: Index.getModule(ModPath: R.first)->second, y: R.second);
228 };
229 FunctionImporter::SortedImportList SortedImportList(ImportList, Comp);
230
231 // Count the number of imports for each source module.
232 DenseMap<StringRef, unsigned> ModuleToNumImports;
233 for (const auto &[FromModule, GUID, Type] : SortedImportList)
234 ++ModuleToNumImports[FromModule];
235
236 std::optional<StringRef> LastModule;
237 for (const auto &[FromModule, GUID, Type] : SortedImportList) {
238 if (LastModule != FromModule) {
239 // Include the hash for every module we import functions from. The set of
240 // imported symbols for each module may affect code generation and is
241 // sensitive to link order, so include that as well.
242 LastModule = FromModule;
243 auto ModHash = Index.getModule(ModPath: FromModule)->second;
244 Hasher.update(Data: ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
245 AddUint64(ModuleToNumImports[FromModule]);
246 }
247 AddUint64(GUID);
248 AddUint8(Type);
249 }
250
251 // Include the hash for the resolved ODR.
252 for (auto &Entry : ResolvedODR) {
253 Hasher.update(Data: ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
254 sizeof(GlobalValue::GUID)));
255 Hasher.update(Data: ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
256 sizeof(GlobalValue::LinkageTypes)));
257 }
258
259 // Members of CfiFunctionDefs and CfiFunctionDecls that are referenced or
260 // defined in this module.
261 std::set<GlobalValue::GUID> UsedCfiDefs;
262 std::set<GlobalValue::GUID> UsedCfiDecls;
263
264 // Typeids used in this module.
265 std::set<GlobalValue::GUID> UsedTypeIds;
266
267 auto AddUsedCfiGlobal = [&](GlobalValue::GUID ValueGUID) {
268 if (CfiFunctionDefs.contains(V: ValueGUID))
269 UsedCfiDefs.insert(x: ValueGUID);
270 if (CfiFunctionDecls.contains(V: ValueGUID))
271 UsedCfiDecls.insert(x: ValueGUID);
272 };
273
274 auto AddUsedThings = [&](GlobalValueSummary *GS) {
275 if (!GS) return;
276 AddUnsigned(GS->getVisibility());
277 AddUnsigned(GS->isLive());
278 AddUnsigned(GS->canAutoHide());
279 for (const ValueInfo &VI : GS->refs()) {
280 AddUnsigned(VI.isDSOLocal(WithDSOLocalPropagation: Index.withDSOLocalPropagation()));
281 AddUsedCfiGlobal(VI.getGUID());
282 }
283 if (auto *GVS = dyn_cast<GlobalVarSummary>(Val: GS)) {
284 AddUnsigned(GVS->maybeReadOnly());
285 AddUnsigned(GVS->maybeWriteOnly());
286 }
287 if (auto *FS = dyn_cast<FunctionSummary>(Val: GS)) {
288 for (auto &TT : FS->type_tests())
289 UsedTypeIds.insert(x: TT);
290 for (auto &TT : FS->type_test_assume_vcalls())
291 UsedTypeIds.insert(x: TT.GUID);
292 for (auto &TT : FS->type_checked_load_vcalls())
293 UsedTypeIds.insert(x: TT.GUID);
294 for (auto &TT : FS->type_test_assume_const_vcalls())
295 UsedTypeIds.insert(x: TT.VFunc.GUID);
296 for (auto &TT : FS->type_checked_load_const_vcalls())
297 UsedTypeIds.insert(x: TT.VFunc.GUID);
298 for (auto &ET : FS->calls()) {
299 AddUnsigned(ET.first.isDSOLocal(WithDSOLocalPropagation: Index.withDSOLocalPropagation()));
300 AddUsedCfiGlobal(ET.first.getGUID());
301 }
302 }
303 };
304
305 // Include the hash for the linkage type to reflect internalization and weak
306 // resolution, and collect any used type identifier resolutions.
307 for (auto &GS : DefinedGlobals) {
308 GlobalValue::LinkageTypes Linkage = GS.second->linkage();
309 Hasher.update(
310 Data: ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
311 AddUsedCfiGlobal(GS.first);
312 AddUsedThings(GS.second);
313 }
314
315 // Imported functions may introduce new uses of type identifier resolutions,
316 // so we need to collect their used resolutions as well.
317 for (const auto &[FromModule, GUID, Type] : SortedImportList) {
318 GlobalValueSummary *S = Index.findSummaryInModule(ValueGUID: GUID, ModuleId: FromModule);
319 AddUsedThings(S);
320 // If this is an alias, we also care about any types/etc. that the aliasee
321 // may reference.
322 if (auto *AS = dyn_cast_or_null<AliasSummary>(Val: S))
323 AddUsedThings(AS->getBaseObject());
324 }
325
326 auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
327 AddString(TId);
328
329 AddUnsigned(S.TTRes.TheKind);
330 AddUnsigned(S.TTRes.SizeM1BitWidth);
331
332 AddUint64(S.TTRes.AlignLog2);
333 AddUint64(S.TTRes.SizeM1);
334 AddUint64(S.TTRes.BitMask);
335 AddUint64(S.TTRes.InlineBits);
336
337 AddUint64(S.WPDRes.size());
338 for (auto &WPD : S.WPDRes) {
339 AddUnsigned(WPD.first);
340 AddUnsigned(WPD.second.TheKind);
341 AddString(WPD.second.SingleImplName);
342
343 AddUint64(WPD.second.ResByArg.size());
344 for (auto &ByArg : WPD.second.ResByArg) {
345 AddUint64(ByArg.first.size());
346 for (uint64_t Arg : ByArg.first)
347 AddUint64(Arg);
348 AddUnsigned(ByArg.second.TheKind);
349 AddUint64(ByArg.second.Info);
350 AddUnsigned(ByArg.second.Byte);
351 AddUnsigned(ByArg.second.Bit);
352 }
353 }
354 };
355
356 // Include the hash for all type identifiers used by this module.
357 for (GlobalValue::GUID TId : UsedTypeIds) {
358 auto TidIter = Index.typeIds().equal_range(x: TId);
359 for (const auto &I : make_range(p: TidIter))
360 AddTypeIdSummary(I.second.first, I.second.second);
361 }
362
363 AddUnsigned(UsedCfiDefs.size());
364 for (auto &V : UsedCfiDefs)
365 AddUint64(V);
366
367 AddUnsigned(UsedCfiDecls.size());
368 for (auto &V : UsedCfiDecls)
369 AddUint64(V);
370
371 if (!Conf.SampleProfile.empty()) {
372 auto FileOrErr = MemoryBuffer::getFile(Filename: Conf.SampleProfile);
373 if (FileOrErr) {
374 Hasher.update(Str: FileOrErr.get()->getBuffer());
375
376 if (!Conf.ProfileRemapping.empty()) {
377 FileOrErr = MemoryBuffer::getFile(Filename: Conf.ProfileRemapping);
378 if (FileOrErr)
379 Hasher.update(Str: FileOrErr.get()->getBuffer());
380 }
381 }
382 }
383
384 return toHex(Input: Hasher.result());
385}
386
387std::string llvm::recomputeLTOCacheKey(const std::string &Key,
388 StringRef ExtraID) {
389 SHA1 Hasher;
390
391 auto AddString = [&](StringRef Str) {
392 Hasher.update(Str);
393 Hasher.update(Data: ArrayRef<uint8_t>{0});
394 };
395 AddString(Key);
396 AddString(ExtraID);
397
398 return toHex(Input: Hasher.result());
399}
400
401static void thinLTOResolvePrevailingGUID(
402 const Config &C, ValueInfo VI,
403 DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
404 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
405 isPrevailing,
406 function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
407 recordNewLinkage,
408 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
409 GlobalValue::VisibilityTypes Visibility =
410 C.VisibilityScheme == Config::ELF ? VI.getELFVisibility()
411 : GlobalValue::DefaultVisibility;
412 for (auto &S : VI.getSummaryList()) {
413 GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
414 // Ignore local and appending linkage values since the linker
415 // doesn't resolve them.
416 if (GlobalValue::isLocalLinkage(Linkage: OriginalLinkage) ||
417 GlobalValue::isAppendingLinkage(Linkage: S->linkage()))
418 continue;
419 // We need to emit only one of these. The prevailing module will keep it,
420 // but turned into a weak, while the others will drop it when possible.
421 // This is both a compile-time optimization and a correctness
422 // transformation. This is necessary for correctness when we have exported
423 // a reference - we need to convert the linkonce to weak to
424 // ensure a copy is kept to satisfy the exported reference.
425 // FIXME: We may want to split the compile time and correctness
426 // aspects into separate routines.
427 if (isPrevailing(VI.getGUID(), S.get())) {
428 assert(!S->wasPromoted() &&
429 "promoted symbols used to be internal linkage and shouldn't have "
430 "a prevailing variant");
431 if (GlobalValue::isLinkOnceLinkage(Linkage: OriginalLinkage)) {
432 S->setLinkage(GlobalValue::getWeakLinkage(
433 ODR: GlobalValue::isLinkOnceODRLinkage(Linkage: OriginalLinkage)));
434 // The kept copy is eligible for auto-hiding (hidden visibility) if all
435 // copies were (i.e. they were all linkonce_odr global unnamed addr).
436 // If any copy is not (e.g. it was originally weak_odr), then the symbol
437 // must remain externally available (e.g. a weak_odr from an explicitly
438 // instantiated template). Additionally, if it is in the
439 // GUIDPreservedSymbols set, that means that it is visibile outside
440 // the summary (e.g. in a native object or a bitcode file without
441 // summary), and in that case we cannot hide it as it isn't possible to
442 // check all copies.
443 S->setCanAutoHide(VI.canAutoHide() &&
444 !GUIDPreservedSymbols.count(V: VI.getGUID()));
445 }
446 if (C.VisibilityScheme == Config::FromPrevailing)
447 Visibility = S->getVisibility();
448 }
449 // Alias and aliasee can't be turned into available_externally.
450 // When force-import-all is used, it indicates that object linking is not
451 // supported by the target. In this case, we can't change the linkage as
452 // well in case the global is converted to declaration.
453 // Also, if the symbol was promoted, it wouldn't have a prevailing variant,
454 // but also its linkage is set correctly (to External) already.
455 else if (!isa<AliasSummary>(Val: S.get()) &&
456 !GlobalInvolvedWithAlias.count(V: S.get()) && !ForceImportAll &&
457 !S->wasPromoted())
458 S->setLinkage(GlobalValue::AvailableExternallyLinkage);
459
460 // For ELF, set visibility to the computed visibility from summaries. We
461 // don't track visibility from declarations so this may be more relaxed than
462 // the most constraining one.
463 if (C.VisibilityScheme == Config::ELF)
464 S->setVisibility(Visibility);
465
466 if (S->linkage() != OriginalLinkage)
467 recordNewLinkage(S->modulePath(), VI.getGUID(), S->linkage());
468 }
469
470 if (C.VisibilityScheme == Config::FromPrevailing) {
471 for (auto &S : VI.getSummaryList()) {
472 GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
473 if (GlobalValue::isLocalLinkage(Linkage: OriginalLinkage) ||
474 GlobalValue::isAppendingLinkage(Linkage: S->linkage()))
475 continue;
476 S->setVisibility(Visibility);
477 }
478 }
479}
480
481/// Resolve linkage for prevailing symbols in the \p Index.
482//
483// We'd like to drop these functions if they are no longer referenced in the
484// current module. However there is a chance that another module is still
485// referencing them because of the import. We make sure we always emit at least
486// one copy.
487void llvm::thinLTOResolvePrevailingInIndex(
488 const Config &C, ModuleSummaryIndex &Index,
489 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
490 isPrevailing,
491 function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
492 recordNewLinkage,
493 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
494 // We won't optimize the globals that are referenced by an alias for now
495 // Ideally we should turn the alias into a global and duplicate the definition
496 // when needed.
497 DenseSet<GlobalValueSummary *> GlobalInvolvedWithAlias;
498 for (auto &I : Index)
499 for (auto &S : I.second.getSummaryList())
500 if (auto AS = dyn_cast<AliasSummary>(Val: S.get()))
501 GlobalInvolvedWithAlias.insert(V: &AS->getAliasee());
502
503 for (auto &I : Index)
504 thinLTOResolvePrevailingGUID(C, VI: Index.getValueInfo(R: I),
505 GlobalInvolvedWithAlias, isPrevailing,
506 recordNewLinkage, GUIDPreservedSymbols);
507}
508
509static void thinLTOInternalizeAndPromoteGUID(
510 ValueInfo VI, function_ref<bool(StringRef, ValueInfo)> isExported,
511 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
512 isPrevailing) {
513 // Before performing index-based internalization and promotion for this GUID,
514 // the local flag should be consistent with the summary list linkage types.
515 VI.verifyLocal();
516
517 const bool SingleExternallyVisibleCopy =
518 VI.getSummaryList().size() == 1 &&
519 !GlobalValue::isLocalLinkage(Linkage: VI.getSummaryList().front()->linkage());
520
521 for (auto &S : VI.getSummaryList()) {
522 // First see if we need to promote an internal value because it is not
523 // exported.
524 if (isExported(S->modulePath(), VI)) {
525 if (GlobalValue::isLocalLinkage(Linkage: S->linkage()))
526 S->promote();
527 continue;
528 }
529
530 // Otherwise, see if we can internalize.
531 if (!EnableLTOInternalization)
532 continue;
533
534 // Non-exported values with external linkage can be internalized.
535 if (GlobalValue::isExternalLinkage(Linkage: S->linkage())) {
536 S->setLinkage(GlobalValue::InternalLinkage);
537 continue;
538 }
539
540 // Non-exported function and variable definitions with a weak-for-linker
541 // linkage can be internalized in certain cases. The minimum legality
542 // requirements would be that they are not address taken to ensure that we
543 // don't break pointer equality checks, and that variables are either read-
544 // or write-only. For functions, this is the case if either all copies are
545 // [local_]unnamed_addr, or we can propagate reference edge attributes
546 // (which is how this is guaranteed for variables, when analyzing whether
547 // they are read or write-only).
548 //
549 // However, we only get to this code for weak-for-linkage values in one of
550 // two cases:
551 // 1) The prevailing copy is not in IR (it is in native code).
552 // 2) The prevailing copy in IR is not exported from its module.
553 // Additionally, at least for the new LTO API, case 2 will only happen if
554 // there is exactly one definition of the value (i.e. in exactly one
555 // module), as duplicate defs are result in the value being marked exported.
556 // Likely, users of the legacy LTO API are similar, however, currently there
557 // are llvm-lto based tests of the legacy LTO API that do not mark
558 // duplicate linkonce_odr copies as exported via the tool, so we need
559 // to handle that case below by checking the number of copies.
560 //
561 // Generally, we only want to internalize a weak-for-linker value in case
562 // 2, because in case 1 we cannot see how the value is used to know if it
563 // is read or write-only. We also don't want to bloat the binary with
564 // multiple internalized copies of non-prevailing linkonce/weak functions.
565 // Note if we don't internalize, we will convert non-prevailing copies to
566 // available_externally anyway, so that we drop them after inlining. The
567 // only reason to internalize such a function is if we indeed have a single
568 // copy, because internalizing it won't increase binary size, and enables
569 // use of inliner heuristics that are more aggressive in the face of a
570 // single call to a static (local). For variables, internalizing a read or
571 // write only variable can enable more aggressive optimization. However, we
572 // already perform this elsewhere in the ThinLTO backend handling for
573 // read or write-only variables (processGlobalForThinLTO).
574 //
575 // Therefore, only internalize linkonce/weak if there is a single copy, that
576 // is prevailing in this IR module. We can do so aggressively, without
577 // requiring the address to be insignificant, or that a variable be read or
578 // write-only.
579 if (!GlobalValue::isWeakForLinker(Linkage: S->linkage()) ||
580 GlobalValue::isExternalWeakLinkage(Linkage: S->linkage()))
581 continue;
582
583 // We may have a single summary copy that is externally visible but not
584 // prevailing if the prevailing copy is in a native object.
585 if (SingleExternallyVisibleCopy && isPrevailing(VI.getGUID(), S.get()))
586 S->setLinkage(GlobalValue::InternalLinkage);
587 }
588}
589
590// Update the linkages in the given \p Index to mark exported values
591// as external and non-exported values as internal.
592void llvm::thinLTOInternalizeAndPromoteInIndex(
593 ModuleSummaryIndex &Index,
594 function_ref<bool(StringRef, ValueInfo)> isExported,
595 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
596 isPrevailing) {
597 assert(!Index.withInternalizeAndPromote());
598 for (auto &I : Index)
599 thinLTOInternalizeAndPromoteGUID(VI: Index.getValueInfo(R: I), isExported,
600 isPrevailing);
601 Index.setWithInternalizeAndPromote();
602}
603
604// Requires a destructor for std::vector<InputModule>.
605InputFile::~InputFile() = default;
606
607Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
608 std::unique_ptr<InputFile> File(new InputFile);
609
610 Expected<IRSymtabFile> FOrErr = readIRSymtab(MBRef: Object);
611 if (!FOrErr)
612 return FOrErr.takeError();
613
614 File->TargetTriple = FOrErr->TheReader.getTargetTriple();
615 File->SourceFileName = FOrErr->TheReader.getSourceFileName();
616 File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
617 File->DependentLibraries = FOrErr->TheReader.getDependentLibraries();
618 File->ComdatTable = FOrErr->TheReader.getComdatTable();
619 File->MbRef =
620 Object; // Save a memory buffer reference to an input file object.
621
622 for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
623 size_t Begin = File->Symbols.size();
624 for (const irsymtab::Reader::SymbolRef &Sym :
625 FOrErr->TheReader.module_symbols(I))
626 // Skip symbols that are irrelevant to LTO. Note that this condition needs
627 // to match the one in Skip() in LTO::addRegularLTO().
628 if (Sym.isGlobal() && !Sym.isFormatSpecific())
629 File->Symbols.push_back(x: Sym);
630 File->ModuleSymIndices.push_back(x: {Begin, File->Symbols.size()});
631 }
632
633 File->Mods = FOrErr->Mods;
634 File->Strtab = std::move(FOrErr->Strtab);
635 return std::move(File);
636}
637
638bool InputFile::Symbol::isLibcall(
639 const RTLIB::RuntimeLibcallsInfo &Libcalls) const {
640 return Libcalls.getSupportedLibcallImpl(FuncName: IRName) != RTLIB::Unsupported;
641}
642
643StringRef InputFile::getName() const {
644 return Mods[0].getModuleIdentifier();
645}
646
647BitcodeModule &InputFile::getSingleBitcodeModule() {
648 assert(Mods.size() == 1 && "Expect only one bitcode module");
649 return Mods[0];
650}
651
652BitcodeModule &InputFile::getPrimaryBitcodeModule() { return Mods[0]; }
653
654LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
655 const Config &Conf)
656 : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel),
657 Ctx(Conf), CombinedModule(std::make_unique<Module>(args: "ld-temp.o", args&: Ctx)),
658 Mover(std::make_unique<IRMover>(args&: *CombinedModule)) {}
659
660LTO::ThinLTOState::ThinLTOState(ThinBackend BackendParam)
661 : Backend(std::move(BackendParam)), CombinedIndex(/*HaveGVs*/ false) {
662 if (!Backend.isValid())
663 Backend =
664 createInProcessThinBackend(Parallelism: llvm::heavyweight_hardware_concurrency());
665}
666
667LTO::LTO(Config Conf, ThinBackend Backend,
668 unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode)
669 : Conf(std::move(Conf)),
670 RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
671 ThinLTO(std::move(Backend)),
672 GlobalResolutions(
673 std::make_unique<DenseMap<StringRef, GlobalResolution>>()),
674 LTOMode(LTOMode) {
675 if (Conf.KeepSymbolNameCopies || LTOKeepSymbolCopies) {
676 Alloc = std::make_unique<BumpPtrAllocator>();
677 GlobalResolutionSymbolSaver = std::make_unique<llvm::StringSaver>(args&: *Alloc);
678 }
679}
680
681// Requires a destructor for MapVector<BitcodeModule>.
682LTO::~LTO() = default;
683
684void LTO::cleanup() {
685 DummyModule.reset();
686 LinkerRemarkFunction = nullptr;
687 consumeError(Err: finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)));
688}
689
690// Add the symbols in the given module to the GlobalResolutions map, and resolve
691// their partitions.
692void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
693 ArrayRef<SymbolResolution> Res,
694 unsigned Partition, bool InSummary,
695 const Triple &TT) {
696 llvm::TimeTraceScope timeScope("LTO add module to global resolution");
697 auto *ResI = Res.begin();
698 auto *ResE = Res.end();
699 (void)ResE;
700 RTLIB::RuntimeLibcallsInfo Libcalls(TT);
701 for (const InputFile::Symbol &Sym : Syms) {
702 assert(ResI != ResE);
703 SymbolResolution Res = *ResI++;
704
705 StringRef SymbolName = Sym.getName();
706 // Keep copies of symbols if the client of LTO says so.
707 if (GlobalResolutionSymbolSaver && !GlobalResolutions->contains(Val: SymbolName))
708 SymbolName = GlobalResolutionSymbolSaver->save(S: SymbolName);
709
710 auto &GlobalRes = (*GlobalResolutions)[SymbolName];
711 GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
712 if (Res.Prevailing) {
713 assert(!GlobalRes.Prevailing &&
714 "Multiple prevailing defs are not allowed");
715 GlobalRes.Prevailing = true;
716 GlobalRes.IRName = std::string(Sym.getIRName());
717 } else if (!GlobalRes.Prevailing && GlobalRes.IRName.empty()) {
718 // Sometimes it can be two copies of symbol in a module and prevailing
719 // symbol can have no IR name. That might happen if symbol is defined in
720 // module level inline asm block. In case we have multiple modules with
721 // the same symbol we want to use IR name of the prevailing symbol.
722 // Otherwise, if we haven't seen a prevailing symbol, set the name so that
723 // we can later use it to check if there is any prevailing copy in IR.
724 GlobalRes.IRName = std::string(Sym.getIRName());
725 }
726
727 // In rare occasion, the symbol used to initialize GlobalRes has a different
728 // IRName from the inspected Symbol. This can happen on macOS + iOS, when a
729 // symbol is referenced through its mangled name, say @"\01_symbol" while
730 // the IRName is @symbol (the prefix underscore comes from MachO mangling).
731 // In that case, we have the same actual Symbol that can get two different
732 // GUID, leading to some invalid internalization. Workaround this by marking
733 // the GlobalRes external.
734
735 // FIXME: instead of this check, it would be desirable to compute GUIDs
736 // based on mangled name, but this requires an access to the Target Triple
737 // and would be relatively invasive on the codebase.
738 if (GlobalRes.IRName != Sym.getIRName()) {
739 GlobalRes.Partition = GlobalResolution::External;
740 GlobalRes.VisibleOutsideSummary = true;
741 }
742
743 bool IsLibcall = Sym.isLibcall(Libcalls);
744
745 // Set the partition to external if we know it is re-defined by the linker
746 // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a
747 // regular object, is referenced from llvm.compiler.used/llvm.used, or was
748 // already recorded as being referenced from a different partition.
749 if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() ||
750 IsLibcall ||
751 (GlobalRes.Partition != GlobalResolution::Unknown &&
752 GlobalRes.Partition != Partition)) {
753 GlobalRes.Partition = GlobalResolution::External;
754 } else
755 // First recorded reference, save the current partition.
756 GlobalRes.Partition = Partition;
757
758 // Flag as visible outside of summary if visible from a regular object or
759 // from a module that does not have a summary.
760 GlobalRes.VisibleOutsideSummary |=
761 (Res.VisibleToRegularObj || Sym.isUsed() || IsLibcall || !InSummary);
762
763 GlobalRes.ExportDynamic |= Res.ExportDynamic;
764 }
765}
766
767void LTO::releaseGlobalResolutionsMemory() {
768 // Release GlobalResolutions dense-map itself.
769 GlobalResolutions.reset();
770 // Release the string saver memory.
771 GlobalResolutionSymbolSaver.reset();
772 Alloc.reset();
773}
774
775static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
776 ArrayRef<SymbolResolution> Res) {
777 StringRef Path = Input->getName();
778 OS << Path << '\n';
779 auto ResI = Res.begin();
780 for (const InputFile::Symbol &Sym : Input->symbols()) {
781 assert(ResI != Res.end());
782 SymbolResolution Res = *ResI++;
783
784 OS << "-r=" << Path << ',' << Sym.getName() << ',';
785 if (Res.Prevailing)
786 OS << 'p';
787 if (Res.FinalDefinitionInLinkageUnit)
788 OS << 'l';
789 if (Res.VisibleToRegularObj)
790 OS << 'x';
791 if (Res.LinkerRedefined)
792 OS << 'r';
793 OS << '\n';
794 }
795 OS.flush();
796 assert(ResI == Res.end());
797}
798
799Error LTO::add(std::unique_ptr<InputFile> InputPtr,
800 ArrayRef<SymbolResolution> Res) {
801 llvm::TimeTraceScope timeScope("LTO add input", InputPtr->getName());
802 assert(!CalledGetMaxTasks);
803
804 Expected<std::shared_ptr<InputFile>> InputOrErr =
805 addInput(InputPtr: std::move(InputPtr));
806 if (!InputOrErr)
807 return InputOrErr.takeError();
808 InputFile *Input = (*InputOrErr).get();
809
810 if (Conf.ResolutionFile)
811 writeToResolutionFile(OS&: *Conf.ResolutionFile, Input, Res);
812
813 if (RegularLTO.CombinedModule->getTargetTriple().empty()) {
814 Triple InputTriple(Input->getTargetTriple());
815 RegularLTO.CombinedModule->setTargetTriple(InputTriple);
816 if (InputTriple.isOSBinFormatELF())
817 Conf.VisibilityScheme = Config::ELF;
818 }
819
820 ArrayRef<SymbolResolution> InputRes = Res;
821 for (unsigned I = 0; I != Input->Mods.size(); ++I) {
822 if (auto Err = addModule(Input&: *Input, InputRes, ModI: I, Res).moveInto(Value&: Res))
823 return Err;
824 }
825
826 assert(Res.empty());
827 return Error::success();
828}
829
830Expected<ArrayRef<SymbolResolution>>
831LTO::addModule(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
832 unsigned ModI, ArrayRef<SymbolResolution> Res) {
833 llvm::TimeTraceScope timeScope("LTO add module", Input.getName());
834 Expected<BitcodeLTOInfo> LTOInfo = Input.Mods[ModI].getLTOInfo();
835 if (!LTOInfo)
836 return LTOInfo.takeError();
837
838 if (EnableSplitLTOUnit) {
839 // If only some modules were split, flag this in the index so that
840 // we can skip or error on optimizations that need consistently split
841 // modules (whole program devirt and lower type tests).
842 if (*EnableSplitLTOUnit != LTOInfo->EnableSplitLTOUnit)
843 ThinLTO.CombinedIndex.setPartiallySplitLTOUnits();
844 } else
845 EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;
846
847 BitcodeModule BM = Input.Mods[ModI];
848
849 if ((LTOMode == LTOK_UnifiedRegular || LTOMode == LTOK_UnifiedThin) &&
850 !LTOInfo->UnifiedLTO)
851 return make_error<StringError>(
852 Args: "unified LTO compilation must use "
853 "compatible bitcode modules (use -funified-lto)",
854 Args: inconvertibleErrorCode());
855
856 if (LTOInfo->UnifiedLTO && LTOMode == LTOK_Default)
857 LTOMode = LTOK_UnifiedThin;
858
859 bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular);
860 // If any of the modules inside of a input bitcode file was compiled with
861 // ThinLTO, we assume that the whole input file also was compiled with
862 // ThinLTO.
863 Input.IsThinLTO |= IsThinLTO;
864
865 auto ModSyms = Input.module_symbols(I: ModI);
866 addModuleToGlobalRes(Syms: ModSyms, Res,
867 Partition: IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0,
868 InSummary: LTOInfo->HasSummary, TT: Triple(Input.getTargetTriple()));
869
870 if (IsThinLTO)
871 return addThinLTO(BM, Syms: ModSyms, Res);
872
873 RegularLTO.EmptyCombinedModule = false;
874 auto ModOrErr = addRegularLTO(Input, InputRes, BM, Syms: ModSyms, Res);
875 if (!ModOrErr)
876 return ModOrErr.takeError();
877 Res = ModOrErr->second;
878
879 if (!LTOInfo->HasSummary) {
880 if (Error Err = linkRegularLTO(Mod: std::move(ModOrErr->first),
881 /*LivenessFromIndex=*/false))
882 return Err;
883 return Res;
884 }
885
886 // Regular LTO module summaries are added to a dummy module that represents
887 // the combined regular LTO module.
888 if (Error Err = BM.readSummary(CombinedIndex&: ThinLTO.CombinedIndex, ModulePath: ""))
889 return Err;
890 RegularLTO.ModsWithSummaries.push_back(x: std::move(ModOrErr->first));
891 return Res;
892}
893
894// Checks whether the given global value is in a non-prevailing comdat
895// (comdat containing values the linker indicated were not prevailing,
896// which we then dropped to available_externally), and if so, removes
897// it from the comdat. This is called for all global values to ensure the
898// comdat is empty rather than leaving an incomplete comdat. It is needed for
899// regular LTO modules, in case we are in a mixed-LTO mode (both regular
900// and thin LTO modules) compilation. Since the regular LTO module will be
901// linked first in the final native link, we want to make sure the linker
902// doesn't select any of these incomplete comdats that would be left
903// in the regular LTO module without this cleanup.
904static void
905handleNonPrevailingComdat(GlobalValue &GV,
906 std::set<const Comdat *> &NonPrevailingComdats) {
907 Comdat *C = GV.getComdat();
908 if (!C)
909 return;
910
911 if (!NonPrevailingComdats.count(x: C))
912 return;
913
914 // Additionally need to drop all global values from the comdat to
915 // available_externally, to satisfy the COMDAT requirement that all members
916 // are discarded as a unit. The non-local linkage global values avoid
917 // duplicate definition linker errors.
918 GV.setLinkage(GlobalValue::AvailableExternallyLinkage);
919
920 if (auto GO = dyn_cast<GlobalObject>(Val: &GV))
921 GO->setComdat(nullptr);
922}
923
924// Add a regular LTO object to the link.
925// The resulting module needs to be linked into the combined LTO module with
926// linkRegularLTO.
927Expected<
928 std::pair<LTO::RegularLTOState::AddedModule, ArrayRef<SymbolResolution>>>
929LTO::addRegularLTO(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
930 BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
931 ArrayRef<SymbolResolution> Res) {
932 llvm::TimeTraceScope timeScope("LTO add regular LTO");
933 RegularLTOState::AddedModule Mod;
934 Expected<std::unique_ptr<Module>> MOrErr =
935 BM.getLazyModule(Context&: RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true,
936 /*IsImporting*/ false);
937 if (!MOrErr)
938 return MOrErr.takeError();
939 Module &M = **MOrErr;
940 Mod.M = std::move(*MOrErr);
941
942 if (Error Err = M.materializeMetadata())
943 return std::move(Err);
944
945 if (LTOMode == LTOK_UnifiedRegular) {
946 // cfi.functions metadata is intended to be used with ThinLTO and may
947 // trigger invalid IR transformations if they are present when doing regular
948 // LTO, so delete it.
949 if (NamedMDNode *CfiFunctionsMD = M.getNamedMetadata(Name: "cfi.functions"))
950 M.eraseNamedMetadata(NMD: CfiFunctionsMD);
951 } else if (NamedMDNode *AliasesMD = M.getNamedMetadata(Name: "aliases")) {
952 // Delete aliases entries for non-prevailing symbols on the ThinLTO side of
953 // this input file.
954 DenseSet<StringRef> Prevailing;
955 for (auto [I, R] : zip(t: Input.symbols(), u&: InputRes))
956 if (R.Prevailing && !I.getIRName().empty())
957 Prevailing.insert(V: I.getIRName());
958 std::vector<MDNode *> AliasGroups;
959 for (MDNode *AliasGroup : AliasesMD->operands()) {
960 std::vector<Metadata *> Aliases;
961 for (Metadata *Alias : AliasGroup->operands()) {
962 if (isa<MDString>(Val: Alias) &&
963 Prevailing.count(V: cast<MDString>(Val: Alias)->getString()))
964 Aliases.push_back(x: Alias);
965 }
966 if (Aliases.size() > 1)
967 AliasGroups.push_back(x: MDTuple::get(Context&: RegularLTO.Ctx, MDs: Aliases));
968 }
969 AliasesMD->clearOperands();
970 for (MDNode *G : AliasGroups)
971 AliasesMD->addOperand(M: G);
972 }
973
974 UpgradeDebugInfo(M);
975
976 ModuleSymbolTable SymTab;
977 SymTab.addModule(M: &M);
978
979 for (GlobalVariable &GV : M.globals())
980 if (GV.hasAppendingLinkage())
981 Mod.Keep.push_back(x: &GV);
982
983 DenseSet<GlobalObject *> AliasedGlobals;
984 for (auto &GA : M.aliases())
985 if (GlobalObject *GO = GA.getAliaseeObject())
986 AliasedGlobals.insert(V: GO);
987
988 // In this function we need IR GlobalValues matching the symbols in Syms
989 // (which is not backed by a module), so we need to enumerate them in the same
990 // order. The symbol enumeration order of a ModuleSymbolTable intentionally
991 // matches the order of an irsymtab, but when we read the irsymtab in
992 // InputFile::create we omit some symbols that are irrelevant to LTO. The
993 // Skip() function skips the same symbols from the module as InputFile does
994 // from the symbol table.
995 auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end();
996 auto Skip = [&]() {
997 while (MsymI != MsymE) {
998 auto Flags = SymTab.getSymbolFlags(S: *MsymI);
999 if ((Flags & object::BasicSymbolRef::SF_Global) &&
1000 !(Flags & object::BasicSymbolRef::SF_FormatSpecific))
1001 return;
1002 ++MsymI;
1003 }
1004 };
1005 Skip();
1006
1007 std::set<const Comdat *> NonPrevailingComdats;
1008 SmallSet<StringRef, 2> NonPrevailingAsmSymbols;
1009 for (const InputFile::Symbol &Sym : Syms) {
1010 assert(!Res.empty());
1011 const SymbolResolution &R = Res.consume_front();
1012
1013 assert(MsymI != MsymE);
1014 ModuleSymbolTable::Symbol Msym = *MsymI++;
1015 Skip();
1016
1017 if (GlobalValue *GV = dyn_cast_if_present<GlobalValue *>(Val&: Msym)) {
1018 if (R.Prevailing) {
1019 if (Sym.isUndefined())
1020 continue;
1021 Mod.Keep.push_back(x: GV);
1022 // For symbols re-defined with linker -wrap and -defsym options,
1023 // set the linkage to weak to inhibit IPO. The linkage will be
1024 // restored by the linker.
1025 if (R.LinkerRedefined)
1026 GV->setLinkage(GlobalValue::WeakAnyLinkage);
1027
1028 GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage();
1029 if (GlobalValue::isLinkOnceLinkage(Linkage: OriginalLinkage))
1030 GV->setLinkage(GlobalValue::getWeakLinkage(
1031 ODR: GlobalValue::isLinkOnceODRLinkage(Linkage: OriginalLinkage)));
1032 } else if (isa<GlobalObject>(Val: GV) &&
1033 (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() ||
1034 GV->hasAvailableExternallyLinkage()) &&
1035 !AliasedGlobals.count(V: cast<GlobalObject>(Val: GV))) {
1036 // Any of the above three types of linkage indicates that the
1037 // chosen prevailing symbol will have the same semantics as this copy of
1038 // the symbol, so we may be able to link it with available_externally
1039 // linkage. We will decide later whether to do that when we link this
1040 // module (in linkRegularLTO), based on whether it is undefined.
1041 Mod.Keep.push_back(x: GV);
1042 GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
1043 if (GV->hasComdat())
1044 NonPrevailingComdats.insert(x: GV->getComdat());
1045 cast<GlobalObject>(Val: GV)->setComdat(nullptr);
1046 }
1047
1048 // Set the 'local' flag based on the linker resolution for this symbol.
1049 if (R.FinalDefinitionInLinkageUnit) {
1050 GV->setDSOLocal(true);
1051 if (GV->hasDLLImportStorageClass())
1052 GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
1053 DefaultStorageClass);
1054 }
1055 } else if (auto *AS =
1056 dyn_cast_if_present<ModuleSymbolTable::AsmSymbol *>(Val&: Msym)) {
1057 // Collect non-prevailing symbols.
1058 if (!R.Prevailing)
1059 NonPrevailingAsmSymbols.insert(V: AS->first);
1060 } else {
1061 llvm_unreachable("unknown symbol type");
1062 }
1063
1064 // Common resolution: collect the maximum size/alignment over all commons.
1065 // We also record if we see an instance of a common as prevailing, so that
1066 // if none is prevailing we can ignore it later.
1067 if (Sym.isCommon()) {
1068 // FIXME: We should figure out what to do about commons defined by asm.
1069 // For now they aren't reported correctly by ModuleSymbolTable.
1070 auto &CommonRes = RegularLTO.Commons[std::string(Sym.getIRName())];
1071 CommonRes.Size = std::max(a: CommonRes.Size, b: Sym.getCommonSize());
1072 if (uint32_t SymAlignValue = Sym.getCommonAlignment()) {
1073 CommonRes.Alignment =
1074 std::max(a: Align(SymAlignValue), b: CommonRes.Alignment);
1075 }
1076 CommonRes.Prevailing |= R.Prevailing;
1077 }
1078 }
1079
1080 if (!M.getComdatSymbolTable().empty())
1081 for (GlobalValue &GV : M.global_values())
1082 handleNonPrevailingComdat(GV, NonPrevailingComdats);
1083
1084 // Prepend ".lto_discard <sym>, <sym>*" directive to each module inline asm
1085 // block.
1086 if (!M.getModuleInlineAsm().empty()) {
1087 std::string NewIA = ".lto_discard";
1088 if (!NonPrevailingAsmSymbols.empty()) {
1089 // Don't dicard a symbol if there is a live .symver for it.
1090 ModuleSymbolTable::CollectAsmSymvers(
1091 M, AsmSymver: [&](StringRef Name, StringRef Alias) {
1092 if (!NonPrevailingAsmSymbols.count(V: Alias))
1093 NonPrevailingAsmSymbols.erase(V: Name);
1094 });
1095 NewIA += " " + llvm::join(R&: NonPrevailingAsmSymbols, Separator: ", ");
1096 }
1097 NewIA += "\n";
1098 M.setModuleInlineAsm(NewIA + M.getModuleInlineAsm());
1099 }
1100
1101 assert(MsymI == MsymE);
1102 return std::make_pair(x: std::move(Mod), y&: Res);
1103}
1104
1105Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
1106 bool LivenessFromIndex) {
1107 llvm::TimeTraceScope timeScope("LTO link regular LTO");
1108 std::vector<GlobalValue *> Keep;
1109 for (GlobalValue *GV : Mod.Keep) {
1110 if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GUID: GV->getGUID())) {
1111 if (Function *F = dyn_cast<Function>(Val: GV)) {
1112 if (DiagnosticOutputFile) {
1113 if (Error Err = F->materialize())
1114 return Err;
1115 auto R = OptimizationRemark(DEBUG_TYPE, "deadfunction", F);
1116 R << ore::NV("Function", F) << " not added to the combined module ";
1117 emitRemark(Remark&: R);
1118 }
1119 }
1120 continue;
1121 }
1122
1123 if (!GV->hasAvailableExternallyLinkage()) {
1124 Keep.push_back(x: GV);
1125 continue;
1126 }
1127
1128 // Only link available_externally definitions if we don't already have a
1129 // definition.
1130 GlobalValue *CombinedGV =
1131 RegularLTO.CombinedModule->getNamedValue(Name: GV->getName());
1132 if (CombinedGV && !CombinedGV->isDeclaration())
1133 continue;
1134
1135 Keep.push_back(x: GV);
1136 }
1137
1138 return RegularLTO.Mover->move(Src: std::move(Mod.M), ValuesToLink: Keep, AddLazyFor: nullptr,
1139 /* IsPerformingImport */ false);
1140}
1141
1142// Add a ThinLTO module to the link.
1143Expected<ArrayRef<SymbolResolution>>
1144LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
1145 ArrayRef<SymbolResolution> Res) {
1146 llvm::TimeTraceScope timeScope("LTO add thin LTO");
1147 const auto BMID = BM.getModuleIdentifier();
1148 ArrayRef<SymbolResolution> ResTmp = Res;
1149 for (const InputFile::Symbol &Sym : Syms) {
1150 assert(!ResTmp.empty());
1151 const SymbolResolution &R = ResTmp.consume_front();
1152
1153 if (!Sym.getIRName().empty() && R.Prevailing) {
1154 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1155 GlobalName: GlobalValue::getGlobalIdentifier(Name: Sym.getIRName(),
1156 Linkage: GlobalValue::ExternalLinkage, FileName: ""));
1157 ThinLTO.setPrevailingModuleForGUID(GUID, Module: BMID);
1158 }
1159 }
1160
1161 if (Error Err = BM.readSummary(
1162 CombinedIndex&: ThinLTO.CombinedIndex, ModulePath: BMID, IsPrevailing: [&](GlobalValue::GUID GUID) {
1163 return ThinLTO.isPrevailingModuleForGUID(GUID, Module: BMID);
1164 }))
1165 return Err;
1166 LLVM_DEBUG(dbgs() << "Module " << BMID << "\n");
1167
1168 for (const InputFile::Symbol &Sym : Syms) {
1169 assert(!Res.empty());
1170 const SymbolResolution &R = Res.consume_front();
1171
1172 if (!Sym.getIRName().empty() &&
1173 (R.Prevailing || R.FinalDefinitionInLinkageUnit)) {
1174 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1175 GlobalName: GlobalValue::getGlobalIdentifier(Name: Sym.getIRName(),
1176 Linkage: GlobalValue::ExternalLinkage, FileName: ""));
1177 if (R.Prevailing) {
1178 assert(ThinLTO.isPrevailingModuleForGUID(GUID, BMID));
1179
1180 // For linker redefined symbols (via --wrap or --defsym) we want to
1181 // switch the linkage to `weak` to prevent IPOs from happening.
1182 // Find the summary in the module for this very GV and record the new
1183 // linkage so that we can switch it when we import the GV.
1184 if (R.LinkerRedefined)
1185 if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(ValueGUID: GUID, ModuleId: BMID))
1186 S->setLinkage(GlobalValue::WeakAnyLinkage);
1187 }
1188
1189 // If the linker resolved the symbol to a local definition then mark it
1190 // as local in the summary for the module we are adding.
1191 if (R.FinalDefinitionInLinkageUnit) {
1192 if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(ValueGUID: GUID, ModuleId: BMID)) {
1193 S->setDSOLocal(true);
1194 }
1195 }
1196 }
1197 }
1198
1199 if (!ThinLTO.ModuleMap.insert(KV: {BMID, BM}).second)
1200 return make_error<StringError>(
1201 Args: "Expected at most one ThinLTO module per bitcode file",
1202 Args: inconvertibleErrorCode());
1203
1204 if (!Conf.ThinLTOModulesToCompile.empty()) {
1205 if (!ThinLTO.ModulesToCompile)
1206 ThinLTO.ModulesToCompile = ModuleMapType();
1207 // This is a fuzzy name matching where only modules with name containing the
1208 // specified switch values are going to be compiled.
1209 for (const std::string &Name : Conf.ThinLTOModulesToCompile) {
1210 if (BMID.contains(Other: Name)) {
1211 ThinLTO.ModulesToCompile->insert(KV: {BMID, BM});
1212 LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BMID << " to compile\n");
1213 break;
1214 }
1215 }
1216 }
1217
1218 return Res;
1219}
1220
1221unsigned LTO::getMaxTasks() const {
1222 CalledGetMaxTasks = true;
1223 auto ModuleCount = ThinLTO.ModulesToCompile ? ThinLTO.ModulesToCompile->size()
1224 : ThinLTO.ModuleMap.size();
1225 return RegularLTO.ParallelCodeGenParallelismLevel + ModuleCount;
1226}
1227
1228// If only some of the modules were split, we cannot correctly handle
1229// code that contains type tests or type checked loads.
1230Error LTO::checkPartiallySplit() {
1231 if (!ThinLTO.CombinedIndex.partiallySplitLTOUnits())
1232 return Error::success();
1233
1234 const Module *Combined = RegularLTO.CombinedModule.get();
1235 Function *TypeTestFunc =
1236 Intrinsic::getDeclarationIfExists(M: Combined, id: Intrinsic::type_test);
1237 Function *TypeCheckedLoadFunc =
1238 Intrinsic::getDeclarationIfExists(M: Combined, id: Intrinsic::type_checked_load);
1239 Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
1240 M: Combined, id: Intrinsic::type_checked_load_relative);
1241
1242 // First check if there are type tests / type checked loads in the
1243 // merged regular LTO module IR.
1244 if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
1245 (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()) ||
1246 (TypeCheckedLoadRelativeFunc &&
1247 !TypeCheckedLoadRelativeFunc->use_empty()))
1248 return make_error<StringError>(
1249 Args: "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
1250 Args: inconvertibleErrorCode());
1251
1252 // Otherwise check if there are any recorded in the combined summary from the
1253 // ThinLTO modules.
1254 for (auto &P : ThinLTO.CombinedIndex) {
1255 for (auto &S : P.second.getSummaryList()) {
1256 auto *FS = dyn_cast<FunctionSummary>(Val: S.get());
1257 if (!FS)
1258 continue;
1259 if (!FS->type_test_assume_vcalls().empty() ||
1260 !FS->type_checked_load_vcalls().empty() ||
1261 !FS->type_test_assume_const_vcalls().empty() ||
1262 !FS->type_checked_load_const_vcalls().empty() ||
1263 !FS->type_tests().empty())
1264 return make_error<StringError>(
1265 Args: "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
1266 Args: inconvertibleErrorCode());
1267 }
1268 }
1269 return Error::success();
1270}
1271
1272Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
1273 llvm::scope_exit CleanUp([this]() { cleanup(); });
1274
1275 if (Error EC = serializeInputsForDistribution())
1276 return EC;
1277
1278 // Compute "dead" symbols, we don't want to import/export these!
1279 DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
1280 DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
1281 for (auto &Res : *GlobalResolutions) {
1282 // Normally resolution have IR name of symbol. We can do nothing here
1283 // otherwise. See comments in GlobalResolution struct for more details.
1284 if (Res.second.IRName.empty())
1285 continue;
1286
1287 GlobalValue::GUID GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1288 GlobalName: GlobalValue::dropLLVMManglingEscape(Name: Res.second.IRName));
1289
1290 if (Res.second.VisibleOutsideSummary && Res.second.Prevailing)
1291 GUIDPreservedSymbols.insert(V: GUID);
1292
1293 if (Res.second.ExportDynamic)
1294 DynamicExportSymbols.insert(V: GUID);
1295
1296 GUIDPrevailingResolutions[GUID] =
1297 Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No;
1298 }
1299
1300 auto isPrevailing = [&](GlobalValue::GUID G) {
1301 auto It = GUIDPrevailingResolutions.find(Val: G);
1302 if (It == GUIDPrevailingResolutions.end())
1303 return PrevailingType::Unknown;
1304 return It->second;
1305 };
1306 computeDeadSymbolsWithConstProp(Index&: ThinLTO.CombinedIndex, GUIDPreservedSymbols,
1307 isPrevailing, ImportEnabled: Conf.OptLevel > 0);
1308
1309 // Setup output file to emit statistics.
1310 auto StatsFileOrErr = setupStatsFile(Conf.StatsFile);
1311 if (!StatsFileOrErr)
1312 return StatsFileOrErr.takeError();
1313 std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get());
1314
1315 if (Error Err = setupOptimizationRemarks())
1316 return Err;
1317
1318 // TODO: Ideally this would be controlled automatically by detecting that we
1319 // are linking with an allocator that supports these interfaces, rather than
1320 // an internal option (which would still be needed for tests, however). For
1321 // example, if the library exported a symbol like __malloc_hot_cold the linker
1322 // could recognize that and set a flag in the lto::Config.
1323 if (SupportsHotColdNew)
1324 ThinLTO.CombinedIndex.setWithSupportsHotColdNew();
1325
1326 Error Result = runRegularLTO(AddStream);
1327 if (!Result)
1328 // This will reset the GlobalResolutions optional once done with it to
1329 // reduce peak memory before importing.
1330 Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
1331
1332 if (StatsFile)
1333 PrintStatisticsJSON(OS&: StatsFile->os());
1334
1335 return Result;
1336}
1337
1338Error LTO::runRegularLTO(AddStreamFn AddStream) {
1339 llvm::TimeTraceScope timeScope("Run regular LTO");
1340 LLVM_DEBUG(dbgs() << "Running regular LTO\n");
1341
1342 // Finalize linking of regular LTO modules containing summaries now that
1343 // we have computed liveness information.
1344 {
1345 llvm::TimeTraceScope timeScope("Link regular LTO");
1346 for (auto &M : RegularLTO.ModsWithSummaries)
1347 if (Error Err = linkRegularLTO(Mod: std::move(M), /*LivenessFromIndex=*/true))
1348 return Err;
1349 }
1350
1351 // Ensure we don't have inconsistently split LTO units with type tests.
1352 // FIXME: this checks both LTO and ThinLTO. It happens to work as we take
1353 // this path both cases but eventually this should be split into two and
1354 // do the ThinLTO checks in `runThinLTO`.
1355 if (Error Err = checkPartiallySplit())
1356 return Err;
1357
1358 // Make sure commons have the right size/alignment: we kept the largest from
1359 // all the prevailing when adding the inputs, and we apply it here.
1360 const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
1361 for (auto &I : RegularLTO.Commons) {
1362 if (!I.second.Prevailing)
1363 // Don't do anything if no instance of this common was prevailing.
1364 continue;
1365 GlobalVariable *OldGV = RegularLTO.CombinedModule->getNamedGlobal(Name: I.first);
1366 if (OldGV && OldGV->getGlobalSize(DL) == I.second.Size) {
1367 // Don't create a new global if the type is already correct, just make
1368 // sure the alignment is correct.
1369 OldGV->setAlignment(I.second.Alignment);
1370 continue;
1371 }
1372 ArrayType *Ty =
1373 ArrayType::get(ElementType: Type::getInt8Ty(C&: RegularLTO.Ctx), NumElements: I.second.Size);
1374 auto *GV = new GlobalVariable(*RegularLTO.CombinedModule, Ty, false,
1375 GlobalValue::CommonLinkage,
1376 ConstantAggregateZero::get(Ty), "");
1377 GV->setAlignment(I.second.Alignment);
1378 if (OldGV) {
1379 OldGV->replaceAllUsesWith(V: GV);
1380 GV->takeName(V: OldGV);
1381 OldGV->eraseFromParent();
1382 } else {
1383 GV->setName(I.first);
1384 }
1385 }
1386
1387 bool WholeProgramVisibilityEnabledInLTO =
1388 Conf.HasWholeProgramVisibility &&
1389 // If validation is enabled, upgrade visibility only when all vtables
1390 // have typeinfos.
1391 (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
1392
1393 // This returns true when the name is local or not defined. Locals are
1394 // expected to be handled separately.
1395 auto IsVisibleToRegularObj = [&](StringRef name) {
1396 auto It = GlobalResolutions->find(Val: name);
1397 return (It == GlobalResolutions->end() ||
1398 It->second.VisibleOutsideSummary || !It->second.Prevailing);
1399 };
1400
1401 // If allowed, upgrade public vcall visibility metadata to linkage unit
1402 // visibility before whole program devirtualization in the optimizer.
1403 updateVCallVisibilityInModule(
1404 M&: *RegularLTO.CombinedModule, WholeProgramVisibilityEnabledInLTO,
1405 DynamicExportSymbols, ValidateAllVtablesHaveTypeInfos: Conf.ValidateAllVtablesHaveTypeInfos,
1406 IsVisibleToRegularObj);
1407 updatePublicTypeTestCalls(M&: *RegularLTO.CombinedModule,
1408 WholeProgramVisibilityEnabledInLTO);
1409
1410 if (Conf.PreOptModuleHook &&
1411 !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
1412 return Error::success();
1413
1414 if (!Conf.CodeGenOnly) {
1415 for (const auto &R : *GlobalResolutions) {
1416 GlobalValue *GV =
1417 RegularLTO.CombinedModule->getNamedValue(Name: R.second.IRName);
1418 if (!R.second.isPrevailingIRSymbol())
1419 continue;
1420 if (R.second.Partition != 0 &&
1421 R.second.Partition != GlobalResolution::External)
1422 continue;
1423
1424 // Ignore symbols defined in other partitions.
1425 // Also skip declarations, which are not allowed to have internal linkage.
1426 if (!GV || GV->hasLocalLinkage() || GV->isDeclaration())
1427 continue;
1428
1429 // Symbols that are marked DLLImport or DLLExport should not be
1430 // internalized, as they are either externally visible or referencing
1431 // external symbols. Symbols that have AvailableExternally or Appending
1432 // linkage might be used by future passes and should be kept as is.
1433 // These linkages are seen in Unified regular LTO, because the process
1434 // of creating split LTO units introduces symbols with that linkage into
1435 // one of the created modules. Normally, only the ThinLTO backend would
1436 // compile this module, but Unified Regular LTO processes both
1437 // modules created by the splitting process as regular LTO modules.
1438 if ((LTOMode == LTOKind::LTOK_UnifiedRegular) &&
1439 ((GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) ||
1440 GV->hasAvailableExternallyLinkage() || GV->hasAppendingLinkage()))
1441 continue;
1442
1443 GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
1444 : GlobalValue::UnnamedAddr::None);
1445 if (EnableLTOInternalization && R.second.Partition == 0)
1446 GV->setLinkage(GlobalValue::InternalLinkage);
1447 }
1448
1449 if (Conf.PostInternalizeModuleHook &&
1450 !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
1451 return Error::success();
1452 }
1453
1454 if (!RegularLTO.EmptyCombinedModule || Conf.AlwaysEmitRegularLTOObj) {
1455 if (Error Err =
1456 backend(C: Conf, AddStream, ParallelCodeGenParallelismLevel: RegularLTO.ParallelCodeGenParallelismLevel,
1457 M&: *RegularLTO.CombinedModule, CombinedIndex&: ThinLTO.CombinedIndex))
1458 return Err;
1459 }
1460
1461 return Error::success();
1462}
1463
1464SmallVector<const char *> LTO::getRuntimeLibcallSymbols(const Triple &TT) {
1465 RTLIB::RuntimeLibcallsInfo Libcalls(TT);
1466 SmallVector<const char *> LibcallSymbols;
1467 LibcallSymbols.reserve(N: Libcalls.getNumAvailableLibcallImpls());
1468
1469 for (RTLIB::LibcallImpl Impl : RTLIB::libcall_impls()) {
1470 if (Libcalls.isAvailable(Impl))
1471 LibcallSymbols.push_back(Elt: Libcalls.getLibcallImplName(CallImpl: Impl).data());
1472 }
1473
1474 return LibcallSymbols;
1475}
1476
1477Error ThinBackendProc::emitFiles(
1478 const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
1479 const std::string &NewModulePath) const {
1480 return emitFiles(ImportList, ModulePath, NewModulePath,
1481 SummaryPath: NewModulePath + ".thinlto.bc",
1482 /*ImportsFiles=*/std::nullopt);
1483}
1484
1485Error ThinBackendProc::emitFiles(
1486 const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
1487 const std::string &NewModulePath, StringRef SummaryPath,
1488 std::optional<std::reference_wrapper<ImportsFilesContainer>> ImportsFiles)
1489 const {
1490 ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
1491 GVSummaryPtrSet DeclarationSummaries;
1492
1493 std::error_code EC;
1494 gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
1495 ImportList, ModuleToSummariesForIndex,
1496 DecSummaries&: DeclarationSummaries);
1497
1498 raw_fd_ostream OS(SummaryPath, EC, sys::fs::OpenFlags::OF_None);
1499 if (EC)
1500 return createFileError(F: "cannot open " + Twine(SummaryPath), EC);
1501
1502 writeIndexToFile(Index: CombinedIndex, Out&: OS, ModuleToSummariesForIndex: &ModuleToSummariesForIndex,
1503 DecSummaries: &DeclarationSummaries);
1504
1505 if (ShouldEmitImportsFiles) {
1506 Error ImportsFilesError = EmitImportsFiles(
1507 ModulePath, OutputFilename: NewModulePath + ".imports", ModuleToSummariesForIndex);
1508 if (ImportsFilesError)
1509 return ImportsFilesError;
1510 }
1511
1512 // Optionally, store the imports files.
1513 if (ImportsFiles)
1514 processImportsFiles(
1515 ModulePath, ModuleToSummariesForIndex,
1516 F: [&](StringRef M) { ImportsFiles->get().push_back(Elt: M.str()); });
1517
1518 return Error::success();
1519}
1520
1521namespace {
1522/// Base class for ThinLTO backends that perform code generation and insert the
1523/// generated files back into the link.
1524class CGThinBackend : public ThinBackendProc {
1525protected:
1526 AddStreamFn AddStream;
1527 DenseSet<GlobalValue::GUID> CfiFunctionDefs;
1528 DenseSet<GlobalValue::GUID> CfiFunctionDecls;
1529 bool ShouldEmitIndexFiles;
1530
1531public:
1532 CGThinBackend(
1533 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1534 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1535 AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
1536 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
1537 ThreadPoolStrategy ThinLTOParallelism)
1538 : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1539 OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
1540 AddStream(std::move(AddStream)),
1541 ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
1542 auto &Defs = CombinedIndex.cfiFunctionDefs();
1543 CfiFunctionDefs.insert_range(R: Defs.guids());
1544 auto &Decls = CombinedIndex.cfiFunctionDecls();
1545 CfiFunctionDecls.insert_range(R: Decls.guids());
1546 }
1547};
1548
1549/// This backend performs code generation by scheduling a job to run on
1550/// an in-process thread when invoked for each task.
1551class InProcessThinBackend : public CGThinBackend {
1552protected:
1553 FileCache Cache;
1554
1555public:
1556 InProcessThinBackend(
1557 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1558 ThreadPoolStrategy ThinLTOParallelism,
1559 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1560 AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
1561 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
1562 : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1563 AddStream, OnWrite, ShouldEmitIndexFiles,
1564 ShouldEmitImportsFiles, ThinLTOParallelism),
1565 Cache(std::move(Cache)) {}
1566
1567 virtual Error runThinLTOBackendThread(
1568 AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
1569 ModuleSummaryIndex &CombinedIndex,
1570 const FunctionImporter::ImportMapTy &ImportList,
1571 const FunctionImporter::ExportSetTy &ExportList,
1572 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1573 const GVSummaryMapTy &DefinedGlobals,
1574 MapVector<StringRef, BitcodeModule> &ModuleMap) {
1575 auto ModuleID = BM.getModuleIdentifier();
1576 llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (in-process)",
1577 ModuleID);
1578 auto RunThinBackend = [&](AddStreamFn AddStream) {
1579 LTOLLVMContext BackendContext(Conf);
1580 Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(Context&: BackendContext);
1581 if (!MOrErr)
1582 return MOrErr.takeError();
1583
1584 return thinBackend(C: Conf, Task, AddStream, M&: **MOrErr, CombinedIndex,
1585 ImportList, DefinedGlobals, ModuleMap: &ModuleMap,
1586 CodeGenOnly: Conf.CodeGenOnly);
1587 };
1588 if (ShouldEmitIndexFiles) {
1589 if (auto E = emitFiles(ImportList, ModulePath: ModuleID, NewModulePath: ModuleID.str()))
1590 return E;
1591 }
1592
1593 if (!Cache.isValid() || !CombinedIndex.modulePaths().count(Key: ModuleID) ||
1594 all_of(Range: CombinedIndex.getModuleHash(ModPath: ModuleID),
1595 P: [](uint32_t V) { return V == 0; }))
1596 // Cache disabled or no entry for this module in the combined index or
1597 // no module hash.
1598 return RunThinBackend(AddStream);
1599
1600 // The module may be cached, this helps handling it.
1601 std::string Key = computeLTOCacheKey(
1602 Conf, Index: CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1603 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1604 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID);
1605 if (Error Err = CacheAddStreamOrErr.takeError())
1606 return Err;
1607 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
1608 if (CacheAddStream)
1609 return RunThinBackend(CacheAddStream);
1610
1611 return Error::success();
1612 }
1613
1614 Error start(
1615 unsigned Task, BitcodeModule BM,
1616 const FunctionImporter::ImportMapTy &ImportList,
1617 const FunctionImporter::ExportSetTy &ExportList,
1618 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1619 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1620 StringRef ModulePath = BM.getModuleIdentifier();
1621 assert(ModuleToDefinedGVSummaries.count(ModulePath));
1622 const GVSummaryMapTy &DefinedGlobals =
1623 ModuleToDefinedGVSummaries.find(Val: ModulePath)->second;
1624 BackendThreadPool.async(
1625 F: [=](BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
1626 const FunctionImporter::ImportMapTy &ImportList,
1627 const FunctionImporter::ExportSetTy &ExportList,
1628 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
1629 &ResolvedODR,
1630 const GVSummaryMapTy &DefinedGlobals,
1631 MapVector<StringRef, BitcodeModule> &ModuleMap) {
1632 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
1633 timeTraceProfilerInitialize(TimeTraceGranularity: Conf.TimeTraceGranularity,
1634 ProcName: "thin backend");
1635 Error E = runThinLTOBackendThread(
1636 AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList,
1637 ResolvedODR, DefinedGlobals, ModuleMap);
1638 if (E) {
1639 std::unique_lock<std::mutex> L(ErrMu);
1640 if (Err)
1641 Err = joinErrors(E1: std::move(*Err), E2: std::move(E));
1642 else
1643 Err = std::move(E);
1644 }
1645 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
1646 timeTraceProfilerFinishThread();
1647 },
1648 ArgList&: BM, ArgList: std::ref(t&: CombinedIndex), ArgList: std::ref(t: ImportList), ArgList: std::ref(t: ExportList),
1649 ArgList: std::ref(t: ResolvedODR), ArgList: std::ref(t: DefinedGlobals), ArgList: std::ref(t&: ModuleMap));
1650
1651 if (OnWrite)
1652 OnWrite(std::string(ModulePath));
1653 return Error::success();
1654 }
1655};
1656
1657/// This backend is utilized in the first round of a two-codegen round process.
1658/// It first saves optimized bitcode files to disk before the codegen process
1659/// begins. After codegen, it stores the resulting object files in a scratch
1660/// buffer. Note the codegen data stored in the scratch buffer will be extracted
1661/// and merged in the subsequent step.
1662class FirstRoundThinBackend : public InProcessThinBackend {
1663 AddStreamFn IRAddStream;
1664 FileCache IRCache;
1665
1666public:
1667 FirstRoundThinBackend(
1668 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1669 ThreadPoolStrategy ThinLTOParallelism,
1670 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1671 AddStreamFn CGAddStream, FileCache CGCache, AddStreamFn IRAddStream,
1672 FileCache IRCache)
1673 : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
1674 ModuleToDefinedGVSummaries, std::move(CGAddStream),
1675 std::move(CGCache), /*OnWrite=*/nullptr,
1676 /*ShouldEmitIndexFiles=*/false,
1677 /*ShouldEmitImportsFiles=*/false),
1678 IRAddStream(std::move(IRAddStream)), IRCache(std::move(IRCache)) {}
1679
1680 Error runThinLTOBackendThread(
1681 AddStreamFn CGAddStream, FileCache CGCache, unsigned Task,
1682 BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
1683 const FunctionImporter::ImportMapTy &ImportList,
1684 const FunctionImporter::ExportSetTy &ExportList,
1685 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1686 const GVSummaryMapTy &DefinedGlobals,
1687 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1688 auto ModuleID = BM.getModuleIdentifier();
1689 llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (first round)",
1690 ModuleID);
1691 auto RunThinBackend = [&](AddStreamFn CGAddStream,
1692 AddStreamFn IRAddStream) {
1693 LTOLLVMContext BackendContext(Conf);
1694 Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(Context&: BackendContext);
1695 if (!MOrErr)
1696 return MOrErr.takeError();
1697
1698 return thinBackend(C: Conf, Task, AddStream: CGAddStream, M&: **MOrErr, CombinedIndex,
1699 ImportList, DefinedGlobals, ModuleMap: &ModuleMap,
1700 CodeGenOnly: Conf.CodeGenOnly, IRAddStream);
1701 };
1702 // Like InProcessThinBackend, we produce index files as needed for
1703 // FirstRoundThinBackend. However, these files are not generated for
1704 // SecondRoundThinBackend.
1705 if (ShouldEmitIndexFiles) {
1706 if (auto E = emitFiles(ImportList, ModulePath: ModuleID, NewModulePath: ModuleID.str()))
1707 return E;
1708 }
1709
1710 assert((CGCache.isValid() == IRCache.isValid()) &&
1711 "Both caches for CG and IR should have matching availability");
1712 if (!CGCache.isValid() || !CombinedIndex.modulePaths().count(Key: ModuleID) ||
1713 all_of(Range: CombinedIndex.getModuleHash(ModPath: ModuleID),
1714 P: [](uint32_t V) { return V == 0; }))
1715 // Cache disabled or no entry for this module in the combined index or
1716 // no module hash.
1717 return RunThinBackend(CGAddStream, IRAddStream);
1718
1719 // Get CGKey for caching object in CGCache.
1720 std::string CGKey = computeLTOCacheKey(
1721 Conf, Index: CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1722 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1723 Expected<AddStreamFn> CacheCGAddStreamOrErr =
1724 CGCache(Task, CGKey, ModuleID);
1725 if (Error Err = CacheCGAddStreamOrErr.takeError())
1726 return Err;
1727 AddStreamFn &CacheCGAddStream = *CacheCGAddStreamOrErr;
1728
1729 // Get IRKey for caching (optimized) IR in IRCache with an extra ID.
1730 std::string IRKey = recomputeLTOCacheKey(Key: CGKey, /*ExtraID=*/"IR");
1731 Expected<AddStreamFn> CacheIRAddStreamOrErr =
1732 IRCache(Task, IRKey, ModuleID);
1733 if (Error Err = CacheIRAddStreamOrErr.takeError())
1734 return Err;
1735 AddStreamFn &CacheIRAddStream = *CacheIRAddStreamOrErr;
1736
1737 // Ideally, both CG and IR caching should be synchronized. However, in
1738 // practice, their availability may differ due to different expiration
1739 // times. Therefore, if either cache is missing, the backend process is
1740 // triggered.
1741 if (CacheCGAddStream || CacheIRAddStream) {
1742 LLVM_DEBUG(dbgs() << "[FirstRound] Cache Miss for "
1743 << BM.getModuleIdentifier() << "\n");
1744 return RunThinBackend(CacheCGAddStream ? CacheCGAddStream : CGAddStream,
1745 CacheIRAddStream ? CacheIRAddStream : IRAddStream);
1746 }
1747
1748 return Error::success();
1749 }
1750};
1751
1752/// This backend operates in the second round of a two-codegen round process.
1753/// It starts by reading the optimized bitcode files that were saved during the
1754/// first round. The backend then executes the codegen only to further optimize
1755/// the code, utilizing the codegen data merged from the first round. Finally,
1756/// it writes the resulting object files as usual.
1757class SecondRoundThinBackend : public InProcessThinBackend {
1758 std::unique_ptr<SmallVector<StringRef>> IRFiles;
1759 stable_hash CombinedCGDataHash;
1760
1761public:
1762 SecondRoundThinBackend(
1763 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1764 ThreadPoolStrategy ThinLTOParallelism,
1765 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1766 AddStreamFn AddStream, FileCache Cache,
1767 std::unique_ptr<SmallVector<StringRef>> IRFiles,
1768 stable_hash CombinedCGDataHash)
1769 : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
1770 ModuleToDefinedGVSummaries, std::move(AddStream),
1771 std::move(Cache),
1772 /*OnWrite=*/nullptr,
1773 /*ShouldEmitIndexFiles=*/false,
1774 /*ShouldEmitImportsFiles=*/false),
1775 IRFiles(std::move(IRFiles)), CombinedCGDataHash(CombinedCGDataHash) {}
1776
1777 Error runThinLTOBackendThread(
1778 AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
1779 ModuleSummaryIndex &CombinedIndex,
1780 const FunctionImporter::ImportMapTy &ImportList,
1781 const FunctionImporter::ExportSetTy &ExportList,
1782 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1783 const GVSummaryMapTy &DefinedGlobals,
1784 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1785 auto ModuleID = BM.getModuleIdentifier();
1786 llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (second round)",
1787 ModuleID);
1788 auto RunThinBackend = [&](AddStreamFn AddStream) {
1789 LTOLLVMContext BackendContext(Conf);
1790 std::unique_ptr<Module> LoadedModule =
1791 cgdata::loadModuleForTwoRounds(OrigModule&: BM, Task, Context&: BackendContext, IRFiles: *IRFiles);
1792
1793 return thinBackend(C: Conf, Task, AddStream, M&: *LoadedModule, CombinedIndex,
1794 ImportList, DefinedGlobals, ModuleMap: &ModuleMap,
1795 /*CodeGenOnly=*/true);
1796 };
1797 if (!Cache.isValid() || !CombinedIndex.modulePaths().count(Key: ModuleID) ||
1798 all_of(Range: CombinedIndex.getModuleHash(ModPath: ModuleID),
1799 P: [](uint32_t V) { return V == 0; }))
1800 // Cache disabled or no entry for this module in the combined index or
1801 // no module hash.
1802 return RunThinBackend(AddStream);
1803
1804 // Get Key for caching the final object file in Cache with the combined
1805 // CGData hash.
1806 std::string Key = computeLTOCacheKey(
1807 Conf, Index: CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1808 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1809 Key = recomputeLTOCacheKey(Key,
1810 /*ExtraID=*/std::to_string(val: CombinedCGDataHash));
1811 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID);
1812 if (Error Err = CacheAddStreamOrErr.takeError())
1813 return Err;
1814 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
1815
1816 if (CacheAddStream) {
1817 LLVM_DEBUG(dbgs() << "[SecondRound] Cache Miss for "
1818 << BM.getModuleIdentifier() << "\n");
1819 return RunThinBackend(CacheAddStream);
1820 }
1821
1822 return Error::success();
1823 }
1824};
1825} // end anonymous namespace
1826
1827ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
1828 lto::IndexWriteCallback OnWrite,
1829 bool ShouldEmitIndexFiles,
1830 bool ShouldEmitImportsFiles) {
1831 auto Func =
1832 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1833 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1834 AddStreamFn AddStream, FileCache Cache) {
1835 return std::make_unique<InProcessThinBackend>(
1836 args: Conf, args&: CombinedIndex, args: Parallelism, args: ModuleToDefinedGVSummaries,
1837 args&: AddStream, args&: Cache, args: OnWrite, args: ShouldEmitIndexFiles,
1838 args: ShouldEmitImportsFiles);
1839 };
1840 return ThinBackend(Func, Parallelism);
1841}
1842
1843StringLiteral lto::getThinLTODefaultCPU(const Triple &TheTriple) {
1844 if (!TheTriple.isOSDarwin())
1845 return "";
1846 if (TheTriple.getArch() == Triple::x86_64)
1847 return "core2";
1848 if (TheTriple.getArch() == Triple::x86)
1849 return "yonah";
1850 if (TheTriple.isArm64e())
1851 return "apple-a12";
1852 if (TheTriple.getArch() == Triple::aarch64 ||
1853 TheTriple.getArch() == Triple::aarch64_32)
1854 return "cyclone";
1855 return "";
1856}
1857
1858// Given the original \p Path to an output file, replace any path
1859// prefix matching \p OldPrefix with \p NewPrefix. Also, create the
1860// resulting directory if it does not yet exist.
1861std::string lto::getThinLTOOutputFile(StringRef Path, StringRef OldPrefix,
1862 StringRef NewPrefix) {
1863 if (OldPrefix.empty() && NewPrefix.empty())
1864 return std::string(Path);
1865 SmallString<128> NewPath(Path);
1866 llvm::sys::path::replace_path_prefix(Path&: NewPath, OldPrefix, NewPrefix);
1867 StringRef ParentPath = llvm::sys::path::parent_path(path: NewPath.str());
1868 if (!ParentPath.empty()) {
1869 // Make sure the new directory exists, creating it if necessary.
1870 if (std::error_code EC = llvm::sys::fs::create_directories(path: ParentPath))
1871 llvm::errs() << "warning: could not create directory '" << ParentPath
1872 << "': " << EC.message() << '\n';
1873 }
1874 return std::string(NewPath);
1875}
1876
1877namespace {
1878class WriteIndexesThinBackend : public ThinBackendProc {
1879 std::string OldPrefix, NewPrefix, NativeObjectPrefix;
1880 raw_fd_ostream *LinkedObjectsFile;
1881
1882public:
1883 WriteIndexesThinBackend(
1884 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1885 ThreadPoolStrategy ThinLTOParallelism,
1886 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1887 std::string OldPrefix, std::string NewPrefix,
1888 std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
1889 raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite)
1890 : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1891 OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
1892 OldPrefix(OldPrefix), NewPrefix(NewPrefix),
1893 NativeObjectPrefix(NativeObjectPrefix),
1894 LinkedObjectsFile(LinkedObjectsFile) {}
1895
1896 Error start(
1897 unsigned Task, BitcodeModule BM,
1898 const FunctionImporter::ImportMapTy &ImportList,
1899 const FunctionImporter::ExportSetTy &ExportList,
1900 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1901 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1902 StringRef ModulePath = BM.getModuleIdentifier();
1903
1904 // The contents of this file may be used as input to a native link, and must
1905 // therefore contain the processed modules in a determinstic order that
1906 // match the order they are provided on the command line. For that reason,
1907 // we cannot include this in the asynchronously executed lambda below.
1908 if (LinkedObjectsFile) {
1909 std::string ObjectPrefix =
1910 NativeObjectPrefix.empty() ? NewPrefix : NativeObjectPrefix;
1911 std::string LinkedObjectsFilePath =
1912 getThinLTOOutputFile(Path: ModulePath, OldPrefix, NewPrefix: ObjectPrefix);
1913 *LinkedObjectsFile << LinkedObjectsFilePath << '\n';
1914 }
1915
1916 BackendThreadPool.async(
1917 F: [this](const StringRef ModulePath,
1918 const FunctionImporter::ImportMapTy &ImportList,
1919 const std::string &OldPrefix, const std::string &NewPrefix) {
1920 std::string NewModulePath =
1921 getThinLTOOutputFile(Path: ModulePath, OldPrefix, NewPrefix);
1922 auto E = emitFiles(ImportList, ModulePath, NewModulePath);
1923 if (E) {
1924 std::unique_lock<std::mutex> L(ErrMu);
1925 if (Err)
1926 Err = joinErrors(E1: std::move(*Err), E2: std::move(E));
1927 else
1928 Err = std::move(E);
1929 return;
1930 }
1931 },
1932 ArgList&: ModulePath, ArgList: ImportList, ArgList&: OldPrefix, ArgList&: NewPrefix);
1933
1934 if (OnWrite)
1935 OnWrite(std::string(ModulePath));
1936 return Error::success();
1937 }
1938
1939 bool isSensitiveToInputOrder() override {
1940 // The order which modules are written to LinkedObjectsFile should be
1941 // deterministic and match the order they are passed on the command line.
1942 return true;
1943 }
1944};
1945} // end anonymous namespace
1946
1947ThinBackend lto::createWriteIndexesThinBackend(
1948 ThreadPoolStrategy Parallelism, std::string OldPrefix,
1949 std::string NewPrefix, std::string NativeObjectPrefix,
1950 bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile,
1951 IndexWriteCallback OnWrite) {
1952 auto Func =
1953 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1954 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1955 AddStreamFn AddStream, FileCache Cache) {
1956 return std::make_unique<WriteIndexesThinBackend>(
1957 args: Conf, args&: CombinedIndex, args: Parallelism, args: ModuleToDefinedGVSummaries,
1958 args: OldPrefix, args: NewPrefix, args: NativeObjectPrefix, args: ShouldEmitImportsFiles,
1959 args: LinkedObjectsFile, args: OnWrite);
1960 };
1961 return ThinBackend(Func, Parallelism);
1962}
1963
1964Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
1965 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
1966 llvm::TimeTraceScope timeScope("Run ThinLTO");
1967 LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
1968 ThinLTO.CombinedIndex.releaseTemporaryMemory();
1969 timeTraceProfilerBegin(Name: "ThinLink", Detail: StringRef(""));
1970 llvm::scope_exit TimeTraceScopeExit([]() {
1971 if (llvm::timeTraceProfilerEnabled())
1972 llvm::timeTraceProfilerEnd();
1973 });
1974 if (ThinLTO.ModuleMap.empty())
1975 return Error::success();
1976
1977 if (ThinLTO.ModulesToCompile && ThinLTO.ModulesToCompile->empty()) {
1978 llvm::errs() << "warning: [ThinLTO] No module compiled\n";
1979 return Error::success();
1980 }
1981
1982 if (Conf.CombinedIndexHook &&
1983 !Conf.CombinedIndexHook(ThinLTO.CombinedIndex, GUIDPreservedSymbols))
1984 return Error::success();
1985
1986 // Collect for each module the list of function it defines (GUID ->
1987 // Summary).
1988 DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(
1989 ThinLTO.ModuleMap.size());
1990 ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule(
1991 ModuleToDefinedGVSummaries);
1992 // Create entries for any modules that didn't have any GV summaries
1993 // (either they didn't have any GVs to start with, or we suppressed
1994 // generation of the summaries because they e.g. had inline assembly
1995 // uses that couldn't be promoted/renamed on export). This is so
1996 // InProcessThinBackend::start can still launch a backend thread, which
1997 // is passed the map of summaries for the module, without any special
1998 // handling for this case.
1999 for (auto &Mod : ThinLTO.ModuleMap)
2000 if (!ModuleToDefinedGVSummaries.count(Val: Mod.first))
2001 ModuleToDefinedGVSummaries.try_emplace(Key: Mod.first);
2002
2003 FunctionImporter::ImportListsTy ImportLists(ThinLTO.ModuleMap.size());
2004 DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(
2005 ThinLTO.ModuleMap.size());
2006 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
2007
2008 if (DumpThinCGSCCs)
2009 ThinLTO.CombinedIndex.dumpSCCs(OS&: outs());
2010
2011 std::set<GlobalValue::GUID> ExportedGUIDs;
2012
2013 bool WholeProgramVisibilityEnabledInLTO =
2014 Conf.HasWholeProgramVisibility &&
2015 // If validation is enabled, upgrade visibility only when all vtables
2016 // have typeinfos.
2017 (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
2018 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
2019 ThinLTO.CombinedIndex.setWithWholeProgramVisibility();
2020
2021 // If we're validating, get the vtable symbols that should not be
2022 // upgraded because they correspond to typeIDs outside of index-based
2023 // WPD info.
2024 DenseSet<GlobalValue::GUID> VisibleToRegularObjSymbols;
2025 if (WholeProgramVisibilityEnabledInLTO &&
2026 Conf.ValidateAllVtablesHaveTypeInfos) {
2027 // This returns true when the name is local or not defined. Locals are
2028 // expected to be handled separately.
2029 auto IsVisibleToRegularObj = [&](StringRef name) {
2030 auto It = GlobalResolutions->find(Val: name);
2031 return (It == GlobalResolutions->end() ||
2032 It->second.VisibleOutsideSummary || !It->second.Prevailing);
2033 };
2034
2035 getVisibleToRegularObjVtableGUIDs(Index&: ThinLTO.CombinedIndex,
2036 VisibleToRegularObjSymbols,
2037 IsVisibleToRegularObj);
2038 }
2039
2040 // If allowed, upgrade public vcall visibility to linkage unit visibility in
2041 // the summaries before whole program devirtualization below.
2042 updateVCallVisibilityInIndex(
2043 Index&: ThinLTO.CombinedIndex, WholeProgramVisibilityEnabledInLTO,
2044 DynamicExportSymbols, VisibleToRegularObjSymbols);
2045
2046 // Perform index-based WPD. This will return immediately if there are
2047 // no index entries in the typeIdMetadata map (e.g. if we are instead
2048 // performing IR-based WPD in hybrid regular/thin LTO mode).
2049 std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
2050 runWholeProgramDevirtOnIndex(Summary&: ThinLTO.CombinedIndex, ExportedGUIDs,
2051 LocalWPDTargetsMap);
2052
2053 auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
2054 return ThinLTO.isPrevailingModuleForGUID(GUID, Module: S->modulePath());
2055 };
2056 if (EnableMemProfContextDisambiguation) {
2057 MemProfContextDisambiguation ContextDisambiguation;
2058 ContextDisambiguation.run(
2059 Index&: ThinLTO.CombinedIndex, isPrevailing,
2060 EmitRemark: [&](StringRef PassName, StringRef RemarkName, const Twine &Msg) {
2061 auto R = OptimizationRemark(PassName.data(), RemarkName,
2062 LinkerRemarkFunction);
2063 R << Msg.str();
2064 emitRemark(Remark&: R);
2065 });
2066 }
2067
2068 // Figure out which symbols need to be internalized. This also needs to happen
2069 // at -O0 because summary-based DCE is implemented using internalization, and
2070 // we must apply DCE consistently with the full LTO module in order to avoid
2071 // undefined references during the final link.
2072 for (auto &Res : *GlobalResolutions) {
2073 // If the symbol does not have external references or it is not prevailing,
2074 // then not need to mark it as exported from a ThinLTO partition.
2075 if (Res.second.Partition != GlobalResolution::External ||
2076 !Res.second.isPrevailingIRSymbol())
2077 continue;
2078 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
2079 GlobalName: GlobalValue::dropLLVMManglingEscape(Name: Res.second.IRName));
2080 // Mark exported unless index-based analysis determined it to be dead.
2081 if (ThinLTO.CombinedIndex.isGUIDLive(GUID))
2082 ExportedGUIDs.insert(x: GUID);
2083 }
2084
2085 // Reset the GlobalResolutions to deallocate the associated memory, as there
2086 // are no further accesses. We specifically want to do this before computing
2087 // cross module importing, which adds to peak memory via the computed import
2088 // and export lists.
2089 releaseGlobalResolutionsMemory();
2090
2091 if (Conf.OptLevel > 0)
2092 ComputeCrossModuleImport(Index: ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
2093 isPrevailing, ImportLists, ExportLists);
2094
2095 // Any functions referenced by the jump table in the regular LTO object must
2096 // be exported.
2097 auto &Defs = ThinLTO.CombinedIndex.cfiFunctionDefs();
2098 ExportedGUIDs.insert(first: Defs.guid_begin(), last: Defs.guid_end());
2099 auto &Decls = ThinLTO.CombinedIndex.cfiFunctionDecls();
2100 ExportedGUIDs.insert(first: Decls.guid_begin(), last: Decls.guid_end());
2101
2102 auto isExported = [&](StringRef ModuleIdentifier, ValueInfo VI) {
2103 const auto &ExportList = ExportLists.find(Val: ModuleIdentifier);
2104 return (ExportList != ExportLists.end() && ExportList->second.count(V: VI)) ||
2105 ExportedGUIDs.count(x: VI.getGUID());
2106 };
2107
2108 // Update local devirtualized targets that were exported by cross-module
2109 // importing or by other devirtualizations marked in the ExportedGUIDs set.
2110 updateIndexWPDForExports(Summary&: ThinLTO.CombinedIndex, isExported,
2111 LocalWPDTargetsMap);
2112
2113 thinLTOInternalizeAndPromoteInIndex(Index&: ThinLTO.CombinedIndex, isExported,
2114 isPrevailing);
2115
2116 auto recordNewLinkage = [&](StringRef ModuleIdentifier,
2117 GlobalValue::GUID GUID,
2118 GlobalValue::LinkageTypes NewLinkage) {
2119 ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
2120 };
2121 thinLTOResolvePrevailingInIndex(C: Conf, Index&: ThinLTO.CombinedIndex, isPrevailing,
2122 recordNewLinkage, GUIDPreservedSymbols);
2123
2124 thinLTOPropagateFunctionAttrs(Index&: ThinLTO.CombinedIndex, isPrevailing);
2125
2126 generateParamAccessSummary(Index&: ThinLTO.CombinedIndex);
2127
2128 if (llvm::timeTraceProfilerEnabled())
2129 llvm::timeTraceProfilerEnd();
2130
2131 TimeTraceScopeExit.release();
2132
2133 auto &ModuleMap =
2134 ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
2135
2136 auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error {
2137 auto ProcessOneModule = [&](int I) -> Error {
2138 auto &Mod = *(ModuleMap.begin() + I);
2139 // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for
2140 // combined module and parallel code generation partitions.
2141 return BackendProcess->start(
2142 Task: RegularLTO.ParallelCodeGenParallelismLevel + I, BM: Mod.second,
2143 ImportList: ImportLists[Mod.first], ExportList: ExportLists[Mod.first],
2144 ResolvedODR: ResolvedODR[Mod.first], ModuleMap&: ThinLTO.ModuleMap);
2145 };
2146
2147 BackendProcess->setup(ThinLTONumTasks: ModuleMap.size(),
2148 ThinLTOTaskOffset: RegularLTO.ParallelCodeGenParallelismLevel,
2149 Triple: RegularLTO.CombinedModule->getTargetTriple());
2150
2151 if (BackendProcess->getThreadCount() == 1 ||
2152 BackendProcess->isSensitiveToInputOrder()) {
2153 // Process the modules in the order they were provided on the
2154 // command-line. It is important for this codepath to be used for
2155 // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists
2156 // ThinLTO objects in the same order as the inputs, which otherwise would
2157 // affect the final link order.
2158 for (int I = 0, E = ModuleMap.size(); I != E; ++I)
2159 if (Error E = ProcessOneModule(I))
2160 return E;
2161 } else {
2162 // When executing in parallel, process largest bitsize modules first to
2163 // improve parallelism, and avoid starving the thread pool near the end.
2164 // This saves about 15 sec on a 36-core machine while link `clang.exe`
2165 // (out of 100 sec).
2166 std::vector<BitcodeModule *> ModulesVec;
2167 ModulesVec.reserve(n: ModuleMap.size());
2168 for (auto &Mod : ModuleMap)
2169 ModulesVec.push_back(x: &Mod.second);
2170 for (int I : generateModulesOrdering(R: ModulesVec))
2171 if (Error E = ProcessOneModule(I))
2172 return E;
2173 }
2174 return BackendProcess->wait();
2175 };
2176
2177 if (!CodeGenDataThinLTOTwoRounds) {
2178 std::unique_ptr<ThinBackendProc> BackendProc =
2179 ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
2180 AddStream, Cache);
2181 return RunBackends(BackendProc.get());
2182 }
2183
2184 // Perform two rounds of code generation for ThinLTO:
2185 // 1. First round: Perform optimization and code generation, outputting to
2186 // temporary scratch objects.
2187 // 2. Merge code generation data extracted from the temporary scratch objects.
2188 // 3. Second round: Execute code generation again using the merged data.
2189 LLVM_DEBUG(dbgs() << "[TwoRounds] Initializing ThinLTO two-codegen rounds\n");
2190
2191 unsigned MaxTasks = getMaxTasks();
2192 auto Parallelism = ThinLTO.Backend.getParallelism();
2193 // Set up two additional streams and caches for storing temporary scratch
2194 // objects and optimized IRs, using the same cache directory as the original.
2195 cgdata::StreamCacheData CG(MaxTasks, Cache, "CG"), IR(MaxTasks, Cache, "IR");
2196
2197 // First round: Execute optimization and code generation, outputting to
2198 // temporary scratch objects. Serialize the optimized IRs before initiating
2199 // code generation.
2200 LLVM_DEBUG(dbgs() << "[TwoRounds] Running the first round of codegen\n");
2201 auto FirstRoundLTO = std::make_unique<FirstRoundThinBackend>(
2202 args&: Conf, args&: ThinLTO.CombinedIndex, args&: Parallelism, args&: ModuleToDefinedGVSummaries,
2203 args&: CG.AddStream, args&: CG.Cache, args&: IR.AddStream, args&: IR.Cache);
2204 if (Error E = RunBackends(FirstRoundLTO.get()))
2205 return E;
2206
2207 LLVM_DEBUG(dbgs() << "[TwoRounds] Merging codegen data\n");
2208 auto CombinedHashOrErr = cgdata::mergeCodeGenData(ObjectFiles: *CG.getResult());
2209 if (Error E = CombinedHashOrErr.takeError())
2210 return E;
2211 auto CombinedHash = *CombinedHashOrErr;
2212 LLVM_DEBUG(dbgs() << "[TwoRounds] CGData hash: " << CombinedHash << "\n");
2213
2214 // Second round: Read the optimized IRs and execute code generation using the
2215 // merged data.
2216 LLVM_DEBUG(dbgs() << "[TwoRounds] Running the second round of codegen\n");
2217 auto SecondRoundLTO = std::make_unique<SecondRoundThinBackend>(
2218 args&: Conf, args&: ThinLTO.CombinedIndex, args&: Parallelism, args&: ModuleToDefinedGVSummaries,
2219 args&: AddStream, args&: Cache, args: IR.getResult(), args&: CombinedHash);
2220 return RunBackends(SecondRoundLTO.get());
2221}
2222
2223Expected<LLVMRemarkFileHandle> lto::setupLLVMOptimizationRemarks(
2224 LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses,
2225 StringRef RemarksFormat, bool RemarksWithHotness,
2226 std::optional<uint64_t> RemarksHotnessThreshold, int Count) {
2227 std::string Filename = std::string(RemarksFilename);
2228 // For ThinLTO, file.opt.<format> becomes
2229 // file.opt.<format>.thin.<num>.<format>.
2230 if (!Filename.empty() && Count != -1)
2231 Filename =
2232 (Twine(Filename) + ".thin." + llvm::utostr(X: Count) + "." + RemarksFormat)
2233 .str();
2234
2235 auto ResultOrErr = llvm::setupLLVMOptimizationRemarks(
2236 Context, RemarksFilename: Filename, RemarksPasses, RemarksFormat, RemarksWithHotness,
2237 RemarksHotnessThreshold);
2238 if (Error E = ResultOrErr.takeError())
2239 return std::move(E);
2240
2241 if (*ResultOrErr)
2242 (*ResultOrErr)->keep();
2243
2244 return ResultOrErr;
2245}
2246
2247Expected<std::unique_ptr<ToolOutputFile>>
2248lto::setupStatsFile(StringRef StatsFilename) {
2249 // Setup output file to emit statistics.
2250 if (StatsFilename.empty())
2251 return nullptr;
2252
2253 llvm::EnableStatistics(DoPrintOnExit: false);
2254 std::error_code EC;
2255 auto StatsFile =
2256 std::make_unique<ToolOutputFile>(args&: StatsFilename, args&: EC, args: sys::fs::OF_None);
2257 if (EC)
2258 return errorCodeToError(EC);
2259
2260 StatsFile->keep();
2261 return std::move(StatsFile);
2262}
2263
2264// Compute the ordering we will process the inputs: the rough heuristic here
2265// is to sort them per size so that the largest module get schedule as soon as
2266// possible. This is purely a compile-time optimization.
2267std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
2268 auto Seq = llvm::seq<int>(Begin: 0, End: R.size());
2269 std::vector<int> ModulesOrdering(Seq.begin(), Seq.end());
2270 llvm::sort(C&: ModulesOrdering, Comp: [&](int LeftIndex, int RightIndex) {
2271 auto LSize = R[LeftIndex]->getBuffer().size();
2272 auto RSize = R[RightIndex]->getBuffer().size();
2273 return LSize > RSize;
2274 });
2275 return ModulesOrdering;
2276}
2277
2278namespace {
2279/// This out-of-process backend does not perform code generation when invoked
2280/// for each task. Instead, it generates the necessary information (e.g., the
2281/// summary index shard, import list, etc.) to enable code generation to be
2282/// performed externally, similar to WriteIndexesThinBackend. The backend's
2283/// `wait` function then invokes an external distributor process to carry out
2284/// the backend compilations.
2285class OutOfProcessThinBackend : public CGThinBackend {
2286 using SString = SmallString<128>;
2287
2288 BumpPtrAllocator Alloc;
2289 StringSaver Saver{Alloc};
2290
2291 SString LinkerOutputFile;
2292
2293 SString DistributorPath;
2294 ArrayRef<StringRef> DistributorArgs;
2295
2296 SString RemoteCompiler;
2297 ArrayRef<StringRef> RemoteCompilerPrependArgs;
2298 ArrayRef<StringRef> RemoteCompilerArgs;
2299
2300 bool SaveTemps;
2301
2302 SmallVector<StringRef, 0> CodegenOptions;
2303 DenseSet<StringRef> CommonInputs;
2304 // Number of the object files that have been already cached.
2305 std::atomic<size_t> CachedJobs{0};
2306 // Information specific to individual backend compilation job.
2307 struct Job {
2308 unsigned Task;
2309 StringRef ModuleID;
2310 StringRef NativeObjectPath;
2311 StringRef SummaryIndexPath;
2312 ImportsFilesContainer ImportsFiles;
2313 std::string CacheKey;
2314 AddStreamFn CacheAddStream;
2315 bool Cached = false;
2316 };
2317 // The set of backend compilations jobs.
2318 SmallVector<Job> Jobs;
2319
2320 // A unique string to identify the current link.
2321 SmallString<8> UID;
2322
2323 // The offset to the first ThinLTO task.
2324 unsigned ThinLTOTaskOffset;
2325
2326 // The target triple to supply for backend compilations.
2327 llvm::Triple Triple;
2328
2329 // Cache
2330 FileCache Cache;
2331
2332public:
2333 OutOfProcessThinBackend(
2334 const Config &Conf, ModuleSummaryIndex &CombinedIndex,
2335 ThreadPoolStrategy ThinLTOParallelism,
2336 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
2337 AddStreamFn AddStream, FileCache CacheFn, lto::IndexWriteCallback OnWrite,
2338 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
2339 StringRef LinkerOutputFile, StringRef Distributor,
2340 ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
2341 ArrayRef<StringRef> RemoteCompilerPrependArgs,
2342 ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps)
2343 : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
2344 AddStream, OnWrite, ShouldEmitIndexFiles,
2345 ShouldEmitImportsFiles, ThinLTOParallelism),
2346 LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor),
2347 DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler),
2348 RemoteCompilerPrependArgs(RemoteCompilerPrependArgs),
2349 RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps),
2350 Cache(std::move(CacheFn)) {}
2351
2352 void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset,
2353 llvm::Triple Triple) override {
2354 UID = itostr(X: sys::Process::getProcessId());
2355 Jobs.resize(N: (size_t)ThinLTONumTasks);
2356 this->ThinLTOTaskOffset = ThinLTOTaskOffset;
2357 this->Triple = Triple;
2358 this->Conf.Dtlto = 1;
2359 }
2360
2361 virtual Error runThinLTOBackendThread(
2362 Job &J, const FunctionImporter::ImportMapTy &ImportList,
2363 const FunctionImporter::ExportSetTy &ExportList,
2364 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
2365 &ResolvedODR) {
2366 {
2367 TimeTraceScope TimeScope("Emit individual index for DTLTO",
2368 J.SummaryIndexPath);
2369 if (auto E = emitFiles(ImportList, ModulePath: J.ModuleID, NewModulePath: J.ModuleID.str(),
2370 SummaryPath: J.SummaryIndexPath, ImportsFiles: J.ImportsFiles))
2371 return E;
2372 }
2373
2374 if (!Cache.isValid() || !CombinedIndex.modulePaths().count(Key: J.ModuleID) ||
2375 all_of(Range: CombinedIndex.getModuleHash(ModPath: J.ModuleID),
2376 P: [](uint32_t V) { return V == 0; }))
2377 // Cache disabled or no entry for this module in the combined index or
2378 // no module hash.
2379 return Error::success();
2380
2381 TimeTraceScope TimeScope("Check cache for DTLTO", J.SummaryIndexPath);
2382 const GVSummaryMapTy &DefinedGlobals =
2383 ModuleToDefinedGVSummaries.find(Val: J.ModuleID)->second;
2384
2385 // The module may be cached, this helps handling it.
2386 J.CacheKey = computeLTOCacheKey(Conf, Index: CombinedIndex, ModuleID: J.ModuleID, ImportList,
2387 ExportList, ResolvedODR, DefinedGlobals,
2388 CfiFunctionDefs, CfiFunctionDecls);
2389
2390 // The module may be cached, this helps handling it.
2391 auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
2392 if (Error Err = CacheAddStreamExp.takeError())
2393 return Err;
2394 AddStreamFn &CacheAddStream = *CacheAddStreamExp;
2395 // If CacheAddStream is null, we have a cache hit and at this point
2396 // object file is already passed back to the linker.
2397 if (!CacheAddStream) {
2398 J.Cached = true; // Cache hit, mark the job as cached.
2399 CachedJobs.fetch_add(i: 1);
2400 } else {
2401 // If CacheAddStream is not null, we have a cache miss and we need to
2402 // run the backend for codegen. Save cache 'add stream'
2403 // function for a later use.
2404 J.CacheAddStream = std::move(CacheAddStream);
2405 }
2406 return Error::success();
2407 }
2408
2409 Error start(
2410 unsigned Task, BitcodeModule BM,
2411 const FunctionImporter::ImportMapTy &ImportList,
2412 const FunctionImporter::ExportSetTy &ExportList,
2413 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
2414 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
2415
2416 StringRef ModulePath = BM.getModuleIdentifier();
2417
2418 SString ObjFilePath = sys::path::parent_path(path: LinkerOutputFile);
2419 sys::path::append(path&: ObjFilePath, a: sys::path::stem(path: ModulePath) + "." +
2420 itostr(X: Task) + "." + UID + ".native.o");
2421
2422 Job &J = Jobs[Task - ThinLTOTaskOffset];
2423 J = {.Task: Task,
2424 .ModuleID: ModulePath,
2425 .NativeObjectPath: Saver.save(S: ObjFilePath.str()),
2426 .SummaryIndexPath: Saver.save(S: ObjFilePath.str() + ".thinlto.bc"),
2427 .ImportsFiles: {}, // Filled in by emitFiles below.
2428 .CacheKey: "", /*CacheKey=*/
2429 .CacheAddStream: nullptr,
2430 .Cached: false};
2431
2432 // Cleanup per-job temporary files on abnormal process exit.
2433 if (!SaveTemps) {
2434 llvm::sys::RemoveFileOnSignal(Filename: J.NativeObjectPath);
2435 if (!ShouldEmitIndexFiles)
2436 llvm::sys::RemoveFileOnSignal(Filename: J.SummaryIndexPath);
2437 }
2438
2439 assert(ModuleToDefinedGVSummaries.count(ModulePath));
2440
2441 // The BackendThreadPool is only used here to write the sharded index files
2442 // (similar to WriteIndexesThinBackend).
2443 BackendThreadPool.async(
2444 F: [=](Job &J, const FunctionImporter::ImportMapTy &ImportList,
2445 const FunctionImporter::ExportSetTy &ExportList,
2446 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
2447 &ResolvedODR) {
2448 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
2449 timeTraceProfilerInitialize(
2450 TimeTraceGranularity: Conf.TimeTraceGranularity,
2451 ProcName: "Emit individual index and check cache for DTLTO");
2452 Error E =
2453 runThinLTOBackendThread(J, ImportList, ExportList, ResolvedODR);
2454 if (E) {
2455 std::unique_lock<std::mutex> L(ErrMu);
2456 if (Err)
2457 Err = joinErrors(E1: std::move(*Err), E2: std::move(E));
2458 else
2459 Err = std::move(E);
2460 }
2461 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
2462 timeTraceProfilerFinishThread();
2463 },
2464 ArgList: std::ref(t&: J), ArgList: std::ref(t: ImportList), ArgList: std::ref(t: ExportList),
2465 ArgList: std::ref(t: ResolvedODR));
2466
2467 return Error::success();
2468 }
2469
2470 // Derive a set of Clang options that will be shared/common for all DTLTO
2471 // backend compilations. We are intentionally minimal here as these options
2472 // must remain synchronized with the behavior of Clang. DTLTO does not support
2473 // all the features available with in-process LTO. More features are expected
2474 // to be added over time. Users can specify Clang options directly if a
2475 // feature is not supported. Note that explicitly specified options that imply
2476 // additional input or output file dependencies must be communicated to the
2477 // distribution system, potentially by setting extra options on the
2478 // distributor program.
2479 void buildCommonRemoteCompilerOptions() {
2480 const lto::Config &C = Conf;
2481 auto &Ops = CodegenOptions;
2482
2483 Ops.push_back(Elt: Saver.save(S: "-O" + Twine(C.OptLevel)));
2484
2485 if (C.Options.EmitAddrsig)
2486 Ops.push_back(Elt: "-faddrsig");
2487 if (C.Options.FunctionSections)
2488 Ops.push_back(Elt: "-ffunction-sections");
2489 if (C.Options.DataSections)
2490 Ops.push_back(Elt: "-fdata-sections");
2491
2492 if (C.RelocModel == Reloc::PIC_)
2493 // Clang doesn't have -fpic for all triples.
2494 if (!Triple.isOSBinFormatCOFF())
2495 Ops.push_back(Elt: "-fpic");
2496
2497 // Turn on/off warnings about profile cfg mismatch (default on)
2498 // --lto-pgo-warn-mismatch.
2499 if (!C.PGOWarnMismatch) {
2500 Ops.push_back(Elt: "-mllvm");
2501 Ops.push_back(Elt: "-no-pgo-warn-mismatch");
2502 }
2503
2504 // Enable sample-based profile guided optimizations.
2505 // Sample profile file path --lto-sample-profile=<value>.
2506 if (!C.SampleProfile.empty()) {
2507 Ops.push_back(
2508 Elt: Saver.save(S: "-fprofile-sample-use=" + Twine(C.SampleProfile)));
2509 CommonInputs.insert(V: C.SampleProfile);
2510 }
2511
2512 // We don't know which of options will be used by Clang.
2513 Ops.push_back(Elt: "-Wno-unused-command-line-argument");
2514
2515 // Forward any supplied options.
2516 if (!RemoteCompilerArgs.empty())
2517 for (auto &a : RemoteCompilerArgs)
2518 Ops.push_back(Elt: a);
2519 }
2520
2521 // Generates a JSON file describing the backend compilations, for the
2522 // distributor.
2523 bool emitDistributorJson(StringRef DistributorJson) {
2524 using json::Array;
2525 std::error_code EC;
2526 raw_fd_ostream OS(DistributorJson, EC);
2527 if (EC)
2528 return false;
2529
2530 json::OStream JOS(OS);
2531 JOS.object(Contents: [&]() {
2532 // Information common to all jobs.
2533 JOS.attributeObject(Key: "common", Contents: [&]() {
2534 JOS.attribute(Key: "linker_output", Contents: LinkerOutputFile);
2535
2536 JOS.attributeArray(Key: "args", Contents: [&]() {
2537 JOS.value(V: RemoteCompiler);
2538
2539 // Forward any supplied prepend options.
2540 if (!RemoteCompilerPrependArgs.empty())
2541 for (auto &A : RemoteCompilerPrependArgs)
2542 JOS.value(V: A);
2543
2544 JOS.value(V: "-c");
2545
2546 JOS.value(V: Saver.save(S: "--target=" + Triple.str()));
2547
2548 for (const auto &A : CodegenOptions)
2549 JOS.value(V: A);
2550 });
2551
2552 JOS.attribute(Key: "inputs", Contents: Array(CommonInputs));
2553 });
2554
2555 // Per-compilation-job information.
2556 JOS.attributeArray(Key: "jobs", Contents: [&]() {
2557 for (const auto &J : Jobs) {
2558 assert(J.Task != 0);
2559 if (J.Cached) {
2560 assert(!Cache.getCacheDirectoryPath().empty());
2561 continue;
2562 }
2563
2564 SmallVector<StringRef, 2> Inputs;
2565 SmallVector<StringRef, 1> Outputs;
2566
2567 JOS.object(Contents: [&]() {
2568 JOS.attributeArray(Key: "args", Contents: [&]() {
2569 JOS.value(V: J.ModuleID);
2570 Inputs.push_back(Elt: J.ModuleID);
2571
2572 JOS.value(
2573 V: Saver.save(S: "-fthinlto-index=" + Twine(J.SummaryIndexPath)));
2574 Inputs.push_back(Elt: J.SummaryIndexPath);
2575
2576 JOS.value(V: "-o");
2577 JOS.value(V: J.NativeObjectPath);
2578 Outputs.push_back(Elt: J.NativeObjectPath);
2579 });
2580
2581 // Add the bitcode files from which imports will be made. These do
2582 // not explicitly appear on the backend compilation command lines
2583 // but are recorded in the summary index shards.
2584 llvm::append_range(C&: Inputs, R: J.ImportsFiles);
2585 JOS.attribute(Key: "inputs", Contents: Array(Inputs));
2586
2587 JOS.attribute(Key: "outputs", Contents: Array(Outputs));
2588 });
2589 }
2590 });
2591 });
2592
2593 return true;
2594 }
2595
2596 void removeFile(StringRef FileName) {
2597 std::error_code EC = sys::fs::remove(path: FileName, IgnoreNonExisting: true);
2598 if (EC && EC != std::make_error_code(e: std::errc::no_such_file_or_directory))
2599 errs() << "warning: could not remove the file '" << FileName
2600 << "': " << EC.message() << "\n";
2601 }
2602
2603 Error wait() override {
2604 // Wait for the information on the required backend compilations to be
2605 // gathered.
2606 BackendThreadPool.wait();
2607 if (Err)
2608 return std::move(*Err);
2609
2610 llvm::scope_exit CleanPerJobFiles([&] {
2611 llvm::TimeTraceScope TimeScope("Remove DTLTO temporary files");
2612 if (!SaveTemps)
2613 for (auto &Job : Jobs) {
2614 removeFile(FileName: Job.NativeObjectPath);
2615 if (!ShouldEmitIndexFiles)
2616 removeFile(FileName: Job.SummaryIndexPath);
2617 }
2618 });
2619
2620 const StringRef BCError = "DTLTO backend compilation: ";
2621
2622 buildCommonRemoteCompilerOptions();
2623
2624 SString JsonFile = sys::path::parent_path(path: LinkerOutputFile);
2625 {
2626 llvm::TimeTraceScope TimeScope("Emit DTLTO JSON");
2627 sys::path::append(path&: JsonFile, a: sys::path::stem(path: LinkerOutputFile) + "." +
2628 UID + ".dist-file.json");
2629 // Cleanup DTLTO JSON file on abnormal process exit.
2630 if (!SaveTemps)
2631 llvm::sys::RemoveFileOnSignal(Filename: JsonFile);
2632 if (!emitDistributorJson(DistributorJson: JsonFile))
2633 return make_error<StringError>(
2634 Args: BCError + "failed to generate distributor JSON script: " + JsonFile,
2635 Args: inconvertibleErrorCode());
2636 }
2637 llvm::scope_exit CleanJson([&] {
2638 if (!SaveTemps)
2639 removeFile(FileName: JsonFile);
2640 });
2641
2642 {
2643 llvm::TimeTraceScope TimeScope("Execute DTLTO distributor",
2644 DistributorPath);
2645 // Checks if we have any jobs that don't have corresponding cache entries.
2646 if (CachedJobs.load() < Jobs.size()) {
2647 SmallVector<StringRef, 3> Args = {DistributorPath};
2648 llvm::append_range(C&: Args, R&: DistributorArgs);
2649 Args.push_back(Elt: JsonFile);
2650 std::string ErrMsg;
2651 if (sys::ExecuteAndWait(Program: Args[0], Args,
2652 /*Env=*/std::nullopt, /*Redirects=*/{},
2653 /*SecondsToWait=*/0, /*MemoryLimit=*/0,
2654 ErrMsg: &ErrMsg)) {
2655 return make_error<StringError>(
2656 Args: BCError + "distributor execution failed" +
2657 (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
2658 Args: inconvertibleErrorCode());
2659 }
2660 }
2661 }
2662
2663 {
2664 llvm::TimeTraceScope FilesScope("Add DTLTO files to the link");
2665 for (auto &Job : Jobs) {
2666 if (!Job.CacheKey.empty() && Job.Cached) {
2667 assert(Cache.isValid());
2668 continue;
2669 }
2670 // Load the native object from a file into a memory buffer
2671 // and store its contents in the output buffer.
2672 auto ObjFileMbOrErr =
2673 MemoryBuffer::getFile(Filename: Job.NativeObjectPath, /*IsText=*/false,
2674 /*RequiresNullTerminator=*/false);
2675 if (std::error_code EC = ObjFileMbOrErr.getError())
2676 return make_error<StringError>(
2677 Args: BCError + "cannot open native object file: " +
2678 Job.NativeObjectPath + ": " + EC.message(),
2679 Args: inconvertibleErrorCode());
2680
2681 MemoryBufferRef ObjFileMbRef = ObjFileMbOrErr->get()->getMemBufferRef();
2682 if (Cache.isValid()) {
2683 // Cache hits are taken care of earlier. At this point, we could only
2684 // have cache misses.
2685 assert(Job.CacheAddStream);
2686 // Obtain a file stream for a storing a cache entry.
2687 auto CachedFileStreamOrErr =
2688 Job.CacheAddStream(Job.Task, Job.ModuleID);
2689 if (!CachedFileStreamOrErr)
2690 return joinErrors(
2691 E1: CachedFileStreamOrErr.takeError(),
2692 E2: createStringError(EC: inconvertibleErrorCode(),
2693 Fmt: "Cannot get a cache file stream: %s",
2694 Vals: Job.NativeObjectPath.data()));
2695 // Store a file buffer into the cache stream.
2696 auto &CacheStream = *(CachedFileStreamOrErr->get());
2697 *(CacheStream.OS) << ObjFileMbRef.getBuffer();
2698 if (Error Err = CacheStream.commit())
2699 return Err;
2700 } else {
2701 auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
2702 if (Error Err = StreamOrErr.takeError())
2703 report_fatal_error(Err: std::move(Err));
2704 auto &Stream = *StreamOrErr->get();
2705 *Stream.OS << ObjFileMbRef.getBuffer();
2706 if (Error Err = Stream.commit())
2707 report_fatal_error(Err: std::move(Err));
2708 }
2709 }
2710 }
2711 return Error::success();
2712 }
2713};
2714} // end anonymous namespace
2715
2716ThinBackend lto::createOutOfProcessThinBackend(
2717 ThreadPoolStrategy Parallelism, lto::IndexWriteCallback OnWrite,
2718 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
2719 StringRef LinkerOutputFile, StringRef Distributor,
2720 ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
2721 ArrayRef<StringRef> RemoteCompilerPrependArgs,
2722 ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps) {
2723 auto Func =
2724 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
2725 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
2726 AddStreamFn AddStream, FileCache Cache) {
2727 return std::make_unique<OutOfProcessThinBackend>(
2728 args: Conf, args&: CombinedIndex, args: Parallelism, args: ModuleToDefinedGVSummaries,
2729 args&: AddStream, args&: Cache, args: OnWrite, args: ShouldEmitIndexFiles,
2730 args: ShouldEmitImportsFiles, args: LinkerOutputFile, args: Distributor,
2731 args: DistributorArgs, args: RemoteCompiler, args: RemoteCompilerPrependArgs,
2732 args: RemoteCompilerArgs, args: SaveTemps);
2733 };
2734 return ThinBackend(Func, Parallelism);
2735}
2736