InputFiles.cpp source code [llvm_projects/lld/MachO/InputFiles.cpp]

1	//===- InputFiles.cpp -----------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains functions to parse Mach-O object files. In this comment,
10	// we describe the Mach-O file structure and how we parse it.
11	//
12	// Mach-O is not very different from ELF or COFF. The notion of symbols,
13	// sections and relocations exists in Mach-O as it does in ELF and COFF.
14	//
15	// Perhaps the notion that is new to those who know ELF/COFF is "subsections".
16	// In ELF/COFF, sections are an atomic unit of data copied from input files to
17	// output files. When we merge or garbage-collect sections, we treat each
18	// section as an atomic unit. In Mach-O, that's not the case. Sections can
19	// consist of multiple subsections, and subsections are a unit of merging and
20	// garbage-collecting. Therefore, Mach-O's subsections are more similar to
21	// ELF/COFF's sections than Mach-O's sections are.
22	//
23	// A section can have multiple symbols. A symbol that does not have the
24	// N_ALT_ENTRY attribute indicates a beginning of a subsection. Therefore, by
25	// definition, a symbol is always present at the beginning of each subsection. A
26	// symbol with N_ALT_ENTRY attribute does not start a new subsection and can
27	// point to a middle of a subsection.
28	//
29	// The notion of subsections also affects how relocations are represented in
30	// Mach-O. All references within a section need to be explicitly represented as
31	// relocations if they refer to different subsections, because we obviously need
32	// to fix up addresses if subsections are laid out in an output file differently
33	// than they were in object files. To represent that, Mach-O relocations can
34	// refer to an unnamed location via its address. Scattered relocations (those
35	// with the R_SCATTERED bit set) always refer to unnamed locations.
36	// Non-scattered relocations refer to an unnamed location if r_extern is not set
37	// and r_symbolnum is zero.
38	//
39	// Without the above differences, I think you can use your knowledge about ELF
40	// and COFF for Mach-O.
41	//
42	//===----------------------------------------------------------------------===//
43
44	#include "InputFiles.h"
45	#include "Config.h"
46	#include "Driver.h"
47	#include "Dwarf.h"
48	#include "EhFrame.h"
49	#include "ExportTrie.h"
50	#include "InputSection.h"
51	#include "MachOStructs.h"
52	#include "ObjC.h"
53	#include "OutputSection.h"
54	#include "OutputSegment.h"
55	#include "SymbolTable.h"
56	#include "Symbols.h"
57	#include "SyntheticSections.h"
58	#include "Target.h"
59
60	#include "lld/Common/CommonLinkerContext.h"
61	#include "lld/Common/DWARF.h"
62	#include "lld/Common/Reproduce.h"
63	#include "llvm/ADT/iterator.h"
64	#include "llvm/BinaryFormat/MachO.h"
65	#include "llvm/LTO/LTO.h"
66	#include "llvm/Support/BinaryStreamReader.h"
67	#include "llvm/Support/Endian.h"
68	#include "llvm/Support/LEB128.h"
69	#include "llvm/Support/MemoryBuffer.h"
70	#include "llvm/Support/Path.h"
71	#include "llvm/Support/TarWriter.h"
72	#include "llvm/Support/TimeProfiler.h"
73	#include "llvm/TextAPI/Architecture.h"
74	#include "llvm/TextAPI/InterfaceFile.h"
75
76	#include <optional>
77	#include <type_traits>
78
79	using namespace llvm;
80	using namespace llvm::MachO;
81	using namespace llvm::support::endian;
82	using namespace llvm::sys;
83	using namespace lld;
84	using namespace lld::macho;
85
86	// Returns "<internal>", "foo.a(bar.o)", or "baz.o".
87	std::string lld::toString(const InputFile *f) {
88	if (!f)
89	return "<internal>";
90
91	// Multiple dylibs can be defined in one .tbd file.
92	if (const auto *dylibFile = dyn_cast<DylibFile>(Val: f))
93	if (f->getName().ends_with(Suffix: ".tbd"))
94	return (f->getName() + "(" + dylibFile->installName + ")").str();
95
96	if (f->archiveName.empty())
97	return std::string (f->getName());
98	return (f->archiveName + "(" + path::filename(path: f->getName()) + ")").str();
99	}
100
101	std::string lld::toString(const Section &sec) {
102	return (toString(f: sec.file) + ":(" + sec.name + ")").str();
103	}
104
105	SetVector<InputFile *> macho::inputFiles;
106	std::unique_ptr<TarWriter> macho::tar;
107	int InputFile::idCount = `0`;
108
109	static VersionTuple decodeVersion(uint32_t version) {
110	unsigned major = version >> `16`;
111	unsigned minor = (version >> `8`) & `0xffu`;
112	unsigned subMinor = version & `0xffu`;
113	return VersionTuple (major, minor, subMinor);
114	}
115
116	static std::vector<PlatformInfo> getPlatformInfos(const InputFile *input) {
117	if (!isa<ObjFile>(Val: input) && !isa<DylibFile>(Val: input))
118	return {};
119
120	const char *hdr = input->mb.getBufferStart();
121
122	// "Zippered" object files can have multiple LC_BUILD_VERSION load commands.
123	std::vector<PlatformInfo> platformInfos;
124	for (auto *cmd : findCommands<build_version_command>(anyHdr: hdr, types: LC_BUILD_VERSION)) {
125	PlatformInfo info;
126	info.target.Platform = static_cast<PlatformType>(cmd->platform);
127	info.target.MinDeployment = decodeVersion(version: cmd->minos);
128	platformInfos.emplace_back(args: std::move(info));
129	}
130	for (auto *cmd : findCommands<version_min_command>(
131	anyHdr: hdr, types: LC_VERSION_MIN_MACOSX, types: LC_VERSION_MIN_IPHONEOS,
132	types: LC_VERSION_MIN_TVOS, types: LC_VERSION_MIN_WATCHOS)) {
133	PlatformInfo info;
134	switch (cmd->cmd) {
135	case LC_VERSION_MIN_MACOSX:
136	info.target.Platform = PLATFORM_MACOS;
137	break;
138	case LC_VERSION_MIN_IPHONEOS:
139	info.target.Platform = PLATFORM_IOS;
140	break;
141	case LC_VERSION_MIN_TVOS:
142	info.target.Platform = PLATFORM_TVOS;
143	break;
144	case LC_VERSION_MIN_WATCHOS:
145	info.target.Platform = PLATFORM_WATCHOS;
146	break;
147	}
148	info.target.MinDeployment = decodeVersion(version: cmd->version);
149	platformInfos.emplace_back(args: std::move(info));
150	}
151
152	return platformInfos;
153	}
154
155	static bool checkCompatibility(const InputFile *input) {
156	std::vector<PlatformInfo> platformInfos = getPlatformInfos(input);
157	if (platformInfos.empty())
158	return true;
159
160	auto it = find_if(Range&: platformInfos, P: [&](const PlatformInfo &info) {
161	return removeSimulator(platform: info.target.Platform) ==
162	removeSimulator(platform: config ->platform());
163	});
164	if (it == platformInfos.end()) {
165	std::string platformNames;
166	raw_string_ostream os(platformNames);
167	interleave(
168	c: platformInfos, os,
169	each_fn: [&](const PlatformInfo &info) {
170	os << getPlatformName(Platform: info.target.Platform);
171	},
172	separator: "/");
173	error(msg: toString(f: input) + " has platform " + platformNames +
174	Twine (", which is different from target platform ") +
175	getPlatformName(Platform: config ->platform()));
176	return false;
177	}
178
179	if (it ->target.MinDeployment > config ->platformInfo.target.MinDeployment)
180	warn(msg: toString(f: input) + " has version " +
181	it ->target.MinDeployment.getAsString() +
182	", which is newer than target minimum of " +
183	config ->platformInfo.target.MinDeployment.getAsString());
184
185	return true;
186	}
187
188	template <class Header>
189	static bool compatWithTargetArch(const InputFile file, const* Header *hdr) {
190	uint32_t cpuType;
191	std::tie(args&: cpuType, args: std::ignore) = getCPUTypeFromArchitecture(Arch: config ->arch());
192
193	if (hdr->cputype != cpuType) {
194	Architecture arch =
195	getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
196	auto msg = config ->errorForArchMismatch
197	? static_cast<void ()(const* Twine &)>(error)
198	: warn;
199
200	msg(toString(f: file) + " has architecture " + getArchitectureName(Arch: arch) +
201	" which is incompatible with target architecture " +
202	getArchitectureName(Arch: config ->arch()));
203	return false;
204	}
205
206	return checkCompatibility(input: file);
207	}
208
209	// This cache mostly exists to store system libraries (and .tbds) as they're
210	// loaded, rather than the input archives, which are already cached at a higher
211	// level, and other files like the filelist that are only read once.
212	// Theoretically this caching could be more efficient by hoisting it, but that
213	// would require altering many callers to track the state.
214	DenseMap<CachedHashStringRef, MemoryBufferRef> macho::cachedReads;
215	// Open a given file path and return it as a memory-mapped file.
216	std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
217	CachedHashStringRef key(path);
218	auto entry = cachedReads.find(Val: key);
219	if (entry != cachedReads.end())
220	return entry ->second;
221
222	ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(Filename: path);
223	if (std::error_code ec = mbOrErr.getError()) {
224	error(msg: "cannot open " + path + ": " + ec.message());
225	return std::nullopt;
226	}
227
228	std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
229	MemoryBufferRef mbref = mb ->getMemBufferRef();
230	make<std::unique_ptr<MemoryBuffer>>(args: std::move(mb)); // take mb ownership
231
232	// If this is a regular non-fat file, return it.
233	const char *buf = mbref.getBufferStart();
234	const auto hdr = reinterpret_cast<const* fat_header *>(buf);
235	if (mbref.getBufferSize() < sizeof(uint32_t) \|\|
236	read32be(P: &hdr->magic) != FAT_MAGIC) {
237	if (tar)
238	tar ->append(Path: relativeToRoot(path), Data: mbref.getBuffer());
239	return cachedReads [key] = mbref;
240	}
241
242	llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
243
244	// Object files and archive files may be fat files, which contain multiple
245	// real files for different CPU ISAs. Here, we search for a file that matches
246	// with the current link target and returns it as a MemoryBufferRef.
247	const auto arch = reinterpret_cast<const* fat_arch >(buf + sizeof(hdr));
248	auto getArchName = [](uint32_t cpuType, uint32_t cpuSubtype) {
249	return getArchitectureName(Arch: getArchitectureFromCpuType(CPUType: cpuType, CPUSubType: cpuSubtype));
250	};
251
252	std::vector<StringRef> archs;
253	for (uint32_t i = `0`, n = read32be(P: &hdr->nfat_arch); i < n; ++i) {
254	if (reinterpret_cast<const char *>(arch + i + `1`) >
255	buf + mbref.getBufferSize()) {
256	error(msg: path + ": fat_arch struct extends beyond end of file");
257	return std::nullopt;
258	}
259
260	uint32_t cpuType = read32be(P: &arch[i].cputype);
261	uint32_t cpuSubtype =
262	read32be(P: &arch[i].cpusubtype) & ~MachO::CPU_SUBTYPE_MASK;
263
264	// FIXME: LD64 has a more complex fallback logic here.
265	// Consider implementing that as well?
266	if (cpuType != static_cast<uint32_t>(target->cpuType) \|\|
267	cpuSubtype != target->cpuSubtype) {
268	archs.emplace_back(args: getArchName (cpuType, cpuSubtype));
269	continue;
270	}
271
272	uint32_t offset = read32be(P: &arch[i].offset);
273	uint32_t size = read32be(P: &arch[i].size);
274	if (offset + size > mbref.getBufferSize())
275	error(msg: path + ": slice extends beyond end of file");
276	if (tar)
277	tar ->append(Path: relativeToRoot(path), Data: mbref.getBuffer());
278	return cachedReads [key] = MemoryBufferRef (StringRef (buf + offset, size),
279	path.copy(A&: bAlloc));
280	}
281
282	auto targetArchName = getArchName (target->cpuType, target->cpuSubtype);
283	warn(msg: path + ": ignoring file because it is universal (" + join(R&: archs, Separator: ",") +
284	") but does not contain the " + targetArchName + " architecture");
285	return std::nullopt;
286	}
287
288	InputFile::InputFile(Kind kind, const InterfaceFile &interface)
289	: id(idCount++), fileKind(kind), name(saver().save(S: interface.getPath())) {}
290
291	// Some sections comprise of fixed-size records, so instead of splitting them at
292	// symbol boundaries, we split them based on size. Records are distinct from
293	// literals in that they may contain references to other sections, instead of
294	// being leaf nodes in the InputSection graph.
295	//
296	// Note that "record" is a term I came up with. In contrast, "literal" is a term
297	// used by the Mach-O format.
298	static std::optional<size_t> getRecordSize(StringRef segname, StringRef name) {
299	if (name == section_names::compactUnwind) {
300	if (segname == segment_names::ld)
301	return target->wordSize == `8` ? `32` : `20`;
302	}
303	if (!config ->dedupStrings)
304	return {};
305
306	if (name == section_names::cfString && segname == segment_names::data)
307	return target->wordSize == `8` ? `32` : `16`;
308
309	if (config ->icfLevel == ICFLevel::none)
310	return {};
311
312	if (name == section_names::objcClassRefs && segname == segment_names::data)
313	return target->wordSize;
314
315	if (name == section_names::objcSelrefs && segname == segment_names::data)
316	return target->wordSize;
317	return {};
318	}
319
320	static Error parseCallGraph(ArrayRef<uint8_t> data,
321	std::vector<CallGraphEntry> &callGraph) {
322	TimeTraceScope timeScope("Parsing call graph section");
323	BinaryStreamReader reader(data, llvm::endianness::little);
324	while (!reader.empty()) {
325	uint32_t fromIndex, toIndex;
326	uint64_t count;
327	if (Error err = reader.readInteger(Dest&: fromIndex))
328	return err;
329	if (Error err = reader.readInteger(Dest&: toIndex))
330	return err;
331	if (Error err = reader.readInteger(Dest&: count))
332	return err;
333	callGraph.emplace_back(args&: fromIndex, args&: toIndex, args&: count);
334	}
335	return Error::success();
336	}
337
338	// Parse the sequence of sections within a single LC_SEGMENT(_64).
339	// Split each section into subsections.
340	template <class SectionHeader>
341	void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
342	sections.reserve(n: sectionHeaders.size());
343	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
344
345	for (const SectionHeader &sec : sectionHeaders) {
346	StringRef name =
347	StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
348	StringRef segname =
349	StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
350	sections.push_back(make<Section>(this, segname, name, sec.flags, sec.addr));
351	if (sec.align >= `32`) {
352	error("alignment " + std::to_string(sec.align) + " of section " + name +
353	" is too large");
354	continue;
355	}
356	Section &section = *sections.back();
357	uint32_t align = `1` << sec.align;
358	ArrayRef<uint8_t> data = {isZeroFill(sec.flags) ? nullptr
359	: buf + sec.offset,
360	static_cast<size_t>(sec.size)};
361
362	auto splitRecords = [&](size_t recordSize) -> void {
363	if (data.empty())
364	return;
365	Subsections &subsections = section.subsections;
366	subsections.reserve(n: data.size() / recordSize);
367	for (uint64_t off = `0`; off < data.size(); off += recordSize) {
368	auto *isec = make<ConcatInputSection>(
369	args&: section, args: data.slice(N: off, M: std::min(a: data.size(), b: recordSize)), args&: align);
370	subsections.push_back(x: {.offset: off, .isec: isec});
371	}
372	section.doneSplitting = true;
373	};
374
375	if (sectionType(sec.flags) == S_CSTRING_LITERALS) {
376	if (sec.nreloc)
377	fatal(toString(f: this) + ": " + sec.segname + "," + sec.sectname +
378	" contains relocations, which is unsupported");
379	bool dedupLiterals =
380	name == section_names::objcMethname \|\| config ->dedupStrings;
381	InputSection *isec =
382	make<CStringInputSection>(args&: section, args&: data, args&: align, args&: dedupLiterals);
383	// FIXME: parallelize this?
384	cast<CStringInputSection>(Val: isec)->splitIntoPieces();
385	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
386	} else if (isWordLiteralSection(sec.flags)) {
387	if (sec.nreloc)
388	fatal(toString(f: this) + ": " + sec.segname + "," + sec.sectname +
389	" contains relocations, which is unsupported");
390	InputSection *isec = make<WordLiteralInputSection>(args&: section, args&: data, args&: align);
391	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
392	} else if (auto recordSize = getRecordSize(segname, name)) {
393	splitRecords(*recordSize);
394	} else if (name == section_names::ehFrame &&
395	segname == segment_names::text) {
396	splitEhFrames(dataArr: data, ehFrameSection&: *sections.back());
397	} else if (segname == segment_names::llvm) {
398	if (config ->callGraphProfileSort && name == section_names::cgProfile)
399	checkError(e: parseCallGraph(data, callGraph));
400	// ld64 does not appear to emit contents from sections within the __LLVM
401	// segment. Symbols within those sections point to bitcode metadata
402	// instead of actual symbols. Global symbols within those sections could
403	// have the same name without causing duplicate symbol errors. To avoid
404	// spurious duplicate symbol errors, we do not parse these sections.
405	// TODO: Evaluate whether the bitcode metadata is needed.
406	} else if (name == section_names::objCImageInfo &&
407	segname == segment_names::data) {
408	objCImageInfo = data;
409	} else {
410	if (name == section_names::addrSig)
411	addrSigSection = sections.back();
412
413	auto *isec = make<ConcatInputSection>(args&: section, args&: data, args&: align);
414	if (isDebugSection(flags: isec->getFlags()) &&
415	isec->getSegName() == segment_names::dwarf) {
416	// Instead of emitting DWARF sections, we emit STABS symbols to the
417	// object files that contain them. We filter them out early to avoid
418	// parsing their relocations unnecessarily.
419	debugSections.push_back(x: isec);
420	} else {
421	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
422	}
423	}
424	}
425	}
426
427	void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
428	EhReader reader(this, data, /dataOff=/`0`);
429	size_t off = `0`;
430	while (off < reader.size()) {
431	uint64_t frameOff = off;
432	uint64_t length = reader.readLength(off: &off);
433	if (length == `0`)
434	break;
435	uint64_t fullLength = length + (off - frameOff);
436	off += length;
437	// We hard-code an alignment of 1 here because we don't actually want our
438	// EH frames to be aligned to the section alignment. EH frame decoders don't
439	// expect this alignment. Moreover, each EH frame must start where the
440	// previous one ends, and where it ends is indicated by the length field.
441	// Unless we update the length field (troublesome), we should keep the
442	// alignment to 1.
443	// Note that we still want to preserve the alignment of the overall section,
444	// just not of the individual EH frames.
445	ehFrameSection.subsections.push_back(
446	x: {.offset: frameOff, .isec: make<ConcatInputSection>(args&: ehFrameSection,
447	args: data.slice(N: frameOff, M: fullLength),
448	/align=/args: `1`)});
449	}
450	ehFrameSection.doneSplitting = true;
451	}
452
453	template <class T>
454	static Section findContainingSection(const* std::vector<Section *> &sections,
455	T *offset) {
456	static_assert(std::is_same<uint64_t, T>::value \|\|
457	std::is_same<uint32_t, T>::value,
458	"unexpected type for offset");
459	auto it = std::prev(llvm::upper_bound(
460	sections, *offset,
461	[](uint64_t value, const Section sec) { return* value < sec->addr; }));
462	offset -= (it)->addr;
463	return *it;
464	}
465
466	// Find the subsection corresponding to the greatest section offset that is <=
467	// that of the given offset.
468	//
469	// offset: an offset relative to the start of the original InputSection (before
470	// any subsection splitting has occurred). It will be updated to represent the
471	// same location as an offset relative to the start of the containing
472	// subsection.
473	template <class T>
474	static InputSection findContainingSubsection(const* Section &section,
475	T *offset) {
476	static_assert(std::is_same<uint64_t, T>::value \|\|
477	std::is_same<uint32_t, T>::value,
478	"unexpected type for offset");
479	auto it = std::prev(llvm::upper_bound(
480	section.subsections, *offset,
481	[](uint64_t value, Subsection subsec) { return value < subsec.offset; }));
482	*offset -= it->offset;
483	return it->isec;
484	}
485
486	// Find a symbol at offset `off` within `isec`.
487	static Defined findSymbolAtOffset(const* ConcatInputSection *isec,
488	uint64_t off) {
489	auto it = llvm::lower_bound(Range: isec->symbols, Value&: off, C: [](Defined *d, uint64_t off) {
490	return d->value < off;
491	});
492	// The offset should point at the exact address of a symbol (with no addend.)
493	if (it == isec->symbols.end() \|\| (*it)->value != off) {
494	assert(isec->wasCoalesced);
495	return nullptr;
496	}
497	return *it;
498	}
499
500	template <class SectionHeader>
501	static bool validateRelocationInfo(InputFile file, const* SectionHeader &sec,
502	relocation_info rel) {
503	const RelocAttrs &relocAttrs = target->getRelocAttrs(type: rel.r_type);
504	bool valid = true;
505	auto message = [relocAttrs, file, sec, rel, &valid](const Twine &diagnostic) {
506	valid = false;
507	return (relocAttrs.name + " relocation " + diagnostic + " at offset " +
508	std::to_string(val: rel.r_address) + " of " + sec.segname + "," +
509	sec.sectname + " in " + toString(f: file))
510	.str();
511	};
512
513	if (!relocAttrs.hasAttr(b: RelocAttrBits::LOCAL) && !rel.r_extern)
514	error(message("must be extern"));
515	if (relocAttrs.hasAttr(b: RelocAttrBits::PCREL) != rel.r_pcrel)
516	error(message(Twine ("must ") + (rel.r_pcrel ? "not " : "") +
517	"be PC-relative"));
518	if (isThreadLocalVariables(sec.flags) &&
519	!relocAttrs.hasAttr(b: RelocAttrBits::UNSIGNED))
520	error(message("not allowed in thread-local section, must be UNSIGNED"));
521	if (rel.r_length < `2` \|\| rel.r_length > `3` \|\|
522	!relocAttrs.hasAttr(b: static_cast<RelocAttrBits>(`1` << rel.r_length))) {
523	static SmallVector<StringRef, `4`> widths{"0", "4", "8", "4 or 8"};
524	error(message("has width " + std::to_string(val: `1` << rel.r_length) +
525	" bytes, but must be " +
526	widths [(static_cast<int>(relocAttrs.bits) >> `2`) & `3`] +
527	" bytes"));
528	}
529	return valid;
530	}
531
532	template <class SectionHeader>
533	void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
534	const SectionHeader &sec, Section &section) {
535	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
536	ArrayRef<relocation_info> relInfos(
537	reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
538
539	Subsections &subsections = section.subsections;
540	auto subsecIt = subsections.rbegin();
541	for (size_t i = `0`; i < relInfos.size(); i++) {
542	// Paired relocations serve as Mach-O's method for attaching a
543	// supplemental datum to a primary relocation record. ELF does not
544	// need them because the _RELOC_RELA records contain the extra*
545	// addend field, vs. _RELOC_REL which omit the addend.*
546	//
547	// The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
548	// and the paired _RELOC_UNSIGNED record holds the minuend. The*
549	// datum for each is a symbolic address. The result is the offset
550	// between two addresses.
551	//
552	// The ARM64_RELOC_ADDEND record holds the addend, and the paired
553	// ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
554	// base symbolic address.
555	//
556	// Note: X86 does not use _RELOC_ADDEND because it can embed an addend into*
557	// the instruction stream. On X86, a relocatable address field always
558	// occupies an entire contiguous sequence of byte(s), so there is no need to
559	// merge opcode bits with address bits. Therefore, it's easy and convenient
560	// to store addends in the instruction-stream bytes that would otherwise
561	// contain zeroes. By contrast, RISC ISAs such as ARM64 mix opcode bits with
562	// address bits so that bitwise arithmetic is necessary to extract and
563	// insert them. Storing addends in the instruction stream is possible, but
564	// inconvenient and more costly at link time.
565
566	relocation_info relInfo = relInfos [i];
567	bool isSubtrahend =
568	target->hasAttr(type: relInfo.r_type, bit: RelocAttrBits::SUBTRAHEND);
569	int64_t pairedAddend = `0`;
570	if (target->hasAttr(type: relInfo.r_type, bit: RelocAttrBits::ADDEND)) {
571	pairedAddend = SignExtend64<`24`>(x: relInfo.r_symbolnum);
572	relInfo = relInfos [++i];
573	}
574	assert(i < relInfos.size());
575	if (!validateRelocationInfo(this, sec, relInfo))
576	continue;
577	if (relInfo.r_address & R_SCATTERED)
578	fatal(msg: "TODO: Scattered relocations not supported");
579
580	int64_t embeddedAddend = target->getEmbeddedAddend(mb, offset: sec.offset, relInfo);
581	assert(!(embeddedAddend && pairedAddend));
582	int64_t totalAddend = pairedAddend + embeddedAddend;
583	Reloc r;
584	r.type = relInfo.r_type;
585	r.pcrel = relInfo.r_pcrel;
586	r.length = relInfo.r_length;
587	r.offset = relInfo.r_address;
588	if (relInfo.r_extern) {
589	r.referent = symbols [relInfo.r_symbolnum];
590	r.addend = isSubtrahend ? `0` : totalAddend;
591	} else {
592	assert(!isSubtrahend);
593	const SectionHeader &referentSecHead =
594	sectionHeaders[relInfo.r_symbolnum - `1`];
595	uint64_t referentOffset;
596	if (relInfo.r_pcrel) {
597	// The implicit addend for pcrel section relocations is the pcrel offset
598	// in terms of the addresses in the input file. Here we adjust it so
599	// that it describes the offset from the start of the referent section.
600	// FIXME This logic was written around x86_64 behavior -- ARM64 doesn't
601	// have pcrel section relocations. We may want to factor this out into
602	// the arch-specific .cpp file.
603	assert(target->hasAttr(r.type, RelocAttrBits::BYTE4));
604	referentOffset = sec.addr + relInfo.r_address + `4` + totalAddend -
605	referentSecHead.addr;
606	} else {
607	// The addend for a non-pcrel relocation is its absolute address.
608	referentOffset = totalAddend - referentSecHead.addr;
609	}
610	r.referent = findContainingSubsection(section: *sections [relInfo.r_symbolnum - `1`],
611	offset: &referentOffset);
612	r.addend = referentOffset;
613	}
614
615	// Find the subsection that this relocation belongs to.
616	// Though not required by the Mach-O format, clang and gcc seem to emit
617	// relocations in order, so let's take advantage of it. However, ld64 emits
618	// unsorted relocations (in `-r` mode), so we have a fallback for that
619	// uncommon case.
620	InputSection *subsec;
621	while (subsecIt != subsections.rend() && subsecIt ->offset > r.offset)
622	++subsecIt;
623	if (subsecIt == subsections.rend() \|\|
624	subsecIt ->offset + subsecIt ->isec->getSize() <= r.offset) {
625	subsec = findContainingSubsection(section, offset: &r.offset);
626	// Now that we know the relocs are unsorted, avoid trying the 'fast path'
627	// for the other relocations.
628	subsecIt = subsections.rend();
629	} else {
630	subsec = subsecIt ->isec;
631	r.offset -= subsecIt ->offset;
632	}
633	subsec->relocs.push_back(x: r);
634
635	if (isSubtrahend) {
636	relocation_info minuendInfo = relInfos [++i];
637	// SUBTRACTOR relocations should always be followed by an UNSIGNED one
638	// attached to the same address.
639	assert(target->hasAttr(minuendInfo.r_type, RelocAttrBits::UNSIGNED) &&
640	relInfo.r_address == minuendInfo.r_address);
641	Reloc p;
642	p.type = minuendInfo.r_type;
643	if (minuendInfo.r_extern) {
644	p.referent = symbols [minuendInfo.r_symbolnum];
645	p.addend = totalAddend;
646	} else {
647	uint64_t referentOffset =
648	totalAddend - sectionHeaders[minuendInfo.r_symbolnum - `1`].addr;
649	p.referent = findContainingSubsection(
650	section: *sections [minuendInfo.r_symbolnum - `1`], offset: &referentOffset);
651	p.addend = referentOffset;
652	}
653	subsec->relocs.push_back(x: p);
654	}
655	}
656	}
657
658	template <class NList>
659	static macho::Symbol createDefined(const* NList &sym, StringRef name,
660	InputSection *isec, uint64_t value,
661	uint64_t size, bool forceHidden) {
662	// Symbol scope is determined by sym.n_type & (N_EXT \| N_PEXT):
663	// N_EXT: Global symbols. These go in the symbol table during the link,
664	// and also in the export table of the output so that the dynamic
665	// linker sees them.
666	// N_EXT \| N_PEXT: Linkage unit (think: dylib) scoped. These go in the
667	// symbol table during the link so that duplicates are
668	// either reported (for non-weak symbols) or merged
669	// (for weak symbols), but they do not go in the export
670	// table of the output.
671	// N_PEXT: llvm-mc does not emit these, but `ld -r` (wherein ld64 emits
672	// object files) may produce them. LLD does not yet support -r.
673	// These are translation-unit scoped, identical to the `0` case.
674	// 0: Translation-unit scoped. These are not in the symbol table during
675	// link, and not in the export table of the output either.
676	bool isWeakDefCanBeHidden =
677	(sym.n_desc & (N_WEAK_DEF \| N_WEAK_REF)) == (N_WEAK_DEF \| N_WEAK_REF);
678
679	assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
680
681	if (sym.n_type & N_EXT) {
682	// -load_hidden makes us treat global symbols as linkage unit scoped.
683	// Duplicates are reported but the symbol does not go in the export trie.
684	bool isPrivateExtern = sym.n_type & N_PEXT \|\| forceHidden;
685
686	// lld's behavior for merging symbols is slightly different from ld64:
687	// ld64 picks the winning symbol based on several criteria (see
688	// pickBetweenRegularAtoms() in ld64's SymbolTable.cpp), while lld
689	// just merges metadata and keeps the contents of the first symbol
690	// with that name (see SymbolTable::addDefined). For:
691	// inline function F in a TU built with -fvisibility-inlines-hidden*
692	// and inline function F in another TU built without that flag*
693	// ld64 will pick the one from the file built without
694	// -fvisibility-inlines-hidden.
695	// lld will instead pick the one listed first on the link command line and
696	// give it visibility as if the function was built without
697	// -fvisibility-inlines-hidden.
698	// If both functions have the same contents, this will have the same
699	// behavior. If not, it won't, but the input had an ODR violation in
700	// that case.
701	//
702	// Similarly, merging a symbol
703	// that's isPrivateExtern and not isWeakDefCanBeHidden with one
704	// that's not isPrivateExtern but isWeakDefCanBeHidden technically
705	// should produce one
706	// that's not isPrivateExtern but isWeakDefCanBeHidden. That matters
707	// with ld64's semantics, because it means the non-private-extern
708	// definition will continue to take priority if more private extern
709	// definitions are encountered. With lld's semantics there's no observable
710	// difference between a symbol that's isWeakDefCanBeHidden(autohide) or one
711	// that's privateExtern -- neither makes it into the dynamic symbol table,
712	// unless the autohide symbol is explicitly exported.
713	// But if a symbol is both privateExtern and autohide then it can't
714	// be exported.
715	// So we nullify the autohide flag when privateExtern is present
716	// and promote the symbol to privateExtern when it is not already.
717	if (isWeakDefCanBeHidden && isPrivateExtern)
718	isWeakDefCanBeHidden = false;
719	else if (isWeakDefCanBeHidden)
720	isPrivateExtern = true;
721	return symtab ->addDefined(
722	name, isec->getFile(), isec, value, size, isWeakDef: sym.n_desc & N_WEAK_DEF,
723	isPrivateExtern, isReferencedDynamically: sym.n_desc & REFERENCED_DYNAMICALLY,
724	noDeadStrip: sym.n_desc & N_NO_DEAD_STRIP, isWeakDefCanBeHidden);
725	}
726	bool includeInSymtab = !isPrivateLabel(name) && !isEhFrameSection(isec);
727	return make<Defined>(
728	name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
729	/isExternal=/false, /isPrivateExtern=/false, includeInSymtab,
730	sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
731	}
732
733	// Absolute symbols are defined symbols that do not have an associated
734	// InputSection. They cannot be weak.
735	template <class NList>
736	static macho::Symbol createAbsolute(const* NList &sym, InputFile *file,
737	StringRef name, bool forceHidden) {
738	assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
739
740	if (sym.n_type & N_EXT) {
741	bool isPrivateExtern = sym.n_type & N_PEXT \|\| forceHidden;
742	return symtab ->addDefined(name, file, nullptr, value: sym.n_value, /size=/`0`,
743	/isWeakDef=/false, isPrivateExtern,
744	/isReferencedDynamically=/false,
745	noDeadStrip: sym.n_desc & N_NO_DEAD_STRIP,
746	/isWeakDefCanBeHidden=/false);
747	}
748	return make<Defined>(name, file, nullptr, sym.n_value, /size=/`0`,
749	/isWeakDef=/false,
750	/isExternal=/false, /isPrivateExtern=/false,
751	/includeInSymtab=/true,
752	/isReferencedDynamically=/false,
753	sym.n_desc & N_NO_DEAD_STRIP);
754	}
755
756	template <class NList>
757	macho::Symbol ObjFile::parseNonSectionSymbol(const* NList &sym,
758	const char *strtab) {
759	StringRef name = StringRef(strtab + sym.n_strx);
760	uint8_t type = sym.n_type & N_TYPE;
761	bool isPrivateExtern = sym.n_type & N_PEXT \|\| forceHidden;
762	switch (type) {
763	case N_UNDF:
764	return sym.n_value == `0`
765	? symtab ->addUndefined(name, this, isWeakRef: sym.n_desc & N_WEAK_REF)
766	: symtab ->addCommon(name, this, size: sym.n_value,
767	align: `1` << GET_COMM_ALIGN(sym.n_desc),
768	isPrivateExtern);
769	case N_ABS:
770	return createAbsolute(sym, this, name, forceHidden);
771	case N_INDR: {
772	// Not much point in making local aliases -- relocs in the current file can
773	// just refer to the actual symbol itself. ld64 ignores these symbols too.
774	if (!(sym.n_type & N_EXT))
775	return nullptr;
776	StringRef aliasedName = StringRef(strtab + sym.n_value);
777	// isPrivateExtern is the only symbol flag that has an impact on the final
778	// aliased symbol.
779	auto alias = make<AliasSymbol>(args: this*, args&: name, args&: aliasedName, args&: isPrivateExtern);
780	aliases.push_back(x: alias);
781	return alias;
782	}
783	case N_PBUD:
784	error(msg: "TODO: support symbols of type N_PBUD");
785	return nullptr;
786	case N_SECT:
787	llvm_unreachable(
788	"N_SECT symbols should not be passed to parseNonSectionSymbol");
789	default:
790	llvm_unreachable("invalid symbol type");
791	}
792	}
793
794	template <class NList> static bool isUndef(const NList &sym) {
795	return (sym.n_type & N_TYPE) == N_UNDF && sym.n_value == `0`;
796	}
797
798	template <class LP>
799	void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
800	ArrayRef<typename LP::nlist> nList,
801	const char strtab, bool* subsectionsViaSymbols) {
802	using NList = typename LP::nlist;
803
804	// Groups indices of the symbols by the sections that contain them.
805	std::vector<std::vector<uint32_t>> symbolsBySection(sections.size());
806	symbols.resize(nList.size());
807	SmallVector<unsigned, `32`> undefineds;
808	for (uint32_t i = `0`; i < nList.size(); ++i) {
809	const NList &sym = nList[i];
810
811	// Ignore debug symbols for now.
812	// FIXME: may need special handling.
813	if (sym.n_type & N_STAB)
814	continue;
815
816	if ((sym.n_type & N_TYPE) == N_SECT) {
817	Subsections &subsections = sections[sym.n_sect - `1`]->subsections;
818	// parseSections() may have chosen not to parse this section.
819	if (subsections.empty())
820	continue;
821	symbolsBySection[sym.n_sect - `1`].push_back(i);
822	} else if (isUndef(sym)) {
823	undefineds.push_back(Elt: i);
824	} else {
825	symbols [i] = parseNonSectionSymbol(sym, strtab);
826	}
827	}
828
829	for (size_t i = `0`; i < sections.size(); ++i) {
830	Subsections &subsections = sections [i]->subsections;
831	if (subsections.empty())
832	continue;
833	std::vector<uint32_t> &symbolIndices = symbolsBySection [i];
834	uint64_t sectionAddr = sectionHeaders[i].addr;
835	uint32_t sectionAlign = `1u` << sectionHeaders[i].align;
836
837	// Some sections have already been split into subsections during
838	// parseSections(), so we simply need to match Symbols to the corresponding
839	// subsection here.
840	if (sections [i]->doneSplitting) {
841	for (size_t j = `0`; j < symbolIndices.size(); ++j) {
842	const uint32_t symIndex = symbolIndices [j];
843	const NList &sym = nList[symIndex];
844	StringRef name = strtab + sym.n_strx;
845	uint64_t symbolOffset = sym.n_value - sectionAddr;
846	InputSection *isec =
847	findContainingSubsection(section: *sections [i], offset: &symbolOffset);
848	if (symbolOffset != `0`) {
849	error(msg: toString(sec: *sections [i]) + ": symbol " + name +
850	" at misaligned offset");
851	continue;
852	}
853	symbols [symIndex] =
854	createDefined(sym, name, isec, `0`, isec->getSize(), forceHidden);
855	}
856	continue;
857	}
858	sections [i]->doneSplitting = true;
859
860	auto getSymName = [strtab](const NList& sym) -> StringRef {
861	return StringRef(strtab + sym.n_strx);
862	};
863
864	// Calculate symbol sizes and create subsections by splitting the sections
865	// along symbol boundaries.
866	// We populate subsections by repeatedly splitting the last (highest
867	// address) subsection.
868	llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
869	// Put extern weak symbols after other symbols at the same address so
870	// that weak symbol coalescing works correctly. See
871	// SymbolTable::addDefined() for details.
872	if (nList[lhs].n_value == nList[rhs].n_value &&
873	nList[lhs].n_type & N_EXT && nList[rhs].n_type & N_EXT)
874	return !(nList[lhs].n_desc & N_WEAK_DEF) && (nList[rhs].n_desc & N_WEAK_DEF);
875	return nList[lhs].n_value < nList[rhs].n_value;
876	});
877	for (size_t j = `0`; j < symbolIndices.size(); ++j) {
878	const uint32_t symIndex = symbolIndices [j];
879	const NList &sym = nList[symIndex];
880	StringRef name = getSymName(sym);
881	Subsection &subsec = subsections.back();
882	InputSection *isec = subsec.isec;
883
884	uint64_t subsecAddr = sectionAddr + subsec.offset;
885	size_t symbolOffset = sym.n_value - subsecAddr;
886	uint64_t symbolSize =
887	j + `1` < symbolIndices.size()
888	? nList[symbolIndices [j + `1`]].n_value - sym.n_value
889	: isec->data.size() - symbolOffset;
890	// There are 4 cases where we do not need to create a new subsection:
891	// 1. If the input file does not use subsections-via-symbols.
892	// 2. Multiple symbols at the same address only induce one subsection.
893	// (The symbolOffset == 0 check covers both this case as well as
894	// the first loop iteration.)
895	// 3. Alternative entry points do not induce new subsections.
896	// 4. If we have a literal section (e.g. __cstring and __literal4).
897	if (!subsectionsViaSymbols \|\| symbolOffset == `0` \|\|
898	sym.n_desc & N_ALT_ENTRY \|\| !isa<ConcatInputSection>(Val: isec)) {
899	isec->hasAltEntry = symbolOffset != `0`;
900	symbols [symIndex] = createDefined(sym, name, isec, symbolOffset,
901	symbolSize, forceHidden);
902	continue;
903	}
904	auto *concatIsec = cast<ConcatInputSection>(Val: isec);
905
906	auto nextIsec = make<ConcatInputSection>(args&: concatIsec);
907	nextIsec->wasCoalesced = false;
908	if (isZeroFill(flags: isec->getFlags())) {
909	// Zero-fill sections have NULL data.data() non-zero data.size()
910	nextIsec->data = {nullptr, isec->data.size() - symbolOffset};
911	isec->data = {nullptr, symbolOffset};
912	} else {
913	nextIsec->data = isec->data.slice(N: symbolOffset);
914	isec->data = isec->data.slice(N: `0`, M: symbolOffset);
915	}
916
917	// By construction, the symbol will be at offset zero in the new
918	// subsection.
919	symbols [symIndex] = createDefined(sym, name, nextIsec, /value=/`0`,
920	symbolSize, forceHidden);
921	// TODO: ld64 appears to preserve the original alignment as well as each
922	// subsection's offset from the last aligned address. We should consider
923	// emulating that behavior.
924	nextIsec->align = MinAlign(sectionAlign, sym.n_value);
925	subsections.push_back({sym.n_value - sectionAddr, nextIsec});
926	}
927	}
928
929	// Undefined symbols can trigger recursive fetch from Archives due to
930	// LazySymbols. Process defined symbols first so that the relative order
931	// between a defined symbol and an undefined symbol does not change the
932	// symbol resolution behavior. In addition, a set of interconnected symbols
933	// will all be resolved to the same file, instead of being resolved to
934	// different files.
935	for (unsigned i : undefineds)
936	symbols [i] = parseNonSectionSymbol(nList[i], strtab);
937	}
938
939	OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
940	StringRef sectName)
941	: InputFile (OpaqueKind, mb) {
942	const auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
943	ArrayRef<uint8_t> data = {buf, mb.getBufferSize()};
944	sections.push_back(x: make<Section>(/file=/args: this, args: segName.take_front(N: `16`),
945	args: sectName.take_front(N: `16`),
946	/flags=/args: `0`, /addr=/args: `0`));
947	Section &section = *sections.back();
948	ConcatInputSection *isec = make<ConcatInputSection>(args&: section, args&: data);
949	isec->live = true;
950	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
951	}
952
953	template <class LP>
954	void ObjFile::parseLinkerOptions(SmallVectorImpl<StringRef> &LCLinkerOptions) {
955	using Header = typename LP::mach_header;
956	auto hdr = reinterpret_cast<const* Header *>(mb.getBufferStart());
957
958	for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) {
959	StringRef data{reinterpret_cast<const char *>(cmd + `1`),
960	cmd->cmdsize - sizeof(linker_option_command)};
961	parseLCLinkerOption(LCLinkerOptions, this, cmd->count, data);
962	}
963	}
964
965	SmallVector<StringRef> macho::unprocessedLCLinkerOptions;
966	ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
967	bool lazy, bool forceHidden, bool compatArch,
968	bool builtFromBitcode)
969	: InputFile (ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden),
970	builtFromBitcode(builtFromBitcode) {
971	this->archiveName = std::string (archiveName);
972	this->compatArch = compatArch;
973	if (lazy) {
974	if (target->wordSize == `8`)
975	parseLazy<LP64>();
976	else
977	parseLazy<ILP32>();
978	} else {
979	if (target->wordSize == `8`)
980	parse<LP64>();
981	else
982	parse<ILP32>();
983	}
984	}
985
986	template <class LP> void ObjFile::parse() {
987	using Header = typename LP::mach_header;
988	using SegmentCommand = typename LP::segment_command;
989	using SectionHeader = typename LP::section;
990	using NList = typename LP::nlist;
991
992	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
993	auto hdr = reinterpret_cast<const* Header *>(mb.getBufferStart());
994
995	// If we've already checked the arch, then don't need to check again.
996	if (!compatArch)
997	return;
998	if (!(compatArch = compatWithTargetArch(this, hdr)))
999	return;
1000
1001	// We will resolve LC linker options once all native objects are loaded after
1002	// LTO is finished.
1003	SmallVector<StringRef, `4`> LCLinkerOptions;
1004	parseLinkerOptions<LP>(LCLinkerOptions);
1005	unprocessedLCLinkerOptions.append(RHS: LCLinkerOptions);
1006
1007	ArrayRef<SectionHeader> sectionHeaders;
1008	if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
1009	auto c = reinterpret_cast<const* SegmentCommand *>(cmd);
1010	sectionHeaders = ArrayRef<SectionHeader>{
1011	reinterpret_cast<const SectionHeader *>(c + `1`), c->nsects};
1012	parseSections(sectionHeaders);
1013	}
1014
1015	// TODO: Error on missing LC_SYMTAB?
1016	if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
1017	auto c = reinterpret_cast<const* symtab_command *>(cmd);
1018	ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
1019	c->nsyms);
1020	const char strtab = reinterpret_cast<const* char *>(buf) + c->stroff;
1021	bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
1022	parseSymbols<LP>(sectionHeaders, nList, strtab, subsectionsViaSymbols);
1023	}
1024
1025	// The relocations may refer to the symbols, so we parse them after we have
1026	// parsed all the symbols.
1027	for (size_t i = `0`, n = sections.size(); i < n; ++i)
1028	if (!sections [i]->subsections.empty())
1029	parseRelocations(sectionHeaders, sectionHeaders[i], *sections [i]);
1030
1031	parseDebugInfo();
1032
1033	Section ehFrameSection = nullptr*;
1034	Section compactUnwindSection = nullptr*;
1035	for (Section *sec : sections) {
1036	Section s = StringSwitch<Section >(sec->name)
1037	.Case(S: section_names::compactUnwind, Value: &compactUnwindSection)
1038	.Case(S: section_names::ehFrame, Value: &ehFrameSection)
1039	.Default(Value: nullptr);
1040	if (s)
1041	*s = sec;
1042	}
1043	if (compactUnwindSection)
1044	registerCompactUnwind(compactUnwindSection&: *compactUnwindSection);
1045	if (ehFrameSection)
1046	registerEhFrames(ehFrameSection&: *ehFrameSection);
1047	}
1048
1049	template <class LP> void ObjFile::parseLazy() {
1050	using Header = typename LP::mach_header;
1051	using NList = typename LP::nlist;
1052
1053	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1054	auto hdr = reinterpret_cast<const* Header *>(mb.getBufferStart());
1055
1056	if (!compatArch)
1057	return;
1058	if (!(compatArch = compatWithTargetArch(this, hdr)))
1059	return;
1060
1061	const load_command *cmd = findCommand(hdr, LC_SYMTAB);
1062	if (!cmd)
1063	return;
1064	auto c = reinterpret_cast<const* symtab_command *>(cmd);
1065	ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
1066	c->nsyms);
1067	const char strtab = reinterpret_cast<const* char *>(buf) + c->stroff;
1068	symbols.resize(nList.size());
1069	for (const auto &[i, sym] : llvm::enumerate(nList)) {
1070	if ((sym.n_type & N_EXT) && !isUndef(sym)) {
1071	// TODO: Bound checking
1072	StringRef name = strtab + sym.n_strx;
1073	symbols[i] = symtab ->addLazyObject(name, file&: *this);
1074	if (!lazy)
1075	break;
1076	}
1077	}
1078	}
1079
1080	void ObjFile::parseDebugInfo() {
1081	std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this);
1082	if (!dObj)
1083	return;
1084
1085	// We do not re-use the context from getDwarf() here as that function
1086	// constructs an expensive DWARFCache object.
1087	auto *ctx = make<DWARFContext>(
1088	args: std::move(dObj), args: "",
1089	args: [&](Error err) {
1090	warn(msg: toString(f: this) + ": " + toString(E: std::move(err)));
1091	},
1092	args: [&](Error warning) {
1093	warn(msg: toString(f: this) + ": " + toString(E: std::move(warning)));
1094	});
1095
1096	// TODO: Since object files can contain a lot of DWARF info, we should verify
1097	// that we are parsing just the info we need
1098	const DWARFContext::compile_unit_range &units = ctx->compile_units();
1099	// FIXME: There can be more than one compile unit per object file. See
1100	// PR48637.
1101	auto it = units.begin();
1102	compileUnit = it != units.end() ? it ->get() : nullptr;
1103	}
1104
1105	ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
1106	const auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1107	const load_command *cmd = findCommand(anyHdr: buf, types: LC_DATA_IN_CODE);
1108	if (!cmd)
1109	return {};
1110	const auto c = reinterpret_cast<const* linkedit_data_command *>(cmd);
1111	return {reinterpret_cast<const data_in_code_entry *>(buf + c->dataoff),
1112	c->datasize / sizeof(data_in_code_entry)};
1113	}
1114
1115	ArrayRef<uint8_t> ObjFile::getOptimizationHints() const {
1116	const auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1117	if (auto *cmd =
1118	findCommand<linkedit_data_command>(anyHdr: buf, types: LC_LINKER_OPTIMIZATION_HINT))
1119	return {buf + cmd->dataoff, cmd->datasize};
1120	return {};
1121	}
1122
1123	// Create pointers from symbols to their associated compact unwind entries.
1124	void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
1125	for (const Subsection &subsection : compactUnwindSection.subsections) {
1126	ConcatInputSection *isec = cast<ConcatInputSection>(Val: subsection.isec);
1127	// Hack!! Each compact unwind entry (CUE) has its UNSIGNED relocations embed
1128	// their addends in its data. Thus if ICF operated naively and compared the
1129	// entire contents of each CUE, entries with identical unwind info but e.g.
1130	// belonging to different functions would never be considered equivalent. To
1131	// work around this problem, we remove some parts of the data containing the
1132	// embedded addends. In particular, we remove the function address and LSDA
1133	// pointers. Since these locations are at the start and end of the entry,
1134	// we can do this using a simple, efficient slice rather than performing a
1135	// copy. We are not losing any information here because the embedded
1136	// addends have already been parsed in the corresponding Reloc structs.
1137	//
1138	// Removing these pointers would not be safe if they were pointers to
1139	// absolute symbols. In that case, there would be no corresponding
1140	// relocation. However, (AFAIK) MC cannot emit references to absolute
1141	// symbols for either the function address or the LSDA. However, it can* do*
1142	// so for the personality pointer, so we are not slicing that field away.
1143	//
1144	// Note that we do not adjust the offsets of the corresponding relocations;
1145	// instead, we rely on `relocateCompactUnwind()` to correctly handle these
1146	// truncated input sections.
1147	isec->data = isec->data.slice(N: target->wordSize, M: `8` + target->wordSize);
1148	uint32_t encoding = read32le(P: isec->data.data() + sizeof(uint32_t));
1149	// llvm-mc omits CU entries for functions that need DWARF encoding, but
1150	// `ld -r` doesn't. We can ignore them because we will re-synthesize these
1151	// CU entries from the DWARF info during the output phase.
1152	if ((encoding & static_cast<uint32_t>(UNWIND_MODE_MASK)) ==
1153	target->modeDwarfEncoding)
1154	continue;
1155
1156	ConcatInputSection *referentIsec;
1157	for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
1158	Reloc &r = *it;
1159	// CUE::functionAddress is at offset 0. Skip personality & LSDA relocs.
1160	if (r.offset != `0`) {
1161	++it;
1162	continue;
1163	}
1164	uint64_t add = r.addend;
1165	if (auto sym = cast_or_null<Defined>(Val: r.referent.dyn_cast<Symbol >())) {
1166	// Check whether the symbol defined in this file is the prevailing one.
1167	// Skip if it is e.g. a weak def that didn't prevail.
1168	if (sym->getFile() != this) {
1169	++it;
1170	continue;
1171	}
1172	add += sym->value;
1173	referentIsec = cast<ConcatInputSection>(Val: sym->isec());
1174	} else {
1175	referentIsec =
1176	cast<ConcatInputSection>(Val: r.referent.dyn_cast<InputSection *>());
1177	}
1178	// Unwind info lives in __DATA, and finalization of __TEXT will occur
1179	// before finalization of __DATA. Moreover, the finalization of unwind
1180	// info depends on the exact addresses that it references. So it is safe
1181	// for compact unwind to reference addresses in __TEXT, but not addresses
1182	// in any other segment.
1183	if (referentIsec->getSegName() != segment_names::text)
1184	error(msg: isec->getLocation(off: r.offset) + " references section " +
1185	referentIsec->getName() + " which is not in segment __TEXT");
1186	// The functionAddress relocations are typically section relocations.
1187	// However, unwind info operates on a per-symbol basis, so we search for
1188	// the function symbol here.
1189	Defined *d = findSymbolAtOffset(isec: referentIsec, off: add);
1190	if (!d) {
1191	++it;
1192	continue;
1193	}
1194	d->originalUnwindEntry = isec;
1195	// Now that the symbol points to the unwind entry, we can remove the reloc
1196	// that points from the unwind entry back to the symbol.
1197	//
1198	// First, the symbol keeps the unwind entry alive (and not vice versa), so
1199	// this keeps dead-stripping simple.
1200	//
1201	// Moreover, it reduces the work that ICF needs to do to figure out if
1202	// functions with unwind info are foldable.
1203	//
1204	// However, this does make it possible for ICF to fold CUEs that point to
1205	// distinct functions (if the CUEs are otherwise identical).
1206	// UnwindInfoSection takes care of this by re-duplicating the CUEs so that
1207	// each one can hold a distinct functionAddress value.
1208	//
1209	// Given that clang emits relocations in reverse order of address, this
1210	// relocation should be at the end of the vector for most of our input
1211	// object files, so this erase() is typically an O(1) operation.
1212	it = isec->relocs.erase(position: it);
1213	}
1214	}
1215	}
1216
1217	struct CIE {
1218	macho::Symbol personalitySymbol = nullptr*;
1219	bool fdesHaveAug = false;
1220	uint8_t lsdaPtrSize = `0`; // 0 => no LSDA
1221	uint8_t funcPtrSize = `0`;
1222	};
1223
1224	static uint8_t pointerEncodingToSize(uint8_t enc) {
1225	switch (enc & `0xf`) {
1226	case dwarf::DW_EH_PE_absptr:
1227	return target->wordSize;
1228	case dwarf::DW_EH_PE_sdata4:
1229	return `4`;
1230	case dwarf::DW_EH_PE_sdata8:
1231	// ld64 doesn't actually support sdata8, but this seems simple enough...
1232	return `8`;
1233	default:
1234	return `0`;
1235	};
1236	}
1237
1238	static CIE parseCIE(const InputSection isec, const* EhReader &reader,
1239	size_t off) {
1240	// Handling the full generality of possible DWARF encodings would be a major
1241	// pain. We instead take advantage of our knowledge of how llvm-mc encodes
1242	// DWARF and handle just that.
1243	constexpr uint8_t expectedPersonalityEnc =
1244	dwarf::DW_EH_PE_pcrel \| dwarf::DW_EH_PE_indirect \| dwarf::DW_EH_PE_sdata4;
1245
1246	CIE cie;
1247	uint8_t version = reader.readByte(off: &off);
1248	if (version != `1` && version != `3`)
1249	fatal(msg: "Expected CIE version of 1 or 3, got " + Twine (version));
1250	StringRef aug = reader.readString(off: &off);
1251	reader.skipLeb128(off: &off); // skip code alignment
1252	reader.skipLeb128(off: &off); // skip data alignment
1253	reader.skipLeb128(off: &off); // skip return address register
1254	reader.skipLeb128(off: &off); // skip aug data length
1255	uint64_t personalityAddrOff = `0`;
1256	for (char c : aug) {
1257	switch (c) {
1258	case `'z'`:
1259	cie.fdesHaveAug = true;
1260	break;
1261	case `'P'`: {
1262	uint8_t personalityEnc = reader.readByte(off: &off);
1263	if (personalityEnc != expectedPersonalityEnc)
1264	reader.failOn(errOff: off, msg: "unexpected personality encoding 0x" +
1265	Twine::utohexstr(Val: personalityEnc));
1266	personalityAddrOff = off;
1267	off += `4`;
1268	break;
1269	}
1270	case `'L'`: {
1271	uint8_t lsdaEnc = reader.readByte(off: &off);
1272	cie.lsdaPtrSize = pointerEncodingToSize(enc: lsdaEnc);
1273	if (cie.lsdaPtrSize == `0`)
1274	reader.failOn(errOff: off, msg: "unexpected LSDA encoding 0x" +
1275	Twine::utohexstr(Val: lsdaEnc));
1276	break;
1277	}
1278	case `'R'`: {
1279	uint8_t pointerEnc = reader.readByte(off: &off);
1280	cie.funcPtrSize = pointerEncodingToSize(enc: pointerEnc);
1281	if (cie.funcPtrSize == `0` \|\| !(pointerEnc & dwarf::DW_EH_PE_pcrel))
1282	reader.failOn(errOff: off, msg: "unexpected pointer encoding 0x" +
1283	Twine::utohexstr(Val: pointerEnc));
1284	break;
1285	}
1286	default:
1287	break;
1288	}
1289	}
1290	if (personalityAddrOff != `0`) {
1291	const auto *personalityReloc = isec->getRelocAt(off: personalityAddrOff);
1292	if (!personalityReloc)
1293	reader.failOn(errOff: off, msg: "Failed to locate relocation for personality symbol");
1294	cie.personalitySymbol = personalityReloc->referent.get<macho::Symbol *>();
1295	}
1296	return cie;
1297	}
1298
1299	// EH frame target addresses may be encoded as pcrel offsets. However, instead
1300	// of using an actual pcrel reloc, ld64 emits subtractor relocations instead.
1301	// This function recovers the target address from the subtractors, essentially
1302	// performing the inverse operation of EhRelocator.
1303	//
1304	// Concretely, we expect our relocations to write the value of `PC -
1305	// target_addr` to `PC`. `PC` itself is denoted by a minuend relocation that
1306	// points to a symbol plus an addend.
1307	//
1308	// It is important that the minuend relocation point to a symbol within the
1309	// same section as the fixup value, since sections may get moved around.
1310	//
1311	// For example, for arm64, llvm-mc emits relocations for the target function
1312	// address like so:
1313	//
1314	// ltmp:
1315	// <CIE start>
1316	// ...
1317	// <CIE end>
1318	// ... multiple FDEs ...
1319	// <FDE start>
1320	// <target function address - (ltmp + pcrel offset)>
1321	// ...
1322	//
1323	// If any of the FDEs in `multiple FDEs` get dead-stripped, then `FDE start`
1324	// will move to an earlier address, and `ltmp + pcrel offset` will no longer
1325	// reflect an accurate pcrel value. To avoid this problem, we "canonicalize"
1326	// our relocation by adding an `EH_Frame` symbol at `FDE start`, and updating
1327	// the reloc to be `target function address - (EH_Frame + new pcrel offset)`.
1328	//
1329	// If `Invert` is set, then we instead expect `target_addr - PC` to be written
1330	// to `PC`.
1331	template <bool Invert = false>
1332	Defined *
1333	targetSymFromCanonicalSubtractor(const InputSection *isec,
1334	std::vector<macho::Reloc>::iterator relocIt) {
1335	macho::Reloc &subtrahend = *relocIt;
1336	macho::Reloc &minuend = *std::next(x: relocIt);
1337	assert(target->hasAttr(subtrahend.type, RelocAttrBits::SUBTRAHEND));
1338	assert(target->hasAttr(minuend.type, RelocAttrBits::UNSIGNED));
1339	// Note: pcSym may not* be exactly at the PC; there's usually a non-zero*
1340	// addend.
1341	auto pcSym = cast<Defined>(Val: subtrahend.referent.get<macho::Symbol >());
1342	Defined *target =
1343	cast_or_null<Defined>(Val: minuend.referent.dyn_cast<macho::Symbol *>());
1344	if (!pcSym) {
1345	auto *targetIsec =
1346	cast<ConcatInputSection>(Val: minuend.referent.get<InputSection *>());
1347	target = findSymbolAtOffset(isec: targetIsec, off: minuend.addend);
1348	}
1349	if (Invert)
1350	std::swap(a&: pcSym, b&: target);
1351	if (pcSym->isec() == isec) {
1352	if (pcSym->value - (Invert ? -`1` : `1`) * minuend.addend != subtrahend.offset)
1353	fatal(msg: "invalid FDE relocation in __eh_frame");
1354	} else {
1355	// Ensure the pcReloc points to a symbol within the current EH frame.
1356	// HACK: we should really verify that the original relocation's semantics
1357	// are preserved. In particular, we should have
1358	// `oldSym->value + oldOffset == newSym + newOffset`. However, we don't
1359	// have an easy way to access the offsets from this point in the code; some
1360	// refactoring is needed for that.
1361	macho::Reloc &pcReloc = Invert ? minuend : subtrahend;
1362	pcReloc.referent = isec->symbols [`0`];
1363	assert(isec->symbols[`0`]->value == `0`);
1364	minuend.addend = pcReloc.offset * (Invert ? `1LL` : -`1LL`);
1365	}
1366	return target;
1367	}
1368
1369	Defined findSymbolAtAddress(const* std::vector<Section *> &sections,
1370	uint64_t addr) {
1371	Section *sec = findContainingSection(sections, offset: &addr);
1372	auto isec = cast<ConcatInputSection>(Val: findContainingSubsection(section: sec, offset: &addr));
1373	return findSymbolAtOffset(isec, off: addr);
1374	}
1375
1376	// For symbols that don't have compact unwind info, associate them with the more
1377	// general-purpose (and verbose) DWARF unwind info found in __eh_frame.
1378	//
1379	// This requires us to parse the contents of __eh_frame. See EhFrame.h for a
1380	// description of its format.
1381	//
1382	// While parsing, we also look for what MC calls "abs-ified" relocations -- they
1383	// are relocations which are implicitly encoded as offsets in the section data.
1384	// We convert them into explicit Reloc structs so that the EH frames can be
1385	// handled just like a regular ConcatInputSection later in our output phase.
1386	//
1387	// We also need to handle the case where our input object file has explicit
1388	// relocations. This is the case when e.g. it's the output of `ld -r`. We only
1389	// look for the "abs-ified" relocation if an explicit relocation is absent.
1390	void ObjFile::registerEhFrames(Section &ehFrameSection) {
1391	DenseMap<const InputSection *, CIE> cieMap;
1392	for (const Subsection &subsec : ehFrameSection.subsections) {
1393	auto *isec = cast<ConcatInputSection>(Val: subsec.isec);
1394	uint64_t isecOff = subsec.offset;
1395
1396	// Subtractor relocs require the subtrahend to be a symbol reloc. Ensure
1397	// that all EH frames have an associated symbol so that we can generate
1398	// subtractor relocs that reference them.
1399	if (isec->symbols.size() == `0`)
1400	make<Defined>(args: "EH_Frame", args: isec->getFile(), args&: isec, /value=/args: `0`,
1401	args: isec->getSize(), /isWeakDef=/args: false, /isExternal=/args: false,
1402	/isPrivateExtern=/args: false, /includeInSymtab=/args: false,
1403	/isReferencedDynamically=/args: false,
1404	/noDeadStrip=/args: false);
1405	else if (isec->symbols [`0`]->value != `0`)
1406	fatal(msg: "found symbol at unexpected offset in __eh_frame");
1407
1408	EhReader reader(this, isec->data, subsec.offset);
1409	size_t dataOff = `0`; // Offset from the start of the EH frame.
1410	reader.skipValidLength(off: &dataOff); // readLength() already validated this.
1411	// cieOffOff is the offset from the start of the EH frame to the cieOff
1412	// value, which is itself an offset from the current PC to a CIE.
1413	const size_t cieOffOff = dataOff;
1414
1415	EhRelocator ehRelocator(isec);
1416	auto cieOffRelocIt = llvm::find_if(
1417	Range&: isec->relocs, P: [=](const Reloc &r) { return r.offset == cieOffOff; });
1418	InputSection cieIsec = nullptr*;
1419	if (cieOffRelocIt != isec->relocs.end()) {
1420	// We already have an explicit relocation for the CIE offset.
1421	cieIsec =
1422	targetSymFromCanonicalSubtractor</Invert=/true>(isec, relocIt: cieOffRelocIt)
1423	->isec();
1424	dataOff += sizeof(uint32_t);
1425	} else {
1426	// If we haven't found a relocation, then the CIE offset is most likely
1427	// embedded in the section data (AKA an "abs-ified" reloc.). Parse that
1428	// and generate a Reloc struct.
1429	uint32_t cieMinuend = reader.readU32(off: &dataOff);
1430	if (cieMinuend == `0`) {
1431	cieIsec = isec;
1432	} else {
1433	uint32_t cieOff = isecOff + dataOff - cieMinuend;
1434	cieIsec = findContainingSubsection(section: ehFrameSection, offset: &cieOff);
1435	if (cieIsec == nullptr)
1436	fatal(msg: "failed to find CIE");
1437	}
1438	if (cieIsec != isec)
1439	ehRelocator.makeNegativePcRel(off: cieOffOff, target: cieIsec->symbols [`0`],
1440	/length=/`2`);
1441	}
1442	if (cieIsec == isec) {
1443	cieMap [cieIsec] = parseCIE(isec, reader, off: dataOff);
1444	continue;
1445	}
1446
1447	assert(cieMap.count(cieIsec));
1448	const CIE &cie = cieMap [cieIsec];
1449	// Offset of the function address within the EH frame.
1450	const size_t funcAddrOff = dataOff;
1451	uint64_t funcAddr = reader.readPointer(off: &dataOff, size: cie.funcPtrSize) +
1452	ehFrameSection.addr + isecOff + funcAddrOff;
1453	uint32_t funcLength = reader.readPointer(off: &dataOff, size: cie.funcPtrSize);
1454	size_t lsdaAddrOff = `0`; // Offset of the LSDA address within the EH frame.
1455	std::optional<uint64_t> lsdaAddrOpt;
1456	if (cie.fdesHaveAug) {
1457	reader.skipLeb128(off: &dataOff);
1458	lsdaAddrOff = dataOff;
1459	if (cie.lsdaPtrSize != `0`) {
1460	uint64_t lsdaOff = reader.readPointer(off: &dataOff, size: cie.lsdaPtrSize);
1461	if (lsdaOff != `0`) // FIXME possible to test this?
1462	lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
1463	}
1464	}
1465
1466	auto funcAddrRelocIt = isec->relocs.end();
1467	auto lsdaAddrRelocIt = isec->relocs.end();
1468	for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
1469	if (it ->offset == funcAddrOff)
1470	funcAddrRelocIt = it ++; // Found subtrahend; skip over minuend reloc
1471	else if (lsdaAddrOpt && it ->offset == lsdaAddrOff)
1472	lsdaAddrRelocIt = it ++; // Found subtrahend; skip over minuend reloc
1473	}
1474
1475	Defined *funcSym;
1476	if (funcAddrRelocIt != isec->relocs.end()) {
1477	funcSym = targetSymFromCanonicalSubtractor(isec, relocIt: funcAddrRelocIt);
1478	// Canonicalize the symbol. If there are multiple symbols at the same
1479	// address, we want both `registerEhFrame` and `registerCompactUnwind`
1480	// to register the unwind entry under same symbol.
1481	// This is not particularly efficient, but we should run into this case
1482	// infrequently (only when handling the output of `ld -r`).
1483	if (funcSym->isec())
1484	funcSym = findSymbolAtOffset(isec: cast<ConcatInputSection>(Val: funcSym->isec()),
1485	off: funcSym->value);
1486	} else {
1487	funcSym = findSymbolAtAddress(sections, addr: funcAddr);
1488	ehRelocator.makePcRel(off: funcAddrOff, target: funcSym, length: target->p2WordSize);
1489	}
1490	// The symbol has been coalesced, or already has a compact unwind entry.
1491	if (!funcSym \|\| funcSym->getFile() != this \|\| funcSym->unwindEntry()) {
1492	// We must prune unused FDEs for correctness, so we cannot rely on
1493	// -dead_strip being enabled.
1494	isec->live = false;
1495	continue;
1496	}
1497
1498	InputSection lsdaIsec = nullptr*;
1499	if (lsdaAddrRelocIt != isec->relocs.end()) {
1500	lsdaIsec =
1501	targetSymFromCanonicalSubtractor(isec, relocIt: lsdaAddrRelocIt)->isec();
1502	} else if (lsdaAddrOpt) {
1503	uint64_t lsdaAddr = *lsdaAddrOpt;
1504	Section *sec = findContainingSection(sections, offset: &lsdaAddr);
1505	lsdaIsec =
1506	cast<ConcatInputSection>(Val: findContainingSubsection(section: *sec, offset: &lsdaAddr));
1507	ehRelocator.makePcRel(off: lsdaAddrOff, target: lsdaIsec, length: target->p2WordSize);
1508	}
1509
1510	fdes [isec] = {.funcLength: funcLength, .personality: cie.personalitySymbol, .lsda: lsdaIsec};
1511	funcSym->originalUnwindEntry = isec;
1512	ehRelocator.commit();
1513	}
1514
1515	// __eh_frame is marked as S_ATTR_LIVE_SUPPORT in input files, because FDEs
1516	// are normally required to be kept alive if they reference a live symbol.
1517	// However, we've explicitly created a dependency from a symbol to its FDE, so
1518	// dead-stripping will just work as usual, and S_ATTR_LIVE_SUPPORT will only
1519	// serve to incorrectly prevent us from dead-stripping duplicate FDEs for a
1520	// live symbol (e.g. if there were multiple weak copies). Remove this flag to
1521	// let dead-stripping proceed correctly.
1522	ehFrameSection.flags &= ~S_ATTR_LIVE_SUPPORT;
1523	}
1524
1525	std::string ObjFile::sourceFile() const {
1526	const char *unitName = compileUnit->getUnitDIE().getShortName();
1527	// DWARF allows DW_AT_name to be absolute, in which case nothing should be
1528	// prepended. As for the styles, debug info can contain paths from any OS, not
1529	// necessarily an OS we're currently running on. Moreover different
1530	// compilation units can be compiled on different operating systems and linked
1531	// together later.
1532	if (sys::path::is_absolute(path: unitName, style: llvm::sys::path::Style::posix) \|\|
1533	sys::path::is_absolute(path: unitName, style: llvm::sys::path::Style::windows))
1534	return unitName;
1535	SmallString<`261`> dir(compileUnit->getCompilationDir());
1536	StringRef sep = sys::path::get_separator();
1537	// We don't use `path::append` here because we want an empty `dir` to result
1538	// in an absolute path. `append` would give us a relative path for that case.
1539	if (!dir.ends_with(Suffix: sep))
1540	dir += sep;
1541	return (dir + unitName).str();
1542	}
1543
1544	lld::DWARFCache *ObjFile::getDwarf() {
1545	llvm::call_once(flag&: initDwarf, F: [this]() {
1546	auto dwObj = DwarfObject::create(this);
1547	if (!dwObj)
1548	return;
1549	dwarfCache = std::make_unique<DWARFCache>(args: std::make_unique<DWARFContext>(
1550	args: std::move(dwObj), args: "",
1551	args: [&](Error err) { warn(msg: getName() + ": " + toString(E: std::move(err))); },
1552	args: [&](Error warning) {
1553	warn(msg: getName() + ": " + toString(E: std::move(warning)));
1554	}));
1555	});
1556
1557	return dwarfCache.get();
1558	}
1559	// The path can point to either a dylib or a .tbd file.
1560	static DylibFile loadDylib(StringRef path, DylibFile umbrella) {
1561	std::optional<MemoryBufferRef> mbref = readFile(path);
1562	if (!mbref) {
1563	error(msg: "could not read dylib file at " + path);
1564	return nullptr;
1565	}
1566	return loadDylib(mbref: *mbref, umbrella);
1567	}
1568
1569	// TBD files are parsed into a series of TAPI documents (InterfaceFiles), with
1570	// the first document storing child pointers to the rest of them. When we are
1571	// processing a given TBD file, we store that top-level document in
1572	// currentTopLevelTapi. When processing re-exports, we search its children for
1573	// potentially matching documents in the same TBD file. Note that the children
1574	// themselves don't point to further documents, i.e. this is a two-level tree.
1575	//
1576	// Re-exports can either refer to on-disk files, or to documents within .tbd
1577	// files.
1578	static DylibFile findDylib(StringRef path, DylibFile umbrella,
1579	const InterfaceFile *currentTopLevelTapi) {
1580	// Search order:
1581	// 1. Install name basename in -F / -L directories.
1582	{
1583	StringRef stem = path::stem(path);
1584	SmallString<`128`> frameworkName;
1585	path::append(path&: frameworkName, style: path::Style::posix, a: stem + ".framework", b: stem);
1586	bool isFramework = path.ends_with(Suffix: frameworkName);
1587	if (isFramework) {
1588	for (StringRef dir : config ->frameworkSearchPaths) {
1589	SmallString<`128`> candidate = dir;
1590	path::append(path&: candidate, a: frameworkName);
1591	if (std::optional<StringRef> dylibPath =
1592	resolveDylibPath(path: candidate.str()))
1593	return loadDylib(path: *dylibPath, umbrella);
1594	}
1595	} else if (std::optional<StringRef> dylibPath = findPathCombination(
1596	name: stem, roots: config ->librarySearchPaths, extensions: {".tbd", ".dylib", ".so"}))
1597	return loadDylib(path: *dylibPath, umbrella);
1598	}
1599
1600	// 2. As absolute path.
1601	if (path::is_absolute(path, style: path::Style::posix))
1602	for (StringRef root : config ->systemLibraryRoots)
1603	if (std::optional<StringRef> dylibPath =
1604	resolveDylibPath(path: (root + path).str()))
1605	return loadDylib(path: *dylibPath, umbrella);
1606
1607	// 3. As relative path.
1608
1609	// TODO: Handle -dylib_file
1610
1611	// Replace @executable_path, @loader_path, @rpath prefixes in install name.
1612	SmallString<`128`> newPath;
1613	if (config ->outputType == MH_EXECUTE &&
1614	path.consume_front(Prefix: "@executable_path/")) {
1615	// ld64 allows overriding this with the undocumented flag -executable_path.
1616	// lld doesn't currently implement that flag.
1617	// FIXME: Consider using finalOutput instead of outputFile.
1618	path::append(path&: newPath, a: path::parent_path(path: config ->outputFile), b: path);
1619	path = newPath;
1620	} else if (path.consume_front(Prefix: "@loader_path/")) {
1621	fs::real_path(path: umbrella->getName(), output&: newPath);
1622	path::remove_filename(path&: newPath);
1623	path::append(path&: newPath, a: path);
1624	path = newPath;
1625	} else if (path.starts_with(Prefix: "@rpath/")) {
1626	for (StringRef rpath : umbrella->rpaths) {
1627	newPath.clear();
1628	if (rpath.consume_front(Prefix: "@loader_path/")) {
1629	fs::real_path(path: umbrella->getName(), output&: newPath);
1630	path::remove_filename(path&: newPath);
1631	}
1632	path::append(path&: newPath, a: rpath, b: path.drop_front(N: strlen(s: "@rpath/")));
1633	if (std::optional<StringRef> dylibPath = resolveDylibPath(path: newPath.str()))
1634	return loadDylib(path: *dylibPath, umbrella);
1635	}
1636	}
1637
1638	// FIXME: Should this be further up?
1639	if (currentTopLevelTapi) {
1640	for (InterfaceFile &child :
1641	make_pointee_range(Range: currentTopLevelTapi->documents())) {
1642	assert(child.documents().empty());
1643	if (path == child.getInstallName()) {
1644	auto file = make<DylibFile>(args&: child, args&: umbrella, /isBundleLoader=/args: false*,
1645	/explicitlyLinked=/args: false);
1646	file->parseReexports(interface: child);
1647	return file;
1648	}
1649	}
1650	}
1651
1652	if (std::optional<StringRef> dylibPath = resolveDylibPath(path))
1653	return loadDylib(path: *dylibPath, umbrella);
1654
1655	return nullptr;
1656	}
1657
1658	// If a re-exported dylib is public (lives in /usr/lib or
1659	// /System/Library/Frameworks), then it is considered implicitly linked: we
1660	// should bind to its symbols directly instead of via the re-exporting umbrella
1661	// library.
1662	static bool isImplicitlyLinked(StringRef path) {
1663	if (!config ->implicitDylibs)
1664	return false;
1665
1666	if (path::parent_path(path) == "/usr/lib")
1667	return true;
1668
1669	// Match /System/Library/Frameworks/$FOO.framework//$FOO
1670	if (path.consume_front(Prefix: "/System/Library/Frameworks/")) {
1671	StringRef frameworkName = path.take_until(F: [](char c) { return c == `'.'`; });
1672	return path::filename(path) == frameworkName;
1673	}
1674
1675	return false;
1676	}
1677
1678	void DylibFile::loadReexport(StringRef path, DylibFile *umbrella,
1679	const InterfaceFile *currentTopLevelTapi) {
1680	DylibFile *reexport = findDylib(path, umbrella, currentTopLevelTapi);
1681	if (!reexport)
1682	error(msg: toString(f: this) + ": unable to locate re-export with install name " +
1683	path);
1684	}
1685
1686	DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
1687	bool isBundleLoader, bool explicitlyLinked)
1688	: InputFile (DylibKind, mb), refState(RefState::Unreferenced),
1689	explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
1690	assert(!isBundleLoader \|\| !umbrella);
1691	if (umbrella == nullptr)
1692	umbrella = this;
1693	this->umbrella = umbrella;
1694
1695	auto hdr = reinterpret_cast<const* mach_header *>(mb.getBufferStart());
1696
1697	// Initialize installName.
1698	if (const load_command *cmd = findCommand(anyHdr: hdr, types: LC_ID_DYLIB)) {
1699	auto c = reinterpret_cast<const* dylib_command *>(cmd);
1700	currentVersion = read32le(P: &c->dylib.current_version);
1701	compatibilityVersion = read32le(P: &c->dylib.compatibility_version);
1702	installName =
1703	reinterpret_cast<const char *>(cmd) + read32le(P: &c->dylib.name);
1704	} else if (!isBundleLoader) {
1705	// macho_executable and macho_bundle don't have LC_ID_DYLIB,
1706	// so it's OK.
1707	error(msg: toString(f: this) + ": dylib missing LC_ID_DYLIB load command");
1708	return;
1709	}
1710
1711	if (config ->printEachFile)
1712	message(msg: toString(f: this));
1713	inputFiles.insert(X: this);
1714
1715	deadStrippable = hdr->flags & MH_DEAD_STRIPPABLE_DYLIB;
1716
1717	if (!checkCompatibility(input: this))
1718	return;
1719
1720	checkAppExtensionSafety(dylibIsAppExtensionSafe: hdr->flags & MH_APP_EXTENSION_SAFE);
1721
1722	for (auto *cmd : findCommands<rpath_command>(anyHdr: hdr, types: LC_RPATH)) {
1723	StringRef rpath{reinterpret_cast<const char *>(cmd) + cmd->path};
1724	rpaths.push_back(Elt: rpath);
1725	}
1726
1727	// Initialize symbols.
1728	bool canBeImplicitlyLinked = findCommand(anyHdr: hdr, types: LC_SUB_CLIENT) == nullptr;
1729	exportingFile = (canBeImplicitlyLinked && isImplicitlyLinked(path: installName))
1730	? this
1731	: this->umbrella;
1732
1733	const auto *dyldInfo = findCommand<dyld_info_command>(anyHdr: hdr, types: LC_DYLD_INFO_ONLY);
1734	const auto *exportsTrie =
1735	findCommand<linkedit_data_command>(anyHdr: hdr, types: LC_DYLD_EXPORTS_TRIE);
1736	if (dyldInfo && exportsTrie) {
1737	// It's unclear what should happen in this case. Maybe we should only error
1738	// out if the two load commands refer to different data?
1739	error(msg: toString(f: this) +
1740	": dylib has both LC_DYLD_INFO_ONLY and LC_DYLD_EXPORTS_TRIE");
1741	return;
1742	}
1743
1744	if (dyldInfo) {
1745	parseExportedSymbols(offset: dyldInfo->export_off, size: dyldInfo->export_size);
1746	} else if (exportsTrie) {
1747	parseExportedSymbols(offset: exportsTrie->dataoff, size: exportsTrie->datasize);
1748	} else {
1749	error(msg: "No LC_DYLD_INFO_ONLY or LC_DYLD_EXPORTS_TRIE found in " +
1750	toString(f: this));
1751	}
1752	}
1753
1754	void DylibFile::parseExportedSymbols(uint32_t offset, uint32_t size) {
1755	struct TrieEntry {
1756	StringRef name;
1757	uint64_t flags;
1758	};
1759
1760	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1761	std::vector<TrieEntry> entries;
1762	// Find all the $ld$ symbols to process first.*
1763	parseTrie(buf: buf + offset, size, [&](const Twine &name, uint64_t flags) {
1764	StringRef savedName = saver().save(S: name);
1765	if (handleLDSymbol(originalName: savedName))
1766	return;
1767	entries.push_back(x: {.name: savedName, .flags: flags});
1768	});
1769
1770	// Process the "normal" symbols.
1771	for (TrieEntry &entry : entries) {
1772	if (exportingFile->hiddenSymbols.contains(V: CachedHashStringRef (entry.name)))
1773	continue;
1774
1775	bool isWeakDef = entry.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
1776	bool isTlv = entry.flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
1777
1778	symbols.push_back(
1779	x: symtab ->addDylib(name: entry.name, file: exportingFile, isWeakDef, isTlv));
1780	}
1781	}
1782
1783	void DylibFile::parseLoadCommands(MemoryBufferRef mb) {
1784	auto hdr = reinterpret_cast<const* mach_header *>(mb.getBufferStart());
1785	const uint8_t p = reinterpret_cast<const* uint8_t *>(mb.getBufferStart()) +
1786	target->headerSize;
1787	for (uint32_t i = `0`, n = hdr->ncmds; i < n; ++i) {
1788	auto cmd = reinterpret_cast<const* load_command *>(p);
1789	p += cmd->cmdsize;
1790
1791	if (!(hdr->flags & MH_NO_REEXPORTED_DYLIBS) &&
1792	cmd->cmd == LC_REEXPORT_DYLIB) {
1793	const auto c = reinterpret_cast<const* dylib_command *>(cmd);
1794	StringRef reexportPath =
1795	reinterpret_cast<const char *>(c) + read32le(P: &c->dylib.name);
1796	loadReexport(path: reexportPath, umbrella: exportingFile, currentTopLevelTapi: nullptr);
1797	}
1798
1799	// FIXME: What about LC_LOAD_UPWARD_DYLIB, LC_LAZY_LOAD_DYLIB,
1800	// LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB (..are reexports from dylibs with
1801	// MH_NO_REEXPORTED_DYLIBS loaded for -flat_namespace)?
1802	if (config ->namespaceKind == NamespaceKind::flat &&
1803	cmd->cmd == LC_LOAD_DYLIB) {
1804	const auto c = reinterpret_cast<const* dylib_command *>(cmd);
1805	StringRef dylibPath =
1806	reinterpret_cast<const char *>(c) + read32le(P: &c->dylib.name);
1807	DylibFile dylib = findDylib(path: dylibPath, umbrella, currentTopLevelTapi: nullptr*);
1808	if (!dylib)
1809	error(msg: Twine ("unable to locate library '") + dylibPath +
1810	"' loaded from '" + toString(f: this) + "' for -flat_namespace");
1811	}
1812	}
1813	}
1814
1815	// Some versions of Xcode ship with .tbd files that don't have the right
1816	// platform settings.
1817	constexpr std::array<StringRef, `3`> skipPlatformChecks{
1818	"/usr/lib/system/libsystem_kernel.dylib",
1819	"/usr/lib/system/libsystem_platform.dylib",
1820	"/usr/lib/system/libsystem_pthread.dylib"};
1821
1822	static bool skipPlatformCheckForCatalyst(const InterfaceFile &interface,
1823	bool explicitlyLinked) {
1824	// Catalyst outputs can link against implicitly linked macOS-only libraries.
1825	if (config ->platform() != PLATFORM_MACCATALYST \|\| explicitlyLinked)
1826	return false;
1827	return is_contained(Range: interface.targets(),
1828	Element: MachO::Target (config ->arch(), PLATFORM_MACOS));
1829	}
1830
1831	static bool isArchABICompatible(ArchitectureSet archSet,
1832	Architecture targetArch) {
1833	uint32_t cpuType;
1834	uint32_t targetCpuType;
1835	std::tie(args&: targetCpuType, args: std::ignore) = getCPUTypeFromArchitecture(Arch: targetArch);
1836
1837	return llvm::any_of(Range&: archSet, P: [&](const auto &p) {
1838	std::tie(args&: cpuType, args: std::ignore) = getCPUTypeFromArchitecture(p);
1839	return cpuType == targetCpuType;
1840	});
1841	}
1842
1843	static bool isTargetPlatformArchCompatible(
1844	InterfaceFile::const_target_range interfaceTargets, Target target) {
1845	if (is_contained(Range&: interfaceTargets, Element: target))
1846	return true;
1847
1848	if (config ->forceExactCpuSubtypeMatch)
1849	return false;
1850
1851	ArchitectureSet archSet;
1852	for (const auto &p : interfaceTargets)
1853	if (p.Platform == target.Platform)
1854	archSet.set(p.Arch);
1855	if (archSet.empty())
1856	return false;
1857
1858	return isArchABICompatible(archSet, targetArch: target.Arch);
1859	}
1860
1861	DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
1862	bool isBundleLoader, bool explicitlyLinked)
1863	: InputFile (DylibKind, interface), refState(RefState::Unreferenced),
1864	explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
1865	// FIXME: Add test for the missing TBD code path.
1866
1867	if (umbrella == nullptr)
1868	umbrella = this;
1869	this->umbrella = umbrella;
1870
1871	installName = saver().save(S: interface.getInstallName());
1872	compatibilityVersion = interface.getCompatibilityVersion().rawValue();
1873	currentVersion = interface.getCurrentVersion().rawValue();
1874
1875	if (config ->printEachFile)
1876	message(msg: toString(f: this));
1877	inputFiles.insert(X: this);
1878
1879	if (!is_contained(Range: skipPlatformChecks, Element: installName) &&
1880	!isTargetPlatformArchCompatible(interfaceTargets: interface.targets(),
1881	target: config ->platformInfo.target) &&
1882	!skipPlatformCheckForCatalyst(interface, explicitlyLinked)) {
1883	error(msg: toString(f: this) + " is incompatible with " +
1884	std::string(config ->platformInfo.target));
1885	return;
1886	}
1887
1888	checkAppExtensionSafety(dylibIsAppExtensionSafe: interface.isApplicationExtensionSafe());
1889
1890	bool canBeImplicitlyLinked = interface.allowableClients().size() == `0`;
1891	exportingFile = (canBeImplicitlyLinked && isImplicitlyLinked(path: installName))
1892	? this
1893	: umbrella;
1894	auto addSymbol = [&](const llvm::MachO::Symbol &symbol,
1895	const Twine &name) -> void {
1896	StringRef savedName = saver().save(S: name);
1897	if (exportingFile->hiddenSymbols.contains(V: CachedHashStringRef (savedName)))
1898	return;
1899
1900	symbols.push_back(x: symtab ->addDylib(name: savedName, file: exportingFile,
1901	isWeakDef: symbol.isWeakDefined(),
1902	isTlv: symbol.isThreadLocalValue()));
1903	};
1904
1905	std::vector<const llvm::MachO::Symbol *> normalSymbols;
1906	normalSymbols.reserve(n: interface.symbolsCount());
1907	for (const auto *symbol : interface.symbols()) {
1908	if (!isArchABICompatible(archSet: symbol->getArchitectures(), targetArch: config ->arch()))
1909	continue;
1910	if (handleLDSymbol(originalName: symbol->getName()))
1911	continue;
1912
1913	switch (symbol->getKind()) {
1914	case EncodeKind::GlobalSymbol:
1915	case EncodeKind::ObjectiveCClass:
1916	case EncodeKind::ObjectiveCClassEHType:
1917	case EncodeKind::ObjectiveCInstanceVariable:
1918	normalSymbols.push_back(x: symbol);
1919	}
1920	}
1921	// interface.symbols() order is non-deterministic.
1922	llvm::sort(C&: normalSymbols,
1923	Comp: [](auto l, auto* r) { return* l->getName() < r->getName(); });
1924
1925	// TODO(compnerd) filter out symbols based on the target platform
1926	for (const auto *symbol : normalSymbols) {
1927	switch (symbol->getKind()) {
1928	case EncodeKind::GlobalSymbol:
1929	addSymbol (*symbol, symbol->getName());
1930	break;
1931	case EncodeKind::ObjectiveCClass:
1932	// XXX ld64 only creates these symbols when -ObjC is passed in. We may
1933	// want to emulate that.
1934	addSymbol (*symbol, objc::symbol_names::klass + symbol->getName());
1935	addSymbol (*symbol, objc::symbol_names::metaclass + symbol->getName());
1936	break;
1937	case EncodeKind::ObjectiveCClassEHType:
1938	addSymbol (*symbol, objc::symbol_names::ehtype + symbol->getName());
1939	break;
1940	case EncodeKind::ObjectiveCInstanceVariable:
1941	addSymbol (*symbol, objc::symbol_names::ivar + symbol->getName());
1942	break;
1943	}
1944	}
1945	}
1946
1947	DylibFile::DylibFile(DylibFile *umbrella)
1948	: InputFile (DylibKind, MemoryBufferRef {}), refState(RefState::Unreferenced),
1949	explicitlyLinked(false), isBundleLoader(false) {
1950	if (umbrella == nullptr)
1951	umbrella = this;
1952	this->umbrella = umbrella;
1953	}
1954
1955	void DylibFile::parseReexports(const InterfaceFile &interface) {
1956	const InterfaceFile *topLevel =
1957	interface.getParent() == nullptr ? &interface : interface.getParent();
1958	for (const InterfaceFileRef &intfRef : interface.reexportedLibraries()) {
1959	InterfaceFile::const_target_range targets = intfRef.targets();
1960	if (is_contained(Range: skipPlatformChecks, Element: intfRef.getInstallName()) \|\|
1961	isTargetPlatformArchCompatible(interfaceTargets: targets, target: config ->platformInfo.target))
1962	loadReexport(path: intfRef.getInstallName(), umbrella: exportingFile, currentTopLevelTapi: topLevel);
1963	}
1964	}
1965
1966	bool DylibFile::isExplicitlyLinked() const {
1967	if (!explicitlyLinked)
1968	return false;
1969
1970	// If this dylib was explicitly linked, but at least one of the symbols
1971	// of the synthetic dylibs it created via $ld$previous symbols is
1972	// referenced, then that synthetic dylib fulfils the explicit linkedness
1973	// and we can deadstrip this dylib if it's unreferenced.
1974	for (const auto *dylib : extraDylibs)
1975	if (dylib->isReferenced())
1976	return false;
1977
1978	return true;
1979	}
1980
1981	DylibFile *DylibFile::getSyntheticDylib(StringRef installName,
1982	uint32_t currentVersion,
1983	uint32_t compatVersion) {
1984	for (DylibFile *dylib : extraDylibs)
1985	if (dylib->installName == installName) {
1986	// FIXME: Check what to do if different $ld$previous symbols
1987	// request the same dylib, but with different versions.
1988	return dylib;
1989	}
1990
1991	auto dylib = make<DylibFile>(args: umbrella == this* ? nullptr : umbrella);
1992	dylib->installName = saver().save(S: installName);
1993	dylib->currentVersion = currentVersion;
1994	dylib->compatibilityVersion = compatVersion;
1995	extraDylibs.push_back(Elt: dylib);
1996	return dylib;
1997	}
1998
1999	// $ld$ symbols modify the properties/behavior of the library (e.g. its install
2000	// name, compatibility version or hide/add symbols) for specific target
2001	// versions.
2002	bool DylibFile::handleLDSymbol(StringRef originalName) {
2003	if (!originalName.starts_with(Prefix: "$ld$"))
2004	return false;
2005
2006	StringRef action;
2007	StringRef name;
2008	std::tie(args&: action, args&: name) = originalName.drop_front(N: strlen(s: "$ld$")).split(Separator: `'$'`);
2009	if (action == "previous")
2010	handleLDPreviousSymbol(name, originalName);
2011	else if (action == "install_name")
2012	handleLDInstallNameSymbol(name, originalName);
2013	else if (action == "hide")
2014	handleLDHideSymbol(name, originalName);
2015	return true;
2016	}
2017
2018	void DylibFile::handleLDPreviousSymbol(StringRef name, StringRef originalName) {
2019	// originalName: $ld$ previous $ <installname> $ <compatversion> $
2020	// <platformstr> $ <startversion> $ <endversion> $ <symbol-name> $
2021	StringRef installName;
2022	StringRef compatVersion;
2023	StringRef platformStr;
2024	StringRef startVersion;
2025	StringRef endVersion;
2026	StringRef symbolName;
2027	StringRef rest;
2028
2029	std::tie(args&: installName, args&: name) = name.split(Separator: `'$'`);
2030	std::tie(args&: compatVersion, args&: name) = name.split(Separator: `'$'`);
2031	std::tie(args&: platformStr, args&: name) = name.split(Separator: `'$'`);
2032	std::tie(args&: startVersion, args&: name) = name.split(Separator: `'$'`);
2033	std::tie(args&: endVersion, args&: name) = name.split(Separator: `'$'`);
2034	std::tie(args&: symbolName, args&: rest) = name.rsplit(Separator: `'$'`);
2035
2036	// FIXME: Does this do the right thing for zippered files?
2037	unsigned platform;
2038	if (platformStr.getAsInteger(Radix: `10`, Result&: platform) \|\|
2039	platform != static_cast<unsigned>(config ->platform()))
2040	return;
2041
2042	VersionTuple start;
2043	if (start.tryParse(string: startVersion)) {
2044	warn(msg: toString(f: this) + ": failed to parse start version, symbol '" +
2045	originalName + "' ignored");
2046	return;
2047	}
2048	VersionTuple end;
2049	if (end.tryParse(string: endVersion)) {
2050	warn(msg: toString(f: this) + ": failed to parse end version, symbol '" +
2051	originalName + "' ignored");
2052	return;
2053	}
2054	if (config ->platformInfo.target.MinDeployment < start \|\|
2055	config ->platformInfo.target.MinDeployment >= end)
2056	return;
2057
2058	// Initialized to compatibilityVersion for the symbolName branch below.
2059	uint32_t newCompatibilityVersion = compatibilityVersion;
2060	uint32_t newCurrentVersionForSymbol = currentVersion;
2061	if (!compatVersion.empty()) {
2062	VersionTuple cVersion;
2063	if (cVersion.tryParse(string: compatVersion)) {
2064	warn(msg: toString(f: this) +
2065	": failed to parse compatibility version, symbol '" + originalName +
2066	"' ignored");
2067	return;
2068	}
2069	newCompatibilityVersion = encodeVersion(version: cVersion);
2070	newCurrentVersionForSymbol = newCompatibilityVersion;
2071	}
2072
2073	if (!symbolName.empty()) {
2074	// A $ld$previous$ symbol with symbol name adds a symbol with that name to
2075	// a dylib with given name and version.
2076	auto *dylib = getSyntheticDylib(installName, currentVersion: newCurrentVersionForSymbol,
2077	compatVersion: newCompatibilityVersion);
2078
2079	// The tbd file usually contains the $ld$previous symbol for an old version,
2080	// and then the symbol itself later, for newer deployment targets, like so:
2081	// symbols: [
2082	// '$ld$previous$/Another$$1$3.0$14.0$_zzz$',
2083	// _zzz,
2084	// ]
2085	// Since the symbols are sorted, adding them to the symtab in the given
2086	// order means the $ld$previous version of _zzz will prevail, as desired.
2087	dylib->symbols.push_back(x: symtab ->addDylib(
2088	name: saver().save(S: symbolName), file: dylib, /isWeakDef=/false, /isTlv=/false));
2089	return;
2090	}
2091
2092	// A $ld$previous$ symbol without symbol name modifies the dylib it's in.
2093	this->installName = saver().save(S: installName);
2094	this->compatibilityVersion = newCompatibilityVersion;
2095	}
2096
2097	void DylibFile::handleLDInstallNameSymbol(StringRef name,
2098	StringRef originalName) {
2099	// originalName: $ld$ install_name $ os<version> $ install_name
2100	StringRef condition, installName;
2101	std::tie(args&: condition, args&: installName) = name.split(Separator: `'$'`);
2102	VersionTuple version;
2103	if (!condition.consume_front(Prefix: "os") \|\| version.tryParse(string: condition))
2104	warn(msg: toString(f: this) + ": failed to parse os version, symbol '" +
2105	originalName + "' ignored");
2106	else if (version == config ->platformInfo.target.MinDeployment)
2107	this->installName = saver().save(S: installName);
2108	}
2109
2110	void DylibFile::handleLDHideSymbol(StringRef name, StringRef originalName) {
2111	StringRef symbolName;
2112	bool shouldHide = true;
2113	if (name.starts_with(Prefix: "os")) {
2114	// If it's hidden based on versions.
2115	name = name.drop_front(N: `2`);
2116	StringRef minVersion;
2117	std::tie(args&: minVersion, args&: symbolName) = name.split(Separator: `'$'`);
2118	VersionTuple versionTup;
2119	if (versionTup.tryParse(string: minVersion)) {
2120	warn(msg: toString(f: this) + ": failed to parse hidden version, symbol `" + originalName +
2121	"` ignored.");
2122	return;
2123	}
2124	shouldHide = versionTup == config ->platformInfo.target.MinDeployment;
2125	} else {
2126	symbolName = name;
2127	}
2128
2129	if (shouldHide)
2130	exportingFile->hiddenSymbols.insert(V: CachedHashStringRef (symbolName));
2131	}
2132
2133	void DylibFile::checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const {
2134	if (config ->applicationExtension && !dylibIsAppExtensionSafe)
2135	warn(msg: "using '-application_extension' with unsafe dylib: " + toString(f: this));
2136	}
2137
2138	ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
2139	: InputFile (ArchiveKind, f ->getMemoryBufferRef()), file (std::move(f)),
2140	forceHidden(forceHidden) {}
2141
2142	void ArchiveFile::addLazySymbols() {
2143	// Avoid calling getMemoryBufferRef() on zero-symbol archive
2144	// since that crashes.
2145	if (file ->isEmpty() \|\| file ->getNumberOfSymbols() == `0`)
2146	return;
2147
2148	Error err = Error::success();
2149	auto child = file ->child_begin(Err&: err);
2150	// Ignore the I/O error here - will be reported later.
2151	if (!err) {
2152	Expected<MemoryBufferRef> mbOrErr = child ->getMemoryBufferRef();
2153	if (!mbOrErr) {
2154	llvm::consumeError(Err: mbOrErr.takeError());
2155	} else {
2156	if (identify_magic(magic: mbOrErr ->getBuffer()) == file_magic::macho_object) {
2157	if (target->wordSize == `8`)
2158	compatArch = compatWithTargetArch(
2159	file: this, hdr: reinterpret_cast<const LP64::mach_header *>(
2160	mbOrErr ->getBufferStart()));
2161	else
2162	compatArch = compatWithTargetArch(
2163	file: this, hdr: reinterpret_cast<const ILP32::mach_header *>(
2164	mbOrErr ->getBufferStart()));
2165	if (!compatArch)
2166	return;
2167	}
2168	}
2169	}
2170
2171	for (const object::Archive::Symbol &sym : file ->symbols())
2172	symtab ->addLazyArchive(name: sym.getName(), file: this, sym);
2173	}
2174
2175	static Expected<InputFile *>
2176	loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
2177	uint64_t offsetInArchive, bool forceHidden, bool compatArch) {
2178	if (config ->zeroModTime)
2179	modTime = `0`;
2180
2181	switch (identify_magic(magic: mb.getBuffer())) {
2182	case file_magic::macho_object:
2183	return make<ObjFile>(args&: mb, args&: modTime, args&: archiveName, /lazy=/args: false, args&: forceHidden,
2184	args&: compatArch);
2185	case file_magic::bitcode:
2186	return make<BitcodeFile>(args&: mb, args&: archiveName, args&: offsetInArchive, /lazy=/args: false,
2187	args&: forceHidden, args&: compatArch);
2188	default:
2189	return createStringError(EC: inconvertibleErrorCode(),
2190	S: mb.getBufferIdentifier() +
2191	" has unhandled file type");
2192	}
2193	}
2194
2195	Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
2196	if (!seen.insert(V: c.getChildOffset()).second)
2197	return Error::success();
2198
2199	Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
2200	if (!mb)
2201	return mb.takeError();
2202
2203	Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
2204	if (!modTime)
2205	return modTime.takeError();
2206
2207	Expected<InputFile *> file =
2208	loadArchiveMember(mb: mb, modTime: toTimeT(TP: modTime), archiveName: getName(), offsetInArchive: c.getChildOffset(),
2209	forceHidden, compatArch);
2210
2211	if (!file)
2212	return file.takeError();
2213
2214	inputFiles.insert(X: *file);
2215	printArchiveMemberLoad(reason, *file);
2216	return Error::success();
2217	}
2218
2219	void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
2220	object::Archive::Child c =
2221	CHECK(sym.getMember(), toString(this) +
2222	": could not get the member defining symbol " +
2223	toMachOString(sym));
2224
2225	// `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile>
2226	// and become invalid after that call. Copy it to the stack so we can refer
2227	// to it later.
2228	const object::Archive::Symbol symCopy = sym;
2229
2230	// ld64 doesn't demangle sym here even with -demangle.
2231	// Match that: intentionally don't call toMachOString().
2232	if (Error e = fetch(c, reason: symCopy.getName()))
2233	error(msg: toString(f: this) + ": could not get the member defining symbol " +
2234	toMachOString(symCopy) + ": " + toString(E: std::move(e)));
2235	}
2236
2237	static macho::Symbol createBitcodeSymbol(const* lto::InputFile::Symbol &objSym,
2238	BitcodeFile &file) {
2239	StringRef name = saver().save(S: objSym.getName());
2240
2241	if (objSym.isUndefined())
2242	return symtab ->addUndefined(name, &file, /isWeakRef=/objSym.isWeak());
2243
2244	// TODO: Write a test demonstrating why computing isPrivateExtern before
2245	// LTO compilation is important.
2246	bool isPrivateExtern = false;
2247	switch (objSym.getVisibility()) {
2248	case GlobalValue::HiddenVisibility:
2249	isPrivateExtern = true;
2250	break;
2251	case GlobalValue::ProtectedVisibility:
2252	error(msg: name + " has protected visibility, which is not supported by Mach-O");
2253	break;
2254	case GlobalValue::DefaultVisibility:
2255	break;
2256	}
2257	isPrivateExtern = isPrivateExtern \|\| objSym.canBeOmittedFromSymbolTable() \|\|
2258	file.forceHidden;
2259
2260	if (objSym.isCommon())
2261	return symtab ->addCommon(name, &file, size: objSym.getCommonSize(),
2262	align: objSym.getCommonAlignment(), isPrivateExtern);
2263
2264	return symtab ->addDefined(name, &file, /isec=/nullptr, /value=/`0`,
2265	/size=/`0`, isWeakDef: objSym.isWeak(), isPrivateExtern,
2266	/isReferencedDynamically=/false,
2267	/noDeadStrip=/false,
2268	/isWeakDefCanBeHidden=/false);
2269	}
2270
2271	BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
2272	uint64_t offsetInArchive, bool lazy, bool forceHidden,
2273	bool compatArch)
2274	: InputFile (BitcodeKind, mb, lazy), forceHidden(forceHidden) {
2275	this->archiveName = std::string (archiveName);
2276	this->compatArch = compatArch;
2277	std::string path = mb.getBufferIdentifier().str();
2278	if (config ->thinLTOIndexOnly)
2279	path = replaceThinLTOSuffix(path: mb.getBufferIdentifier());
2280
2281	// If the parent archive already determines that the arch is not compat with
2282	// target, then just return.
2283	if (!compatArch)
2284	return;
2285
2286	// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
2287	// name. If two members with the same name are provided, this causes a
2288	// collision and ThinLTO can't proceed.
2289	// So, we append the archive name to disambiguate two members with the same
2290	// name from multiple different archives, and offset within the archive to
2291	// disambiguate two members of the same name from a single archive.
2292	MemoryBufferRef mbref(mb.getBuffer(),
2293	saver().save(S: archiveName.empty()
2294	? path
2295	: archiveName + "(" +
2296	sys::path::filename(path) + ")" +
2297	utostr(X: offsetInArchive)));
2298	obj = check(e: lto::InputFile::create(Object: mbref));
2299	if (lazy)
2300	parseLazy();
2301	else
2302	parse();
2303	}
2304
2305	void BitcodeFile::parse() {
2306	// Convert LTO Symbols to LLD Symbols in order to perform resolution. The
2307	// "winning" symbol will then be marked as Prevailing at LTO compilation
2308	// time.
2309	symbols.resize(new_size: obj ->symbols().size());
2310
2311	// Process defined symbols first. See the comment at the end of
2312	// ObjFile<>::parseSymbols.
2313	for (auto it : llvm::enumerate(First: obj ->symbols()))
2314	if (!it.value().isUndefined())
2315	symbols [it.index()] = createBitcodeSymbol(objSym: it.value(), file&: *this);
2316	for (auto it : llvm::enumerate(First: obj ->symbols()))
2317	if (it.value().isUndefined())
2318	symbols [it.index()] = createBitcodeSymbol(objSym: it.value(), file&: *this);
2319	}
2320
2321	void BitcodeFile::parseLazy() {
2322	symbols.resize(new_size: obj ->symbols().size());
2323	for (const auto &[i, objSym] : llvm::enumerate(First: obj ->symbols())) {
2324	if (!objSym.isUndefined()) {
2325	symbols [i] = symtab ->addLazyObject(name: saver().save(S: objSym.getName()), file&: *this);
2326	if (!lazy)
2327	break;
2328	}
2329	}
2330	}
2331
2332	std::string macho::replaceThinLTOSuffix(StringRef path) {
2333	auto [suffix, repl] = config ->thinLTOObjectSuffixReplace;
2334	if (path.consume_back(Suffix: suffix))
2335	return (path + repl).str();
2336	return std::string (path);
2337	}
2338
2339	void macho::extract(InputFile &file, StringRef reason) {
2340	if (!file.lazy)
2341	return;
2342	file.lazy = false;
2343
2344	printArchiveMemberLoad(reason, &file);
2345	if (auto *bitcode = dyn_cast<BitcodeFile>(Val: &file)) {
2346	bitcode->parse();
2347	} else {
2348	auto &f = cast<ObjFile>(Val&: file);
2349	if (target->wordSize == `8`)
2350	f.parse<LP64>();
2351	else
2352	f.parse<ILP32>();
2353	}
2354	}
2355
2356	template void ObjFile::parse<LP64>();
2357

Browse the source code of llvm_projects/lld/MachO/InputFiles.cpp