InputFiles.cpp source code [llvm_projects/lld/MachO/InputFiles.cpp]

1	//===- InputFiles.cpp -----------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains functions to parse Mach-O object files. In this comment,
10	// we describe the Mach-O file structure and how we parse it.
11	//
12	// Mach-O is not very different from ELF or COFF. The notion of symbols,
13	// sections and relocations exists in Mach-O as it does in ELF and COFF.
14	//
15	// Perhaps the notion that is new to those who know ELF/COFF is "subsections".
16	// In ELF/COFF, sections are an atomic unit of data copied from input files to
17	// output files. When we merge or garbage-collect sections, we treat each
18	// section as an atomic unit. In Mach-O, that's not the case. Sections can
19	// consist of multiple subsections, and subsections are a unit of merging and
20	// garbage-collecting. Therefore, Mach-O's subsections are more similar to
21	// ELF/COFF's sections than Mach-O's sections are.
22	//
23	// A section can have multiple symbols. A symbol that does not have the
24	// N_ALT_ENTRY attribute indicates a beginning of a subsection. Therefore, by
25	// definition, a symbol is always present at the beginning of each subsection. A
26	// symbol with N_ALT_ENTRY attribute does not start a new subsection and can
27	// point to a middle of a subsection.
28	//
29	// The notion of subsections also affects how relocations are represented in
30	// Mach-O. All references within a section need to be explicitly represented as
31	// relocations if they refer to different subsections, because we obviously need
32	// to fix up addresses if subsections are laid out in an output file differently
33	// than they were in object files. To represent that, Mach-O relocations can
34	// refer to an unnamed location via its address. Scattered relocations (those
35	// with the R_SCATTERED bit set) always refer to unnamed locations.
36	// Non-scattered relocations refer to an unnamed location if r_extern is not set
37	// and r_symbolnum is zero.
38	//
39	// Without the above differences, I think you can use your knowledge about ELF
40	// and COFF for Mach-O.
41	//
42	//===----------------------------------------------------------------------===//
43
44	#include "InputFiles.h"
45	#include "Config.h"
46	#include "Driver.h"
47	#include "Dwarf.h"
48	#include "EhFrame.h"
49	#include "ExportTrie.h"
50	#include "InputSection.h"
51	#include "ObjC.h"
52	#include "OutputSection.h"
53	#include "OutputSegment.h"
54	#include "SymbolTable.h"
55	#include "Symbols.h"
56	#include "SyntheticSections.h"
57	#include "Target.h"
58
59	#include "lld/Common/CommonLinkerContext.h"
60	#include "lld/Common/DWARF.h"
61	#include "lld/Common/Reproduce.h"
62	#include "llvm/ADT/iterator.h"
63	#include "llvm/BinaryFormat/MachO.h"
64	#include "llvm/LTO/LTO.h"
65	#include "llvm/Support/BinaryStreamReader.h"
66	#include "llvm/Support/Endian.h"
67	#include "llvm/Support/MemoryBuffer.h"
68	#include "llvm/Support/Path.h"
69	#include "llvm/Support/TarWriter.h"
70	#include "llvm/Support/TimeProfiler.h"
71	#include "llvm/TextAPI/Architecture.h"
72	#include "llvm/TextAPI/InterfaceFile.h"
73
74	#include <optional>
75	#include <type_traits>
76
77	using namespace llvm;
78	using namespace llvm::MachO;
79	using namespace llvm::support::endian;
80	using namespace llvm::sys;
81	using namespace lld;
82	using namespace lld::macho;
83
84	// Returns "<internal>", "foo.a(bar.o)", or "baz.o".
85	std::string lld::toString(const InputFile *f) {
86	if (!f)
87	return "<internal>";
88
89	// Multiple dylibs can be defined in one .tbd file.
90	if (const auto *dylibFile = dyn_cast<DylibFile>(Val: f))
91	if (f->getName().ends_with(Suffix: ".tbd"))
92	return (f->getName() + "(" + dylibFile->installName + ")").str();
93
94	if (f->archiveName.empty())
95	return std::string (f->getName());
96	return (f->archiveName + "(" + path::filename(path: f->getName()) + ")").str();
97	}
98
99	std::string lld::toString(const Section &sec) {
100	return (toString(f: sec.file) + ":(" + sec.name + ")").str();
101	}
102
// Global set of InputFile pointers exposed through the macho namespace.
103	SetVector<InputFile *> macho::inputFiles;
// When non-null, readFile() appends the contents of every file it loads to
// this tar writer.
104	std::unique_ptr<TarWriter> macho::tar;
// Source of per-file ids: the InputFile constructor defined in this file
// initializes `id` with `idCount++`.
105	int InputFile::idCount = `0`;
106
107	static VersionTuple decodeVersion(uint32_t version) {
108	unsigned major = version >> `16`;
109	unsigned minor = (version >> `8`) & `0xffu`;
110	unsigned subMinor = version & `0xffu`;
111	return VersionTuple (major, minor, subMinor);
112	}
113
114	static std::vector<PlatformInfo> getPlatformInfos(const InputFile *input) {
115	if (!isa<ObjFile>(Val: input) && !isa<DylibFile>(Val: input))
116	return {};
117
118	const char *hdr = input->mb.getBufferStart();
119
120	// "Zippered" object files can have multiple LC_BUILD_VERSION load commands.
121	std::vector<PlatformInfo> platformInfos;
122	for (auto *cmd : findCommands<build_version_command>(anyHdr: hdr, types: LC_BUILD_VERSION)) {
123	PlatformInfo info;
124	info.target.Platform = static_cast<PlatformType>(cmd->platform);
125	info.target.MinDeployment = decodeVersion(version: cmd->minos);
126	platformInfos.emplace_back(args: std::move(info));
127	}
128	for (auto *cmd : findCommands<version_min_command>(
129	anyHdr: hdr, types: LC_VERSION_MIN_MACOSX, types: LC_VERSION_MIN_IPHONEOS,
130	types: LC_VERSION_MIN_TVOS, types: LC_VERSION_MIN_WATCHOS)) {
131	PlatformInfo info;
132	switch (cmd->cmd) {
133	case LC_VERSION_MIN_MACOSX:
134	info.target.Platform = PLATFORM_MACOS;
135	break;
136	case LC_VERSION_MIN_IPHONEOS:
137	info.target.Platform = PLATFORM_IOS;
138	break;
139	case LC_VERSION_MIN_TVOS:
140	info.target.Platform = PLATFORM_TVOS;
141	break;
142	case LC_VERSION_MIN_WATCHOS:
143	info.target.Platform = PLATFORM_WATCHOS;
144	break;
145	}
146	info.target.MinDeployment = decodeVersion(version: cmd->version);
147	platformInfos.emplace_back(args: std::move(info));
148	}
149
150	return platformInfos;
151	}
152
153	static bool checkCompatibility(const InputFile *input) {
154	std::vector<PlatformInfo> platformInfos = getPlatformInfos(input);
155	if (platformInfos.empty())
156	return true;
157
158	auto it = find_if(Range&: platformInfos, P: [&](const PlatformInfo &info) {
159	return removeSimulator(platform: info.target.Platform) ==
160	removeSimulator(platform: config ->platform());
161	});
162	if (it == platformInfos.end()) {
163	std::string platformNames;
164	raw_string_ostream os(platformNames);
165	interleave(
166	c: platformInfos, os,
167	each_fn: [&](const PlatformInfo &info) {
168	os << getPlatformName(Platform: info.target.Platform);
169	},
170	separator: "/");
171	error(msg: toString(f: input) + " has platform " + platformNames +
172	Twine (", which is different from target platform ") +
173	getPlatformName(Platform: config ->platform()));
174	return false;
175	}
176
177	if (it ->target.MinDeployment > config ->platformInfo.target.MinDeployment)
178	warn(msg: toString(f: input) + " has version " +
179	it ->target.MinDeployment.getAsString() +
180	", which is newer than target minimum of " +
181	config ->platformInfo.target.MinDeployment.getAsString());
182
183	return true;
184	}
185
186	template <class Header>
187	static bool compatWithTargetArch(const InputFile file, const* Header *hdr) {
188	uint32_t cpuType;
189	std::tie(args&: cpuType, args: std::ignore) = getCPUTypeFromArchitecture(Arch: config ->arch());
190
191	if (hdr->cputype != cpuType) {
192	Architecture arch =
193	getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
194	auto msg = config ->errorForArchMismatch
195	? static_cast<void ()(const* Twine &)>(error)
196	: warn;
197
198	msg(toString(f: file) + " has architecture " + getArchitectureName(Arch: arch) +
199	" which is incompatible with target architecture " +
200	getArchitectureName(Arch: config ->arch()));
201	return false;
202	}
203
204	return checkCompatibility(input: file);
205	}
206
207	// This cache mostly exists to store system libraries (and .tbds) as they're
208	// loaded, rather than the input archives, which are already cached at a higher
209	// level, and other files like the filelist that are only read once.
210	// Theoretically this caching could be more efficient by hoisting it, but that
211	// would require altering many callers to track the state.
// readFile() keys this map by the path it was asked to open and stores the
// MemoryBufferRef it returned for that path (for fat files, the selected
// slice).
212	DenseMap<CachedHashStringRef, MemoryBufferRef> macho::cachedReads;
213	// Open a given file path and return it as a memory-mapped file.
214	std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
215	CachedHashStringRef key(path);
216	auto entry = cachedReads.find(Val: key);
217	if (entry != cachedReads.end())
218	return entry ->second;
219
220	ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(Filename: path);
221	if (std::error_code ec = mbOrErr.getError()) {
222	error(msg: "cannot open " + path + ": " + ec.message());
223	return std::nullopt;
224	}
225
226	std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
227	MemoryBufferRef mbref = mb ->getMemBufferRef();
228	make<std::unique_ptr<MemoryBuffer>>(args: std::move(mb)); // take mb ownership
229
230	// If this is a regular non-fat file, return it.
231	const char *buf = mbref.getBufferStart();
232	const auto hdr = reinterpret_cast<const* fat_header *>(buf);
233	if (mbref.getBufferSize() < sizeof(uint32_t) \|\|
234	read32be(P: &hdr->magic) != FAT_MAGIC) {
235	if (tar)
236	tar ->append(Path: relativeToRoot(path), Data: mbref.getBuffer());
237	return cachedReads [key] = mbref;
238	}
239
240	llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
241
242	// Object files and archive files may be fat files, which contain multiple
243	// real files for different CPU ISAs. Here, we search for a file that matches
244	// with the current link target and returns it as a MemoryBufferRef.
245	const auto arch = reinterpret_cast<const* fat_arch >(buf + sizeof(hdr));
246	auto getArchName = [](uint32_t cpuType, uint32_t cpuSubtype) {
247	return getArchitectureName(Arch: getArchitectureFromCpuType(CPUType: cpuType, CPUSubType: cpuSubtype));
248	};
249
250	std::vector<StringRef> archs;
251	for (uint32_t i = `0`, n = read32be(P: &hdr->nfat_arch); i < n; ++i) {
252	if (reinterpret_cast<const char *>(arch + i + `1`) >
253	buf + mbref.getBufferSize()) {
254	error(msg: path + ": fat_arch struct extends beyond end of file");
255	return std::nullopt;
256	}
257
258	uint32_t cpuType = read32be(P: &arch[i].cputype);
259	uint32_t cpuSubtype =
260	read32be(P: &arch[i].cpusubtype) & ~MachO::CPU_SUBTYPE_MASK;
261
262	// FIXME: LD64 has a more complex fallback logic here.
263	// Consider implementing that as well?
264	if (cpuType != static_cast<uint32_t>(target->cpuType) \|\|
265	cpuSubtype != target->cpuSubtype) {
266	archs.emplace_back(args: getArchName (cpuType, cpuSubtype));
267	continue;
268	}
269
270	uint32_t offset = read32be(P: &arch[i].offset);
271	uint32_t size = read32be(P: &arch[i].size);
272	if (offset + size > mbref.getBufferSize())
273	error(msg: path + ": slice extends beyond end of file");
274	if (tar)
275	tar ->append(Path: relativeToRoot(path), Data: mbref.getBuffer());
276	return cachedReads [key] = MemoryBufferRef(StringRef(buf + offset, size),
277	path.copy(A&: bAlloc));
278	}
279
280	auto targetArchName = getArchName (target->cpuType, target->cpuSubtype);
281	warn(msg: path + ": ignoring file because it is universal (" + join(R&: archs, Separator: ",") +
282	") but does not contain the " + targetArchName + " architecture");
283	return std::nullopt;
284	}
285
286	InputFile::InputFile(Kind kind, const InterfaceFile &interface)
287	: id(idCount++), fileKind(kind), name(saver().save(S: interface.getPath())) {}
288
289	// Some sections comprise of fixed-size records, so instead of splitting them at
290	// symbol boundaries, we split them based on size. Records are distinct from
291	// literals in that they may contain references to other sections, instead of
292	// being leaf nodes in the InputSection graph.
293	//
294	// Note that "record" is a term I came up with. In contrast, "literal" is a term
295	// used by the Mach-O format.
296	static std::optional<size_t> getRecordSize(StringRef segname, StringRef name) {
297	if (name == section_names::compactUnwind) {
298	if (segname == segment_names::ld)
299	return target->wordSize == `8` ? `32` : `20`;
300	}
301	if (!config ->dedupStrings)
302	return {};
303
304	if (name == section_names::cfString && segname == segment_names::data)
305	return target->wordSize == `8` ? `32` : `16`;
306
307	if (config ->icfLevel == ICFLevel::none)
308	return {};
309
310	if (name == section_names::objcClassRefs && segname == segment_names::data)
311	return target->wordSize;
312
313	if (name == section_names::objcSelrefs && segname == segment_names::data)
314	return target->wordSize;
315	return {};
316	}
317
318	static Error parseCallGraph(ArrayRef<uint8_t> data,
319	std::vector<CallGraphEntry> &callGraph) {
320	TimeTraceScope timeScope("Parsing call graph section");
321	BinaryStreamReader reader(data, llvm::endianness::little);
322	while (!reader.empty()) {
323	uint32_t fromIndex, toIndex;
324	uint64_t count;
325	if (Error err = reader.readInteger(Dest&: fromIndex))
326	return err;
327	if (Error err = reader.readInteger(Dest&: toIndex))
328	return err;
329	if (Error err = reader.readInteger(Dest&: count))
330	return err;
331	callGraph.emplace_back(args&: fromIndex, args&: toIndex, args&: count);
332	}
333	return Error::success();
334	}
335
336	// Parse the sequence of sections within a single LC_SEGMENT(_64).
337	// Split each section into subsections.
338	template <class SectionHeader>
339	void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
340	sections.reserve(n: sectionHeaders.size());
341	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
342
343	for (const SectionHeader &sec : sectionHeaders) {
344	StringRef name =
345	StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
346	StringRef segname =
347	StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
348	sections.push_back(make<Section>(this, segname, name, sec.flags, sec.addr));
349	if (sec.align >= `32`) {
350	error("alignment " + std::to_string(sec.align) + " of section " + name +
351	" is too large");
352	continue;
353	}
354	Section &section = *sections.back();
355	uint32_t align = `1` << sec.align;
356	ArrayRef<uint8_t> data = {isZeroFill(sec.flags) ? nullptr
357	: buf + sec.offset,
358	static_cast<size_t>(sec.size)};
359
360	auto splitRecords = [&](size_t recordSize) -> void {
361	if (data.empty())
362	return;
363	Subsections &subsections = section.subsections;
364	subsections.reserve(n: data.size() / recordSize);
365	for (uint64_t off = `0`; off < data.size(); off += recordSize) {
366	auto *isec = make<ConcatInputSection>(
367	args&: section, args: data.slice(N: off, M: std::min(a: data.size(), b: recordSize)), args&: align);
368	subsections.push_back(x: {.offset: off, .isec: isec});
369	}
370	section.doneSplitting = true;
371	};
372
373	if (sectionType(sec.flags) == S_CSTRING_LITERALS) {
374	if (sec.nreloc)
375	fatal(toString(f: this) + ": " + sec.segname + "," + sec.sectname +
376	" contains relocations, which is unsupported");
377	bool dedupLiterals =
378	name == section_names::objcMethname \|\| config ->dedupStrings;
379	InputSection *isec =
380	make<CStringInputSection>(args&: section, args&: data, args&: align, args&: dedupLiterals);
381	// FIXME: parallelize this?
382	cast<CStringInputSection>(Val: isec)->splitIntoPieces();
383	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
384	} else if (isWordLiteralSection(sec.flags)) {
385	if (sec.nreloc)
386	fatal(toString(f: this) + ": " + sec.segname + "," + sec.sectname +
387	" contains relocations, which is unsupported");
388	InputSection *isec = make<WordLiteralInputSection>(args&: section, args&: data, args&: align);
389	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
390	} else if (auto recordSize = getRecordSize(segname, name)) {
391	splitRecords(*recordSize);
392	} else if (name == section_names::ehFrame &&
393	segname == segment_names::text) {
394	splitEhFrames(dataArr: data, ehFrameSection&: *sections.back());
395	} else if (segname == segment_names::llvm) {
396	if (config ->callGraphProfileSort && name == section_names::cgProfile)
397	checkError(e: parseCallGraph(data, callGraph));
398	// ld64 does not appear to emit contents from sections within the __LLVM
399	// segment. Symbols within those sections point to bitcode metadata
400	// instead of actual symbols. Global symbols within those sections could
401	// have the same name without causing duplicate symbol errors. To avoid
402	// spurious duplicate symbol errors, we do not parse these sections.
403	// TODO: Evaluate whether the bitcode metadata is needed.
404	} else if (name == section_names::objCImageInfo &&
405	segname == segment_names::data) {
406	objCImageInfo = data;
407	} else {
408	if (name == section_names::addrSig)
409	addrSigSection = sections.back();
410
411	auto *isec = make<ConcatInputSection>(args&: section, args&: data, args&: align);
412	if (isDebugSection(flags: isec->getFlags()) &&
413	isec->getSegName() == segment_names::dwarf) {
414	// Instead of emitting DWARF sections, we emit STABS symbols to the
415	// object files that contain them. We filter them out early to avoid
416	// parsing their relocations unnecessarily.
417	debugSections.push_back(x: isec);
418	} else {
419	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
420	}
421	}
422	}
423	}
424
425	void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
426	EhReader reader(this, data, /dataOff=/`0`);
427	size_t off = `0`;
428	while (off < reader.size()) {
429	uint64_t frameOff = off;
430	uint64_t length = reader.readLength(off: &off);
431	if (length == `0`)
432	break;
433	uint64_t fullLength = length + (off - frameOff);
434	off += length;
435	// We hard-code an alignment of 1 here because we don't actually want our
436	// EH frames to be aligned to the section alignment. EH frame decoders don't
437	// expect this alignment. Moreover, each EH frame must start where the
438	// previous one ends, and where it ends is indicated by the length field.
439	// Unless we update the length field (troublesome), we should keep the
440	// alignment to 1.
441	// Note that we still want to preserve the alignment of the overall section,
442	// just not of the individual EH frames.
443	ehFrameSection.subsections.push_back(
444	x: {.offset: frameOff, .isec: make<ConcatInputSection>(args&: ehFrameSection,
445	args: data.slice(N: frameOff, M: fullLength),
446	/align=/args: `1`)});
447	}
448	ehFrameSection.doneSplitting = true;
449	}
450
451	template <class T>
452	static Section findContainingSection(const* std::vector<Section *> &sections,
453	T *offset) {
454	static_assert(std::is_same<uint64_t, T>::value \|\|
455	std::is_same<uint32_t, T>::value,
456	"unexpected type for offset");
457	auto it = std::prev(llvm::upper_bound(
458	sections, *offset,
459	[](uint64_t value, const Section sec) { return* value < sec->addr; }));
460	offset -= (it)->addr;
461	return *it;
462	}
463
464	// Find the subsection corresponding to the greatest section offset that is <=
465	// that of the given offset.
466	//
467	// offset: an offset relative to the start of the original InputSection (before
468	// any subsection splitting has occurred). It will be updated to represent the
469	// same location as an offset relative to the start of the containing
470	// subsection.
471	template <class T>
472	static InputSection findContainingSubsection(const* Section &section,
473	T *offset) {
474	static_assert(std::is_same<uint64_t, T>::value \|\|
475	std::is_same<uint32_t, T>::value,
476	"unexpected type for offset");
477	auto it = std::prev(llvm::upper_bound(
478	section.subsections, *offset,
479	[](uint64_t value, Subsection subsec) { return value < subsec.offset; }));
480	*offset -= it->offset;
481	return it->isec;
482	}
483
484	// Find a symbol at offset `off` within `isec`.
485	static Defined findSymbolAtOffset(const* ConcatInputSection *isec,
486	uint64_t off) {
487	auto it = llvm::lower_bound(Range: isec->symbols, Value&: off, C: [](Defined *d, uint64_t off) {
488	return d->value < off;
489	});
490	// The offset should point at the exact address of a symbol (with no addend.)
491	if (it == isec->symbols.end() \|\| (*it)->value != off) {
492	assert(isec->wasCoalesced);
493	return nullptr;
494	}
495	return *it;
496	}
497
498	template <class SectionHeader>
499	static bool validateRelocationInfo(InputFile file, const* SectionHeader &sec,
500	relocation_info rel) {
501	const RelocAttrs &relocAttrs = target->getRelocAttrs(type: rel.r_type);
502	bool valid = true;
503	auto message = [relocAttrs, file, sec, rel, &valid](const Twine &diagnostic) {
504	valid = false;
505	return (relocAttrs.name + " relocation " + diagnostic + " at offset " +
506	std::to_string(val: rel.r_address) + " of " + sec.segname + "," +
507	sec.sectname + " in " + toString(f: file))
508	.str();
509	};
510
511	if (!relocAttrs.hasAttr(b: RelocAttrBits::LOCAL) && !rel.r_extern)
512	error(message("must be extern"));
513	if (relocAttrs.hasAttr(b: RelocAttrBits::PCREL) != rel.r_pcrel)
514	error(message(Twine ("must ") + (rel.r_pcrel ? "not " : "") +
515	"be PC-relative"));
516	if (isThreadLocalVariables(sec.flags) &&
517	!relocAttrs.hasAttr(b: RelocAttrBits::UNSIGNED))
518	error(message("not allowed in thread-local section, must be UNSIGNED"));
519	if (rel.r_length < `2` \|\| rel.r_length > `3` \|\|
520	!relocAttrs.hasAttr(b: static_cast<RelocAttrBits>(`1` << rel.r_length))) {
521	static SmallVector<StringRef, `4`> widths{"0", "4", "8", "4 or 8"};
522	error(message("has width " + std::to_string(val: `1` << rel.r_length) +
523	" bytes, but must be " +
524	widths [(static_cast<int>(relocAttrs.bits) >> `2`) & `3`] +
525	" bytes"));
526	}
527	return valid;
528	}
529
530	template <class SectionHeader>
531	void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
532	const SectionHeader &sec, Section &section) {
533	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
534	ArrayRef<relocation_info> relInfos(
535	reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
536
537	Subsections &subsections = section.subsections;
538	auto subsecIt = subsections.rbegin();
539	for (size_t i = `0`; i < relInfos.size(); i++) {
540	// Paired relocations serve as Mach-O's method for attaching a
541	// supplemental datum to a primary relocation record. ELF does not
542	// need them because the _RELOC_RELA records contain the extra*
543	// addend field, vs. _RELOC_REL which omit the addend.*
544	//
545	// The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
546	// and the paired _RELOC_UNSIGNED record holds the minuend. The*
547	// datum for each is a symbolic address. The result is the offset
548	// between two addresses.
549	//
550	// The ARM64_RELOC_ADDEND record holds the addend, and the paired
551	// ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
552	// base symbolic address.
553	//
554	// Note: X86 does not use _RELOC_ADDEND because it can embed an addend into*
555	// the instruction stream. On X86, a relocatable address field always
556	// occupies an entire contiguous sequence of byte(s), so there is no need to
557	// merge opcode bits with address bits. Therefore, it's easy and convenient
558	// to store addends in the instruction-stream bytes that would otherwise
559	// contain zeroes. By contrast, RISC ISAs such as ARM64 mix opcode bits with
560	// address bits so that bitwise arithmetic is necessary to extract and
561	// insert them. Storing addends in the instruction stream is possible, but
562	// inconvenient and more costly at link time.
563
564	relocation_info relInfo = relInfos [i];
565	bool isSubtrahend =
566	target->hasAttr(type: relInfo.r_type, bit: RelocAttrBits::SUBTRAHEND);
567	int64_t pairedAddend = `0`;
568	if (target->hasAttr(type: relInfo.r_type, bit: RelocAttrBits::ADDEND)) {
569	pairedAddend = SignExtend64<`24`>(x: relInfo.r_symbolnum);
570	relInfo = relInfos [++i];
571	}
572	assert(i < relInfos.size());
573	if (!validateRelocationInfo(this, sec, relInfo))
574	continue;
575	if (relInfo.r_address & R_SCATTERED)
576	fatal(msg: "TODO: Scattered relocations not supported");
577
578	int64_t embeddedAddend = target->getEmbeddedAddend(mb, offset: sec.offset, relInfo);
579	assert(!(embeddedAddend && pairedAddend));
580	int64_t totalAddend = pairedAddend + embeddedAddend;
581	Reloc r;
582	r.type = relInfo.r_type;
583	r.pcrel = relInfo.r_pcrel;
584	r.length = relInfo.r_length;
585	r.offset = relInfo.r_address;
586	if (relInfo.r_extern) {
587	r.referent = symbols [relInfo.r_symbolnum];
588	r.addend = isSubtrahend ? `0` : totalAddend;
589	} else {
590	assert(!isSubtrahend);
591	const SectionHeader &referentSecHead =
592	sectionHeaders[relInfo.r_symbolnum - `1`];
593	uint64_t referentOffset;
594	if (relInfo.r_pcrel) {
595	// The implicit addend for pcrel section relocations is the pcrel offset
596	// in terms of the addresses in the input file. Here we adjust it so
597	// that it describes the offset from the start of the referent section.
598	// FIXME This logic was written around x86_64 behavior -- ARM64 doesn't
599	// have pcrel section relocations. We may want to factor this out into
600	// the arch-specific .cpp file.
601	assert(target->hasAttr(r.type, RelocAttrBits::BYTE4));
602	referentOffset = sec.addr + relInfo.r_address + `4` + totalAddend -
603	referentSecHead.addr;
604	} else {
605	// The addend for a non-pcrel relocation is its absolute address.
606	referentOffset = totalAddend - referentSecHead.addr;
607	}
608	r.referent = findContainingSubsection(section: *sections [relInfo.r_symbolnum - `1`],
609	offset: &referentOffset);
610	r.addend = referentOffset;
611	}
612
613	// Find the subsection that this relocation belongs to.
614	// Though not required by the Mach-O format, clang and gcc seem to emit
615	// relocations in order, so let's take advantage of it. However, ld64 emits
616	// unsorted relocations (in `-r` mode), so we have a fallback for that
617	// uncommon case.
618	InputSection *subsec;
619	while (subsecIt != subsections.rend() && subsecIt ->offset > r.offset)
620	++subsecIt;
621	if (subsecIt == subsections.rend() \|\|
622	subsecIt ->offset + subsecIt ->isec->getSize() <= r.offset) {
623	subsec = findContainingSubsection(section, offset: &r.offset);
624	// Now that we know the relocs are unsorted, avoid trying the 'fast path'
625	// for the other relocations.
626	subsecIt = subsections.rend();
627	} else {
628	subsec = subsecIt ->isec;
629	r.offset -= subsecIt ->offset;
630	}
631	subsec->relocs.push_back(x: r);
632
633	if (isSubtrahend) {
634	relocation_info minuendInfo = relInfos [++i];
635	// SUBTRACTOR relocations should always be followed by an UNSIGNED one
636	// attached to the same address.
637	assert(target->hasAttr(minuendInfo.r_type, RelocAttrBits::UNSIGNED) &&
638	relInfo.r_address == minuendInfo.r_address);
639	Reloc p;
640	p.type = minuendInfo.r_type;
641	if (minuendInfo.r_extern) {
642	p.referent = symbols [minuendInfo.r_symbolnum];
643	p.addend = totalAddend;
644	} else {
645	uint64_t referentOffset =
646	totalAddend - sectionHeaders[minuendInfo.r_symbolnum - `1`].addr;
647	p.referent = findContainingSubsection(
648	section: *sections [minuendInfo.r_symbolnum - `1`], offset: &referentOffset);
649	p.addend = referentOffset;
650	}
651	subsec->relocs.push_back(x: p);
652	}
653	}
654	}
655
656	template <class NList>
657	static macho::Symbol createDefined(const* NList &sym, StringRef name,
658	InputSection *isec, uint64_t value,
659	uint64_t size, bool forceHidden) {
660	// Symbol scope is determined by sym.n_type & (N_EXT \| N_PEXT):
661	// N_EXT: Global symbols. These go in the symbol table during the link,
662	// and also in the export table of the output so that the dynamic
663	// linker sees them.
664	// N_EXT \| N_PEXT: Linkage unit (think: dylib) scoped. These go in the
665	// symbol table during the link so that duplicates are
666	// either reported (for non-weak symbols) or merged
667	// (for weak symbols), but they do not go in the export
668	// table of the output.
669	// N_PEXT: llvm-mc does not emit these, but `ld -r` (wherein ld64 emits
670	// object files) may produce them. LLD does not yet support -r.
671	// These are translation-unit scoped, identical to the `0` case.
672	// 0: Translation-unit scoped. These are not in the symbol table during
673	// link, and not in the export table of the output either.
674	bool isWeakDefCanBeHidden =
675	(sym.n_desc & (N_WEAK_DEF \| N_WEAK_REF)) == (N_WEAK_DEF \| N_WEAK_REF);
676
677	assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
678
679	if (sym.n_type & N_EXT) {
680	// -load_hidden makes us treat global symbols as linkage unit scoped.
681	// Duplicates are reported but the symbol does not go in the export trie.
682	bool isPrivateExtern = sym.n_type & N_PEXT \|\| forceHidden;
683
684	// lld's behavior for merging symbols is slightly different from ld64:
685	// ld64 picks the winning symbol based on several criteria (see
686	// pickBetweenRegularAtoms() in ld64's SymbolTable.cpp), while lld
687	// just merges metadata and keeps the contents of the first symbol
688	// with that name (see SymbolTable::addDefined). For:
689	// inline function F in a TU built with -fvisibility-inlines-hidden*
690	// and inline function F in another TU built without that flag*
691	// ld64 will pick the one from the file built without
692	// -fvisibility-inlines-hidden.
693	// lld will instead pick the one listed first on the link command line and
694	// give it visibility as if the function was built without
695	// -fvisibility-inlines-hidden.
696	// If both functions have the same contents, this will have the same
697	// behavior. If not, it won't, but the input had an ODR violation in
698	// that case.
699	//
700	// Similarly, merging a symbol
701	// that's isPrivateExtern and not isWeakDefCanBeHidden with one
702	// that's not isPrivateExtern but isWeakDefCanBeHidden technically
703	// should produce one
704	// that's not isPrivateExtern but isWeakDefCanBeHidden. That matters
705	// with ld64's semantics, because it means the non-private-extern
706	// definition will continue to take priority if more private extern
707	// definitions are encountered. With lld's semantics there's no observable
708	// difference between a symbol that's isWeakDefCanBeHidden(autohide) or one
709	// that's privateExtern -- neither makes it into the dynamic symbol table,
710	// unless the autohide symbol is explicitly exported.
711	// But if a symbol is both privateExtern and autohide then it can't
712	// be exported.
713	// So we nullify the autohide flag when privateExtern is present
714	// and promote the symbol to privateExtern when it is not already.
715	if (isWeakDefCanBeHidden && isPrivateExtern)
716	isWeakDefCanBeHidden = false;
717	else if (isWeakDefCanBeHidden)
718	isPrivateExtern = true;
719	return symtab ->addDefined(
720	name, isec->getFile(), isec, value, size, isWeakDef: sym.n_desc & N_WEAK_DEF,
721	isPrivateExtern, isReferencedDynamically: sym.n_desc & REFERENCED_DYNAMICALLY,
722	noDeadStrip: sym.n_desc & N_NO_DEAD_STRIP, isWeakDefCanBeHidden);
723	}
724	bool includeInSymtab = !isPrivateLabel(name) && !isEhFrameSection(isec);
725	return make<Defined>(
726	name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
727	/isExternal=/false, /isPrivateExtern=/false, includeInSymtab,
728	sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
729	}
730
731	// Absolute symbols are defined symbols that do not have an associated
732	// InputSection. They cannot be weak.
733	template <class NList>
734	static macho::Symbol createAbsolute(const* NList &sym, InputFile *file,
735	StringRef name, bool forceHidden) {
736	assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
737
738	if (sym.n_type & N_EXT) {
739	bool isPrivateExtern = sym.n_type & N_PEXT \|\| forceHidden;
740	return symtab ->addDefined(name, file, nullptr, value: sym.n_value, /size=/`0`,
741	/isWeakDef=/false, isPrivateExtern,
742	/isReferencedDynamically=/false,
743	noDeadStrip: sym.n_desc & N_NO_DEAD_STRIP,
744	/isWeakDefCanBeHidden=/false);
745	}
746	return make<Defined>(name, file, nullptr, sym.n_value, /size=/`0`,
747	/isWeakDef=/false,
748	/isExternal=/false, /isPrivateExtern=/false,
749	/includeInSymtab=/true,
750	/isReferencedDynamically=/false,
751	sym.n_desc & N_NO_DEAD_STRIP);
752	}
753
754	template <class NList>
755	macho::Symbol ObjFile::parseNonSectionSymbol(const* NList &sym,
756	const char *strtab) {
757	StringRef name = StringRef(strtab + sym.n_strx);
758	uint8_t type = sym.n_type & N_TYPE;
759	bool isPrivateExtern = sym.n_type & N_PEXT \|\| forceHidden;
760	switch (type) {
761	case N_UNDF:
762	return sym.n_value == `0`
763	? symtab ->addUndefined(name, this, isWeakRef: sym.n_desc & N_WEAK_REF)
764	: symtab ->addCommon(name, this, size: sym.n_value,
765	align: `1` << GET_COMM_ALIGN(sym.n_desc),
766	isPrivateExtern);
767	case N_ABS:
768	return createAbsolute(sym, this, name, forceHidden);
769	case N_INDR: {
770	// Not much point in making local aliases -- relocs in the current file can
771	// just refer to the actual symbol itself. ld64 ignores these symbols too.
772	if (!(sym.n_type & N_EXT))
773	return nullptr;
774	StringRef aliasedName = StringRef(strtab + sym.n_value);
775	// isPrivateExtern is the only symbol flag that has an impact on the final
776	// aliased symbol.
777	auto alias = make<AliasSymbol>(args: this*, args&: name, args&: aliasedName, args&: isPrivateExtern);
778	aliases.push_back(x: alias);
779	return alias;
780	}
781	case N_PBUD:
782	error(msg: "TODO: support symbols of type N_PBUD");
783	return nullptr;
784	case N_SECT:
785	llvm_unreachable(
786	"N_SECT symbols should not be passed to parseNonSectionSymbol");
787	default:
788	llvm_unreachable("invalid symbol type");
789	}
790	}
791
792	template <class NList> static bool isUndef(const NList &sym) {
793	return (sym.n_type & N_TYPE) == N_UNDF && sym.n_value == `0`;
794	}
795
796	template <class LP>
797	void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
798	ArrayRef<typename LP::nlist> nList,
799	const char strtab, bool* subsectionsViaSymbols) {
800	using NList = typename LP::nlist;
801
802	// Groups indices of the symbols by the sections that contain them.
803	std::vector<std::vector<uint32_t>> symbolsBySection(sections.size());
804	symbols.resize(nList.size());
805	SmallVector<unsigned, `32`> undefineds;
806	for (uint32_t i = `0`; i < nList.size(); ++i) {
807	const NList &sym = nList[i];
808
809	// Ignore debug symbols for now.
810	// FIXME: may need special handling.
811	if (sym.n_type & N_STAB)
812	continue;
813
814	if ((sym.n_type & N_TYPE) == N_SECT) {
815	Subsections &subsections = sections[sym.n_sect - `1`]->subsections;
816	// parseSections() may have chosen not to parse this section.
817	if (subsections.empty())
818	continue;
819	symbolsBySection[sym.n_sect - `1`].push_back(i);
820	} else if (isUndef(sym)) {
821	undefineds.push_back(Elt: i);
822	} else {
823	symbols [i] = parseNonSectionSymbol(sym, strtab);
824	}
825	}
826
827	for (size_t i = `0`; i < sections.size(); ++i) {
828	Subsections &subsections = sections [i]->subsections;
829	if (subsections.empty())
830	continue;
831	std::vector<uint32_t> &symbolIndices = symbolsBySection [i];
832	uint64_t sectionAddr = sectionHeaders[i].addr;
833	uint32_t sectionAlign = `1u` << sectionHeaders[i].align;
834
835	// Some sections have already been split into subsections during
836	// parseSections(), so we simply need to match Symbols to the corresponding
837	// subsection here.
838	if (sections [i]->doneSplitting) {
839	for (size_t j = `0`; j < symbolIndices.size(); ++j) {
840	const uint32_t symIndex = symbolIndices [j];
841	const NList &sym = nList[symIndex];
842	StringRef name = strtab + sym.n_strx;
843	uint64_t symbolOffset = sym.n_value - sectionAddr;
844	InputSection *isec =
845	findContainingSubsection(section: *sections [i], offset: &symbolOffset);
846	if (symbolOffset != `0`) {
847	error(msg: toString(sec: *sections [i]) + ": symbol " + name +
848	" at misaligned offset");
849	continue;
850	}
851	symbols [symIndex] =
852	createDefined(sym, name, isec, `0`, isec->getSize(), forceHidden);
853	}
854	continue;
855	}
856	sections [i]->doneSplitting = true;
857
858	auto getSymName = [strtab](const NList& sym) -> StringRef {
859	return StringRef(strtab + sym.n_strx);
860	};
861
862	// Calculate symbol sizes and create subsections by splitting the sections
863	// along symbol boundaries.
864	// We populate subsections by repeatedly splitting the last (highest
865	// address) subsection.
866	llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
867	// Put extern weak symbols after other symbols at the same address so
868	// that weak symbol coalescing works correctly. See
869	// SymbolTable::addDefined() for details.
870	if (nList[lhs].n_value == nList[rhs].n_value &&
871	nList[lhs].n_type & N_EXT && nList[rhs].n_type & N_EXT)
872	return !(nList[lhs].n_desc & N_WEAK_DEF) && (nList[rhs].n_desc & N_WEAK_DEF);
873	return nList[lhs].n_value < nList[rhs].n_value;
874	});
875	for (size_t j = `0`; j < symbolIndices.size(); ++j) {
876	const uint32_t symIndex = symbolIndices [j];
877	const NList &sym = nList[symIndex];
878	StringRef name = getSymName(sym);
879	Subsection &subsec = subsections.back();
880	InputSection *isec = subsec.isec;
881
882	uint64_t subsecAddr = sectionAddr + subsec.offset;
883	size_t symbolOffset = sym.n_value - subsecAddr;
884	uint64_t symbolSize =
885	j + `1` < symbolIndices.size()
886	? nList[symbolIndices [j + `1`]].n_value - sym.n_value
887	: isec->data.size() - symbolOffset;
888	// There are 4 cases where we do not need to create a new subsection:
889	// 1. If the input file does not use subsections-via-symbols.
890	// 2. Multiple symbols at the same address only induce one subsection.
891	// (The symbolOffset == 0 check covers both this case as well as
892	// the first loop iteration.)
893	// 3. Alternative entry points do not induce new subsections.
894	// 4. If we have a literal section (e.g. __cstring and __literal4).
895	if (!subsectionsViaSymbols \|\| symbolOffset == `0` \|\|
896	sym.n_desc & N_ALT_ENTRY \|\| !isa<ConcatInputSection>(Val: isec)) {
897	isec->hasAltEntry = symbolOffset != `0`;
898	symbols [symIndex] = createDefined(sym, name, isec, symbolOffset,
899	symbolSize, forceHidden);
900	continue;
901	}
902	auto *concatIsec = cast<ConcatInputSection>(Val: isec);
903
904	auto nextIsec = make<ConcatInputSection>(args&: concatIsec);
905	nextIsec->wasCoalesced = false;
906	if (isZeroFill(flags: isec->getFlags())) {
907	// Zero-fill sections have NULL data.data() non-zero data.size()
908	nextIsec->data = {nullptr, isec->data.size() - symbolOffset};
909	isec->data = {nullptr, symbolOffset};
910	} else {
911	nextIsec->data = isec->data.slice(N: symbolOffset);
912	isec->data = isec->data.slice(N: `0`, M: symbolOffset);
913	}
914
915	// By construction, the symbol will be at offset zero in the new
916	// subsection.
917	symbols [symIndex] = createDefined(sym, name, nextIsec, /value=/`0`,
918	symbolSize, forceHidden);
919	// TODO: ld64 appears to preserve the original alignment as well as each
920	// subsection's offset from the last aligned address. We should consider
921	// emulating that behavior.
922	nextIsec->align = MinAlign(sectionAlign, sym.n_value);
923	subsections.push_back({sym.n_value - sectionAddr, nextIsec});
924	}
925	}
926
927	// Undefined symbols can trigger recursive fetch from Archives due to
928	// LazySymbols. Process defined symbols first so that the relative order
929	// between a defined symbol and an undefined symbol does not change the
930	// symbol resolution behavior. In addition, a set of interconnected symbols
931	// will all be resolved to the same file, instead of being resolved to
932	// different files.
933	for (unsigned i : undefineds)
934	symbols [i] = parseNonSectionSymbol(nList[i], strtab);
935	}
936
937	OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
938	StringRef sectName)
939	: InputFile (OpaqueKind, mb) {
940	const auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
941	ArrayRef<uint8_t> data = {buf, mb.getBufferSize()};
942	sections.push_back(x: make<Section>(/file=/args: this, args: segName.take_front(N: `16`),
943	args: sectName.take_front(N: `16`),
944	/flags=/args: `0`, /addr=/args: `0`));
945	Section &section = *sections.back();
946	ConcatInputSection *isec = make<ConcatInputSection>(args&: section, args&: data);
947	isec->live = true;
948	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
949	}
950
951	template <class LP>
952	void ObjFile::parseLinkerOptions(SmallVectorImpl<StringRef> &LCLinkerOptions) {
953	using Header = typename LP::mach_header;
954	auto hdr = reinterpret_cast<const* Header *>(mb.getBufferStart());
955
956	for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) {
957	StringRef data{reinterpret_cast<const char *>(cmd + `1`),
958	cmd->cmdsize - sizeof(linker_option_command)};
959	parseLCLinkerOption(LCLinkerOptions, this, cmd->count, data);
960	}
961	}
962
963	SmallVector<StringRef> macho::unprocessedLCLinkerOptions;
964	ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
965	bool lazy, bool forceHidden, bool compatArch,
966	bool builtFromBitcode)
967	: InputFile (ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden),
968	builtFromBitcode(builtFromBitcode) {
969	this->archiveName = std::string (archiveName);
970	this->compatArch = compatArch;
971	if (lazy) {
972	if (target->wordSize == `8`)
973	parseLazy<LP64>();
974	else
975	parseLazy<ILP32>();
976	} else {
977	if (target->wordSize == `8`)
978	parse<LP64>();
979	else
980	parse<ILP32>();
981	}
982	}
983
984	template <class LP> void ObjFile::parse() {
985	using Header = typename LP::mach_header;
986	using SegmentCommand = typename LP::segment_command;
987	using SectionHeader = typename LP::section;
988	using NList = typename LP::nlist;
989
990	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
991	auto hdr = reinterpret_cast<const* Header *>(mb.getBufferStart());
992
993	// If we've already checked the arch, then don't need to check again.
994	if (!compatArch)
995	return;
996	if (!(compatArch = compatWithTargetArch(this, hdr)))
997	return;
998
999	// We will resolve LC linker options once all native objects are loaded after
1000	// LTO is finished.
1001	SmallVector<StringRef, `4`> LCLinkerOptions;
1002	parseLinkerOptions<LP>(LCLinkerOptions);
1003	unprocessedLCLinkerOptions.append(RHS: LCLinkerOptions);
1004
1005	ArrayRef<SectionHeader> sectionHeaders;
1006	if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
1007	auto c = reinterpret_cast<const* SegmentCommand *>(cmd);
1008	sectionHeaders = ArrayRef<SectionHeader>{
1009	reinterpret_cast<const SectionHeader *>(c + `1`), c->nsects};
1010	parseSections(sectionHeaders);
1011	}
1012
1013	// TODO: Error on missing LC_SYMTAB?
1014	if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
1015	auto c = reinterpret_cast<const* symtab_command *>(cmd);
1016	ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
1017	c->nsyms);
1018	const char strtab = reinterpret_cast<const* char *>(buf) + c->stroff;
1019	bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
1020	parseSymbols<LP>(sectionHeaders, nList, strtab, subsectionsViaSymbols);
1021	}
1022
1023	// The relocations may refer to the symbols, so we parse them after we have
1024	// parsed all the symbols.
1025	for (size_t i = `0`, n = sections.size(); i < n; ++i)
1026	if (!sections [i]->subsections.empty())
1027	parseRelocations(sectionHeaders, sectionHeaders[i], *sections [i]);
1028
1029	parseDebugInfo();
1030
1031	Section ehFrameSection = nullptr*;
1032	Section compactUnwindSection = nullptr*;
1033	for (Section *sec : sections) {
1034	Section s = StringSwitch<Section >(sec->name)
1035	.Case(S: section_names::compactUnwind, Value: &compactUnwindSection)
1036	.Case(S: section_names::ehFrame, Value: &ehFrameSection)
1037	.Default(Value: nullptr);
1038	if (s)
1039	*s = sec;
1040	}
1041	if (compactUnwindSection)
1042	registerCompactUnwind(compactUnwindSection&: *compactUnwindSection);
1043	if (ehFrameSection)
1044	registerEhFrames(ehFrameSection&: *ehFrameSection);
1045	}
1046
1047	template <class LP> void ObjFile::parseLazy() {
1048	using Header = typename LP::mach_header;
1049	using NList = typename LP::nlist;
1050
1051	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1052	auto hdr = reinterpret_cast<const* Header *>(mb.getBufferStart());
1053
1054	if (!compatArch)
1055	return;
1056	if (!(compatArch = compatWithTargetArch(this, hdr)))
1057	return;
1058
1059	const load_command *cmd = findCommand(hdr, LC_SYMTAB);
1060	if (!cmd)
1061	return;
1062	auto c = reinterpret_cast<const* symtab_command *>(cmd);
1063	ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
1064	c->nsyms);
1065	const char strtab = reinterpret_cast<const* char *>(buf) + c->stroff;
1066	symbols.resize(nList.size());
1067	for (const auto &[i, sym] : llvm::enumerate(nList)) {
1068	if ((sym.n_type & N_EXT) && !isUndef(sym)) {
1069	// TODO: Bound checking
1070	StringRef name = strtab + sym.n_strx;
1071	symbols[i] = symtab ->addLazyObject(name, file&: *this);
1072	if (!lazy)
1073	break;
1074	}
1075	}
1076	}
1077
1078	void ObjFile::parseDebugInfo() {
1079	std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this);
1080	if (!dObj)
1081	return;
1082
1083	// We do not re-use the context from getDwarf() here as that function
1084	// constructs an expensive DWARFCache object.
1085	auto *ctx = make<DWARFContext>(
1086	args: std::move(dObj), args: "",
1087	args: [&](Error err) {
1088	warn(msg: toString(f: this) + ": " + toString(E: std::move(err)));
1089	},
1090	args: [&](Error warning) {
1091	warn(msg: toString(f: this) + ": " + toString(E: std::move(warning)));
1092	});
1093
1094	// TODO: Since object files can contain a lot of DWARF info, we should verify
1095	// that we are parsing just the info we need
1096	const DWARFContext::compile_unit_range &units = ctx->compile_units();
1097	// FIXME: There can be more than one compile unit per object file. See
1098	// PR48637.
1099	auto it = units.begin();
1100	compileUnit = it != units.end() ? it ->get() : nullptr;
1101	}
1102
1103	ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
1104	const auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1105	const load_command *cmd = findCommand(anyHdr: buf, types: LC_DATA_IN_CODE);
1106	if (!cmd)
1107	return {};
1108	const auto c = reinterpret_cast<const* linkedit_data_command *>(cmd);
1109	return {reinterpret_cast<const data_in_code_entry *>(buf + c->dataoff),
1110	c->datasize / sizeof(data_in_code_entry)};
1111	}
1112
1113	ArrayRef<uint8_t> ObjFile::getOptimizationHints() const {
1114	const auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1115	if (auto *cmd =
1116	findCommand<linkedit_data_command>(anyHdr: buf, types: LC_LINKER_OPTIMIZATION_HINT))
1117	return {buf + cmd->dataoff, cmd->datasize};
1118	return {};
1119	}
1120
1121	// Create pointers from symbols to their associated compact unwind entries.
1122	void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
1123	for (const Subsection &subsection : compactUnwindSection.subsections) {
1124	ConcatInputSection *isec = cast<ConcatInputSection>(Val: subsection.isec);
1125	// Hack!! Each compact unwind entry (CUE) has its UNSIGNED relocations embed
1126	// their addends in its data. Thus if ICF operated naively and compared the
1127	// entire contents of each CUE, entries with identical unwind info but e.g.
1128	// belonging to different functions would never be considered equivalent. To
1129	// work around this problem, we remove some parts of the data containing the
1130	// embedded addends. In particular, we remove the function address and LSDA
1131	// pointers. Since these locations are at the start and end of the entry,
1132	// we can do this using a simple, efficient slice rather than performing a
1133	// copy. We are not losing any information here because the embedded
1134	// addends have already been parsed in the corresponding Reloc structs.
1135	//
1136	// Removing these pointers would not be safe if they were pointers to
1137	// absolute symbols. In that case, there would be no corresponding
1138	// relocation. However, (AFAIK) MC cannot emit references to absolute
1139	// symbols for either the function address or the LSDA. However, it can* do*
1140	// so for the personality pointer, so we are not slicing that field away.
1141	//
1142	// Note that we do not adjust the offsets of the corresponding relocations;
1143	// instead, we rely on `relocateCompactUnwind()` to correctly handle these
1144	// truncated input sections.
1145	isec->data = isec->data.slice(N: target->wordSize, M: `8` + target->wordSize);
1146	uint32_t encoding = read32le(P: isec->data.data() + sizeof(uint32_t));
1147	// llvm-mc omits CU entries for functions that need DWARF encoding, but
1148	// `ld -r` doesn't. We can ignore them because we will re-synthesize these
1149	// CU entries from the DWARF info during the output phase.
1150	if ((encoding & static_cast<uint32_t>(UNWIND_MODE_MASK)) ==
1151	target->modeDwarfEncoding)
1152	continue;
1153
1154	ConcatInputSection *referentIsec;
1155	for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
1156	Reloc &r = *it;
1157	// CUE::functionAddress is at offset 0. Skip personality & LSDA relocs.
1158	if (r.offset != `0`) {
1159	++it;
1160	continue;
1161	}
1162	uint64_t add = r.addend;
1163	if (auto sym = cast_or_null<Defined>(Val: r.referent.dyn_cast<Symbol >())) {
1164	// Check whether the symbol defined in this file is the prevailing one.
1165	// Skip if it is e.g. a weak def that didn't prevail.
1166	if (sym->getFile() != this) {
1167	++it;
1168	continue;
1169	}
1170	add += sym->value;
1171	referentIsec = cast<ConcatInputSection>(Val: sym->isec());
1172	} else {
1173	referentIsec =
1174	cast<ConcatInputSection>(Val: r.referent.dyn_cast<InputSection *>());
1175	}
1176	// Unwind info lives in __DATA, and finalization of __TEXT will occur
1177	// before finalization of __DATA. Moreover, the finalization of unwind
1178	// info depends on the exact addresses that it references. So it is safe
1179	// for compact unwind to reference addresses in __TEXT, but not addresses
1180	// in any other segment.
1181	if (referentIsec->getSegName() != segment_names::text)
1182	error(msg: isec->getLocation(off: r.offset) + " references section " +
1183	referentIsec->getName() + " which is not in segment __TEXT");
1184	// The functionAddress relocations are typically section relocations.
1185	// However, unwind info operates on a per-symbol basis, so we search for
1186	// the function symbol here.
1187	Defined *d = findSymbolAtOffset(isec: referentIsec, off: add);
1188	if (!d) {
1189	++it;
1190	continue;
1191	}
1192	d->originalUnwindEntry = isec;
1193	// Now that the symbol points to the unwind entry, we can remove the reloc
1194	// that points from the unwind entry back to the symbol.
1195	//
1196	// First, the symbol keeps the unwind entry alive (and not vice versa), so
1197	// this keeps dead-stripping simple.
1198	//
1199	// Moreover, it reduces the work that ICF needs to do to figure out if
1200	// functions with unwind info are foldable.
1201	//
1202	// However, this does make it possible for ICF to fold CUEs that point to
1203	// distinct functions (if the CUEs are otherwise identical).
1204	// UnwindInfoSection takes care of this by re-duplicating the CUEs so that
1205	// each one can hold a distinct functionAddress value.
1206	//
1207	// Given that clang emits relocations in reverse order of address, this
1208	// relocation should be at the end of the vector for most of our input
1209	// object files, so this erase() is typically an O(1) operation.
1210	it = isec->relocs.erase(position: it);
1211	}
1212	}
1213	}
1214
1215	struct CIE {
1216	macho::Symbol personalitySymbol = nullptr*;
1217	bool fdesHaveAug = false;
1218	uint8_t lsdaPtrSize = `0`; // 0 => no LSDA
1219	uint8_t funcPtrSize = `0`;
1220	};
1221
1222	static uint8_t pointerEncodingToSize(uint8_t enc) {
1223	switch (enc & `0xf`) {
1224	case dwarf::DW_EH_PE_absptr:
1225	return target->wordSize;
1226	case dwarf::DW_EH_PE_sdata4:
1227	return `4`;
1228	case dwarf::DW_EH_PE_sdata8:
1229	// ld64 doesn't actually support sdata8, but this seems simple enough...
1230	return `8`;
1231	default:
1232	return `0`;
1233	};
1234	}
1235
1236	static CIE parseCIE(const InputSection isec, const* EhReader &reader,
1237	size_t off) {
1238	// Handling the full generality of possible DWARF encodings would be a major
1239	// pain. We instead take advantage of our knowledge of how llvm-mc encodes
1240	// DWARF and handle just that.
1241	constexpr uint8_t expectedPersonalityEnc =
1242	dwarf::DW_EH_PE_pcrel \| dwarf::DW_EH_PE_indirect \| dwarf::DW_EH_PE_sdata4;
1243
1244	CIE cie;
1245	uint8_t version = reader.readByte(off: &off);
1246	if (version != `1` && version != `3`)
1247	fatal(msg: "Expected CIE version of 1 or 3, got " + Twine (version));
1248	StringRef aug = reader.readString(off: &off);
1249	reader.skipLeb128(off: &off); // skip code alignment
1250	reader.skipLeb128(off: &off); // skip data alignment
1251	reader.skipLeb128(off: &off); // skip return address register
1252	reader.skipLeb128(off: &off); // skip aug data length
1253	uint64_t personalityAddrOff = `0`;
1254	for (char c : aug) {
1255	switch (c) {
1256	case `'z'`:
1257	cie.fdesHaveAug = true;
1258	break;
1259	case `'P'`: {
1260	uint8_t personalityEnc = reader.readByte(off: &off);
1261	if (personalityEnc != expectedPersonalityEnc)
1262	reader.failOn(errOff: off, msg: "unexpected personality encoding 0x" +
1263	Twine::utohexstr(Val: personalityEnc));
1264	personalityAddrOff = off;
1265	off += `4`;
1266	break;
1267	}
1268	case `'L'`: {
1269	uint8_t lsdaEnc = reader.readByte(off: &off);
1270	cie.lsdaPtrSize = pointerEncodingToSize(enc: lsdaEnc);
1271	if (cie.lsdaPtrSize == `0`)
1272	reader.failOn(errOff: off, msg: "unexpected LSDA encoding 0x" +
1273	Twine::utohexstr(Val: lsdaEnc));
1274	break;
1275	}
1276	case `'R'`: {
1277	uint8_t pointerEnc = reader.readByte(off: &off);
1278	cie.funcPtrSize = pointerEncodingToSize(enc: pointerEnc);
1279	if (cie.funcPtrSize == `0` \|\| !(pointerEnc & dwarf::DW_EH_PE_pcrel))
1280	reader.failOn(errOff: off, msg: "unexpected pointer encoding 0x" +
1281	Twine::utohexstr(Val: pointerEnc));
1282	break;
1283	}
1284	default:
1285	break;
1286	}
1287	}
1288	if (personalityAddrOff != `0`) {
1289	const auto *personalityReloc = isec->getRelocAt(off: personalityAddrOff);
1290	if (!personalityReloc)
1291	reader.failOn(errOff: off, msg: "Failed to locate relocation for personality symbol");
1292	cie.personalitySymbol = cast<macho::Symbol *>(Val: personalityReloc->referent);
1293	}
1294	return cie;
1295	}
1296
1297	// EH frame target addresses may be encoded as pcrel offsets. However, instead
1298	// of using an actual pcrel reloc, ld64 emits subtractor relocations instead.
1299	// This function recovers the target address from the subtractors, essentially
1300	// performing the inverse operation of EhRelocator.
1301	//
1302	// Concretely, we expect our relocations to write the value of `PC -
1303	// target_addr` to `PC`. `PC` itself is denoted by a minuend relocation that
1304	// points to a symbol plus an addend.
1305	//
1306	// It is important that the minuend relocation point to a symbol within the
1307	// same section as the fixup value, since sections may get moved around.
1308	//
1309	// For example, for arm64, llvm-mc emits relocations for the target function
1310	// address like so:
1311	//
1312	// ltmp:
1313	// <CIE start>
1314	// ...
1315	// <CIE end>
1316	// ... multiple FDEs ...
1317	// <FDE start>
1318	// <target function address - (ltmp + pcrel offset)>
1319	// ...
1320	//
1321	// If any of the FDEs in `multiple FDEs` get dead-stripped, then `FDE start`
1322	// will move to an earlier address, and `ltmp + pcrel offset` will no longer
1323	// reflect an accurate pcrel value. To avoid this problem, we "canonicalize"
1324	// our relocation by adding an `EH_Frame` symbol at `FDE start`, and updating
1325	// the reloc to be `target function address - (EH_Frame + new pcrel offset)`.
1326	//
1327	// If `Invert` is set, then we instead expect `target_addr - PC` to be written
1328	// to `PC`.
1329	template <bool Invert = false>
1330	Defined *
1331	targetSymFromCanonicalSubtractor(const InputSection *isec,
1332	std::vector<macho::Reloc>::iterator relocIt) {
1333	macho::Reloc &subtrahend = *relocIt;
1334	macho::Reloc &minuend = *std::next(x: relocIt);
1335	assert(target->hasAttr(subtrahend.type, RelocAttrBits::SUBTRAHEND));
1336	assert(target->hasAttr(minuend.type, RelocAttrBits::UNSIGNED));
1337	// Note: pcSym may not* be exactly at the PC; there's usually a non-zero*
1338	// addend.
1339	auto pcSym = cast<Defined>(Val: cast<macho::Symbol >(Val&: subtrahend.referent));
1340	Defined *target =
1341	cast_or_null<Defined>(Val: minuend.referent.dyn_cast<macho::Symbol *>());
1342	if (!pcSym) {
1343	auto *targetIsec =
1344	cast<ConcatInputSection>(Val: cast<InputSection *>(Val&: minuend.referent));
1345	target = findSymbolAtOffset(isec: targetIsec, off: minuend.addend);
1346	}
1347	if (Invert)
1348	std::swap(a&: pcSym, b&: target);
1349	if (pcSym->isec() == isec) {
1350	if (pcSym->value - (Invert ? -`1` : `1`) * minuend.addend != subtrahend.offset)
1351	fatal(msg: "invalid FDE relocation in __eh_frame");
1352	} else {
1353	// Ensure the pcReloc points to a symbol within the current EH frame.
1354	// HACK: we should really verify that the original relocation's semantics
1355	// are preserved. In particular, we should have
1356	// `oldSym->value + oldOffset == newSym + newOffset`. However, we don't
1357	// have an easy way to access the offsets from this point in the code; some
1358	// refactoring is needed for that.
1359	macho::Reloc &pcReloc = Invert ? minuend : subtrahend;
1360	pcReloc.referent = isec->symbols [`0`];
1361	assert(isec->symbols[`0`]->value == `0`);
1362	minuend.addend = pcReloc.offset * (Invert ? `1LL` : -`1LL`);
1363	}
1364	return target;
1365	}
1366
1367	Defined findSymbolAtAddress(const* std::vector<Section *> &sections,
1368	uint64_t addr) {
1369	Section *sec = findContainingSection(sections, offset: &addr);
1370	auto isec = cast<ConcatInputSection>(Val: findContainingSubsection(section: sec, offset: &addr));
1371	return findSymbolAtOffset(isec, off: addr);
1372	}
1373
1374	// For symbols that don't have compact unwind info, associate them with the more
1375	// general-purpose (and verbose) DWARF unwind info found in __eh_frame.
1376	//
1377	// This requires us to parse the contents of __eh_frame. See EhFrame.h for a
1378	// description of its format.
1379	//
1380	// While parsing, we also look for what MC calls "abs-ified" relocations -- they
1381	// are relocations which are implicitly encoded as offsets in the section data.
1382	// We convert them into explicit Reloc structs so that the EH frames can be
1383	// handled just like a regular ConcatInputSection later in our output phase.
1384	//
1385	// We also need to handle the case where our input object file has explicit
1386	// relocations. This is the case when e.g. it's the output of `ld -r`. We only
1387	// look for the "abs-ified" relocation if an explicit relocation is absent.
1388	void ObjFile::registerEhFrames(Section &ehFrameSection) {
1389	DenseMap<const InputSection *, CIE> cieMap;
1390	for (const Subsection &subsec : ehFrameSection.subsections) {
1391	auto *isec = cast<ConcatInputSection>(Val: subsec.isec);
1392	uint64_t isecOff = subsec.offset;
1393
1394	// Subtractor relocs require the subtrahend to be a symbol reloc. Ensure
1395	// that all EH frames have an associated symbol so that we can generate
1396	// subtractor relocs that reference them.
1397	if (isec->symbols.size() == `0`)
1398	make<Defined>(args: "EH_Frame", args: isec->getFile(), args&: isec, /value=/args: `0`,
1399	args: isec->getSize(), /isWeakDef=/args: false, /isExternal=/args: false,
1400	/isPrivateExtern=/args: false, /includeInSymtab=/args: false,
1401	/isReferencedDynamically=/args: false,
1402	/noDeadStrip=/args: false);
1403	else if (isec->symbols [`0`]->value != `0`)
1404	fatal(msg: "found symbol at unexpected offset in __eh_frame");
1405
1406	EhReader reader(this, isec->data, subsec.offset);
1407	size_t dataOff = `0`; // Offset from the start of the EH frame.
1408	reader.skipValidLength(off: &dataOff); // readLength() already validated this.
1409	// cieOffOff is the offset from the start of the EH frame to the cieOff
1410	// value, which is itself an offset from the current PC to a CIE.
1411	const size_t cieOffOff = dataOff;
1412
1413	EhRelocator ehRelocator(isec);
1414	auto cieOffRelocIt = llvm::find_if(
1415	Range&: isec->relocs, P: [=](const Reloc &r) { return r.offset == cieOffOff; });
1416	InputSection cieIsec = nullptr*;
1417	if (cieOffRelocIt != isec->relocs.end()) {
1418	// We already have an explicit relocation for the CIE offset.
1419	cieIsec =
1420	targetSymFromCanonicalSubtractor</Invert=/true>(isec, relocIt: cieOffRelocIt)
1421	->isec();
1422	dataOff += sizeof(uint32_t);
1423	} else {
1424	// If we haven't found a relocation, then the CIE offset is most likely
1425	// embedded in the section data (AKA an "abs-ified" reloc.). Parse that
1426	// and generate a Reloc struct.
1427	uint32_t cieMinuend = reader.readU32(off: &dataOff);
1428	if (cieMinuend == `0`) {
1429	cieIsec = isec;
1430	} else {
1431	uint32_t cieOff = isecOff + dataOff - cieMinuend;
1432	cieIsec = findContainingSubsection(section: ehFrameSection, offset: &cieOff);
1433	if (cieIsec == nullptr)
1434	fatal(msg: "failed to find CIE");
1435	}
1436	if (cieIsec != isec)
1437	ehRelocator.makeNegativePcRel(off: cieOffOff, target: cieIsec->symbols [`0`],
1438	/length=/`2`);
1439	}
1440	if (cieIsec == isec) {
1441	cieMap [cieIsec] = parseCIE(isec, reader, off: dataOff);
1442	continue;
1443	}
1444
1445	assert(cieMap.count(cieIsec));
1446	const CIE &cie = cieMap [cieIsec];
1447	// Offset of the function address within the EH frame.
1448	const size_t funcAddrOff = dataOff;
1449	uint64_t funcAddr = reader.readPointer(off: &dataOff, size: cie.funcPtrSize) +
1450	ehFrameSection.addr + isecOff + funcAddrOff;
1451	uint32_t funcLength = reader.readPointer(off: &dataOff, size: cie.funcPtrSize);
1452	size_t lsdaAddrOff = `0`; // Offset of the LSDA address within the EH frame.
1453	std::optional<uint64_t> lsdaAddrOpt;
1454	if (cie.fdesHaveAug) {
1455	reader.skipLeb128(off: &dataOff);
1456	lsdaAddrOff = dataOff;
1457	if (cie.lsdaPtrSize != `0`) {
1458	uint64_t lsdaOff = reader.readPointer(off: &dataOff, size: cie.lsdaPtrSize);
1459	if (lsdaOff != `0`) // FIXME possible to test this?
1460	lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
1461	}
1462	}
1463
1464	auto funcAddrRelocIt = isec->relocs.end();
1465	auto lsdaAddrRelocIt = isec->relocs.end();
1466	for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
1467	if (it ->offset == funcAddrOff)
1468	funcAddrRelocIt = it ++; // Found subtrahend; skip over minuend reloc
1469	else if (lsdaAddrOpt && it ->offset == lsdaAddrOff)
1470	lsdaAddrRelocIt = it ++; // Found subtrahend; skip over minuend reloc
1471	}
1472
1473	Defined *funcSym;
1474	if (funcAddrRelocIt != isec->relocs.end()) {
1475	funcSym = targetSymFromCanonicalSubtractor(isec, relocIt: funcAddrRelocIt);
1476	// Canonicalize the symbol. If there are multiple symbols at the same
1477	// address, we want both `registerEhFrame` and `registerCompactUnwind`
1478	// to register the unwind entry under same symbol.
1479	// This is not particularly efficient, but we should run into this case
1480	// infrequently (only when handling the output of `ld -r`).
1481	if (funcSym->isec())
1482	funcSym = findSymbolAtOffset(isec: cast<ConcatInputSection>(Val: funcSym->isec()),
1483	off: funcSym->value);
1484	} else {
1485	funcSym = findSymbolAtAddress(sections, addr: funcAddr);
1486	ehRelocator.makePcRel(off: funcAddrOff, target: funcSym, length: target->p2WordSize);
1487	}
1488	// The symbol has been coalesced, or already has a compact unwind entry.
1489	if (!funcSym \|\| funcSym->getFile() != this \|\| funcSym->unwindEntry()) {
1490	// We must prune unused FDEs for correctness, so we cannot rely on
1491	// -dead_strip being enabled.
1492	isec->live = false;
1493	continue;
1494	}
1495
1496	InputSection lsdaIsec = nullptr*;
1497	if (lsdaAddrRelocIt != isec->relocs.end()) {
1498	lsdaIsec =
1499	targetSymFromCanonicalSubtractor(isec, relocIt: lsdaAddrRelocIt)->isec();
1500	} else if (lsdaAddrOpt) {
1501	uint64_t lsdaAddr = *lsdaAddrOpt;
1502	Section *sec = findContainingSection(sections, offset: &lsdaAddr);
1503	lsdaIsec =
1504	cast<ConcatInputSection>(Val: findContainingSubsection(section: *sec, offset: &lsdaAddr));
1505	ehRelocator.makePcRel(off: lsdaAddrOff, target: lsdaIsec, length: target->p2WordSize);
1506	}
1507
1508	fdes [isec] = {.funcLength: funcLength, .personality: cie.personalitySymbol, .lsda: lsdaIsec};
1509	funcSym->originalUnwindEntry = isec;
1510	ehRelocator.commit();
1511	}
1512
1513	// __eh_frame is marked as S_ATTR_LIVE_SUPPORT in input files, because FDEs
1514	// are normally required to be kept alive if they reference a live symbol.
1515	// However, we've explicitly created a dependency from a symbol to its FDE, so
1516	// dead-stripping will just work as usual, and S_ATTR_LIVE_SUPPORT will only
1517	// serve to incorrectly prevent us from dead-stripping duplicate FDEs for a
1518	// live symbol (e.g. if there were multiple weak copies). Remove this flag to
1519	// let dead-stripping proceed correctly.
1520	ehFrameSection.flags &= ~S_ATTR_LIVE_SUPPORT;
1521	}
1522
1523	std::string ObjFile::sourceFile() const {
1524	const char *unitName = compileUnit->getUnitDIE().getShortName();
1525	// DWARF allows DW_AT_name to be absolute, in which case nothing should be
1526	// prepended. As for the styles, debug info can contain paths from any OS, not
1527	// necessarily an OS we're currently running on. Moreover different
1528	// compilation units can be compiled on different operating systems and linked
1529	// together later.
1530	if (sys::path::is_absolute(path: unitName, style: llvm::sys::path::Style::posix) \|\|
1531	sys::path::is_absolute(path: unitName, style: llvm::sys::path::Style::windows))
1532	return unitName;
1533	SmallString<`261`> dir(compileUnit->getCompilationDir());
1534	StringRef sep = sys::path::get_separator();
1535	// We don't use `path::append` here because we want an empty `dir` to result
1536	// in an absolute path. `append` would give us a relative path for that case.
1537	if (!dir.ends_with(Suffix: sep))
1538	dir += sep;
1539	return (dir + unitName).str();
1540	}
1541
1542	lld::DWARFCache *ObjFile::getDwarf() {
1543	llvm::call_once(flag&: initDwarf, F: [this]() {
1544	auto dwObj = DwarfObject::create(this);
1545	if (!dwObj)
1546	return;
1547	dwarfCache = std::make_unique<DWARFCache>(args: std::make_unique<DWARFContext>(
1548	args: std::move(dwObj), args: "",
1549	args: [&](Error err) { warn(msg: getName() + ": " + toString(E: std::move(err))); },
1550	args: [&](Error warning) {
1551	warn(msg: getName() + ": " + toString(E: std::move(warning)));
1552	}));
1553	});
1554
1555	return dwarfCache.get();
1556	}
1557	// The path can point to either a dylib or a .tbd file.
1558	static DylibFile loadDylib(StringRef path, DylibFile umbrella) {
1559	std::optional<MemoryBufferRef> mbref = readFile(path);
1560	if (!mbref) {
1561	error(msg: "could not read dylib file at " + path);
1562	return nullptr;
1563	}
1564	return loadDylib(mbref: *mbref, umbrella);
1565	}
1566
1567	// TBD files are parsed into a series of TAPI documents (InterfaceFiles), with
1568	// the first document storing child pointers to the rest of them. When we are
1569	// processing a given TBD file, we store that top-level document in
1570	// currentTopLevelTapi. When processing re-exports, we search its children for
1571	// potentially matching documents in the same TBD file. Note that the children
1572	// themselves don't point to further documents, i.e. this is a two-level tree.
1573	//
1574	// Re-exports can either refer to on-disk files, or to documents within .tbd
1575	// files.
1576	static DylibFile findDylib(StringRef path, DylibFile umbrella,
1577	const InterfaceFile *currentTopLevelTapi) {
1578	// Search order:
1579	// 1. Install name basename in -F / -L directories.
1580	{
1581	// Framework names can be in multiple formats:
1582	// - Foo.framework/Foo
1583	// - Foo.framework/Versions/A/Foo
1584	StringRef stem = path::stem(path);
1585	SmallString<`128`> frameworkName("/");
1586	frameworkName += stem;
1587	frameworkName += ".framework/";
1588	size_t i = path.rfind(Str: frameworkName);
1589	if (i != StringRef::npos) {
1590	StringRef frameworkPath = path.substr(Start: i + `1`);
1591	for (StringRef dir : config ->frameworkSearchPaths) {
1592	SmallString<`128`> candidate = dir;
1593	path::append(path&: candidate, a: frameworkPath);
1594	if (std::optional<StringRef> dylibPath =
1595	resolveDylibPath(path: candidate.str()))
1596	return loadDylib(path: *dylibPath, umbrella);
1597	}
1598	} else if (std::optional<StringRef> dylibPath = findPathCombination(
1599	name: stem, roots: config ->librarySearchPaths, extensions: {".tbd", ".dylib", ".so"}))
1600	return loadDylib(path: *dylibPath, umbrella);
1601	}
1602
1603	// 2. As absolute path.
1604	if (path::is_absolute(path, style: path::Style::posix))
1605	for (StringRef root : config ->systemLibraryRoots)
1606	if (std::optional<StringRef> dylibPath =
1607	resolveDylibPath(path: (root + path).str()))
1608	return loadDylib(path: *dylibPath, umbrella);
1609
1610	// 3. As relative path.
1611
1612	// TODO: Handle -dylib_file
1613
1614	// Replace @executable_path, @loader_path, @rpath prefixes in install name.
1615	SmallString<`128`> newPath;
1616	if (config ->outputType == MH_EXECUTE &&
1617	path.consume_front(Prefix: "@executable_path/")) {
1618	// ld64 allows overriding this with the undocumented flag -executable_path.
1619	// lld doesn't currently implement that flag.
1620	// FIXME: Consider using finalOutput instead of outputFile.
1621	path::append(path&: newPath, a: path::parent_path(path: config ->outputFile), b: path);
1622	path = newPath;
1623	} else if (path.consume_front(Prefix: "@loader_path/")) {
1624	fs::real_path(path: umbrella->getName(), output&: newPath);
1625	path::remove_filename(path&: newPath);
1626	path::append(path&: newPath, a: path);
1627	path = newPath;
1628	} else if (path.starts_with(Prefix: "@rpath/")) {
1629	for (StringRef rpath : umbrella->rpaths) {
1630	newPath.clear();
1631	if (rpath.consume_front(Prefix: "@loader_path/")) {
1632	fs::real_path(path: umbrella->getName(), output&: newPath);
1633	path::remove_filename(path&: newPath);
1634	}
1635	path::append(path&: newPath, a: rpath, b: path.drop_front(N: strlen(s: "@rpath/")));
1636	if (std::optional<StringRef> dylibPath = resolveDylibPath(path: newPath.str()))
1637	return loadDylib(path: *dylibPath, umbrella);
1638	}
1639	// If not found in umbrella, try the rpaths specified via -rpath too.
1640	for (StringRef rpath : config ->runtimePaths) {
1641	newPath.clear();
1642	if (rpath.consume_front(Prefix: "@loader_path/")) {
1643	fs::real_path(path: umbrella->getName(), output&: newPath);
1644	path::remove_filename(path&: newPath);
1645	}
1646	path::append(path&: newPath, a: rpath, b: path.drop_front(N: strlen(s: "@rpath/")));
1647	if (std::optional<StringRef> dylibPath = resolveDylibPath(path: newPath.str()))
1648	return loadDylib(path: *dylibPath, umbrella);
1649	}
1650	}
1651
1652	// FIXME: Should this be further up?
1653	if (currentTopLevelTapi) {
1654	for (InterfaceFile &child :
1655	make_pointee_range(Range: currentTopLevelTapi->documents())) {
1656	assert(child.documents().empty());
1657	if (path == child.getInstallName()) {
1658	auto file = make<DylibFile>(args&: child, args&: umbrella, /isBundleLoader=/args: false*,
1659	/explicitlyLinked=/args: false);
1660	file->parseReexports(interface: child);
1661	return file;
1662	}
1663	}
1664	}
1665
1666	if (std::optional<StringRef> dylibPath = resolveDylibPath(path))
1667	return loadDylib(path: *dylibPath, umbrella);
1668
1669	return nullptr;
1670	}
1671
1672	// If a re-exported dylib is public (lives in /usr/lib or
1673	// /System/Library/Frameworks), then it is considered implicitly linked: we
1674	// should bind to its symbols directly instead of via the re-exporting umbrella
1675	// library.
1676	static bool isImplicitlyLinked(StringRef path) {
1677	if (!config ->implicitDylibs)
1678	return false;
1679
1680	if (path::parent_path(path) == "/usr/lib")
1681	return true;
1682
1683	// Match /System/Library/Frameworks/$FOO.framework//$FOO
1684	if (path.consume_front(Prefix: "/System/Library/Frameworks/")) {
1685	StringRef frameworkName = path.take_until(F: [](char c) { return c == `'.'`; });
1686	return path::filename(path) == frameworkName;
1687	}
1688
1689	return false;
1690	}
1691
1692	void DylibFile::loadReexport(StringRef path, DylibFile *umbrella,
1693	const InterfaceFile *currentTopLevelTapi) {
1694	DylibFile *reexport = findDylib(path, umbrella, currentTopLevelTapi);
1695	if (!reexport) {
1696	// If not found in umbrella, retry since some rpaths might have been
1697	// defined in "this" dylib (which contains the LC_REEXPORT_DYLIB cmd) and
1698	// not in the umbrella.
1699	DylibFile reexport2 = findDylib(path, umbrella: this*, currentTopLevelTapi);
1700	if (!reexport2) {
1701	error(msg: toString(f: this) + ": unable to locate re-export with install name " +
1702	path);
1703	}
1704	}
1705	}
1706
1707	DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
1708	bool isBundleLoader, bool explicitlyLinked)
1709	: InputFile (DylibKind, mb), refState(RefState::Unreferenced),
1710	explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
1711	assert(!isBundleLoader \|\| !umbrella);
1712	if (umbrella == nullptr)
1713	umbrella = this;
1714	this->umbrella = umbrella;
1715
1716	auto hdr = reinterpret_cast<const* mach_header *>(mb.getBufferStart());
1717
1718	// Initialize installName.
1719	if (const load_command *cmd = findCommand(anyHdr: hdr, types: LC_ID_DYLIB)) {
1720	auto c = reinterpret_cast<const* dylib_command *>(cmd);
1721	currentVersion = read32le(P: &c->dylib.current_version);
1722	compatibilityVersion = read32le(P: &c->dylib.compatibility_version);
1723	installName =
1724	reinterpret_cast<const char *>(cmd) + read32le(P: &c->dylib.name);
1725	} else if (!isBundleLoader) {
1726	// macho_executable and macho_bundle don't have LC_ID_DYLIB,
1727	// so it's OK.
1728	error(msg: toString(f: this) + ": dylib missing LC_ID_DYLIB load command");
1729	return;
1730	}
1731
1732	if (config ->printEachFile)
1733	message(msg: toString(f: this));
1734	inputFiles.insert(X: this);
1735
1736	deadStrippable = hdr->flags & MH_DEAD_STRIPPABLE_DYLIB;
1737
1738	if (!checkCompatibility(input: this))
1739	return;
1740
1741	checkAppExtensionSafety(dylibIsAppExtensionSafe: hdr->flags & MH_APP_EXTENSION_SAFE);
1742
1743	for (auto *cmd : findCommands<rpath_command>(anyHdr: hdr, types: LC_RPATH)) {
1744	StringRef rpath{reinterpret_cast<const char *>(cmd) + cmd->path};
1745	rpaths.push_back(Elt: rpath);
1746	}
1747
1748	// Initialize symbols.
1749	bool canBeImplicitlyLinked = findCommand(anyHdr: hdr, types: LC_SUB_CLIENT) == nullptr;
1750	exportingFile = (canBeImplicitlyLinked && isImplicitlyLinked(path: installName))
1751	? this
1752	: this->umbrella;
1753
1754	if (!canBeImplicitlyLinked) {
1755	for (auto *cmd : findCommands<sub_client_command>(anyHdr: hdr, types: LC_SUB_CLIENT)) {
1756	StringRef allowableClient{reinterpret_cast<const char *>(cmd) +
1757	cmd->client};
1758	allowableClients.push_back(Elt: allowableClient);
1759	}
1760	}
1761
1762	const auto *dyldInfo = findCommand<dyld_info_command>(anyHdr: hdr, types: LC_DYLD_INFO_ONLY);
1763	const auto *exportsTrie =
1764	findCommand<linkedit_data_command>(anyHdr: hdr, types: LC_DYLD_EXPORTS_TRIE);
1765	if (dyldInfo && exportsTrie) {
1766	// It's unclear what should happen in this case. Maybe we should only error
1767	// out if the two load commands refer to different data?
1768	error(msg: toString(f: this) +
1769	": dylib has both LC_DYLD_INFO_ONLY and LC_DYLD_EXPORTS_TRIE");
1770	return;
1771	}
1772
1773	if (dyldInfo) {
1774	parseExportedSymbols(offset: dyldInfo->export_off, size: dyldInfo->export_size);
1775	} else if (exportsTrie) {
1776	parseExportedSymbols(offset: exportsTrie->dataoff, size: exportsTrie->datasize);
1777	} else {
1778	error(msg: "No LC_DYLD_INFO_ONLY or LC_DYLD_EXPORTS_TRIE found in " +
1779	toString(f: this));
1780	}
1781	}
1782
1783	void DylibFile::parseExportedSymbols(uint32_t offset, uint32_t size) {
1784	struct TrieEntry {
1785	StringRef name;
1786	uint64_t flags;
1787	};
1788
1789	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1790	std::vector<TrieEntry> entries;
1791	// Find all the $ld$ symbols to process first.*
1792	parseTrie(buf: buf + offset, size, [&](const Twine &name, uint64_t flags) {
1793	StringRef savedName = saver().save(S: name);
1794	if (handleLDSymbol(originalName: savedName))
1795	return;
1796	entries.push_back(x: {.name: savedName, .flags: flags});
1797	});
1798
1799	// Process the "normal" symbols.
1800	for (TrieEntry &entry : entries) {
1801	if (exportingFile->hiddenSymbols.contains(V: CachedHashStringRef (entry.name)))
1802	continue;
1803
1804	bool isWeakDef = entry.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
1805	bool isTlv = entry.flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
1806
1807	symbols.push_back(
1808	x: symtab ->addDylib(name: entry.name, file: exportingFile, isWeakDef, isTlv));
1809	}
1810	}
1811
1812	void DylibFile::parseLoadCommands(MemoryBufferRef mb) {
1813	auto hdr = reinterpret_cast<const* mach_header *>(mb.getBufferStart());
1814	const uint8_t p = reinterpret_cast<const* uint8_t *>(mb.getBufferStart()) +
1815	target->headerSize;
1816	for (uint32_t i = `0`, n = hdr->ncmds; i < n; ++i) {
1817	auto cmd = reinterpret_cast<const* load_command *>(p);
1818	p += cmd->cmdsize;
1819
1820	if (!(hdr->flags & MH_NO_REEXPORTED_DYLIBS) &&
1821	cmd->cmd == LC_REEXPORT_DYLIB) {
1822	const auto c = reinterpret_cast<const* dylib_command *>(cmd);
1823	StringRef reexportPath =
1824	reinterpret_cast<const char *>(c) + read32le(P: &c->dylib.name);
1825	loadReexport(path: reexportPath, umbrella: exportingFile, currentTopLevelTapi: nullptr);
1826	}
1827
1828	// FIXME: What about LC_LOAD_UPWARD_DYLIB, LC_LAZY_LOAD_DYLIB,
1829	// LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB (..are reexports from dylibs with
1830	// MH_NO_REEXPORTED_DYLIBS loaded for -flat_namespace)?
1831	if (config ->namespaceKind == NamespaceKind::flat &&
1832	cmd->cmd == LC_LOAD_DYLIB) {
1833	const auto c = reinterpret_cast<const* dylib_command *>(cmd);
1834	StringRef dylibPath =
1835	reinterpret_cast<const char *>(c) + read32le(P: &c->dylib.name);
1836	DylibFile dylib = findDylib(path: dylibPath, umbrella, currentTopLevelTapi: nullptr*);
1837	if (!dylib)
1838	error(msg: Twine("unable to locate library '") + dylibPath +
1839	"' loaded from '" + toString(f: this) + "' for -flat_namespace");
1840	}
1841	}
1842	}
1843
1844	// Some versions of Xcode ship with .tbd files that don't have the right
1845	// platform settings.
1846	constexpr std::array<StringRef, `3`> skipPlatformChecks{
1847	"/usr/lib/system/libsystem_kernel.dylib",
1848	"/usr/lib/system/libsystem_platform.dylib",
1849	"/usr/lib/system/libsystem_pthread.dylib"};
1850
1851	static bool skipPlatformCheckForCatalyst(const InterfaceFile &interface,
1852	bool explicitlyLinked) {
1853	// Catalyst outputs can link against implicitly linked macOS-only libraries.
1854	if (config ->platform() != PLATFORM_MACCATALYST \|\| explicitlyLinked)
1855	return false;
1856	return is_contained(Range: interface.targets(),
1857	Element: MachO::Target (config ->arch(), PLATFORM_MACOS));
1858	}
1859
1860	static bool isArchABICompatible(ArchitectureSet archSet,
1861	Architecture targetArch) {
1862	uint32_t cpuType;
1863	uint32_t targetCpuType;
1864	std::tie(args&: targetCpuType, args: std::ignore) = getCPUTypeFromArchitecture(Arch: targetArch);
1865
1866	return llvm::any_of(Range&: archSet, P: [&](const auto &p) {
1867	std::tie(args&: cpuType, args: std::ignore) = getCPUTypeFromArchitecture(p);
1868	return cpuType == targetCpuType;
1869	});
1870	}
1871
1872	static bool isTargetPlatformArchCompatible(
1873	InterfaceFile::const_target_range interfaceTargets, Target target) {
1874	if (is_contained(Range&: interfaceTargets, Element: target))
1875	return true;
1876
1877	if (config ->forceExactCpuSubtypeMatch)
1878	return false;
1879
1880	ArchitectureSet archSet;
1881	for (const auto &p : interfaceTargets)
1882	if (p.Platform == target.Platform)
1883	archSet.set(p.Arch);
1884	if (archSet.empty())
1885	return false;
1886
1887	return isArchABICompatible(archSet, targetArch: target.Arch);
1888	}
1889
1890	DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
1891	bool isBundleLoader, bool explicitlyLinked)
1892	: InputFile (DylibKind, interface), refState(RefState::Unreferenced),
1893	explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
1894	// FIXME: Add test for the missing TBD code path.
1895
1896	if (umbrella == nullptr)
1897	umbrella = this;
1898	this->umbrella = umbrella;
1899
1900	installName = saver().save(S: interface.getInstallName());
1901	compatibilityVersion = interface.getCompatibilityVersion().rawValue();
1902	currentVersion = interface.getCurrentVersion().rawValue();
1903	for (const auto &rpath : interface.rpaths())
1904	if (rpath.first == config ->platformInfo.target)
1905	rpaths.push_back(Elt: saver().save(S: rpath.second));
1906
1907	if (config ->printEachFile)
1908	message(msg: toString(f: this));
1909	inputFiles.insert(X: this);
1910
1911	if (!is_contained(Range: skipPlatformChecks, Element: installName) &&
1912	!isTargetPlatformArchCompatible(interfaceTargets: interface.targets(),
1913	target: config ->platformInfo.target) &&
1914	!skipPlatformCheckForCatalyst(interface, explicitlyLinked)) {
1915	error(msg: toString(f: this) + " is incompatible with " +
1916	std::string(config ->platformInfo.target));
1917	return;
1918	}
1919
1920	checkAppExtensionSafety(dylibIsAppExtensionSafe: interface.isApplicationExtensionSafe());
1921
1922	bool canBeImplicitlyLinked = interface.allowableClients().size() == `0`;
1923	exportingFile = (canBeImplicitlyLinked && isImplicitlyLinked(path: installName))
1924	? this
1925	: umbrella;
1926
1927	if (!canBeImplicitlyLinked)
1928	for (const auto &allowableClient : interface.allowableClients())
1929	allowableClients.push_back(
1930	Elt: *make<std::string>(args: allowableClient.getInstallName().data()));
1931
1932	auto addSymbol = [&](const llvm::MachO::Symbol &symbol,
1933	const Twine &name) -> void {
1934	StringRef savedName = saver().save(S: name);
1935	if (exportingFile->hiddenSymbols.contains(V: CachedHashStringRef (savedName)))
1936	return;
1937
1938	symbols.push_back(x: symtab ->addDylib(name: savedName, file: exportingFile,
1939	isWeakDef: symbol.isWeakDefined(),
1940	isTlv: symbol.isThreadLocalValue()));
1941	};
1942
1943	std::vector<const llvm::MachO::Symbol *> normalSymbols;
1944	normalSymbols.reserve(n: interface.symbolsCount());
1945	for (const auto *symbol : interface.symbols()) {
1946	if (!isArchABICompatible(archSet: symbol->getArchitectures(), targetArch: config ->arch()))
1947	continue;
1948	if (handleLDSymbol(originalName: symbol->getName()))
1949	continue;
1950
1951	switch (symbol->getKind()) {
1952	case EncodeKind::GlobalSymbol:
1953	case EncodeKind::ObjectiveCClass:
1954	case EncodeKind::ObjectiveCClassEHType:
1955	case EncodeKind::ObjectiveCInstanceVariable:
1956	normalSymbols.push_back(x: symbol);
1957	}
1958	}
1959	// interface.symbols() order is non-deterministic.
1960	llvm::sort(C&: normalSymbols,
1961	Comp: [](auto l, auto* r) { return* l->getName() < r->getName(); });
1962
1963	// TODO(compnerd) filter out symbols based on the target platform
1964	for (const auto *symbol : normalSymbols) {
1965	switch (symbol->getKind()) {
1966	case EncodeKind::GlobalSymbol:
1967	addSymbol (*symbol, symbol->getName());
1968	break;
1969	case EncodeKind::ObjectiveCClass:
1970	// XXX ld64 only creates these symbols when -ObjC is passed in. We may
1971	// want to emulate that.
1972	addSymbol (*symbol, objc::symbol_names::klass + symbol->getName());
1973	addSymbol (*symbol, objc::symbol_names::metaclass + symbol->getName());
1974	break;
1975	case EncodeKind::ObjectiveCClassEHType:
1976	addSymbol (*symbol, objc::symbol_names::ehtype + symbol->getName());
1977	break;
1978	case EncodeKind::ObjectiveCInstanceVariable:
1979	addSymbol (*symbol, objc::symbol_names::ivar + symbol->getName());
1980	break;
1981	}
1982	}
1983	}
1984
1985	DylibFile::DylibFile(DylibFile *umbrella)
1986	: InputFile (DylibKind, MemoryBufferRef{}), refState(RefState::Unreferenced),
1987	explicitlyLinked(false), isBundleLoader(false) {
1988	if (umbrella == nullptr)
1989	umbrella = this;
1990	this->umbrella = umbrella;
1991	}
1992
1993	void DylibFile::parseReexports(const InterfaceFile &interface) {
1994	const InterfaceFile *topLevel =
1995	interface.getParent() == nullptr ? &interface : interface.getParent();
1996	for (const InterfaceFileRef &intfRef : interface.reexportedLibraries()) {
1997	InterfaceFile::const_target_range targets = intfRef.targets();
1998	if (is_contained(Range: skipPlatformChecks, Element: intfRef.getInstallName()) \|\|
1999	isTargetPlatformArchCompatible(interfaceTargets: targets, target: config ->platformInfo.target))
2000	loadReexport(path: intfRef.getInstallName(), umbrella: exportingFile, currentTopLevelTapi: topLevel);
2001	}
2002	}
2003
2004	bool DylibFile::isExplicitlyLinked() const {
2005	if (!explicitlyLinked)
2006	return false;
2007
2008	// If this dylib was explicitly linked, but at least one of the symbols
2009	// of the synthetic dylibs it created via $ld$previous symbols is
2010	// referenced, then that synthetic dylib fulfils the explicit linkedness
2011	// and we can deadstrip this dylib if it's unreferenced.
2012	for (const auto *dylib : extraDylibs)
2013	if (dylib->isReferenced())
2014	return false;
2015
2016	return true;
2017	}
2018
2019	DylibFile *DylibFile::getSyntheticDylib(StringRef installName,
2020	uint32_t currentVersion,
2021	uint32_t compatVersion) {
2022	for (DylibFile *dylib : extraDylibs)
2023	if (dylib->installName == installName) {
2024	// FIXME: Check what to do if different $ld$previous symbols
2025	// request the same dylib, but with different versions.
2026	return dylib;
2027	}
2028
2029	auto dylib = make<DylibFile>(args: umbrella == this* ? nullptr : umbrella);
2030	dylib->installName = saver().save(S: installName);
2031	dylib->currentVersion = currentVersion;
2032	dylib->compatibilityVersion = compatVersion;
2033	extraDylibs.push_back(Elt: dylib);
2034	return dylib;
2035	}
2036
2037	// $ld$ symbols modify the properties/behavior of the library (e.g. its install
2038	// name, compatibility version or hide/add symbols) for specific target
2039	// versions.
2040	bool DylibFile::handleLDSymbol(StringRef originalName) {
2041	if (!originalName.starts_with(Prefix: "$ld$"))
2042	return false;
2043
2044	StringRef action;
2045	StringRef name;
2046	std::tie(args&: action, args&: name) = originalName.drop_front(N: strlen(s: "$ld$")).split(Separator: `'$'`);
2047	if (action == "previous")
2048	handleLDPreviousSymbol(name, originalName);
2049	else if (action == "install_name")
2050	handleLDInstallNameSymbol(name, originalName);
2051	else if (action == "hide")
2052	handleLDHideSymbol(name, originalName);
2053	return true;
2054	}
2055
2056	void DylibFile::handleLDPreviousSymbol(StringRef name, StringRef originalName) {
2057	// originalName: $ld$ previous $ <installname> $ <compatversion> $
2058	// <platformstr> $ <startversion> $ <endversion> $ <symbol-name> $
2059	StringRef installName;
2060	StringRef compatVersion;
2061	StringRef platformStr;
2062	StringRef startVersion;
2063	StringRef endVersion;
2064	StringRef symbolName;
2065	StringRef rest;
2066
2067	std::tie(args&: installName, args&: name) = name.split(Separator: `'$'`);
2068	std::tie(args&: compatVersion, args&: name) = name.split(Separator: `'$'`);
2069	std::tie(args&: platformStr, args&: name) = name.split(Separator: `'$'`);
2070	std::tie(args&: startVersion, args&: name) = name.split(Separator: `'$'`);
2071	std::tie(args&: endVersion, args&: name) = name.split(Separator: `'$'`);
2072	std::tie(args&: symbolName, args&: rest) = name.rsplit(Separator: `'$'`);
2073
2074	// FIXME: Does this do the right thing for zippered files?
2075	unsigned platform;
2076	if (platformStr.getAsInteger(Radix: `10`, Result&: platform) \|\|
2077	platform != static_cast<unsigned>(config ->platform()))
2078	return;
2079
2080	VersionTuple start;
2081	if (start.tryParse(string: startVersion)) {
2082	warn(msg: toString(f: this) + ": failed to parse start version, symbol '" +
2083	originalName + "' ignored");
2084	return;
2085	}
2086	VersionTuple end;
2087	if (end.tryParse(string: endVersion)) {
2088	warn(msg: toString(f: this) + ": failed to parse end version, symbol '" +
2089	originalName + "' ignored");
2090	return;
2091	}
2092	if (config ->platformInfo.target.MinDeployment < start \|\|
2093	config ->platformInfo.target.MinDeployment >= end)
2094	return;
2095
2096	// Initialized to compatibilityVersion for the symbolName branch below.
2097	uint32_t newCompatibilityVersion = compatibilityVersion;
2098	uint32_t newCurrentVersionForSymbol = currentVersion;
2099	if (!compatVersion.empty()) {
2100	VersionTuple cVersion;
2101	if (cVersion.tryParse(string: compatVersion)) {
2102	warn(msg: toString(f: this) +
2103	": failed to parse compatibility version, symbol '" + originalName +
2104	"' ignored");
2105	return;
2106	}
2107	newCompatibilityVersion = encodeVersion(version: cVersion);
2108	newCurrentVersionForSymbol = newCompatibilityVersion;
2109	}
2110
2111	if (!symbolName.empty()) {
2112	// A $ld$previous$ symbol with symbol name adds a symbol with that name to
2113	// a dylib with given name and version.
2114	auto *dylib = getSyntheticDylib(installName, currentVersion: newCurrentVersionForSymbol,
2115	compatVersion: newCompatibilityVersion);
2116
2117	// The tbd file usually contains the $ld$previous symbol for an old version,
2118	// and then the symbol itself later, for newer deployment targets, like so:
2119	// symbols: [
2120	// '$ld$previous$/Another$$1$3.0$14.0$_zzz$',
2121	// _zzz,
2122	// ]
2123	// Since the symbols are sorted, adding them to the symtab in the given
2124	// order means the $ld$previous version of _zzz will prevail, as desired.
2125	dylib->symbols.push_back(x: symtab ->addDylib(
2126	name: saver().save(S: symbolName), file: dylib, /isWeakDef=/false, /isTlv=/false));
2127	return;
2128	}
2129
2130	// A $ld$previous$ symbol without symbol name modifies the dylib it's in.
2131	this->installName = saver().save(S: installName);
2132	this->compatibilityVersion = newCompatibilityVersion;
2133	}
2134
2135	void DylibFile::handleLDInstallNameSymbol(StringRef name,
2136	StringRef originalName) {
2137	// originalName: $ld$ install_name $ os<version> $ install_name
2138	StringRef condition, installName;
2139	std::tie(args&: condition, args&: installName) = name.split(Separator: `'$'`);
2140	VersionTuple version;
2141	if (!condition.consume_front(Prefix: "os") \|\| version.tryParse(string: condition))
2142	warn(msg: toString(f: this) + ": failed to parse os version, symbol '" +
2143	originalName + "' ignored");
2144	else if (version == config ->platformInfo.target.MinDeployment)
2145	this->installName = saver().save(S: installName);
2146	}
2147
2148	void DylibFile::handleLDHideSymbol(StringRef name, StringRef originalName) {
2149	StringRef symbolName;
2150	bool shouldHide = true;
2151	if (name.starts_with(Prefix: "os")) {
2152	// If it's hidden based on versions.
2153	name = name.drop_front(N: `2`);
2154	StringRef minVersion;
2155	std::tie(args&: minVersion, args&: symbolName) = name.split(Separator: `'$'`);
2156	VersionTuple versionTup;
2157	if (versionTup.tryParse(string: minVersion)) {
2158	warn(msg: toString(f: this) + ": failed to parse hidden version, symbol `" + originalName +
2159	"` ignored.");
2160	return;
2161	}
2162	shouldHide = versionTup == config ->platformInfo.target.MinDeployment;
2163	} else {
2164	symbolName = name;
2165	}
2166
2167	if (shouldHide)
2168	exportingFile->hiddenSymbols.insert(V: CachedHashStringRef (symbolName));
2169	}
2170
2171	void DylibFile::checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const {
2172	if (config ->applicationExtension && !dylibIsAppExtensionSafe)
2173	warn(msg: "using '-application_extension' with unsafe dylib: " + toString(f: this));
2174	}
2175
2176	ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
2177	: InputFile (ArchiveKind, f ->getMemoryBufferRef()), file (std::move(f)),
2178	forceHidden(forceHidden) {}
2179
2180	void ArchiveFile::addLazySymbols() {
2181	// Avoid calling getMemoryBufferRef() on zero-symbol archive
2182	// since that crashes.
2183	if (file ->isEmpty() \|\|
2184	(file ->hasSymbolTable() && file ->getNumberOfSymbols() == `0`))
2185	return;
2186
2187	if (!file ->hasSymbolTable()) {
2188	// No index, treat each child as a lazy object file.
2189	Error e = Error::success();
2190	for (const object::Archive::Child &c : file ->children(Err&: e)) {
2191	// Check `seen` but don't insert so a future eager load can still happen.
2192	if (seen.contains(V: c.getChildOffset()))
2193	continue;
2194	if (!seenLazy.insert(V: c.getChildOffset()).second)
2195	continue;
2196	auto file = childToObjectFile(c, /lazy=/true);
2197	if (!file)
2198	error(msg: toString(f: this) +
2199	": couldn't process child: " + toString(E: file.takeError()));
2200	inputFiles.insert(X: *file);
2201	}
2202	if (e)
2203	error(msg: toString(f: this) +
2204	": Archive::children failed: " + toString(E: std::move(e)));
2205	return;
2206	}
2207
2208	Error err = Error::success();
2209	auto child = file ->child_begin(Err&: err);
2210	// Ignore the I/O error here - will be reported later.
2211	if (!err) {
2212	Expected<MemoryBufferRef> mbOrErr = child ->getMemoryBufferRef();
2213	if (!mbOrErr) {
2214	llvm::consumeError(Err: mbOrErr.takeError());
2215	} else {
2216	if (identify_magic(magic: mbOrErr ->getBuffer()) == file_magic::macho_object) {
2217	if (target->wordSize == `8`)
2218	compatArch = compatWithTargetArch(
2219	file: this, hdr: reinterpret_cast<const LP64::mach_header *>(
2220	mbOrErr ->getBufferStart()));
2221	else
2222	compatArch = compatWithTargetArch(
2223	file: this, hdr: reinterpret_cast<const ILP32::mach_header *>(
2224	mbOrErr ->getBufferStart()));
2225	if (!compatArch)
2226	return;
2227	}
2228	}
2229	}
2230
2231	for (const object::Archive::Symbol &sym : file ->symbols())
2232	symtab ->addLazyArchive(name: sym.getName(), file: this, sym);
2233	}
2234
2235	static Expected<InputFile *>
2236	loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
2237	uint64_t offsetInArchive, bool forceHidden, bool compatArch,
2238	bool lazy) {
2239	if (config ->zeroModTime)
2240	modTime = `0`;
2241
2242	switch (identify_magic(magic: mb.getBuffer())) {
2243	case file_magic::macho_object:
2244	return make<ObjFile>(args&: mb, args&: modTime, args&: archiveName, args&: lazy, args&: forceHidden,
2245	args&: compatArch);
2246	case file_magic::bitcode:
2247	return make<BitcodeFile>(args&: mb, args&: archiveName, args&: offsetInArchive, args&: lazy,
2248	args&: forceHidden, args&: compatArch);
2249	default:
2250	return createStringError(EC: inconvertibleErrorCode(),
2251	S: mb.getBufferIdentifier() +
2252	" has unhandled file type");
2253	}
2254	}
2255
2256	Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
2257	if (!seen.insert(V: c.getChildOffset()).second)
2258	return Error::success();
2259	auto file = childToObjectFile(c, /lazy=/false);
2260	if (!file)
2261	return file.takeError();
2262
2263	inputFiles.insert(X: *file);
2264	printArchiveMemberLoad(reason, *file);
2265	return Error::success();
2266	}
2267
2268	void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
2269	object::Archive::Child c =
2270	CHECK(sym.getMember(), toString(this) +
2271	": could not get the member defining symbol " +
2272	toMachOString(sym));
2273
2274	// `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile>
2275	// and become invalid after that call. Copy it to the stack so we can refer
2276	// to it later.
2277	const object::Archive::Symbol symCopy = sym;
2278
2279	// ld64 doesn't demangle sym here even with -demangle.
2280	// Match that: intentionally don't call toMachOString().
2281	if (Error e = fetch(c, reason: symCopy.getName()))
2282	error(msg: toString(f: this) + ": could not get the member defining symbol " +
2283	toMachOString(symCopy) + ": " + toString(E: std::move(e)));
2284	}
2285
2286	Expected<InputFile *>
2287	ArchiveFile::childToObjectFile(const llvm::object::Archive::Child &c,
2288	bool lazy) {
2289	Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
2290	if (!mb)
2291	return mb.takeError();
2292
2293	Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
2294	if (!modTime)
2295	return modTime.takeError();
2296
2297	return loadArchiveMember(mb: mb, modTime: toTimeT(TP: modTime), archiveName: getName(),
2298	offsetInArchive: c.getChildOffset(), forceHidden, compatArch, lazy);
2299	}
2300
2301	static macho::Symbol createBitcodeSymbol(const* lto::InputFile::Symbol &objSym,
2302	BitcodeFile &file) {
2303	StringRef name = saver().save(S: objSym.getName());
2304
2305	if (objSym.isUndefined())
2306	return symtab ->addUndefined(name, &file, /isWeakRef=/objSym.isWeak());
2307
2308	// TODO: Write a test demonstrating why computing isPrivateExtern before
2309	// LTO compilation is important.
2310	bool isPrivateExtern = false;
2311	switch (objSym.getVisibility()) {
2312	case GlobalValue::HiddenVisibility:
2313	isPrivateExtern = true;
2314	break;
2315	case GlobalValue::ProtectedVisibility:
2316	error(msg: name + " has protected visibility, which is not supported by Mach-O");
2317	break;
2318	case GlobalValue::DefaultVisibility:
2319	break;
2320	}
2321	isPrivateExtern = isPrivateExtern \|\| objSym.canBeOmittedFromSymbolTable() \|\|
2322	file.forceHidden;
2323
2324	if (objSym.isCommon())
2325	return symtab ->addCommon(name, &file, size: objSym.getCommonSize(),
2326	align: objSym.getCommonAlignment(), isPrivateExtern);
2327
2328	return symtab ->addDefined(name, &file, /isec=/nullptr, /value=/`0`,
2329	/size=/`0`, isWeakDef: objSym.isWeak(), isPrivateExtern,
2330	/isReferencedDynamically=/false,
2331	/noDeadStrip=/false,
2332	/isWeakDefCanBeHidden=/false);
2333	}
2334
2335	BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
2336	uint64_t offsetInArchive, bool lazy, bool forceHidden,
2337	bool compatArch)
2338	: InputFile (BitcodeKind, mb, lazy), forceHidden(forceHidden) {
2339	this->archiveName = std::string (archiveName);
2340	this->compatArch = compatArch;
2341	std::string path = mb.getBufferIdentifier().str();
2342	if (config ->thinLTOIndexOnly)
2343	path = replaceThinLTOSuffix(path: mb.getBufferIdentifier());
2344
2345	// If the parent archive already determines that the arch is not compat with
2346	// target, then just return.
2347	if (!compatArch)
2348	return;
2349
2350	// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
2351	// name. If two members with the same name are provided, this causes a
2352	// collision and ThinLTO can't proceed.
2353	// So, we append the archive name to disambiguate two members with the same
2354	// name from multiple different archives, and offset within the archive to
2355	// disambiguate two members of the same name from a single archive.
2356	MemoryBufferRef mbref(mb.getBuffer(),
2357	saver().save(S: archiveName.empty()
2358	? path
2359	: archiveName + "(" +
2360	sys::path::filename(path) + ")" +
2361	utostr(X: offsetInArchive)));
2362	obj = check(e: lto::InputFile::create(Object: mbref));
2363	if (lazy)
2364	parseLazy();
2365	else
2366	parse();
2367	}
2368
2369	void BitcodeFile::parse() {
2370	// Convert LTO Symbols to LLD Symbols in order to perform resolution. The
2371	// "winning" symbol will then be marked as Prevailing at LTO compilation
2372	// time.
2373	symbols.resize(new_size: obj ->symbols().size());
2374
2375	// Process defined symbols first. See the comment at the end of
2376	// ObjFile<>::parseSymbols.
2377	for (auto it : llvm::enumerate(First: obj ->symbols()))
2378	if (!it.value().isUndefined())
2379	symbols [it.index()] = createBitcodeSymbol(objSym: it.value(), file&: *this);
2380	for (auto it : llvm::enumerate(First: obj ->symbols()))
2381	if (it.value().isUndefined())
2382	symbols [it.index()] = createBitcodeSymbol(objSym: it.value(), file&: *this);
2383	}
2384
2385	void BitcodeFile::parseLazy() {
2386	symbols.resize(new_size: obj ->symbols().size());
2387	for (const auto &[i, objSym] : llvm::enumerate(First: obj ->symbols())) {
2388	if (!objSym.isUndefined()) {
2389	symbols [i] = symtab ->addLazyObject(name: saver().save(S: objSym.getName()), file&: *this);
2390	if (!lazy)
2391	break;
2392	}
2393	}
2394	}
2395
2396	std::string macho::replaceThinLTOSuffix(StringRef path) {
2397	auto [suffix, repl] = config ->thinLTOObjectSuffixReplace;
2398	if (path.consume_back(Suffix: suffix))
2399	return (path + repl).str();
2400	return std::string (path);
2401	}
2402
2403	void macho::extract(InputFile &file, StringRef reason) {
2404	if (!file.lazy)
2405	return;
2406	file.lazy = false;
2407
2408	printArchiveMemberLoad(reason, &file);
2409	if (auto *bitcode = dyn_cast<BitcodeFile>(Val: &file)) {
2410	bitcode->parse();
2411	} else {
2412	auto &f = cast<ObjFile>(Val&: file);
2413	if (target->wordSize == `8`)
2414	f.parse<LP64>();
2415	else
2416	f.parse<ILP32>();
2417	}
2418	}
2419
2420	template void ObjFile::parse<LP64>();
2421

Browse the source code of llvm_projects/lld/MachO/InputFiles.cpp