InputFiles.cpp source code [llvm_projects/lld/MachO/InputFiles.cpp]

1	//===- InputFiles.cpp -----------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains functions to parse Mach-O object files. In this comment,
10	// we describe the Mach-O file structure and how we parse it.
11	//
12	// Mach-O is not very different from ELF or COFF. The notion of symbols,
13	// sections and relocations exists in Mach-O as it does in ELF and COFF.
14	//
15	// Perhaps the notion that is new to those who know ELF/COFF is "subsections".
16	// In ELF/COFF, sections are an atomic unit of data copied from input files to
17	// output files. When we merge or garbage-collect sections, we treat each
18	// section as an atomic unit. In Mach-O, that's not the case. Sections can
19	// consist of multiple subsections, and subsections are a unit of merging and
20	// garbage-collecting. Therefore, Mach-O's subsections are more similar to
21	// ELF/COFF's sections than Mach-O's sections are.
22	//
23	// A section can have multiple symbols. A symbol that does not have the
24	// N_ALT_ENTRY attribute indicates a beginning of a subsection. Therefore, by
25	// definition, a symbol is always present at the beginning of each subsection. A
26	// symbol with N_ALT_ENTRY attribute does not start a new subsection and can
27	// point to a middle of a subsection.
28	//
29	// The notion of subsections also affects how relocations are represented in
30	// Mach-O. All references within a section need to be explicitly represented as
31	// relocations if they refer to different subsections, because we obviously need
32	// to fix up addresses if subsections are laid out in an output file differently
33	// than they were in object files. To represent that, Mach-O relocations can
34	// refer to an unnamed location via its address. Scattered relocations (those
35	// with the R_SCATTERED bit set) always refer to unnamed locations.
36	// Non-scattered relocations refer to an unnamed location if r_extern is not set
37	// and r_symbolnum is zero.
38	//
39	// Without the above differences, I think you can use your knowledge about ELF
40	// and COFF for Mach-O.
41	//
42	//===----------------------------------------------------------------------===//
43
44	#include "InputFiles.h"
45	#include "Config.h"
46	#include "Driver.h"
47	#include "Dwarf.h"
48	#include "EhFrame.h"
49	#include "ExportTrie.h"
50	#include "InputSection.h"
51	#include "ObjC.h"
52	#include "OutputSection.h"
53	#include "OutputSegment.h"
54	#include "SymbolTable.h"
55	#include "Symbols.h"
56	#include "SyntheticSections.h"
57	#include "Target.h"
58
59	#include "lld/Common/CommonLinkerContext.h"
60	#include "lld/Common/DWARF.h"
61	#include "lld/Common/Reproduce.h"
62	#include "llvm/ADT/iterator.h"
63	#include "llvm/BinaryFormat/MachO.h"
64	#include "llvm/LTO/LTO.h"
65	#include "llvm/Support/BinaryStreamReader.h"
66	#include "llvm/Support/Endian.h"
67	#include "llvm/Support/MemoryBuffer.h"
68	#include "llvm/Support/Path.h"
69	#include "llvm/Support/TarWriter.h"
70	#include "llvm/Support/TimeProfiler.h"
71	#include "llvm/TextAPI/Architecture.h"
72	#include "llvm/TextAPI/InterfaceFile.h"
73
74	#include <optional>
75	#include <type_traits>
76
77	using namespace llvm;
78	using namespace llvm::MachO;
79	using namespace llvm::support::endian;
80	using namespace llvm::sys;
81	using namespace lld;
82	using namespace lld::macho;
83
84	// Returns "<internal>", "foo.a(bar.o)", or "baz.o".
85	std::string lld::toString(const InputFile *f) {
86	if (!f)
87	return "<internal>";
88
89	// Multiple dylibs can be defined in one .tbd file.
90	if (const auto *dylibFile = dyn_cast<DylibFile>(Val: f))
91	if (f->getName().ends_with(Suffix: ".tbd"))
92	return (f->getName() + "(" + dylibFile->installName + ")").str();
93
94	if (f->archiveName.empty())
95	return std::string (f->getName());
96	return (f->archiveName + "(" + path::filename(path: f->getName()) + ")").str();
97	}
98
99	std::string lld::toString(const Section &sec) {
100	return (toString(f: sec.file) + ":(" + sec.name + ")").str();
101	}
102
103	SetVector<InputFile *> macho::inputFiles;
104	std::unique_ptr<TarWriter> macho::tar;
105	int InputFile::idCount = `0`;
106
107	static VersionTuple decodeVersion(uint32_t version) {
108	unsigned major = version >> `16`;
109	unsigned minor = (version >> `8`) & `0xffu`;
110	unsigned subMinor = version & `0xffu`;
111	return VersionTuple (major, minor, subMinor);
112	}
113
114	static std::vector<PlatformInfo> getPlatformInfos(const InputFile *input) {
115	if (!isa<ObjFile>(Val: input) && !isa<DylibFile>(Val: input))
116	return {};
117
118	const char *hdr = input->mb.getBufferStart();
119
120	// "Zippered" object files can have multiple LC_BUILD_VERSION load commands.
121	std::vector<PlatformInfo> platformInfos;
122	for (auto *cmd : findCommands<build_version_command>(anyHdr: hdr, types: LC_BUILD_VERSION)) {
123	PlatformInfo info;
124	info.target.Platform = static_cast<PlatformType>(cmd->platform);
125	info.target.MinDeployment = decodeVersion(version: cmd->minos);
126	platformInfos.emplace_back(args: std::move(info));
127	}
128	for (auto *cmd : findCommands<version_min_command>(
129	anyHdr: hdr, types: LC_VERSION_MIN_MACOSX, types: LC_VERSION_MIN_IPHONEOS,
130	types: LC_VERSION_MIN_TVOS, types: LC_VERSION_MIN_WATCHOS)) {
131	PlatformInfo info;
132	switch (cmd->cmd) {
133	case LC_VERSION_MIN_MACOSX:
134	info.target.Platform = PLATFORM_MACOS;
135	break;
136	case LC_VERSION_MIN_IPHONEOS:
137	info.target.Platform = PLATFORM_IOS;
138	break;
139	case LC_VERSION_MIN_TVOS:
140	info.target.Platform = PLATFORM_TVOS;
141	break;
142	case LC_VERSION_MIN_WATCHOS:
143	info.target.Platform = PLATFORM_WATCHOS;
144	break;
145	}
146	info.target.MinDeployment = decodeVersion(version: cmd->version);
147	platformInfos.emplace_back(args: std::move(info));
148	}
149
150	return platformInfos;
151	}
152
153	static bool checkCompatibility(const InputFile *input) {
154	std::vector<PlatformInfo> platformInfos = getPlatformInfos(input);
155	if (platformInfos.empty())
156	return true;
157
158	auto it = find_if(Range&: platformInfos, P: [&](const PlatformInfo &info) {
159	return removeSimulator(platform: info.target.Platform) ==
160	removeSimulator(platform: config ->platform());
161	});
162	if (it == platformInfos.end()) {
163	std::string platformNames;
164	raw_string_ostream os(platformNames);
165	interleave(
166	c: platformInfos, os,
167	each_fn: [&](const PlatformInfo &info) {
168	os << getPlatformName(Platform: info.target.Platform);
169	},
170	separator: "/");
171	error(msg: toString(f: input) + " has platform " + platformNames +
172	Twine (", which is different from target platform ") +
173	getPlatformName(Platform: config ->platform()));
174	return false;
175	}
176
177	if (it ->target.MinDeployment > config ->platformInfo.target.MinDeployment)
178	warn(msg: toString(f: input) + " has version " +
179	it ->target.MinDeployment.getAsString() +
180	", which is newer than target minimum of " +
181	config ->platformInfo.target.MinDeployment.getAsString());
182
183	return true;
184	}
185
186	template <class Header>
187	static bool compatWithTargetArch(const InputFile file, const* Header *hdr) {
188	uint32_t cpuType;
189	std::tie(args&: cpuType, args: std::ignore) = getCPUTypeFromArchitecture(Arch: config ->arch());
190
191	if (hdr->cputype != cpuType) {
192	Architecture arch =
193	getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
194	auto msg = config ->errorForArchMismatch
195	? static_cast<void ()(const* Twine &)>(error)
196	: warn;
197
198	msg(toString(f: file) + " has architecture " + getArchitectureName(Arch: arch) +
199	" which is incompatible with target architecture " +
200	getArchitectureName(Arch: config ->arch()));
201	return false;
202	}
203
204	return checkCompatibility(input: file);
205	}
206
207	// This cache mostly exists to store system libraries (and .tbds) as they're
208	// loaded, rather than the input archives, which are already cached at a higher
209	// level, and other files like the filelist that are only read once.
210	// Theoretically this caching could be more efficient by hoisting it, but that
211	// would require altering many callers to track the state.
212	DenseMap<CachedHashStringRef, MemoryBufferRef> macho::cachedReads;
213	// Open a given file path and return it as a memory-mapped file.
214	std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
215	CachedHashStringRef key(path);
216	auto entry = cachedReads.find(Val: key);
217	if (entry != cachedReads.end())
218	return entry ->second;
219
220	ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
221	MemoryBuffer::getFile(Filename: path, IsText: false, /RequiresNullTerminator=/false);
222	if (std::error_code ec = mbOrErr.getError()) {
223	error(msg: "cannot open " + path + ": " + ec.message());
224	return std::nullopt;
225	}
226
227	std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
228	MemoryBufferRef mbref = mb ->getMemBufferRef();
229	make<std::unique_ptr<MemoryBuffer>>(args: std::move(mb)); // take mb ownership
230
231	// If this is a regular non-fat file, return it.
232	const char *buf = mbref.getBufferStart();
233	const auto hdr = reinterpret_cast<const* fat_header *>(buf);
234	if (mbref.getBufferSize() < sizeof(uint32_t) \|\|
235	read32be(P: &hdr->magic) != FAT_MAGIC) {
236	if (tar)
237	tar ->append(Path: relativeToRoot(path), Data: mbref.getBuffer());
238	return cachedReads [key] = mbref;
239	}
240
241	llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
242
243	// Object files and archive files may be fat files, which contain multiple
244	// real files for different CPU ISAs. Here, we search for a file that matches
245	// with the current link target and returns it as a MemoryBufferRef.
246	const auto arch = reinterpret_cast<const* fat_arch >(buf + sizeof(hdr));
247	auto getArchName = [](uint32_t cpuType, uint32_t cpuSubtype) {
248	return getArchitectureName(Arch: getArchitectureFromCpuType(CPUType: cpuType, CPUSubType: cpuSubtype));
249	};
250
251	std::vector<StringRef> archs;
252	for (uint32_t i = `0`, n = read32be(P: &hdr->nfat_arch); i < n; ++i) {
253	if (reinterpret_cast<const char *>(arch + i + `1`) >
254	buf + mbref.getBufferSize()) {
255	error(msg: path + ": fat_arch struct extends beyond end of file");
256	return std::nullopt;
257	}
258
259	uint32_t cpuType = read32be(P: &arch[i].cputype);
260	uint32_t cpuSubtype =
261	read32be(P: &arch[i].cpusubtype) & ~MachO::CPU_SUBTYPE_MASK;
262
263	// FIXME: LD64 has a more complex fallback logic here.
264	// Consider implementing that as well?
265	if (cpuType != static_cast<uint32_t>(target->cpuType) \|\|
266	cpuSubtype != target->cpuSubtype) {
267	archs.emplace_back(args: getArchName (cpuType, cpuSubtype));
268	continue;
269	}
270
271	uint32_t offset = read32be(P: &arch[i].offset);
272	uint32_t size = read32be(P: &arch[i].size);
273	if (offset + size > mbref.getBufferSize())
274	error(msg: path + ": slice extends beyond end of file");
275	if (tar)
276	tar ->append(Path: relativeToRoot(path), Data: mbref.getBuffer());
277	return cachedReads [key] = MemoryBufferRef(StringRef(buf + offset, size),
278	path.copy(A&: bAlloc));
279	}
280
281	auto targetArchName = getArchName (target->cpuType, target->cpuSubtype);
282	warn(msg: path + ": ignoring file because it is universal (" + join(R&: archs, Separator: ",") +
283	") but does not contain the " + targetArchName + " architecture");
284	return std::nullopt;
285	}
286
287	InputFile::InputFile(Kind kind, const InterfaceFile &interface)
288	: id(idCount++), fileKind(kind), name(saver().save(S: interface.getPath())) {}
289
290	// Some sections comprise of fixed-size records, so instead of splitting them at
291	// symbol boundaries, we split them based on size. Records are distinct from
292	// literals in that they may contain references to other sections, instead of
293	// being leaf nodes in the InputSection graph.
294	//
295	// Note that "record" is a term I came up with. In contrast, "literal" is a term
296	// used by the Mach-O format.
297	static std::optional<size_t> getRecordSize(StringRef segname, StringRef name) {
298	if (name == section_names::compactUnwind) {
299	if (segname == segment_names::ld)
300	return target->wordSize == `8` ? `32` : `20`;
301	}
302	if (!config ->dedupStrings)
303	return {};
304
305	if (name == section_names::cfString && segname == segment_names::data)
306	return target->wordSize == `8` ? `32` : `16`;
307
308	if (config ->icfLevel == ICFLevel::none)
309	return {};
310
311	if (name == section_names::objcClassRefs && segname == segment_names::data)
312	return target->wordSize;
313
314	if (name == section_names::objcSelrefs && segname == segment_names::data)
315	return target->wordSize;
316	return {};
317	}
318
319	static Error parseCallGraph(ArrayRef<uint8_t> data,
320	std::vector<CallGraphEntry> &callGraph) {
321	TimeTraceScope timeScope("Parsing call graph section");
322	BinaryStreamReader reader(data, llvm::endianness::little);
323	while (!reader.empty()) {
324	uint32_t fromIndex, toIndex;
325	uint64_t count;
326	if (Error err = reader.readInteger(Dest&: fromIndex))
327	return err;
328	if (Error err = reader.readInteger(Dest&: toIndex))
329	return err;
330	if (Error err = reader.readInteger(Dest&: count))
331	return err;
332	callGraph.emplace_back(args&: fromIndex, args&: toIndex, args&: count);
333	}
334	return Error::success();
335	}
336
337	// Parse the sequence of sections within a single LC_SEGMENT(_64).
338	// Split each section into subsections.
339	template <class SectionHeader>
340	void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
341	sections.reserve(n: sectionHeaders.size());
342	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
343
344	for (const SectionHeader &sec : sectionHeaders) {
345	StringRef name =
346	StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
347	StringRef segname =
348	StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
349	sections.push_back(make<Section>(this, segname, name, sec.flags, sec.addr));
350	if (sec.align >= `32`) {
351	error("alignment " + std::to_string(sec.align) + " of section " + name +
352	" is too large");
353	continue;
354	}
355	Section &section = *sections.back();
356	uint32_t align = `1` << sec.align;
357	ArrayRef<uint8_t> data = {isZeroFill(sec.flags) ? nullptr
358	: buf + sec.offset,
359	static_cast<size_t>(sec.size)};
360
361	auto splitRecords = [&](size_t recordSize) -> void {
362	if (data.empty())
363	return;
364	Subsections &subsections = section.subsections;
365	subsections.reserve(n: data.size() / recordSize);
366	for (uint64_t off = `0`; off < data.size(); off += recordSize) {
367	auto *isec = make<ConcatInputSection>(
368	args&: section, args: data.slice(N: off, M: std::min(a: data.size(), b: recordSize)), args&: align);
369	subsections.push_back(x: {.offset: off, .isec: isec});
370	}
371	section.doneSplitting = true;
372	};
373
374	if (sectionType(sec.flags) == S_CSTRING_LITERALS) {
375	if (sec.nreloc)
376	fatal(toString(f: this) + ": " + sec.segname + "," + sec.sectname +
377	" contains relocations, which is unsupported");
378	bool dedupLiterals =
379	name == section_names::objcMethname \|\| config ->dedupStrings;
380	InputSection *isec =
381	make<CStringInputSection>(args&: section, args&: data, args&: align, args&: dedupLiterals);
382	// FIXME: parallelize this?
383	cast<CStringInputSection>(Val: isec)->splitIntoPieces();
384	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
385	} else if (isWordLiteralSection(sec.flags)) {
386	if (sec.nreloc)
387	fatal(toString(f: this) + ": " + sec.segname + "," + sec.sectname +
388	" contains relocations, which is unsupported");
389	InputSection *isec = make<WordLiteralInputSection>(args&: section, args&: data, args&: align);
390	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
391	} else if (auto recordSize = getRecordSize(segname, name)) {
392	splitRecords(*recordSize);
393	} else if (name == section_names::ehFrame &&
394	segname == segment_names::text) {
395	splitEhFrames(dataArr: data, ehFrameSection&: *sections.back());
396	} else if (segname == segment_names::llvm) {
397	if (config ->callGraphProfileSort && name == section_names::cgProfile)
398	checkError(e: parseCallGraph(data, callGraph));
399	// ld64 does not appear to emit contents from sections within the __LLVM
400	// segment. Symbols within those sections point to bitcode metadata
401	// instead of actual symbols. Global symbols within those sections could
402	// have the same name without causing duplicate symbol errors. To avoid
403	// spurious duplicate symbol errors, we do not parse these sections.
404	// TODO: Evaluate whether the bitcode metadata is needed.
405	} else if (name == section_names::objCImageInfo &&
406	segname == segment_names::data) {
407	objCImageInfo = data;
408	} else {
409	if (name == section_names::addrSig)
410	addrSigSection = sections.back();
411
412	auto *isec = make<ConcatInputSection>(args&: section, args&: data, args&: align);
413	if (isDebugSection(flags: isec->getFlags()) &&
414	isec->getSegName() == segment_names::dwarf) {
415	// Instead of emitting DWARF sections, we emit STABS symbols to the
416	// object files that contain them. We filter them out early to avoid
417	// parsing their relocations unnecessarily.
418	debugSections.push_back(x: isec);
419	} else {
420	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
421	}
422	}
423	}
424	}
425
426	void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
427	EhReader reader(this, data, /dataOff=/`0`);
428	size_t off = `0`;
429	while (off < reader.size()) {
430	uint64_t frameOff = off;
431	uint64_t length = reader.readLength(off: &off);
432	if (length == `0`)
433	break;
434	uint64_t fullLength = length + (off - frameOff);
435	off += length;
436	// We hard-code an alignment of 1 here because we don't actually want our
437	// EH frames to be aligned to the section alignment. EH frame decoders don't
438	// expect this alignment. Moreover, each EH frame must start where the
439	// previous one ends, and where it ends is indicated by the length field.
440	// Unless we update the length field (troublesome), we should keep the
441	// alignment to 1.
442	// Note that we still want to preserve the alignment of the overall section,
443	// just not of the individual EH frames.
444	ehFrameSection.subsections.push_back(
445	x: {.offset: frameOff, .isec: make<ConcatInputSection>(args&: ehFrameSection,
446	args: data.slice(N: frameOff, M: fullLength),
447	/align=/args: `1`)});
448	}
449	ehFrameSection.doneSplitting = true;
450	}
451
452	template <class T>
453	static Section findContainingSection(const* std::vector<Section *> &sections,
454	T *offset) {
455	static_assert(std::is_same<uint64_t, T>::value \|\|
456	std::is_same<uint32_t, T>::value,
457	"unexpected type for offset");
458	auto it = std::prev(llvm::upper_bound(
459	sections, *offset,
460	[](uint64_t value, const Section sec) { return* value < sec->addr; }));
461	offset -= (it)->addr;
462	return *it;
463	}
464
465	// Find the subsection corresponding to the greatest section offset that is <=
466	// that of the given offset.
467	//
468	// offset: an offset relative to the start of the original InputSection (before
469	// any subsection splitting has occurred). It will be updated to represent the
470	// same location as an offset relative to the start of the containing
471	// subsection.
472	template <class T>
473	static InputSection findContainingSubsection(const* Section &section,
474	T *offset) {
475	static_assert(std::is_same<uint64_t, T>::value \|\|
476	std::is_same<uint32_t, T>::value,
477	"unexpected type for offset");
478	auto it = std::prev(llvm::upper_bound(
479	section.subsections, *offset,
480	[](uint64_t value, Subsection subsec) { return value < subsec.offset; }));
481	*offset -= it->offset;
482	return it->isec;
483	}
484
485	// Try to find a symbol at offset `off` within `isec`.
486	// Returns nullptr if no symbol exists at that offset.
487	static Defined tryFindSymbolAtOffset(const* ConcatInputSection *isec,
488	uint64_t off) {
489	auto it = llvm::lower_bound(Range: isec->symbols, Value&: off, C: [](Defined *d, uint64_t off) {
490	return d->value < off;
491	});
492	if (it == isec->symbols.end() \|\| (*it)->value != off)
493	return nullptr;
494	return *it;
495	}
496
497	// Find a symbol at offset `off` within `isec`.
498	// If no symbol is found, assume the section must have been coalesced.
499	static Defined findSymbolAtOffset(const* ConcatInputSection *isec,
500	uint64_t off) {
501	Defined *d = tryFindSymbolAtOffset(isec, off);
502	// The offset should point at the exact address of a symbol (with no addend.)
503	assert(d \|\| isec->wasCoalesced);
504	return d;
505	}
506
507	template <class SectionHeader>
508	static bool validateRelocationInfo(InputFile file, const* SectionHeader &sec,
509	relocation_info rel) {
510	const RelocAttrs &relocAttrs = target->getRelocAttrs(type: rel.r_type);
511	bool valid = true;
512	auto message = [relocAttrs, file, sec, rel, &valid](const Twine &diagnostic) {
513	valid = false;
514	return (relocAttrs.name + " relocation " + diagnostic + " at offset " +
515	std::to_string(val: rel.r_address) + " of " + sec.segname + "," +
516	sec.sectname + " in " + toString(f: file))
517	.str();
518	};
519
520	if (!relocAttrs.hasAttr(b: RelocAttrBits::LOCAL) && !rel.r_extern)
521	error(message("must be extern"));
522	if (relocAttrs.hasAttr(b: RelocAttrBits::PCREL) != rel.r_pcrel)
523	error(message(Twine ("must ") + (rel.r_pcrel ? "not " : "") +
524	"be PC-relative"));
525	if (isThreadLocalVariables(sec.flags) &&
526	!relocAttrs.hasAttr(b: RelocAttrBits::UNSIGNED))
527	error(message("not allowed in thread-local section, must be UNSIGNED"));
528	if (!relocAttrs.hasAttr(b: static_cast<RelocAttrBits>(`1` << rel.r_length))) {
529	error(message("has invalid width of " + std::to_string(val: `1` << rel.r_length) +
530	" bytes"));
531	}
532	return valid;
533	}
534
535	template <class SectionHeader>
536	void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
537	const SectionHeader &sec, Section &section) {
538	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
539	ArrayRef<relocation_info> relInfos(
540	reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
541
542	Subsections &subsections = section.subsections;
543	auto subsecIt = subsections.rbegin();
544	for (size_t i = `0`; i < relInfos.size(); i++) {
545	// Paired relocations serve as Mach-O's method for attaching a
546	// supplemental datum to a primary relocation record. ELF does not
547	// need them because the _RELOC_RELA records contain the extra*
548	// addend field, vs. _RELOC_REL which omit the addend.*
549	//
550	// The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
551	// and the paired _RELOC_UNSIGNED record holds the minuend. The*
552	// datum for each is a symbolic address. The result is the offset
553	// between two addresses.
554	//
555	// The ARM64_RELOC_ADDEND record holds the addend, and the paired
556	// ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
557	// base symbolic address.
558	//
559	// Note: X86 does not use _RELOC_ADDEND because it can embed an addend into*
560	// the instruction stream. On X86, a relocatable address field always
561	// occupies an entire contiguous sequence of byte(s), so there is no need to
562	// merge opcode bits with address bits. Therefore, it's easy and convenient
563	// to store addends in the instruction-stream bytes that would otherwise
564	// contain zeroes. By contrast, RISC ISAs such as ARM64 mix opcode bits with
565	// address bits so that bitwise arithmetic is necessary to extract and
566	// insert them. Storing addends in the instruction stream is possible, but
567	// inconvenient and more costly at link time.
568
569	relocation_info relInfo = relInfos [i];
570	bool isSubtrahend =
571	target->hasAttr(type: relInfo.r_type, bit: RelocAttrBits::SUBTRAHEND);
572	int64_t pairedAddend = `0`;
573	if (target->hasAttr(type: relInfo.r_type, bit: RelocAttrBits::ADDEND)) {
574	pairedAddend = SignExtend64<`24`>(x: relInfo.r_symbolnum);
575	relInfo = relInfos [++i];
576	}
577	assert(i < relInfos.size());
578	if (!validateRelocationInfo(this, sec, relInfo))
579	continue;
580	if (relInfo.r_address & R_SCATTERED)
581	fatal(msg: "TODO: Scattered relocations not supported");
582
583	int64_t embeddedAddend = target->getEmbeddedAddend(mb, offset: sec.offset, relInfo);
584	assert(!(embeddedAddend && pairedAddend));
585	int64_t totalAddend = pairedAddend + embeddedAddend;
586	Relocation r;
587	r.type = relInfo.r_type;
588	r.pcrel = relInfo.r_pcrel;
589	r.length = relInfo.r_length;
590	r.offset = relInfo.r_address;
591	if (relInfo.r_extern) {
592	r.referent = symbols [relInfo.r_symbolnum];
593	r.addend = isSubtrahend ? `0` : totalAddend;
594	} else {
595	assert(!isSubtrahend);
596	const SectionHeader &referentSecHead =
597	sectionHeaders[relInfo.r_symbolnum - `1`];
598	uint64_t referentOffset;
599	if (relInfo.r_pcrel) {
600	// The implicit addend for pcrel section relocations is the pcrel offset
601	// in terms of the addresses in the input file. Here we adjust it so
602	// that it describes the offset from the start of the referent section.
603	// FIXME This logic was written around x86_64 behavior -- ARM64 doesn't
604	// have pcrel section relocations. We may want to factor this out into
605	// the arch-specific .cpp file.
606	referentOffset = sec.addr + relInfo.r_address +
607	(`1ull` << relInfo.r_length) + totalAddend -
608	referentSecHead.addr;
609	} else {
610	// The addend for a non-pcrel relocation is its absolute address.
611	referentOffset = totalAddend - referentSecHead.addr;
612	}
613	r.referent = findContainingSubsection(section: *sections [relInfo.r_symbolnum - `1`],
614	offset: &referentOffset);
615	r.addend = referentOffset;
616	}
617
618	// Find the subsection that this relocation belongs to.
619	// Though not required by the Mach-O format, clang and gcc seem to emit
620	// relocations in order, so let's take advantage of it. However, ld64 emits
621	// unsorted relocations (in `-r` mode), so we have a fallback for that
622	// uncommon case.
623	InputSection *subsec;
624	while (subsecIt != subsections.rend() && subsecIt ->offset > r.offset)
625	++subsecIt;
626	if (subsecIt == subsections.rend() \|\|
627	subsecIt ->offset + subsecIt ->isec->getSize() <= r.offset) {
628	subsec = findContainingSubsection(section, offset: &r.offset);
629	// Now that we know the relocs are unsorted, avoid trying the 'fast path'
630	// for the other relocations.
631	subsecIt = subsections.rend();
632	} else {
633	subsec = subsecIt ->isec;
634	r.offset -= subsecIt ->offset;
635	}
636	subsec->relocs.push_back(x: r);
637
638	if (isSubtrahend) {
639	relocation_info minuendInfo = relInfos [++i];
640	// SUBTRACTOR relocations should always be followed by an UNSIGNED one
641	// attached to the same address.
642	assert(target->hasAttr(minuendInfo.r_type, RelocAttrBits::UNSIGNED) &&
643	relInfo.r_address == minuendInfo.r_address);
644	Relocation p;
645	p.type = minuendInfo.r_type;
646	if (minuendInfo.r_extern) {
647	p.referent = symbols [minuendInfo.r_symbolnum];
648	p.addend = totalAddend;
649	} else {
650	uint64_t referentOffset =
651	totalAddend - sectionHeaders[minuendInfo.r_symbolnum - `1`].addr;
652	p.referent = findContainingSubsection(
653	section: *sections [minuendInfo.r_symbolnum - `1`], offset: &referentOffset);
654	p.addend = referentOffset;
655	}
656	subsec->relocs.push_back(x: p);
657	}
658	}
659	}
660
661	// ld64 never turns these labels into named atoms or symbol table entries.
662	static bool shouldIgnoreLabel(const InputSection *isec, StringRef name) {
663	if (isCfStringSection(isec) \|\| isClassRefsSection(isec) \|\|
664	isSelRefsSection(isec))
665	return true;
666	if ((isa<WordLiteralInputSection>(Val: isec) \|\| isa<CStringInputSection>(Val: isec)) &&
667	isPrivateLabel(name))
668	return true;
669	return false;
670	}
671
672	template <class NList>
673	static macho::Symbol createDefined(const* NList &sym, StringRef name,
674	InputSection *isec, uint64_t value,
675	uint64_t size, bool forceHidden) {
676	// Symbol scope is determined by sym.n_type & (N_EXT \| N_PEXT):
677	// N_EXT: Global symbols. These go in the symbol table during the link,
678	// and also in the export table of the output so that the dynamic
679	// linker sees them.
680	// N_EXT \| N_PEXT: Linkage unit (think: dylib) scoped. These go in the
681	// symbol table during the link so that duplicates are
682	// either reported (for non-weak symbols) or merged
683	// (for weak symbols), but they do not go in the export
684	// table of the output.
685	// N_PEXT: llvm-mc does not emit these, but `ld -r` (wherein ld64 emits
686	// object files) may produce them. LLD does not yet support -r.
687	// These are translation-unit scoped, identical to the `0` case.
688	// 0: Translation-unit scoped. These are not in the symbol table during
689	// link, and not in the export table of the output either.
690	bool isWeakDefCanBeHidden =
691	(sym.n_desc & (N_WEAK_DEF \| N_WEAK_REF)) == (N_WEAK_DEF \| N_WEAK_REF);
692
693	assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
694
695	bool isCold = sym.n_desc & N_COLD_FUNC;
696
697	if ((sym.n_type & N_EXT) && !shouldIgnoreLabel(isec, name)) {
698	// -load_hidden makes us treat global symbols as linkage unit scoped.
699	// Duplicates are reported but the symbol does not go in the export trie.
700	bool isPrivateExtern = sym.n_type & N_PEXT \|\| forceHidden;
701
702	// lld's behavior for merging symbols is slightly different from ld64:
703	// ld64 picks the winning symbol based on several criteria (see
704	// pickBetweenRegularAtoms() in ld64's SymbolTable.cpp), while lld
705	// just merges metadata and keeps the contents of the first symbol
706	// with that name (see SymbolTable::addDefined). For:
707	// inline function F in a TU built with -fvisibility-inlines-hidden*
708	// and inline function F in another TU built without that flag*
709	// ld64 will pick the one from the file built without
710	// -fvisibility-inlines-hidden.
711	// lld will instead pick the one listed first on the link command line and
712	// give it visibility as if the function was built without
713	// -fvisibility-inlines-hidden.
714	// If both functions have the same contents, this will have the same
715	// behavior. If not, it won't, but the input had an ODR violation in
716	// that case.
717	//
718	// Similarly, merging a symbol
719	// that's isPrivateExtern and not isWeakDefCanBeHidden with one
720	// that's not isPrivateExtern but isWeakDefCanBeHidden technically
721	// should produce one
722	// that's not isPrivateExtern but isWeakDefCanBeHidden. That matters
723	// with ld64's semantics, because it means the non-private-extern
724	// definition will continue to take priority if more private extern
725	// definitions are encountered. With lld's semantics there's no observable
726	// difference between a symbol that's isWeakDefCanBeHidden(autohide) or one
727	// that's privateExtern -- neither makes it into the dynamic symbol table,
728	// unless the autohide symbol is explicitly exported.
729	// But if a symbol is both privateExtern and autohide then it can't
730	// be exported.
731	// So we nullify the autohide flag when privateExtern is present
732	// and promote the symbol to privateExtern when it is not already.
733	if (isWeakDefCanBeHidden && isPrivateExtern)
734	isWeakDefCanBeHidden = false;
735	else if (isWeakDefCanBeHidden)
736	isPrivateExtern = true;
737	return symtab ->addDefined(
738	name, isec->getFile(), isec, value, size, isWeakDef: sym.n_desc & N_WEAK_DEF,
739	isPrivateExtern, isReferencedDynamically: sym.n_desc & REFERENCED_DYNAMICALLY,
740	noDeadStrip: sym.n_desc & N_NO_DEAD_STRIP, isWeakDefCanBeHidden, isCold);
741	}
742	bool includeInSymtab = !isPrivateLabel(name) && !isEhFrameSection(isec);
743	auto *defined = make<Defined>(
744	name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
745	/isExternal=/false, /isPrivateExtern=/false, includeInSymtab,
746	sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
747	defined->cold = isCold;
748	return defined;
749	}
750
751	// Absolute symbols are defined symbols that do not have an associated
752	// InputSection. They cannot be weak.
753	template <class NList>
754	static macho::Symbol createAbsolute(const* NList &sym, InputFile *file,
755	StringRef name, bool forceHidden) {
756	bool isCold = sym.n_desc & N_COLD_FUNC;
757	assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
758
759	if (sym.n_type & N_EXT) {
760	bool isPrivateExtern = sym.n_type & N_PEXT \|\| forceHidden;
761	return symtab ->addDefined(name, file, nullptr, value: sym.n_value, /size=/`0`,
762	/isWeakDef=/false, isPrivateExtern,
763	/isReferencedDynamically=/false,
764	noDeadStrip: sym.n_desc & N_NO_DEAD_STRIP,
765	/isWeakDefCanBeHidden=/false, isCold);
766	}
767	auto defined = make<Defined>(name, file, nullptr, sym.n_value, /size=/*`0`,
768	/isWeakDef=/false,
769	/isExternal=/false, /isPrivateExtern=/false,
770	/includeInSymtab=/true,
771	/isReferencedDynamically=/false,
772	sym.n_desc & N_NO_DEAD_STRIP);
773	defined->cold = isCold;
774	return defined;
775	}
776
777	template <class NList>
778	macho::Symbol ObjFile::parseNonSectionSymbol(const* NList &sym,
779	const char *strtab) {
780	StringRef name = StringRef(strtab + sym.n_strx);
781	uint8_t type = sym.n_type & N_TYPE;
782	bool isPrivateExtern = sym.n_type & N_PEXT \|\| forceHidden;
783	switch (type) {
784	case N_UNDF:
785	return sym.n_value == `0`
786	? symtab ->addUndefined(name, this, isWeakRef: sym.n_desc & N_WEAK_REF)
787	: symtab ->addCommon(name, this, size: sym.n_value,
788	align: `1` << GET_COMM_ALIGN(sym.n_desc),
789	isPrivateExtern);
790	case N_ABS:
791	return createAbsolute(sym, this, name, forceHidden);
792	case N_INDR: {
793	// Not much point in making local aliases -- relocs in the current file can
794	// just refer to the actual symbol itself. ld64 ignores these symbols too.
795	if (!(sym.n_type & N_EXT))
796	return nullptr;
797	StringRef aliasedName = StringRef(strtab + sym.n_value);
798	// isPrivateExtern is the only symbol flag that has an impact on the final
799	// aliased symbol.
800	auto alias = make<AliasSymbol>(args: this*, args&: name, args&: aliasedName, args&: isPrivateExtern);
801	aliases.push_back(x: alias);
802	return alias;
803	}
804	case N_PBUD:
805	error(msg: "TODO: support symbols of type N_PBUD");
806	return nullptr;
807	case N_SECT:
808	llvm_unreachable(
809	"N_SECT symbols should not be passed to parseNonSectionSymbol");
810	default:
811	llvm_unreachable("invalid symbol type");
812	}
813	}
814
815	template <class NList> static bool isUndef(const NList &sym) {
816	return (sym.n_type & N_TYPE) == N_UNDF && sym.n_value == `0`;
817	}
818
819	template <class LP>
820	void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
821	ArrayRef<typename LP::nlist> nList,
822	const char strtab, bool* subsectionsViaSymbols) {
823	using NList = typename LP::nlist;
824
825	// Groups indices of the symbols by the sections that contain them.
826	std::vector<std::vector<uint32_t>> symbolsBySection(sections.size());
827	symbols.resize(nList.size());
828	SmallVector<unsigned, `32`> undefineds;
829	for (uint32_t i = `0`; i < nList.size(); ++i) {
830	const NList &sym = nList[i];
831
832	// Ignore debug symbols for now.
833	// FIXME: may need special handling.
834	if (sym.n_type & N_STAB)
835	continue;
836
837	if ((sym.n_type & N_TYPE) == N_SECT) {
838	if (sym.n_sect == `0`) {
839	fatal(msg: "section symbol " + StringRef(strtab + sym.n_strx) + " in " +
840	toString(f: this) + " has an invalid section index [0]");
841	}
842	if (sym.n_sect > sections.size()) {
843	fatal(msg: "section symbol " + StringRef(strtab + sym.n_strx) + " in " +
844	toString(f: this) + " has an invalid section index [" +
845	Twine(static_cast<unsigned>(sym.n_sect)) +
846	"] greater than the total number of sections [" +
847	Twine(sections.size()) + "]");
848	}
849	Subsections &subsections = sections[sym.n_sect - `1`]->subsections;
850	// parseSections() may have chosen not to parse this section.
851	if (subsections.empty())
852	continue;
853	symbolsBySection[sym.n_sect - `1`].push_back(i);
854	} else if (isUndef(sym)) {
855	undefineds.push_back(Elt: i);
856	} else {
857	symbols [i] = parseNonSectionSymbol(sym, strtab);
858	}
859	}
860
861	for (size_t i = `0`; i < sections.size(); ++i) {
862	Subsections &subsections = sections [i]->subsections;
863	if (subsections.empty())
864	continue;
865	std::vector<uint32_t> &symbolIndices = symbolsBySection [i];
866	uint64_t sectionAddr = sectionHeaders[i].addr;
867	uint32_t sectionAlign = `1u` << sectionHeaders[i].align;
868
869	// Some sections have already been split into subsections during
870	// parseSections(), so we simply need to match Symbols to the corresponding
871	// subsection here.
872	if (sections [i]->doneSplitting) {
873	for (size_t j = `0`; j < symbolIndices.size(); ++j) {
874	const uint32_t symIndex = symbolIndices [j];
875	const NList &sym = nList[symIndex];
876	StringRef name = strtab + sym.n_strx;
877	uint64_t symbolOffset = sym.n_value - sectionAddr;
878	InputSection *isec =
879	findContainingSubsection(section: *sections [i], offset: &symbolOffset);
880	if (symbolOffset != `0`) {
881	error(msg: toString(sec: *sections [i]) + ": symbol " + name +
882	" at misaligned offset");
883	continue;
884	}
885	symbols [symIndex] =
886	createDefined(sym, name, isec, `0`, isec->getSize(), forceHidden);
887	}
888	continue;
889	}
890	sections [i]->doneSplitting = true;
891
892	auto getSymName = [strtab](const NList& sym) -> StringRef {
893	return StringRef(strtab + sym.n_strx);
894	};
895
896	// Calculate symbol sizes and create subsections by splitting the sections
897	// along symbol boundaries.
898	// We populate subsections by repeatedly splitting the last (highest
899	// address) subsection.
900	llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
901	// Put extern weak symbols after other symbols at the same address so
902	// that weak symbol coalescing works correctly. See
903	// SymbolTable::addDefined() for details.
904	if (nList[lhs].n_value == nList[rhs].n_value &&
905	nList[lhs].n_type & N_EXT && nList[rhs].n_type & N_EXT)
906	return !(nList[lhs].n_desc & N_WEAK_DEF) && (nList[rhs].n_desc & N_WEAK_DEF);
907	return nList[lhs].n_value < nList[rhs].n_value;
908	});
909	for (size_t j = `0`; j < symbolIndices.size(); ++j) {
910	const uint32_t symIndex = symbolIndices [j];
911	const NList &sym = nList[symIndex];
912	StringRef name = getSymName(sym);
913	Subsection &subsec = subsections.back();
914	InputSection *isec = subsec.isec;
915
916	uint64_t subsecAddr = sectionAddr + subsec.offset;
917	size_t symbolOffset = sym.n_value - subsecAddr;
918	uint64_t symbolSize =
919	j + `1` < symbolIndices.size()
920	? nList[symbolIndices [j + `1`]].n_value - sym.n_value
921	: isec->data.size() - symbolOffset;
922	// There are 4 cases where we do not need to create a new subsection:
923	// 1. If the input file does not use subsections-via-symbols.
924	// 2. Multiple symbols at the same address only induce one subsection.
925	// (The symbolOffset == 0 check covers both this case as well as
926	// the first loop iteration.)
927	// 3. Alternative entry points do not induce new subsections.
928	// 4. If we have a literal section (e.g. __cstring and __literal4).
929	if (!subsectionsViaSymbols \|\| symbolOffset == `0` \|\|
930	sym.n_desc & N_ALT_ENTRY \|\| !isa<ConcatInputSection>(Val: isec)) {
931	isec->hasAltEntry = symbolOffset != `0`;
932	symbols [symIndex] = createDefined(sym, name, isec, symbolOffset,
933	symbolSize, forceHidden);
934	continue;
935	}
936	auto *concatIsec = cast<ConcatInputSection>(Val: isec);
937
938	auto nextIsec = make<ConcatInputSection>(args&: concatIsec);
939	nextIsec->wasCoalesced = false;
940	if (isZeroFill(flags: isec->getFlags())) {
941	// Zero-fill sections have NULL data.data() non-zero data.size()
942	nextIsec->data = {nullptr, isec->data.size() - symbolOffset};
943	isec->data = {nullptr, symbolOffset};
944	} else {
945	nextIsec->data = isec->data.slice(N: symbolOffset);
946	isec->data = isec->data.slice(N: `0`, M: symbolOffset);
947	}
948
949	// By construction, the symbol will be at offset zero in the new
950	// subsection.
951	symbols [symIndex] = createDefined(sym, name, nextIsec, /value=/`0`,
952	symbolSize, forceHidden);
953	// TODO: ld64 appears to preserve the original alignment as well as each
954	// subsection's offset from the last aligned address. We should consider
955	// emulating that behavior.
956	nextIsec->align = MinAlign(sectionAlign, sym.n_value);
957	subsections.push_back({sym.n_value - sectionAddr, nextIsec});
958	}
959	}
960
961	// Undefined symbols can trigger recursive fetch from Archives due to
962	// LazySymbols. Process defined symbols first so that the relative order
963	// between a defined symbol and an undefined symbol does not change the
964	// symbol resolution behavior. In addition, a set of interconnected symbols
965	// will all be resolved to the same file, instead of being resolved to
966	// different files.
967	for (unsigned i : undefineds)
968	symbols [i] = parseNonSectionSymbol(nList[i], strtab);
969	}
970
971	OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
972	StringRef sectName)
973	: InputFile (OpaqueKind, mb) {
974	const auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
975	ArrayRef<uint8_t> data = {buf, mb.getBufferSize()};
976	sections.push_back(x: make<Section>(/file=/args: this, args: segName.take_front(N: `16`),
977	args: sectName.take_front(N: `16`),
978	/flags=/args: `0`, /addr=/args: `0`));
979	Section &section = *sections.back();
980	ConcatInputSection *isec = make<ConcatInputSection>(args&: section, args&: data);
981	isec->live = true;
982	section.subsections.push_back(x: {.offset: `0`, .isec: isec});
983	}
984
985	template <class LP>
986	void ObjFile::parseLinkerOptions(SmallVectorImpl<StringRef> &LCLinkerOptions) {
987	using Header = typename LP::mach_header;
988	auto hdr = reinterpret_cast<const* Header *>(mb.getBufferStart());
989
990	for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) {
991	StringRef data{reinterpret_cast<const char *>(cmd + `1`),
992	cmd->cmdsize - sizeof(linker_option_command)};
993	parseLCLinkerOption(LCLinkerOptions, this, cmd->count, data);
994	}
995	}
996
997	SmallVector<StringRef> macho::unprocessedLCLinkerOptions;
998	ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
999	bool lazy, bool forceHidden, bool compatArch,
1000	bool builtFromBitcode)
1001	: InputFile (ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden),
1002	builtFromBitcode(builtFromBitcode) {
1003	this->archiveName = std::string (archiveName);
1004	this->compatArch = compatArch;
1005	if (lazy) {
1006	if (target->wordSize == `8`)
1007	parseLazy<LP64>();
1008	else
1009	parseLazy<ILP32>();
1010	} else {
1011	if (target->wordSize == `8`)
1012	parse<LP64>();
1013	else
1014	parse<ILP32>();
1015	}
1016	}
1017
1018	template <class LP> void ObjFile::parse() {
1019	using Header = typename LP::mach_header;
1020	using SegmentCommand = typename LP::segment_command;
1021	using SectionHeader = typename LP::section;
1022	using NList = typename LP::nlist;
1023
1024	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1025	auto hdr = reinterpret_cast<const* Header *>(mb.getBufferStart());
1026
1027	// If we've already checked the arch, then don't need to check again.
1028	if (!compatArch)
1029	return;
1030	if (!(compatArch = compatWithTargetArch(this, hdr)))
1031	return;
1032
1033	// We will resolve LC linker options once all native objects are loaded after
1034	// LTO is finished.
1035	SmallVector<StringRef, `4`> LCLinkerOptions;
1036	parseLinkerOptions<LP>(LCLinkerOptions);
1037	unprocessedLCLinkerOptions.append(RHS: LCLinkerOptions);
1038
1039	ArrayRef<SectionHeader> sectionHeaders;
1040	if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
1041	auto c = reinterpret_cast<const* SegmentCommand *>(cmd);
1042	sectionHeaders = ArrayRef<SectionHeader>{
1043	reinterpret_cast<const SectionHeader *>(c + `1`), c->nsects};
1044	parseSections(sectionHeaders);
1045	}
1046
1047	// TODO: Error on missing LC_SYMTAB?
1048	if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
1049	auto c = reinterpret_cast<const* symtab_command *>(cmd);
1050	ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
1051	c->nsyms);
1052	const char strtab = reinterpret_cast<const* char *>(buf) + c->stroff;
1053	bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
1054	parseSymbols<LP>(sectionHeaders, nList, strtab, subsectionsViaSymbols);
1055	}
1056
1057	// The relocations may refer to the symbols, so we parse them after we have
1058	// parsed all the symbols.
1059	for (size_t i = `0`, n = sections.size(); i < n; ++i)
1060	if (!sections [i]->subsections.empty())
1061	parseRelocations(sectionHeaders, sectionHeaders[i], *sections [i]);
1062
1063	parseDebugInfo();
1064
1065	Section ehFrameSection = nullptr*;
1066	Section compactUnwindSection = nullptr*;
1067	for (Section *sec : sections) {
1068	Section s = StringSwitch<Section >(sec->name)
1069	.Case(S: section_names::compactUnwind, Value: &compactUnwindSection)
1070	.Case(S: section_names::ehFrame, Value: &ehFrameSection)
1071	.Default(Value: nullptr);
1072	if (s)
1073	*s = sec;
1074	}
1075	if (compactUnwindSection)
1076	registerCompactUnwind(compactUnwindSection&: *compactUnwindSection);
1077	if (ehFrameSection)
1078	registerEhFrames(ehFrameSection&: *ehFrameSection);
1079	}
1080
1081	template <class LP> void ObjFile::parseLazy() {
1082	using Header = typename LP::mach_header;
1083	using NList = typename LP::nlist;
1084
1085	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1086	auto hdr = reinterpret_cast<const* Header *>(mb.getBufferStart());
1087
1088	if (!compatArch)
1089	return;
1090	if (!(compatArch = compatWithTargetArch(this, hdr)))
1091	return;
1092
1093	const load_command *cmd = findCommand(hdr, LC_SYMTAB);
1094	if (!cmd)
1095	return;
1096	auto c = reinterpret_cast<const* symtab_command *>(cmd);
1097	ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
1098	c->nsyms);
1099	const char strtab = reinterpret_cast<const* char *>(buf) + c->stroff;
1100	symbols.resize(nList.size());
1101	for (const auto &[i, sym] : llvm::enumerate(nList)) {
1102	if ((sym.n_type & N_EXT) && !isUndef(sym)) {
1103	// TODO: Bound checking
1104	StringRef name = strtab + sym.n_strx;
1105	symbols[i] = symtab ->addLazyObject(name, file&: *this);
1106	if (!lazy)
1107	break;
1108	}
1109	}
1110	}
1111
1112	void ObjFile::parseDebugInfo() {
1113	std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this);
1114	if (!dObj)
1115	return;
1116
1117	// We do not re-use the context from getDwarf() here as that function
1118	// constructs an expensive DWARFCache object.
1119	auto *ctx = make<DWARFContext>(
1120	args: std::move(dObj), args: "",
1121	args: [&](Error err) {
1122	warn(msg: toString(f: this) + ": " + toString(E: std::move(err)));
1123	},
1124	args: [&](Error warning) {
1125	warn(msg: toString(f: this) + ": " + toString(E: std::move(warning)));
1126	});
1127
1128	// TODO: Since object files can contain a lot of DWARF info, we should verify
1129	// that we are parsing just the info we need
1130	const DWARFContext::compile_unit_range &units = ctx->compile_units();
1131	// FIXME: There can be more than one compile unit per object file. See
1132	// PR48637.
1133	auto it = units.begin();
1134	compileUnit = it != units.end() ? it ->get() : nullptr;
1135	}
1136
1137	ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
1138	const auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1139	const load_command *cmd = findCommand(anyHdr: buf, types: LC_DATA_IN_CODE);
1140	if (!cmd)
1141	return {};
1142	const auto c = reinterpret_cast<const* linkedit_data_command *>(cmd);
1143	return {reinterpret_cast<const data_in_code_entry *>(buf + c->dataoff),
1144	c->datasize / sizeof(data_in_code_entry)};
1145	}
1146
1147	ArrayRef<uint8_t> ObjFile::getOptimizationHints() const {
1148	const auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1149	if (auto *cmd =
1150	findCommand<linkedit_data_command>(anyHdr: buf, types: LC_LINKER_OPTIMIZATION_HINT))
1151	return {buf + cmd->dataoff, cmd->datasize};
1152	return {};
1153	}
1154
1155	// Create pointers from symbols to their associated compact unwind entries.
1156	void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
1157	for (const Subsection &subsection : compactUnwindSection.subsections) {
1158	ConcatInputSection *isec = cast<ConcatInputSection>(Val: subsection.isec);
1159	// Hack!! Each compact unwind entry (CUE) has its UNSIGNED relocations embed
1160	// their addends in its data. Thus if ICF operated naively and compared the
1161	// entire contents of each CUE, entries with identical unwind info but e.g.
1162	// belonging to different functions would never be considered equivalent. To
1163	// work around this problem, we remove some parts of the data containing the
1164	// embedded addends. In particular, we remove the function address and LSDA
1165	// pointers. Since these locations are at the start and end of the entry,
1166	// we can do this using a simple, efficient slice rather than performing a
1167	// copy. We are not losing any information here because the embedded
1168	// addends have already been parsed in the corresponding Reloc structs.
1169	//
1170	// Removing these pointers would not be safe if they were pointers to
1171	// absolute symbols. In that case, there would be no corresponding
1172	// relocation. However, (AFAIK) MC cannot emit references to absolute
1173	// symbols for either the function address or the LSDA. However, it can* do*
1174	// so for the personality pointer, so we are not slicing that field away.
1175	//
1176	// Note that we do not adjust the offsets of the corresponding relocations;
1177	// instead, we rely on `relocateCompactUnwind()` to correctly handle these
1178	// truncated input sections.
1179	isec->data = isec->data.slice(N: target->wordSize, M: `8` + target->wordSize);
1180	uint32_t encoding = read32le(P: isec->data.data() + sizeof(uint32_t));
1181	// llvm-mc omits CU entries for functions that need DWARF encoding, but
1182	// `ld -r` doesn't. We can ignore them because we will re-synthesize these
1183	// CU entries from the DWARF info during the output phase.
1184	if ((encoding & static_cast<uint32_t>(UNWIND_MODE_MASK)) ==
1185	target->modeDwarfEncoding)
1186	continue;
1187
1188	ConcatInputSection *referentIsec;
1189	for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
1190	Relocation &r = *it;
1191	// CUE::functionAddress is at offset 0. Skip personality & LSDA relocs.
1192	if (r.offset != `0`) {
1193	++it;
1194	continue;
1195	}
1196	uint64_t add = r.addend;
1197	if (auto sym = cast_or_null<Defined>(Val: r.referent.dyn_cast<Symbol >())) {
1198	// Check whether the symbol defined in this file is the prevailing one.
1199	// Skip if it is e.g. a weak def that didn't prevail.
1200	if (sym->getFile() != this) {
1201	++it;
1202	continue;
1203	}
1204	add += sym->value;
1205	referentIsec = cast<ConcatInputSection>(Val: sym->isec());
1206	} else {
1207	referentIsec =
1208	cast<ConcatInputSection>(Val: r.referent.dyn_cast<InputSection *>());
1209	}
1210	// Unwind info lives in __DATA, and finalization of __TEXT will occur
1211	// before finalization of __DATA. Moreover, the finalization of unwind
1212	// info depends on the exact addresses that it references. So it is safe
1213	// for compact unwind to reference addresses in __TEXT, but not addresses
1214	// in any other segment.
1215	if (referentIsec->getSegName() != segment_names::text)
1216	error(msg: isec->getLocation(off: r.offset) + " references section " +
1217	referentIsec->getName() + " which is not in segment __TEXT");
1218	// The functionAddress relocations are typically section relocations.
1219	// However, unwind info operates on a per-symbol basis, so we search for
1220	// the function symbol here.
1221	Defined *d = tryFindSymbolAtOffset(isec: referentIsec, off: add);
1222	if (!d) {
1223	// If there's no symbol at the function address (e.g. for temporary
1224	// local labels that are not in the symtab), synthesize a local one so
1225	// we still emit correct unwind info.
1226
1227	// Avoid creating symbols for coalesced sections; those functions were
1228	// folded away.
1229	if (referentIsec->wasCoalesced) {
1230	++it;
1231	continue;
1232	}
1233
1234	d = make<Defined>(args: saver().save(S: Twine("Lcu.") + referentIsec->getName() +
1235	"." + Twine::utohexstr(Val: add)),
1236	args: this, args&: referentIsec, args&: add,
1237	/size=/args: `0`, /isWeakDef=/args: false,
1238	/isExternal=/args: false, /isPrivateExtern=/args: false,
1239	/includeInSymtab=/args: false,
1240	/isReferencedDynamically=/args: false,
1241	/noDeadStrip=/args: false);
1242	// Also add to the file-level symbol list so that scanSymbols() in
1243	// Writer picks it up and registers it with UnwindInfoSection.
1244	symbols.push_back(x: d);
1245	}
1246	d->originalUnwindEntry = isec;
1247	// Now that the symbol points to the unwind entry, we can remove the reloc
1248	// that points from the unwind entry back to the symbol.
1249	//
1250	// First, the symbol keeps the unwind entry alive (and not vice versa), so
1251	// this keeps dead-stripping simple.
1252	//
1253	// Moreover, it reduces the work that ICF needs to do to figure out if
1254	// functions with unwind info are foldable.
1255	//
1256	// However, this does make it possible for ICF to fold CUEs that point to
1257	// distinct functions (if the CUEs are otherwise identical).
1258	// UnwindInfoSection takes care of this by re-duplicating the CUEs so that
1259	// each one can hold a distinct functionAddress value.
1260	//
1261	// Given that clang emits relocations in reverse order of address, this
1262	// relocation should be at the end of the vector for most of our input
1263	// object files, so this erase() is typically an O(1) operation.
1264	it = isec->relocs.erase(position: it);
1265	}
1266	}
1267	}
1268
1269	struct CIE {
1270	macho::Symbol personalitySymbol = nullptr*;
1271	bool fdesHaveAug = false;
1272	uint8_t lsdaPtrSize = `0`; // 0 => no LSDA
1273	uint8_t funcPtrSize = `0`;
1274	};
1275
1276	static uint8_t pointerEncodingToSize(uint8_t enc) {
1277	switch (enc & `0xf`) {
1278	case dwarf::DW_EH_PE_absptr:
1279	return target->wordSize;
1280	case dwarf::DW_EH_PE_sdata4:
1281	return `4`;
1282	case dwarf::DW_EH_PE_sdata8:
1283	// ld64 doesn't actually support sdata8, but this seems simple enough...
1284	return `8`;
1285	default:
1286	return `0`;
1287	};
1288	}
1289
1290	static CIE parseCIE(const InputSection isec, const* EhReader &reader,
1291	size_t off) {
1292	// Handling the full generality of possible DWARF encodings would be a major
1293	// pain. We instead take advantage of our knowledge of how llvm-mc encodes
1294	// DWARF and handle just that.
1295	constexpr uint8_t expectedPersonalityEnc =
1296	dwarf::DW_EH_PE_pcrel \| dwarf::DW_EH_PE_indirect \| dwarf::DW_EH_PE_sdata4;
1297
1298	CIE cie;
1299	uint8_t version = reader.readByte(off: &off);
1300	if (version != `1` && version != `3`)
1301	fatal(msg: "Expected CIE version of 1 or 3, got " + Twine (version));
1302	StringRef aug = reader.readString(off: &off);
1303	reader.skipLeb128(off: &off); // skip code alignment
1304	reader.skipLeb128(off: &off); // skip data alignment
1305	reader.skipLeb128(off: &off); // skip return address register
1306	reader.skipLeb128(off: &off); // skip aug data length
1307	uint64_t personalityAddrOff = `0`;
1308	for (char c : aug) {
1309	switch (c) {
1310	case `'z'`:
1311	cie.fdesHaveAug = true;
1312	break;
1313	case `'P'`: {
1314	uint8_t personalityEnc = reader.readByte(off: &off);
1315	if (personalityEnc != expectedPersonalityEnc)
1316	reader.failOn(errOff: off, msg: "unexpected personality encoding 0x" +
1317	Twine::utohexstr(Val: personalityEnc));
1318	personalityAddrOff = off;
1319	off += `4`;
1320	break;
1321	}
1322	case `'L'`: {
1323	uint8_t lsdaEnc = reader.readByte(off: &off);
1324	cie.lsdaPtrSize = pointerEncodingToSize(enc: lsdaEnc);
1325	if (cie.lsdaPtrSize == `0`)
1326	reader.failOn(errOff: off, msg: "unexpected LSDA encoding 0x" +
1327	Twine::utohexstr(Val: lsdaEnc));
1328	break;
1329	}
1330	case `'R'`: {
1331	uint8_t pointerEnc = reader.readByte(off: &off);
1332	cie.funcPtrSize = pointerEncodingToSize(enc: pointerEnc);
1333	if (cie.funcPtrSize == `0` \|\| !(pointerEnc & dwarf::DW_EH_PE_pcrel))
1334	reader.failOn(errOff: off, msg: "unexpected pointer encoding 0x" +
1335	Twine::utohexstr(Val: pointerEnc));
1336	break;
1337	}
1338	default:
1339	break;
1340	}
1341	}
1342	if (personalityAddrOff != `0`) {
1343	const auto *personalityReloc = isec->getRelocAt(off: personalityAddrOff);
1344	if (!personalityReloc)
1345	reader.failOn(errOff: off, msg: "Failed to locate relocation for personality symbol");
1346	cie.personalitySymbol = cast<macho::Symbol *>(Val: personalityReloc->referent);
1347	}
1348	return cie;
1349	}
1350
1351	// EH frame target addresses may be encoded as pcrel offsets. However, instead
1352	// of using an actual pcrel reloc, ld64 emits subtractor relocations instead.
1353	// This function recovers the target address from the subtractors, essentially
1354	// performing the inverse operation of EhRelocator.
1355	//
1356	// Concretely, we expect our relocations to write the value of `PC -
1357	// target_addr` to `PC`. `PC` itself is denoted by a minuend relocation that
1358	// points to a symbol plus an addend.
1359	//
1360	// It is important that the minuend relocation point to a symbol within the
1361	// same section as the fixup value, since sections may get moved around.
1362	//
1363	// For example, for arm64, llvm-mc emits relocations for the target function
1364	// address like so:
1365	//
1366	// ltmp:
1367	// <CIE start>
1368	// ...
1369	// <CIE end>
1370	// ... multiple FDEs ...
1371	// <FDE start>
1372	// <target function address - (ltmp + pcrel offset)>
1373	// ...
1374	//
1375	// If any of the FDEs in `multiple FDEs` get dead-stripped, then `FDE start`
1376	// will move to an earlier address, and `ltmp + pcrel offset` will no longer
1377	// reflect an accurate pcrel value. To avoid this problem, we "canonicalize"
1378	// our relocation by adding an `EH_Frame` symbol at `FDE start`, and updating
1379	// the reloc to be `target function address - (EH_Frame + new pcrel offset)`.
1380	//
1381	// If `Invert` is set, then we instead expect `target_addr - PC` to be written
1382	// to `PC`.
1383	template <bool Invert = false>
1384	Defined *
1385	targetSymFromCanonicalSubtractor(const InputSection *isec,
1386	std::vector<Relocation>::iterator relocIt) {
1387	Relocation &subtrahend = *relocIt;
1388	Relocation &minuend = *std::next(x: relocIt);
1389	assert(target->hasAttr(subtrahend.type, RelocAttrBits::SUBTRAHEND));
1390	assert(target->hasAttr(minuend.type, RelocAttrBits::UNSIGNED));
1391	// Note: pcSym may not* be exactly at the PC; there's usually a non-zero*
1392	// addend.
1393	auto pcSym = cast<Defined>(Val: cast<macho::Symbol >(Val&: subtrahend.referent));
1394	Defined *target =
1395	cast_or_null<Defined>(Val: minuend.referent.dyn_cast<macho::Symbol *>());
1396	if (!pcSym) {
1397	auto *targetIsec =
1398	cast<ConcatInputSection>(Val: cast<InputSection *>(Val&: minuend.referent));
1399	target = findSymbolAtOffset(isec: targetIsec, off: minuend.addend);
1400	}
1401	if (Invert)
1402	std::swap(a&: pcSym, b&: target);
1403	if (pcSym->isec() == isec) {
1404	if (pcSym->value - (Invert ? -`1` : `1`) * minuend.addend != subtrahend.offset)
1405	fatal(msg: "invalid FDE relocation in __eh_frame");
1406	} else {
1407	// Ensure the pcReloc points to a symbol within the current EH frame.
1408	// HACK: we should really verify that the original relocation's semantics
1409	// are preserved. In particular, we should have
1410	// `oldSym->value + oldOffset == newSym + newOffset`. However, we don't
1411	// have an easy way to access the offsets from this point in the code; some
1412	// refactoring is needed for that.
1413	Relocation &pcReloc = Invert ? minuend : subtrahend;
1414	pcReloc.referent = isec->symbols [`0`];
1415	assert(isec->symbols[`0`]->value == `0`);
1416	minuend.addend = pcReloc.offset * (Invert ? `1LL` : -`1LL`);
1417	}
1418	return target;
1419	}
1420
1421	Defined findSymbolAtAddress(const* std::vector<Section *> &sections,
1422	uint64_t addr) {
1423	Section *sec = findContainingSection(sections, offset: &addr);
1424	auto isec = cast<ConcatInputSection>(Val: findContainingSubsection(section: sec, offset: &addr));
1425	return findSymbolAtOffset(isec, off: addr);
1426	}
1427
1428	// For symbols that don't have compact unwind info, associate them with the more
1429	// general-purpose (and verbose) DWARF unwind info found in __eh_frame.
1430	//
1431	// This requires us to parse the contents of __eh_frame. See EhFrame.h for a
1432	// description of its format.
1433	//
1434	// While parsing, we also look for what MC calls "abs-ified" relocations -- they
1435	// are relocations which are implicitly encoded as offsets in the section data.
1436	// We convert them into explicit Reloc structs so that the EH frames can be
1437	// handled just like a regular ConcatInputSection later in our output phase.
1438	//
1439	// We also need to handle the case where our input object file has explicit
1440	// relocations. This is the case when e.g. it's the output of `ld -r`. We only
1441	// look for the "abs-ified" relocation if an explicit relocation is absent.
1442	void ObjFile::registerEhFrames(Section &ehFrameSection) {
1443	DenseMap<const InputSection *, CIE> cieMap;
1444	for (const Subsection &subsec : ehFrameSection.subsections) {
1445	auto *isec = cast<ConcatInputSection>(Val: subsec.isec);
1446	uint64_t isecOff = subsec.offset;
1447
1448	// Subtractor relocs require the subtrahend to be a symbol reloc. Ensure
1449	// that all EH frames have an associated symbol so that we can generate
1450	// subtractor relocs that reference them.
1451	if (isec->symbols.size() == `0`)
1452	make<Defined>(args: "EH_Frame", args: isec->getFile(), args&: isec, /value=/args: `0`,
1453	args: isec->getSize(), /isWeakDef=/args: false, /isExternal=/args: false,
1454	/isPrivateExtern=/args: false, /includeInSymtab=/args: false,
1455	/isReferencedDynamically=/args: false,
1456	/noDeadStrip=/args: false);
1457	else if (isec->symbols [`0`]->value != `0`)
1458	fatal(msg: "found symbol at unexpected offset in __eh_frame");
1459
1460	EhReader reader(this, isec->data, subsec.offset);
1461	size_t dataOff = `0`; // Offset from the start of the EH frame.
1462	reader.skipValidLength(off: &dataOff); // readLength() already validated this.
1463	// cieOffOff is the offset from the start of the EH frame to the cieOff
1464	// value, which is itself an offset from the current PC to a CIE.
1465	const size_t cieOffOff = dataOff;
1466
1467	EhRelocator ehRelocator(isec);
1468	auto cieOffRelocIt = llvm::find_if(Range&: isec->relocs, P: [=](const Relocation &r) {
1469	return r.offset == cieOffOff;
1470	});
1471	InputSection cieIsec = nullptr*;
1472	if (cieOffRelocIt != isec->relocs.end()) {
1473	// We already have an explicit relocation for the CIE offset.
1474	cieIsec =
1475	targetSymFromCanonicalSubtractor</Invert=/true>(isec, relocIt: cieOffRelocIt)
1476	->isec();
1477	dataOff += sizeof(uint32_t);
1478	} else {
1479	// If we haven't found a relocation, then the CIE offset is most likely
1480	// embedded in the section data (AKA an "abs-ified" reloc.). Parse that
1481	// and generate a Reloc struct.
1482	uint32_t cieMinuend = reader.readU32(off: &dataOff);
1483	if (cieMinuend == `0`) {
1484	cieIsec = isec;
1485	} else {
1486	uint32_t cieOff = isecOff + dataOff - cieMinuend;
1487	cieIsec = findContainingSubsection(section: ehFrameSection, offset: &cieOff);
1488	if (cieIsec == nullptr)
1489	fatal(msg: "failed to find CIE");
1490	}
1491	if (cieIsec != isec)
1492	ehRelocator.makeNegativePcRel(off: cieOffOff, target: cieIsec->symbols [`0`],
1493	/length=/`2`);
1494	}
1495	if (cieIsec == isec) {
1496	cieMap [cieIsec] = parseCIE(isec, reader, off: dataOff);
1497	continue;
1498	}
1499
1500	assert(cieMap.contains(cieIsec));
1501	const CIE &cie = cieMap [cieIsec];
1502	// Offset of the function address within the EH frame.
1503	const size_t funcAddrOff = dataOff;
1504	uint64_t funcAddr = reader.readPointer(off: &dataOff, size: cie.funcPtrSize) +
1505	ehFrameSection.addr + isecOff + funcAddrOff;
1506	uint32_t funcLength = reader.readPointer(off: &dataOff, size: cie.funcPtrSize);
1507	size_t lsdaAddrOff = `0`; // Offset of the LSDA address within the EH frame.
1508	std::optional<uint64_t> lsdaAddrOpt;
1509	if (cie.fdesHaveAug) {
1510	reader.skipLeb128(off: &dataOff);
1511	lsdaAddrOff = dataOff;
1512	if (cie.lsdaPtrSize != `0`) {
1513	uint64_t lsdaOff = reader.readPointer(off: &dataOff, size: cie.lsdaPtrSize);
1514	if (lsdaOff != `0`) // FIXME possible to test this?
1515	lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
1516	}
1517	}
1518
1519	auto funcAddrRelocIt = isec->relocs.end();
1520	auto lsdaAddrRelocIt = isec->relocs.end();
1521	for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
1522	if (it ->offset == funcAddrOff)
1523	funcAddrRelocIt = it ++; // Found subtrahend; skip over minuend reloc
1524	else if (lsdaAddrOpt && it ->offset == lsdaAddrOff)
1525	lsdaAddrRelocIt = it ++; // Found subtrahend; skip over minuend reloc
1526	}
1527
1528	Defined *funcSym;
1529	if (funcAddrRelocIt != isec->relocs.end()) {
1530	funcSym = targetSymFromCanonicalSubtractor(isec, relocIt: funcAddrRelocIt);
1531	// Canonicalize the symbol. If there are multiple symbols at the same
1532	// address, we want both `registerEhFrame` and `registerCompactUnwind`
1533	// to register the unwind entry under same symbol.
1534	// This is not particularly efficient, but we should run into this case
1535	// infrequently (only when handling the output of `ld -r`).
1536	if (funcSym->isec())
1537	funcSym = findSymbolAtOffset(isec: cast<ConcatInputSection>(Val: funcSym->isec()),
1538	off: funcSym->value);
1539	} else {
1540	funcSym = findSymbolAtAddress(sections, addr: funcAddr);
1541	ehRelocator.makePcRel(off: funcAddrOff, target: funcSym, length: target->p2WordSize);
1542	}
1543	// The symbol has been coalesced, or already has a compact unwind entry.
1544	if (!funcSym \|\| funcSym->getFile() != this \|\| funcSym->unwindEntry()) {
1545	// We must prune unused FDEs for correctness, so we cannot rely on
1546	// -dead_strip being enabled.
1547	isec->live = false;
1548	continue;
1549	}
1550
1551	InputSection lsdaIsec = nullptr*;
1552	if (lsdaAddrRelocIt != isec->relocs.end()) {
1553	lsdaIsec =
1554	targetSymFromCanonicalSubtractor(isec, relocIt: lsdaAddrRelocIt)->isec();
1555	} else if (lsdaAddrOpt) {
1556	uint64_t lsdaAddr = *lsdaAddrOpt;
1557	Section *sec = findContainingSection(sections, offset: &lsdaAddr);
1558	lsdaIsec =
1559	cast<ConcatInputSection>(Val: findContainingSubsection(section: *sec, offset: &lsdaAddr));
1560	ehRelocator.makePcRel(off: lsdaAddrOff, target: lsdaIsec, length: target->p2WordSize);
1561	}
1562
1563	fdes [isec] = {.funcLength: funcLength, .personality: cie.personalitySymbol, .lsda: lsdaIsec};
1564	funcSym->originalUnwindEntry = isec;
1565	ehRelocator.commit();
1566	}
1567
1568	// __eh_frame is marked as S_ATTR_LIVE_SUPPORT in input files, because FDEs
1569	// are normally required to be kept alive if they reference a live symbol.
1570	// However, we've explicitly created a dependency from a symbol to its FDE, so
1571	// dead-stripping will just work as usual, and S_ATTR_LIVE_SUPPORT will only
1572	// serve to incorrectly prevent us from dead-stripping duplicate FDEs for a
1573	// live symbol (e.g. if there were multiple weak copies). Remove this flag to
1574	// let dead-stripping proceed correctly.
1575	ehFrameSection.flags &= ~S_ATTR_LIVE_SUPPORT;
1576	}
1577
1578	std::string ObjFile::sourceFile() const {
1579	const char *unitName = compileUnit->getUnitDIE().getShortName();
1580	// DWARF allows DW_AT_name to be absolute, in which case nothing should be
1581	// prepended. As for the styles, debug info can contain paths from any OS, not
1582	// necessarily an OS we're currently running on. Moreover different
1583	// compilation units can be compiled on different operating systems and linked
1584	// together later.
1585	if (sys::path::is_absolute(path: unitName, style: llvm::sys::path::Style::posix) \|\|
1586	sys::path::is_absolute(path: unitName, style: llvm::sys::path::Style::windows))
1587	return unitName;
1588	SmallString<`261`> dir(compileUnit->getCompilationDir());
1589	StringRef sep = sys::path::get_separator();
1590	// We don't use `path::append` here because we want an empty `dir` to result
1591	// in an absolute path. `append` would give us a relative path for that case.
1592	if (!dir.ends_with(Suffix: sep))
1593	dir += sep;
1594	return (dir + unitName).str();
1595	}
1596
1597	lld::DWARFCache *ObjFile::getDwarf() {
1598	llvm::call_once(flag&: initDwarf, F: [this]() {
1599	auto dwObj = DwarfObject::create(this);
1600	if (!dwObj)
1601	return;
1602	dwarfCache = std::make_unique<DWARFCache>(args: std::make_unique<DWARFContext>(
1603	args: std::move(dwObj), args: "",
1604	args: [&](Error err) { warn(msg: getName() + ": " + toString(E: std::move(err))); },
1605	args: [&](Error warning) {
1606	warn(msg: getName() + ": " + toString(E: std::move(warning)));
1607	}));
1608	});
1609
1610	return dwarfCache.get();
1611	}
1612	// The path can point to either a dylib or a .tbd file.
1613	static DylibFile loadDylib(StringRef path, DylibFile umbrella) {
1614	std::optional<MemoryBufferRef> mbref = readFile(path);
1615	if (!mbref) {
1616	error(msg: "could not read dylib file at " + path);
1617	return nullptr;
1618	}
1619	return loadDylib(mbref: *mbref, umbrella);
1620	}
1621
1622	// TBD files are parsed into a series of TAPI documents (InterfaceFiles), with
1623	// the first document storing child pointers to the rest of them. When we are
1624	// processing a given TBD file, we store that top-level document in
1625	// currentTopLevelTapi. When processing re-exports, we search its children for
1626	// potentially matching documents in the same TBD file. Note that the children
1627	// themselves don't point to further documents, i.e. this is a two-level tree.
1628	//
1629	// Re-exports can either refer to on-disk files, or to documents within .tbd
1630	// files.
1631	static DylibFile findDylib(StringRef path, DylibFile umbrella,
1632	const InterfaceFile *currentTopLevelTapi) {
1633	// Search order:
1634	// 1. Install name basename in -F / -L directories.
1635	{
1636	// Framework names can be in multiple formats:
1637	// - Foo.framework/Foo
1638	// - Foo.framework/Versions/A/Foo
1639	StringRef stem = path::stem(path);
1640	SmallString<`128`> frameworkName("/");
1641	frameworkName += stem;
1642	frameworkName += ".framework/";
1643	size_t i = path.rfind(Str: frameworkName);
1644	if (i != StringRef::npos) {
1645	StringRef frameworkPath = path.substr(Start: i + `1`);
1646	for (StringRef dir : config ->frameworkSearchPaths) {
1647	SmallString<`128`> candidate = dir;
1648	path::append(path&: candidate, a: frameworkPath);
1649	if (std::optional<StringRef> dylibPath =
1650	resolveDylibPath(path: candidate.str()))
1651	return loadDylib(path: *dylibPath, umbrella);
1652	}
1653	} else if (std::optional<StringRef> dylibPath = findPathCombination(
1654	name: stem, roots: config ->librarySearchPaths, extensions: {".tbd", ".dylib", ".so"}))
1655	return loadDylib(path: *dylibPath, umbrella);
1656	}
1657
1658	// 2. As absolute path.
1659	if (path::is_absolute(path, style: path::Style::posix))
1660	for (StringRef root : config ->systemLibraryRoots)
1661	if (std::optional<StringRef> dylibPath =
1662	resolveDylibPath(path: (root + path).str()))
1663	return loadDylib(path: *dylibPath, umbrella);
1664
1665	// 3. As relative path.
1666
1667	// TODO: Handle -dylib_file
1668
1669	// Replace @executable_path, @loader_path, @rpath prefixes in install name.
1670	SmallString<`128`> newPath;
1671	if (config ->outputType == MH_EXECUTE &&
1672	path.consume_front(Prefix: "@executable_path/")) {
1673	// ld64 allows overriding this with the undocumented flag -executable_path.
1674	// lld doesn't currently implement that flag.
1675	// FIXME: Consider using finalOutput instead of outputFile.
1676	path::append(path&: newPath, a: path::parent_path(path: config ->outputFile), b: path);
1677	path = newPath;
1678	} else if (path.consume_front(Prefix: "@loader_path/")) {
1679	fs::real_path(path: umbrella->getName(), output&: newPath);
1680	path::remove_filename(path&: newPath);
1681	path::append(path&: newPath, a: path);
1682	path = newPath;
1683	} else if (path.starts_with(Prefix: "@rpath/")) {
1684	for (StringRef rpath : umbrella->rpaths) {
1685	newPath.clear();
1686	if (rpath.consume_front(Prefix: "@loader_path/")) {
1687	fs::real_path(path: umbrella->getName(), output&: newPath);
1688	path::remove_filename(path&: newPath);
1689	}
1690	path::append(path&: newPath, a: rpath, b: path.drop_front(N: strlen(s: "@rpath/")));
1691	if (std::optional<StringRef> dylibPath = resolveDylibPath(path: newPath.str()))
1692	return loadDylib(path: *dylibPath, umbrella);
1693	}
1694	// If not found in umbrella, try the rpaths specified via -rpath too.
1695	for (StringRef rpath : config ->runtimePaths) {
1696	newPath.clear();
1697	if (rpath.consume_front(Prefix: "@loader_path/")) {
1698	fs::real_path(path: umbrella->getName(), output&: newPath);
1699	path::remove_filename(path&: newPath);
1700	}
1701	path::append(path&: newPath, a: rpath, b: path.drop_front(N: strlen(s: "@rpath/")));
1702	if (std::optional<StringRef> dylibPath = resolveDylibPath(path: newPath.str()))
1703	return loadDylib(path: *dylibPath, umbrella);
1704	}
1705	}
1706
1707	// FIXME: Should this be further up?
1708	if (currentTopLevelTapi) {
1709	for (InterfaceFile &child :
1710	make_pointee_range(Range: currentTopLevelTapi->documents())) {
1711	assert(child.documents().empty());
1712	if (path == child.getInstallName()) {
1713	auto file = make<DylibFile>(args&: child, args&: umbrella, /isBundleLoader=/args: false*,
1714	/explicitlyLinked=/args: false);
1715	file->parseReexports(interface: child);
1716	return file;
1717	}
1718	}
1719	}
1720
1721	if (std::optional<StringRef> dylibPath = resolveDylibPath(path))
1722	return loadDylib(path: *dylibPath, umbrella);
1723
1724	return nullptr;
1725	}
1726
1727	// If a re-exported dylib is public (lives in /usr/lib or
1728	// /System/Library/Frameworks), then it is considered implicitly linked: we
1729	// should bind to its symbols directly instead of via the re-exporting umbrella
1730	// library.
1731	static bool isImplicitlyLinked(StringRef path) {
1732	if (!config ->implicitDylibs)
1733	return false;
1734
1735	if (path::parent_path(path) == "/usr/lib")
1736	return true;
1737
1738	// Match /System/Library/Frameworks/$FOO.framework//$FOO
1739	if (path.consume_front(Prefix: "/System/Library/Frameworks/")) {
1740	StringRef frameworkName = path.take_until(F: [](char c) { return c == `'.'`; });
1741	return path::filename(path) == frameworkName;
1742	}
1743
1744	return false;
1745	}
1746
1747	void DylibFile::loadReexport(StringRef path, DylibFile *umbrella,
1748	const InterfaceFile *currentTopLevelTapi) {
1749	DylibFile *reexport = findDylib(path, umbrella, currentTopLevelTapi);
1750	if (!reexport) {
1751	// If not found in umbrella, retry since some rpaths might have been
1752	// defined in "this" dylib (which contains the LC_REEXPORT_DYLIB cmd) and
1753	// not in the umbrella.
1754	DylibFile reexport2 = findDylib(path, umbrella: this*, currentTopLevelTapi);
1755	if (!reexport2) {
1756	error(msg: toString(f: this) + ": unable to locate re-export with install name " +
1757	path);
1758	}
1759	}
1760	}
1761
1762	DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
1763	bool isBundleLoader, bool explicitlyLinked)
1764	: InputFile (DylibKind, mb), refState(RefState::Unreferenced),
1765	explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
1766	assert(!isBundleLoader \|\| !umbrella);
1767	if (umbrella == nullptr)
1768	umbrella = this;
1769	this->umbrella = umbrella;
1770
1771	auto hdr = reinterpret_cast<const* mach_header *>(mb.getBufferStart());
1772
1773	// Initialize installName.
1774	if (const load_command *cmd = findCommand(anyHdr: hdr, types: LC_ID_DYLIB)) {
1775	auto c = reinterpret_cast<const* dylib_command *>(cmd);
1776	currentVersion = read32le(P: &c->dylib.current_version);
1777	compatibilityVersion = read32le(P: &c->dylib.compatibility_version);
1778	installName =
1779	reinterpret_cast<const char *>(cmd) + read32le(P: &c->dylib.name);
1780	} else if (!isBundleLoader) {
1781	// macho_executable and macho_bundle don't have LC_ID_DYLIB,
1782	// so it's OK.
1783	error(msg: toString(f: this) + ": dylib missing LC_ID_DYLIB load command");
1784	return;
1785	}
1786
1787	if (config ->printEachFile)
1788	message(msg: toString(f: this));
1789	inputFiles.insert(X: this);
1790
1791	deadStrippable = hdr->flags & MH_DEAD_STRIPPABLE_DYLIB;
1792
1793	if (!checkCompatibility(input: this))
1794	return;
1795
1796	checkAppExtensionSafety(dylibIsAppExtensionSafe: hdr->flags & MH_APP_EXTENSION_SAFE);
1797
1798	for (auto *cmd : findCommands<rpath_command>(anyHdr: hdr, types: LC_RPATH)) {
1799	StringRef rpath{reinterpret_cast<const char *>(cmd) + cmd->path};
1800	rpaths.push_back(Elt: rpath);
1801	}
1802
1803	// Initialize symbols.
1804	bool canBeImplicitlyLinked = findCommand(anyHdr: hdr, types: LC_SUB_CLIENT) == nullptr;
1805	exportingFile = (canBeImplicitlyLinked && isImplicitlyLinked(path: installName))
1806	? this
1807	: this->umbrella;
1808
1809	if (!canBeImplicitlyLinked) {
1810	for (auto *cmd : findCommands<sub_client_command>(anyHdr: hdr, types: LC_SUB_CLIENT)) {
1811	StringRef allowableClient{reinterpret_cast<const char *>(cmd) +
1812	cmd->client};
1813	allowableClients.push_back(Elt: allowableClient);
1814	}
1815	}
1816
1817	const auto *dyldInfo = findCommand<dyld_info_command>(anyHdr: hdr, types: LC_DYLD_INFO_ONLY);
1818	const auto *exportsTrie =
1819	findCommand<linkedit_data_command>(anyHdr: hdr, types: LC_DYLD_EXPORTS_TRIE);
1820	if (dyldInfo && exportsTrie) {
1821	// It's unclear what should happen in this case. Maybe we should only error
1822	// out if the two load commands refer to different data?
1823	error(msg: toString(f: this) +
1824	": dylib has both LC_DYLD_INFO_ONLY and LC_DYLD_EXPORTS_TRIE");
1825	return;
1826	}
1827
1828	if (dyldInfo) {
1829	parseExportedSymbols(offset: dyldInfo->export_off, size: dyldInfo->export_size);
1830	} else if (exportsTrie) {
1831	parseExportedSymbols(offset: exportsTrie->dataoff, size: exportsTrie->datasize);
1832	} else {
1833	error(msg: "No LC_DYLD_INFO_ONLY or LC_DYLD_EXPORTS_TRIE found in " +
1834	toString(f: this));
1835	}
1836	}
1837
1838	void DylibFile::parseExportedSymbols(uint32_t offset, uint32_t size) {
1839	struct TrieEntry {
1840	StringRef name;
1841	uint64_t flags;
1842	};
1843
1844	auto buf = reinterpret_cast<const* uint8_t *>(mb.getBufferStart());
1845	std::vector<TrieEntry> entries;
1846	// Find all the $ld$ symbols to process first.*
1847	parseTrie(fileName: toString(f: this), buf: buf + offset, size,
1848	[&](const Twine &name, uint64_t flags) {
1849	StringRef savedName = saver().save(S: name);
1850	if (handleLDSymbol(originalName: savedName))
1851	return;
1852	entries.push_back(x: {.name: savedName, .flags: flags});
1853	});
1854
1855	// Process the "normal" symbols.
1856	for (TrieEntry &entry : entries) {
1857	if (exportingFile->hiddenSymbols.contains(V: CachedHashStringRef (entry.name)))
1858	continue;
1859
1860	bool isWeakDef = entry.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
1861	bool isTlv = entry.flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
1862
1863	symbols.push_back(
1864	x: symtab ->addDylib(name: entry.name, file: exportingFile, isWeakDef, isTlv));
1865	}
1866	}
1867
1868	void DylibFile::parseLoadCommands(MemoryBufferRef mb) {
1869	auto hdr = reinterpret_cast<const* mach_header *>(mb.getBufferStart());
1870	const uint8_t p = reinterpret_cast<const* uint8_t *>(mb.getBufferStart()) +
1871	target->headerSize;
1872	for (uint32_t i = `0`, n = hdr->ncmds; i < n; ++i) {
1873	auto cmd = reinterpret_cast<const* load_command *>(p);
1874	p += cmd->cmdsize;
1875
1876	if (!(hdr->flags & MH_NO_REEXPORTED_DYLIBS) &&
1877	cmd->cmd == LC_REEXPORT_DYLIB) {
1878	const auto c = reinterpret_cast<const* dylib_command *>(cmd);
1879	StringRef reexportPath =
1880	reinterpret_cast<const char *>(c) + read32le(P: &c->dylib.name);
1881	loadReexport(path: reexportPath, umbrella: exportingFile, currentTopLevelTapi: nullptr);
1882	}
1883
1884	// FIXME: What about LC_LOAD_UPWARD_DYLIB, LC_LAZY_LOAD_DYLIB,
1885	// LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB (..are reexports from dylibs with
1886	// MH_NO_REEXPORTED_DYLIBS loaded for -flat_namespace)?
1887	if (config ->namespaceKind == NamespaceKind::flat &&
1888	cmd->cmd == LC_LOAD_DYLIB) {
1889	const auto c = reinterpret_cast<const* dylib_command *>(cmd);
1890	StringRef dylibPath =
1891	reinterpret_cast<const char *>(c) + read32le(P: &c->dylib.name);
1892	DylibFile dylib = findDylib(path: dylibPath, umbrella, currentTopLevelTapi: nullptr*);
1893	if (!dylib)
1894	error(msg: Twine("unable to locate library '") + dylibPath +
1895	"' loaded from '" + toString(f: this) + "' for -flat_namespace");
1896	}
1897	}
1898	}
1899
1900	// Some versions of Xcode ship with .tbd files that don't have the right
1901	// platform settings.
1902	constexpr std::array<StringRef, `3`> skipPlatformChecks{
1903	"/usr/lib/system/libsystem_kernel.dylib",
1904	"/usr/lib/system/libsystem_platform.dylib",
1905	"/usr/lib/system/libsystem_pthread.dylib"};
1906
1907	static bool isArchABICompatible(ArchitectureSet archSet,
1908	Architecture targetArch) {
1909	uint32_t cpuType;
1910	uint32_t targetCpuType;
1911	std::tie(args&: targetCpuType, args: std::ignore) = getCPUTypeFromArchitecture(Arch: targetArch);
1912
1913	return llvm::any_of(Range&: archSet, P: [&](const auto &p) {
1914	std::tie(args&: cpuType, args: std::ignore) = getCPUTypeFromArchitecture(p);
1915	return cpuType == targetCpuType;
1916	});
1917	}
1918
1919	static bool skipPlatformCheckForCatalyst(const InterfaceFile &interface,
1920	bool explicitlyLinked) {
1921	// Catalyst outputs can link against implicitly linked macOS-only libraries.
1922	if (config ->platform() != PLATFORM_MACCATALYST \|\| explicitlyLinked)
1923	return false;
1924	ArchitectureSet macOSArchs;
1925	for (const auto &target : interface.targets())
1926	if (target.Platform == PLATFORM_MACOS)
1927	macOSArchs.set(target.Arch);
1928	return isArchABICompatible(archSet: macOSArchs, targetArch: config ->arch());
1929	}
1930
1931	static bool isTargetPlatformArchCompatible(
1932	InterfaceFile::const_target_range interfaceTargets, Target target) {
1933	if (is_contained(Range&: interfaceTargets, Element: target))
1934	return true;
1935
1936	if (config ->forceExactCpuSubtypeMatch)
1937	return false;
1938
1939	ArchitectureSet archSet;
1940	for (const auto &p : interfaceTargets)
1941	if (p.Platform == target.Platform)
1942	archSet.set(p.Arch);
1943	if (archSet.empty())
1944	return false;
1945
1946	return isArchABICompatible(archSet, targetArch: target.Arch);
1947	}
1948
1949	DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
1950	bool isBundleLoader, bool explicitlyLinked)
1951	: InputFile (DylibKind, interface), refState(RefState::Unreferenced),
1952	explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
1953	// FIXME: Add test for the missing TBD code path.
1954
1955	if (umbrella == nullptr)
1956	umbrella = this;
1957	this->umbrella = umbrella;
1958
1959	installName = saver().save(S: interface.getInstallName());
1960	compatibilityVersion = interface.getCompatibilityVersion().rawValue();
1961	currentVersion = interface.getCurrentVersion().rawValue();
1962	for (const auto &rpath : interface.rpaths())
1963	if (rpath.first == config ->platformInfo.target)
1964	rpaths.push_back(Elt: saver().save(S: rpath.second));
1965
1966	if (config ->printEachFile)
1967	message(msg: toString(f: this));
1968	inputFiles.insert(X: this);
1969
1970	if (!is_contained(Range: skipPlatformChecks, Element: installName) &&
1971	!isTargetPlatformArchCompatible(interfaceTargets: interface.targets(),
1972	target: config ->platformInfo.target) &&
1973	!skipPlatformCheckForCatalyst(interface, explicitlyLinked)) {
1974	error(msg: toString(f: this) + " is incompatible with " +
1975	std::string(config ->platformInfo.target));
1976	return;
1977	}
1978
1979	checkAppExtensionSafety(dylibIsAppExtensionSafe: interface.isApplicationExtensionSafe());
1980
1981	bool canBeImplicitlyLinked = interface.allowableClients().size() == `0`;
1982	exportingFile = (canBeImplicitlyLinked && isImplicitlyLinked(path: installName))
1983	? this
1984	: umbrella;
1985
1986	if (!canBeImplicitlyLinked)
1987	for (const auto &allowableClient : interface.allowableClients())
1988	allowableClients.push_back(
1989	Elt: *make<std::string>(args: allowableClient.getInstallName().data()));
1990
1991	auto addSymbol = [&](const llvm::MachO::Symbol &symbol,
1992	const Twine &name) -> void {
1993	StringRef savedName = saver().save(S: name);
1994	if (exportingFile->hiddenSymbols.contains(V: CachedHashStringRef (savedName)))
1995	return;
1996
1997	symbols.push_back(x: symtab ->addDylib(name: savedName, file: exportingFile,
1998	isWeakDef: symbol.isWeakDefined(),
1999	isTlv: symbol.isThreadLocalValue()));
2000	};
2001
2002	std::vector<const llvm::MachO::Symbol *> normalSymbols;
2003	normalSymbols.reserve(n: interface.symbolsCount());
2004	for (const auto *symbol : interface.symbols()) {
2005	if (!isArchABICompatible(archSet: symbol->getArchitectures(), targetArch: config ->arch()))
2006	continue;
2007	if (handleLDSymbol(originalName: symbol->getName()))
2008	continue;
2009
2010	switch (symbol->getKind()) {
2011	case EncodeKind::GlobalSymbol:
2012	case EncodeKind::ObjectiveCClass:
2013	case EncodeKind::ObjectiveCClassEHType:
2014	case EncodeKind::ObjectiveCInstanceVariable:
2015	normalSymbols.push_back(x: symbol);
2016	}
2017	}
2018	// interface.symbols() order is non-deterministic.
2019	llvm::sort(C&: normalSymbols,
2020	Comp: [](auto l, auto* r) { return* l->getName() < r->getName(); });
2021
2022	// TODO(compnerd) filter out symbols based on the target platform
2023	for (const auto *symbol : normalSymbols) {
2024	switch (symbol->getKind()) {
2025	case EncodeKind::GlobalSymbol:
2026	addSymbol (*symbol, symbol->getName());
2027	break;
2028	case EncodeKind::ObjectiveCClass:
2029	// XXX ld64 only creates these symbols when -ObjC is passed in. We may
2030	// want to emulate that.
2031	addSymbol (*symbol, objc::symbol_names::klass + symbol->getName());
2032	addSymbol (*symbol, objc::symbol_names::metaclass + symbol->getName());
2033	break;
2034	case EncodeKind::ObjectiveCClassEHType:
2035	addSymbol (*symbol, objc::symbol_names::ehtype + symbol->getName());
2036	break;
2037	case EncodeKind::ObjectiveCInstanceVariable:
2038	addSymbol (*symbol, objc::symbol_names::ivar + symbol->getName());
2039	break;
2040	}
2041	}
2042	}
2043
2044	DylibFile::DylibFile(DylibFile *umbrella)
2045	: InputFile (DylibKind, MemoryBufferRef{}), refState(RefState::Unreferenced),
2046	explicitlyLinked(false), isBundleLoader(false) {
2047	if (umbrella == nullptr)
2048	umbrella = this;
2049	this->umbrella = umbrella;
2050	}
2051
2052	void DylibFile::parseReexports(const InterfaceFile &interface) {
2053	const InterfaceFile *topLevel =
2054	interface.getParent() == nullptr ? &interface : interface.getParent();
2055	for (const InterfaceFileRef &intfRef : interface.reexportedLibraries()) {
2056	InterfaceFile::const_target_range targets = intfRef.targets();
2057	if (is_contained(Range: skipPlatformChecks, Element: intfRef.getInstallName()) \|\|
2058	isTargetPlatformArchCompatible(interfaceTargets: targets, target: config ->platformInfo.target))
2059	loadReexport(path: intfRef.getInstallName(), umbrella: exportingFile, currentTopLevelTapi: topLevel);
2060	}
2061	}
2062
2063	bool DylibFile::isExplicitlyLinked() const {
2064	if (!explicitlyLinked)
2065	return false;
2066
2067	// If this dylib was explicitly linked, but at least one of the symbols
2068	// of the synthetic dylibs it created via $ld$previous symbols is
2069	// referenced, then that synthetic dylib fulfils the explicit linkedness
2070	// and we can deadstrip this dylib if it's unreferenced.
2071	for (const auto *dylib : extraDylibs)
2072	if (dylib->isReferenced())
2073	return false;
2074
2075	return true;
2076	}
2077
2078	DylibFile *DylibFile::getSyntheticDylib(StringRef installName,
2079	uint32_t currentVersion,
2080	uint32_t compatVersion) {
2081	for (DylibFile *dylib : extraDylibs)
2082	if (dylib->installName == installName) {
2083	// FIXME: Check what to do if different $ld$previous symbols
2084	// request the same dylib, but with different versions.
2085	return dylib;
2086	}
2087
2088	auto dylib = make<DylibFile>(args: umbrella == this* ? nullptr : umbrella);
2089	dylib->installName = saver().save(S: installName);
2090	dylib->currentVersion = currentVersion;
2091	dylib->compatibilityVersion = compatVersion;
2092	extraDylibs.push_back(Elt: dylib);
2093	return dylib;
2094	}
2095
2096	// $ld$ symbols modify the properties/behavior of the library (e.g. its install
2097	// name, compatibility version or hide/add symbols) for specific target
2098	// versions.
2099	bool DylibFile::handleLDSymbol(StringRef originalName) {
2100	if (!originalName.starts_with(Prefix: "$ld$"))
2101	return false;
2102
2103	StringRef action;
2104	StringRef name;
2105	std::tie(args&: action, args&: name) = originalName.drop_front(N: strlen(s: "$ld$")).split(Separator: `'$'`);
2106	if (action == "previous")
2107	handleLDPreviousSymbol(name, originalName);
2108	else if (action == "install_name")
2109	handleLDInstallNameSymbol(name, originalName);
2110	else if (action == "hide")
2111	handleLDHideSymbol(name, originalName);
2112	return true;
2113	}
2114
2115	void DylibFile::handleLDPreviousSymbol(StringRef name, StringRef originalName) {
2116	// originalName: $ld$ previous $ <installname> $ <compatversion> $
2117	// <platformstr> $ <startversion> $ <endversion> $ <symbol-name> $
2118	StringRef installName;
2119	StringRef compatVersion;
2120	StringRef platformStr;
2121	StringRef startVersion;
2122	StringRef endVersion;
2123	StringRef symbolName;
2124	StringRef rest;
2125
2126	std::tie(args&: installName, args&: name) = name.split(Separator: `'$'`);
2127	std::tie(args&: compatVersion, args&: name) = name.split(Separator: `'$'`);
2128	std::tie(args&: platformStr, args&: name) = name.split(Separator: `'$'`);
2129	std::tie(args&: startVersion, args&: name) = name.split(Separator: `'$'`);
2130	std::tie(args&: endVersion, args&: name) = name.split(Separator: `'$'`);
2131	std::tie(args&: symbolName, args&: rest) = name.rsplit(Separator: `'$'`);
2132
2133	// FIXME: Does this do the right thing for zippered files?
2134	unsigned platform;
2135	if (platformStr.getAsInteger(Radix: `10`, Result&: platform) \|\|
2136	platform != static_cast<unsigned>(config ->platform()))
2137	return;
2138
2139	VersionTuple start;
2140	if (start.tryParse(string: startVersion)) {
2141	warn(msg: toString(f: this) + ": failed to parse start version, symbol '" +
2142	originalName + "' ignored");
2143	return;
2144	}
2145	VersionTuple end;
2146	if (end.tryParse(string: endVersion)) {
2147	warn(msg: toString(f: this) + ": failed to parse end version, symbol '" +
2148	originalName + "' ignored");
2149	return;
2150	}
2151	if (config ->platformInfo.target.MinDeployment < start \|\|
2152	config ->platformInfo.target.MinDeployment >= end)
2153	return;
2154
2155	// Initialized to compatibilityVersion for the symbolName branch below.
2156	uint32_t newCompatibilityVersion = compatibilityVersion;
2157	uint32_t newCurrentVersionForSymbol = currentVersion;
2158	if (!compatVersion.empty()) {
2159	VersionTuple cVersion;
2160	if (cVersion.tryParse(string: compatVersion)) {
2161	warn(msg: toString(f: this) +
2162	": failed to parse compatibility version, symbol '" + originalName +
2163	"' ignored");
2164	return;
2165	}
2166	newCompatibilityVersion = encodeVersion(version: cVersion);
2167	newCurrentVersionForSymbol = newCompatibilityVersion;
2168	}
2169
2170	if (!symbolName.empty()) {
2171	// A $ld$previous$ symbol with symbol name adds a symbol with that name to
2172	// a dylib with given name and version.
2173	auto *dylib = getSyntheticDylib(installName, currentVersion: newCurrentVersionForSymbol,
2174	compatVersion: newCompatibilityVersion);
2175
2176	// The tbd file usually contains the $ld$previous symbol for an old version,
2177	// and then the symbol itself later, for newer deployment targets, like so:
2178	// symbols: [
2179	// '$ld$previous$/Another$$1$3.0$14.0$_zzz$',
2180	// _zzz,
2181	// ]
2182	// Since the symbols are sorted, adding them to the symtab in the given
2183	// order means the $ld$previous version of _zzz will prevail, as desired.
2184	dylib->symbols.push_back(x: symtab ->addDylib(
2185	name: saver().save(S: symbolName), file: dylib, /isWeakDef=/false, /isTlv=/false));
2186	return;
2187	}
2188
2189	// A $ld$previous$ symbol without symbol name modifies the dylib it's in.
2190	this->installName = saver().save(S: installName);
2191	this->compatibilityVersion = newCompatibilityVersion;
2192	}
2193
2194	void DylibFile::handleLDInstallNameSymbol(StringRef name,
2195	StringRef originalName) {
2196	// originalName: $ld$ install_name $ os<version> $ install_name
2197	StringRef condition, installName;
2198	std::tie(args&: condition, args&: installName) = name.split(Separator: `'$'`);
2199	VersionTuple version;
2200	if (!condition.consume_front(Prefix: "os") \|\| version.tryParse(string: condition))
2201	warn(msg: toString(f: this) + ": failed to parse os version, symbol '" +
2202	originalName + "' ignored");
2203	else if (version == config ->platformInfo.target.MinDeployment)
2204	this->installName = saver().save(S: installName);
2205	}
2206
2207	void DylibFile::handleLDHideSymbol(StringRef name, StringRef originalName) {
2208	StringRef symbolName;
2209	bool shouldHide = true;
2210	if (name.starts_with(Prefix: "os")) {
2211	// If it's hidden based on versions.
2212	name = name.drop_front(N: `2`);
2213	StringRef minVersion;
2214	std::tie(args&: minVersion, args&: symbolName) = name.split(Separator: `'$'`);
2215	VersionTuple versionTup;
2216	if (versionTup.tryParse(string: minVersion)) {
2217	warn(msg: toString(f: this) + ": failed to parse hidden version, symbol `" + originalName +
2218	"` ignored.");
2219	return;
2220	}
2221	shouldHide = versionTup == config ->platformInfo.target.MinDeployment;
2222	} else {
2223	symbolName = name;
2224	}
2225
2226	if (shouldHide)
2227	exportingFile->hiddenSymbols.insert(V: CachedHashStringRef (symbolName));
2228	}
2229
2230	void DylibFile::checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const {
2231	if (config ->applicationExtension && !dylibIsAppExtensionSafe)
2232	warn(msg: "using '-application_extension' with unsafe dylib: " + toString(f: this));
2233	}
2234
2235	ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
2236	: InputFile (ArchiveKind, f ->getMemoryBufferRef()), file (std::move(f)),
2237	forceHidden(forceHidden) {}
2238
2239	void ArchiveFile::addLazySymbols() {
2240	// Avoid calling getMemoryBufferRef() on zero-symbol archive
2241	// since that crashes.
2242	if (file ->isEmpty() \|\|
2243	(file ->hasSymbolTable() && file ->getNumberOfSymbols() == `0`))
2244	return;
2245
2246	if (!file ->hasSymbolTable()) {
2247	// No index, treat each child as a lazy object file.
2248	Error e = Error::success();
2249	for (const object::Archive::Child &c : file ->children(Err&: e)) {
2250	// Check `seen` but don't insert so a future eager load can still happen.
2251	if (seen.contains(V: c.getChildOffset()))
2252	continue;
2253	if (!seenLazy.insert(V: c.getChildOffset()).second)
2254	continue;
2255	auto file = childToObjectFile(c, /lazy=/true);
2256	if (!file)
2257	error(msg: toString(f: this) +
2258	": couldn't process child: " + toString(E: file.takeError()));
2259	inputFiles.insert(X: *file);
2260	}
2261	if (e)
2262	error(msg: toString(f: this) +
2263	": Archive::children failed: " + toString(E: std::move(e)));
2264	return;
2265	}
2266
2267	Error err = Error::success();
2268	auto child = file ->child_begin(Err&: err);
2269	// Ignore the I/O error here - will be reported later.
2270	if (!err) {
2271	Expected<MemoryBufferRef> mbOrErr = child ->getMemoryBufferRef();
2272	if (!mbOrErr) {
2273	llvm::consumeError(Err: mbOrErr.takeError());
2274	} else {
2275	if (identify_magic(magic: mbOrErr ->getBuffer()) == file_magic::macho_object) {
2276	if (target->wordSize == `8`)
2277	compatArch = compatWithTargetArch(
2278	file: this, hdr: reinterpret_cast<const LP64::mach_header *>(
2279	mbOrErr ->getBufferStart()));
2280	else
2281	compatArch = compatWithTargetArch(
2282	file: this, hdr: reinterpret_cast<const ILP32::mach_header *>(
2283	mbOrErr ->getBufferStart()));
2284	if (!compatArch)
2285	return;
2286	}
2287	}
2288	}
2289
2290	for (const object::Archive::Symbol &sym : file ->symbols())
2291	symtab ->addLazyArchive(name: sym.getName(), file: this, sym);
2292	}
2293
2294	static Expected<InputFile *>
2295	loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
2296	uint64_t offsetInArchive, bool forceHidden, bool compatArch,
2297	bool lazy) {
2298	if (config ->zeroModTime)
2299	modTime = `0`;
2300
2301	switch (identify_magic(magic: mb.getBuffer())) {
2302	case file_magic::macho_object:
2303	return make<ObjFile>(args&: mb, args&: modTime, args&: archiveName, args&: lazy, args&: forceHidden,
2304	args&: compatArch);
2305	case file_magic::bitcode:
2306	return make<BitcodeFile>(args&: mb, args&: archiveName, args&: offsetInArchive, args&: lazy,
2307	args&: forceHidden, args&: compatArch);
2308	default:
2309	return createStringError(EC: inconvertibleErrorCode(),
2310	S: mb.getBufferIdentifier() +
2311	" has unhandled file type");
2312	}
2313	}
2314
2315	Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
2316	if (!seen.insert(V: c.getChildOffset()).second)
2317	return Error::success();
2318	auto file = childToObjectFile(c, /lazy=/false);
2319	if (!file)
2320	return file.takeError();
2321
2322	inputFiles.insert(X: *file);
2323	printArchiveMemberLoad(reason, *file);
2324	return Error::success();
2325	}
2326
2327	void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
2328	object::Archive::Child c =
2329	CHECK(sym.getMember(), toString(this) +
2330	": could not get the member defining symbol " +
2331	toMachOString(sym));
2332
2333	// `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile>
2334	// and become invalid after that call. Copy it to the stack so we can refer
2335	// to it later.
2336	const object::Archive::Symbol symCopy = sym;
2337
2338	// ld64 doesn't demangle sym here even with -demangle.
2339	// Match that: intentionally don't call toMachOString().
2340	if (Error e = fetch(c, reason: symCopy.getName()))
2341	error(msg: toString(f: this) + ": could not get the member defining symbol " +
2342	toMachOString(symCopy) + ": " + toString(E: std::move(e)));
2343	}
2344
2345	Expected<InputFile *>
2346	ArchiveFile::childToObjectFile(const llvm::object::Archive::Child &c,
2347	bool lazy) {
2348	Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
2349	if (!mb)
2350	return mb.takeError();
2351
2352	Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
2353	if (!modTime)
2354	return modTime.takeError();
2355
2356	return loadArchiveMember(mb: mb, modTime: toTimeT(TP: modTime), archiveName: getName(),
2357	offsetInArchive: c.getChildOffset(), forceHidden, compatArch, lazy);
2358	}
2359
2360	static macho::Symbol createBitcodeSymbol(const* lto::InputFile::Symbol &objSym,
2361	BitcodeFile &file) {
2362	StringRef name = saver().save(S: objSym.getName());
2363
2364	if (objSym.isUndefined())
2365	return symtab ->addUndefined(name, &file, /isWeakRef=/objSym.isWeak());
2366
2367	// TODO: Write a test demonstrating why computing isPrivateExtern before
2368	// LTO compilation is important.
2369	bool isPrivateExtern = false;
2370	switch (objSym.getVisibility()) {
2371	case GlobalValue::HiddenVisibility:
2372	isPrivateExtern = true;
2373	break;
2374	case GlobalValue::ProtectedVisibility:
2375	error(msg: name + " has protected visibility, which is not supported by Mach-O");
2376	break;
2377	case GlobalValue::DefaultVisibility:
2378	break;
2379	}
2380	isPrivateExtern = isPrivateExtern \|\| objSym.canBeOmittedFromSymbolTable() \|\|
2381	file.forceHidden;
2382
2383	if (objSym.isCommon())
2384	return symtab ->addCommon(name, &file, size: objSym.getCommonSize(),
2385	align: objSym.getCommonAlignment(), isPrivateExtern);
2386
2387	return symtab ->addDefined(name, &file, /isec=/nullptr, /value=/`0`,
2388	/size=/`0`, isWeakDef: objSym.isWeak(), isPrivateExtern,
2389	/isReferencedDynamically=/false,
2390	/noDeadStrip=/false,
2391	/isWeakDefCanBeHidden=/false);
2392	}
2393
2394	BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
2395	uint64_t offsetInArchive, bool lazy, bool forceHidden,
2396	bool compatArch)
2397	: InputFile (BitcodeKind, mb, lazy), forceHidden(forceHidden) {
2398	this->archiveName = std::string (archiveName);
2399	this->compatArch = compatArch;
2400	std::string path = mb.getBufferIdentifier().str();
2401	if (config ->thinLTOIndexOnly)
2402	path = replaceThinLTOSuffix(path: mb.getBufferIdentifier());
2403
2404	// If the parent archive already determines that the arch is not compat with
2405	// target, then just return.
2406	if (!compatArch)
2407	return;
2408
2409	// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
2410	// name. If two members with the same name are provided, this causes a
2411	// collision and ThinLTO can't proceed.
2412	// So, we append the archive name to disambiguate two members with the same
2413	// name from multiple different archives, and offset within the archive to
2414	// disambiguate two members of the same name from a single archive.
2415	MemoryBufferRef mbref(mb.getBuffer(),
2416	saver().save(S: archiveName.empty()
2417	? path
2418	: archiveName + "(" +
2419	sys::path::filename(path) + ")" +
2420	utostr(X: offsetInArchive)));
2421	obj = check(e: lto::InputFile::create(Object: mbref));
2422	if (lazy)
2423	parseLazy();
2424	else
2425	parse();
2426	}
2427
2428	void BitcodeFile::parse() {
2429	// Convert LTO Symbols to LLD Symbols in order to perform resolution. The
2430	// "winning" symbol will then be marked as Prevailing at LTO compilation
2431	// time.
2432	symbols.resize(new_size: obj ->symbols().size());
2433
2434	// Process defined symbols first. See the comment at the end of
2435	// ObjFile<>::parseSymbols.
2436	for (auto it : llvm::enumerate(First: obj ->symbols()))
2437	if (!it.value().isUndefined())
2438	symbols [it.index()] = createBitcodeSymbol(objSym: it.value(), file&: *this);
2439	for (auto it : llvm::enumerate(First: obj ->symbols()))
2440	if (it.value().isUndefined())
2441	symbols [it.index()] = createBitcodeSymbol(objSym: it.value(), file&: *this);
2442	}
2443
2444	void BitcodeFile::parseLazy() {
2445	symbols.resize(new_size: obj ->symbols().size());
2446	for (const auto &[i, objSym] : llvm::enumerate(First: obj ->symbols())) {
2447	if (!objSym.isUndefined()) {
2448	symbols [i] = symtab ->addLazyObject(name: saver().save(S: objSym.getName()), file&: *this);
2449	if (!lazy)
2450	break;
2451	}
2452	}
2453	}
2454
2455	std::string macho::replaceThinLTOSuffix(StringRef path) {
2456	auto [suffix, repl] = config ->thinLTOObjectSuffixReplace;
2457	if (path.consume_back(Suffix: suffix))
2458	return (path + repl).str();
2459	return std::string (path);
2460	}
2461
2462	void macho::extract(InputFile &file, StringRef reason) {
2463	if (!file.lazy)
2464	return;
2465	file.lazy = false;
2466
2467	printArchiveMemberLoad(reason, &file);
2468	if (auto *bitcode = dyn_cast<BitcodeFile>(Val: &file)) {
2469	bitcode->parse();
2470	} else {
2471	auto &f = cast<ObjFile>(Val&: file);
2472	if (target->wordSize == `8`)
2473	f.parse<LP64>();
2474	else
2475	f.parse<ILP32>();
2476	}
2477	}
2478
2479	template void ObjFile::parse<LP64>();
2480

Browse the source code of llvm_projects/lld/MachO/InputFiles.cpp