//===- SyntheticSections.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SyntheticSections.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "ExportTrie.h"
#include "ICF.h"
#include "InputFiles.h"
#include "ObjC.h"
#include "OutputSegment.h"
#include "SectionPriorities.h"
#include "SymbolTable.h"
#include "Symbols.h"

#include "lld/Common/CommonLinkerContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/xxhash.h"

#if defined(__APPLE__)
#include <sys/mman.h>

#define COMMON_DIGEST_FOR_OPENSSL
#include <CommonCrypto/CommonDigest.h>
#else
#include "llvm/Support/SHA256.h"
#endif

using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::macho;

// Reads `len` bytes at `data` and writes the 32-byte SHA256 checksum to
// `output`.
static void sha256(const uint8_t *data, size_t len, uint8_t *output) {
#if defined(__APPLE__)
  // FIXME: Make LLVM's SHA256 faster and use it unconditionally. See PR56121
  // for some notes on this.
  CC_SHA256(data, len, output);
#else
  ArrayRef<uint8_t> block(data, len);
  std::array<uint8_t, 32> hash = SHA256::hash(block);
  static_assert(hash.size() == CodeSignatureSection::hashSize);
  memcpy(output, hash.data(), hash.size());
#endif
}

InStruct macho::in;
std::vector<SyntheticSection *> macho::syntheticSections;

SyntheticSection::SyntheticSection(const char *segname, const char *name)
    : OutputSection(SyntheticKind, name) {
  std::tie(this->segname, this->name) = maybeRenameSection({segname, name});
  isec = makeSyntheticInputSection(segname, name);
  isec->parent = this;
  syntheticSections.push_back(this);
}

// dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
// from the beginning of the file (i.e. the header).
MachHeaderSection::MachHeaderSection()
    : SyntheticSection(segment_names::text, section_names::header) {
  // XXX: This is a hack. (See D97007)
  // We set the index to 1 to pretend that this section is the text
  // section.
  index = 1;
  isec->isFinal = true;
}

void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
  loadCommands.push_back(lc);
  sizeOfCmds += lc->getSize();
}

uint64_t MachHeaderSection::getSize() const {
  uint64_t size = target->headerSize + sizeOfCmds + config->headerPad;
  // If we are emitting an encryptable binary, our load commands must have a
  // separate (non-encrypted) page to themselves.
  if (config->emitEncryptionInfo)
    size = alignToPowerOf2(size, target->getPageSize());
  return size;
}

static uint32_t cpuSubtype() {
  uint32_t subtype = target->cpuSubtype;

  if (config->outputType == MH_EXECUTE && !config->staticLink &&
      target->cpuSubtype == CPU_SUBTYPE_X86_64_ALL &&
      config->platform() == PLATFORM_MACOS &&
      config->platformInfo.target.MinDeployment >= VersionTuple(10, 5))
    subtype |= CPU_SUBTYPE_LIB64;

  return subtype;
}

static bool hasWeakBinding() {
  return config->emitChainedFixups ? in.chainedFixups->hasWeakBinding()
                                   : in.weakBinding->hasEntry();
}

static bool hasNonWeakDefinition() {
  return config->emitChainedFixups ? in.chainedFixups->hasNonWeakDefinition()
                                   : in.weakBinding->hasNonWeakDefinition();
}

void MachHeaderSection::writeTo(uint8_t *buf) const {
  auto *hdr = reinterpret_cast<mach_header *>(buf);
  hdr->magic = target->magic;
  hdr->cputype = target->cpuType;
  hdr->cpusubtype = cpuSubtype();
  hdr->filetype = config->outputType;
  hdr->ncmds = loadCommands.size();
  hdr->sizeofcmds = sizeOfCmds;
  hdr->flags = MH_DYLDLINK;

  if (config->namespaceKind == NamespaceKind::twolevel)
    hdr->flags |= MH_NOUNDEFS | MH_TWOLEVEL;

  if (config->outputType == MH_DYLIB && !config->hasReexports)
    hdr->flags |= MH_NO_REEXPORTED_DYLIBS;

  if (config->markDeadStrippableDylib)
    hdr->flags |= MH_DEAD_STRIPPABLE_DYLIB;

  if (config->outputType == MH_EXECUTE && config->isPic)
    hdr->flags |= MH_PIE;

  if (config->outputType == MH_DYLIB && config->applicationExtension)
    hdr->flags |= MH_APP_EXTENSION_SAFE;

  if (in.exports->hasWeakSymbol || hasNonWeakDefinition())
    hdr->flags |= MH_WEAK_DEFINES;

  if (in.exports->hasWeakSymbol || hasWeakBinding())
    hdr->flags |= MH_BINDS_TO_WEAK;

  for (const OutputSegment *seg : outputSegments) {
    for (const OutputSection *osec : seg->getSections()) {
      if (isThreadLocalVariables(osec->flags)) {
        hdr->flags |= MH_HAS_TLV_DESCRIPTORS;
        break;
      }
    }
  }

  uint8_t *p = reinterpret_cast<uint8_t *>(hdr) + target->headerSize;
  for (const LoadCommand *lc : loadCommands) {
    lc->writeTo(p);
    p += lc->getSize();
  }
}

PageZeroSection::PageZeroSection()
    : SyntheticSection(segment_names::pageZero, section_names::pageZero) {}

RebaseSection::RebaseSection()
    : LinkEditSection(segment_names::linkEdit, section_names::rebase) {}

namespace {
struct RebaseState {
  uint64_t sequenceLength;
  uint64_t skipLength;
};
} // namespace

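// Emit an opcode to advance dyld's current rebase address by `incr` bytes.
// If the increment is word-aligned and the scaled value fits in the 4-bit
// immediate (e.g. at most 15 * 8 = 120 bytes with 8-byte words), a single
// REBASE_OPCODE_ADD_ADDR_IMM_SCALED byte suffices; otherwise we fall back to
// REBASE_OPCODE_ADD_ADDR_ULEB with a ULEB128-encoded increment.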
static void emitIncrement(uint64_t incr, raw_svector_ostream &os) {
  assert(incr != 0);

  if ((incr >> target->p2WordSize) <= REBASE_IMMEDIATE_MASK &&
      (incr % target->wordSize) == 0) {
    os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED |
                               (incr >> target->p2WordSize));
  } else {
    os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB);
    encodeULEB128(incr, os);
  }
}

static void flushRebase(const RebaseState &state, raw_svector_ostream &os) {
  assert(state.sequenceLength > 0);

  if (state.skipLength == target->wordSize) {
    if (state.sequenceLength <= REBASE_IMMEDIATE_MASK) {
      os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES |
                                 state.sequenceLength);
    } else {
      os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
      encodeULEB128(state.sequenceLength, os);
    }
  } else if (state.sequenceLength == 1) {
    os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB);
    encodeULEB128(state.skipLength - target->wordSize, os);
  } else {
    os << static_cast<uint8_t>(
        REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB);
    encodeULEB128(state.sequenceLength, os);
    encodeULEB128(state.skipLength - target->wordSize, os);
  }
}

// Rebases are communicated to dyld using a bytecode, whose opcodes cause the
// memory location at a specific address to be rebased and/or the address to be
// incremented.
//
// Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic
// one, encoding a series of evenly spaced addresses. This algorithm works by
// splitting up the sorted list of addresses into such chunks. If the locations
// are consecutive or the sequence consists of a single location, flushRebase
// will use a smaller, more specialized encoding.
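//
// For illustration (hypothetical offsets, 8-byte words), the sorted segment
// offsets {0x10, 0x18, 0x20, 0x40, 0x50, 0x60} would be encoded as:
//   REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(seg, 0x10)
//   REBASE_OPCODE_DO_REBASE_IMM_TIMES(3)  // 0x10, 0x18, 0x20; ptr now at 0x28
//   REBASE_OPCODE_ADD_ADDR_IMM_SCALED(3)  // advance 3 * 8 bytes to 0x40
//   REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB(3, 0x8) // 0x40/0x50/0x60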
static void encodeRebases(const OutputSegment *seg,
                          MutableArrayRef<Location> locations,
                          raw_svector_ostream &os) {
  // dyld operates on segments. Translate section offsets into segment offsets.
  for (Location &loc : locations)
    loc.offset =
        loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(loc.offset);
  // The algorithm assumes that locations are unique.
  Location *end =
      llvm::unique(locations, [](const Location &a, const Location &b) {
        return a.offset == b.offset;
      });
  size_t count = end - locations.begin();

  os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
                             seg->index);
  assert(!locations.empty());
  uint64_t offset = locations[0].offset;
  encodeULEB128(offset, os);

  RebaseState state{1, target->wordSize};

  for (size_t i = 1; i < count; ++i) {
    offset = locations[i].offset;

    uint64_t skip = offset - locations[i - 1].offset;
    assert(skip != 0 && "duplicate locations should have been weeded out");

    if (skip == state.skipLength) {
      ++state.sequenceLength;
    } else if (state.sequenceLength == 1) {
      ++state.sequenceLength;
      state.skipLength = skip;
    } else if (skip < state.skipLength) {
      // The address is lower than what the rebase pointer would be if the last
      // location were part of a sequence. We start a new sequence from the
      // previous location.
      --state.sequenceLength;
      flushRebase(state, os);

      state.sequenceLength = 2;
      state.skipLength = skip;
    } else {
      // The address is at some positive offset from the rebase pointer. We
      // start a new sequence which begins with the current location.
      flushRebase(state, os);
      emitIncrement(skip - state.skipLength, os);
      state.sequenceLength = 1;
      state.skipLength = target->wordSize;
    }
  }
  flushRebase(state, os);
}

void RebaseSection::finalizeContents() {
  if (locations.empty())
    return;

  raw_svector_ostream os{contents};
  os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER);

  llvm::sort(locations, [](const Location &a, const Location &b) {
    return a.isec->getVA(a.offset) < b.isec->getVA(b.offset);
  });

  for (size_t i = 0, count = locations.size(); i < count;) {
    const OutputSegment *seg = locations[i].isec->parent->parent;
    size_t j = i + 1;
    while (j < count && locations[j].isec->parent->parent == seg)
      ++j;
    encodeRebases(seg, {locations.data() + i, locations.data() + j}, os);
    i = j;
  }
  os << static_cast<uint8_t>(REBASE_OPCODE_DONE);
}

void RebaseSection::writeTo(uint8_t *buf) const {
  memcpy(buf, contents.data(), contents.size());
}

NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname,
                                                     const char *name)
    : SyntheticSection(segname, name) {
  align = target->wordSize;
}

void macho::addNonLazyBindingEntries(const Symbol *sym,
                                     const InputSection *isec, uint64_t offset,
                                     int64_t addend) {
  if (config->emitChainedFixups) {
    if (needsBinding(sym))
      in.chainedFixups->addBinding(sym, isec, offset, addend);
    else if (isa<Defined>(sym))
      in.chainedFixups->addRebase(isec, offset);
    else
      llvm_unreachable("cannot bind to an undefined symbol");
    return;
  }

  if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
    in.binding->addEntry(dysym, isec, offset, addend);
    if (dysym->isWeakDef())
      in.weakBinding->addEntry(sym, isec, offset, addend);
  } else if (const auto *defined = dyn_cast<Defined>(sym)) {
    in.rebase->addEntry(isec, offset);
    if (defined->isExternalWeakDef())
      in.weakBinding->addEntry(sym, isec, offset, addend);
    else if (defined->interposable)
      in.binding->addEntry(sym, isec, offset, addend);
  } else {
    // Undefined symbols are filtered out in scanRelocations(); we should never
    // get here.
    llvm_unreachable("cannot bind to an undefined symbol");
  }
}

void NonLazyPointerSectionBase::addEntry(Symbol *sym) {
  if (entries.insert(sym)) {
    assert(!sym->isInGot());
    sym->gotIndex = entries.size() - 1;

    addNonLazyBindingEntries(sym, isec, sym->gotIndex * target->wordSize);
  }
}

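// A 64-bit chained rebase packs the target VA into two fields: the low 36
// bits (target) and the top byte (high8). VAs with any of bits 36-55 set are
// not representable; the round-trip check below reports an error for them.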
void macho::writeChainedRebase(uint8_t *buf, uint64_t targetVA) {
  assert(config->emitChainedFixups);
  assert(target->wordSize == 8 && "Only 64-bit platforms are supported");
  auto *rebase = reinterpret_cast<dyld_chained_ptr_64_rebase *>(buf);
  rebase->target = targetVA & 0xf'ffff'ffff;
  rebase->high8 = (targetVA >> 56);
  rebase->reserved = 0;
  rebase->next = 0;
  rebase->bind = 0;

  // The fixup format places a 64 GiB limit on the output's size.
  // Should we handle this gracefully?
  uint64_t encodedVA = rebase->target | ((uint64_t)rebase->high8 << 56);
  if (encodedVA != targetVA)
    error("rebase target address 0x" + Twine::utohexstr(targetVA) +
          " does not fit into chained fixup. Re-link with -no_fixup_chains");
}

static void writeChainedBind(uint8_t *buf, const Symbol *sym, int64_t addend) {
  assert(config->emitChainedFixups);
  assert(target->wordSize == 8 && "Only 64-bit platforms are supported");
  auto *bind = reinterpret_cast<dyld_chained_ptr_64_bind *>(buf);
  auto [ordinal, inlineAddend] = in.chainedFixups->getBinding(sym, addend);
  bind->ordinal = ordinal;
  bind->addend = inlineAddend;
  bind->reserved = 0;
  bind->next = 0;
  bind->bind = 1;
}

void macho::writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend) {
  if (needsBinding(sym))
    writeChainedBind(buf, sym, addend);
  else
    writeChainedRebase(buf, sym->getVA() + addend);
}

void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const {
  if (config->emitChainedFixups) {
    for (const auto &[i, entry] : llvm::enumerate(entries))
      writeChainedFixup(&buf[i * target->wordSize], entry, 0);
  } else {
    for (const auto &[i, entry] : llvm::enumerate(entries))
      if (auto *defined = dyn_cast<Defined>(entry))
        write64le(&buf[i * target->wordSize], defined->getVA());
  }
}

GotSection::GotSection()
    : NonLazyPointerSectionBase(segment_names::data, section_names::got) {
  flags = S_NON_LAZY_SYMBOL_POINTERS;
}

TlvPointerSection::TlvPointerSection()
    : NonLazyPointerSectionBase(segment_names::data,
                                section_names::threadPtrs) {
  flags = S_THREAD_LOCAL_VARIABLE_POINTERS;
}

BindingSection::BindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::binding) {}

namespace {
struct Binding {
  OutputSegment *segment = nullptr;
  uint64_t offset = 0;
  int64_t addend = 0;
};
struct BindIR {
  // The default value of 0xF0 is not a valid opcode and should make the
  // program scream instead of accidentally writing "valid" values.
  uint8_t opcode = 0xF0;
  uint64_t data = 0;
  uint64_t consecutiveCount = 0;
};
} // namespace

// Encode a sequence of opcodes that tell dyld to write the address of symbol +
// addend at osec->addr + outSecOff.
//
// The bind opcode "interpreter" remembers the values of each binding field, so
// we only need to encode the differences between bindings. Hence the use of
// lastBinding.
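//
// For illustration: two bindings at consecutive word-sized slots in the same
// segment emit SET_SEGMENT_AND_OFFSET_ULEB + DO_BIND for the first slot but
// only DO_BIND for the second, since DO_BIND already advanced the recorded
// offset by wordSize.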
static void encodeBinding(const OutputSection *osec, uint64_t outSecOff,
                          int64_t addend, Binding &lastBinding,
                          std::vector<BindIR> &opcodes) {
  OutputSegment *seg = osec->parent;
  uint64_t offset = osec->getSegmentOffset() + outSecOff;
  if (lastBinding.segment != seg) {
    opcodes.push_back(
        {static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
                              seg->index),
         offset});
    lastBinding.segment = seg;
    lastBinding.offset = offset;
  } else if (lastBinding.offset != offset) {
    opcodes.push_back({BIND_OPCODE_ADD_ADDR_ULEB, offset - lastBinding.offset});
    lastBinding.offset = offset;
  }

  if (lastBinding.addend != addend) {
    opcodes.push_back(
        {BIND_OPCODE_SET_ADDEND_SLEB, static_cast<uint64_t>(addend)});
    lastBinding.addend = addend;
  }

  opcodes.push_back({BIND_OPCODE_DO_BIND, 0});
  // DO_BIND causes dyld to both perform the binding and increment the offset.
  lastBinding.offset += target->wordSize;
}

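// Peephole-optimize the opcode stream. For illustration, the input
//   DO_BIND, ADD_ADDR_ULEB(8), DO_BIND, ADD_ADDR_ULEB(8), DO_BIND
// becomes
//   DO_BIND_ADD_ADDR_ULEB(8), DO_BIND_ADD_ADDR_ULEB(8), DO_BIND
// after pass 1, and
//   DO_BIND_ULEB_TIMES_SKIPPING_ULEB(2, 8), DO_BIND
// after pass 2.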
static void optimizeOpcodes(std::vector<BindIR> &opcodes) {
  // Pass 1: Combine bind/add pairs
  size_t i;
  int pWrite = 0;
  for (i = 1; i < opcodes.size(); ++i, ++pWrite) {
    if ((opcodes[i].opcode == BIND_OPCODE_ADD_ADDR_ULEB) &&
        (opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND)) {
      opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB;
      opcodes[pWrite].data = opcodes[i].data;
      ++i;
    } else {
      opcodes[pWrite] = opcodes[i - 1];
    }
  }
  if (i == opcodes.size())
    opcodes[pWrite] = opcodes[i - 1];
  opcodes.resize(pWrite + 1);

  // Pass 2: Compress two or more bind_add opcodes
  pWrite = 0;
  for (i = 1; i < opcodes.size(); ++i, ++pWrite) {
    if ((opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
        (opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
        (opcodes[i].data == opcodes[i - 1].data)) {
      opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB;
      opcodes[pWrite].consecutiveCount = 2;
      opcodes[pWrite].data = opcodes[i].data;
      ++i;
      while (i < opcodes.size() &&
             (opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
             (opcodes[i].data == opcodes[i - 1].data)) {
        opcodes[pWrite].consecutiveCount++;
        ++i;
      }
    } else {
      opcodes[pWrite] = opcodes[i - 1];
    }
  }
  if (i == opcodes.size())
    opcodes[pWrite] = opcodes[i - 1];
  opcodes.resize(pWrite + 1);

  // Pass 3: Use immediate encodings
  // Every binding is the size of one pointer. If the next binding is a
  // multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the
  // opcode can be scaled by wordSize into a single byte and dyld will
  // expand it to the correct address.
  for (auto &p : opcodes) {
    // It's unclear why the check needs to be strictly less than
    // BIND_IMMEDIATE_MASK, but ld64 currently does this. It may be a bug,
    // but for now we replicate the behavior to avoid mysterious mismatches.
    if ((p.opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
        ((p.data / target->wordSize) < BIND_IMMEDIATE_MASK) &&
        ((p.data % target->wordSize) == 0)) {
      p.opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED;
      p.data /= target->wordSize;
    }
  }
}

static void flushOpcodes(const BindIR &op, raw_svector_ostream &os) {
  uint8_t opcode = op.opcode & BIND_OPCODE_MASK;
  switch (opcode) {
  case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
  case BIND_OPCODE_ADD_ADDR_ULEB:
  case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
    os << op.opcode;
    encodeULEB128(op.data, os);
    break;
  case BIND_OPCODE_SET_ADDEND_SLEB:
    os << op.opcode;
    encodeSLEB128(static_cast<int64_t>(op.data), os);
    break;
  case BIND_OPCODE_DO_BIND:
    os << op.opcode;
    break;
  case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
    os << op.opcode;
    encodeULEB128(op.consecutiveCount, os);
    encodeULEB128(op.data, os);
    break;
  case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
    os << static_cast<uint8_t>(op.opcode | op.data);
    break;
  default:
    llvm_unreachable("unhandled bind opcode");
  }
}

static bool needsWeakBind(const Symbol &sym) {
  if (auto *dysym = dyn_cast<DylibSymbol>(&sym))
    return dysym->isWeakDef();
  if (auto *defined = dyn_cast<Defined>(&sym))
    return defined->isExternalWeakDef();
  return false;
}

// Non-weak bindings need to have their dylib ordinal encoded as well.
static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) {
  if (config->namespaceKind == NamespaceKind::flat || dysym.isDynamicLookup())
    return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP);
  assert(dysym.getFile()->isReferenced());
  return dysym.getFile()->ordinal;
}

static int16_t ordinalForSymbol(const Symbol &sym) {
  if (config->emitChainedFixups && needsWeakBind(sym))
    return BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
  if (const auto *dysym = dyn_cast<DylibSymbol>(&sym))
    return ordinalForDylibSymbol(*dysym);
  assert(cast<Defined>(&sym)->interposable);
  return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
}

static void encodeDylibOrdinal(int16_t ordinal, raw_svector_ostream &os) {
  if (ordinal <= 0) {
    os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
                               (ordinal & BIND_IMMEDIATE_MASK));
  } else if (ordinal <= BIND_IMMEDIATE_MASK) {
    os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | ordinal);
  } else {
    os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
    encodeULEB128(ordinal, os);
  }
}

static void encodeWeakOverride(const Defined *defined,
                               raw_svector_ostream &os) {
  os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM |
                             BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION)
     << defined->getName() << '\0';
}

// Organize the bindings so we can encode them with fewer opcodes.
//
// First, all bindings for a given symbol should be grouped together.
// BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it
// has an associated symbol string), so we only want to emit it once per symbol.
//
// Within each group, we sort the bindings by address. Since bindings are
// delta-encoded, sorting them allows for a more compact result. Note that
// sorting by address alone ensures that bindings for the same segment / section
// are located together, minimizing the number of times we have to emit
// BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB.
//
// Finally, we sort the symbols by the address of their first binding, again
// to facilitate the delta-encoding process.
template <class Sym>
std::vector<std::pair<const Sym *, std::vector<BindingEntry>>>
sortBindings(const BindingsMap<const Sym *> &bindingsMap) {
  std::vector<std::pair<const Sym *, std::vector<BindingEntry>>> bindingsVec(
      bindingsMap.begin(), bindingsMap.end());
  for (auto &p : bindingsVec) {
    std::vector<BindingEntry> &bindings = p.second;
    llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
      return a.target.getVA() < b.target.getVA();
    });
  }
  llvm::sort(bindingsVec, [](const auto &a, const auto &b) {
    return a.second[0].target.getVA() < b.second[0].target.getVA();
  });
  return bindingsVec;
}

// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
// interprets to update a record with the following fields:
//  * segment index (of the segment to write the symbol addresses to, typically
//    the __DATA_CONST segment which contains the GOT)
//  * offset within the segment, indicating the next location to write a binding
//  * symbol type
//  * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
//  * symbol name
//  * addend
// When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
// a symbol in the GOT, and increments the segment offset to point to the next
// entry. It does *not* clear the record state after doing the bind, so
// subsequent opcodes only need to encode the differences between bindings.
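//
// For illustration, binding a symbol _foo from the dylib with ordinal 1 into
// two consecutive GOT slots would produce roughly:
//   SET_SYMBOL_TRAILING_FLAGS_IMM "_foo\0", SET_TYPE_IMM(POINTER),
//   SET_DYLIB_ORDINAL_IMM(1), SET_SEGMENT_AND_OFFSET_ULEB(seg, off),
//   DO_BIND, DO_BIND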
void BindingSection::finalizeContents() {
  raw_svector_ostream os{contents};
  Binding lastBinding;
  int16_t lastOrdinal = 0;

  for (auto &p : sortBindings(bindingsMap)) {
    const Symbol *sym = p.first;
    std::vector<BindingEntry> &bindings = p.second;
    uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
    if (sym->isWeakRef())
      flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
    os << flags << sym->getName() << '\0'
       << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER);
    int16_t ordinal = ordinalForSymbol(*sym);
    if (ordinal != lastOrdinal) {
      encodeDylibOrdinal(ordinal, os);
      lastOrdinal = ordinal;
    }
    std::vector<BindIR> opcodes;
    for (const BindingEntry &b : bindings)
      encodeBinding(b.target.isec->parent,
                    b.target.isec->getOffset(b.target.offset), b.addend,
                    lastBinding, opcodes);
    if (config->optimize > 1)
      optimizeOpcodes(opcodes);
    for (const auto &op : opcodes)
      flushOpcodes(op, os);
  }
  if (!bindingsMap.empty())
    os << static_cast<uint8_t>(BIND_OPCODE_DONE);
}

void BindingSection::writeTo(uint8_t *buf) const {
  memcpy(buf, contents.data(), contents.size());
}

WeakBindingSection::WeakBindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::weakBinding) {}

void WeakBindingSection::finalizeContents() {
  raw_svector_ostream os{contents};
  Binding lastBinding;

  for (const Defined *defined : definitions)
    encodeWeakOverride(defined, os);

  for (auto &p : sortBindings(bindingsMap)) {
    const Symbol *sym = p.first;
    std::vector<BindingEntry> &bindings = p.second;
    os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
       << sym->getName() << '\0'
       << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER);
    std::vector<BindIR> opcodes;
    for (const BindingEntry &b : bindings)
      encodeBinding(b.target.isec->parent,
                    b.target.isec->getOffset(b.target.offset), b.addend,
                    lastBinding, opcodes);
    if (config->optimize > 1)
      optimizeOpcodes(opcodes);
    for (const auto &op : opcodes)
      flushOpcodes(op, os);
  }
  if (!bindingsMap.empty() || !definitions.empty())
    os << static_cast<uint8_t>(BIND_OPCODE_DONE);
}

void WeakBindingSection::writeTo(uint8_t *buf) const {
  memcpy(buf, contents.data(), contents.size());
}

StubsSection::StubsSection()
    : SyntheticSection(segment_names::text, section_names::stubs) {
  flags = S_SYMBOL_STUBS | S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS;
  // The stubs section comprises machine instructions, which are aligned to
  // 4 bytes on the archs we care about.
  align = 4;
  reserved2 = target->stubSize;
}

uint64_t StubsSection::getSize() const {
  return entries.size() * target->stubSize;
}

void StubsSection::writeTo(uint8_t *buf) const {
  size_t off = 0;
  for (const Symbol *sym : entries) {
    uint64_t pointerVA =
        config->emitChainedFixups ? sym->getGotVA() : sym->getLazyPtrVA();
    target->writeStub(buf + off, *sym, pointerVA);
    off += target->stubSize;
  }
}

void StubsSection::finalize() { isFinal = true; }

static void addBindingsForStub(Symbol *sym) {
  assert(!config->emitChainedFixups);
  if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
    if (sym->isWeakDef()) {
      in.binding->addEntry(dysym, in.lazyPointers->isec,
                           sym->stubsIndex * target->wordSize);
      in.weakBinding->addEntry(sym, in.lazyPointers->isec,
                               sym->stubsIndex * target->wordSize);
    } else {
      in.lazyBinding->addEntry(dysym);
    }
  } else if (auto *defined = dyn_cast<Defined>(sym)) {
    if (defined->isExternalWeakDef()) {
      in.rebase->addEntry(in.lazyPointers->isec,
                          sym->stubsIndex * target->wordSize);
      in.weakBinding->addEntry(sym, in.lazyPointers->isec,
                               sym->stubsIndex * target->wordSize);
    } else if (defined->interposable) {
      in.lazyBinding->addEntry(sym);
    } else {
      llvm_unreachable("invalid stub target");
    }
  } else {
    llvm_unreachable("invalid stub target symbol type");
  }
}

void StubsSection::addEntry(Symbol *sym) {
  bool inserted = entries.insert(sym);
  if (inserted) {
    sym->stubsIndex = entries.size() - 1;

    if (config->emitChainedFixups)
      in.got->addEntry(sym);
    else
      addBindingsForStub(sym);
  }
}

StubHelperSection::StubHelperSection()
    : SyntheticSection(segment_names::text, section_names::stubHelper) {
  flags = S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS;
  align = 4; // This section comprises machine instructions
}

uint64_t StubHelperSection::getSize() const {
  return target->stubHelperHeaderSize +
         in.lazyBinding->getEntries().size() * target->stubHelperEntrySize;
}

bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); }

void StubHelperSection::writeTo(uint8_t *buf) const {
  target->writeStubHelperHeader(buf);
  size_t off = target->stubHelperHeaderSize;
  for (const Symbol *sym : in.lazyBinding->getEntries()) {
    target->writeStubHelperEntry(buf + off, *sym, addr + off);
    off += target->stubHelperEntrySize;
  }
}

void StubHelperSection::setUp() {
  Symbol *binder = symtab->addUndefined("dyld_stub_binder", /*file=*/nullptr,
                                        /*isWeakRef=*/false);
  if (auto *undefined = dyn_cast<Undefined>(binder))
    treatUndefinedSymbol(*undefined,
                         "lazy binding (normally in libSystem.dylib)");

  // treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check.
  stubBinder = dyn_cast_or_null<DylibSymbol>(binder);
  if (stubBinder == nullptr)
    return;

  in.got->addEntry(stubBinder);

  in.imageLoaderCache->parent =
      ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
  addInputSection(in.imageLoaderCache);
  // Since this isn't in the symbol table or in any input file, the noDeadStrip
  // argument doesn't matter.
  dyldPrivate =
      make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0, 0,
                    /*isWeakDef=*/false,
                    /*isExternal=*/false, /*isPrivateExtern=*/false,
                    /*includeInSymtab=*/true,
                    /*isReferencedDynamically=*/false,
                    /*noDeadStrip=*/false);
  dyldPrivate->used = true;
}

llvm::DenseMap<llvm::CachedHashStringRef, ConcatInputSection *>
    ObjCSelRefsHelper::methnameToSelref;
void ObjCSelRefsHelper::initialize() {
  // Do not fold selrefs without ICF.
  if (config->icfLevel == ICFLevel::none)
    return;

  // Search for methnames already referenced in __objc_selrefs and map each
  // name to the corresponding selref entry, which we will reuse when creating
  // objc stubs.
  for (ConcatInputSection *isec : inputSections) {
    if (isec->shouldOmitFromOutput())
      continue;
    if (isec->getName() != section_names::objcSelrefs)
      continue;
    // We expect a single relocation per selref entry, pointing to the
    // (possibly deduplicated) methname in __objc_methname.
    assert(isec->relocs.size() == 1);
    auto Reloc = isec->relocs[0];
    if (const auto *sym = Reloc.referent.dyn_cast<Symbol *>()) {
      if (const auto *d = dyn_cast<Defined>(sym)) {
        auto *cisec = cast<CStringInputSection>(d->isec());
        auto methname = cisec->getStringRefAtOffset(d->value);
        methnameToSelref[CachedHashStringRef(methname)] = isec;
      }
    }
  }
}

void ObjCSelRefsHelper::cleanup() { methnameToSelref.clear(); }

ConcatInputSection *ObjCSelRefsHelper::makeSelRef(StringRef methname) {
  auto methnameOffset = in.objcMethnameSection->getStringOffset(methname);

  size_t wordSize = target->wordSize;
  uint8_t *selrefData = bAlloc().Allocate<uint8_t>(wordSize);
  write64le(selrefData, methnameOffset);
  ConcatInputSection *objcSelref =
      makeSyntheticInputSection(segment_names::data, section_names::objcSelrefs,
                                S_LITERAL_POINTERS | S_ATTR_NO_DEAD_STRIP,
                                ArrayRef<uint8_t>{selrefData, wordSize},
                                /*align=*/wordSize);
  assert(objcSelref->live);
  objcSelref->relocs.push_back({/*type=*/target->unsignedRelocType,
                                /*pcrel=*/false, /*length=*/3,
                                /*offset=*/0,
                                /*addend=*/static_cast<int64_t>(methnameOffset),
                                /*referent=*/in.objcMethnameSection->isec});
  objcSelref->parent = ConcatOutputSection::getOrCreateForInput(objcSelref);
  addInputSection(objcSelref);
  objcSelref->isFinal = true;
  methnameToSelref[CachedHashStringRef(methname)] = objcSelref;
  return objcSelref;
}

ConcatInputSection *ObjCSelRefsHelper::getSelRef(StringRef methname) {
  auto it = methnameToSelref.find(CachedHashStringRef(methname));
  if (it == methnameToSelref.end())
    return nullptr;
  return it->second;
}

ObjCStubsSection::ObjCStubsSection()
    : SyntheticSection(segment_names::text, section_names::objcStubs) {
  flags = S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS;
  align = config->objcStubsMode == ObjCStubsMode::fast
              ? target->objcStubsFastAlignment
              : target->objcStubsSmallAlignment;
}

bool ObjCStubsSection::isObjCStubSymbol(Symbol *sym) {
  return sym->getName().starts_with(symbolPrefix);
}

StringRef ObjCStubsSection::getMethname(Symbol *sym) {
  assert(isObjCStubSymbol(sym) && "not an objc stub");
  auto name = sym->getName();
  StringRef methname = name.drop_front(symbolPrefix.size());
  return methname;
}

void ObjCStubsSection::addEntry(Symbol *sym) {
  StringRef methname = getMethname(sym);
  // We create a selref entry for each unique methname.
  if (!ObjCSelRefsHelper::getSelRef(methname))
    ObjCSelRefsHelper::makeSelRef(methname);

  auto stubSize = config->objcStubsMode == ObjCStubsMode::fast
                      ? target->objcStubsFastSize
                      : target->objcStubsSmallSize;
  Defined *newSym = replaceSymbol<Defined>(
      sym, sym->getName(), nullptr, isec,
      /*value=*/symbols.size() * stubSize,
      /*size=*/stubSize,
      /*isWeakDef=*/false, /*isExternal=*/true, /*isPrivateExtern=*/true,
      /*includeInSymtab=*/true, /*isReferencedDynamically=*/false,
      /*noDeadStrip=*/false);
  symbols.push_back(newSym);
}

void ObjCStubsSection::setUp() {
  objcMsgSend = symtab->addUndefined("_objc_msgSend", /*file=*/nullptr,
                                     /*isWeakRef=*/false);
  if (auto *undefined = dyn_cast<Undefined>(objcMsgSend))
    treatUndefinedSymbol(*undefined,
                         "lazy binding (normally in libobjc.dylib)");
  objcMsgSend->used = true;
  if (config->objcStubsMode == ObjCStubsMode::fast) {
    in.got->addEntry(objcMsgSend);
    assert(objcMsgSend->isInGot());
  } else {
    assert(config->objcStubsMode == ObjCStubsMode::small);
    // In line with ld64's behavior, when objc_msgSend is a direct symbol,
    // we directly reference it.
    // In other cases, typically when binding in libobjc.dylib,
    // we generate a stub to invoke objc_msgSend.
    if (!isa<Defined>(objcMsgSend))
      in.stubs->addEntry(objcMsgSend);
  }
}

uint64_t ObjCStubsSection::getSize() const {
  auto stubSize = config->objcStubsMode == ObjCStubsMode::fast
                      ? target->objcStubsFastSize
                      : target->objcStubsSmallSize;
  return stubSize * symbols.size();
}

void ObjCStubsSection::writeTo(uint8_t *buf) const {
  uint64_t stubOffset = 0;
  for (Defined *sym : symbols) {
    auto methname = getMethname(sym);
    InputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
    assert(selRef != nullptr && "no selref for methname");
    auto selrefAddr = selRef->getVA(0);
    target->writeObjCMsgSendStub(buf + stubOffset, sym, in.objcStubs->addr,
                                 stubOffset, selrefAddr, objcMsgSend);
  }
}

LazyPointerSection::LazyPointerSection()
    : SyntheticSection(segment_names::data, section_names::lazySymbolPtr) {
  align = target->wordSize;
  flags = S_LAZY_SYMBOL_POINTERS;
}

uint64_t LazyPointerSection::getSize() const {
  return in.stubs->getEntries().size() * target->wordSize;
}

bool LazyPointerSection::isNeeded() const {
  return !in.stubs->getEntries().empty();
}

void LazyPointerSection::writeTo(uint8_t *buf) const {
  size_t off = 0;
  for (const Symbol *sym : in.stubs->getEntries()) {
    if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
      if (dysym->hasStubsHelper()) {
        uint64_t stubHelperOffset =
            target->stubHelperHeaderSize +
            dysym->stubsHelperIndex * target->stubHelperEntrySize;
        write64le(buf + off, in.stubHelper->addr + stubHelperOffset);
      }
    } else {
      write64le(buf + off, sym->getVA());
    }
    off += target->wordSize;
  }
}

LazyBindingSection::LazyBindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::lazyBinding) {}

void LazyBindingSection::finalizeContents() {
  // TODO: Just precompute the output size here instead of writing to a
  // temporary buffer.
  for (Symbol *sym : entries)
    sym->lazyBindOffset = encode(*sym);
}

void LazyBindingSection::writeTo(uint8_t *buf) const {
  memcpy(buf, contents.data(), contents.size());
}

void LazyBindingSection::addEntry(Symbol *sym) {
  assert(!config->emitChainedFixups && "Chained fixups always bind eagerly");
  if (entries.insert(sym)) {
    sym->stubsHelperIndex = entries.size() - 1;
    in.rebase->addEntry(in.lazyPointers->isec,
                        sym->stubsIndex * target->wordSize);
  }
}

// Unlike the non-lazy binding section, the bind opcodes in this section aren't
// interpreted all at once. Rather, dyld will start interpreting opcodes at a
// given offset, typically only binding a single symbol before it finds a
// BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
// we cannot encode just the differences between symbols; we have to emit the
// complete bind information for each symbol.
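//
// For illustration, each symbol's record looks roughly like:
//   SET_SEGMENT_AND_OFFSET_ULEB(dataSeg, <lazy pointer slot>),
//   SET_DYLIB_ORDINAL_IMM(n), SET_SYMBOL_TRAILING_FLAGS_IMM "_sym\0",
//   DO_BIND, DONE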
uint32_t LazyBindingSection::encode(const Symbol &sym) {
  uint32_t opstreamOffset = contents.size();
  OutputSegment *dataSeg = in.lazyPointers->parent;
  os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
                             dataSeg->index);
  uint64_t offset =
      in.lazyPointers->addr - dataSeg->addr + sym.stubsIndex * target->wordSize;
  encodeULEB128(offset, os);
  encodeDylibOrdinal(ordinalForSymbol(sym), os);

  uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
  if (sym.isWeakRef())
    flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT;

  os << flags << sym.getName() << '\0'
     << static_cast<uint8_t>(BIND_OPCODE_DO_BIND)
     << static_cast<uint8_t>(BIND_OPCODE_DONE);
  return opstreamOffset;
}

ExportSection::ExportSection()
    : LinkEditSection(segment_names::linkEdit, section_names::export_) {}

void ExportSection::finalizeContents() {
  trieBuilder.setImageBase(in.header->addr);
  for (const Symbol *sym : symtab->getSymbols()) {
    if (const auto *defined = dyn_cast<Defined>(sym)) {
      if (defined->privateExtern || !defined->isLive())
        continue;
      trieBuilder.addSymbol(*defined);
      hasWeakSymbol = hasWeakSymbol || sym->isWeakDef();
    } else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
      if (dysym->shouldReexport)
        trieBuilder.addSymbol(*dysym);
    }
  }
  size = trieBuilder.build();
}

void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); }

DataInCodeSection::DataInCodeSection()
    : LinkEditSection(segment_names::linkEdit, section_names::dataInCode) {}

template <class LP>
static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() {
  std::vector<MachO::data_in_code_entry> dataInCodeEntries;
  for (const InputFile *inputFile : inputFiles) {
    if (!isa<ObjFile>(inputFile))
      continue;
    const ObjFile *objFile = cast<ObjFile>(inputFile);
    ArrayRef<MachO::data_in_code_entry> entries = objFile->getDataInCode();
    if (entries.empty())
      continue;

    std::vector<MachO::data_in_code_entry> sortedEntries;
    sortedEntries.assign(entries.begin(), entries.end());
    llvm::sort(sortedEntries, [](const data_in_code_entry &lhs,
                                 const data_in_code_entry &rhs) {
      return lhs.offset < rhs.offset;
    });

    // For each code subsection, find the 'data in code' entries residing in
    // it. Compute the new offset values as
    // <offset within subsection> + <subsection address> - <__TEXT address>.
    for (const Section *section : objFile->sections) {
      for (const Subsection &subsec : section->subsections) {
        const InputSection *isec = subsec.isec;
        if (!isCodeSection(isec))
          continue;
        if (cast<ConcatInputSection>(isec)->shouldOmitFromOutput())
          continue;
        const uint64_t beginAddr = section->addr + subsec.offset;
        auto it = llvm::lower_bound(
            sortedEntries, beginAddr,
            [](const MachO::data_in_code_entry &entry, uint64_t addr) {
              return entry.offset < addr;
            });
        const uint64_t endAddr = beginAddr + isec->getSize();
        for (const auto end = sortedEntries.end();
             it != end && it->offset + it->length <= endAddr; ++it)
          dataInCodeEntries.push_back(
              {static_cast<uint32_t>(isec->getVA(it->offset - beginAddr) -
                                     in.header->addr),
               it->length, it->kind});
      }
    }
  }

  // ld64 emits the table in sorted order too.
  llvm::sort(dataInCodeEntries,
             [](const data_in_code_entry &lhs, const data_in_code_entry &rhs) {
               return lhs.offset < rhs.offset;
             });
  return dataInCodeEntries;
}

void DataInCodeSection::finalizeContents() {
  entries = target->wordSize == 8 ? collectDataInCodeEntries<LP64>()
                                  : collectDataInCodeEntries<ILP32>();
}

void DataInCodeSection::writeTo(uint8_t *buf) const {
  if (!entries.empty())
    memcpy(buf, entries.data(), getRawSize());
}

FunctionStartsSection::FunctionStartsSection()
    : LinkEditSection(segment_names::linkEdit, section_names::functionStarts) {}

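// LC_FUNCTION_STARTS data is a zero-terminated sequence of ULEB128-encoded
// address deltas: the first is relative to the image base (the mach header),
// each subsequent one to the previous function's address. For illustration,
// functions at base + 0x100 and base + 0x180 encode as ULEB(0x100),
// ULEB(0x80), followed by the terminating 0 byte.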
void FunctionStartsSection::finalizeContents() {
  raw_svector_ostream os{contents};
  std::vector<uint64_t> addrs;
  for (const InputFile *file : inputFiles) {
    if (auto *objFile = dyn_cast<ObjFile>(file)) {
      for (const Symbol *sym : objFile->symbols) {
        if (const auto *defined = dyn_cast_or_null<Defined>(sym)) {
          if (!defined->isec() || !isCodeSection(defined->isec()) ||
              !defined->isLive())
            continue;
          addrs.push_back(defined->getVA());
        }
      }
    }
  }
  llvm::sort(addrs);
  uint64_t addr = in.header->addr;
  for (uint64_t nextAddr : addrs) {
    uint64_t delta = nextAddr - addr;
    if (delta == 0)
      continue;
    encodeULEB128(delta, os);
    addr = nextAddr;
  }
  os << '\0';
}

void FunctionStartsSection::writeTo(uint8_t *buf) const {
  memcpy(buf, contents.data(), contents.size());
}

SymtabSection::SymtabSection(StringTableSection &stringTableSection)
    : LinkEditSection(segment_names::linkEdit, section_names::symbolTable),
      stringTableSection(stringTableSection) {}

void SymtabSection::emitBeginSourceStab(StringRef sourceFile) {
  StabsEntry stab(N_SO);
  stab.strx = stringTableSection.addString(saver().save(sourceFile));
  stabs.emplace_back(std::move(stab));
}

void SymtabSection::emitEndSourceStab() {
  StabsEntry stab(N_SO);
  stab.sect = 1;
  stabs.emplace_back(std::move(stab));
}

void SymtabSection::emitObjectFileStab(ObjFile *file) {
  StabsEntry stab(N_OSO);
  stab.sect = target->cpuSubtype;
  SmallString<261> path(!file->archiveName.empty() ? file->archiveName
                                                   : file->getName());
  std::error_code ec = sys::fs::make_absolute(path);
  if (ec)
    fatal("failed to get absolute path for " + path);

  if (!file->archiveName.empty())
    path.append({"(", file->getName(), ")"});

  StringRef adjustedPath = saver().save(path.str());
  adjustedPath.consume_front(config->osoPrefix);

  stab.strx = stringTableSection.addString(adjustedPath);
  stab.desc = 1;
  stab.value = file->modTime;
  stabs.emplace_back(std::move(stab));
}

void SymtabSection::emitEndFunStab(Defined *defined) {
  StabsEntry stab(N_FUN);
  stab.value = defined->size;
  stabs.emplace_back(std::move(stab));
}

void SymtabSection::emitStabs() {
  if (config->omitDebugInfo)
    return;

  for (const std::string &s : config->astPaths) {
    StabsEntry astStab(N_AST);
    astStab.strx = stringTableSection.addString(s);
    stabs.emplace_back(std::move(astStab));
  }

  // Cache the file ID for each symbol in a std::pair for faster sorting.
  using SortingPair = std::pair<Defined *, int>;
  std::vector<SortingPair> symbolsNeedingStabs;
  for (const SymtabEntry &entry :
       concat<SymtabEntry>(localSymbols, externalSymbols)) {
    Symbol *sym = entry.sym;
    assert(sym->isLive() &&
           "dead symbols should not be in localSymbols, externalSymbols");
    if (auto *defined = dyn_cast<Defined>(sym)) {
      // Excluded symbols should have been filtered out in finalizeContents().
      assert(defined->includeInSymtab);

      if (defined->isAbsolute())
        continue;

      // Constant-folded symbols go in the executable's symbol table, but don't
      // get a stabs entry unless the --keep-icf-stabs flag is specified.
      if (!config->keepICFStabs &&
          defined->identicalCodeFoldingKind != Symbol::ICFFoldKind::None)
        continue;

      ObjFile *file = defined->getObjectFile();
      if (!file || !file->compileUnit)
        continue;

      // We use the symbol's original InputSection to get the file ID, even
      // for ICF-folded symbols, to ensure that STABS entries point to the
      // object file where the symbol was originally defined.
      symbolsNeedingStabs.emplace_back(defined,
                                       defined->originalIsec->getFile()->id);
    }
  }

  llvm::stable_sort(symbolsNeedingStabs, llvm::less_second());

  // Emit STABS symbols so that dsymutil and/or the debugger can map address
  // regions in the final binary to the source and object files from which they
  // originated.
  InputFile *lastFile = nullptr;
  for (SortingPair &pair : symbolsNeedingStabs) {
    Defined *defined = pair.first;
    // When emitting STABS entries for a symbol, always use the original
    // InputSection of the defined symbol, not the section of the function body
    // (which might be a different function entirely if ICF folded this
    // function). This ensures STABS entries point back to the original object
    // file.
    InputSection *isec = defined->originalIsec;
    ObjFile *file = cast<ObjFile>(isec->getFile());

    if (lastFile == nullptr || lastFile != file) {
      if (lastFile != nullptr)
        emitEndSourceStab();
      lastFile = file;

      emitBeginSourceStab(file->sourceFile());
      emitObjectFileStab(file);
    }

    StabsEntry symStab;
    symStab.sect = isec->parent->index;
    symStab.strx = stringTableSection.addString(defined->getName());

    // When using --keep-icf-stabs, we need to use the VA of the actual
    // function body that the linker placed in the binary, i.e. the function
    // that the symbol refers to after ICF folding.
    if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
      // For thunks, we need to get the function they point to.
      Defined *target = getBodyForThunkFoldedSym(defined);
      symStab.value = target->getVA();
    } else {
      symStab.value = defined->getVA();
    }

    if (isCodeSection(isec)) {
      symStab.type = N_FUN;
      stabs.emplace_back(std::move(symStab));
      // For the end-of-function marker in STABS, we need to use the size of
      // the actual function body that exists in the output binary.
      if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
        // For thunks, we use the target's size.
        Defined *target = getBodyForThunkFoldedSym(defined);
        emitEndFunStab(target);
      } else {
        emitEndFunStab(defined);
      }
    } else {
      symStab.type = defined->isExternal() ? N_GSYM : N_STSYM;
      stabs.emplace_back(std::move(symStab));
    }
  }

  if (!stabs.empty())
    emitEndSourceStab();
}

void SymtabSection::finalizeContents() {
  auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) {
    uint32_t strx = stringTableSection.addString(sym->getName());
    symbols.push_back({sym, strx});
  };

  std::function<void(Symbol *)> localSymbolsHandler;
  switch (config->localSymbolsPresence) {
  case SymtabPresence::All:
    localSymbolsHandler = [&](Symbol *sym) { addSymbol(localSymbols, sym); };
    break;
  case SymtabPresence::None:
    localSymbolsHandler = [&](Symbol *) { /* do nothing */ };
    break;
  case SymtabPresence::SelectivelyIncluded:
    localSymbolsHandler = [&](Symbol *sym) {
      if (config->localSymbolPatterns.match(sym->getName()))
        addSymbol(localSymbols, sym);
    };
    break;
  case SymtabPresence::SelectivelyExcluded:
    localSymbolsHandler = [&](Symbol *sym) {
      if (!config->localSymbolPatterns.match(sym->getName()))
        addSymbol(localSymbols, sym);
    };
    break;
  }

  // Local symbols aren't in the SymbolTable, so we walk the list of object
  // files to gather them.
  // But if `-x` is set, then we don't need to. localSymbolsHandler() will do
  // the right thing regardless, but this check is a perf optimization because
  // iterating through all the input files and their symbols is expensive.
  if (config->localSymbolsPresence != SymtabPresence::None) {
    for (const InputFile *file : inputFiles) {
      if (auto *objFile = dyn_cast<ObjFile>(file)) {
        for (Symbol *sym : objFile->symbols) {
          if (auto *defined = dyn_cast_or_null<Defined>(sym)) {
            if (defined->isExternal() || !defined->isLive() ||
                !defined->includeInSymtab)
              continue;
            localSymbolsHandler(sym);
          }
        }
      }
    }
  }

  // __dyld_private is a local symbol too. It's linker-created and doesn't
  // exist in any object file.
  if (in.stubHelper && in.stubHelper->dyldPrivate)
    localSymbolsHandler(in.stubHelper->dyldPrivate);

  for (Symbol *sym : symtab->getSymbols()) {
    if (!sym->isLive())
      continue;
    if (auto *defined = dyn_cast<Defined>(sym)) {
      if (!defined->includeInSymtab)
        continue;
      assert(defined->isExternal());
      if (defined->privateExtern)
        localSymbolsHandler(defined);
      else
        addSymbol(externalSymbols, defined);
    } else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
      if (dysym->isReferenced())
        addSymbol(undefinedSymbols, sym);
    }
  }

  emitStabs();
  uint32_t symtabIndex = stabs.size();
  for (const SymtabEntry &entry :
       concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) {
    entry.sym->symtabIndex = symtabIndex++;
  }
}

uint32_t SymtabSection::getNumSymbols() const {
  return stabs.size() + localSymbols.size() + externalSymbols.size() +
         undefinedSymbols.size();
}

// This serves to hide (type-erase) the template parameter from SymtabSection.
template <class LP> class SymtabSectionImpl final : public SymtabSection {
public:
  SymtabSectionImpl(StringTableSection &stringTableSection)
      : SymtabSection(stringTableSection) {}
  uint64_t getRawSize() const override;
  void writeTo(uint8_t *buf) const override;
};

template <class LP> uint64_t SymtabSectionImpl<LP>::getRawSize() const {
  return getNumSymbols() * sizeof(typename LP::nlist);
}

template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t *buf) const {
  auto *nList = reinterpret_cast<typename LP::nlist *>(buf);
  // Emit the stabs entries before the "real" symbols. We cannot emit them
  // after as that would render Symbol::symtabIndex inaccurate.
  for (const StabsEntry &entry : stabs) {
    nList->n_strx = entry.strx;
    nList->n_type = entry.type;
    nList->n_sect = entry.sect;
    nList->n_desc = entry.desc;
    nList->n_value = entry.value;
    ++nList;
  }

  for (const SymtabEntry &entry : concat<const SymtabEntry>(
           localSymbols, externalSymbols, undefinedSymbols)) {
    nList->n_strx = entry.strx;
    // TODO populate n_desc with more flags
    if (auto *defined = dyn_cast<Defined>(entry.sym)) {
      uint8_t scope = 0;
      if (defined->privateExtern) {
        // Private external -- dylib scoped symbol.
        // Promote to non-external at link time.
        scope = N_PEXT;
      } else if (defined->isExternal()) {
        // Normal global symbol.
        scope = N_EXT;
      } else {
        // TU-local symbol from localSymbols.
        scope = 0;
      }

      if (defined->isAbsolute()) {
        nList->n_type = scope | N_ABS;
        nList->n_sect = NO_SECT;
        nList->n_value = defined->value;
      } else {
        nList->n_type = scope | N_SECT;
        nList->n_sect = defined->isec()->parent->index;
        // For the N_SECT symbol type, n_value is the address of the symbol.
        nList->n_value = defined->getVA();
      }
      nList->n_desc |= defined->isExternalWeakDef() ? N_WEAK_DEF : 0;
      nList->n_desc |=
          defined->referencedDynamically ? REFERENCED_DYNAMICALLY : 0;
    } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) {
      uint16_t n_desc = nList->n_desc;
      int16_t ordinal = ordinalForDylibSymbol(*dysym);
      if (ordinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP)
        SET_LIBRARY_ORDINAL(n_desc, DYNAMIC_LOOKUP_ORDINAL);
      else if (ordinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE)
        SET_LIBRARY_ORDINAL(n_desc, EXECUTABLE_ORDINAL);
      else {
        assert(ordinal > 0);
        SET_LIBRARY_ORDINAL(n_desc, static_cast<uint8_t>(ordinal));
      }

      nList->n_type = N_EXT;
      n_desc |= dysym->isWeakDef() ? N_WEAK_DEF : 0;
      n_desc |= dysym->isWeakRef() ? N_WEAK_REF : 0;
      nList->n_desc = n_desc;
    }
    ++nList;
  }
}

template <class LP>
SymtabSection *
macho::makeSymtabSection(StringTableSection &stringTableSection) {
  return make<SymtabSectionImpl<LP>>(stringTableSection);
}

IndirectSymtabSection::IndirectSymtabSection()
    : LinkEditSection(segment_names::linkEdit,
                      section_names::indirectSymbolTable) {}

uint32_t IndirectSymtabSection::getNumSymbols() const {
  uint32_t size = in.got->getEntries().size() +
                  in.tlvPointers->getEntries().size() +
                  in.stubs->getEntries().size();
  if (!config->emitChainedFixups)
    size += in.stubs->getEntries().size();
  return size;
}

bool IndirectSymtabSection::isNeeded() const {
  return in.got->isNeeded() || in.tlvPointers->isNeeded() ||
         in.stubs->isNeeded();
}

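// Assign each section's reserved1 field, which holds the starting index of
// its entries in the indirect symbol table: GOT entries come first, then TLV
// pointers, then stubs, then (when not using chained fixups) the lazy
// pointers, which repeat the stubs' symbols.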
void IndirectSymtabSection::finalizeContents() {
  uint32_t off = 0;
  in.got->reserved1 = off;
  off += in.got->getEntries().size();
  in.tlvPointers->reserved1 = off;
  off += in.tlvPointers->getEntries().size();
  in.stubs->reserved1 = off;
  if (in.lazyPointers) {
    off += in.stubs->getEntries().size();
    in.lazyPointers->reserved1 = off;
  }
}

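// Entries whose symbol is absent from the symbol table, or that don't need a
// runtime binding, are marked INDIRECT_SYMBOL_LOCAL instead of referencing a
// symbol table index.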
static uint32_t indirectValue(const Symbol *sym) {
  if (sym->symtabIndex == UINT32_MAX || !needsBinding(sym))
    return INDIRECT_SYMBOL_LOCAL;
  return sym->symtabIndex;
}

void IndirectSymtabSection::writeTo(uint8_t *buf) const {
  uint32_t off = 0;
  for (const Symbol *sym : in.got->getEntries()) {
    write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
    ++off;
  }
  for (const Symbol *sym : in.tlvPointers->getEntries()) {
    write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
    ++off;
  }
  for (const Symbol *sym : in.stubs->getEntries()) {
    write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
    ++off;
  }

  if (in.lazyPointers) {
    // There is a 1:1 correspondence between stubs and LazyPointerSection
    // entries. But giving __stubs and __la_symbol_ptr the same reserved1
    // (the offset into the indirect symbol table) so that they both refer
    // to the same range of offsets confuses `strip`, so write the stubs
    // symbol table offsets a second time.
    for (const Symbol *sym : in.stubs->getEntries()) {
      write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
      ++off;
    }
  }
}
1539
1540StringTableSection::StringTableSection()
1541 : LinkEditSection(segment_names::linkEdit, section_names::stringTable) {}
1542
1543uint32_t StringTableSection::addString(StringRef str) {
1544 uint32_t strx = size;
1545 if (config->dedupSymbolStrings) {
1546 llvm::CachedHashStringRef hashedStr(str);
1547 auto [it, inserted] = stringMap.try_emplace(Key: hashedStr, Args&: strx);
1548 if (!inserted)
1549 return it->second;
1550 }
1551
1552 strings.push_back(x: str);
1553 size += str.size() + 1; // account for null terminator
1554 return strx;
1555}

void StringTableSection::writeTo(uint8_t *buf) const {
  uint32_t off = 0;
  for (StringRef str : strings) {
    memcpy(buf + off, str.data(), str.size());
    off += str.size() + 1; // account for null terminator
  }
}

static_assert((CodeSignatureSection::blobHeadersSize % 8) == 0);
static_assert((CodeSignatureSection::fixedHeadersSize % 8) == 0);

CodeSignatureSection::CodeSignatureSection()
    : LinkEditSection(segment_names::linkEdit, section_names::codeSignature) {
  align = 16; // required by libstuff

  // XXX: This mimics ld64, which uses the install name as the codesign
  // identifier, if available.
  if (!config->installName.empty())
    fileName = config->installName;
  else
    // FIXME: Consider using finalOutput instead of outputFile.
    fileName = config->outputFile;

  size_t slashIndex = fileName.rfind("/");
  if (slashIndex != std::string::npos)
    fileName = fileName.drop_front(slashIndex + 1);

  // NOTE: Any changes to these calculations should be repeated
  // in llvm-objcopy's MachOLayoutBuilder::layoutTail.
  allHeadersSize = alignTo<16>(fixedHeadersSize + fileName.size() + 1);
  fileNamePad = allHeadersSize - fixedHeadersSize - fileName.size();
}

uint32_t CodeSignatureSection::getBlockCount() const {
  return (fileOff + blockSize - 1) / blockSize;
}

uint64_t CodeSignatureSection::getRawSize() const {
  return allHeadersSize + getBlockCount() * hashSize;
}

void CodeSignatureSection::writeHashes(uint8_t *buf) const {
  // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
  // MachOWriter::writeSignatureData.
  uint8_t *hashes = buf + fileOff + allHeadersSize;
  parallelFor(0, getBlockCount(), [&](size_t i) {
    sha256(buf + i * blockSize,
           std::min(static_cast<size_t>(fileOff - i * blockSize), blockSize),
           hashes + i * hashSize);
  });
#if defined(__APPLE__)
  // This is a macOS-specific work-around and makes no sense for any
  // other host OS. See https://openradar.appspot.com/FB8914231
  //
  // The macOS kernel maintains a signature-verification cache to
  // quickly validate applications at time of execve(2). The trouble
  // is that the kernel creates the cache entry at the time of the
  // mmap(2) call, before we have a chance to write either the code to
  // sign or the signature header+hashes. The fix is to invalidate
  // all cached data associated with the output file, thus discarding
  // the bogus prematurely-cached signature.
  msync(buf, fileOff + getSize(), MS_INVALIDATE);
#endif
}

void CodeSignatureSection::writeTo(uint8_t *buf) const {
  // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
  // MachOWriter::writeSignatureData.
  uint32_t signatureSize = static_cast<uint32_t>(getSize());
  auto *superBlob = reinterpret_cast<CS_SuperBlob *>(buf);
  write32be(&superBlob->magic, CSMAGIC_EMBEDDED_SIGNATURE);
  write32be(&superBlob->length, signatureSize);
  write32be(&superBlob->count, 1);
  auto *blobIndex = reinterpret_cast<CS_BlobIndex *>(&superBlob[1]);
  write32be(&blobIndex->type, CSSLOT_CODEDIRECTORY);
  write32be(&blobIndex->offset, blobHeadersSize);
  auto *codeDirectory =
      reinterpret_cast<CS_CodeDirectory *>(buf + blobHeadersSize);
  write32be(&codeDirectory->magic, CSMAGIC_CODEDIRECTORY);
  write32be(&codeDirectory->length, signatureSize - blobHeadersSize);
  write32be(&codeDirectory->version, CS_SUPPORTSEXECSEG);
  write32be(&codeDirectory->flags, CS_ADHOC | CS_LINKER_SIGNED);
  write32be(&codeDirectory->hashOffset,
            sizeof(CS_CodeDirectory) + fileName.size() + fileNamePad);
  write32be(&codeDirectory->identOffset, sizeof(CS_CodeDirectory));
  codeDirectory->nSpecialSlots = 0;
  write32be(&codeDirectory->nCodeSlots, getBlockCount());
  write32be(&codeDirectory->codeLimit, fileOff);
  codeDirectory->hashSize = static_cast<uint8_t>(hashSize);
  codeDirectory->hashType = kSecCodeSignatureHashSHA256;
  codeDirectory->platform = 0;
  codeDirectory->pageSize = blockSizeShift;
  codeDirectory->spare2 = 0;
  codeDirectory->scatterOffset = 0;
  codeDirectory->teamOffset = 0;
  codeDirectory->spare3 = 0;
  codeDirectory->codeLimit64 = 0;
  OutputSegment *textSeg = getOrCreateOutputSegment(segment_names::text);
  write64be(&codeDirectory->execSegBase, textSeg->fileOff);
  write64be(&codeDirectory->execSegLimit, textSeg->fileSize);
  write64be(&codeDirectory->execSegFlags,
            config->outputType == MH_EXECUTE ? CS_EXECSEG_MAIN_BINARY : 0);
  auto *id = reinterpret_cast<char *>(&codeDirectory[1]);
  memcpy(id, fileName.begin(), fileName.size());
  memset(id + fileName.size(), 0, fileNamePad);
}

CStringSection::CStringSection(const char *name)
    : SyntheticSection(segment_names::text, name) {
  flags = S_CSTRING_LITERALS;
}

void CStringSection::addInput(CStringInputSection *isec) {
  isec->parent = this;
  inputs.push_back(isec);
  if (isec->align > align)
    align = isec->align;
}

void CStringSection::writeTo(uint8_t *buf) const {
  for (const CStringInputSection *isec : inputs) {
    for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
      if (!piece.live)
        continue;
      StringRef string = isec->getStringRef(i);
      memcpy(buf + piece.outSecOff, string.data(), string.size());
    }
  }
}

// In contrast to ELF, which puts strings that need different alignments into
// different sections, clang's Mach-O backend puts them all in one section.
// Strings that need to be aligned have the .p2align directive emitted before
// them, which simply translates into zero padding in the object file. In
// other words, we have to infer the desired alignment of these cstrings from
// their addresses.
//
// We differ slightly from ld64 in how we've chosen to align these cstrings.
// Both LLD and ld64 preserve the number of trailing zeros in each cstring's
// address in the input object files. When deduplicating identical cstrings,
// both linkers pick the cstring whose address has more trailing zeros, and
// preserve the alignment of that address in the final binary. However, ld64
// goes a step further and also preserves the offset of the cstring from the
// last section-aligned address. I.e. if a cstring is at offset 18 in the
// input, with a section alignment of 16, then both LLD and ld64 will ensure
// the final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will
// also ensure that the final address is of the form 16 * k + 2 for some k.
//
// Note that ld64's heuristic means that a dedup'ed cstring's final address is
// dependent on the order of the input object files. E.g. if in addition to
// the cstring at offset 18 above, we have a duplicate one in another file
// with a `.cstring` section alignment of 2 and an offset of zero, then ld64
// will pick the cstring from the object file earlier on the command line
// (since both have the same number of trailing zeros in their address). So
// the final cstring may either be at some address `16 * k + 2` or at some
// address `2 * k`.
//
// I've opted not to follow this behavior primarily for implementation
// simplicity, and secondarily to save a few more bytes. It's not clear to me
// that preserving the section alignment + offset is ever necessary, and there
// are many cases that are clearly redundant. In particular, if an x86_64
// object file contains some strings that are accessed via SIMD instructions,
// then the .cstring section in the object file will be 16-byte-aligned (since
// SIMD requires its operand addresses to be 16-byte aligned). However, there
// will typically also be other cstrings in the same file that aren't used via
// SIMD and don't need this alignment. They will be emitted at some arbitrary
// address `A`, but ld64 will treat them as being 16-byte aligned with an
// offset of `A % 16`.
static Align getStringPieceAlignment(const CStringInputSection &isec,
                                     const StringPiece &piece) {
  return llvm::Align(1ULL << llvm::countr_zero(isec.align | piece.inSecOff));
}

void CStringSection::finalizeContents() {
  size = 0;
  priorityBuilder.forEachStringPiece(
      inputs,
      [&](CStringInputSection &isec, StringPiece &piece, size_t pieceIdx) {
        piece.outSecOff = alignTo(size, getStringPieceAlignment(isec, piece));
        StringRef string = isec.getStringRef(pieceIdx);
        size =
            piece.outSecOff + string.size() + 1; // account for null terminator
      },
      /*forceInputOrder=*/false, /*computeHash=*/true);
  for (CStringInputSection *isec : inputs)
    isec->isFinal = true;
}

void DeduplicatedCStringSection::finalizeContents() {
  // Find the largest alignment required for each string.
  DenseMap<CachedHashStringRef, Align> strToAlignment;
  // Used for tail merging only.
  std::vector<CachedHashStringRef> deduplicatedStrs;
  priorityBuilder.forEachStringPiece(
      inputs,
      [&](CStringInputSection &isec, StringPiece &piece, size_t pieceIdx) {
        auto s = isec.getCachedHashStringRef(pieceIdx);
        assert(isec.align != 0);
        auto align = getStringPieceAlignment(isec, piece);
        auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
        if (config->tailMergeStrings && wasInserted)
          deduplicatedStrs.push_back(s);
        if (!wasInserted && it->second < align)
          it->second = align;
      },
      /*forceInputOrder=*/true);

  // Like a lexicographical sort, except we read strings in reverse and take
  // the longest string first.
  // TODO: We could improve performance by implementing our own sort that
  // avoids comparing characters we know to be the same. See
  // StringTableBuilder::multikeySort() for details.
  llvm::sort(deduplicatedStrs, [](const auto &left, const auto &right) {
    for (const auto &[leftChar, rightChar] :
         llvm::zip(llvm::reverse(left.val()), llvm::reverse(right.val()))) {
      if (leftChar == rightChar)
        continue;
      return leftChar < rightChar;
    }
    return left.size() > right.size();
  });
  std::optional<CachedHashStringRef> mergeCandidate;
  DenseMap<CachedHashStringRef, std::pair<CachedHashStringRef, uint64_t>>
      tailMergeMap;
  for (auto &s : deduplicatedStrs) {
    if (!mergeCandidate || !mergeCandidate->val().ends_with(s.val())) {
      mergeCandidate = s;
      continue;
    }
    uint64_t tailMergeOffset = mergeCandidate->size() - s.size();
    // TODO: If the tail offset is incompatible with this string's alignment,
    // we might be able to find another superstring with a compatible tail
    // offset. The difficulty is how to do this efficiently.
    const auto &align = strToAlignment.at(s);
    if (!isAligned(align, tailMergeOffset))
      continue;
    auto &mergeCandidateAlign = strToAlignment[*mergeCandidate];
    if (align > mergeCandidateAlign)
      mergeCandidateAlign = align;
    tailMergeMap.try_emplace(s, *mergeCandidate, tailMergeOffset);
  }
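
  // Worked example: after the reverse-lexicographic sort, "foobar" is seen
  // before its suffix "bar". "foobar" becomes the merge candidate, and "bar"
  // is recorded in tailMergeMap with tailMergeOffset 6 - 3 == 3, provided
  // that offset 3 satisfies "bar"'s required alignment.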

  // Sort the strings for performance and compression size win, and then
  // assign an offset for each string and save it to the corresponding
  // StringPieces for easy access.
  priorityBuilder.forEachStringPiece(inputs, [&](CStringInputSection &isec,
                                                 StringPiece &piece,
                                                 size_t pieceIdx) {
    auto s = isec.getCachedHashStringRef(pieceIdx);
    // Any string can be tail merged with itself with an offset of zero.
    uint64_t tailMergeOffset = 0;
    auto mergeIt =
        config->tailMergeStrings ? tailMergeMap.find(s) : tailMergeMap.end();
    if (mergeIt != tailMergeMap.end()) {
      auto &[superString, offset] = mergeIt->second;
      // s can be tail merged with superString. Do not lay out s; instead,
      // lay out superString if we haven't already.
      assert(superString.val().ends_with(s.val()));
      s = superString;
      tailMergeOffset = offset;
    }
    auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0);
    if (wasInserted) {
      // Avoid computing the offset until we are sure we will need to.
      uint64_t offset = alignTo(size, strToAlignment.at(s));
      it->second = offset;
      size = offset + s.size() + 1; // account for null terminator
    }
    piece.outSecOff = it->second + tailMergeOffset;
    if (mergeIt != tailMergeMap.end()) {
      auto &tailMergedString = mergeIt->first;
      stringOffsetMap[tailMergedString] = piece.outSecOff;
      assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff));
    }
  });
  for (CStringInputSection *isec : inputs)
    isec->isFinal = true;
}

void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
  for (const auto &[s, outSecOff] : stringOffsetMap)
    if (s.size())
      memcpy(buf + outSecOff, s.data(), s.size());
}

uint64_t DeduplicatedCStringSection::getStringOffset(StringRef str) const {
  // StringPiece uses 31 bits to store the hashes, so we replicate that.
  uint32_t hash = xxh3_64bits(str) & 0x7fffffff;
  return stringOffsetMap.at(CachedHashStringRef(str, hash));
}

// This section is actually emitted as __TEXT,__const by ld64, but clang may
// emit input sections of that name, and LLD doesn't currently support mixing
// synthetic and concat-type OutputSections. To work around this, I've given
// our merged-literals section a different name.
WordLiteralSection::WordLiteralSection()
    : SyntheticSection(segment_names::text, section_names::literals) {
  align = 16;
}

void WordLiteralSection::addInput(WordLiteralInputSection *isec) {
  isec->parent = this;
  inputs.push_back(isec);
}

void WordLiteralSection::finalizeContents() {
  for (WordLiteralInputSection *isec : inputs) {
    // We do all processing of the InputSection here, so it will be
    // effectively finalized.
    isec->isFinal = true;
    const uint8_t *buf = isec->data.data();
    switch (sectionType(isec->getFlags())) {
    case S_4BYTE_LITERALS: {
      for (size_t off = 0, e = isec->data.size(); off < e; off += 4) {
        if (!isec->isLive(off))
          continue;
        uint32_t value = *reinterpret_cast<const uint32_t *>(buf + off);
        literal4Map.emplace(value, literal4Map.size());
      }
      break;
    }
    case S_8BYTE_LITERALS: {
      for (size_t off = 0, e = isec->data.size(); off < e; off += 8) {
        if (!isec->isLive(off))
          continue;
        uint64_t value = *reinterpret_cast<const uint64_t *>(buf + off);
        literal8Map.emplace(value, literal8Map.size());
      }
      break;
    }
    case S_16BYTE_LITERALS: {
      for (size_t off = 0, e = isec->data.size(); off < e; off += 16) {
        if (!isec->isLive(off))
          continue;
        UInt128 value = *reinterpret_cast<const UInt128 *>(buf + off);
        literal16Map.emplace(value, literal16Map.size());
      }
      break;
    }
    default:
      llvm_unreachable("invalid literal section type");
    }
  }
}

void WordLiteralSection::writeTo(uint8_t *buf) const {
  // Note that we don't attempt to do any endianness conversion in addInput(),
  // so we don't do it here either -- just write out the original value,
  // byte-for-byte.
  for (const auto &p : literal16Map)
    memcpy(buf + p.second * 16, &p.first, 16);
  buf += literal16Map.size() * 16;

  for (const auto &p : literal8Map)
    memcpy(buf + p.second * 8, &p.first, 8);
  buf += literal8Map.size() * 8;

  for (const auto &p : literal4Map)
    memcpy(buf + p.second * 4, &p.first, 4);
}

ObjCImageInfoSection::ObjCImageInfoSection()
    : SyntheticSection(segment_names::data, section_names::objCImageInfo) {}

ObjCImageInfoSection::ImageInfo
ObjCImageInfoSection::parseImageInfo(const InputFile *file) {
  ImageInfo info;
  ArrayRef<uint8_t> data = file->objCImageInfo;
  // The image info struct has the following layout:
  // struct {
  //   uint32_t version;
  //   uint32_t flags;
  // };
  if (data.size() < 8) {
    warn(toString(file) + ": invalid __objc_imageinfo size");
    return info;
  }

  auto *buf = reinterpret_cast<const uint32_t *>(data.data());
  if (read32le(buf) != 0) {
    warn(toString(file) + ": invalid __objc_imageinfo version");
    return info;
  }

  uint32_t flags = read32le(buf + 1);
  info.swiftVersion = (flags >> 8) & 0xff;
  info.hasCategoryClassProperties = flags & 0x40;
  return info;
}

static std::string swiftVersionString(uint8_t version) {
  switch (version) {
  case 1:
    return "1.0";
  case 2:
    return "1.1";
  case 3:
    return "2.0";
  case 4:
    return "3.0";
  case 5:
    return "4.0";
  default:
    return ("0x" + Twine::utohexstr(version)).str();
  }
}

// Validate each object file's __objc_imageinfo and use them to generate the
// image info for the output binary. Only two pieces of info are relevant:
// 1. The Swift version (should be identical across inputs)
// 2. `bool hasCategoryClassProperties` (true only if true for all inputs)
void ObjCImageInfoSection::finalizeContents() {
  assert(files.size() != 0); // should have already been checked via isNeeded()

  info.hasCategoryClassProperties = true;
  const InputFile *firstFile;
  for (const InputFile *file : files) {
    ImageInfo inputInfo = parseImageInfo(file);
    info.hasCategoryClassProperties &= inputInfo.hasCategoryClassProperties;

    // A swiftVersion of 0 means no Swift is present, so no version check is
    // required.
    if (inputInfo.swiftVersion == 0)
      continue;

    if (info.swiftVersion != 0 && info.swiftVersion != inputInfo.swiftVersion) {
      error("Swift version mismatch: " + toString(firstFile) + " has version " +
            swiftVersionString(info.swiftVersion) + " but " + toString(file) +
            " has version " + swiftVersionString(inputInfo.swiftVersion));
    } else {
      info.swiftVersion = inputInfo.swiftVersion;
      firstFile = file;
    }
  }
}

void ObjCImageInfoSection::writeTo(uint8_t *buf) const {
  uint32_t flags = info.hasCategoryClassProperties ? 0x40 : 0x0;
  flags |= info.swiftVersion << 8;
  write32le(buf + 4, flags);
}

InitOffsetsSection::InitOffsetsSection()
    : SyntheticSection(segment_names::text, section_names::initOffsets) {
  flags = S_INIT_FUNC_OFFSETS;
  align = 4; // This section contains 32-bit integers.
}

uint64_t InitOffsetsSection::getSize() const {
  size_t count = 0;
  for (const ConcatInputSection *isec : sections)
    count += isec->relocs.size();
  return count * sizeof(uint32_t);
}

void InitOffsetsSection::writeTo(uint8_t *buf) const {
  // FIXME: Add the function specified by -init once that argument is
  // implemented.
  for (ConcatInputSection *isec : sections) {
    for (const Relocation &rel : isec->relocs) {
      const Symbol *referent = cast<Symbol *>(rel.referent);
      assert(referent && "section relocation should have been rejected");
      uint64_t offset = referent->getVA() - in.header->addr;
      // FIXME: Can we handle this gracefully?
      if (offset > UINT32_MAX)
        fatal(isec->getLocation(rel.offset) + ": offset to initializer " +
              referent->getName() + " (" + utohexstr(offset) +
              ") does not fit in 32 bits");

      // Entries need to be added in the order they appear in the section, but
      // relocations aren't guaranteed to be sorted.
      size_t index = rel.offset >> target->p2WordSize;
      write32le(&buf[index * sizeof(uint32_t)], offset);
    }
    buf += isec->relocs.size() * sizeof(uint32_t);
  }
}

// The inputs are __mod_init_func sections, which contain pointers to
// initializer functions, therefore all relocations should be of the UNSIGNED
// type. InitOffsetsSection stores offsets, so if the initializer's address is
// not known at link time, stub-indirection has to be used.
void InitOffsetsSection::setUp() {
  for (const ConcatInputSection *isec : sections) {
    for (const Relocation &rel : isec->relocs) {
      RelocAttrs attrs = target->getRelocAttrs(rel.type);
      if (!attrs.hasAttr(RelocAttrBits::UNSIGNED))
        error(isec->getLocation(rel.offset) +
              ": unsupported relocation type: " + attrs.name);
      if (rel.addend != 0)
        error(isec->getLocation(rel.offset) +
              ": relocation addend is not representable in __init_offsets");
      if (isa<InputSection *>(rel.referent))
        error(isec->getLocation(rel.offset) +
              ": unexpected section relocation");

      Symbol *sym = rel.referent.dyn_cast<Symbol *>();
      if (auto *undefined = dyn_cast<Undefined>(sym))
        treatUndefinedSymbol(*undefined, isec, rel.offset);
      if (needsBinding(sym))
        in.stubs->addEntry(sym);
    }
  }
}

ObjCMethListSection::ObjCMethListSection()
    : SyntheticSection(segment_names::text, section_names::objcMethList) {
  flags = S_ATTR_NO_DEAD_STRIP;
  align = relativeOffsetSize;
}

// Go through all input method lists and ensure that we have selrefs for all
// their method names. The selrefs will be needed later by ::writeTo. We need
// to create them early on here to ensure they are processed correctly by the
// lld pipeline.
void ObjCMethListSection::setUp() {
  for (const ConcatInputSection *isec : inputs) {
    uint32_t structSizeAndFlags = 0, structCount = 0;
    readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount);
    uint32_t originalStructSize = structSizeAndFlags & structSizeMask;
    // The first method name is immediately after the header.
    uint32_t methodNameOff = methodListHeaderSize;

    // Loop through all methods, and ensure a selref for each of them exists.
    while (methodNameOff < isec->data.size()) {
      const Relocation *reloc = isec->getRelocAt(methodNameOff);
      assert(reloc && "Relocation expected at method list name slot");

      StringRef methname = reloc->getReferentString();
      if (!ObjCSelRefsHelper::getSelRef(methname))
        ObjCSelRefsHelper::makeSelRef(methname);

      // Jump to the method name offset in the next struct.
      methodNameOff += originalStructSize;
    }
  }
}

// Calculate the section size and the final offsets at which the
// InputSections need to be written.
void ObjCMethListSection::finalize() {
  // sectionSize will be the total size of the __objc_methlist section.
  sectionSize = 0;
  for (ConcatInputSection *isec : inputs) {
    // We can also use sectionSize as the write offset for isec.
    assert(sectionSize == alignToPowerOf2(sectionSize, relativeOffsetSize) &&
           "expected __objc_methlist to be aligned by default with the "
           "required section alignment");
    isec->outSecOff = sectionSize;

    isec->isFinal = true;
    uint32_t relativeListSize =
        computeRelativeMethodListSize(isec->data.size());
    sectionSize += relativeListSize;

    // If encoding the method list in relative offset format shrinks the size,
    // then we also need to adjust symbol sizes to match the new size. Note
    // that on 32-bit platforms the size of the method list will remain the
    // same when encoded in relative offset format.
    if (relativeListSize != isec->data.size()) {
      for (Symbol *sym : isec->symbols) {
        assert(isa<Defined>(sym) &&
               "Unexpected undefined symbol in ObjC method list");
        auto *def = cast<Defined>(sym);
        // There can be 0-size symbols; check if this is the case and ignore
        // them.
        if (def->size) {
          assert(
              def->size == isec->data.size() &&
              "Invalid ObjC method list symbol size: expected symbol size to "
              "match isec size");
          def->size = relativeListSize;
        }
      }
    }
  }
}

void ObjCMethListSection::writeTo(uint8_t *bufStart) const {
  uint8_t *buf = bufStart;
  for (const ConcatInputSection *isec : inputs) {
    assert(buf - bufStart == std::ptrdiff_t(isec->outSecOff) &&
           "Writing at unexpected offset");
    uint32_t writtenSize = writeRelativeMethodList(isec, buf);
    buf += writtenSize;
  }
  assert(buf - bufStart == std::ptrdiff_t(sectionSize) &&
         "Written size does not match expected section size");
}

// Check if an InputSection is a method list. To do this we scan the
// InputSection for any symbols whose names match the patterns we expect clang
// to generate for method lists.
bool ObjCMethListSection::isMethodList(const ConcatInputSection *isec) {
  const char *symPrefixes[] = {objc::symbol_names::classMethods,
                               objc::symbol_names::instanceMethods,
                               objc::symbol_names::categoryInstanceMethods,
                               objc::symbol_names::categoryClassMethods};
  if (!isec)
    return false;
  for (const Symbol *sym : isec->symbols) {
    auto *def = dyn_cast_or_null<Defined>(sym);
    if (!def)
      continue;
    for (const char *prefix : symPrefixes) {
      if (def->getName().starts_with(prefix)) {
        assert(def->size == isec->data.size() &&
               "Invalid ObjC method list symbol size: expected symbol size to "
               "match isec size");
        assert(def->value == 0 &&
               "Offset of ObjC method list symbol must be 0");
        return true;
      }
    }
  }

  return false;
}

// Encode a single relative offset value. The input is the data/symbol at
// (&isec->data[inSecOff]). The output is written to (&buf[outSecOff]).
// 'useSelRef' indicates that we should not directly use the specified symbol,
// but instead get the selRef for the symbol and use that instead.
void ObjCMethListSection::writeRelativeOffsetForIsec(
    const ConcatInputSection *isec, uint8_t *buf, uint32_t &inSecOff,
    uint32_t &outSecOff, bool useSelRef) const {
  const Relocation *reloc = isec->getRelocAt(inSecOff);
  assert(reloc && "Relocation expected at __objc_methlist offset");

  uint32_t symVA = 0;
  if (useSelRef) {
    StringRef methname = reloc->getReferentString();
    ConcatInputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
    assert(selRef && "Expected all selector names to already be present in "
                     "__objc_selrefs");
    symVA = selRef->getVA();
    assert(selRef->data.size() == target->wordSize &&
           "Expected one selref per ConcatInputSection");
  } else if (auto *sym = dyn_cast<Symbol *>(reloc->referent)) {
    auto *def = dyn_cast_or_null<Defined>(sym);
    assert(def && "Expected all syms in __objc_methlist to be defined");
    symVA = def->getVA();
  } else {
    auto *isec = cast<InputSection *>(reloc->referent);
    symVA = isec->getVA(reloc->addend);
  }

  uint32_t currentVA = isec->getVA() + outSecOff;
  uint32_t delta = symVA - currentVA;
  write32le(buf + outSecOff, delta);

  // Move one pointer forward in the absolute method list.
  inSecOff += target->wordSize;
  // Move one relative offset forward in the relative method list (32 bits).
  outSecOff += relativeOffsetSize;
}

// Write a relative method list to buf, and return the number of bytes
// written.
uint32_t
ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection *isec,
                                             uint8_t *buf) const {
  // Copy over the header, and add the "this is a relative method list" magic
  // value flag.
  uint32_t structSizeAndFlags = 0, structCount = 0;
  readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount);
  // Set the struct size for the relative method list.
  uint32_t relativeStructSizeAndFlags =
      (relativeOffsetSize * pointersPerStruct) & structSizeMask;
  // Carry over the old flags from the input struct.
  relativeStructSizeAndFlags |= structSizeAndFlags & structFlagsMask;
  // Set the relative method list flag.
  relativeStructSizeAndFlags |= relMethodHeaderFlag;

  writeMethodListHeader(buf, relativeStructSizeAndFlags, structCount);

  assert(methodListHeaderSize +
                 (structCount * pointersPerStruct * target->wordSize) ==
             isec->data.size() &&
         "Invalid computed ObjC method list size");

  uint32_t inSecOff = methodListHeaderSize;
  uint32_t outSecOff = methodListHeaderSize;

  // Go through the method list and encode the input absolute pointers as
  // relative offsets. writeRelativeOffsetForIsec increments inSecOff and
  // outSecOff as it goes.
  for (uint32_t i = 0; i < structCount; i++) {
    // Write the name (selector) of the method.
    writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff,
                               /*useSelRef=*/true);
    // Write the type of the method.
    writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff,
                               /*useSelRef=*/false);
    // Write the reference to the implementation of the method.
    writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff,
                               /*useSelRef=*/false);
  }

  // We expect to have read all the data in the isec.
  assert(inSecOff == isec->data.size() &&
         "Invalid actual ObjC method list size");
  assert(
      outSecOff == computeRelativeMethodListSize(inSecOff) &&
      "Mismatch between input & output size when writing relative method list");
  return outSecOff;
}

// Given the size of an ObjC method list InputSection, return the size of the
// method list when encoded in relative offsets format. We can do this without
// decoding the actual data, as it can be directly inferred from the size of
// the isec.
uint32_t ObjCMethListSection::computeRelativeMethodListSize(
    uint32_t absoluteMethodListSize) const {
  uint32_t oldPointersSize = absoluteMethodListSize - methodListHeaderSize;
  uint32_t pointerCount = oldPointersSize / target->wordSize;
  assert(((pointerCount % pointersPerStruct) == 0) &&
         "__objc_methlist expects method lists to have multiple-of-3 pointers");

  uint32_t newPointersSize = pointerCount * relativeOffsetSize;
  uint32_t newTotalSize = methodListHeaderSize + newPointersSize;

  assert((newTotalSize <= absoluteMethodListSize) &&
         "Expected relative method list size to be smaller than or equal to "
         "the original size");
  return newTotalSize;
}

// Read a method list header from buf.
void ObjCMethListSection::readMethodListHeader(const uint8_t *buf,
                                               uint32_t &structSizeAndFlags,
                                               uint32_t &structCount) const {
  structSizeAndFlags = read32le(buf);
  structCount = read32le(buf + sizeof(uint32_t));
}

// Write a method list header to buf.
void ObjCMethListSection::writeMethodListHeader(uint8_t *buf,
                                                uint32_t structSizeAndFlags,
                                                uint32_t structCount) const {
  write32le(buf, structSizeAndFlags);
  write32le(buf + sizeof(structSizeAndFlags), structCount);
}

void macho::createSyntheticSymbols() {
  auto addHeaderSymbol = [](const char *name) {
    symtab->addSynthetic(name, in.header->isec, /*value=*/0,
                         /*isPrivateExtern=*/true, /*includeInSymtab=*/false,
                         /*referencedDynamically=*/false);
  };

  switch (config->outputType) {
  // FIXME: Assign the right address value for these symbols
  // (rather than 0). But we need to do that after assignAddresses().
  case MH_EXECUTE:
    // If linking PIE, __mh_execute_header is a defined symbol in
    // __TEXT,__text. Otherwise, it's an absolute symbol.
    if (config->isPic)
      symtab->addSynthetic("__mh_execute_header", in.header->isec, /*value=*/0,
                           /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
                           /*referencedDynamically=*/true);
    else
      symtab->addSynthetic("__mh_execute_header", /*isec=*/nullptr,
                           /*value=*/0, /*isPrivateExtern=*/false,
                           /*includeInSymtab=*/true,
                           /*referencedDynamically=*/true);
    break;

  // The following symbols are N_SECT symbols, even though the header is not
  // part of any section, and they are private to the bundle/dylib/object they
  // are part of.
  case MH_BUNDLE:
    addHeaderSymbol("__mh_bundle_header");
    break;
  case MH_DYLIB:
    addHeaderSymbol("__mh_dylib_header");
    break;
  case MH_DYLINKER:
    addHeaderSymbol("__mh_dylinker_header");
    break;
  case MH_OBJECT:
    addHeaderSymbol("__mh_object_header");
    break;
  default:
    llvm_unreachable("unexpected outputType");
    break;
  }

  // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
  // which does e.g. cleanup of static global variables. The ABI document
  // says that the pointer can point to any address in one of the dylib's
  // segments, but in practice ld64 seems to set it to point to the header,
  // so that's what's implemented here.
  addHeaderSymbol("___dso_handle");
}

ChainedFixupsSection::ChainedFixupsSection()
    : LinkEditSection(segment_names::linkEdit, section_names::chainFixups) {}

bool ChainedFixupsSection::isNeeded() const {
  assert(config->emitChainedFixups);
  // dyld always expects LC_DYLD_CHAINED_FIXUPS to point to a valid
  // dyld_chained_fixups_header, so we create this section even if there
  // aren't any fixups.
  return true;
}

void ChainedFixupsSection::addBinding(const Symbol *sym,
                                      const InputSection *isec, uint64_t offset,
                                      int64_t addend) {
  locations.emplace_back(isec, offset);
  int64_t outlineAddend = (addend < 0 || addend > 0xFF) ? addend : 0;
  auto [it, inserted] = bindings.insert(
      {{sym, outlineAddend}, static_cast<uint32_t>(bindings.size())});

  if (inserted) {
    symtabSize += sym->getName().size() + 1;
    hasWeakBind = hasWeakBind || needsWeakBind(*sym);
    if (!isInt<23>(outlineAddend))
      needsLargeAddend = true;
    else if (outlineAddend != 0)
      needsAddend = true;
  }
}

std::pair<uint32_t, uint8_t>
ChainedFixupsSection::getBinding(const Symbol *sym, int64_t addend) const {
  int64_t outlineAddend = (addend < 0 || addend > 0xFF) ? addend : 0;
  auto it = bindings.find({sym, outlineAddend});
  assert(it != bindings.end() && "binding not found in the imports table");
  if (outlineAddend == 0)
    return {it->second, addend};
  return {it->second, 0};
}

static size_t writeImport(uint8_t *buf, int format, int16_t libOrdinal,
                          bool weakRef, uint32_t nameOffset, int64_t addend) {
  switch (format) {
  case DYLD_CHAINED_IMPORT: {
    auto *import = reinterpret_cast<dyld_chained_import *>(buf);
    import->lib_ordinal = libOrdinal;
    import->weak_import = weakRef;
    import->name_offset = nameOffset;
    return sizeof(dyld_chained_import);
  }
  case DYLD_CHAINED_IMPORT_ADDEND: {
    auto *import = reinterpret_cast<dyld_chained_import_addend *>(buf);
    import->lib_ordinal = libOrdinal;
    import->weak_import = weakRef;
    import->name_offset = nameOffset;
    import->addend = addend;
    return sizeof(dyld_chained_import_addend);
  }
  case DYLD_CHAINED_IMPORT_ADDEND64: {
    auto *import = reinterpret_cast<dyld_chained_import_addend64 *>(buf);
    import->lib_ordinal = libOrdinal;
    import->weak_import = weakRef;
    import->name_offset = nameOffset;
    import->addend = addend;
    return sizeof(dyld_chained_import_addend64);
  }
  default:
    llvm_unreachable("Unknown import format");
  }
}

size_t ChainedFixupsSection::SegmentInfo::getSize() const {
  assert(pageStarts.size() > 0 && "SegmentInfo for segment with no fixups?");
  return alignTo<8>(sizeof(dyld_chained_starts_in_segment) +
                    pageStarts.back().first * sizeof(uint16_t));
}

size_t ChainedFixupsSection::SegmentInfo::writeTo(uint8_t *buf) const {
  auto *segInfo = reinterpret_cast<dyld_chained_starts_in_segment *>(buf);
  segInfo->size = getSize();
  segInfo->page_size = target->getPageSize();
  // FIXME: Use DYLD_CHAINED_PTR_64_OFFSET on newer OS versions.
  segInfo->pointer_format = DYLD_CHAINED_PTR_64;
  segInfo->segment_offset = oseg->addr - in.header->addr;
  segInfo->max_valid_pointer = 0; // not used on 64-bit
  segInfo->page_count = pageStarts.back().first + 1;

  uint16_t *starts = segInfo->page_start;
  for (size_t i = 0; i < segInfo->page_count; ++i)
    starts[i] = DYLD_CHAINED_PTR_START_NONE;

  for (auto [pageIdx, startAddr] : pageStarts)
    starts[pageIdx] = startAddr;
  return segInfo->size;
}

static size_t importEntrySize(int format) {
  switch (format) {
  case DYLD_CHAINED_IMPORT:
    return sizeof(dyld_chained_import);
  case DYLD_CHAINED_IMPORT_ADDEND:
    return sizeof(dyld_chained_import_addend);
  case DYLD_CHAINED_IMPORT_ADDEND64:
    return sizeof(dyld_chained_import_addend64);
  default:
    llvm_unreachable("Unknown import format");
  }
}

// This is step 3 of the algorithm described in the class comment of
// ChainedFixupsSection.
//
// LC_DYLD_CHAINED_FIXUPS data consists of (in this order):
// * A dyld_chained_fixups_header
// * A dyld_chained_starts_in_image
// * One dyld_chained_starts_in_segment per segment
// * List of all imports (dyld_chained_import, dyld_chained_import_addend, or
//   dyld_chained_import_addend64)
// * Names of imported symbols
void ChainedFixupsSection::writeTo(uint8_t *buf) const {
  auto *header = reinterpret_cast<dyld_chained_fixups_header *>(buf);
  header->fixups_version = 0;
  header->imports_count = bindings.size();
  header->imports_format = importFormat;
  header->symbols_format = 0;

  buf += alignTo<8>(sizeof(*header));

  auto curOffset = [&buf, &header]() -> uint32_t {
    return buf - reinterpret_cast<uint8_t *>(header);
  };

  header->starts_offset = curOffset();

  auto *imageInfo = reinterpret_cast<dyld_chained_starts_in_image *>(buf);
  imageInfo->seg_count = outputSegments.size();
  uint32_t *segStarts = imageInfo->seg_info_offset;

  // dyld_chained_starts_in_image ends in a flexible array member containing a
  // uint32_t for each segment. Leave room for it, and fill it via segStarts.
  buf += alignTo<8>(offsetof(dyld_chained_starts_in_image, seg_info_offset) +
                    outputSegments.size() * sizeof(uint32_t));

  // Initialize all offsets to 0, which indicates that the segment does not
  // have fixups. Those that do have them will be filled in below.
  for (size_t i = 0; i < outputSegments.size(); ++i)
    segStarts[i] = 0;

  for (const SegmentInfo &seg : fixupSegments) {
    segStarts[seg.oseg->index] = curOffset() - header->starts_offset;
    buf += seg.writeTo(buf);
  }

  // Write the imports table.
  header->imports_offset = curOffset();
  uint64_t nameOffset = 0;
  for (auto [import, idx] : bindings) {
    const Symbol &sym = *import.first;
    buf += writeImport(buf, importFormat, ordinalForSymbol(sym),
                       sym.isWeakRef(), nameOffset, import.second);
    nameOffset += sym.getName().size() + 1;
  }

  // Write the imported symbol names.
  header->symbols_offset = curOffset();
  for (auto [import, idx] : bindings) {
    StringRef name = import.first->getName();
    memcpy(buf, name.data(), name.size());
    buf += name.size() + 1; // account for null terminator
  }

  assert(curOffset() == getRawSize());
}

// This is step 2 of the algorithm described in the class comment of
// ChainedFixupsSection.
void ChainedFixupsSection::finalizeContents() {
  assert(target->wordSize == 8 && "Only 64-bit platforms are supported");
  assert(config->emitChainedFixups);

  if (!isUInt<32>(symtabSize))
    error("cannot encode chained fixups: imported symbols table size " +
          Twine(symtabSize) + " exceeds 4 GiB");

  bool needsLargeOrdinal = any_of(bindings, [](const auto &p) {
    // 0xF1 - 0xFF are reserved for special ordinals in the 8-bit encoding.
    return ordinalForSymbol(*p.first.first) > 0xF0;
  });

  if (needsLargeAddend || !isUInt<23>(symtabSize) || needsLargeOrdinal)
    importFormat = DYLD_CHAINED_IMPORT_ADDEND64;
  else if (needsAddend)
    importFormat = DYLD_CHAINED_IMPORT_ADDEND;
  else
    importFormat = DYLD_CHAINED_IMPORT;

  for (Location &loc : locations)
    loc.offset =
        loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(loc.offset);

  llvm::sort(locations, [](const Location &a, const Location &b) {
    const OutputSegment *segA = a.isec->parent->parent;
    const OutputSegment *segB = b.isec->parent->parent;
    if (segA == segB)
      return a.offset < b.offset;
    return segA->addr < segB->addr;
  });

  auto sameSegment = [](const Location &a, const Location &b) {
    return a.isec->parent->parent == b.isec->parent->parent;
  };

  const uint64_t pageSize = target->getPageSize();
  for (size_t i = 0, count = locations.size(); i < count;) {
    const Location &firstLoc = locations[i];
    fixupSegments.emplace_back(firstLoc.isec->parent->parent);
    while (i < count && sameSegment(locations[i], firstLoc)) {
      uint32_t pageIdx = locations[i].offset / pageSize;
      fixupSegments.back().pageStarts.emplace_back(
          pageIdx, locations[i].offset % pageSize);
      ++i;
      while (i < count && sameSegment(locations[i], firstLoc) &&
             locations[i].offset / pageSize == pageIdx)
        ++i;
    }
  }

  // Compute the expected encoded size.
  size = alignTo<8>(sizeof(dyld_chained_fixups_header));
  size += alignTo<8>(offsetof(dyld_chained_starts_in_image, seg_info_offset) +
                     outputSegments.size() * sizeof(uint32_t));
  for (const SegmentInfo &seg : fixupSegments)
    size += seg.getSize();
  size += importEntrySize(importFormat) * bindings.size();
  size += symtabSize;
}

template SymtabSection *macho::makeSymtabSection<LP64>(StringTableSection &);
template SymtabSection *macho::makeSymtabSection<ILP32>(StringTableSection &);