SyntheticSections.cpp source code [llvm_projects/lld/MachO/SyntheticSections.cpp]

1	//===- SyntheticSections.cpp ---------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "SyntheticSections.h"
10	#include "ConcatOutputSection.h"
11	#include "Config.h"
12	#include "ExportTrie.h"
13	#include "ICF.h"
14	#include "InputFiles.h"
15	#include "ObjC.h"
16	#include "OutputSegment.h"
17	#include "SectionPriorities.h"
18	#include "SymbolTable.h"
19	#include "Symbols.h"
20
21	#include "lld/Common/CommonLinkerContext.h"
22	#include "llvm/ADT/STLExtras.h"
23	#include "llvm/Config/llvm-config.h"
24	#include "llvm/Support/FileSystem.h"
25	#include "llvm/Support/LEB128.h"
26	#include "llvm/Support/Parallel.h"
27	#include "llvm/Support/xxhash.h"
28
29	#if defined(__APPLE__)
30	#include <sys/mman.h>
31
32	#define COMMON_DIGEST_FOR_OPENSSL
33	#include <CommonCrypto/CommonDigest.h>
34	#else
35	#include "llvm/Support/SHA256.h"
36	#endif
37
38	using namespace llvm;
39	using namespace llvm::MachO;
40	using namespace llvm::support;
41	using namespace llvm::support::endian;
42	using namespace lld;
43	using namespace lld::macho;
44
45	// Reads `len` bytes at data and writes the 32-byte SHA256 checksum to `output`.
46	static void sha256(const uint8_t data, size_t len, uint8_t output) {
47	#if defined(__APPLE__)
48	// FIXME: Make LLVM's SHA256 faster and use it unconditionally. See PR56121
49	// for some notes on this.
50	CC_SHA256(data, len, output);
51	#else
52	ArrayRef<uint8_t> block(data, len);
53	std::array<uint8_t, `32`> hash = SHA256::hash(Data: block);
54	static_assert(hash.size() == CodeSignatureSection::hashSize);
55	memcpy(dest: output, src: hash.data(), n: hash.size());
56	#endif
57	}
58
// Definitions of linker-wide state in the lld::macho namespace.
// `in` groups the synthetic section instances this file refers to (in.got,
// in.rebase, in.binding, ...). `syntheticSections` receives every
// SyntheticSection from its constructor.
InStruct macho::in;
std::vector<SyntheticSection *> macho::syntheticSections;
61
62	SyntheticSection::SyntheticSection(const char segname, const* char *name)
63	: OutputSection (SyntheticKind, name) {
64	std::tie(args&: this->segname, args&: this->name) = maybeRenameSection(key: {segname, name});
65	isec = makeSyntheticInputSection(segName: segname, sectName: name);
66	isec->parent = this;
67	syntheticSections.push_back(x: this);
68	}
69
70	// dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
71	// from the beginning of the file (i.e. the header).
72	MachHeaderSection::MachHeaderSection()
73	: SyntheticSection (segment_names::text, section_names::header) {
74	// XXX: This is a hack. (See D97007)
75	// Setting the index to 1 to pretend that this section is the text
76	// section.
77	index = `1`;
78	isec->isFinal = true;
79	}
80
81	void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
82	loadCommands.push_back(x: lc);
83	sizeOfCmds += lc->getSize();
84	}
85
86	uint64_t MachHeaderSection::getSize() const {
87	uint64_t size = target->headerSize + sizeOfCmds + config ->headerPad;
88	// If we are emitting an encryptable binary, our load commands must have a
89	// separate (non-encrypted) page to themselves.
90	if (config ->emitEncryptionInfo)
91	size = alignToPowerOf2(Value: size, Align: target->getPageSize());
92	return size;
93	}
94
95	static uint32_t cpuSubtype() {
96	uint32_t subtype = target->cpuSubtype;
97
98	if (config ->outputType == MH_EXECUTE && !config ->staticLink &&
99	target->cpuSubtype == CPU_SUBTYPE_X86_64_ALL &&
100	config ->platform() == PLATFORM_MACOS &&
101	config ->platformInfo.target.MinDeployment >= VersionTuple (`10`, `5`))
102	subtype \|= CPU_SUBTYPE_LIB64;
103
104	return subtype;
105	}
106
107	static bool hasWeakBinding() {
108	return config ->emitChainedFixups ? in.chainedFixups->hasWeakBinding()
109	: in.weakBinding->hasEntry();
110	}
111
112	static bool hasNonWeakDefinition() {
113	return config ->emitChainedFixups ? in.chainedFixups->hasNonWeakDefinition()
114	: in.weakBinding->hasNonWeakDefinition();
115	}
116
117	void MachHeaderSection::writeTo(uint8_t buf) const* {
118	auto hdr = reinterpret_cast<mach_header >(buf);
119	hdr->magic = target->magic;
120	hdr->cputype = target->cpuType;
121	hdr->cpusubtype = cpuSubtype();
122	hdr->filetype = config ->outputType;
123	hdr->ncmds = loadCommands.size();
124	hdr->sizeofcmds = sizeOfCmds;
125	hdr->flags = MH_DYLDLINK;
126
127	if (config ->namespaceKind == NamespaceKind::twolevel)
128	hdr->flags \|= MH_NOUNDEFS \| MH_TWOLEVEL;
129
130	if (config ->outputType == MH_DYLIB && !config ->hasReexports)
131	hdr->flags \|= MH_NO_REEXPORTED_DYLIBS;
132
133	if (config ->markDeadStrippableDylib)
134	hdr->flags \|= MH_DEAD_STRIPPABLE_DYLIB;
135
136	if (config ->outputType == MH_EXECUTE && config ->isPic)
137	hdr->flags \|= MH_PIE;
138
139	if (config ->outputType == MH_DYLIB && config ->applicationExtension)
140	hdr->flags \|= MH_APP_EXTENSION_SAFE;
141
142	if (in.exports->hasWeakSymbol \|\| hasNonWeakDefinition())
143	hdr->flags \|= MH_WEAK_DEFINES;
144
145	if (in.exports->hasWeakSymbol \|\| hasWeakBinding())
146	hdr->flags \|= MH_BINDS_TO_WEAK;
147
148	for (const OutputSegment *seg : outputSegments) {
149	for (const OutputSection *osec : seg->getSections()) {
150	if (isThreadLocalVariables(flags: osec->flags)) {
151	hdr->flags \|= MH_HAS_TLV_DESCRIPTORS;
152	break;
153	}
154	}
155	}
156
157	uint8_t p = reinterpret_cast<uint8_t >(hdr) + target->headerSize;
158	for (const LoadCommand *lc : loadCommands) {
159	lc->writeTo(buf: p);
160	p += lc->getSize();
161	}
162	}
163
164	PageZeroSection::PageZeroSection()
165	: SyntheticSection (segment_names::pageZero, section_names::pageZero) {}
166
167	RebaseSection::RebaseSection()
168	: LinkEditSection (segment_names::linkEdit, section_names::rebase) {}
169
namespace {
// Describes a pending run of rebase locations that has not been emitted yet.
struct RebaseState {
  // Number of locations in the current run.
  uint64_t sequenceLength;
  // Distance in bytes between consecutive locations of the run.
  uint64_t skipLength;
};
} // namespace
176
177	static void emitIncrement(uint64_t incr, raw_svector_ostream &os) {
178	assert(incr != `0`);
179
180	if ((incr >> target->p2WordSize) <= REBASE_IMMEDIATE_MASK &&
181	(incr % target->wordSize) == `0`) {
182	os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED \|
183	(incr >> target->p2WordSize));
184	} else {
185	os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB);
186	encodeULEB128(Value: incr, OS&: os);
187	}
188	}
189
190	static void flushRebase(const RebaseState &state, raw_svector_ostream &os) {
191	assert(state.sequenceLength > `0`);
192
193	if (state.skipLength == target->wordSize) {
194	if (state.sequenceLength <= REBASE_IMMEDIATE_MASK) {
195	os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES \|
196	state.sequenceLength);
197	} else {
198	os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
199	encodeULEB128(Value: state.sequenceLength, OS&: os);
200	}
201	} else if (state.sequenceLength == `1`) {
202	os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB);
203	encodeULEB128(Value: state.skipLength - target->wordSize, OS&: os);
204	} else {
205	os << static_cast<uint8_t>(
206	REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB);
207	encodeULEB128(Value: state.sequenceLength, OS&: os);
208	encodeULEB128(Value: state.skipLength - target->wordSize, OS&: os);
209	}
210	}
211
212	// Rebases are communicated to dyld using a bytecode, whose opcodes cause the
213	// memory location at a specific address to be rebased and/or the address to be
214	// incremented.
215	//
216	// Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic
217	// one, encoding a series of evenly spaced addresses. This algorithm works by
218	// splitting up the sorted list of addresses into such chunks. If the locations
219	// are consecutive or the sequence consists of a single location, flushRebase
220	// will use a smaller, more specialized encoding.
221	static void encodeRebases(const OutputSegment *seg,
222	MutableArrayRef<Location> locations,
223	raw_svector_ostream &os) {
224	// dyld operates on segments. Translate section offsets into segment offsets.
225	for (Location &loc : locations)
226	loc.offset =
227	loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(off: loc.offset);
228	// The algorithm assumes that locations are unique.
229	Location *end =
230	llvm::unique(R&: locations, P: [](const Location &a, const Location &b) {
231	return a.offset == b.offset;
232	});
233	size_t count = end - locations.begin();
234
235	os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB \|
236	seg->index);
237	assert(!locations.empty());
238	uint64_t offset = locations [`0`].offset;
239	encodeULEB128(Value: offset, OS&: os);
240
241	RebaseState state{.sequenceLength: `1`, .skipLength: target->wordSize};
242
243	for (size_t i = `1`; i < count; ++i) {
244	offset = locations [i].offset;
245
246	uint64_t skip = offset - locations [i - `1`].offset;
247	assert(skip != `0` && "duplicate locations should have been weeded out");
248
249	if (skip == state.skipLength) {
250	++state.sequenceLength;
251	} else if (state.sequenceLength == `1`) {
252	++state.sequenceLength;
253	state.skipLength = skip;
254	} else if (skip < state.skipLength) {
255	// The address is lower than what the rebase pointer would be if the last
256	// location would be part of a sequence. We start a new sequence from the
257	// previous location.
258	--state.sequenceLength;
259	flushRebase(state, os);
260
261	state.sequenceLength = `2`;
262	state.skipLength = skip;
263	} else {
264	// The address is at some positive offset from the rebase pointer. We
265	// start a new sequence which begins with the current location.
266	flushRebase(state, os);
267	emitIncrement(incr: skip - state.skipLength, os);
268	state.sequenceLength = `1`;
269	state.skipLength = target->wordSize;
270	}
271	}
272	flushRebase(state, os);
273	}
274
275	void RebaseSection::finalizeContents() {
276	if (locations.empty())
277	return;
278
279	raw_svector_ostream os{contents};
280	os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM \| REBASE_TYPE_POINTER);
281
282	llvm::sort(C&: locations, Comp: [](const Location &a, const Location &b) {
283	return a.isec->getVA(off: a.offset) < b.isec->getVA(off: b.offset);
284	});
285
286	for (size_t i = `0`, count = locations.size(); i < count;) {
287	const OutputSegment *seg = locations [i].isec->parent->parent;
288	size_t j = i + `1`;
289	while (j < count && locations [j].isec->parent->parent == seg)
290	++j;
291	encodeRebases(seg, locations: {locations.data() + i, locations.data() + j}, os);
292	i = j;
293	}
294	os << static_cast<uint8_t>(REBASE_OPCODE_DONE);
295	}
296
297	void RebaseSection::writeTo(uint8_t buf) const* {
298	memcpy(dest: buf, src: contents.data(), n: contents.size());
299	}
300
301	NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname,
302	const char *name)
303	: SyntheticSection (segname, name) {
304	align = target->wordSize;
305	}
306
307	void macho::addNonLazyBindingEntries(const Symbol *sym,
308	const InputSection *isec, uint64_t offset,
309	int64_t addend) {
310	if (config ->emitChainedFixups) {
311	if (needsBinding(sym))
312	in.chainedFixups->addBinding(dysym: sym, isec, offset, addend);
313	else if (isa<Defined>(Val: sym))
314	in.chainedFixups->addRebase(isec, offset);
315	else
316	llvm_unreachable("cannot bind to an undefined symbol");
317	return;
318	}
319
320	if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
321	in.binding->addEntry(dysym, isec, offset, addend);
322	if (dysym->isWeakDef())
323	in.weakBinding->addEntry(symbol: sym, isec, offset, addend);
324	} else if (const auto *defined = dyn_cast<Defined>(Val: sym)) {
325	in.rebase->addEntry(isec, offset);
326	if (defined->isExternalWeakDef())
327	in.weakBinding->addEntry(symbol: sym, isec, offset, addend);
328	else if (defined->interposable)
329	in.binding->addEntry(dysym: sym, isec, offset, addend);
330	} else {
331	// Undefined symbols are filtered out in scanRelocations(); we should never
332	// get here
333	llvm_unreachable("cannot bind to an undefined symbol");
334	}
335	}
336
337	void NonLazyPointerSectionBase::addEntry(Symbol *sym) {
338	if (entries.insert(X: sym)) {
339	assert(!sym->isInGot());
340	sym->gotIndex = entries.size() - `1`;
341
342	addNonLazyBindingEntries(sym, isec, offset: sym->gotIndex * target->wordSize);
343	}
344	}
345
346	void macho::writeChainedRebase(uint8_t *buf, uint64_t targetVA) {
347	assert(config->emitChainedFixups);
348	assert(target->wordSize == `8` && "Only 64-bit platforms are supported");
349	auto rebase = reinterpret_cast<dyld_chained_ptr_64_rebase >(buf);
350	rebase->target = targetVA & `0xf'ffff'ffff`;
351	rebase->high8 = (targetVA >> `56`);
352	rebase->reserved = `0`;
353	rebase->next = `0`;
354	rebase->bind = `0`;
355
356	// The fixup format places a 64 GiB limit on the output's size.
357	// Should we handle this gracefully?
358	uint64_t encodedVA = rebase->target \| ((uint64_t)rebase->high8 << `56`);
359	if (encodedVA != targetVA)
360	error(msg: "rebase target address 0x" + Twine::utohexstr(Val: targetVA) +
361	" does not fit into chained fixup. Re-link with -no_fixup_chains");
362	}
363
364	static void writeChainedBind(uint8_t buf, const* Symbol *sym, int64_t addend) {
365	assert(config->emitChainedFixups);
366	assert(target->wordSize == `8` && "Only 64-bit platforms are supported");
367	auto bind = reinterpret_cast<dyld_chained_ptr_64_bind >(buf);
368	auto [ordinal, inlineAddend] = in.chainedFixups->getBinding(sym, addend);
369	bind->ordinal = ordinal;
370	bind->addend = inlineAddend;
371	bind->reserved = `0`;
372	bind->next = `0`;
373	bind->bind = `1`;
374	}
375
376	void macho::writeChainedFixup(uint8_t buf, const* Symbol *sym, int64_t addend) {
377	if (needsBinding(sym))
378	writeChainedBind(buf, sym, addend);
379	else
380	writeChainedRebase(buf, targetVA: sym->getVA() + addend);
381	}
382
383	void NonLazyPointerSectionBase::writeTo(uint8_t buf) const* {
384	if (config ->emitChainedFixups) {
385	for (const auto &[i, entry] : llvm::enumerate(First: entries))
386	writeChainedFixup(buf: &buf[i * target->wordSize], sym: entry, addend: `0`);
387	} else {
388	for (const auto &[i, entry] : llvm::enumerate(First: entries))
389	if (auto *defined = dyn_cast<Defined>(Val: entry))
390	write64le(P: &buf[i * target->wordSize], V: defined->getVA());
391	}
392	}
393
394	GotSection::GotSection()
395	: NonLazyPointerSectionBase (segment_names::data, section_names::got) {
396	flags = S_NON_LAZY_SYMBOL_POINTERS;
397	}
398
399	TlvPointerSection::TlvPointerSection()
400	: NonLazyPointerSectionBase (segment_names::data,
401	section_names::threadPtrs) {
402	flags = S_THREAD_LOCAL_VARIABLE_POINTERS;
403	}
404
405	BindingSection::BindingSection()
406	: LinkEditSection (segment_names::linkEdit, section_names::binding) {}
407
408	namespace {
409	struct Binding {
410	OutputSegment segment = nullptr*;
411	uint64_t offset = `0`;
412	int64_t addend = `0`;
413	};
414	struct BindIR {
415	// Default value of 0xF0 is not valid opcode and should make the program
416	// scream instead of accidentally writing "valid" values.
417	uint8_t opcode = `0xF0`;
418	uint64_t data = `0`;
419	uint64_t consecutiveCount = `0`;
420	};
421	} // namespace
422
423	// Encode a sequence of opcodes that tell dyld to write the address of symbol +
424	// addend at osec->addr + outSecOff.
425	//
426	// The bind opcode "interpreter" remembers the values of each binding field, so
427	// we only need to encode the differences between bindings. Hence the use of
428	// lastBinding.
429	static void encodeBinding(const OutputSection *osec, uint64_t outSecOff,
430	int64_t addend, Binding &lastBinding,
431	std::vector<BindIR> &opcodes) {
432	OutputSegment *seg = osec->parent;
433	uint64_t offset = osec->getSegmentOffset() + outSecOff;
434	if (lastBinding.segment != seg) {
435	opcodes.push_back(
436	x: {.opcode: static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB \|
437	seg->index),
438	.data: offset});
439	lastBinding.segment = seg;
440	lastBinding.offset = offset;
441	} else if (lastBinding.offset != offset) {
442	opcodes.push_back(x: {.opcode: BIND_OPCODE_ADD_ADDR_ULEB, .data: offset - lastBinding.offset});
443	lastBinding.offset = offset;
444	}
445
446	if (lastBinding.addend != addend) {
447	opcodes.push_back(
448	x: {.opcode: BIND_OPCODE_SET_ADDEND_SLEB, .data: static_cast<uint64_t>(addend)});
449	lastBinding.addend = addend;
450	}
451
452	opcodes.push_back(x: {.opcode: BIND_OPCODE_DO_BIND, .data: `0`});
453	// DO_BIND causes dyld to both perform the binding and increment the offset
454	lastBinding.offset += target->wordSize;
455	}
456
457	static void optimizeOpcodes(std::vector<BindIR> &opcodes) {
458	// Pass 1: Combine bind/add pairs
459	size_t i;
460	int pWrite = `0`;
461	for (i = `1`; i < opcodes.size(); ++i, ++pWrite) {
462	if ((opcodes [i].opcode == BIND_OPCODE_ADD_ADDR_ULEB) &&
463	(opcodes [i - `1`].opcode == BIND_OPCODE_DO_BIND)) {
464	opcodes [pWrite].opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB;
465	opcodes [pWrite].data = opcodes [i].data;
466	++i;
467	} else {
468	opcodes [pWrite] = opcodes [i - `1`];
469	}
470	}
471	if (i == opcodes.size())
472	opcodes [pWrite] = opcodes [i - `1`];
473	opcodes.resize(new_size: pWrite + `1`);
474
475	// Pass 2: Compress two or more bind_add opcodes
476	pWrite = `0`;
477	for (i = `1`; i < opcodes.size(); ++i, ++pWrite) {
478	if ((opcodes [i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
479	(opcodes [i - `1`].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
480	(opcodes [i].data == opcodes [i - `1`].data)) {
481	opcodes [pWrite].opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB;
482	opcodes [pWrite].consecutiveCount = `2`;
483	opcodes [pWrite].data = opcodes [i].data;
484	++i;
485	while (i < opcodes.size() &&
486	(opcodes [i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
487	(opcodes [i].data == opcodes [i - `1`].data)) {
488	opcodes [pWrite].consecutiveCount++;
489	++i;
490	}
491	} else {
492	opcodes [pWrite] = opcodes [i - `1`];
493	}
494	}
495	if (i == opcodes.size())
496	opcodes [pWrite] = opcodes [i - `1`];
497	opcodes.resize(new_size: pWrite + `1`);
498
499	// Pass 3: Use immediate encodings
500	// Every binding is the size of one pointer. If the next binding is a
501	// multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the
502	// opcode can be scaled by wordSize into a single byte and dyld will
503	// expand it to the correct address.
504	for (auto &p : opcodes) {
505	// It's unclear why the check needs to be less than BIND_IMMEDIATE_MASK,
506	// but ld64 currently does this. This could be a potential bug, but
507	// for now, perform the same behavior to prevent mysterious bugs.
508	if ((p.opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
509	((p.data / target->wordSize) < BIND_IMMEDIATE_MASK) &&
510	((p.data % target->wordSize) == `0`)) {
511	p.opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED;
512	p.data /= target->wordSize;
513	}
514	}
515	}
516
517	static void flushOpcodes(const BindIR &op, raw_svector_ostream &os) {
518	uint8_t opcode = op.opcode & BIND_OPCODE_MASK;
519	switch (opcode) {
520	case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
521	case BIND_OPCODE_ADD_ADDR_ULEB:
522	case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
523	os << op.opcode;
524	encodeULEB128(Value: op.data, OS&: os);
525	break;
526	case BIND_OPCODE_SET_ADDEND_SLEB:
527	os << op.opcode;
528	encodeSLEB128(Value: static_cast<int64_t>(op.data), OS&: os);
529	break;
530	case BIND_OPCODE_DO_BIND:
531	os << op.opcode;
532	break;
533	case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
534	os << op.opcode;
535	encodeULEB128(Value: op.consecutiveCount, OS&: os);
536	encodeULEB128(Value: op.data, OS&: os);
537	break;
538	case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
539	os << static_cast<uint8_t>(op.opcode \| op.data);
540	break;
541	default:
542	llvm_unreachable("cannot bind to an unrecognized symbol");
543	}
544	}
545
546	static bool needsWeakBind(const Symbol &sym) {
547	if (auto *dysym = dyn_cast<DylibSymbol>(Val: &sym))
548	return dysym->isWeakDef();
549	if (auto *defined = dyn_cast<Defined>(Val: &sym))
550	return defined->isExternalWeakDef();
551	return false;
552	}
553
554	// Non-weak bindings need to have their dylib ordinal encoded as well.
555	static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) {
556	if (config ->namespaceKind == NamespaceKind::flat \|\| dysym.isDynamicLookup())
557	return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP);
558	assert(dysym.getFile()->isReferenced());
559	return dysym.getFile()->ordinal;
560	}
561
562	static int16_t ordinalForSymbol(const Symbol &sym) {
563	if (config ->emitChainedFixups && needsWeakBind(sym))
564	return BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
565	if (const auto *dysym = dyn_cast<DylibSymbol>(Val: &sym))
566	return ordinalForDylibSymbol(dysym: *dysym);
567	assert(cast<Defined>(&sym)->interposable);
568	return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
569	}
570
571	static void encodeDylibOrdinal(int16_t ordinal, raw_svector_ostream &os) {
572	if (ordinal <= `0`) {
573	os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM \|
574	(ordinal & BIND_IMMEDIATE_MASK));
575	} else if (ordinal <= BIND_IMMEDIATE_MASK) {
576	os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM \| ordinal);
577	} else {
578	os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
579	encodeULEB128(Value: ordinal, OS&: os);
580	}
581	}
582
583	static void encodeWeakOverride(const Defined *defined,
584	raw_svector_ostream &os) {
585	os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM \|
586	BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION)
587	<< defined->getName() << `'\0'`;
588	}
589
590	// Organize the bindings so we can encoded them with fewer opcodes.
591	//
592	// First, all bindings for a given symbol should be grouped together.
593	// BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it
594	// has an associated symbol string), so we only want to emit it once per symbol.
595	//
596	// Within each group, we sort the bindings by address. Since bindings are
597	// delta-encoded, sorting them allows for a more compact result. Note that
598	// sorting by address alone ensures that bindings for the same segment / section
599	// are located together, minimizing the number of times we have to emit
600	// BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB.
601	//
602	// Finally, we sort the symbols by the address of their first binding, again
603	// to facilitate the delta-encoding process.
604	template <class Sym>
605	std::vector<std::pair<const Sym *, std::vector<BindingEntry>>>
606	sortBindings(const BindingsMap<const Sym *> &bindingsMap) {
607	std::vector<std::pair<const Sym *, std::vector<BindingEntry>>> bindingsVec(
608	bindingsMap.begin(), bindingsMap.end());
609	for (auto &p : bindingsVec) {
610	std::vector<BindingEntry> &bindings = p.second;
611	llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
612	return a.target.getVA() < b.target.getVA();
613	});
614	}
615	llvm::sort(bindingsVec, [](const auto &a, const auto &b) {
616	return a.second[`0`].target.getVA() < b.second[`0`].target.getVA();
617	});
618	return bindingsVec;
619	}
620
621	// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
622	// interprets to update a record with the following fields:
623	// segment index (of the segment to write the symbol addresses to, typically*
624	// the __DATA_CONST segment which contains the GOT)
625	// offset within the segment, indicating the next location to write a binding*
626	// symbol type*
627	// symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)*
628	// symbol name*
629	// addend*
630	// When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
631	// a symbol in the GOT, and increments the segment offset to point to the next
632	// entry. It does not* clear the record state after doing the bind, so*
633	// subsequent opcodes only need to encode the differences between bindings.
634	void BindingSection::finalizeContents() {
635	raw_svector_ostream os{contents};
636	Binding lastBinding;
637	int16_t lastOrdinal = `0`;
638
639	for (auto &p : sortBindings(bindingsMap)) {
640	const Symbol *sym = p.first;
641	std::vector<BindingEntry> &bindings = p.second;
642	uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
643	if (sym->isWeakRef())
644	flags \|= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
645	os << flags << sym->getName() << `'\0'`
646	<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM \| BIND_TYPE_POINTER);
647	int16_t ordinal = ordinalForSymbol(sym: *sym);
648	if (ordinal != lastOrdinal) {
649	encodeDylibOrdinal(ordinal, os);
650	lastOrdinal = ordinal;
651	}
652	std::vector<BindIR> opcodes;
653	for (const BindingEntry &b : bindings)
654	encodeBinding(osec: b.target.isec->parent,
655	outSecOff: b.target.isec->getOffset(off: b.target.offset), addend: b.addend,
656	lastBinding, opcodes);
657	if (config ->optimize > `1`)
658	optimizeOpcodes(opcodes);
659	for (const auto &op : opcodes)
660	flushOpcodes(op, os);
661	}
662	if (!bindingsMap.empty())
663	os << static_cast<uint8_t>(BIND_OPCODE_DONE);
664	}
665
666	void BindingSection::writeTo(uint8_t buf) const* {
667	memcpy(dest: buf, src: contents.data(), n: contents.size());
668	}
669
670	WeakBindingSection::WeakBindingSection()
671	: LinkEditSection (segment_names::linkEdit, section_names::weakBinding) {}
672
673	void WeakBindingSection::finalizeContents() {
674	raw_svector_ostream os{contents};
675	Binding lastBinding;
676
677	for (const Defined *defined : definitions)
678	encodeWeakOverride(defined, os);
679
680	for (auto &p : sortBindings(bindingsMap)) {
681	const Symbol *sym = p.first;
682	std::vector<BindingEntry> &bindings = p.second;
683	os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
684	<< sym->getName() << `'\0'`
685	<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM \| BIND_TYPE_POINTER);
686	std::vector<BindIR> opcodes;
687	for (const BindingEntry &b : bindings)
688	encodeBinding(osec: b.target.isec->parent,
689	outSecOff: b.target.isec->getOffset(off: b.target.offset), addend: b.addend,
690	lastBinding, opcodes);
691	if (config ->optimize > `1`)
692	optimizeOpcodes(opcodes);
693	for (const auto &op : opcodes)
694	flushOpcodes(op, os);
695	}
696	if (!bindingsMap.empty() \|\| !definitions.empty())
697	os << static_cast<uint8_t>(BIND_OPCODE_DONE);
698	}
699
700	void WeakBindingSection::writeTo(uint8_t buf) const* {
701	memcpy(dest: buf, src: contents.data(), n: contents.size());
702	}
703
704	StubsSection::StubsSection()
705	: SyntheticSection (segment_names::text, section_names::stubs) {
706	flags = S_SYMBOL_STUBS \| S_ATTR_SOME_INSTRUCTIONS \| S_ATTR_PURE_INSTRUCTIONS;
707	// The stubs section comprises machine instructions, which are aligned to
708	// 4 bytes on the archs we care about.
709	align = `4`;
710	reserved2 = target->stubSize;
711	}
712
713	uint64_t StubsSection::getSize() const {
714	return entries.size() * target->stubSize;
715	}
716
717	void StubsSection::writeTo(uint8_t buf) const* {
718	size_t off = `0`;
719	for (const Symbol *sym : entries) {
720	uint64_t pointerVA =
721	config ->emitChainedFixups ? sym->getGotVA() : sym->getLazyPtrVA();
722	target->writeStub(buf: buf + off, *sym, pointerVA);
723	off += target->stubSize;
724	}
725	}
726
727	void StubsSection::finalize() { isFinal = true; }
728
729	static void addBindingsForStub(Symbol *sym) {
730	assert(!config->emitChainedFixups);
731	if (auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
732	if (sym->isWeakDef()) {
733	in.binding->addEntry(dysym, isec: in.lazyPointers->isec,
734	offset: sym->stubsIndex * target->wordSize);
735	in.weakBinding->addEntry(symbol: sym, isec: in.lazyPointers->isec,
736	offset: sym->stubsIndex * target->wordSize);
737	} else {
738	in.lazyBinding->addEntry(dysym);
739	}
740	} else if (auto *defined = dyn_cast<Defined>(Val: sym)) {
741	if (defined->isExternalWeakDef()) {
742	in.rebase->addEntry(isec: in.lazyPointers->isec,
743	offset: sym->stubsIndex * target->wordSize);
744	in.weakBinding->addEntry(symbol: sym, isec: in.lazyPointers->isec,
745	offset: sym->stubsIndex * target->wordSize);
746	} else if (defined->interposable) {
747	in.lazyBinding->addEntry(dysym: sym);
748	} else {
749	llvm_unreachable("invalid stub target");
750	}
751	} else {
752	llvm_unreachable("invalid stub target symbol type");
753	}
754	}
755
756	void StubsSection::addEntry(Symbol *sym) {
757	bool inserted = entries.insert(X: sym);
758	if (inserted) {
759	sym->stubsIndex = entries.size() - `1`;
760
761	if (config ->emitChainedFixups)
762	in.got->addEntry(sym);
763	else
764	addBindingsForStub(sym);
765	}
766	}
767
768	StubHelperSection::StubHelperSection()
769	: SyntheticSection (segment_names::text, section_names::stubHelper) {
770	flags = S_ATTR_SOME_INSTRUCTIONS \| S_ATTR_PURE_INSTRUCTIONS;
771	align = `4`; // This section comprises machine instructions
772	}
773
774	uint64_t StubHelperSection::getSize() const {
775	return target->stubHelperHeaderSize +
776	in.lazyBinding->getEntries().size() * target->stubHelperEntrySize;
777	}
778
779	bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); }
780
781	void StubHelperSection::writeTo(uint8_t buf) const* {
782	target->writeStubHelperHeader(buf);
783	size_t off = target->stubHelperHeaderSize;
784	for (const Symbol *sym : in.lazyBinding->getEntries()) {
785	target->writeStubHelperEntry(buf: buf + off, *sym, entryAddr: addr + off);
786	off += target->stubHelperEntrySize;
787	}
788	}
789
790	void StubHelperSection::setUp() {
791	Symbol binder = symtab ->addUndefined(name: "dyld_stub_binder", /file=/*nullptr,
792	/isWeakRef=/false);
793	if (auto *undefined = dyn_cast<Undefined>(Val: binder))
794	treatUndefinedSymbol(*undefined,
795	source: "lazy binding (normally in libSystem.dylib)");
796
797	// treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check.
798	stubBinder = dyn_cast_or_null<DylibSymbol>(Val: binder);
799	if (stubBinder == nullptr)
800	return;
801
802	in.got->addEntry(sym: stubBinder);
803
804	in.imageLoaderCache->parent =
805	ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
806	addInputSection(inputSection: in.imageLoaderCache);
807	// Since this isn't in the symbol table or in any input file, the noDeadStrip
808	// argument doesn't matter.
809	dyldPrivate =
810	make<Defined>(args: "__dyld_private", args: nullptr, args&: in.imageLoaderCache, args: `0`, args: `0`,
811	/isWeakDef=/args: false,
812	/isExternal=/args: false, /isPrivateExtern=/args: false,
813	/includeInSymtab=/args: true,
814	/isReferencedDynamically=/args: false,
815	/noDeadStrip=/args: false);
816	dyldPrivate->used = true;
817	}
818
// Maps a method name to the __objc_selrefs input section that refers to it.
// Filled by initialize() and makeSelRef(), queried by getSelRef() and emptied
// by cleanup().
llvm::DenseMap<llvm::CachedHashStringRef, ConcatInputSection *>
    ObjCSelRefsHelper::methnameToSelref;
821	void ObjCSelRefsHelper::initialize() {
822	// Do not fold selrefs without ICF.
823	if (config ->icfLevel == ICFLevel::none)
824	return;
825
826	// Search methnames already referenced in __objc_selrefs
827	// Map the name to the corresponding selref entry
828	// which we will reuse when creating objc stubs.
829	for (ConcatInputSection *isec : inputSections) {
830	if (isec->shouldOmitFromOutput())
831	continue;
832	if (isec->getName() != section_names::objcSelrefs)
833	continue;
834	// We expect a single relocation per selref entry to __objc_methname that
835	// might be aggregated.
836	assert(isec->relocs.size() == `1`);
837	auto Reloc = isec->relocs [`0`];
838	if (const auto sym = Reloc.referent.dyn_cast<Symbol >()) {
839	if (const auto *d = dyn_cast<Defined>(Val: sym)) {
840	auto *cisec = cast<CStringInputSection>(Val: d->isec());
841	auto methname = cisec->getStringRefAtOffset(off: d->value);
842	methnameToSelref [CachedHashStringRef (methname)] = isec;
843	}
844	}
845	}
846	}
847
848	void ObjCSelRefsHelper::cleanup() { methnameToSelref.clear(); }
849
850	ConcatInputSection *ObjCSelRefsHelper::makeSelRef(StringRef methname) {
851	auto methnameOffset =
852	in.objcMethnameSection->getStringOffset(str: methname).outSecOff;
853
854	size_t wordSize = target->wordSize;
855	uint8_t *selrefData = bAlloc().Allocate<uint8_t>(Num: wordSize);
856	write64le(P: selrefData, V: methnameOffset);
857	ConcatInputSection *objcSelref =
858	makeSyntheticInputSection(segName: segment_names::data, sectName: section_names::objcSelrefs,
859	flags: S_LITERAL_POINTERS \| S_ATTR_NO_DEAD_STRIP,
860	data: ArrayRef<uint8_t>{selrefData, wordSize},
861	/align=/wordSize);
862	assert(objcSelref->live);
863	objcSelref->relocs.push_back(x: {/type=/target->unsignedRelocType,
864	/pcrel=/false, /length=/`3`,
865	/offset=/`0`,
866	/addend=/static_cast<int64_t>(methnameOffset),
867	/referent=/in.objcMethnameSection->isec});
868	objcSelref->parent = ConcatOutputSection::getOrCreateForInput(objcSelref);
869	addInputSection(inputSection: objcSelref);
870	objcSelref->isFinal = true;
871	methnameToSelref [CachedHashStringRef (methname)] = objcSelref;
872	return objcSelref;
873	}
874
875	ConcatInputSection *ObjCSelRefsHelper::getSelRef(StringRef methname) {
876	auto it = methnameToSelref.find(Val: CachedHashStringRef (methname));
877	if (it == methnameToSelref.end())
878	return nullptr;
879	return it ->second;
880	}
881
882	ObjCStubsSection::ObjCStubsSection()
883	: SyntheticSection (segment_names::text, section_names::objcStubs) {
884	flags = S_ATTR_SOME_INSTRUCTIONS \| S_ATTR_PURE_INSTRUCTIONS;
885	align = config ->objcStubsMode == ObjCStubsMode::fast
886	? target->objcStubsFastAlignment
887	: target->objcStubsSmallAlignment;
888	}
889
890	bool ObjCStubsSection::isObjCStubSymbol(Symbol *sym) {
891	return sym->getName().starts_with(Prefix: symbolPrefix);
892	}
893
894	StringRef ObjCStubsSection::getMethname(Symbol *sym) {
895	assert(isObjCStubSymbol(sym) && "not an objc stub");
896	auto name = sym->getName();
897	StringRef methname = name.drop_front(N: symbolPrefix.size());
898	return methname;
899	}
900
901	void ObjCStubsSection::addEntry(Symbol *sym) {
902	StringRef methname = getMethname(sym);
903	// We create a selref entry for each unique methname.
904	if (!ObjCSelRefsHelper::getSelRef(methname))
905	ObjCSelRefsHelper::makeSelRef(methname);
906
907	auto stubSize = config ->objcStubsMode == ObjCStubsMode::fast
908	? target->objcStubsFastSize
909	: target->objcStubsSmallSize;
910	Defined *newSym = replaceSymbol<Defined>(
911	s: sym, arg: sym->getName(), arg: nullptr, arg&: isec,
912	/value=/arg: symbols.size() * stubSize,
913	/size=/arg&: stubSize,
914	/isWeakDef=/arg: false, /isExternal=/arg: true, /isPrivateExtern=/arg: true,
915	/includeInSymtab=/arg: true, /isReferencedDynamically=/arg: false,
916	/noDeadStrip=/arg: false);
917	symbols.push_back(x: newSym);
918	}
919
920	void ObjCStubsSection::setUp() {
921	objcMsgSend = symtab ->addUndefined(name: "_objc_msgSend", /file=/nullptr,
922	/isWeakRef=/false);
923	if (auto *undefined = dyn_cast<Undefined>(Val: objcMsgSend))
924	treatUndefinedSymbol(*undefined,
925	source: "lazy binding (normally in libobjc.dylib)");
926	objcMsgSend->used = true;
927	if (config ->objcStubsMode == ObjCStubsMode::fast) {
928	in.got->addEntry(sym: objcMsgSend);
929	assert(objcMsgSend->isInGot());
930	} else {
931	assert(config->objcStubsMode == ObjCStubsMode::small);
932	// In line with ld64's behavior, when objc_msgSend is a direct symbol,
933	// we directly reference it.
934	// In other cases, typically when binding in libobjc.dylib,
935	// we generate a stub to invoke objc_msgSend.
936	if (!isa<Defined>(Val: objcMsgSend))
937	in.stubs->addEntry(sym: objcMsgSend);
938	}
939	}
940
941	uint64_t ObjCStubsSection::getSize() const {
942	auto stubSize = config ->objcStubsMode == ObjCStubsMode::fast
943	? target->objcStubsFastSize
944	: target->objcStubsSmallSize;
945	return stubSize * symbols.size();
946	}
947
948	void ObjCStubsSection::writeTo(uint8_t buf) const* {
949	uint64_t stubOffset = `0`;
950	for (Defined *sym : symbols) {
951	auto methname = getMethname(sym);
952	InputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
953	assert(selRef != nullptr && "no selref for methname");
954	auto selrefAddr = selRef->getVA(off: `0`);
955	target->writeObjCMsgSendStub(buf: buf + stubOffset, sym, stubsAddr: in.objcStubs->addr,
956	stubOffset, selrefVA: selrefAddr, objcMsgSend);
957	}
958	}
959
960	LazyPointerSection::LazyPointerSection()
961	: SyntheticSection (segment_names::data, section_names::lazySymbolPtr) {
962	align = target->wordSize;
963	flags = S_LAZY_SYMBOL_POINTERS;
964	}
965
966	uint64_t LazyPointerSection::getSize() const {
967	return in.stubs->getEntries().size() * target->wordSize;
968	}
969
970	bool LazyPointerSection::isNeeded() const {
971	return !in.stubs->getEntries().empty();
972	}
973
974	void LazyPointerSection::writeTo(uint8_t buf) const* {
975	size_t off = `0`;
976	for (const Symbol *sym : in.stubs->getEntries()) {
977	if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
978	if (dysym->hasStubsHelper()) {
979	uint64_t stubHelperOffset =
980	target->stubHelperHeaderSize +
981	dysym->stubsHelperIndex * target->stubHelperEntrySize;
982	write64le(P: buf + off, V: in.stubHelper->addr + stubHelperOffset);
983	}
984	} else {
985	write64le(P: buf + off, V: sym->getVA());
986	}
987	off += target->wordSize;
988	}
989	}
990
991	LazyBindingSection::LazyBindingSection()
992	: LinkEditSection (segment_names::linkEdit, section_names::lazyBinding) {}
993
994	void LazyBindingSection::finalizeContents() {
995	// TODO: Just precompute output size here instead of writing to a temporary
996	// buffer
997	for (Symbol *sym : entries)
998	sym->lazyBindOffset = encode(*sym);
999	}
1000
1001	void LazyBindingSection::writeTo(uint8_t buf) const* {
1002	memcpy(dest: buf, src: contents.data(), n: contents.size());
1003	}
1004
1005	void LazyBindingSection::addEntry(Symbol *sym) {
1006	assert(!config->emitChainedFixups && "Chained fixups always bind eagerly");
1007	if (entries.insert(X: sym)) {
1008	sym->stubsHelperIndex = entries.size() - `1`;
1009	in.rebase->addEntry(isec: in.lazyPointers->isec,
1010	offset: sym->stubsIndex * target->wordSize);
1011	}
1012	}
1013
1014	// Unlike the non-lazy binding section, the bind opcodes in this section aren't
1015	// interpreted all at once. Rather, dyld will start interpreting opcodes at a
1016	// given offset, typically only binding a single symbol before it finds a
1017	// BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
1018	// we cannot encode just the differences between symbols; we have to emit the
1019	// complete bind information for each symbol.
1020	uint32_t LazyBindingSection::encode(const Symbol &sym) {
1021	uint32_t opstreamOffset = contents.size();
1022	OutputSegment *dataSeg = in.lazyPointers->parent;
1023	os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB \|
1024	dataSeg->index);
1025	uint64_t offset =
1026	in.lazyPointers->addr - dataSeg->addr + sym.stubsIndex * target->wordSize;
1027	encodeULEB128(Value: offset, OS&: os);
1028	encodeDylibOrdinal(ordinal: ordinalForSymbol(sym), os);
1029
1030	uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
1031	if (sym.isWeakRef())
1032	flags \|= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
1033
1034	os << flags << sym.getName() << `'\0'`
1035	<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND)
1036	<< static_cast<uint8_t>(BIND_OPCODE_DONE);
1037	return opstreamOffset;
1038	}
1039
1040	ExportSection::ExportSection()
1041	: LinkEditSection (segment_names::linkEdit, section_names::export_) {}
1042
1043	void ExportSection::finalizeContents() {
1044	trieBuilder.setImageBase(in.header->addr);
1045	for (const Symbol *sym : symtab ->getSymbols()) {
1046	if (const auto *defined = dyn_cast<Defined>(Val: sym)) {
1047	if (defined->privateExtern \|\| !defined->isLive())
1048	continue;
1049	trieBuilder.addSymbol(sym: *defined);
1050	hasWeakSymbol = hasWeakSymbol \|\| sym->isWeakDef();
1051	} else if (auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
1052	if (dysym->shouldReexport)
1053	trieBuilder.addSymbol(sym: *dysym);
1054	}
1055	}
1056	size = trieBuilder.build();
1057	}
1058
1059	void ExportSection::writeTo(uint8_t buf) const* { trieBuilder.writeTo(buf); }
1060
1061	DataInCodeSection::DataInCodeSection()
1062	: LinkEditSection (segment_names::linkEdit, section_names::dataInCode) {}
1063
1064	template <class LP>
1065	static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() {
1066	std::vector<MachO::data_in_code_entry> dataInCodeEntries;
1067	for (const InputFile *inputFile : inputFiles) {
1068	if (!isa<ObjFile>(Val: inputFile))
1069	continue;
1070	const ObjFile *objFile = cast<ObjFile>(Val: inputFile);
1071	ArrayRef<MachO::data_in_code_entry> entries = objFile->getDataInCode();
1072	if (entries.empty())
1073	continue;
1074
1075	std::vector<MachO::data_in_code_entry> sortedEntries;
1076	sortedEntries.assign(first: entries.begin(), last: entries.end());
1077	llvm::sort(sortedEntries, [](const data_in_code_entry &lhs,
1078	const data_in_code_entry &rhs) {
1079	return lhs.offset < rhs.offset;
1080	});
1081
1082	// For each code subsection find 'data in code' entries residing in it.
1083	// Compute the new offset values as
1084	// <offset within subsection> + <subsection address> - <__TEXT address>.
1085	for (const Section *section : objFile->sections) {
1086	for (const Subsection &subsec : section->subsections) {
1087	const InputSection *isec = subsec.isec;
1088	if (!isCodeSection(isec))
1089	continue;
1090	if (cast<ConcatInputSection>(Val: isec)->shouldOmitFromOutput())
1091	continue;
1092	const uint64_t beginAddr = section->addr + subsec.offset;
1093	auto it = llvm::lower_bound(
1094	sortedEntries, beginAddr,
1095	[](const MachO::data_in_code_entry &entry, uint64_t addr) {
1096	return entry.offset < addr;
1097	});
1098	const uint64_t endAddr = beginAddr + isec->getSize();
1099	for (const auto end = sortedEntries.end();
1100	it != end && it->offset + it->length <= endAddr; ++it)
1101	dataInCodeEntries.push_back(
1102	{static_cast<uint32_t>(isec->getVA(off: it->offset - beginAddr) -
1103	in.header->addr),
1104	it->length, it->kind});
1105	}
1106	}
1107	}
1108
1109	// ld64 emits the table in sorted order too.
1110	llvm::sort(dataInCodeEntries,
1111	[](const data_in_code_entry &lhs, const data_in_code_entry &rhs) {
1112	return lhs.offset < rhs.offset;
1113	});
1114	return dataInCodeEntries;
1115	}
1116
1117	void DataInCodeSection::finalizeContents() {
1118	entries = target->wordSize == `8` ? collectDataInCodeEntries<LP64>()
1119	: collectDataInCodeEntries<ILP32>();
1120	}
1121
1122	void DataInCodeSection::writeTo(uint8_t buf) const* {
1123	if (!entries.empty())
1124	memcpy(dest: buf, src: entries.data(), n: getRawSize());
1125	}
1126
1127	FunctionStartsSection::FunctionStartsSection()
1128	: LinkEditSection (segment_names::linkEdit, section_names::functionStarts) {}
1129
1130	void FunctionStartsSection::finalizeContents() {
1131	raw_svector_ostream os{contents};
1132	std::vector<uint64_t> addrs;
1133	for (const InputFile *file : inputFiles) {
1134	if (auto *objFile = dyn_cast<ObjFile>(Val: file)) {
1135	for (const Symbol *sym : objFile->symbols) {
1136	if (const auto *defined = dyn_cast_or_null<Defined>(Val: sym)) {
1137	if (!defined->isec() \|\| !isCodeSection(defined->isec()) \|\|
1138	!defined->isLive())
1139	continue;
1140	addrs.push_back(x: defined->getVA());
1141	}
1142	}
1143	}
1144	}
1145	llvm::sort(C&: addrs);
1146	uint64_t addr = in.header->addr;
1147	for (uint64_t nextAddr : addrs) {
1148	uint64_t delta = nextAddr - addr;
1149	if (delta == `0`)
1150	continue;
1151	encodeULEB128(Value: delta, OS&: os);
1152	addr = nextAddr;
1153	}
1154	os << `'\0'`;
1155	}
1156
1157	void FunctionStartsSection::writeTo(uint8_t buf) const* {
1158	memcpy(dest: buf, src: contents.data(), n: contents.size());
1159	}
1160
1161	SymtabSection::SymtabSection(StringTableSection &stringTableSection)
1162	: LinkEditSection (segment_names::linkEdit, section_names::symbolTable),
1163	stringTableSection(stringTableSection) {}
1164
1165	void SymtabSection::emitBeginSourceStab(StringRef sourceFile) {
1166	StabsEntry stab(N_SO);
1167	stab.strx = stringTableSection.addString(saver().save(S: sourceFile));
1168	stabs.emplace_back(args: std::move(stab));
1169	}
1170
1171	void SymtabSection::emitEndSourceStab() {
1172	StabsEntry stab(N_SO);
1173	stab.sect = `1`;
1174	stabs.emplace_back(args: std::move(stab));
1175	}
1176
1177	void SymtabSection::emitObjectFileStab(ObjFile *file) {
1178	StabsEntry stab(N_OSO);
1179	stab.sect = target->cpuSubtype;
1180	SmallString<`261`> path(!file->archiveName.empty() ? file->archiveName
1181	: file->getName());
1182	std::error_code ec = sys::fs::make_absolute(path);
1183	if (ec)
1184	fatal(msg: "failed to get absolute path for " + path);
1185
1186	if (!file->archiveName.empty())
1187	path.append(Refs: {"(", file->getName(), ")"});
1188
1189	StringRef adjustedPath = saver().save(S: path.str());
1190	adjustedPath.consume_front(Prefix: config ->osoPrefix);
1191
1192	stab.strx = stringTableSection.addString(adjustedPath);
1193	stab.desc = `1`;
1194	stab.value = file->modTime;
1195	stabs.emplace_back(args: std::move(stab));
1196	}
1197
1198	void SymtabSection::emitEndFunStab(Defined *defined) {
1199	StabsEntry stab(N_FUN);
1200	stab.value = defined->size;
1201	stabs.emplace_back(args: std::move(stab));
1202	}
1203
1204	void SymtabSection::emitStabs() {
1205	if (config ->omitDebugInfo)
1206	return;
1207
1208	for (const std::string &s : config ->astPaths) {
1209	StabsEntry astStab(N_AST);
1210	astStab.strx = stringTableSection.addString(s);
1211	stabs.emplace_back(args: std::move(astStab));
1212	}
1213
1214	// Cache the file ID for each symbol in an std::pair for faster sorting.
1215	using SortingPair = std::pair<Defined , int*>;
1216	std::vector<SortingPair> symbolsNeedingStabs;
1217	for (const SymtabEntry &entry :
1218	concat<SymtabEntry>(Ranges&: localSymbols, Ranges&: externalSymbols)) {
1219	Symbol *sym = entry.sym;
1220	assert(sym->isLive() &&
1221	"dead symbols should not be in localSymbols, externalSymbols");
1222	if (auto *defined = dyn_cast<Defined>(Val: sym)) {
1223	// Excluded symbols should have been filtered out in finalizeContents().
1224	assert(defined->includeInSymtab);
1225
1226	if (defined->isAbsolute())
1227	continue;
1228
1229	// Constant-folded symbols go in the executable's symbol table, but don't
1230	// get a stabs entry unless --keep-icf-stabs flag is specified.
1231	if (!config ->keepICFStabs &&
1232	defined->identicalCodeFoldingKind != Symbol::ICFFoldKind::None)
1233	continue;
1234
1235	ObjFile *file = defined->getObjectFile();
1236	if (!file \|\| !file->compileUnit)
1237	continue;
1238
1239	// We use the symbol's original InputSection to get the file id,
1240	// even for ICF folded symbols, to ensure STABS entries point to the
1241	// correct object file where the symbol was originally defined
1242	symbolsNeedingStabs.emplace_back(args&: defined,
1243	args: defined->originalIsec->getFile()->id);
1244	}
1245	}
1246
1247	llvm::stable_sort(Range&: symbolsNeedingStabs, C: llvm::less_second ());
1248
1249	// Emit STABS symbols so that dsymutil and/or the debugger can map address
1250	// regions in the final binary to the source and object files from which they
1251	// originated.
1252	InputFile lastFile = nullptr*;
1253	for (SortingPair &pair : symbolsNeedingStabs) {
1254	Defined *defined = pair.first;
1255	// When emitting STABS entries for a symbol, always use the original
1256	// InputSection of the defined symbol, not the section of the function body
1257	// (which might be a different function entirely if ICF folded this
1258	// function). This ensures STABS entries point back to the original object
1259	// file.
1260	InputSection *isec = defined->originalIsec;
1261	ObjFile *file = cast<ObjFile>(Val: isec->getFile());
1262
1263	if (lastFile == nullptr \|\| lastFile != file) {
1264	if (lastFile != nullptr)
1265	emitEndSourceStab();
1266	lastFile = file;
1267
1268	emitBeginSourceStab(sourceFile: file->sourceFile());
1269	emitObjectFileStab(file);
1270	}
1271
1272	StabsEntry symStab;
1273	symStab.sect = isec->parent->index;
1274	symStab.strx = stringTableSection.addString(defined->getName());
1275
1276	// When using --keep-icf-stabs, we need to use the VA of the actual function
1277	// body that the linker will place in the binary. This is the function that
1278	// the symbol refers to after ICF folding.
1279	if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
1280	// For thunks, we need to get the function they point to
1281	Defined *target = getBodyForThunkFoldedSym(foldedSym: defined);
1282	symStab.value = target->getVA();
1283	} else {
1284	symStab.value = defined->getVA();
1285	}
1286
1287	if (isCodeSection(isec)) {
1288	symStab.type = N_FUN;
1289	stabs.emplace_back(args: std::move(symStab));
1290	// For the end function marker in STABS, we need to use the size of the
1291	// actual function body that exists in the output binary
1292	if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
1293	// For thunks, we use the target's size
1294	Defined *target = getBodyForThunkFoldedSym(foldedSym: defined);
1295	emitEndFunStab(defined: target);
1296	} else {
1297	emitEndFunStab(defined);
1298	}
1299	} else {
1300	symStab.type = defined->isExternal() ? N_GSYM : N_STSYM;
1301	stabs.emplace_back(args: std::move(symStab));
1302	}
1303	}
1304
1305	if (!stabs.empty())
1306	emitEndSourceStab();
1307	}
1308
1309	void SymtabSection::finalizeContents() {
1310	auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) {
1311	uint32_t strx = stringTableSection.addString(sym->getName());
1312	symbols.push_back(x: {.sym: sym, .strx: strx});
1313	};
1314
1315	std::function<void(Symbol *)> localSymbolsHandler;
1316	switch (config ->localSymbolsPresence) {
1317	case SymtabPresence::All:
1318	localSymbolsHandler = [&](Symbol *sym) { addSymbol (localSymbols, sym); };
1319	break;
1320	case SymtabPresence::None:
1321	localSymbolsHandler = [&](Symbol ) { /* Do nothing/ };
1322	break;
1323	case SymtabPresence::SelectivelyIncluded:
1324	localSymbolsHandler = [&](Symbol *sym) {
1325	if (config ->localSymbolPatterns.match(symbolName: sym->getName()))
1326	addSymbol (localSymbols, sym);
1327	};
1328	break;
1329	case SymtabPresence::SelectivelyExcluded:
1330	localSymbolsHandler = [&](Symbol *sym) {
1331	if (!config ->localSymbolPatterns.match(symbolName: sym->getName()))
1332	addSymbol (localSymbols, sym);
1333	};
1334	break;
1335	}
1336
1337	// Local symbols aren't in the SymbolTable, so we walk the list of object
1338	// files to gather them.
1339	// But if `-x` is set, then we don't need to. localSymbolsHandler() will do
1340	// the right thing regardless, but this check is a perf optimization because
1341	// iterating through all the input files and their symbols is expensive.
1342	if (config ->localSymbolsPresence != SymtabPresence::None) {
1343	for (const InputFile *file : inputFiles) {
1344	if (auto *objFile = dyn_cast<ObjFile>(Val: file)) {
1345	for (Symbol *sym : objFile->symbols) {
1346	if (auto *defined = dyn_cast_or_null<Defined>(Val: sym)) {
1347	if (defined->isExternal() \|\| !defined->isLive() \|\|
1348	!defined->includeInSymtab)
1349	continue;
1350	localSymbolsHandler (sym);
1351	}
1352	}
1353	}
1354	}
1355	}
1356
1357	// __dyld_private is a local symbol too. It's linker-created and doesn't
1358	// exist in any object file.
1359	if (in.stubHelper && in.stubHelper->dyldPrivate)
1360	localSymbolsHandler (in.stubHelper->dyldPrivate);
1361
1362	for (Symbol *sym : symtab ->getSymbols()) {
1363	if (!sym->isLive())
1364	continue;
1365	if (auto *defined = dyn_cast<Defined>(Val: sym)) {
1366	if (!defined->includeInSymtab)
1367	continue;
1368	assert(defined->isExternal());
1369	if (defined->privateExtern)
1370	localSymbolsHandler (defined);
1371	else
1372	addSymbol (externalSymbols, defined);
1373	} else if (auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
1374	if (dysym->isReferenced())
1375	addSymbol (undefinedSymbols, sym);
1376	}
1377	}
1378
1379	emitStabs();
1380	uint32_t symtabIndex = stabs.size();
1381	for (const SymtabEntry &entry :
1382	concat<SymtabEntry>(Ranges&: localSymbols, Ranges&: externalSymbols, Ranges&: undefinedSymbols)) {
1383	entry.sym->symtabIndex = symtabIndex++;
1384	}
1385	}
1386
1387	uint32_t SymtabSection::getNumSymbols() const {
1388	return stabs.size() + localSymbols.size() + externalSymbols.size() +
1389	undefinedSymbols.size();
1390	}
1391
1392	// This serves to hide (type-erase) the template parameter from SymtabSection.
1393	template <class LP> class SymtabSectionImpl final : public SymtabSection {
1394	public:
1395	SymtabSectionImpl(StringTableSection &stringTableSection)
1396	: SymtabSection(stringTableSection) {}
1397	uint64_t getRawSize() const override;
1398	void writeTo(uint8_t buf) const* override;
1399	};
1400
1401	template <class LP> uint64_t SymtabSectionImpl<LP>::getRawSize() const {
1402	return getNumSymbols() * sizeof(typename LP::nlist);
1403	}
1404
1405	template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t buf) const* {
1406	auto nList = reinterpret_cast<typename* LP::nlist *>(buf);
1407	// Emit the stabs entries before the "real" symbols. We cannot emit them
1408	// after as that would render Symbol::symtabIndex inaccurate.
1409	for (const StabsEntry &entry : stabs) {
1410	nList->n_strx = entry.strx;
1411	nList->n_type = entry.type;
1412	nList->n_sect = entry.sect;
1413	nList->n_desc = entry.desc;
1414	nList->n_value = entry.value;
1415	++nList;
1416	}
1417
1418	for (const SymtabEntry &entry : concat<const SymtabEntry>(
1419	localSymbols, externalSymbols, undefinedSymbols)) {
1420	nList->n_strx = entry.strx;
1421	// TODO populate n_desc with more flags
1422	if (auto *defined = dyn_cast<Defined>(Val: entry.sym)) {
1423	uint8_t scope = `0`;
1424	if (defined->privateExtern) {
1425	// Private external -- dylib scoped symbol.
1426	// Promote to non-external at link time.
1427	scope = N_PEXT;
1428	} else if (defined->isExternal()) {
1429	// Normal global symbol.
1430	scope = N_EXT;
1431	} else {
1432	// TU-local symbol from localSymbols.
1433	scope = `0`;
1434	}
1435
1436	if (defined->isAbsolute()) {
1437	nList->n_type = scope \| N_ABS;
1438	nList->n_sect = NO_SECT;
1439	nList->n_value = defined->value;
1440	} else {
1441	nList->n_type = scope \| N_SECT;
1442	nList->n_sect = defined->isec()->parent->index;
1443	// For the N_SECT symbol type, n_value is the address of the symbol
1444	nList->n_value = defined->getVA();
1445	}
1446	nList->n_desc \|= defined->isExternalWeakDef() ? N_WEAK_DEF : `0`;
1447	nList->n_desc \|=
1448	defined->referencedDynamically ? REFERENCED_DYNAMICALLY : `0`;
1449	} else if (auto *dysym = dyn_cast<DylibSymbol>(Val: entry.sym)) {
1450	uint16_t n_desc = nList->n_desc;
1451	int16_t ordinal = ordinalForDylibSymbol(dysym: *dysym);
1452	if (ordinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP)
1453	SET_LIBRARY_ORDINAL(n_desc, ordinal: DYNAMIC_LOOKUP_ORDINAL);
1454	else if (ordinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE)
1455	SET_LIBRARY_ORDINAL(n_desc, ordinal: EXECUTABLE_ORDINAL);
1456	else {
1457	assert(ordinal > `0`);
1458	SET_LIBRARY_ORDINAL(n_desc, ordinal: static_cast<uint8_t>(ordinal));
1459	}
1460
1461	nList->n_type = N_EXT;
1462	n_desc \|= dysym->isWeakDef() ? N_WEAK_DEF : `0`;
1463	n_desc \|= dysym->isWeakRef() ? N_WEAK_REF : `0`;
1464	nList->n_desc = n_desc;
1465	}
1466	++nList;
1467	}
1468	}
1469
1470	template <class LP>
1471	SymtabSection *
1472	macho::makeSymtabSection(StringTableSection &stringTableSection) {
1473	return make<SymtabSectionImpl<LP>>(stringTableSection);
1474	}
1475
1476	IndirectSymtabSection::IndirectSymtabSection()
1477	: LinkEditSection (segment_names::linkEdit,
1478	section_names::indirectSymbolTable) {}
1479
1480	uint32_t IndirectSymtabSection::getNumSymbols() const {
1481	uint32_t size = in.got->getEntries().size() +
1482	in.tlvPointers->getEntries().size() +
1483	in.stubs->getEntries().size();
1484	if (!config ->emitChainedFixups)
1485	size += in.stubs->getEntries().size();
1486	return size;
1487	}
1488
1489	bool IndirectSymtabSection::isNeeded() const {
1490	return in.got->isNeeded() \|\| in.tlvPointers->isNeeded() \|\|
1491	in.stubs->isNeeded();
1492	}
1493
1494	void IndirectSymtabSection::finalizeContents() {
1495	uint32_t off = `0`;
1496	in.got->reserved1 = off;
1497	off += in.got->getEntries().size();
1498	in.tlvPointers->reserved1 = off;
1499	off += in.tlvPointers->getEntries().size();
1500	in.stubs->reserved1 = off;
1501	if (in.lazyPointers) {
1502	off += in.stubs->getEntries().size();
1503	in.lazyPointers->reserved1 = off;
1504	}
1505	}
1506
1507	static uint32_t indirectValue(const Symbol *sym) {
1508	if (sym->symtabIndex == UINT32_MAX \|\| !needsBinding(sym))
1509	return INDIRECT_SYMBOL_LOCAL;
1510	return sym->symtabIndex;
1511	}
1512
1513	void IndirectSymtabSection::writeTo(uint8_t buf) const* {
1514	uint32_t off = `0`;
1515	for (const Symbol *sym : in.got->getEntries()) {
1516	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1517	++off;
1518	}
1519	for (const Symbol *sym : in.tlvPointers->getEntries()) {
1520	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1521	++off;
1522	}
1523	for (const Symbol *sym : in.stubs->getEntries()) {
1524	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1525	++off;
1526	}
1527
1528	if (in.lazyPointers) {
1529	// There is a 1:1 correspondence between stubs and LazyPointerSection
1530	// entries. But giving __stubs and __la_symbol_ptr the same reserved1
1531	// (the offset into the indirect symbol table) so that they both refer
1532	// to the same range of offsets confuses `strip`, so write the stubs
1533	// symbol table offsets a second time.
1534	for (const Symbol *sym : in.stubs->getEntries()) {
1535	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1536	++off;
1537	}
1538	}
1539	}
1540
1541	StringTableSection::StringTableSection()
1542	: LinkEditSection (segment_names::linkEdit, section_names::stringTable) {}
1543
1544	uint32_t StringTableSection::addString(StringRef str) {
1545	uint32_t strx = size;
1546	if (config ->dedupSymbolStrings) {
1547	llvm::CachedHashStringRef hashedStr(str);
1548	auto [it, inserted] = stringMap.try_emplace(Key: hashedStr, Args&: strx);
1549	if (!inserted)
1550	return it ->second;
1551	}
1552
1553	strings.push_back(x: str);
1554	size += str.size() + `1`; // account for null terminator
1555	return strx;
1556	}
1557
1558	void StringTableSection::writeTo(uint8_t buf) const* {
1559	uint32_t off = `0`;
1560	for (StringRef str : strings) {
1561	memcpy(dest: buf + off, src: str.data(), n: str.size());
1562	off += str.size() + `1`; // account for null terminator
1563	}
1564	}
1565
1566	static_assert((CodeSignatureSection::blobHeadersSize % `8`) == `0`);
1567	static_assert((CodeSignatureSection::fixedHeadersSize % `8`) == `0`);
1568
1569	CodeSignatureSection::CodeSignatureSection()
1570	: LinkEditSection (segment_names::linkEdit, section_names::codeSignature) {
1571	align = `16`; // required by libstuff
1572
1573	// XXX: This mimics LD64, where it uses the install-name as codesign
1574	// identifier, if available.
1575	if (!config ->installName.empty())
1576	fileName = config ->installName;
1577	else
1578	// FIXME: Consider using finalOutput instead of outputFile.
1579	fileName = config ->outputFile;
1580
1581	size_t slashIndex = fileName.rfind(Str: "/");
1582	if (slashIndex != std::string::npos)
1583	fileName = fileName.drop_front(N: slashIndex + `1`);
1584
1585	// NOTE: Any changes to these calculations should be repeated
1586	// in llvm-objcopy's MachOLayoutBuilder::layoutTail.
1587	allHeadersSize = alignTo<`16`>(Value: fixedHeadersSize + fileName.size() + `1`);
1588	fileNamePad = allHeadersSize - fixedHeadersSize - fileName.size();
1589	}
1590
1591	uint32_t CodeSignatureSection::getBlockCount() const {
1592	return (fileOff + blockSize - `1`) / blockSize;
1593	}
1594
1595	uint64_t CodeSignatureSection::getRawSize() const {
1596	return allHeadersSize + getBlockCount() * hashSize;
1597	}
1598
1599	void CodeSignatureSection::writeHashes(uint8_t buf) const* {
1600	// NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1601	// MachOWriter::writeSignatureData.
1602	uint8_t *hashes = buf + fileOff + allHeadersSize;
1603	parallelFor(Begin: `0`, End: getBlockCount(), Fn: [&](size_t i) {
1604	sha256(data: buf + i * blockSize,
1605	len: std::min(a: static_cast<size_t>(fileOff - i * blockSize), b: blockSize),
1606	output: hashes + i * hashSize);
1607	});
1608	#if defined(__APPLE__)
1609	// This is macOS-specific work-around and makes no sense for any
1610	// other host OS. See https://openradar.appspot.com/FB8914231
1611	//
1612	// The macOS kernel maintains a signature-verification cache to
1613	// quickly validate applications at time of execve(2). The trouble
1614	// is that for the kernel creates the cache entry at the time of the
1615	// mmap(2) call, before we have a chance to write either the code to
1616	// sign or the signature header+hashes. The fix is to invalidate
1617	// all cached data associated with the output file, thus discarding
1618	// the bogus prematurely-cached signature.
1619	msync(buf, fileOff + getSize(), MS_INVALIDATE);
1620	#endif
1621	}
1622
1623	void CodeSignatureSection::writeTo(uint8_t buf) const* {
1624	// NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1625	// MachOWriter::writeSignatureData.
1626	uint32_t signatureSize = static_cast<uint32_t>(getSize());
1627	auto superBlob = reinterpret_cast<CS_SuperBlob >(buf);
1628	write32be(P: &superBlob->magic, V: CSMAGIC_EMBEDDED_SIGNATURE);
1629	write32be(P: &superBlob->length, V: signatureSize);
1630	write32be(P: &superBlob->count, V: `1`);
1631	auto blobIndex = reinterpret_cast<CS_BlobIndex >(&superBlob[`1`]);
1632	write32be(P: &blobIndex->type, V: CSSLOT_CODEDIRECTORY);
1633	write32be(P: &blobIndex->offset, V: blobHeadersSize);
1634	auto *codeDirectory =
1635	reinterpret_cast<CS_CodeDirectory *>(buf + blobHeadersSize);
1636	write32be(P: &codeDirectory->magic, V: CSMAGIC_CODEDIRECTORY);
1637	write32be(P: &codeDirectory->length, V: signatureSize - blobHeadersSize);
1638	write32be(P: &codeDirectory->version, V: CS_SUPPORTSEXECSEG);
1639	write32be(P: &codeDirectory->flags, V: CS_ADHOC \| CS_LINKER_SIGNED);
1640	write32be(P: &codeDirectory->hashOffset,
1641	V: sizeof(CS_CodeDirectory) + fileName.size() + fileNamePad);
1642	write32be(P: &codeDirectory->identOffset, V: sizeof(CS_CodeDirectory));
1643	codeDirectory->nSpecialSlots = `0`;
1644	write32be(P: &codeDirectory->nCodeSlots, V: getBlockCount());
1645	write32be(P: &codeDirectory->codeLimit, V: fileOff);
1646	codeDirectory->hashSize = static_cast<uint8_t>(hashSize);
1647	codeDirectory->hashType = kSecCodeSignatureHashSHA256;
1648	codeDirectory->platform = `0`;
1649	codeDirectory->pageSize = blockSizeShift;
1650	codeDirectory->spare2 = `0`;
1651	codeDirectory->scatterOffset = `0`;
1652	codeDirectory->teamOffset = `0`;
1653	codeDirectory->spare3 = `0`;
1654	codeDirectory->codeLimit64 = `0`;
1655	OutputSegment *textSeg = getOrCreateOutputSegment(name: segment_names::text);
1656	write64be(P: &codeDirectory->execSegBase, V: textSeg->fileOff);
1657	write64be(P: &codeDirectory->execSegLimit, V: textSeg->fileSize);
1658	write64be(P: &codeDirectory->execSegFlags,
1659	V: config ->outputType == MH_EXECUTE ? CS_EXECSEG_MAIN_BINARY : `0`);
1660	auto id = reinterpret_cast<char* *>(&codeDirectory[`1`]);
1661	memcpy(dest: id, src: fileName.begin(), n: fileName.size());
1662	memset(s: id + fileName.size(), c: `0`, n: fileNamePad);
1663	}
1664
1665	CStringSection::CStringSection(const char *name)
1666	: SyntheticSection (segment_names::text, name) {
1667	flags = S_CSTRING_LITERALS;
1668	}
1669
1670	void CStringSection::addInput(CStringInputSection *isec) {
1671	isec->parent = this;
1672	inputs.push_back(x: isec);
1673	if (isec->align > align)
1674	align = isec->align;
1675	}
1676
1677	void CStringSection::writeTo(uint8_t buf) const* {
1678	for (const CStringInputSection *isec : inputs) {
1679	for (const auto &[i, piece] : llvm::enumerate(First: isec->pieces)) {
1680	if (!piece.live)
1681	continue;
1682	StringRef string = isec->getStringRef(i);
1683	memcpy(dest: buf + piece.outSecOff, src: string.data(), n: string.size());
1684	}
1685	}
1686	}
1687
1688	void CStringSection::finalizeContents() {
1689	uint64_t offset = `0`;
1690	for (CStringInputSection *isec : inputs) {
1691	for (const auto &[i, piece] : llvm::enumerate(First&: isec->pieces)) {
1692	if (!piece.live)
1693	continue;
1694	// See comment above DeduplicatedCStringSection for how alignment is
1695	// handled.
1696	uint32_t pieceAlign = `1`
1697	<< llvm::countr_zero(Val: isec->align \| piece.inSecOff);
1698	offset = alignToPowerOf2(Value: offset, Align: pieceAlign);
1699	piece.outSecOff = offset;
1700	isec->isFinal = true;
1701	StringRef string = isec->getStringRef(i);
1702	offset += string.size() + `1`; // account for null terminator
1703	}
1704	}
1705	size = offset;
1706	}
1707
1708	// Mergeable cstring literals are found under the __TEXT,__cstring section. In
1709	// contrast to ELF, which puts strings that need different alignments into
1710	// different sections, clang's Mach-O backend puts them all in one section.
1711	// Strings that need to be aligned have the .p2align directive emitted before
1712	// them, which simply translates into zero padding in the object file. In other
1713	// words, we have to infer the desired alignment of these cstrings from their
1714	// addresses.
1715	//
1716	// We differ slightly from ld64 in how we've chosen to align these cstrings.
1717	// Both LLD and ld64 preserve the number of trailing zeros in each cstring's
1718	// address in the input object files. When deduplicating identical cstrings,
1719	// both linkers pick the cstring whose address has more trailing zeros, and
1720	// preserve the alignment of that address in the final binary. However, ld64
1721	// goes a step further and also preserves the offset of the cstring from the
1722	// last section-aligned address. I.e. if a cstring is at offset 18 in the
1723	// input, with a section alignment of 16, then both LLD and ld64 will ensure the
1724	// final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also
// ensure that the final address is of the form 16 * k + 2 for some k.
1726	//
1727	// Note that ld64's heuristic means that a dedup'ed cstring's final address is
1728	// dependent on the order of the input object files. E.g. if in addition to the
1729	// cstring at offset 18 above, we have a duplicate one in another file with a
1730	// `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick
1731	// the cstring from the object file earlier on the command line (since both have
1732	// the same number of trailing zeros in their address). So the final cstring may
// either be at some address `16 * k + 2` or at some address `2 * k`.
1734	//
1735	// I've opted not to follow this behavior primarily for implementation
1736	// simplicity, and secondarily to save a few more bytes. It's not clear to me
1737	// that preserving the section alignment + offset is ever necessary, and there
1738	// are many cases that are clearly redundant. In particular, if an x86_64 object
1739	// file contains some strings that are accessed via SIMD instructions, then the
1740	// .cstring section in the object file will be 16-byte-aligned (since SIMD
1741	// requires its operand addresses to be 16-byte aligned). However, there will
1742	// typically also be other cstrings in the same file that aren't used via SIMD
1743	// and don't need this alignment. They will be emitted at some arbitrary address
// `A`, but ld64 will treat them as being 16-byte aligned with an offset of
// `A % 16`.
1746	void DeduplicatedCStringSection::finalizeContents() {
1747	// Find the largest alignment required for each string.
1748	for (const CStringInputSection *isec : inputs) {
1749	for (const auto &[i, piece] : llvm::enumerate(First: isec->pieces)) {
1750	if (!piece.live)
1751	continue;
1752	auto s = isec->getCachedHashStringRef(i);
1753	assert(isec->align != `0`);
1754	uint8_t trailingZeros = llvm::countr_zero(Val: isec->align \| piece.inSecOff);
1755	auto it = stringOffsetMap.insert(
1756	KV: std::make_pair(x&: s, y: StringOffset (trailingZeros)));
1757	if (!it.second && it.first ->second.trailingZeros < trailingZeros)
1758	it.first ->second.trailingZeros = trailingZeros;
1759	}
1760	}
1761
1762	// Sort the strings for performance and compression size win, and then
1763	// assign an offset for each string and save it to the corresponding
1764	// StringPieces for easy access.
1765	for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
1766	auto &piece = isec->pieces [i];
1767	auto s = isec->getCachedHashStringRef(i);
1768	auto it = stringOffsetMap.find(Val: s);
1769	assert(it != stringOffsetMap.end());
1770	lld::macho::DeduplicatedCStringSection::StringOffset &offsetInfo =
1771	it ->second;
1772	if (offsetInfo.outSecOff == UINT64_MAX) {
1773	offsetInfo.outSecOff =
1774	alignToPowerOf2(Value: size, Align: `1ULL` << offsetInfo.trailingZeros);
1775	size = offsetInfo.outSecOff + s.size() + `1`; // account for null terminator
1776	}
1777	piece.outSecOff = offsetInfo.outSecOff;
1778	}
1779	for (CStringInputSection *isec : inputs)
1780	isec->isFinal = true;
1781	}
1782
1783	void DeduplicatedCStringSection::writeTo(uint8_t buf) const* {
1784	for (const auto &p : stringOffsetMap) {
1785	StringRef data = p.first.val();
1786	uint64_t off = p.second.outSecOff;
1787	if (!data.empty())
1788	memcpy(dest: buf + off, src: data.data(), n: data.size());
1789	}
1790	}
1791
1792	DeduplicatedCStringSection::StringOffset
1793	DeduplicatedCStringSection::getStringOffset(StringRef str) const {
1794	// StringPiece uses 31 bits to store the hashes, so we replicate that
1795	uint32_t hash = xxh3_64bits(data: str) & `0x7fffffff`;
1796	auto offset = stringOffsetMap.find(Val: CachedHashStringRef (str, hash));
1797	assert(offset != stringOffsetMap.end() &&
1798	"Looked-up strings should always exist in section");
1799	return offset ->second;
1800	}
1801
1802	// This section is actually emitted as __TEXT,__const by ld64, but clang may
1803	// emit input sections of that name, and LLD doesn't currently support mixing
1804	// synthetic and concat-type OutputSections. To work around this, I've given
1805	// our merged-literals section a different name.
1806	WordLiteralSection::WordLiteralSection()
1807	: SyntheticSection (segment_names::text, section_names::literals) {
1808	align = `16`;
1809	}
1810
1811	void WordLiteralSection::addInput(WordLiteralInputSection *isec) {
1812	isec->parent = this;
1813	inputs.push_back(x: isec);
1814	}
1815
1816	void WordLiteralSection::finalizeContents() {
1817	for (WordLiteralInputSection *isec : inputs) {
1818	// We do all processing of the InputSection here, so it will be effectively
1819	// finalized.
1820	isec->isFinal = true;
1821	const uint8_t *buf = isec->data.data();
1822	switch (sectionType(flags: isec->getFlags())) {
1823	case S_4BYTE_LITERALS: {
1824	for (size_t off = `0`, e = isec->data.size(); off < e; off += `4`) {
1825	if (!isec->isLive(off))
1826	continue;
1827	uint32_t value = *reinterpret_cast<const uint32_t *>(buf + off);
1828	literal4Map.emplace(args&: value, args: literal4Map.size());
1829	}
1830	break;
1831	}
1832	case S_8BYTE_LITERALS: {
1833	for (size_t off = `0`, e = isec->data.size(); off < e; off += `8`) {
1834	if (!isec->isLive(off))
1835	continue;
1836	uint64_t value = *reinterpret_cast<const uint64_t *>(buf + off);
1837	literal8Map.emplace(args&: value, args: literal8Map.size());
1838	}
1839	break;
1840	}
1841	case S_16BYTE_LITERALS: {
1842	for (size_t off = `0`, e = isec->data.size(); off < e; off += `16`) {
1843	if (!isec->isLive(off))
1844	continue;
1845	UInt128 value = *reinterpret_cast<const UInt128 *>(buf + off);
1846	literal16Map.emplace(args&: value, args: literal16Map.size());
1847	}
1848	break;
1849	}
1850	default:
1851	llvm_unreachable("invalid literal section type");
1852	}
1853	}
1854	}
1855
1856	void WordLiteralSection::writeTo(uint8_t buf) const* {
1857	// Note that we don't attempt to do any endianness conversion in addInput(),
1858	// so we don't do it here either -- just write out the original value,
1859	// byte-for-byte.
1860	for (const auto &p : literal16Map)
1861	memcpy(dest: buf + p.second * `16`, src: &p.first, n: `16`);
1862	buf += literal16Map.size() * `16`;
1863
1864	for (const auto &p : literal8Map)
1865	memcpy(dest: buf + p.second * `8`, src: &p.first, n: `8`);
1866	buf += literal8Map.size() * `8`;
1867
1868	for (const auto &p : literal4Map)
1869	memcpy(dest: buf + p.second * `4`, src: &p.first, n: `4`);
1870	}
1871
1872	ObjCImageInfoSection::ObjCImageInfoSection()
1873	: SyntheticSection (segment_names::data, section_names::objCImageInfo) {}
1874
1875	ObjCImageInfoSection::ImageInfo
1876	ObjCImageInfoSection::parseImageInfo(const InputFile *file) {
1877	ImageInfo info;
1878	ArrayRef<uint8_t> data = file->objCImageInfo;
1879	// The image info struct has the following layout:
1880	// struct {
1881	// uint32_t version;
1882	// uint32_t flags;
1883	// };
1884	if (data.size() < `8`) {
1885	warn(msg: toString(file) + ": invalid __objc_imageinfo size");
1886	return info;
1887	}
1888
1889	auto buf = reinterpret_cast<const* uint32_t *>(data.data());
1890	if (read32le(P: buf) != `0`) {
1891	warn(msg: toString(file) + ": invalid __objc_imageinfo version");
1892	return info;
1893	}
1894
1895	uint32_t flags = read32le(P: buf + `1`);
1896	info.swiftVersion = (flags >> `8`) & `0xff`;
1897	info.hasCategoryClassProperties = flags & `0x40`;
1898	return info;
1899	}
1900
1901	static std::string swiftVersionString(uint8_t version) {
1902	switch (version) {
1903	case `1`:
1904	return "1.0";
1905	case `2`:
1906	return "1.1";
1907	case `3`:
1908	return "2.0";
1909	case `4`:
1910	return "3.0";
1911	case `5`:
1912	return "4.0";
1913	default:
1914	return ("0x" + Twine::utohexstr(Val: version)).str();
1915	}
1916	}
1917
1918	// Validate each object file's __objc_imageinfo and use them to generate the
1919	// image info for the output binary. Only two pieces of info are relevant:
1920	// 1. The Swift version (should be identical across inputs)
1921	// 2. `bool hasCategoryClassProperties` (true only if true for all inputs)
1922	void ObjCImageInfoSection::finalizeContents() {
1923	assert(files.size() != `0`); // should have already been checked via isNeeded()
1924
1925	info.hasCategoryClassProperties = true;
1926	const InputFile *firstFile;
1927	for (const InputFile *file : files) {
1928	ImageInfo inputInfo = parseImageInfo(file);
1929	info.hasCategoryClassProperties &= inputInfo.hasCategoryClassProperties;
1930
1931	// swiftVersion 0 means no Swift is present, so no version checking required
1932	if (inputInfo.swiftVersion == `0`)
1933	continue;
1934
1935	if (info.swiftVersion != `0` && info.swiftVersion != inputInfo.swiftVersion) {
1936	error(msg: "Swift version mismatch: " + toString(file: firstFile) + " has version " +
1937	swiftVersionString(version: info.swiftVersion) + " but " + toString(file) +
1938	" has version " + swiftVersionString(version: inputInfo.swiftVersion));
1939	} else {
1940	info.swiftVersion = inputInfo.swiftVersion;
1941	firstFile = file;
1942	}
1943	}
1944	}
1945
1946	void ObjCImageInfoSection::writeTo(uint8_t buf) const* {
1947	uint32_t flags = info.hasCategoryClassProperties ? `0x40` : `0x0`;
1948	flags \|= info.swiftVersion << `8`;
1949	write32le(P: buf + `4`, V: flags);
1950	}
1951
1952	InitOffsetsSection::InitOffsetsSection()
1953	: SyntheticSection (segment_names::text, section_names::initOffsets) {
1954	flags = S_INIT_FUNC_OFFSETS;
1955	align = `4`; // This section contains 32-bit integers.
1956	}
1957
1958	uint64_t InitOffsetsSection::getSize() const {
1959	size_t count = `0`;
1960	for (const ConcatInputSection *isec : sections)
1961	count += isec->relocs.size();
1962	return count * sizeof(uint32_t);
1963	}
1964
1965	void InitOffsetsSection::writeTo(uint8_t buf) const* {
1966	// FIXME: Add function specified by -init when that argument is implemented.
1967	for (ConcatInputSection *isec : sections) {
1968	for (const Reloc &rel : isec->relocs) {
1969	const Symbol referent = cast<Symbol >(Val: rel.referent);
1970	assert(referent && "section relocation should have been rejected");
1971	uint64_t offset = referent->getVA() - in.header->addr;
1972	// FIXME: Can we handle this gracefully?
1973	if (offset > UINT32_MAX)
1974	fatal(msg: isec->getLocation(off: rel.offset) + ": offset to initializer " +
1975	referent->getName() + " (" + utohexstr(X: offset) +
1976	") does not fit in 32 bits");
1977
1978	// Entries need to be added in the order they appear in the section, but
1979	// relocations aren't guaranteed to be sorted.
1980	size_t index = rel.offset >> target->p2WordSize;
1981	write32le(P: &buf[index * sizeof(uint32_t)], V: offset);
1982	}
1983	buf += isec->relocs.size() * sizeof(uint32_t);
1984	}
1985	}
1986
1987	// The inputs are __mod_init_func sections, which contain pointers to
1988	// initializer functions, therefore all relocations should be of the UNSIGNED
1989	// type. InitOffsetsSection stores offsets, so if the initializer's address is
1990	// not known at link time, stub-indirection has to be used.
1991	void InitOffsetsSection::setUp() {
1992	for (const ConcatInputSection *isec : sections) {
1993	for (const Reloc &rel : isec->relocs) {
1994	RelocAttrs attrs = target->getRelocAttrs(type: rel.type);
1995	if (!attrs.hasAttr(b: RelocAttrBits::UNSIGNED))
1996	error(msg: isec->getLocation(off: rel.offset) +
1997	": unsupported relocation type: " + attrs.name);
1998	if (rel.addend != `0`)
1999	error(msg: isec->getLocation(off: rel.offset) +
2000	": relocation addend is not representable in __init_offsets");
2001	if (isa<InputSection *>(Val: rel.referent))
2002	error(msg: isec->getLocation(off: rel.offset) +
2003	": unexpected section relocation");
2004
2005	Symbol sym = rel.referent.dyn_cast<Symbol >();
2006	if (auto *undefined = dyn_cast<Undefined>(Val: sym))
2007	treatUndefinedSymbol(*undefined, isec, offset: rel.offset);
2008	if (needsBinding(sym))
2009	in.stubs->addEntry(sym);
2010	}
2011	}
2012	}
2013
2014	ObjCMethListSection::ObjCMethListSection()
2015	: SyntheticSection (segment_names::text, section_names::objcMethList) {
2016	flags = S_ATTR_NO_DEAD_STRIP;
2017	align = relativeOffsetSize;
2018	}
2019
2020	// Go through all input method lists and ensure that we have selrefs for all
2021	// their method names. The selrefs will be needed later by ::writeTo. We need to
2022	// create them early on here to ensure they are processed correctly by the lld
2023	// pipeline.
2024	void ObjCMethListSection::setUp() {
2025	for (const ConcatInputSection *isec : inputs) {
2026	uint32_t structSizeAndFlags = `0`, structCount = `0`;
2027	readMethodListHeader(buf: isec->data.data(), structSizeAndFlags, structCount);
2028	uint32_t originalStructSize = structSizeAndFlags & structSizeMask;
2029	// Method name is immediately after header
2030	uint32_t methodNameOff = methodListHeaderSize;
2031
2032	// Loop through all methods, and ensure a selref for each of them exists.
2033	while (methodNameOff < isec->data.size()) {
2034	const Reloc *reloc = isec->getRelocAt(off: methodNameOff);
2035	assert(reloc && "Relocation expected at method list name slot");
2036
2037	StringRef methname = reloc->getReferentString();
2038	if (!ObjCSelRefsHelper::getSelRef(methname))
2039	ObjCSelRefsHelper::makeSelRef(methname);
2040
2041	// Jump to method name offset in next struct
2042	methodNameOff += originalStructSize;
2043	}
2044	}
2045	}
2046
2047	// Calculate section size and final offsets for where InputSection's need to be
2048	// written.
2049	void ObjCMethListSection::finalize() {
2050	// sectionSize will be the total size of the __objc_methlist section
2051	sectionSize = `0`;
2052	for (ConcatInputSection *isec : inputs) {
2053	// We can also use sectionSize as write offset for isec
2054	assert(sectionSize == alignToPowerOf2(sectionSize, relativeOffsetSize) &&
2055	"expected __objc_methlist to be aligned by default with the "
2056	"required section alignment");
2057	isec->outSecOff = sectionSize;
2058
2059	isec->isFinal = true;
2060	uint32_t relativeListSize =
2061	computeRelativeMethodListSize(absoluteMethodListSize: isec->data.size());
2062	sectionSize += relativeListSize;
2063
2064	// If encoding the method list in relative offset format shrinks the size,
2065	// then we also need to adjust symbol sizes to match the new size. Note that
2066	// on 32bit platforms the size of the method list will remain the same when
2067	// encoded in relative offset format.
2068	if (relativeListSize != isec->data.size()) {
2069	for (Symbol *sym : isec->symbols) {
2070	assert(isa<Defined>(sym) &&
2071	"Unexpected undefined symbol in ObjC method list");
2072	auto *def = cast<Defined>(Val: sym);
2073	// There can be 0-size symbols, check if this is the case and ignore
2074	// them.
2075	if (def->size) {
2076	assert(
2077	def->size == isec->data.size() &&
2078	"Invalid ObjC method list symbol size: expected symbol size to "
2079	"match isec size");
2080	def->size = relativeListSize;
2081	}
2082	}
2083	}
2084	}
2085	}
2086
2087	void ObjCMethListSection::writeTo(uint8_t bufStart) const* {
2088	uint8_t *buf = bufStart;
2089	for (const ConcatInputSection *isec : inputs) {
2090	assert(buf - bufStart == std::ptrdiff_t(isec->outSecOff) &&
2091	"Writing at unexpected offset");
2092	uint32_t writtenSize = writeRelativeMethodList(isec, buf);
2093	buf += writtenSize;
2094	}
2095	assert(buf - bufStart == std::ptrdiff_t(sectionSize) &&
2096	"Written size does not match expected section size");
2097	}
2098
2099	// Check if an InputSection is a method list. To do this we scan the
// InputSection for any symbols whose names match the patterns we expect clang
2101	// to generate for method lists.
2102	bool ObjCMethListSection::isMethodList(const ConcatInputSection *isec) {
2103	const char *symPrefixes[] = {objc::symbol_names::classMethods,
2104	objc::symbol_names::instanceMethods,
2105	objc::symbol_names::categoryInstanceMethods,
2106	objc::symbol_names::categoryClassMethods};
2107	if (!isec)
2108	return false;
2109	for (const Symbol *sym : isec->symbols) {
2110	auto *def = dyn_cast_or_null<Defined>(Val: sym);
2111	if (!def)
2112	continue;
2113	for (const char *prefix : symPrefixes) {
2114	if (def->getName().starts_with(Prefix: prefix)) {
2115	assert(def->size == isec->data.size() &&
2116	"Invalid ObjC method list symbol size: expected symbol size to "
2117	"match isec size");
2118	assert(def->value == `0` &&
2119	"Offset of ObjC method list symbol must be 0");
2120	return true;
2121	}
2122	}
2123	}
2124
2125	return false;
2126	}
2127
2128	// Encode a single relative offset value. The input is the data/symbol at
2129	// (&isec->data[inSecOff]). The output is written to (&buf[outSecOff]).
// 'useSelRef' indicates that we should not directly use the specified
// symbol, but instead get the selRef for the symbol and use that instead.
2132	void ObjCMethListSection::writeRelativeOffsetForIsec(
2133	const ConcatInputSection isec, uint8_t buf, uint32_t &inSecOff,
2134	uint32_t &outSecOff, bool useSelRef) const {
2135	const Reloc *reloc = isec->getRelocAt(off: inSecOff);
2136	assert(reloc && "Relocation expected at __objc_methlist Offset");
2137
2138	uint32_t symVA = `0`;
2139	if (useSelRef) {
2140	StringRef methname = reloc->getReferentString();
2141	ConcatInputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
2142	assert(selRef && "Expected all selector names to already be already be "
2143	"present in __objc_selrefs");
2144	symVA = selRef->getVA();
2145	assert(selRef->data.size() == target->wordSize &&
2146	"Expected one selref per ConcatInputSection");
2147	} else if (auto sym = dyn_cast<Symbol >(Val: reloc->referent)) {
2148	auto *def = dyn_cast_or_null<Defined>(Val: sym);
2149	assert(def && "Expected all syms in __objc_methlist to be defined");
2150	symVA = def->getVA();
2151	} else {
2152	auto isec = cast<InputSection >(Val: reloc->referent);
2153	symVA = isec->getVA(off: reloc->addend);
2154	}
2155
2156	uint32_t currentVA = isec->getVA() + outSecOff;
2157	uint32_t delta = symVA - currentVA;
2158	write32le(P: buf + outSecOff, V: delta);
2159
2160	// Move one pointer forward in the absolute method list
2161	inSecOff += target->wordSize;
2162	// Move one relative offset forward in the relative method list (32 bits)
2163	outSecOff += relativeOffsetSize;
2164	}
2165
2166	// Write a relative method list to buf, return the size of the written
2167	// information
2168	uint32_t
2169	ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection *isec,
2170	uint8_t buf) const* {
2171	// Copy over the header, and add the "this is a relative method list" magic
2172	// value flag
2173	uint32_t structSizeAndFlags = `0`, structCount = `0`;
2174	readMethodListHeader(buf: isec->data.data(), structSizeAndFlags, structCount);
2175	// Set the struct size for the relative method list
2176	uint32_t relativeStructSizeAndFlags =
2177	(relativeOffsetSize * pointersPerStruct) & structSizeMask;
2178	// Carry over the old flags from the input struct
2179	relativeStructSizeAndFlags \|= structSizeAndFlags & structFlagsMask;
2180	// Set the relative method list flag
2181	relativeStructSizeAndFlags \|= relMethodHeaderFlag;
2182
2183	writeMethodListHeader(buf, structSizeAndFlags: relativeStructSizeAndFlags, structCount);
2184
2185	assert(methodListHeaderSize +
2186	(structCount * pointersPerStruct * target->wordSize) ==
2187	isec->data.size() &&
2188	"Invalid computed ObjC method list size");
2189
2190	uint32_t inSecOff = methodListHeaderSize;
2191	uint32_t outSecOff = methodListHeaderSize;
2192
2193	// Go through the method list and encode input absolute pointers as relative
2194	// offsets. writeRelativeOffsetForIsec will be incrementing inSecOff and
2195	// outSecOff
2196	for (uint32_t i = `0`; i < structCount; i++) {
2197	// Write the name of the method
2198	writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, useSelRef: true);
2199	// Write the type of the method
2200	writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, useSelRef: false);
2201	// Write reference to the selector of the method
2202	writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, useSelRef: false);
2203	}
2204
2205	// Expecting to have read all the data in the isec
2206	assert(inSecOff == isec->data.size() &&
2207	"Invalid actual ObjC method list size");
2208	assert(
2209	outSecOff == computeRelativeMethodListSize(inSecOff) &&
2210	"Mismatch between input & output size when writing relative method list");
2211	return outSecOff;
2212	}
2213
2214	// Given the size of an ObjC method list InputSection, return the size of the
2215	// method list when encoded in relative offsets format. We can do this without
2216	// decoding the actual data, as it can be directly inferred from the size of the
2217	// isec.
2218	uint32_t ObjCMethListSection::computeRelativeMethodListSize(
2219	uint32_t absoluteMethodListSize) const {
2220	uint32_t oldPointersSize = absoluteMethodListSize - methodListHeaderSize;
2221	uint32_t pointerCount = oldPointersSize / target->wordSize;
2222	assert(((pointerCount % pointersPerStruct) == `0`) &&
2223	"__objc_methlist expects method lists to have multiple-of-3 pointers");
2224
2225	uint32_t newPointersSize = pointerCount * relativeOffsetSize;
2226	uint32_t newTotalSize = methodListHeaderSize + newPointersSize;
2227
2228	assert((newTotalSize <= absoluteMethodListSize) &&
2229	"Expected relative method list size to be smaller or equal than "
2230	"original size");
2231	return newTotalSize;
2232	}
2233
2234	// Read a method list header from buf
2235	void ObjCMethListSection::readMethodListHeader(const uint8_t *buf,
2236	uint32_t &structSizeAndFlags,
2237	uint32_t &structCount) const {
2238	structSizeAndFlags = read32le(P: buf);
2239	structCount = read32le(P: buf + sizeof(uint32_t));
2240	}
2241
2242	// Write a method list header to buf
2243	void ObjCMethListSection::writeMethodListHeader(uint8_t *buf,
2244	uint32_t structSizeAndFlags,
2245	uint32_t structCount) const {
2246	write32le(P: buf, V: structSizeAndFlags);
2247	write32le(P: buf + sizeof(structSizeAndFlags), V: structCount);
2248	}
2249
2250	void macho::createSyntheticSymbols() {
2251	auto addHeaderSymbol = [](const char *name) {
2252	symtab ->addSynthetic(name, in.header->isec, /value=/`0`,
2253	/isPrivateExtern=/true, /includeInSymtab=/false,
2254	/referencedDynamically=/false);
2255	};
2256
2257	switch (config ->outputType) {
2258	// FIXME: Assign the right address value for these symbols
2259	// (rather than 0). But we need to do that after assignAddresses().
2260	case MH_EXECUTE:
2261	// If linking PIE, __mh_execute_header is a defined symbol in
2262	// __TEXT, __text)
2263	// Otherwise, it's an absolute symbol.
2264	if (config ->isPic)
2265	symtab ->addSynthetic(name: "__mh_execute_header", in.header->isec, /value=/`0`,
2266	/isPrivateExtern=/false, /includeInSymtab=/true,
2267	/referencedDynamically=/true);
2268	else
2269	symtab ->addSynthetic(name: "__mh_execute_header", /isec=/nullptr, /value=/`0`,
2270	/isPrivateExtern=/false, /includeInSymtab=/true,
2271	/referencedDynamically=/true);
2272	break;
2273
2274	// The following symbols are N_SECT symbols, even though the header is not
2275	// part of any section and that they are private to the bundle/dylib/object
2276	// they are part of.
2277	case MH_BUNDLE:
2278	addHeaderSymbol ("__mh_bundle_header");
2279	break;
2280	case MH_DYLIB:
2281	addHeaderSymbol ("__mh_dylib_header");
2282	break;
2283	case MH_DYLINKER:
2284	addHeaderSymbol ("__mh_dylinker_header");
2285	break;
2286	case MH_OBJECT:
2287	addHeaderSymbol ("__mh_object_header");
2288	break;
2289	default:
2290	llvm_unreachable("unexpected outputType");
2291	break;
2292	}
2293
2294	// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
2295	// which does e.g. cleanup of static global variables. The ABI document
2296	// says that the pointer can point to any address in one of the dylib's
2297	// segments, but in practice ld64 seems to set it to point to the header,
2298	// so that's what's implemented here.
2299	addHeaderSymbol ("___dso_handle");
2300	}
2301
2302	ChainedFixupsSection::ChainedFixupsSection()
2303	: LinkEditSection (segment_names::linkEdit, section_names::chainFixups) {}
2304
2305	bool ChainedFixupsSection::isNeeded() const {
2306	assert(config->emitChainedFixups);
2307	// dyld always expects LC_DYLD_CHAINED_FIXUPS to point to a valid
2308	// dyld_chained_fixups_header, so we create this section even if there aren't
2309	// any fixups.
2310	return true;
2311	}
2312
2313	void ChainedFixupsSection::addBinding(const Symbol *sym,
2314	const InputSection *isec, uint64_t offset,
2315	int64_t addend) {
2316	locations.emplace_back(args&: isec, args&: offset);
2317	int64_t outlineAddend = (addend < `0` \|\| addend > `0xFF`) ? addend : `0`;
2318	auto [it, inserted] = bindings.insert(
2319	KV: {{sym, outlineAddend}, static_cast<uint32_t>(bindings.size())});
2320
2321	if (inserted) {
2322	symtabSize += sym->getName().size() + `1`;
2323	hasWeakBind = hasWeakBind \|\| needsWeakBind(sym: *sym);
2324	if (!isInt<`23`>(x: outlineAddend))
2325	needsLargeAddend = true;
2326	else if (outlineAddend != `0`)
2327	needsAddend = true;
2328	}
2329	}
2330
2331	std::pair<uint32_t, uint8_t>
2332	ChainedFixupsSection::getBinding(const Symbol sym, int64_t addend) const* {
2333	int64_t outlineAddend = (addend < `0` \|\| addend > `0xFF`) ? addend : `0`;
2334	auto it = bindings.find(Key: {sym, outlineAddend});
2335	assert(it != bindings.end() && "binding not found in the imports table");
2336	if (outlineAddend == `0`)
2337	return {it->second, addend};
2338	return {it->second, `0`};
2339	}
2340
2341	static size_t writeImport(uint8_t buf, int* format, int16_t libOrdinal,
2342	bool weakRef, uint32_t nameOffset, int64_t addend) {
2343	switch (format) {
2344	case DYLD_CHAINED_IMPORT: {
2345	auto import = reinterpret_cast<dyld_chained_import >(buf);
2346	import->lib_ordinal = libOrdinal;
2347	import->weak_import = weakRef;
2348	import->name_offset = nameOffset;
2349	return sizeof(dyld_chained_import);
2350	}
2351	case DYLD_CHAINED_IMPORT_ADDEND: {
2352	auto import = reinterpret_cast<dyld_chained_import_addend >(buf);
2353	import->lib_ordinal = libOrdinal;
2354	import->weak_import = weakRef;
2355	import->name_offset = nameOffset;
2356	import->addend = addend;
2357	return sizeof(dyld_chained_import_addend);
2358	}
2359	case DYLD_CHAINED_IMPORT_ADDEND64: {
2360	auto import = reinterpret_cast<dyld_chained_import_addend64 >(buf);
2361	import->lib_ordinal = libOrdinal;
2362	import->weak_import = weakRef;
2363	import->name_offset = nameOffset;
2364	import->addend = addend;
2365	return sizeof(dyld_chained_import_addend64);
2366	}
2367	default:
2368	llvm_unreachable("Unknown import format");
2369	}
2370	}
2371
2372	size_t ChainedFixupsSection::SegmentInfo::getSize() const {
2373	assert(pageStarts.size() > `0` && "SegmentInfo for segment with no fixups?");
2374	return alignTo<`8`>(Value: sizeof(dyld_chained_starts_in_segment) +
2375	pageStarts.back().first * sizeof(uint16_t));
2376	}
2377
2378	size_t ChainedFixupsSection::SegmentInfo::writeTo(uint8_t buf) const* {
2379	auto segInfo = reinterpret_cast<dyld_chained_starts_in_segment >(buf);
2380	segInfo->size = getSize();
2381	segInfo->page_size = target->getPageSize();
2382	// FIXME: Use DYLD_CHAINED_PTR_64_OFFSET on newer OS versions.
2383	segInfo->pointer_format = DYLD_CHAINED_PTR_64;
2384	segInfo->segment_offset = oseg->addr - in.header->addr;
2385	segInfo->max_valid_pointer = `0`; // not used on 64-bit
2386	segInfo->page_count = pageStarts.back().first + `1`;
2387
2388	uint16_t *starts = segInfo->page_start;
2389	for (size_t i = `0`; i < segInfo->page_count; ++i)
2390	starts[i] = DYLD_CHAINED_PTR_START_NONE;
2391
2392	for (auto [pageIdx, startAddr] : pageStarts)
2393	starts[pageIdx] = startAddr;
2394	return segInfo->size;
2395	}
2396
2397	static size_t importEntrySize(int format) {
2398	switch (format) {
2399	case DYLD_CHAINED_IMPORT:
2400	return sizeof(dyld_chained_import);
2401	case DYLD_CHAINED_IMPORT_ADDEND:
2402	return sizeof(dyld_chained_import_addend);
2403	case DYLD_CHAINED_IMPORT_ADDEND64:
2404	return sizeof(dyld_chained_import_addend64);
2405	default:
2406	llvm_unreachable("Unknown import format");
2407	}
2408	}
2409
2410	// This is step 3 of the algorithm described in the class comment of
2411	// ChainedFixupsSection.
2412	//
2413	// LC_DYLD_CHAINED_FIXUPS data consists of (in this order):
//   * A dyld_chained_fixups_header
//   * A dyld_chained_starts_in_image
//   * One dyld_chained_starts_in_segment per segment
//   * List of all imports (dyld_chained_import, dyld_chained_import_addend, or
//     dyld_chained_import_addend64)
//   * Names of imported symbols
2420	void ChainedFixupsSection::writeTo(uint8_t buf) const* {
2421	auto header = reinterpret_cast<dyld_chained_fixups_header >(buf);
2422	header->fixups_version = `0`;
2423	header->imports_count = bindings.size();
2424	header->imports_format = importFormat;
2425	header->symbols_format = `0`;
2426
2427	buf += alignTo<`8`>(Value: sizeof(*header));
2428
2429	auto curOffset = [&buf, &header]() -> uint32_t {
2430	return buf - reinterpret_cast<uint8_t *>(header);
2431	};
2432
2433	header->starts_offset = curOffset ();
2434
2435	auto imageInfo = reinterpret_cast<dyld_chained_starts_in_image >(buf);
2436	imageInfo->seg_count = outputSegments.size();
2437	uint32_t *segStarts = imageInfo->seg_info_offset;
2438
2439	// dyld_chained_starts_in_image ends in a flexible array member containing an
2440	// uint32_t for each segment. Leave room for it, and fill it via segStarts.
2441	buf += alignTo<`8`>(offsetof(dyld_chained_starts_in_image, seg_info_offset) +
2442	outputSegments.size() * sizeof(uint32_t));
2443
2444	// Initialize all offsets to 0, which indicates that the segment does not have
2445	// fixups. Those that do have them will be filled in below.
2446	for (size_t i = `0`; i < outputSegments.size(); ++i)
2447	segStarts[i] = `0`;
2448
2449	for (const SegmentInfo &seg : fixupSegments) {
2450	segStarts[seg.oseg->index] = curOffset () - header->starts_offset;
2451	buf += seg.writeTo(buf);
2452	}
2453
2454	// Write imports table.
2455	header->imports_offset = curOffset ();
2456	uint64_t nameOffset = `0`;
2457	for (auto [import, idx] : bindings) {
2458	const Symbol &sym = *import.first;
2459	buf += writeImport(buf, format: importFormat, libOrdinal: ordinalForSymbol(sym),
2460	weakRef: sym.isWeakRef(), nameOffset, addend: import.second);
2461	nameOffset += sym.getName().size() + `1`;
2462	}
2463
2464	// Write imported symbol names.
2465	header->symbols_offset = curOffset ();
2466	for (auto [import, idx] : bindings) {
2467	StringRef name = import.first->getName();
2468	memcpy(dest: buf, src: name.data(), n: name.size());
2469	buf += name.size() + `1`; // account for null terminator
2470	}
2471
2472	assert(curOffset() == getRawSize());
2473	}
2474
2475	// This is step 2 of the algorithm described in the class comment of
2476	// ChainedFixupsSection.
2477	void ChainedFixupsSection::finalizeContents() {
2478	assert(target->wordSize == `8` && "Only 64-bit platforms are supported");
2479	assert(config->emitChainedFixups);
2480
2481	if (!isUInt<`32`>(x: symtabSize))
2482	error(msg: "cannot encode chained fixups: imported symbols table size " +
2483	Twine(symtabSize) + " exceeds 4 GiB");
2484
2485	bool needsLargeOrdinal = any_of(Range&: bindings, P: [](const auto &p) {
2486	// 0xF1 - 0xFF are reserved for special ordinals in the 8-bit encoding.
2487	return ordinalForSymbol(*p.first.first) > `0xF0`;
2488	});
2489
2490	if (needsLargeAddend \|\| !isUInt<`23`>(x: symtabSize) \|\| needsLargeOrdinal)
2491	importFormat = DYLD_CHAINED_IMPORT_ADDEND64;
2492	else if (needsAddend)
2493	importFormat = DYLD_CHAINED_IMPORT_ADDEND;
2494	else
2495	importFormat = DYLD_CHAINED_IMPORT;
2496
2497	for (Location &loc : locations)
2498	loc.offset =
2499	loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(off: loc.offset);
2500
2501	llvm::sort(C&: locations, Comp: [](const Location &a, const Location &b) {
2502	const OutputSegment *segA = a.isec->parent->parent;
2503	const OutputSegment *segB = b.isec->parent->parent;
2504	if (segA == segB)
2505	return a.offset < b.offset;
2506	return segA->addr < segB->addr;
2507	});
2508
2509	auto sameSegment = [](const Location &a, const Location &b) {
2510	return a.isec->parent->parent == b.isec->parent->parent;
2511	};
2512
2513	const uint64_t pageSize = target->getPageSize();
2514	for (size_t i = `0`, count = locations.size(); i < count;) {
2515	const Location &firstLoc = locations [i];
2516	fixupSegments.emplace_back(Args&: firstLoc.isec->parent->parent);
2517	while (i < count && sameSegment (locations [i], firstLoc)) {
2518	uint32_t pageIdx = locations [i].offset / pageSize;
2519	fixupSegments.back().pageStarts.emplace_back(
2520	Args&: pageIdx, Args: locations [i].offset % pageSize);
2521	++i;
2522	while (i < count && sameSegment (locations [i], firstLoc) &&
2523	locations [i].offset / pageSize == pageIdx)
2524	++i;
2525	}
2526	}
2527
2528	// Compute expected encoded size.
2529	size = alignTo<`8`>(Value: sizeof(dyld_chained_fixups_header));
2530	size += alignTo<`8`>(offsetof(dyld_chained_starts_in_image, seg_info_offset) +
2531	outputSegments.size() * sizeof(uint32_t));
2532	for (const SegmentInfo &seg : fixupSegments)
2533	size += seg.getSize();
2534	size += importEntrySize(format: importFormat) * bindings.size();
2535	size += symtabSize;
2536	}
2537
2538	template SymtabSection *macho::makeSymtabSection<LP64>(StringTableSection &);
2539	template SymtabSection *macho::makeSymtabSection<ILP32>(StringTableSection &);
2540

Browse the source code of llvm_projects/lld/MachO/SyntheticSections.cpp