SyntheticSections.cpp source code [llvm_projects/lld/MachO/SyntheticSections.cpp]

1	//===- SyntheticSections.cpp ---------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "SyntheticSections.h"
10	#include "ConcatOutputSection.h"
11	#include "Config.h"
12	#include "ExportTrie.h"
13	#include "InputFiles.h"
14	#include "MachOStructs.h"
15	#include "ObjC.h"
16	#include "OutputSegment.h"
17	#include "SymbolTable.h"
18	#include "Symbols.h"
19
20	#include "lld/Common/CommonLinkerContext.h"
21	#include "llvm/ADT/STLExtras.h"
22	#include "llvm/Config/llvm-config.h"
23	#include "llvm/Support/EndianStream.h"
24	#include "llvm/Support/FileSystem.h"
25	#include "llvm/Support/LEB128.h"
26	#include "llvm/Support/Parallel.h"
27	#include "llvm/Support/Path.h"
28	#include "llvm/Support/xxhash.h"
29
30	#if defined(__APPLE__)
31	#include <sys/mman.h>
32
33	#define COMMON_DIGEST_FOR_OPENSSL
34	#include <CommonCrypto/CommonDigest.h>
35	#else
36	#include "llvm/Support/SHA256.h"
37	#endif
38
39	using namespace llvm;
40	using namespace llvm::MachO;
41	using namespace llvm::support;
42	using namespace llvm::support::endian;
43	using namespace lld;
44	using namespace lld::macho;
45
46	// Reads `len` bytes at data and writes the 32-byte SHA256 checksum to `output`.
47	static void sha256(const uint8_t data, size_t len, uint8_t output) {
48	#if defined(__APPLE__)
49	// FIXME: Make LLVM's SHA256 faster and use it unconditionally. See PR56121
50	// for some notes on this.
51	CC_SHA256(data, len, output);
52	#else
53	ArrayRef<uint8_t> block(data, len);
54	std::array<uint8_t, `32`> hash = SHA256::hash(Data: block);
55	static_assert(hash.size() == CodeSignatureSection::hashSize);
56	memcpy(dest: output, src: hash.data(), n: hash.size());
57	#endif
58	}
59
// Definition of the `in` aggregate; its members (e.g. in.got, in.binding,
// in.rebase) are the section instances referenced throughout this file.
60	InStruct macho::in;
// Registry of synthetic sections: the SyntheticSection constructor appends
// each newly created instance to this vector.
61	std::vector<SyntheticSection *> macho::syntheticSections;
62
63	SyntheticSection::SyntheticSection(const char segname, const* char *name)
64	: OutputSection (SyntheticKind, name) {
65	std::tie(args&: this->segname, args&: this->name) = maybeRenameSection(key: {segname, name});
66	isec = makeSyntheticInputSection(segName: segname, sectName: name);
67	isec->parent = this;
68	syntheticSections.push_back(x: this);
69	}
70
71	// dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
72	// from the beginning of the file (i.e. the header).
73	MachHeaderSection::MachHeaderSection()
74	: SyntheticSection (segment_names::text, section_names::header) {
75	// XXX: This is a hack. (See D97007)
76	// Setting the index to 1 to pretend that this section is the text
77	// section.
78	index = `1`;
79	isec->isFinal = true;
80	}
81
82	void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
83	loadCommands.push_back(x: lc);
84	sizeOfCmds += lc->getSize();
85	}
86
87	uint64_t MachHeaderSection::getSize() const {
88	uint64_t size = target->headerSize + sizeOfCmds + config ->headerPad;
89	// If we are emitting an encryptable binary, our load commands must have a
90	// separate (non-encrypted) page to themselves.
91	if (config ->emitEncryptionInfo)
92	size = alignToPowerOf2(Value: size, Align: target->getPageSize());
93	return size;
94	}
95
96	static uint32_t cpuSubtype() {
97	uint32_t subtype = target->cpuSubtype;
98
99	if (config ->outputType == MH_EXECUTE && !config ->staticLink &&
100	target->cpuSubtype == CPU_SUBTYPE_X86_64_ALL &&
101	config ->platform() == PLATFORM_MACOS &&
102	config ->platformInfo.target.MinDeployment >= VersionTuple (`10`, `5`))
103	subtype \|= CPU_SUBTYPE_LIB64;
104
105	return subtype;
106	}
107
108	static bool hasWeakBinding() {
109	return config ->emitChainedFixups ? in.chainedFixups->hasWeakBinding()
110	: in.weakBinding->hasEntry();
111	}
112
113	static bool hasNonWeakDefinition() {
114	return config ->emitChainedFixups ? in.chainedFixups->hasNonWeakDefinition()
115	: in.weakBinding->hasNonWeakDefinition();
116	}
117
118	void MachHeaderSection::writeTo(uint8_t buf) const* {
119	auto hdr = reinterpret_cast<mach_header >(buf);
120	hdr->magic = target->magic;
121	hdr->cputype = target->cpuType;
122	hdr->cpusubtype = cpuSubtype();
123	hdr->filetype = config ->outputType;
124	hdr->ncmds = loadCommands.size();
125	hdr->sizeofcmds = sizeOfCmds;
126	hdr->flags = MH_DYLDLINK;
127
128	if (config ->namespaceKind == NamespaceKind::twolevel)
129	hdr->flags \|= MH_NOUNDEFS \| MH_TWOLEVEL;
130
131	if (config ->outputType == MH_DYLIB && !config ->hasReexports)
132	hdr->flags \|= MH_NO_REEXPORTED_DYLIBS;
133
134	if (config ->markDeadStrippableDylib)
135	hdr->flags \|= MH_DEAD_STRIPPABLE_DYLIB;
136
137	if (config ->outputType == MH_EXECUTE && config ->isPic)
138	hdr->flags \|= MH_PIE;
139
140	if (config ->outputType == MH_DYLIB && config ->applicationExtension)
141	hdr->flags \|= MH_APP_EXTENSION_SAFE;
142
143	if (in.exports->hasWeakSymbol \|\| hasNonWeakDefinition())
144	hdr->flags \|= MH_WEAK_DEFINES;
145
146	if (in.exports->hasWeakSymbol \|\| hasWeakBinding())
147	hdr->flags \|= MH_BINDS_TO_WEAK;
148
149	for (const OutputSegment *seg : outputSegments) {
150	for (const OutputSection *osec : seg->getSections()) {
151	if (isThreadLocalVariables(flags: osec->flags)) {
152	hdr->flags \|= MH_HAS_TLV_DESCRIPTORS;
153	break;
154	}
155	}
156	}
157
158	uint8_t p = reinterpret_cast<uint8_t >(hdr) + target->headerSize;
159	for (const LoadCommand *lc : loadCommands) {
160	lc->writeTo(buf: p);
161	p += lc->getSize();
162	}
163	}
164
165	PageZeroSection::PageZeroSection()
166	: SyntheticSection (segment_names::pageZero, section_names::pageZero) {}
167
168	RebaseSection::RebaseSection()
169	: LinkEditSection (segment_names::linkEdit, section_names::rebase) {}
170
namespace {
// A pending run of evenly spaced rebase locations that has not been emitted
// yet.
struct RebaseState {
  // Number of locations in the run.
  uint64_t sequenceLength;
  // Distance in bytes between the starts of consecutive locations in the run.
  uint64_t skipLength;
};
} // namespace
177
178	static void emitIncrement(uint64_t incr, raw_svector_ostream &os) {
179	assert(incr != `0`);
180
181	if ((incr >> target->p2WordSize) <= REBASE_IMMEDIATE_MASK &&
182	(incr % target->wordSize) == `0`) {
183	os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED \|
184	(incr >> target->p2WordSize));
185	} else {
186	os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB);
187	encodeULEB128(Value: incr, OS&: os);
188	}
189	}
190
191	static void flushRebase(const RebaseState &state, raw_svector_ostream &os) {
192	assert(state.sequenceLength > `0`);
193
194	if (state.skipLength == target->wordSize) {
195	if (state.sequenceLength <= REBASE_IMMEDIATE_MASK) {
196	os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES \|
197	state.sequenceLength);
198	} else {
199	os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
200	encodeULEB128(Value: state.sequenceLength, OS&: os);
201	}
202	} else if (state.sequenceLength == `1`) {
203	os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB);
204	encodeULEB128(Value: state.skipLength - target->wordSize, OS&: os);
205	} else {
206	os << static_cast<uint8_t>(
207	REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB);
208	encodeULEB128(Value: state.sequenceLength, OS&: os);
209	encodeULEB128(Value: state.skipLength - target->wordSize, OS&: os);
210	}
211	}
212
213	// Rebases are communicated to dyld using a bytecode, whose opcodes cause the
214	// memory location at a specific address to be rebased and/or the address to be
215	// incremented.
216	//
217	// Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic
218	// one, encoding a series of evenly spaced addresses. This algorithm works by
219	// splitting up the sorted list of addresses into such chunks. If the locations
220	// are consecutive or the sequence consists of a single location, flushRebase
221	// will use a smaller, more specialized encoding.
222	static void encodeRebases(const OutputSegment *seg,
223	MutableArrayRef<Location> locations,
224	raw_svector_ostream &os) {
225	// dyld operates on segments. Translate section offsets into segment offsets.
226	for (Location &loc : locations)
227	loc.offset =
228	loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(off: loc.offset);
229	// The algorithm assumes that locations are unique.
230	Location *end =
231	llvm::unique(R&: locations, P: [](const Location &a, const Location &b) {
232	return a.offset == b.offset;
233	});
234	size_t count = end - locations.begin();
235
236	os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB \|
237	seg->index);
238	assert(!locations.empty());
239	uint64_t offset = locations [`0`].offset;
240	encodeULEB128(Value: offset, OS&: os);
241
242	RebaseState state{.sequenceLength: `1`, .skipLength: target->wordSize};
243
244	for (size_t i = `1`; i < count; ++i) {
245	offset = locations [i].offset;
246
247	uint64_t skip = offset - locations [i - `1`].offset;
248	assert(skip != `0` && "duplicate locations should have been weeded out");
249
250	if (skip == state.skipLength) {
251	++state.sequenceLength;
252	} else if (state.sequenceLength == `1`) {
253	++state.sequenceLength;
254	state.skipLength = skip;
255	} else if (skip < state.skipLength) {
256	// The address is lower than what the rebase pointer would be if the last
257	// location would be part of a sequence. We start a new sequence from the
258	// previous location.
259	--state.sequenceLength;
260	flushRebase(state, os);
261
262	state.sequenceLength = `2`;
263	state.skipLength = skip;
264	} else {
265	// The address is at some positive offset from the rebase pointer. We
266	// start a new sequence which begins with the current location.
267	flushRebase(state, os);
268	emitIncrement(incr: skip - state.skipLength, os);
269	state.sequenceLength = `1`;
270	state.skipLength = target->wordSize;
271	}
272	}
273	flushRebase(state, os);
274	}
275
276	void RebaseSection::finalizeContents() {
277	if (locations.empty())
278	return;
279
280	raw_svector_ostream os{contents};
281	os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM \| REBASE_TYPE_POINTER);
282
283	llvm::sort(C&: locations, Comp: [](const Location &a, const Location &b) {
284	return a.isec->getVA(off: a.offset) < b.isec->getVA(off: b.offset);
285	});
286
287	for (size_t i = `0`, count = locations.size(); i < count;) {
288	const OutputSegment *seg = locations [i].isec->parent->parent;
289	size_t j = i + `1`;
290	while (j < count && locations [j].isec->parent->parent == seg)
291	++j;
292	encodeRebases(seg, locations: {locations.data() + i, locations.data() + j}, os);
293	i = j;
294	}
295	os << static_cast<uint8_t>(REBASE_OPCODE_DONE);
296	}
297
298	void RebaseSection::writeTo(uint8_t buf) const* {
299	memcpy(dest: buf, src: contents.data(), n: contents.size());
300	}
301
302	NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname,
303	const char *name)
304	: SyntheticSection (segname, name) {
305	align = target->wordSize;
306	}
307
308	void macho::addNonLazyBindingEntries(const Symbol *sym,
309	const InputSection *isec, uint64_t offset,
310	int64_t addend) {
311	if (config ->emitChainedFixups) {
312	if (needsBinding(sym))
313	in.chainedFixups->addBinding(dysym: sym, isec, offset, addend);
314	else if (isa<Defined>(Val: sym))
315	in.chainedFixups->addRebase(isec, offset);
316	else
317	llvm_unreachable("cannot bind to an undefined symbol");
318	return;
319	}
320
321	if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
322	in.binding->addEntry(dysym, isec, offset, addend);
323	if (dysym->isWeakDef())
324	in.weakBinding->addEntry(symbol: sym, isec, offset, addend);
325	} else if (const auto *defined = dyn_cast<Defined>(Val: sym)) {
326	in.rebase->addEntry(isec, offset);
327	if (defined->isExternalWeakDef())
328	in.weakBinding->addEntry(symbol: sym, isec, offset, addend);
329	else if (defined->interposable)
330	in.binding->addEntry(dysym: sym, isec, offset, addend);
331	} else {
332	// Undefined symbols are filtered out in scanRelocations(); we should never
333	// get here
334	llvm_unreachable("cannot bind to an undefined symbol");
335	}
336	}
337
338	void NonLazyPointerSectionBase::addEntry(Symbol *sym) {
339	if (entries.insert(X: sym)) {
340	assert(!sym->isInGot());
341	sym->gotIndex = entries.size() - `1`;
342
343	addNonLazyBindingEntries(sym, isec, offset: sym->gotIndex * target->wordSize);
344	}
345	}
346
347	void macho::writeChainedRebase(uint8_t *buf, uint64_t targetVA) {
348	assert(config->emitChainedFixups);
349	assert(target->wordSize == `8` && "Only 64-bit platforms are supported");
350	auto rebase = reinterpret_cast<dyld_chained_ptr_64_rebase >(buf);
351	rebase->target = targetVA & `0xf'ffff'ffff`;
352	rebase->high8 = (targetVA >> `56`);
353	rebase->reserved = `0`;
354	rebase->next = `0`;
355	rebase->bind = `0`;
356
357	// The fixup format places a 64 GiB limit on the output's size.
358	// Should we handle this gracefully?
359	uint64_t encodedVA = rebase->target \| ((uint64_t)rebase->high8 << `56`);
360	if (encodedVA != targetVA)
361	error(msg: "rebase target address 0x" + Twine::utohexstr(Val: targetVA) +
362	" does not fit into chained fixup. Re-link with -no_fixup_chains");
363	}
364
365	static void writeChainedBind(uint8_t buf, const* Symbol *sym, int64_t addend) {
366	assert(config->emitChainedFixups);
367	assert(target->wordSize == `8` && "Only 64-bit platforms are supported");
368	auto bind = reinterpret_cast<dyld_chained_ptr_64_bind >(buf);
369	auto [ordinal, inlineAddend] = in.chainedFixups->getBinding(sym, addend);
370	bind->ordinal = ordinal;
371	bind->addend = inlineAddend;
372	bind->reserved = `0`;
373	bind->next = `0`;
374	bind->bind = `1`;
375	}
376
377	void macho::writeChainedFixup(uint8_t buf, const* Symbol *sym, int64_t addend) {
378	if (needsBinding(sym))
379	writeChainedBind(buf, sym, addend);
380	else
381	writeChainedRebase(buf, targetVA: sym->getVA() + addend);
382	}
383
384	void NonLazyPointerSectionBase::writeTo(uint8_t buf) const* {
385	if (config ->emitChainedFixups) {
386	for (const auto &[i, entry] : llvm::enumerate(First: entries))
387	writeChainedFixup(buf: &buf[i * target->wordSize], sym: entry, addend: `0`);
388	} else {
389	for (const auto &[i, entry] : llvm::enumerate(First: entries))
390	if (auto *defined = dyn_cast<Defined>(Val: entry))
391	write64le(P: &buf[i * target->wordSize], V: defined->getVA());
392	}
393	}
394
395	GotSection::GotSection()
396	: NonLazyPointerSectionBase (segment_names::data, section_names::got) {
397	flags = S_NON_LAZY_SYMBOL_POINTERS;
398	}
399
400	TlvPointerSection::TlvPointerSection()
401	: NonLazyPointerSectionBase (segment_names::data,
402	section_names::threadPtrs) {
403	flags = S_THREAD_LOCAL_VARIABLE_POINTERS;
404	}
405
406	BindingSection::BindingSection()
407	: LinkEditSection (segment_names::linkEdit, section_names::binding) {}
408
409	namespace {
410	struct Binding {
411	OutputSegment segment = nullptr*;
412	uint64_t offset = `0`;
413	int64_t addend = `0`;
414	};
415	struct BindIR {
416	// Default value of 0xF0 is not valid opcode and should make the program
417	// scream instead of accidentally writing "valid" values.
418	uint8_t opcode = `0xF0`;
419	uint64_t data = `0`;
420	uint64_t consecutiveCount = `0`;
421	};
422	} // namespace
423
424	// Encode a sequence of opcodes that tell dyld to write the address of symbol +
425	// addend at osec->addr + outSecOff.
426	//
427	// The bind opcode "interpreter" remembers the values of each binding field, so
428	// we only need to encode the differences between bindings. Hence the use of
429	// lastBinding.
430	static void encodeBinding(const OutputSection *osec, uint64_t outSecOff,
431	int64_t addend, Binding &lastBinding,
432	std::vector<BindIR> &opcodes) {
433	OutputSegment *seg = osec->parent;
434	uint64_t offset = osec->getSegmentOffset() + outSecOff;
435	if (lastBinding.segment != seg) {
436	opcodes.push_back(
437	x: {.opcode: static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB \|
438	seg->index),
439	.data: offset});
440	lastBinding.segment = seg;
441	lastBinding.offset = offset;
442	} else if (lastBinding.offset != offset) {
443	opcodes.push_back(x: {.opcode: BIND_OPCODE_ADD_ADDR_ULEB, .data: offset - lastBinding.offset});
444	lastBinding.offset = offset;
445	}
446
447	if (lastBinding.addend != addend) {
448	opcodes.push_back(
449	x: {.opcode: BIND_OPCODE_SET_ADDEND_SLEB, .data: static_cast<uint64_t>(addend)});
450	lastBinding.addend = addend;
451	}
452
453	opcodes.push_back(x: {.opcode: BIND_OPCODE_DO_BIND, .data: `0`});
454	// DO_BIND causes dyld to both perform the binding and increment the offset
455	lastBinding.offset += target->wordSize;
456	}
457
458	static void optimizeOpcodes(std::vector<BindIR> &opcodes) {
459	// Pass 1: Combine bind/add pairs
460	size_t i;
461	int pWrite = `0`;
462	for (i = `1`; i < opcodes.size(); ++i, ++pWrite) {
463	if ((opcodes [i].opcode == BIND_OPCODE_ADD_ADDR_ULEB) &&
464	(opcodes [i - `1`].opcode == BIND_OPCODE_DO_BIND)) {
465	opcodes [pWrite].opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB;
466	opcodes [pWrite].data = opcodes [i].data;
467	++i;
468	} else {
469	opcodes [pWrite] = opcodes [i - `1`];
470	}
471	}
472	if (i == opcodes.size())
473	opcodes [pWrite] = opcodes [i - `1`];
474	opcodes.resize(new_size: pWrite + `1`);
475
476	// Pass 2: Compress two or more bind_add opcodes
477	pWrite = `0`;
478	for (i = `1`; i < opcodes.size(); ++i, ++pWrite) {
479	if ((opcodes [i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
480	(opcodes [i - `1`].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
481	(opcodes [i].data == opcodes [i - `1`].data)) {
482	opcodes [pWrite].opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB;
483	opcodes [pWrite].consecutiveCount = `2`;
484	opcodes [pWrite].data = opcodes [i].data;
485	++i;
486	while (i < opcodes.size() &&
487	(opcodes [i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
488	(opcodes [i].data == opcodes [i - `1`].data)) {
489	opcodes [pWrite].consecutiveCount++;
490	++i;
491	}
492	} else {
493	opcodes [pWrite] = opcodes [i - `1`];
494	}
495	}
496	if (i == opcodes.size())
497	opcodes [pWrite] = opcodes [i - `1`];
498	opcodes.resize(new_size: pWrite + `1`);
499
500	// Pass 3: Use immediate encodings
501	// Every binding is the size of one pointer. If the next binding is a
502	// multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the
503	// opcode can be scaled by wordSize into a single byte and dyld will
504	// expand it to the correct address.
505	for (auto &p : opcodes) {
506	// It's unclear why the check needs to be less than BIND_IMMEDIATE_MASK,
507	// but ld64 currently does this. This could be a potential bug, but
508	// for now, perform the same behavior to prevent mysterious bugs.
509	if ((p.opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
510	((p.data / target->wordSize) < BIND_IMMEDIATE_MASK) &&
511	((p.data % target->wordSize) == `0`)) {
512	p.opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED;
513	p.data /= target->wordSize;
514	}
515	}
516	}
517
518	static void flushOpcodes(const BindIR &op, raw_svector_ostream &os) {
519	uint8_t opcode = op.opcode & BIND_OPCODE_MASK;
520	switch (opcode) {
521	case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
522	case BIND_OPCODE_ADD_ADDR_ULEB:
523	case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
524	os << op.opcode;
525	encodeULEB128(Value: op.data, OS&: os);
526	break;
527	case BIND_OPCODE_SET_ADDEND_SLEB:
528	os << op.opcode;
529	encodeSLEB128(Value: static_cast<int64_t>(op.data), OS&: os);
530	break;
531	case BIND_OPCODE_DO_BIND:
532	os << op.opcode;
533	break;
534	case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
535	os << op.opcode;
536	encodeULEB128(Value: op.consecutiveCount, OS&: os);
537	encodeULEB128(Value: op.data, OS&: os);
538	break;
539	case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
540	os << static_cast<uint8_t>(op.opcode \| op.data);
541	break;
542	default:
543	llvm_unreachable("cannot bind to an unrecognized symbol");
544	}
545	}
546
547	static bool needsWeakBind(const Symbol &sym) {
548	if (auto *dysym = dyn_cast<DylibSymbol>(Val: &sym))
549	return dysym->isWeakDef();
550	if (auto *defined = dyn_cast<Defined>(Val: &sym))
551	return defined->isExternalWeakDef();
552	return false;
553	}
554
555	// Non-weak bindings need to have their dylib ordinal encoded as well.
556	static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) {
557	if (config ->namespaceKind == NamespaceKind::flat \|\| dysym.isDynamicLookup())
558	return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP);
559	assert(dysym.getFile()->isReferenced());
560	return dysym.getFile()->ordinal;
561	}
562
563	static int16_t ordinalForSymbol(const Symbol &sym) {
564	if (config ->emitChainedFixups && needsWeakBind(sym))
565	return BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
566	if (const auto *dysym = dyn_cast<DylibSymbol>(Val: &sym))
567	return ordinalForDylibSymbol(dysym: *dysym);
568	assert(cast<Defined>(&sym)->interposable);
569	return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
570	}
571
572	static void encodeDylibOrdinal(int16_t ordinal, raw_svector_ostream &os) {
573	if (ordinal <= `0`) {
574	os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM \|
575	(ordinal & BIND_IMMEDIATE_MASK));
576	} else if (ordinal <= BIND_IMMEDIATE_MASK) {
577	os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM \| ordinal);
578	} else {
579	os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
580	encodeULEB128(Value: ordinal, OS&: os);
581	}
582	}
583
584	static void encodeWeakOverride(const Defined *defined,
585	raw_svector_ostream &os) {
586	os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM \|
587	BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION)
588	<< defined->getName() << `'\0'`;
589	}
590
591	// Organize the bindings so we can encoded them with fewer opcodes.
592	//
593	// First, all bindings for a given symbol should be grouped together.
594	// BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it
595	// has an associated symbol string), so we only want to emit it once per symbol.
596	//
597	// Within each group, we sort the bindings by address. Since bindings are
598	// delta-encoded, sorting them allows for a more compact result. Note that
599	// sorting by address alone ensures that bindings for the same segment / section
600	// are located together, minimizing the number of times we have to emit
601	// BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB.
602	//
603	// Finally, we sort the symbols by the address of their first binding, again
604	// to facilitate the delta-encoding process.
605	template <class Sym>
606	std::vector<std::pair<const Sym *, std::vector<BindingEntry>>>
607	sortBindings(const BindingsMap<const Sym *> &bindingsMap) {
608	std::vector<std::pair<const Sym *, std::vector<BindingEntry>>> bindingsVec(
609	bindingsMap.begin(), bindingsMap.end());
610	for (auto &p : bindingsVec) {
611	std::vector<BindingEntry> &bindings = p.second;
612	llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
613	return a.target.getVA() < b.target.getVA();
614	});
615	}
616	llvm::sort(bindingsVec, [](const auto &a, const auto &b) {
617	return a.second[`0`].target.getVA() < b.second[`0`].target.getVA();
618	});
619	return bindingsVec;
620	}
621
622	// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
623	// interprets to update a record with the following fields:
624	// segment index (of the segment to write the symbol addresses to, typically*
625	// the __DATA_CONST segment which contains the GOT)
626	// offset within the segment, indicating the next location to write a binding*
627	// symbol type*
628	// symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)*
629	// symbol name*
630	// addend*
631	// When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
632	// a symbol in the GOT, and increments the segment offset to point to the next
633	// entry. It does not* clear the record state after doing the bind, so*
634	// subsequent opcodes only need to encode the differences between bindings.
635	void BindingSection::finalizeContents() {
636	raw_svector_ostream os{contents};
637	Binding lastBinding;
638	int16_t lastOrdinal = `0`;
639
640	for (auto &p : sortBindings(bindingsMap)) {
641	const Symbol *sym = p.first;
642	std::vector<BindingEntry> &bindings = p.second;
643	uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
644	if (sym->isWeakRef())
645	flags \|= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
646	os << flags << sym->getName() << `'\0'`
647	<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM \| BIND_TYPE_POINTER);
648	int16_t ordinal = ordinalForSymbol(sym: *sym);
649	if (ordinal != lastOrdinal) {
650	encodeDylibOrdinal(ordinal, os);
651	lastOrdinal = ordinal;
652	}
653	std::vector<BindIR> opcodes;
654	for (const BindingEntry &b : bindings)
655	encodeBinding(osec: b.target.isec->parent,
656	outSecOff: b.target.isec->getOffset(off: b.target.offset), addend: b.addend,
657	lastBinding, opcodes);
658	if (config ->optimize > `1`)
659	optimizeOpcodes(opcodes);
660	for (const auto &op : opcodes)
661	flushOpcodes(op, os);
662	}
663	if (!bindingsMap.empty())
664	os << static_cast<uint8_t>(BIND_OPCODE_DONE);
665	}
666
667	void BindingSection::writeTo(uint8_t buf) const* {
668	memcpy(dest: buf, src: contents.data(), n: contents.size());
669	}
670
671	WeakBindingSection::WeakBindingSection()
672	: LinkEditSection (segment_names::linkEdit, section_names::weakBinding) {}
673
674	void WeakBindingSection::finalizeContents() {
675	raw_svector_ostream os{contents};
676	Binding lastBinding;
677
678	for (const Defined *defined : definitions)
679	encodeWeakOverride(defined, os);
680
681	for (auto &p : sortBindings(bindingsMap)) {
682	const Symbol *sym = p.first;
683	std::vector<BindingEntry> &bindings = p.second;
684	os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
685	<< sym->getName() << `'\0'`
686	<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM \| BIND_TYPE_POINTER);
687	std::vector<BindIR> opcodes;
688	for (const BindingEntry &b : bindings)
689	encodeBinding(osec: b.target.isec->parent,
690	outSecOff: b.target.isec->getOffset(off: b.target.offset), addend: b.addend,
691	lastBinding, opcodes);
692	if (config ->optimize > `1`)
693	optimizeOpcodes(opcodes);
694	for (const auto &op : opcodes)
695	flushOpcodes(op, os);
696	}
697	if (!bindingsMap.empty() \|\| !definitions.empty())
698	os << static_cast<uint8_t>(BIND_OPCODE_DONE);
699	}
700
701	void WeakBindingSection::writeTo(uint8_t buf) const* {
702	memcpy(dest: buf, src: contents.data(), n: contents.size());
703	}
704
705	StubsSection::StubsSection()
706	: SyntheticSection (segment_names::text, section_names::stubs) {
707	flags = S_SYMBOL_STUBS \| S_ATTR_SOME_INSTRUCTIONS \| S_ATTR_PURE_INSTRUCTIONS;
708	// The stubs section comprises machine instructions, which are aligned to
709	// 4 bytes on the archs we care about.
710	align = `4`;
711	reserved2 = target->stubSize;
712	}
713
714	uint64_t StubsSection::getSize() const {
715	return entries.size() * target->stubSize;
716	}
717
718	void StubsSection::writeTo(uint8_t buf) const* {
719	size_t off = `0`;
720	for (const Symbol *sym : entries) {
721	uint64_t pointerVA =
722	config ->emitChainedFixups ? sym->getGotVA() : sym->getLazyPtrVA();
723	target->writeStub(buf: buf + off, *sym, pointerVA);
724	off += target->stubSize;
725	}
726	}
727
728	void StubsSection::finalize() { isFinal = true; }
729
730	static void addBindingsForStub(Symbol *sym) {
731	assert(!config->emitChainedFixups);
732	if (auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
733	if (sym->isWeakDef()) {
734	in.binding->addEntry(dysym, isec: in.lazyPointers->isec,
735	offset: sym->stubsIndex * target->wordSize);
736	in.weakBinding->addEntry(symbol: sym, isec: in.lazyPointers->isec,
737	offset: sym->stubsIndex * target->wordSize);
738	} else {
739	in.lazyBinding->addEntry(dysym);
740	}
741	} else if (auto *defined = dyn_cast<Defined>(Val: sym)) {
742	if (defined->isExternalWeakDef()) {
743	in.rebase->addEntry(isec: in.lazyPointers->isec,
744	offset: sym->stubsIndex * target->wordSize);
745	in.weakBinding->addEntry(symbol: sym, isec: in.lazyPointers->isec,
746	offset: sym->stubsIndex * target->wordSize);
747	} else if (defined->interposable) {
748	in.lazyBinding->addEntry(dysym: sym);
749	} else {
750	llvm_unreachable("invalid stub target");
751	}
752	} else {
753	llvm_unreachable("invalid stub target symbol type");
754	}
755	}
756
757	void StubsSection::addEntry(Symbol *sym) {
758	bool inserted = entries.insert(X: sym);
759	if (inserted) {
760	sym->stubsIndex = entries.size() - `1`;
761
762	if (config ->emitChainedFixups)
763	in.got->addEntry(sym);
764	else
765	addBindingsForStub(sym);
766	}
767	}
768
769	StubHelperSection::StubHelperSection()
770	: SyntheticSection (segment_names::text, section_names::stubHelper) {
771	flags = S_ATTR_SOME_INSTRUCTIONS \| S_ATTR_PURE_INSTRUCTIONS;
772	align = `4`; // This section comprises machine instructions
773	}
774
775	uint64_t StubHelperSection::getSize() const {
776	return target->stubHelperHeaderSize +
777	in.lazyBinding->getEntries().size() * target->stubHelperEntrySize;
778	}
779
780	bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); }
781
782	void StubHelperSection::writeTo(uint8_t buf) const* {
783	target->writeStubHelperHeader(buf);
784	size_t off = target->stubHelperHeaderSize;
785	for (const Symbol *sym : in.lazyBinding->getEntries()) {
786	target->writeStubHelperEntry(buf: buf + off, *sym, entryAddr: addr + off);
787	off += target->stubHelperEntrySize;
788	}
789	}
790
791	void StubHelperSection::setUp() {
792	Symbol binder = symtab ->addUndefined(name: "dyld_stub_binder", /file=/*nullptr,
793	/isWeakRef=/false);
794	if (auto *undefined = dyn_cast<Undefined>(Val: binder))
795	treatUndefinedSymbol(*undefined,
796	source: "lazy binding (normally in libSystem.dylib)");
797
798	// treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check.
799	stubBinder = dyn_cast_or_null<DylibSymbol>(Val: binder);
800	if (stubBinder == nullptr)
801	return;
802
803	in.got->addEntry(sym: stubBinder);
804
805	in.imageLoaderCache->parent =
806	ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
807	addInputSection(inputSection: in.imageLoaderCache);
808	// Since this isn't in the symbol table or in any input file, the noDeadStrip
809	// argument doesn't matter.
810	dyldPrivate =
811	make<Defined>(args: "__dyld_private", args: nullptr, args&: in.imageLoaderCache, args: `0`, args: `0`,
812	/isWeakDef=/args: false,
813	/isExternal=/args: false, /isPrivateExtern=/args: false,
814	/includeInSymtab=/args: true,
815	/isReferencedDynamically=/args: false,
816	/noDeadStrip=/args: false);
817	dyldPrivate->used = true;
818	}
819
820	llvm::DenseMap<llvm::CachedHashStringRef, ConcatInputSection *>
821	ObjCSelRefsHelper::methnameToSelref;
822	void ObjCSelRefsHelper::initialize() {
823	// Do not fold selrefs without ICF.
824	if (config ->icfLevel == ICFLevel::none)
825	return;
826
827	// Search methnames already referenced in __objc_selrefs
828	// Map the name to the corresponding selref entry
829	// which we will reuse when creating objc stubs.
830	for (ConcatInputSection *isec : inputSections) {
831	if (isec->shouldOmitFromOutput())
832	continue;
833	if (isec->getName() != section_names::objcSelrefs)
834	continue;
835	// We expect a single relocation per selref entry to __objc_methname that
836	// might be aggregated.
837	assert(isec->relocs.size() == `1`);
838	auto Reloc = isec->relocs [`0`];
839	if (const auto sym = Reloc.referent.dyn_cast<Symbol >()) {
840	if (const auto *d = dyn_cast<Defined>(Val: sym)) {
841	auto *cisec = cast<CStringInputSection>(Val: d->isec());
842	auto methname = cisec->getStringRefAtOffset(off: d->value);
843	methnameToSelref [CachedHashStringRef (methname)] = isec;
844	}
845	}
846	}
847	}
848
849	void ObjCSelRefsHelper::cleanup() { methnameToSelref.clear(); }
850
851	ConcatInputSection *ObjCSelRefsHelper::makeSelRef(StringRef methname) {
852	auto methnameOffset =
853	in.objcMethnameSection->getStringOffset(str: methname).outSecOff;
854
855	size_t wordSize = target->wordSize;
856	uint8_t *selrefData = bAlloc().Allocate<uint8_t>(Num: wordSize);
857	write64le(P: selrefData, V: methnameOffset);
858	ConcatInputSection *objcSelref =
859	makeSyntheticInputSection(segName: segment_names::data, sectName: section_names::objcSelrefs,
860	flags: S_LITERAL_POINTERS \| S_ATTR_NO_DEAD_STRIP,
861	data: ArrayRef<uint8_t>{selrefData, wordSize},
862	/align=/wordSize);
863	assert(objcSelref->live);
864	objcSelref->relocs.push_back(x: {/type=/target->unsignedRelocType,
865	/pcrel=/false, /length=/`3`,
866	/offset=/`0`,
867	/addend=/static_cast<int64_t>(methnameOffset),
868	/referent=/in.objcMethnameSection->isec});
869	objcSelref->parent = ConcatOutputSection::getOrCreateForInput(objcSelref);
870	addInputSection(inputSection: objcSelref);
871	objcSelref->isFinal = true;
872	methnameToSelref [CachedHashStringRef (methname)] = objcSelref;
873	return objcSelref;
874	}
875
876	ConcatInputSection *ObjCSelRefsHelper::getSelRef(StringRef methname) {
877	auto it = methnameToSelref.find(Val: CachedHashStringRef (methname));
878	if (it == methnameToSelref.end())
879	return nullptr;
880	return it ->second;
881	}
882
883	ObjCStubsSection::ObjCStubsSection()
884	: SyntheticSection (segment_names::text, section_names::objcStubs) {
885	flags = S_ATTR_SOME_INSTRUCTIONS \| S_ATTR_PURE_INSTRUCTIONS;
886	align = config ->objcStubsMode == ObjCStubsMode::fast
887	? target->objcStubsFastAlignment
888	: target->objcStubsSmallAlignment;
889	}
890
891	bool ObjCStubsSection::isObjCStubSymbol(Symbol *sym) {
892	return sym->getName().starts_with(Prefix: symbolPrefix);
893	}
894
895	StringRef ObjCStubsSection::getMethname(Symbol *sym) {
896	assert(isObjCStubSymbol(sym) && "not an objc stub");
897	auto name = sym->getName();
898	StringRef methname = name.drop_front(N: symbolPrefix.size());
899	return methname;
900	}
901
902	void ObjCStubsSection::addEntry(Symbol *sym) {
903	StringRef methname = getMethname(sym);
904	// We create a selref entry for each unique methname.
905	if (!ObjCSelRefsHelper::getSelRef(methname))
906	ObjCSelRefsHelper::makeSelRef(methname);
907
908	auto stubSize = config ->objcStubsMode == ObjCStubsMode::fast
909	? target->objcStubsFastSize
910	: target->objcStubsSmallSize;
911	Defined *newSym = replaceSymbol<Defined>(
912	s: sym, arg: sym->getName(), arg: nullptr, arg&: isec,
913	/value=/arg: symbols.size() * stubSize,
914	/size=/arg&: stubSize,
915	/isWeakDef=/arg: false, /isExternal=/arg: true, /isPrivateExtern=/arg: true,
916	/includeInSymtab=/arg: true, /isReferencedDynamically=/arg: false,
917	/noDeadStrip=/arg: false);
918	symbols.push_back(x: newSym);
919	}
920
921	void ObjCStubsSection::setUp() {
922	objcMsgSend = symtab ->addUndefined(name: "_objc_msgSend", /file=/nullptr,
923	/isWeakRef=/false);
924	if (auto *undefined = dyn_cast<Undefined>(Val: objcMsgSend))
925	treatUndefinedSymbol(*undefined,
926	source: "lazy binding (normally in libobjc.dylib)");
927	objcMsgSend->used = true;
928	if (config ->objcStubsMode == ObjCStubsMode::fast) {
929	in.got->addEntry(sym: objcMsgSend);
930	assert(objcMsgSend->isInGot());
931	} else {
932	assert(config->objcStubsMode == ObjCStubsMode::small);
933	// In line with ld64's behavior, when objc_msgSend is a direct symbol,
934	// we directly reference it.
935	// In other cases, typically when binding in libobjc.dylib,
936	// we generate a stub to invoke objc_msgSend.
937	if (!isa<Defined>(Val: objcMsgSend))
938	in.stubs->addEntry(sym: objcMsgSend);
939	}
940	}
941
942	uint64_t ObjCStubsSection::getSize() const {
943	auto stubSize = config ->objcStubsMode == ObjCStubsMode::fast
944	? target->objcStubsFastSize
945	: target->objcStubsSmallSize;
946	return stubSize * symbols.size();
947	}
948
949	void ObjCStubsSection::writeTo(uint8_t buf) const* {
950	uint64_t stubOffset = `0`;
951	for (size_t i = `0`, n = symbols.size(); i < n; ++i) {
952	Defined *sym = symbols [i];
953
954	auto methname = getMethname(sym);
955	InputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
956	assert(selRef != nullptr && "no selref for methname");
957	auto selrefAddr = selRef->getVA(off: `0`);
958	target->writeObjCMsgSendStub(buf: buf + stubOffset, sym, stubsAddr: in.objcStubs->addr,
959	stubOffset, selrefVA: selrefAddr, objcMsgSend);
960	}
961	}
962
963	LazyPointerSection::LazyPointerSection()
964	: SyntheticSection (segment_names::data, section_names::lazySymbolPtr) {
965	align = target->wordSize;
966	flags = S_LAZY_SYMBOL_POINTERS;
967	}
968
969	uint64_t LazyPointerSection::getSize() const {
970	return in.stubs->getEntries().size() * target->wordSize;
971	}
972
973	bool LazyPointerSection::isNeeded() const {
974	return !in.stubs->getEntries().empty();
975	}
976
977	void LazyPointerSection::writeTo(uint8_t buf) const* {
978	size_t off = `0`;
979	for (const Symbol *sym : in.stubs->getEntries()) {
980	if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
981	if (dysym->hasStubsHelper()) {
982	uint64_t stubHelperOffset =
983	target->stubHelperHeaderSize +
984	dysym->stubsHelperIndex * target->stubHelperEntrySize;
985	write64le(P: buf + off, V: in.stubHelper->addr + stubHelperOffset);
986	}
987	} else {
988	write64le(P: buf + off, V: sym->getVA());
989	}
990	off += target->wordSize;
991	}
992	}
993
994	LazyBindingSection::LazyBindingSection()
995	: LinkEditSection (segment_names::linkEdit, section_names::lazyBinding) {}
996
997	void LazyBindingSection::finalizeContents() {
998	// TODO: Just precompute output size here instead of writing to a temporary
999	// buffer
1000	for (Symbol *sym : entries)
1001	sym->lazyBindOffset = encode(*sym);
1002	}
1003
1004	void LazyBindingSection::writeTo(uint8_t buf) const* {
1005	memcpy(dest: buf, src: contents.data(), n: contents.size());
1006	}
1007
1008	void LazyBindingSection::addEntry(Symbol *sym) {
1009	assert(!config->emitChainedFixups && "Chained fixups always bind eagerly");
1010	if (entries.insert(X: sym)) {
1011	sym->stubsHelperIndex = entries.size() - `1`;
1012	in.rebase->addEntry(isec: in.lazyPointers->isec,
1013	offset: sym->stubsIndex * target->wordSize);
1014	}
1015	}
1016
1017	// Unlike the non-lazy binding section, the bind opcodes in this section aren't
1018	// interpreted all at once. Rather, dyld will start interpreting opcodes at a
1019	// given offset, typically only binding a single symbol before it finds a
1020	// BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
1021	// we cannot encode just the differences between symbols; we have to emit the
1022	// complete bind information for each symbol.
1023	uint32_t LazyBindingSection::encode(const Symbol &sym) {
1024	uint32_t opstreamOffset = contents.size();
1025	OutputSegment *dataSeg = in.lazyPointers->parent;
1026	os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB \|
1027	dataSeg->index);
1028	uint64_t offset =
1029	in.lazyPointers->addr - dataSeg->addr + sym.stubsIndex * target->wordSize;
1030	encodeULEB128(Value: offset, OS&: os);
1031	encodeDylibOrdinal(ordinal: ordinalForSymbol(sym), os);
1032
1033	uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
1034	if (sym.isWeakRef())
1035	flags \|= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
1036
1037	os << flags << sym.getName() << `'\0'`
1038	<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND)
1039	<< static_cast<uint8_t>(BIND_OPCODE_DONE);
1040	return opstreamOffset;
1041	}
1042
1043	ExportSection::ExportSection()
1044	: LinkEditSection (segment_names::linkEdit, section_names::export_) {}
1045
1046	void ExportSection::finalizeContents() {
1047	trieBuilder.setImageBase(in.header->addr);
1048	for (const Symbol *sym : symtab ->getSymbols()) {
1049	if (const auto *defined = dyn_cast<Defined>(Val: sym)) {
1050	if (defined->privateExtern \|\| !defined->isLive())
1051	continue;
1052	trieBuilder.addSymbol(sym: *defined);
1053	hasWeakSymbol = hasWeakSymbol \|\| sym->isWeakDef();
1054	} else if (auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
1055	if (dysym->shouldReexport)
1056	trieBuilder.addSymbol(sym: *dysym);
1057	}
1058	}
1059	size = trieBuilder.build();
1060	}
1061
1062	void ExportSection::writeTo(uint8_t buf) const* { trieBuilder.writeTo(buf); }
1063
1064	DataInCodeSection::DataInCodeSection()
1065	: LinkEditSection (segment_names::linkEdit, section_names::dataInCode) {}
1066
1067	template <class LP>
1068	static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() {
1069	std::vector<MachO::data_in_code_entry> dataInCodeEntries;
1070	for (const InputFile *inputFile : inputFiles) {
1071	if (!isa<ObjFile>(Val: inputFile))
1072	continue;
1073	const ObjFile *objFile = cast<ObjFile>(Val: inputFile);
1074	ArrayRef<MachO::data_in_code_entry> entries = objFile->getDataInCode();
1075	if (entries.empty())
1076	continue;
1077
1078	std::vector<MachO::data_in_code_entry> sortedEntries;
1079	sortedEntries.assign(first: entries.begin(), last: entries.end());
1080	llvm::sort(sortedEntries, [](const data_in_code_entry &lhs,
1081	const data_in_code_entry &rhs) {
1082	return lhs.offset < rhs.offset;
1083	});
1084
1085	// For each code subsection find 'data in code' entries residing in it.
1086	// Compute the new offset values as
1087	// <offset within subsection> + <subsection address> - <__TEXT address>.
1088	for (const Section *section : objFile->sections) {
1089	for (const Subsection &subsec : section->subsections) {
1090	const InputSection *isec = subsec.isec;
1091	if (!isCodeSection(isec))
1092	continue;
1093	if (cast<ConcatInputSection>(Val: isec)->shouldOmitFromOutput())
1094	continue;
1095	const uint64_t beginAddr = section->addr + subsec.offset;
1096	auto it = llvm::lower_bound(
1097	sortedEntries, beginAddr,
1098	[](const MachO::data_in_code_entry &entry, uint64_t addr) {
1099	return entry.offset < addr;
1100	});
1101	const uint64_t endAddr = beginAddr + isec->getSize();
1102	for (const auto end = sortedEntries.end();
1103	it != end && it->offset + it->length <= endAddr; ++it)
1104	dataInCodeEntries.push_back(
1105	{static_cast<uint32_t>(isec->getVA(off: it->offset - beginAddr) -
1106	in.header->addr),
1107	it->length, it->kind});
1108	}
1109	}
1110	}
1111
1112	// ld64 emits the table in sorted order too.
1113	llvm::sort(dataInCodeEntries,
1114	[](const data_in_code_entry &lhs, const data_in_code_entry &rhs) {
1115	return lhs.offset < rhs.offset;
1116	});
1117	return dataInCodeEntries;
1118	}
1119
1120	void DataInCodeSection::finalizeContents() {
1121	entries = target->wordSize == `8` ? collectDataInCodeEntries<LP64>()
1122	: collectDataInCodeEntries<ILP32>();
1123	}
1124
1125	void DataInCodeSection::writeTo(uint8_t buf) const* {
1126	if (!entries.empty())
1127	memcpy(dest: buf, src: entries.data(), n: getRawSize());
1128	}
1129
1130	FunctionStartsSection::FunctionStartsSection()
1131	: LinkEditSection (segment_names::linkEdit, section_names::functionStarts) {}
1132
1133	void FunctionStartsSection::finalizeContents() {
1134	raw_svector_ostream os{contents};
1135	std::vector<uint64_t> addrs;
1136	for (const InputFile *file : inputFiles) {
1137	if (auto *objFile = dyn_cast<ObjFile>(Val: file)) {
1138	for (const Symbol *sym : objFile->symbols) {
1139	if (const auto *defined = dyn_cast_or_null<Defined>(Val: sym)) {
1140	if (!defined->isec() \|\| !isCodeSection(defined->isec()) \|\|
1141	!defined->isLive())
1142	continue;
1143	addrs.push_back(x: defined->getVA());
1144	}
1145	}
1146	}
1147	}
1148	llvm::sort(C&: addrs);
1149	uint64_t addr = in.header->addr;
1150	for (uint64_t nextAddr : addrs) {
1151	uint64_t delta = nextAddr - addr;
1152	if (delta == `0`)
1153	continue;
1154	encodeULEB128(Value: delta, OS&: os);
1155	addr = nextAddr;
1156	}
1157	os << `'\0'`;
1158	}
1159
1160	void FunctionStartsSection::writeTo(uint8_t buf) const* {
1161	memcpy(dest: buf, src: contents.data(), n: contents.size());
1162	}
1163
1164	SymtabSection::SymtabSection(StringTableSection &stringTableSection)
1165	: LinkEditSection (segment_names::linkEdit, section_names::symbolTable),
1166	stringTableSection(stringTableSection) {}
1167
1168	void SymtabSection::emitBeginSourceStab(StringRef sourceFile) {
1169	StabsEntry stab(N_SO);
1170	stab.strx = stringTableSection.addString(saver().save(S: sourceFile));
1171	stabs.emplace_back(args: std::move(stab));
1172	}
1173
1174	void SymtabSection::emitEndSourceStab() {
1175	StabsEntry stab(N_SO);
1176	stab.sect = `1`;
1177	stabs.emplace_back(args: std::move(stab));
1178	}
1179
1180	void SymtabSection::emitObjectFileStab(ObjFile *file) {
1181	StabsEntry stab(N_OSO);
1182	stab.sect = target->cpuSubtype;
1183	SmallString<`261`> path(!file->archiveName.empty() ? file->archiveName
1184	: file->getName());
1185	std::error_code ec = sys::fs::make_absolute(path);
1186	if (ec)
1187	fatal(msg: "failed to get absolute path for " + path);
1188
1189	if (!file->archiveName.empty())
1190	path.append(Refs: {"(", file->getName(), ")"});
1191
1192	StringRef adjustedPath = saver().save(S: path.str());
1193	adjustedPath.consume_front(Prefix: config ->osoPrefix);
1194
1195	stab.strx = stringTableSection.addString(adjustedPath);
1196	stab.desc = `1`;
1197	stab.value = file->modTime;
1198	stabs.emplace_back(args: std::move(stab));
1199	}
1200
1201	void SymtabSection::emitEndFunStab(Defined *defined) {
1202	StabsEntry stab(N_FUN);
1203	stab.value = defined->size;
1204	stabs.emplace_back(args: std::move(stab));
1205	}
1206
1207	void SymtabSection::emitStabs() {
1208	if (config ->omitDebugInfo)
1209	return;
1210
1211	for (const std::string &s : config ->astPaths) {
1212	StabsEntry astStab(N_AST);
1213	astStab.strx = stringTableSection.addString(s);
1214	stabs.emplace_back(args: std::move(astStab));
1215	}
1216
1217	// Cache the file ID for each symbol in an std::pair for faster sorting.
1218	using SortingPair = std::pair<Defined , int*>;
1219	std::vector<SortingPair> symbolsNeedingStabs;
1220	for (const SymtabEntry &entry :
1221	concat<SymtabEntry>(Ranges&: localSymbols, Ranges&: externalSymbols)) {
1222	Symbol *sym = entry.sym;
1223	assert(sym->isLive() &&
1224	"dead symbols should not be in localSymbols, externalSymbols");
1225	if (auto *defined = dyn_cast<Defined>(Val: sym)) {
1226	// Excluded symbols should have been filtered out in finalizeContents().
1227	assert(defined->includeInSymtab);
1228
1229	if (defined->isAbsolute())
1230	continue;
1231
1232	// Constant-folded symbols go in the executable's symbol table, but don't
1233	// get a stabs entry unless --keep-icf-stabs flag is specified
1234	if (!config ->keepICFStabs && defined->wasIdenticalCodeFolded)
1235	continue;
1236
1237	ObjFile *file = defined->getObjectFile();
1238	if (!file \|\| !file->compileUnit)
1239	continue;
1240
1241	// We use 'originalIsec' to get the file id of the symbol since 'isec()'
1242	// might point to the merged ICF symbol's file
1243	symbolsNeedingStabs.emplace_back(args&: defined,
1244	args: defined->originalIsec->getFile()->id);
1245	}
1246	}
1247
1248	llvm::stable_sort(Range&: symbolsNeedingStabs,
1249	C: [&](const SortingPair &a, const SortingPair &b) {
1250	return a.second < b.second;
1251	});
1252
1253	// Emit STABS symbols so that dsymutil and/or the debugger can map address
1254	// regions in the final binary to the source and object files from which they
1255	// originated.
1256	InputFile lastFile = nullptr*;
1257	for (SortingPair &pair : symbolsNeedingStabs) {
1258	Defined *defined = pair.first;
1259	// We use 'originalIsec' of the symbol since we care about the actual origin
1260	// of the symbol, not the canonical location returned by `isec()`.
1261	InputSection *isec = defined->originalIsec;
1262	ObjFile *file = cast<ObjFile>(Val: isec->getFile());
1263
1264	if (lastFile == nullptr \|\| lastFile != file) {
1265	if (lastFile != nullptr)
1266	emitEndSourceStab();
1267	lastFile = file;
1268
1269	emitBeginSourceStab(sourceFile: file->sourceFile());
1270	emitObjectFileStab(file);
1271	}
1272
1273	StabsEntry symStab;
1274	symStab.sect = isec->parent->index;
1275	symStab.strx = stringTableSection.addString(defined->getName());
1276	symStab.value = defined->getVA();
1277
1278	if (isCodeSection(isec)) {
1279	symStab.type = N_FUN;
1280	stabs.emplace_back(args: std::move(symStab));
1281	emitEndFunStab(defined);
1282	} else {
1283	symStab.type = defined->isExternal() ? N_GSYM : N_STSYM;
1284	stabs.emplace_back(args: std::move(symStab));
1285	}
1286	}
1287
1288	if (!stabs.empty())
1289	emitEndSourceStab();
1290	}
1291
1292	void SymtabSection::finalizeContents() {
1293	auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) {
1294	uint32_t strx = stringTableSection.addString(sym->getName());
1295	symbols.push_back(x: {.sym: sym, .strx: strx});
1296	};
1297
1298	std::function<void(Symbol *)> localSymbolsHandler;
1299	switch (config ->localSymbolsPresence) {
1300	case SymtabPresence::All:
1301	localSymbolsHandler = [&](Symbol *sym) { addSymbol (localSymbols, sym); };
1302	break;
1303	case SymtabPresence::None:
1304	localSymbolsHandler = [&](Symbol ) { /* Do nothing/ };
1305	break;
1306	case SymtabPresence::SelectivelyIncluded:
1307	localSymbolsHandler = [&](Symbol *sym) {
1308	if (config ->localSymbolPatterns.match(symbolName: sym->getName()))
1309	addSymbol (localSymbols, sym);
1310	};
1311	break;
1312	case SymtabPresence::SelectivelyExcluded:
1313	localSymbolsHandler = [&](Symbol *sym) {
1314	if (!config ->localSymbolPatterns.match(symbolName: sym->getName()))
1315	addSymbol (localSymbols, sym);
1316	};
1317	break;
1318	}
1319
1320	// Local symbols aren't in the SymbolTable, so we walk the list of object
1321	// files to gather them.
1322	// But if `-x` is set, then we don't need to. localSymbolsHandler() will do
1323	// the right thing regardless, but this check is a perf optimization because
1324	// iterating through all the input files and their symbols is expensive.
1325	if (config ->localSymbolsPresence != SymtabPresence::None) {
1326	for (const InputFile *file : inputFiles) {
1327	if (auto *objFile = dyn_cast<ObjFile>(Val: file)) {
1328	for (Symbol *sym : objFile->symbols) {
1329	if (auto *defined = dyn_cast_or_null<Defined>(Val: sym)) {
1330	if (defined->isExternal() \|\| !defined->isLive() \|\|
1331	!defined->includeInSymtab)
1332	continue;
1333	localSymbolsHandler (sym);
1334	}
1335	}
1336	}
1337	}
1338	}
1339
1340	// __dyld_private is a local symbol too. It's linker-created and doesn't
1341	// exist in any object file.
1342	if (in.stubHelper && in.stubHelper->dyldPrivate)
1343	localSymbolsHandler (in.stubHelper->dyldPrivate);
1344
1345	for (Symbol *sym : symtab ->getSymbols()) {
1346	if (!sym->isLive())
1347	continue;
1348	if (auto *defined = dyn_cast<Defined>(Val: sym)) {
1349	if (!defined->includeInSymtab)
1350	continue;
1351	assert(defined->isExternal());
1352	if (defined->privateExtern)
1353	localSymbolsHandler (defined);
1354	else
1355	addSymbol (externalSymbols, defined);
1356	} else if (auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
1357	if (dysym->isReferenced())
1358	addSymbol (undefinedSymbols, sym);
1359	}
1360	}
1361
1362	emitStabs();
1363	uint32_t symtabIndex = stabs.size();
1364	for (const SymtabEntry &entry :
1365	concat<SymtabEntry>(Ranges&: localSymbols, Ranges&: externalSymbols, Ranges&: undefinedSymbols)) {
1366	entry.sym->symtabIndex = symtabIndex++;
1367	}
1368	}
1369
1370	uint32_t SymtabSection::getNumSymbols() const {
1371	return stabs.size() + localSymbols.size() + externalSymbols.size() +
1372	undefinedSymbols.size();
1373	}
1374
1375	// This serves to hide (type-erase) the template parameter from SymtabSection.
1376	template <class LP> class SymtabSectionImpl final : public SymtabSection {
1377	public:
1378	SymtabSectionImpl(StringTableSection &stringTableSection)
1379	: SymtabSection(stringTableSection) {}
1380	uint64_t getRawSize() const override;
1381	void writeTo(uint8_t buf) const* override;
1382	};
1383
1384	template <class LP> uint64_t SymtabSectionImpl<LP>::getRawSize() const {
1385	return getNumSymbols() * sizeof(typename LP::nlist);
1386	}
1387
1388	template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t buf) const* {
1389	auto nList = reinterpret_cast<typename* LP::nlist *>(buf);
1390	// Emit the stabs entries before the "real" symbols. We cannot emit them
1391	// after as that would render Symbol::symtabIndex inaccurate.
1392	for (const StabsEntry &entry : stabs) {
1393	nList->n_strx = entry.strx;
1394	nList->n_type = entry.type;
1395	nList->n_sect = entry.sect;
1396	nList->n_desc = entry.desc;
1397	nList->n_value = entry.value;
1398	++nList;
1399	}
1400
1401	for (const SymtabEntry &entry : concat<const SymtabEntry>(
1402	localSymbols, externalSymbols, undefinedSymbols)) {
1403	nList->n_strx = entry.strx;
1404	// TODO populate n_desc with more flags
1405	if (auto *defined = dyn_cast<Defined>(Val: entry.sym)) {
1406	uint8_t scope = `0`;
1407	if (defined->privateExtern) {
1408	// Private external -- dylib scoped symbol.
1409	// Promote to non-external at link time.
1410	scope = N_PEXT;
1411	} else if (defined->isExternal()) {
1412	// Normal global symbol.
1413	scope = N_EXT;
1414	} else {
1415	// TU-local symbol from localSymbols.
1416	scope = `0`;
1417	}
1418
1419	if (defined->isAbsolute()) {
1420	nList->n_type = scope \| N_ABS;
1421	nList->n_sect = NO_SECT;
1422	nList->n_value = defined->value;
1423	} else {
1424	nList->n_type = scope \| N_SECT;
1425	nList->n_sect = defined->isec()->parent->index;
1426	// For the N_SECT symbol type, n_value is the address of the symbol
1427	nList->n_value = defined->getVA();
1428	}
1429	nList->n_desc \|= defined->isExternalWeakDef() ? N_WEAK_DEF : `0`;
1430	nList->n_desc \|=
1431	defined->referencedDynamically ? REFERENCED_DYNAMICALLY : `0`;
1432	} else if (auto *dysym = dyn_cast<DylibSymbol>(Val: entry.sym)) {
1433	uint16_t n_desc = nList->n_desc;
1434	int16_t ordinal = ordinalForDylibSymbol(dysym: *dysym);
1435	if (ordinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP)
1436	SET_LIBRARY_ORDINAL(n_desc, ordinal: DYNAMIC_LOOKUP_ORDINAL);
1437	else if (ordinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE)
1438	SET_LIBRARY_ORDINAL(n_desc, ordinal: EXECUTABLE_ORDINAL);
1439	else {
1440	assert(ordinal > `0`);
1441	SET_LIBRARY_ORDINAL(n_desc, ordinal: static_cast<uint8_t>(ordinal));
1442	}
1443
1444	nList->n_type = N_EXT;
1445	n_desc \|= dysym->isWeakDef() ? N_WEAK_DEF : `0`;
1446	n_desc \|= dysym->isWeakRef() ? N_WEAK_REF : `0`;
1447	nList->n_desc = n_desc;
1448	}
1449	++nList;
1450	}
1451	}
1452
1453	template <class LP>
1454	SymtabSection *
1455	macho::makeSymtabSection(StringTableSection &stringTableSection) {
1456	return make<SymtabSectionImpl<LP>>(stringTableSection);
1457	}
1458
1459	IndirectSymtabSection::IndirectSymtabSection()
1460	: LinkEditSection (segment_names::linkEdit,
1461	section_names::indirectSymbolTable) {}
1462
1463	uint32_t IndirectSymtabSection::getNumSymbols() const {
1464	uint32_t size = in.got->getEntries().size() +
1465	in.tlvPointers->getEntries().size() +
1466	in.stubs->getEntries().size();
1467	if (!config ->emitChainedFixups)
1468	size += in.stubs->getEntries().size();
1469	return size;
1470	}
1471
1472	bool IndirectSymtabSection::isNeeded() const {
1473	return in.got->isNeeded() \|\| in.tlvPointers->isNeeded() \|\|
1474	in.stubs->isNeeded();
1475	}
1476
1477	void IndirectSymtabSection::finalizeContents() {
1478	uint32_t off = `0`;
1479	in.got->reserved1 = off;
1480	off += in.got->getEntries().size();
1481	in.tlvPointers->reserved1 = off;
1482	off += in.tlvPointers->getEntries().size();
1483	in.stubs->reserved1 = off;
1484	if (in.lazyPointers) {
1485	off += in.stubs->getEntries().size();
1486	in.lazyPointers->reserved1 = off;
1487	}
1488	}
1489
1490	static uint32_t indirectValue(const Symbol *sym) {
1491	if (sym->symtabIndex == UINT32_MAX \|\| !needsBinding(sym))
1492	return INDIRECT_SYMBOL_LOCAL;
1493	return sym->symtabIndex;
1494	}
1495
1496	void IndirectSymtabSection::writeTo(uint8_t buf) const* {
1497	uint32_t off = `0`;
1498	for (const Symbol *sym : in.got->getEntries()) {
1499	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1500	++off;
1501	}
1502	for (const Symbol *sym : in.tlvPointers->getEntries()) {
1503	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1504	++off;
1505	}
1506	for (const Symbol *sym : in.stubs->getEntries()) {
1507	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1508	++off;
1509	}
1510
1511	if (in.lazyPointers) {
1512	// There is a 1:1 correspondence between stubs and LazyPointerSection
1513	// entries. But giving __stubs and __la_symbol_ptr the same reserved1
1514	// (the offset into the indirect symbol table) so that they both refer
1515	// to the same range of offsets confuses `strip`, so write the stubs
1516	// symbol table offsets a second time.
1517	for (const Symbol *sym : in.stubs->getEntries()) {
1518	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1519	++off;
1520	}
1521	}
1522	}
1523
1524	StringTableSection::StringTableSection()
1525	: LinkEditSection (segment_names::linkEdit, section_names::stringTable) {}
1526
1527	uint32_t StringTableSection::addString(StringRef str) {
1528	uint32_t strx = size;
1529	strings.push_back(x: str); // TODO: consider deduplicating strings
1530	size += str.size() + `1`; // account for null terminator
1531	return strx;
1532	}
1533
1534	void StringTableSection::writeTo(uint8_t buf) const* {
1535	uint32_t off = `0`;
1536	for (StringRef str : strings) {
1537	memcpy(dest: buf + off, src: str.data(), n: str.size());
1538	off += str.size() + `1`; // account for null terminator
1539	}
1540	}
1541
1542	static_assert((CodeSignatureSection::blobHeadersSize % `8`) == `0`);
1543	static_assert((CodeSignatureSection::fixedHeadersSize % `8`) == `0`);
1544
1545	CodeSignatureSection::CodeSignatureSection()
1546	: LinkEditSection (segment_names::linkEdit, section_names::codeSignature) {
1547	align = `16`; // required by libstuff
1548
1549	// XXX: This mimics LD64, where it uses the install-name as codesign
1550	// identifier, if available.
1551	if (!config ->installName.empty())
1552	fileName = config ->installName;
1553	else
1554	// FIXME: Consider using finalOutput instead of outputFile.
1555	fileName = config ->outputFile;
1556
1557	size_t slashIndex = fileName.rfind(Str: "/");
1558	if (slashIndex != std::string::npos)
1559	fileName = fileName.drop_front(N: slashIndex + `1`);
1560
1561	// NOTE: Any changes to these calculations should be repeated
1562	// in llvm-objcopy's MachOLayoutBuilder::layoutTail.
1563	allHeadersSize = alignTo<`16`>(Value: fixedHeadersSize + fileName.size() + `1`);
1564	fileNamePad = allHeadersSize - fixedHeadersSize - fileName.size();
1565	}
1566
1567	uint32_t CodeSignatureSection::getBlockCount() const {
1568	return (fileOff + blockSize - `1`) / blockSize;
1569	}
1570
1571	uint64_t CodeSignatureSection::getRawSize() const {
1572	return allHeadersSize + getBlockCount() * hashSize;
1573	}
1574
1575	void CodeSignatureSection::writeHashes(uint8_t buf) const* {
1576	// NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1577	// MachOWriter::writeSignatureData.
1578	uint8_t *hashes = buf + fileOff + allHeadersSize;
1579	parallelFor(Begin: `0`, End: getBlockCount(), Fn: [&](size_t i) {
1580	sha256(data: buf + i * blockSize,
1581	len: std::min(a: static_cast<size_t>(fileOff - i * blockSize), b: blockSize),
1582	output: hashes + i * hashSize);
1583	});
1584	#if defined(__APPLE__)
1585	// This is macOS-specific work-around and makes no sense for any
1586	// other host OS. See https://openradar.appspot.com/FB8914231
1587	//
1588	// The macOS kernel maintains a signature-verification cache to
1589	// quickly validate applications at time of execve(2). The trouble
1590	// is that for the kernel creates the cache entry at the time of the
1591	// mmap(2) call, before we have a chance to write either the code to
1592	// sign or the signature header+hashes. The fix is to invalidate
1593	// all cached data associated with the output file, thus discarding
1594	// the bogus prematurely-cached signature.
1595	msync(buf, fileOff + getSize(), MS_INVALIDATE);
1596	#endif
1597	}
1598
1599	void CodeSignatureSection::writeTo(uint8_t buf) const* {
1600	// NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1601	// MachOWriter::writeSignatureData.
1602	uint32_t signatureSize = static_cast<uint32_t>(getSize());
1603	auto superBlob = reinterpret_cast<CS_SuperBlob >(buf);
1604	write32be(P: &superBlob->magic, V: CSMAGIC_EMBEDDED_SIGNATURE);
1605	write32be(P: &superBlob->length, V: signatureSize);
1606	write32be(P: &superBlob->count, V: `1`);
1607	auto blobIndex = reinterpret_cast<CS_BlobIndex >(&superBlob[`1`]);
1608	write32be(P: &blobIndex->type, V: CSSLOT_CODEDIRECTORY);
1609	write32be(P: &blobIndex->offset, V: blobHeadersSize);
1610	auto *codeDirectory =
1611	reinterpret_cast<CS_CodeDirectory *>(buf + blobHeadersSize);
1612	write32be(P: &codeDirectory->magic, V: CSMAGIC_CODEDIRECTORY);
1613	write32be(P: &codeDirectory->length, V: signatureSize - blobHeadersSize);
1614	write32be(P: &codeDirectory->version, V: CS_SUPPORTSEXECSEG);
1615	write32be(P: &codeDirectory->flags, V: CS_ADHOC \| CS_LINKER_SIGNED);
1616	write32be(P: &codeDirectory->hashOffset,
1617	V: sizeof(CS_CodeDirectory) + fileName.size() + fileNamePad);
1618	write32be(P: &codeDirectory->identOffset, V: sizeof(CS_CodeDirectory));
1619	codeDirectory->nSpecialSlots = `0`;
1620	write32be(P: &codeDirectory->nCodeSlots, V: getBlockCount());
1621	write32be(P: &codeDirectory->codeLimit, V: fileOff);
1622	codeDirectory->hashSize = static_cast<uint8_t>(hashSize);
1623	codeDirectory->hashType = kSecCodeSignatureHashSHA256;
1624	codeDirectory->platform = `0`;
1625	codeDirectory->pageSize = blockSizeShift;
1626	codeDirectory->spare2 = `0`;
1627	codeDirectory->scatterOffset = `0`;
1628	codeDirectory->teamOffset = `0`;
1629	codeDirectory->spare3 = `0`;
1630	codeDirectory->codeLimit64 = `0`;
1631	OutputSegment *textSeg = getOrCreateOutputSegment(name: segment_names::text);
1632	write64be(P: &codeDirectory->execSegBase, V: textSeg->fileOff);
1633	write64be(P: &codeDirectory->execSegLimit, V: textSeg->fileSize);
1634	write64be(P: &codeDirectory->execSegFlags,
1635	V: config ->outputType == MH_EXECUTE ? CS_EXECSEG_MAIN_BINARY : `0`);
1636	auto id = reinterpret_cast<char* *>(&codeDirectory[`1`]);
1637	memcpy(dest: id, src: fileName.begin(), n: fileName.size());
1638	memset(s: id + fileName.size(), c: `0`, n: fileNamePad);
1639	}
1640
1641	CStringSection::CStringSection(const char *name)
1642	: SyntheticSection (segment_names::text, name) {
1643	flags = S_CSTRING_LITERALS;
1644	}
1645
1646	void CStringSection::addInput(CStringInputSection *isec) {
1647	isec->parent = this;
1648	inputs.push_back(x: isec);
1649	if (isec->align > align)
1650	align = isec->align;
1651	}
1652
1653	void CStringSection::writeTo(uint8_t buf) const* {
1654	for (const CStringInputSection *isec : inputs) {
1655	for (const auto &[i, piece] : llvm::enumerate(First: isec->pieces)) {
1656	if (!piece.live)
1657	continue;
1658	StringRef string = isec->getStringRef(i);
1659	memcpy(dest: buf + piece.outSecOff, src: string.data(), n: string.size());
1660	}
1661	}
1662	}
1663
1664	void CStringSection::finalizeContents() {
1665	uint64_t offset = `0`;
1666	for (CStringInputSection *isec : inputs) {
1667	for (const auto &[i, piece] : llvm::enumerate(First&: isec->pieces)) {
1668	if (!piece.live)
1669	continue;
1670	// See comment above DeduplicatedCStringSection for how alignment is
1671	// handled.
1672	uint32_t pieceAlign = `1`
1673	<< llvm::countr_zero(Val: isec->align \| piece.inSecOff);
1674	offset = alignToPowerOf2(Value: offset, Align: pieceAlign);
1675	piece.outSecOff = offset;
1676	isec->isFinal = true;
1677	StringRef string = isec->getStringRef(i);
1678	offset += string.size() + `1`; // account for null terminator
1679	}
1680	}
1681	size = offset;
1682	}
1683
1684	// Mergeable cstring literals are found under the __TEXT,__cstring section. In
1685	// contrast to ELF, which puts strings that need different alignments into
1686	// different sections, clang's Mach-O backend puts them all in one section.
1687	// Strings that need to be aligned have the .p2align directive emitted before
1688	// them, which simply translates into zero padding in the object file. In other
1689	// words, we have to infer the desired alignment of these cstrings from their
1690	// addresses.
1691	//
1692	// We differ slightly from ld64 in how we've chosen to align these cstrings.
1693	// Both LLD and ld64 preserve the number of trailing zeros in each cstring's
1694	// address in the input object files. When deduplicating identical cstrings,
1695	// both linkers pick the cstring whose address has more trailing zeros, and
1696	// preserve the alignment of that address in the final binary. However, ld64
1697	// goes a step further and also preserves the offset of the cstring from the
1698	// last section-aligned address. I.e. if a cstring is at offset 18 in the
1699	// input, with a section alignment of 16, then both LLD and ld64 will ensure the
// final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also
// ensure that the final address is of the form 16 * k + 2 for some k.
1702	//
1703	// Note that ld64's heuristic means that a dedup'ed cstring's final address is
1704	// dependent on the order of the input object files. E.g. if in addition to the
1705	// cstring at offset 18 above, we have a duplicate one in another file with a
1706	// `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick
// the cstring from the object file earlier on the command line (since both have
// the same number of trailing zeros in their address). So the final cstring may
// either be at some address `16 * k + 2` or at some address `2 * k`.
1710	//
1711	// I've opted not to follow this behavior primarily for implementation
1712	// simplicity, and secondarily to save a few more bytes. It's not clear to me
1713	// that preserving the section alignment + offset is ever necessary, and there
1714	// are many cases that are clearly redundant. In particular, if an x86_64 object
1715	// file contains some strings that are accessed via SIMD instructions, then the
1716	// .cstring section in the object file will be 16-byte-aligned (since SIMD
1717	// requires its operand addresses to be 16-byte aligned). However, there will
1718	// typically also be other cstrings in the same file that aren't used via SIMD
1719	// and don't need this alignment. They will be emitted at some arbitrary address
// `A`, but ld64 will treat them as being 16-byte aligned with an offset of
// `A % 16`.
1722	void DeduplicatedCStringSection::finalizeContents() {
1723	// Find the largest alignment required for each string.
1724	for (const CStringInputSection *isec : inputs) {
1725	for (const auto &[i, piece] : llvm::enumerate(First: isec->pieces)) {
1726	if (!piece.live)
1727	continue;
1728	auto s = isec->getCachedHashStringRef(i);
1729	assert(isec->align != `0`);
1730	uint8_t trailingZeros = llvm::countr_zero(Val: isec->align \| piece.inSecOff);
1731	auto it = stringOffsetMap.insert(
1732	KV: std::make_pair(x&: s, y: StringOffset (trailingZeros)));
1733	if (!it.second && it.first ->second.trailingZeros < trailingZeros)
1734	it.first ->second.trailingZeros = trailingZeros;
1735	}
1736	}
1737
1738	// Assign an offset for each string and save it to the corresponding
1739	// StringPieces for easy access.
1740	for (CStringInputSection *isec : inputs) {
1741	for (const auto &[i, piece] : llvm::enumerate(First&: isec->pieces)) {
1742	if (!piece.live)
1743	continue;
1744	auto s = isec->getCachedHashStringRef(i);
1745	auto it = stringOffsetMap.find(Val: s);
1746	assert(it != stringOffsetMap.end());
1747	StringOffset &offsetInfo = it ->second;
1748	if (offsetInfo.outSecOff == UINT64_MAX) {
1749	offsetInfo.outSecOff =
1750	alignToPowerOf2(Value: size, Align: `1ULL` << offsetInfo.trailingZeros);
1751	size =
1752	offsetInfo.outSecOff + s.size() + `1`; // account for null terminator
1753	}
1754	piece.outSecOff = offsetInfo.outSecOff;
1755	}
1756	isec->isFinal = true;
1757	}
1758	}
1759
1760	void DeduplicatedCStringSection::writeTo(uint8_t buf) const* {
1761	for (const auto &p : stringOffsetMap) {
1762	StringRef data = p.first.val();
1763	uint64_t off = p.second.outSecOff;
1764	if (!data.empty())
1765	memcpy(dest: buf + off, src: data.data(), n: data.size());
1766	}
1767	}
1768
1769	DeduplicatedCStringSection::StringOffset
1770	DeduplicatedCStringSection::getStringOffset(StringRef str) const {
1771	// StringPiece uses 31 bits to store the hashes, so we replicate that
1772	uint32_t hash = xxh3_64bits(data: str) & `0x7fffffff`;
1773	auto offset = stringOffsetMap.find(Val: CachedHashStringRef (str, hash));
1774	assert(offset != stringOffsetMap.end() &&
1775	"Looked-up strings should always exist in section");
1776	return offset ->second;
1777	}
1778
1779	// This section is actually emitted as __TEXT,__const by ld64, but clang may
1780	// emit input sections of that name, and LLD doesn't currently support mixing
1781	// synthetic and concat-type OutputSections. To work around this, I've given
1782	// our merged-literals section a different name.
1783	WordLiteralSection::WordLiteralSection()
1784	: SyntheticSection (segment_names::text, section_names::literals) {
1785	align = `16`;
1786	}
1787
1788	void WordLiteralSection::addInput(WordLiteralInputSection *isec) {
1789	isec->parent = this;
1790	inputs.push_back(x: isec);
1791	}
1792
1793	void WordLiteralSection::finalizeContents() {
1794	for (WordLiteralInputSection *isec : inputs) {
1795	// We do all processing of the InputSection here, so it will be effectively
1796	// finalized.
1797	isec->isFinal = true;
1798	const uint8_t *buf = isec->data.data();
1799	switch (sectionType(flags: isec->getFlags())) {
1800	case S_4BYTE_LITERALS: {
1801	for (size_t off = `0`, e = isec->data.size(); off < e; off += `4`) {
1802	if (!isec->isLive(off))
1803	continue;
1804	uint32_t value = *reinterpret_cast<const uint32_t *>(buf + off);
1805	literal4Map.emplace(args&: value, args: literal4Map.size());
1806	}
1807	break;
1808	}
1809	case S_8BYTE_LITERALS: {
1810	for (size_t off = `0`, e = isec->data.size(); off < e; off += `8`) {
1811	if (!isec->isLive(off))
1812	continue;
1813	uint64_t value = *reinterpret_cast<const uint64_t *>(buf + off);
1814	literal8Map.emplace(args&: value, args: literal8Map.size());
1815	}
1816	break;
1817	}
1818	case S_16BYTE_LITERALS: {
1819	for (size_t off = `0`, e = isec->data.size(); off < e; off += `16`) {
1820	if (!isec->isLive(off))
1821	continue;
1822	UInt128 value = *reinterpret_cast<const UInt128 *>(buf + off);
1823	literal16Map.emplace(args&: value, args: literal16Map.size());
1824	}
1825	break;
1826	}
1827	default:
1828	llvm_unreachable("invalid literal section type");
1829	}
1830	}
1831	}
1832
1833	void WordLiteralSection::writeTo(uint8_t buf) const* {
1834	// Note that we don't attempt to do any endianness conversion in addInput(),
1835	// so we don't do it here either -- just write out the original value,
1836	// byte-for-byte.
1837	for (const auto &p : literal16Map)
1838	memcpy(dest: buf + p.second * `16`, src: &p.first, n: `16`);
1839	buf += literal16Map.size() * `16`;
1840
1841	for (const auto &p : literal8Map)
1842	memcpy(dest: buf + p.second * `8`, src: &p.first, n: `8`);
1843	buf += literal8Map.size() * `8`;
1844
1845	for (const auto &p : literal4Map)
1846	memcpy(dest: buf + p.second * `4`, src: &p.first, n: `4`);
1847	}
1848
1849	ObjCImageInfoSection::ObjCImageInfoSection()
1850	: SyntheticSection (segment_names::data, section_names::objCImageInfo) {}
1851
1852	ObjCImageInfoSection::ImageInfo
1853	ObjCImageInfoSection::parseImageInfo(const InputFile *file) {
1854	ImageInfo info;
1855	ArrayRef<uint8_t> data = file->objCImageInfo;
1856	// The image info struct has the following layout:
1857	// struct {
1858	// uint32_t version;
1859	// uint32_t flags;
1860	// };
1861	if (data.size() < `8`) {
1862	warn(msg: toString(file) + ": invalid __objc_imageinfo size");
1863	return info;
1864	}
1865
1866	auto buf = reinterpret_cast<const* uint32_t *>(data.data());
1867	if (read32le(P: buf) != `0`) {
1868	warn(msg: toString(file) + ": invalid __objc_imageinfo version");
1869	return info;
1870	}
1871
1872	uint32_t flags = read32le(P: buf + `1`);
1873	info.swiftVersion = (flags >> `8`) & `0xff`;
1874	info.hasCategoryClassProperties = flags & `0x40`;
1875	return info;
1876	}
1877
1878	static std::string swiftVersionString(uint8_t version) {
1879	switch (version) {
1880	case `1`:
1881	return "1.0";
1882	case `2`:
1883	return "1.1";
1884	case `3`:
1885	return "2.0";
1886	case `4`:
1887	return "3.0";
1888	case `5`:
1889	return "4.0";
1890	default:
1891	return ("0x" + Twine::utohexstr(Val: version)).str();
1892	}
1893	}
1894
1895	// Validate each object file's __objc_imageinfo and use them to generate the
1896	// image info for the output binary. Only two pieces of info are relevant:
1897	// 1. The Swift version (should be identical across inputs)
1898	// 2. `bool hasCategoryClassProperties` (true only if true for all inputs)
1899	void ObjCImageInfoSection::finalizeContents() {
1900	assert(files.size() != `0`); // should have already been checked via isNeeded()
1901
1902	info.hasCategoryClassProperties = true;
1903	const InputFile *firstFile;
1904	for (const InputFile *file : files) {
1905	ImageInfo inputInfo = parseImageInfo(file);
1906	info.hasCategoryClassProperties &= inputInfo.hasCategoryClassProperties;
1907
1908	// swiftVersion 0 means no Swift is present, so no version checking required
1909	if (inputInfo.swiftVersion == `0`)
1910	continue;
1911
1912	if (info.swiftVersion != `0` && info.swiftVersion != inputInfo.swiftVersion) {
1913	error(msg: "Swift version mismatch: " + toString(file: firstFile) + " has version " +
1914	swiftVersionString(version: info.swiftVersion) + " but " + toString(file) +
1915	" has version " + swiftVersionString(version: inputInfo.swiftVersion));
1916	} else {
1917	info.swiftVersion = inputInfo.swiftVersion;
1918	firstFile = file;
1919	}
1920	}
1921	}
1922
1923	void ObjCImageInfoSection::writeTo(uint8_t buf) const* {
1924	uint32_t flags = info.hasCategoryClassProperties ? `0x40` : `0x0`;
1925	flags \|= info.swiftVersion << `8`;
1926	write32le(P: buf + `4`, V: flags);
1927	}
1928
1929	InitOffsetsSection::InitOffsetsSection()
1930	: SyntheticSection (segment_names::text, section_names::initOffsets) {
1931	flags = S_INIT_FUNC_OFFSETS;
1932	align = `4`; // This section contains 32-bit integers.
1933	}
1934
1935	uint64_t InitOffsetsSection::getSize() const {
1936	size_t count = `0`;
1937	for (const ConcatInputSection *isec : sections)
1938	count += isec->relocs.size();
1939	return count * sizeof(uint32_t);
1940	}
1941
1942	void InitOffsetsSection::writeTo(uint8_t buf) const* {
1943	// FIXME: Add function specified by -init when that argument is implemented.
1944	for (ConcatInputSection *isec : sections) {
1945	for (const Reloc &rel : isec->relocs) {
1946	const Symbol referent = rel.referent.dyn_cast<Symbol >();
1947	assert(referent && "section relocation should have been rejected");
1948	uint64_t offset = referent->getVA() - in.header->addr;
1949	// FIXME: Can we handle this gracefully?
1950	if (offset > UINT32_MAX)
1951	fatal(msg: isec->getLocation(off: rel.offset) + ": offset to initializer " +
1952	referent->getName() + " (" + utohexstr(X: offset) +
1953	") does not fit in 32 bits");
1954
1955	// Entries need to be added in the order they appear in the section, but
1956	// relocations aren't guaranteed to be sorted.
1957	size_t index = rel.offset >> target->p2WordSize;
1958	write32le(P: &buf[index * sizeof(uint32_t)], V: offset);
1959	}
1960	buf += isec->relocs.size() * sizeof(uint32_t);
1961	}
1962	}
1963
1964	// The inputs are __mod_init_func sections, which contain pointers to
1965	// initializer functions, therefore all relocations should be of the UNSIGNED
1966	// type. InitOffsetsSection stores offsets, so if the initializer's address is
1967	// not known at link time, stub-indirection has to be used.
1968	void InitOffsetsSection::setUp() {
1969	for (const ConcatInputSection *isec : sections) {
1970	for (const Reloc &rel : isec->relocs) {
1971	RelocAttrs attrs = target->getRelocAttrs(type: rel.type);
1972	if (!attrs.hasAttr(b: RelocAttrBits::UNSIGNED))
1973	error(msg: isec->getLocation(off: rel.offset) +
1974	": unsupported relocation type: " + attrs.name);
1975	if (rel.addend != `0`)
1976	error(msg: isec->getLocation(off: rel.offset) +
1977	": relocation addend is not representable in __init_offsets");
1978	if (rel.referent.is<InputSection *>())
1979	error(msg: isec->getLocation(off: rel.offset) +
1980	": unexpected section relocation");
1981
1982	Symbol sym = rel.referent.dyn_cast<Symbol >();
1983	if (auto *undefined = dyn_cast<Undefined>(Val: sym))
1984	treatUndefinedSymbol(*undefined, isec, offset: rel.offset);
1985	if (needsBinding(sym))
1986	in.stubs->addEntry(sym);
1987	}
1988	}
1989	}
1990
1991	ObjCMethListSection::ObjCMethListSection()
1992	: SyntheticSection (segment_names::text, section_names::objcMethList) {
1993	flags = S_ATTR_NO_DEAD_STRIP;
1994	align = relativeOffsetSize;
1995	}
1996
1997	// Go through all input method lists and ensure that we have selrefs for all
1998	// their method names. The selrefs will be needed later by ::writeTo. We need to
1999	// create them early on here to ensure they are processed correctly by the lld
2000	// pipeline.
2001	void ObjCMethListSection::setUp() {
2002	for (const ConcatInputSection *isec : inputs) {
2003	uint32_t structSizeAndFlags = `0`, structCount = `0`;
2004	readMethodListHeader(buf: isec->data.data(), structSizeAndFlags, structCount);
2005	uint32_t originalStructSize = structSizeAndFlags & structSizeMask;
2006	// Method name is immediately after header
2007	uint32_t methodNameOff = methodListHeaderSize;
2008
2009	// Loop through all methods, and ensure a selref for each of them exists.
2010	while (methodNameOff < isec->data.size()) {
2011	const Reloc *reloc = isec->getRelocAt(off: methodNameOff);
2012	assert(reloc && "Relocation expected at method list name slot");
2013	auto def = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol >());
2014	assert(def && "Expected valid Defined at method list name slot");
2015	auto *cisec = cast<CStringInputSection>(Val: def->isec());
2016	assert(cisec && "Expected method name to be in a CStringInputSection");
2017	auto methname = cisec->getStringRefAtOffset(off: def->value);
2018	if (!ObjCSelRefsHelper::getSelRef(methname))
2019	ObjCSelRefsHelper::makeSelRef(methname);
2020
2021	// Jump to method name offset in next struct
2022	methodNameOff += originalStructSize;
2023	}
2024	}
2025	}
2026
2027	// Calculate section size and final offsets for where InputSection's need to be
2028	// written.
2029	void ObjCMethListSection::finalize() {
2030	// sectionSize will be the total size of the __objc_methlist section
2031	sectionSize = `0`;
2032	for (ConcatInputSection *isec : inputs) {
2033	// We can also use sectionSize as write offset for isec
2034	assert(sectionSize == alignToPowerOf2(sectionSize, relativeOffsetSize) &&
2035	"expected __objc_methlist to be aligned by default with the "
2036	"required section alignment");
2037	isec->outSecOff = sectionSize;
2038
2039	isec->isFinal = true;
2040	uint32_t relativeListSize =
2041	computeRelativeMethodListSize(absoluteMethodListSize: isec->data.size());
2042	sectionSize += relativeListSize;
2043
2044	// If encoding the method list in relative offset format shrinks the size,
2045	// then we also need to adjust symbol sizes to match the new size. Note that
2046	// on 32bit platforms the size of the method list will remain the same when
2047	// encoded in relative offset format.
2048	if (relativeListSize != isec->data.size()) {
2049	for (Symbol *sym : isec->symbols) {
2050	assert(isa<Defined>(sym) &&
2051	"Unexpected undefined symbol in ObjC method list");
2052	auto *def = cast<Defined>(Val: sym);
2053	// There can be 0-size symbols, check if this is the case and ignore
2054	// them.
2055	if (def->size) {
2056	assert(
2057	def->size == isec->data.size() &&
2058	"Invalid ObjC method list symbol size: expected symbol size to "
2059	"match isec size");
2060	def->size = relativeListSize;
2061	}
2062	}
2063	}
2064	}
2065	}
2066
2067	void ObjCMethListSection::writeTo(uint8_t bufStart) const* {
2068	uint8_t *buf = bufStart;
2069	for (const ConcatInputSection *isec : inputs) {
2070	assert(buf - bufStart == long(isec->outSecOff) &&
2071	"Writing at unexpected offset");
2072	uint32_t writtenSize = writeRelativeMethodList(isec, buf);
2073	buf += writtenSize;
2074	}
2075	assert(buf - bufStart == sectionSize &&
2076	"Written size does not match expected section size");
2077	}
2078
2079	// Check if an InputSection is a method list. To do this we scan the
2080	// InputSection for any symbols who's names match the patterns we expect clang
2081	// to generate for method lists.
2082	bool ObjCMethListSection::isMethodList(const ConcatInputSection *isec) {
2083	const char *symPrefixes[] = {objc::symbol_names::classMethods,
2084	objc::symbol_names::instanceMethods,
2085	objc::symbol_names::categoryInstanceMethods,
2086	objc::symbol_names::categoryClassMethods};
2087	if (!isec)
2088	return false;
2089	for (const Symbol *sym : isec->symbols) {
2090	auto *def = dyn_cast_or_null<Defined>(Val: sym);
2091	if (!def)
2092	continue;
2093	for (const char *prefix : symPrefixes) {
2094	if (def->getName().starts_with(Prefix: prefix)) {
2095	assert(def->size == isec->data.size() &&
2096	"Invalid ObjC method list symbol size: expected symbol size to "
2097	"match isec size");
2098	assert(def->value == `0` &&
2099	"Offset of ObjC method list symbol must be 0");
2100	return true;
2101	}
2102	}
2103	}
2104
2105	return false;
2106	}
2107
2108	// Encode a single relative offset value. The input is the data/symbol at
2109	// (&isec->data[inSecOff]). The output is written to (&buf[outSecOff]).
2110	// 'createSelRef' indicates that we should not directly use the specified
2111	// symbol, but instead get the selRef for the symbol and use that instead.
2112	void ObjCMethListSection::writeRelativeOffsetForIsec(
2113	const ConcatInputSection isec, uint8_t buf, uint32_t &inSecOff,
2114	uint32_t &outSecOff, bool useSelRef) const {
2115	const Reloc *reloc = isec->getRelocAt(off: inSecOff);
2116	assert(reloc && "Relocation expected at __objc_methlist Offset");
2117	auto def = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol >());
2118	assert(def && "Expected all syms in __objc_methlist to be defined");
2119	uint32_t symVA = def->getVA();
2120
2121	if (useSelRef) {
2122	auto *cisec = cast<CStringInputSection>(Val: def->isec());
2123	auto methname = cisec->getStringRefAtOffset(off: def->value);
2124	ConcatInputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
2125	assert(selRef && "Expected all selector names to already be already be "
2126	"present in __objc_selrefs");
2127	symVA = selRef->getVA();
2128	assert(selRef->data.size() == sizeof(target->wordSize) &&
2129	"Expected one selref per ConcatInputSection");
2130	}
2131
2132	uint32_t currentVA = isec->getVA() + outSecOff;
2133	uint32_t delta = symVA - currentVA;
2134	write32le(P: buf + outSecOff, V: delta);
2135
2136	// Move one pointer forward in the absolute method list
2137	inSecOff += target->wordSize;
2138	// Move one relative offset forward in the relative method list (32 bits)
2139	outSecOff += relativeOffsetSize;
2140	}
2141
2142	// Write a relative method list to buf, return the size of the written
2143	// information
2144	uint32_t
2145	ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection *isec,
2146	uint8_t buf) const* {
2147	// Copy over the header, and add the "this is a relative method list" magic
2148	// value flag
2149	uint32_t structSizeAndFlags = `0`, structCount = `0`;
2150	readMethodListHeader(buf: isec->data.data(), structSizeAndFlags, structCount);
2151	// Set the struct size for the relative method list
2152	uint32_t relativeStructSizeAndFlags =
2153	(relativeOffsetSize * pointersPerStruct) & structSizeMask;
2154	// Carry over the old flags from the input struct
2155	relativeStructSizeAndFlags \|= structSizeAndFlags & structFlagsMask;
2156	// Set the relative method list flag
2157	relativeStructSizeAndFlags \|= relMethodHeaderFlag;
2158
2159	writeMethodListHeader(buf, structSizeAndFlags: relativeStructSizeAndFlags, structCount);
2160
2161	assert(methodListHeaderSize +
2162	(structCount * pointersPerStruct * target->wordSize) ==
2163	isec->data.size() &&
2164	"Invalid computed ObjC method list size");
2165
2166	uint32_t inSecOff = methodListHeaderSize;
2167	uint32_t outSecOff = methodListHeaderSize;
2168
2169	// Go through the method list and encode input absolute pointers as relative
2170	// offsets. writeRelativeOffsetForIsec will be incrementing inSecOff and
2171	// outSecOff
2172	for (uint32_t i = `0`; i < structCount; i++) {
2173	// Write the name of the method
2174	writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, useSelRef: true);
2175	// Write the type of the method
2176	writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, useSelRef: false);
2177	// Write reference to the selector of the method
2178	writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, useSelRef: false);
2179	}
2180
2181	// Expecting to have read all the data in the isec
2182	assert(inSecOff == isec->data.size() &&
2183	"Invalid actual ObjC method list size");
2184	assert(
2185	outSecOff == computeRelativeMethodListSize(inSecOff) &&
2186	"Mismatch between input & output size when writing relative method list");
2187	return outSecOff;
2188	}
2189
2190	// Given the size of an ObjC method list InputSection, return the size of the
2191	// method list when encoded in relative offsets format. We can do this without
2192	// decoding the actual data, as it can be directly inferred from the size of the
2193	// isec.
2194	uint32_t ObjCMethListSection::computeRelativeMethodListSize(
2195	uint32_t absoluteMethodListSize) const {
2196	uint32_t oldPointersSize = absoluteMethodListSize - methodListHeaderSize;
2197	uint32_t pointerCount = oldPointersSize / target->wordSize;
2198	assert(((pointerCount % pointersPerStruct) == `0`) &&
2199	"__objc_methlist expects method lists to have multiple-of-3 pointers");
2200
2201	uint32_t newPointersSize = pointerCount * relativeOffsetSize;
2202	uint32_t newTotalSize = methodListHeaderSize + newPointersSize;
2203
2204	assert((newTotalSize <= absoluteMethodListSize) &&
2205	"Expected relative method list size to be smaller or equal than "
2206	"original size");
2207	return newTotalSize;
2208	}
2209
2210	// Read a method list header from buf
2211	void ObjCMethListSection::readMethodListHeader(const uint8_t *buf,
2212	uint32_t &structSizeAndFlags,
2213	uint32_t &structCount) const {
2214	structSizeAndFlags = read32le(P: buf);
2215	structCount = read32le(P: buf + sizeof(uint32_t));
2216	}
2217
2218	// Write a method list header to buf
2219	void ObjCMethListSection::writeMethodListHeader(uint8_t *buf,
2220	uint32_t structSizeAndFlags,
2221	uint32_t structCount) const {
2222	write32le(P: buf, V: structSizeAndFlags);
2223	write32le(P: buf + sizeof(structSizeAndFlags), V: structCount);
2224	}
2225
2226	void macho::createSyntheticSymbols() {
2227	auto addHeaderSymbol = [](const char *name) {
2228	symtab ->addSynthetic(name, in.header->isec, /value=/`0`,
2229	/isPrivateExtern=/true, /includeInSymtab=/false,
2230	/referencedDynamically=/false);
2231	};
2232
2233	switch (config ->outputType) {
2234	// FIXME: Assign the right address value for these symbols
2235	// (rather than 0). But we need to do that after assignAddresses().
2236	case MH_EXECUTE:
2237	// If linking PIE, __mh_execute_header is a defined symbol in
2238	// __TEXT, __text)
2239	// Otherwise, it's an absolute symbol.
2240	if (config ->isPic)
2241	symtab ->addSynthetic(name: "__mh_execute_header", in.header->isec, /value=/`0`,
2242	/isPrivateExtern=/false, /includeInSymtab=/true,
2243	/referencedDynamically=/true);
2244	else
2245	symtab ->addSynthetic(name: "__mh_execute_header", /isec=/nullptr, /value=/`0`,
2246	/isPrivateExtern=/false, /includeInSymtab=/true,
2247	/referencedDynamically=/true);
2248	break;
2249
2250	// The following symbols are N_SECT symbols, even though the header is not
2251	// part of any section and that they are private to the bundle/dylib/object
2252	// they are part of.
2253	case MH_BUNDLE:
2254	addHeaderSymbol ("__mh_bundle_header");
2255	break;
2256	case MH_DYLIB:
2257	addHeaderSymbol ("__mh_dylib_header");
2258	break;
2259	case MH_DYLINKER:
2260	addHeaderSymbol ("__mh_dylinker_header");
2261	break;
2262	case MH_OBJECT:
2263	addHeaderSymbol ("__mh_object_header");
2264	break;
2265	default:
2266	llvm_unreachable("unexpected outputType");
2267	break;
2268	}
2269
2270	// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
2271	// which does e.g. cleanup of static global variables. The ABI document
2272	// says that the pointer can point to any address in one of the dylib's
2273	// segments, but in practice ld64 seems to set it to point to the header,
2274	// so that's what's implemented here.
2275	addHeaderSymbol ("___dso_handle");
2276	}
2277
2278	ChainedFixupsSection::ChainedFixupsSection()
2279	: LinkEditSection (segment_names::linkEdit, section_names::chainFixups) {}
2280
2281	bool ChainedFixupsSection::isNeeded() const {
2282	assert(config->emitChainedFixups);
2283	// dyld always expects LC_DYLD_CHAINED_FIXUPS to point to a valid
2284	// dyld_chained_fixups_header, so we create this section even if there aren't
2285	// any fixups.
2286	return true;
2287	}
2288
2289	void ChainedFixupsSection::addBinding(const Symbol *sym,
2290	const InputSection *isec, uint64_t offset,
2291	int64_t addend) {
2292	locations.emplace_back(args&: isec, args&: offset);
2293	int64_t outlineAddend = (addend < `0` \|\| addend > `0xFF`) ? addend : `0`;
2294	auto [it, inserted] = bindings.insert(
2295	KV: {{sym, outlineAddend}, static_cast<uint32_t>(bindings.size())});
2296
2297	if (inserted) {
2298	symtabSize += sym->getName().size() + `1`;
2299	hasWeakBind = hasWeakBind \|\| needsWeakBind(sym: *sym);
2300	if (!isInt<`23`>(x: outlineAddend))
2301	needsLargeAddend = true;
2302	else if (outlineAddend != `0`)
2303	needsAddend = true;
2304	}
2305	}
2306
2307	std::pair<uint32_t, uint8_t>
2308	ChainedFixupsSection::getBinding(const Symbol sym, int64_t addend) const* {
2309	int64_t outlineAddend = (addend < `0` \|\| addend > `0xFF`) ? addend : `0`;
2310	auto it = bindings.find(Key: {sym, outlineAddend});
2311	assert(it != bindings.end() && "binding not found in the imports table");
2312	if (outlineAddend == `0`)
2313	return {it->second, addend};
2314	return {it->second, `0`};
2315	}
2316
2317	static size_t writeImport(uint8_t buf, int* format, int16_t libOrdinal,
2318	bool weakRef, uint32_t nameOffset, int64_t addend) {
2319	switch (format) {
2320	case DYLD_CHAINED_IMPORT: {
2321	auto import = reinterpret_cast<dyld_chained_import >(buf);
2322	import->lib_ordinal = libOrdinal;
2323	import->weak_import = weakRef;
2324	import->name_offset = nameOffset;
2325	return sizeof(dyld_chained_import);
2326	}
2327	case DYLD_CHAINED_IMPORT_ADDEND: {
2328	auto import = reinterpret_cast<dyld_chained_import_addend >(buf);
2329	import->lib_ordinal = libOrdinal;
2330	import->weak_import = weakRef;
2331	import->name_offset = nameOffset;
2332	import->addend = addend;
2333	return sizeof(dyld_chained_import_addend);
2334	}
2335	case DYLD_CHAINED_IMPORT_ADDEND64: {
2336	auto import = reinterpret_cast<dyld_chained_import_addend64 >(buf);
2337	import->lib_ordinal = libOrdinal;
2338	import->weak_import = weakRef;
2339	import->name_offset = nameOffset;
2340	import->addend = addend;
2341	return sizeof(dyld_chained_import_addend64);
2342	}
2343	default:
2344	llvm_unreachable("Unknown import format");
2345	}
2346	}
2347
2348	size_t ChainedFixupsSection::SegmentInfo::getSize() const {
2349	assert(pageStarts.size() > `0` && "SegmentInfo for segment with no fixups?");
2350	return alignTo<`8`>(Value: sizeof(dyld_chained_starts_in_segment) +
2351	pageStarts.back().first * sizeof(uint16_t));
2352	}
2353
2354	size_t ChainedFixupsSection::SegmentInfo::writeTo(uint8_t buf) const* {
2355	auto segInfo = reinterpret_cast<dyld_chained_starts_in_segment >(buf);
2356	segInfo->size = getSize();
2357	segInfo->page_size = target->getPageSize();
2358	// FIXME: Use DYLD_CHAINED_PTR_64_OFFSET on newer OS versions.
2359	segInfo->pointer_format = DYLD_CHAINED_PTR_64;
2360	segInfo->segment_offset = oseg->addr - in.header->addr;
2361	segInfo->max_valid_pointer = `0`; // not used on 64-bit
2362	segInfo->page_count = pageStarts.back().first + `1`;
2363
2364	uint16_t *starts = segInfo->page_start;
2365	for (size_t i = `0`; i < segInfo->page_count; ++i)
2366	starts[i] = DYLD_CHAINED_PTR_START_NONE;
2367
2368	for (auto [pageIdx, startAddr] : pageStarts)
2369	starts[pageIdx] = startAddr;
2370	return segInfo->size;
2371	}
2372
2373	static size_t importEntrySize(int format) {
2374	switch (format) {
2375	case DYLD_CHAINED_IMPORT:
2376	return sizeof(dyld_chained_import);
2377	case DYLD_CHAINED_IMPORT_ADDEND:
2378	return sizeof(dyld_chained_import_addend);
2379	case DYLD_CHAINED_IMPORT_ADDEND64:
2380	return sizeof(dyld_chained_import_addend64);
2381	default:
2382	llvm_unreachable("Unknown import format");
2383	}
2384	}
2385
2386	// This is step 3 of the algorithm described in the class comment of
2387	// ChainedFixupsSection.
2388	//
2389	// LC_DYLD_CHAINED_FIXUPS data consists of (in this order):
2390	// A dyld_chained_fixups_header*
2391	// A dyld_chained_starts_in_image*
2392	// One dyld_chained_starts_in_segment per segment*
2393	// List of all imports (dyld_chained_import, dyld_chained_import_addend, or*
2394	// dyld_chained_import_addend64)
2395	// Names of imported symbols*
2396	void ChainedFixupsSection::writeTo(uint8_t buf) const* {
2397	auto header = reinterpret_cast<dyld_chained_fixups_header >(buf);
2398	header->fixups_version = `0`;
2399	header->imports_count = bindings.size();
2400	header->imports_format = importFormat;
2401	header->symbols_format = `0`;
2402
2403	buf += alignTo<`8`>(Value: sizeof(*header));
2404
2405	auto curOffset = [&buf, &header]() -> uint32_t {
2406	return buf - reinterpret_cast<uint8_t *>(header);
2407	};
2408
2409	header->starts_offset = curOffset ();
2410
2411	auto imageInfo = reinterpret_cast<dyld_chained_starts_in_image >(buf);
2412	imageInfo->seg_count = outputSegments.size();
2413	uint32_t *segStarts = imageInfo->seg_info_offset;
2414
2415	// dyld_chained_starts_in_image ends in a flexible array member containing an
2416	// uint32_t for each segment. Leave room for it, and fill it via segStarts.
2417	buf += alignTo<`8`>(offsetof(dyld_chained_starts_in_image, seg_info_offset) +
2418	outputSegments.size() * sizeof(uint32_t));
2419
2420	// Initialize all offsets to 0, which indicates that the segment does not have
2421	// fixups. Those that do have them will be filled in below.
2422	for (size_t i = `0`; i < outputSegments.size(); ++i)
2423	segStarts[i] = `0`;
2424
2425	for (const SegmentInfo &seg : fixupSegments) {
2426	segStarts[seg.oseg->index] = curOffset () - header->starts_offset;
2427	buf += seg.writeTo(buf);
2428	}
2429
2430	// Write imports table.
2431	header->imports_offset = curOffset ();
2432	uint64_t nameOffset = `0`;
2433	for (auto [import, idx] : bindings) {
2434	const Symbol &sym = *import.first;
2435	buf += writeImport(buf, format: importFormat, libOrdinal: ordinalForSymbol(sym),
2436	weakRef: sym.isWeakRef(), nameOffset, addend: import.second);
2437	nameOffset += sym.getName().size() + `1`;
2438	}
2439
2440	// Write imported symbol names.
2441	header->symbols_offset = curOffset ();
2442	for (auto [import, idx] : bindings) {
2443	StringRef name = import.first->getName();
2444	memcpy(dest: buf, src: name.data(), n: name.size());
2445	buf += name.size() + `1`; // account for null terminator
2446	}
2447
2448	assert(curOffset() == getRawSize());
2449	}
2450
2451	// This is step 2 of the algorithm described in the class comment of
2452	// ChainedFixupsSection.
2453	void ChainedFixupsSection::finalizeContents() {
2454	assert(target->wordSize == `8` && "Only 64-bit platforms are supported");
2455	assert(config->emitChainedFixups);
2456
2457	if (!isUInt<`32`>(x: symtabSize))
2458	error(msg: "cannot encode chained fixups: imported symbols table size " +
2459	Twine (symtabSize) + " exceeds 4 GiB");
2460
2461	bool needsLargeOrdinal = any_of(Range&: bindings, P: [](const auto &p) {
2462	// 0xF1 - 0xFF are reserved for special ordinals in the 8-bit encoding.
2463	return ordinalForSymbol(*p.first.first) > `0xF0`;
2464	});
2465
2466	if (needsLargeAddend \|\| !isUInt<`23`>(x: symtabSize) \|\| needsLargeOrdinal)
2467	importFormat = DYLD_CHAINED_IMPORT_ADDEND64;
2468	else if (needsAddend)
2469	importFormat = DYLD_CHAINED_IMPORT_ADDEND;
2470	else
2471	importFormat = DYLD_CHAINED_IMPORT;
2472
2473	for (Location &loc : locations)
2474	loc.offset =
2475	loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(off: loc.offset);
2476
2477	llvm::sort(C&: locations, Comp: [](const Location &a, const Location &b) {
2478	const OutputSegment *segA = a.isec->parent->parent;
2479	const OutputSegment *segB = b.isec->parent->parent;
2480	if (segA == segB)
2481	return a.offset < b.offset;
2482	return segA->addr < segB->addr;
2483	});
2484
2485	auto sameSegment = [](const Location &a, const Location &b) {
2486	return a.isec->parent->parent == b.isec->parent->parent;
2487	};
2488
2489	const uint64_t pageSize = target->getPageSize();
2490	for (size_t i = `0`, count = locations.size(); i < count;) {
2491	const Location &firstLoc = locations [i];
2492	fixupSegments.emplace_back(Args&: firstLoc.isec->parent->parent);
2493	while (i < count && sameSegment (locations [i], firstLoc)) {
2494	uint32_t pageIdx = locations [i].offset / pageSize;
2495	fixupSegments.back().pageStarts.emplace_back(
2496	Args&: pageIdx, Args: locations [i].offset % pageSize);
2497	++i;
2498	while (i < count && sameSegment (locations [i], firstLoc) &&
2499	locations [i].offset / pageSize == pageIdx)
2500	++i;
2501	}
2502	}
2503
2504	// Compute expected encoded size.
2505	size = alignTo<`8`>(Value: sizeof(dyld_chained_fixups_header));
2506	size += alignTo<`8`>(offsetof(dyld_chained_starts_in_image, seg_info_offset) +
2507	outputSegments.size() * sizeof(uint32_t));
2508	for (const SegmentInfo &seg : fixupSegments)
2509	size += seg.getSize();
2510	size += importEntrySize(format: importFormat) * bindings.size();
2511	size += symtabSize;
2512	}
2513
2514	template SymtabSection *macho::makeSymtabSection<LP64>(StringTableSection &);
2515	template SymtabSection *macho::makeSymtabSection<ILP32>(StringTableSection &);
2516

Browse the source code of llvm_projects/lld/MachO/SyntheticSections.cpp