1//===- SyntheticSections.h -------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H
10#define LLD_MACHO_SYNTHETIC_SECTIONS_H
11
12#include "Config.h"
13#include "ExportTrie.h"
14#include "InputSection.h"
15#include "OutputSection.h"
16#include "OutputSegment.h"
17#include "Target.h"
18#include "Writer.h"
19
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/MapVector.h"
22#include "llvm/ADT/SetVector.h"
23#include "llvm/BinaryFormat/MachO.h"
24#include "llvm/Support/MathExtras.h"
25#include "llvm/Support/raw_ostream.h"
26
27namespace llvm {
28class DWARFUnit;
29} // namespace llvm
30
31namespace lld::macho {
32
33class Defined;
34class DylibSymbol;
35class LoadCommand;
36class ObjFile;
37class UnwindInfoSection;
38
39class SyntheticSection : public OutputSection {
40public:
41 SyntheticSection(const char *segname, const char *name);
42 virtual ~SyntheticSection() = default;
43
44 static bool classof(const OutputSection *sec) {
45 return sec->kind() == SyntheticKind;
46 }
47
48 StringRef segname;
49 // This fake InputSection makes it easier for us to write code that applies
50 // generically to both user inputs and synthetics.
51 InputSection *isec;
52};
53
54// All sections in __LINKEDIT should inherit from this.
55class LinkEditSection : public SyntheticSection {
56public:
57 LinkEditSection(const char *segname, const char *name)
58 : SyntheticSection(segname, name) {
59 align = target->wordSize;
60 }
61
62 // Implementations of this method can assume that the regular (non-__LINKEDIT)
63 // sections already have their addresses assigned.
64 virtual void finalizeContents() {}
65
66 // Sections in __LINKEDIT are special: their offsets are recorded in the
67 // load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section
68 // headers.
69 bool isHidden() const final { return true; }
70
71 virtual uint64_t getRawSize() const = 0;
72
73 // codesign (or more specifically libstuff) checks that each section in
74 // __LINKEDIT ends where the next one starts -- no gaps are permitted. We
75 // therefore align every section's start and end points to WordSize.
76 //
77 // NOTE: This assumes that the extra bytes required for alignment can be
78 // zero-valued bytes.
79 uint64_t getSize() const final { return llvm::alignTo(Value: getRawSize(), Align: align); }
80};
81
82// The header of the Mach-O file, which must have a file offset of zero.
83class MachHeaderSection final : public SyntheticSection {
84public:
85 MachHeaderSection();
86 bool isHidden() const override { return true; }
87 uint64_t getSize() const override;
88 void writeTo(uint8_t *buf) const override;
89
90 void addLoadCommand(LoadCommand *);
91
92protected:
93 std::vector<LoadCommand *> loadCommands;
94 uint32_t sizeOfCmds = 0;
95};
96
97// A hidden section that exists solely for the purpose of creating the
98// __PAGEZERO segment, which is used to catch null pointer dereferences.
99class PageZeroSection final : public SyntheticSection {
100public:
101 PageZeroSection();
102 bool isHidden() const override { return true; }
103 bool isNeeded() const override { return target->pageZeroSize != 0; }
104 uint64_t getSize() const override { return target->pageZeroSize; }
105 uint64_t getFileSize() const override { return 0; }
106 void writeTo(uint8_t *buf) const override {}
107};
108
109// This is the base class for the GOT and TLVPointer sections, which are nearly
110// functionally identical -- they will both be populated by dyld with addresses
111// to non-lazily-loaded dylib symbols. The main difference is that the
112// TLVPointerSection stores references to thread-local variables.
113class NonLazyPointerSectionBase : public SyntheticSection {
114public:
115 NonLazyPointerSectionBase(const char *segname, const char *name);
116 const llvm::SetVector<const Symbol *> &getEntries() const { return entries; }
117 bool isNeeded() const override { return !entries.empty(); }
118 uint64_t getSize() const override {
119 return entries.size() * target->wordSize;
120 }
121 void writeTo(uint8_t *buf) const override;
122 void addEntry(Symbol *sym);
123 uint64_t getVA(uint32_t gotIndex) const {
124 return addr + gotIndex * target->wordSize;
125 }
126
127private:
128 llvm::SetVector<const Symbol *> entries;
129};
130
131class GotSection final : public NonLazyPointerSectionBase {
132public:
133 GotSection();
134};
135
136class TlvPointerSection final : public NonLazyPointerSectionBase {
137public:
138 TlvPointerSection();
139};
140
141struct Location {
142 const InputSection *isec;
143 uint64_t offset;
144
145 Location(const InputSection *isec, uint64_t offset)
146 : isec(isec), offset(offset) {}
147 uint64_t getVA() const { return isec->getVA(off: offset); }
148};
149
150// Stores rebase opcodes, which tell dyld where absolute addresses have been
151// encoded in the binary. If the binary is not loaded at its preferred address,
152// dyld has to rebase these addresses by adding an offset to them.
153class RebaseSection final : public LinkEditSection {
154public:
155 RebaseSection();
156 void finalizeContents() override;
157 uint64_t getRawSize() const override { return contents.size(); }
158 bool isNeeded() const override { return !locations.empty(); }
159 void writeTo(uint8_t *buf) const override;
160
161 void addEntry(const InputSection *isec, uint64_t offset) {
162 if (config->isPic)
163 locations.emplace_back(args&: isec, args&: offset);
164 }
165
166private:
167 std::vector<Location> locations;
168 SmallVector<char, 128> contents;
169};
170
171struct BindingEntry {
172 int64_t addend;
173 Location target;
174 BindingEntry(int64_t addend, Location target)
175 : addend(addend), target(target) {}
176};
177
178template <class Sym>
179using BindingsMap = llvm::DenseMap<Sym, std::vector<BindingEntry>>;
180
181// Stores bind opcodes for telling dyld which symbols to load non-lazily.
182class BindingSection final : public LinkEditSection {
183public:
184 BindingSection();
185 void finalizeContents() override;
186 uint64_t getRawSize() const override { return contents.size(); }
187 bool isNeeded() const override { return !bindingsMap.empty(); }
188 void writeTo(uint8_t *buf) const override;
189
190 void addEntry(const Symbol *dysym, const InputSection *isec, uint64_t offset,
191 int64_t addend = 0) {
192 bindingsMap[dysym].emplace_back(args&: addend, args: Location(isec, offset));
193 }
194
195private:
196 BindingsMap<const Symbol *> bindingsMap;
197 SmallVector<char, 128> contents;
198};
199
200// Stores bind opcodes for telling dyld which weak symbols need coalescing.
201// There are two types of entries in this section:
202//
203// 1) Non-weak definitions: This is a symbol definition that weak symbols in
204// other dylibs should coalesce to.
205//
206// 2) Weak bindings: These tell dyld that a given symbol reference should
207// coalesce to a non-weak definition if one is found. Note that unlike the
208// entries in the BindingSection, the bindings here only refer to these
209// symbols by name, but do not specify which dylib to load them from.
210class WeakBindingSection final : public LinkEditSection {
211public:
212 WeakBindingSection();
213 void finalizeContents() override;
214 uint64_t getRawSize() const override { return contents.size(); }
215 bool isNeeded() const override {
216 return !bindingsMap.empty() || !definitions.empty();
217 }
218
219 void writeTo(uint8_t *buf) const override;
220
221 void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset,
222 int64_t addend = 0) {
223 bindingsMap[symbol].emplace_back(args&: addend, args: Location(isec, offset));
224 }
225
226 bool hasEntry() const { return !bindingsMap.empty(); }
227
228 void addNonWeakDefinition(const Defined *defined) {
229 definitions.emplace_back(args&: defined);
230 }
231
232 bool hasNonWeakDefinition() const { return !definitions.empty(); }
233
234private:
235 BindingsMap<const Symbol *> bindingsMap;
236 std::vector<const Defined *> definitions;
237 SmallVector<char, 128> contents;
238};
239
240// The following sections implement lazy symbol binding -- very similar to the
241// PLT mechanism in ELF.
242//
243// ELF's .plt section is broken up into two sections in Mach-O: StubsSection
244// and StubHelperSection. Calls to functions in dylibs will end up calling into
245// StubsSection, which contains indirect jumps to addresses stored in the
246// LazyPointerSection (the counterpart to ELF's .plt.got).
247//
248// We will first describe how non-weak symbols are handled.
249//
250// At program start, the LazyPointerSection contains addresses that point into
251// one of the entry points in the middle of the StubHelperSection. The code in
252// StubHelperSection will push on the stack an offset into the
253// LazyBindingSection. The push is followed by a jump to the beginning of the
254// StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder.
255// dyld_stub_binder is a non-lazily-bound symbol, so this call looks it up in
256// the GOT.
257//
258// The stub binder will look up the bind opcodes in the LazyBindingSection at
259// the given offset. The bind opcodes will tell the binder to update the
260// address in the LazyPointerSection to point to the symbol, so that subsequent
261// calls don't have to redo the symbol resolution. The binder will then jump to
262// the resolved symbol.
263//
264// With weak symbols, the situation is slightly different. Since there is no
265// "weak lazy" lookup, function calls to weak symbols are always non-lazily
266// bound. We emit both regular non-lazy bindings as well as weak bindings, in
267// order that the weak bindings may overwrite the non-lazy bindings if an
268// appropriate symbol is found at runtime. However, the bound addresses will
269// still be written (non-lazily) into the LazyPointerSection.
270//
271// Symbols are always bound eagerly when chained fixups are used. In that case,
272// StubsSection contains indirect jumps to addresses stored in the GotSection.
273// The GOT directly contains the fixup entries, which will be replaced by the
274// address of the target symbols on load. LazyPointerSection and
275// StubHelperSection are not used.
276
277class StubsSection final : public SyntheticSection {
278public:
279 StubsSection();
280 uint64_t getSize() const override;
281 bool isNeeded() const override { return !entries.empty(); }
282 void finalize() override;
283 void writeTo(uint8_t *buf) const override;
284 const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
285 // Creates a stub for the symbol and the corresponding entry in the
286 // LazyPointerSection.
287 void addEntry(Symbol *);
288 uint64_t getVA(uint32_t stubsIndex) const {
289 assert(isFinal || target->usesThunks());
290 // ConcatOutputSection::finalize() can seek the address of a
291 // stub before its address is assigned. Before __stubs is
292 // finalized, return a contrived out-of-range address.
293 return isFinal ? addr + stubsIndex * target->stubSize
294 : TargetInfo::outOfRangeVA;
295 }
296
297 bool isFinal = false; // is address assigned?
298
299private:
300 llvm::SetVector<Symbol *> entries;
301};
302
303class StubHelperSection final : public SyntheticSection {
304public:
305 StubHelperSection();
306 uint64_t getSize() const override;
307 bool isNeeded() const override;
308 void writeTo(uint8_t *buf) const override;
309
310 void setUp();
311
312 DylibSymbol *stubBinder = nullptr;
313 Defined *dyldPrivate = nullptr;
314};
315
316class ObjCSelRefsHelper {
317public:
318 static void initialize();
319 static void cleanup();
320
321 static ConcatInputSection *getSelRef(StringRef methname);
322 static ConcatInputSection *makeSelRef(StringRef methname);
323
324private:
325 static llvm::DenseMap<llvm::CachedHashStringRef, ConcatInputSection *>
326 methnameToSelref;
327};
328
329// Objective-C stubs are hoisted objc_msgSend calls per selector called in the
330// program. Apple Clang produces undefined symbols to each stub, such as
331// '_objc_msgSend$foo', which are then synthesized by the linker. The stubs
332// load the particular selector 'foo' from __objc_selrefs, setting it to the
333// first argument of the objc_msgSend call, and then jumps to objc_msgSend. The
334// actual stub contents are mirrored from ld64.
335class ObjCStubsSection final : public SyntheticSection {
336public:
337 ObjCStubsSection();
338 void addEntry(Symbol *sym);
339 uint64_t getSize() const override;
340 bool isNeeded() const override { return !symbols.empty(); }
341 void finalize() override { isec->isFinal = true; }
342 void writeTo(uint8_t *buf) const override;
343 void setUp();
344
345 static constexpr llvm::StringLiteral symbolPrefix = "_objc_msgSend$";
346 static bool isObjCStubSymbol(Symbol *sym);
347 static StringRef getMethname(Symbol *sym);
348
349private:
350 std::vector<Defined *> symbols;
351 Symbol *objcMsgSend = nullptr;
352};
353
354// Note that this section may also be targeted by non-lazy bindings. In
355// particular, this happens when branch relocations target weak symbols.
356class LazyPointerSection final : public SyntheticSection {
357public:
358 LazyPointerSection();
359 uint64_t getSize() const override;
360 bool isNeeded() const override;
361 void writeTo(uint8_t *buf) const override;
362 uint64_t getVA(uint32_t index) const {
363 return addr + (index << target->p2WordSize);
364 }
365};
366
367class LazyBindingSection final : public LinkEditSection {
368public:
369 LazyBindingSection();
370 void finalizeContents() override;
371 uint64_t getRawSize() const override { return contents.size(); }
372 bool isNeeded() const override { return !entries.empty(); }
373 void writeTo(uint8_t *buf) const override;
374 // Note that every entry here will by referenced by a corresponding entry in
375 // the StubHelperSection.
376 void addEntry(Symbol *dysym);
377 const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
378
379private:
380 uint32_t encode(const Symbol &);
381
382 llvm::SetVector<Symbol *> entries;
383 SmallVector<char, 128> contents;
384 llvm::raw_svector_ostream os{contents};
385};
386
387// Stores a trie that describes the set of exported symbols.
388class ExportSection final : public LinkEditSection {
389public:
390 ExportSection();
391 void finalizeContents() override;
392 uint64_t getRawSize() const override { return size; }
393 bool isNeeded() const override { return size; }
394 void writeTo(uint8_t *buf) const override;
395
396 bool hasWeakSymbol = false;
397
398private:
399 TrieBuilder trieBuilder;
400 size_t size = 0;
401};
402
403// Stores 'data in code' entries that describe the locations of data regions
404// inside code sections. This is used by llvm-objdump to distinguish jump tables
405// and stop them from being disassembled as instructions.
406class DataInCodeSection final : public LinkEditSection {
407public:
408 DataInCodeSection();
409 void finalizeContents() override;
410 uint64_t getRawSize() const override {
411 return sizeof(llvm::MachO::data_in_code_entry) * entries.size();
412 }
413 void writeTo(uint8_t *buf) const override;
414
415private:
416 std::vector<llvm::MachO::data_in_code_entry> entries;
417};
418
419// Stores ULEB128 delta encoded addresses of functions.
420class FunctionStartsSection final : public LinkEditSection {
421public:
422 FunctionStartsSection();
423 void finalizeContents() override;
424 uint64_t getRawSize() const override { return contents.size(); }
425 void writeTo(uint8_t *buf) const override;
426
427private:
428 SmallVector<char, 128> contents;
429};
430
431// Stores the strings referenced by the symbol table.
432class StringTableSection final : public LinkEditSection {
433public:
434 StringTableSection();
435 // Returns the start offset of the added string.
436 uint32_t addString(StringRef);
437 uint64_t getRawSize() const override { return size; }
438 void writeTo(uint8_t *buf) const override;
439
440 static constexpr size_t emptyStringIndex = 1;
441
442private:
443 // ld64 emits string tables which start with a space and a zero byte. We
444 // match its behavior here since some tools depend on it.
445 // Consequently, the empty string will be at index 1, not zero.
446 std::vector<StringRef> strings{" "};
447 llvm::DenseMap<llvm::CachedHashStringRef, uint32_t> stringMap;
448 size_t size = 2;
449};
450
451struct SymtabEntry {
452 Symbol *sym;
453 size_t strx;
454};
455
456struct StabsEntry {
457 uint8_t type = 0;
458 uint32_t strx = StringTableSection::emptyStringIndex;
459 uint8_t sect = 0;
460 uint16_t desc = 0;
461 uint64_t value = 0;
462
463 StabsEntry() = default;
464 explicit StabsEntry(uint8_t type) : type(type) {}
465};
466
467// Symbols of the same type must be laid out contiguously: we choose to emit
468// all local symbols first, then external symbols, and finally undefined
469// symbols. For each symbol type, the LC_DYSYMTAB load command will record the
470// range (start index and total number) of those symbols in the symbol table.
471class SymtabSection : public LinkEditSection {
472public:
473 void finalizeContents() override;
474 uint32_t getNumSymbols() const;
475 uint32_t getNumLocalSymbols() const {
476 return stabs.size() + localSymbols.size();
477 }
478 uint32_t getNumExternalSymbols() const { return externalSymbols.size(); }
479 uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
480
481private:
482 void emitBeginSourceStab(StringRef);
483 void emitEndSourceStab();
484 void emitObjectFileStab(ObjFile *);
485 void emitEndFunStab(Defined *);
486 Defined *getFuncBodySym(Defined *);
487 void emitStabs();
488
489protected:
490 SymtabSection(StringTableSection &);
491
492 StringTableSection &stringTableSection;
493 // STABS symbols are always local symbols, but we represent them with special
494 // entries because they may use fields like n_sect and n_desc differently.
495 std::vector<StabsEntry> stabs;
496 std::vector<SymtabEntry> localSymbols;
497 std::vector<SymtabEntry> externalSymbols;
498 std::vector<SymtabEntry> undefinedSymbols;
499};
500
501template <class LP> SymtabSection *makeSymtabSection(StringTableSection &);
502
503// The indirect symbol table is a list of 32-bit integers that serve as indices
504// into the (actual) symbol table. The indirect symbol table is a
505// concatenation of several sub-arrays of indices, each sub-array belonging to
506// a separate section. The starting offset of each sub-array is stored in the
507// reserved1 header field of the respective section.
508//
509// These sub-arrays provide symbol information for sections that store
510// contiguous sequences of symbol references. These references can be pointers
511// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
512// function stubs).
513class IndirectSymtabSection final : public LinkEditSection {
514public:
515 IndirectSymtabSection();
516 void finalizeContents() override;
517 uint32_t getNumSymbols() const;
518 uint64_t getRawSize() const override {
519 return getNumSymbols() * sizeof(uint32_t);
520 }
521 bool isNeeded() const override;
522 void writeTo(uint8_t *buf) const override;
523};
524
525// The code signature comes at the very end of the linked output file.
526class CodeSignatureSection final : public LinkEditSection {
527public:
528 // NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file
529 // and any changes here, should be repeated there.
530 static constexpr uint8_t blockSizeShift = 12;
531 static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB
532 static constexpr size_t hashSize = 256 / 8;
533 static constexpr size_t blobHeadersSize = llvm::alignTo<8>(
534 Value: sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex));
535 static constexpr uint32_t fixedHeadersSize =
536 blobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory);
537
538 uint32_t fileNamePad = 0;
539 uint32_t allHeadersSize = 0;
540 StringRef fileName;
541
542 CodeSignatureSection();
543 uint64_t getRawSize() const override;
544 bool isNeeded() const override { return true; }
545 void writeTo(uint8_t *buf) const override;
546 uint32_t getBlockCount() const;
547 void writeHashes(uint8_t *buf) const;
548};
549
550class CStringSection : public SyntheticSection {
551public:
552 CStringSection(const char *name);
553 void addInput(CStringInputSection *);
554 uint64_t getSize() const override { return size; }
555 virtual void finalizeContents();
556 bool isNeeded() const override { return !inputs.empty(); }
557 void writeTo(uint8_t *buf) const override;
558
559 std::vector<CStringInputSection *> inputs;
560
561private:
562 uint64_t size;
563};
564
565class DeduplicatedCStringSection final : public CStringSection {
566public:
567 DeduplicatedCStringSection(const char *name) : CStringSection(name){};
568 uint64_t getSize() const override { return size; }
569 void finalizeContents() override;
570 void writeTo(uint8_t *buf) const override;
571 uint64_t getStringOffset(StringRef str) const;
572
573private:
574 llvm::DenseMap<llvm::CachedHashStringRef, uint64_t> stringOffsetMap;
575 size_t size = 0;
576};
577
578/*
579 * This section contains deduplicated literal values. The 16-byte values are
580 * laid out first, followed by the 8- and then the 4-byte ones.
581 */
582class WordLiteralSection final : public SyntheticSection {
583public:
584 using UInt128 = std::pair<uint64_t, uint64_t>;
585 // I don't think the standard guarantees the size of a pair, so let's make
586 // sure it's exact -- that way we can construct it via `mmap`.
587 static_assert(sizeof(UInt128) == 16);
588
589 WordLiteralSection();
590 void addInput(WordLiteralInputSection *);
591 void finalizeContents();
592 void writeTo(uint8_t *buf) const override;
593
594 uint64_t getSize() const override {
595 return literal16Map.size() * 16 + literal8Map.size() * 8 +
596 literal4Map.size() * 4;
597 }
598
599 bool isNeeded() const override {
600 return !literal16Map.empty() || !literal4Map.empty() ||
601 !literal8Map.empty();
602 }
603
604 uint64_t getLiteral16Offset(uintptr_t buf) const {
605 return literal16Map.at(Val: *reinterpret_cast<const UInt128 *>(buf)) * 16;
606 }
607
608 uint64_t getLiteral8Offset(uintptr_t buf) const {
609 return literal16Map.size() * 16 +
610 literal8Map.at(Val: *reinterpret_cast<const uint64_t *>(buf)) * 8;
611 }
612
613 uint64_t getLiteral4Offset(uintptr_t buf) const {
614 return literal16Map.size() * 16 + literal8Map.size() * 8 +
615 literal4Map.at(Val: *reinterpret_cast<const uint32_t *>(buf)) * 4;
616 }
617
618private:
619 std::vector<WordLiteralInputSection *> inputs;
620
621 // Literal values can be any bit pattern.
622 llvm::DenseMap<UInt128, uint64_t> literal16Map;
623 llvm::DenseMap<uint64_t, uint64_t> literal8Map;
624 llvm::DenseMap<uint32_t, uint64_t> literal4Map;
625};
626
627class ObjCImageInfoSection final : public SyntheticSection {
628public:
629 ObjCImageInfoSection();
630 bool isNeeded() const override { return !files.empty(); }
631 uint64_t getSize() const override { return 8; }
632 void addFile(const InputFile *file) {
633 assert(!file->objCImageInfo.empty());
634 files.push_back(x: file);
635 }
636 void finalizeContents();
637 void writeTo(uint8_t *buf) const override;
638
639private:
640 struct ImageInfo {
641 uint8_t swiftVersion = 0;
642 bool hasCategoryClassProperties = false;
643 } info;
644 static ImageInfo parseImageInfo(const InputFile *);
645 std::vector<const InputFile *> files; // files with image info
646};
647
648// This section stores 32-bit __TEXT segment offsets of initializer functions.
649//
650// The compiler stores pointers to initializers in __mod_init_func. These need
651// to be fixed up at load time, which takes time and dirties memory. By
652// synthesizing InitOffsetsSection from them, this data can live in the
653// read-only __TEXT segment instead. This section is used by default when
654// chained fixups are enabled.
655//
656// There is no similar counterpart to __mod_term_func, as that section is
657// deprecated, and static destructors are instead handled by registering them
658// via __cxa_atexit from an autogenerated initializer function (see D121736).
659class InitOffsetsSection final : public SyntheticSection {
660public:
661 InitOffsetsSection();
662 bool isNeeded() const override { return !sections.empty(); }
663 uint64_t getSize() const override;
664 void writeTo(uint8_t *buf) const override;
665 void setUp();
666
667 void addInput(ConcatInputSection *isec) { sections.push_back(x: isec); }
668 const std::vector<ConcatInputSection *> &inputs() const { return sections; }
669
670private:
671 std::vector<ConcatInputSection *> sections;
672};
673
674// This SyntheticSection is for the __objc_methlist section, which contains
675// relative method lists if the -objc_relative_method_lists option is enabled.
676class ObjCMethListSection final : public SyntheticSection {
677public:
678 ObjCMethListSection();
679
680 static bool isMethodList(const ConcatInputSection *isec);
681 void addInput(ConcatInputSection *isec) { inputs.push_back(x: isec); }
682 std::vector<ConcatInputSection *> getInputs() { return inputs; }
683
684 void setUp();
685 void finalize() override;
686 bool isNeeded() const override { return !inputs.empty(); }
687 uint64_t getSize() const override { return sectionSize; }
688 void writeTo(uint8_t *bufStart) const override;
689
690private:
691 void readMethodListHeader(const uint8_t *buf, uint32_t &structSizeAndFlags,
692 uint32_t &structCount) const;
693 void writeMethodListHeader(uint8_t *buf, uint32_t structSizeAndFlags,
694 uint32_t structCount) const;
695 uint32_t computeRelativeMethodListSize(uint32_t absoluteMethodListSize) const;
696 void writeRelativeOffsetForIsec(const ConcatInputSection *isec, uint8_t *buf,
697 uint32_t &inSecOff, uint32_t &outSecOff,
698 bool useSelRef) const;
699 uint32_t writeRelativeMethodList(const ConcatInputSection *isec,
700 uint8_t *buf) const;
701
702 static constexpr uint32_t methodListHeaderSize =
703 /*structSizeAndFlags*/ sizeof(uint32_t) +
704 /*structCount*/ sizeof(uint32_t);
705 // Relative method lists are supported only for 3-pointer method lists
706 static constexpr uint32_t pointersPerStruct = 3;
707 // The runtime identifies relative method lists via this magic value
708 static constexpr uint32_t relMethodHeaderFlag = 0x80000000;
709 // In the method list header, the first 2 bytes are the size of struct
710 static constexpr uint32_t structSizeMask = 0x0000FFFF;
711 // In the method list header, the last 2 bytes are the flags for the struct
712 static constexpr uint32_t structFlagsMask = 0xFFFF0000;
713 // Relative method lists have 4 byte alignment as all data in the InputSection
714 // is 4 byte
715 static constexpr uint32_t relativeOffsetSize = sizeof(uint32_t);
716
717 // The output size of the __objc_methlist section, computed during finalize()
718 uint32_t sectionSize = 0;
719 std::vector<ConcatInputSection *> inputs;
720};
721
722// Chained fixups are a replacement for classic dyld opcodes. In this format,
723// most of the metadata necessary for binding symbols and rebasing addresses is
724// stored directly in the memory location that will have the fixup applied.
725//
726// The fixups form singly linked lists; each one covering a single page in
727// memory. The __LINKEDIT,__chainfixups section stores the page offset of the
728// first fixup of each page; the rest can be found by walking the chain using
729// the offset that is embedded in each entry.
730//
731// This setup allows pages to be relocated lazily at page-in time and without
732// being dirtied. The kernel can discard and load them again as needed. This
733// technique, called page-in linking, was introduced in macOS 13.
734//
735// The benefits of this format are:
736// - smaller __LINKEDIT segment, as most of the fixup information is stored in
737// the data segment
738// - faster startup, since not all relocations need to be done upfront
739// - slightly lower memory usage, as fewer pages are dirtied
740//
741// Userspace x86_64 and arm64 binaries have two types of fixup entries:
742// - Rebase entries contain an absolute address, to which the object's load
743// address will be added to get the final value. This is used for loading
744// the address of a symbol defined in the same binary.
745// - Binding entries are mostly used for symbols imported from other dylibs,
746// but for weakly bound and interposable symbols as well. They are looked up
747// by a (symbol name, library) pair stored in __chainfixups. This import
748// entry also encodes whether the import is weak (i.e. if the symbol is
749// missing, it should be set to null instead of producing a load error).
750// The fixup encodes an ordinal associated with the import, and an optional
751// addend.
752//
753// The entries are tightly packed 64-bit bitfields. One of the bits specifies
754// which kind of fixup to interpret them as.
755//
756// LLD generates the fixup data in 5 stages:
757// 1. While scanning relocations, we make a note of each location that needs
758// a fixup by calling addRebase() or addBinding(). During this, we assign
759// a unique ordinal for each (symbol name, library, addend) import tuple.
// 2. After addresses have been assigned to all sections, and thus the memory
// layout of the linked image is final, finalizeContents() is called. Here,
// the page offsets of the chain start entries are calculated.
763// 3. ChainedFixupsSection::writeTo() writes the page start offsets and the
764// imports table to the output file.
765// 4. Each section's fixup entries are encoded and written to disk in
766// ConcatInputSection::writeTo(), but without writing the offsets that form
767// the chain.
768// 5. Finally, each page's (which might correspond to multiple sections)
769// fixups are linked together in Writer::buildFixupChains().
770class ChainedFixupsSection final : public LinkEditSection {
771public:
772 ChainedFixupsSection();
773 void finalizeContents() override;
774 uint64_t getRawSize() const override { return size; }
775 bool isNeeded() const override;
776 void writeTo(uint8_t *buf) const override;
777
778 void addRebase(const InputSection *isec, uint64_t offset) {
779 locations.emplace_back(args&: isec, args&: offset);
780 }
781 void addBinding(const Symbol *dysym, const InputSection *isec,
782 uint64_t offset, int64_t addend = 0);
783
784 void setHasNonWeakDefinition() { hasNonWeakDef = true; }
785
786 // Returns an (ordinal, inline addend) tuple used by dyld_chained_ptr_64_bind.
787 std::pair<uint32_t, uint8_t> getBinding(const Symbol *sym,
788 int64_t addend) const;
789
790 const std::vector<Location> &getLocations() const { return locations; }
791
792 bool hasWeakBinding() const { return hasWeakBind; }
793 bool hasNonWeakDefinition() const { return hasNonWeakDef; }
794
795private:
796 // Location::offset initially stores the offset within an InputSection, but
797 // contains output segment offsets after finalizeContents().
798 std::vector<Location> locations;
799 // (target symbol, addend) => import ordinal
800 llvm::MapVector<std::pair<const Symbol *, int64_t>, uint32_t> bindings;
801
802 struct SegmentInfo {
803 SegmentInfo(const OutputSegment *oseg) : oseg(oseg) {}
804
805 const OutputSegment *oseg;
806 // (page index, fixup starts offset)
807 llvm::SmallVector<std::pair<uint16_t, uint16_t>> pageStarts;
808
809 size_t getSize() const;
810 size_t writeTo(uint8_t *buf) const;
811 };
812 llvm::SmallVector<SegmentInfo, 4> fixupSegments;
813
814 size_t symtabSize = 0;
815 size_t size = 0;
816
817 bool needsAddend = false;
818 bool needsLargeAddend = false;
819 bool hasWeakBind = false;
820 bool hasNonWeakDef = false;
821 llvm::MachO::ChainedImportFormat importFormat;
822};
823
824void writeChainedRebase(uint8_t *buf, uint64_t targetVA);
825void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
826
827struct InStruct {
828 const uint8_t *bufferStart = nullptr;
829 MachHeaderSection *header = nullptr;
830 /// The list of cstring sections. Note that this includes \p cStringSection
831 /// and \p objcMethnameSection already.
832 llvm::SmallVector<CStringSection *> cStringSections;
833 CStringSection *cStringSection = nullptr;
834 DeduplicatedCStringSection *objcMethnameSection = nullptr;
835 WordLiteralSection *wordLiteralSection = nullptr;
836 RebaseSection *rebase = nullptr;
837 BindingSection *binding = nullptr;
838 WeakBindingSection *weakBinding = nullptr;
839 LazyBindingSection *lazyBinding = nullptr;
840 ExportSection *exports = nullptr;
841 GotSection *got = nullptr;
842 TlvPointerSection *tlvPointers = nullptr;
843 LazyPointerSection *lazyPointers = nullptr;
844 StubsSection *stubs = nullptr;
845 StubHelperSection *stubHelper = nullptr;
846 ObjCStubsSection *objcStubs = nullptr;
847 UnwindInfoSection *unwindInfo = nullptr;
848 ObjCImageInfoSection *objCImageInfo = nullptr;
849 ConcatInputSection *imageLoaderCache = nullptr;
850 InitOffsetsSection *initOffsets = nullptr;
851 ObjCMethListSection *objcMethList = nullptr;
852 ChainedFixupsSection *chainedFixups = nullptr;
853
854 CStringSection *getOrCreateCStringSection(StringRef name,
855 bool forceDedupStrings = false) {
856 auto [it, didEmplace] =
857 cStringSectionMap.try_emplace(Key: name, Args: cStringSections.size());
858 if (!didEmplace)
859 return cStringSections[it->getValue()];
860
861 std::string &nameData = *make<std::string>(args&: name);
862 CStringSection *sec;
863 if (config->dedupStrings || forceDedupStrings)
864 sec = make<DeduplicatedCStringSection>(args: nameData.c_str());
865 else
866 sec = make<CStringSection>(args: nameData.c_str());
867 cStringSections.push_back(Elt: sec);
868 return sec;
869 }
870
871private:
872 llvm::StringMap<unsigned> cStringSectionMap;
873};
874
875extern InStruct in;
876extern std::vector<SyntheticSection *> syntheticSections;
877
878void createSyntheticSymbols();
879
880} // namespace lld::macho
881
882#endif
883