1//===- Writer.cpp ---------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Writer.h"
10#include "ConcatOutputSection.h"
11#include "Config.h"
12#include "InputFiles.h"
13#include "InputSection.h"
14#include "LinkerOptimizationHints.h"
15#include "MapFile.h"
16#include "OutputSection.h"
17#include "OutputSegment.h"
18#include "SectionPriorities.h"
19#include "SymbolTable.h"
20#include "Symbols.h"
21#include "SyntheticSections.h"
22#include "Target.h"
23#include "UnwindInfoSection.h"
24
25#include "lld/Common/Arrays.h"
26#include "lld/Common/CommonLinkerContext.h"
27#include "llvm/BinaryFormat/MachO.h"
28#include "llvm/Config/llvm-config.h"
29#include "llvm/Support/Parallel.h"
30#include "llvm/Support/Path.h"
31#include "llvm/Support/TimeProfiler.h"
32#include "llvm/Support/thread.h"
33#include "llvm/Support/xxhash.h"
34
35#include <algorithm>
36
37using namespace llvm;
38using namespace llvm::MachO;
39using namespace llvm::sys;
40using namespace lld;
41using namespace lld::macho;
42
43namespace {
44class LCUuid;
45
46class Writer {
47public:
48 Writer() : buffer(errorHandler().outputBuffer) {}
49
50 void treatSpecialUndefineds();
51 void scanRelocations();
52 void scanSymbols();
53 template <class LP> void createOutputSections();
54 template <class LP> void createLoadCommands();
55 void finalizeAddresses();
56 void finalizeLinkEditSegment();
57 void assignAddresses(OutputSegment *);
58
59 void openFile();
60 void writeSections();
61 void applyOptimizationHints();
62 void buildFixupChains();
63 void writeUuid();
64 void writeCodeSignature();
65 void writeOutputFile();
66
67 template <class LP> void run();
68
69 std::unique_ptr<FileOutputBuffer> &buffer;
70 uint64_t addr = 0;
71 uint64_t fileOff = 0;
72 MachHeaderSection *header = nullptr;
73 StringTableSection *stringTableSection = nullptr;
74 SymtabSection *symtabSection = nullptr;
75 IndirectSymtabSection *indirectSymtabSection = nullptr;
76 CodeSignatureSection *codeSignatureSection = nullptr;
77 DataInCodeSection *dataInCodeSection = nullptr;
78 FunctionStartsSection *functionStartsSection = nullptr;
79
80 LCUuid *uuidCommand = nullptr;
81 OutputSegment *linkEditSegment = nullptr;
82};
83
84// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
85class LCDyldInfo final : public LoadCommand {
86public:
87 LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection,
88 WeakBindingSection *weakBindingSection,
89 LazyBindingSection *lazyBindingSection,
90 ExportSection *exportSection)
91 : rebaseSection(rebaseSection), bindingSection(bindingSection),
92 weakBindingSection(weakBindingSection),
93 lazyBindingSection(lazyBindingSection), exportSection(exportSection) {}
94
95 uint32_t getSize() const override { return sizeof(dyld_info_command); }
96
97 void writeTo(uint8_t *buf) const override {
98 auto *c = reinterpret_cast<dyld_info_command *>(buf);
99 c->cmd = LC_DYLD_INFO_ONLY;
100 c->cmdsize = getSize();
101 if (rebaseSection->isNeeded()) {
102 c->rebase_off = rebaseSection->fileOff;
103 c->rebase_size = rebaseSection->getFileSize();
104 }
105 if (bindingSection->isNeeded()) {
106 c->bind_off = bindingSection->fileOff;
107 c->bind_size = bindingSection->getFileSize();
108 }
109 if (weakBindingSection->isNeeded()) {
110 c->weak_bind_off = weakBindingSection->fileOff;
111 c->weak_bind_size = weakBindingSection->getFileSize();
112 }
113 if (lazyBindingSection->isNeeded()) {
114 c->lazy_bind_off = lazyBindingSection->fileOff;
115 c->lazy_bind_size = lazyBindingSection->getFileSize();
116 }
117 if (exportSection->isNeeded()) {
118 c->export_off = exportSection->fileOff;
119 c->export_size = exportSection->getFileSize();
120 }
121 }
122
123 RebaseSection *rebaseSection;
124 BindingSection *bindingSection;
125 WeakBindingSection *weakBindingSection;
126 LazyBindingSection *lazyBindingSection;
127 ExportSection *exportSection;
128};
129
130class LCSubFramework final : public LoadCommand {
131public:
132 LCSubFramework(StringRef umbrella) : umbrella(umbrella) {}
133
134 uint32_t getSize() const override {
135 return alignToPowerOf2(Value: sizeof(sub_framework_command) + umbrella.size() + 1,
136 Align: target->wordSize);
137 }
138
139 void writeTo(uint8_t *buf) const override {
140 auto *c = reinterpret_cast<sub_framework_command *>(buf);
141 buf += sizeof(sub_framework_command);
142
143 c->cmd = LC_SUB_FRAMEWORK;
144 c->cmdsize = getSize();
145 c->umbrella = sizeof(sub_framework_command);
146
147 memcpy(dest: buf, src: umbrella.data(), n: umbrella.size());
148 buf[umbrella.size()] = '\0';
149 }
150
151private:
152 const StringRef umbrella;
153};
154
155class LCFunctionStarts final : public LoadCommand {
156public:
157 explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection)
158 : functionStartsSection(functionStartsSection) {}
159
160 uint32_t getSize() const override { return sizeof(linkedit_data_command); }
161
162 void writeTo(uint8_t *buf) const override {
163 auto *c = reinterpret_cast<linkedit_data_command *>(buf);
164 c->cmd = LC_FUNCTION_STARTS;
165 c->cmdsize = getSize();
166 c->dataoff = functionStartsSection->fileOff;
167 c->datasize = functionStartsSection->getFileSize();
168 }
169
170private:
171 FunctionStartsSection *functionStartsSection;
172};
173
174class LCDataInCode final : public LoadCommand {
175public:
176 explicit LCDataInCode(DataInCodeSection *dataInCodeSection)
177 : dataInCodeSection(dataInCodeSection) {}
178
179 uint32_t getSize() const override { return sizeof(linkedit_data_command); }
180
181 void writeTo(uint8_t *buf) const override {
182 auto *c = reinterpret_cast<linkedit_data_command *>(buf);
183 c->cmd = LC_DATA_IN_CODE;
184 c->cmdsize = getSize();
185 c->dataoff = dataInCodeSection->fileOff;
186 c->datasize = dataInCodeSection->getFileSize();
187 }
188
189private:
190 DataInCodeSection *dataInCodeSection;
191};
192
193class LCDysymtab final : public LoadCommand {
194public:
195 LCDysymtab(SymtabSection *symtabSection,
196 IndirectSymtabSection *indirectSymtabSection)
197 : symtabSection(symtabSection),
198 indirectSymtabSection(indirectSymtabSection) {}
199
200 uint32_t getSize() const override { return sizeof(dysymtab_command); }
201
202 void writeTo(uint8_t *buf) const override {
203 auto *c = reinterpret_cast<dysymtab_command *>(buf);
204 c->cmd = LC_DYSYMTAB;
205 c->cmdsize = getSize();
206
207 c->ilocalsym = 0;
208 c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols();
209 c->nextdefsym = symtabSection->getNumExternalSymbols();
210 c->iundefsym = c->iextdefsym + c->nextdefsym;
211 c->nundefsym = symtabSection->getNumUndefinedSymbols();
212
213 c->indirectsymoff = indirectSymtabSection->fileOff;
214 c->nindirectsyms = indirectSymtabSection->getNumSymbols();
215 }
216
217 SymtabSection *symtabSection;
218 IndirectSymtabSection *indirectSymtabSection;
219};
220
221template <class LP> class LCSegment final : public LoadCommand {
222public:
223 LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {}
224
225 uint32_t getSize() const override {
226 return sizeof(typename LP::segment_command) +
227 seg->numNonHiddenSections() * sizeof(typename LP::section);
228 }
229
230 void writeTo(uint8_t *buf) const override {
231 using SegmentCommand = typename LP::segment_command;
232 using SectionHeader = typename LP::section;
233
234 auto *c = reinterpret_cast<SegmentCommand *>(buf);
235 buf += sizeof(SegmentCommand);
236
237 c->cmd = LP::segmentLCType;
238 c->cmdsize = getSize();
239 memcpy(c->segname, name.data(), name.size());
240 c->fileoff = seg->fileOff;
241 c->maxprot = seg->maxProt;
242 c->initprot = seg->initProt;
243
244 c->vmaddr = seg->addr;
245 c->vmsize = seg->vmSize;
246 c->filesize = seg->fileSize;
247 c->nsects = seg->numNonHiddenSections();
248 c->flags = seg->flags;
249
250 for (const OutputSection *osec : seg->getSections()) {
251 if (osec->isHidden())
252 continue;
253
254 auto *sectHdr = reinterpret_cast<SectionHeader *>(buf);
255 buf += sizeof(SectionHeader);
256
257 memcpy(sectHdr->sectname, osec->name.data(), osec->name.size());
258 memcpy(sectHdr->segname, name.data(), name.size());
259
260 sectHdr->addr = osec->addr;
261 sectHdr->offset = osec->fileOff;
262 sectHdr->align = Log2_32(Value: osec->align);
263 sectHdr->flags = osec->flags;
264 sectHdr->size = osec->getSize();
265 sectHdr->reserved1 = osec->reserved1;
266 sectHdr->reserved2 = osec->reserved2;
267 }
268 }
269
270private:
271 StringRef name;
272 OutputSegment *seg;
273};
274
275class LCMain final : public LoadCommand {
276 uint32_t getSize() const override {
277 return sizeof(structs::entry_point_command);
278 }
279
280 void writeTo(uint8_t *buf) const override {
281 auto *c = reinterpret_cast<structs::entry_point_command *>(buf);
282 c->cmd = LC_MAIN;
283 c->cmdsize = getSize();
284
285 if (config->entry->isInStubs())
286 c->entryoff =
287 in.stubs->fileOff + config->entry->stubsIndex * target->stubSize;
288 else
289 c->entryoff = config->entry->getVA() - in.header->addr;
290
291 c->stacksize = 0;
292 }
293};
294
295class LCSymtab final : public LoadCommand {
296public:
297 LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection)
298 : symtabSection(symtabSection), stringTableSection(stringTableSection) {}
299
300 uint32_t getSize() const override { return sizeof(symtab_command); }
301
302 void writeTo(uint8_t *buf) const override {
303 auto *c = reinterpret_cast<symtab_command *>(buf);
304 c->cmd = LC_SYMTAB;
305 c->cmdsize = getSize();
306 c->symoff = symtabSection->fileOff;
307 c->nsyms = symtabSection->getNumSymbols();
308 c->stroff = stringTableSection->fileOff;
309 c->strsize = stringTableSection->getFileSize();
310 }
311
312 SymtabSection *symtabSection = nullptr;
313 StringTableSection *stringTableSection = nullptr;
314};
315
// There are several dylib load commands that share the same structure:
//   * LC_LOAD_DYLIB
//   * LC_LOAD_WEAK_DYLIB
//   * LC_ID_DYLIB
//   * LC_REEXPORT_DYLIB
320class LCDylib final : public LoadCommand {
321public:
322 LCDylib(LoadCommandType type, StringRef path,
323 uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0)
324 : type(type), path(path), compatibilityVersion(compatibilityVersion),
325 currentVersion(currentVersion) {
326 instanceCount++;
327 }
328
329 uint32_t getSize() const override {
330 return alignToPowerOf2(Value: sizeof(dylib_command) + path.size() + 1,
331 Align: target->wordSize);
332 }
333
334 void writeTo(uint8_t *buf) const override {
335 auto *c = reinterpret_cast<dylib_command *>(buf);
336 buf += sizeof(dylib_command);
337
338 c->cmd = type;
339 c->cmdsize = getSize();
340 c->dylib.name = sizeof(dylib_command);
341 c->dylib.timestamp = 0;
342 c->dylib.compatibility_version = compatibilityVersion;
343 c->dylib.current_version = currentVersion;
344
345 memcpy(dest: buf, src: path.data(), n: path.size());
346 buf[path.size()] = '\0';
347 }
348
349 static uint32_t getInstanceCount() { return instanceCount; }
350 static void resetInstanceCount() { instanceCount = 0; }
351
352private:
353 LoadCommandType type;
354 StringRef path;
355 uint32_t compatibilityVersion;
356 uint32_t currentVersion;
357 static uint32_t instanceCount;
358};
359
360uint32_t LCDylib::instanceCount = 0;
361
362class LCLoadDylinker final : public LoadCommand {
363public:
364 uint32_t getSize() const override {
365 return alignToPowerOf2(Value: sizeof(dylinker_command) + path.size() + 1,
366 Align: target->wordSize);
367 }
368
369 void writeTo(uint8_t *buf) const override {
370 auto *c = reinterpret_cast<dylinker_command *>(buf);
371 buf += sizeof(dylinker_command);
372
373 c->cmd = LC_LOAD_DYLINKER;
374 c->cmdsize = getSize();
375 c->name = sizeof(dylinker_command);
376
377 memcpy(dest: buf, src: path.data(), n: path.size());
378 buf[path.size()] = '\0';
379 }
380
381private:
382 // Recent versions of Darwin won't run any binary that has dyld at a
383 // different location.
384 const StringRef path = "/usr/lib/dyld";
385};
386
387class LCRPath final : public LoadCommand {
388public:
389 explicit LCRPath(StringRef path) : path(path) {}
390
391 uint32_t getSize() const override {
392 return alignToPowerOf2(Value: sizeof(rpath_command) + path.size() + 1,
393 Align: target->wordSize);
394 }
395
396 void writeTo(uint8_t *buf) const override {
397 auto *c = reinterpret_cast<rpath_command *>(buf);
398 buf += sizeof(rpath_command);
399
400 c->cmd = LC_RPATH;
401 c->cmdsize = getSize();
402 c->path = sizeof(rpath_command);
403
404 memcpy(dest: buf, src: path.data(), n: path.size());
405 buf[path.size()] = '\0';
406 }
407
408private:
409 StringRef path;
410};
411
412class LCSubClient final : public LoadCommand {
413public:
414 explicit LCSubClient(StringRef client) : client(client) {}
415
416 uint32_t getSize() const override {
417 return alignToPowerOf2(Value: sizeof(sub_client_command) + client.size() + 1,
418 Align: target->wordSize);
419 }
420
421 void writeTo(uint8_t *buf) const override {
422 auto *c = reinterpret_cast<sub_client_command *>(buf);
423 buf += sizeof(sub_client_command);
424
425 c->cmd = LC_SUB_CLIENT;
426 c->cmdsize = getSize();
427 c->client = sizeof(sub_client_command);
428
429 memcpy(dest: buf, src: client.data(), n: client.size());
430 buf[client.size()] = '\0';
431 }
432
433private:
434 StringRef client;
435};
436
437class LCDyldEnv final : public LoadCommand {
438public:
439 explicit LCDyldEnv(StringRef name) : name(name) {}
440
441 uint32_t getSize() const override {
442 return alignToPowerOf2(Value: sizeof(dyld_env_command) + name.size() + 1,
443 Align: target->wordSize);
444 }
445
446 void writeTo(uint8_t *buf) const override {
447 auto *c = reinterpret_cast<dyld_env_command *>(buf);
448 buf += sizeof(dyld_env_command);
449
450 c->cmd = LC_DYLD_ENVIRONMENT;
451 c->cmdsize = getSize();
452 c->name = sizeof(dyld_env_command);
453
454 memcpy(dest: buf, src: name.data(), n: name.size());
455 buf[name.size()] = '\0';
456 }
457
458private:
459 StringRef name;
460};
461
462class LCMinVersion final : public LoadCommand {
463public:
464 explicit LCMinVersion(const PlatformInfo &platformInfo)
465 : platformInfo(platformInfo) {}
466
467 uint32_t getSize() const override { return sizeof(version_min_command); }
468
469 void writeTo(uint8_t *buf) const override {
470 auto *c = reinterpret_cast<version_min_command *>(buf);
471 switch (platformInfo.target.Platform) {
472 case PLATFORM_MACOS:
473 c->cmd = LC_VERSION_MIN_MACOSX;
474 break;
475 case PLATFORM_IOS:
476 case PLATFORM_IOSSIMULATOR:
477 c->cmd = LC_VERSION_MIN_IPHONEOS;
478 break;
479 case PLATFORM_TVOS:
480 case PLATFORM_TVOSSIMULATOR:
481 c->cmd = LC_VERSION_MIN_TVOS;
482 break;
483 case PLATFORM_WATCHOS:
484 case PLATFORM_WATCHOSSIMULATOR:
485 c->cmd = LC_VERSION_MIN_WATCHOS;
486 break;
487 default:
488 llvm_unreachable("invalid platform");
489 break;
490 }
491 c->cmdsize = getSize();
492 c->version = encodeVersion(version: platformInfo.target.MinDeployment);
493 c->sdk = encodeVersion(version: platformInfo.sdk);
494 }
495
496private:
497 const PlatformInfo &platformInfo;
498};
499
500class LCBuildVersion final : public LoadCommand {
501public:
502 explicit LCBuildVersion(const PlatformInfo &platformInfo)
503 : platformInfo(platformInfo) {}
504
505 const int ntools = 1;
506
507 uint32_t getSize() const override {
508 return sizeof(build_version_command) + ntools * sizeof(build_tool_version);
509 }
510
511 void writeTo(uint8_t *buf) const override {
512 auto *c = reinterpret_cast<build_version_command *>(buf);
513 c->cmd = LC_BUILD_VERSION;
514 c->cmdsize = getSize();
515
516 c->platform = static_cast<uint32_t>(platformInfo.target.Platform);
517 c->minos = encodeVersion(version: platformInfo.target.MinDeployment);
518 c->sdk = encodeVersion(version: platformInfo.sdk);
519
520 c->ntools = ntools;
521 auto *t = reinterpret_cast<build_tool_version *>(&c[1]);
522 t->tool = TOOL_LLD;
523 t->version = encodeVersion(version: VersionTuple(
524 LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH));
525 }
526
527private:
528 const PlatformInfo &platformInfo;
529};
530
// Stores a unique identifier for the output file based on a hash of its
// contents (an 8-byte xxhash digest, which the UUID's version bits label as
// MD5-based). In order to hash the contents, we must first write them, but
// LC_UUID itself must be part of the written contents in order for all the
// offsets to be calculated correctly. We resolve this circular dependency by
// first writing an LC_UUID with an all-zero UUID, then updating the UUID with
// its real value later.
537class LCUuid final : public LoadCommand {
538public:
539 uint32_t getSize() const override { return sizeof(uuid_command); }
540
541 void writeTo(uint8_t *buf) const override {
542 auto *c = reinterpret_cast<uuid_command *>(buf);
543 c->cmd = LC_UUID;
544 c->cmdsize = getSize();
545 uuidBuf = c->uuid;
546 }
547
548 void writeUuid(uint64_t digest) const {
549 // xxhash only gives us 8 bytes, so put some fixed data in the other half.
550 static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size");
551 memcpy(dest: uuidBuf, src: "LLD\xa1UU1D", n: 8);
552 memcpy(dest: uuidBuf + 8, src: &digest, n: 8);
553
554 // RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in
555 // byte 8. Byte 6 is already fine due to the fixed data we put in. We don't
556 // want to lose bits of the digest in byte 8, so swap that with a byte of
557 // fixed data that happens to have the right bits set.
558 std::swap(a&: uuidBuf[3], b&: uuidBuf[8]);
559
560 // Claim that this is an MD5-based hash. It isn't, but this signals that
561 // this is not a time-based and not a random hash. MD5 seems like the least
562 // bad lie we can put here.
563 assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3");
564 assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2");
565 }
566
567 mutable uint8_t *uuidBuf;
568};
569
570template <class LP> class LCEncryptionInfo final : public LoadCommand {
571public:
572 uint32_t getSize() const override {
573 return sizeof(typename LP::encryption_info_command);
574 }
575
576 void writeTo(uint8_t *buf) const override {
577 using EncryptionInfo = typename LP::encryption_info_command;
578 auto *c = reinterpret_cast<EncryptionInfo *>(buf);
579 buf += sizeof(EncryptionInfo);
580 c->cmd = LP::encryptionInfoLCType;
581 c->cmdsize = getSize();
582 c->cryptoff = in.header->getSize();
583 auto it = find_if(outputSegments, [](const OutputSegment *seg) {
584 return seg->name == segment_names::text;
585 });
586 assert(it != outputSegments.end());
587 c->cryptsize = (*it)->fileSize - c->cryptoff;
588 }
589};
590
591class LCCodeSignature final : public LoadCommand {
592public:
593 LCCodeSignature(CodeSignatureSection *section) : section(section) {}
594
595 uint32_t getSize() const override { return sizeof(linkedit_data_command); }
596
597 void writeTo(uint8_t *buf) const override {
598 auto *c = reinterpret_cast<linkedit_data_command *>(buf);
599 c->cmd = LC_CODE_SIGNATURE;
600 c->cmdsize = getSize();
601 c->dataoff = static_cast<uint32_t>(section->fileOff);
602 c->datasize = section->getSize();
603 }
604
605 CodeSignatureSection *section;
606};
607
608class LCExportsTrie final : public LoadCommand {
609public:
610 LCExportsTrie(ExportSection *section) : section(section) {}
611
612 uint32_t getSize() const override { return sizeof(linkedit_data_command); }
613
614 void writeTo(uint8_t *buf) const override {
615 auto *c = reinterpret_cast<linkedit_data_command *>(buf);
616 c->cmd = LC_DYLD_EXPORTS_TRIE;
617 c->cmdsize = getSize();
618 c->dataoff = section->fileOff;
619 c->datasize = section->getSize();
620 }
621
622 ExportSection *section;
623};
624
625class LCChainedFixups final : public LoadCommand {
626public:
627 LCChainedFixups(ChainedFixupsSection *section) : section(section) {}
628
629 uint32_t getSize() const override { return sizeof(linkedit_data_command); }
630
631 void writeTo(uint8_t *buf) const override {
632 auto *c = reinterpret_cast<linkedit_data_command *>(buf);
633 c->cmd = LC_DYLD_CHAINED_FIXUPS;
634 c->cmdsize = getSize();
635 c->dataoff = section->fileOff;
636 c->datasize = section->getSize();
637 }
638
639 ChainedFixupsSection *section;
640};
641
642} // namespace
643
644void Writer::treatSpecialUndefineds() {
645 if (config->entry)
646 if (auto *undefined = dyn_cast<Undefined>(Val: config->entry))
647 treatUndefinedSymbol(*undefined, source: "the entry point");
648
649 // FIXME: This prints symbols that are undefined both in input files and
650 // via -u flag twice.
651 for (const Symbol *sym : config->explicitUndefineds) {
652 if (const auto *undefined = dyn_cast<Undefined>(Val: sym))
653 treatUndefinedSymbol(*undefined, source: "-u");
654 }
655 // Literal exported-symbol names must be defined, but glob
656 // patterns need not match.
657 for (const CachedHashStringRef &cachedName :
658 config->exportedSymbols.literals) {
659 if (const Symbol *sym = symtab->find(name: cachedName))
660 if (const auto *undefined = dyn_cast<Undefined>(Val: sym))
661 treatUndefinedSymbol(*undefined, source: "-exported_symbol(s_list)");
662 }
663}
664
665static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
666 const Relocation &r) {
667 if (!sym->isLive()) {
668 if (Defined *defined = dyn_cast<Defined>(Val: sym)) {
669 if (config->emitInitOffsets &&
670 defined->isec()->getName() == section_names::moduleInitFunc)
671 fatal(msg: isec->getLocation(off: r.offset) + ": cannot reference " +
672 sym->getName() +
673 " defined in __mod_init_func when -init_offsets is used");
674 }
675 assert(false && "referenced symbol must be live");
676 }
677
678 const RelocAttrs &relocAttrs = target->getRelocAttrs(type: r.type);
679
680 if (relocAttrs.hasAttr(b: RelocAttrBits::BRANCH)) {
681 if (needsBinding(sym))
682 in.stubs->addEntry(sym);
683 } else if (relocAttrs.hasAttr(b: RelocAttrBits::GOT)) {
684 if (relocAttrs.hasAttr(b: RelocAttrBits::POINTER) || needsBinding(sym))
685 in.got->addEntry(sym);
686 } else if (relocAttrs.hasAttr(b: RelocAttrBits::TLV)) {
687 if (needsBinding(sym))
688 in.tlvPointers->addEntry(sym);
689 } else if (relocAttrs.hasAttr(b: RelocAttrBits::UNSIGNED)) {
690 // References from thread-local variable sections are treated as offsets
691 // relative to the start of the referent section, and therefore have no
692 // need of rebase opcodes.
693 if (!(isThreadLocalVariables(flags: isec->getFlags()) && isa<Defined>(Val: sym)))
694 addNonLazyBindingEntries(sym, isec, offset: r.offset, addend: r.addend);
695 }
696}
697
698void Writer::scanRelocations() {
699 TimeTraceScope timeScope("Scan relocations");
700
701 // This can't use a for-each loop: It calls treatUndefinedSymbol(), which can
702 // add to inputSections, which invalidates inputSections's iterators.
703 for (size_t i = 0; i < inputSections.size(); ++i) {
704 ConcatInputSection *isec = inputSections[i];
705
706 if (isec->shouldOmitFromOutput())
707 continue;
708
709 for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
710 Relocation &r = *it;
711
712 // Canonicalize the referent so that later accesses in Writer won't
713 // have to worry about it.
714 if (auto *referentIsec = dyn_cast_if_present<InputSection *>(Val&: r.referent))
715 r.referent = referentIsec->canonical();
716
717 if (target->hasAttr(type: r.type, bit: RelocAttrBits::SUBTRAHEND)) {
718 // Skip over the following UNSIGNED relocation -- it's just there as the
719 // minuend, and doesn't have the usual UNSIGNED semantics. We don't want
720 // to emit rebase opcodes for it.
721 ++it;
722 // Canonicalize the referent so that later accesses in Writer won't
723 // have to worry about it.
724 if (auto *referentIsec = it->referent.dyn_cast<InputSection *>())
725 it->referent = referentIsec->canonical();
726 continue;
727 }
728 if (auto *sym = dyn_cast_if_present<Symbol *>(Val&: r.referent)) {
729 if (auto *undefined = dyn_cast<Undefined>(Val: sym))
730 treatUndefinedSymbol(*undefined, isec, offset: r.offset);
731 // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check.
732 if (!isa<Undefined>(Val: sym) && validateSymbolRelocation(sym, isec, r))
733 prepareSymbolRelocation(sym, isec, r);
734 } else {
735 if (!r.pcrel) {
736 if (config->emitChainedFixups)
737 in.chainedFixups->addRebase(isec, offset: r.offset);
738 else
739 in.rebase->addEntry(isec, offset: r.offset);
740 }
741 }
742 }
743 }
744
745 in.unwindInfo->prepare();
746}
747
748static void addNonWeakDefinition(const Defined *defined) {
749 if (config->emitChainedFixups)
750 in.chainedFixups->setHasNonWeakDefinition();
751 else
752 in.weakBinding->addNonWeakDefinition(defined);
753}
754
755void Writer::scanSymbols() {
756 TimeTraceScope timeScope("Scan symbols");
757 ObjCSelRefsHelper::initialize();
758 for (Symbol *sym : symtab->getSymbols()) {
759 if (auto *defined = dyn_cast<Defined>(Val: sym)) {
760 if (!defined->isLive())
761 continue;
762 if (defined->overridesWeakDef)
763 addNonWeakDefinition(defined);
764 if (!defined->isAbsolute() && isCodeSection(defined->isec()))
765 in.unwindInfo->addSymbol(defined);
766 } else if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
767 // This branch intentionally doesn't check isLive().
768 if (dysym->isDynamicLookup())
769 continue;
770 dysym->getFile()->refState =
771 std::max(a: dysym->getFile()->refState, b: dysym->getRefState());
772 } else if (isa<Undefined>(Val: sym)) {
773 if (ObjCStubsSection::isObjCStubSymbol(sym)) {
774 // When -dead_strip is enabled, we don't want to emit any dead stubs.
775 // Although this stub symbol is yet undefined, addSym() was called
776 // during MarkLive.
777 if (config->deadStrip) {
778 if (!sym->isLive())
779 continue;
780 }
781 in.objcStubs->addEntry(sym);
782 }
783 }
784 }
785
786 for (const InputFile *file : inputFiles) {
787 if (auto *objFile = dyn_cast<ObjFile>(Val: file))
788 for (Symbol *sym : objFile->symbols) {
789 if (auto *defined = dyn_cast_or_null<Defined>(Val: sym)) {
790 if (!defined->isLive())
791 continue;
792 if (!defined->isExternal() && !defined->isAbsolute() &&
793 isCodeSection(defined->isec()))
794 in.unwindInfo->addSymbol(defined);
795 }
796 }
797 }
798}
799
800// TODO: ld64 enforces the old load commands in a few other cases.
801static bool useLCBuildVersion(const PlatformInfo &platformInfo) {
802 static const std::array<std::pair<PlatformType, VersionTuple>, 7> minVersion =
803 {._M_elems: {{PLATFORM_MACOS, VersionTuple(10, 14)},
804 {PLATFORM_IOS, VersionTuple(12, 0)},
805 {PLATFORM_IOSSIMULATOR, VersionTuple(13, 0)},
806 {PLATFORM_TVOS, VersionTuple(12, 0)},
807 {PLATFORM_TVOSSIMULATOR, VersionTuple(13, 0)},
808 {PLATFORM_WATCHOS, VersionTuple(5, 0)},
809 {PLATFORM_WATCHOSSIMULATOR, VersionTuple(6, 0)}}};
810 auto it = llvm::find_if(Range: minVersion, P: [&](const auto &p) {
811 return p.first == platformInfo.target.Platform;
812 });
813 return it == minVersion.end()
814 ? true
815 : platformInfo.target.MinDeployment >= it->second;
816}
817
818template <class LP> void Writer::createLoadCommands() {
819 uint8_t segIndex = 0;
820 for (OutputSegment *seg : outputSegments) {
821 in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg));
822 seg->index = segIndex++;
823 }
824
825 if (config->emitChainedFixups) {
826 in.header->addLoadCommand(make<LCChainedFixups>(args&: in.chainedFixups));
827 in.header->addLoadCommand(make<LCExportsTrie>(args&: in.exports));
828 } else {
829 in.header->addLoadCommand(make<LCDyldInfo>(
830 args&: in.rebase, args&: in.binding, args&: in.weakBinding, args&: in.lazyBinding, args&: in.exports));
831 }
832 in.header->addLoadCommand(make<LCSymtab>(args&: symtabSection, args&: stringTableSection));
833 in.header->addLoadCommand(
834 make<LCDysymtab>(args&: symtabSection, args&: indirectSymtabSection));
835 if (!config->umbrella.empty())
836 in.header->addLoadCommand(make<LCSubFramework>(args&: config->umbrella));
837 if (config->emitEncryptionInfo)
838 in.header->addLoadCommand(make<LCEncryptionInfo<LP>>());
839 for (StringRef path : config->runtimePaths)
840 in.header->addLoadCommand(make<LCRPath>(args&: path));
841
842 switch (config->outputType) {
843 case MH_EXECUTE:
844 in.header->addLoadCommand(make<LCLoadDylinker>());
845 break;
846 case MH_DYLIB:
847 in.header->addLoadCommand(make<LCDylib>(args: LC_ID_DYLIB, args&: config->installName,
848 args&: config->dylibCompatibilityVersion,
849 args&: config->dylibCurrentVersion));
850 for (StringRef client : config->allowableClients)
851 in.header->addLoadCommand(make<LCSubClient>(args&: client));
852 break;
853 case MH_BUNDLE:
854 break;
855 default:
856 llvm_unreachable("unhandled output file type");
857 }
858
859 if (config->generateUuid) {
860 uuidCommand = make<LCUuid>();
861 in.header->addLoadCommand(uuidCommand);
862 }
863
864 if (useLCBuildVersion(platformInfo: config->platformInfo))
865 in.header->addLoadCommand(make<LCBuildVersion>(args&: config->platformInfo));
866 else
867 in.header->addLoadCommand(make<LCMinVersion>(args&: config->platformInfo));
868
869 if (config->secondaryPlatformInfo) {
870 in.header->addLoadCommand(
871 make<LCBuildVersion>(args&: *config->secondaryPlatformInfo));
872 }
873
874 // This is down here to match ld64's load command order.
875 if (config->outputType == MH_EXECUTE)
876 in.header->addLoadCommand(make<LCMain>());
877
878 // See ld64's OutputFile::buildDylibOrdinalMapping for the corresponding
879 // library ordinal computation code in ld64.
880 int64_t dylibOrdinal = 1;
881 DenseMap<StringRef, int64_t> ordinalForInstallName;
882
883 std::vector<DylibFile *> dylibFiles;
884 for (InputFile *file : inputFiles) {
885 if (auto *dylibFile = dyn_cast<DylibFile>(Val: file))
886 dylibFiles.push_back(x: dylibFile);
887 }
888 for (size_t i = 0; i < dylibFiles.size(); ++i)
889 dylibFiles.insert(position: dylibFiles.end(), first: dylibFiles[i]->extraDylibs.begin(),
890 last: dylibFiles[i]->extraDylibs.end());
891
892 for (DylibFile *dylibFile : dylibFiles) {
893 if (dylibFile->isBundleLoader) {
894 dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
895 // Shortcut since bundle-loader does not re-export the symbols.
896
897 dylibFile->reexport = false;
898 continue;
899 }
900
901 // Don't emit load commands for a dylib that is not referenced if:
902 // - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER --
903 // if it's on the linker command line, it's explicit)
904 // - or it's marked MH_DEAD_STRIPPABLE_DYLIB
905 // - or the flag -dead_strip_dylibs is used
906 // FIXME: `isReferenced()` is currently computed before dead code
907 // stripping, so references from dead code keep a dylib alive. This
908 // matches ld64, but it's something we should do better.
909 if (!dylibFile->isReferenced() && !dylibFile->forceNeeded &&
910 (!dylibFile->isExplicitlyLinked() || dylibFile->deadStrippable ||
911 config->deadStripDylibs))
912 continue;
913
914 // Several DylibFiles can have the same installName. Only emit a single
915 // load command for that installName and give all these DylibFiles the
916 // same ordinal.
917 // This can happen in several cases:
918 // - a new framework could change its installName to an older
919 // framework name via an $ld$ symbol depending on platform_version
920 // - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd;
921 // Foo.framework/Foo.tbd is usually a symlink to
922 // Foo.framework/Versions/Current/Foo.tbd, where
923 // Foo.framework/Versions/Current is usually a symlink to
924 // Foo.framework/Versions/A)
925 // - a framework can be linked both explicitly on the linker
926 // command line and implicitly as a reexport from a different
927 // framework. The re-export will usually point to the tbd file
928 // in Foo.framework/Versions/A/Foo.tbd, while the explicit link will
929 // usually find Foo.framework/Foo.tbd. These are usually symlinks,
930 // but in a --reproduce archive they will be identical but distinct
931 // files.
932 // In the first case, *semantically distinct* DylibFiles will have the
933 // same installName.
934 int64_t &ordinal = ordinalForInstallName[dylibFile->installName];
935 if (ordinal) {
936 dylibFile->ordinal = ordinal;
937 continue;
938 }
939
940 ordinal = dylibFile->ordinal = dylibOrdinal++;
941 LoadCommandType lcType = LC_LOAD_DYLIB;
942 if (dylibFile->reexport) {
943 if (dylibFile->forceWeakImport)
944 warn(msg: path::filename(path: dylibFile->getName()) +
945 " is re-exported so cannot be weak-linked");
946
947 lcType = LC_REEXPORT_DYLIB;
948 } else if (dylibFile->forceWeakImport ||
949 dylibFile->refState == RefState::Weak) {
950 lcType = LC_LOAD_WEAK_DYLIB;
951 }
952 in.header->addLoadCommand(make<LCDylib>(args&: lcType, args&: dylibFile->installName,
953 args&: dylibFile->compatibilityVersion,
954 args&: dylibFile->currentVersion));
955 }
956
957 for (const auto &dyldEnv : config->dyldEnvs)
958 in.header->addLoadCommand(make<LCDyldEnv>(args: dyldEnv));
959
960 if (functionStartsSection)
961 in.header->addLoadCommand(make<LCFunctionStarts>(args&: functionStartsSection));
962 if (dataInCodeSection)
963 in.header->addLoadCommand(make<LCDataInCode>(args&: dataInCodeSection));
964 if (codeSignatureSection)
965 in.header->addLoadCommand(make<LCCodeSignature>(args&: codeSignatureSection));
966
967 const uint32_t MACOS_MAXPATHLEN = 1024;
968 config->headerPad = std::max(
969 a: config->headerPad, b: (config->headerPadMaxInstallNames
970 ? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN
971 : 0));
972}
973
974// Sorting only can happen once all outputs have been collected. Here we sort
975// segments, output sections within each segment, and input sections within each
976// output segment.
977static void sortSegmentsAndSections() {
978 TimeTraceScope timeScope("Sort segments and sections");
979 sortOutputSegments();
980
981 DenseMap<const InputSection *, int> isecPriorities =
982 priorityBuilder.buildInputSectionPriorities();
983
984 uint32_t sectionIndex = 0;
985 for (OutputSegment *seg : outputSegments) {
986 seg->sortOutputSections();
987 // References from thread-local variable sections are treated as offsets
988 // relative to the start of the thread-local data memory area, which
989 // is initialized via copying all the TLV data sections (which are all
990 // contiguous). If later data sections require a greater alignment than
991 // earlier ones, the offsets of data within those sections won't be
992 // guaranteed to aligned unless we normalize alignments. We therefore use
993 // the largest alignment for all TLV data sections.
994 uint32_t tlvAlign = 0;
995 for (const OutputSection *osec : seg->getSections())
996 if (isThreadLocalData(flags: osec->flags) && osec->align > tlvAlign)
997 tlvAlign = osec->align;
998
999 for (OutputSection *osec : seg->getSections()) {
1000 // Now that the output sections are sorted, assign the final
1001 // output section indices.
1002 if (!osec->isHidden())
1003 osec->index = ++sectionIndex;
1004 if (isThreadLocalData(flags: osec->flags)) {
1005 if (!firstTLVDataSection)
1006 firstTLVDataSection = osec;
1007 osec->align = tlvAlign;
1008 }
1009
1010 if (!isecPriorities.empty()) {
1011 if (auto *merged = dyn_cast<ConcatOutputSection>(Val: osec)) {
1012 llvm::stable_sort(
1013 Range&: merged->inputs, C: [&](InputSection *a, InputSection *b) {
1014 return isecPriorities.lookup(Val: a) < isecPriorities.lookup(Val: b);
1015 });
1016 }
1017 }
1018 }
1019 }
1020}
1021
1022template <class LP> void Writer::createOutputSections() {
1023 TimeTraceScope timeScope("Create output sections");
1024 // First, create hidden sections
1025 stringTableSection = make<StringTableSection>();
1026 symtabSection = makeSymtabSection<LP>(*stringTableSection);
1027 indirectSymtabSection = make<IndirectSymtabSection>();
1028 if (config->adhocCodesign)
1029 codeSignatureSection = make<CodeSignatureSection>();
1030 if (config->emitDataInCodeInfo)
1031 dataInCodeSection = make<DataInCodeSection>();
1032 if (config->emitFunctionStarts)
1033 functionStartsSection = make<FunctionStartsSection>();
1034
1035 switch (config->outputType) {
1036 case MH_EXECUTE:
1037 make<PageZeroSection>();
1038 break;
1039 case MH_DYLIB:
1040 case MH_BUNDLE:
1041 break;
1042 default:
1043 llvm_unreachable("unhandled output file type");
1044 }
1045
1046 // Then add input sections to output sections.
1047 for (ConcatInputSection *isec : inputSections) {
1048 if (isec->shouldOmitFromOutput())
1049 continue;
1050 ConcatOutputSection *osec = cast<ConcatOutputSection>(Val: isec->parent);
1051 osec->addInput(input: isec);
1052 osec->inputOrder =
1053 std::min(a: osec->inputOrder, b: static_cast<int>(isec->outSecOff));
1054 }
1055
1056 // Once all the inputs are added, we can finalize the output section
1057 // properties and create the corresponding output segments.
1058 for (const auto &it : concatOutputSections) {
1059 StringRef segname = it.first.first;
1060 ConcatOutputSection *osec = it.second;
1061 assert(segname != segment_names::ld);
1062 if (osec->isNeeded()) {
1063 // See comment in ObjFile::splitEhFrames()
1064 if (osec->name == section_names::ehFrame &&
1065 segname == segment_names::text)
1066 osec->align = target->wordSize;
1067
1068 // MC keeps the default 1-byte alignment for __thread_vars, even though it
1069 // contains pointers that are fixed up by dyld, which requires proper
1070 // alignment.
1071 if (isThreadLocalVariables(flags: osec->flags))
1072 osec->align = std::max<uint32_t>(a: osec->align, b: target->wordSize);
1073
1074 getOrCreateOutputSegment(name: segname)->addOutputSection(os: osec);
1075 }
1076 }
1077
1078 for (SyntheticSection *ssec : syntheticSections) {
1079 auto it = concatOutputSections.find(Key: {ssec->segname, ssec->name});
1080 // We add all LinkEdit sections here because we don't know if they are
1081 // needed until their finalizeContents() methods get called later. While
1082 // this means that we add some redundant sections to __LINKEDIT, there is
1083 // is no redundancy in the output, as we do not emit section headers for
1084 // any LinkEdit sections.
1085 if (ssec->isNeeded() || ssec->segname == segment_names::linkEdit) {
1086 if (it == concatOutputSections.end()) {
1087 getOrCreateOutputSegment(name: ssec->segname)->addOutputSection(os: ssec);
1088 } else {
1089 fatal(msg: "section from " +
1090 toString(file: it->second->firstSection()->getFile()) +
1091 " conflicts with synthetic section " + ssec->segname + "," +
1092 ssec->name);
1093 }
1094 }
1095 }
1096
1097 // dyld requires __LINKEDIT segment to always exist (even if empty).
1098 linkEditSegment = getOrCreateOutputSegment(name: segment_names::linkEdit);
1099}
1100
1101void Writer::finalizeAddresses() {
1102 TimeTraceScope timeScope("Finalize addresses");
1103 uint64_t pageSize = target->getPageSize();
1104
1105 // We could parallelize this loop, but local benchmarking indicates it is
1106 // faster to do it all in the main thread.
1107 for (OutputSegment *seg : outputSegments) {
1108 if (seg == linkEditSegment)
1109 continue;
1110 for (OutputSection *osec : seg->getSections()) {
1111 if (!osec->isNeeded())
1112 continue;
1113 // Other kinds of OutputSections have already been finalized.
1114 if (auto *concatOsec = dyn_cast<ConcatOutputSection>(Val: osec))
1115 concatOsec->finalizeContents();
1116 }
1117 }
1118
1119 // Ensure that segments (and the sections they contain) are allocated
1120 // addresses in ascending order, which dyld requires.
1121 //
1122 // Note that at this point, __LINKEDIT sections are empty, but we need to
1123 // determine addresses of other segments/sections before generating its
1124 // contents.
1125 for (OutputSegment *seg : outputSegments) {
1126 if (seg == linkEditSegment)
1127 continue;
1128 seg->addr = addr;
1129 assignAddresses(seg);
1130 // codesign / libstuff checks for segment ordering by verifying that
1131 // `fileOff + fileSize == next segment fileOff`. So we call
1132 // alignToPowerOf2() before (instead of after) computing fileSize to ensure
1133 // that the segments are contiguous. We handle addr / vmSize similarly for
1134 // the same reason.
1135 fileOff = alignToPowerOf2(Value: fileOff, Align: pageSize);
1136 addr = alignToPowerOf2(Value: addr, Align: pageSize);
1137 seg->vmSize = addr - seg->addr;
1138 seg->fileSize = fileOff - seg->fileOff;
1139 seg->assignAddressesToStartEndSymbols();
1140 }
1141}
1142
1143void Writer::finalizeLinkEditSegment() {
1144 TimeTraceScope timeScope("Finalize __LINKEDIT segment");
1145 // Fill __LINKEDIT contents.
1146 std::array<LinkEditSection *, 10> linkEditSections{
1147 in.rebase, in.binding,
1148 in.weakBinding, in.lazyBinding,
1149 in.exports, in.chainedFixups,
1150 symtabSection, indirectSymtabSection,
1151 dataInCodeSection, functionStartsSection,
1152 };
1153
1154 parallelForEach(Begin: linkEditSections.begin(), End: linkEditSections.end(),
1155 Fn: [](LinkEditSection *osec) {
1156 if (osec)
1157 osec->finalizeContents();
1158 });
1159
1160 // Now that __LINKEDIT is filled out, do a proper calculation of its
1161 // addresses and offsets.
1162 linkEditSegment->addr = addr;
1163 assignAddresses(linkEditSegment);
1164 // No need to page-align fileOff / addr here since this is the last segment.
1165 linkEditSegment->vmSize = addr - linkEditSegment->addr;
1166 linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff;
1167}
1168
1169void Writer::assignAddresses(OutputSegment *seg) {
1170 seg->fileOff = fileOff;
1171
1172 for (OutputSection *osec : seg->getSections()) {
1173 if (!osec->isNeeded())
1174 continue;
1175 addr = alignToPowerOf2(Value: addr, Align: osec->align);
1176 fileOff = alignToPowerOf2(Value: fileOff, Align: osec->align);
1177 osec->addr = addr;
1178 osec->fileOff = isZeroFill(flags: osec->flags) ? 0 : fileOff;
1179 osec->finalize();
1180 osec->assignAddressesToStartEndSymbols();
1181
1182 addr += osec->getSize();
1183 fileOff += osec->getFileSize();
1184 }
1185}
1186
1187void Writer::openFile() {
1188 Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr =
1189 FileOutputBuffer::create(FilePath: config->outputFile, Size: fileOff,
1190 Flags: FileOutputBuffer::F_executable);
1191
1192 if (!bufferOrErr)
1193 fatal(msg: "failed to open " + config->outputFile + ": " +
1194 llvm::toString(E: bufferOrErr.takeError()));
1195 buffer = std::move(*bufferOrErr);
1196 in.bufferStart = buffer->getBufferStart();
1197}
1198
1199void Writer::writeSections() {
1200 TimeTraceScope timeScope("Write output sections");
1201
1202 uint8_t *buf = buffer->getBufferStart();
1203 std::vector<const OutputSection *> osecs;
1204 for (const OutputSegment *seg : outputSegments)
1205 append_range(C&: osecs, R: seg->getSections());
1206
1207 parallelForEach(Begin: osecs.begin(), End: osecs.end(), Fn: [&](const OutputSection *osec) {
1208 osec->writeTo(buf: buf + osec->fileOff);
1209 });
1210}
1211
1212void Writer::applyOptimizationHints() {
1213 if (!is_contained(Set: {AK_arm64, AK_arm64e, AK_arm64_32}, Element: config->arch()) ||
1214 config->ignoreOptimizationHints)
1215 return;
1216
1217 uint8_t *buf = buffer->getBufferStart();
1218 TimeTraceScope timeScope("Apply linker optimization hints");
1219 parallelForEach(R&: inputFiles, Fn: [buf](const InputFile *file) {
1220 if (const auto *objFile = dyn_cast<ObjFile>(Val: file))
1221 macho::applyOptimizationHints(outBuf: buf, obj: *objFile);
1222 });
1223}
1224
1225// In order to utilize multiple cores, we first split the buffer into chunks,
1226// compute a hash for each chunk, and then compute a hash value of the hash
1227// values.
1228void Writer::writeUuid() {
1229 TimeTraceScope timeScope("Computing UUID");
1230
1231 ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()};
1232 std::vector<ArrayRef<uint8_t>> chunks = split(arr: data, chunkSize: 1024 * 1024);
1233
1234 // Leave one slot for filename
1235 std::vector<uint64_t> hashes(chunks.size() + 1);
1236 parallelFor(Begin: 0, End: chunks.size(),
1237 Fn: [&](size_t i) { hashes[i] = xxh3_64bits(data: chunks[i]); });
1238 // Append the output filename so that identical binaries with different names
1239 // don't get the same UUID.
1240 hashes[chunks.size()] = xxh3_64bits(data: sys::path::filename(path: config->finalOutput));
1241
1242 uint64_t digest = xxh3_64bits(data: {reinterpret_cast<uint8_t *>(hashes.data()),
1243 hashes.size() * sizeof(uint64_t)});
1244 uuidCommand->writeUuid(digest);
1245}
1246
1247// This is step 5 of the algorithm described in the class comment of
1248// ChainedFixupsSection.
1249void Writer::buildFixupChains() {
1250 if (!config->emitChainedFixups)
1251 return;
1252
1253 const std::vector<Location> &loc = in.chainedFixups->getLocations();
1254 if (loc.empty())
1255 return;
1256
1257 TimeTraceScope timeScope("Build fixup chains");
1258
1259 const uint64_t pageSize = target->getPageSize();
1260 constexpr uint32_t stride = 4; // for DYLD_CHAINED_PTR_64
1261
1262 for (size_t i = 0, count = loc.size(); i < count;) {
1263 const OutputSegment *oseg = loc[i].isec->parent->parent;
1264 uint8_t *buf = buffer->getBufferStart() + oseg->fileOff;
1265 uint64_t pageIdx = loc[i].offset / pageSize;
1266 ++i;
1267
1268 while (i < count && loc[i].isec->parent->parent == oseg &&
1269 (loc[i].offset / pageSize) == pageIdx) {
1270 uint64_t offset = loc[i].offset - loc[i - 1].offset;
1271
1272 auto fail = [&](Twine message) {
1273 error(msg: loc[i].isec->getSegName() + "," + loc[i].isec->getName() +
1274 ", offset " +
1275 Twine(loc[i].offset - loc[i].isec->parent->getSegmentOffset()) +
1276 ": " + message);
1277 };
1278
1279 if (offset < target->wordSize)
1280 return fail("fixups overlap");
1281 if (offset % stride != 0)
1282 return fail(
1283 "fixups are unaligned (offset " + Twine(offset) +
1284 " is not a multiple of the stride). Re-link with -no_fixup_chains");
1285
1286 // The "next" field is in the same location for bind and rebase entries.
1287 reinterpret_cast<dyld_chained_ptr_64_bind *>(buf + loc[i - 1].offset)
1288 ->next = offset / stride;
1289 ++i;
1290 }
1291 }
1292}
1293
1294void Writer::writeCodeSignature() {
1295 if (codeSignatureSection) {
1296 TimeTraceScope timeScope("Write code signature");
1297 codeSignatureSection->writeHashes(buf: buffer->getBufferStart());
1298 }
1299}
1300
1301void Writer::writeOutputFile() {
1302 TimeTraceScope timeScope("Write output file");
1303 openFile();
1304 reportPendingUndefinedSymbols();
1305 if (errorCount())
1306 return;
1307 writeSections();
1308 applyOptimizationHints();
1309 buildFixupChains();
1310 if (config->generateUuid)
1311 writeUuid();
1312 writeCodeSignature();
1313
1314 if (auto e = buffer->commit())
1315 fatal(msg: "failed to write output '" + buffer->getPath() +
1316 "': " + toString(E: std::move(e)));
1317}
1318
1319template <class LP> void Writer::run() {
1320 treatSpecialUndefineds();
1321 if (config->entry && needsBinding(sym: config->entry))
1322 in.stubs->addEntry(config->entry);
1323
1324 // Canonicalization of all pointers to InputSections should be handled by
1325 // these two scan* methods. I.e. from this point onward, for all live
1326 // InputSections, we should have `isec->canonical() == isec`.
1327 scanSymbols();
1328 if (in.objcStubs->isNeeded())
1329 in.objcStubs->setUp();
1330 if (in.objcMethList->isNeeded())
1331 in.objcMethList->setUp();
1332 scanRelocations();
1333 if (in.initOffsets->isNeeded())
1334 in.initOffsets->setUp();
1335
1336 // Do not proceed if there were undefined or duplicate symbols.
1337 reportPendingUndefinedSymbols();
1338 reportPendingDuplicateSymbols();
1339 if (errorCount())
1340 return;
1341
1342 if (in.stubHelper && in.stubHelper->isNeeded())
1343 in.stubHelper->setUp();
1344
1345 if (in.objCImageInfo->isNeeded())
1346 in.objCImageInfo->finalizeContents();
1347
1348 // At this point, we should know exactly which output sections are needed,
1349 // courtesy of scanSymbols() and scanRelocations().
1350 createOutputSections<LP>();
1351
1352 // After this point, we create no new segments; HOWEVER, we might
1353 // yet create branch-range extension thunks for architectures whose
1354 // hardware call instructions have limited range, e.g., ARM(64).
1355 // The thunks are created as InputSections interspersed among
1356 // the ordinary __TEXT,_text InputSections.
1357 sortSegmentsAndSections();
1358 createLoadCommands<LP>();
1359 finalizeAddresses();
1360
1361 llvm::thread mapFileWriter([&] {
1362 if (LLVM_ENABLE_THREADS && config->timeTraceEnabled)
1363 timeTraceProfilerInitialize(TimeTraceGranularity: config->timeTraceGranularity, ProcName: "writeMapFile");
1364 writeMapFile();
1365 if (LLVM_ENABLE_THREADS && config->timeTraceEnabled)
1366 timeTraceProfilerFinishThread();
1367 });
1368
1369 finalizeLinkEditSegment();
1370 writeOutputFile();
1371 mapFileWriter.join();
1372}
1373
1374template <class LP> void macho::writeResult() { Writer().run<LP>(); }
1375
1376void macho::resetWriter() { LCDylib::resetInstanceCount(); }
1377
1378void macho::createSyntheticSections() {
1379 in.header = make<MachHeaderSection>();
1380 // Materialize cstring and objcMethname sections
1381 in.cStringSection = in.getOrCreateCStringSection(name: section_names::cString);
1382 in.objcMethnameSection = cast<DeduplicatedCStringSection>(
1383 Val: in.getOrCreateCStringSection(name: section_names::objcMethname,
1384 /*forceDedupStrings=*/true));
1385 in.wordLiteralSection = make<WordLiteralSection>();
1386 if (config->emitChainedFixups) {
1387 in.chainedFixups = make<ChainedFixupsSection>();
1388 } else {
1389 in.rebase = make<RebaseSection>();
1390 in.binding = make<BindingSection>();
1391 in.weakBinding = make<WeakBindingSection>();
1392 in.lazyBinding = make<LazyBindingSection>();
1393 in.lazyPointers = make<LazyPointerSection>();
1394 in.stubHelper = make<StubHelperSection>();
1395 }
1396 in.exports = make<ExportSection>();
1397 in.got = make<GotSection>();
1398 in.tlvPointers = make<TlvPointerSection>();
1399 in.stubs = make<StubsSection>();
1400 in.objcStubs = make<ObjCStubsSection>();
1401 in.unwindInfo = makeUnwindInfoSection();
1402 in.objCImageInfo = make<ObjCImageInfoSection>();
1403 in.initOffsets = make<InitOffsetsSection>();
1404 in.objcMethList = make<ObjCMethListSection>();
1405
1406 // This section contains space for just a single word, and will be used by
1407 // dyld to cache an address to the image loader it uses.
1408 uint8_t *arr = bAlloc().Allocate<uint8_t>(Num: target->wordSize);
1409 memset(s: arr, c: 0, n: target->wordSize);
1410 in.imageLoaderCache = makeSyntheticInputSection(
1411 segName: segment_names::data, sectName: section_names::data, flags: S_REGULAR,
1412 data: ArrayRef<uint8_t>{arr, target->wordSize},
1413 /*align=*/target->wordSize);
1414 assert(in.imageLoaderCache->live);
1415}
1416
1417OutputSection *macho::firstTLVDataSection = nullptr;
1418
1419template void macho::writeResult<LP64>();
1420template void macho::writeResult<ILP32>();
1421