1 | //===- Writer.cpp ---------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "Writer.h" |
10 | #include "ConcatOutputSection.h" |
11 | #include "Config.h" |
12 | #include "InputFiles.h" |
13 | #include "InputSection.h" |
14 | #include "MapFile.h" |
15 | #include "OutputSection.h" |
16 | #include "OutputSegment.h" |
17 | #include "SectionPriorities.h" |
18 | #include "SymbolTable.h" |
19 | #include "Symbols.h" |
20 | #include "SyntheticSections.h" |
21 | #include "Target.h" |
22 | #include "UnwindInfoSection.h" |
23 | |
24 | #include "lld/Common/Arrays.h" |
25 | #include "lld/Common/CommonLinkerContext.h" |
26 | #include "llvm/BinaryFormat/MachO.h" |
27 | #include "llvm/Config/llvm-config.h" |
28 | #include "llvm/Support/LEB128.h" |
29 | #include "llvm/Support/Parallel.h" |
30 | #include "llvm/Support/Path.h" |
31 | #include "llvm/Support/TimeProfiler.h" |
32 | #include "llvm/Support/thread.h" |
33 | #include "llvm/Support/xxhash.h" |
34 | |
35 | #include <algorithm> |
36 | |
37 | using namespace llvm; |
38 | using namespace llvm::MachO; |
39 | using namespace llvm::sys; |
40 | using namespace lld; |
41 | using namespace lld::macho; |
42 | |
43 | namespace { |
44 | class LCUuid; |
45 | |
46 | class Writer { |
47 | public: |
48 | Writer() : buffer(errorHandler().outputBuffer) {} |
49 | |
50 | void treatSpecialUndefineds(); |
51 | void scanRelocations(); |
52 | void scanSymbols(); |
53 | template <class LP> void createOutputSections(); |
54 | template <class LP> void createLoadCommands(); |
55 | void finalizeAddresses(); |
56 | void finalizeLinkEditSegment(); |
57 | void assignAddresses(OutputSegment *); |
58 | |
59 | void openFile(); |
60 | void writeSections(); |
61 | void applyOptimizationHints(); |
62 | void buildFixupChains(); |
63 | void writeUuid(); |
64 | void writeCodeSignature(); |
65 | void writeOutputFile(); |
66 | |
67 | template <class LP> void run(); |
68 | |
69 | std::unique_ptr<FileOutputBuffer> &buffer; |
70 | uint64_t addr = 0; |
71 | uint64_t fileOff = 0; |
72 | MachHeaderSection * = nullptr; |
73 | StringTableSection *stringTableSection = nullptr; |
74 | SymtabSection *symtabSection = nullptr; |
75 | IndirectSymtabSection *indirectSymtabSection = nullptr; |
76 | CodeSignatureSection *codeSignatureSection = nullptr; |
77 | DataInCodeSection *dataInCodeSection = nullptr; |
78 | FunctionStartsSection *functionStartsSection = nullptr; |
79 | |
80 | LCUuid *uuidCommand = nullptr; |
81 | OutputSegment *linkEditSegment = nullptr; |
82 | }; |
83 | |
84 | // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. |
85 | class LCDyldInfo final : public LoadCommand { |
86 | public: |
87 | LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection, |
88 | WeakBindingSection *weakBindingSection, |
89 | LazyBindingSection *lazyBindingSection, |
90 | ExportSection *exportSection) |
91 | : rebaseSection(rebaseSection), bindingSection(bindingSection), |
92 | weakBindingSection(weakBindingSection), |
93 | lazyBindingSection(lazyBindingSection), exportSection(exportSection) {} |
94 | |
95 | uint32_t getSize() const override { return sizeof(dyld_info_command); } |
96 | |
97 | void writeTo(uint8_t *buf) const override { |
98 | auto *c = reinterpret_cast<dyld_info_command *>(buf); |
99 | c->cmd = LC_DYLD_INFO_ONLY; |
100 | c->cmdsize = getSize(); |
101 | if (rebaseSection->isNeeded()) { |
102 | c->rebase_off = rebaseSection->fileOff; |
103 | c->rebase_size = rebaseSection->getFileSize(); |
104 | } |
105 | if (bindingSection->isNeeded()) { |
106 | c->bind_off = bindingSection->fileOff; |
107 | c->bind_size = bindingSection->getFileSize(); |
108 | } |
109 | if (weakBindingSection->isNeeded()) { |
110 | c->weak_bind_off = weakBindingSection->fileOff; |
111 | c->weak_bind_size = weakBindingSection->getFileSize(); |
112 | } |
113 | if (lazyBindingSection->isNeeded()) { |
114 | c->lazy_bind_off = lazyBindingSection->fileOff; |
115 | c->lazy_bind_size = lazyBindingSection->getFileSize(); |
116 | } |
117 | if (exportSection->isNeeded()) { |
118 | c->export_off = exportSection->fileOff; |
119 | c->export_size = exportSection->getFileSize(); |
120 | } |
121 | } |
122 | |
123 | RebaseSection *rebaseSection; |
124 | BindingSection *bindingSection; |
125 | WeakBindingSection *weakBindingSection; |
126 | LazyBindingSection *lazyBindingSection; |
127 | ExportSection *exportSection; |
128 | }; |
129 | |
130 | class LCSubFramework final : public LoadCommand { |
131 | public: |
132 | LCSubFramework(StringRef umbrella) : umbrella(umbrella) {} |
133 | |
134 | uint32_t getSize() const override { |
135 | return alignToPowerOf2(Value: sizeof(sub_framework_command) + umbrella.size() + 1, |
136 | Align: target->wordSize); |
137 | } |
138 | |
139 | void writeTo(uint8_t *buf) const override { |
140 | auto *c = reinterpret_cast<sub_framework_command *>(buf); |
141 | buf += sizeof(sub_framework_command); |
142 | |
143 | c->cmd = LC_SUB_FRAMEWORK; |
144 | c->cmdsize = getSize(); |
145 | c->umbrella = sizeof(sub_framework_command); |
146 | |
147 | memcpy(dest: buf, src: umbrella.data(), n: umbrella.size()); |
148 | buf[umbrella.size()] = '\0'; |
149 | } |
150 | |
151 | private: |
152 | const StringRef umbrella; |
153 | }; |
154 | |
155 | class LCFunctionStarts final : public LoadCommand { |
156 | public: |
157 | explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection) |
158 | : functionStartsSection(functionStartsSection) {} |
159 | |
160 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
161 | |
162 | void writeTo(uint8_t *buf) const override { |
163 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
164 | c->cmd = LC_FUNCTION_STARTS; |
165 | c->cmdsize = getSize(); |
166 | c->dataoff = functionStartsSection->fileOff; |
167 | c->datasize = functionStartsSection->getFileSize(); |
168 | } |
169 | |
170 | private: |
171 | FunctionStartsSection *functionStartsSection; |
172 | }; |
173 | |
174 | class LCDataInCode final : public LoadCommand { |
175 | public: |
176 | explicit LCDataInCode(DataInCodeSection *dataInCodeSection) |
177 | : dataInCodeSection(dataInCodeSection) {} |
178 | |
179 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
180 | |
181 | void writeTo(uint8_t *buf) const override { |
182 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
183 | c->cmd = LC_DATA_IN_CODE; |
184 | c->cmdsize = getSize(); |
185 | c->dataoff = dataInCodeSection->fileOff; |
186 | c->datasize = dataInCodeSection->getFileSize(); |
187 | } |
188 | |
189 | private: |
190 | DataInCodeSection *dataInCodeSection; |
191 | }; |
192 | |
193 | class LCDysymtab final : public LoadCommand { |
194 | public: |
195 | LCDysymtab(SymtabSection *symtabSection, |
196 | IndirectSymtabSection *indirectSymtabSection) |
197 | : symtabSection(symtabSection), |
198 | indirectSymtabSection(indirectSymtabSection) {} |
199 | |
200 | uint32_t getSize() const override { return sizeof(dysymtab_command); } |
201 | |
202 | void writeTo(uint8_t *buf) const override { |
203 | auto *c = reinterpret_cast<dysymtab_command *>(buf); |
204 | c->cmd = LC_DYSYMTAB; |
205 | c->cmdsize = getSize(); |
206 | |
207 | c->ilocalsym = 0; |
208 | c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols(); |
209 | c->nextdefsym = symtabSection->getNumExternalSymbols(); |
210 | c->iundefsym = c->iextdefsym + c->nextdefsym; |
211 | c->nundefsym = symtabSection->getNumUndefinedSymbols(); |
212 | |
213 | c->indirectsymoff = indirectSymtabSection->fileOff; |
214 | c->nindirectsyms = indirectSymtabSection->getNumSymbols(); |
215 | } |
216 | |
217 | SymtabSection *symtabSection; |
218 | IndirectSymtabSection *indirectSymtabSection; |
219 | }; |
220 | |
221 | template <class LP> class LCSegment final : public LoadCommand { |
222 | public: |
223 | LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {} |
224 | |
225 | uint32_t getSize() const override { |
226 | return sizeof(typename LP::segment_command) + |
227 | seg->numNonHiddenSections() * sizeof(typename LP::section); |
228 | } |
229 | |
230 | void writeTo(uint8_t *buf) const override { |
231 | using SegmentCommand = typename LP::segment_command; |
232 | using = typename LP::section; |
233 | |
234 | auto *c = reinterpret_cast<SegmentCommand *>(buf); |
235 | buf += sizeof(SegmentCommand); |
236 | |
237 | c->cmd = LP::segmentLCType; |
238 | c->cmdsize = getSize(); |
239 | memcpy(c->segname, name.data(), name.size()); |
240 | c->fileoff = seg->fileOff; |
241 | c->maxprot = seg->maxProt; |
242 | c->initprot = seg->initProt; |
243 | |
244 | c->vmaddr = seg->addr; |
245 | c->vmsize = seg->vmSize; |
246 | c->filesize = seg->fileSize; |
247 | c->nsects = seg->numNonHiddenSections(); |
248 | c->flags = seg->flags; |
249 | |
250 | for (const OutputSection *osec : seg->getSections()) { |
251 | if (osec->isHidden()) |
252 | continue; |
253 | |
254 | auto *sectHdr = reinterpret_cast<SectionHeader *>(buf); |
255 | buf += sizeof(SectionHeader); |
256 | |
257 | memcpy(sectHdr->sectname, osec->name.data(), osec->name.size()); |
258 | memcpy(sectHdr->segname, name.data(), name.size()); |
259 | |
260 | sectHdr->addr = osec->addr; |
261 | sectHdr->offset = osec->fileOff; |
262 | sectHdr->align = Log2_32(Value: osec->align); |
263 | sectHdr->flags = osec->flags; |
264 | sectHdr->size = osec->getSize(); |
265 | sectHdr->reserved1 = osec->reserved1; |
266 | sectHdr->reserved2 = osec->reserved2; |
267 | } |
268 | } |
269 | |
270 | private: |
271 | StringRef name; |
272 | OutputSegment *seg; |
273 | }; |
274 | |
275 | class LCMain final : public LoadCommand { |
276 | uint32_t getSize() const override { |
277 | return sizeof(structs::entry_point_command); |
278 | } |
279 | |
280 | void writeTo(uint8_t *buf) const override { |
281 | auto *c = reinterpret_cast<structs::entry_point_command *>(buf); |
282 | c->cmd = LC_MAIN; |
283 | c->cmdsize = getSize(); |
284 | |
285 | if (config->entry->isInStubs()) |
286 | c->entryoff = |
287 | in.stubs->fileOff + config->entry->stubsIndex * target->stubSize; |
288 | else |
289 | c->entryoff = config->entry->getVA() - in.header->addr; |
290 | |
291 | c->stacksize = 0; |
292 | } |
293 | }; |
294 | |
295 | class LCSymtab final : public LoadCommand { |
296 | public: |
297 | LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection) |
298 | : symtabSection(symtabSection), stringTableSection(stringTableSection) {} |
299 | |
300 | uint32_t getSize() const override { return sizeof(symtab_command); } |
301 | |
302 | void writeTo(uint8_t *buf) const override { |
303 | auto *c = reinterpret_cast<symtab_command *>(buf); |
304 | c->cmd = LC_SYMTAB; |
305 | c->cmdsize = getSize(); |
306 | c->symoff = symtabSection->fileOff; |
307 | c->nsyms = symtabSection->getNumSymbols(); |
308 | c->stroff = stringTableSection->fileOff; |
309 | c->strsize = stringTableSection->getFileSize(); |
310 | } |
311 | |
312 | SymtabSection *symtabSection = nullptr; |
313 | StringTableSection *stringTableSection = nullptr; |
314 | }; |
315 | |
316 | // There are several dylib load commands that share the same structure: |
317 | // * LC_LOAD_DYLIB |
318 | // * LC_ID_DYLIB |
319 | // * LC_REEXPORT_DYLIB |
320 | class LCDylib final : public LoadCommand { |
321 | public: |
322 | LCDylib(LoadCommandType type, StringRef path, |
323 | uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0) |
324 | : type(type), path(path), compatibilityVersion(compatibilityVersion), |
325 | currentVersion(currentVersion) { |
326 | instanceCount++; |
327 | } |
328 | |
329 | uint32_t getSize() const override { |
330 | return alignToPowerOf2(Value: sizeof(dylib_command) + path.size() + 1, |
331 | Align: target->wordSize); |
332 | } |
333 | |
334 | void writeTo(uint8_t *buf) const override { |
335 | auto *c = reinterpret_cast<dylib_command *>(buf); |
336 | buf += sizeof(dylib_command); |
337 | |
338 | c->cmd = type; |
339 | c->cmdsize = getSize(); |
340 | c->dylib.name = sizeof(dylib_command); |
341 | c->dylib.timestamp = 0; |
342 | c->dylib.compatibility_version = compatibilityVersion; |
343 | c->dylib.current_version = currentVersion; |
344 | |
345 | memcpy(dest: buf, src: path.data(), n: path.size()); |
346 | buf[path.size()] = '\0'; |
347 | } |
348 | |
349 | static uint32_t getInstanceCount() { return instanceCount; } |
350 | static void resetInstanceCount() { instanceCount = 0; } |
351 | |
352 | private: |
353 | LoadCommandType type; |
354 | StringRef path; |
355 | uint32_t compatibilityVersion; |
356 | uint32_t currentVersion; |
357 | static uint32_t instanceCount; |
358 | }; |
359 | |
360 | uint32_t LCDylib::instanceCount = 0; |
361 | |
362 | class LCLoadDylinker final : public LoadCommand { |
363 | public: |
364 | uint32_t getSize() const override { |
365 | return alignToPowerOf2(Value: sizeof(dylinker_command) + path.size() + 1, |
366 | Align: target->wordSize); |
367 | } |
368 | |
369 | void writeTo(uint8_t *buf) const override { |
370 | auto *c = reinterpret_cast<dylinker_command *>(buf); |
371 | buf += sizeof(dylinker_command); |
372 | |
373 | c->cmd = LC_LOAD_DYLINKER; |
374 | c->cmdsize = getSize(); |
375 | c->name = sizeof(dylinker_command); |
376 | |
377 | memcpy(dest: buf, src: path.data(), n: path.size()); |
378 | buf[path.size()] = '\0'; |
379 | } |
380 | |
381 | private: |
382 | // Recent versions of Darwin won't run any binary that has dyld at a |
383 | // different location. |
384 | const StringRef path = "/usr/lib/dyld" ; |
385 | }; |
386 | |
387 | class LCRPath final : public LoadCommand { |
388 | public: |
389 | explicit LCRPath(StringRef path) : path(path) {} |
390 | |
391 | uint32_t getSize() const override { |
392 | return alignToPowerOf2(Value: sizeof(rpath_command) + path.size() + 1, |
393 | Align: target->wordSize); |
394 | } |
395 | |
396 | void writeTo(uint8_t *buf) const override { |
397 | auto *c = reinterpret_cast<rpath_command *>(buf); |
398 | buf += sizeof(rpath_command); |
399 | |
400 | c->cmd = LC_RPATH; |
401 | c->cmdsize = getSize(); |
402 | c->path = sizeof(rpath_command); |
403 | |
404 | memcpy(dest: buf, src: path.data(), n: path.size()); |
405 | buf[path.size()] = '\0'; |
406 | } |
407 | |
408 | private: |
409 | StringRef path; |
410 | }; |
411 | |
412 | class LCDyldEnv final : public LoadCommand { |
413 | public: |
414 | explicit LCDyldEnv(StringRef name) : name(name) {} |
415 | |
416 | uint32_t getSize() const override { |
417 | return alignToPowerOf2(Value: sizeof(dyld_env_command) + name.size() + 1, |
418 | Align: target->wordSize); |
419 | } |
420 | |
421 | void writeTo(uint8_t *buf) const override { |
422 | auto *c = reinterpret_cast<dyld_env_command *>(buf); |
423 | buf += sizeof(dyld_env_command); |
424 | |
425 | c->cmd = LC_DYLD_ENVIRONMENT; |
426 | c->cmdsize = getSize(); |
427 | c->name = sizeof(dyld_env_command); |
428 | |
429 | memcpy(dest: buf, src: name.data(), n: name.size()); |
430 | buf[name.size()] = '\0'; |
431 | } |
432 | |
433 | private: |
434 | StringRef name; |
435 | }; |
436 | |
437 | class LCMinVersion final : public LoadCommand { |
438 | public: |
439 | explicit LCMinVersion(const PlatformInfo &platformInfo) |
440 | : platformInfo(platformInfo) {} |
441 | |
442 | uint32_t getSize() const override { return sizeof(version_min_command); } |
443 | |
444 | void writeTo(uint8_t *buf) const override { |
445 | auto *c = reinterpret_cast<version_min_command *>(buf); |
446 | switch (platformInfo.target.Platform) { |
447 | case PLATFORM_MACOS: |
448 | c->cmd = LC_VERSION_MIN_MACOSX; |
449 | break; |
450 | case PLATFORM_IOS: |
451 | case PLATFORM_IOSSIMULATOR: |
452 | c->cmd = LC_VERSION_MIN_IPHONEOS; |
453 | break; |
454 | case PLATFORM_TVOS: |
455 | case PLATFORM_TVOSSIMULATOR: |
456 | c->cmd = LC_VERSION_MIN_TVOS; |
457 | break; |
458 | case PLATFORM_WATCHOS: |
459 | case PLATFORM_WATCHOSSIMULATOR: |
460 | c->cmd = LC_VERSION_MIN_WATCHOS; |
461 | break; |
462 | default: |
463 | llvm_unreachable("invalid platform" ); |
464 | break; |
465 | } |
466 | c->cmdsize = getSize(); |
467 | c->version = encodeVersion(version: platformInfo.target.MinDeployment); |
468 | c->sdk = encodeVersion(version: platformInfo.sdk); |
469 | } |
470 | |
471 | private: |
472 | const PlatformInfo &platformInfo; |
473 | }; |
474 | |
475 | class LCBuildVersion final : public LoadCommand { |
476 | public: |
477 | explicit LCBuildVersion(const PlatformInfo &platformInfo) |
478 | : platformInfo(platformInfo) {} |
479 | |
480 | const int ntools = 1; |
481 | |
482 | uint32_t getSize() const override { |
483 | return sizeof(build_version_command) + ntools * sizeof(build_tool_version); |
484 | } |
485 | |
486 | void writeTo(uint8_t *buf) const override { |
487 | auto *c = reinterpret_cast<build_version_command *>(buf); |
488 | c->cmd = LC_BUILD_VERSION; |
489 | c->cmdsize = getSize(); |
490 | |
491 | c->platform = static_cast<uint32_t>(platformInfo.target.Platform); |
492 | c->minos = encodeVersion(version: platformInfo.target.MinDeployment); |
493 | c->sdk = encodeVersion(version: platformInfo.sdk); |
494 | |
495 | c->ntools = ntools; |
496 | auto *t = reinterpret_cast<build_tool_version *>(&c[1]); |
497 | t->tool = TOOL_LLD; |
498 | t->version = encodeVersion(version: VersionTuple( |
499 | LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH)); |
500 | } |
501 | |
502 | private: |
503 | const PlatformInfo &platformInfo; |
504 | }; |
505 | |
506 | // Stores a unique identifier for the output file based on an MD5 hash of its |
507 | // contents. In order to hash the contents, we must first write them, but |
508 | // LC_UUID itself must be part of the written contents in order for all the |
509 | // offsets to be calculated correctly. We resolve this circular paradox by |
510 | // first writing an LC_UUID with an all-zero UUID, then updating the UUID with |
511 | // its real value later. |
512 | class LCUuid final : public LoadCommand { |
513 | public: |
514 | uint32_t getSize() const override { return sizeof(uuid_command); } |
515 | |
516 | void writeTo(uint8_t *buf) const override { |
517 | auto *c = reinterpret_cast<uuid_command *>(buf); |
518 | c->cmd = LC_UUID; |
519 | c->cmdsize = getSize(); |
520 | uuidBuf = c->uuid; |
521 | } |
522 | |
523 | void writeUuid(uint64_t digest) const { |
524 | // xxhash only gives us 8 bytes, so put some fixed data in the other half. |
525 | static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size" ); |
526 | memcpy(dest: uuidBuf, src: "LLD\xa1UU1D" , n: 8); |
527 | memcpy(dest: uuidBuf + 8, src: &digest, n: 8); |
528 | |
529 | // RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in |
530 | // byte 8. Byte 6 is already fine due to the fixed data we put in. We don't |
531 | // want to lose bits of the digest in byte 8, so swap that with a byte of |
532 | // fixed data that happens to have the right bits set. |
533 | std::swap(a&: uuidBuf[3], b&: uuidBuf[8]); |
534 | |
535 | // Claim that this is an MD5-based hash. It isn't, but this signals that |
536 | // this is not a time-based and not a random hash. MD5 seems like the least |
537 | // bad lie we can put here. |
538 | assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3" ); |
539 | assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2" ); |
540 | } |
541 | |
542 | mutable uint8_t *uuidBuf; |
543 | }; |
544 | |
545 | template <class LP> class LCEncryptionInfo final : public LoadCommand { |
546 | public: |
547 | uint32_t getSize() const override { |
548 | return sizeof(typename LP::encryption_info_command); |
549 | } |
550 | |
551 | void writeTo(uint8_t *buf) const override { |
552 | using EncryptionInfo = typename LP::encryption_info_command; |
553 | auto *c = reinterpret_cast<EncryptionInfo *>(buf); |
554 | buf += sizeof(EncryptionInfo); |
555 | c->cmd = LP::encryptionInfoLCType; |
556 | c->cmdsize = getSize(); |
557 | c->cryptoff = in.header->getSize(); |
558 | auto it = find_if(outputSegments, [](const OutputSegment *seg) { |
559 | return seg->name == segment_names::text; |
560 | }); |
561 | assert(it != outputSegments.end()); |
562 | c->cryptsize = (*it)->fileSize - c->cryptoff; |
563 | } |
564 | }; |
565 | |
566 | class LCCodeSignature final : public LoadCommand { |
567 | public: |
568 | LCCodeSignature(CodeSignatureSection *section) : section(section) {} |
569 | |
570 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
571 | |
572 | void writeTo(uint8_t *buf) const override { |
573 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
574 | c->cmd = LC_CODE_SIGNATURE; |
575 | c->cmdsize = getSize(); |
576 | c->dataoff = static_cast<uint32_t>(section->fileOff); |
577 | c->datasize = section->getSize(); |
578 | } |
579 | |
580 | CodeSignatureSection *section; |
581 | }; |
582 | |
583 | class LCExportsTrie final : public LoadCommand { |
584 | public: |
585 | LCExportsTrie(ExportSection *section) : section(section) {} |
586 | |
587 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
588 | |
589 | void writeTo(uint8_t *buf) const override { |
590 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
591 | c->cmd = LC_DYLD_EXPORTS_TRIE; |
592 | c->cmdsize = getSize(); |
593 | c->dataoff = section->fileOff; |
594 | c->datasize = section->getSize(); |
595 | } |
596 | |
597 | ExportSection *section; |
598 | }; |
599 | |
600 | class LCChainedFixups final : public LoadCommand { |
601 | public: |
602 | LCChainedFixups(ChainedFixupsSection *section) : section(section) {} |
603 | |
604 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
605 | |
606 | void writeTo(uint8_t *buf) const override { |
607 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
608 | c->cmd = LC_DYLD_CHAINED_FIXUPS; |
609 | c->cmdsize = getSize(); |
610 | c->dataoff = section->fileOff; |
611 | c->datasize = section->getSize(); |
612 | } |
613 | |
614 | ChainedFixupsSection *section; |
615 | }; |
616 | |
617 | } // namespace |
618 | |
619 | void Writer::treatSpecialUndefineds() { |
620 | if (config->entry) |
621 | if (auto *undefined = dyn_cast<Undefined>(Val: config->entry)) |
622 | treatUndefinedSymbol(*undefined, source: "the entry point" ); |
623 | |
624 | // FIXME: This prints symbols that are undefined both in input files and |
625 | // via -u flag twice. |
626 | for (const Symbol *sym : config->explicitUndefineds) { |
627 | if (const auto *undefined = dyn_cast<Undefined>(Val: sym)) |
628 | treatUndefinedSymbol(*undefined, source: "-u" ); |
629 | } |
630 | // Literal exported-symbol names must be defined, but glob |
631 | // patterns need not match. |
632 | for (const CachedHashStringRef &cachedName : |
633 | config->exportedSymbols.literals) { |
634 | if (const Symbol *sym = symtab->find(name: cachedName)) |
635 | if (const auto *undefined = dyn_cast<Undefined>(Val: sym)) |
636 | treatUndefinedSymbol(*undefined, source: "-exported_symbol(s_list)" ); |
637 | } |
638 | } |
639 | |
640 | static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, |
641 | const lld::macho::Reloc &r) { |
642 | if (!sym->isLive()) { |
643 | if (Defined *defined = dyn_cast<Defined>(Val: sym)) { |
644 | if (config->emitInitOffsets && |
645 | defined->isec()->getName() == section_names::moduleInitFunc) |
646 | fatal(msg: isec->getLocation(off: r.offset) + ": cannot reference " + |
647 | sym->getName() + |
648 | " defined in __mod_init_func when -init_offsets is used" ); |
649 | } |
650 | assert(false && "referenced symbol must be live" ); |
651 | } |
652 | |
653 | const RelocAttrs &relocAttrs = target->getRelocAttrs(type: r.type); |
654 | |
655 | if (relocAttrs.hasAttr(b: RelocAttrBits::BRANCH)) { |
656 | if (needsBinding(sym)) |
657 | in.stubs->addEntry(sym); |
658 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::GOT)) { |
659 | if (relocAttrs.hasAttr(b: RelocAttrBits::POINTER) || needsBinding(sym)) |
660 | in.got->addEntry(sym); |
661 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::TLV)) { |
662 | if (needsBinding(sym)) |
663 | in.tlvPointers->addEntry(sym); |
664 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::UNSIGNED)) { |
665 | // References from thread-local variable sections are treated as offsets |
666 | // relative to the start of the referent section, and therefore have no |
667 | // need of rebase opcodes. |
668 | if (!(isThreadLocalVariables(flags: isec->getFlags()) && isa<Defined>(Val: sym))) |
669 | addNonLazyBindingEntries(sym, isec, offset: r.offset, addend: r.addend); |
670 | } |
671 | } |
672 | |
673 | void Writer::scanRelocations() { |
674 | TimeTraceScope timeScope("Scan relocations" ); |
675 | |
676 | // This can't use a for-each loop: It calls treatUndefinedSymbol(), which can |
677 | // add to inputSections, which invalidates inputSections's iterators. |
678 | for (size_t i = 0; i < inputSections.size(); ++i) { |
679 | ConcatInputSection *isec = inputSections[i]; |
680 | |
681 | if (isec->shouldOmitFromOutput()) |
682 | continue; |
683 | |
684 | for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) { |
685 | lld::macho::Reloc &r = *it; |
686 | |
687 | // Canonicalize the referent so that later accesses in Writer won't |
688 | // have to worry about it. |
689 | if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) |
690 | r.referent = referentIsec->canonical(); |
691 | |
692 | if (target->hasAttr(type: r.type, bit: RelocAttrBits::SUBTRAHEND)) { |
693 | // Skip over the following UNSIGNED relocation -- it's just there as the |
694 | // minuend, and doesn't have the usual UNSIGNED semantics. We don't want |
695 | // to emit rebase opcodes for it. |
696 | ++it; |
697 | // Canonicalize the referent so that later accesses in Writer won't |
698 | // have to worry about it. |
699 | if (auto *referentIsec = it->referent.dyn_cast<InputSection *>()) |
700 | it->referent = referentIsec->canonical(); |
701 | continue; |
702 | } |
703 | if (auto *sym = r.referent.dyn_cast<Symbol *>()) { |
704 | if (auto *undefined = dyn_cast<Undefined>(Val: sym)) |
705 | treatUndefinedSymbol(*undefined, isec, offset: r.offset); |
706 | // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check. |
707 | if (!isa<Undefined>(Val: sym) && validateSymbolRelocation(sym, isec, r)) |
708 | prepareSymbolRelocation(sym, isec, r); |
709 | } else { |
710 | if (!r.pcrel) { |
711 | if (config->emitChainedFixups) |
712 | in.chainedFixups->addRebase(isec, offset: r.offset); |
713 | else |
714 | in.rebase->addEntry(isec, offset: r.offset); |
715 | } |
716 | } |
717 | } |
718 | } |
719 | |
720 | in.unwindInfo->prepare(); |
721 | } |
722 | |
723 | static void addNonWeakDefinition(const Defined *defined) { |
724 | if (config->emitChainedFixups) |
725 | in.chainedFixups->setHasNonWeakDefinition(); |
726 | else |
727 | in.weakBinding->addNonWeakDefinition(defined); |
728 | } |
729 | |
730 | void Writer::scanSymbols() { |
731 | TimeTraceScope timeScope("Scan symbols" ); |
732 | ObjCSelRefsHelper::initialize(); |
733 | for (Symbol *sym : symtab->getSymbols()) { |
734 | if (auto *defined = dyn_cast<Defined>(Val: sym)) { |
735 | if (!defined->isLive()) |
736 | continue; |
737 | if (defined->overridesWeakDef) |
738 | addNonWeakDefinition(defined); |
739 | if (!defined->isAbsolute() && isCodeSection(defined->isec())) |
740 | in.unwindInfo->addSymbol(defined); |
741 | } else if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) { |
742 | // This branch intentionally doesn't check isLive(). |
743 | if (dysym->isDynamicLookup()) |
744 | continue; |
745 | dysym->getFile()->refState = |
746 | std::max(a: dysym->getFile()->refState, b: dysym->getRefState()); |
747 | } else if (isa<Undefined>(Val: sym)) { |
748 | if (ObjCStubsSection::isObjCStubSymbol(sym)) { |
749 | // When -dead_strip is enabled, we don't want to emit any dead stubs. |
750 | // Although this stub symbol is yet undefined, addSym() was called |
751 | // during MarkLive. |
752 | if (config->deadStrip) { |
753 | if (!sym->isLive()) |
754 | continue; |
755 | } |
756 | in.objcStubs->addEntry(sym); |
757 | } |
758 | } |
759 | } |
760 | |
761 | for (const InputFile *file : inputFiles) { |
762 | if (auto *objFile = dyn_cast<ObjFile>(Val: file)) |
763 | for (Symbol *sym : objFile->symbols) { |
764 | if (auto *defined = dyn_cast_or_null<Defined>(Val: sym)) { |
765 | if (!defined->isLive()) |
766 | continue; |
767 | if (!defined->isExternal() && !defined->isAbsolute() && |
768 | isCodeSection(defined->isec())) |
769 | in.unwindInfo->addSymbol(defined); |
770 | } |
771 | } |
772 | } |
773 | } |
774 | |
775 | // TODO: ld64 enforces the old load commands in a few other cases. |
776 | static bool useLCBuildVersion(const PlatformInfo &platformInfo) { |
777 | static const std::array<std::pair<PlatformType, VersionTuple>, 7> minVersion = |
778 | {._M_elems: {{PLATFORM_MACOS, VersionTuple(10, 14)}, |
779 | {PLATFORM_IOS, VersionTuple(12, 0)}, |
780 | {PLATFORM_IOSSIMULATOR, VersionTuple(13, 0)}, |
781 | {PLATFORM_TVOS, VersionTuple(12, 0)}, |
782 | {PLATFORM_TVOSSIMULATOR, VersionTuple(13, 0)}, |
783 | {PLATFORM_WATCHOS, VersionTuple(5, 0)}, |
784 | {PLATFORM_WATCHOSSIMULATOR, VersionTuple(6, 0)}}}; |
785 | auto it = llvm::find_if(Range: minVersion, P: [&](const auto &p) { |
786 | return p.first == platformInfo.target.Platform; |
787 | }); |
788 | return it == minVersion.end() |
789 | ? true |
790 | : platformInfo.target.MinDeployment >= it->second; |
791 | } |
792 | |
793 | template <class LP> void Writer::createLoadCommands() { |
794 | uint8_t segIndex = 0; |
795 | for (OutputSegment *seg : outputSegments) { |
796 | in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg)); |
797 | seg->index = segIndex++; |
798 | } |
799 | |
800 | if (config->emitChainedFixups) { |
801 | in.header->addLoadCommand(make<LCChainedFixups>(args&: in.chainedFixups)); |
802 | in.header->addLoadCommand(make<LCExportsTrie>(args&: in.exports)); |
803 | } else { |
804 | in.header->addLoadCommand(make<LCDyldInfo>( |
805 | args&: in.rebase, args&: in.binding, args&: in.weakBinding, args&: in.lazyBinding, args&: in.exports)); |
806 | } |
807 | in.header->addLoadCommand(make<LCSymtab>(args&: symtabSection, args&: stringTableSection)); |
808 | in.header->addLoadCommand( |
809 | make<LCDysymtab>(args&: symtabSection, args&: indirectSymtabSection)); |
810 | if (!config->umbrella.empty()) |
811 | in.header->addLoadCommand(make<LCSubFramework>(args&: config->umbrella)); |
812 | if (config->emitEncryptionInfo) |
813 | in.header->addLoadCommand(make<LCEncryptionInfo<LP>>()); |
814 | for (StringRef path : config->runtimePaths) |
815 | in.header->addLoadCommand(make<LCRPath>(args&: path)); |
816 | |
817 | switch (config->outputType) { |
818 | case MH_EXECUTE: |
819 | in.header->addLoadCommand(make<LCLoadDylinker>()); |
820 | break; |
821 | case MH_DYLIB: |
822 | in.header->addLoadCommand(make<LCDylib>(args: LC_ID_DYLIB, args&: config->installName, |
823 | args&: config->dylibCompatibilityVersion, |
824 | args&: config->dylibCurrentVersion)); |
825 | break; |
826 | case MH_BUNDLE: |
827 | break; |
828 | default: |
829 | llvm_unreachable("unhandled output file type" ); |
830 | } |
831 | |
832 | if (config->generateUuid) { |
833 | uuidCommand = make<LCUuid>(); |
834 | in.header->addLoadCommand(uuidCommand); |
835 | } |
836 | |
837 | if (useLCBuildVersion(platformInfo: config->platformInfo)) |
838 | in.header->addLoadCommand(make<LCBuildVersion>(args&: config->platformInfo)); |
839 | else |
840 | in.header->addLoadCommand(make<LCMinVersion>(args&: config->platformInfo)); |
841 | |
842 | if (config->secondaryPlatformInfo) { |
843 | in.header->addLoadCommand( |
844 | make<LCBuildVersion>(args&: *config->secondaryPlatformInfo)); |
845 | } |
846 | |
847 | // This is down here to match ld64's load command order. |
848 | if (config->outputType == MH_EXECUTE) |
849 | in.header->addLoadCommand(make<LCMain>()); |
850 | |
851 | // See ld64's OutputFile::buildDylibOrdinalMapping for the corresponding |
852 | // library ordinal computation code in ld64. |
853 | int64_t dylibOrdinal = 1; |
854 | DenseMap<StringRef, int64_t> ordinalForInstallName; |
855 | |
856 | std::vector<DylibFile *> dylibFiles; |
857 | for (InputFile *file : inputFiles) { |
858 | if (auto *dylibFile = dyn_cast<DylibFile>(Val: file)) |
859 | dylibFiles.push_back(x: dylibFile); |
860 | } |
861 | for (size_t i = 0; i < dylibFiles.size(); ++i) |
862 | dylibFiles.insert(position: dylibFiles.end(), first: dylibFiles[i]->extraDylibs.begin(), |
863 | last: dylibFiles[i]->extraDylibs.end()); |
864 | |
865 | for (DylibFile *dylibFile : dylibFiles) { |
866 | if (dylibFile->isBundleLoader) { |
867 | dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE; |
868 | // Shortcut since bundle-loader does not re-export the symbols. |
869 | |
870 | dylibFile->reexport = false; |
871 | continue; |
872 | } |
873 | |
874 | // Don't emit load commands for a dylib that is not referenced if: |
875 | // - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER -- |
876 | // if it's on the linker command line, it's explicit) |
877 | // - or it's marked MH_DEAD_STRIPPABLE_DYLIB |
878 | // - or the flag -dead_strip_dylibs is used |
879 | // FIXME: `isReferenced()` is currently computed before dead code |
880 | // stripping, so references from dead code keep a dylib alive. This |
881 | // matches ld64, but it's something we should do better. |
882 | if (!dylibFile->isReferenced() && !dylibFile->forceNeeded && |
883 | (!dylibFile->isExplicitlyLinked() || dylibFile->deadStrippable || |
884 | config->deadStripDylibs)) |
885 | continue; |
886 | |
887 | // Several DylibFiles can have the same installName. Only emit a single |
888 | // load command for that installName and give all these DylibFiles the |
889 | // same ordinal. |
890 | // This can happen in several cases: |
891 | // - a new framework could change its installName to an older |
892 | // framework name via an $ld$ symbol depending on platform_version |
893 | // - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd; |
894 | // Foo.framework/Foo.tbd is usually a symlink to |
895 | // Foo.framework/Versions/Current/Foo.tbd, where |
896 | // Foo.framework/Versions/Current is usually a symlink to |
897 | // Foo.framework/Versions/A) |
898 | // - a framework can be linked both explicitly on the linker |
899 | // command line and implicitly as a reexport from a different |
900 | // framework. The re-export will usually point to the tbd file |
901 | // in Foo.framework/Versions/A/Foo.tbd, while the explicit link will |
902 | // usually find Foo.framework/Foo.tbd. These are usually symlinks, |
903 | // but in a --reproduce archive they will be identical but distinct |
904 | // files. |
905 | // In the first case, *semantically distinct* DylibFiles will have the |
906 | // same installName. |
907 | int64_t &ordinal = ordinalForInstallName[dylibFile->installName]; |
908 | if (ordinal) { |
909 | dylibFile->ordinal = ordinal; |
910 | continue; |
911 | } |
912 | |
913 | ordinal = dylibFile->ordinal = dylibOrdinal++; |
914 | LoadCommandType lcType = |
915 | dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak |
916 | ? LC_LOAD_WEAK_DYLIB |
917 | : LC_LOAD_DYLIB; |
918 | in.header->addLoadCommand(make<LCDylib>(args&: lcType, args&: dylibFile->installName, |
919 | args&: dylibFile->compatibilityVersion, |
920 | args&: dylibFile->currentVersion)); |
921 | |
922 | if (dylibFile->reexport) |
923 | in.header->addLoadCommand( |
924 | make<LCDylib>(args: LC_REEXPORT_DYLIB, args&: dylibFile->installName)); |
925 | } |
926 | |
927 | for (const auto &dyldEnv : config->dyldEnvs) |
928 | in.header->addLoadCommand(make<LCDyldEnv>(args: dyldEnv)); |
929 | |
930 | if (functionStartsSection) |
931 | in.header->addLoadCommand(make<LCFunctionStarts>(args&: functionStartsSection)); |
932 | if (dataInCodeSection) |
933 | in.header->addLoadCommand(make<LCDataInCode>(args&: dataInCodeSection)); |
934 | if (codeSignatureSection) |
935 | in.header->addLoadCommand(make<LCCodeSignature>(args&: codeSignatureSection)); |
936 | |
937 | const uint32_t MACOS_MAXPATHLEN = 1024; |
938 | config->headerPad = std::max( |
939 | a: config->headerPad, b: (config->headerPadMaxInstallNames |
940 | ? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN |
941 | : 0)); |
942 | } |
943 | |
944 | // Sorting only can happen once all outputs have been collected. Here we sort |
945 | // segments, output sections within each segment, and input sections within each |
946 | // output segment. |
947 | static void sortSegmentsAndSections() { |
948 | TimeTraceScope timeScope("Sort segments and sections" ); |
949 | sortOutputSegments(); |
950 | |
951 | DenseMap<const InputSection *, size_t> isecPriorities = |
952 | priorityBuilder.buildInputSectionPriorities(); |
953 | |
954 | uint32_t sectionIndex = 0; |
955 | for (OutputSegment *seg : outputSegments) { |
956 | seg->sortOutputSections(); |
957 | // References from thread-local variable sections are treated as offsets |
958 | // relative to the start of the thread-local data memory area, which |
959 | // is initialized via copying all the TLV data sections (which are all |
960 | // contiguous). If later data sections require a greater alignment than |
961 | // earlier ones, the offsets of data within those sections won't be |
962 | // guaranteed to aligned unless we normalize alignments. We therefore use |
963 | // the largest alignment for all TLV data sections. |
964 | uint32_t tlvAlign = 0; |
965 | for (const OutputSection *osec : seg->getSections()) |
966 | if (isThreadLocalData(flags: osec->flags) && osec->align > tlvAlign) |
967 | tlvAlign = osec->align; |
968 | |
969 | for (OutputSection *osec : seg->getSections()) { |
970 | // Now that the output sections are sorted, assign the final |
971 | // output section indices. |
972 | if (!osec->isHidden()) |
973 | osec->index = ++sectionIndex; |
974 | if (isThreadLocalData(flags: osec->flags)) { |
975 | if (!firstTLVDataSection) |
976 | firstTLVDataSection = osec; |
977 | osec->align = tlvAlign; |
978 | } |
979 | |
980 | if (!isecPriorities.empty()) { |
981 | if (auto *merged = dyn_cast<ConcatOutputSection>(Val: osec)) { |
982 | llvm::stable_sort( |
983 | Range&: merged->inputs, C: [&](InputSection *a, InputSection *b) { |
984 | return isecPriorities.lookup(Val: a) > isecPriorities.lookup(Val: b); |
985 | }); |
986 | } |
987 | } |
988 | } |
989 | } |
990 | } |
991 | |
992 | template <class LP> void Writer::createOutputSections() { |
993 | TimeTraceScope timeScope("Create output sections" ); |
994 | // First, create hidden sections |
995 | stringTableSection = make<StringTableSection>(); |
996 | symtabSection = makeSymtabSection<LP>(*stringTableSection); |
997 | indirectSymtabSection = make<IndirectSymtabSection>(); |
998 | if (config->adhocCodesign) |
999 | codeSignatureSection = make<CodeSignatureSection>(); |
1000 | if (config->emitDataInCodeInfo) |
1001 | dataInCodeSection = make<DataInCodeSection>(); |
1002 | if (config->emitFunctionStarts) |
1003 | functionStartsSection = make<FunctionStartsSection>(); |
1004 | |
1005 | switch (config->outputType) { |
1006 | case MH_EXECUTE: |
1007 | make<PageZeroSection>(); |
1008 | break; |
1009 | case MH_DYLIB: |
1010 | case MH_BUNDLE: |
1011 | break; |
1012 | default: |
1013 | llvm_unreachable("unhandled output file type" ); |
1014 | } |
1015 | |
1016 | // Then add input sections to output sections. |
1017 | for (ConcatInputSection *isec : inputSections) { |
1018 | if (isec->shouldOmitFromOutput()) |
1019 | continue; |
1020 | ConcatOutputSection *osec = cast<ConcatOutputSection>(Val: isec->parent); |
1021 | osec->addInput(input: isec); |
1022 | osec->inputOrder = |
1023 | std::min(a: osec->inputOrder, b: static_cast<int>(isec->outSecOff)); |
1024 | } |
1025 | |
1026 | // Once all the inputs are added, we can finalize the output section |
1027 | // properties and create the corresponding output segments. |
1028 | for (const auto &it : concatOutputSections) { |
1029 | StringRef segname = it.first.first; |
1030 | ConcatOutputSection *osec = it.second; |
1031 | assert(segname != segment_names::ld); |
1032 | if (osec->isNeeded()) { |
1033 | // See comment in ObjFile::splitEhFrames() |
1034 | if (osec->name == section_names::ehFrame && |
1035 | segname == segment_names::text) |
1036 | osec->align = target->wordSize; |
1037 | |
1038 | // MC keeps the default 1-byte alignment for __thread_vars, even though it |
1039 | // contains pointers that are fixed up by dyld, which requires proper |
1040 | // alignment. |
1041 | if (isThreadLocalVariables(flags: osec->flags)) |
1042 | osec->align = std::max<uint32_t>(a: osec->align, b: target->wordSize); |
1043 | |
1044 | getOrCreateOutputSegment(name: segname)->addOutputSection(os: osec); |
1045 | } |
1046 | } |
1047 | |
1048 | for (SyntheticSection *ssec : syntheticSections) { |
1049 | auto it = concatOutputSections.find(Key: {ssec->segname, ssec->name}); |
1050 | // We add all LinkEdit sections here because we don't know if they are |
1051 | // needed until their finalizeContents() methods get called later. While |
1052 | // this means that we add some redundant sections to __LINKEDIT, there is |
1053 | // is no redundancy in the output, as we do not emit section headers for |
1054 | // any LinkEdit sections. |
1055 | if (ssec->isNeeded() || ssec->segname == segment_names::linkEdit) { |
1056 | if (it == concatOutputSections.end()) { |
1057 | getOrCreateOutputSegment(name: ssec->segname)->addOutputSection(os: ssec); |
1058 | } else { |
1059 | fatal(msg: "section from " + |
1060 | toString(file: it->second->firstSection()->getFile()) + |
1061 | " conflicts with synthetic section " + ssec->segname + "," + |
1062 | ssec->name); |
1063 | } |
1064 | } |
1065 | } |
1066 | |
1067 | // dyld requires __LINKEDIT segment to always exist (even if empty). |
1068 | linkEditSegment = getOrCreateOutputSegment(name: segment_names::linkEdit); |
1069 | } |
1070 | |
1071 | void Writer::finalizeAddresses() { |
1072 | TimeTraceScope timeScope("Finalize addresses" ); |
1073 | uint64_t pageSize = target->getPageSize(); |
1074 | |
1075 | // We could parallelize this loop, but local benchmarking indicates it is |
1076 | // faster to do it all in the main thread. |
1077 | for (OutputSegment *seg : outputSegments) { |
1078 | if (seg == linkEditSegment) |
1079 | continue; |
1080 | for (OutputSection *osec : seg->getSections()) { |
1081 | if (!osec->isNeeded()) |
1082 | continue; |
1083 | // Other kinds of OutputSections have already been finalized. |
1084 | if (auto *concatOsec = dyn_cast<ConcatOutputSection>(Val: osec)) |
1085 | concatOsec->finalizeContents(); |
1086 | } |
1087 | } |
1088 | |
1089 | // Ensure that segments (and the sections they contain) are allocated |
1090 | // addresses in ascending order, which dyld requires. |
1091 | // |
1092 | // Note that at this point, __LINKEDIT sections are empty, but we need to |
1093 | // determine addresses of other segments/sections before generating its |
1094 | // contents. |
1095 | for (OutputSegment *seg : outputSegments) { |
1096 | if (seg == linkEditSegment) |
1097 | continue; |
1098 | seg->addr = addr; |
1099 | assignAddresses(seg); |
1100 | // codesign / libstuff checks for segment ordering by verifying that |
1101 | // `fileOff + fileSize == next segment fileOff`. So we call |
1102 | // alignToPowerOf2() before (instead of after) computing fileSize to ensure |
1103 | // that the segments are contiguous. We handle addr / vmSize similarly for |
1104 | // the same reason. |
1105 | fileOff = alignToPowerOf2(Value: fileOff, Align: pageSize); |
1106 | addr = alignToPowerOf2(Value: addr, Align: pageSize); |
1107 | seg->vmSize = addr - seg->addr; |
1108 | seg->fileSize = fileOff - seg->fileOff; |
1109 | seg->assignAddressesToStartEndSymbols(); |
1110 | } |
1111 | } |
1112 | |
1113 | void Writer::finalizeLinkEditSegment() { |
1114 | TimeTraceScope timeScope("Finalize __LINKEDIT segment" ); |
1115 | // Fill __LINKEDIT contents. |
1116 | std::array<LinkEditSection *, 10> linkEditSections{ |
1117 | in.rebase, in.binding, |
1118 | in.weakBinding, in.lazyBinding, |
1119 | in.exports, in.chainedFixups, |
1120 | symtabSection, indirectSymtabSection, |
1121 | dataInCodeSection, functionStartsSection, |
1122 | }; |
1123 | |
1124 | parallelForEach(Begin: linkEditSections.begin(), End: linkEditSections.end(), |
1125 | Fn: [](LinkEditSection *osec) { |
1126 | if (osec) |
1127 | osec->finalizeContents(); |
1128 | }); |
1129 | |
1130 | // Now that __LINKEDIT is filled out, do a proper calculation of its |
1131 | // addresses and offsets. |
1132 | linkEditSegment->addr = addr; |
1133 | assignAddresses(linkEditSegment); |
1134 | // No need to page-align fileOff / addr here since this is the last segment. |
1135 | linkEditSegment->vmSize = addr - linkEditSegment->addr; |
1136 | linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff; |
1137 | } |
1138 | |
1139 | void Writer::assignAddresses(OutputSegment *seg) { |
1140 | seg->fileOff = fileOff; |
1141 | |
1142 | for (OutputSection *osec : seg->getSections()) { |
1143 | if (!osec->isNeeded()) |
1144 | continue; |
1145 | addr = alignToPowerOf2(Value: addr, Align: osec->align); |
1146 | fileOff = alignToPowerOf2(Value: fileOff, Align: osec->align); |
1147 | osec->addr = addr; |
1148 | osec->fileOff = isZeroFill(flags: osec->flags) ? 0 : fileOff; |
1149 | osec->finalize(); |
1150 | osec->assignAddressesToStartEndSymbols(); |
1151 | |
1152 | addr += osec->getSize(); |
1153 | fileOff += osec->getFileSize(); |
1154 | } |
1155 | } |
1156 | |
1157 | void Writer::openFile() { |
1158 | Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr = |
1159 | FileOutputBuffer::create(FilePath: config->outputFile, Size: fileOff, |
1160 | Flags: FileOutputBuffer::F_executable); |
1161 | |
1162 | if (!bufferOrErr) |
1163 | fatal(msg: "failed to open " + config->outputFile + ": " + |
1164 | llvm::toString(E: bufferOrErr.takeError())); |
1165 | buffer = std::move(*bufferOrErr); |
1166 | in.bufferStart = buffer->getBufferStart(); |
1167 | } |
1168 | |
1169 | void Writer::writeSections() { |
1170 | TimeTraceScope timeScope("Write output sections" ); |
1171 | |
1172 | uint8_t *buf = buffer->getBufferStart(); |
1173 | std::vector<const OutputSection *> osecs; |
1174 | for (const OutputSegment *seg : outputSegments) |
1175 | append_range(C&: osecs, R: seg->getSections()); |
1176 | |
1177 | parallelForEach(Begin: osecs.begin(), End: osecs.end(), Fn: [&](const OutputSection *osec) { |
1178 | osec->writeTo(buf: buf + osec->fileOff); |
1179 | }); |
1180 | } |
1181 | |
1182 | void Writer::applyOptimizationHints() { |
1183 | if (config->arch() != AK_arm64 || config->ignoreOptimizationHints) |
1184 | return; |
1185 | |
1186 | uint8_t *buf = buffer->getBufferStart(); |
1187 | TimeTraceScope timeScope("Apply linker optimization hints" ); |
1188 | parallelForEach(R&: inputFiles, Fn: [buf](const InputFile *file) { |
1189 | if (const auto *objFile = dyn_cast<ObjFile>(Val: file)) |
1190 | target->applyOptimizationHints(buf, *objFile); |
1191 | }); |
1192 | } |
1193 | |
1194 | // In order to utilize multiple cores, we first split the buffer into chunks, |
1195 | // compute a hash for each chunk, and then compute a hash value of the hash |
1196 | // values. |
1197 | void Writer::writeUuid() { |
1198 | TimeTraceScope timeScope("Computing UUID" ); |
1199 | |
1200 | ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()}; |
1201 | std::vector<ArrayRef<uint8_t>> chunks = split(arr: data, chunkSize: 1024 * 1024); |
1202 | |
1203 | // Leave one slot for filename |
1204 | std::vector<uint64_t> hashes(chunks.size() + 1); |
1205 | parallelFor(Begin: 0, End: chunks.size(), |
1206 | Fn: [&](size_t i) { hashes[i] = xxh3_64bits(data: chunks[i]); }); |
1207 | // Append the output filename so that identical binaries with different names |
1208 | // don't get the same UUID. |
1209 | hashes[chunks.size()] = xxh3_64bits(data: sys::path::filename(path: config->finalOutput)); |
1210 | |
1211 | uint64_t digest = xxh3_64bits(data: {reinterpret_cast<uint8_t *>(hashes.data()), |
1212 | hashes.size() * sizeof(uint64_t)}); |
1213 | uuidCommand->writeUuid(digest); |
1214 | } |
1215 | |
1216 | // This is step 5 of the algorithm described in the class comment of |
1217 | // ChainedFixupsSection. |
1218 | void Writer::buildFixupChains() { |
1219 | if (!config->emitChainedFixups) |
1220 | return; |
1221 | |
1222 | const std::vector<Location> &loc = in.chainedFixups->getLocations(); |
1223 | if (loc.empty()) |
1224 | return; |
1225 | |
1226 | TimeTraceScope timeScope("Build fixup chains" ); |
1227 | |
1228 | const uint64_t pageSize = target->getPageSize(); |
1229 | constexpr uint32_t stride = 4; // for DYLD_CHAINED_PTR_64 |
1230 | |
1231 | for (size_t i = 0, count = loc.size(); i < count;) { |
1232 | const OutputSegment *oseg = loc[i].isec->parent->parent; |
1233 | uint8_t *buf = buffer->getBufferStart() + oseg->fileOff; |
1234 | uint64_t pageIdx = loc[i].offset / pageSize; |
1235 | ++i; |
1236 | |
1237 | while (i < count && loc[i].isec->parent->parent == oseg && |
1238 | (loc[i].offset / pageSize) == pageIdx) { |
1239 | uint64_t offset = loc[i].offset - loc[i - 1].offset; |
1240 | |
1241 | auto fail = [&](Twine message) { |
1242 | error(msg: loc[i].isec->getSegName() + "," + loc[i].isec->getName() + |
1243 | ", offset " + |
1244 | Twine(loc[i].offset - loc[i].isec->parent->getSegmentOffset()) + |
1245 | ": " + message); |
1246 | }; |
1247 | |
1248 | if (offset < target->wordSize) |
1249 | return fail("fixups overlap" ); |
1250 | if (offset % stride != 0) |
1251 | return fail( |
1252 | "fixups are unaligned (offset " + Twine(offset) + |
1253 | " is not a multiple of the stride). Re-link with -no_fixup_chains" ); |
1254 | |
1255 | // The "next" field is in the same location for bind and rebase entries. |
1256 | reinterpret_cast<dyld_chained_ptr_64_bind *>(buf + loc[i - 1].offset) |
1257 | ->next = offset / stride; |
1258 | ++i; |
1259 | } |
1260 | } |
1261 | } |
1262 | |
1263 | void Writer::writeCodeSignature() { |
1264 | if (codeSignatureSection) { |
1265 | TimeTraceScope timeScope("Write code signature" ); |
1266 | codeSignatureSection->writeHashes(buf: buffer->getBufferStart()); |
1267 | } |
1268 | } |
1269 | |
1270 | void Writer::writeOutputFile() { |
1271 | TimeTraceScope timeScope("Write output file" ); |
1272 | openFile(); |
1273 | reportPendingUndefinedSymbols(); |
1274 | if (errorCount()) |
1275 | return; |
1276 | writeSections(); |
1277 | applyOptimizationHints(); |
1278 | buildFixupChains(); |
1279 | if (config->generateUuid) |
1280 | writeUuid(); |
1281 | writeCodeSignature(); |
1282 | |
1283 | if (auto e = buffer->commit()) |
1284 | fatal(msg: "failed to write output '" + buffer->getPath() + |
1285 | "': " + toString(E: std::move(e))); |
1286 | } |
1287 | |
1288 | template <class LP> void Writer::run() { |
1289 | treatSpecialUndefineds(); |
1290 | if (config->entry && needsBinding(sym: config->entry)) |
1291 | in.stubs->addEntry(config->entry); |
1292 | |
1293 | // Canonicalization of all pointers to InputSections should be handled by |
1294 | // these two scan* methods. I.e. from this point onward, for all live |
1295 | // InputSections, we should have `isec->canonical() == isec`. |
1296 | scanSymbols(); |
1297 | if (in.objcStubs->isNeeded()) |
1298 | in.objcStubs->setUp(); |
1299 | if (in.objcMethList->isNeeded()) |
1300 | in.objcMethList->setUp(); |
1301 | scanRelocations(); |
1302 | if (in.initOffsets->isNeeded()) |
1303 | in.initOffsets->setUp(); |
1304 | |
1305 | // Do not proceed if there were undefined or duplicate symbols. |
1306 | reportPendingUndefinedSymbols(); |
1307 | reportPendingDuplicateSymbols(); |
1308 | if (errorCount()) |
1309 | return; |
1310 | |
1311 | if (in.stubHelper && in.stubHelper->isNeeded()) |
1312 | in.stubHelper->setUp(); |
1313 | |
1314 | if (in.objCImageInfo->isNeeded()) |
1315 | in.objCImageInfo->finalizeContents(); |
1316 | |
1317 | // At this point, we should know exactly which output sections are needed, |
1318 | // courtesy of scanSymbols() and scanRelocations(). |
1319 | createOutputSections<LP>(); |
1320 | |
1321 | // After this point, we create no new segments; HOWEVER, we might |
1322 | // yet create branch-range extension thunks for architectures whose |
1323 | // hardware call instructions have limited range, e.g., ARM(64). |
1324 | // The thunks are created as InputSections interspersed among |
1325 | // the ordinary __TEXT,_text InputSections. |
1326 | sortSegmentsAndSections(); |
1327 | createLoadCommands<LP>(); |
1328 | finalizeAddresses(); |
1329 | |
1330 | llvm::thread mapFileWriter([&] { |
1331 | if (LLVM_ENABLE_THREADS && config->timeTraceEnabled) |
1332 | timeTraceProfilerInitialize(TimeTraceGranularity: config->timeTraceGranularity, ProcName: "writeMapFile" ); |
1333 | writeMapFile(); |
1334 | if (LLVM_ENABLE_THREADS && config->timeTraceEnabled) |
1335 | timeTraceProfilerFinishThread(); |
1336 | }); |
1337 | |
1338 | finalizeLinkEditSegment(); |
1339 | writeOutputFile(); |
1340 | mapFileWriter.join(); |
1341 | } |
1342 | |
1343 | template <class LP> void macho::writeResult() { Writer().run<LP>(); } |
1344 | |
1345 | void macho::resetWriter() { LCDylib::resetInstanceCount(); } |
1346 | |
1347 | void macho::createSyntheticSections() { |
1348 | in.header = make<MachHeaderSection>(); |
1349 | if (config->dedupStrings) |
1350 | in.cStringSection = |
1351 | make<DeduplicatedCStringSection>(args: section_names::cString); |
1352 | else |
1353 | in.cStringSection = make<CStringSection>(args: section_names::cString); |
1354 | in.objcMethnameSection = |
1355 | make<DeduplicatedCStringSection>(args: section_names::objcMethname); |
1356 | in.wordLiteralSection = make<WordLiteralSection>(); |
1357 | if (config->emitChainedFixups) { |
1358 | in.chainedFixups = make<ChainedFixupsSection>(); |
1359 | } else { |
1360 | in.rebase = make<RebaseSection>(); |
1361 | in.binding = make<BindingSection>(); |
1362 | in.weakBinding = make<WeakBindingSection>(); |
1363 | in.lazyBinding = make<LazyBindingSection>(); |
1364 | in.lazyPointers = make<LazyPointerSection>(); |
1365 | in.stubHelper = make<StubHelperSection>(); |
1366 | } |
1367 | in.exports = make<ExportSection>(); |
1368 | in.got = make<GotSection>(); |
1369 | in.tlvPointers = make<TlvPointerSection>(); |
1370 | in.stubs = make<StubsSection>(); |
1371 | in.objcStubs = make<ObjCStubsSection>(); |
1372 | in.unwindInfo = makeUnwindInfoSection(); |
1373 | in.objCImageInfo = make<ObjCImageInfoSection>(); |
1374 | in.initOffsets = make<InitOffsetsSection>(); |
1375 | in.objcMethList = make<ObjCMethListSection>(); |
1376 | |
1377 | // This section contains space for just a single word, and will be used by |
1378 | // dyld to cache an address to the image loader it uses. |
1379 | uint8_t *arr = bAlloc().Allocate<uint8_t>(Num: target->wordSize); |
1380 | memset(s: arr, c: 0, n: target->wordSize); |
1381 | in.imageLoaderCache = makeSyntheticInputSection( |
1382 | segName: segment_names::data, sectName: section_names::data, flags: S_REGULAR, |
1383 | data: ArrayRef<uint8_t>{arr, target->wordSize}, |
1384 | /*align=*/target->wordSize); |
1385 | assert(in.imageLoaderCache->live); |
1386 | } |
1387 | |
// Definition of the macho::firstTLVDataSection global. It starts out null and
// is set by sortSegmentsAndSections() to the first thread-local data output
// section it encounters.
OutputSection *macho::firstTLVDataSection = nullptr;

// Explicit instantiations of writeResult() for the LP64 and ILP32 layouts.
template void macho::writeResult<LP64>();
template void macho::writeResult<ILP32>();
1392 | |