| 1 | //===- Writer.cpp ---------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "Writer.h" |
| 10 | #include "ConcatOutputSection.h" |
| 11 | #include "Config.h" |
| 12 | #include "InputFiles.h" |
| 13 | #include "InputSection.h" |
| 14 | #include "MapFile.h" |
| 15 | #include "OutputSection.h" |
| 16 | #include "OutputSegment.h" |
| 17 | #include "SectionPriorities.h" |
| 18 | #include "SymbolTable.h" |
| 19 | #include "Symbols.h" |
| 20 | #include "SyntheticSections.h" |
| 21 | #include "Target.h" |
| 22 | #include "UnwindInfoSection.h" |
| 23 | |
| 24 | #include "lld/Common/Arrays.h" |
| 25 | #include "lld/Common/CommonLinkerContext.h" |
| 26 | #include "llvm/BinaryFormat/MachO.h" |
| 27 | #include "llvm/Config/llvm-config.h" |
| 28 | #include "llvm/Support/Parallel.h" |
| 29 | #include "llvm/Support/Path.h" |
| 30 | #include "llvm/Support/TimeProfiler.h" |
| 31 | #include "llvm/Support/thread.h" |
| 32 | #include "llvm/Support/xxhash.h" |
| 33 | |
| 34 | #include <algorithm> |
| 35 | |
| 36 | using namespace llvm; |
| 37 | using namespace llvm::MachO; |
| 38 | using namespace llvm::sys; |
| 39 | using namespace lld; |
| 40 | using namespace lld::macho; |
| 41 | |
| 42 | namespace { |
| 43 | class LCUuid; |
| 44 | |
| 45 | class Writer { |
| 46 | public: |
| 47 | Writer() : buffer(errorHandler().outputBuffer) {} |
| 48 | |
| 49 | void treatSpecialUndefineds(); |
| 50 | void scanRelocations(); |
| 51 | void scanSymbols(); |
| 52 | template <class LP> void createOutputSections(); |
| 53 | template <class LP> void createLoadCommands(); |
| 54 | void finalizeAddresses(); |
| 55 | void finalizeLinkEditSegment(); |
| 56 | void assignAddresses(OutputSegment *); |
| 57 | |
| 58 | void openFile(); |
| 59 | void writeSections(); |
| 60 | void applyOptimizationHints(); |
| 61 | void buildFixupChains(); |
| 62 | void writeUuid(); |
| 63 | void writeCodeSignature(); |
| 64 | void writeOutputFile(); |
| 65 | |
| 66 | template <class LP> void run(); |
| 67 | |
| 68 | std::unique_ptr<FileOutputBuffer> &buffer; |
| 69 | uint64_t addr = 0; |
| 70 | uint64_t fileOff = 0; |
| 71 | MachHeaderSection * = nullptr; |
| 72 | StringTableSection *stringTableSection = nullptr; |
| 73 | SymtabSection *symtabSection = nullptr; |
| 74 | IndirectSymtabSection *indirectSymtabSection = nullptr; |
| 75 | CodeSignatureSection *codeSignatureSection = nullptr; |
| 76 | DataInCodeSection *dataInCodeSection = nullptr; |
| 77 | FunctionStartsSection *functionStartsSection = nullptr; |
| 78 | |
| 79 | LCUuid *uuidCommand = nullptr; |
| 80 | OutputSegment *linkEditSegment = nullptr; |
| 81 | }; |
| 82 | |
| 83 | // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. |
| 84 | class LCDyldInfo final : public LoadCommand { |
| 85 | public: |
| 86 | LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection, |
| 87 | WeakBindingSection *weakBindingSection, |
| 88 | LazyBindingSection *lazyBindingSection, |
| 89 | ExportSection *exportSection) |
| 90 | : rebaseSection(rebaseSection), bindingSection(bindingSection), |
| 91 | weakBindingSection(weakBindingSection), |
| 92 | lazyBindingSection(lazyBindingSection), exportSection(exportSection) {} |
| 93 | |
| 94 | uint32_t getSize() const override { return sizeof(dyld_info_command); } |
| 95 | |
| 96 | void writeTo(uint8_t *buf) const override { |
| 97 | auto *c = reinterpret_cast<dyld_info_command *>(buf); |
| 98 | c->cmd = LC_DYLD_INFO_ONLY; |
| 99 | c->cmdsize = getSize(); |
| 100 | if (rebaseSection->isNeeded()) { |
| 101 | c->rebase_off = rebaseSection->fileOff; |
| 102 | c->rebase_size = rebaseSection->getFileSize(); |
| 103 | } |
| 104 | if (bindingSection->isNeeded()) { |
| 105 | c->bind_off = bindingSection->fileOff; |
| 106 | c->bind_size = bindingSection->getFileSize(); |
| 107 | } |
| 108 | if (weakBindingSection->isNeeded()) { |
| 109 | c->weak_bind_off = weakBindingSection->fileOff; |
| 110 | c->weak_bind_size = weakBindingSection->getFileSize(); |
| 111 | } |
| 112 | if (lazyBindingSection->isNeeded()) { |
| 113 | c->lazy_bind_off = lazyBindingSection->fileOff; |
| 114 | c->lazy_bind_size = lazyBindingSection->getFileSize(); |
| 115 | } |
| 116 | if (exportSection->isNeeded()) { |
| 117 | c->export_off = exportSection->fileOff; |
| 118 | c->export_size = exportSection->getFileSize(); |
| 119 | } |
| 120 | } |
| 121 | |
| 122 | RebaseSection *rebaseSection; |
| 123 | BindingSection *bindingSection; |
| 124 | WeakBindingSection *weakBindingSection; |
| 125 | LazyBindingSection *lazyBindingSection; |
| 126 | ExportSection *exportSection; |
| 127 | }; |
| 128 | |
| 129 | class LCSubFramework final : public LoadCommand { |
| 130 | public: |
| 131 | LCSubFramework(StringRef umbrella) : umbrella(umbrella) {} |
| 132 | |
| 133 | uint32_t getSize() const override { |
| 134 | return alignToPowerOf2(Value: sizeof(sub_framework_command) + umbrella.size() + 1, |
| 135 | Align: target->wordSize); |
| 136 | } |
| 137 | |
| 138 | void writeTo(uint8_t *buf) const override { |
| 139 | auto *c = reinterpret_cast<sub_framework_command *>(buf); |
| 140 | buf += sizeof(sub_framework_command); |
| 141 | |
| 142 | c->cmd = LC_SUB_FRAMEWORK; |
| 143 | c->cmdsize = getSize(); |
| 144 | c->umbrella = sizeof(sub_framework_command); |
| 145 | |
| 146 | memcpy(dest: buf, src: umbrella.data(), n: umbrella.size()); |
| 147 | buf[umbrella.size()] = '\0'; |
| 148 | } |
| 149 | |
| 150 | private: |
| 151 | const StringRef umbrella; |
| 152 | }; |
| 153 | |
| 154 | class LCFunctionStarts final : public LoadCommand { |
| 155 | public: |
| 156 | explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection) |
| 157 | : functionStartsSection(functionStartsSection) {} |
| 158 | |
| 159 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
| 160 | |
| 161 | void writeTo(uint8_t *buf) const override { |
| 162 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
| 163 | c->cmd = LC_FUNCTION_STARTS; |
| 164 | c->cmdsize = getSize(); |
| 165 | c->dataoff = functionStartsSection->fileOff; |
| 166 | c->datasize = functionStartsSection->getFileSize(); |
| 167 | } |
| 168 | |
| 169 | private: |
| 170 | FunctionStartsSection *functionStartsSection; |
| 171 | }; |
| 172 | |
| 173 | class LCDataInCode final : public LoadCommand { |
| 174 | public: |
| 175 | explicit LCDataInCode(DataInCodeSection *dataInCodeSection) |
| 176 | : dataInCodeSection(dataInCodeSection) {} |
| 177 | |
| 178 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
| 179 | |
| 180 | void writeTo(uint8_t *buf) const override { |
| 181 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
| 182 | c->cmd = LC_DATA_IN_CODE; |
| 183 | c->cmdsize = getSize(); |
| 184 | c->dataoff = dataInCodeSection->fileOff; |
| 185 | c->datasize = dataInCodeSection->getFileSize(); |
| 186 | } |
| 187 | |
| 188 | private: |
| 189 | DataInCodeSection *dataInCodeSection; |
| 190 | }; |
| 191 | |
| 192 | class LCDysymtab final : public LoadCommand { |
| 193 | public: |
| 194 | LCDysymtab(SymtabSection *symtabSection, |
| 195 | IndirectSymtabSection *indirectSymtabSection) |
| 196 | : symtabSection(symtabSection), |
| 197 | indirectSymtabSection(indirectSymtabSection) {} |
| 198 | |
| 199 | uint32_t getSize() const override { return sizeof(dysymtab_command); } |
| 200 | |
| 201 | void writeTo(uint8_t *buf) const override { |
| 202 | auto *c = reinterpret_cast<dysymtab_command *>(buf); |
| 203 | c->cmd = LC_DYSYMTAB; |
| 204 | c->cmdsize = getSize(); |
| 205 | |
| 206 | c->ilocalsym = 0; |
| 207 | c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols(); |
| 208 | c->nextdefsym = symtabSection->getNumExternalSymbols(); |
| 209 | c->iundefsym = c->iextdefsym + c->nextdefsym; |
| 210 | c->nundefsym = symtabSection->getNumUndefinedSymbols(); |
| 211 | |
| 212 | c->indirectsymoff = indirectSymtabSection->fileOff; |
| 213 | c->nindirectsyms = indirectSymtabSection->getNumSymbols(); |
| 214 | } |
| 215 | |
| 216 | SymtabSection *symtabSection; |
| 217 | IndirectSymtabSection *indirectSymtabSection; |
| 218 | }; |
| 219 | |
| 220 | template <class LP> class LCSegment final : public LoadCommand { |
| 221 | public: |
| 222 | LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {} |
| 223 | |
| 224 | uint32_t getSize() const override { |
| 225 | return sizeof(typename LP::segment_command) + |
| 226 | seg->numNonHiddenSections() * sizeof(typename LP::section); |
| 227 | } |
| 228 | |
| 229 | void writeTo(uint8_t *buf) const override { |
| 230 | using SegmentCommand = typename LP::segment_command; |
| 231 | using = typename LP::section; |
| 232 | |
| 233 | auto *c = reinterpret_cast<SegmentCommand *>(buf); |
| 234 | buf += sizeof(SegmentCommand); |
| 235 | |
| 236 | c->cmd = LP::segmentLCType; |
| 237 | c->cmdsize = getSize(); |
| 238 | memcpy(c->segname, name.data(), name.size()); |
| 239 | c->fileoff = seg->fileOff; |
| 240 | c->maxprot = seg->maxProt; |
| 241 | c->initprot = seg->initProt; |
| 242 | |
| 243 | c->vmaddr = seg->addr; |
| 244 | c->vmsize = seg->vmSize; |
| 245 | c->filesize = seg->fileSize; |
| 246 | c->nsects = seg->numNonHiddenSections(); |
| 247 | c->flags = seg->flags; |
| 248 | |
| 249 | for (const OutputSection *osec : seg->getSections()) { |
| 250 | if (osec->isHidden()) |
| 251 | continue; |
| 252 | |
| 253 | auto *sectHdr = reinterpret_cast<SectionHeader *>(buf); |
| 254 | buf += sizeof(SectionHeader); |
| 255 | |
| 256 | memcpy(sectHdr->sectname, osec->name.data(), osec->name.size()); |
| 257 | memcpy(sectHdr->segname, name.data(), name.size()); |
| 258 | |
| 259 | sectHdr->addr = osec->addr; |
| 260 | sectHdr->offset = osec->fileOff; |
| 261 | sectHdr->align = Log2_32(Value: osec->align); |
| 262 | sectHdr->flags = osec->flags; |
| 263 | sectHdr->size = osec->getSize(); |
| 264 | sectHdr->reserved1 = osec->reserved1; |
| 265 | sectHdr->reserved2 = osec->reserved2; |
| 266 | } |
| 267 | } |
| 268 | |
| 269 | private: |
| 270 | StringRef name; |
| 271 | OutputSegment *seg; |
| 272 | }; |
| 273 | |
| 274 | class LCMain final : public LoadCommand { |
| 275 | uint32_t getSize() const override { |
| 276 | return sizeof(structs::entry_point_command); |
| 277 | } |
| 278 | |
| 279 | void writeTo(uint8_t *buf) const override { |
| 280 | auto *c = reinterpret_cast<structs::entry_point_command *>(buf); |
| 281 | c->cmd = LC_MAIN; |
| 282 | c->cmdsize = getSize(); |
| 283 | |
| 284 | if (config->entry->isInStubs()) |
| 285 | c->entryoff = |
| 286 | in.stubs->fileOff + config->entry->stubsIndex * target->stubSize; |
| 287 | else |
| 288 | c->entryoff = config->entry->getVA() - in.header->addr; |
| 289 | |
| 290 | c->stacksize = 0; |
| 291 | } |
| 292 | }; |
| 293 | |
| 294 | class LCSymtab final : public LoadCommand { |
| 295 | public: |
| 296 | LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection) |
| 297 | : symtabSection(symtabSection), stringTableSection(stringTableSection) {} |
| 298 | |
| 299 | uint32_t getSize() const override { return sizeof(symtab_command); } |
| 300 | |
| 301 | void writeTo(uint8_t *buf) const override { |
| 302 | auto *c = reinterpret_cast<symtab_command *>(buf); |
| 303 | c->cmd = LC_SYMTAB; |
| 304 | c->cmdsize = getSize(); |
| 305 | c->symoff = symtabSection->fileOff; |
| 306 | c->nsyms = symtabSection->getNumSymbols(); |
| 307 | c->stroff = stringTableSection->fileOff; |
| 308 | c->strsize = stringTableSection->getFileSize(); |
| 309 | } |
| 310 | |
| 311 | SymtabSection *symtabSection = nullptr; |
| 312 | StringTableSection *stringTableSection = nullptr; |
| 313 | }; |
| 314 | |
| 315 | // There are several dylib load commands that share the same structure: |
| 316 | // * LC_LOAD_DYLIB |
| 317 | // * LC_ID_DYLIB |
| 318 | // * LC_REEXPORT_DYLIB |
| 319 | class LCDylib final : public LoadCommand { |
| 320 | public: |
| 321 | LCDylib(LoadCommandType type, StringRef path, |
| 322 | uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0) |
| 323 | : type(type), path(path), compatibilityVersion(compatibilityVersion), |
| 324 | currentVersion(currentVersion) { |
| 325 | instanceCount++; |
| 326 | } |
| 327 | |
| 328 | uint32_t getSize() const override { |
| 329 | return alignToPowerOf2(Value: sizeof(dylib_command) + path.size() + 1, |
| 330 | Align: target->wordSize); |
| 331 | } |
| 332 | |
| 333 | void writeTo(uint8_t *buf) const override { |
| 334 | auto *c = reinterpret_cast<dylib_command *>(buf); |
| 335 | buf += sizeof(dylib_command); |
| 336 | |
| 337 | c->cmd = type; |
| 338 | c->cmdsize = getSize(); |
| 339 | c->dylib.name = sizeof(dylib_command); |
| 340 | c->dylib.timestamp = 0; |
| 341 | c->dylib.compatibility_version = compatibilityVersion; |
| 342 | c->dylib.current_version = currentVersion; |
| 343 | |
| 344 | memcpy(dest: buf, src: path.data(), n: path.size()); |
| 345 | buf[path.size()] = '\0'; |
| 346 | } |
| 347 | |
| 348 | static uint32_t getInstanceCount() { return instanceCount; } |
| 349 | static void resetInstanceCount() { instanceCount = 0; } |
| 350 | |
| 351 | private: |
| 352 | LoadCommandType type; |
| 353 | StringRef path; |
| 354 | uint32_t compatibilityVersion; |
| 355 | uint32_t currentVersion; |
| 356 | static uint32_t instanceCount; |
| 357 | }; |
| 358 | |
| 359 | uint32_t LCDylib::instanceCount = 0; |
| 360 | |
| 361 | class LCLoadDylinker final : public LoadCommand { |
| 362 | public: |
| 363 | uint32_t getSize() const override { |
| 364 | return alignToPowerOf2(Value: sizeof(dylinker_command) + path.size() + 1, |
| 365 | Align: target->wordSize); |
| 366 | } |
| 367 | |
| 368 | void writeTo(uint8_t *buf) const override { |
| 369 | auto *c = reinterpret_cast<dylinker_command *>(buf); |
| 370 | buf += sizeof(dylinker_command); |
| 371 | |
| 372 | c->cmd = LC_LOAD_DYLINKER; |
| 373 | c->cmdsize = getSize(); |
| 374 | c->name = sizeof(dylinker_command); |
| 375 | |
| 376 | memcpy(dest: buf, src: path.data(), n: path.size()); |
| 377 | buf[path.size()] = '\0'; |
| 378 | } |
| 379 | |
| 380 | private: |
| 381 | // Recent versions of Darwin won't run any binary that has dyld at a |
| 382 | // different location. |
| 383 | const StringRef path = "/usr/lib/dyld" ; |
| 384 | }; |
| 385 | |
| 386 | class LCRPath final : public LoadCommand { |
| 387 | public: |
| 388 | explicit LCRPath(StringRef path) : path(path) {} |
| 389 | |
| 390 | uint32_t getSize() const override { |
| 391 | return alignToPowerOf2(Value: sizeof(rpath_command) + path.size() + 1, |
| 392 | Align: target->wordSize); |
| 393 | } |
| 394 | |
| 395 | void writeTo(uint8_t *buf) const override { |
| 396 | auto *c = reinterpret_cast<rpath_command *>(buf); |
| 397 | buf += sizeof(rpath_command); |
| 398 | |
| 399 | c->cmd = LC_RPATH; |
| 400 | c->cmdsize = getSize(); |
| 401 | c->path = sizeof(rpath_command); |
| 402 | |
| 403 | memcpy(dest: buf, src: path.data(), n: path.size()); |
| 404 | buf[path.size()] = '\0'; |
| 405 | } |
| 406 | |
| 407 | private: |
| 408 | StringRef path; |
| 409 | }; |
| 410 | |
| 411 | class LCSubClient final : public LoadCommand { |
| 412 | public: |
| 413 | explicit LCSubClient(StringRef client) : client(client) {} |
| 414 | |
| 415 | uint32_t getSize() const override { |
| 416 | return alignToPowerOf2(Value: sizeof(sub_client_command) + client.size() + 1, |
| 417 | Align: target->wordSize); |
| 418 | } |
| 419 | |
| 420 | void writeTo(uint8_t *buf) const override { |
| 421 | auto *c = reinterpret_cast<sub_client_command *>(buf); |
| 422 | buf += sizeof(sub_client_command); |
| 423 | |
| 424 | c->cmd = LC_SUB_CLIENT; |
| 425 | c->cmdsize = getSize(); |
| 426 | c->client = sizeof(sub_client_command); |
| 427 | |
| 428 | memcpy(dest: buf, src: client.data(), n: client.size()); |
| 429 | buf[client.size()] = '\0'; |
| 430 | } |
| 431 | |
| 432 | private: |
| 433 | StringRef client; |
| 434 | }; |
| 435 | |
| 436 | class LCDyldEnv final : public LoadCommand { |
| 437 | public: |
| 438 | explicit LCDyldEnv(StringRef name) : name(name) {} |
| 439 | |
| 440 | uint32_t getSize() const override { |
| 441 | return alignToPowerOf2(Value: sizeof(dyld_env_command) + name.size() + 1, |
| 442 | Align: target->wordSize); |
| 443 | } |
| 444 | |
| 445 | void writeTo(uint8_t *buf) const override { |
| 446 | auto *c = reinterpret_cast<dyld_env_command *>(buf); |
| 447 | buf += sizeof(dyld_env_command); |
| 448 | |
| 449 | c->cmd = LC_DYLD_ENVIRONMENT; |
| 450 | c->cmdsize = getSize(); |
| 451 | c->name = sizeof(dyld_env_command); |
| 452 | |
| 453 | memcpy(dest: buf, src: name.data(), n: name.size()); |
| 454 | buf[name.size()] = '\0'; |
| 455 | } |
| 456 | |
| 457 | private: |
| 458 | StringRef name; |
| 459 | }; |
| 460 | |
| 461 | class LCMinVersion final : public LoadCommand { |
| 462 | public: |
| 463 | explicit LCMinVersion(const PlatformInfo &platformInfo) |
| 464 | : platformInfo(platformInfo) {} |
| 465 | |
| 466 | uint32_t getSize() const override { return sizeof(version_min_command); } |
| 467 | |
| 468 | void writeTo(uint8_t *buf) const override { |
| 469 | auto *c = reinterpret_cast<version_min_command *>(buf); |
| 470 | switch (platformInfo.target.Platform) { |
| 471 | case PLATFORM_MACOS: |
| 472 | c->cmd = LC_VERSION_MIN_MACOSX; |
| 473 | break; |
| 474 | case PLATFORM_IOS: |
| 475 | case PLATFORM_IOSSIMULATOR: |
| 476 | c->cmd = LC_VERSION_MIN_IPHONEOS; |
| 477 | break; |
| 478 | case PLATFORM_TVOS: |
| 479 | case PLATFORM_TVOSSIMULATOR: |
| 480 | c->cmd = LC_VERSION_MIN_TVOS; |
| 481 | break; |
| 482 | case PLATFORM_WATCHOS: |
| 483 | case PLATFORM_WATCHOSSIMULATOR: |
| 484 | c->cmd = LC_VERSION_MIN_WATCHOS; |
| 485 | break; |
| 486 | default: |
| 487 | llvm_unreachable("invalid platform" ); |
| 488 | break; |
| 489 | } |
| 490 | c->cmdsize = getSize(); |
| 491 | c->version = encodeVersion(version: platformInfo.target.MinDeployment); |
| 492 | c->sdk = encodeVersion(version: platformInfo.sdk); |
| 493 | } |
| 494 | |
| 495 | private: |
| 496 | const PlatformInfo &platformInfo; |
| 497 | }; |
| 498 | |
| 499 | class LCBuildVersion final : public LoadCommand { |
| 500 | public: |
| 501 | explicit LCBuildVersion(const PlatformInfo &platformInfo) |
| 502 | : platformInfo(platformInfo) {} |
| 503 | |
| 504 | const int ntools = 1; |
| 505 | |
| 506 | uint32_t getSize() const override { |
| 507 | return sizeof(build_version_command) + ntools * sizeof(build_tool_version); |
| 508 | } |
| 509 | |
| 510 | void writeTo(uint8_t *buf) const override { |
| 511 | auto *c = reinterpret_cast<build_version_command *>(buf); |
| 512 | c->cmd = LC_BUILD_VERSION; |
| 513 | c->cmdsize = getSize(); |
| 514 | |
| 515 | c->platform = static_cast<uint32_t>(platformInfo.target.Platform); |
| 516 | c->minos = encodeVersion(version: platformInfo.target.MinDeployment); |
| 517 | c->sdk = encodeVersion(version: platformInfo.sdk); |
| 518 | |
| 519 | c->ntools = ntools; |
| 520 | auto *t = reinterpret_cast<build_tool_version *>(&c[1]); |
| 521 | t->tool = TOOL_LLD; |
| 522 | t->version = encodeVersion(version: VersionTuple( |
| 523 | LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH)); |
| 524 | } |
| 525 | |
| 526 | private: |
| 527 | const PlatformInfo &platformInfo; |
| 528 | }; |
| 529 | |
| 530 | // Stores a unique identifier for the output file based on an MD5 hash of its |
| 531 | // contents. In order to hash the contents, we must first write them, but |
| 532 | // LC_UUID itself must be part of the written contents in order for all the |
| 533 | // offsets to be calculated correctly. We resolve this circular paradox by |
| 534 | // first writing an LC_UUID with an all-zero UUID, then updating the UUID with |
| 535 | // its real value later. |
| 536 | class LCUuid final : public LoadCommand { |
| 537 | public: |
| 538 | uint32_t getSize() const override { return sizeof(uuid_command); } |
| 539 | |
| 540 | void writeTo(uint8_t *buf) const override { |
| 541 | auto *c = reinterpret_cast<uuid_command *>(buf); |
| 542 | c->cmd = LC_UUID; |
| 543 | c->cmdsize = getSize(); |
| 544 | uuidBuf = c->uuid; |
| 545 | } |
| 546 | |
| 547 | void writeUuid(uint64_t digest) const { |
| 548 | // xxhash only gives us 8 bytes, so put some fixed data in the other half. |
| 549 | static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size" ); |
| 550 | memcpy(dest: uuidBuf, src: "LLD\xa1UU1D" , n: 8); |
| 551 | memcpy(dest: uuidBuf + 8, src: &digest, n: 8); |
| 552 | |
| 553 | // RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in |
| 554 | // byte 8. Byte 6 is already fine due to the fixed data we put in. We don't |
| 555 | // want to lose bits of the digest in byte 8, so swap that with a byte of |
| 556 | // fixed data that happens to have the right bits set. |
| 557 | std::swap(a&: uuidBuf[3], b&: uuidBuf[8]); |
| 558 | |
| 559 | // Claim that this is an MD5-based hash. It isn't, but this signals that |
| 560 | // this is not a time-based and not a random hash. MD5 seems like the least |
| 561 | // bad lie we can put here. |
| 562 | assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3" ); |
| 563 | assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2" ); |
| 564 | } |
| 565 | |
| 566 | mutable uint8_t *uuidBuf; |
| 567 | }; |
| 568 | |
| 569 | template <class LP> class LCEncryptionInfo final : public LoadCommand { |
| 570 | public: |
| 571 | uint32_t getSize() const override { |
| 572 | return sizeof(typename LP::encryption_info_command); |
| 573 | } |
| 574 | |
| 575 | void writeTo(uint8_t *buf) const override { |
| 576 | using EncryptionInfo = typename LP::encryption_info_command; |
| 577 | auto *c = reinterpret_cast<EncryptionInfo *>(buf); |
| 578 | buf += sizeof(EncryptionInfo); |
| 579 | c->cmd = LP::encryptionInfoLCType; |
| 580 | c->cmdsize = getSize(); |
| 581 | c->cryptoff = in.header->getSize(); |
| 582 | auto it = find_if(outputSegments, [](const OutputSegment *seg) { |
| 583 | return seg->name == segment_names::text; |
| 584 | }); |
| 585 | assert(it != outputSegments.end()); |
| 586 | c->cryptsize = (*it)->fileSize - c->cryptoff; |
| 587 | } |
| 588 | }; |
| 589 | |
| 590 | class LCCodeSignature final : public LoadCommand { |
| 591 | public: |
| 592 | LCCodeSignature(CodeSignatureSection *section) : section(section) {} |
| 593 | |
| 594 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
| 595 | |
| 596 | void writeTo(uint8_t *buf) const override { |
| 597 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
| 598 | c->cmd = LC_CODE_SIGNATURE; |
| 599 | c->cmdsize = getSize(); |
| 600 | c->dataoff = static_cast<uint32_t>(section->fileOff); |
| 601 | c->datasize = section->getSize(); |
| 602 | } |
| 603 | |
| 604 | CodeSignatureSection *section; |
| 605 | }; |
| 606 | |
| 607 | class LCExportsTrie final : public LoadCommand { |
| 608 | public: |
| 609 | LCExportsTrie(ExportSection *section) : section(section) {} |
| 610 | |
| 611 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
| 612 | |
| 613 | void writeTo(uint8_t *buf) const override { |
| 614 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
| 615 | c->cmd = LC_DYLD_EXPORTS_TRIE; |
| 616 | c->cmdsize = getSize(); |
| 617 | c->dataoff = section->fileOff; |
| 618 | c->datasize = section->getSize(); |
| 619 | } |
| 620 | |
| 621 | ExportSection *section; |
| 622 | }; |
| 623 | |
| 624 | class LCChainedFixups final : public LoadCommand { |
| 625 | public: |
| 626 | LCChainedFixups(ChainedFixupsSection *section) : section(section) {} |
| 627 | |
| 628 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
| 629 | |
| 630 | void writeTo(uint8_t *buf) const override { |
| 631 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
| 632 | c->cmd = LC_DYLD_CHAINED_FIXUPS; |
| 633 | c->cmdsize = getSize(); |
| 634 | c->dataoff = section->fileOff; |
| 635 | c->datasize = section->getSize(); |
| 636 | } |
| 637 | |
| 638 | ChainedFixupsSection *section; |
| 639 | }; |
| 640 | |
| 641 | } // namespace |
| 642 | |
| 643 | void Writer::treatSpecialUndefineds() { |
| 644 | if (config->entry) |
| 645 | if (auto *undefined = dyn_cast<Undefined>(Val: config->entry)) |
| 646 | treatUndefinedSymbol(*undefined, source: "the entry point" ); |
| 647 | |
| 648 | // FIXME: This prints symbols that are undefined both in input files and |
| 649 | // via -u flag twice. |
| 650 | for (const Symbol *sym : config->explicitUndefineds) { |
| 651 | if (const auto *undefined = dyn_cast<Undefined>(Val: sym)) |
| 652 | treatUndefinedSymbol(*undefined, source: "-u" ); |
| 653 | } |
| 654 | // Literal exported-symbol names must be defined, but glob |
| 655 | // patterns need not match. |
| 656 | for (const CachedHashStringRef &cachedName : |
| 657 | config->exportedSymbols.literals) { |
| 658 | if (const Symbol *sym = symtab->find(name: cachedName)) |
| 659 | if (const auto *undefined = dyn_cast<Undefined>(Val: sym)) |
| 660 | treatUndefinedSymbol(*undefined, source: "-exported_symbol(s_list)" ); |
| 661 | } |
| 662 | } |
| 663 | |
| 664 | static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, |
| 665 | const lld::macho::Reloc &r) { |
| 666 | if (!sym->isLive()) { |
| 667 | if (Defined *defined = dyn_cast<Defined>(Val: sym)) { |
| 668 | if (config->emitInitOffsets && |
| 669 | defined->isec()->getName() == section_names::moduleInitFunc) |
| 670 | fatal(msg: isec->getLocation(off: r.offset) + ": cannot reference " + |
| 671 | sym->getName() + |
| 672 | " defined in __mod_init_func when -init_offsets is used" ); |
| 673 | } |
| 674 | assert(false && "referenced symbol must be live" ); |
| 675 | } |
| 676 | |
| 677 | const RelocAttrs &relocAttrs = target->getRelocAttrs(type: r.type); |
| 678 | |
| 679 | if (relocAttrs.hasAttr(b: RelocAttrBits::BRANCH)) { |
| 680 | if (needsBinding(sym)) |
| 681 | in.stubs->addEntry(sym); |
| 682 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::GOT)) { |
| 683 | if (relocAttrs.hasAttr(b: RelocAttrBits::POINTER) || needsBinding(sym)) |
| 684 | in.got->addEntry(sym); |
| 685 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::TLV)) { |
| 686 | if (needsBinding(sym)) |
| 687 | in.tlvPointers->addEntry(sym); |
| 688 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::UNSIGNED)) { |
| 689 | // References from thread-local variable sections are treated as offsets |
| 690 | // relative to the start of the referent section, and therefore have no |
| 691 | // need of rebase opcodes. |
| 692 | if (!(isThreadLocalVariables(flags: isec->getFlags()) && isa<Defined>(Val: sym))) |
| 693 | addNonLazyBindingEntries(sym, isec, offset: r.offset, addend: r.addend); |
| 694 | } |
| 695 | } |
| 696 | |
| 697 | void Writer::scanRelocations() { |
| 698 | TimeTraceScope timeScope("Scan relocations" ); |
| 699 | |
| 700 | // This can't use a for-each loop: It calls treatUndefinedSymbol(), which can |
| 701 | // add to inputSections, which invalidates inputSections's iterators. |
| 702 | for (size_t i = 0; i < inputSections.size(); ++i) { |
| 703 | ConcatInputSection *isec = inputSections[i]; |
| 704 | |
| 705 | if (isec->shouldOmitFromOutput()) |
| 706 | continue; |
| 707 | |
| 708 | for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) { |
| 709 | lld::macho::Reloc &r = *it; |
| 710 | |
| 711 | // Canonicalize the referent so that later accesses in Writer won't |
| 712 | // have to worry about it. |
| 713 | if (auto *referentIsec = dyn_cast_if_present<InputSection *>(Val&: r.referent)) |
| 714 | r.referent = referentIsec->canonical(); |
| 715 | |
| 716 | if (target->hasAttr(type: r.type, bit: RelocAttrBits::SUBTRAHEND)) { |
| 717 | // Skip over the following UNSIGNED relocation -- it's just there as the |
| 718 | // minuend, and doesn't have the usual UNSIGNED semantics. We don't want |
| 719 | // to emit rebase opcodes for it. |
| 720 | ++it; |
| 721 | // Canonicalize the referent so that later accesses in Writer won't |
| 722 | // have to worry about it. |
| 723 | if (auto *referentIsec = it->referent.dyn_cast<InputSection *>()) |
| 724 | it->referent = referentIsec->canonical(); |
| 725 | continue; |
| 726 | } |
| 727 | if (auto *sym = dyn_cast_if_present<Symbol *>(Val&: r.referent)) { |
| 728 | if (auto *undefined = dyn_cast<Undefined>(Val: sym)) |
| 729 | treatUndefinedSymbol(*undefined, isec, offset: r.offset); |
| 730 | // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check. |
| 731 | if (!isa<Undefined>(Val: sym) && validateSymbolRelocation(sym, isec, r)) |
| 732 | prepareSymbolRelocation(sym, isec, r); |
| 733 | } else { |
| 734 | if (!r.pcrel) { |
| 735 | if (config->emitChainedFixups) |
| 736 | in.chainedFixups->addRebase(isec, offset: r.offset); |
| 737 | else |
| 738 | in.rebase->addEntry(isec, offset: r.offset); |
| 739 | } |
| 740 | } |
| 741 | } |
| 742 | } |
| 743 | |
| 744 | in.unwindInfo->prepare(); |
| 745 | } |
| 746 | |
| 747 | static void addNonWeakDefinition(const Defined *defined) { |
| 748 | if (config->emitChainedFixups) |
| 749 | in.chainedFixups->setHasNonWeakDefinition(); |
| 750 | else |
| 751 | in.weakBinding->addNonWeakDefinition(defined); |
| 752 | } |
| 753 | |
| 754 | void Writer::scanSymbols() { |
| 755 | TimeTraceScope timeScope("Scan symbols" ); |
| 756 | ObjCSelRefsHelper::initialize(); |
| 757 | for (Symbol *sym : symtab->getSymbols()) { |
| 758 | if (auto *defined = dyn_cast<Defined>(Val: sym)) { |
| 759 | if (!defined->isLive()) |
| 760 | continue; |
| 761 | if (defined->overridesWeakDef) |
| 762 | addNonWeakDefinition(defined); |
| 763 | if (!defined->isAbsolute() && isCodeSection(defined->isec())) |
| 764 | in.unwindInfo->addSymbol(defined); |
| 765 | } else if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) { |
| 766 | // This branch intentionally doesn't check isLive(). |
| 767 | if (dysym->isDynamicLookup()) |
| 768 | continue; |
| 769 | dysym->getFile()->refState = |
| 770 | std::max(a: dysym->getFile()->refState, b: dysym->getRefState()); |
| 771 | } else if (isa<Undefined>(Val: sym)) { |
| 772 | if (ObjCStubsSection::isObjCStubSymbol(sym)) { |
| 773 | // When -dead_strip is enabled, we don't want to emit any dead stubs. |
| 774 | // Although this stub symbol is yet undefined, addSym() was called |
| 775 | // during MarkLive. |
| 776 | if (config->deadStrip) { |
| 777 | if (!sym->isLive()) |
| 778 | continue; |
| 779 | } |
| 780 | in.objcStubs->addEntry(sym); |
| 781 | } |
| 782 | } |
| 783 | } |
| 784 | |
| 785 | for (const InputFile *file : inputFiles) { |
| 786 | if (auto *objFile = dyn_cast<ObjFile>(Val: file)) |
| 787 | for (Symbol *sym : objFile->symbols) { |
| 788 | if (auto *defined = dyn_cast_or_null<Defined>(Val: sym)) { |
| 789 | if (!defined->isLive()) |
| 790 | continue; |
| 791 | if (!defined->isExternal() && !defined->isAbsolute() && |
| 792 | isCodeSection(defined->isec())) |
| 793 | in.unwindInfo->addSymbol(defined); |
| 794 | } |
| 795 | } |
| 796 | } |
| 797 | } |
| 798 | |
| 799 | // TODO: ld64 enforces the old load commands in a few other cases. |
| 800 | static bool useLCBuildVersion(const PlatformInfo &platformInfo) { |
| 801 | static const std::array<std::pair<PlatformType, VersionTuple>, 7> minVersion = |
| 802 | {._M_elems: {{PLATFORM_MACOS, VersionTuple(10, 14)}, |
| 803 | {PLATFORM_IOS, VersionTuple(12, 0)}, |
| 804 | {PLATFORM_IOSSIMULATOR, VersionTuple(13, 0)}, |
| 805 | {PLATFORM_TVOS, VersionTuple(12, 0)}, |
| 806 | {PLATFORM_TVOSSIMULATOR, VersionTuple(13, 0)}, |
| 807 | {PLATFORM_WATCHOS, VersionTuple(5, 0)}, |
| 808 | {PLATFORM_WATCHOSSIMULATOR, VersionTuple(6, 0)}}}; |
| 809 | auto it = llvm::find_if(Range: minVersion, P: [&](const auto &p) { |
| 810 | return p.first == platformInfo.target.Platform; |
| 811 | }); |
| 812 | return it == minVersion.end() |
| 813 | ? true |
| 814 | : platformInfo.target.MinDeployment >= it->second; |
| 815 | } |
| 816 | |
| 817 | template <class LP> void Writer::createLoadCommands() { |
| 818 | uint8_t segIndex = 0; |
| 819 | for (OutputSegment *seg : outputSegments) { |
| 820 | in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg)); |
| 821 | seg->index = segIndex++; |
| 822 | } |
| 823 | |
| 824 | if (config->emitChainedFixups) { |
| 825 | in.header->addLoadCommand(make<LCChainedFixups>(args&: in.chainedFixups)); |
| 826 | in.header->addLoadCommand(make<LCExportsTrie>(args&: in.exports)); |
| 827 | } else { |
| 828 | in.header->addLoadCommand(make<LCDyldInfo>( |
| 829 | args&: in.rebase, args&: in.binding, args&: in.weakBinding, args&: in.lazyBinding, args&: in.exports)); |
| 830 | } |
| 831 | in.header->addLoadCommand(make<LCSymtab>(args&: symtabSection, args&: stringTableSection)); |
| 832 | in.header->addLoadCommand( |
| 833 | make<LCDysymtab>(args&: symtabSection, args&: indirectSymtabSection)); |
| 834 | if (!config->umbrella.empty()) |
| 835 | in.header->addLoadCommand(make<LCSubFramework>(args&: config->umbrella)); |
| 836 | if (config->emitEncryptionInfo) |
| 837 | in.header->addLoadCommand(make<LCEncryptionInfo<LP>>()); |
| 838 | for (StringRef path : config->runtimePaths) |
| 839 | in.header->addLoadCommand(make<LCRPath>(args&: path)); |
| 840 | |
| 841 | switch (config->outputType) { |
| 842 | case MH_EXECUTE: |
| 843 | in.header->addLoadCommand(make<LCLoadDylinker>()); |
| 844 | break; |
| 845 | case MH_DYLIB: |
| 846 | in.header->addLoadCommand(make<LCDylib>(args: LC_ID_DYLIB, args&: config->installName, |
| 847 | args&: config->dylibCompatibilityVersion, |
| 848 | args&: config->dylibCurrentVersion)); |
| 849 | for (StringRef client : config->allowableClients) |
| 850 | in.header->addLoadCommand(make<LCSubClient>(args&: client)); |
| 851 | break; |
| 852 | case MH_BUNDLE: |
| 853 | break; |
| 854 | default: |
| 855 | llvm_unreachable("unhandled output file type" ); |
| 856 | } |
| 857 | |
| 858 | if (config->generateUuid) { |
| 859 | uuidCommand = make<LCUuid>(); |
| 860 | in.header->addLoadCommand(uuidCommand); |
| 861 | } |
| 862 | |
| 863 | if (useLCBuildVersion(platformInfo: config->platformInfo)) |
| 864 | in.header->addLoadCommand(make<LCBuildVersion>(args&: config->platformInfo)); |
| 865 | else |
| 866 | in.header->addLoadCommand(make<LCMinVersion>(args&: config->platformInfo)); |
| 867 | |
| 868 | if (config->secondaryPlatformInfo) { |
| 869 | in.header->addLoadCommand( |
| 870 | make<LCBuildVersion>(args&: *config->secondaryPlatformInfo)); |
| 871 | } |
| 872 | |
| 873 | // This is down here to match ld64's load command order. |
| 874 | if (config->outputType == MH_EXECUTE) |
| 875 | in.header->addLoadCommand(make<LCMain>()); |
| 876 | |
| 877 | // See ld64's OutputFile::buildDylibOrdinalMapping for the corresponding |
| 878 | // library ordinal computation code in ld64. |
| 879 | int64_t dylibOrdinal = 1; |
| 880 | DenseMap<StringRef, int64_t> ordinalForInstallName; |
| 881 | |
| 882 | std::vector<DylibFile *> dylibFiles; |
| 883 | for (InputFile *file : inputFiles) { |
| 884 | if (auto *dylibFile = dyn_cast<DylibFile>(Val: file)) |
| 885 | dylibFiles.push_back(x: dylibFile); |
| 886 | } |
| 887 | for (size_t i = 0; i < dylibFiles.size(); ++i) |
| 888 | dylibFiles.insert(position: dylibFiles.end(), first: dylibFiles[i]->extraDylibs.begin(), |
| 889 | last: dylibFiles[i]->extraDylibs.end()); |
| 890 | |
| 891 | for (DylibFile *dylibFile : dylibFiles) { |
| 892 | if (dylibFile->isBundleLoader) { |
| 893 | dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE; |
| 894 | // Shortcut since bundle-loader does not re-export the symbols. |
| 895 | |
| 896 | dylibFile->reexport = false; |
| 897 | continue; |
| 898 | } |
| 899 | |
| 900 | // Don't emit load commands for a dylib that is not referenced if: |
| 901 | // - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER -- |
| 902 | // if it's on the linker command line, it's explicit) |
| 903 | // - or it's marked MH_DEAD_STRIPPABLE_DYLIB |
| 904 | // - or the flag -dead_strip_dylibs is used |
| 905 | // FIXME: `isReferenced()` is currently computed before dead code |
| 906 | // stripping, so references from dead code keep a dylib alive. This |
| 907 | // matches ld64, but it's something we should do better. |
| 908 | if (!dylibFile->isReferenced() && !dylibFile->forceNeeded && |
| 909 | (!dylibFile->isExplicitlyLinked() || dylibFile->deadStrippable || |
| 910 | config->deadStripDylibs)) |
| 911 | continue; |
| 912 | |
| 913 | // Several DylibFiles can have the same installName. Only emit a single |
| 914 | // load command for that installName and give all these DylibFiles the |
| 915 | // same ordinal. |
| 916 | // This can happen in several cases: |
| 917 | // - a new framework could change its installName to an older |
| 918 | // framework name via an $ld$ symbol depending on platform_version |
| 919 | // - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd; |
| 920 | // Foo.framework/Foo.tbd is usually a symlink to |
| 921 | // Foo.framework/Versions/Current/Foo.tbd, where |
| 922 | // Foo.framework/Versions/Current is usually a symlink to |
| 923 | // Foo.framework/Versions/A) |
| 924 | // - a framework can be linked both explicitly on the linker |
| 925 | // command line and implicitly as a reexport from a different |
| 926 | // framework. The re-export will usually point to the tbd file |
| 927 | // in Foo.framework/Versions/A/Foo.tbd, while the explicit link will |
| 928 | // usually find Foo.framework/Foo.tbd. These are usually symlinks, |
| 929 | // but in a --reproduce archive they will be identical but distinct |
| 930 | // files. |
| 931 | // In the first case, *semantically distinct* DylibFiles will have the |
| 932 | // same installName. |
| 933 | int64_t &ordinal = ordinalForInstallName[dylibFile->installName]; |
| 934 | if (ordinal) { |
| 935 | dylibFile->ordinal = ordinal; |
| 936 | continue; |
| 937 | } |
| 938 | |
| 939 | ordinal = dylibFile->ordinal = dylibOrdinal++; |
| 940 | LoadCommandType lcType = LC_LOAD_DYLIB; |
| 941 | if (dylibFile->reexport) { |
| 942 | if (dylibFile->forceWeakImport) |
| 943 | warn(msg: path::filename(path: dylibFile->getName()) + |
| 944 | " is re-exported so cannot be weak-linked" ); |
| 945 | |
| 946 | lcType = LC_REEXPORT_DYLIB; |
| 947 | } else if (dylibFile->forceWeakImport || |
| 948 | dylibFile->refState == RefState::Weak) { |
| 949 | lcType = LC_LOAD_WEAK_DYLIB; |
| 950 | } |
| 951 | in.header->addLoadCommand(make<LCDylib>(args&: lcType, args&: dylibFile->installName, |
| 952 | args&: dylibFile->compatibilityVersion, |
| 953 | args&: dylibFile->currentVersion)); |
| 954 | } |
| 955 | |
| 956 | for (const auto &dyldEnv : config->dyldEnvs) |
| 957 | in.header->addLoadCommand(make<LCDyldEnv>(args: dyldEnv)); |
| 958 | |
| 959 | if (functionStartsSection) |
| 960 | in.header->addLoadCommand(make<LCFunctionStarts>(args&: functionStartsSection)); |
| 961 | if (dataInCodeSection) |
| 962 | in.header->addLoadCommand(make<LCDataInCode>(args&: dataInCodeSection)); |
| 963 | if (codeSignatureSection) |
| 964 | in.header->addLoadCommand(make<LCCodeSignature>(args&: codeSignatureSection)); |
| 965 | |
| 966 | const uint32_t MACOS_MAXPATHLEN = 1024; |
| 967 | config->headerPad = std::max( |
| 968 | a: config->headerPad, b: (config->headerPadMaxInstallNames |
| 969 | ? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN |
| 970 | : 0)); |
| 971 | } |
| 972 | |
| 973 | // Sorting only can happen once all outputs have been collected. Here we sort |
| 974 | // segments, output sections within each segment, and input sections within each |
| 975 | // output segment. |
| 976 | static void sortSegmentsAndSections() { |
| 977 | TimeTraceScope timeScope("Sort segments and sections" ); |
| 978 | sortOutputSegments(); |
| 979 | |
| 980 | DenseMap<const InputSection *, int> isecPriorities = |
| 981 | priorityBuilder.buildInputSectionPriorities(); |
| 982 | |
| 983 | uint32_t sectionIndex = 0; |
| 984 | for (OutputSegment *seg : outputSegments) { |
| 985 | seg->sortOutputSections(); |
| 986 | // References from thread-local variable sections are treated as offsets |
| 987 | // relative to the start of the thread-local data memory area, which |
| 988 | // is initialized via copying all the TLV data sections (which are all |
| 989 | // contiguous). If later data sections require a greater alignment than |
| 990 | // earlier ones, the offsets of data within those sections won't be |
| 991 | // guaranteed to aligned unless we normalize alignments. We therefore use |
| 992 | // the largest alignment for all TLV data sections. |
| 993 | uint32_t tlvAlign = 0; |
| 994 | for (const OutputSection *osec : seg->getSections()) |
| 995 | if (isThreadLocalData(flags: osec->flags) && osec->align > tlvAlign) |
| 996 | tlvAlign = osec->align; |
| 997 | |
| 998 | for (OutputSection *osec : seg->getSections()) { |
| 999 | // Now that the output sections are sorted, assign the final |
| 1000 | // output section indices. |
| 1001 | if (!osec->isHidden()) |
| 1002 | osec->index = ++sectionIndex; |
| 1003 | if (isThreadLocalData(flags: osec->flags)) { |
| 1004 | if (!firstTLVDataSection) |
| 1005 | firstTLVDataSection = osec; |
| 1006 | osec->align = tlvAlign; |
| 1007 | } |
| 1008 | |
| 1009 | if (!isecPriorities.empty()) { |
| 1010 | if (auto *merged = dyn_cast<ConcatOutputSection>(Val: osec)) { |
| 1011 | llvm::stable_sort( |
| 1012 | Range&: merged->inputs, C: [&](InputSection *a, InputSection *b) { |
| 1013 | return isecPriorities.lookup(Val: a) < isecPriorities.lookup(Val: b); |
| 1014 | }); |
| 1015 | } |
| 1016 | } |
| 1017 | } |
| 1018 | } |
| 1019 | } |
| 1020 | |
| 1021 | template <class LP> void Writer::createOutputSections() { |
| 1022 | TimeTraceScope timeScope("Create output sections" ); |
| 1023 | // First, create hidden sections |
| 1024 | stringTableSection = make<StringTableSection>(); |
| 1025 | symtabSection = makeSymtabSection<LP>(*stringTableSection); |
| 1026 | indirectSymtabSection = make<IndirectSymtabSection>(); |
| 1027 | if (config->adhocCodesign) |
| 1028 | codeSignatureSection = make<CodeSignatureSection>(); |
| 1029 | if (config->emitDataInCodeInfo) |
| 1030 | dataInCodeSection = make<DataInCodeSection>(); |
| 1031 | if (config->emitFunctionStarts) |
| 1032 | functionStartsSection = make<FunctionStartsSection>(); |
| 1033 | |
| 1034 | switch (config->outputType) { |
| 1035 | case MH_EXECUTE: |
| 1036 | make<PageZeroSection>(); |
| 1037 | break; |
| 1038 | case MH_DYLIB: |
| 1039 | case MH_BUNDLE: |
| 1040 | break; |
| 1041 | default: |
| 1042 | llvm_unreachable("unhandled output file type" ); |
| 1043 | } |
| 1044 | |
| 1045 | // Then add input sections to output sections. |
| 1046 | for (ConcatInputSection *isec : inputSections) { |
| 1047 | if (isec->shouldOmitFromOutput()) |
| 1048 | continue; |
| 1049 | ConcatOutputSection *osec = cast<ConcatOutputSection>(Val: isec->parent); |
| 1050 | osec->addInput(input: isec); |
| 1051 | osec->inputOrder = |
| 1052 | std::min(a: osec->inputOrder, b: static_cast<int>(isec->outSecOff)); |
| 1053 | } |
| 1054 | |
| 1055 | // Once all the inputs are added, we can finalize the output section |
| 1056 | // properties and create the corresponding output segments. |
| 1057 | for (const auto &it : concatOutputSections) { |
| 1058 | StringRef segname = it.first.first; |
| 1059 | ConcatOutputSection *osec = it.second; |
| 1060 | assert(segname != segment_names::ld); |
| 1061 | if (osec->isNeeded()) { |
| 1062 | // See comment in ObjFile::splitEhFrames() |
| 1063 | if (osec->name == section_names::ehFrame && |
| 1064 | segname == segment_names::text) |
| 1065 | osec->align = target->wordSize; |
| 1066 | |
| 1067 | // MC keeps the default 1-byte alignment for __thread_vars, even though it |
| 1068 | // contains pointers that are fixed up by dyld, which requires proper |
| 1069 | // alignment. |
| 1070 | if (isThreadLocalVariables(flags: osec->flags)) |
| 1071 | osec->align = std::max<uint32_t>(a: osec->align, b: target->wordSize); |
| 1072 | |
| 1073 | getOrCreateOutputSegment(name: segname)->addOutputSection(os: osec); |
| 1074 | } |
| 1075 | } |
| 1076 | |
| 1077 | for (SyntheticSection *ssec : syntheticSections) { |
| 1078 | auto it = concatOutputSections.find(Key: {ssec->segname, ssec->name}); |
| 1079 | // We add all LinkEdit sections here because we don't know if they are |
| 1080 | // needed until their finalizeContents() methods get called later. While |
| 1081 | // this means that we add some redundant sections to __LINKEDIT, there is |
| 1082 | // is no redundancy in the output, as we do not emit section headers for |
| 1083 | // any LinkEdit sections. |
| 1084 | if (ssec->isNeeded() || ssec->segname == segment_names::linkEdit) { |
| 1085 | if (it == concatOutputSections.end()) { |
| 1086 | getOrCreateOutputSegment(name: ssec->segname)->addOutputSection(os: ssec); |
| 1087 | } else { |
| 1088 | fatal(msg: "section from " + |
| 1089 | toString(file: it->second->firstSection()->getFile()) + |
| 1090 | " conflicts with synthetic section " + ssec->segname + "," + |
| 1091 | ssec->name); |
| 1092 | } |
| 1093 | } |
| 1094 | } |
| 1095 | |
| 1096 | // dyld requires __LINKEDIT segment to always exist (even if empty). |
| 1097 | linkEditSegment = getOrCreateOutputSegment(name: segment_names::linkEdit); |
| 1098 | } |
| 1099 | |
| 1100 | void Writer::finalizeAddresses() { |
| 1101 | TimeTraceScope timeScope("Finalize addresses" ); |
| 1102 | uint64_t pageSize = target->getPageSize(); |
| 1103 | |
| 1104 | // We could parallelize this loop, but local benchmarking indicates it is |
| 1105 | // faster to do it all in the main thread. |
| 1106 | for (OutputSegment *seg : outputSegments) { |
| 1107 | if (seg == linkEditSegment) |
| 1108 | continue; |
| 1109 | for (OutputSection *osec : seg->getSections()) { |
| 1110 | if (!osec->isNeeded()) |
| 1111 | continue; |
| 1112 | // Other kinds of OutputSections have already been finalized. |
| 1113 | if (auto *concatOsec = dyn_cast<ConcatOutputSection>(Val: osec)) |
| 1114 | concatOsec->finalizeContents(); |
| 1115 | } |
| 1116 | } |
| 1117 | |
| 1118 | // Ensure that segments (and the sections they contain) are allocated |
| 1119 | // addresses in ascending order, which dyld requires. |
| 1120 | // |
| 1121 | // Note that at this point, __LINKEDIT sections are empty, but we need to |
| 1122 | // determine addresses of other segments/sections before generating its |
| 1123 | // contents. |
| 1124 | for (OutputSegment *seg : outputSegments) { |
| 1125 | if (seg == linkEditSegment) |
| 1126 | continue; |
| 1127 | seg->addr = addr; |
| 1128 | assignAddresses(seg); |
| 1129 | // codesign / libstuff checks for segment ordering by verifying that |
| 1130 | // `fileOff + fileSize == next segment fileOff`. So we call |
| 1131 | // alignToPowerOf2() before (instead of after) computing fileSize to ensure |
| 1132 | // that the segments are contiguous. We handle addr / vmSize similarly for |
| 1133 | // the same reason. |
| 1134 | fileOff = alignToPowerOf2(Value: fileOff, Align: pageSize); |
| 1135 | addr = alignToPowerOf2(Value: addr, Align: pageSize); |
| 1136 | seg->vmSize = addr - seg->addr; |
| 1137 | seg->fileSize = fileOff - seg->fileOff; |
| 1138 | seg->assignAddressesToStartEndSymbols(); |
| 1139 | } |
| 1140 | } |
| 1141 | |
| 1142 | void Writer::finalizeLinkEditSegment() { |
| 1143 | TimeTraceScope timeScope("Finalize __LINKEDIT segment" ); |
| 1144 | // Fill __LINKEDIT contents. |
| 1145 | std::array<LinkEditSection *, 10> linkEditSections{ |
| 1146 | in.rebase, in.binding, |
| 1147 | in.weakBinding, in.lazyBinding, |
| 1148 | in.exports, in.chainedFixups, |
| 1149 | symtabSection, indirectSymtabSection, |
| 1150 | dataInCodeSection, functionStartsSection, |
| 1151 | }; |
| 1152 | |
| 1153 | parallelForEach(Begin: linkEditSections.begin(), End: linkEditSections.end(), |
| 1154 | Fn: [](LinkEditSection *osec) { |
| 1155 | if (osec) |
| 1156 | osec->finalizeContents(); |
| 1157 | }); |
| 1158 | |
| 1159 | // Now that __LINKEDIT is filled out, do a proper calculation of its |
| 1160 | // addresses and offsets. |
| 1161 | linkEditSegment->addr = addr; |
| 1162 | assignAddresses(linkEditSegment); |
| 1163 | // No need to page-align fileOff / addr here since this is the last segment. |
| 1164 | linkEditSegment->vmSize = addr - linkEditSegment->addr; |
| 1165 | linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff; |
| 1166 | } |
| 1167 | |
| 1168 | void Writer::assignAddresses(OutputSegment *seg) { |
| 1169 | seg->fileOff = fileOff; |
| 1170 | |
| 1171 | for (OutputSection *osec : seg->getSections()) { |
| 1172 | if (!osec->isNeeded()) |
| 1173 | continue; |
| 1174 | addr = alignToPowerOf2(Value: addr, Align: osec->align); |
| 1175 | fileOff = alignToPowerOf2(Value: fileOff, Align: osec->align); |
| 1176 | osec->addr = addr; |
| 1177 | osec->fileOff = isZeroFill(flags: osec->flags) ? 0 : fileOff; |
| 1178 | osec->finalize(); |
| 1179 | osec->assignAddressesToStartEndSymbols(); |
| 1180 | |
| 1181 | addr += osec->getSize(); |
| 1182 | fileOff += osec->getFileSize(); |
| 1183 | } |
| 1184 | } |
| 1185 | |
| 1186 | void Writer::openFile() { |
| 1187 | Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr = |
| 1188 | FileOutputBuffer::create(FilePath: config->outputFile, Size: fileOff, |
| 1189 | Flags: FileOutputBuffer::F_executable); |
| 1190 | |
| 1191 | if (!bufferOrErr) |
| 1192 | fatal(msg: "failed to open " + config->outputFile + ": " + |
| 1193 | llvm::toString(E: bufferOrErr.takeError())); |
| 1194 | buffer = std::move(*bufferOrErr); |
| 1195 | in.bufferStart = buffer->getBufferStart(); |
| 1196 | } |
| 1197 | |
| 1198 | void Writer::writeSections() { |
| 1199 | TimeTraceScope timeScope("Write output sections" ); |
| 1200 | |
| 1201 | uint8_t *buf = buffer->getBufferStart(); |
| 1202 | std::vector<const OutputSection *> osecs; |
| 1203 | for (const OutputSegment *seg : outputSegments) |
| 1204 | append_range(C&: osecs, R: seg->getSections()); |
| 1205 | |
| 1206 | parallelForEach(Begin: osecs.begin(), End: osecs.end(), Fn: [&](const OutputSection *osec) { |
| 1207 | osec->writeTo(buf: buf + osec->fileOff); |
| 1208 | }); |
| 1209 | } |
| 1210 | |
| 1211 | void Writer::applyOptimizationHints() { |
| 1212 | if (config->arch() != AK_arm64 || config->ignoreOptimizationHints) |
| 1213 | return; |
| 1214 | |
| 1215 | uint8_t *buf = buffer->getBufferStart(); |
| 1216 | TimeTraceScope timeScope("Apply linker optimization hints" ); |
| 1217 | parallelForEach(R&: inputFiles, Fn: [buf](const InputFile *file) { |
| 1218 | if (const auto *objFile = dyn_cast<ObjFile>(Val: file)) |
| 1219 | target->applyOptimizationHints(buf, *objFile); |
| 1220 | }); |
| 1221 | } |
| 1222 | |
| 1223 | // In order to utilize multiple cores, we first split the buffer into chunks, |
| 1224 | // compute a hash for each chunk, and then compute a hash value of the hash |
| 1225 | // values. |
| 1226 | void Writer::writeUuid() { |
| 1227 | TimeTraceScope timeScope("Computing UUID" ); |
| 1228 | |
| 1229 | ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()}; |
| 1230 | std::vector<ArrayRef<uint8_t>> chunks = split(arr: data, chunkSize: 1024 * 1024); |
| 1231 | |
| 1232 | // Leave one slot for filename |
| 1233 | std::vector<uint64_t> hashes(chunks.size() + 1); |
| 1234 | parallelFor(Begin: 0, End: chunks.size(), |
| 1235 | Fn: [&](size_t i) { hashes[i] = xxh3_64bits(data: chunks[i]); }); |
| 1236 | // Append the output filename so that identical binaries with different names |
| 1237 | // don't get the same UUID. |
| 1238 | hashes[chunks.size()] = xxh3_64bits(data: sys::path::filename(path: config->finalOutput)); |
| 1239 | |
| 1240 | uint64_t digest = xxh3_64bits(data: {reinterpret_cast<uint8_t *>(hashes.data()), |
| 1241 | hashes.size() * sizeof(uint64_t)}); |
| 1242 | uuidCommand->writeUuid(digest); |
| 1243 | } |
| 1244 | |
| 1245 | // This is step 5 of the algorithm described in the class comment of |
| 1246 | // ChainedFixupsSection. |
| 1247 | void Writer::buildFixupChains() { |
| 1248 | if (!config->emitChainedFixups) |
| 1249 | return; |
| 1250 | |
| 1251 | const std::vector<Location> &loc = in.chainedFixups->getLocations(); |
| 1252 | if (loc.empty()) |
| 1253 | return; |
| 1254 | |
| 1255 | TimeTraceScope timeScope("Build fixup chains" ); |
| 1256 | |
| 1257 | const uint64_t pageSize = target->getPageSize(); |
| 1258 | constexpr uint32_t stride = 4; // for DYLD_CHAINED_PTR_64 |
| 1259 | |
| 1260 | for (size_t i = 0, count = loc.size(); i < count;) { |
| 1261 | const OutputSegment *oseg = loc[i].isec->parent->parent; |
| 1262 | uint8_t *buf = buffer->getBufferStart() + oseg->fileOff; |
| 1263 | uint64_t pageIdx = loc[i].offset / pageSize; |
| 1264 | ++i; |
| 1265 | |
| 1266 | while (i < count && loc[i].isec->parent->parent == oseg && |
| 1267 | (loc[i].offset / pageSize) == pageIdx) { |
| 1268 | uint64_t offset = loc[i].offset - loc[i - 1].offset; |
| 1269 | |
| 1270 | auto fail = [&](Twine message) { |
| 1271 | error(msg: loc[i].isec->getSegName() + "," + loc[i].isec->getName() + |
| 1272 | ", offset " + |
| 1273 | Twine(loc[i].offset - loc[i].isec->parent->getSegmentOffset()) + |
| 1274 | ": " + message); |
| 1275 | }; |
| 1276 | |
| 1277 | if (offset < target->wordSize) |
| 1278 | return fail("fixups overlap" ); |
| 1279 | if (offset % stride != 0) |
| 1280 | return fail( |
| 1281 | "fixups are unaligned (offset " + Twine(offset) + |
| 1282 | " is not a multiple of the stride). Re-link with -no_fixup_chains" ); |
| 1283 | |
| 1284 | // The "next" field is in the same location for bind and rebase entries. |
| 1285 | reinterpret_cast<dyld_chained_ptr_64_bind *>(buf + loc[i - 1].offset) |
| 1286 | ->next = offset / stride; |
| 1287 | ++i; |
| 1288 | } |
| 1289 | } |
| 1290 | } |
| 1291 | |
| 1292 | void Writer::writeCodeSignature() { |
| 1293 | if (codeSignatureSection) { |
| 1294 | TimeTraceScope timeScope("Write code signature" ); |
| 1295 | codeSignatureSection->writeHashes(buf: buffer->getBufferStart()); |
| 1296 | } |
| 1297 | } |
| 1298 | |
| 1299 | void Writer::writeOutputFile() { |
| 1300 | TimeTraceScope timeScope("Write output file" ); |
| 1301 | openFile(); |
| 1302 | reportPendingUndefinedSymbols(); |
| 1303 | if (errorCount()) |
| 1304 | return; |
| 1305 | writeSections(); |
| 1306 | applyOptimizationHints(); |
| 1307 | buildFixupChains(); |
| 1308 | if (config->generateUuid) |
| 1309 | writeUuid(); |
| 1310 | writeCodeSignature(); |
| 1311 | |
| 1312 | if (auto e = buffer->commit()) |
| 1313 | fatal(msg: "failed to write output '" + buffer->getPath() + |
| 1314 | "': " + toString(E: std::move(e))); |
| 1315 | } |
| 1316 | |
| 1317 | template <class LP> void Writer::run() { |
| 1318 | treatSpecialUndefineds(); |
| 1319 | if (config->entry && needsBinding(sym: config->entry)) |
| 1320 | in.stubs->addEntry(config->entry); |
| 1321 | |
| 1322 | // Canonicalization of all pointers to InputSections should be handled by |
| 1323 | // these two scan* methods. I.e. from this point onward, for all live |
| 1324 | // InputSections, we should have `isec->canonical() == isec`. |
| 1325 | scanSymbols(); |
| 1326 | if (in.objcStubs->isNeeded()) |
| 1327 | in.objcStubs->setUp(); |
| 1328 | if (in.objcMethList->isNeeded()) |
| 1329 | in.objcMethList->setUp(); |
| 1330 | scanRelocations(); |
| 1331 | if (in.initOffsets->isNeeded()) |
| 1332 | in.initOffsets->setUp(); |
| 1333 | |
| 1334 | // Do not proceed if there were undefined or duplicate symbols. |
| 1335 | reportPendingUndefinedSymbols(); |
| 1336 | reportPendingDuplicateSymbols(); |
| 1337 | if (errorCount()) |
| 1338 | return; |
| 1339 | |
| 1340 | if (in.stubHelper && in.stubHelper->isNeeded()) |
| 1341 | in.stubHelper->setUp(); |
| 1342 | |
| 1343 | if (in.objCImageInfo->isNeeded()) |
| 1344 | in.objCImageInfo->finalizeContents(); |
| 1345 | |
| 1346 | // At this point, we should know exactly which output sections are needed, |
| 1347 | // courtesy of scanSymbols() and scanRelocations(). |
| 1348 | createOutputSections<LP>(); |
| 1349 | |
| 1350 | // After this point, we create no new segments; HOWEVER, we might |
| 1351 | // yet create branch-range extension thunks for architectures whose |
| 1352 | // hardware call instructions have limited range, e.g., ARM(64). |
| 1353 | // The thunks are created as InputSections interspersed among |
| 1354 | // the ordinary __TEXT,_text InputSections. |
| 1355 | sortSegmentsAndSections(); |
| 1356 | createLoadCommands<LP>(); |
| 1357 | finalizeAddresses(); |
| 1358 | |
| 1359 | llvm::thread mapFileWriter([&] { |
| 1360 | if (LLVM_ENABLE_THREADS && config->timeTraceEnabled) |
| 1361 | timeTraceProfilerInitialize(TimeTraceGranularity: config->timeTraceGranularity, ProcName: "writeMapFile" ); |
| 1362 | writeMapFile(); |
| 1363 | if (LLVM_ENABLE_THREADS && config->timeTraceEnabled) |
| 1364 | timeTraceProfilerFinishThread(); |
| 1365 | }); |
| 1366 | |
| 1367 | finalizeLinkEditSegment(); |
| 1368 | writeOutputFile(); |
| 1369 | mapFileWriter.join(); |
| 1370 | } |
| 1371 | |
| 1372 | template <class LP> void macho::writeResult() { Writer().run<LP>(); } |
| 1373 | |
| 1374 | void macho::resetWriter() { LCDylib::resetInstanceCount(); } |
| 1375 | |
| 1376 | void macho::createSyntheticSections() { |
| 1377 | in.header = make<MachHeaderSection>(); |
| 1378 | if (config->dedupStrings) |
| 1379 | in.cStringSection = |
| 1380 | make<DeduplicatedCStringSection>(args: section_names::cString); |
| 1381 | else |
| 1382 | in.cStringSection = make<CStringSection>(args: section_names::cString); |
| 1383 | in.objcMethnameSection = |
| 1384 | make<DeduplicatedCStringSection>(args: section_names::objcMethname); |
| 1385 | in.wordLiteralSection = make<WordLiteralSection>(); |
| 1386 | if (config->emitChainedFixups) { |
| 1387 | in.chainedFixups = make<ChainedFixupsSection>(); |
| 1388 | } else { |
| 1389 | in.rebase = make<RebaseSection>(); |
| 1390 | in.binding = make<BindingSection>(); |
| 1391 | in.weakBinding = make<WeakBindingSection>(); |
| 1392 | in.lazyBinding = make<LazyBindingSection>(); |
| 1393 | in.lazyPointers = make<LazyPointerSection>(); |
| 1394 | in.stubHelper = make<StubHelperSection>(); |
| 1395 | } |
| 1396 | in.exports = make<ExportSection>(); |
| 1397 | in.got = make<GotSection>(); |
| 1398 | in.tlvPointers = make<TlvPointerSection>(); |
| 1399 | in.stubs = make<StubsSection>(); |
| 1400 | in.objcStubs = make<ObjCStubsSection>(); |
| 1401 | in.unwindInfo = makeUnwindInfoSection(); |
| 1402 | in.objCImageInfo = make<ObjCImageInfoSection>(); |
| 1403 | in.initOffsets = make<InitOffsetsSection>(); |
| 1404 | in.objcMethList = make<ObjCMethListSection>(); |
| 1405 | |
| 1406 | // This section contains space for just a single word, and will be used by |
| 1407 | // dyld to cache an address to the image loader it uses. |
| 1408 | uint8_t *arr = bAlloc().Allocate<uint8_t>(Num: target->wordSize); |
| 1409 | memset(s: arr, c: 0, n: target->wordSize); |
| 1410 | in.imageLoaderCache = makeSyntheticInputSection( |
| 1411 | segName: segment_names::data, sectName: section_names::data, flags: S_REGULAR, |
| 1412 | data: ArrayRef<uint8_t>{arr, target->wordSize}, |
| 1413 | /*align=*/target->wordSize); |
| 1414 | assert(in.imageLoaderCache->live); |
| 1415 | } |
| 1416 | |
// The first thread-local data output section encountered by
// sortSegmentsAndSections(); null until that function has found one.
OutputSection *macho::firstTLVDataSection = nullptr;

// Explicit instantiations of writeResult() for the LP64 and ILP32 layouts.
template void macho::writeResult<LP64>();
template void macho::writeResult<ILP32>();
| 1421 | |