| 1 | //===- ConcatOutputSection.cpp --------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "ConcatOutputSection.h" |
| 10 | #include "Config.h" |
| 11 | #include "OutputSegment.h" |
| 12 | #include "SymbolTable.h" |
| 13 | #include "Symbols.h" |
| 14 | #include "SyntheticSections.h" |
| 15 | #include "Target.h" |
| 16 | #include "lld/Common/CommonLinkerContext.h" |
| 17 | #include "llvm/BinaryFormat/MachO.h" |
| 18 | #include <deque> |
| 19 | |
| 20 | using namespace llvm; |
| 21 | using namespace llvm::MachO; |
| 22 | using namespace lld; |
| 23 | using namespace lld::macho; |
| 24 | |
| 25 | MapVector<NamePair, ConcatOutputSection *> macho::concatOutputSections; |
| 26 | |
| 27 | void ConcatOutputSection::addInput(ConcatInputSection *input) { |
| 28 | assert(input->parent == this); |
| 29 | if (inputs.empty()) { |
| 30 | align = input->align; |
| 31 | flags = input->getFlags(); |
| 32 | } else { |
| 33 | align = std::max(a: align, b: input->align); |
| 34 | finalizeFlags(input); |
| 35 | } |
| 36 | inputs.push_back(x: input); |
| 37 | } |
| 38 | |
| 39 | // Branch-range extension can be implemented in two ways, either through ... |
| 40 | // |
| 41 | // (1) Branch islands: Single branch instructions (also of limited range), |
| 42 | // that might be chained in multiple hops to reach the desired |
| 43 | // destination. On ARM64, as 16 branch islands are needed to hop between |
| 44 | // opposite ends of a 2 GiB program. LD64 uses branch islands exclusively, |
| 45 | // even when it needs excessive hops. |
| 46 | // |
| 47 | // (2) Thunks: Instruction(s) to load the destination address into a scratch |
| 48 | // register, followed by a register-indirect branch. Thunks are |
| 49 | // constructed to reach any arbitrary address, so need not be |
| 50 | // chained. Although thunks need not be chained, a program might need |
| 51 | // multiple thunks to the same destination distributed throughout a large |
| 52 | // program so that all call sites can have one within range. |
| 53 | // |
| 54 | // The optimal approach is to mix islands for destinations within two hops, |
| 55 | // and use thunks for destinations at greater distance. For now, we only |
| 56 | // implement thunks. TODO: Adding support for branch islands! |
| 57 | |
| 58 | DenseMap<ThunkKey, ThunkInfo, ThunkMapKeyInfo> lld::macho::thunkMap; |
| 59 | |
| 60 | // Determine whether we need thunks, which depends on the target arch -- RISC |
| 61 | // (i.e., ARM) generally does because it has limited-range branch/call |
| 62 | // instructions, whereas CISC (i.e., x86) generally doesn't. RISC only needs |
| 63 | // thunks for programs so large that branch source & destination addresses |
| 64 | // might differ more than the range of branch instruction(s). |
| 65 | bool TextOutputSection::needsThunks() const { |
| 66 | if (!target->usesThunks()) |
| 67 | return false; |
| 68 | // FIXME: It is not enough to just estimate the size of this section. We |
| 69 | // should compute parent->needsThunks by estimating the size of all __text |
| 70 | // sections. See https://github.com/llvm/llvm-project/issues/195387 |
| 71 | uint64_t isecAddr = addr; |
| 72 | for (ConcatInputSection *isec : inputs) |
| 73 | isecAddr = alignToPowerOf2(Value: isecAddr, Align: isec->align) + isec->getSize(); |
| 74 | // Other sections besides __text might be small enough to pass this |
| 75 | // test but nevertheless need thunks for calling into other sections. |
| 76 | // An imperfect heuristic to use in this case is that if a section |
| 77 | // we've already processed in this segment needs thunks, so do the |
| 78 | // rest. |
| 79 | bool needsThunks = parent && parent->needsThunks; |
| 80 | |
| 81 | // Calculate the total size of all branch target sections |
| 82 | uint64_t branchTargetsSize = in.stubs->getSize(); |
| 83 | |
| 84 | // Add the size of __objc_stubs section if it exists |
| 85 | if (in.objcStubs && in.objcStubs->isNeeded()) |
| 86 | branchTargetsSize += in.objcStubs->getSize(); |
| 87 | |
| 88 | if (!needsThunks && |
| 89 | isecAddr - addr + branchTargetsSize <= |
| 90 | std::min(a: target->backwardBranchRange, b: target->forwardBranchRange)) |
| 91 | return false; |
| 92 | // Yes, this program is large enough to need thunks. |
| 93 | if (parent) |
| 94 | parent->needsThunks = true; |
| 95 | return true; |
| 96 | } |
| 97 | |
| 98 | void ConcatOutputSection::finalizeOne(ConcatInputSection *isec) { |
| 99 | size = alignToPowerOf2(Value: size, Align: isec->align); |
| 100 | fileSize = alignToPowerOf2(Value: fileSize, Align: isec->align); |
| 101 | isec->outSecOff = size; |
| 102 | isec->isFinal = true; |
| 103 | size += isec->getSize(); |
| 104 | fileSize += isec->getFileSize(); |
| 105 | } |
| 106 | |
| 107 | void ConcatOutputSection::finalizeContents() { |
| 108 | for (ConcatInputSection *isec : inputs) |
| 109 | finalizeOne(isec); |
| 110 | } |
| 111 | |
| 112 | bool TextOutputSection::isTargetKnownInRange(const ConcatInputSection &isec, |
| 113 | const Relocation &r) const { |
| 114 | uint64_t callVA = isec.getVA() + r.offset; |
| 115 | uint64_t lowVA = target->backwardBranchRange < callVA |
| 116 | ? callVA - target->backwardBranchRange |
| 117 | : 0; |
| 118 | uint64_t highVA = callVA + target->forwardBranchRange; |
| 119 | auto *funcSym = cast<Symbol *>(Val: r.referent); |
| 120 | uint64_t funcVA = resolveSymbolOffsetVA(sym: funcSym, type: r.type, offset: r.addend); |
| 121 | // Check if the referent is reachable with a simple call instruction. |
| 122 | return lowVA <= funcVA && funcVA <= highVA; |
| 123 | } |
| 124 | |
| 125 | Defined *TextOutputSection::getThunkInRange(const ConcatInputSection &isec, |
| 126 | const Relocation &r, |
| 127 | const ThunkInfo &thunkInfo) const { |
| 128 | assert(!isTargetKnownInRange(isec, r)); |
| 129 | if (!thunkInfo.sym) |
| 130 | return nullptr; |
| 131 | uint64_t callVA = isec.getVA() + r.offset; |
| 132 | uint64_t lowVA = target->backwardBranchRange < callVA |
| 133 | ? callVA - target->backwardBranchRange |
| 134 | : 0; |
| 135 | uint64_t highVA = callVA + target->forwardBranchRange; |
| 136 | uint64_t thunkVA = thunkInfo.isec->getVA(); |
| 137 | if (lowVA <= thunkVA && thunkVA <= highVA) |
| 138 | return thunkInfo.sym; |
| 139 | return nullptr; |
| 140 | } |
| 141 | |
| 142 | void TextOutputSection::updateBranchTargetToThunk(Relocation &r, |
| 143 | Defined *thunk) { |
| 144 | r.referent = thunk; |
| 145 | // The thunk itself bakes in the addend, so the call-site reloc must |
| 146 | // branch to the thunk start with no extra offset. |
| 147 | r.addend = 0; |
| 148 | ++thunkCallCount; |
| 149 | } |
| 150 | |
| 151 | void TextOutputSection::createThunk(const ConcatInputSection &isec, |
| 152 | Relocation &r, ThunkInfo &thunkInfo) { |
| 153 | assert(getThunkInRange(isec, r, thunkInfo) == nullptr); |
| 154 | assert(isec.isFinal); |
| 155 | uint64_t highVA = isec.getVA() + r.offset + target->forwardBranchRange; |
| 156 | if (addr + size > highVA) { |
| 157 | // There were too many consecutive branch instructions for `slop` |
| 158 | // below. If you hit this: For the current algorithm, just bumping up |
| 159 | // slop below and trying again is probably simplest. (See also PR51578 |
| 160 | // comment 5). |
| 161 | fatal(msg: Twine(__FUNCTION__) + |
| 162 | ": FIXME: thunk range overrun. Consider increasing the " |
| 163 | "slop-scale with `--slop-scale=<unsigned_int>`." ); |
| 164 | } |
| 165 | thunkInfo.isec = makeSyntheticInputSection(segName: isec.getSegName(), sectName: isec.getName()); |
| 166 | thunkInfo.isec->parent = this; |
| 167 | assert(thunkInfo.isec->live); |
| 168 | |
| 169 | std::string addendSuffix; |
| 170 | if (r.addend != 0) |
| 171 | addendSuffix = "+" + std::to_string(val: r.addend); |
| 172 | size_t thunkSize = target->thunkSize; |
| 173 | auto *funcSym = cast<Symbol *>(Val&: r.referent); |
| 174 | StringRef thunkName = |
| 175 | saver().save(S: funcSym->getName() + addendSuffix + ".thunk." + |
| 176 | std::to_string(val: thunkInfo.sequence++)); |
| 177 | if (!isa<Defined>(Val: funcSym) || cast<Defined>(Val: funcSym)->isExternal()) { |
| 178 | thunkInfo.sym = symtab->addDefined( |
| 179 | name: thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0, size: thunkSize, |
| 180 | /*isWeakDef=*/false, /*isPrivateExtern=*/true, |
| 181 | /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, |
| 182 | /*isWeakDefCanBeHidden=*/false); |
| 183 | } else { |
| 184 | thunkInfo.sym = make<Defined>( |
| 185 | args&: thunkName, /*file=*/args: nullptr, args&: thunkInfo.isec, /*value=*/args: 0, args&: thunkSize, |
| 186 | /*isWeakDef=*/args: false, /*isExternal=*/args: false, /*isPrivateExtern=*/args: true, |
| 187 | /*includeInSymtab=*/args: true, /*isReferencedDynamically=*/args: false, |
| 188 | /*noDeadStrip=*/args: false, /*isWeakDefCanBeHidden=*/args: false); |
| 189 | } |
| 190 | thunkInfo.sym->used = true; |
| 191 | target->populateThunk(thunk: thunkInfo.isec, funcSym, addend: r.addend); |
| 192 | updateBranchTargetToThunk(r, thunk: thunkInfo.sym); |
| 193 | finalizeOne(isec: thunkInfo.isec); |
| 194 | thunks.push_back(x: thunkInfo.isec); |
| 195 | } |
| 196 | |
| 197 | std::optional<uint64_t> |
| 198 | TextOutputSection::estimateStubsEndVA(unsigned numPotentialThunks) const { |
| 199 | if (!parent) |
| 200 | return std::nullopt; |
| 201 | |
| 202 | auto sections = |
| 203 | ArrayRef(parent->getSections()) |
| 204 | .drop_until(Pred: [&](const OutputSection *osec) { return osec == this; }); |
| 205 | |
| 206 | // Walk backwards to find the last stubs section |
| 207 | while (!sections.empty()) { |
| 208 | auto *osec = sections.back(); |
| 209 | if (osec->isNeeded() && (osec == in.stubs || osec == in.objcStubs)) |
| 210 | break; |
| 211 | sections.consume_back(); |
| 212 | } |
| 213 | if (sections.empty()) |
| 214 | return std::nullopt; |
| 215 | |
| 216 | assert(inputs.empty() || inputs.back()->isFinal); |
| 217 | uint64_t estimatedStubsEnd = |
| 218 | addr + size + numPotentialThunks * target->thunkSize; |
| 219 | for (auto *osec : sections) { |
| 220 | if (osec == this) |
| 221 | continue; |
| 222 | if (!osec->isNeeded()) |
| 223 | continue; |
| 224 | // Check if we will emit any more sections before the last stubs section |
| 225 | if (osec != in.stubs && osec != in.stubHelper && osec != in.objcStubs) |
| 226 | return std::nullopt; |
| 227 | estimatedStubsEnd = |
| 228 | alignToPowerOf2(Value: estimatedStubsEnd, Align: osec->align) + osec->getSize(); |
| 229 | } |
| 230 | return estimatedStubsEnd; |
| 231 | } |
| 232 | |
| 233 | bool TextOutputSection::isTargetStubsAndInRange( |
| 234 | const ConcatInputSection &isec, const Relocation &r, |
| 235 | std::optional<uint64_t> estimatedStubsEnd) const { |
| 236 | if (!estimatedStubsEnd.has_value()) |
| 237 | return false; |
| 238 | auto *funcSym = cast<Symbol *>(Val: r.referent); |
| 239 | if (!funcSym->isInStubs() && !(in.objcStubs && in.objcStubs->isNeeded() && |
| 240 | ObjCStubsSection::isObjCStubSymbol(sym: funcSym))) |
| 241 | return false; |
| 242 | if (r.addend) |
| 243 | return false; |
| 244 | uint64_t highVA = isec.getVA() + r.offset + target->forwardBranchRange; |
| 245 | return *estimatedStubsEnd <= highVA; |
| 246 | } |
| 247 | |
| 248 | void TextOutputSection::finalize() { |
| 249 | if (!needsThunks()) { |
| 250 | for (ConcatInputSection *isec : inputs) |
| 251 | finalizeOne(isec); |
| 252 | return; |
| 253 | } |
| 254 | |
| 255 | // Branches whose target sections are out of range or have not yet been |
| 256 | // finalized. We may need to emit thunks for them. |
| 257 | std::deque<std::pair<ConcatInputSection *, Relocation *>> branchesToProcess; |
| 258 | // Branches whose targets have not yet be finalized, but a thunk for that |
| 259 | // target exists. We defer processing these branches because it's possible we |
| 260 | // can still direct call to their targets after they have all been finalized. |
| 261 | SmallVector<std::tuple<ConcatInputSection *, Relocation *, Defined *>> |
| 262 | deferredBranchRedirects; |
| 263 | |
| 264 | const uint64_t slop = config->slopScale * target->thunkSize; |
| 265 | for (auto *isec : inputs) { |
| 266 | while (!branchesToProcess.empty()) { |
| 267 | auto [callerIsec, r] = branchesToProcess.front(); |
| 268 | assert(callerIsec->isFinal); |
| 269 | auto &thunkInfo = thunkMap[*r]; |
| 270 | if (isTargetKnownInRange(isec: *callerIsec, r: *r)) { |
| 271 | branchesToProcess.pop_front(); |
| 272 | continue; |
| 273 | } |
| 274 | if (auto *thunk = getThunkInRange(isec: *callerIsec, r: *r, thunkInfo)) { |
| 275 | deferredBranchRedirects.emplace_back(Args&: callerIsec, Args&: r, Args&: thunk); |
| 276 | branchesToProcess.pop_front(); |
| 277 | continue; |
| 278 | } |
| 279 | uint64_t highVA = |
| 280 | callerIsec->getVA() + r->offset + target->forwardBranchRange; |
| 281 | uint64_t nextEnd = |
| 282 | alignToPowerOf2(Value: addr + size, Align: isec->align) + isec->getSize(); |
| 283 | // If we were to emit this section, would we have enough space for more |
| 284 | // thunks? If we do, then we can delay processing this thunk so we may |
| 285 | // finalize more potencial target sections. Otherwise we must emit thunks |
| 286 | // until we have enough space. |
| 287 | if (nextEnd + slop <= highVA) |
| 288 | break; |
| 289 | |
| 290 | createThunk(isec: *callerIsec, r&: *r, thunkInfo); |
| 291 | branchesToProcess.pop_front(); |
| 292 | } |
| 293 | finalizeOne(isec); |
| 294 | |
| 295 | // TODO: Remove this check and the assert below. In fact, I don't believe |
| 296 | // the relocation iteration order matters for correctness. |
| 297 | bool hasCallsite = llvm::any_of(Range&: isec->relocs, P: [](Relocation &r) { |
| 298 | return target->hasAttr(type: r.type, bit: RelocAttrBits::BRANCH); |
| 299 | }); |
| 300 | if (!hasCallsite) |
| 301 | continue; |
| 302 | |
| 303 | // Process relocs by ascending address, i.e., ascending offset within isec |
| 304 | // FIXME: This property does not hold for object files produced by ld64's |
| 305 | // `-r` mode. |
| 306 | assert(is_sorted(isec->relocs, [](Relocation &a, Relocation &b) { |
| 307 | return a.offset > b.offset; |
| 308 | })); |
| 309 | for (Relocation &r : reverse(C&: isec->relocs)) { |
| 310 | if (!target->hasAttr(type: r.type, bit: RelocAttrBits::BRANCH)) |
| 311 | continue; |
| 312 | if (isTargetKnownInRange(isec: *isec, r)) |
| 313 | continue; |
| 314 | auto &thunkInfo = thunkMap[r]; |
| 315 | if (auto *thunk = getThunkInRange(isec: *isec, r, thunkInfo)) { |
| 316 | deferredBranchRedirects.emplace_back(Args&: isec, Args: &r, Args&: thunk); |
| 317 | continue; |
| 318 | } |
| 319 | branchesToProcess.emplace_back(args&: isec, args: &r); |
| 320 | } |
| 321 | } |
| 322 | |
| 323 | llvm::erase_if(C&: branchesToProcess, P: [&](auto &pair) { |
| 324 | auto [callerIsec, r] = pair; |
| 325 | return isTargetKnownInRange(isec: *callerIsec, r: *r); |
| 326 | }); |
| 327 | // Count distinct unresolved branch targets that still lack an in-range thunk. |
| 328 | // We use this as an upper bound on the number of thunks we may still create |
| 329 | // when estimating where __stubs / __objc_stubs could end up. |
| 330 | DenseSet<ThunkKey, ThunkMapKeyInfo> branchTargets; |
| 331 | for (auto [callerIsec, r] : branchesToProcess) { |
| 332 | ThunkKey thunkKey(*r); |
| 333 | auto &thunkInfo = thunkMap[thunkKey]; |
| 334 | if (!getThunkInRange(isec: *callerIsec, r: *r, thunkInfo)) |
| 335 | branchTargets.insert(V: thunkKey); |
| 336 | } |
| 337 | |
| 338 | auto estimatedStubsEnd = estimateStubsEndVA(numPotentialThunks: branchTargets.size()); |
| 339 | for (auto [isec, r, thunk] : deferredBranchRedirects) { |
| 340 | if (isTargetKnownInRange(isec: *isec, r: *r)) |
| 341 | continue; |
| 342 | if (isTargetStubsAndInRange(isec: *isec, r: *r, estimatedStubsEnd)) |
| 343 | continue; |
| 344 | updateBranchTargetToThunk(r&: *r, thunk); |
| 345 | } |
| 346 | |
| 347 | for (auto [isec, r] : branchesToProcess) { |
| 348 | if (isTargetStubsAndInRange(isec: *isec, r: *r, estimatedStubsEnd)) |
| 349 | continue; |
| 350 | auto &thunkInfo = thunkMap[*r]; |
| 351 | if (auto *thunk = getThunkInRange(isec: *isec, r: *r, thunkInfo)) { |
| 352 | updateBranchTargetToThunk(r&: *r, thunk); |
| 353 | continue; |
| 354 | } |
| 355 | createThunk(isec: *isec, r&: *r, thunkInfo); |
| 356 | } |
| 357 | |
| 358 | if (!thunks.empty()) |
| 359 | log(msg: name + ": Created " + Twine(thunks.size()) + " (" + |
| 360 | Twine(thunks.size() * target->thunkSize / 1024) + |
| 361 | " KB) thunks and updated " + Twine(thunkCallCount) + " branch targets" ); |
| 362 | } |
| 363 | |
| 364 | void ConcatOutputSection::writeTo(uint8_t *buf) const { |
| 365 | for (ConcatInputSection *isec : inputs) |
| 366 | isec->writeTo(buf: buf + isec->outSecOff); |
| 367 | } |
| 368 | |
| 369 | void TextOutputSection::writeTo(uint8_t *buf) const { |
| 370 | // Merge input sections from thunk & ordinary vectors |
| 371 | size_t i = 0, ie = inputs.size(); |
| 372 | size_t t = 0, te = thunks.size(); |
| 373 | while (i < ie || t < te) { |
| 374 | while (i < ie && (t == te || inputs[i]->empty() || |
| 375 | inputs[i]->outSecOff < thunks[t]->outSecOff)) { |
| 376 | inputs[i]->writeTo(buf: buf + inputs[i]->outSecOff); |
| 377 | ++i; |
| 378 | } |
| 379 | while (t < te && (i == ie || thunks[t]->outSecOff < inputs[i]->outSecOff)) { |
| 380 | thunks[t]->writeTo(buf: buf + thunks[t]->outSecOff); |
| 381 | ++t; |
| 382 | } |
| 383 | } |
| 384 | } |
| 385 | |
| 386 | void ConcatOutputSection::finalizeFlags(InputSection *input) { |
| 387 | switch (sectionType(flags: input->getFlags())) { |
| 388 | default /*type-unspec'ed*/: |
| 389 | // FIXME: Add additional logic here when supporting emitting obj files. |
| 390 | break; |
| 391 | case S_4BYTE_LITERALS: |
| 392 | case S_8BYTE_LITERALS: |
| 393 | case S_16BYTE_LITERALS: |
| 394 | case S_CSTRING_LITERALS: |
| 395 | case S_ZEROFILL: |
| 396 | case S_LAZY_SYMBOL_POINTERS: |
| 397 | case S_MOD_TERM_FUNC_POINTERS: |
| 398 | case S_THREAD_LOCAL_REGULAR: |
| 399 | case S_THREAD_LOCAL_ZEROFILL: |
| 400 | case S_THREAD_LOCAL_VARIABLES: |
| 401 | case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS: |
| 402 | case S_THREAD_LOCAL_VARIABLE_POINTERS: |
| 403 | case S_NON_LAZY_SYMBOL_POINTERS: |
| 404 | case S_SYMBOL_STUBS: |
| 405 | flags |= input->getFlags(); |
| 406 | break; |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | ConcatOutputSection * |
| 411 | ConcatOutputSection::getOrCreateForInput(const InputSection *isec) { |
| 412 | NamePair names = maybeRenameSection(key: {isec->getSegName(), isec->getName()}); |
| 413 | ConcatOutputSection *&osec = concatOutputSections[names]; |
| 414 | if (!osec) { |
| 415 | if (isec->getSegName() == segment_names::text && |
| 416 | isec->getName() != section_names::gccExceptTab && |
| 417 | isec->getName() != section_names::ehFrame) |
| 418 | osec = make<TextOutputSection>(args&: names.second); |
| 419 | else |
| 420 | osec = make<ConcatOutputSection>(args&: names.second); |
| 421 | } |
| 422 | return osec; |
| 423 | } |
| 424 | |
| 425 | NamePair macho::maybeRenameSection(NamePair key) { |
| 426 | auto newNames = config->sectionRenameMap.find(Val: key); |
| 427 | if (newNames != config->sectionRenameMap.end()) |
| 428 | return newNames->second; |
| 429 | return key; |
| 430 | } |
| 431 | |