1//===- ConcatOutputSection.cpp --------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ConcatOutputSection.h"
10#include "Config.h"
11#include "OutputSegment.h"
12#include "SymbolTable.h"
13#include "Symbols.h"
14#include "SyntheticSections.h"
15#include "Target.h"
16#include "lld/Common/CommonLinkerContext.h"
17#include "llvm/BinaryFormat/MachO.h"
18#include <deque>
19
20using namespace llvm;
21using namespace llvm::MachO;
22using namespace lld;
23using namespace lld::macho;
24
25MapVector<NamePair, ConcatOutputSection *> macho::concatOutputSections;
26
27void ConcatOutputSection::addInput(ConcatInputSection *input) {
28 assert(input->parent == this);
29 if (inputs.empty()) {
30 align = input->align;
31 flags = input->getFlags();
32 } else {
33 align = std::max(a: align, b: input->align);
34 finalizeFlags(input);
35 }
36 inputs.push_back(x: input);
37}
38
39// Branch-range extension can be implemented in two ways, either through ...
40//
41// (1) Branch islands: Single branch instructions (also of limited range),
42// that might be chained in multiple hops to reach the desired
43// destination. On ARM64, as 16 branch islands are needed to hop between
44// opposite ends of a 2 GiB program. LD64 uses branch islands exclusively,
45// even when it needs excessive hops.
46//
47// (2) Thunks: Instruction(s) to load the destination address into a scratch
48// register, followed by a register-indirect branch. Thunks are
49// constructed to reach any arbitrary address, so need not be
50// chained. Although thunks need not be chained, a program might need
51// multiple thunks to the same destination distributed throughout a large
52// program so that all call sites can have one within range.
53//
54// The optimal approach is to mix islands for destinations within two hops,
55// and use thunks for destinations at greater distance. For now, we only
56// implement thunks. TODO: Adding support for branch islands!
57
58DenseMap<ThunkKey, ThunkInfo, ThunkMapKeyInfo> lld::macho::thunkMap;
59
60// Determine whether we need thunks, which depends on the target arch -- RISC
61// (i.e., ARM) generally does because it has limited-range branch/call
62// instructions, whereas CISC (i.e., x86) generally doesn't. RISC only needs
63// thunks for programs so large that branch source & destination addresses
64// might differ more than the range of branch instruction(s).
65bool TextOutputSection::needsThunks() const {
66 if (!target->usesThunks())
67 return false;
68 // FIXME: It is not enough to just estimate the size of this section. We
69 // should compute parent->needsThunks by estimating the size of all __text
70 // sections. See https://github.com/llvm/llvm-project/issues/195387
71 uint64_t isecAddr = addr;
72 for (ConcatInputSection *isec : inputs)
73 isecAddr = alignToPowerOf2(Value: isecAddr, Align: isec->align) + isec->getSize();
74 // Other sections besides __text might be small enough to pass this
75 // test but nevertheless need thunks for calling into other sections.
76 // An imperfect heuristic to use in this case is that if a section
77 // we've already processed in this segment needs thunks, so do the
78 // rest.
79 bool needsThunks = parent && parent->needsThunks;
80
81 // Calculate the total size of all branch target sections
82 uint64_t branchTargetsSize = in.stubs->getSize();
83
84 // Add the size of __objc_stubs section if it exists
85 if (in.objcStubs && in.objcStubs->isNeeded())
86 branchTargetsSize += in.objcStubs->getSize();
87
88 if (!needsThunks &&
89 isecAddr - addr + branchTargetsSize <=
90 std::min(a: target->backwardBranchRange, b: target->forwardBranchRange))
91 return false;
92 // Yes, this program is large enough to need thunks.
93 if (parent)
94 parent->needsThunks = true;
95 return true;
96}
97
98void ConcatOutputSection::finalizeOne(ConcatInputSection *isec) {
99 size = alignToPowerOf2(Value: size, Align: isec->align);
100 fileSize = alignToPowerOf2(Value: fileSize, Align: isec->align);
101 isec->outSecOff = size;
102 isec->isFinal = true;
103 size += isec->getSize();
104 fileSize += isec->getFileSize();
105}
106
107void ConcatOutputSection::finalizeContents() {
108 for (ConcatInputSection *isec : inputs)
109 finalizeOne(isec);
110}
111
112bool TextOutputSection::isTargetKnownInRange(const ConcatInputSection &isec,
113 const Relocation &r) const {
114 uint64_t callVA = isec.getVA() + r.offset;
115 uint64_t lowVA = target->backwardBranchRange < callVA
116 ? callVA - target->backwardBranchRange
117 : 0;
118 uint64_t highVA = callVA + target->forwardBranchRange;
119 auto *funcSym = cast<Symbol *>(Val: r.referent);
120 uint64_t funcVA = resolveSymbolOffsetVA(sym: funcSym, type: r.type, offset: r.addend);
121 // Check if the referent is reachable with a simple call instruction.
122 return lowVA <= funcVA && funcVA <= highVA;
123}
124
125Defined *TextOutputSection::getThunkInRange(const ConcatInputSection &isec,
126 const Relocation &r,
127 const ThunkInfo &thunkInfo) const {
128 assert(!isTargetKnownInRange(isec, r));
129 if (!thunkInfo.sym)
130 return nullptr;
131 uint64_t callVA = isec.getVA() + r.offset;
132 uint64_t lowVA = target->backwardBranchRange < callVA
133 ? callVA - target->backwardBranchRange
134 : 0;
135 uint64_t highVA = callVA + target->forwardBranchRange;
136 uint64_t thunkVA = thunkInfo.isec->getVA();
137 if (lowVA <= thunkVA && thunkVA <= highVA)
138 return thunkInfo.sym;
139 return nullptr;
140}
141
142void TextOutputSection::updateBranchTargetToThunk(Relocation &r,
143 Defined *thunk) {
144 r.referent = thunk;
145 // The thunk itself bakes in the addend, so the call-site reloc must
146 // branch to the thunk start with no extra offset.
147 r.addend = 0;
148 ++thunkCallCount;
149}
150
151void TextOutputSection::createThunk(const ConcatInputSection &isec,
152 Relocation &r, ThunkInfo &thunkInfo) {
153 assert(getThunkInRange(isec, r, thunkInfo) == nullptr);
154 assert(isec.isFinal);
155 uint64_t highVA = isec.getVA() + r.offset + target->forwardBranchRange;
156 if (addr + size > highVA) {
157 // There were too many consecutive branch instructions for `slop`
158 // below. If you hit this: For the current algorithm, just bumping up
159 // slop below and trying again is probably simplest. (See also PR51578
160 // comment 5).
161 fatal(msg: Twine(__FUNCTION__) +
162 ": FIXME: thunk range overrun. Consider increasing the "
163 "slop-scale with `--slop-scale=<unsigned_int>`.");
164 }
165 thunkInfo.isec = makeSyntheticInputSection(segName: isec.getSegName(), sectName: isec.getName());
166 thunkInfo.isec->parent = this;
167 assert(thunkInfo.isec->live);
168
169 std::string addendSuffix;
170 if (r.addend != 0)
171 addendSuffix = "+" + std::to_string(val: r.addend);
172 size_t thunkSize = target->thunkSize;
173 auto *funcSym = cast<Symbol *>(Val&: r.referent);
174 StringRef thunkName =
175 saver().save(S: funcSym->getName() + addendSuffix + ".thunk." +
176 std::to_string(val: thunkInfo.sequence++));
177 if (!isa<Defined>(Val: funcSym) || cast<Defined>(Val: funcSym)->isExternal()) {
178 thunkInfo.sym = symtab->addDefined(
179 name: thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0, size: thunkSize,
180 /*isWeakDef=*/false, /*isPrivateExtern=*/true,
181 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
182 /*isWeakDefCanBeHidden=*/false);
183 } else {
184 thunkInfo.sym = make<Defined>(
185 args&: thunkName, /*file=*/args: nullptr, args&: thunkInfo.isec, /*value=*/args: 0, args&: thunkSize,
186 /*isWeakDef=*/args: false, /*isExternal=*/args: false, /*isPrivateExtern=*/args: true,
187 /*includeInSymtab=*/args: true, /*isReferencedDynamically=*/args: false,
188 /*noDeadStrip=*/args: false, /*isWeakDefCanBeHidden=*/args: false);
189 }
190 thunkInfo.sym->used = true;
191 target->populateThunk(thunk: thunkInfo.isec, funcSym, addend: r.addend);
192 updateBranchTargetToThunk(r, thunk: thunkInfo.sym);
193 finalizeOne(isec: thunkInfo.isec);
194 thunks.push_back(x: thunkInfo.isec);
195}
196
197std::optional<uint64_t>
198TextOutputSection::estimateStubsEndVA(unsigned numPotentialThunks) const {
199 if (!parent)
200 return std::nullopt;
201
202 auto sections =
203 ArrayRef(parent->getSections())
204 .drop_until(Pred: [&](const OutputSection *osec) { return osec == this; });
205
206 // Walk backwards to find the last stubs section
207 while (!sections.empty()) {
208 auto *osec = sections.back();
209 if (osec->isNeeded() && (osec == in.stubs || osec == in.objcStubs))
210 break;
211 sections.consume_back();
212 }
213 if (sections.empty())
214 return std::nullopt;
215
216 assert(inputs.empty() || inputs.back()->isFinal);
217 uint64_t estimatedStubsEnd =
218 addr + size + numPotentialThunks * target->thunkSize;
219 for (auto *osec : sections) {
220 if (osec == this)
221 continue;
222 if (!osec->isNeeded())
223 continue;
224 // Check if we will emit any more sections before the last stubs section
225 if (osec != in.stubs && osec != in.stubHelper && osec != in.objcStubs)
226 return std::nullopt;
227 estimatedStubsEnd =
228 alignToPowerOf2(Value: estimatedStubsEnd, Align: osec->align) + osec->getSize();
229 }
230 return estimatedStubsEnd;
231}
232
233bool TextOutputSection::isTargetStubsAndInRange(
234 const ConcatInputSection &isec, const Relocation &r,
235 std::optional<uint64_t> estimatedStubsEnd) const {
236 if (!estimatedStubsEnd.has_value())
237 return false;
238 auto *funcSym = cast<Symbol *>(Val: r.referent);
239 if (!funcSym->isInStubs() && !(in.objcStubs && in.objcStubs->isNeeded() &&
240 ObjCStubsSection::isObjCStubSymbol(sym: funcSym)))
241 return false;
242 if (r.addend)
243 return false;
244 uint64_t highVA = isec.getVA() + r.offset + target->forwardBranchRange;
245 return *estimatedStubsEnd <= highVA;
246}
247
248void TextOutputSection::finalize() {
249 if (!needsThunks()) {
250 for (ConcatInputSection *isec : inputs)
251 finalizeOne(isec);
252 return;
253 }
254
255 // Branches whose target sections are out of range or have not yet been
256 // finalized. We may need to emit thunks for them.
257 std::deque<std::pair<ConcatInputSection *, Relocation *>> branchesToProcess;
258 // Branches whose targets have not yet be finalized, but a thunk for that
259 // target exists. We defer processing these branches because it's possible we
260 // can still direct call to their targets after they have all been finalized.
261 SmallVector<std::tuple<ConcatInputSection *, Relocation *, Defined *>>
262 deferredBranchRedirects;
263
264 const uint64_t slop = config->slopScale * target->thunkSize;
265 for (auto *isec : inputs) {
266 while (!branchesToProcess.empty()) {
267 auto [callerIsec, r] = branchesToProcess.front();
268 assert(callerIsec->isFinal);
269 auto &thunkInfo = thunkMap[*r];
270 if (isTargetKnownInRange(isec: *callerIsec, r: *r)) {
271 branchesToProcess.pop_front();
272 continue;
273 }
274 if (auto *thunk = getThunkInRange(isec: *callerIsec, r: *r, thunkInfo)) {
275 deferredBranchRedirects.emplace_back(Args&: callerIsec, Args&: r, Args&: thunk);
276 branchesToProcess.pop_front();
277 continue;
278 }
279 uint64_t highVA =
280 callerIsec->getVA() + r->offset + target->forwardBranchRange;
281 uint64_t nextEnd =
282 alignToPowerOf2(Value: addr + size, Align: isec->align) + isec->getSize();
283 // If we were to emit this section, would we have enough space for more
284 // thunks? If we do, then we can delay processing this thunk so we may
285 // finalize more potencial target sections. Otherwise we must emit thunks
286 // until we have enough space.
287 if (nextEnd + slop <= highVA)
288 break;
289
290 createThunk(isec: *callerIsec, r&: *r, thunkInfo);
291 branchesToProcess.pop_front();
292 }
293 finalizeOne(isec);
294
295 // TODO: Remove this check and the assert below. In fact, I don't believe
296 // the relocation iteration order matters for correctness.
297 bool hasCallsite = llvm::any_of(Range&: isec->relocs, P: [](Relocation &r) {
298 return target->hasAttr(type: r.type, bit: RelocAttrBits::BRANCH);
299 });
300 if (!hasCallsite)
301 continue;
302
303 // Process relocs by ascending address, i.e., ascending offset within isec
304 // FIXME: This property does not hold for object files produced by ld64's
305 // `-r` mode.
306 assert(is_sorted(isec->relocs, [](Relocation &a, Relocation &b) {
307 return a.offset > b.offset;
308 }));
309 for (Relocation &r : reverse(C&: isec->relocs)) {
310 if (!target->hasAttr(type: r.type, bit: RelocAttrBits::BRANCH))
311 continue;
312 if (isTargetKnownInRange(isec: *isec, r))
313 continue;
314 auto &thunkInfo = thunkMap[r];
315 if (auto *thunk = getThunkInRange(isec: *isec, r, thunkInfo)) {
316 deferredBranchRedirects.emplace_back(Args&: isec, Args: &r, Args&: thunk);
317 continue;
318 }
319 branchesToProcess.emplace_back(args&: isec, args: &r);
320 }
321 }
322
323 llvm::erase_if(C&: branchesToProcess, P: [&](auto &pair) {
324 auto [callerIsec, r] = pair;
325 return isTargetKnownInRange(isec: *callerIsec, r: *r);
326 });
327 // Count distinct unresolved branch targets that still lack an in-range thunk.
328 // We use this as an upper bound on the number of thunks we may still create
329 // when estimating where __stubs / __objc_stubs could end up.
330 DenseSet<ThunkKey, ThunkMapKeyInfo> branchTargets;
331 for (auto [callerIsec, r] : branchesToProcess) {
332 ThunkKey thunkKey(*r);
333 auto &thunkInfo = thunkMap[thunkKey];
334 if (!getThunkInRange(isec: *callerIsec, r: *r, thunkInfo))
335 branchTargets.insert(V: thunkKey);
336 }
337
338 auto estimatedStubsEnd = estimateStubsEndVA(numPotentialThunks: branchTargets.size());
339 for (auto [isec, r, thunk] : deferredBranchRedirects) {
340 if (isTargetKnownInRange(isec: *isec, r: *r))
341 continue;
342 if (isTargetStubsAndInRange(isec: *isec, r: *r, estimatedStubsEnd))
343 continue;
344 updateBranchTargetToThunk(r&: *r, thunk);
345 }
346
347 for (auto [isec, r] : branchesToProcess) {
348 if (isTargetStubsAndInRange(isec: *isec, r: *r, estimatedStubsEnd))
349 continue;
350 auto &thunkInfo = thunkMap[*r];
351 if (auto *thunk = getThunkInRange(isec: *isec, r: *r, thunkInfo)) {
352 updateBranchTargetToThunk(r&: *r, thunk);
353 continue;
354 }
355 createThunk(isec: *isec, r&: *r, thunkInfo);
356 }
357
358 if (!thunks.empty())
359 log(msg: name + ": Created " + Twine(thunks.size()) + " (" +
360 Twine(thunks.size() * target->thunkSize / 1024) +
361 " KB) thunks and updated " + Twine(thunkCallCount) + " branch targets");
362}
363
364void ConcatOutputSection::writeTo(uint8_t *buf) const {
365 for (ConcatInputSection *isec : inputs)
366 isec->writeTo(buf: buf + isec->outSecOff);
367}
368
369void TextOutputSection::writeTo(uint8_t *buf) const {
370 // Merge input sections from thunk & ordinary vectors
371 size_t i = 0, ie = inputs.size();
372 size_t t = 0, te = thunks.size();
373 while (i < ie || t < te) {
374 while (i < ie && (t == te || inputs[i]->empty() ||
375 inputs[i]->outSecOff < thunks[t]->outSecOff)) {
376 inputs[i]->writeTo(buf: buf + inputs[i]->outSecOff);
377 ++i;
378 }
379 while (t < te && (i == ie || thunks[t]->outSecOff < inputs[i]->outSecOff)) {
380 thunks[t]->writeTo(buf: buf + thunks[t]->outSecOff);
381 ++t;
382 }
383 }
384}
385
386void ConcatOutputSection::finalizeFlags(InputSection *input) {
387 switch (sectionType(flags: input->getFlags())) {
388 default /*type-unspec'ed*/:
389 // FIXME: Add additional logic here when supporting emitting obj files.
390 break;
391 case S_4BYTE_LITERALS:
392 case S_8BYTE_LITERALS:
393 case S_16BYTE_LITERALS:
394 case S_CSTRING_LITERALS:
395 case S_ZEROFILL:
396 case S_LAZY_SYMBOL_POINTERS:
397 case S_MOD_TERM_FUNC_POINTERS:
398 case S_THREAD_LOCAL_REGULAR:
399 case S_THREAD_LOCAL_ZEROFILL:
400 case S_THREAD_LOCAL_VARIABLES:
401 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
402 case S_THREAD_LOCAL_VARIABLE_POINTERS:
403 case S_NON_LAZY_SYMBOL_POINTERS:
404 case S_SYMBOL_STUBS:
405 flags |= input->getFlags();
406 break;
407 }
408}
409
410ConcatOutputSection *
411ConcatOutputSection::getOrCreateForInput(const InputSection *isec) {
412 NamePair names = maybeRenameSection(key: {isec->getSegName(), isec->getName()});
413 ConcatOutputSection *&osec = concatOutputSections[names];
414 if (!osec) {
415 if (isec->getSegName() == segment_names::text &&
416 isec->getName() != section_names::gccExceptTab &&
417 isec->getName() != section_names::ehFrame)
418 osec = make<TextOutputSection>(args&: names.second);
419 else
420 osec = make<ConcatOutputSection>(args&: names.second);
421 }
422 return osec;
423}
424
425NamePair macho::maybeRenameSection(NamePair key) {
426 auto newNames = config->sectionRenameMap.find(Val: key);
427 if (newNames != config->sectionRenameMap.end())
428 return newNames->second;
429 return key;
430}
431