1//===- InputSection.cpp ---------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputSection.h"
10#include "ConcatOutputSection.h"
11#include "Config.h"
12#include "InputFiles.h"
13#include "OutputSegment.h"
14#include "Sections.h"
15#include "Symbols.h"
16#include "SyntheticSections.h"
17#include "Target.h"
18#include "Writer.h"
19
20#include "lld/Common/ErrorHandler.h"
21#include "lld/Common/Memory.h"
22#include "llvm/Support/xxhash.h"
23
24using namespace llvm;
25using namespace llvm::MachO;
26using namespace llvm::support;
27using namespace lld;
28using namespace lld::macho;
29
30// Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
31// can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
32// so account for that.
33static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) ==
34 sizeof(std::vector<Relocation>) + 88,
35 "Try to minimize ConcatInputSection's size, we create many "
36 "instances of it");
37
38std::vector<ConcatInputSection *> macho::inputSections;
39int macho::inputSectionsOrder = 0;
40
41// Call this function to add a new InputSection and have it routed to the
42// appropriate container. Depending on its type and current config, it will
43// either be added to 'inputSections' vector or to a synthetic section.
44void lld::macho::addInputSection(InputSection *inputSection) {
45 if (auto *isec = dyn_cast<ConcatInputSection>(Val: inputSection)) {
46 if (isec->isCoalescedWeak())
47 return;
48 if (config->emitRelativeMethodLists &&
49 ObjCMethListSection::isMethodList(isec)) {
50 if (in.objcMethList->inputOrder == UnspecifiedInputOrder)
51 in.objcMethList->inputOrder = inputSectionsOrder++;
52 in.objcMethList->addInput(isec);
53 isec->parent = in.objcMethList;
54 return;
55 }
56 if (config->emitInitOffsets &&
57 sectionType(flags: isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
58 in.initOffsets->addInput(isec);
59 return;
60 }
61 isec->outSecOff = inputSectionsOrder++;
62 auto *osec = ConcatOutputSection::getOrCreateForInput(isec);
63 isec->parent = osec;
64 inputSections.push_back(x: isec);
65 } else if (auto *isec = dyn_cast<CStringInputSection>(Val: inputSection)) {
66 bool useSectionName = config->separateCstringLiteralSections ||
67 isec->getName() == section_names::objcMethname;
68 auto *osec = in.getOrCreateCStringSection(
69 name: useSectionName ? isec->getName() : section_names::cString);
70 if (osec->inputOrder == UnspecifiedInputOrder)
71 osec->inputOrder = inputSectionsOrder++;
72 osec->addInput(isec);
73 } else if (auto *isec = dyn_cast<WordLiteralInputSection>(Val: inputSection)) {
74 if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
75 in.wordLiteralSection->inputOrder = inputSectionsOrder++;
76 in.wordLiteralSection->addInput(isec);
77 } else {
78 llvm_unreachable("unexpected input section kind");
79 }
80
81 assert(inputSectionsOrder <= UnspecifiedInputOrder);
82}
83
84uint64_t InputSection::getFileSize() const {
85 return isZeroFill(flags: getFlags()) ? 0 : getSize();
86}
87
88uint64_t InputSection::getVA(uint64_t off) const {
89 return parent->addr + getOffset(off);
90}
91
92static uint64_t resolveSymbolOffsetVA(const Symbol *sym, uint8_t type,
93 int64_t offset) {
94 const RelocAttrs &relocAttrs = target->getRelocAttrs(type);
95 uint64_t symVA;
96 if (relocAttrs.hasAttr(b: RelocAttrBits::BRANCH)) {
97 // For branch relocations with non-zero offsets, use the actual function
98 // address rather than the stub address. Branching to an interior point
99 // of a function (e.g., _func+16) implies reliance on the original
100 // function's layout, which an interposed replacement wouldn't preserve.
101 // There's no meaningful way to "interpose" an interior offset.
102 symVA = (offset != 0) ? sym->getVA() : sym->resolveBranchVA();
103 } else if (relocAttrs.hasAttr(b: RelocAttrBits::GOT)) {
104 symVA = sym->resolveGotVA();
105 } else if (relocAttrs.hasAttr(b: RelocAttrBits::TLV)) {
106 symVA = sym->resolveTlvVA();
107 } else {
108 symVA = sym->getVA();
109 }
110 return symVA + offset;
111}
112
113const Defined *InputSection::getContainingSymbol(uint64_t off) const {
114 auto *nextSym = llvm::upper_bound(
115 Range: symbols, Value&: off, C: [](uint64_t a, const Defined *b) { return a < b->value; });
116 if (nextSym == symbols.begin())
117 return nullptr;
118 return *std::prev(x: nextSym);
119}
120
121std::string InputSection::getLocation(uint64_t off) const {
122 // First, try to find a symbol that's near the offset. Use it as a reference
123 // point.
124 if (auto *sym = getContainingSymbol(off))
125 return (toString(file: getFile()) + ":(symbol " + toString(*sym) + "+0x" +
126 Twine::utohexstr(Val: off - sym->value) + ")")
127 .str();
128
129 // If that fails, use the section itself as a reference point.
130 for (const Subsection &subsec : section.subsections) {
131 if (subsec.isec == this) {
132 off += subsec.offset;
133 break;
134 }
135 }
136
137 return (toString(file: getFile()) + ":(" + getName() + "+0x" +
138 Twine::utohexstr(Val: off) + ")")
139 .str();
140}
141
142std::string InputSection::getSourceLocation(uint64_t off) const {
143 auto *obj = dyn_cast_or_null<ObjFile>(Val: getFile());
144 if (!obj)
145 return {};
146
147 DWARFCache *dwarf = obj->getDwarf();
148 if (!dwarf)
149 return std::string();
150
151 for (const Subsection &subsec : section.subsections) {
152 if (subsec.isec == this) {
153 off += subsec.offset;
154 break;
155 }
156 }
157
158 auto createMsg = [&](StringRef path, unsigned line) {
159 std::string filename = sys::path::filename(path).str();
160 std::string lineStr = (":" + Twine(line)).str();
161 if (filename == path)
162 return filename + lineStr;
163 return (filename + lineStr + " (" + path + lineStr + ")").str();
164 };
165
166 // First, look up a function for a given offset.
167 if (std::optional<DILineInfo> li = dwarf->getDILineInfo(
168 offset: section.addr + off, sectionIndex: object::SectionedAddress::UndefSection))
169 return createMsg(li->FileName, li->Line);
170
171 // If it failed, look up again as a variable.
172 if (const Defined *sym = getContainingSymbol(off)) {
173 // Symbols are generally prefixed with an underscore, which is not included
174 // in the debug information.
175 StringRef symName = sym->getName();
176 symName.consume_front(Prefix: "_");
177
178 if (std::optional<std::pair<std::string, unsigned>> fileLine =
179 dwarf->getVariableLoc(name: symName))
180 return createMsg(fileLine->first, fileLine->second);
181 }
182
183 // Try to get the source file's name from the DWARF information.
184 if (obj->compileUnit)
185 return obj->sourceFile();
186
187 return {};
188}
189
190const Relocation *InputSection::getRelocAt(uint32_t off) const {
191 auto it = llvm::find_if(Range: relocs,
192 P: [=](const Relocation &r) { return r.offset == off; });
193 if (it == relocs.end())
194 return nullptr;
195 return &*it;
196}
197
198void ConcatInputSection::foldIdentical(ConcatInputSection *copy,
199 Symbol::ICFFoldKind foldKind) {
200 align = std::max(a: align, b: copy->align);
201 copy->live = false;
202 copy->wasCoalesced = true;
203 copy->replacement = this;
204 for (auto &copySym : copy->symbols)
205 copySym->identicalCodeFoldingKind = foldKind;
206
207 symbols.insert(I: symbols.end(), From: copy->symbols.begin(), To: copy->symbols.end());
208 copy->symbols.clear();
209
210 // Remove duplicate compact unwind info for symbols at the same address.
211 if (symbols.empty())
212 return;
213 for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) {
214 assert((*it)->value == 0);
215 (*it)->originalUnwindEntry = nullptr;
216 }
217}
218
219void ConcatInputSection::writeTo(uint8_t *buf) {
220 assert(!shouldOmitFromOutput());
221
222 if (getFileSize() == 0)
223 return;
224
225 memcpy(dest: buf, src: data.data(), n: data.size());
226
227 for (size_t i = 0; i < relocs.size(); i++) {
228 const Relocation &r = relocs[i];
229 uint8_t *loc = buf + r.offset;
230 uint64_t referentVA = 0;
231
232 const bool needsFixup = config->emitChainedFixups &&
233 target->hasAttr(type: r.type, bit: RelocAttrBits::UNSIGNED);
234 if (target->hasAttr(type: r.type, bit: RelocAttrBits::SUBTRAHEND)) {
235 const Symbol *fromSym = cast<Symbol *>(Val: r.referent);
236 const Relocation &minuend = relocs[++i];
237 uint64_t minuendVA;
238 if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>())
239 minuendVA = toSym->getVA() + minuend.addend;
240 else {
241 auto *referentIsec = cast<InputSection *>(Val: minuend.referent);
242 assert(!::shouldOmitFromOutput(referentIsec));
243 minuendVA = referentIsec->getVA(off: minuend.addend);
244 }
245 referentVA = minuendVA - fromSym->getVA();
246 } else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
247 if (target->hasAttr(type: r.type, bit: RelocAttrBits::LOAD) &&
248 !referentSym->isInGot())
249 target->relaxGotLoad(loc, type: r.type);
250 // For dtrace symbols, do not handle them as normal undefined symbols
251 if (referentSym->getName().starts_with(Prefix: "___dtrace_")) {
252 // Change dtrace call site to pre-defined instructions
253 target->handleDtraceReloc(sym: referentSym, r, loc);
254 continue;
255 }
256 referentVA = resolveSymbolOffsetVA(sym: referentSym, type: r.type, offset: r.addend);
257
258 if (isThreadLocalVariables(flags: getFlags()) && isa<Defined>(Val: referentSym)) {
259 // References from thread-local variable sections are treated as offsets
260 // relative to the start of the thread-local data memory area, which
261 // is initialized via copying all the TLV data sections (which are all
262 // contiguous).
263 referentVA -= firstTLVDataSection->addr;
264 } else if (needsFixup) {
265 writeChainedFixup(buf: loc, sym: referentSym, addend: r.addend);
266 continue;
267 }
268 } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
269 assert(!::shouldOmitFromOutput(referentIsec));
270 referentVA = referentIsec->getVA(off: r.addend);
271
272 if (needsFixup) {
273 writeChainedRebase(buf: loc, targetVA: referentVA);
274 continue;
275 }
276 }
277 target->relocateOne(loc, r, va: referentVA, relocVA: getVA() + r.offset);
278 }
279}
280
281ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName,
282 StringRef sectName,
283 uint32_t flags,
284 ArrayRef<uint8_t> data,
285 uint32_t align) {
286 Section &section =
287 *make<Section>(/*file=*/args: nullptr, args&: segName, args&: sectName, args&: flags, /*addr=*/args: 0);
288 auto isec = make<ConcatInputSection>(args&: section, args&: data, args&: align);
289 // Since this is an explicitly created 'fake' input section,
290 // it should not be dead stripped.
291 isec->live = true;
292 section.subsections.push_back(x: {.offset: 0, .isec: isec});
293 return isec;
294}
295
296void CStringInputSection::splitIntoPieces() {
297 size_t off = 0;
298 StringRef s = toStringRef(Input: data);
299 while (!s.empty()) {
300 size_t end = s.find(C: 0);
301 if (end == StringRef::npos)
302 fatal(msg: getLocation(off) + ": string is not null terminated");
303 uint32_t hash = deduplicateLiterals ? xxh3_64bits(data: s.take_front(N: end)) : 0;
304 pieces.emplace_back(args&: off, args&: hash);
305 size_t size = end + 1; // include null terminator
306 s = s.substr(Start: size);
307 off += size;
308 }
309}
310
311StringPiece &CStringInputSection::getStringPiece(uint64_t off) {
312 if (off >= data.size())
313 fatal(msg: toString(this) + ": offset is outside the section");
314
315 auto it =
316 partition_point(Range&: pieces, P: [=](StringPiece p) { return p.inSecOff <= off; });
317 return it[-1];
318}
319
320const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const {
321 return const_cast<CStringInputSection *>(this)->getStringPiece(off);
322}
323
324size_t CStringInputSection::getStringPieceIndex(uint64_t off) const {
325 if (off >= data.size())
326 fatal(msg: toString(this) + ": offset is outside the section");
327
328 auto it =
329 partition_point(Range: pieces, P: [=](StringPiece p) { return p.inSecOff <= off; });
330 return std::distance(first: pieces.begin(), last: it) - 1;
331}
332
333uint64_t CStringInputSection::getOffset(uint64_t off) const {
334 const StringPiece &piece = getStringPiece(off);
335 uint64_t addend = off - piece.inSecOff;
336 return piece.outSecOff + addend;
337}
338
339WordLiteralInputSection::WordLiteralInputSection(const Section &section,
340 ArrayRef<uint8_t> data,
341 uint32_t align)
342 : InputSection(WordLiteralKind, section, data, align) {
343 switch (sectionType(flags: getFlags())) {
344 case S_4BYTE_LITERALS:
345 power2LiteralSize = 2;
346 break;
347 case S_8BYTE_LITERALS:
348 power2LiteralSize = 3;
349 break;
350 case S_16BYTE_LITERALS:
351 power2LiteralSize = 4;
352 break;
353 default:
354 llvm_unreachable("invalid literal section type");
355 }
356
357 live.resize(N: data.size() >> power2LiteralSize, t: !config->deadStrip);
358}
359
360uint64_t WordLiteralInputSection::getOffset(uint64_t off) const {
361 if (off >= data.size())
362 fatal(msg: toString(this) + ": offset is outside the section");
363
364 auto *osec = cast<WordLiteralSection>(Val: parent);
365 const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data());
366 switch (sectionType(flags: getFlags())) {
367 case S_4BYTE_LITERALS:
368 return osec->getLiteral4Offset(buf: buf + (off & ~3LLU)) | (off & 3);
369 case S_8BYTE_LITERALS:
370 return osec->getLiteral8Offset(buf: buf + (off & ~7LLU)) | (off & 7);
371 case S_16BYTE_LITERALS:
372 return osec->getLiteral16Offset(buf: buf + (off & ~15LLU)) | (off & 15);
373 default:
374 llvm_unreachable("invalid literal section type");
375 }
376}
377
378bool macho::isCodeSection(const InputSection *isec) {
379 return sections::isCodeSection(name: isec->getName(), segName: isec->getSegName(),
380 flags: isec->getFlags());
381}
382
383bool macho::isCfStringSection(const InputSection *isec) {
384 return isec->getName() == section_names::cfString &&
385 isec->getSegName() == segment_names::data;
386}
387
388bool macho::isClassRefsSection(const InputSection *isec) {
389 return isec->getName() == section_names::objcClassRefs &&
390 isec->getSegName() == segment_names::data;
391}
392
393bool macho::isSelRefsSection(const InputSection *isec) {
394 return isec->getName() == section_names::objcSelrefs &&
395 isec->getSegName() == segment_names::data;
396}
397
398bool macho::isEhFrameSection(const InputSection *isec) {
399 return isec->getName() == section_names::ehFrame &&
400 isec->getSegName() == segment_names::text;
401}
402
403bool macho::isGccExceptTabSection(const InputSection *isec) {
404 return isec->getName() == section_names::gccExceptTab &&
405 isec->getSegName() == segment_names::text;
406}
407
408std::string lld::toString(const InputSection *isec) {
409 return (toString(file: isec->getFile()) + ":(" + isec->getName() + ")").str();
410}
411