1//===- InputSection.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_INPUT_SECTION_H
10#define LLD_MACHO_INPUT_SECTION_H
11
12#include "Config.h"
13#include "Relocations.h"
14#include "Symbols.h"
15
16#include "lld/Common/LLVM.h"
17#include "lld/Common/Memory.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/BitVector.h"
20#include "llvm/ADT/CachedHashString.h"
21#include "llvm/ADT/TinyPtrVector.h"
22#include "llvm/BinaryFormat/MachO.h"
23
24namespace lld {
25namespace macho {
26
27class InputFile;
28class OutputSection;
29
// Abstract base for the input-section flavors declared in this header
// (selected by Kind). An InputSection carries the raw bytes, relocations and
// symbols of one section and forwards name/segment/flags/file queries to the
// Section record it was created from.
class InputSection {
public:
  // Discriminator consulted by each subclass's classof(), which is what makes
  // isa<>/cast<> work on InputSection pointers.
  enum Kind : uint8_t {
    ConcatKind,
    CStringLiteralKind,
    WordLiteralKind,
  };

  Kind kind() const { return sectionKind; }
  virtual ~InputSection() = default;
  // By default, the size / emptiness of the raw `data` bytes.
  virtual uint64_t getSize() const { return data.size(); }
  virtual bool empty() const { return data.empty(); }
  // The following accessors simply read fields of the underlying Section.
  InputFile *getFile() const { return section.file; }
  StringRef getName() const { return section.name; }
  StringRef getSegName() const { return section.segname; }
  uint32_t getFlags() const { return section.flags; }
  uint64_t getFileSize() const;
  // Translates \p off -- an offset relative to this InputSection -- into an
  // offset from the beginning of its parent OutputSection.
  virtual uint64_t getOffset(uint64_t off) const = 0;
  // NOTE(review): this was previously documented as "the offset from the
  // beginning of the file", which does not match the name; presumably it
  // yields the virtual address of \p off -- confirm against the out-of-line
  // definition.
  uint64_t getVA(uint64_t off) const;
  // Return a user-friendly string for use in diagnostics.
  // Format: /path/to/object.o:(symbol _func+0x123)
  std::string getLocation(uint64_t off) const;
  // Return the source line corresponding to an address, or the empty string.
  // Format: Source.cpp:123 (/path/to/Source.cpp:123)
  std::string getSourceLocation(uint64_t off) const;
  // Return the relocation at \p off, if it exists. This does a linear search.
  const Relocation *getRelocAt(uint32_t off) const;
  // Whether the data at \p off in this InputSection is live.
  virtual bool isLive(uint64_t off) const = 0;
  // Marks the data at \p off as live; granularity is up to the subclass.
  virtual void markLive(uint64_t off) = 0;
  // The section that stands in for this one; the base implementation is the
  // identity. Subclasses may redirect to a replacement.
  virtual InputSection *canonical() { return this; }
  virtual const InputSection *canonical() const { return this; }

protected:
  // Only subclasses construct InputSections. The three flag bit-fields start
  // out false; members not listed here keep their in-class defaults.
  InputSection(Kind kind, const Section &section, ArrayRef<uint8_t> data,
               uint32_t align)
      : sectionKind(kind), keepUnique(false), hasAltEntry(false), isCold(false),
        align(align), data(data), section(section) {}

  // Copying duplicates the kind, coldness, alignment, data view and Section
  // reference only. keepUnique and hasAltEntry are reset to false, and
  // isFinal, parent, relocs and symbols are left at their defaults rather
  // than copied from \p rhs.
  InputSection(const InputSection &rhs)
      : sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false),
        isCold(rhs.isCold), align(rhs.align), data(rhs.data),
        section(rhs.section) {}

  Kind sectionKind;

public:
  // is address assigned?
  bool isFinal = false;
  // keep the address of the symbol(s) in this section unique in the final
  // binary ?
  bool keepUnique : 1;
  // Does this section have symbols at offsets other than zero? (NOTE: only
  // applies to ConcatInputSections.)
  bool hasAltEntry : 1;
  // Is this considered cold? Computed before ICF. Currently reflects whether
  // any symbol in the section has the N_COLD_FUNC nlist flag set. Cold
  // sections are placed at the end of their containing output section to
  // improve locality of non-cold input sections. When a section is given an
  // explicit priority (via order file, --bp-startup-sort, or
  // --bp-compression-sort), this flag is unset so that the priority-based
  // ordering takes precedence over cold partitioning.
  bool isCold : 1;
  // Both constructors overwrite this in-class default of 1.
  uint32_t align = 1;

  // Output section this input section belongs to; null until assigned.
  OutputSection *parent = nullptr;
  // Non-owning view of the section contents.
  ArrayRef<uint8_t> data;
  std::vector<Relocation> relocs;
  // The symbols that belong to this InputSection, sorted by value. With
  // .subsections_via_symbols, there is typically only one element here.
  llvm::TinyPtrVector<Defined *> symbols;

  // The Section record this object was created from; held by reference, so
  // it must outlive the InputSection.
  const Section &section;

protected:
  // NOTE(review): presumably returns the symbol whose range covers \p off
  // (used for diagnostics) -- defined out of line, confirm there.
  const Defined *getContainingSymbol(uint64_t off) const;
};
110
111// ConcatInputSections are combined into (Concat)OutputSections through simple
112// concatenation, in contrast with literal sections which may have their
113// contents merged before output.
114class ConcatInputSection final : public InputSection {
115public:
116 ConcatInputSection(const Section &section, ArrayRef<uint8_t> data,
117 uint32_t align = 1)
118 : InputSection(ConcatKind, section, data, align) {}
119
120 uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }
121 uint64_t getVA() const { return InputSection::getVA(off: 0); }
122 // ConcatInputSections are entirely live or dead, so the offset is irrelevant.
123 bool isLive(uint64_t off) const override { return live; }
124 void markLive(uint64_t off) override { live = true; }
125 bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); }
126 bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
127 void writeTo(uint8_t *buf);
128
129 void foldIdentical(ConcatInputSection *redundant,
130 Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body);
131 ConcatInputSection *canonical() override {
132 return replacement ? replacement : this;
133 }
134 const InputSection *canonical() const override {
135 return replacement ? replacement : this;
136 }
137
138 static bool classof(const InputSection *isec) {
139 return isec->kind() == ConcatKind;
140 }
141
142 // Points to the surviving section after this one is folded by ICF
143 ConcatInputSection *replacement = nullptr;
144 // Equivalence-class ID for ICF
145 uint32_t icfEqClass[2] = {0, 0};
146
147 // With subsections_via_symbols, most symbols have their own InputSection,
148 // and for weak symbols (e.g. from inline functions), only the
149 // InputSection from one translation unit will make it to the output,
150 // while all copies in other translation units are coalesced into the
151 // first and not copied to the output.
152 bool wasCoalesced = false;
153 bool live = !config->deadStrip;
154 // This variable has two usages. Initially, it represents the input order.
155 // After assignAddresses is called, it represents the offset from the
156 // beginning of the output section this section was assigned to.
157 uint64_t outSecOff = 0;
158};
159
// Initialize a fake InputSection that does not belong to any InputFile.
// The created ConcatInputSection will always have 'live=true'.
// All parameters after the segment and section names are optional: flags
// default to 0, data to an empty range and alignment to 1.
ConcatInputSection *makeSyntheticInputSection(StringRef segName,
                                              StringRef sectName,
                                              uint32_t flags = 0,
                                              ArrayRef<uint8_t> data = {},
                                              uint32_t align = 1);
167
168// Helper functions to make it easy to sprinkle asserts.
169
170inline bool shouldOmitFromOutput(InputSection *isec) {
171 return isa<ConcatInputSection>(Val: isec) &&
172 cast<ConcatInputSection>(Val: isec)->shouldOmitFromOutput();
173}
174
175inline bool isCoalescedWeak(InputSection *isec) {
176 return isa<ConcatInputSection>(Val: isec) &&
177 cast<ConcatInputSection>(Val: isec)->isCoalescedWeak();
178}
179
180// We allocate a lot of these and binary search on them, so they should be as
181// compact as possible. Hence the use of 31 rather than 64 bits for the hash.
182struct StringPiece {
183 // Offset from the start of the containing input section.
184 uint32_t inSecOff;
185 uint32_t live : 1;
186 // Only set if deduplicating literals
187 uint32_t hash : 31;
188 // Offset from the start of the containing output section.
189 uint64_t outSecOff = 0;
190
191 StringPiece(uint64_t off, uint32_t hash)
192 : inSecOff(off), live(!config->deadStrip), hash(hash) {}
193};
194
195static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
196
197// CStringInputSections are composed of multiple null-terminated string
198// literals, which we represent using StringPieces. These literals can be
199// deduplicated and tail-merged, so translating offsets between the input and
200// outputs sections is more complicated.
201//
202// NOTE: One significant difference between LLD and ld64 is that we merge all
203// cstring literals, even those referenced directly by non-private symbols.
204// ld64 is more conservative and does not do that. This was mostly done for
205// implementation simplicity; if we find programs that need the more
206// conservative behavior we can certainly implement that.
207class CStringInputSection final : public InputSection {
208public:
209 CStringInputSection(const Section &section, ArrayRef<uint8_t> data,
210 uint32_t align, bool dedupLiterals)
211 : InputSection(CStringLiteralKind, section, data, align),
212 deduplicateLiterals(dedupLiterals) {}
213
214 uint64_t getOffset(uint64_t off) const override;
215 bool isLive(uint64_t off) const override { return getStringPiece(off).live; }
216 void markLive(uint64_t off) override { getStringPiece(off).live = true; }
217 // Find the StringPiece that contains this offset.
218 StringPiece &getStringPiece(uint64_t off);
219 const StringPiece &getStringPiece(uint64_t off) const;
220 // Split at each null byte.
221 void splitIntoPieces();
222
223 LLVM_ATTRIBUTE_ALWAYS_INLINE
224 StringRef getStringRef(size_t i) const {
225 size_t begin = pieces[i].inSecOff;
226 // The endpoint should be *at* the null terminator, not after. This matches
227 // the behavior of StringRef(const char *Str).
228 size_t end =
229 ((pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff) - 1;
230 return toStringRef(Input: data.slice(N: begin, M: end - begin));
231 }
232
233 StringRef getStringRefAtOffset(uint64_t off) const {
234 return getStringRef(i: getStringPieceIndex(off));
235 }
236
237 // Returns i'th piece as a CachedHashStringRef. This function is very hot when
238 // string merging is enabled, so we want to inline.
239 LLVM_ATTRIBUTE_ALWAYS_INLINE
240 llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {
241 assert(deduplicateLiterals);
242 return {getStringRef(i), pieces[i].hash};
243 }
244
245 static bool classof(const InputSection *isec) {
246 return isec->kind() == CStringLiteralKind;
247 }
248
249 bool deduplicateLiterals = false;
250 std::vector<StringPiece> pieces;
251
252private:
253 size_t getStringPieceIndex(uint64_t off) const;
254};
255
256class WordLiteralInputSection final : public InputSection {
257public:
258 WordLiteralInputSection(const Section &section, ArrayRef<uint8_t> data,
259 uint32_t align);
260 uint64_t getOffset(uint64_t off) const override;
261 bool isLive(uint64_t off) const override {
262 return live[off >> power2LiteralSize];
263 }
264 void markLive(uint64_t off) override {
265 live[off >> power2LiteralSize] = true;
266 }
267
268 static bool classof(const InputSection *isec) {
269 return isec->kind() == WordLiteralKind;
270 }
271
272private:
273 unsigned power2LiteralSize;
274 // The liveness of data[off] is tracked by live[off >> power2LiteralSize].
275 llvm::BitVector live;
276};
277
278inline uint8_t sectionType(uint32_t flags) {
279 return flags & llvm::MachO::SECTION_TYPE;
280}
281
282inline bool isZeroFill(uint32_t flags) {
283 return llvm::MachO::isVirtualSection(type: sectionType(flags));
284}
285
286inline bool isThreadLocalVariables(uint32_t flags) {
287 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES;
288}
289
290// These sections contain the data for initializing thread-local variables.
291inline bool isThreadLocalData(uint32_t flags) {
292 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR ||
293 sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL;
294}
295
296inline bool isDebugSection(uint32_t flags) {
297 return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) ==
298 llvm::MachO::S_ATTR_DEBUG;
299}
300
301inline bool isWordLiteralSection(uint32_t flags) {
302 return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS ||
303 sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS ||
304 sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS;
305}
306
// Predicates over an InputSection; only declared here, defined out of line.
// NOTE(review): judging by the names, each one tests whether the section is
// of the named kind (code, __cfstring, class refs, selector refs, __eh_frame,
// __gcc_except_tab) -- confirm the exact criteria against the definitions.
bool isCodeSection(const InputSection *);
bool isCfStringSection(const InputSection *);
bool isClassRefsSection(const InputSection *);
bool isSelRefsSection(const InputSection *);
bool isEhFrameSection(const InputSection *);
bool isGccExceptTabSection(const InputSection *);
313
// Global list of ConcatInputSections; defined out of line.
extern std::vector<ConcatInputSection *> inputSections;
// This is used as a counter for specifying the input order of input sections.
extern int inputSectionsOrder;
317
// Section-name string constants, gathered in one place so the rest of the
// linker can refer to them symbolically. `constexpr` already makes each array
// const, so no separate `const` qualifier is spelled out.
namespace section_names {

constexpr char authGot[] = "__auth_got";
constexpr char authPtr[] = "__auth_ptr";
constexpr char binding[] = "__binding";
constexpr char bitcodeBundle[] = "__bundle";
constexpr char cString[] = "__cstring";
constexpr char cfString[] = "__cfstring";
constexpr char cgProfile[] = "__cg_profile";
constexpr char chainFixups[] = "__chainfixups";
constexpr char codeSignature[] = "__code_signature";
constexpr char common[] = "__common";
constexpr char compactUnwind[] = "__compact_unwind";
constexpr char data[] = "__data";
constexpr char debugAbbrev[] = "__debug_abbrev";
constexpr char debugInfo[] = "__debug_info";
constexpr char debugLine[] = "__debug_line";
constexpr char debugStr[] = "__debug_str";
constexpr char debugStrOffs[] = "__debug_str_offs";
constexpr char ehFrame[] = "__eh_frame";
constexpr char gccExceptTab[] = "__gcc_except_tab";
constexpr char export_[] = "__export";
constexpr char dataInCode[] = "__data_in_code";
constexpr char functionStarts[] = "__func_starts";
constexpr char got[] = "__got";
constexpr char header[] = "__mach_header";
constexpr char indirectSymbolTable[] = "__ind_sym_tab";
constexpr char initOffsets[] = "__init_offsets";
constexpr char const_[] = "__const";
constexpr char lazySymbolPtr[] = "__la_symbol_ptr";
constexpr char lazyBinding[] = "__lazy_binding";
constexpr char literals[] = "__literals";
constexpr char functionMap[] = "__llvm_merge";
constexpr char moduleInitFunc[] = "__mod_init_func";
constexpr char moduleTermFunc[] = "__mod_term_func";
constexpr char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr char objcCatList[] = "__objc_catlist";
constexpr char objcClassList[] = "__objc_classlist";
constexpr char objcMethList[] = "__objc_methlist";
constexpr char objcClassRefs[] = "__objc_classrefs";
constexpr char objcConst[] = "__objc_const";
constexpr char objCImageInfo[] = "__objc_imageinfo";
constexpr char objcStubs[] = "__objc_stubs";
constexpr char objcSelrefs[] = "__objc_selrefs";
constexpr char objcMethname[] = "__objc_methname";
constexpr char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr char objcProtoList[] = "__objc_protolist";
constexpr char outlinedHashTree[] = "__llvm_outline";
constexpr char pageZero[] = "__pagezero";
constexpr char pointers[] = "__pointers";
constexpr char rebase[] = "__rebase";
constexpr char staticInit[] = "__StaticInit";
constexpr char stringTable[] = "__string_table";
constexpr char stubHelper[] = "__stub_helper";
constexpr char stubs[] = "__stubs";
constexpr char swift[] = "__swift";
constexpr char symbolTable[] = "__symbol_table";
constexpr char textCoalNt[] = "__textcoal_nt";
constexpr char text[] = "__text";
constexpr char threadPtrs[] = "__thread_ptrs";
constexpr char threadVars[] = "__thread_vars";
constexpr char unwindInfo[] = "__unwind_info";
constexpr char weakBinding[] = "__weak_binding";
constexpr char zeroFill[] = "__zerofill";
constexpr char addrSig[] = "__llvm_addrsig";

} // namespace section_names
386
// Declared here, defined out of line.
// NOTE(review): presumably registers \p inputSection with the linker's
// global section bookkeeping -- confirm against the definition.
void addInputSection(InputSection *inputSection);

// Declared here, defined out of line.
// NOTE(review): presumably computes a virtual address from \p sym, a
// relocation \p type and a signed \p offset -- confirm against the definition.
uint64_t resolveSymbolOffsetVA(const Symbol *sym, uint8_t type, int64_t offset);
390} // namespace macho
391
// String rendering of an input section. Declared in namespace lld (outside
// macho); the output format is determined by the out-of-line definition.
std::string toString(const macho::InputSection *);
393
394} // namespace lld
395
396#endif
397