1//===- Symbols.h ------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_SYMBOLS_H
10#define LLD_MACHO_SYMBOLS_H
11
12#include "Config.h"
13#include "InputFiles.h"
14#include "Target.h"
15
16#include "llvm/Object/Archive.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/MathExtras.h"
19
20namespace lld {
21namespace macho {
22
23class MachHeaderSection;
24
25class Symbol {
26public:
27 enum Kind {
28 DefinedKind,
29 UndefinedKind,
30 CommonKind,
31 DylibKind,
32 LazyArchiveKind,
33 LazyObjectKind,
34 AliasKind,
35 };
36
37 // Enum that describes the type of Identical Code Folding (ICF) applied to a
38 // symbol. This information is crucial for accurately representing symbol
39 // sizes in the map file.
40 enum ICFFoldKind {
41 None, // No folding is applied.
42 Body, // The entire body (function or data) is folded.
43 Thunk // The function body is folded into a single branch thunk.
44 };
45
46 virtual ~Symbol() {}
47
48 Kind kind() const { return symbolKind; }
49
50 StringRef getName() const { return {nameData, nameSize}; }
51
52 bool isLive() const { return used; }
53 bool isLazy() const {
54 return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
55 }
56
57 virtual uint64_t getVA() const { return 0; }
58
59 virtual bool isWeakDef() const { return false; }
60
61 // Only undefined or dylib symbols can be weak references. A weak reference
62 // need not be satisfied at runtime, e.g. due to the symbol not being
63 // available on a given target platform.
64 virtual bool isWeakRef() const { return false; }
65
66 virtual bool isTlv() const { return false; }
67
68 // Whether this symbol is in the GOT or TLVPointer sections.
69 bool isInGot() const { return gotIndex != UINT32_MAX; }
70
71 // Whether this symbol is in the StubsSection.
72 bool isInStubs() const { return stubsIndex != UINT32_MAX; }
73
74 uint64_t getStubVA() const;
75 uint64_t getLazyPtrVA() const;
76 uint64_t getGotVA() const;
77 uint64_t getTlvVA() const;
78 uint64_t resolveBranchVA() const {
79 assert(isa<Defined>(this) || isa<DylibSymbol>(this));
80 return isInStubs() ? getStubVA() : getVA();
81 }
82 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
83 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
84
85 // The index of this symbol in the GOT or the TLVPointer section, depending
86 // on whether it is a thread-local. A given symbol cannot be referenced by
87 // both these sections at once.
88 uint32_t gotIndex = UINT32_MAX;
89 uint32_t lazyBindOffset = UINT32_MAX;
90 uint32_t stubsHelperIndex = UINT32_MAX;
91 uint32_t stubsIndex = UINT32_MAX;
92 uint32_t symtabIndex = UINT32_MAX;
93
94 InputFile *getFile() const { return file; }
95
96protected:
97 Symbol(Kind k, StringRef name, InputFile *file)
98 : symbolKind(k), nameData(name.data()), file(file), nameSize(name.size()),
99 isUsedInRegularObj(!file || isa<ObjFile>(Val: file)),
100 used(!config->deadStrip) {}
101
102 Kind symbolKind;
103 const char *nameData;
104 InputFile *file;
105 uint32_t nameSize;
106
107public:
108 // True if this symbol was referenced by a regular (non-bitcode) object.
109 bool isUsedInRegularObj : 1;
110
111 // True if this symbol is used from a live section.
112 bool used : 1;
113};
114
115class Defined : public Symbol {
116public:
117 Defined(StringRef name, InputFile *file, InputSection *isec, uint64_t value,
118 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
119 bool includeInSymtab, bool isReferencedDynamically, bool noDeadStrip,
120 bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false,
121 bool interposable = false, bool cold = false);
122
123 bool isWeakDef() const override { return weakDef; }
124 bool isExternalWeakDef() const {
125 return isWeakDef() && isExternal() && !privateExtern;
126 }
127 bool isTlv() const override;
128
129 bool isExternal() const { return external; }
130 bool isAbsolute() const { return originalIsec == nullptr; }
131 bool isCold() const { return cold; }
132
133 uint64_t getVA() const override;
134
135 // Returns the object file that this symbol was defined in. This value differs
136 // from `getFile()` if the symbol originated from a bitcode file.
137 ObjFile *getObjectFile() const;
138
139 std::string getSourceLocation();
140
141 // Get the canonical InputSection of the symbol.
142 InputSection *isec() const;
143
144 // Get the canonical unwind entry of the symbol.
145 ConcatInputSection *unwindEntry() const;
146
147 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
148
149 // Place the bitfields first so that they can get placed in the tail padding
150 // of the parent class, on platforms which support it.
151 bool overridesWeakDef : 1;
152 // Whether this symbol should appear in the output binary's export trie.
153 bool privateExtern : 1;
154 // Whether this symbol should appear in the output symbol table.
155 bool includeInSymtab : 1;
156 // The ICF folding kind of this symbol: None / Body / Thunk.
157 LLVM_PREFERRED_TYPE(ICFFoldKind)
158 uint8_t identicalCodeFoldingKind : 2;
159 // Symbols marked referencedDynamically won't be removed from the output's
160 // symbol table by tools like strip. In theory, this could be set on arbitrary
161 // symbols in input object files. In practice, it's used solely for the
162 // synthetic __mh_execute_header symbol.
163 // This is information for the static linker, and it's also written to the
164 // output file's symbol table for tools running later (such as `strip`).
165 bool referencedDynamically : 1;
166 // Set on symbols that should not be removed by dead code stripping.
167 // Set for example on `__attribute__((used))` globals, or on some Objective-C
168 // metadata. This is information only for the static linker and not written
169 // to the output.
170 bool noDeadStrip : 1;
171 // Whether references to this symbol can be interposed at runtime to point to
172 // a different symbol definition (with the same name). For example, if both
173 // dylib A and B define an interposable symbol _foo, and we load A before B at
174 // runtime, then all references to _foo within dylib B will point to the
175 // definition in dylib A.
176 //
177 // Only extern symbols may be interposable.
178 bool interposable : 1;
179
180 bool weakDefCanBeHidden : 1;
181
182 // Whether this symbol has the N_COLD_FUNC nlist flag set. Populated from the
183 // symbol table of input object files.
184 bool cold : 1;
185
186private:
187 const bool weakDef : 1;
188 const bool external : 1;
189
190public:
191 // The native InputSection of the symbol. The symbol may be moved to another
192 // InputSection in which case originalIsec->canonical() will point to the new
193 // InputSection
194 InputSection *originalIsec;
195 // Contains the offset from the containing subsection. Note that this is
196 // different from nlist::n_value, which is the absolute address of the symbol.
197 uint64_t value;
198 // size is only calculated for regular (non-bitcode) symbols.
199 uint64_t size;
200 // This can be a subsection of either __compact_unwind or __eh_frame.
201 ConcatInputSection *originalUnwindEntry = nullptr;
202};
203
// This enum serves two purposes. On a symbol, it records whether and how a
// dylib symbol is referenced. On a DylibFile, it records the kind of
// referenced symbols the file contains; a file with both weak and strong
// references counts as strongly-referenced. The enumerators are ordered by
// increasing strength.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
210
211class Undefined : public Symbol {
212public:
213 Undefined(StringRef name, InputFile *file, RefState refState,
214 bool wasBitcodeSymbol)
215 : Symbol(UndefinedKind, name, file), refState(refState),
216 wasBitcodeSymbol(wasBitcodeSymbol) {
217 assert(refState != RefState::Unreferenced);
218 }
219
220 bool isWeakRef() const override { return refState == RefState::Weak; }
221
222 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
223
224 RefState refState : 2;
225 bool wasBitcodeSymbol;
226};
227
228// On Unix, it is traditionally allowed to write variable definitions without
229// initialization expressions (such as "int foo;") to header files. These are
230// called tentative definitions.
231//
232// Using tentative definitions is usually considered a bad practice; you should
233// write only declarations (such as "extern int foo;") to header files.
234// Nevertheless, the linker and the compiler have to do something to support
235// bad code by allowing duplicate definitions for this particular case.
236//
237// The compiler creates common symbols when it sees tentative definitions.
238// (You can suppress this behavior and let the compiler create a regular
239// defined symbol by passing -fno-common. -fno-common is the default in clang
240// as of LLVM 11.0.) When linking the final binary, if there are remaining
241// common symbols after name resolution is complete, the linker converts them
242// to regular defined symbols in a __common section.
243class CommonSymbol : public Symbol {
244public:
245 CommonSymbol(StringRef name, InputFile *file, uint64_t size, uint32_t align,
246 bool isPrivateExtern)
247 : Symbol(CommonKind, name, file), size(size),
248 align(align != 1 ? align : llvm::PowerOf2Ceil(A: size)),
249 privateExtern(isPrivateExtern) {
250 // TODO: cap maximum alignment
251 }
252
253 static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
254
255 const uint64_t size;
256 const uint32_t align;
257 const bool privateExtern;
258};
259
260class DylibSymbol : public Symbol {
261public:
262 DylibSymbol(DylibFile *file, StringRef name, bool isWeakDef,
263 RefState refState, bool isTlv)
264 : Symbol(DylibKind, name, file), shouldReexport(false),
265 refState(refState), weakDef(isWeakDef), tlv(isTlv) {
266 if (file && refState > RefState::Unreferenced)
267 file->numReferencedSymbols++;
268 }
269
270 uint64_t getVA() const override;
271 bool isWeakDef() const override { return weakDef; }
272
273 // Symbols from weak libraries/frameworks are also weakly-referenced.
274 bool isWeakRef() const override {
275 return refState == RefState::Weak ||
276 (file && getFile()->umbrella->forceWeakImport);
277 }
278 bool isReferenced() const { return refState != RefState::Unreferenced; }
279 bool isTlv() const override { return tlv; }
280 bool isDynamicLookup() const { return file == nullptr; }
281 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
282
283 DylibFile *getFile() const {
284 assert(!isDynamicLookup());
285 return cast<DylibFile>(Val: file);
286 }
287
288 static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
289
290 RefState getRefState() const { return refState; }
291
292 void reference(RefState newState) {
293 assert(newState > RefState::Unreferenced);
294 if (refState == RefState::Unreferenced && file)
295 getFile()->numReferencedSymbols++;
296 refState = std::max(a: refState, b: newState);
297 }
298
299 void unreference() {
300 // dynamic_lookup symbols have no file.
301 if (refState > RefState::Unreferenced && file) {
302 assert(getFile()->numReferencedSymbols > 0);
303 getFile()->numReferencedSymbols--;
304 }
305 }
306
307 bool shouldReexport : 1;
308
309private:
310 RefState refState : 2;
311 const bool weakDef : 1;
312 const bool tlv : 1;
313};
314
315class LazyArchive : public Symbol {
316public:
317 LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
318 : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {}
319
320 ArchiveFile *getFile() const { return cast<ArchiveFile>(Val: file); }
321 void fetchArchiveMember();
322
323 static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
324
325private:
326 const llvm::object::Archive::Symbol sym;
327};
328
329// A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and
330// --end-lib.
331class LazyObject : public Symbol {
332public:
333 LazyObject(InputFile &file, StringRef name)
334 : Symbol(LazyObjectKind, name, &file) {
335 isUsedInRegularObj = false;
336 }
337
338 static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
339};
340
341// Represents N_INDR symbols. Note that if we are given valid, linkable inputs,
342// then all AliasSymbol instances will be converted into one of the other Symbol
343// types after `createAliases()` runs.
344class AliasSymbol final : public Symbol {
345public:
346 AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName,
347 bool isPrivateExtern)
348 : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern),
349 aliasedName(aliasedName) {}
350
351 StringRef getAliasedName() const { return aliasedName; }
352
353 static bool classof(const Symbol *s) { return s->kind() == AliasKind; }
354
355 const bool privateExtern;
356
357private:
358 StringRef aliasedName;
359};
360
361union SymbolUnion {
362 alignas(Defined) char a[sizeof(Defined)];
363 alignas(Undefined) char b[sizeof(Undefined)];
364 alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
365 alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
366 alignas(LazyArchive) char e[sizeof(LazyArchive)];
367 alignas(LazyObject) char f[sizeof(LazyObject)];
368 alignas(AliasSymbol) char g[sizeof(AliasSymbol)];
369};
370
371template <typename T, typename... ArgT>
372T *replaceSymbol(Symbol *s, ArgT &&...arg) {
373 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
374 static_assert(alignof(T) <= alignof(SymbolUnion),
375 "SymbolUnion not aligned enough");
376 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
377 "Not a Symbol");
378
379 bool isUsedInRegularObj = s->isUsedInRegularObj;
380 bool used = s->used;
381 T *sym = new (s) T(std::forward<ArgT>(arg)...);
382 sym->isUsedInRegularObj |= isUsedInRegularObj;
383 sym->used |= used;
384 return sym;
385}
386
387// Can a symbol's address only be resolved at runtime?
388inline bool needsBinding(const Symbol *sym) {
389 if (isa<DylibSymbol>(Val: sym))
390 return true;
391 if (const auto *defined = dyn_cast<Defined>(Val: sym))
392 return defined->isExternalWeakDef() || defined->interposable;
393 return false;
394}
395
396// Symbols with `l` or `L` as a prefix are linker-private and never appear in
397// the output.
398inline bool isPrivateLabel(StringRef name) {
399 return name.starts_with(Prefix: "l") || name.starts_with(Prefix: "L");
400}
401} // namespace macho
402
403std::string toString(const macho::Symbol &);
404std::string toMachOString(const llvm::object::Archive::Symbol &);
405
406} // namespace lld
407
408#endif
409