1//===- Symbols.h ------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_SYMBOLS_H
10#define LLD_MACHO_SYMBOLS_H
11
12#include "Config.h"
13#include "InputFiles.h"
14#include "Target.h"
15
16#include "llvm/Object/Archive.h"
17#include "llvm/Support/MathExtras.h"
18
19namespace lld {
20namespace macho {
21
22class MachHeaderSection;
23
24struct StringRefZ {
25 StringRefZ(const char *s) : data(s), size(-1) {}
26 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
27
28 const char *data;
29 const uint32_t size;
30};
31
32class Symbol {
33public:
34 enum Kind {
35 DefinedKind,
36 UndefinedKind,
37 CommonKind,
38 DylibKind,
39 LazyArchiveKind,
40 LazyObjectKind,
41 AliasKind,
42 };
43
44 virtual ~Symbol() {}
45
46 Kind kind() const { return symbolKind; }
47
48 StringRef getName() const {
49 if (nameSize == (uint32_t)-1)
50 nameSize = strlen(s: nameData);
51 return {nameData, nameSize};
52 }
53
54 bool isLive() const { return used; }
55 bool isLazy() const {
56 return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
57 }
58
59 virtual uint64_t getVA() const { return 0; }
60
61 virtual bool isWeakDef() const { return false; }
62
63 // Only undefined or dylib symbols can be weak references. A weak reference
64 // need not be satisfied at runtime, e.g. due to the symbol not being
65 // available on a given target platform.
66 virtual bool isWeakRef() const { return false; }
67
68 virtual bool isTlv() const { return false; }
69
70 // Whether this symbol is in the GOT or TLVPointer sections.
71 bool isInGot() const { return gotIndex != UINT32_MAX; }
72
73 // Whether this symbol is in the StubsSection.
74 bool isInStubs() const { return stubsIndex != UINT32_MAX; }
75
76 uint64_t getStubVA() const;
77 uint64_t getLazyPtrVA() const;
78 uint64_t getGotVA() const;
79 uint64_t getTlvVA() const;
80 uint64_t resolveBranchVA() const {
81 assert(isa<Defined>(this) || isa<DylibSymbol>(this));
82 return isInStubs() ? getStubVA() : getVA();
83 }
84 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
85 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
86
87 // The index of this symbol in the GOT or the TLVPointer section, depending
88 // on whether it is a thread-local. A given symbol cannot be referenced by
89 // both these sections at once.
90 uint32_t gotIndex = UINT32_MAX;
91 uint32_t lazyBindOffset = UINT32_MAX;
92 uint32_t stubsHelperIndex = UINT32_MAX;
93 uint32_t stubsIndex = UINT32_MAX;
94 uint32_t symtabIndex = UINT32_MAX;
95
96 InputFile *getFile() const { return file; }
97
98protected:
99 Symbol(Kind k, StringRefZ name, InputFile *file)
100 : symbolKind(k), nameData(name.data), file(file), nameSize(name.size),
101 isUsedInRegularObj(!file || isa<ObjFile>(Val: file)),
102 used(!config->deadStrip) {}
103
104 Kind symbolKind;
105 const char *nameData;
106 InputFile *file;
107 mutable uint32_t nameSize;
108
109public:
110 // True if this symbol was referenced by a regular (non-bitcode) object.
111 bool isUsedInRegularObj : 1;
112
113 // True if this symbol is used from a live section.
114 bool used : 1;
115};
116
117class Defined : public Symbol {
118public:
119 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
120 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
121 bool includeInSymtab, bool isReferencedDynamically, bool noDeadStrip,
122 bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false,
123 bool interposable = false);
124
125 bool isWeakDef() const override { return weakDef; }
126 bool isExternalWeakDef() const {
127 return isWeakDef() && isExternal() && !privateExtern;
128 }
129 bool isTlv() const override;
130
131 bool isExternal() const { return external; }
132 bool isAbsolute() const { return originalIsec == nullptr; }
133
134 uint64_t getVA() const override;
135
136 // Returns the object file that this symbol was defined in. This value differs
137 // from `getFile()` if the symbol originated from a bitcode file.
138 ObjFile *getObjectFile() const;
139
140 std::string getSourceLocation();
141
142 // Get the canonical InputSection of the symbol.
143 InputSection *isec() const;
144
145 // Get the canonical unwind entry of the symbol.
146 ConcatInputSection *unwindEntry() const;
147
148 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
149
150 // Place the bitfields first so that they can get placed in the tail padding
151 // of the parent class, on platforms which support it.
152 bool overridesWeakDef : 1;
153 // Whether this symbol should appear in the output binary's export trie.
154 bool privateExtern : 1;
155 // Whether this symbol should appear in the output symbol table.
156 bool includeInSymtab : 1;
157 // Whether this symbol was folded into a different symbol during ICF.
158 bool wasIdenticalCodeFolded : 1;
159 // Symbols marked referencedDynamically won't be removed from the output's
160 // symbol table by tools like strip. In theory, this could be set on arbitrary
161 // symbols in input object files. In practice, it's used solely for the
162 // synthetic __mh_execute_header symbol.
163 // This is information for the static linker, and it's also written to the
164 // output file's symbol table for tools running later (such as `strip`).
165 bool referencedDynamically : 1;
166 // Set on symbols that should not be removed by dead code stripping.
167 // Set for example on `__attribute__((used))` globals, or on some Objective-C
168 // metadata. This is information only for the static linker and not written
169 // to the output.
170 bool noDeadStrip : 1;
171 // Whether references to this symbol can be interposed at runtime to point to
172 // a different symbol definition (with the same name). For example, if both
173 // dylib A and B define an interposable symbol _foo, and we load A before B at
174 // runtime, then all references to _foo within dylib B will point to the
175 // definition in dylib A.
176 //
177 // Only extern symbols may be interposable.
178 bool interposable : 1;
179
180 bool weakDefCanBeHidden : 1;
181
182private:
183 const bool weakDef : 1;
184 const bool external : 1;
185
186public:
187 // The native InputSection of the symbol. The symbol may be moved to another
188 // InputSection in which case originalIsec->canonical() will point to the new
189 // InputSection
190 InputSection *originalIsec;
191 // Contains the offset from the containing subsection. Note that this is
192 // different from nlist::n_value, which is the absolute address of the symbol.
193 uint64_t value;
194 // size is only calculated for regular (non-bitcode) symbols.
195 uint64_t size;
196 // This can be a subsection of either __compact_unwind or __eh_frame.
197 ConcatInputSection *originalUnwindEntry = nullptr;
198};
199
// Reference strength, ordered from weakest to strongest so that values can be
// compared and combined with std::max.
//
// The enum serves two purposes. On a symbol it records whether, and how, a
// dylib symbol is referenced. On a DylibFile it records the kind of referenced
// symbols the file contains; a file with both weak and strong references
// counts as strongly referenced.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
206
207class Undefined : public Symbol {
208public:
209 Undefined(StringRefZ name, InputFile *file, RefState refState,
210 bool wasBitcodeSymbol)
211 : Symbol(UndefinedKind, name, file), refState(refState),
212 wasBitcodeSymbol(wasBitcodeSymbol) {
213 assert(refState != RefState::Unreferenced);
214 }
215
216 bool isWeakRef() const override { return refState == RefState::Weak; }
217
218 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
219
220 RefState refState : 2;
221 bool wasBitcodeSymbol;
222};
223
224// On Unix, it is traditionally allowed to write variable definitions without
225// initialization expressions (such as "int foo;") to header files. These are
226// called tentative definitions.
227//
228// Using tentative definitions is usually considered a bad practice; you should
229// write only declarations (such as "extern int foo;") to header files.
230// Nevertheless, the linker and the compiler have to do something to support
231// bad code by allowing duplicate definitions for this particular case.
232//
233// The compiler creates common symbols when it sees tentative definitions.
234// (You can suppress this behavior and let the compiler create a regular
235// defined symbol by passing -fno-common. -fno-common is the default in clang
236// as of LLVM 11.0.) When linking the final binary, if there are remaining
237// common symbols after name resolution is complete, the linker converts them
238// to regular defined symbols in a __common section.
239class CommonSymbol : public Symbol {
240public:
241 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
242 bool isPrivateExtern)
243 : Symbol(CommonKind, name, file), size(size),
244 align(align != 1 ? align : llvm::PowerOf2Ceil(A: size)),
245 privateExtern(isPrivateExtern) {
246 // TODO: cap maximum alignment
247 }
248
249 static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
250
251 const uint64_t size;
252 const uint32_t align;
253 const bool privateExtern;
254};
255
256class DylibSymbol : public Symbol {
257public:
258 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
259 RefState refState, bool isTlv)
260 : Symbol(DylibKind, name, file), shouldReexport(false),
261 refState(refState), weakDef(isWeakDef), tlv(isTlv) {
262 if (file && refState > RefState::Unreferenced)
263 file->numReferencedSymbols++;
264 }
265
266 uint64_t getVA() const override;
267 bool isWeakDef() const override { return weakDef; }
268
269 // Symbols from weak libraries/frameworks are also weakly-referenced.
270 bool isWeakRef() const override {
271 return refState == RefState::Weak ||
272 (file && getFile()->umbrella->forceWeakImport);
273 }
274 bool isReferenced() const { return refState != RefState::Unreferenced; }
275 bool isTlv() const override { return tlv; }
276 bool isDynamicLookup() const { return file == nullptr; }
277 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
278
279 DylibFile *getFile() const {
280 assert(!isDynamicLookup());
281 return cast<DylibFile>(Val: file);
282 }
283
284 static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
285
286 RefState getRefState() const { return refState; }
287
288 void reference(RefState newState) {
289 assert(newState > RefState::Unreferenced);
290 if (refState == RefState::Unreferenced && file)
291 getFile()->numReferencedSymbols++;
292 refState = std::max(a: refState, b: newState);
293 }
294
295 void unreference() {
296 // dynamic_lookup symbols have no file.
297 if (refState > RefState::Unreferenced && file) {
298 assert(getFile()->numReferencedSymbols > 0);
299 getFile()->numReferencedSymbols--;
300 }
301 }
302
303 bool shouldReexport : 1;
304private:
305 RefState refState : 2;
306 const bool weakDef : 1;
307 const bool tlv : 1;
308};
309
310class LazyArchive : public Symbol {
311public:
312 LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
313 : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {}
314
315 ArchiveFile *getFile() const { return cast<ArchiveFile>(Val: file); }
316 void fetchArchiveMember();
317
318 static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
319
320private:
321 const llvm::object::Archive::Symbol sym;
322};
323
324// A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and
325// --end-lib.
326class LazyObject : public Symbol {
327public:
328 LazyObject(InputFile &file, StringRef name)
329 : Symbol(LazyObjectKind, name, &file) {
330 isUsedInRegularObj = false;
331 }
332
333 static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
334};
335
336// Represents N_INDR symbols. Note that if we are given valid, linkable inputs,
337// then all AliasSymbol instances will be converted into one of the other Symbol
338// types after `createAliases()` runs.
339class AliasSymbol final : public Symbol {
340public:
341 AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName,
342 bool isPrivateExtern)
343 : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern),
344 aliasedName(aliasedName) {}
345
346 StringRef getAliasedName() const { return aliasedName; }
347
348 static bool classof(const Symbol *s) { return s->kind() == AliasKind; }
349
350 const bool privateExtern;
351
352private:
353 StringRef aliasedName;
354};
355
356union SymbolUnion {
357 alignas(Defined) char a[sizeof(Defined)];
358 alignas(Undefined) char b[sizeof(Undefined)];
359 alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
360 alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
361 alignas(LazyArchive) char e[sizeof(LazyArchive)];
362 alignas(LazyObject) char f[sizeof(LazyObject)];
363 alignas(AliasSymbol) char g[sizeof(AliasSymbol)];
364};
365
366template <typename T, typename... ArgT>
367T *replaceSymbol(Symbol *s, ArgT &&...arg) {
368 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
369 static_assert(alignof(T) <= alignof(SymbolUnion),
370 "SymbolUnion not aligned enough");
371 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
372 "Not a Symbol");
373
374 bool isUsedInRegularObj = s->isUsedInRegularObj;
375 bool used = s->used;
376 T *sym = new (s) T(std::forward<ArgT>(arg)...);
377 sym->isUsedInRegularObj |= isUsedInRegularObj;
378 sym->used |= used;
379 return sym;
380}
381
382// Can a symbol's address only be resolved at runtime?
383inline bool needsBinding(const Symbol *sym) {
384 if (isa<DylibSymbol>(Val: sym))
385 return true;
386 if (const auto *defined = dyn_cast<Defined>(Val: sym))
387 return defined->isExternalWeakDef() || defined->interposable;
388 return false;
389}
390
391// Symbols with `l` or `L` as a prefix are linker-private and never appear in
392// the output.
393inline bool isPrivateLabel(StringRef name) {
394 return name.starts_with(Prefix: "l") || name.starts_with(Prefix: "L");
395}
396} // namespace macho
397
398std::string toString(const macho::Symbol &);
399std::string toMachOString(const llvm::object::Archive::Symbol &);
400
401} // namespace lld
402
403#endif
404