1 | //===- Symbols.h ------------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLD_COFF_SYMBOLS_H |
10 | #define LLD_COFF_SYMBOLS_H |
11 | |
12 | #include "Chunks.h" |
13 | #include "Config.h" |
14 | #include "lld/Common/LLVM.h" |
15 | #include "lld/Common/Memory.h" |
16 | #include "llvm/ADT/ArrayRef.h" |
17 | #include "llvm/Object/Archive.h" |
18 | #include "llvm/Object/COFF.h" |
19 | #include <atomic> |
20 | #include <memory> |
21 | #include <vector> |
22 | |
23 | namespace lld { |
24 | |
25 | namespace coff { |
26 | |
27 | using llvm::object::Archive; |
28 | using llvm::object::COFFSymbolRef; |
29 | using llvm::object::coff_import_header; |
30 | using llvm::object::coff_symbol_generic; |
31 | |
32 | class ArchiveFile; |
33 | class COFFLinkerContext; |
34 | class InputFile; |
35 | class ObjFile; |
36 | class Symbol; |
37 | class SymbolTable; |
38 | |
39 | const COFFSyncStream &operator<<(const COFFSyncStream &, |
40 | const llvm::object::Archive::Symbol *); |
41 | |
42 | // The base class for real symbol classes. |
43 | class Symbol { |
44 | public: |
45 | enum Kind { |
46 | // The order of these is significant. We start with the regular defined |
47 | // symbols as those are the most prevalent and the zero tag is the cheapest |
48 | // to set. Among the defined kinds, the lower the kind is preferred over |
49 | // the higher kind when testing whether one symbol should take precedence |
50 | // over another. |
51 | DefinedRegularKind = 0, |
52 | DefinedCommonKind, |
53 | DefinedLocalImportKind, |
54 | DefinedImportThunkKind, |
55 | DefinedImportDataKind, |
56 | DefinedAbsoluteKind, |
57 | DefinedSyntheticKind, |
58 | |
59 | UndefinedKind, |
60 | LazyArchiveKind, |
61 | LazyObjectKind, |
62 | LazyDLLSymbolKind, |
63 | |
64 | LastDefinedCOFFKind = DefinedCommonKind, |
65 | LastDefinedKind = DefinedSyntheticKind, |
66 | }; |
67 | |
68 | Kind kind() const { return static_cast<Kind>(symbolKind); } |
69 | |
70 | // Returns the symbol name. |
71 | StringRef getName() { |
72 | // COFF symbol names are read lazily for a performance reason. |
73 | // Non-external symbol names are never used by the linker except for logging |
74 | // or debugging. Their internal references are resolved not by name but by |
75 | // symbol index. And because they are not external, no one can refer them by |
76 | // name. Object files contain lots of non-external symbols, and creating |
77 | // StringRefs for them (which involves lots of strlen() on the string table) |
78 | // is a waste of time. |
79 | if (nameData == nullptr) |
80 | computeName(); |
81 | return StringRef(nameData, nameSize); |
82 | } |
83 | |
84 | void replaceKeepingName(Symbol *other, size_t size); |
85 | |
86 | // Returns the file from which this symbol was created. |
87 | InputFile *getFile(); |
88 | |
89 | // Indicates that this symbol will be included in the final image. Only valid |
90 | // after calling markLive. |
91 | bool isLive() const; |
92 | |
93 | bool isLazy() const { |
94 | return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind || |
95 | symbolKind == LazyDLLSymbolKind; |
96 | } |
97 | |
98 | private: |
99 | void computeName(); |
100 | |
101 | protected: |
102 | friend SymbolTable; |
103 | explicit Symbol(Kind k, StringRef n = "" ) |
104 | : symbolKind(k), isExternal(true), isCOMDAT(false), |
105 | writtenToSymtab(false), isUsedInRegularObj(false), |
106 | pendingArchiveLoad(false), isGCRoot(false), isRuntimePseudoReloc(false), |
107 | deferUndefined(false), canInline(true), isWeak(false), isAntiDep(false), |
108 | nameSize(n.size()), nameData(n.empty() ? nullptr : n.data()) { |
109 | assert((!n.empty() || k <= LastDefinedCOFFKind) && |
110 | "If the name is empty, the Symbol must be a DefinedCOFF." ); |
111 | } |
112 | |
113 | unsigned symbolKind : 8; |
114 | unsigned isExternal : 1; |
115 | |
116 | public: |
117 | // This bit is used by the \c DefinedRegular subclass. |
118 | unsigned isCOMDAT : 1; |
119 | |
120 | // This bit is used by Writer::createSymbolAndStringTable() to prevent |
121 | // symbols from being written to the symbol table more than once. |
122 | unsigned writtenToSymtab : 1; |
123 | |
124 | // True if this symbol was referenced by a regular (non-bitcode) object. |
125 | unsigned isUsedInRegularObj : 1; |
126 | |
127 | // True if we've seen both a lazy and an undefined symbol with this symbol |
128 | // name, which means that we have enqueued an archive member load and should |
129 | // not load any more archive members to resolve the same symbol. |
130 | unsigned pendingArchiveLoad : 1; |
131 | |
132 | /// True if we've already added this symbol to the list of GC roots. |
133 | unsigned isGCRoot : 1; |
134 | |
135 | unsigned isRuntimePseudoReloc : 1; |
136 | |
137 | // True if we want to allow this symbol to be undefined in the early |
138 | // undefined check pass in SymbolTable::reportUnresolvable(), as it |
139 | // might be fixed up later. |
140 | unsigned deferUndefined : 1; |
141 | |
142 | // False if LTO shouldn't inline whatever this symbol points to. If a symbol |
143 | // is overwritten after LTO, LTO shouldn't inline the symbol because it |
144 | // doesn't know the final contents of the symbol. |
145 | unsigned canInline : 1; |
146 | |
147 | // True if the symbol is weak. This is only tracked for bitcode/LTO symbols. |
148 | // This information isn't written to the output; rather, it's used for |
149 | // managing weak symbol overrides. |
150 | unsigned isWeak : 1; |
151 | |
152 | // True if the symbol is an anti-dependency. |
153 | unsigned isAntiDep : 1; |
154 | |
155 | protected: |
156 | // Symbol name length. Assume symbol lengths fit in a 32-bit integer. |
157 | uint32_t nameSize; |
158 | |
159 | const char *nameData; |
160 | }; |
161 | |
162 | // The base class for any defined symbols, including absolute symbols, |
163 | // etc. |
164 | class Defined : public Symbol { |
165 | public: |
166 | Defined(Kind k, StringRef n) : Symbol(k, n) {} |
167 | |
168 | static bool classof(const Symbol *s) { return s->kind() <= LastDefinedKind; } |
169 | |
170 | // Returns the RVA (relative virtual address) of this symbol. The |
171 | // writer sets and uses RVAs. |
172 | uint64_t getRVA(); |
173 | |
174 | // Returns the chunk containing this symbol. Absolute symbols and __ImageBase |
175 | // do not have chunks, so this may return null. |
176 | Chunk *getChunk(); |
177 | }; |
178 | |
// Symbols defined via a COFF object file or bitcode file. For COFF files, this
// stores a coff_symbol_generic*, and names of internal symbols are lazily
// loaded through that. For bitcode files, sym is nullptr and the name is stored
// as a decomposed StringRef.
183 | class DefinedCOFF : public Defined { |
184 | friend Symbol; |
185 | |
186 | public: |
187 | DefinedCOFF(Kind k, InputFile *f, StringRef n, const coff_symbol_generic *s) |
188 | : Defined(k, n), file(f), sym(s) {} |
189 | |
190 | static bool classof(const Symbol *s) { |
191 | return s->kind() <= LastDefinedCOFFKind; |
192 | } |
193 | |
194 | InputFile *getFile() { return file; } |
195 | |
196 | COFFSymbolRef getCOFFSymbol(); |
197 | |
198 | InputFile *file; |
199 | |
200 | protected: |
201 | const coff_symbol_generic *sym; |
202 | }; |
203 | |
204 | // Regular defined symbols read from object file symbol tables. |
205 | class DefinedRegular : public DefinedCOFF { |
206 | public: |
207 | DefinedRegular(InputFile *f, StringRef n, bool isCOMDAT, |
208 | bool isExternal = false, |
209 | const coff_symbol_generic *s = nullptr, |
210 | SectionChunk *c = nullptr, bool isWeak = false) |
211 | : DefinedCOFF(DefinedRegularKind, f, n, s), data(c ? &c->repl : nullptr) { |
212 | this->isExternal = isExternal; |
213 | this->isCOMDAT = isCOMDAT; |
214 | this->isWeak = isWeak; |
215 | } |
216 | |
217 | static bool classof(const Symbol *s) { |
218 | return s->kind() == DefinedRegularKind; |
219 | } |
220 | |
221 | uint64_t getRVA() const { return (*data)->getRVA() + sym->Value; } |
222 | SectionChunk *getChunk() const { return *data; } |
223 | uint32_t getValue() const { return sym->Value; } |
224 | |
225 | SectionChunk **data; |
226 | }; |
227 | |
228 | class DefinedCommon : public DefinedCOFF { |
229 | public: |
230 | DefinedCommon(InputFile *f, StringRef n, uint64_t size, |
231 | const coff_symbol_generic *s = nullptr, |
232 | CommonChunk *c = nullptr) |
233 | : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) { |
234 | this->isExternal = true; |
235 | } |
236 | |
237 | static bool classof(const Symbol *s) { |
238 | return s->kind() == DefinedCommonKind; |
239 | } |
240 | |
241 | uint64_t getRVA() { return data->getRVA(); } |
242 | CommonChunk *getChunk() { return data; } |
243 | |
244 | private: |
245 | friend SymbolTable; |
246 | uint64_t getSize() const { return size; } |
247 | CommonChunk *data; |
248 | uint64_t size; |
249 | }; |
250 | |
251 | // Absolute symbols. |
252 | class DefinedAbsolute : public Defined { |
253 | public: |
254 | DefinedAbsolute(const COFFLinkerContext &c, StringRef n, COFFSymbolRef s) |
255 | : Defined(DefinedAbsoluteKind, n), va(s.getValue()), ctx(c) { |
256 | isExternal = s.isExternal(); |
257 | } |
258 | |
259 | DefinedAbsolute(const COFFLinkerContext &c, StringRef n, uint64_t v) |
260 | : Defined(DefinedAbsoluteKind, n), va(v), ctx(c) {} |
261 | |
262 | static bool classof(const Symbol *s) { |
263 | return s->kind() == DefinedAbsoluteKind; |
264 | } |
265 | |
266 | uint64_t getRVA(); |
267 | void setVA(uint64_t v) { va = v; } |
268 | uint64_t getVA() const { return va; } |
269 | |
270 | private: |
271 | uint64_t va; |
272 | const COFFLinkerContext &ctx; |
273 | }; |
274 | |
275 | // This symbol is used for linker-synthesized symbols like __ImageBase and |
276 | // __safe_se_handler_table. |
277 | class DefinedSynthetic : public Defined { |
278 | public: |
279 | explicit DefinedSynthetic(StringRef name, Chunk *c, uint32_t offset = 0) |
280 | : Defined(DefinedSyntheticKind, name), c(c), offset(offset) {} |
281 | |
282 | static bool classof(const Symbol *s) { |
283 | return s->kind() == DefinedSyntheticKind; |
284 | } |
285 | |
286 | // A null chunk indicates that this is __ImageBase. Otherwise, this is some |
287 | // other synthesized chunk, like SEHTableChunk. |
288 | uint32_t getRVA() { return c ? c->getRVA() + offset : 0; } |
289 | Chunk *getChunk() { return c; } |
290 | |
291 | private: |
292 | Chunk *c; |
293 | uint32_t offset; |
294 | }; |
295 | |
296 | // This class represents a symbol defined in an archive file. It is |
297 | // created from an archive file header, and it knows how to load an |
298 | // object file from an archive to replace itself with a defined |
299 | // symbol. If the resolver finds both Undefined and LazyArchive for |
300 | // the same name, it will ask the LazyArchive to load a file. |
301 | class LazyArchive : public Symbol { |
302 | public: |
303 | LazyArchive(ArchiveFile *f, const Archive::Symbol s) |
304 | : Symbol(LazyArchiveKind, s.getName()), file(f), sym(s) {} |
305 | |
306 | static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } |
307 | |
308 | MemoryBufferRef getMemberBuffer(); |
309 | |
310 | ArchiveFile *file; |
311 | const Archive::Symbol sym; |
312 | }; |
313 | |
314 | class LazyObject : public Symbol { |
315 | public: |
316 | LazyObject(InputFile *f, StringRef n) : Symbol(LazyObjectKind, n), file(f) {} |
317 | static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } |
318 | InputFile *file; |
319 | }; |
320 | |
321 | // MinGW only. |
322 | class LazyDLLSymbol : public Symbol { |
323 | public: |
324 | LazyDLLSymbol(DLLFile *f, DLLFile::Symbol *s, StringRef n) |
325 | : Symbol(LazyDLLSymbolKind, n), file(f), sym(s) {} |
326 | static bool classof(const Symbol *s) { |
327 | return s->kind() == LazyDLLSymbolKind; |
328 | } |
329 | |
330 | DLLFile *file; |
331 | DLLFile::Symbol *sym; |
332 | }; |
333 | |
334 | // Undefined symbols. |
335 | class Undefined : public Symbol { |
336 | public: |
337 | explicit Undefined(StringRef n) : Symbol(UndefinedKind, n) {} |
338 | |
339 | static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } |
340 | |
341 | // An undefined symbol can have a fallback symbol which gives an |
342 | // undefined symbol a second chance if it would remain undefined. |
343 | // If it remains undefined, it'll be replaced with whatever the |
344 | // Alias pointer points to. |
345 | Symbol *weakAlias = nullptr; |
346 | |
347 | // If this symbol is external weak, try to resolve it to a defined |
348 | // symbol by searching the chain of fallback symbols. Returns the symbol if |
349 | // successful, otherwise returns null. |
350 | Symbol *getWeakAlias(); |
351 | Defined *getDefinedWeakAlias() { |
352 | return dyn_cast_or_null<Defined>(Val: getWeakAlias()); |
353 | } |
354 | |
355 | void setWeakAlias(Symbol *sym, bool antiDep = false) { |
356 | weakAlias = sym; |
357 | isAntiDep = antiDep; |
358 | } |
359 | |
360 | bool isECAlias(MachineTypes machine) const { |
361 | return weakAlias && isAntiDep && isArm64EC(Machine: machine); |
362 | } |
363 | |
364 | // If this symbol is external weak, replace this object with aliased symbol. |
365 | bool resolveWeakAlias(); |
366 | }; |
367 | |
368 | // Windows-specific classes. |
369 | |
370 | // This class represents a symbol imported from a DLL. This has two |
371 | // names for internal use and external use. The former is used for |
372 | // name resolution, and the latter is used for the import descriptor |
373 | // table in an output. The former has "__imp_" prefix. |
374 | class DefinedImportData : public Defined { |
375 | public: |
376 | DefinedImportData(StringRef n, ImportFile *file, Chunk *&location) |
377 | : Defined(DefinedImportDataKind, n), file(file), location(location) {} |
378 | |
379 | static bool classof(const Symbol *s) { |
380 | return s->kind() == DefinedImportDataKind; |
381 | } |
382 | |
383 | uint64_t getRVA() { return getChunk()->getRVA(); } |
384 | Chunk *getChunk() { return location; } |
385 | void setLocation(Chunk *addressTable) { location = addressTable; } |
386 | |
387 | StringRef getDLLName() { return file->dllName; } |
388 | StringRef getExternalName() { return file->externalName; } |
389 | uint16_t getOrdinal() { return file->hdr->OrdinalHint; } |
390 | |
391 | ImportFile *file; |
392 | Chunk *&location; |
393 | |
394 | // This is a pointer to the synthetic symbol associated with the load thunk |
395 | // for this symbol that will be called if the DLL is delay-loaded. This is |
396 | // needed for Control Flow Guard because if this DefinedImportData symbol is a |
397 | // valid call target, the corresponding load thunk must also be marked as a |
398 | // valid call target. |
399 | DefinedSynthetic *loadThunkSym = nullptr; |
400 | }; |
401 | |
// This class represents a symbol for a jump table entry which jumps
// to a function in a DLL. Linkers are supposed to create such symbols
// without the "__imp_" prefix for all function symbols exported from
// DLLs, so that you can call DLL functions as regular functions with
// a regular name. A function pointer is given as a DefinedImportData.
407 | class DefinedImportThunk : public Defined { |
408 | public: |
409 | DefinedImportThunk(COFFLinkerContext &ctx, StringRef name, |
410 | DefinedImportData *s, ImportThunkChunk *chunk); |
411 | |
412 | static bool classof(const Symbol *s) { |
413 | return s->kind() == DefinedImportThunkKind; |
414 | } |
415 | |
416 | uint64_t getRVA() { return data->getRVA(); } |
417 | ImportThunkChunk *getChunk() const { return data; } |
418 | |
419 | DefinedImportData *wrappedSym; |
420 | |
421 | private: |
422 | ImportThunkChunk *data; |
423 | }; |
424 | |
425 | // If you have a symbol "foo" in your object file, a symbol name |
426 | // "__imp_foo" becomes automatically available as a pointer to "foo". |
427 | // This class is for such automatically-created symbols. |
428 | // Yes, this is an odd feature. We didn't intend to implement that. |
429 | // This is here just for compatibility with MSVC. |
430 | class DefinedLocalImport : public Defined { |
431 | public: |
432 | DefinedLocalImport(COFFLinkerContext &ctx, StringRef n, Defined *s) |
433 | : Defined(DefinedLocalImportKind, n), |
434 | data(make<LocalImportChunk>(args&: ctx, args&: s)) {} |
435 | |
436 | static bool classof(const Symbol *s) { |
437 | return s->kind() == DefinedLocalImportKind; |
438 | } |
439 | |
440 | uint64_t getRVA() { return data->getRVA(); } |
441 | Chunk *getChunk() { return data; } |
442 | |
443 | private: |
444 | LocalImportChunk *data; |
445 | }; |
446 | |
447 | inline uint64_t Defined::getRVA() { |
448 | switch (kind()) { |
449 | case DefinedAbsoluteKind: |
450 | return cast<DefinedAbsolute>(Val: this)->getRVA(); |
451 | case DefinedSyntheticKind: |
452 | return cast<DefinedSynthetic>(Val: this)->getRVA(); |
453 | case DefinedImportDataKind: |
454 | return cast<DefinedImportData>(Val: this)->getRVA(); |
455 | case DefinedImportThunkKind: |
456 | return cast<DefinedImportThunk>(Val: this)->getRVA(); |
457 | case DefinedLocalImportKind: |
458 | return cast<DefinedLocalImport>(Val: this)->getRVA(); |
459 | case DefinedCommonKind: |
460 | return cast<DefinedCommon>(Val: this)->getRVA(); |
461 | case DefinedRegularKind: |
462 | return cast<DefinedRegular>(Val: this)->getRVA(); |
463 | case LazyArchiveKind: |
464 | case LazyObjectKind: |
465 | case LazyDLLSymbolKind: |
466 | case UndefinedKind: |
467 | llvm_unreachable("Cannot get the address for an undefined symbol." ); |
468 | } |
469 | llvm_unreachable("unknown symbol kind" ); |
470 | } |
471 | |
472 | inline Chunk *Defined::getChunk() { |
473 | switch (kind()) { |
474 | case DefinedRegularKind: |
475 | return cast<DefinedRegular>(Val: this)->getChunk(); |
476 | case DefinedAbsoluteKind: |
477 | return nullptr; |
478 | case DefinedSyntheticKind: |
479 | return cast<DefinedSynthetic>(Val: this)->getChunk(); |
480 | case DefinedImportDataKind: |
481 | return cast<DefinedImportData>(Val: this)->getChunk(); |
482 | case DefinedImportThunkKind: |
483 | return cast<DefinedImportThunk>(Val: this)->getChunk(); |
484 | case DefinedLocalImportKind: |
485 | return cast<DefinedLocalImport>(Val: this)->getChunk(); |
486 | case DefinedCommonKind: |
487 | return cast<DefinedCommon>(Val: this)->getChunk(); |
488 | case LazyArchiveKind: |
489 | case LazyObjectKind: |
490 | case LazyDLLSymbolKind: |
491 | case UndefinedKind: |
492 | llvm_unreachable("Cannot get the chunk of an undefined symbol." ); |
493 | } |
494 | llvm_unreachable("unknown symbol kind" ); |
495 | } |
496 | |
497 | // A buffer class that is large enough to hold any Symbol-derived |
498 | // object. We allocate memory using this class and instantiate a symbol |
499 | // using the placement new. |
500 | union SymbolUnion { |
501 | alignas(DefinedRegular) char a[sizeof(DefinedRegular)]; |
502 | alignas(DefinedCommon) char b[sizeof(DefinedCommon)]; |
503 | alignas(DefinedAbsolute) char c[sizeof(DefinedAbsolute)]; |
504 | alignas(DefinedSynthetic) char d[sizeof(DefinedSynthetic)]; |
505 | alignas(LazyArchive) char e[sizeof(LazyArchive)]; |
506 | alignas(Undefined) char f[sizeof(Undefined)]; |
507 | alignas(DefinedImportData) char g[sizeof(DefinedImportData)]; |
508 | alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)]; |
509 | alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)]; |
510 | alignas(LazyObject) char j[sizeof(LazyObject)]; |
511 | alignas(LazyDLLSymbol) char k[sizeof(LazyDLLSymbol)]; |
512 | }; |
513 | |
514 | template <typename T, typename... ArgT> |
515 | void replaceSymbol(Symbol *s, ArgT &&... arg) { |
516 | static_assert(std::is_trivially_destructible<T>(), |
517 | "Symbol types must be trivially destructible" ); |
518 | static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small" ); |
519 | static_assert(alignof(T) <= alignof(SymbolUnion), |
520 | "SymbolUnion not aligned enough" ); |
521 | assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && |
522 | "Not a Symbol" ); |
523 | bool canInline = s->canInline; |
524 | bool isUsedInRegularObj = s->isUsedInRegularObj; |
525 | new (s) T(std::forward<ArgT>(arg)...); |
526 | s->canInline = canInline; |
527 | s->isUsedInRegularObj = isUsedInRegularObj; |
528 | } |
529 | } // namespace coff |
530 | |
531 | std::string toString(const coff::COFFLinkerContext &ctx, coff::Symbol &b); |
532 | std::string toCOFFString(const coff::COFFLinkerContext &ctx, |
533 | const llvm::object::Archive::Symbol &b); |
534 | |
535 | // Returns a symbol name for an error message. |
536 | std::string maybeDemangleSymbol(const coff::COFFLinkerContext &ctx, |
537 | StringRef symName); |
538 | |
539 | } // namespace lld |
540 | |
541 | #endif |
542 | |