1//===- Symbols.h ------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_WASM_SYMBOLS_H
10#define LLD_WASM_SYMBOLS_H
11
12#include "Config.h"
13#include "lld/Common/LLVM.h"
14#include "llvm/Object/Archive.h"
15#include "llvm/Object/Wasm.h"
16#include <optional>
17
18namespace lld {
19namespace wasm {
20
21// Shared string constants
22
23// The default module name to use for symbol imports.
24extern const char *defaultModule;
25
26// The name under which to import or export the wasm table.
27extern const char *functionTableName;
28
29// The name under which to import or export the wasm memory.
30extern const char *memoryName;
31
32using llvm::wasm::WasmSymbolType;
33
34class InputFile;
35class InputChunk;
36class InputSegment;
37class InputFunction;
38class InputGlobal;
39class InputTag;
40class InputSection;
41class InputTable;
42class OutputSection;
43
44#define INVALID_INDEX UINT32_MAX
45
46// The base class for real symbol classes.
47class Symbol {
48public:
49 enum Kind : uint8_t {
50 DefinedFunctionKind,
51 DefinedDataKind,
52 DefinedGlobalKind,
53 DefinedTagKind,
54 DefinedTableKind,
55 SectionKind,
56 OutputSectionKind,
57 UndefinedFunctionKind,
58 UndefinedDataKind,
59 UndefinedGlobalKind,
60 UndefinedTableKind,
61 UndefinedTagKind,
62 LazyKind,
63 SharedFunctionKind,
64 SharedDataKind,
65 SharedTagKind,
66 };
67
68 Kind kind() const { return symbolKind; }
69
70 bool isDefined() const { return !isLazy() && !isUndefined(); }
71
72 bool isUndefined() const {
73 return symbolKind == UndefinedFunctionKind ||
74 symbolKind == UndefinedDataKind ||
75 symbolKind == UndefinedGlobalKind ||
76 symbolKind == UndefinedTableKind || symbolKind == UndefinedTagKind;
77 }
78
79 bool isLazy() const { return symbolKind == LazyKind; }
80 bool isShared() const {
81 return symbolKind == SharedFunctionKind || symbolKind == SharedDataKind ||
82 symbolKind == SharedTagKind;
83 }
84
85 bool isLocal() const;
86 bool isWeak() const;
87 bool isHidden() const;
88 bool isTLS() const;
89
90 // Returns true if this symbol exists in a discarded (due to COMDAT) section
91 bool isDiscarded() const;
92
93 // True if this is an undefined weak symbol. This only works once
94 // all input files have been added.
95 bool isUndefWeak() const {
96 // See comment on lazy symbols for details.
97 return isWeak() && (isUndefined() || isLazy());
98 }
99
100 // Returns the symbol name.
101 StringRef getName() const { return name; }
102
103 // Returns the file from which this symbol was created.
104 InputFile *getFile() const { return file; }
105
106 InputChunk *getChunk() const;
107
108 // Indicates that the section or import for this symbol will be included in
109 // the final image.
110 bool isLive() const;
111
112 // Marks the symbol's InputChunk as Live, so that it will be included in the
113 // final image.
114 void markLive();
115
116 void setHidden(bool isHidden);
117
118 // Get/set the index in the output symbol table. This is only used for
119 // relocatable output.
120 uint32_t getOutputSymbolIndex() const;
121 void setOutputSymbolIndex(uint32_t index);
122
123 WasmSymbolType getWasmType() const;
124 bool isImported() const;
125 bool isExported() const;
126 bool isExportedExplicit() const;
127
128 // Indicates that the symbol is used in an __attribute__((used)) directive
129 // or similar.
130 bool isNoStrip() const;
131
132 const WasmSignature *getSignature() const;
133
134 uint32_t getGOTIndex() const {
135 assert(gotIndex != INVALID_INDEX);
136 return gotIndex;
137 }
138
139 void setGOTIndex(uint32_t index);
140 bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; }
141
142protected:
143 Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
144 : name(name), file(f), symbolKind(k), referenced(!ctx.arg.gcSections),
145 requiresGOT(false), isUsedInRegularObj(false), forceExport(false),
146 forceImport(false), canInline(false), traced(false), isStub(false),
147 flags(flags) {}
148
149 StringRef name;
150 InputFile *file;
151 uint32_t outputSymbolIndex = INVALID_INDEX;
152 uint32_t gotIndex = INVALID_INDEX;
153 Kind symbolKind;
154
155public:
156 bool referenced : 1;
157
158 // True for data symbols that needs a dummy GOT entry. Used for static
159 // linking of GOT accesses.
160 bool requiresGOT : 1;
161
162 // True if the symbol was used for linking and thus need to be added to the
163 // output file's symbol table. This is true for all symbols except for
164 // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
165 // are unreferenced except by other bitcode objects.
166 bool isUsedInRegularObj : 1;
167
168 // True if this symbol is explicitly marked for export (i.e. via the
169 // -e/--export command line flag)
170 bool forceExport : 1;
171
172 bool forceImport : 1;
173
174 // False if LTO shouldn't inline whatever this symbol points to. If a symbol
175 // is overwritten after LTO, LTO shouldn't inline the symbol because it
176 // doesn't know the final contents of the symbol.
177 bool canInline : 1;
178
179 // True if this symbol is specified by --trace-symbol option.
180 bool traced : 1;
181
182 // True if this symbol is a linker-synthesized stub function (traps when
183 // called) and should otherwise be treated as missing/undefined. See
184 // SymbolTable::replaceWithUndefined.
185 // These stubs never appear in the table and any table index relocations
186 // against them will produce address 0 (The table index representing
187 // the null function pointer).
188 bool isStub : 1;
189
190 uint32_t flags;
191
192 std::optional<StringRef> importName;
193 std::optional<StringRef> importModule;
194};
195
196class FunctionSymbol : public Symbol {
197public:
198 static bool classof(const Symbol *s) {
199 return s->kind() == DefinedFunctionKind ||
200 s->kind() == SharedFunctionKind ||
201 s->kind() == UndefinedFunctionKind;
202 }
203
204 // Get/set the table index
205 void setTableIndex(uint32_t index);
206 uint32_t getTableIndex() const;
207 bool hasTableIndex() const;
208
209 // Get/set the function index
210 uint32_t getFunctionIndex() const;
211 void setFunctionIndex(uint32_t index);
212 bool hasFunctionIndex() const;
213
214 const WasmSignature *signature;
215
216protected:
217 FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
218 const WasmSignature *sig)
219 : Symbol(name, k, flags, f), signature(sig) {}
220
221 uint32_t tableIndex = INVALID_INDEX;
222 uint32_t functionIndex = INVALID_INDEX;
223};
224
225class DefinedFunction : public FunctionSymbol {
226public:
227 DefinedFunction(StringRef name, uint32_t flags, InputFile *f,
228 InputFunction *function);
229
230 static bool classof(const Symbol *s) {
231 return s->kind() == DefinedFunctionKind;
232 }
233
234 // Get the function index to be used when exporting. This only applies to
235 // defined functions and can be differ from the regular function index for
236 // weakly defined functions (that are imported and used via one index but
237 // defined and exported via another).
238 uint32_t getExportedFunctionIndex() const;
239
240 InputFunction *function;
241};
242
243class UndefinedFunction : public FunctionSymbol {
244public:
245 UndefinedFunction(StringRef name, std::optional<StringRef> importName,
246 std::optional<StringRef> importModule, uint32_t flags,
247 InputFile *file = nullptr,
248 const WasmSignature *type = nullptr,
249 bool isCalledDirectly = true)
250 : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type),
251 isCalledDirectly(isCalledDirectly) {
252 this->importName = importName;
253 this->importModule = importModule;
254 }
255
256 static bool classof(const Symbol *s) {
257 return s->kind() == UndefinedFunctionKind;
258 }
259
260 DefinedFunction *stubFunction = nullptr;
261 bool isCalledDirectly;
262};
263
264// Section symbols for output sections are different from those for input
265// section. These are generated by the linker and point the OutputSection
266// rather than an InputSection.
267class OutputSectionSymbol : public Symbol {
268public:
269 OutputSectionSymbol(const OutputSection *s)
270 : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL,
271 nullptr),
272 section(s) {}
273
274 static bool classof(const Symbol *s) {
275 return s->kind() == OutputSectionKind;
276 }
277
278 const OutputSection *section;
279};
280
281class SectionSymbol : public Symbol {
282public:
283 SectionSymbol(uint32_t flags, const InputChunk *s, InputFile *f = nullptr)
284 : Symbol("", SectionKind, flags, f), section(s) {}
285
286 static bool classof(const Symbol *s) { return s->kind() == SectionKind; }
287
288 const OutputSectionSymbol *getOutputSectionSymbol() const;
289
290 const InputChunk *section;
291};
292
293class DataSymbol : public Symbol {
294public:
295 static bool classof(const Symbol *s) {
296 return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind ||
297 s->kind() == SharedDataKind;
298 }
299
300protected:
301 DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
302 : Symbol(name, k, flags, f) {}
303};
304
305class DefinedData : public DataSymbol {
306public:
307 // Constructor for regular data symbols originating from input files.
308 DefinedData(StringRef name, uint32_t flags, InputFile *f, InputChunk *segment,
309 uint64_t value, uint64_t size)
310 : DataSymbol(name, DefinedDataKind, flags, f), segment(segment),
311 value(value), size(size) {}
312
313 // Constructor for linker synthetic data symbols.
314 DefinedData(StringRef name, uint32_t flags)
315 : DataSymbol(name, DefinedDataKind, flags, nullptr) {}
316
317 static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; }
318
319 // Returns the output virtual address of a defined data symbol.
320 // For TLS symbols, by default (unless absolute is set), this returns an
321 // address relative the `__tls_base`.
322 uint64_t getVA(bool absolute = false) const;
323 void setVA(uint64_t va);
324
325 // Returns the offset of a defined data symbol within its OutputSegment.
326 uint64_t getOutputSegmentOffset() const;
327 uint64_t getOutputSegmentIndex() const;
328 uint64_t getSize() const { return size; }
329
330 InputChunk *segment = nullptr;
331 uint64_t value = 0;
332
333protected:
334 uint64_t size = 0;
335};
336
337class SharedData : public DataSymbol {
338public:
339 SharedData(StringRef name, uint32_t flags, InputFile *f)
340 : DataSymbol(name, SharedDataKind, flags, f) {}
341};
342
343class UndefinedData : public DataSymbol {
344public:
345 UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr)
346 : DataSymbol(name, UndefinedDataKind, flags, file) {}
347 static bool classof(const Symbol *s) {
348 return s->kind() == UndefinedDataKind;
349 }
350};
351
352class GlobalSymbol : public Symbol {
353public:
354 static bool classof(const Symbol *s) {
355 return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind;
356 }
357
358 const WasmGlobalType *getGlobalType() const { return globalType; }
359
360 // Get/set the global index
361 uint32_t getGlobalIndex() const;
362 void setGlobalIndex(uint32_t index);
363 bool hasGlobalIndex() const;
364
365protected:
366 GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
367 const WasmGlobalType *globalType)
368 : Symbol(name, k, flags, f), globalType(globalType) {}
369
370 const WasmGlobalType *globalType;
371 uint32_t globalIndex = INVALID_INDEX;
372};
373
374class DefinedGlobal : public GlobalSymbol {
375public:
376 DefinedGlobal(StringRef name, uint32_t flags, InputFile *file,
377 InputGlobal *global);
378
379 static bool classof(const Symbol *s) {
380 return s->kind() == DefinedGlobalKind;
381 }
382
383 InputGlobal *global;
384};
385
386class UndefinedGlobal : public GlobalSymbol {
387public:
388 UndefinedGlobal(StringRef name, std::optional<StringRef> importName,
389 std::optional<StringRef> importModule, uint32_t flags,
390 InputFile *file = nullptr,
391 const WasmGlobalType *type = nullptr)
392 : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type) {
393 this->importName = importName;
394 this->importModule = importModule;
395 }
396
397 static bool classof(const Symbol *s) {
398 return s->kind() == UndefinedGlobalKind;
399 }
400};
401
402class TableSymbol : public Symbol {
403public:
404 static bool classof(const Symbol *s) {
405 return s->kind() == DefinedTableKind || s->kind() == UndefinedTableKind;
406 }
407
408 const WasmTableType *getTableType() const { return tableType; }
409 void setLimits(const WasmLimits &limits);
410
411 // Get/set the table number
412 uint32_t getTableNumber() const;
413 void setTableNumber(uint32_t number);
414 bool hasTableNumber() const;
415
416protected:
417 TableSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
418 const WasmTableType *type)
419 : Symbol(name, k, flags, f), tableType(type) {}
420
421 const WasmTableType *tableType;
422 uint32_t tableNumber = INVALID_INDEX;
423};
424
425class DefinedTable : public TableSymbol {
426public:
427 DefinedTable(StringRef name, uint32_t flags, InputFile *file,
428 InputTable *table);
429
430 static bool classof(const Symbol *s) { return s->kind() == DefinedTableKind; }
431
432 InputTable *table;
433};
434
435class UndefinedTable : public TableSymbol {
436public:
437 UndefinedTable(StringRef name, std::optional<StringRef> importName,
438 std::optional<StringRef> importModule, uint32_t flags,
439 InputFile *file, const WasmTableType *type)
440 : TableSymbol(name, UndefinedTableKind, flags, file, type) {
441 this->importName = importName;
442 this->importModule = importModule;
443 }
444
445 static bool classof(const Symbol *s) {
446 return s->kind() == UndefinedTableKind;
447 }
448};
449
450// A tag is a general format to distinguish typed entities. Each tag has an
451// attribute and a type. Currently the attribute can only specify that the tag
452// is for an exception tag.
453//
454// In exception handling, tags are used to distinguish different kinds of
455// exceptions. For example, they can be used to distinguish different language's
456// exceptions, e.g., all C++ exceptions have the same tag and Java exceptions
457// would have a distinct tag. Wasm can filter the exceptions it catches based on
458// their tag.
459//
460// A single TagSymbol object represents a single tag. The C++ exception symbol
461// is a weak symbol generated in every object file in which exceptions are used,
462// and is named '__cpp_exception' for linking.
463class TagSymbol : public Symbol {
464public:
465 static bool classof(const Symbol *s) {
466 return s->kind() == DefinedTagKind || s->kind() == UndefinedTagKind ||
467 s->kind() == SharedTagKind;
468 }
469
470 // Get/set the tag index
471 uint32_t getTagIndex() const;
472 void setTagIndex(uint32_t index);
473 bool hasTagIndex() const;
474
475 const WasmSignature *signature;
476
477protected:
478 TagSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
479 const WasmSignature *sig)
480 : Symbol(name, k, flags, f), signature(sig) {}
481
482 uint32_t tagIndex = INVALID_INDEX;
483};
484
485class DefinedTag : public TagSymbol {
486public:
487 DefinedTag(StringRef name, uint32_t flags, InputFile *file, InputTag *tag);
488
489 static bool classof(const Symbol *s) { return s->kind() == DefinedTagKind; }
490
491 InputTag *tag;
492};
493
494class UndefinedTag : public TagSymbol {
495public:
496 UndefinedTag(StringRef name, std::optional<StringRef> importName,
497 std::optional<StringRef> importModule, uint32_t flags,
498 InputFile *file = nullptr, const WasmSignature *sig = nullptr)
499 : TagSymbol(name, UndefinedTagKind, flags, file, sig) {
500 this->importName = importName;
501 this->importModule = importModule;
502 }
503
504 static bool classof(const Symbol *s) { return s->kind() == UndefinedTagKind; }
505};
506
507class SharedTagSymbol : public TagSymbol {
508public:
509 SharedTagSymbol(StringRef name, uint32_t flags, InputFile *f,
510 const WasmSignature *sig)
511 : TagSymbol(name, SharedTagKind, flags, f, sig) {}
512
513 static bool classof(const Symbol *s) { return s->kind() == SharedTagKind; }
514};
515
516class SharedFunctionSymbol : public FunctionSymbol {
517public:
518 SharedFunctionSymbol(StringRef name, uint32_t flags, InputFile *file,
519 const WasmSignature *sig)
520 : FunctionSymbol(name, SharedFunctionKind, flags, file, sig) {}
521 static bool classof(const Symbol *s) {
522 return s->kind() == SharedFunctionKind;
523 }
524};
525
526// LazySymbol symbols represent symbols in object files between --start-lib and
527// --end-lib options. LLD also handles traditional archives as if all the files
528// in the archive are surrounded by --start-lib and --end-lib.
529//
530// A special complication is the handling of weak undefined symbols. They should
531// not load a file, but we have to remember we have seen both the weak undefined
532// and the lazy. We represent that with a lazy symbol with a weak binding. This
533// means that code looking for undefined symbols normally also has to take lazy
534// symbols into consideration.
535class LazySymbol : public Symbol {
536public:
537 LazySymbol(StringRef name, uint32_t flags, InputFile *file)
538 : Symbol(name, LazyKind, flags, file) {}
539
540 static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
541 void extract();
542 void setWeak();
543
544 // Lazy symbols can have a signature because they can replace an
545 // UndefinedFunction in which case we need to be able to preserve the
546 // signature.
547 // TODO(sbc): This repetition of the signature field is inelegant. Revisit
548 // the use of class hierarchy to represent symbol taxonomy.
549 const WasmSignature *signature = nullptr;
550};
551
552// A buffer class that is large enough to hold any Symbol-derived
553// object. We allocate memory using this class and instantiate a symbol
554// using the placement new.
555union SymbolUnion {
556 alignas(DefinedFunction) char a[sizeof(DefinedFunction)];
557 alignas(DefinedData) char b[sizeof(DefinedData)];
558 alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)];
559 alignas(DefinedTag) char d[sizeof(DefinedTag)];
560 alignas(DefinedTable) char e[sizeof(DefinedTable)];
561 alignas(LazySymbol) char f[sizeof(LazySymbol)];
562 alignas(UndefinedFunction) char g[sizeof(UndefinedFunction)];
563 alignas(UndefinedData) char h[sizeof(UndefinedData)];
564 alignas(UndefinedGlobal) char i[sizeof(UndefinedGlobal)];
565 alignas(UndefinedTable) char j[sizeof(UndefinedTable)];
566 alignas(SectionSymbol) char k[sizeof(SectionSymbol)];
567 alignas(SharedFunctionSymbol) char l[sizeof(SharedFunctionSymbol)];
568 alignas(SharedTagSymbol) char m[sizeof(SharedTagSymbol)];
569};
570
571// It is important to keep the size of SymbolUnion small for performance and
572// memory usage reasons. 96 bytes is a soft limit based on the size of
573// UndefinedFunction on a 64-bit system.
574static_assert(sizeof(SymbolUnion) <= 120, "SymbolUnion too large");
575
576void printTraceSymbol(Symbol *sym);
577void printTraceSymbolUndefined(StringRef name, const InputFile *file);
578
579template <typename T, typename... ArgT>
580T *replaceSymbol(Symbol *s, ArgT &&...arg) {
581 static_assert(std::is_trivially_destructible<T>(),
582 "Symbol types must be trivially destructible");
583 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
584 static_assert(alignof(T) <= alignof(SymbolUnion),
585 "SymbolUnion not aligned enough");
586 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
587 "Not a Symbol");
588
589 Symbol symCopy = *s;
590
591 T *s2 = new (s) T(std::forward<ArgT>(arg)...);
592 s2->isUsedInRegularObj = symCopy.isUsedInRegularObj;
593 s2->forceExport = symCopy.forceExport;
594 s2->forceImport = symCopy.forceImport;
595 s2->canInline = symCopy.canInline;
596 s2->traced = symCopy.traced;
597 s2->referenced = symCopy.referenced;
598
599 // Print out a log message if --trace-symbol was specified.
600 // This is for debugging.
601 if (s2->traced)
602 printTraceSymbol(s2);
603
604 return s2;
605}
606
607} // namespace wasm
608
609// Returns a symbol name for an error message.
610std::string toString(const wasm::Symbol &sym);
611std::string toString(wasm::Symbol::Kind kind);
612std::string maybeDemangleSymbol(StringRef name);
613
614} // namespace lld
615
616#endif
617