1//===- SymbolTable.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "SymbolTable.h"
10#include "Config.h"
11#include "InputChunks.h"
12#include "InputElement.h"
13#include "WriterUtils.h"
14#include "lld/Common/CommonLinkerContext.h"
15#include <optional>
16
17#define DEBUG_TYPE "lld"
18
19using namespace llvm;
20using namespace llvm::wasm;
21using namespace llvm::object;
22
23namespace lld::wasm {
// Namespace-scope pointer to a SymbolTable. It is only declared here; nothing
// in the visible part of this file assigns it.
SymbolTable *symtab;
25
26void SymbolTable::addFile(InputFile *file, StringRef symName) {
27 log(msg: "Processing: " + toString(file));
28
29 // Lazy object file
30 if (file->lazy) {
31 if (auto *f = dyn_cast<BitcodeFile>(Val: file)) {
32 ctx.lazyBitcodeFiles.push_back(Elt: f);
33 f->parseLazy();
34 } else {
35 cast<ObjFile>(Val: file)->parseLazy();
36 }
37 return;
38 }
39
40 // .so file
41 if (auto *f = dyn_cast<SharedFile>(Val: file)) {
42 // If we are not reporting undefined symbols that we don't actualy
43 // parse the shared library symbol table.
44 f->parse();
45 ctx.sharedFiles.push_back(Elt: f);
46 return;
47 }
48
49 // stub file
50 if (auto *f = dyn_cast<StubFile>(Val: file)) {
51 f->parse();
52 ctx.stubFiles.push_back(Elt: f);
53 return;
54 }
55
56 if (ctx.arg.trace)
57 message(msg: toString(file));
58
59 // LLVM bitcode file
60 if (auto *f = dyn_cast<BitcodeFile>(Val: file)) {
61 // This order, first adding to `bitcodeFiles` and then parsing is necessary.
62 // See https://github.com/llvm/llvm-project/pull/73095
63 ctx.bitcodeFiles.push_back(Elt: f);
64 f->parse(symName);
65 return;
66 }
67
68 // Regular object file
69 auto *f = cast<ObjFile>(Val: file);
70 f->parse(ignoreComdats: false);
71 ctx.objectFiles.push_back(Elt: f);
72}
73
74// This function is where all the optimizations of link-time
75// optimization happens. When LTO is in use, some input files are
76// not in native object file format but in the LLVM bitcode format.
77// This function compiles bitcode files into a few big native files
78// using LLVM functions and replaces bitcode symbols with the results.
79// Because all bitcode files that the program consists of are passed
80// to the compiler at once, it can do whole-program optimization.
81void SymbolTable::compileBitcodeFiles() {
82 // Prevent further LTO objects being included
83 BitcodeFile::doneLTO = true;
84
85 // Collect the bitcode library functions that are not safe to call because
86 // they were not yet brought in the link. (Such symbols are lazy.)
87 llvm::BumpPtrAllocator alloc;
88 llvm::StringSaver saver(alloc);
89 SmallVector<StringRef> bitcodeLibFuncs;
90 if (!ctx.bitcodeFiles.empty()) {
91 // Triple must be captured before the bitcode is moved into the compiler.
92 // Note that the below assumes that the set of possible libfuncs is
93 // equivalent for all bitcode translation units.
94 llvm::Triple tt =
95 llvm::Triple(ctx.bitcodeFiles.front()->obj->getTargetTriple());
96 for (StringRef libFunc : llvm::lto::LTO::getLibFuncSymbols(TT: tt, Saver&: saver)) {
97 if (Symbol *sym = find(name: libFunc)) {
98 if (auto *lazy = dyn_cast<LazySymbol>(Val: sym)) {
99 if (isa<BitcodeFile>(Val: lazy->getFile()))
100 bitcodeLibFuncs.push_back(Elt: libFunc);
101 }
102 }
103 }
104 }
105
106 // Compile bitcode files and replace bitcode symbols.
107 lto.reset(p: new BitcodeCompiler);
108 lto->setBitcodeLibFuncs(bitcodeLibFuncs);
109
110 for (BitcodeFile *f : ctx.bitcodeFiles)
111 lto->add(f&: *f);
112
113 for (auto &file : lto->compile()) {
114 auto *obj = cast<ObjFile>(Val: file);
115 obj->parse(ignoreComdats: true);
116 ctx.objectFiles.push_back(Elt: obj);
117 }
118}
119
120Symbol *SymbolTable::find(StringRef name) {
121 auto it = symMap.find(Val: CachedHashStringRef(name));
122 if (it == symMap.end() || it->second == -1)
123 return nullptr;
124 return symVector[it->second];
125}
126
127void SymbolTable::replace(StringRef name, Symbol *sym) {
128 auto it = symMap.find(Val: CachedHashStringRef(name));
129 symVector[it->second] = sym;
130}
131
132std::pair<Symbol *, bool> SymbolTable::insertName(StringRef name) {
133 bool trace = false;
134 auto p = symMap.insert(KV: {CachedHashStringRef(name), (int)symVector.size()});
135 int &symIndex = p.first->second;
136 bool isNew = p.second;
137 if (symIndex == -1) {
138 symIndex = symVector.size();
139 trace = true;
140 isNew = true;
141 }
142
143 if (!isNew)
144 return {symVector[symIndex], false};
145
146 Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
147 sym->isUsedInRegularObj = false;
148 sym->canInline = true;
149 sym->traced = trace;
150 sym->forceExport = false;
151 sym->referenced = !ctx.arg.gcSections;
152 symVector.emplace_back(args&: sym);
153 return {sym, true};
154}
155
156std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
157 const InputFile *file) {
158 Symbol *s;
159 bool wasInserted;
160 std::tie(args&: s, args&: wasInserted) = insertName(name);
161
162 if (!file || file->kind() == InputFile::ObjectKind)
163 s->isUsedInRegularObj = true;
164
165 return {s, wasInserted};
166}
167
168static bool isBitcodeSymbol(const Symbol *symbol) {
169 return symbol->getFile() &&
170 symbol->getFile()->kind() == InputFile::BitcodeKind;
171}
172
173static void reportTypeError(const Symbol *existing, const InputFile *file,
174 llvm::wasm::WasmSymbolType type) {
175 error(msg: "symbol type mismatch: " + toString(sym: *existing) + "\n>>> defined as " +
176 toString(type: existing->getWasmType()) + " in " +
177 toString(file: existing->getFile()) + "\n>>> defined as " + toString(type) +
178 " in " + toString(file));
179}
180
181// Check the type of new symbol matches that of the symbol is replacing.
182// Returns true if the function types match, false is there is a signature
183// mismatch.
184static bool signatureMatches(FunctionSymbol *existing,
185 const WasmSignature *newSig) {
186 const WasmSignature *oldSig = existing->signature;
187
188 // If either function is missing a signature (this happens for bitcode
189 // symbols) then assume they match. Any mismatch will be reported later
190 // when the LTO objects are added.
191 if (!newSig || !oldSig)
192 return true;
193
194 return *newSig == *oldSig;
195}
196
197static void checkGlobalType(const Symbol *existing, const InputFile *file,
198 const WasmGlobalType *newType) {
199 if (!isa<GlobalSymbol>(Val: existing)) {
200 if (isBitcodeSymbol(symbol: existing))
201 return;
202 reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_GLOBAL);
203 return;
204 }
205
206 const WasmGlobalType *oldType = cast<GlobalSymbol>(Val: existing)->getGlobalType();
207 if (*newType != *oldType) {
208 error(msg: "Global type mismatch: " + existing->getName() + "\n>>> defined as " +
209 toString(type: *oldType) + " in " + toString(file: existing->getFile()) +
210 "\n>>> defined as " + toString(type: *newType) + " in " + toString(file));
211 }
212}
213
214static void checkTagType(const Symbol *existing, const InputFile *file,
215 const WasmSignature *newSig) {
216 if (!isa<TagSymbol>(Val: existing)) {
217 if (isBitcodeSymbol(symbol: existing))
218 return;
219 reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_TAG);
220 return;
221 }
222
223 const auto *existingTag = cast<TagSymbol>(Val: existing);
224
225 const WasmSignature *oldSig = existingTag->signature;
226 if (*newSig != *oldSig)
227 warn(msg: "Tag signature mismatch: " + existing->getName() +
228 "\n>>> defined as " + toString(sig: *oldSig) + " in " +
229 toString(file: existing->getFile()) + "\n>>> defined as " +
230 toString(sig: *newSig) + " in " + toString(file));
231}
232
233static void checkTableType(const Symbol *existing, const InputFile *file,
234 const WasmTableType *newType) {
235 if (!isa<TableSymbol>(Val: existing)) {
236 if (isBitcodeSymbol(symbol: existing))
237 return;
238 reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_TABLE);
239 return;
240 }
241
242 const WasmTableType *oldType = cast<TableSymbol>(Val: existing)->getTableType();
243 if (newType->ElemType != oldType->ElemType) {
244 error(msg: "Table type mismatch: " + existing->getName() + "\n>>> defined as " +
245 toString(type: *oldType) + " in " + toString(file: existing->getFile()) +
246 "\n>>> defined as " + toString(type: *newType) + " in " + toString(file));
247 }
248 // FIXME: No assertions currently on the limits.
249}
250
251static void checkDataType(const Symbol *existing, const InputFile *file) {
252 if (!isa<DataSymbol>(Val: existing) && !isBitcodeSymbol(symbol: existing))
253 reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_DATA);
254}
255
256DefinedFunction *SymbolTable::addSyntheticFunction(StringRef name,
257 uint32_t flags,
258 InputFunction *function) {
259 LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << name << "\n");
260 assert(!find(name));
261 ctx.syntheticFunctions.emplace_back(Args&: function);
262 return replaceSymbol<DefinedFunction>(s: insertName(name).first, arg&: name, arg&: flags,
263 arg: nullptr, arg&: function);
264}
265
266// Adds an optional, linker generated, data symbol. The symbol will only be
267// added if there is an undefine reference to it, or if it is explicitly
268// exported via the --export flag. Otherwise we don't add the symbol and return
269// nullptr.
270DefinedData *SymbolTable::addOptionalDataSymbol(StringRef name,
271 uint64_t value) {
272 Symbol *s = find(name);
273 if (!s && (ctx.arg.exportAll || ctx.arg.exportedSymbols.contains(key: name)))
274 s = insertName(name).first;
275 else if (!s || s->isDefined())
276 return nullptr;
277 LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << name << "\n");
278 auto *rtn = replaceSymbol<DefinedData>(
279 s, arg&: name, arg: WASM_SYMBOL_VISIBILITY_HIDDEN | WASM_SYMBOL_ABSOLUTE);
280 rtn->setVA(value);
281 rtn->referenced = true;
282 return rtn;
283}
284
285DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef name,
286 uint32_t flags) {
287 LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << name << "\n");
288 assert(!find(name));
289 return replaceSymbol<DefinedData>(s: insertName(name).first, arg&: name,
290 arg: flags | WASM_SYMBOL_ABSOLUTE);
291}
292
293DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef name, uint32_t flags,
294 InputGlobal *global) {
295 LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << name << " -> " << global
296 << "\n");
297 assert(!find(name));
298 ctx.syntheticGlobals.emplace_back(Args&: global);
299 return replaceSymbol<DefinedGlobal>(s: insertName(name).first, arg&: name, arg&: flags,
300 arg: nullptr, arg&: global);
301}
302
303DefinedGlobal *SymbolTable::addOptionalGlobalSymbol(StringRef name,
304 InputGlobal *global) {
305 Symbol *s = find(name);
306 if (!s && (ctx.arg.exportAll || ctx.arg.exportedSymbols.contains(key: name)))
307 s = insertName(name).first;
308 else if (!s || s->isDefined())
309 return nullptr;
310 LLVM_DEBUG(dbgs() << "addOptionalGlobalSymbol: " << name << " -> " << global
311 << "\n");
312 ctx.syntheticGlobals.emplace_back(Args&: global);
313 return replaceSymbol<DefinedGlobal>(s, arg&: name, arg: WASM_SYMBOL_VISIBILITY_HIDDEN,
314 arg: nullptr, arg&: global);
315}
316
317DefinedTable *SymbolTable::addSyntheticTable(StringRef name, uint32_t flags,
318 InputTable *table) {
319 LLVM_DEBUG(dbgs() << "addSyntheticTable: " << name << " -> " << table
320 << "\n");
321 Symbol *s = find(name);
322 assert(!s || s->isUndefined());
323 if (!s)
324 s = insertName(name).first;
325 ctx.syntheticTables.emplace_back(Args&: table);
326 return replaceSymbol<DefinedTable>(s, arg&: name, arg&: flags, arg: nullptr, arg&: table);
327}
328
329static bool shouldReplace(const Symbol *existing, InputFile *newFile,
330 uint32_t newFlags) {
331 // If existing symbol is undefined, replace it.
332 if (!existing->isDefined()) {
333 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: "
334 << existing->getName() << "\n");
335 return true;
336 }
337
338 // Now we have two defined symbols. If the new one is weak, we can ignore it.
339 if ((newFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
340 LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n");
341 return false;
342 }
343
344 // If the existing symbol is weak, we should replace it.
345 if (existing->isWeak()) {
346 LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n");
347 return true;
348 }
349
350 // Similarly with shared symbols
351 if (existing->isShared()) {
352 LLVM_DEBUG(dbgs() << "replacing existing shared symbol\n");
353 return true;
354 }
355
356 // Neither symbol is week. They conflict.
357 if (ctx.arg.allowMultipleDefinition)
358 return false;
359
360 errorOrWarn(msg: "duplicate symbol: " + toString(sym: *existing) + "\n>>> defined in " +
361 toString(file: existing->getFile()) + "\n>>> defined in " +
362 toString(file: newFile));
363 return true;
364}
365
366static void reportFunctionSignatureMismatch(StringRef symName,
367 FunctionSymbol *sym,
368 const WasmSignature *signature,
369 InputFile *file,
370 bool isError = true) {
371 std::string msg =
372 ("function signature mismatch: " + symName + "\n>>> defined as " +
373 toString(sig: *sym->signature) + " in " + toString(file: sym->getFile()) +
374 "\n>>> defined as " + toString(sig: *signature) + " in " + toString(file))
375 .str();
376 if (isError)
377 error(msg);
378 else
379 warn(msg);
380}
381
382static void reportFunctionSignatureMismatch(StringRef symName,
383 FunctionSymbol *a,
384 FunctionSymbol *b,
385 bool isError = true) {
386 reportFunctionSignatureMismatch(symName, sym: a, signature: b->signature, file: b->getFile(),
387 isError);
388}
389
390Symbol *SymbolTable::addSharedTag(StringRef name, uint32_t flags,
391 InputFile *file, const WasmSignature *sig) {
392 LLVM_DEBUG(dbgs() << "addSharedTag: " << name << " [" << toString(*sig)
393 << "]\n");
394 Symbol *s;
395 bool wasInserted;
396 std::tie(args&: s, args&: wasInserted) = insert(name, file);
397
398 auto replaceSym = [&](Symbol *sym) {
399 replaceSymbol<SharedTagSymbol>(s: sym, arg&: name, arg&: flags, arg&: file, arg&: sig);
400 };
401
402 // same as addSharedFunction, but this is in its own function
403 if (wasInserted || s->isLazy()) {
404 replaceSym(s);
405 return s;
406 }
407
408 auto *existingTag = dyn_cast<TagSymbol>(Val: s);
409 if (!existingTag) {
410 reportTypeError(existing: s, file, type: WASM_SYMBOL_TYPE_TAG);
411 return s;
412 }
413
414 // Shared symbols should never replace locally-defined ones
415 if (s->isDefined())
416 return s;
417
418 // undefined existing sym
419 const WasmSignature *oldSig = existingTag->signature;
420 if (oldSig && sig && *oldSig != *sig)
421 error(msg: "Tag signature mismatch: " + name + "\n>>> defined as " +
422 toString(sig: *oldSig) + " in " + toString(file: existingTag->getFile()) +
423 "\n>>> defined as " + toString(sig: *sig) + " in " + toString(file));
424 replaceSym(s);
425 return s;
426}
427
428Symbol *SymbolTable::addSharedFunction(StringRef name, uint32_t flags,
429 InputFile *file,
430 const WasmSignature *sig) {
431 LLVM_DEBUG(dbgs() << "addSharedFunction: " << name << " [" << toString(*sig)
432 << "]\n");
433 Symbol *s;
434 bool wasInserted;
435 std::tie(args&: s, args&: wasInserted) = insert(name, file);
436
437 auto replaceSym = [&](Symbol *sym) {
438 replaceSymbol<SharedFunctionSymbol>(s: sym, arg&: name, arg&: flags, arg&: file, arg&: sig);
439 };
440
441 if (wasInserted || s->isLazy()) {
442 replaceSym(s);
443 return s;
444 }
445
446 auto existingFunction = dyn_cast<FunctionSymbol>(Val: s);
447 if (!existingFunction) {
448 reportTypeError(existing: s, file, type: WASM_SYMBOL_TYPE_FUNCTION);
449 return s;
450 }
451
452 // Shared symbols should never replace locally-defined ones
453 if (s->isDefined())
454 return s;
455
456 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " << s->getName()
457 << "\n");
458
459 bool checkSig = true;
460 if (auto ud = dyn_cast<UndefinedFunction>(Val: existingFunction))
461 checkSig = ud->isCalledDirectly;
462
463 if (checkSig && !signatureMatches(existing: existingFunction, newSig: sig)) {
464 if (ctx.arg.shlibSigCheck) {
465 reportFunctionSignatureMismatch(symName: name, sym: existingFunction, signature: sig, file);
466 } else {
467 // With --no-shlib-sigcheck we ignore the signature of the function as
468 // defined by the shared library and instead use the signature as
469 // expected by the program being linked.
470 sig = existingFunction->signature;
471 }
472 }
473
474 replaceSym(s);
475 return s;
476}
477
478Symbol *SymbolTable::addSharedData(StringRef name, uint32_t flags,
479 InputFile *file) {
480 LLVM_DEBUG(dbgs() << "addSharedData: " << name << "\n");
481 Symbol *s;
482 bool wasInserted;
483 std::tie(args&: s, args&: wasInserted) = insert(name, file);
484
485 if (wasInserted || s->isLazy()) {
486 replaceSymbol<SharedData>(s, arg&: name, arg&: flags, arg&: file);
487 return s;
488 }
489
490 // Shared symbols should never replace locally-defined ones
491 if (s->isDefined())
492 return s;
493
494 checkDataType(existing: s, file);
495 replaceSymbol<SharedData>(s, arg&: name, arg&: flags, arg&: file);
496 return s;
497}
498
499Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags,
500 InputFile *file,
501 InputFunction *function) {
502 LLVM_DEBUG(dbgs() << "addDefinedFunction: " << name << " ["
503 << (function ? toString(function->signature) : "none")
504 << "]\n");
505 Symbol *s;
506 bool wasInserted;
507 std::tie(args&: s, args&: wasInserted) = insert(name, file);
508
509 auto replaceSym = [&](Symbol *sym) {
510 // If the new defined function doesn't have signature (i.e. bitcode
511 // functions) but the old symbol does, then preserve the old signature
512 const WasmSignature *oldSig = s->getSignature();
513 auto *newSym =
514 replaceSymbol<DefinedFunction>(s: sym, arg&: name, arg&: flags, arg&: file, arg&: function);
515 if (!newSym->signature)
516 newSym->signature = oldSig;
517 };
518
519 if (wasInserted || s->isLazy()) {
520 replaceSym(s);
521 return s;
522 }
523
524 auto existingFunction = dyn_cast<FunctionSymbol>(Val: s);
525 if (!existingFunction) {
526 if (isBitcodeSymbol(symbol: s)) {
527 replaceSym(s);
528 return s;
529 }
530 reportTypeError(existing: s, file, type: WASM_SYMBOL_TYPE_FUNCTION);
531 return s;
532 }
533
534 bool checkSig = true;
535 if (auto ud = dyn_cast<UndefinedFunction>(Val: existingFunction))
536 checkSig = ud->isCalledDirectly;
537
538 if (checkSig && function &&
539 !signatureMatches(existing: existingFunction, newSig: &function->signature)) {
540 Symbol *variant;
541 if (getFunctionVariant(sym: s, sig: &function->signature, file, out: &variant))
542 // New variant, always replace
543 replaceSym(variant);
544 else if (shouldReplace(existing: s, newFile: file, newFlags: flags))
545 // Variant already exists, replace it after checking shouldReplace
546 replaceSym(variant);
547
548 // This variant we found take the place in the symbol table as the primary
549 // variant.
550 replace(name, sym: variant);
551 return variant;
552 }
553
554 // Existing function with matching signature.
555 if (shouldReplace(existing: s, newFile: file, newFlags: flags))
556 replaceSym(s);
557
558 return s;
559}
560
561Symbol *SymbolTable::addDefinedData(StringRef name, uint32_t flags,
562 InputFile *file, InputChunk *segment,
563 uint64_t address, uint64_t size) {
564 LLVM_DEBUG(dbgs() << "addDefinedData:" << name << " addr:" << address
565 << "\n");
566 Symbol *s;
567 bool wasInserted;
568 std::tie(args&: s, args&: wasInserted) = insert(name, file);
569
570 auto replaceSym = [&]() {
571 replaceSymbol<DefinedData>(s, arg&: name, arg&: flags, arg&: file, arg&: segment, arg&: address, arg&: size);
572 };
573
574 if (wasInserted || s->isLazy()) {
575 replaceSym();
576 return s;
577 }
578
579 checkDataType(existing: s, file);
580
581 if (shouldReplace(existing: s, newFile: file, newFlags: flags))
582 replaceSym();
583 return s;
584}
585
586Symbol *SymbolTable::addDefinedGlobal(StringRef name, uint32_t flags,
587 InputFile *file, InputGlobal *global) {
588 LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << name << "\n");
589
590 Symbol *s;
591 bool wasInserted;
592 std::tie(args&: s, args&: wasInserted) = insert(name, file);
593
594 auto replaceSym = [&]() {
595 replaceSymbol<DefinedGlobal>(s, arg&: name, arg&: flags, arg&: file, arg&: global);
596 };
597
598 if (wasInserted || s->isLazy()) {
599 replaceSym();
600 return s;
601 }
602
603 checkGlobalType(existing: s, file, newType: &global->getType());
604
605 if (shouldReplace(existing: s, newFile: file, newFlags: flags))
606 replaceSym();
607 return s;
608}
609
610Symbol *SymbolTable::addDefinedTag(StringRef name, uint32_t flags,
611 InputFile *file, InputTag *tag) {
612 LLVM_DEBUG(dbgs() << "addDefinedTag:" << name << "\n");
613
614 Symbol *s;
615 bool wasInserted;
616 std::tie(args&: s, args&: wasInserted) = insert(name, file);
617
618 auto replaceSym = [&]() {
619 replaceSymbol<DefinedTag>(s, arg&: name, arg&: flags, arg&: file, arg&: tag);
620 };
621
622 if (wasInserted || s->isLazy()) {
623 replaceSym();
624 return s;
625 }
626
627 checkTagType(existing: s, file, newSig: &tag->signature);
628
629 if (shouldReplace(existing: s, newFile: file, newFlags: flags))
630 replaceSym();
631 return s;
632}
633
634Symbol *SymbolTable::addDefinedTable(StringRef name, uint32_t flags,
635 InputFile *file, InputTable *table) {
636 LLVM_DEBUG(dbgs() << "addDefinedTable:" << name << "\n");
637
638 Symbol *s;
639 bool wasInserted;
640 std::tie(args&: s, args&: wasInserted) = insert(name, file);
641
642 auto replaceSym = [&]() {
643 replaceSymbol<DefinedTable>(s, arg&: name, arg&: flags, arg&: file, arg&: table);
644 };
645
646 if (wasInserted || s->isLazy()) {
647 replaceSym();
648 return s;
649 }
650
651 checkTableType(existing: s, file, newType: &table->getType());
652
653 if (shouldReplace(existing: s, newFile: file, newFlags: flags))
654 replaceSym();
655 return s;
656}
657
658// This function get called when an undefined symbol is added, and there is
659// already an existing one in the symbols table. In this case we check that
660// custom 'import-module' and 'import-field' symbol attributes agree.
661// With LTO these attributes are not available when the bitcode is read and only
662// become available when the LTO object is read. In this case we silently
663// replace the empty attributes with the valid ones.
664static void
665updateExistingUndefined(Symbol *existing, uint32_t flags, InputFile *file,
666 std::optional<StringRef> importName = {},
667 std::optional<StringRef> importModule = {}) {
668 if (importName) {
669 if (!existing->importName)
670 existing->importName = importName;
671 if (existing->importName != importName)
672 error(msg: "import name mismatch for symbol: " + toString(sym: *existing) +
673 "\n>>> defined as " + *existing->importName + " in " +
674 toString(file: existing->getFile()) + "\n>>> defined as " + *importName +
675 " in " + toString(file));
676 }
677
678 if (importModule) {
679 if (!existing->importModule)
680 existing->importModule = importModule;
681 if (existing->importModule != importModule)
682 error(msg: "import module mismatch for symbol: " + toString(sym: *existing) +
683 "\n>>> defined as " + *existing->importModule + " in " +
684 toString(file: existing->getFile()) + "\n>>> defined as " +
685 *importModule + " in " + toString(file));
686 }
687
688 // Update symbol binding, if the existing symbol is weak
689 uint32_t binding = flags & WASM_SYMBOL_BINDING_MASK;
690 if (existing->isWeak() && binding != WASM_SYMBOL_BINDING_WEAK) {
691 existing->flags = (existing->flags & ~WASM_SYMBOL_BINDING_MASK) | binding;
692 }
693
694 // Certain flags such as NO_STRIP should be maintianed if either old or
695 // new symbol is marked as such.
696 existing->flags |= flags & WASM_SYMBOL_NO_STRIP;
697}
698
699Symbol *SymbolTable::addUndefinedFunction(StringRef name,
700 std::optional<StringRef> importName,
701 std::optional<StringRef> importModule,
702 uint32_t flags, InputFile *file,
703 const WasmSignature *sig,
704 bool isCalledDirectly) {
705 LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << name << " ["
706 << (sig ? toString(*sig) : "none")
707 << "] IsCalledDirectly:" << isCalledDirectly << " flags=0x"
708 << utohexstr(flags) << "\n");
709 assert(flags & WASM_SYMBOL_UNDEFINED);
710
711 Symbol *s;
712 bool wasInserted;
713 std::tie(args&: s, args&: wasInserted) = insert(name, file);
714 if (s->traced)
715 printTraceSymbolUndefined(name, file);
716
717 auto replaceSym = [&]() {
718 replaceSymbol<UndefinedFunction>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags,
719 arg&: file, arg&: sig, arg&: isCalledDirectly);
720 };
721
722 if (wasInserted) {
723 replaceSym();
724 } else if (auto *lazy = dyn_cast<LazySymbol>(Val: s)) {
725 if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
726 lazy->setWeak();
727 lazy->signature = sig;
728 } else {
729 lazy->extract();
730 if (!ctx.arg.whyExtract.empty())
731 ctx.whyExtractRecords.emplace_back(Args: toString(file), Args: s->getFile(), Args&: *s);
732 }
733 } else {
734 auto existingFunction = dyn_cast<FunctionSymbol>(Val: s);
735 if (!existingFunction) {
736 reportTypeError(existing: s, file, type: WASM_SYMBOL_TYPE_FUNCTION);
737 return s;
738 }
739 if (!existingFunction->signature && sig)
740 existingFunction->signature = sig;
741 auto *existingUndefined = dyn_cast<UndefinedFunction>(Val: existingFunction);
742 if (isCalledDirectly && !signatureMatches(existing: existingFunction, newSig: sig)) {
743 if (existingFunction->isShared()) {
744 // Special handling for when the existing function is a shared symbol
745 if (ctx.arg.shlibSigCheck) {
746 reportFunctionSignatureMismatch(symName: name, sym: existingFunction, signature: sig, file);
747 } else {
748 existingFunction->signature = sig;
749 }
750 }
751 // If the existing undefined functions is not called directly then let
752 // this one take precedence. Otherwise the existing function is either
753 // directly called or defined, in which case we need a function variant.
754 else if (existingUndefined && !existingUndefined->isCalledDirectly)
755 replaceSym();
756 else if (getFunctionVariant(sym: s, sig, file, out: &s))
757 replaceSym();
758 }
759 if (existingUndefined) {
760 updateExistingUndefined(existing: existingUndefined, flags, file, importName,
761 importModule);
762 if (isCalledDirectly)
763 existingUndefined->isCalledDirectly = true;
764 }
765 }
766
767 return s;
768}
769
770Symbol *SymbolTable::addUndefinedData(StringRef name, uint32_t flags,
771 InputFile *file) {
772 LLVM_DEBUG(dbgs() << "addUndefinedData: " << name << "\n");
773 assert(flags & WASM_SYMBOL_UNDEFINED);
774
775 Symbol *s;
776 bool wasInserted;
777 std::tie(args&: s, args&: wasInserted) = insert(name, file);
778 if (s->traced)
779 printTraceSymbolUndefined(name, file);
780
781 if (wasInserted) {
782 replaceSymbol<UndefinedData>(s, arg&: name, arg&: flags, arg&: file);
783 } else if (auto *lazy = dyn_cast<LazySymbol>(Val: s)) {
784 if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK)
785 lazy->setWeak();
786 else
787 lazy->extract();
788 } else if (s->isDefined()) {
789 checkDataType(existing: s, file);
790 } else {
791 updateExistingUndefined(existing: s, flags, file);
792 }
793 return s;
794}
795
796Symbol *SymbolTable::addUndefinedGlobal(StringRef name,
797 std::optional<StringRef> importName,
798 std::optional<StringRef> importModule,
799 uint32_t flags, InputFile *file,
800 const WasmGlobalType *type) {
801 LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << name << "\n");
802 assert(flags & WASM_SYMBOL_UNDEFINED);
803
804 Symbol *s;
805 bool wasInserted;
806 std::tie(args&: s, args&: wasInserted) = insert(name, file);
807 if (s->traced)
808 printTraceSymbolUndefined(name, file);
809
810 if (wasInserted)
811 replaceSymbol<UndefinedGlobal>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags,
812 arg&: file, arg&: type);
813 else if (auto *lazy = dyn_cast<LazySymbol>(Val: s))
814 lazy->extract();
815 else if (s->isDefined())
816 checkGlobalType(existing: s, file, newType: type);
817 else
818 updateExistingUndefined(existing: s, flags, file, importName, importModule);
819 return s;
820}
821
822Symbol *SymbolTable::addUndefinedTable(StringRef name,
823 std::optional<StringRef> importName,
824 std::optional<StringRef> importModule,
825 uint32_t flags, InputFile *file,
826 const WasmTableType *type) {
827 LLVM_DEBUG(dbgs() << "addUndefinedTable: " << name << "\n");
828 assert(flags & WASM_SYMBOL_UNDEFINED);
829
830 Symbol *s;
831 bool wasInserted;
832 std::tie(args&: s, args&: wasInserted) = insert(name, file);
833 if (s->traced)
834 printTraceSymbolUndefined(name, file);
835
836 if (wasInserted)
837 replaceSymbol<UndefinedTable>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags,
838 arg&: file, arg&: type);
839 else if (auto *lazy = dyn_cast<LazySymbol>(Val: s))
840 lazy->extract();
841 else if (s->isDefined())
842 checkTableType(existing: s, file, newType: type);
843 else
844 updateExistingUndefined(existing: s, flags, file, importName, importModule);
845 return s;
846}
847
848Symbol *SymbolTable::addUndefinedTag(StringRef name,
849 std::optional<StringRef> importName,
850 std::optional<StringRef> importModule,
851 uint32_t flags, InputFile *file,
852 const WasmSignature *sig) {
853 LLVM_DEBUG(dbgs() << "addUndefinedTag: " << name << "\n");
854 assert(flags & WASM_SYMBOL_UNDEFINED);
855
856 Symbol *s;
857 bool wasInserted;
858 std::tie(args&: s, args&: wasInserted) = insert(name, file);
859 if (s->traced)
860 printTraceSymbolUndefined(name, file);
861
862 if (wasInserted)
863 replaceSymbol<UndefinedTag>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags, arg&: file,
864 arg&: sig);
865 else if (auto *lazy = dyn_cast<LazySymbol>(Val: s))
866 lazy->extract();
867 else if (s->isDefined())
868 checkTagType(existing: s, file, newSig: sig);
869 else
870 updateExistingUndefined(existing: s, flags, file, importName, importModule);
871 return s;
872}
873
874TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) {
875 LLVM_DEBUG(llvm::dbgs() << "createUndefinedIndirectFunctionTable\n");
876 WasmLimits limits{.Flags: 0, .Minimum: 0, .Maximum: 0, .PageSize: 0}; // Set by the writer.
877 WasmTableType *type = make<WasmTableType>();
878 type->ElemType = ValType::FUNCREF;
879 type->Limits = limits;
880 uint32_t flags = ctx.arg.exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
881 flags |= WASM_SYMBOL_UNDEFINED;
882 Symbol *sym =
883 addUndefinedTable(name, importName: name, importModule: defaultModule, flags, file: nullptr, type);
884 sym->markLive();
885 sym->forceExport = ctx.arg.exportTable;
886 return cast<TableSymbol>(Val: sym);
887}
888
889TableSymbol *SymbolTable::createDefinedIndirectFunctionTable(StringRef name) {
890 LLVM_DEBUG(llvm::dbgs() << "createDefinedIndirectFunctionTable\n");
891 const uint32_t invalidIndex = -1;
892 WasmLimits limits{.Flags: 0, .Minimum: 0, .Maximum: 0, .PageSize: 0}; // Set by the writer.
893 WasmTableType type{.ElemType: ValType::FUNCREF, .Limits: limits};
894 WasmTable desc{.Index: invalidIndex, .Type: type, .SymbolName: name};
895 InputTable *table = make<InputTable>(args&: desc, args: nullptr);
896 uint32_t flags = ctx.arg.exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
897 TableSymbol *sym = addSyntheticTable(name, flags, table);
898 sym->markLive();
899 sym->forceExport = ctx.arg.exportTable;
900 return sym;
901}
902
903// Whether or not we need an indirect function table is usually a function of
904// whether an input declares a need for it. However sometimes it's possible for
905// no input to need the indirect function table, but then a late
906// addInternalGOTEntry causes a function to be allocated an address. In that
907// case address we synthesize a definition at the last minute.
908TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
909 Symbol *existing = find(name: functionTableName);
910 if (existing) {
911 if (!isa<TableSymbol>(Val: existing)) {
912 error(msg: Twine("reserved symbol must be of type table: `") +
913 functionTableName + "`");
914 return nullptr;
915 }
916 if (existing->isDefined()) {
917 error(msg: Twine("reserved symbol must not be defined in input files: `") +
918 functionTableName + "`");
919 return nullptr;
920 }
921 }
922
923 if (ctx.arg.importTable) {
924 if (existing) {
925 existing->importModule = defaultModule;
926 existing->importName = functionTableName;
927 return cast<TableSymbol>(Val: existing);
928 }
929 if (required)
930 return createUndefinedIndirectFunctionTable(name: functionTableName);
931 } else if ((existing && existing->isLive()) || ctx.arg.exportTable ||
932 required) {
933 // A defined table is required. Either because the user request an exported
934 // table or because the table symbol is already live. The existing table is
935 // guaranteed to be undefined due to the check above.
936 return createDefinedIndirectFunctionTable(name: functionTableName);
937 }
938
939 // An indirect function table will only be present in the symbol table if
940 // needed by a reloc; if we get here, we don't need one.
941 return nullptr;
942}
943
944void SymbolTable::addLazy(StringRef name, InputFile *file) {
945 LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n");
946
947 Symbol *s;
948 bool wasInserted;
949 std::tie(args&: s, args&: wasInserted) = insertName(name);
950
951 if (wasInserted) {
952 replaceSymbol<LazySymbol>(s, arg&: name, arg: 0, arg&: file);
953 return;
954 }
955
956 if (!s->isUndefined())
957 return;
958
959 // The existing symbol is undefined, load a new one from the archive,
960 // unless the existing symbol is weak in which case replace the undefined
961 // symbols with a LazySymbol.
962 if (s->isWeak()) {
963 const WasmSignature *oldSig = nullptr;
964 // In the case of an UndefinedFunction we need to preserve the expected
965 // signature.
966 if (auto *f = dyn_cast<UndefinedFunction>(Val: s))
967 oldSig = f->signature;
968 LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
969 auto newSym =
970 replaceSymbol<LazySymbol>(s, arg&: name, arg: WASM_SYMBOL_BINDING_WEAK, arg&: file);
971 newSym->signature = oldSig;
972 return;
973 }
974
975 LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
976 const InputFile *oldFile = s->getFile();
977 LazySymbol(name, 0, file).extract();
978 if (!ctx.arg.whyExtract.empty())
979 ctx.whyExtractRecords.emplace_back(Args: toString(file: oldFile), Args: s->getFile(), Args&: *s);
980}
981
982bool SymbolTable::addComdat(StringRef name) {
983 return comdatGroups.insert(V: CachedHashStringRef(name)).second;
984}
985
986// The new signature doesn't match. Create a variant to the symbol with the
987// signature encoded in the name and return that instead. These symbols are
988// then unified later in handleSymbolVariants.
989bool SymbolTable::getFunctionVariant(Symbol *sym, const WasmSignature *sig,
990 const InputFile *file, Symbol **out) {
991 LLVM_DEBUG(dbgs() << "getFunctionVariant: " << sym->getName() << " -> "
992 << " " << toString(*sig) << "\n");
993 Symbol *variant = nullptr;
994
995 // Linear search through symbol variants. Should never be more than two
996 // or three entries here.
997 auto &variants = symVariants[CachedHashStringRef(sym->getName())];
998 if (variants.empty())
999 variants.push_back(x: sym);
1000
1001 for (Symbol *v : variants) {
1002 if (*v->getSignature() == *sig) {
1003 variant = v;
1004 break;
1005 }
1006 }
1007
1008 bool wasAdded = !variant;
1009 if (wasAdded) {
1010 // Create a new variant;
1011 LLVM_DEBUG(dbgs() << "added new variant\n");
1012 variant = reinterpret_cast<Symbol *>(make<SymbolUnion>());
1013 variant->isUsedInRegularObj =
1014 !file || file->kind() == InputFile::ObjectKind;
1015 variant->canInline = true;
1016 variant->traced = false;
1017 variant->forceExport = false;
1018 variants.push_back(x: variant);
1019 } else {
1020 LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*variant)
1021 << "\n");
1022 assert(*variant->getSignature() == *sig);
1023 }
1024
1025 *out = variant;
1026 return wasAdded;
1027}
1028
1029// Set a flag for --trace-symbol so that we can print out a log message
1030// if a new symbol with the same name is inserted into the symbol table.
1031void SymbolTable::trace(StringRef name) {
1032 symMap.insert(KV: {CachedHashStringRef(name), -1});
1033}
1034
1035void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
1036 // Swap symbols as instructed by -wrap.
1037 int &origIdx = symMap[CachedHashStringRef(sym->getName())];
1038 int &realIdx = symMap[CachedHashStringRef(real->getName())];
1039 int &wrapIdx = symMap[CachedHashStringRef(wrap->getName())];
1040 LLVM_DEBUG(dbgs() << "wrap: " << sym->getName() << "\n");
1041
1042 // Anyone looking up __real symbols should get the original
1043 realIdx = origIdx;
1044 // Anyone looking up the original should get the __wrap symbol
1045 origIdx = wrapIdx;
1046}
1047
// Encoded body installed into synthetic stub functions by
// replaceWithUnreachable.
static const uint8_t unreachableFn[] = {
    0x03, // ULEB length
    0x00, // ULEB num locals
    0x00, // opcode unreachable
    0x0b, // opcode end
};
1052
1053// Replace the given symbol body with an unreachable function.
1054// This is used by handleWeakUndefines in order to generate a callable
1055// equivalent of an undefined function and also handleSymbolVariants for
1056// undefined functions that don't match the signature of the definition.
1057InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym,
1058 const WasmSignature &sig,
1059 StringRef debugName) {
1060 auto *func = make<SyntheticFunction>(args: sig, args: sym->getName(), args&: debugName);
1061 func->setBody(unreachableFn);
1062 ctx.syntheticFunctions.emplace_back(Args&: func);
1063 // Mark new symbols as local. For relocatable output we don't want them
1064 // to be exported outside the object file.
1065 replaceSymbol<DefinedFunction>(s: sym, arg&: debugName, arg: WASM_SYMBOL_BINDING_LOCAL,
1066 arg: nullptr, arg&: func);
1067 // Ensure the stub function doesn't get a table entry. Its address
1068 // should always compare equal to the null pointer.
1069 sym->isStub = true;
1070 return func;
1071}
1072
1073void SymbolTable::replaceWithUndefined(Symbol *sym) {
1074 // Add a synthetic dummy for weak undefined functions. These dummies will
1075 // be GC'd if not used as the target of any "call" instructions.
1076 StringRef debugName = saver().save(S: "undefined_weak:" + toString(sym: *sym));
1077 replaceWithUnreachable(sym, sig: *sym->getSignature(), debugName);
1078 // Hide our dummy to prevent export.
1079 sym->setHidden(true);
1080}
1081
1082// For weak undefined functions, there may be "call" instructions that reference
1083// the symbol. In this case, we need to synthesise a dummy/stub function that
1084// will abort at runtime, so that relocations can still provided an operand to
1085// the call instruction that passes Wasm validation.
1086void SymbolTable::handleWeakUndefines() {
1087 for (Symbol *sym : symbols()) {
1088 if (sym->isUndefWeak() && sym->isUsedInRegularObj) {
1089 if (sym->getSignature()) {
1090 replaceWithUndefined(sym);
1091 } else {
1092 // It is possible for undefined functions not to have a signature (eg.
1093 // if added via "--undefined"), but weak undefined ones do have a
1094 // signature. Lazy symbols may not be functions and therefore Sig can
1095 // still be null in some circumstance.
1096 assert(!isa<FunctionSymbol>(sym));
1097 }
1098 }
1099 }
1100}
1101
1102DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) {
1103 if (auto it = stubFunctions.find(Val: sig); it != stubFunctions.end())
1104 return it->second;
1105 LLVM_DEBUG(dbgs() << "createUndefinedStub: " << toString(sig) << "\n");
1106 auto *sym = reinterpret_cast<DefinedFunction *>(make<SymbolUnion>());
1107 sym->isUsedInRegularObj = true;
1108 sym->canInline = true;
1109 sym->traced = false;
1110 sym->forceExport = false;
1111 sym->signature = &sig;
1112 replaceSymbol<DefinedFunction>(
1113 s: sym, arg: "undefined_stub", arg: WASM_SYMBOL_VISIBILITY_HIDDEN, arg: nullptr, arg: nullptr);
1114 replaceWithUnreachable(sym, sig, debugName: "undefined_stub");
1115 stubFunctions[sig] = sym;
1116 return sym;
1117}
1118
1119// Remove any variant symbols that were created due to function signature
1120// mismatches.
1121void SymbolTable::handleSymbolVariants() {
1122 for (auto pair : symVariants) {
1123 // Push the initial symbol onto the list of variants.
1124 StringRef symName = pair.first.val();
1125 std::vector<Symbol *> &variants = pair.second;
1126
1127#ifndef NDEBUG
1128 LLVM_DEBUG(dbgs() << "symbol with (" << variants.size()
1129 << ") variants: " << symName << "\n");
1130 for (auto *s : variants) {
1131 auto *f = cast<FunctionSymbol>(s);
1132 LLVM_DEBUG(dbgs() << " variant: " + f->getName() << " "
1133 << toString(*f->signature) << "\n");
1134 }
1135#endif
1136
1137 // Find the one definition.
1138 DefinedFunction *defined = nullptr;
1139 for (auto *symbol : variants) {
1140 if (auto f = dyn_cast<DefinedFunction>(Val: symbol)) {
1141 defined = f;
1142 break;
1143 }
1144 }
1145
1146 // If there are no definitions, and the undefined symbols disagree on
1147 // the signature, there is not we can do since we don't know which one
1148 // to use as the signature on the import.
1149 if (!defined) {
1150 reportFunctionSignatureMismatch(symName,
1151 a: cast<FunctionSymbol>(Val: variants[0]),
1152 b: cast<FunctionSymbol>(Val: variants[1]));
1153 return;
1154 }
1155
1156 for (auto *symbol : variants) {
1157 if (symbol != defined) {
1158 auto *f = cast<FunctionSymbol>(Val: symbol);
1159 reportFunctionSignatureMismatch(symName, a: f, b: defined, isError: false);
1160 StringRef debugName =
1161 saver().save(S: "signature_mismatch:" + toString(sym: *f));
1162 replaceWithUnreachable(sym: f, sig: *f->signature, debugName);
1163 }
1164 }
1165 }
1166}
1167
1168} // namespace lld::wasm
1169