1//===- SymbolTable.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "SymbolTable.h"
10#include "Config.h"
11#include "InputChunks.h"
12#include "InputElement.h"
13#include "WriterUtils.h"
14#include "lld/Common/CommonLinkerContext.h"
15#include <optional>
16
17#define DEBUG_TYPE "lld"
18
19using namespace llvm;
20using namespace llvm::wasm;
21using namespace llvm::object;
22
23namespace lld::wasm {
24SymbolTable *symtab;
25
26void SymbolTable::addFile(InputFile *file, StringRef symName) {
27 log(msg: "Processing: " + toString(file));
28
29 // Lazy object file
30 if (file->lazy) {
31 if (auto *f = dyn_cast<BitcodeFile>(Val: file)) {
32 f->parseLazy();
33 } else {
34 cast<ObjFile>(Val: file)->parseLazy();
35 }
36 return;
37 }
38
39 // .so file
40 if (auto *f = dyn_cast<SharedFile>(Val: file)) {
41 // If we are not reporting undefined symbols that we don't actualy
42 // parse the shared library symbol table.
43 f->parse();
44 ctx.sharedFiles.push_back(Elt: f);
45 return;
46 }
47
48 // stub file
49 if (auto *f = dyn_cast<StubFile>(Val: file)) {
50 f->parse();
51 ctx.stubFiles.push_back(Elt: f);
52 return;
53 }
54
55 if (config->trace)
56 message(msg: toString(file));
57
58 // LLVM bitcode file
59 if (auto *f = dyn_cast<BitcodeFile>(Val: file)) {
60 // This order, first adding to `bitcodeFiles` and then parsing is necessary.
61 // See https://github.com/llvm/llvm-project/pull/73095
62 ctx.bitcodeFiles.push_back(Elt: f);
63 f->parse(symName);
64 return;
65 }
66
67 // Regular object file
68 auto *f = cast<ObjFile>(Val: file);
69 f->parse(ignoreComdats: false);
70 ctx.objectFiles.push_back(Elt: f);
71}
72
73// This function is where all the optimizations of link-time
74// optimization happens. When LTO is in use, some input files are
75// not in native object file format but in the LLVM bitcode format.
76// This function compiles bitcode files into a few big native files
77// using LLVM functions and replaces bitcode symbols with the results.
78// Because all bitcode files that the program consists of are passed
79// to the compiler at once, it can do whole-program optimization.
80void SymbolTable::compileBitcodeFiles() {
81 // Prevent further LTO objects being included
82 BitcodeFile::doneLTO = true;
83
84 if (ctx.bitcodeFiles.empty())
85 return;
86
87 // Compile bitcode files and replace bitcode symbols.
88 lto.reset(p: new BitcodeCompiler);
89 for (BitcodeFile *f : ctx.bitcodeFiles)
90 lto->add(f&: *f);
91
92 for (StringRef filename : lto->compile()) {
93 auto *obj = make<ObjFile>(args: MemoryBufferRef(filename, "lto.tmp"), args: "");
94 obj->parse(ignoreComdats: true);
95 ctx.objectFiles.push_back(Elt: obj);
96 }
97}
98
99Symbol *SymbolTable::find(StringRef name) {
100 auto it = symMap.find(Val: CachedHashStringRef(name));
101 if (it == symMap.end() || it->second == -1)
102 return nullptr;
103 return symVector[it->second];
104}
105
106void SymbolTable::replace(StringRef name, Symbol* sym) {
107 auto it = symMap.find(Val: CachedHashStringRef(name));
108 symVector[it->second] = sym;
109}
110
111std::pair<Symbol *, bool> SymbolTable::insertName(StringRef name) {
112 bool trace = false;
113 auto p = symMap.insert(KV: {CachedHashStringRef(name), (int)symVector.size()});
114 int &symIndex = p.first->second;
115 bool isNew = p.second;
116 if (symIndex == -1) {
117 symIndex = symVector.size();
118 trace = true;
119 isNew = true;
120 }
121
122 if (!isNew)
123 return {symVector[symIndex], false};
124
125 Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
126 sym->isUsedInRegularObj = false;
127 sym->canInline = true;
128 sym->traced = trace;
129 sym->forceExport = false;
130 sym->referenced = !config->gcSections;
131 symVector.emplace_back(args&: sym);
132 return {sym, true};
133}
134
135std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
136 const InputFile *file) {
137 Symbol *s;
138 bool wasInserted;
139 std::tie(args&: s, args&: wasInserted) = insertName(name);
140
141 if (!file || file->kind() == InputFile::ObjectKind)
142 s->isUsedInRegularObj = true;
143
144 return {s, wasInserted};
145}
146
147static void reportTypeError(const Symbol *existing, const InputFile *file,
148 llvm::wasm::WasmSymbolType type) {
149 error(msg: "symbol type mismatch: " + toString(sym: *existing) + "\n>>> defined as " +
150 toString(type: existing->getWasmType()) + " in " +
151 toString(file: existing->getFile()) + "\n>>> defined as " + toString(type) +
152 " in " + toString(file));
153}
154
155// Check the type of new symbol matches that of the symbol is replacing.
156// Returns true if the function types match, false is there is a signature
157// mismatch.
158static bool signatureMatches(FunctionSymbol *existing,
159 const WasmSignature *newSig) {
160 const WasmSignature *oldSig = existing->signature;
161
162 // If either function is missing a signature (this happens for bitcode
163 // symbols) then assume they match. Any mismatch will be reported later
164 // when the LTO objects are added.
165 if (!newSig || !oldSig)
166 return true;
167
168 return *newSig == *oldSig;
169}
170
171static void checkGlobalType(const Symbol *existing, const InputFile *file,
172 const WasmGlobalType *newType) {
173 if (!isa<GlobalSymbol>(Val: existing)) {
174 reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_GLOBAL);
175 return;
176 }
177
178 const WasmGlobalType *oldType = cast<GlobalSymbol>(Val: existing)->getGlobalType();
179 if (*newType != *oldType) {
180 error(msg: "Global type mismatch: " + existing->getName() + "\n>>> defined as " +
181 toString(type: *oldType) + " in " + toString(file: existing->getFile()) +
182 "\n>>> defined as " + toString(type: *newType) + " in " + toString(file));
183 }
184}
185
186static void checkTagType(const Symbol *existing, const InputFile *file,
187 const WasmSignature *newSig) {
188 const auto *existingTag = dyn_cast<TagSymbol>(Val: existing);
189 if (!isa<TagSymbol>(Val: existing)) {
190 reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_TAG);
191 return;
192 }
193
194 const WasmSignature *oldSig = existingTag->signature;
195 if (*newSig != *oldSig)
196 warn(msg: "Tag signature mismatch: " + existing->getName() +
197 "\n>>> defined as " + toString(sig: *oldSig) + " in " +
198 toString(file: existing->getFile()) + "\n>>> defined as " +
199 toString(sig: *newSig) + " in " + toString(file));
200}
201
202static void checkTableType(const Symbol *existing, const InputFile *file,
203 const WasmTableType *newType) {
204 if (!isa<TableSymbol>(Val: existing)) {
205 reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_TABLE);
206 return;
207 }
208
209 const WasmTableType *oldType = cast<TableSymbol>(Val: existing)->getTableType();
210 if (newType->ElemType != oldType->ElemType) {
211 error(msg: "Table type mismatch: " + existing->getName() + "\n>>> defined as " +
212 toString(type: *oldType) + " in " + toString(file: existing->getFile()) +
213 "\n>>> defined as " + toString(type: *newType) + " in " + toString(file));
214 }
215 // FIXME: No assertions currently on the limits.
216}
217
218static void checkDataType(const Symbol *existing, const InputFile *file) {
219 if (!isa<DataSymbol>(Val: existing))
220 reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_DATA);
221}
222
223DefinedFunction *SymbolTable::addSyntheticFunction(StringRef name,
224 uint32_t flags,
225 InputFunction *function) {
226 LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << name << "\n");
227 assert(!find(name));
228 ctx.syntheticFunctions.emplace_back(Args&: function);
229 return replaceSymbol<DefinedFunction>(s: insertName(name).first, arg&: name,
230 arg&: flags, arg: nullptr, arg&: function);
231}
232
233// Adds an optional, linker generated, data symbol. The symbol will only be
234// added if there is an undefine reference to it, or if it is explicitly
235// exported via the --export flag. Otherwise we don't add the symbol and return
236// nullptr.
237DefinedData *SymbolTable::addOptionalDataSymbol(StringRef name,
238 uint64_t value) {
239 Symbol *s = find(name);
240 if (!s && (config->exportAll || config->exportedSymbols.count(Key: name) != 0))
241 s = insertName(name).first;
242 else if (!s || s->isDefined())
243 return nullptr;
244 LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << name << "\n");
245 auto *rtn = replaceSymbol<DefinedData>(
246 s, arg&: name, arg: WASM_SYMBOL_VISIBILITY_HIDDEN | WASM_SYMBOL_ABSOLUTE);
247 rtn->setVA(value);
248 rtn->referenced = true;
249 return rtn;
250}
251
252DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef name,
253 uint32_t flags) {
254 LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << name << "\n");
255 assert(!find(name));
256 return replaceSymbol<DefinedData>(s: insertName(name).first, arg&: name,
257 arg: flags | WASM_SYMBOL_ABSOLUTE);
258}
259
260DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef name, uint32_t flags,
261 InputGlobal *global) {
262 LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << name << " -> " << global
263 << "\n");
264 assert(!find(name));
265 ctx.syntheticGlobals.emplace_back(Args&: global);
266 return replaceSymbol<DefinedGlobal>(s: insertName(name).first, arg&: name, arg&: flags,
267 arg: nullptr, arg&: global);
268}
269
270DefinedGlobal *SymbolTable::addOptionalGlobalSymbol(StringRef name,
271 InputGlobal *global) {
272 Symbol *s = find(name);
273 if (!s || s->isDefined())
274 return nullptr;
275 LLVM_DEBUG(dbgs() << "addOptionalGlobalSymbol: " << name << " -> " << global
276 << "\n");
277 ctx.syntheticGlobals.emplace_back(Args&: global);
278 return replaceSymbol<DefinedGlobal>(s, arg&: name, arg: WASM_SYMBOL_VISIBILITY_HIDDEN,
279 arg: nullptr, arg&: global);
280}
281
282DefinedTable *SymbolTable::addSyntheticTable(StringRef name, uint32_t flags,
283 InputTable *table) {
284 LLVM_DEBUG(dbgs() << "addSyntheticTable: " << name << " -> " << table
285 << "\n");
286 Symbol *s = find(name);
287 assert(!s || s->isUndefined());
288 if (!s)
289 s = insertName(name).first;
290 ctx.syntheticTables.emplace_back(Args&: table);
291 return replaceSymbol<DefinedTable>(s, arg&: name, arg&: flags, arg: nullptr, arg&: table);
292}
293
294static bool shouldReplace(const Symbol *existing, InputFile *newFile,
295 uint32_t newFlags) {
296 // If existing symbol is undefined, replace it.
297 if (!existing->isDefined()) {
298 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: "
299 << existing->getName() << "\n");
300 return true;
301 }
302
303 // Now we have two defined symbols. If the new one is weak, we can ignore it.
304 if ((newFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
305 LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n");
306 return false;
307 }
308
309 // If the existing symbol is weak, we should replace it.
310 if (existing->isWeak()) {
311 LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n");
312 return true;
313 }
314
315 // Similarly with shared symbols
316 if (existing->isShared()) {
317 LLVM_DEBUG(dbgs() << "replacing existing shared symbol\n");
318 return true;
319 }
320
321 // Neither symbol is week. They conflict.
322 error(msg: "duplicate symbol: " + toString(sym: *existing) + "\n>>> defined in " +
323 toString(file: existing->getFile()) + "\n>>> defined in " +
324 toString(file: newFile));
325 return true;
326}
327
328static void reportFunctionSignatureMismatch(StringRef symName,
329 FunctionSymbol *sym,
330 const WasmSignature *signature,
331 InputFile *file,
332 bool isError = true) {
333 std::string msg =
334 ("function signature mismatch: " + symName + "\n>>> defined as " +
335 toString(sig: *sym->signature) + " in " + toString(file: sym->getFile()) +
336 "\n>>> defined as " + toString(sig: *signature) + " in " + toString(file))
337 .str();
338 if (isError)
339 error(msg);
340 else
341 warn(msg);
342}
343
344static void reportFunctionSignatureMismatch(StringRef symName,
345 FunctionSymbol *a,
346 FunctionSymbol *b,
347 bool isError = true) {
348 reportFunctionSignatureMismatch(symName, sym: a, signature: b->signature, file: b->getFile(),
349 isError);
350}
351
352Symbol *SymbolTable::addSharedFunction(StringRef name, uint32_t flags,
353 InputFile *file,
354 const WasmSignature *sig) {
355 LLVM_DEBUG(dbgs() << "addSharedFunction: " << name << " [" << toString(*sig)
356 << "]\n");
357 Symbol *s;
358 bool wasInserted;
359 std::tie(args&: s, args&: wasInserted) = insert(name, file);
360
361 auto replaceSym = [&](Symbol *sym) {
362 replaceSymbol<SharedFunctionSymbol>(s: sym, arg&: name, arg&: flags, arg&: file, arg&: sig);
363 };
364
365 if (wasInserted) {
366 replaceSym(s);
367 return s;
368 }
369
370 auto existingFunction = dyn_cast<FunctionSymbol>(Val: s);
371 if (!existingFunction) {
372 reportTypeError(existing: s, file, type: WASM_SYMBOL_TYPE_FUNCTION);
373 return s;
374 }
375
376 // Shared symbols should never replace locally-defined ones
377 if (s->isDefined()) {
378 return s;
379 }
380
381 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " << s->getName()
382 << "\n");
383
384 bool checkSig = true;
385 if (auto ud = dyn_cast<UndefinedFunction>(Val: existingFunction))
386 checkSig = ud->isCalledDirectly;
387
388 if (checkSig && !signatureMatches(existing: existingFunction, newSig: sig)) {
389 if (config->shlibSigCheck) {
390 reportFunctionSignatureMismatch(symName: name, sym: existingFunction, signature: sig, file);
391 } else {
392 // With --no-shlib-sigcheck we ignore the signature of the function as
393 // defined by the shared library and instead use the signature as
394 // expected by the program being linked.
395 sig = existingFunction->signature;
396 }
397 }
398
399 replaceSym(s);
400 return s;
401}
402
403Symbol *SymbolTable::addSharedData(StringRef name, uint32_t flags,
404 InputFile *file) {
405 LLVM_DEBUG(dbgs() << "addSharedData: " << name << "\n");
406 Symbol *s;
407 bool wasInserted;
408 std::tie(args&: s, args&: wasInserted) = insert(name, file);
409
410 if (wasInserted || s->isUndefined()) {
411 replaceSymbol<SharedData>(s, arg&: name, arg&: flags, arg&: file);
412 }
413
414 return s;
415}
416
417Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags,
418 InputFile *file,
419 InputFunction *function) {
420 LLVM_DEBUG(dbgs() << "addDefinedFunction: " << name << " ["
421 << (function ? toString(function->signature) : "none")
422 << "]\n");
423 Symbol *s;
424 bool wasInserted;
425 std::tie(args&: s, args&: wasInserted) = insert(name, file);
426
427 auto replaceSym = [&](Symbol *sym) {
428 // If the new defined function doesn't have signature (i.e. bitcode
429 // functions) but the old symbol does, then preserve the old signature
430 const WasmSignature *oldSig = s->getSignature();
431 auto* newSym = replaceSymbol<DefinedFunction>(s: sym, arg&: name, arg&: flags, arg&: file, arg&: function);
432 if (!newSym->signature)
433 newSym->signature = oldSig;
434 };
435
436 if (wasInserted || s->isLazy()) {
437 replaceSym(s);
438 return s;
439 }
440
441 auto existingFunction = dyn_cast<FunctionSymbol>(Val: s);
442 if (!existingFunction) {
443 reportTypeError(existing: s, file, type: WASM_SYMBOL_TYPE_FUNCTION);
444 return s;
445 }
446
447 bool checkSig = true;
448 if (auto ud = dyn_cast<UndefinedFunction>(Val: existingFunction))
449 checkSig = ud->isCalledDirectly;
450
451 if (checkSig && function && !signatureMatches(existing: existingFunction, newSig: &function->signature)) {
452 Symbol* variant;
453 if (getFunctionVariant(sym: s, sig: &function->signature, file, out: &variant))
454 // New variant, always replace
455 replaceSym(variant);
456 else if (shouldReplace(existing: s, newFile: file, newFlags: flags))
457 // Variant already exists, replace it after checking shouldReplace
458 replaceSym(variant);
459
460 // This variant we found take the place in the symbol table as the primary
461 // variant.
462 replace(name, sym: variant);
463 return variant;
464 }
465
466 // Existing function with matching signature.
467 if (shouldReplace(existing: s, newFile: file, newFlags: flags))
468 replaceSym(s);
469
470 return s;
471}
472
473Symbol *SymbolTable::addDefinedData(StringRef name, uint32_t flags,
474 InputFile *file, InputChunk *segment,
475 uint64_t address, uint64_t size) {
476 LLVM_DEBUG(dbgs() << "addDefinedData:" << name << " addr:" << address
477 << "\n");
478 Symbol *s;
479 bool wasInserted;
480 std::tie(args&: s, args&: wasInserted) = insert(name, file);
481
482 auto replaceSym = [&]() {
483 replaceSymbol<DefinedData>(s, arg&: name, arg&: flags, arg&: file, arg&: segment, arg&: address, arg&: size);
484 };
485
486 if (wasInserted || s->isLazy()) {
487 replaceSym();
488 return s;
489 }
490
491 checkDataType(existing: s, file);
492
493 if (shouldReplace(existing: s, newFile: file, newFlags: flags))
494 replaceSym();
495 return s;
496}
497
498Symbol *SymbolTable::addDefinedGlobal(StringRef name, uint32_t flags,
499 InputFile *file, InputGlobal *global) {
500 LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << name << "\n");
501
502 Symbol *s;
503 bool wasInserted;
504 std::tie(args&: s, args&: wasInserted) = insert(name, file);
505
506 auto replaceSym = [&]() {
507 replaceSymbol<DefinedGlobal>(s, arg&: name, arg&: flags, arg&: file, arg&: global);
508 };
509
510 if (wasInserted || s->isLazy()) {
511 replaceSym();
512 return s;
513 }
514
515 checkGlobalType(existing: s, file, newType: &global->getType());
516
517 if (shouldReplace(existing: s, newFile: file, newFlags: flags))
518 replaceSym();
519 return s;
520}
521
522Symbol *SymbolTable::addDefinedTag(StringRef name, uint32_t flags,
523 InputFile *file, InputTag *tag) {
524 LLVM_DEBUG(dbgs() << "addDefinedTag:" << name << "\n");
525
526 Symbol *s;
527 bool wasInserted;
528 std::tie(args&: s, args&: wasInserted) = insert(name, file);
529
530 auto replaceSym = [&]() {
531 replaceSymbol<DefinedTag>(s, arg&: name, arg&: flags, arg&: file, arg&: tag);
532 };
533
534 if (wasInserted || s->isLazy()) {
535 replaceSym();
536 return s;
537 }
538
539 checkTagType(existing: s, file, newSig: &tag->signature);
540
541 if (shouldReplace(existing: s, newFile: file, newFlags: flags))
542 replaceSym();
543 return s;
544}
545
546Symbol *SymbolTable::addDefinedTable(StringRef name, uint32_t flags,
547 InputFile *file, InputTable *table) {
548 LLVM_DEBUG(dbgs() << "addDefinedTable:" << name << "\n");
549
550 Symbol *s;
551 bool wasInserted;
552 std::tie(args&: s, args&: wasInserted) = insert(name, file);
553
554 auto replaceSym = [&]() {
555 replaceSymbol<DefinedTable>(s, arg&: name, arg&: flags, arg&: file, arg&: table);
556 };
557
558 if (wasInserted || s->isLazy()) {
559 replaceSym();
560 return s;
561 }
562
563 checkTableType(existing: s, file, newType: &table->getType());
564
565 if (shouldReplace(existing: s, newFile: file, newFlags: flags))
566 replaceSym();
567 return s;
568}
569
570// This function get called when an undefined symbol is added, and there is
571// already an existing one in the symbols table. In this case we check that
572// custom 'import-module' and 'import-field' symbol attributes agree.
573// With LTO these attributes are not available when the bitcode is read and only
574// become available when the LTO object is read. In this case we silently
575// replace the empty attributes with the valid ones.
576template <typename T>
577static void setImportAttributes(T *existing,
578 std::optional<StringRef> importName,
579 std::optional<StringRef> importModule,
580 uint32_t flags, InputFile *file) {
581 if (importName) {
582 if (!existing->importName)
583 existing->importName = importName;
584 if (existing->importName != importName)
585 error("import name mismatch for symbol: " + toString(*existing) +
586 "\n>>> defined as " + *existing->importName + " in " +
587 toString(existing->getFile()) + "\n>>> defined as " + *importName +
588 " in " + toString(file));
589 }
590
591 if (importModule) {
592 if (!existing->importModule)
593 existing->importModule = importModule;
594 if (existing->importModule != importModule)
595 error("import module mismatch for symbol: " + toString(*existing) +
596 "\n>>> defined as " + *existing->importModule + " in " +
597 toString(existing->getFile()) + "\n>>> defined as " +
598 *importModule + " in " + toString(file));
599 }
600
601 // Update symbol binding, if the existing symbol is weak
602 uint32_t binding = flags & WASM_SYMBOL_BINDING_MASK;
603 if (existing->isWeak() && binding != WASM_SYMBOL_BINDING_WEAK) {
604 existing->flags = (existing->flags & ~WASM_SYMBOL_BINDING_MASK) | binding;
605 }
606}
607
608Symbol *SymbolTable::addUndefinedFunction(StringRef name,
609 std::optional<StringRef> importName,
610 std::optional<StringRef> importModule,
611 uint32_t flags, InputFile *file,
612 const WasmSignature *sig,
613 bool isCalledDirectly) {
614 LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << name << " ["
615 << (sig ? toString(*sig) : "none")
616 << "] IsCalledDirectly:" << isCalledDirectly << " flags=0x"
617 << utohexstr(flags) << "\n");
618 assert(flags & WASM_SYMBOL_UNDEFINED);
619
620 Symbol *s;
621 bool wasInserted;
622 std::tie(args&: s, args&: wasInserted) = insert(name, file);
623 if (s->traced)
624 printTraceSymbolUndefined(name, file);
625
626 auto replaceSym = [&]() {
627 replaceSymbol<UndefinedFunction>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags,
628 arg&: file, arg&: sig, arg&: isCalledDirectly);
629 };
630
631 if (wasInserted) {
632 replaceSym();
633 } else if (auto *lazy = dyn_cast<LazySymbol>(Val: s)) {
634 if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
635 lazy->setWeak();
636 lazy->signature = sig;
637 } else {
638 lazy->extract();
639 if (!config->whyExtract.empty())
640 ctx.whyExtractRecords.emplace_back(Args: toString(file), Args: s->getFile(), Args&: *s);
641 }
642 } else {
643 auto existingFunction = dyn_cast<FunctionSymbol>(Val: s);
644 if (!existingFunction) {
645 reportTypeError(existing: s, file, type: WASM_SYMBOL_TYPE_FUNCTION);
646 return s;
647 }
648 if (!existingFunction->signature && sig)
649 existingFunction->signature = sig;
650 auto *existingUndefined = dyn_cast<UndefinedFunction>(Val: existingFunction);
651 if (isCalledDirectly && !signatureMatches(existing: existingFunction, newSig: sig)) {
652 if (existingFunction->isShared()) {
653 // Special handling for when the existing function is a shared symbol
654 if (config->shlibSigCheck) {
655 reportFunctionSignatureMismatch(symName: name, sym: existingFunction, signature: sig, file);
656 } else {
657 existingFunction->signature = sig;
658 }
659 }
660 // If the existing undefined functions is not called directly then let
661 // this one take precedence. Otherwise the existing function is either
662 // directly called or defined, in which case we need a function variant.
663 else if (existingUndefined && !existingUndefined->isCalledDirectly)
664 replaceSym();
665 else if (getFunctionVariant(sym: s, sig, file, out: &s))
666 replaceSym();
667 }
668 if (existingUndefined) {
669 setImportAttributes(existing: existingUndefined, importName, importModule, flags,
670 file);
671 if (isCalledDirectly)
672 existingUndefined->isCalledDirectly = true;
673 if (s->isWeak())
674 s->flags = flags;
675 }
676 }
677
678 return s;
679}
680
681Symbol *SymbolTable::addUndefinedData(StringRef name, uint32_t flags,
682 InputFile *file) {
683 LLVM_DEBUG(dbgs() << "addUndefinedData: " << name << "\n");
684 assert(flags & WASM_SYMBOL_UNDEFINED);
685
686 Symbol *s;
687 bool wasInserted;
688 std::tie(args&: s, args&: wasInserted) = insert(name, file);
689 if (s->traced)
690 printTraceSymbolUndefined(name, file);
691
692 if (wasInserted) {
693 replaceSymbol<UndefinedData>(s, arg&: name, arg&: flags, arg&: file);
694 } else if (auto *lazy = dyn_cast<LazySymbol>(Val: s)) {
695 if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK)
696 lazy->setWeak();
697 else
698 lazy->extract();
699 } else if (s->isDefined()) {
700 checkDataType(existing: s, file);
701 } else if (s->isWeak()) {
702 s->flags = flags;
703 }
704 return s;
705}
706
707Symbol *SymbolTable::addUndefinedGlobal(StringRef name,
708 std::optional<StringRef> importName,
709 std::optional<StringRef> importModule,
710 uint32_t flags, InputFile *file,
711 const WasmGlobalType *type) {
712 LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << name << "\n");
713 assert(flags & WASM_SYMBOL_UNDEFINED);
714
715 Symbol *s;
716 bool wasInserted;
717 std::tie(args&: s, args&: wasInserted) = insert(name, file);
718 if (s->traced)
719 printTraceSymbolUndefined(name, file);
720
721 if (wasInserted)
722 replaceSymbol<UndefinedGlobal>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags,
723 arg&: file, arg&: type);
724 else if (auto *lazy = dyn_cast<LazySymbol>(Val: s))
725 lazy->extract();
726 else if (s->isDefined())
727 checkGlobalType(existing: s, file, newType: type);
728 else if (s->isWeak())
729 s->flags = flags;
730 return s;
731}
732
733Symbol *SymbolTable::addUndefinedTable(StringRef name,
734 std::optional<StringRef> importName,
735 std::optional<StringRef> importModule,
736 uint32_t flags, InputFile *file,
737 const WasmTableType *type) {
738 LLVM_DEBUG(dbgs() << "addUndefinedTable: " << name << "\n");
739 assert(flags & WASM_SYMBOL_UNDEFINED);
740
741 Symbol *s;
742 bool wasInserted;
743 std::tie(args&: s, args&: wasInserted) = insert(name, file);
744 if (s->traced)
745 printTraceSymbolUndefined(name, file);
746
747 if (wasInserted)
748 replaceSymbol<UndefinedTable>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags,
749 arg&: file, arg&: type);
750 else if (auto *lazy = dyn_cast<LazySymbol>(Val: s))
751 lazy->extract();
752 else if (s->isDefined())
753 checkTableType(existing: s, file, newType: type);
754 else if (s->isWeak())
755 s->flags = flags;
756 return s;
757}
758
759Symbol *SymbolTable::addUndefinedTag(StringRef name,
760 std::optional<StringRef> importName,
761 std::optional<StringRef> importModule,
762 uint32_t flags, InputFile *file,
763 const WasmSignature *sig) {
764 LLVM_DEBUG(dbgs() << "addUndefinedTag: " << name << "\n");
765 assert(flags & WASM_SYMBOL_UNDEFINED);
766
767 Symbol *s;
768 bool wasInserted;
769 std::tie(args&: s, args&: wasInserted) = insert(name, file);
770 if (s->traced)
771 printTraceSymbolUndefined(name, file);
772
773 if (wasInserted)
774 replaceSymbol<UndefinedTag>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags, arg&: file,
775 arg&: sig);
776 else if (auto *lazy = dyn_cast<LazySymbol>(Val: s))
777 lazy->extract();
778 else if (s->isDefined())
779 checkTagType(existing: s, file, newSig: sig);
780 else if (s->isWeak())
781 s->flags = flags;
782 return s;
783}
784
785TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) {
786 WasmLimits limits{.Flags: 0, .Minimum: 0, .Maximum: 0}; // Set by the writer.
787 WasmTableType *type = make<WasmTableType>();
788 type->ElemType = ValType::FUNCREF;
789 type->Limits = limits;
790 uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
791 flags |= WASM_SYMBOL_UNDEFINED;
792 Symbol *sym =
793 addUndefinedTable(name, importName: name, importModule: defaultModule, flags, file: nullptr, type);
794 sym->markLive();
795 sym->forceExport = config->exportTable;
796 return cast<TableSymbol>(Val: sym);
797}
798
799TableSymbol *SymbolTable::createDefinedIndirectFunctionTable(StringRef name) {
800 const uint32_t invalidIndex = -1;
801 WasmLimits limits{.Flags: 0, .Minimum: 0, .Maximum: 0}; // Set by the writer.
802 WasmTableType type{.ElemType: ValType::FUNCREF, .Limits: limits};
803 WasmTable desc{.Index: invalidIndex, .Type: type, .SymbolName: name};
804 InputTable *table = make<InputTable>(args&: desc, args: nullptr);
805 uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
806 TableSymbol *sym = addSyntheticTable(name, flags, table);
807 sym->markLive();
808 sym->forceExport = config->exportTable;
809 return sym;
810}
811
812// Whether or not we need an indirect function table is usually a function of
813// whether an input declares a need for it. However sometimes it's possible for
814// no input to need the indirect function table, but then a late
815// addInternalGOTEntry causes a function to be allocated an address. In that
816// case address we synthesize a definition at the last minute.
817TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
818 Symbol *existing = find(name: functionTableName);
819 if (existing) {
820 if (!isa<TableSymbol>(Val: existing)) {
821 error(msg: Twine("reserved symbol must be of type table: `") +
822 functionTableName + "`");
823 return nullptr;
824 }
825 if (existing->isDefined()) {
826 error(msg: Twine("reserved symbol must not be defined in input files: `") +
827 functionTableName + "`");
828 return nullptr;
829 }
830 }
831
832 if (config->importTable) {
833 if (existing) {
834 existing->importModule = defaultModule;
835 existing->importName = functionTableName;
836 return cast<TableSymbol>(Val: existing);
837 }
838 if (required)
839 return createUndefinedIndirectFunctionTable(name: functionTableName);
840 } else if ((existing && existing->isLive()) || config->exportTable ||
841 required) {
842 // A defined table is required. Either because the user request an exported
843 // table or because the table symbol is already live. The existing table is
844 // guaranteed to be undefined due to the check above.
845 return createDefinedIndirectFunctionTable(name: functionTableName);
846 }
847
848 // An indirect function table will only be present in the symbol table if
849 // needed by a reloc; if we get here, we don't need one.
850 return nullptr;
851}
852
853void SymbolTable::addLazy(StringRef name, InputFile *file) {
854 LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n");
855
856 Symbol *s;
857 bool wasInserted;
858 std::tie(args&: s, args&: wasInserted) = insertName(name);
859
860 if (wasInserted) {
861 replaceSymbol<LazySymbol>(s, arg&: name, arg: 0, arg&: file);
862 return;
863 }
864
865 if (!s->isUndefined())
866 return;
867
868 // The existing symbol is undefined, load a new one from the archive,
869 // unless the existing symbol is weak in which case replace the undefined
870 // symbols with a LazySymbol.
871 if (s->isWeak()) {
872 const WasmSignature *oldSig = nullptr;
873 // In the case of an UndefinedFunction we need to preserve the expected
874 // signature.
875 if (auto *f = dyn_cast<UndefinedFunction>(Val: s))
876 oldSig = f->signature;
877 LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
878 auto newSym =
879 replaceSymbol<LazySymbol>(s, arg&: name, arg: WASM_SYMBOL_BINDING_WEAK, arg&: file);
880 newSym->signature = oldSig;
881 return;
882 }
883
884 LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
885 const InputFile *oldFile = s->getFile();
886 LazySymbol(name, 0, file).extract();
887 if (!config->whyExtract.empty())
888 ctx.whyExtractRecords.emplace_back(Args: toString(file: oldFile), Args: s->getFile(), Args&: *s);
889}
890
891bool SymbolTable::addComdat(StringRef name) {
892 return comdatGroups.insert(V: CachedHashStringRef(name)).second;
893}
894
895// The new signature doesn't match. Create a variant to the symbol with the
896// signature encoded in the name and return that instead. These symbols are
897// then unified later in handleSymbolVariants.
898bool SymbolTable::getFunctionVariant(Symbol* sym, const WasmSignature *sig,
899 const InputFile *file, Symbol **out) {
900 LLVM_DEBUG(dbgs() << "getFunctionVariant: " << sym->getName() << " -> "
901 << " " << toString(*sig) << "\n");
902 Symbol *variant = nullptr;
903
904 // Linear search through symbol variants. Should never be more than two
905 // or three entries here.
906 auto &variants = symVariants[CachedHashStringRef(sym->getName())];
907 if (variants.empty())
908 variants.push_back(x: sym);
909
910 for (Symbol* v : variants) {
911 if (*v->getSignature() == *sig) {
912 variant = v;
913 break;
914 }
915 }
916
917 bool wasAdded = !variant;
918 if (wasAdded) {
919 // Create a new variant;
920 LLVM_DEBUG(dbgs() << "added new variant\n");
921 variant = reinterpret_cast<Symbol *>(make<SymbolUnion>());
922 variant->isUsedInRegularObj =
923 !file || file->kind() == InputFile::ObjectKind;
924 variant->canInline = true;
925 variant->traced = false;
926 variant->forceExport = false;
927 variants.push_back(x: variant);
928 } else {
929 LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*variant) << "\n");
930 assert(*variant->getSignature() == *sig);
931 }
932
933 *out = variant;
934 return wasAdded;
935}
936
937// Set a flag for --trace-symbol so that we can print out a log message
938// if a new symbol with the same name is inserted into the symbol table.
939void SymbolTable::trace(StringRef name) {
940 symMap.insert(KV: {CachedHashStringRef(name), -1});
941}
942
943void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
944 // Swap symbols as instructed by -wrap.
945 int &origIdx = symMap[CachedHashStringRef(sym->getName())];
946 int &realIdx= symMap[CachedHashStringRef(real->getName())];
947 int &wrapIdx = symMap[CachedHashStringRef(wrap->getName())];
948 LLVM_DEBUG(dbgs() << "wrap: " << sym->getName() << "\n");
949
950 // Anyone looking up __real symbols should get the original
951 realIdx = origIdx;
952 // Anyone looking up the original should get the __wrap symbol
953 origIdx = wrapIdx;
954}
955
// Pre-encoded function body consisting of a single `unreachable` instruction.
// Installed as the body of synthetic stub functions via setBody().
static const uint8_t unreachableFn[] = {
    0x03, // ULEB length
    0x00, // ULEB num locals
    0x00, // opcode unreachable
    0x0b, // opcode end
};
960
961// Replace the given symbol body with an unreachable function.
962// This is used by handleWeakUndefines in order to generate a callable
963// equivalent of an undefined function and also handleSymbolVariants for
964// undefined functions that don't match the signature of the definition.
965InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym,
966 const WasmSignature &sig,
967 StringRef debugName) {
968 auto *func = make<SyntheticFunction>(args: sig, args: sym->getName(), args&: debugName);
969 func->setBody(unreachableFn);
970 ctx.syntheticFunctions.emplace_back(Args&: func);
971 // Mark new symbols as local. For relocatable output we don't want them
972 // to be exported outside the object file.
973 replaceSymbol<DefinedFunction>(s: sym, arg&: debugName, arg: WASM_SYMBOL_BINDING_LOCAL,
974 arg: nullptr, arg&: func);
975 // Ensure the stub function doesn't get a table entry. Its address
976 // should always compare equal to the null pointer.
977 sym->isStub = true;
978 return func;
979}
980
981void SymbolTable::replaceWithUndefined(Symbol *sym) {
982 // Add a synthetic dummy for weak undefined functions. These dummies will
983 // be GC'd if not used as the target of any "call" instructions.
984 StringRef debugName = saver().save(S: "undefined_weak:" + toString(sym: *sym));
985 replaceWithUnreachable(sym, sig: *sym->getSignature(), debugName);
986 // Hide our dummy to prevent export.
987 sym->setHidden(true);
988}
989
990// For weak undefined functions, there may be "call" instructions that reference
991// the symbol. In this case, we need to synthesise a dummy/stub function that
992// will abort at runtime, so that relocations can still provided an operand to
993// the call instruction that passes Wasm validation.
994void SymbolTable::handleWeakUndefines() {
995 for (Symbol *sym : symbols()) {
996 if (sym->isUndefWeak() && sym->isUsedInRegularObj) {
997 if (sym->getSignature()) {
998 replaceWithUndefined(sym);
999 } else {
1000 // It is possible for undefined functions not to have a signature (eg.
1001 // if added via "--undefined"), but weak undefined ones do have a
1002 // signature. Lazy symbols may not be functions and therefore Sig can
1003 // still be null in some circumstance.
1004 assert(!isa<FunctionSymbol>(sym));
1005 }
1006 }
1007 }
1008}
1009
1010DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) {
1011 if (stubFunctions.count(Val: sig))
1012 return stubFunctions[sig];
1013 LLVM_DEBUG(dbgs() << "createUndefinedStub: " << toString(sig) << "\n");
1014 auto *sym = reinterpret_cast<DefinedFunction *>(make<SymbolUnion>());
1015 sym->isUsedInRegularObj = true;
1016 sym->canInline = true;
1017 sym->traced = false;
1018 sym->forceExport = false;
1019 sym->signature = &sig;
1020 replaceSymbol<DefinedFunction>(
1021 s: sym, arg: "undefined_stub", arg: WASM_SYMBOL_VISIBILITY_HIDDEN, arg: nullptr, arg: nullptr);
1022 replaceWithUnreachable(sym, sig, debugName: "undefined_stub");
1023 stubFunctions[sig] = sym;
1024 return sym;
1025}
1026
1027// Remove any variant symbols that were created due to function signature
1028// mismatches.
1029void SymbolTable::handleSymbolVariants() {
1030 for (auto pair : symVariants) {
1031 // Push the initial symbol onto the list of variants.
1032 StringRef symName = pair.first.val();
1033 std::vector<Symbol *> &variants = pair.second;
1034
1035#ifndef NDEBUG
1036 LLVM_DEBUG(dbgs() << "symbol with (" << variants.size()
1037 << ") variants: " << symName << "\n");
1038 for (auto *s: variants) {
1039 auto *f = cast<FunctionSymbol>(s);
1040 LLVM_DEBUG(dbgs() << " variant: " + f->getName() << " "
1041 << toString(*f->signature) << "\n");
1042 }
1043#endif
1044
1045 // Find the one definition.
1046 DefinedFunction *defined = nullptr;
1047 for (auto *symbol : variants) {
1048 if (auto f = dyn_cast<DefinedFunction>(Val: symbol)) {
1049 defined = f;
1050 break;
1051 }
1052 }
1053
1054 // If there are no definitions, and the undefined symbols disagree on
1055 // the signature, there is not we can do since we don't know which one
1056 // to use as the signature on the import.
1057 if (!defined) {
1058 reportFunctionSignatureMismatch(symName,
1059 a: cast<FunctionSymbol>(Val: variants[0]),
1060 b: cast<FunctionSymbol>(Val: variants[1]));
1061 return;
1062 }
1063
1064 for (auto *symbol : variants) {
1065 if (symbol != defined) {
1066 auto *f = cast<FunctionSymbol>(Val: symbol);
1067 reportFunctionSignatureMismatch(symName, a: f, b: defined, isError: false);
1068 StringRef debugName =
1069 saver().save(S: "signature_mismatch:" + toString(sym: *f));
1070 replaceWithUnreachable(sym: f, sig: *f->signature, debugName);
1071 }
1072 }
1073 }
1074}
1075
} // namespace lld::wasm
1077