1//===- InputFiles.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "Config.h"
11#include "InputChunks.h"
12#include "InputElement.h"
13#include "OutputSegment.h"
14#include "SymbolTable.h"
15#include "lld/Common/CommonLinkerContext.h"
16#include "lld/Common/Reproduce.h"
17#include "llvm/BinaryFormat/Wasm.h"
18#include "llvm/Object/Binary.h"
19#include "llvm/Object/Wasm.h"
20#include "llvm/ProfileData/InstrProf.h"
21#include "llvm/Support/Path.h"
22#include "llvm/Support/TarWriter.h"
23#include "llvm/Support/raw_ostream.h"
24#include <optional>
25
26#define DEBUG_TYPE "lld"
27
28using namespace llvm;
29using namespace llvm::object;
30using namespace llvm::wasm;
31using namespace llvm::sys;
32
33namespace lld {
34
35// Returns a string in the format of "foo.o" or "foo.a(bar.o)".
36std::string toString(const wasm::InputFile *file) {
37 if (!file)
38 return "<internal>";
39
40 if (file->archiveName.empty())
41 return std::string(file->getName());
42
43 return (file->archiveName + "(" + file->getName() + ")").str();
44}
45
46namespace wasm {
47
48std::string replaceThinLTOSuffix(StringRef path) {
49 auto [suffix, repl] = ctx.arg.thinLTOObjectSuffixReplace;
50 if (path.consume_back(Suffix: suffix))
51 return (path + repl).str();
52 return std::string(path);
53}
54
55void InputFile::checkArch(Triple::ArchType arch) const {
56 bool is64 = arch == Triple::wasm64;
57 if (is64 && !ctx.arg.is64) {
58 fatal(msg: toString(file: this) +
59 ": must specify -mwasm64 to process wasm64 object files");
60 } else if (ctx.arg.is64.value_or(u: false) != is64) {
61 fatal(msg: toString(file: this) +
62 ": wasm32 object file can't be linked in wasm64 mode");
63 }
64}
65
66std::unique_ptr<llvm::TarWriter> tar;
67
68std::optional<MemoryBufferRef> readFile(StringRef path) {
69 log(msg: "Loading: " + path);
70
71 auto mbOrErr = MemoryBuffer::getFile(Filename: path);
72 if (auto ec = mbOrErr.getError()) {
73 error(msg: "cannot open " + path + ": " + ec.message());
74 return std::nullopt;
75 }
76 std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
77 MemoryBufferRef mbref = mb->getMemBufferRef();
78 make<std::unique_ptr<MemoryBuffer>>(args: std::move(mb)); // take MB ownership
79
80 if (tar)
81 tar->append(Path: relativeToRoot(path), Data: mbref.getBuffer());
82 return mbref;
83}
84
85InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
86 uint64_t offsetInArchive, bool lazy) {
87 file_magic magic = identify_magic(magic: mb.getBuffer());
88 if (magic == file_magic::wasm_object) {
89 std::unique_ptr<Binary> bin =
90 CHECK(createBinary(mb), mb.getBufferIdentifier());
91 auto *obj = cast<WasmObjectFile>(Val: bin.get());
92 if (obj->hasUnmodeledTypes())
93 fatal(msg: toString(s: mb.getBufferIdentifier()) +
94 " file has unmodeled reference or GC types");
95 if (obj->isSharedObject())
96 return make<SharedFile>(args&: mb);
97 return make<ObjFile>(args&: mb, args&: archiveName, args&: lazy);
98 }
99
100 assert(magic == file_magic::bitcode);
101 return make<BitcodeFile>(args&: mb, args&: archiveName, args&: offsetInArchive, args&: lazy);
102}
103
104// Relocations contain either symbol or type indices. This function takes a
105// relocation and returns relocated index (i.e. translates from the input
106// symbol/type space to the output symbol/type space).
107uint32_t ObjFile::calcNewIndex(const WasmRelocation &reloc) const {
108 if (reloc.Type == R_WASM_TYPE_INDEX_LEB) {
109 assert(typeIsUsed[reloc.Index]);
110 return typeMap[reloc.Index];
111 }
112 const Symbol *sym = symbols[reloc.Index];
113 if (auto *ss = dyn_cast<SectionSymbol>(Val: sym))
114 sym = ss->getOutputSectionSymbol();
115 return sym->getOutputSymbolIndex();
116}
117
118// Relocations can contain addend for combined sections. This function takes a
119// relocation and returns updated addend by offset in the output section.
120int64_t ObjFile::calcNewAddend(const WasmRelocation &reloc) const {
121 switch (reloc.Type) {
122 case R_WASM_MEMORY_ADDR_LEB:
123 case R_WASM_MEMORY_ADDR_LEB64:
124 case R_WASM_MEMORY_ADDR_SLEB64:
125 case R_WASM_MEMORY_ADDR_SLEB:
126 case R_WASM_MEMORY_ADDR_REL_SLEB:
127 case R_WASM_MEMORY_ADDR_REL_SLEB64:
128 case R_WASM_MEMORY_ADDR_I32:
129 case R_WASM_MEMORY_ADDR_I64:
130 case R_WASM_MEMORY_ADDR_TLS_SLEB:
131 case R_WASM_MEMORY_ADDR_TLS_SLEB64:
132 case R_WASM_FUNCTION_OFFSET_I32:
133 case R_WASM_FUNCTION_OFFSET_I64:
134 case R_WASM_MEMORY_ADDR_LOCREL_I32:
135 case R_WASM_MEMORY_ADDR_LOCREL_I64:
136 return reloc.Addend;
137 case R_WASM_SECTION_OFFSET_I32:
138 return getSectionSymbol(index: reloc.Index)->section->getOffset(offset: reloc.Addend);
139 default:
140 llvm_unreachable("unexpected relocation type");
141 }
142}
143
144// Translate from the relocation's index into the final linked output value.
145uint64_t ObjFile::calcNewValue(const WasmRelocation &reloc, uint64_t tombstone,
146 const InputChunk *chunk) const {
147 const Symbol *sym = nullptr;
148 if (reloc.Type != R_WASM_TYPE_INDEX_LEB) {
149 sym = symbols[reloc.Index];
150
151 // We can end up with relocations against non-live symbols. For example
152 // in debug sections. We return a tombstone value in debug symbol sections
153 // so this will not produce a valid range conflicting with ranges of actual
154 // code. In other sections we return reloc.Addend.
155
156 if (!isa<SectionSymbol>(Val: sym) && !sym->isLive())
157 return tombstone ? tombstone : reloc.Addend;
158 }
159
160 switch (reloc.Type) {
161 case R_WASM_TABLE_INDEX_I32:
162 case R_WASM_TABLE_INDEX_I64:
163 case R_WASM_TABLE_INDEX_SLEB:
164 case R_WASM_TABLE_INDEX_SLEB64:
165 case R_WASM_TABLE_INDEX_REL_SLEB:
166 case R_WASM_TABLE_INDEX_REL_SLEB64: {
167 if (!getFunctionSymbol(index: reloc.Index)->hasTableIndex())
168 return 0;
169 uint32_t index = getFunctionSymbol(index: reloc.Index)->getTableIndex();
170 if (reloc.Type == R_WASM_TABLE_INDEX_REL_SLEB ||
171 reloc.Type == R_WASM_TABLE_INDEX_REL_SLEB64)
172 index -= ctx.arg.tableBase;
173 return index;
174 }
175 case R_WASM_MEMORY_ADDR_LEB:
176 case R_WASM_MEMORY_ADDR_LEB64:
177 case R_WASM_MEMORY_ADDR_SLEB:
178 case R_WASM_MEMORY_ADDR_SLEB64:
179 case R_WASM_MEMORY_ADDR_REL_SLEB:
180 case R_WASM_MEMORY_ADDR_REL_SLEB64:
181 case R_WASM_MEMORY_ADDR_I32:
182 case R_WASM_MEMORY_ADDR_I64:
183 case R_WASM_MEMORY_ADDR_TLS_SLEB:
184 case R_WASM_MEMORY_ADDR_TLS_SLEB64:
185 case R_WASM_MEMORY_ADDR_LOCREL_I32:
186 case R_WASM_MEMORY_ADDR_LOCREL_I64: {
187 if (isa<UndefinedData>(Val: sym) || sym->isShared() || sym->isUndefWeak())
188 return 0;
189 auto D = cast<DefinedData>(Val: sym);
190 uint64_t value = D->getVA() + reloc.Addend;
191 if (reloc.Type == R_WASM_MEMORY_ADDR_LOCREL_I32 ||
192 reloc.Type == R_WASM_MEMORY_ADDR_LOCREL_I64) {
193 const auto *segment = cast<InputSegment>(Val: chunk);
194 uint64_t p = segment->outputSeg->startVA + segment->outputSegmentOffset +
195 reloc.Offset - segment->getInputSectionOffset();
196 value -= p;
197 }
198 return value;
199 }
200 case R_WASM_TYPE_INDEX_LEB:
201 return typeMap[reloc.Index];
202 case R_WASM_FUNCTION_INDEX_LEB:
203 case R_WASM_FUNCTION_INDEX_I32:
204 return getFunctionSymbol(index: reloc.Index)->getFunctionIndex();
205 case R_WASM_GLOBAL_INDEX_LEB:
206 case R_WASM_GLOBAL_INDEX_I32:
207 if (auto gs = dyn_cast<GlobalSymbol>(Val: sym))
208 return gs->getGlobalIndex();
209 return sym->getGOTIndex();
210 case R_WASM_TAG_INDEX_LEB:
211 return getTagSymbol(index: reloc.Index)->getTagIndex();
212 case R_WASM_FUNCTION_OFFSET_I32:
213 case R_WASM_FUNCTION_OFFSET_I64: {
214 if (isa<UndefinedFunction>(Val: sym) || sym->isShared()) {
215 return tombstone ? tombstone : reloc.Addend;
216 }
217 auto *f = cast<DefinedFunction>(Val: sym);
218 return f->function->getOffset(offset: f->function->getFunctionCodeOffset() +
219 reloc.Addend);
220 }
221 case R_WASM_SECTION_OFFSET_I32:
222 return getSectionSymbol(index: reloc.Index)->section->getOffset(offset: reloc.Addend);
223 case R_WASM_TABLE_NUMBER_LEB:
224 return getTableSymbol(index: reloc.Index)->getTableNumber();
225 default:
226 llvm_unreachable("unknown relocation type");
227 }
228}
229
230template <class T>
231static void setRelocs(const std::vector<T *> &chunks,
232 const WasmSection *section) {
233 if (!section)
234 return;
235
236 ArrayRef<WasmRelocation> relocs = section->Relocations;
237 assert(llvm::is_sorted(
238 relocs, [](const WasmRelocation &r1, const WasmRelocation &r2) {
239 return r1.Offset < r2.Offset;
240 }));
241 assert(llvm::is_sorted(chunks, [](InputChunk *c1, InputChunk *c2) {
242 return c1->getInputSectionOffset() < c2->getInputSectionOffset();
243 }));
244
245 auto relocsNext = relocs.begin();
246 auto relocsEnd = relocs.end();
247 auto relocLess = [](const WasmRelocation &r, uint32_t val) {
248 return r.Offset < val;
249 };
250 for (InputChunk *c : chunks) {
251 auto relocsStart = std::lower_bound(relocsNext, relocsEnd,
252 c->getInputSectionOffset(), relocLess);
253 relocsNext = std::lower_bound(
254 relocsStart, relocsEnd, c->getInputSectionOffset() + c->getInputSize(),
255 relocLess);
256 c->setRelocations(ArrayRef<WasmRelocation>(relocsStart, relocsNext));
257 }
258}
259
// An object file can have two approaches to tables. With the
// reference-types feature or call-indirect-overlong feature enabled
// (explicitly, or implied by the reference-types feature), input files that
// define or use tables declare the tables using symbols, and record each use
// with a relocation. This way when the linker combines inputs, it can collate
// the tables used by the inputs, assigning them distinct table numbers, and
// renumber all the uses as appropriate. At the same time, the linker has
// special logic to build the indirect function table if it is needed.
//
// However, MVP object files (those that target WebAssembly 1.0, the "minimum
// viable product" version of WebAssembly) neither write table symbols nor
// record relocations. These files can have at most one table, the indirect
// function table used by call_indirect and which is the address space for
// function pointers. If this table is present, it is always an import. If we
// have a file with a table import but no table symbols, it is an MVP object
// file. addLegacyIndirectFunctionTableIfNeeded serves as a shim when loading
// these input files, defining the missing symbol to allow the indirect
// function table to be built.
//
// As indirect function table usage in MVP objects cannot be relocated, the
// linker must ensure that this table gets assigned index zero.
281void ObjFile::addLegacyIndirectFunctionTableIfNeeded(
282 uint32_t tableSymbolCount) {
283 uint32_t tableCount = wasmObj->getNumImportedTables() + tables.size();
284
285 // If there are symbols for all tables, then all is good.
286 if (tableCount == tableSymbolCount)
287 return;
288
289 // It's possible for an input to define tables and also use the indirect
290 // function table, but forget to compile with -mattr=+call-indirect-overlong
291 // or -mattr=+reference-types. For these newer files, we require symbols for
292 // all tables, and relocations for all of their uses.
293 if (tableSymbolCount != 0) {
294 error(msg: toString(file: this) +
295 ": expected one symbol table entry for each of the " +
296 Twine(tableCount) + " table(s) present, but got " +
297 Twine(tableSymbolCount) + " symbol(s) instead.");
298 return;
299 }
300
301 // An MVP object file can have up to one table import, for the indirect
302 // function table, but will have no table definitions.
303 if (tables.size()) {
304 error(msg: toString(file: this) +
305 ": unexpected table definition(s) without corresponding "
306 "symbol-table entries.");
307 return;
308 }
309
310 // An MVP object file can have only one table import.
311 if (tableCount != 1) {
312 error(msg: toString(file: this) +
313 ": multiple table imports, but no corresponding symbol-table "
314 "entries.");
315 return;
316 }
317
318 const WasmImport *tableImport = nullptr;
319 for (const auto &import : wasmObj->imports()) {
320 if (import.Kind == WASM_EXTERNAL_TABLE) {
321 assert(!tableImport);
322 tableImport = &import;
323 }
324 }
325 assert(tableImport);
326
327 // We can only synthesize a symtab entry for the indirect function table; if
328 // it has an unexpected name or type, assume that it's not actually the
329 // indirect function table.
330 if (tableImport->Field != functionTableName ||
331 tableImport->Table.ElemType != ValType::FUNCREF) {
332 error(msg: toString(file: this) + ": table import " + Twine(tableImport->Field) +
333 " is missing a symbol table entry.");
334 return;
335 }
336
337 WasmSymbolInfo info;
338 info.Name = tableImport->Field;
339 info.Kind = WASM_SYMBOL_TYPE_TABLE;
340 info.ImportModule = tableImport->Module;
341 info.ImportName = tableImport->Field;
342 info.Flags = WASM_SYMBOL_UNDEFINED | WASM_SYMBOL_NO_STRIP;
343 info.ElementIndex = 0;
344 LLVM_DEBUG(dbgs() << "Synthesizing symbol for table import: " << info.Name
345 << "\n");
346 const WasmGlobalType *globalType = nullptr;
347 const WasmSignature *signature = nullptr;
348 auto *wasmSym =
349 make<WasmSymbol>(args&: info, args&: globalType, args: &tableImport->Table, args&: signature);
350 Symbol *sym = createUndefined(sym: *wasmSym, isCalledDirectly: false);
351 // We're only sure it's a TableSymbol if the createUndefined succeeded.
352 if (errorCount())
353 return;
354 symbols.push_back(x: sym);
355 // Because there are no TABLE_NUMBER relocs, we can't compute accurate
356 // liveness info; instead, just mark the symbol as always live.
357 sym->markLive();
358
359 // We assume that this compilation unit has unrelocatable references to
360 // this table.
361 ctx.legacyFunctionTable = true;
362}
363
364static bool shouldMerge(const WasmSection &sec) {
365 if (ctx.arg.optimize == 0)
366 return false;
367 // Sadly we don't have section attributes yet for custom sections, so we
368 // currently go by the name alone.
369 // TODO(sbc): Add ability for wasm sections to carry flags so we don't
370 // need to use names here.
371 // For now, keep in sync with uses of wasm::WASM_SEG_FLAG_STRINGS in
372 // MCObjectFileInfo::initWasmMCObjectFileInfo which creates these custom
373 // sections.
374 return sec.Name == ".debug_str" || sec.Name == ".debug_str.dwo" ||
375 sec.Name == ".debug_line_str";
376}
377
378static bool shouldMerge(const WasmSegment &seg) {
379 // As of now we only support merging strings, and only with single byte
380 // alignment (2^0).
381 if (!(seg.Data.LinkingFlags & WASM_SEG_FLAG_STRINGS) ||
382 (seg.Data.Alignment != 0))
383 return false;
384
385 // On a regular link we don't merge sections if -O0 (default is -O1). This
386 // sometimes makes the linker significantly faster, although the output will
387 // be bigger.
388 if (ctx.arg.optimize == 0)
389 return false;
390
391 // A mergeable section with size 0 is useless because they don't have
392 // any data to merge. A mergeable string section with size 0 can be
393 // argued as invalid because it doesn't end with a null character.
394 // We'll avoid a mess by handling them as if they were non-mergeable.
395 if (seg.Data.Content.size() == 0)
396 return false;
397
398 return true;
399}
400
401void ObjFile::parseLazy() {
402 LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << " "
403 << wasmObj.get() << "\n");
404 for (const SymbolRef &sym : wasmObj->symbols()) {
405 const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(Symb: sym.getRawDataRefImpl());
406 if (wasmSym.isUndefined() || wasmSym.isBindingLocal())
407 continue;
408 symtab->addLazy(name: wasmSym.Info.Name, f: this);
409 // addLazy() may trigger this->extract() if an existing symbol is an
410 // undefined symbol. If that happens, this function has served its purpose,
411 // and we can exit from the loop early.
412 if (!lazy)
413 break;
414 }
415}
416
417ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy)
418 : WasmFileBase(ObjectKind, m) {
419 this->lazy = lazy;
420 this->archiveName = std::string(archiveName);
421
422 // Currently we only do this check for regular object file, and not for shared
423 // object files. This is because architecture detection for shared objects is
424 // currently based on a heuristic, which is fallable:
425 // https://github.com/llvm/llvm-project/issues/98778
426 checkArch(arch: wasmObj->getArch());
427
428 // Unless we are processing this as a lazy object file (e.g. part of an
429 // archive file or within `--start-lib`/`--end-lib`, it's eagerly linked, so
430 // mark it live.
431 if (!lazy)
432 markLive();
433}
434
435void SharedFile::parse() {
436 assert(wasmObj->isSharedObject());
437
438 for (const SymbolRef &sym : wasmObj->symbols()) {
439 const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(Symb: sym.getRawDataRefImpl());
440 if (wasmSym.isDefined()) {
441 StringRef name = wasmSym.Info.Name;
442 // Certain shared library exports are known to be DSO-local so we
443 // don't want to add them to the symbol table.
444 // TODO(sbc): Instead of hardcoding these here perhaps we could add
445 // this as extra metadata in the `dylink` section.
446 if (name == "__wasm_apply_data_relocs" || name == "__wasm_call_ctors" ||
447 name.starts_with(Prefix: "__start_") || name.starts_with(Prefix: "__stop_"))
448 continue;
449 uint32_t flags = wasmSym.Info.Flags;
450 Symbol *s;
451 LLVM_DEBUG(dbgs() << "shared symbol: " << name << "\n");
452 switch (wasmSym.Info.Kind) {
453 case WASM_SYMBOL_TYPE_FUNCTION:
454 s = symtab->addSharedFunction(name, flags, file: this, sig: wasmSym.Signature);
455 break;
456 case WASM_SYMBOL_TYPE_DATA:
457 s = symtab->addSharedData(name, flags, file: this);
458 break;
459 case WASM_SYMBOL_TYPE_TAG:
460 s = symtab->addSharedTag(name, flags, file: this, sig: wasmSym.Signature);
461 break;
462 default:
463 continue;
464 }
465 symbols.push_back(x: s);
466 }
467 }
468}
469
470// Returns the alignment for a custom section. This is used to concatenate
471// custom sections with the same name into a single custom section.
472static uint32_t getCustomSectionAlignment(const WasmSection &sec) {
473 // TODO: Add a section attribute for alignment in the linking spec.
474 if (sec.Name == getInstrProfSectionName(IPSK: IPSK_covfun, OF: Triple::Wasm) ||
475 sec.Name == getInstrProfSectionName(IPSK: IPSK_covmap, OF: Triple::Wasm)) {
476 // llvm-cov assumes that coverage metadata sections are 8-byte aligned.
477 return 8;
478 }
479 return 1;
480}
481
482WasmFileBase::WasmFileBase(Kind k, MemoryBufferRef m) : InputFile(k, m) {
483 // Parse a memory buffer as a wasm file.
484 LLVM_DEBUG(dbgs() << "Reading object: " << toString(this) << "\n");
485 std::unique_ptr<Binary> bin = CHECK(createBinary(mb), toString(this));
486
487 auto *obj = dyn_cast<WasmObjectFile>(Val: bin.get());
488 if (!obj)
489 fatal(msg: toString(file: this) + ": not a wasm file");
490
491 bin.release();
492 wasmObj.reset(p: obj);
493}
494
495void ObjFile::parse(bool ignoreComdats) {
496 // Parse a memory buffer as a wasm file.
497 LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n");
498
499 if (!wasmObj->isRelocatableObject())
500 fatal(msg: toString(file: this) + ": not a relocatable wasm file");
501
502 // Build up a map of function indices to table indices for use when
503 // verifying the existing table index relocations
504 uint32_t totalFunctions =
505 wasmObj->getNumImportedFunctions() + wasmObj->functions().size();
506 tableEntriesRel.resize(new_size: totalFunctions);
507 tableEntries.resize(new_size: totalFunctions);
508 for (const WasmElemSegment &seg : wasmObj->elements()) {
509 int64_t offset;
510 if (seg.Offset.Extended)
511 fatal(msg: toString(file: this) + ": extended init exprs not supported");
512 else if (seg.Offset.Inst.Opcode == WASM_OPCODE_I32_CONST)
513 offset = seg.Offset.Inst.Value.Int32;
514 else if (seg.Offset.Inst.Opcode == WASM_OPCODE_I64_CONST)
515 offset = seg.Offset.Inst.Value.Int64;
516 else
517 fatal(msg: toString(file: this) + ": invalid table elements");
518 for (size_t index = 0; index < seg.Functions.size(); index++) {
519 auto functionIndex = seg.Functions[index];
520 tableEntriesRel[functionIndex] = index;
521 tableEntries[functionIndex] = offset + index;
522 }
523 }
524
525 ArrayRef<StringRef> comdats = wasmObj->linkingData().Comdats;
526 for (StringRef comdat : comdats) {
527 bool isNew = ignoreComdats || symtab->addComdat(name: comdat);
528 keptComdats.push_back(x: isNew);
529 }
530
531 uint32_t sectionIndex = 0;
532
533 // Bool for each symbol, true if called directly. This allows us to implement
534 // a weaker form of signature checking where undefined functions that are not
535 // called directly (i.e. only address taken) don't have to match the defined
536 // function's signature. We cannot do this for directly called functions
537 // because those signatures are checked at validation times.
538 // See https://github.com/llvm/llvm-project/issues/39758
539 std::vector<bool> isCalledDirectly(wasmObj->getNumberOfSymbols(), false);
540 for (const SectionRef &sec : wasmObj->sections()) {
541 const WasmSection &section = wasmObj->getWasmSection(Section: sec);
542 // Wasm objects can have at most one code and one data section.
543 if (section.Type == WASM_SEC_CODE) {
544 assert(!codeSection);
545 codeSection = &section;
546 } else if (section.Type == WASM_SEC_DATA) {
547 assert(!dataSection);
548 dataSection = &section;
549 } else if (section.Type == WASM_SEC_CUSTOM) {
550 InputChunk *customSec;
551 uint32_t alignment = getCustomSectionAlignment(sec: section);
552 if (shouldMerge(sec: section))
553 customSec = make<MergeInputChunk>(args: section, args: this, args&: alignment);
554 else
555 customSec = make<InputSection>(args: section, args: this, args&: alignment);
556 customSec->discarded = isExcludedByComdat(chunk: customSec);
557 customSections.emplace_back(args&: customSec);
558 customSections.back()->setRelocations(section.Relocations);
559 customSectionsByIndex[sectionIndex] = customSections.back();
560 }
561 sectionIndex++;
562 // Scans relocations to determine if a function symbol is called directly.
563 for (const WasmRelocation &reloc : section.Relocations)
564 if (reloc.Type == R_WASM_FUNCTION_INDEX_LEB)
565 isCalledDirectly[reloc.Index] = true;
566 }
567
568 typeMap.resize(new_size: getWasmObj()->types().size());
569 typeIsUsed.resize(new_size: getWasmObj()->types().size(), x: false);
570
571 // Populate `Segments`.
572 for (const WasmSegment &s : wasmObj->dataSegments()) {
573 InputChunk *seg;
574 if (shouldMerge(seg: s))
575 seg = make<MergeInputChunk>(args: s, args: this);
576 else
577 seg = make<InputSegment>(args: s, args: this);
578 seg->discarded = isExcludedByComdat(chunk: seg);
579 // Older object files did not include WASM_SEG_FLAG_TLS and instead
580 // relied on the naming convention. To maintain compat with such objects
581 // we still imply the TLS flag based on the name of the segment.
582 if (!seg->isTLS() &&
583 (seg->name.starts_with(Prefix: ".tdata") || seg->name.starts_with(Prefix: ".tbss")))
584 seg->flags |= WASM_SEG_FLAG_TLS;
585 segments.emplace_back(args&: seg);
586 }
587 setRelocs(chunks: segments, section: dataSection);
588
589 // Populate `Functions`.
590 ArrayRef<WasmFunction> funcs = wasmObj->functions();
591 ArrayRef<WasmSignature> types = wasmObj->types();
592 functions.reserve(n: funcs.size());
593
594 for (auto &f : funcs) {
595 auto *func = make<InputFunction>(args: types[f.SigIndex], args: &f, args: this);
596 func->discarded = isExcludedByComdat(chunk: func);
597 functions.emplace_back(args&: func);
598 }
599 setRelocs(chunks: functions, section: codeSection);
600
601 // Populate `Tables`.
602 for (const WasmTable &t : wasmObj->tables())
603 tables.emplace_back(args: make<InputTable>(args: t, args: this));
604
605 // Populate `Globals`.
606 for (const WasmGlobal &g : wasmObj->globals())
607 globals.emplace_back(args: make<InputGlobal>(args: g, args: this));
608
609 // Populate `Tags`.
610 for (const WasmTag &t : wasmObj->tags())
611 tags.emplace_back(args: make<InputTag>(args: types[t.SigIndex], args: t, args: this));
612
613 // Populate `Symbols` based on the symbols in the object.
614 symbols.reserve(n: wasmObj->getNumberOfSymbols());
615 uint32_t tableSymbolCount = 0;
616 for (const SymbolRef &sym : wasmObj->symbols()) {
617 const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(Symb: sym.getRawDataRefImpl());
618 if (wasmSym.isTypeTable())
619 tableSymbolCount++;
620 if (wasmSym.isDefined()) {
621 // createDefined may fail if the symbol is comdat excluded in which case
622 // we fall back to creating an undefined symbol
623 if (Symbol *d = createDefined(sym: wasmSym)) {
624 symbols.push_back(x: d);
625 continue;
626 }
627 }
628 size_t idx = symbols.size();
629 symbols.push_back(x: createUndefined(sym: wasmSym, isCalledDirectly: isCalledDirectly[idx]));
630 }
631
632 addLegacyIndirectFunctionTableIfNeeded(tableSymbolCount);
633}
634
635bool ObjFile::isExcludedByComdat(const InputChunk *chunk) const {
636 uint32_t c = chunk->getComdat();
637 if (c == UINT32_MAX)
638 return false;
639 return !keptComdats[c];
640}
641
642FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t index) const {
643 return cast<FunctionSymbol>(Val: symbols[index]);
644}
645
646GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t index) const {
647 return cast<GlobalSymbol>(Val: symbols[index]);
648}
649
650TagSymbol *ObjFile::getTagSymbol(uint32_t index) const {
651 return cast<TagSymbol>(Val: symbols[index]);
652}
653
654TableSymbol *ObjFile::getTableSymbol(uint32_t index) const {
655 return cast<TableSymbol>(Val: symbols[index]);
656}
657
658SectionSymbol *ObjFile::getSectionSymbol(uint32_t index) const {
659 return cast<SectionSymbol>(Val: symbols[index]);
660}
661
662DataSymbol *ObjFile::getDataSymbol(uint32_t index) const {
663 return cast<DataSymbol>(Val: symbols[index]);
664}
665
666Symbol *ObjFile::createDefined(const WasmSymbol &sym) {
667 StringRef name = sym.Info.Name;
668 uint32_t flags = sym.Info.Flags;
669
670 switch (sym.Info.Kind) {
671 case WASM_SYMBOL_TYPE_FUNCTION: {
672 InputFunction *func =
673 functions[sym.Info.ElementIndex - wasmObj->getNumImportedFunctions()];
674 if (sym.isBindingLocal())
675 return make<DefinedFunction>(args&: name, args&: flags, args: this, args&: func);
676 if (func->discarded)
677 return nullptr;
678 return symtab->addDefinedFunction(name, flags, file: this, function: func);
679 }
680 case WASM_SYMBOL_TYPE_DATA: {
681 InputChunk *seg = segments[sym.Info.DataRef.Segment];
682 auto offset = sym.Info.DataRef.Offset;
683 auto size = sym.Info.DataRef.Size;
684 // Support older (e.g. llvm 13) object files that pre-date the per-symbol
685 // TLS flag, and symbols were assumed to be TLS by being defined in a TLS
686 // segment.
687 if (!(flags & WASM_SYMBOL_TLS) && seg->isTLS())
688 flags |= WASM_SYMBOL_TLS;
689 if (sym.isBindingLocal())
690 return make<DefinedData>(args&: name, args&: flags, args: this, args&: seg, args&: offset, args&: size);
691 if (seg->discarded)
692 return nullptr;
693 return symtab->addDefinedData(name, flags, file: this, segment: seg, address: offset, size);
694 }
695 case WASM_SYMBOL_TYPE_GLOBAL: {
696 InputGlobal *global =
697 globals[sym.Info.ElementIndex - wasmObj->getNumImportedGlobals()];
698 if (sym.isBindingLocal())
699 return make<DefinedGlobal>(args&: name, args&: flags, args: this, args&: global);
700 return symtab->addDefinedGlobal(name, flags, file: this, g: global);
701 }
702 case WASM_SYMBOL_TYPE_SECTION: {
703 InputChunk *section = customSectionsByIndex[sym.Info.ElementIndex];
704 assert(sym.isBindingLocal());
705 // Need to return null if discarded here? data and func only do that when
706 // binding is not local.
707 if (section->discarded)
708 return nullptr;
709 return make<SectionSymbol>(args&: flags, args&: section, args: this);
710 }
711 case WASM_SYMBOL_TYPE_TAG: {
712 InputTag *tag = tags[sym.Info.ElementIndex - wasmObj->getNumImportedTags()];
713 if (sym.isBindingLocal())
714 return make<DefinedTag>(args&: name, args&: flags, args: this, args&: tag);
715 return symtab->addDefinedTag(name, flags, file: this, t: tag);
716 }
717 case WASM_SYMBOL_TYPE_TABLE: {
718 InputTable *table =
719 tables[sym.Info.ElementIndex - wasmObj->getNumImportedTables()];
720 if (sym.isBindingLocal())
721 return make<DefinedTable>(args&: name, args&: flags, args: this, args&: table);
722 return symtab->addDefinedTable(name, flags, file: this, t: table);
723 }
724 }
725 llvm_unreachable("unknown symbol kind");
726}
727
728Symbol *ObjFile::createUndefined(const WasmSymbol &sym, bool isCalledDirectly) {
729 StringRef name = sym.Info.Name;
730 uint32_t flags = sym.Info.Flags | WASM_SYMBOL_UNDEFINED;
731
732 switch (sym.Info.Kind) {
733 case WASM_SYMBOL_TYPE_FUNCTION:
734 if (sym.isBindingLocal())
735 return make<UndefinedFunction>(args&: name, args: sym.Info.ImportName,
736 args: sym.Info.ImportModule, args&: flags, args: this,
737 args: sym.Signature, args&: isCalledDirectly);
738 return symtab->addUndefinedFunction(name, importName: sym.Info.ImportName,
739 importModule: sym.Info.ImportModule, flags, file: this,
740 signature: sym.Signature, isCalledDirectly);
741 case WASM_SYMBOL_TYPE_DATA:
742 if (sym.isBindingLocal())
743 return make<UndefinedData>(args&: name, args&: flags, args: this);
744 return symtab->addUndefinedData(name, flags, file: this);
745 case WASM_SYMBOL_TYPE_GLOBAL:
746 if (sym.isBindingLocal())
747 return make<UndefinedGlobal>(args&: name, args: sym.Info.ImportName,
748 args: sym.Info.ImportModule, args&: flags, args: this,
749 args: sym.GlobalType);
750 return symtab->addUndefinedGlobal(name, importName: sym.Info.ImportName,
751 importModule: sym.Info.ImportModule, flags, file: this,
752 type: sym.GlobalType);
753 case WASM_SYMBOL_TYPE_TABLE:
754 if (sym.isBindingLocal())
755 return make<UndefinedTable>(args&: name, args: sym.Info.ImportName,
756 args: sym.Info.ImportModule, args&: flags, args: this,
757 args: sym.TableType);
758 return symtab->addUndefinedTable(name, importName: sym.Info.ImportName,
759 importModule: sym.Info.ImportModule, flags, file: this,
760 type: sym.TableType);
761 case WASM_SYMBOL_TYPE_TAG:
762 if (sym.isBindingLocal())
763 return make<UndefinedTag>(args&: name, args: sym.Info.ImportName,
764 args: sym.Info.ImportModule, args&: flags, args: this,
765 args: sym.Signature);
766 return symtab->addUndefinedTag(name, importName: sym.Info.ImportName,
767 importModule: sym.Info.ImportModule, flags, file: this,
768 sig: sym.Signature);
769 case WASM_SYMBOL_TYPE_SECTION:
770 llvm_unreachable("section symbols cannot be undefined");
771 }
772 llvm_unreachable("unknown symbol kind");
773}
774
775static StringRef strip(StringRef s) { return s.trim(Char: ' '); }
776
777void StubFile::parse() {
778 bool first = true;
779
780 SmallVector<StringRef> lines;
781 mb.getBuffer().split(A&: lines, Separator: '\n');
782 for (StringRef line : lines) {
783 line = line.trim();
784
785 // File must begin with #STUB
786 if (first) {
787 assert(line == "#STUB");
788 first = false;
789 }
790
791 // Lines starting with # are considered comments
792 if (line.starts_with(Prefix: "#") || !line.size())
793 continue;
794
795 StringRef sym;
796 StringRef rest;
797 std::tie(args&: sym, args&: rest) = line.split(Separator: ':');
798 sym = strip(s: sym);
799 rest = strip(s: rest);
800
801 symbolDependencies[sym] = {};
802
803 while (rest.size()) {
804 StringRef dep;
805 std::tie(args&: dep, args&: rest) = rest.split(Separator: ',');
806 dep = strip(s: dep);
807 symbolDependencies[sym].push_back(x: dep);
808 }
809 }
810}
811
812static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
813 switch (gvVisibility) {
814 case GlobalValue::DefaultVisibility:
815 return WASM_SYMBOL_VISIBILITY_DEFAULT;
816 case GlobalValue::HiddenVisibility:
817 case GlobalValue::ProtectedVisibility:
818 return WASM_SYMBOL_VISIBILITY_HIDDEN;
819 }
820 llvm_unreachable("unknown visibility");
821}
822
823static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats,
824 const lto::InputFile::Symbol &objSym,
825 BitcodeFile &f) {
826 StringRef name = saver().save(S: objSym.getName());
827
828 uint32_t flags = objSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
829 flags |= mapVisibility(gvVisibility: objSym.getVisibility());
830
831 int c = objSym.getComdatIndex();
832 bool excludedByComdat = c != -1 && !keptComdats[c];
833
834 if (objSym.isUndefined() || excludedByComdat) {
835 flags |= WASM_SYMBOL_UNDEFINED;
836 if (objSym.isExecutable())
837 return symtab->addUndefinedFunction(name, importName: std::nullopt, importModule: std::nullopt,
838 flags, file: &f, signature: nullptr, isCalledDirectly: true);
839 return symtab->addUndefinedData(name, flags, file: &f);
840 }
841
842 if (objSym.isExecutable())
843 return symtab->addDefinedFunction(name, flags, file: &f, function: nullptr);
844 return symtab->addDefinedData(name, flags, file: &f, segment: nullptr, address: 0, size: 0);
845}
846
847BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
848 uint64_t offsetInArchive, bool lazy)
849 : InputFile(BitcodeKind, m) {
850 this->lazy = lazy;
851 this->archiveName = std::string(archiveName);
852
853 std::string path = mb.getBufferIdentifier().str();
854 if (ctx.arg.thinLTOIndexOnly)
855 path = replaceThinLTOSuffix(path: mb.getBufferIdentifier());
856
857 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
858 // name. If two archives define two members with the same name, this
859 // causes a collision which result in only one of the objects being taken
860 // into consideration at LTO time (which very likely causes undefined
861 // symbols later in the link stage). So we append file offset to make
862 // filename unique.
863 StringRef name = archiveName.empty()
864 ? saver().save(S: path)
865 : saver().save(S: archiveName + "(" + path::filename(path) +
866 " at " + utostr(X: offsetInArchive) + ")");
867 MemoryBufferRef mbref(mb.getBuffer(), name);
868
869 obj = check(e: lto::InputFile::create(Object: mbref));
870
871 // If this isn't part of an archive, it's eagerly linked, so mark it live.
872 if (archiveName.empty())
873 markLive();
874}
875
876bool BitcodeFile::doneLTO = false;
877
878void BitcodeFile::parseLazy() {
879 for (auto [i, irSym] : llvm::enumerate(First: obj->symbols())) {
880 if (irSym.isUndefined())
881 continue;
882 StringRef name = saver().save(S: irSym.getName());
883 symtab->addLazy(name, f: this);
884 // addLazy() may trigger this->extract() if an existing symbol is an
885 // undefined symbol. If that happens, this function has served its purpose,
886 // and we can exit from the loop early.
887 if (!lazy)
888 break;
889 }
890}
891
892void BitcodeFile::parse(StringRef symName) {
893 if (doneLTO) {
894 error(msg: toString(file: this) + ": attempt to add bitcode file after LTO (" +
895 symName + ")");
896 return;
897 }
898
899 Triple t(obj->getTargetTriple());
900 if (!t.isWasm()) {
901 error(msg: toString(file: this) + ": machine type must be wasm32 or wasm64");
902 return;
903 }
904 checkArch(arch: t.getArch());
905 std::vector<bool> keptComdats;
906 // TODO Support nodeduplicate
907 // https://github.com/llvm/llvm-project/issues/49875
908 for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable())
909 keptComdats.push_back(x: symtab->addComdat(name: s.first));
910
911 for (const lto::InputFile::Symbol &objSym : obj->symbols())
912 symbols.push_back(x: createBitcodeSymbol(keptComdats, objSym, f&: *this));
913}
914
915} // namespace wasm
916} // namespace lld
917