1//===- InputFiles.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "COFFLinkerContext.h"
11#include "Chunks.h"
12#include "Config.h"
13#include "DebugTypes.h"
14#include "Driver.h"
15#include "SymbolTable.h"
16#include "Symbols.h"
17#include "lld/Common/DWARF.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/BinaryFormat/COFF.h"
21#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
22#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
23#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
24#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
25#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27#include "llvm/IR/Mangler.h"
28#include "llvm/LTO/LTO.h"
29#include "llvm/Object/Binary.h"
30#include "llvm/Object/COFF.h"
31#include "llvm/Object/COFFImportFile.h"
32#include "llvm/Support/Casting.h"
33#include "llvm/Support/Endian.h"
34#include "llvm/Support/Error.h"
35#include "llvm/Support/FileSystem.h"
36#include "llvm/Support/Path.h"
37#include "llvm/TargetParser/Triple.h"
38#include <cstring>
39#include <optional>
40#include <utility>
41
42using namespace llvm;
43using namespace llvm::COFF;
44using namespace llvm::codeview;
45using namespace llvm::object;
46using namespace llvm::support::endian;
47using namespace lld;
48using namespace lld::coff;
49
50using llvm::Triple;
51using llvm::support::ulittle32_t;
52
53// Returns the last element of a path, which is supposed to be a filename.
54static StringRef getBasename(StringRef path) {
55 return sys::path::filename(path, style: sys::path::Style::windows);
56}
57
58// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
59std::string lld::toString(const coff::InputFile *file) {
60 if (!file)
61 return "<internal>";
62 if (file->parentName.empty())
63 return std::string(file->getName());
64
65 return (getBasename(path: file->parentName) + "(" + getBasename(path: file->getName()) +
66 ")")
67 .str();
68}
69
70const COFFSyncStream &coff::operator<<(const COFFSyncStream &s,
71 const InputFile *f) {
72 return s << toString(file: f);
73}
74
75/// Checks that Source is compatible with being a weak alias to Target.
76/// If Source is Undefined and has no weak alias set, makes it a weak
77/// alias to Target.
78static void checkAndSetWeakAlias(SymbolTable &symtab, InputFile *f,
79 Symbol *source, Symbol *target,
80 bool isAntiDep) {
81 if (auto *u = dyn_cast<Undefined>(Val: source)) {
82 if (u->weakAlias && u->weakAlias != target) {
83 // Ignore duplicated anti-dependency symbols.
84 if (isAntiDep)
85 return;
86 if (!u->isAntiDep) {
87 // Weak aliases as produced by GCC are named in the form
88 // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
89 // of another symbol emitted near the weak symbol.
90 // Just use the definition from the first object file that defined
91 // this weak symbol.
92 if (symtab.ctx.config.allowDuplicateWeak)
93 return;
94 symtab.reportDuplicate(existing: source, newFile: f);
95 }
96 }
97 u->setWeakAlias(sym: target, antiDep: isAntiDep);
98 }
99}
100
101static bool ignoredSymbolName(StringRef name) {
102 return name == "@feat.00" || name == "@comp.id";
103}
104
105static coff_symbol_generic *cloneSymbol(COFFSymbolRef sym) {
106 if (sym.isBigObj()) {
107 auto *copy = make<coff_symbol32>(
108 args: *reinterpret_cast<const coff_symbol32 *>(sym.getRawPtr()));
109 return reinterpret_cast<coff_symbol_generic *>(copy);
110 } else {
111 auto *copy = make<coff_symbol16>(
112 args: *reinterpret_cast<const coff_symbol16 *>(sym.getRawPtr()));
113 return reinterpret_cast<coff_symbol_generic *>(copy);
114 }
115}
116
117// Skip importing DllMain thunks from import libraries.
118static bool fixupDllMain(COFFLinkerContext &ctx, llvm::object::Archive *file,
119 const Archive::Symbol &sym, bool &skipDllMain) {
120 if (skipDllMain)
121 return true;
122 const Archive::Child &c =
123 CHECK(sym.getMember(), file->getFileName() +
124 ": could not get the member for symbol " +
125 toCOFFString(ctx, sym));
126 MemoryBufferRef mb =
127 CHECK(c.getMemoryBufferRef(),
128 file->getFileName() +
129 ": could not get the buffer for a child buffer of the archive");
130 if (identify_magic(magic: mb.getBuffer()) == file_magic::coff_import_library) {
131 if (ctx.config.warnExportedDllMain) {
132 // We won't place DllMain symbols in the symbol table if they are
133 // coming from a import library. This message can be ignored with the flag
134 // '/ignore:exporteddllmain'
135 Warn(ctx)
136 << file->getFileName()
137 << ": skipping exported DllMain symbol [exporteddllmain]\nNOTE: this "
138 "might be a mistake when the DLL/library was produced.";
139 }
140 skipDllMain = true;
141 return true;
142 }
143 return false;
144}
145
// Constructs an archive input file over buffer `m`, passing ctx.symtab and
// ArchiveKind to the InputFile base. The archive contents are not examined
// here; that happens in parse().
ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m)
    : InputFile(ctx.symtab, ArchiveKind, m) {}
148
149void ArchiveFile::parse() {
150 COFFLinkerContext &ctx = symtab.ctx;
151 SymbolTable *archiveSymtab = &symtab;
152
153 // Parse a MemoryBufferRef as an archive file.
154 file = CHECK(Archive::create(mb), this);
155
156 // Try to read symbols from ECSYMBOLS section on ARM64EC.
157 if (ctx.symtab.isEC()) {
158 iterator_range<Archive::symbol_iterator> symbols =
159 CHECK(file->ec_symbols(), this);
160 if (!symbols.empty()) {
161 for (const Archive::Symbol &sym : symbols)
162 ctx.symtab.addLazyArchive(f: this, sym);
163
164 // Read both EC and native symbols on ARM64X.
165 archiveSymtab = &*ctx.hybridSymtab;
166 } else {
167 // If the ECSYMBOLS section is missing in the archive, the archive could
168 // be either a native-only ARM64 or x86_64 archive. Check the machine type
169 // of the object containing a symbol to determine which symbol table to
170 // use.
171 Archive::symbol_iterator sym = file->symbol_begin();
172 if (sym != file->symbol_end()) {
173 MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN;
174 Archive::Child child =
175 CHECK(sym->getMember(),
176 file->getFileName() +
177 ": could not get the buffer for a child of the archive");
178 MemoryBufferRef mb = CHECK(
179 child.getMemoryBufferRef(),
180 file->getFileName() +
181 ": could not get the buffer for a child buffer of the archive");
182 switch (identify_magic(magic: mb.getBuffer())) {
183 case file_magic::coff_object: {
184 std::unique_ptr<COFFObjectFile> obj =
185 CHECK(COFFObjectFile::create(mb),
186 check(child.getName()) + ":" + ": not a valid COFF file");
187 machine = MachineTypes(obj->getMachine());
188 break;
189 }
190 case file_magic::coff_import_library:
191 machine = MachineTypes(COFFImportFile(mb).getMachine());
192 break;
193 case file_magic::bitcode: {
194 std::unique_ptr<lto::InputFile> obj =
195 check(e: lto::InputFile::create(Object: mb));
196 machine = BitcodeFile::getMachineType(obj: obj.get());
197 break;
198 }
199 default:
200 break;
201 }
202 archiveSymtab = &ctx.getSymtab(machine);
203 }
204 }
205 }
206
207 // Read the symbol table to construct Lazy objects.
208 bool skipDllMain = false;
209 for (const Archive::Symbol &sym : file->symbols()) {
210 // If the DllMain symbol was exported by mistake, skip importing it
211 // otherwise we might end up with a import thunk in the final binary which
212 // is wrong.
213 if (sym.getName() == "__imp_DllMain" || sym.getName() == "DllMain") {
214 if (fixupDllMain(ctx, file: file.get(), sym, skipDllMain))
215 continue;
216 }
217 archiveSymtab->addLazyArchive(f: this, sym);
218 }
219}
220
221// Returns a buffer pointing to a member file containing a given symbol.
222void ArchiveFile::addMember(const Archive::Symbol &sym) {
223 const Archive::Child &c =
224 CHECK(sym.getMember(), "could not get the member for symbol " +
225 toCOFFString(symtab.ctx, sym));
226
227 // Return an empty buffer if we have already returned the same buffer.
228 // FIXME: Remove this once we resolve all defineds before all undefineds in
229 // ObjFile::initializeSymbols().
230 if (!seen.insert(V: c.getChildOffset()).second)
231 return;
232
233 symtab.ctx.driver.enqueueArchiveMember(c, sym, parentName: getName());
234}
235
236std::vector<MemoryBufferRef>
237lld::coff::getArchiveMembers(COFFLinkerContext &ctx, Archive *file) {
238 std::vector<MemoryBufferRef> v;
239 Error err = Error::success();
240
241 // Thin archives refer to .o files, so --reproduces needs the .o files too.
242 bool addToTar = file->isThin() && ctx.driver.tar;
243
244 for (const Archive::Child &c : file->children(Err&: err)) {
245 MemoryBufferRef mbref =
246 CHECK(c.getMemoryBufferRef(),
247 file->getFileName() +
248 ": could not get the buffer for a child of the archive");
249 if (addToTar) {
250 ctx.driver.tar->append(Path: relativeToRoot(path: check(e: c.getFullName())),
251 Data: mbref.getBuffer());
252 }
253 v.push_back(x: mbref);
254 }
255 if (err)
256 Fatal(ctx) << file->getFileName()
257 << ": Archive::children failed: " << toString(E: std::move(err));
258 return v;
259}
260
// Wraps an already-parsed COFF object. The InputFile base receives `symtab`,
// ObjectKind, the object's backing memory buffer and the `lazy` flag;
// `coffObj` is stored for later section/symbol queries.
// NOTE(review): ObjFile::create() releases the owning unique_ptr before
// calling this, so this object presumably takes ownership of `coffObj` —
// confirm against the member's declaration.
ObjFile::ObjFile(SymbolTable &symtab, COFFObjectFile *coffObj, bool lazy)
    : InputFile(symtab, ObjectKind, coffObj->getMemoryBufferRef(), lazy),
      coffObj(coffObj) {}
264
265ObjFile *ObjFile::create(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy) {
266 // Parse a memory buffer as a COFF file.
267 Expected<std::unique_ptr<Binary>> bin = createBinary(Source: m);
268 if (!bin)
269 Fatal(ctx) << "Could not parse " << m.getBufferIdentifier();
270
271 auto *obj = dyn_cast<COFFObjectFile>(Val: bin->get());
272 if (!obj)
273 Fatal(ctx) << m.getBufferIdentifier() << " is not a COFF file";
274
275 bin->release();
276 return make<ObjFile>(args&: ctx.getSymtab(machine: MachineTypes(obj->getMachine())), args&: obj,
277 args&: lazy);
278}
279
280void ObjFile::parseLazy() {
281 // Native object file.
282 uint32_t numSymbols = coffObj->getNumberOfSymbols();
283 for (uint32_t i = 0; i < numSymbols; ++i) {
284 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
285 if (coffSym.isUndefined() || !coffSym.isExternal() ||
286 coffSym.isWeakExternal())
287 continue;
288 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
289 if (coffSym.isAbsolute() && ignoredSymbolName(name))
290 continue;
291 symtab.addLazyObject(f: this, n: name);
292 if (!lazy)
293 return;
294 i += coffSym.getNumberOfAuxSymbols();
295 }
296}
297
// One record of a .hybmp$x section, as decoded by
// ObjFile::initializeECThunks(). Section contents are reinterpreted as an
// array of these, so the layout must match the on-disk record exactly.
struct ECMapEntry {
  ulittle32_t src;  // Symbol index of the thunk source (passed to getSymbol()).
  ulittle32_t dst;  // Symbol index of the thunk target (passed to getSymbol()).
  ulittle32_t type; // Compared against Arm64ECThunkType values.
};
303
304void ObjFile::initializeECThunks() {
305 for (SectionChunk *chunk : hybmpChunks) {
306 if (chunk->getContents().size() % sizeof(ECMapEntry)) {
307 Err(ctx&: symtab.ctx) << "Invalid .hybmp chunk size "
308 << chunk->getContents().size();
309 continue;
310 }
311
312 const uint8_t *end =
313 chunk->getContents().data() + chunk->getContents().size();
314 for (const uint8_t *iter = chunk->getContents().data(); iter != end;
315 iter += sizeof(ECMapEntry)) {
316 auto entry = reinterpret_cast<const ECMapEntry *>(iter);
317 switch (entry->type) {
318 case Arm64ECThunkType::Entry:
319 symtab.addEntryThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
320 break;
321 case Arm64ECThunkType::Exit:
322 symtab.addExitThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
323 break;
324 case Arm64ECThunkType::GuestExit:
325 break;
326 default:
327 Warn(ctx&: symtab.ctx) << "Ignoring unknown EC thunk type " << entry->type;
328 }
329 }
330 }
331}
332
// Fully loads this object file by running the initialization steps in a
// fixed order.
void ObjFile::parse() {
  // Read section and symbol tables.
  // Chunks come first: initializeSymbols() looks sections up in sparseChunks,
  // which initializeChunks() fills in.
  initializeChunks();
  initializeSymbols();
  initializeFlags();
  initializeDependencies();
  // Reads the hybmpChunks collected while sections were being read and
  // resolves their entries via getSymbol().
  initializeECThunks();
}
341
342const coff_section *ObjFile::getSection(uint32_t i) {
343 auto sec = coffObj->getSection(index: i);
344 if (!sec)
345 Fatal(ctx&: symtab.ctx) << "getSection failed: #" << i << ": " << sec.takeError();
346 return *sec;
347}
348
// We set SectionChunk pointers in the SparseChunks vector to this value
// temporarily to mark comdat sections as having an unknown resolution. As we
// walk the object file's symbol table, once we visit either a leader symbol or
// an associative section definition together with the parent comdat's leader,
// we set the pointer to either nullptr (to mark the section as discarded) or a
// valid SectionChunk for that section.
//
// The value is a sentinel address (1) that is only ever compared against; it
// must never be dereferenced.
static SectionChunk *const pendingComdat = reinterpret_cast<SectionChunk *>(1);
356
357void ObjFile::initializeChunks() {
358 uint32_t numSections = coffObj->getNumberOfSections();
359 sparseChunks.resize(new_size: numSections + 1);
360 for (uint32_t i = 1; i < numSections + 1; ++i) {
361 const coff_section *sec = getSection(i);
362 if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
363 sparseChunks[i] = pendingComdat;
364 else
365 sparseChunks[i] = readSection(sectionNumber: i, def: nullptr, leaderName: "");
366 }
367}
368
369SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
370 const coff_aux_section_definition *def,
371 StringRef leaderName) {
372 const coff_section *sec = getSection(i: sectionNumber);
373
374 StringRef name;
375 if (Expected<StringRef> e = coffObj->getSectionName(Sec: sec))
376 name = *e;
377 else
378 Fatal(ctx&: symtab.ctx) << "getSectionName failed: #" << sectionNumber << ": "
379 << e.takeError();
380
381 if (name == ".drectve") {
382 ArrayRef<uint8_t> data;
383 cantFail(Err: coffObj->getSectionContents(Sec: sec, Res&: data));
384 directives = StringRef((const char *)data.data(), data.size());
385 return nullptr;
386 }
387
388 if (name == ".llvm_addrsig") {
389 addrsigSec = sec;
390 return nullptr;
391 }
392
393 if (name == ".llvm.call-graph-profile") {
394 callgraphSec = sec;
395 return nullptr;
396 }
397
398 // Object files may have DWARF debug info or MS CodeView debug info
399 // (or both).
400 //
401 // DWARF sections don't need any special handling from the perspective
402 // of the linker; they are just a data section containing relocations.
403 // We can just link them to complete debug info.
404 //
405 // CodeView needs linker support. We need to interpret debug info,
406 // and then write it to a separate .pdb file.
407
408 // Ignore DWARF debug info unless requested to be included.
409 if (!symtab.ctx.config.includeDwarfChunks && name.starts_with(Prefix: ".debug_"))
410 return nullptr;
411
412 if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
413 return nullptr;
414 SectionChunk *c;
415 if (isArm64EC(Machine: getMachineType()))
416 c = make<SectionChunkEC>(args: this, args&: sec);
417 else
418 c = make<SectionChunk>(args: this, args&: sec);
419 if (def)
420 c->checksum = def->CheckSum;
421
422 // CodeView sections are stored to a different vector because they are not
423 // linked in the regular manner.
424 if (c->isCodeView())
425 debugChunks.push_back(x: c);
426 else if (name == ".gfids$y")
427 guardFidChunks.push_back(x: c);
428 else if (name == ".giats$y")
429 guardIATChunks.push_back(x: c);
430 else if (name == ".gljmp$y")
431 guardLJmpChunks.push_back(x: c);
432 else if (name == ".gehcont$y")
433 guardEHContChunks.push_back(x: c);
434 else if (name == ".sxdata")
435 sxDataChunks.push_back(x: c);
436 else if (isArm64EC(Machine: getMachineType()) && name == ".hybmp$x")
437 hybmpChunks.push_back(x: c);
438 else if (symtab.ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
439 name == ".rdata" && leaderName.starts_with(Prefix: "??_C@"))
440 // COFF sections that look like string literal sections (i.e. no
441 // relocations, in .rdata, leader symbol name matches the MSVC name mangling
442 // for string literals) are subject to string tail merging.
443 MergeChunk::addSection(ctx&: symtab.ctx, c);
444 else if (name == ".rsrc" || name.starts_with(Prefix: ".rsrc$"))
445 resourceChunks.push_back(x: c);
446 else if (!(sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_INFO))
447 chunks.push_back(x: c);
448
449 return c;
450}
451
452void ObjFile::includeResourceChunks() {
453 chunks.insert(position: chunks.end(), first: resourceChunks.begin(), last: resourceChunks.end());
454}
455
456void ObjFile::readAssociativeDefinition(
457 COFFSymbolRef sym, const coff_aux_section_definition *def) {
458 readAssociativeDefinition(coffSym: sym, def, parentSection: def->getNumber(IsBigObj: sym.isBigObj()));
459}
460
461void ObjFile::readAssociativeDefinition(COFFSymbolRef sym,
462 const coff_aux_section_definition *def,
463 uint32_t parentIndex) {
464 SectionChunk *parent = sparseChunks[parentIndex];
465 int32_t sectionNumber = sym.getSectionNumber();
466
467 auto diag = [&]() {
468 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
469
470 StringRef parentName;
471 const coff_section *parentSec = getSection(i: parentIndex);
472 if (Expected<StringRef> e = coffObj->getSectionName(Sec: parentSec))
473 parentName = *e;
474 Err(ctx&: symtab.ctx) << toString(file: this) << ": associative comdat " << name
475 << " (sec " << sectionNumber
476 << ") has invalid reference to section " << parentName
477 << " (sec " << parentIndex << ")";
478 };
479
480 if (parent == pendingComdat) {
481 // This can happen if an associative comdat refers to another associative
482 // comdat that appears after it (invalid per COFF spec) or to a section
483 // without any symbols.
484 diag();
485 return;
486 }
487
488 // Check whether the parent is prevailing. If it is, so are we, and we read
489 // the section; otherwise mark it as discarded.
490 if (parent) {
491 SectionChunk *c = readSection(sectionNumber, def, leaderName: "");
492 sparseChunks[sectionNumber] = c;
493 if (c) {
494 c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE;
495 parent->addAssociative(child: c);
496 }
497 } else {
498 sparseChunks[sectionNumber] = nullptr;
499 }
500}
501
502void ObjFile::recordPrevailingSymbolForMingw(
503 COFFSymbolRef sym, DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
504 // For comdat symbols in executable sections, where this is the copy
505 // of the section chunk we actually include instead of discarding it,
506 // add the symbol to a map to allow using it for implicitly
507 // associating .[px]data$<func> sections to it.
508 // Use the suffix from the .text$<func> instead of the leader symbol
509 // name, for cases where the names differ (i386 mangling/decorations,
510 // cases where the leader is a weak symbol named .weak.func.default*).
511 int32_t sectionNumber = sym.getSectionNumber();
512 SectionChunk *sc = sparseChunks[sectionNumber];
513 if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
514 StringRef name = sc->getSectionName().split(Separator: '$').second;
515 prevailingSectionMap[name] = sectionNumber;
516 }
517}
518
519void ObjFile::maybeAssociateSEHForMingw(
520 COFFSymbolRef sym, const coff_aux_section_definition *def,
521 const DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
522 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
523 if (name.consume_front(Prefix: ".pdata$") || name.consume_front(Prefix: ".xdata$") ||
524 name.consume_front(Prefix: ".eh_frame$")) {
525 // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
526 // associative to the symbol <func>.
527 auto parentSym = prevailingSectionMap.find(Val: name);
528 if (parentSym != prevailingSectionMap.end())
529 readAssociativeDefinition(sym, def, parentIndex: parentSym->second);
530 }
531}
532
533Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
534 SectionChunk *sc = sparseChunks[sym.getSectionNumber()];
535 if (sym.isExternal()) {
536 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
537 if (sc)
538 return symtab.addRegular(f: this, n: name, s: sym.getGeneric(), c: sc,
539 sectionOffset: sym.getValue());
540 // For MinGW symbols named .weak.* that point to a discarded section,
541 // don't create an Undefined symbol. If nothing ever refers to the symbol,
542 // everything should be fine. If something actually refers to the symbol
543 // (e.g. the undefined weak alias), linking will fail due to undefined
544 // references at the end.
545 if (symtab.ctx.config.mingw && name.starts_with(Prefix: ".weak."))
546 return nullptr;
547 return symtab.addUndefined(name, f: this, overrideLazy: false);
548 }
549 if (sc) {
550 const coff_symbol_generic *symGen = sym.getGeneric();
551 if (sym.isSection()) {
552 auto *customSymGen = cloneSymbol(sym);
553 customSymGen->Value = 0;
554 symGen = customSymGen;
555 }
556 return make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
557 /*IsExternal*/ args: false, args&: symGen, args&: sc);
558 }
559 return nullptr;
560}
561
562void ObjFile::initializeSymbols() {
563 uint32_t numSymbols = coffObj->getNumberOfSymbols();
564 symbols.resize(new_size: numSymbols);
565
566 SmallVector<std::pair<Symbol *, const coff_aux_weak_external *>, 8>
567 weakAliases;
568 std::vector<uint32_t> pendingIndexes;
569 pendingIndexes.reserve(n: numSymbols);
570
571 DenseMap<StringRef, uint32_t> prevailingSectionMap;
572 std::vector<const coff_aux_section_definition *> comdatDefs(
573 coffObj->getNumberOfSections() + 1);
574 COFFLinkerContext &ctx = symtab.ctx;
575
576 for (uint32_t i = 0; i < numSymbols; ++i) {
577 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
578 bool prevailingComdat;
579 if (coffSym.isUndefined()) {
580 symbols[i] = createUndefined(sym: coffSym, overrideLazy: false);
581 } else if (coffSym.isWeakExternal()) {
582 auto aux = coffSym.getAux<coff_aux_weak_external>();
583 bool overrideLazy = true;
584
585 // On ARM64EC, external function calls emit a pair of weak-dependency
586 // aliases: func to #func and #func to the func guess exit thunk
587 // (instead of a single undefined func symbol, which would be emitted on
588 // other targets). Allow such aliases to be overridden by lazy archive
589 // symbols, just as we would for undefined symbols.
590 if (isArm64EC(Machine: getMachineType()) &&
591 aux->Characteristics == IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY) {
592 COFFSymbolRef targetSym = check(e: coffObj->getSymbol(index: aux->TagIndex));
593 if (!targetSym.isAnyUndefined()) {
594 // If the target is defined, it may be either a guess exit thunk or
595 // the actual implementation. If it's the latter, consider the alias
596 // to be part of the implementation and override potential lazy
597 // archive symbols.
598 StringRef targetName = check(e: coffObj->getSymbolName(Symbol: targetSym));
599 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
600 std::optional<std::string> mangledName =
601 getArm64ECMangledFunctionName(Name: name);
602 overrideLazy = mangledName == targetName;
603 } else {
604 overrideLazy = false;
605 }
606 }
607 symbols[i] = createUndefined(sym: coffSym, overrideLazy);
608 weakAliases.emplace_back(Args&: symbols[i], Args&: aux);
609 } else if (std::optional<Symbol *> optSym =
610 createDefined(sym: coffSym, comdatDefs, prevailingComdat)) {
611 symbols[i] = *optSym;
612 if (ctx.config.mingw && prevailingComdat)
613 recordPrevailingSymbolForMingw(sym: coffSym, prevailingSectionMap);
614 } else {
615 // createDefined() returns std::nullopt if a symbol belongs to a section
616 // that was pending at the point when the symbol was read. This can happen
617 // in two cases:
618 // 1) section definition symbol for a comdat leader;
619 // 2) symbol belongs to a comdat section associated with another section.
620 // In both of these cases, we can expect the section to be resolved by
621 // the time we finish visiting the remaining symbols in the symbol
622 // table. So we postpone the handling of this symbol until that time.
623 pendingIndexes.push_back(x: i);
624 }
625 i += coffSym.getNumberOfAuxSymbols();
626 }
627
628 for (uint32_t i : pendingIndexes) {
629 COFFSymbolRef sym = check(e: coffObj->getSymbol(index: i));
630 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
631 if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
632 readAssociativeDefinition(sym, def);
633 else if (ctx.config.mingw)
634 maybeAssociateSEHForMingw(sym, def, prevailingSectionMap);
635 }
636 if (sparseChunks[sym.getSectionNumber()] == pendingComdat) {
637 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
638 Log(ctx) << "comdat section " << name
639 << " without leader and unassociated, discarding";
640 continue;
641 }
642 symbols[i] = createRegular(sym);
643 }
644
645 for (auto &kv : weakAliases) {
646 Symbol *sym = kv.first;
647 const coff_aux_weak_external *aux = kv.second;
648 checkAndSetWeakAlias(symtab, f: this, source: sym, target: symbols[aux->TagIndex],
649 isAntiDep: aux->Characteristics ==
650 IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY);
651 }
652
653 // Free the memory used by sparseChunks now that symbol loading is finished.
654 decltype(sparseChunks)().swap(x&: sparseChunks);
655}
656
657Symbol *ObjFile::createUndefined(COFFSymbolRef sym, bool overrideLazy) {
658 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
659 Symbol *s = symtab.addUndefined(name, f: this, overrideLazy);
660
661 // Add an anti-dependency alias for undefined AMD64 symbols on the ARM64EC
662 // target.
663 if (symtab.isEC() && getMachineType() == AMD64) {
664 auto u = dyn_cast<Undefined>(Val: s);
665 if (u && !u->weakAlias) {
666 if (std::optional<std::string> mangledName =
667 getArm64ECMangledFunctionName(Name: name)) {
668 Symbol *m = symtab.addUndefined(name: saver().save(S: *mangledName), f: this,
669 /*overrideLazy=*/false);
670 u->setWeakAlias(sym: m, /*antiDep=*/true);
671 }
672 }
673 }
674 return s;
675}
676
677static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj,
678 int32_t section) {
679 uint32_t numSymbols = obj->getNumberOfSymbols();
680 for (uint32_t i = 0; i < numSymbols; ++i) {
681 COFFSymbolRef sym = check(e: obj->getSymbol(index: i));
682 if (sym.getSectionNumber() != section)
683 continue;
684 if (const coff_aux_section_definition *def = sym.getSectionDefinition())
685 return def;
686 }
687 return nullptr;
688}
689
690void ObjFile::handleComdatSelection(
691 COFFSymbolRef sym, COMDATType &selection, bool &prevailing,
692 DefinedRegular *leader,
693 const llvm::object::coff_aux_section_definition *def) {
694 if (prevailing)
695 return;
696 // There's already an existing comdat for this symbol: `Leader`.
697 // Use the comdats's selection field to determine if the new
698 // symbol in `Sym` should be discarded, produce a duplicate symbol
699 // error, etc.
700
701 SectionChunk *leaderChunk = leader->getChunk();
702 COMDATType leaderSelection = leaderChunk->selection;
703 COFFLinkerContext &ctx = symtab.ctx;
704
705 assert(leader->data && "Comdat leader without SectionChunk?");
706 if (isa<BitcodeFile>(Val: leader->file)) {
707 // If the leader is only a LTO symbol, we don't know e.g. its final size
708 // yet, so we can't do the full strict comdat selection checking yet.
709 selection = leaderSelection = IMAGE_COMDAT_SELECT_ANY;
710 }
711
712 if ((selection == IMAGE_COMDAT_SELECT_ANY &&
713 leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) ||
714 (selection == IMAGE_COMDAT_SELECT_LARGEST &&
715 leaderSelection == IMAGE_COMDAT_SELECT_ANY)) {
716 // cl.exe picks "any" for vftables when building with /GR- and
717 // "largest" when building with /GR. To be able to link object files
718 // compiled with each flag, "any" and "largest" are merged as "largest".
719 leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST;
720 }
721
722 // GCCs __declspec(selectany) doesn't actually pick "any" but "same size as".
723 // Clang on the other hand picks "any". To be able to link two object files
724 // with a __declspec(selectany) declaration, one compiled with gcc and the
725 // other with clang, we merge them as proper "same size as"
726 if (ctx.config.mingw && ((selection == IMAGE_COMDAT_SELECT_ANY &&
727 leaderSelection == IMAGE_COMDAT_SELECT_SAME_SIZE) ||
728 (selection == IMAGE_COMDAT_SELECT_SAME_SIZE &&
729 leaderSelection == IMAGE_COMDAT_SELECT_ANY))) {
730 leaderSelection = selection = IMAGE_COMDAT_SELECT_SAME_SIZE;
731 }
732
733 // Other than that, comdat selections must match. This is a bit more
734 // strict than link.exe which allows merging "any" and "largest" if "any"
735 // is the first symbol the linker sees, and it allows merging "largest"
736 // with everything (!) if "largest" is the first symbol the linker sees.
737 // Making this symmetric independent of which selection is seen first
738 // seems better though.
739 // (This behavior matches ModuleLinker::getComdatResult().)
740 if (selection != leaderSelection) {
741 Log(ctx) << "conflicting comdat type for " << symtab.printSymbol(sym: leader)
742 << ": " << (int)leaderSelection << " in " << leader->getFile()
743 << " and " << (int)selection << " in " << this;
744 symtab.reportDuplicate(existing: leader, newFile: this);
745 return;
746 }
747
748 switch (selection) {
749 case IMAGE_COMDAT_SELECT_NODUPLICATES:
750 symtab.reportDuplicate(existing: leader, newFile: this);
751 break;
752
753 case IMAGE_COMDAT_SELECT_ANY:
754 // Nothing to do.
755 break;
756
757 case IMAGE_COMDAT_SELECT_SAME_SIZE:
758 if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) {
759 if (!ctx.config.mingw) {
760 symtab.reportDuplicate(existing: leader, newFile: this);
761 } else {
762 const coff_aux_section_definition *leaderDef = nullptr;
763 if (leaderChunk->file)
764 leaderDef = findSectionDef(obj: leaderChunk->file->getCOFFObj(),
765 section: leaderChunk->getSectionNumber());
766 if (!leaderDef || leaderDef->Length != def->Length)
767 symtab.reportDuplicate(existing: leader, newFile: this);
768 }
769 }
770 break;
771
772 case IMAGE_COMDAT_SELECT_EXACT_MATCH: {
773 SectionChunk newChunk(this, getSection(sym));
774 // link.exe only compares section contents here and doesn't complain
775 // if the two comdat sections have e.g. different alignment.
776 // Match that.
777 if (leaderChunk->getContents() != newChunk.getContents())
778 symtab.reportDuplicate(existing: leader, newFile: this, newSc: &newChunk, newSectionOffset: sym.getValue());
779 break;
780 }
781
782 case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
783 // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
784 // (This means lld-link doesn't produce duplicate symbol errors for
785 // associative comdats while link.exe does, but associate comdats
786 // are never extern in practice.)
787 llvm_unreachable("createDefined not called for associative comdats");
788
789 case IMAGE_COMDAT_SELECT_LARGEST:
790 if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) {
791 // Replace the existing comdat symbol with the new one.
792 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
793 // FIXME: This is incorrect: With /opt:noref, the previous sections
794 // make it into the final executable as well. Correct handling would
795 // be to undo reading of the whole old section that's being replaced,
796 // or doing one pass that determines what the final largest comdat
797 // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
798 // only the largest one.
799 replaceSymbol<DefinedRegular>(s: leader, arg: this, arg&: name, /*IsCOMDAT*/ arg: true,
800 /*IsExternal*/ arg: true, arg: sym.getGeneric(),
801 arg: nullptr);
802 prevailing = true;
803 }
804 break;
805
806 case IMAGE_COMDAT_SELECT_NEWEST:
807 llvm_unreachable("should have been rejected earlier");
808 }
809}
810
811std::optional<Symbol *> ObjFile::createDefined(
812 COFFSymbolRef sym,
813 std::vector<const coff_aux_section_definition *> &comdatDefs,
814 bool &prevailing) {
815 prevailing = false;
816 auto getName = [&]() { return check(e: coffObj->getSymbolName(Symbol: sym)); };
817
818 if (sym.isCommon()) {
819 auto *c = make<CommonChunk>(args&: sym);
820 chunks.push_back(x: c);
821 return symtab.addCommon(f: this, n: getName(), size: sym.getValue(), s: sym.getGeneric(),
822 c);
823 }
824
825 COFFLinkerContext &ctx = symtab.ctx;
826 if (sym.isAbsolute()) {
827 StringRef name = getName();
828
829 if (name == "@feat.00")
830 feat00Flags = sym.getValue();
831 // Skip special symbols.
832 if (ignoredSymbolName(name))
833 return nullptr;
834
835 if (sym.isExternal())
836 return symtab.addAbsolute(n: name, s: sym);
837 return make<DefinedAbsolute>(args&: ctx, args&: name, args&: sym);
838 }
839
840 int32_t sectionNumber = sym.getSectionNumber();
841 if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
842 return nullptr;
843
844 if (sym.isEmptySectionDeclaration()) {
845 // As there is no coff_section in the object file for these, make a
846 // new virtual one, with everything zeroed out (i.e. an empty section),
847 // with only the name and characteristics set.
848 StringRef name = getName();
849 auto *hdr = make<coff_section>();
850 memset(s: hdr, c: 0, n: sizeof(*hdr));
851 strncpy(dest: hdr->Name, src: name.data(),
852 n: std::min(a: name.size(), b: (size_t)COFF::NameSize));
853 // The Value field in a section symbol may contain the characteristics,
854 // or it may be zero, where we make something up (that matches what is
855 // used in .idata sections in the regular object files in import libraries).
856 if (sym.getValue())
857 hdr->Characteristics = sym.getValue() | IMAGE_SCN_ALIGN_4BYTES;
858 else
859 hdr->Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
860 IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE |
861 IMAGE_SCN_ALIGN_4BYTES;
862 auto *sc = make<SectionChunk>(args: this, args&: hdr);
863 chunks.push_back(x: sc);
864
865 auto *symGen = cloneSymbol(sym);
866 // Ignore the Value offset of these symbols, as it may be a bitmask.
867 symGen->Value = 0;
868 return make<DefinedRegular>(args: this, /*name=*/args: "", /*isCOMDAT=*/args: false,
869 /*isExternal=*/args: false, args&: symGen, args&: sc);
870 }
871
872 if (llvm::COFF::isReservedSectionNumber(SectionNumber: sectionNumber))
873 Fatal(ctx) << toString(file: this) << ": " << getName()
874 << " should not refer to special section "
875 << Twine(sectionNumber);
876
877 if ((uint32_t)sectionNumber >= sparseChunks.size())
878 Fatal(ctx) << toString(file: this) << ": " << getName()
879 << " should not refer to non-existent section "
880 << Twine(sectionNumber);
881
882 // Comdat handling.
883 // A comdat symbol consists of two symbol table entries.
884 // The first symbol entry has the name of the section (e.g. .text), fixed
885 // values for the other fields, and one auxiliary record.
886 // The second symbol entry has the name of the comdat symbol, called the
887 // "comdat leader".
888 // When this function is called for the first symbol entry of a comdat,
889 // it sets comdatDefs and returns std::nullopt, and when it's called for the
890 // second symbol entry it reads comdatDefs and then sets it back to nullptr.
891
892 // Handle comdat leader.
893 if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) {
894 comdatDefs[sectionNumber] = nullptr;
895 DefinedRegular *leader;
896
897 if (sym.isExternal()) {
898 std::tie(args&: leader, args&: prevailing) =
899 symtab.addComdat(f: this, n: getName(), s: sym.getGeneric());
900 } else {
901 leader = make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
902 /*IsExternal*/ args: false, args: sym.getGeneric());
903 prevailing = true;
904 }
905
906 if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES ||
907 // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
908 // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
909 def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) {
910 Fatal(ctx) << "unknown comdat type "
911 << std::to_string(val: (int)def->Selection) << " for " << getName()
912 << " in " << toString(file: this);
913 }
914 COMDATType selection = (COMDATType)def->Selection;
915
916 if (leader->isCOMDAT)
917 handleComdatSelection(sym, selection, prevailing, leader, def);
918
919 if (prevailing) {
920 SectionChunk *c = readSection(sectionNumber, def, leaderName: getName());
921 sparseChunks[sectionNumber] = c;
922 if (!c)
923 return nullptr;
924 c->sym = cast<DefinedRegular>(Val: leader);
925 c->selection = selection;
926 cast<DefinedRegular>(Val: leader)->data = &c->repl;
927 } else {
928 sparseChunks[sectionNumber] = nullptr;
929 }
930 return leader;
931 }
932
933 // Prepare to handle the comdat leader symbol by setting the section's
934 // ComdatDefs pointer if we encounter a non-associative comdat.
935 if (sparseChunks[sectionNumber] == pendingComdat) {
936 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
937 if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE)
938 comdatDefs[sectionNumber] = def;
939 }
940 return std::nullopt;
941 }
942
943 return createRegular(sym);
944}
945
946MachineTypes ObjFile::getMachineType() const {
947 return static_cast<MachineTypes>(coffObj->getMachine());
948}
949
950ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
951 if (SectionChunk *sec = SectionChunk::findByName(sections: debugChunks, name: secName))
952 return sec->consumeDebugMagic();
953 return {};
954}
955
956// OBJ files systematically store critical information in a .debug$S stream,
957// even if the TU was compiled with no debug info. At least two records are
958// always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
959// PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
960// currently used to initialize the hotPatchable member.
961void ObjFile::initializeFlags() {
962 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$S");
963 if (data.empty())
964 return;
965
966 DebugSubsectionArray subsections;
967
968 BinaryStreamReader reader(data, llvm::endianness::little);
969 ExitOnError exitOnErr;
970 exitOnErr(reader.readArray(Array&: subsections, Size: data.size()));
971
972 for (const DebugSubsectionRecord &ss : subsections) {
973 if (ss.kind() != DebugSubsectionKind::Symbols)
974 continue;
975
976 unsigned offset = 0;
977
978 // Only parse the first two records. We are only looking for S_OBJNAME
979 // and S_COMPILE3, and they usually appear at the beginning of the
980 // stream.
981 for (unsigned i = 0; i < 2; ++i) {
982 Expected<CVSymbol> sym = readSymbolFromStream(Stream: ss.getRecordData(), Offset: offset);
983 if (!sym) {
984 consumeError(Err: sym.takeError());
985 return;
986 }
987 if (sym->kind() == SymbolKind::S_COMPILE3) {
988 auto cs =
989 cantFail(ValOrErr: SymbolDeserializer::deserializeAs<Compile3Sym>(Symbol: sym.get()));
990 hotPatchable =
991 (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None;
992 }
993 if (sym->kind() == SymbolKind::S_OBJNAME) {
994 auto objName = cantFail(ValOrErr: SymbolDeserializer::deserializeAs<ObjNameSym>(
995 Symbol: sym.get()));
996 if (objName.Signature)
997 pchSignature = objName.Signature;
998 }
999 offset += sym->length();
1000 }
1001 }
1002}
1003
1004// Depending on the compilation flags, OBJs can refer to external files,
1005// necessary to merge this OBJ into the final PDB. We currently support two
1006// types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
1007// And PDB type servers, when compiling with /Zi. This function extracts these
1008// dependencies and makes them available as a TpiSource interface (see
1009// DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
1010// output even with /Yc and /Yu and with /Zi.
1011void ObjFile::initializeDependencies() {
1012 COFFLinkerContext &ctx = symtab.ctx;
1013 if (!ctx.config.debug)
1014 return;
1015
1016 bool isPCH = false;
1017
1018 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$P");
1019 if (!data.empty())
1020 isPCH = true;
1021 else
1022 data = getDebugSection(secName: ".debug$T");
1023
1024 // symbols but no types, make a plain, empty TpiSource anyway, because it
1025 // simplifies adding the symbols later.
1026 if (data.empty()) {
1027 if (!debugChunks.empty())
1028 debugTypesObj = makeTpiSource(ctx, f: this);
1029 return;
1030 }
1031
1032 // Get the first type record. It will indicate if this object uses a type
1033 // server (/Zi) or a PCH file (/Yu).
1034 CVTypeArray types;
1035 BinaryStreamReader reader(data, llvm::endianness::little);
1036 cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
1037 CVTypeArray::Iterator firstType = types.begin();
1038 if (firstType == types.end())
1039 return;
1040
1041 // Remember the .debug$T or .debug$P section.
1042 debugTypes = data;
1043
1044 // This object file is a PCH file that others will depend on.
1045 if (isPCH) {
1046 debugTypesObj = makePrecompSource(ctx, file: this);
1047 return;
1048 }
1049
1050 // This object file was compiled with /Zi. Enqueue the PDB dependency.
1051 if (firstType->kind() == LF_TYPESERVER2) {
1052 TypeServer2Record ts = cantFail(
1053 ValOrErr: TypeDeserializer::deserializeAs<TypeServer2Record>(Data: firstType->data()));
1054 debugTypesObj = makeUseTypeServerSource(ctx, file: this, ts);
1055 enqueuePdbFile(path: ts.getName(), fromFile: this);
1056 return;
1057 }
1058
1059 // This object was compiled with /Yu. It uses types from another object file
1060 // with a matching signature.
1061 if (firstType->kind() == LF_PRECOMP) {
1062 PrecompRecord precomp = cantFail(
1063 ValOrErr: TypeDeserializer::deserializeAs<PrecompRecord>(Data: firstType->data()));
1064 // We're better off trusting the LF_PRECOMP signature. In some cases the
1065 // S_OBJNAME record doesn't contain a valid PCH signature.
1066 if (precomp.Signature)
1067 pchSignature = precomp.Signature;
1068 debugTypesObj = makeUsePrecompSource(ctx, file: this, ts: precomp);
1069 // Drop the LF_PRECOMP record from the input stream.
1070 debugTypes = debugTypes.drop_front(N: firstType->RecordData.size());
1071 return;
1072 }
1073
1074 // This is a plain old object file.
1075 debugTypesObj = makeTpiSource(ctx, f: this);
1076}
1077
1078// The casing of the PDB path stamped in the OBJ can differ from the actual path
1079// on disk. With this, we ensure to always use lowercase as a key for the
1080// pdbInputFileInstances map, at least on Windows.
1081static std::string normalizePdbPath(StringRef path) {
1082#if defined(_WIN32)
1083 return path.lower();
1084#else // LINUX
1085 return std::string(path);
1086#endif
1087}
1088
1089// If existing, return the actual PDB path on disk.
1090static std::optional<std::string>
1091findPdbPath(StringRef pdbPath, ObjFile *dependentFile, StringRef outputPath) {
1092 // Ensure the file exists before anything else. In some cases, if the path
1093 // points to a removable device, Driver::enqueuePath() would fail with an
1094 // error (EAGAIN, "resource unavailable try again") which we want to skip
1095 // silently.
1096 if (llvm::sys::fs::exists(Path: pdbPath))
1097 return normalizePdbPath(path: pdbPath);
1098
1099 StringRef objPath = !dependentFile->parentName.empty()
1100 ? dependentFile->parentName
1101 : dependentFile->getName();
1102
1103 // Currently, type server PDBs are only created by MSVC cl, which only runs
1104 // on Windows, so we can assume type server paths are Windows style.
1105 StringRef pdbName = sys::path::filename(path: pdbPath, style: sys::path::Style::windows);
1106
1107 // Check if the PDB is in the same folder as the OBJ.
1108 SmallString<128> path;
1109 sys::path::append(path, a: sys::path::parent_path(path: objPath), b: pdbName);
1110 if (llvm::sys::fs::exists(Path: path))
1111 return normalizePdbPath(path);
1112
1113 // Check if the PDB is in the output folder.
1114 path.clear();
1115 sys::path::append(path, a: sys::path::parent_path(path: outputPath), b: pdbName);
1116 if (llvm::sys::fs::exists(Path: path))
1117 return normalizePdbPath(path);
1118
1119 return std::nullopt;
1120}
1121
1122PDBInputFile::PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1123 : InputFile(ctx.symtab, PDBKind, m) {}
1124
1125PDBInputFile::~PDBInputFile() = default;
1126
1127PDBInputFile *PDBInputFile::findFromRecordPath(const COFFLinkerContext &ctx,
1128 StringRef path,
1129 ObjFile *fromFile) {
1130 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: ctx.config.outputFile);
1131 if (!p)
1132 return nullptr;
1133 auto it = ctx.pdbInputFileInstances.find(x: *p);
1134 if (it != ctx.pdbInputFileInstances.end())
1135 return it->second;
1136 return nullptr;
1137}
1138
1139void PDBInputFile::parse() {
1140 symtab.ctx.pdbInputFileInstances[mb.getBufferIdentifier().str()] = this;
1141
1142 std::unique_ptr<pdb::IPDBSession> thisSession;
1143 Error E = pdb::NativeSession::createFromPdb(
1144 MB: MemoryBuffer::getMemBuffer(Ref: mb, RequiresNullTerminator: false), Session&: thisSession);
1145 if (E) {
1146 loadErrorStr.emplace(args: toString(E: std::move(E)));
1147 return; // fail silently at this point - the error will be handled later,
1148 // when merging the debug type stream
1149 }
1150
1151 session.reset(p: static_cast<pdb::NativeSession *>(thisSession.release()));
1152
1153 pdb::PDBFile &pdbFile = session->getPDBFile();
1154 auto expectedInfo = pdbFile.getPDBInfoStream();
1155 // All PDB Files should have an Info stream.
1156 if (!expectedInfo) {
1157 loadErrorStr.emplace(args: toString(E: expectedInfo.takeError()));
1158 return;
1159 }
1160 debugTypesObj = makeTypeServerSource(ctx&: symtab.ctx, pdbInputFile: this);
1161}
1162
1163// Used only for DWARF debug info, which is not common (except in MinGW
1164// environments). This returns an optional pair of file name and line
1165// number for where the variable was defined.
1166std::optional<std::pair<StringRef, uint32_t>>
1167ObjFile::getVariableLocation(StringRef var) {
1168 if (!dwarf) {
1169 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1170 if (!dwarf)
1171 return std::nullopt;
1172 }
1173 if (symtab.machine == I386)
1174 var.consume_front(Prefix: "_");
1175 std::optional<std::pair<std::string, unsigned>> ret =
1176 dwarf->getVariableLoc(name: var);
1177 if (!ret)
1178 return std::nullopt;
1179 return std::make_pair(x: saver().save(S: ret->first), y&: ret->second);
1180}
1181
1182// Used only for DWARF debug info, which is not common (except in MinGW
1183// environments).
1184std::optional<DILineInfo> ObjFile::getDILineInfo(uint32_t offset,
1185 uint32_t sectionIndex) {
1186 if (!dwarf) {
1187 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1188 if (!dwarf)
1189 return std::nullopt;
1190 }
1191
1192 return dwarf->getDILineInfo(offset, sectionIndex);
1193}
1194
1195void ObjFile::enqueuePdbFile(StringRef path, ObjFile *fromFile) {
1196 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: symtab.ctx.config.outputFile);
1197 if (!p)
1198 return;
1199 auto it = symtab.ctx.pdbInputFileInstances.emplace(args&: *p, args: nullptr);
1200 if (!it.second)
1201 return; // already scheduled for load
1202 symtab.ctx.driver.enqueuePDB(Path: *p);
1203}
1204
1205ImportFile::ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1206 : InputFile(ctx.getSymtab(machine: getMachineType(m)), ImportKind, m),
1207 live(!ctx.config.doGC) {}
1208
1209MachineTypes ImportFile::getMachineType(MemoryBufferRef m) {
1210 uint16_t machine =
1211 reinterpret_cast<const coff_import_header *>(m.getBufferStart())->Machine;
1212 return MachineTypes(machine);
1213}
1214
1215bool ImportFile::isSameImport(const ImportFile *other) const {
1216 if (!externalName.empty())
1217 return other->externalName == externalName;
1218 return hdr->OrdinalHint == other->hdr->OrdinalHint;
1219}
1220
1221ImportThunkChunk *ImportFile::makeImportThunk() {
1222 switch (hdr->Machine) {
1223 case AMD64:
1224 return make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym);
1225 case I386:
1226 return make<ImportThunkChunkX86>(args&: symtab.ctx, args&: impSym);
1227 case ARM64:
1228 return make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impSym, args: ARM64);
1229 case ARMNT:
1230 return make<ImportThunkChunkARM>(args&: symtab.ctx, args&: impSym);
1231 }
1232 llvm_unreachable("unknown machine type");
1233}
1234
1235void ImportFile::parse() {
1236 const auto *hdr =
1237 reinterpret_cast<const coff_import_header *>(mb.getBufferStart());
1238
1239 // Check if the total size is valid.
1240 if (mb.getBufferSize() < sizeof(*hdr) ||
1241 mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData)
1242 Fatal(ctx&: symtab.ctx) << "broken import library";
1243
1244 // Read names and create an __imp_ symbol.
1245 StringRef buf = mb.getBuffer().substr(Start: sizeof(*hdr));
1246 auto split = buf.split(Separator: '\0');
1247 buf = split.second;
1248 StringRef name;
1249 if (isArm64EC(Machine: hdr->Machine)) {
1250 if (std::optional<std::string> demangledName =
1251 getArm64ECDemangledFunctionName(Name: split.first))
1252 name = saver().save(S: *demangledName);
1253 }
1254 if (name.empty())
1255 name = saver().save(S: split.first);
1256 StringRef impName = saver().save(S: "__imp_" + name);
1257 dllName = buf.split(Separator: '\0').first;
1258 StringRef extName;
1259 switch (hdr->getNameType()) {
1260 case IMPORT_ORDINAL:
1261 extName = "";
1262 break;
1263 case IMPORT_NAME:
1264 extName = name;
1265 break;
1266 case IMPORT_NAME_NOPREFIX:
1267 extName = ltrim1(s: name, chars: "?@_");
1268 break;
1269 case IMPORT_NAME_UNDECORATE:
1270 extName = ltrim1(s: name, chars: "?@_");
1271 extName = extName.substr(Start: 0, N: extName.find(C: '@'));
1272 break;
1273 case IMPORT_NAME_EXPORTAS:
1274 extName = buf.substr(Start: dllName.size() + 1).split(Separator: '\0').first;
1275 break;
1276 }
1277
1278 this->hdr = hdr;
1279 externalName = extName;
1280
1281 bool isCode = hdr->getType() == llvm::COFF::IMPORT_CODE;
1282
1283 if (!symtab.isEC()) {
1284 impSym = symtab.addImportData(n: impName, f: this, location);
1285 } else {
1286 // In addition to the regular IAT, ARM64EC also contains an auxiliary IAT,
1287 // which holds addresses that are guaranteed to be callable directly from
1288 // ARM64 code. Function symbol naming is swapped: __imp_ symbols refer to
1289 // the auxiliary IAT, while __imp_aux_ symbols refer to the regular IAT. For
1290 // data imports, the naming is reversed.
1291 StringRef auxImpName = saver().save(S: "__imp_aux_" + name);
1292 if (isCode) {
1293 impSym = symtab.addImportData(n: auxImpName, f: this, location);
1294 impECSym = symtab.addImportData(n: impName, f: this, location&: auxLocation);
1295 } else {
1296 impSym = symtab.addImportData(n: impName, f: this, location);
1297 impECSym = symtab.addImportData(n: auxImpName, f: this, location&: auxLocation);
1298 }
1299 if (!impECSym)
1300 return;
1301
1302 StringRef auxImpCopyName = saver().save(S: "__auximpcopy_" + name);
1303 auxImpCopySym = symtab.addImportData(n: auxImpCopyName, f: this, location&: auxCopyLocation);
1304 if (!auxImpCopySym)
1305 return;
1306 }
1307 // If this was a duplicate, we logged an error but may continue;
1308 // in this case, impSym is nullptr.
1309 if (!impSym)
1310 return;
1311
1312 if (hdr->getType() == llvm::COFF::IMPORT_CONST)
1313 static_cast<void>(symtab.addImportData(n: name, f: this, location));
1314
1315 // If type is function, we need to create a thunk which jump to an
1316 // address pointed by the __imp_ symbol. (This allows you to call
1317 // DLL functions just like regular non-DLL functions.)
1318 if (isCode) {
1319 if (!symtab.isEC()) {
1320 thunkSym = symtab.addImportThunk(name, s: impSym, chunk: makeImportThunk());
1321 } else {
1322 thunkSym = symtab.addImportThunk(
1323 name, s: impSym, chunk: make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym));
1324
1325 if (std::optional<std::string> mangledName =
1326 getArm64ECMangledFunctionName(Name: name)) {
1327 StringRef auxThunkName = saver().save(S: *mangledName);
1328 auxThunkSym = symtab.addImportThunk(
1329 name: auxThunkName, s: impECSym,
1330 chunk: make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impECSym, args: ARM64EC));
1331 }
1332
1333 StringRef impChkName = saver().save(S: "__impchk_" + name);
1334 impchkThunk = make<ImportThunkChunkARM64EC>(args: this);
1335 impchkThunk->sym = symtab.addImportThunk(name: impChkName, s: impSym, chunk: impchkThunk);
1336 symtab.ctx.driver.pullArm64ECIcallHelper();
1337 }
1338 }
1339}
1340
1341BitcodeFile::BitcodeFile(SymbolTable &symtab, MemoryBufferRef mb,
1342 std::unique_ptr<lto::InputFile> &o, bool lazy)
1343 : InputFile(symtab, BitcodeKind, mb, lazy) {
1344 obj.swap(u&: o);
1345}
1346
1347BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
1348 StringRef archiveName,
1349 uint64_t offsetInArchive, bool lazy) {
1350 std::string path = mb.getBufferIdentifier().str();
1351 if (ctx.config.thinLTOIndexOnly)
1352 path = replaceThinLTOSuffix(path: mb.getBufferIdentifier(),
1353 suffix: ctx.config.thinLTOObjectSuffixReplace.first,
1354 repl: ctx.config.thinLTOObjectSuffixReplace.second);
1355
1356 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1357 // name. If two archives define two members with the same name, this
1358 // causes a collision which result in only one of the objects being taken
1359 // into consideration at LTO time (which very likely causes undefined
1360 // symbols later in the link stage). So we append file offset to make
1361 // filename unique.
1362 MemoryBufferRef mbref(mb.getBuffer(),
1363 saver().save(S: archiveName.empty()
1364 ? path
1365 : archiveName +
1366 sys::path::filename(path) +
1367 utostr(X: offsetInArchive)));
1368
1369 std::unique_ptr<lto::InputFile> obj = check(e: lto::InputFile::create(Object: mbref));
1370 return make<BitcodeFile>(args&: ctx.getSymtab(machine: getMachineType(obj: obj.get())), args&: mb, args&: obj,
1371 args&: lazy);
1372}
1373
1374BitcodeFile::~BitcodeFile() = default;
1375
1376void BitcodeFile::parse() {
1377 llvm::StringSaver &saver = lld::saver();
1378
1379 std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
1380 for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
1381 // FIXME: Check nodeduplicate
1382 comdat[i] =
1383 symtab.addComdat(f: this, n: saver.save(S: obj->getComdatTable()[i].first));
1384 for (const lto::InputFile::Symbol &objSym : obj->symbols()) {
1385 StringRef symName = saver.save(S: objSym.getName());
1386 int comdatIndex = objSym.getComdatIndex();
1387 Symbol *sym;
1388 SectionChunk *fakeSC = nullptr;
1389 if (objSym.isExecutable())
1390 fakeSC = &symtab.ctx.ltoTextSectionChunk.chunk;
1391 else
1392 fakeSC = &symtab.ctx.ltoDataSectionChunk.chunk;
1393 if (objSym.isUndefined()) {
1394 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1395 if (objSym.isWeak())
1396 sym->deferUndefined = true;
1397 // If one LTO object file references (i.e. has an undefined reference to)
1398 // a symbol with an __imp_ prefix, the LTO compilation itself sees it
1399 // as unprefixed but with a dllimport attribute instead, and doesn't
1400 // understand the relation to a concrete IR symbol with the __imp_ prefix.
1401 //
1402 // For such cases, mark the symbol as used in a regular object (i.e. the
1403 // symbol must be retained) so that the linker can associate the
1404 // references in the end. If the symbol is defined in an import library
1405 // or in a regular object file, this has no effect, but if it is defined
1406 // in another LTO object file, this makes sure it is kept, to fulfill
1407 // the reference when linking the output of the LTO compilation.
1408 if (symName.starts_with(Prefix: "__imp_"))
1409 sym->isUsedInRegularObj = true;
1410 } else if (objSym.isCommon()) {
1411 sym = symtab.addCommon(f: this, n: symName, size: objSym.getCommonSize());
1412 } else if (objSym.isWeak() && objSym.isIndirect()) {
1413 // Weak external.
1414 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: true);
1415 std::string fallback = std::string(objSym.getCOFFWeakExternalFallback());
1416 Symbol *alias = symtab.addUndefined(name: saver.save(S: fallback));
1417 checkAndSetWeakAlias(symtab, f: this, source: sym, target: alias, isAntiDep: false);
1418 } else if (comdatIndex != -1) {
1419 if (symName == obj->getComdatTable()[comdatIndex].first) {
1420 sym = comdat[comdatIndex].first;
1421 if (cast<DefinedRegular>(Val: sym)->data == nullptr)
1422 cast<DefinedRegular>(Val: sym)->data = &fakeSC->repl;
1423 } else if (comdat[comdatIndex].second) {
1424 sym = symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC);
1425 } else {
1426 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1427 }
1428 } else {
1429 sym =
1430 symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC, sectionOffset: 0, isWeak: objSym.isWeak());
1431 }
1432 symbols.push_back(x: sym);
1433 if (objSym.isUsed())
1434 symtab.ctx.config.gcroot.push_back(x: sym);
1435 }
1436 directives = saver.save(S: obj->getCOFFLinkerOpts());
1437}
1438
1439void BitcodeFile::parseLazy() {
1440 for (const lto::InputFile::Symbol &sym : obj->symbols())
1441 if (!sym.isUndefined()) {
1442 symtab.addLazyObject(f: this, n: sym.getName());
1443 if (!lazy)
1444 return;
1445 }
1446}
1447
1448MachineTypes BitcodeFile::getMachineType(const llvm::lto::InputFile *obj) {
1449 Triple t(obj->getTargetTriple());
1450 switch (t.getArch()) {
1451 case Triple::x86_64:
1452 return AMD64;
1453 case Triple::x86:
1454 return I386;
1455 case Triple::arm:
1456 case Triple::thumb:
1457 return ARMNT;
1458 case Triple::aarch64:
1459 return t.isWindowsArm64EC() ? ARM64EC : ARM64;
1460 default:
1461 return IMAGE_FILE_MACHINE_UNKNOWN;
1462 }
1463}
1464
1465std::string lld::coff::replaceThinLTOSuffix(StringRef path, StringRef suffix,
1466 StringRef repl) {
1467 if (path.consume_back(Suffix: suffix))
1468 return (path + repl).str();
1469 return std::string(path);
1470}
1471
1472static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) {
1473 for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) {
1474 const coff_section *sec = CHECK(coffObj->getSection(i), file);
1475 if (rva >= sec->VirtualAddress &&
1476 rva <= sec->VirtualAddress + sec->VirtualSize) {
1477 return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0;
1478 }
1479 }
1480 return false;
1481}
1482
1483void DLLFile::parse() {
1484 // Parse a memory buffer as a PE-COFF executable.
1485 std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
1486
1487 if (auto *obj = dyn_cast<COFFObjectFile>(Val: bin.get())) {
1488 bin.release();
1489 coffObj.reset(p: obj);
1490 } else {
1491 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a COFF file";
1492 return;
1493 }
1494
1495 if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) {
1496 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a PE-COFF executable";
1497 return;
1498 }
1499
1500 for (const auto &exp : coffObj->export_directories()) {
1501 StringRef dllName, symbolName;
1502 uint32_t exportRVA;
1503 checkError(e: exp.getDllName(Result&: dllName));
1504 checkError(e: exp.getSymbolName(Result&: symbolName));
1505 checkError(e: exp.getExportRVA(Result&: exportRVA));
1506
1507 if (symbolName.empty())
1508 continue;
1509
1510 bool code = isRVACode(coffObj: coffObj.get(), rva: exportRVA, file: this);
1511
1512 Symbol *s = make<Symbol>();
1513 s->dllName = dllName;
1514 s->symbolName = symbolName;
1515 s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA;
1516 s->nameType = ImportNameType::IMPORT_NAME;
1517
1518 if (coffObj->getMachine() == I386) {
1519 s->symbolName = symbolName = saver().save(S: "_" + symbolName);
1520 s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
1521 }
1522
1523 StringRef impName = saver().save(S: "__imp_" + symbolName);
1524 symtab.addLazyDLLSymbol(f: this, sym: s, n: impName);
1525 if (code)
1526 symtab.addLazyDLLSymbol(f: this, sym: s, n: symbolName);
1527 if (symtab.isEC()) {
1528 StringRef impAuxName = saver().save(S: "__imp_aux_" + symbolName);
1529 symtab.addLazyDLLSymbol(f: this, sym: s, n: impAuxName);
1530
1531 if (code) {
1532 std::optional<std::string> mangledName =
1533 getArm64ECMangledFunctionName(Name: symbolName);
1534 if (mangledName)
1535 symtab.addLazyDLLSymbol(f: this, sym: s, n: *mangledName);
1536 }
1537 }
1538 }
1539}
1540
1541MachineTypes DLLFile::getMachineType() const {
1542 if (coffObj)
1543 return static_cast<MachineTypes>(coffObj->getMachine());
1544 return IMAGE_FILE_MACHINE_UNKNOWN;
1545}
1546
1547void DLLFile::makeImport(DLLFile::Symbol *s) {
1548 if (!seen.insert(key: s->symbolName).second)
1549 return;
1550
1551 size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
1552 size_t size = sizeof(coff_import_header) + impSize;
1553 char *buf = bAlloc().Allocate<char>(Num: size);
1554 memset(s: buf, c: 0, n: size);
1555 char *p = buf;
1556 auto *imp = reinterpret_cast<coff_import_header *>(p);
1557 p += sizeof(*imp);
1558 imp->Sig2 = 0xFFFF;
1559 imp->Machine = coffObj->getMachine();
1560 imp->SizeOfData = impSize;
1561 imp->OrdinalHint = 0; // Only linking by name
1562 imp->TypeInfo = (s->nameType << 2) | s->importType;
1563
1564 // Write symbol name and DLL name.
1565 memcpy(dest: p, src: s->symbolName.data(), n: s->symbolName.size());
1566 p += s->symbolName.size() + 1;
1567 memcpy(dest: p, src: s->dllName.data(), n: s->dllName.size());
1568 MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName);
1569 ImportFile *impFile = make<ImportFile>(args&: symtab.ctx, args&: mbref);
1570 symtab.ctx.driver.addFile(file: impFile);
1571}
1572