1//===- InputFiles.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "COFFLinkerContext.h"
11#include "Chunks.h"
12#include "Config.h"
13#include "DebugTypes.h"
14#include "Driver.h"
15#include "SymbolTable.h"
16#include "Symbols.h"
17#include "lld/Common/DWARF.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/BinaryFormat/COFF.h"
21#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
22#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
23#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
24#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
25#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27#include "llvm/IR/Mangler.h"
28#include "llvm/IR/RuntimeLibcalls.h"
29#include "llvm/LTO/LTO.h"
30#include "llvm/Object/Binary.h"
31#include "llvm/Object/COFF.h"
32#include "llvm/Object/COFFImportFile.h"
33#include "llvm/Support/Casting.h"
34#include "llvm/Support/Endian.h"
35#include "llvm/Support/Error.h"
36#include "llvm/Support/FileSystem.h"
37#include "llvm/Support/Path.h"
38#include "llvm/TargetParser/Triple.h"
39#include <cstring>
40#include <optional>
41#include <utility>
42
43using namespace llvm;
44using namespace llvm::COFF;
45using namespace llvm::codeview;
46using namespace llvm::object;
47using namespace llvm::support::endian;
48using namespace lld;
49using namespace lld::coff;
50
51using llvm::Triple;
52using llvm::support::ulittle32_t;
53
54// Returns the last element of a path, which is supposed to be a filename.
55static StringRef getBasename(StringRef path) {
56 return sys::path::filename(path, style: sys::path::Style::windows);
57}
58
59// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
60std::string lld::toString(const coff::InputFile *file) {
61 if (!file)
62 return "<internal>";
63 if (file->parentName.empty())
64 return std::string(file->getName());
65
66 return (getBasename(path: file->parentName) + "(" + getBasename(path: file->getName()) +
67 ")")
68 .str();
69}
70
71const COFFSyncStream &coff::operator<<(const COFFSyncStream &s,
72 const InputFile *f) {
73 return s << toString(file: f);
74}
75
76/// Checks that Source is compatible with being a weak alias to Target.
77/// If Source is Undefined and has no weak alias set, makes it a weak
78/// alias to Target.
79static void checkAndSetWeakAlias(SymbolTable &symtab, InputFile *f,
80 Symbol *source, Symbol *target,
81 bool isAntiDep) {
82 if (auto *u = dyn_cast<Undefined>(Val: source)) {
83 if (u->weakAlias && u->weakAlias != target) {
84 // Ignore duplicated anti-dependency symbols.
85 if (isAntiDep)
86 return;
87 if (!u->isAntiDep) {
88 // Weak aliases as produced by GCC are named in the form
89 // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
90 // of another symbol emitted near the weak symbol.
91 // Just use the definition from the first object file that defined
92 // this weak symbol.
93 if (symtab.ctx.config.allowDuplicateWeak)
94 return;
95 symtab.reportDuplicate(existing: source, newFile: f);
96 }
97 }
98 u->setWeakAlias(sym: target, antiDep: isAntiDep);
99 }
100}
101
102static bool ignoredSymbolName(StringRef name) {
103 return name == "@feat.00" || name == "@comp.id";
104}
105
106static coff_symbol_generic *cloneSymbol(COFFSymbolRef sym) {
107 if (sym.isBigObj()) {
108 auto *copy = make<coff_symbol32>(
109 args: *reinterpret_cast<const coff_symbol32 *>(sym.getRawPtr()));
110 return reinterpret_cast<coff_symbol_generic *>(copy);
111 } else {
112 auto *copy = make<coff_symbol16>(
113 args: *reinterpret_cast<const coff_symbol16 *>(sym.getRawPtr()));
114 return reinterpret_cast<coff_symbol_generic *>(copy);
115 }
116}
117
118// Skip importing DllMain thunks from import libraries.
119static bool fixupDllMain(COFFLinkerContext &ctx, llvm::object::Archive *file,
120 const Archive::Symbol &sym, bool &skipDllMain) {
121 const Archive::Child &c =
122 CHECK(sym.getMember(), file->getFileName() +
123 ": could not get the member for symbol " +
124 toCOFFString(ctx, sym));
125 MemoryBufferRef mb =
126 CHECK(c.getMemoryBufferRef(),
127 file->getFileName() +
128 ": could not get the buffer for a child buffer of the archive");
129 if (identify_magic(magic: mb.getBuffer()) == file_magic::coff_import_library) {
130 if (ctx.config.warnImportedDllMain) {
131 // We won't place DllMain symbols in the symbol table if they are
132 // coming from a import library. This message can be ignored with the flag
133 // '/ignore:importeddllmain'
134 Warn(ctx)
135 << file->getFileName()
136 << ": skipping imported DllMain symbol [importeddllmain]\nNOTE: this "
137 "might be a mistake when the DLL/library was produced.";
138 }
139 skipDllMain = true;
140 return true;
141 }
142 return false;
143}
144
145ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m)
146 : InputFile(ctx.symtab, ArchiveKind, m) {}
147
148void ArchiveFile::parse() {
149 COFFLinkerContext &ctx = symtab.ctx;
150 SymbolTable *archiveSymtab = &symtab;
151
152 // Parse a MemoryBufferRef as an archive file.
153 file = CHECK(Archive::create(mb), this);
154
155 // Try to read symbols from ECSYMBOLS section on ARM64EC.
156 if (ctx.symtab.isEC()) {
157 iterator_range<Archive::symbol_iterator> symbols =
158 CHECK(file->ec_symbols(), this);
159 if (!symbols.empty()) {
160 for (const Archive::Symbol &sym : symbols)
161 ctx.symtab.addLazyArchive(f: this, sym);
162
163 // Read both EC and native symbols on ARM64X.
164 archiveSymtab = &*ctx.hybridSymtab;
165 } else {
166 // If the ECSYMBOLS section is missing in the archive, the archive could
167 // be either a native-only ARM64 or x86_64 archive. Check the machine type
168 // of the object containing a symbol to determine which symbol table to
169 // use.
170 Archive::symbol_iterator sym = file->symbol_begin();
171 if (sym != file->symbol_end()) {
172 MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN;
173 Archive::Child child =
174 CHECK(sym->getMember(),
175 file->getFileName() +
176 ": could not get the buffer for a child of the archive");
177 MemoryBufferRef mb = CHECK(
178 child.getMemoryBufferRef(),
179 file->getFileName() +
180 ": could not get the buffer for a child buffer of the archive");
181 switch (identify_magic(magic: mb.getBuffer())) {
182 case file_magic::coff_object: {
183 std::unique_ptr<COFFObjectFile> obj =
184 CHECK(COFFObjectFile::create(mb),
185 check(child.getName()) + ":" + ": not a valid COFF file");
186 machine = MachineTypes(obj->getMachine());
187 break;
188 }
189 case file_magic::coff_import_library:
190 machine = MachineTypes(COFFImportFile(mb).getMachine());
191 break;
192 case file_magic::bitcode: {
193 std::unique_ptr<lto::InputFile> obj =
194 check(e: lto::InputFile::create(Object: mb));
195 machine = BitcodeFile::getMachineType(obj: obj.get());
196 break;
197 }
198 default:
199 break;
200 }
201 archiveSymtab = &ctx.getSymtab(machine);
202 }
203 }
204 }
205
206 bool skipDllMain = false;
207 StringRef mangledDllMain, impMangledDllMain;
208
209 // The calls below will fail if we haven't set the machine type yet. Instead
210 // of failing, it is preferable to skip this "imported DllMain" check if we
211 // don't know the machine type at this point.
212 if (!file->isEmpty() && ctx.config.machine != IMAGE_FILE_MACHINE_UNKNOWN) {
213 mangledDllMain = archiveSymtab->mangle(sym: "DllMain");
214 impMangledDllMain = uniqueSaver().save(S: "__imp_" + mangledDllMain);
215 }
216
217 // Read the symbol table to construct Lazy objects.
218 for (const Archive::Symbol &sym : file->symbols()) {
219 // If an import library provides the DllMain symbol, skip importing it, as
220 // we should be using our own DllMain, not another DLL's DllMain.
221 if (!mangledDllMain.empty() && (sym.getName() == mangledDllMain ||
222 sym.getName() == impMangledDllMain)) {
223 if (skipDllMain || fixupDllMain(ctx, file: file.get(), sym, skipDllMain))
224 continue;
225 }
226 archiveSymtab->addLazyArchive(f: this, sym);
227 }
228}
229
230// Returns a buffer pointing to a member file containing a given symbol.
231void ArchiveFile::addMember(const Archive::Symbol &sym) {
232 const Archive::Child &c =
233 CHECK(sym.getMember(), "could not get the member for symbol " +
234 toCOFFString(symtab.ctx, sym));
235
236 // Return an empty buffer if we have already returned the same buffer.
237 // FIXME: Remove this once we resolve all defineds before all undefineds in
238 // ObjFile::initializeSymbols().
239 if (!seen.insert(V: c.getChildOffset()).second)
240 return;
241
242 symtab.ctx.driver.enqueueArchiveMember(c, sym, parentName: getName());
243}
244
245std::vector<MemoryBufferRef>
246lld::coff::getArchiveMembers(COFFLinkerContext &ctx, Archive *file) {
247 std::vector<MemoryBufferRef> v;
248 Error err = Error::success();
249
250 // Thin archives refer to .o files, so --reproduces needs the .o files too.
251 bool addToTar = file->isThin() && ctx.driver.tar;
252
253 for (const Archive::Child &c : file->children(Err&: err)) {
254 MemoryBufferRef mbref =
255 CHECK(c.getMemoryBufferRef(),
256 file->getFileName() +
257 ": could not get the buffer for a child of the archive");
258 if (addToTar) {
259 ctx.driver.tar->append(Path: relativeToRoot(path: check(e: c.getFullName())),
260 Data: mbref.getBuffer());
261 }
262 v.push_back(x: mbref);
263 }
264 if (err)
265 Fatal(ctx) << file->getFileName()
266 << ": Archive::children failed: " << toString(E: std::move(err));
267 return v;
268}
269
270ObjFile::ObjFile(SymbolTable &symtab, COFFObjectFile *coffObj, bool lazy)
271 : InputFile(symtab, ObjectKind, coffObj->getMemoryBufferRef(), lazy),
272 coffObj(coffObj) {}
273
274ObjFile *ObjFile::create(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy) {
275 // Parse a memory buffer as a COFF file.
276 Expected<std::unique_ptr<Binary>> bin = createBinary(Source: m);
277 if (!bin)
278 Fatal(ctx) << "Could not parse " << m.getBufferIdentifier();
279
280 auto *obj = dyn_cast<COFFObjectFile>(Val: bin->get());
281 if (!obj)
282 Fatal(ctx) << m.getBufferIdentifier() << " is not a COFF file";
283
284 bin->release();
285 return make<ObjFile>(args&: ctx.getSymtab(machine: MachineTypes(obj->getMachine())), args&: obj,
286 args&: lazy);
287}
288
289void ObjFile::parseLazy() {
290 // Native object file.
291 uint32_t numSymbols = coffObj->getNumberOfSymbols();
292 for (uint32_t i = 0; i < numSymbols; ++i) {
293 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
294 if (coffSym.isUndefined() || !coffSym.isExternal() ||
295 coffSym.isWeakExternal())
296 continue;
297 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
298 if (coffSym.isAbsolute() && ignoredSymbolName(name))
299 continue;
300 symtab.addLazyObject(f: this, n: name);
301 if (!lazy)
302 return;
303 i += coffSym.getNumberOfAuxSymbols();
304 }
305}
306
307struct ECMapEntry {
308 ulittle32_t src;
309 ulittle32_t dst;
310 ulittle32_t type;
311};
312
313void ObjFile::initializeECThunks() {
314 for (SectionChunk *chunk : hybmpChunks) {
315 if (chunk->getContents().size() % sizeof(ECMapEntry)) {
316 Err(ctx&: symtab.ctx) << "Invalid .hybmp chunk size "
317 << chunk->getContents().size();
318 continue;
319 }
320
321 const uint8_t *end =
322 chunk->getContents().data() + chunk->getContents().size();
323 for (const uint8_t *iter = chunk->getContents().data(); iter != end;
324 iter += sizeof(ECMapEntry)) {
325 auto entry = reinterpret_cast<const ECMapEntry *>(iter);
326 switch (entry->type) {
327 case Arm64ECThunkType::Entry:
328 symtab.addEntryThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
329 break;
330 case Arm64ECThunkType::Exit:
331 symtab.addExitThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
332 break;
333 case Arm64ECThunkType::GuestExit:
334 break;
335 default:
336 Warn(ctx&: symtab.ctx) << "Ignoring unknown EC thunk type " << entry->type;
337 }
338 }
339 }
340}
341
342void ObjFile::parse() {
343 // Read section and symbol tables.
344 initializeChunks();
345 initializeSymbols();
346 initializeFlags();
347 initializeDependencies();
348 initializeECThunks();
349}
350
351const coff_section *ObjFile::getSection(uint32_t i) {
352 auto sec = coffObj->getSection(index: i);
353 if (!sec)
354 Fatal(ctx&: symtab.ctx) << "getSection failed: #" << i << ": " << sec.takeError();
355 return *sec;
356}
357
358// We set SectionChunk pointers in the SparseChunks vector to this value
359// temporarily to mark comdat sections as having an unknown resolution. As we
360// walk the object file's symbol table, once we visit either a leader symbol or
361// an associative section definition together with the parent comdat's leader,
362// we set the pointer to either nullptr (to mark the section as discarded) or a
363// valid SectionChunk for that section.
364static SectionChunk *const pendingComdat = reinterpret_cast<SectionChunk *>(1);
365
366void ObjFile::initializeChunks() {
367 uint32_t numSections = coffObj->getNumberOfSections();
368 sparseChunks.resize(new_size: numSections + 1);
369 for (uint32_t i = 1; i < numSections + 1; ++i) {
370 const coff_section *sec = getSection(i);
371 if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
372 sparseChunks[i] = pendingComdat;
373 else
374 sparseChunks[i] = readSection(sectionNumber: i, def: nullptr, leaderName: "");
375 }
376}
377
378SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
379 const coff_aux_section_definition *def,
380 StringRef leaderName) {
381 const coff_section *sec = getSection(i: sectionNumber);
382
383 StringRef name;
384 if (Expected<StringRef> e = coffObj->getSectionName(Sec: sec))
385 name = *e;
386 else
387 Fatal(ctx&: symtab.ctx) << "getSectionName failed: #" << sectionNumber << ": "
388 << e.takeError();
389
390 if (name == ".drectve") {
391 ArrayRef<uint8_t> data;
392 cantFail(Err: coffObj->getSectionContents(Sec: sec, Res&: data));
393 directives = StringRef((const char *)data.data(), data.size());
394 return nullptr;
395 }
396
397 if (name == ".llvm_addrsig") {
398 addrsigSec = sec;
399 return nullptr;
400 }
401
402 if (name == ".llvm.call-graph-profile") {
403 callgraphSec = sec;
404 return nullptr;
405 }
406
407 if (symtab.ctx.config.discardSection.contains(key: name))
408 return nullptr;
409
410 // Object files may have DWARF debug info or MS CodeView debug info
411 // (or both).
412 //
413 // DWARF sections don't need any special handling from the perspective
414 // of the linker; they are just a data section containing relocations.
415 // We can just link them to complete debug info.
416 //
417 // CodeView needs linker support. We need to interpret debug info,
418 // and then write it to a separate .pdb file.
419
420 // Ignore DWARF debug info unless requested to be included.
421 if (!symtab.ctx.config.includeDwarfChunks && name.starts_with(Prefix: ".debug_"))
422 return nullptr;
423
424 if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
425 return nullptr;
426 SectionChunk *c;
427 if (isArm64EC(Machine: getMachineType()))
428 c = make<SectionChunkEC>(args: this, args&: sec);
429 else
430 c = make<SectionChunk>(args: this, args&: sec);
431 if (def)
432 c->checksum = def->CheckSum;
433
434 // CodeView sections are stored to a different vector because they are not
435 // linked in the regular manner.
436 if (c->isCodeView())
437 debugChunks.push_back(x: c);
438 else if (name == ".gfids$y")
439 guardFidChunks.push_back(x: c);
440 else if (name == ".giats$y")
441 guardIATChunks.push_back(x: c);
442 else if (name == ".gljmp$y")
443 guardLJmpChunks.push_back(x: c);
444 else if (name == ".gehcont$y")
445 guardEHContChunks.push_back(x: c);
446 else if (name == ".sxdata")
447 sxDataChunks.push_back(x: c);
448 else if (isArm64EC(Machine: getMachineType()) && name == ".hybmp$x")
449 hybmpChunks.push_back(x: c);
450 else if (symtab.ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
451 name == ".rdata" && leaderName.starts_with(Prefix: "??_C@"))
452 // COFF sections that look like string literal sections (i.e. no
453 // relocations, in .rdata, leader symbol name matches the MSVC name mangling
454 // for string literals) are subject to string tail merging.
455 MergeChunk::addSection(ctx&: symtab.ctx, c);
456 else if (name == ".rsrc" || name.starts_with(Prefix: ".rsrc$"))
457 resourceChunks.push_back(x: c);
458 else if (!(sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_INFO))
459 chunks.push_back(x: c);
460
461 return c;
462}
463
464void ObjFile::includeResourceChunks() {
465 chunks.insert(position: chunks.end(), first: resourceChunks.begin(), last: resourceChunks.end());
466}
467
468void ObjFile::readAssociativeDefinition(
469 COFFSymbolRef sym, const coff_aux_section_definition *def) {
470 readAssociativeDefinition(coffSym: sym, def, parentSection: def->getNumber(IsBigObj: sym.isBigObj()));
471}
472
473void ObjFile::readAssociativeDefinition(COFFSymbolRef sym,
474 const coff_aux_section_definition *def,
475 uint32_t parentIndex) {
476 SectionChunk *parent = sparseChunks[parentIndex];
477 int32_t sectionNumber = sym.getSectionNumber();
478
479 auto diag = [&]() {
480 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
481
482 StringRef parentName;
483 const coff_section *parentSec = getSection(i: parentIndex);
484 if (Expected<StringRef> e = coffObj->getSectionName(Sec: parentSec))
485 parentName = *e;
486 Err(ctx&: symtab.ctx) << toString(file: this) << ": associative comdat " << name
487 << " (sec " << sectionNumber
488 << ") has invalid reference to section " << parentName
489 << " (sec " << parentIndex << ")";
490 };
491
492 if (parent == pendingComdat) {
493 // This can happen if an associative comdat refers to another associative
494 // comdat that appears after it (invalid per COFF spec) or to a section
495 // without any symbols.
496 diag();
497 return;
498 }
499
500 // Check whether the parent is prevailing. If it is, so are we, and we read
501 // the section; otherwise mark it as discarded.
502 if (parent) {
503 SectionChunk *c = readSection(sectionNumber, def, leaderName: "");
504 sparseChunks[sectionNumber] = c;
505 if (c) {
506 c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE;
507 parent->addAssociative(child: c);
508 }
509 } else {
510 sparseChunks[sectionNumber] = nullptr;
511 }
512}
513
514void ObjFile::recordPrevailingSymbolForMingw(
515 COFFSymbolRef sym, DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
516 // For comdat symbols in executable sections, where this is the copy
517 // of the section chunk we actually include instead of discarding it,
518 // add the symbol to a map to allow using it for implicitly
519 // associating .[px]data$<func> sections to it.
520 // Use the suffix from the .text$<func> instead of the leader symbol
521 // name, for cases where the names differ (i386 mangling/decorations,
522 // cases where the leader is a weak symbol named .weak.func.default*).
523 int32_t sectionNumber = sym.getSectionNumber();
524 SectionChunk *sc = sparseChunks[sectionNumber];
525 if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
526 StringRef name = sc->getSectionName().split(Separator: '$').second;
527 prevailingSectionMap[name] = sectionNumber;
528 }
529}
530
531void ObjFile::maybeAssociateSEHForMingw(
532 COFFSymbolRef sym, const coff_aux_section_definition *def,
533 const DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
534 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
535 if (name.consume_front(Prefix: ".pdata$") || name.consume_front(Prefix: ".xdata$") ||
536 name.consume_front(Prefix: ".eh_frame$")) {
537 // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
538 // associative to the symbol <func>.
539 auto parentSym = prevailingSectionMap.find(Val: name);
540 if (parentSym != prevailingSectionMap.end())
541 readAssociativeDefinition(sym, def, parentIndex: parentSym->second);
542 }
543}
544
545Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
546 SectionChunk *sc = sparseChunks[sym.getSectionNumber()];
547 if (sym.isExternal()) {
548 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
549 if (sc)
550 return symtab.addRegular(f: this, n: name, s: sym.getGeneric(), c: sc,
551 sectionOffset: sym.getValue());
552 // For MinGW symbols named .weak.* that point to a discarded section,
553 // don't create an Undefined symbol. If nothing ever refers to the symbol,
554 // everything should be fine. If something actually refers to the symbol
555 // (e.g. the undefined weak alias), linking will fail due to undefined
556 // references at the end.
557 if (symtab.ctx.config.mingw && name.starts_with(Prefix: ".weak."))
558 return nullptr;
559 return symtab.addUndefined(name, f: this, overrideLazy: false);
560 }
561 if (sc) {
562 const coff_symbol_generic *symGen = sym.getGeneric();
563 if (sym.isSection()) {
564 auto *customSymGen = cloneSymbol(sym);
565 customSymGen->Value = 0;
566 symGen = customSymGen;
567 }
568 return make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
569 /*IsExternal*/ args: false, args&: symGen, args&: sc);
570 }
571 return nullptr;
572}
573
574void ObjFile::initializeSymbols() {
575 uint32_t numSymbols = coffObj->getNumberOfSymbols();
576 symbols.resize(new_size: numSymbols);
577
578 SmallVector<std::pair<Symbol *, const coff_aux_weak_external *>, 8>
579 weakAliases;
580 std::vector<uint32_t> pendingIndexes;
581 pendingIndexes.reserve(n: numSymbols);
582
583 DenseMap<StringRef, uint32_t> prevailingSectionMap;
584 std::vector<const coff_aux_section_definition *> comdatDefs(
585 coffObj->getNumberOfSections() + 1);
586 COFFLinkerContext &ctx = symtab.ctx;
587
588 for (uint32_t i = 0; i < numSymbols; ++i) {
589 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
590 bool prevailingComdat;
591 if (coffSym.isUndefined()) {
592 symbols[i] = createUndefined(sym: coffSym, overrideLazy: false);
593 } else if (coffSym.isWeakExternal()) {
594 auto aux = coffSym.getAux<coff_aux_weak_external>();
595 bool overrideLazy = true;
596
597 // On ARM64EC, external function calls emit a pair of weak-dependency
598 // aliases: func to #func and #func to the func guess exit thunk
599 // (instead of a single undefined func symbol, which would be emitted on
600 // other targets). Allow such aliases to be overridden by lazy archive
601 // symbols, just as we would for undefined symbols.
602 if (isArm64EC(Machine: getMachineType()) &&
603 aux->Characteristics == IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY) {
604 COFFSymbolRef targetSym = check(e: coffObj->getSymbol(index: aux->TagIndex));
605 if (!targetSym.isAnyUndefined()) {
606 // If the target is defined, it may be either a guess exit thunk or
607 // the actual implementation. If it's the latter, consider the alias
608 // to be part of the implementation and override potential lazy
609 // archive symbols.
610 StringRef targetName = check(e: coffObj->getSymbolName(Symbol: targetSym));
611 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
612 std::optional<std::string> mangledName =
613 getArm64ECMangledFunctionName(Name: name);
614 overrideLazy = mangledName == targetName;
615 } else {
616 overrideLazy = false;
617 }
618 }
619 symbols[i] = createUndefined(sym: coffSym, overrideLazy);
620 weakAliases.emplace_back(Args&: symbols[i], Args&: aux);
621 } else if (std::optional<Symbol *> optSym =
622 createDefined(sym: coffSym, comdatDefs, prevailingComdat)) {
623 symbols[i] = *optSym;
624 if (ctx.config.mingw && prevailingComdat)
625 recordPrevailingSymbolForMingw(sym: coffSym, prevailingSectionMap);
626 } else {
627 // createDefined() returns std::nullopt if a symbol belongs to a section
628 // that was pending at the point when the symbol was read. This can happen
629 // in two cases:
630 // 1) section definition symbol for a comdat leader;
631 // 2) symbol belongs to a comdat section associated with another section.
632 // In both of these cases, we can expect the section to be resolved by
633 // the time we finish visiting the remaining symbols in the symbol
634 // table. So we postpone the handling of this symbol until that time.
635 pendingIndexes.push_back(x: i);
636 }
637 i += coffSym.getNumberOfAuxSymbols();
638 }
639
640 for (uint32_t i : pendingIndexes) {
641 COFFSymbolRef sym = check(e: coffObj->getSymbol(index: i));
642 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
643 if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
644 readAssociativeDefinition(sym, def);
645 else if (ctx.config.mingw)
646 maybeAssociateSEHForMingw(sym, def, prevailingSectionMap);
647 }
648 if (sparseChunks[sym.getSectionNumber()] == pendingComdat) {
649 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
650 Log(ctx) << "comdat section " << name
651 << " without leader and unassociated, discarding";
652 continue;
653 }
654 symbols[i] = createRegular(sym);
655 }
656
657 for (auto &kv : weakAliases) {
658 Symbol *sym = kv.first;
659 const coff_aux_weak_external *aux = kv.second;
660 checkAndSetWeakAlias(symtab, f: this, source: sym, target: symbols[aux->TagIndex],
661 isAntiDep: aux->Characteristics ==
662 IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY);
663 }
664
665 // Free the memory used by sparseChunks now that symbol loading is finished.
666 decltype(sparseChunks)().swap(x&: sparseChunks);
667}
668
669Symbol *ObjFile::createUndefined(COFFSymbolRef sym, bool overrideLazy) {
670 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
671 Symbol *s = symtab.addUndefined(name, f: this, overrideLazy);
672
673 // Add an anti-dependency alias for undefined AMD64 symbols on the ARM64EC
674 // target.
675 if (symtab.isEC() && getMachineType() == AMD64) {
676 auto u = dyn_cast<Undefined>(Val: s);
677 if (u && !u->weakAlias) {
678 if (std::optional<std::string> mangledName =
679 getArm64ECMangledFunctionName(Name: name)) {
680 Symbol *m = symtab.addUndefined(name: saver().save(S: *mangledName), f: this,
681 /*overrideLazy=*/false);
682 u->setWeakAlias(sym: m, /*antiDep=*/true);
683 }
684 }
685 }
686 return s;
687}
688
689static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj,
690 int32_t section) {
691 uint32_t numSymbols = obj->getNumberOfSymbols();
692 for (uint32_t i = 0; i < numSymbols; ++i) {
693 COFFSymbolRef sym = check(e: obj->getSymbol(index: i));
694 if (sym.getSectionNumber() != section)
695 continue;
696 if (const coff_aux_section_definition *def = sym.getSectionDefinition())
697 return def;
698 }
699 return nullptr;
700}
701
702void ObjFile::handleComdatSelection(
703 COFFSymbolRef sym, COMDATType &selection, bool &prevailing,
704 DefinedRegular *leader,
705 const llvm::object::coff_aux_section_definition *def) {
706 if (prevailing)
707 return;
708 // There's already an existing comdat for this symbol: `Leader`.
709 // Use the comdats's selection field to determine if the new
710 // symbol in `Sym` should be discarded, produce a duplicate symbol
711 // error, etc.
712
713 SectionChunk *leaderChunk = leader->getChunk();
714 COMDATType leaderSelection = leaderChunk->selection;
715 COFFLinkerContext &ctx = symtab.ctx;
716
717 assert(leader->data && "Comdat leader without SectionChunk?");
718 if (isa<BitcodeFile>(Val: leader->file)) {
719 // If the leader is only a LTO symbol, we don't know e.g. its final size
720 // yet, so we can't do the full strict comdat selection checking yet.
721 selection = leaderSelection = IMAGE_COMDAT_SELECT_ANY;
722 }
723
724 if ((selection == IMAGE_COMDAT_SELECT_ANY &&
725 leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) ||
726 (selection == IMAGE_COMDAT_SELECT_LARGEST &&
727 leaderSelection == IMAGE_COMDAT_SELECT_ANY)) {
728 // cl.exe picks "any" for vftables when building with /GR- and
729 // "largest" when building with /GR. To be able to link object files
730 // compiled with each flag, "any" and "largest" are merged as "largest".
731 leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST;
732 }
733
734 // GCCs __declspec(selectany) doesn't actually pick "any" but "same size as".
735 // Clang on the other hand picks "any". To be able to link two object files
736 // with a __declspec(selectany) declaration, one compiled with gcc and the
737 // other with clang, we merge them as proper "same size as"
738 if (ctx.config.mingw && ((selection == IMAGE_COMDAT_SELECT_ANY &&
739 leaderSelection == IMAGE_COMDAT_SELECT_SAME_SIZE) ||
740 (selection == IMAGE_COMDAT_SELECT_SAME_SIZE &&
741 leaderSelection == IMAGE_COMDAT_SELECT_ANY))) {
742 leaderSelection = selection = IMAGE_COMDAT_SELECT_SAME_SIZE;
743 }
744
745 // Other than that, comdat selections must match. This is a bit more
746 // strict than link.exe which allows merging "any" and "largest" if "any"
747 // is the first symbol the linker sees, and it allows merging "largest"
748 // with everything (!) if "largest" is the first symbol the linker sees.
749 // Making this symmetric independent of which selection is seen first
750 // seems better though.
751 // (This behavior matches ModuleLinker::getComdatResult().)
752 if (selection != leaderSelection) {
753 Log(ctx) << "conflicting comdat type for " << symtab.printSymbol(sym: leader)
754 << ": " << (int)leaderSelection << " in " << leader->getFile()
755 << " and " << (int)selection << " in " << this;
756 symtab.reportDuplicate(existing: leader, newFile: this);
757 return;
758 }
759
760 switch (selection) {
761 case IMAGE_COMDAT_SELECT_NODUPLICATES:
762 symtab.reportDuplicate(existing: leader, newFile: this);
763 break;
764
765 case IMAGE_COMDAT_SELECT_ANY:
766 // Nothing to do.
767 break;
768
769 case IMAGE_COMDAT_SELECT_SAME_SIZE:
770 if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) {
771 if (!ctx.config.mingw) {
772 symtab.reportDuplicate(existing: leader, newFile: this);
773 } else {
774 const coff_aux_section_definition *leaderDef = nullptr;
775 if (leaderChunk->file)
776 leaderDef = findSectionDef(obj: leaderChunk->file->getCOFFObj(),
777 section: leaderChunk->getSectionNumber());
778 if (!leaderDef || leaderDef->Length != def->Length)
779 symtab.reportDuplicate(existing: leader, newFile: this);
780 }
781 }
782 break;
783
784 case IMAGE_COMDAT_SELECT_EXACT_MATCH: {
785 SectionChunk newChunk(this, getSection(sym));
786 // link.exe only compares section contents here and doesn't complain
787 // if the two comdat sections have e.g. different alignment.
788 // Match that.
789 if (leaderChunk->getContents() != newChunk.getContents())
790 symtab.reportDuplicate(existing: leader, newFile: this, newSc: &newChunk, newSectionOffset: sym.getValue());
791 break;
792 }
793
794 case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
795 // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
796 // (This means lld-link doesn't produce duplicate symbol errors for
797 // associative comdats while link.exe does, but associate comdats
798 // are never extern in practice.)
799 llvm_unreachable("createDefined not called for associative comdats");
800
801 case IMAGE_COMDAT_SELECT_LARGEST:
802 if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) {
803 // Replace the existing comdat symbol with the new one.
804 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
805 // FIXME: This is incorrect: With /opt:noref, the previous sections
806 // make it into the final executable as well. Correct handling would
807 // be to undo reading of the whole old section that's being replaced,
808 // or doing one pass that determines what the final largest comdat
809 // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
810 // only the largest one.
811 replaceSymbol<DefinedRegular>(s: leader, arg: this, arg&: name, /*IsCOMDAT*/ arg: true,
812 /*IsExternal*/ arg: true, arg: sym.getGeneric(),
813 arg: nullptr);
814 prevailing = true;
815 }
816 break;
817
818 case IMAGE_COMDAT_SELECT_NEWEST:
819 llvm_unreachable("should have been rejected earlier");
820 }
821}
822
823std::optional<Symbol *> ObjFile::createDefined(
824 COFFSymbolRef sym,
825 std::vector<const coff_aux_section_definition *> &comdatDefs,
826 bool &prevailing) {
827 prevailing = false;
828 auto getName = [&]() { return check(e: coffObj->getSymbolName(Symbol: sym)); };
829
830 if (sym.isCommon()) {
831 auto *c = make<CommonChunk>(args&: sym);
832 chunks.push_back(x: c);
833 return symtab.addCommon(f: this, n: getName(), size: sym.getValue(), s: sym.getGeneric(),
834 c);
835 }
836
837 COFFLinkerContext &ctx = symtab.ctx;
838 if (sym.isAbsolute()) {
839 StringRef name = getName();
840
841 if (name == "@feat.00")
842 feat00Flags = sym.getValue();
843 // Skip special symbols.
844 if (ignoredSymbolName(name))
845 return nullptr;
846
847 if (sym.isExternal())
848 return symtab.addAbsolute(n: name, s: sym);
849 return make<DefinedAbsolute>(args&: ctx, args&: name, args&: sym);
850 }
851
852 int32_t sectionNumber = sym.getSectionNumber();
853 if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
854 return nullptr;
855
856 if (sym.isEmptySectionDeclaration()) {
857 // As there is no coff_section in the object file for these, make a
858 // new virtual one, with everything zeroed out (i.e. an empty section),
859 // with only the name and characteristics set.
860 StringRef name = getName();
861 auto *hdr = make<coff_section>();
862 memset(s: hdr, c: 0, n: sizeof(*hdr));
863 strncpy(dest: hdr->Name, src: name.data(),
864 n: std::min(a: name.size(), b: (size_t)COFF::NameSize));
865 // The Value field in a section symbol may contain the characteristics,
866 // or it may be zero, where we make something up (that matches what is
867 // used in .idata sections in the regular object files in import libraries).
868 if (sym.getValue())
869 hdr->Characteristics = sym.getValue() | IMAGE_SCN_ALIGN_4BYTES;
870 else
871 hdr->Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
872 IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE |
873 IMAGE_SCN_ALIGN_4BYTES;
874 auto *sc = make<SectionChunk>(args: this, args&: hdr);
875 chunks.push_back(x: sc);
876
877 auto *symGen = cloneSymbol(sym);
878 // Ignore the Value offset of these symbols, as it may be a bitmask.
879 symGen->Value = 0;
880 return make<DefinedRegular>(args: this, /*name=*/args: "", /*isCOMDAT=*/args: false,
881 /*isExternal=*/args: false, args&: symGen, args&: sc);
882 }
883
884 if (llvm::COFF::isReservedSectionNumber(SectionNumber: sectionNumber))
885 Fatal(ctx) << toString(file: this) << ": " << getName()
886 << " should not refer to special section "
887 << Twine(sectionNumber);
888
889 if ((uint32_t)sectionNumber >= sparseChunks.size())
890 Fatal(ctx) << toString(file: this) << ": " << getName()
891 << " should not refer to non-existent section "
892 << Twine(sectionNumber);
893
894 // Comdat handling.
895 // A comdat symbol consists of two symbol table entries.
896 // The first symbol entry has the name of the section (e.g. .text), fixed
897 // values for the other fields, and one auxiliary record.
898 // The second symbol entry has the name of the comdat symbol, called the
899 // "comdat leader".
900 // When this function is called for the first symbol entry of a comdat,
901 // it sets comdatDefs and returns std::nullopt, and when it's called for the
902 // second symbol entry it reads comdatDefs and then sets it back to nullptr.
903
904 // Handle comdat leader.
905 if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) {
906 comdatDefs[sectionNumber] = nullptr;
907 DefinedRegular *leader;
908
909 if (sym.isExternal()) {
910 std::tie(args&: leader, args&: prevailing) =
911 symtab.addComdat(f: this, n: getName(), s: sym.getGeneric());
912 } else {
913 leader = make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
914 /*IsExternal*/ args: false, args: sym.getGeneric());
915 prevailing = true;
916 }
917
918 if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES ||
919 // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
920 // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
921 def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) {
922 Fatal(ctx) << "unknown comdat type "
923 << std::to_string(val: (int)def->Selection) << " for " << getName()
924 << " in " << toString(file: this);
925 }
926 COMDATType selection = (COMDATType)def->Selection;
927
928 if (leader->isCOMDAT)
929 handleComdatSelection(sym, selection, prevailing, leader, def);
930
931 if (prevailing) {
932 SectionChunk *c = readSection(sectionNumber, def, leaderName: getName());
933 sparseChunks[sectionNumber] = c;
934 if (!c)
935 return nullptr;
936 c->sym = cast<DefinedRegular>(Val: leader);
937 c->selection = selection;
938 cast<DefinedRegular>(Val: leader)->data = &c->repl;
939 } else {
940 sparseChunks[sectionNumber] = nullptr;
941 }
942 return leader;
943 }
944
945 // Prepare to handle the comdat leader symbol by setting the section's
946 // ComdatDefs pointer if we encounter a non-associative comdat.
947 if (sparseChunks[sectionNumber] == pendingComdat) {
948 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
949 if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE)
950 comdatDefs[sectionNumber] = def;
951 }
952 return std::nullopt;
953 }
954
955 return createRegular(sym);
956}
957
958MachineTypes ObjFile::getMachineType() const {
959 return static_cast<MachineTypes>(coffObj->getMachine());
960}
961
962ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
963 if (SectionChunk *sec = SectionChunk::findByName(sections: debugChunks, name: secName))
964 return sec->consumeDebugMagic();
965 return {};
966}
967
968// OBJ files systematically store critical information in a .debug$S stream,
969// even if the TU was compiled with no debug info. At least two records are
970// always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
971// PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
972// currently used to initialize the hotPatchable member.
973void ObjFile::initializeFlags() {
974 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$S");
975 if (data.empty())
976 return;
977
978 DebugSubsectionArray subsections;
979
980 BinaryStreamReader reader(data, llvm::endianness::little);
981 ExitOnError exitOnErr;
982 exitOnErr(reader.readArray(Array&: subsections, Size: data.size()));
983
984 for (const DebugSubsectionRecord &ss : subsections) {
985 if (ss.kind() != DebugSubsectionKind::Symbols)
986 continue;
987
988 unsigned offset = 0;
989
990 // Only parse the first two records. We are only looking for S_OBJNAME
991 // and S_COMPILE3, and they usually appear at the beginning of the
992 // stream.
993 for (unsigned i = 0; i < 2; ++i) {
994 Expected<CVSymbol> sym = readSymbolFromStream(Stream: ss.getRecordData(), Offset: offset);
995 if (!sym) {
996 consumeError(Err: sym.takeError());
997 return;
998 }
999 if (sym->kind() == SymbolKind::S_COMPILE3) {
1000 auto cs =
1001 cantFail(ValOrErr: SymbolDeserializer::deserializeAs<Compile3Sym>(Symbol: sym.get()));
1002 hotPatchable =
1003 (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None;
1004 }
1005 if (sym->kind() == SymbolKind::S_OBJNAME) {
1006 auto objName = cantFail(ValOrErr: SymbolDeserializer::deserializeAs<ObjNameSym>(
1007 Symbol: sym.get()));
1008 if (objName.Signature)
1009 pchSignature = objName.Signature;
1010 }
1011 offset += sym->length();
1012 }
1013 }
1014}
1015
1016// Depending on the compilation flags, OBJs can refer to external files,
1017// necessary to merge this OBJ into the final PDB. We currently support two
1018// types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
1019// And PDB type servers, when compiling with /Zi. This function extracts these
1020// dependencies and makes them available as a TpiSource interface (see
1021// DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
1022// output even with /Yc and /Yu and with /Zi.
1023void ObjFile::initializeDependencies() {
1024 COFFLinkerContext &ctx = symtab.ctx;
1025 if (!ctx.config.debug)
1026 return;
1027
1028 bool isPCH = false;
1029
1030 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$P");
1031 if (!data.empty())
1032 isPCH = true;
1033 else
1034 data = getDebugSection(secName: ".debug$T");
1035
1036 // symbols but no types, make a plain, empty TpiSource anyway, because it
1037 // simplifies adding the symbols later.
1038 if (data.empty()) {
1039 if (!debugChunks.empty())
1040 debugTypesObj = makeTpiSource(ctx, f: this);
1041 return;
1042 }
1043
1044 // Get the first type record. It will indicate if this object uses a type
1045 // server (/Zi) or a PCH file (/Yu).
1046 CVTypeArray types;
1047 BinaryStreamReader reader(data, llvm::endianness::little);
1048 cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
1049 CVTypeArray::Iterator firstType = types.begin();
1050 if (firstType == types.end())
1051 return;
1052
1053 // Remember the .debug$T or .debug$P section.
1054 debugTypes = data;
1055
1056 // This object file is a PCH file that others will depend on.
1057 if (isPCH) {
1058 debugTypesObj = makePrecompSource(ctx, file: this);
1059 return;
1060 }
1061
1062 // This object file was compiled with /Zi. Enqueue the PDB dependency.
1063 if (firstType->kind() == LF_TYPESERVER2) {
1064 TypeServer2Record ts = cantFail(
1065 ValOrErr: TypeDeserializer::deserializeAs<TypeServer2Record>(Data: firstType->data()));
1066 debugTypesObj = makeUseTypeServerSource(ctx, file: this, ts);
1067 enqueuePdbFile(path: ts.getName(), fromFile: this);
1068 return;
1069 }
1070
1071 // This object was compiled with /Yu. It uses types from another object file
1072 // with a matching signature.
1073 if (firstType->kind() == LF_PRECOMP) {
1074 PrecompRecord precomp = cantFail(
1075 ValOrErr: TypeDeserializer::deserializeAs<PrecompRecord>(Data: firstType->data()));
1076 // We're better off trusting the LF_PRECOMP signature. In some cases the
1077 // S_OBJNAME record doesn't contain a valid PCH signature.
1078 if (precomp.Signature)
1079 pchSignature = precomp.Signature;
1080 debugTypesObj = makeUsePrecompSource(ctx, file: this, ts: precomp);
1081 // Drop the LF_PRECOMP record from the input stream.
1082 debugTypes = debugTypes.drop_front(N: firstType->RecordData.size());
1083 return;
1084 }
1085
1086 // This is a plain old object file.
1087 debugTypesObj = makeTpiSource(ctx, f: this);
1088}
1089
1090// The casing of the PDB path stamped in the OBJ can differ from the actual path
1091// on disk. With this, we ensure to always use lowercase as a key for the
1092// pdbInputFileInstances map, at least on Windows.
1093static std::string normalizePdbPath(StringRef path) {
1094#if defined(_WIN32)
1095 return path.lower();
1096#else // LINUX
1097 return std::string(path);
1098#endif
1099}
1100
1101// If existing, return the actual PDB path on disk.
1102static std::optional<std::string>
1103findPdbPath(StringRef pdbPath, ObjFile *dependentFile, StringRef outputPath) {
1104 // Ensure the file exists before anything else. In some cases, if the path
1105 // points to a removable device, Driver::enqueuePath() would fail with an
1106 // error (EAGAIN, "resource unavailable try again") which we want to skip
1107 // silently.
1108 if (llvm::sys::fs::exists(Path: pdbPath))
1109 return normalizePdbPath(path: pdbPath);
1110
1111 StringRef objPath = !dependentFile->parentName.empty()
1112 ? dependentFile->parentName
1113 : dependentFile->getName();
1114
1115 // Currently, type server PDBs are only created by MSVC cl, which only runs
1116 // on Windows, so we can assume type server paths are Windows style.
1117 StringRef pdbName = sys::path::filename(path: pdbPath, style: sys::path::Style::windows);
1118
1119 // Check if the PDB is in the same folder as the OBJ.
1120 SmallString<128> path;
1121 sys::path::append(path, a: sys::path::parent_path(path: objPath), b: pdbName);
1122 if (llvm::sys::fs::exists(Path: path))
1123 return normalizePdbPath(path);
1124
1125 // Check if the PDB is in the output folder.
1126 path.clear();
1127 sys::path::append(path, a: sys::path::parent_path(path: outputPath), b: pdbName);
1128 if (llvm::sys::fs::exists(Path: path))
1129 return normalizePdbPath(path);
1130
1131 return std::nullopt;
1132}
1133
1134PDBInputFile::PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1135 : InputFile(ctx.symtab, PDBKind, m) {}
1136
1137PDBInputFile::~PDBInputFile() = default;
1138
1139PDBInputFile *PDBInputFile::findFromRecordPath(const COFFLinkerContext &ctx,
1140 StringRef path,
1141 ObjFile *fromFile) {
1142 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: ctx.config.outputFile);
1143 if (!p)
1144 return nullptr;
1145 auto it = ctx.pdbInputFileInstances.find(x: *p);
1146 if (it != ctx.pdbInputFileInstances.end())
1147 return it->second;
1148 return nullptr;
1149}
1150
1151void PDBInputFile::parse() {
1152 symtab.ctx.pdbInputFileInstances[mb.getBufferIdentifier().str()] = this;
1153
1154 std::unique_ptr<pdb::IPDBSession> thisSession;
1155 Error E = pdb::NativeSession::createFromPdb(
1156 MB: MemoryBuffer::getMemBuffer(Ref: mb, RequiresNullTerminator: false), Session&: thisSession);
1157 if (E) {
1158 loadErrorStr.emplace(args: toString(E: std::move(E)));
1159 return; // fail silently at this point - the error will be handled later,
1160 // when merging the debug type stream
1161 }
1162
1163 session.reset(p: static_cast<pdb::NativeSession *>(thisSession.release()));
1164
1165 pdb::PDBFile &pdbFile = session->getPDBFile();
1166 auto expectedInfo = pdbFile.getPDBInfoStream();
1167 // All PDB Files should have an Info stream.
1168 if (!expectedInfo) {
1169 loadErrorStr.emplace(args: toString(E: expectedInfo.takeError()));
1170 return;
1171 }
1172 debugTypesObj = makeTypeServerSource(ctx&: symtab.ctx, pdbInputFile: this);
1173}
1174
1175// Used only for DWARF debug info, which is not common (except in MinGW
1176// environments). This returns an optional pair of file name and line
1177// number for where the variable was defined.
1178std::optional<std::pair<StringRef, uint32_t>>
1179ObjFile::getVariableLocation(StringRef var) {
1180 if (!dwarf) {
1181 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1182 if (!dwarf)
1183 return std::nullopt;
1184 }
1185 if (symtab.machine == I386)
1186 var.consume_front(Prefix: "_");
1187 std::optional<std::pair<std::string, unsigned>> ret =
1188 dwarf->getVariableLoc(name: var);
1189 if (!ret)
1190 return std::nullopt;
1191 return std::make_pair(x: saver().save(S: ret->first), y&: ret->second);
1192}
1193
1194// Used only for DWARF debug info, which is not common (except in MinGW
1195// environments).
1196std::optional<DILineInfo> ObjFile::getDILineInfo(uint32_t offset,
1197 uint32_t sectionIndex) {
1198 if (!dwarf) {
1199 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1200 if (!dwarf)
1201 return std::nullopt;
1202 }
1203
1204 return dwarf->getDILineInfo(offset, sectionIndex);
1205}
1206
1207void ObjFile::enqueuePdbFile(StringRef path, ObjFile *fromFile) {
1208 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: symtab.ctx.config.outputFile);
1209 if (!p)
1210 return;
1211 auto it = symtab.ctx.pdbInputFileInstances.emplace(args&: *p, args: nullptr);
1212 if (!it.second)
1213 return; // already scheduled for load
1214 symtab.ctx.driver.enqueuePDB(Path: *p);
1215}
1216
1217ImportFile::ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1218 : InputFile(ctx.getSymtab(machine: getMachineType(m)), ImportKind, m),
1219 live(!ctx.config.doGC) {}
1220
1221MachineTypes ImportFile::getMachineType(MemoryBufferRef m) {
1222 uint16_t machine =
1223 reinterpret_cast<const coff_import_header *>(m.getBufferStart())->Machine;
1224 return MachineTypes(machine);
1225}
1226
1227bool ImportFile::isSameImport(const ImportFile *other) const {
1228 if (!externalName.empty())
1229 return other->externalName == externalName;
1230 return hdr->OrdinalHint == other->hdr->OrdinalHint;
1231}
1232
1233ImportThunkChunk *ImportFile::makeImportThunk() {
1234 switch (hdr->Machine) {
1235 case AMD64:
1236 return make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym);
1237 case I386:
1238 return make<ImportThunkChunkX86>(args&: symtab.ctx, args&: impSym);
1239 case ARM64:
1240 return make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impSym, args: ARM64);
1241 case ARMNT:
1242 return make<ImportThunkChunkARM>(args&: symtab.ctx, args&: impSym);
1243 }
1244 llvm_unreachable("unknown machine type");
1245}
1246
1247void ImportFile::parse() {
1248 const auto *hdr =
1249 reinterpret_cast<const coff_import_header *>(mb.getBufferStart());
1250
1251 // Check if the total size is valid.
1252 if (mb.getBufferSize() < sizeof(*hdr) ||
1253 mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData)
1254 Fatal(ctx&: symtab.ctx) << "broken import library";
1255
1256 // Read names and create an __imp_ symbol.
1257 StringRef buf = mb.getBuffer().substr(Start: sizeof(*hdr));
1258 auto split = buf.split(Separator: '\0');
1259 buf = split.second;
1260 StringRef name;
1261 if (isArm64EC(Machine: hdr->Machine)) {
1262 if (std::optional<std::string> demangledName =
1263 getArm64ECDemangledFunctionName(Name: split.first))
1264 name = saver().save(S: *demangledName);
1265 }
1266 if (name.empty())
1267 name = saver().save(S: split.first);
1268 StringRef impName = saver().save(S: "__imp_" + name);
1269 dllName = buf.split(Separator: '\0').first;
1270 StringRef extName;
1271 switch (hdr->getNameType()) {
1272 case IMPORT_ORDINAL:
1273 extName = "";
1274 break;
1275 case IMPORT_NAME:
1276 extName = name;
1277 break;
1278 case IMPORT_NAME_NOPREFIX:
1279 extName = ltrim1(s: name, chars: "?@_");
1280 break;
1281 case IMPORT_NAME_UNDECORATE:
1282 extName = ltrim1(s: name, chars: "?@_");
1283 extName = extName.substr(Start: 0, N: extName.find(C: '@'));
1284 break;
1285 case IMPORT_NAME_EXPORTAS:
1286 extName = buf.substr(Start: dllName.size() + 1).split(Separator: '\0').first;
1287 break;
1288 }
1289
1290 this->hdr = hdr;
1291 externalName = extName;
1292
1293 bool isCode = hdr->getType() == llvm::COFF::IMPORT_CODE;
1294
1295 if (!symtab.isEC()) {
1296 impSym = symtab.addImportData(n: impName, f: this, location);
1297 } else {
1298 // In addition to the regular IAT, ARM64EC also contains an auxiliary IAT,
1299 // which holds addresses that are guaranteed to be callable directly from
1300 // ARM64 code. Function symbol naming is swapped: __imp_ symbols refer to
1301 // the auxiliary IAT, while __imp_aux_ symbols refer to the regular IAT. For
1302 // data imports, the naming is reversed.
1303 StringRef auxImpName = saver().save(S: "__imp_aux_" + name);
1304 if (isCode) {
1305 impSym = symtab.addImportData(n: auxImpName, f: this, location);
1306 impECSym = symtab.addImportData(n: impName, f: this, location&: auxLocation);
1307 } else {
1308 impSym = symtab.addImportData(n: impName, f: this, location);
1309 impECSym = symtab.addImportData(n: auxImpName, f: this, location&: auxLocation);
1310 }
1311 if (!impECSym)
1312 return;
1313
1314 StringRef auxImpCopyName = saver().save(S: "__auximpcopy_" + name);
1315 auxImpCopySym = symtab.addImportData(n: auxImpCopyName, f: this, location&: auxCopyLocation);
1316 if (!auxImpCopySym)
1317 return;
1318 }
1319 // If this was a duplicate, we logged an error but may continue;
1320 // in this case, impSym is nullptr.
1321 if (!impSym)
1322 return;
1323
1324 if (hdr->getType() == llvm::COFF::IMPORT_CONST)
1325 static_cast<void>(symtab.addImportData(n: name, f: this, location));
1326
1327 // If type is function, we need to create a thunk which jump to an
1328 // address pointed by the __imp_ symbol. (This allows you to call
1329 // DLL functions just like regular non-DLL functions.)
1330 if (isCode) {
1331 if (!symtab.isEC()) {
1332 thunkSym = symtab.addImportThunk(name, s: impSym, chunk: makeImportThunk());
1333 } else {
1334 thunkSym = symtab.addImportThunk(
1335 name, s: impSym, chunk: make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym));
1336
1337 if (std::optional<std::string> mangledName =
1338 getArm64ECMangledFunctionName(Name: name)) {
1339 StringRef auxThunkName = saver().save(S: *mangledName);
1340 auxThunkSym = symtab.addImportThunk(
1341 name: auxThunkName, s: impECSym,
1342 chunk: make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impECSym, args: ARM64EC));
1343 }
1344
1345 StringRef impChkName = saver().save(S: "__impchk_" + name);
1346 impchkThunk = make<ImportThunkChunkARM64EC>(args: this);
1347 impchkThunk->sym = symtab.addImportThunk(name: impChkName, s: impSym, chunk: impchkThunk);
1348 symtab.ctx.driver.pullArm64ECIcallHelper();
1349 }
1350 }
1351}
1352
1353BitcodeFile::BitcodeFile(SymbolTable &symtab, MemoryBufferRef mb,
1354 std::unique_ptr<lto::InputFile> &o, bool lazy)
1355 : InputFile(symtab, BitcodeKind, mb, lazy) {
1356 obj.swap(u&: o);
1357}
1358
1359BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
1360 StringRef archiveName,
1361 uint64_t offsetInArchive, bool lazy) {
1362 std::string path = mb.getBufferIdentifier().str();
1363 if (ctx.config.thinLTOIndexOnly)
1364 path = replaceThinLTOSuffix(path: mb.getBufferIdentifier(),
1365 suffix: ctx.config.thinLTOObjectSuffixReplace.first,
1366 repl: ctx.config.thinLTOObjectSuffixReplace.second);
1367
1368 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1369 // name. If two archives define two members with the same name, this
1370 // causes a collision which result in only one of the objects being taken
1371 // into consideration at LTO time (which very likely causes undefined
1372 // symbols later in the link stage). So we append file offset to make
1373 // filename unique.
1374 MemoryBufferRef mbref(mb.getBuffer(),
1375 saver().save(S: archiveName.empty()
1376 ? path
1377 : archiveName +
1378 sys::path::filename(path) +
1379 utostr(X: offsetInArchive)));
1380
1381 std::unique_ptr<lto::InputFile> obj = check(e: lto::InputFile::create(Object: mbref));
1382 obj->setArchivePathAndName(Path: archiveName, Name: mb.getBufferIdentifier());
1383 return make<BitcodeFile>(args&: ctx.getSymtab(machine: getMachineType(obj: obj.get())), args&: mb, args&: obj,
1384 args&: lazy);
1385}
1386
1387BitcodeFile::~BitcodeFile() = default;
1388
1389void BitcodeFile::parse() {
1390 llvm::StringSaver &saver = lld::saver();
1391
1392 std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
1393 for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
1394 // FIXME: Check nodeduplicate
1395 comdat[i] =
1396 symtab.addComdat(f: this, n: saver.save(S: obj->getComdatTable()[i].first));
1397 Triple tt(obj->getTargetTriple());
1398 RTLIB::RuntimeLibcallsInfo libcalls(tt);
1399 for (const lto::InputFile::Symbol &objSym : obj->symbols()) {
1400 StringRef symName = saver.save(S: objSym.getName());
1401 int comdatIndex = objSym.getComdatIndex();
1402 Symbol *sym;
1403 SectionChunk *fakeSC = nullptr;
1404 if (objSym.isExecutable())
1405 fakeSC = &symtab.ctx.ltoTextSectionChunk.chunk;
1406 else
1407 fakeSC = &symtab.ctx.ltoDataSectionChunk.chunk;
1408 if (objSym.isUndefined()) {
1409 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1410 if (objSym.isWeak())
1411 sym->deferUndefined = true;
1412 // If one LTO object file references (i.e. has an undefined reference to)
1413 // a symbol with an __imp_ prefix, the LTO compilation itself sees it
1414 // as unprefixed but with a dllimport attribute instead, and doesn't
1415 // understand the relation to a concrete IR symbol with the __imp_ prefix.
1416 //
1417 // For such cases, mark the symbol as used in a regular object (i.e. the
1418 // symbol must be retained) so that the linker can associate the
1419 // references in the end. If the symbol is defined in an import library
1420 // or in a regular object file, this has no effect, but if it is defined
1421 // in another LTO object file, this makes sure it is kept, to fulfill
1422 // the reference when linking the output of the LTO compilation.
1423 if (symName.starts_with(Prefix: "__imp_"))
1424 sym->isUsedInRegularObj = true;
1425 } else if (objSym.isCommon()) {
1426 sym = symtab.addCommon(f: this, n: symName, size: objSym.getCommonSize());
1427 } else if (objSym.isWeak() && objSym.isIndirect()) {
1428 // Weak external.
1429 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: true);
1430 std::string fallback = std::string(objSym.getCOFFWeakExternalFallback());
1431 Symbol *alias = symtab.addUndefined(name: saver.save(S: fallback));
1432 checkAndSetWeakAlias(symtab, f: this, source: sym, target: alias, isAntiDep: false);
1433 } else if (comdatIndex != -1) {
1434 if (symName == obj->getComdatTable()[comdatIndex].first) {
1435 sym = comdat[comdatIndex].first;
1436 if (cast<DefinedRegular>(Val: sym)->data == nullptr)
1437 cast<DefinedRegular>(Val: sym)->data = &fakeSC->repl;
1438 } else if (comdat[comdatIndex].second) {
1439 sym = symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC);
1440 } else {
1441 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1442 }
1443 } else {
1444 sym =
1445 symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC, sectionOffset: 0, isWeak: objSym.isWeak());
1446 }
1447 symbols.push_back(x: sym);
1448 if (objSym.isUsed() || objSym.isLibcall(Libcalls: libcalls))
1449 symtab.ctx.config.gcroot.push_back(x: sym);
1450 }
1451 directives = saver.save(S: obj->getCOFFLinkerOpts());
1452}
1453
1454void BitcodeFile::parseLazy() {
1455 for (const lto::InputFile::Symbol &sym : obj->symbols())
1456 if (!sym.isUndefined()) {
1457 symtab.addLazyObject(f: this, n: sym.getName());
1458 if (!lazy)
1459 return;
1460 }
1461}
1462
1463MachineTypes BitcodeFile::getMachineType(const llvm::lto::InputFile *obj) {
1464 Triple t(obj->getTargetTriple());
1465 switch (t.getArch()) {
1466 case Triple::x86_64:
1467 return AMD64;
1468 case Triple::x86:
1469 return I386;
1470 case Triple::arm:
1471 case Triple::thumb:
1472 return ARMNT;
1473 case Triple::aarch64:
1474 return t.isWindowsArm64EC() ? ARM64EC : ARM64;
1475 default:
1476 return IMAGE_FILE_MACHINE_UNKNOWN;
1477 }
1478}
1479
1480std::string lld::coff::replaceThinLTOSuffix(StringRef path, StringRef suffix,
1481 StringRef repl) {
1482 if (path.consume_back(Suffix: suffix))
1483 return (path + repl).str();
1484 return std::string(path);
1485}
1486
1487static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) {
1488 for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) {
1489 const coff_section *sec = CHECK(coffObj->getSection(i), file);
1490 if (rva >= sec->VirtualAddress &&
1491 rva <= sec->VirtualAddress + sec->VirtualSize) {
1492 return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0;
1493 }
1494 }
1495 return false;
1496}
1497
1498void DLLFile::parse() {
1499 // Parse a memory buffer as a PE-COFF executable.
1500 std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
1501
1502 if (auto *obj = dyn_cast<COFFObjectFile>(Val: bin.get())) {
1503 bin.release();
1504 coffObj.reset(p: obj);
1505 } else {
1506 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a COFF file";
1507 return;
1508 }
1509
1510 if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) {
1511 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a PE-COFF executable";
1512 return;
1513 }
1514
1515 for (const auto &exp : coffObj->export_directories()) {
1516 StringRef dllName, symbolName;
1517 uint32_t exportRVA;
1518 checkError(e: exp.getDllName(Result&: dllName));
1519 checkError(e: exp.getSymbolName(Result&: symbolName));
1520 checkError(e: exp.getExportRVA(Result&: exportRVA));
1521
1522 if (symbolName.empty())
1523 continue;
1524
1525 bool code = isRVACode(coffObj: coffObj.get(), rva: exportRVA, file: this);
1526
1527 Symbol *s = make<Symbol>();
1528 s->dllName = dllName;
1529 s->symbolName = symbolName;
1530 s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA;
1531 s->nameType = ImportNameType::IMPORT_NAME;
1532
1533 if (coffObj->getMachine() == I386) {
1534 s->symbolName = symbolName = saver().save(S: "_" + symbolName);
1535 s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
1536 }
1537
1538 StringRef impName = saver().save(S: "__imp_" + symbolName);
1539 symtab.addLazyDLLSymbol(f: this, sym: s, n: impName);
1540 if (code)
1541 symtab.addLazyDLLSymbol(f: this, sym: s, n: symbolName);
1542 if (symtab.isEC()) {
1543 StringRef impAuxName = saver().save(S: "__imp_aux_" + symbolName);
1544 symtab.addLazyDLLSymbol(f: this, sym: s, n: impAuxName);
1545
1546 if (code) {
1547 std::optional<std::string> mangledName =
1548 getArm64ECMangledFunctionName(Name: symbolName);
1549 if (mangledName)
1550 symtab.addLazyDLLSymbol(f: this, sym: s, n: *mangledName);
1551 }
1552 }
1553 }
1554}
1555
1556MachineTypes DLLFile::getMachineType() const {
1557 if (coffObj)
1558 return static_cast<MachineTypes>(coffObj->getMachine());
1559 return IMAGE_FILE_MACHINE_UNKNOWN;
1560}
1561
1562void DLLFile::makeImport(DLLFile::Symbol *s) {
1563 if (!seen.insert(key: s->symbolName).second)
1564 return;
1565
1566 size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
1567 size_t size = sizeof(coff_import_header) + impSize;
1568 char *buf = bAlloc().Allocate<char>(Num: size);
1569 memset(s: buf, c: 0, n: size);
1570 char *p = buf;
1571 auto *imp = reinterpret_cast<coff_import_header *>(p);
1572 p += sizeof(*imp);
1573 imp->Sig2 = 0xFFFF;
1574 imp->Machine = coffObj->getMachine();
1575 imp->SizeOfData = impSize;
1576 imp->OrdinalHint = 0; // Only linking by name
1577 imp->TypeInfo = (s->nameType << 2) | s->importType;
1578
1579 // Write symbol name and DLL name.
1580 memcpy(dest: p, src: s->symbolName.data(), n: s->symbolName.size());
1581 p += s->symbolName.size() + 1;
1582 memcpy(dest: p, src: s->dllName.data(), n: s->dllName.size());
1583 MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName);
1584 ImportFile *impFile = make<ImportFile>(args&: symtab.ctx, args&: mbref);
1585 symtab.ctx.driver.addFile(file: impFile);
1586}
1587