1//===- InputFiles.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "COFFLinkerContext.h"
11#include "Chunks.h"
12#include "Config.h"
13#include "DebugTypes.h"
14#include "Driver.h"
15#include "SymbolTable.h"
16#include "Symbols.h"
17#include "lld/Common/DWARF.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/BinaryFormat/COFF.h"
21#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
22#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
23#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
24#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
25#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27#include "llvm/IR/Mangler.h"
28#include "llvm/IR/RuntimeLibcalls.h"
29#include "llvm/LTO/LTO.h"
30#include "llvm/Object/Binary.h"
31#include "llvm/Object/COFF.h"
32#include "llvm/Object/COFFImportFile.h"
33#include "llvm/Support/Casting.h"
34#include "llvm/Support/Endian.h"
35#include "llvm/Support/Error.h"
36#include "llvm/Support/FileSystem.h"
37#include "llvm/Support/Path.h"
38#include "llvm/TargetParser/Triple.h"
39#include <cstring>
40#include <optional>
41#include <utility>
42
43using namespace llvm;
44using namespace llvm::COFF;
45using namespace llvm::codeview;
46using namespace llvm::object;
47using namespace llvm::support::endian;
48using namespace lld;
49using namespace lld::coff;
50
51using llvm::Triple;
52using llvm::support::ulittle32_t;
53
54// Returns the last element of a path, which is supposed to be a filename.
55static StringRef getBasename(StringRef path) {
56 return sys::path::filename(path, style: sys::path::Style::windows);
57}
58
59// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
60std::string lld::toString(const coff::InputFile *file) {
61 if (!file)
62 return "<internal>";
63 if (file->parentName.empty())
64 return std::string(file->getName());
65
66 return (getBasename(path: file->parentName) + "(" + getBasename(path: file->getName()) +
67 ")")
68 .str();
69}
70
71const COFFSyncStream &coff::operator<<(const COFFSyncStream &s,
72 const InputFile *f) {
73 return s << toString(file: f);
74}
75
76/// Checks that Source is compatible with being a weak alias to Target.
77/// If Source is Undefined and has no weak alias set, makes it a weak
78/// alias to Target.
79static void checkAndSetWeakAlias(SymbolTable &symtab, InputFile *f,
80 Symbol *source, Symbol *target,
81 bool isAntiDep) {
82 if (auto *u = dyn_cast<Undefined>(Val: source)) {
83 if (u->weakAlias && u->weakAlias != target) {
84 // Ignore duplicated anti-dependency symbols.
85 if (isAntiDep)
86 return;
87 if (!u->isAntiDep) {
88 // Weak aliases as produced by GCC are named in the form
89 // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
90 // of another symbol emitted near the weak symbol.
91 // Just use the definition from the first object file that defined
92 // this weak symbol.
93 if (symtab.ctx.config.allowDuplicateWeak)
94 return;
95 symtab.reportDuplicate(existing: source, newFile: f);
96 }
97 }
98 u->setWeakAlias(sym: target, antiDep: isAntiDep);
99 }
100}
101
102static bool ignoredSymbolName(StringRef name) {
103 return name == "@feat.00" || name == "@comp.id";
104}
105
106static coff_symbol_generic *cloneSymbol(COFFSymbolRef sym) {
107 if (sym.isBigObj()) {
108 auto *copy = make<coff_symbol32>(
109 args: *reinterpret_cast<const coff_symbol32 *>(sym.getRawPtr()));
110 return reinterpret_cast<coff_symbol_generic *>(copy);
111 } else {
112 auto *copy = make<coff_symbol16>(
113 args: *reinterpret_cast<const coff_symbol16 *>(sym.getRawPtr()));
114 return reinterpret_cast<coff_symbol_generic *>(copy);
115 }
116}
117
118// Skip importing DllMain thunks from import libraries.
119static bool fixupDllMain(COFFLinkerContext &ctx, llvm::object::Archive *file,
120 const Archive::Symbol &sym, bool &skipDllMain) {
121 const Archive::Child &c =
122 CHECK(sym.getMember(), file->getFileName() +
123 ": could not get the member for symbol " +
124 toCOFFString(ctx, sym));
125 MemoryBufferRef mb =
126 CHECK(c.getMemoryBufferRef(),
127 file->getFileName() +
128 ": could not get the buffer for a child buffer of the archive");
129 if (identify_magic(magic: mb.getBuffer()) == file_magic::coff_import_library) {
130 if (ctx.config.warnImportedDllMain) {
131 // We won't place DllMain symbols in the symbol table if they are
132 // coming from a import library. This message can be ignored with the flag
133 // '/ignore:importeddllmain'
134 Warn(ctx)
135 << file->getFileName()
136 << ": skipping imported DllMain symbol [importeddllmain]\nNOTE: this "
137 "might be a mistake when the DLL/library was produced.";
138 }
139 skipDllMain = true;
140 return true;
141 }
142 return false;
143}
144
145ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m)
146 : InputFile(ctx.symtab, ArchiveKind, m) {}
147
148void ArchiveFile::parse() {
149 COFFLinkerContext &ctx = symtab.ctx;
150 SymbolTable *archiveSymtab = &symtab;
151
152 // Parse a MemoryBufferRef as an archive file.
153 file = CHECK(Archive::create(mb), this);
154
155 // Try to read symbols from ECSYMBOLS section on ARM64EC.
156 if (ctx.symtab.isEC()) {
157 iterator_range<Archive::symbol_iterator> symbols =
158 CHECK(file->ec_symbols(), this);
159 if (!symbols.empty()) {
160 for (const Archive::Symbol &sym : symbols)
161 ctx.symtab.addLazyArchive(f: this, sym);
162
163 // Read both EC and native symbols on ARM64X.
164 archiveSymtab = &*ctx.hybridSymtab;
165 } else {
166 // If the ECSYMBOLS section is missing in the archive, the archive could
167 // be either a native-only ARM64 or x86_64 archive. Check the machine type
168 // of the object containing a symbol to determine which symbol table to
169 // use.
170 Archive::symbol_iterator sym = file->symbol_begin();
171 if (sym != file->symbol_end()) {
172 MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN;
173 Archive::Child child =
174 CHECK(sym->getMember(),
175 file->getFileName() +
176 ": could not get the buffer for a child of the archive");
177 MemoryBufferRef mb = CHECK(
178 child.getMemoryBufferRef(),
179 file->getFileName() +
180 ": could not get the buffer for a child buffer of the archive");
181 switch (identify_magic(magic: mb.getBuffer())) {
182 case file_magic::coff_object: {
183 std::unique_ptr<COFFObjectFile> obj =
184 CHECK(COFFObjectFile::create(mb),
185 check(child.getName()) + ":" + ": not a valid COFF file");
186 machine = MachineTypes(obj->getMachine());
187 break;
188 }
189 case file_magic::coff_import_library:
190 machine = MachineTypes(COFFImportFile(mb).getMachine());
191 break;
192 case file_magic::bitcode: {
193 std::unique_ptr<lto::InputFile> obj =
194 check(e: lto::InputFile::create(Object: mb));
195 machine = BitcodeFile::getMachineType(obj: obj.get());
196 break;
197 }
198 default:
199 break;
200 }
201 archiveSymtab = &ctx.getSymtab(machine);
202 }
203 }
204 }
205
206 bool skipDllMain = false;
207 StringRef mangledDllMain, impMangledDllMain;
208
209 // The calls below will fail if we haven't set the machine type yet. Instead
210 // of failing, it is preferable to skip this "imported DllMain" check if we
211 // don't know the machine type at this point.
212 if (!file->isEmpty() && ctx.config.machine != IMAGE_FILE_MACHINE_UNKNOWN) {
213 mangledDllMain = archiveSymtab->mangle(sym: "DllMain");
214 impMangledDllMain = uniqueSaver().save(S: "__imp_" + mangledDllMain);
215 }
216
217 // Read the symbol table to construct Lazy objects.
218 for (const Archive::Symbol &sym : file->symbols()) {
219 // If an import library provides the DllMain symbol, skip importing it, as
220 // we should be using our own DllMain, not another DLL's DllMain.
221 if (!mangledDllMain.empty() && (sym.getName() == mangledDllMain ||
222 sym.getName() == impMangledDllMain)) {
223 if (skipDllMain || fixupDllMain(ctx, file: file.get(), sym, skipDllMain))
224 continue;
225 }
226 archiveSymtab->addLazyArchive(f: this, sym);
227 }
228}
229
230// Returns a buffer pointing to a member file containing a given symbol.
231void ArchiveFile::addMember(const Archive::Symbol &sym) {
232 const Archive::Child &c =
233 CHECK(sym.getMember(), "could not get the member for symbol " +
234 toCOFFString(symtab.ctx, sym));
235
236 // Return an empty buffer if we have already returned the same buffer.
237 // FIXME: Remove this once we resolve all defineds before all undefineds in
238 // ObjFile::initializeSymbols().
239 if (!seen.insert(V: c.getChildOffset()).second)
240 return;
241
242 symtab.ctx.driver.enqueueArchiveMember(c, sym, parentName: getName());
243}
244
245std::vector<MemoryBufferRef>
246lld::coff::getArchiveMembers(COFFLinkerContext &ctx, Archive *file) {
247 std::vector<MemoryBufferRef> v;
248 Error err = Error::success();
249
250 // Thin archives refer to .o files, so --reproduces needs the .o files too.
251 bool addToTar = file->isThin() && ctx.driver.tar;
252
253 for (const Archive::Child &c : file->children(Err&: err)) {
254 MemoryBufferRef mbref =
255 CHECK(c.getMemoryBufferRef(),
256 file->getFileName() +
257 ": could not get the buffer for a child of the archive");
258 if (addToTar) {
259 ctx.driver.tar->append(Path: relativeToRoot(path: check(e: c.getFullName())),
260 Data: mbref.getBuffer());
261 }
262 v.push_back(x: mbref);
263 }
264 if (err)
265 Fatal(ctx) << file->getFileName()
266 << ": Archive::children failed: " << toString(E: std::move(err));
267 return v;
268}
269
270ObjFile::ObjFile(SymbolTable &symtab, COFFObjectFile *coffObj, bool lazy)
271 : InputFile(symtab, ObjectKind, coffObj->getMemoryBufferRef(), lazy),
272 coffObj(coffObj) {}
273
274ObjFile *ObjFile::create(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy) {
275 // Parse a memory buffer as a COFF file.
276 Expected<std::unique_ptr<Binary>> bin = createBinary(Source: m);
277 if (!bin)
278 Fatal(ctx) << "Could not parse " << m.getBufferIdentifier();
279
280 auto *obj = dyn_cast<COFFObjectFile>(Val: bin->get());
281 if (!obj)
282 Fatal(ctx) << m.getBufferIdentifier() << " is not a COFF file";
283
284 bin->release();
285 return make<ObjFile>(args&: ctx.getSymtab(machine: MachineTypes(obj->getMachine())), args&: obj,
286 args&: lazy);
287}
288
289void ObjFile::parseLazy() {
290 // Native object file.
291 uint32_t numSymbols = coffObj->getNumberOfSymbols();
292 for (uint32_t i = 0; i < numSymbols; ++i) {
293 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
294 if (coffSym.isUndefined() || !coffSym.isExternal() ||
295 coffSym.isWeakExternal())
296 continue;
297 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
298 if (coffSym.isAbsolute() && ignoredSymbolName(name))
299 continue;
300 symtab.addLazyObject(f: this, n: name);
301 if (!lazy)
302 return;
303 i += coffSym.getNumberOfAuxSymbols();
304 }
305}
306
307struct ECMapEntry {
308 ulittle32_t src;
309 ulittle32_t dst;
310 ulittle32_t type;
311};
312
313void ObjFile::initializeECThunks() {
314 for (SectionChunk *chunk : hybmpChunks) {
315 if (chunk->getContents().size() % sizeof(ECMapEntry)) {
316 Err(ctx&: symtab.ctx) << "Invalid .hybmp chunk size "
317 << chunk->getContents().size();
318 continue;
319 }
320
321 const uint8_t *end =
322 chunk->getContents().data() + chunk->getContents().size();
323 for (const uint8_t *iter = chunk->getContents().data(); iter != end;
324 iter += sizeof(ECMapEntry)) {
325 auto entry = reinterpret_cast<const ECMapEntry *>(iter);
326 switch (entry->type) {
327 case Arm64ECThunkType::Entry:
328 symtab.addEntryThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
329 break;
330 case Arm64ECThunkType::Exit:
331 symtab.addExitThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
332 break;
333 case Arm64ECThunkType::GuestExit:
334 break;
335 default:
336 Warn(ctx&: symtab.ctx) << "Ignoring unknown EC thunk type " << entry->type;
337 }
338 }
339 }
340}
341
342void ObjFile::parse() {
343 // Read section and symbol tables.
344 initializeChunks();
345 initializeSymbols();
346 initializeFlags();
347 initializeDependencies();
348 initializeECThunks();
349}
350
351const coff_section *ObjFile::getSection(uint32_t i) {
352 auto sec = coffObj->getSection(index: i);
353 if (!sec)
354 Fatal(ctx&: symtab.ctx) << "getSection failed: #" << i << ": " << sec.takeError();
355 return *sec;
356}
357
358// We set SectionChunk pointers in the SparseChunks vector to this value
359// temporarily to mark comdat sections as having an unknown resolution. As we
360// walk the object file's symbol table, once we visit either a leader symbol or
361// an associative section definition together with the parent comdat's leader,
362// we set the pointer to either nullptr (to mark the section as discarded) or a
363// valid SectionChunk for that section.
364static SectionChunk *const pendingComdat = reinterpret_cast<SectionChunk *>(1);
365
366void ObjFile::initializeChunks() {
367 uint32_t numSections = coffObj->getNumberOfSections();
368 sparseChunks.resize(new_size: numSections + 1);
369 for (uint32_t i = 1; i < numSections + 1; ++i) {
370 const coff_section *sec = getSection(i);
371 if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
372 sparseChunks[i] = pendingComdat;
373 else
374 sparseChunks[i] = readSection(sectionNumber: i, def: nullptr, leaderName: "");
375 }
376}
377
378SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
379 const coff_aux_section_definition *def,
380 StringRef leaderName) {
381 const coff_section *sec = getSection(i: sectionNumber);
382
383 StringRef name;
384 if (Expected<StringRef> e = coffObj->getSectionName(Sec: sec))
385 name = *e;
386 else
387 Fatal(ctx&: symtab.ctx) << "getSectionName failed: #" << sectionNumber << ": "
388 << e.takeError();
389
390 if (name == ".drectve") {
391 ArrayRef<uint8_t> data;
392 cantFail(Err: coffObj->getSectionContents(Sec: sec, Res&: data));
393 directives = StringRef((const char *)data.data(), data.size());
394 return nullptr;
395 }
396
397 if (name == ".llvm_addrsig") {
398 addrsigSec = sec;
399 return nullptr;
400 }
401
402 if (name == ".llvm.call-graph-profile") {
403 callgraphSec = sec;
404 return nullptr;
405 }
406
407 // Those sections are generated by -fembed-bitcode and do not need to be kept
408 // in executable files.
409 if (name == ".llvmbc" || name == ".llvmcmd")
410 return nullptr;
411
412 // Object files may have DWARF debug info or MS CodeView debug info
413 // (or both).
414 //
415 // DWARF sections don't need any special handling from the perspective
416 // of the linker; they are just a data section containing relocations.
417 // We can just link them to complete debug info.
418 //
419 // CodeView needs linker support. We need to interpret debug info,
420 // and then write it to a separate .pdb file.
421
422 // Ignore DWARF debug info unless requested to be included.
423 if (!symtab.ctx.config.includeDwarfChunks && name.starts_with(Prefix: ".debug_"))
424 return nullptr;
425
426 if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
427 return nullptr;
428 SectionChunk *c;
429 if (isArm64EC(Machine: getMachineType()))
430 c = make<SectionChunkEC>(args: this, args&: sec);
431 else
432 c = make<SectionChunk>(args: this, args&: sec);
433 if (def)
434 c->checksum = def->CheckSum;
435
436 // CodeView sections are stored to a different vector because they are not
437 // linked in the regular manner.
438 if (c->isCodeView())
439 debugChunks.push_back(x: c);
440 else if (name == ".gfids$y")
441 guardFidChunks.push_back(x: c);
442 else if (name == ".giats$y")
443 guardIATChunks.push_back(x: c);
444 else if (name == ".gljmp$y")
445 guardLJmpChunks.push_back(x: c);
446 else if (name == ".gehcont$y")
447 guardEHContChunks.push_back(x: c);
448 else if (name == ".sxdata")
449 sxDataChunks.push_back(x: c);
450 else if (isArm64EC(Machine: getMachineType()) && name == ".hybmp$x")
451 hybmpChunks.push_back(x: c);
452 else if (symtab.ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
453 name == ".rdata" && leaderName.starts_with(Prefix: "??_C@"))
454 // COFF sections that look like string literal sections (i.e. no
455 // relocations, in .rdata, leader symbol name matches the MSVC name mangling
456 // for string literals) are subject to string tail merging.
457 MergeChunk::addSection(ctx&: symtab.ctx, c);
458 else if (name == ".rsrc" || name.starts_with(Prefix: ".rsrc$"))
459 resourceChunks.push_back(x: c);
460 else if (!(sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_INFO))
461 chunks.push_back(x: c);
462
463 return c;
464}
465
466void ObjFile::includeResourceChunks() {
467 chunks.insert(position: chunks.end(), first: resourceChunks.begin(), last: resourceChunks.end());
468}
469
470void ObjFile::readAssociativeDefinition(
471 COFFSymbolRef sym, const coff_aux_section_definition *def) {
472 readAssociativeDefinition(coffSym: sym, def, parentSection: def->getNumber(IsBigObj: sym.isBigObj()));
473}
474
475void ObjFile::readAssociativeDefinition(COFFSymbolRef sym,
476 const coff_aux_section_definition *def,
477 uint32_t parentIndex) {
478 SectionChunk *parent = sparseChunks[parentIndex];
479 int32_t sectionNumber = sym.getSectionNumber();
480
481 auto diag = [&]() {
482 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
483
484 StringRef parentName;
485 const coff_section *parentSec = getSection(i: parentIndex);
486 if (Expected<StringRef> e = coffObj->getSectionName(Sec: parentSec))
487 parentName = *e;
488 Err(ctx&: symtab.ctx) << toString(file: this) << ": associative comdat " << name
489 << " (sec " << sectionNumber
490 << ") has invalid reference to section " << parentName
491 << " (sec " << parentIndex << ")";
492 };
493
494 if (parent == pendingComdat) {
495 // This can happen if an associative comdat refers to another associative
496 // comdat that appears after it (invalid per COFF spec) or to a section
497 // without any symbols.
498 diag();
499 return;
500 }
501
502 // Check whether the parent is prevailing. If it is, so are we, and we read
503 // the section; otherwise mark it as discarded.
504 if (parent) {
505 SectionChunk *c = readSection(sectionNumber, def, leaderName: "");
506 sparseChunks[sectionNumber] = c;
507 if (c) {
508 c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE;
509 parent->addAssociative(child: c);
510 }
511 } else {
512 sparseChunks[sectionNumber] = nullptr;
513 }
514}
515
516void ObjFile::recordPrevailingSymbolForMingw(
517 COFFSymbolRef sym, DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
518 // For comdat symbols in executable sections, where this is the copy
519 // of the section chunk we actually include instead of discarding it,
520 // add the symbol to a map to allow using it for implicitly
521 // associating .[px]data$<func> sections to it.
522 // Use the suffix from the .text$<func> instead of the leader symbol
523 // name, for cases where the names differ (i386 mangling/decorations,
524 // cases where the leader is a weak symbol named .weak.func.default*).
525 int32_t sectionNumber = sym.getSectionNumber();
526 SectionChunk *sc = sparseChunks[sectionNumber];
527 if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
528 StringRef name = sc->getSectionName().split(Separator: '$').second;
529 prevailingSectionMap[name] = sectionNumber;
530 }
531}
532
533void ObjFile::maybeAssociateSEHForMingw(
534 COFFSymbolRef sym, const coff_aux_section_definition *def,
535 const DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
536 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
537 if (name.consume_front(Prefix: ".pdata$") || name.consume_front(Prefix: ".xdata$") ||
538 name.consume_front(Prefix: ".eh_frame$")) {
539 // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
540 // associative to the symbol <func>.
541 auto parentSym = prevailingSectionMap.find(Val: name);
542 if (parentSym != prevailingSectionMap.end())
543 readAssociativeDefinition(sym, def, parentIndex: parentSym->second);
544 }
545}
546
547Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
548 SectionChunk *sc = sparseChunks[sym.getSectionNumber()];
549 if (sym.isExternal()) {
550 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
551 if (sc)
552 return symtab.addRegular(f: this, n: name, s: sym.getGeneric(), c: sc,
553 sectionOffset: sym.getValue());
554 // For MinGW symbols named .weak.* that point to a discarded section,
555 // don't create an Undefined symbol. If nothing ever refers to the symbol,
556 // everything should be fine. If something actually refers to the symbol
557 // (e.g. the undefined weak alias), linking will fail due to undefined
558 // references at the end.
559 if (symtab.ctx.config.mingw && name.starts_with(Prefix: ".weak."))
560 return nullptr;
561 return symtab.addUndefined(name, f: this, overrideLazy: false);
562 }
563 if (sc) {
564 const coff_symbol_generic *symGen = sym.getGeneric();
565 if (sym.isSection()) {
566 auto *customSymGen = cloneSymbol(sym);
567 customSymGen->Value = 0;
568 symGen = customSymGen;
569 }
570 return make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
571 /*IsExternal*/ args: false, args&: symGen, args&: sc);
572 }
573 return nullptr;
574}
575
576void ObjFile::initializeSymbols() {
577 uint32_t numSymbols = coffObj->getNumberOfSymbols();
578 symbols.resize(new_size: numSymbols);
579
580 SmallVector<std::pair<Symbol *, const coff_aux_weak_external *>, 8>
581 weakAliases;
582 std::vector<uint32_t> pendingIndexes;
583 pendingIndexes.reserve(n: numSymbols);
584
585 DenseMap<StringRef, uint32_t> prevailingSectionMap;
586 std::vector<const coff_aux_section_definition *> comdatDefs(
587 coffObj->getNumberOfSections() + 1);
588 COFFLinkerContext &ctx = symtab.ctx;
589
590 for (uint32_t i = 0; i < numSymbols; ++i) {
591 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
592 bool prevailingComdat;
593 if (coffSym.isUndefined()) {
594 symbols[i] = createUndefined(sym: coffSym, overrideLazy: false);
595 } else if (coffSym.isWeakExternal()) {
596 auto aux = coffSym.getAux<coff_aux_weak_external>();
597 bool overrideLazy = true;
598
599 // On ARM64EC, external function calls emit a pair of weak-dependency
600 // aliases: func to #func and #func to the func guess exit thunk
601 // (instead of a single undefined func symbol, which would be emitted on
602 // other targets). Allow such aliases to be overridden by lazy archive
603 // symbols, just as we would for undefined symbols.
604 if (isArm64EC(Machine: getMachineType()) &&
605 aux->Characteristics == IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY) {
606 COFFSymbolRef targetSym = check(e: coffObj->getSymbol(index: aux->TagIndex));
607 if (!targetSym.isAnyUndefined()) {
608 // If the target is defined, it may be either a guess exit thunk or
609 // the actual implementation. If it's the latter, consider the alias
610 // to be part of the implementation and override potential lazy
611 // archive symbols.
612 StringRef targetName = check(e: coffObj->getSymbolName(Symbol: targetSym));
613 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
614 std::optional<std::string> mangledName =
615 getArm64ECMangledFunctionName(Name: name);
616 overrideLazy = mangledName == targetName;
617 } else {
618 overrideLazy = false;
619 }
620 }
621 symbols[i] = createUndefined(sym: coffSym, overrideLazy);
622 weakAliases.emplace_back(Args&: symbols[i], Args&: aux);
623 } else if (std::optional<Symbol *> optSym =
624 createDefined(sym: coffSym, comdatDefs, prevailingComdat)) {
625 symbols[i] = *optSym;
626 if (ctx.config.mingw && prevailingComdat)
627 recordPrevailingSymbolForMingw(sym: coffSym, prevailingSectionMap);
628 } else {
629 // createDefined() returns std::nullopt if a symbol belongs to a section
630 // that was pending at the point when the symbol was read. This can happen
631 // in two cases:
632 // 1) section definition symbol for a comdat leader;
633 // 2) symbol belongs to a comdat section associated with another section.
634 // In both of these cases, we can expect the section to be resolved by
635 // the time we finish visiting the remaining symbols in the symbol
636 // table. So we postpone the handling of this symbol until that time.
637 pendingIndexes.push_back(x: i);
638 }
639 i += coffSym.getNumberOfAuxSymbols();
640 }
641
642 for (uint32_t i : pendingIndexes) {
643 COFFSymbolRef sym = check(e: coffObj->getSymbol(index: i));
644 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
645 if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
646 readAssociativeDefinition(sym, def);
647 else if (ctx.config.mingw)
648 maybeAssociateSEHForMingw(sym, def, prevailingSectionMap);
649 }
650 if (sparseChunks[sym.getSectionNumber()] == pendingComdat) {
651 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
652 Log(ctx) << "comdat section " << name
653 << " without leader and unassociated, discarding";
654 continue;
655 }
656 symbols[i] = createRegular(sym);
657 }
658
659 for (auto &kv : weakAliases) {
660 Symbol *sym = kv.first;
661 const coff_aux_weak_external *aux = kv.second;
662 checkAndSetWeakAlias(symtab, f: this, source: sym, target: symbols[aux->TagIndex],
663 isAntiDep: aux->Characteristics ==
664 IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY);
665 }
666
667 // Free the memory used by sparseChunks now that symbol loading is finished.
668 decltype(sparseChunks)().swap(x&: sparseChunks);
669}
670
671Symbol *ObjFile::createUndefined(COFFSymbolRef sym, bool overrideLazy) {
672 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
673 Symbol *s = symtab.addUndefined(name, f: this, overrideLazy);
674
675 // Add an anti-dependency alias for undefined AMD64 symbols on the ARM64EC
676 // target.
677 if (symtab.isEC() && getMachineType() == AMD64) {
678 auto u = dyn_cast<Undefined>(Val: s);
679 if (u && !u->weakAlias) {
680 if (std::optional<std::string> mangledName =
681 getArm64ECMangledFunctionName(Name: name)) {
682 Symbol *m = symtab.addUndefined(name: saver().save(S: *mangledName), f: this,
683 /*overrideLazy=*/false);
684 u->setWeakAlias(sym: m, /*antiDep=*/true);
685 }
686 }
687 }
688 return s;
689}
690
691static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj,
692 int32_t section) {
693 uint32_t numSymbols = obj->getNumberOfSymbols();
694 for (uint32_t i = 0; i < numSymbols; ++i) {
695 COFFSymbolRef sym = check(e: obj->getSymbol(index: i));
696 if (sym.getSectionNumber() != section)
697 continue;
698 if (const coff_aux_section_definition *def = sym.getSectionDefinition())
699 return def;
700 }
701 return nullptr;
702}
703
704void ObjFile::handleComdatSelection(
705 COFFSymbolRef sym, COMDATType &selection, bool &prevailing,
706 DefinedRegular *leader,
707 const llvm::object::coff_aux_section_definition *def) {
708 if (prevailing)
709 return;
710 // There's already an existing comdat for this symbol: `Leader`.
711 // Use the comdats's selection field to determine if the new
712 // symbol in `Sym` should be discarded, produce a duplicate symbol
713 // error, etc.
714
715 SectionChunk *leaderChunk = leader->getChunk();
716 COMDATType leaderSelection = leaderChunk->selection;
717 COFFLinkerContext &ctx = symtab.ctx;
718
719 assert(leader->data && "Comdat leader without SectionChunk?");
720 if (isa<BitcodeFile>(Val: leader->file)) {
721 // If the leader is only a LTO symbol, we don't know e.g. its final size
722 // yet, so we can't do the full strict comdat selection checking yet.
723 selection = leaderSelection = IMAGE_COMDAT_SELECT_ANY;
724 }
725
726 if ((selection == IMAGE_COMDAT_SELECT_ANY &&
727 leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) ||
728 (selection == IMAGE_COMDAT_SELECT_LARGEST &&
729 leaderSelection == IMAGE_COMDAT_SELECT_ANY)) {
730 // cl.exe picks "any" for vftables when building with /GR- and
731 // "largest" when building with /GR. To be able to link object files
732 // compiled with each flag, "any" and "largest" are merged as "largest".
733 leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST;
734 }
735
736 // GCCs __declspec(selectany) doesn't actually pick "any" but "same size as".
737 // Clang on the other hand picks "any". To be able to link two object files
738 // with a __declspec(selectany) declaration, one compiled with gcc and the
739 // other with clang, we merge them as proper "same size as"
740 if (ctx.config.mingw && ((selection == IMAGE_COMDAT_SELECT_ANY &&
741 leaderSelection == IMAGE_COMDAT_SELECT_SAME_SIZE) ||
742 (selection == IMAGE_COMDAT_SELECT_SAME_SIZE &&
743 leaderSelection == IMAGE_COMDAT_SELECT_ANY))) {
744 leaderSelection = selection = IMAGE_COMDAT_SELECT_SAME_SIZE;
745 }
746
747 // Other than that, comdat selections must match. This is a bit more
748 // strict than link.exe which allows merging "any" and "largest" if "any"
749 // is the first symbol the linker sees, and it allows merging "largest"
750 // with everything (!) if "largest" is the first symbol the linker sees.
751 // Making this symmetric independent of which selection is seen first
752 // seems better though.
753 // (This behavior matches ModuleLinker::getComdatResult().)
754 if (selection != leaderSelection) {
755 Log(ctx) << "conflicting comdat type for " << symtab.printSymbol(sym: leader)
756 << ": " << (int)leaderSelection << " in " << leader->getFile()
757 << " and " << (int)selection << " in " << this;
758 symtab.reportDuplicate(existing: leader, newFile: this);
759 return;
760 }
761
762 switch (selection) {
763 case IMAGE_COMDAT_SELECT_NODUPLICATES:
764 symtab.reportDuplicate(existing: leader, newFile: this);
765 break;
766
767 case IMAGE_COMDAT_SELECT_ANY:
768 // Nothing to do.
769 break;
770
771 case IMAGE_COMDAT_SELECT_SAME_SIZE:
772 if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) {
773 if (!ctx.config.mingw) {
774 symtab.reportDuplicate(existing: leader, newFile: this);
775 } else {
776 const coff_aux_section_definition *leaderDef = nullptr;
777 if (leaderChunk->file)
778 leaderDef = findSectionDef(obj: leaderChunk->file->getCOFFObj(),
779 section: leaderChunk->getSectionNumber());
780 if (!leaderDef || leaderDef->Length != def->Length)
781 symtab.reportDuplicate(existing: leader, newFile: this);
782 }
783 }
784 break;
785
786 case IMAGE_COMDAT_SELECT_EXACT_MATCH: {
787 SectionChunk newChunk(this, getSection(sym));
788 // link.exe only compares section contents here and doesn't complain
789 // if the two comdat sections have e.g. different alignment.
790 // Match that.
791 if (leaderChunk->getContents() != newChunk.getContents())
792 symtab.reportDuplicate(existing: leader, newFile: this, newSc: &newChunk, newSectionOffset: sym.getValue());
793 break;
794 }
795
796 case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
797 // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
798 // (This means lld-link doesn't produce duplicate symbol errors for
799 // associative comdats while link.exe does, but associate comdats
800 // are never extern in practice.)
801 llvm_unreachable("createDefined not called for associative comdats");
802
803 case IMAGE_COMDAT_SELECT_LARGEST:
804 if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) {
805 // Replace the existing comdat symbol with the new one.
806 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
807 // FIXME: This is incorrect: With /opt:noref, the previous sections
808 // make it into the final executable as well. Correct handling would
809 // be to undo reading of the whole old section that's being replaced,
810 // or doing one pass that determines what the final largest comdat
811 // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
812 // only the largest one.
813 replaceSymbol<DefinedRegular>(s: leader, arg: this, arg&: name, /*IsCOMDAT*/ arg: true,
814 /*IsExternal*/ arg: true, arg: sym.getGeneric(),
815 arg: nullptr);
816 prevailing = true;
817 }
818 break;
819
820 case IMAGE_COMDAT_SELECT_NEWEST:
821 llvm_unreachable("should have been rejected earlier");
822 }
823}
824
825std::optional<Symbol *> ObjFile::createDefined(
826 COFFSymbolRef sym,
827 std::vector<const coff_aux_section_definition *> &comdatDefs,
828 bool &prevailing) {
829 prevailing = false;
830 auto getName = [&]() { return check(e: coffObj->getSymbolName(Symbol: sym)); };
831
832 if (sym.isCommon()) {
833 auto *c = make<CommonChunk>(args&: sym);
834 chunks.push_back(x: c);
835 return symtab.addCommon(f: this, n: getName(), size: sym.getValue(), s: sym.getGeneric(),
836 c);
837 }
838
839 COFFLinkerContext &ctx = symtab.ctx;
840 if (sym.isAbsolute()) {
841 StringRef name = getName();
842
843 if (name == "@feat.00")
844 feat00Flags = sym.getValue();
845 // Skip special symbols.
846 if (ignoredSymbolName(name))
847 return nullptr;
848
849 if (sym.isExternal())
850 return symtab.addAbsolute(n: name, s: sym);
851 return make<DefinedAbsolute>(args&: ctx, args&: name, args&: sym);
852 }
853
854 int32_t sectionNumber = sym.getSectionNumber();
855 if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
856 return nullptr;
857
858 if (sym.isEmptySectionDeclaration()) {
859 // As there is no coff_section in the object file for these, make a
860 // new virtual one, with everything zeroed out (i.e. an empty section),
861 // with only the name and characteristics set.
862 StringRef name = getName();
863 auto *hdr = make<coff_section>();
864 memset(s: hdr, c: 0, n: sizeof(*hdr));
865 strncpy(dest: hdr->Name, src: name.data(),
866 n: std::min(a: name.size(), b: (size_t)COFF::NameSize));
867 // The Value field in a section symbol may contain the characteristics,
868 // or it may be zero, where we make something up (that matches what is
869 // used in .idata sections in the regular object files in import libraries).
870 if (sym.getValue())
871 hdr->Characteristics = sym.getValue() | IMAGE_SCN_ALIGN_4BYTES;
872 else
873 hdr->Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
874 IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE |
875 IMAGE_SCN_ALIGN_4BYTES;
876 auto *sc = make<SectionChunk>(args: this, args&: hdr);
877 chunks.push_back(x: sc);
878
879 auto *symGen = cloneSymbol(sym);
880 // Ignore the Value offset of these symbols, as it may be a bitmask.
881 symGen->Value = 0;
882 return make<DefinedRegular>(args: this, /*name=*/args: "", /*isCOMDAT=*/args: false,
883 /*isExternal=*/args: false, args&: symGen, args&: sc);
884 }
885
886 if (llvm::COFF::isReservedSectionNumber(SectionNumber: sectionNumber))
887 Fatal(ctx) << toString(file: this) << ": " << getName()
888 << " should not refer to special section "
889 << Twine(sectionNumber);
890
891 if ((uint32_t)sectionNumber >= sparseChunks.size())
892 Fatal(ctx) << toString(file: this) << ": " << getName()
893 << " should not refer to non-existent section "
894 << Twine(sectionNumber);
895
896 // Comdat handling.
897 // A comdat symbol consists of two symbol table entries.
898 // The first symbol entry has the name of the section (e.g. .text), fixed
899 // values for the other fields, and one auxiliary record.
900 // The second symbol entry has the name of the comdat symbol, called the
901 // "comdat leader".
902 // When this function is called for the first symbol entry of a comdat,
903 // it sets comdatDefs and returns std::nullopt, and when it's called for the
904 // second symbol entry it reads comdatDefs and then sets it back to nullptr.
905
906 // Handle comdat leader.
907 if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) {
908 comdatDefs[sectionNumber] = nullptr;
909 DefinedRegular *leader;
910
911 if (sym.isExternal()) {
912 std::tie(args&: leader, args&: prevailing) =
913 symtab.addComdat(f: this, n: getName(), s: sym.getGeneric());
914 } else {
915 leader = make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
916 /*IsExternal*/ args: false, args: sym.getGeneric());
917 prevailing = true;
918 }
919
920 if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES ||
921 // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
922 // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
923 def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) {
924 Fatal(ctx) << "unknown comdat type "
925 << std::to_string(val: (int)def->Selection) << " for " << getName()
926 << " in " << toString(file: this);
927 }
928 COMDATType selection = (COMDATType)def->Selection;
929
930 if (leader->isCOMDAT)
931 handleComdatSelection(sym, selection, prevailing, leader, def);
932
933 if (prevailing) {
934 SectionChunk *c = readSection(sectionNumber, def, leaderName: getName());
935 sparseChunks[sectionNumber] = c;
936 if (!c)
937 return nullptr;
938 c->sym = cast<DefinedRegular>(Val: leader);
939 c->selection = selection;
940 cast<DefinedRegular>(Val: leader)->data = &c->repl;
941 } else {
942 sparseChunks[sectionNumber] = nullptr;
943 }
944 return leader;
945 }
946
947 // Prepare to handle the comdat leader symbol by setting the section's
948 // ComdatDefs pointer if we encounter a non-associative comdat.
949 if (sparseChunks[sectionNumber] == pendingComdat) {
950 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
951 if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE)
952 comdatDefs[sectionNumber] = def;
953 }
954 return std::nullopt;
955 }
956
957 return createRegular(sym);
958}
959
960MachineTypes ObjFile::getMachineType() const {
961 return static_cast<MachineTypes>(coffObj->getMachine());
962}
963
964ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
965 if (SectionChunk *sec = SectionChunk::findByName(sections: debugChunks, name: secName))
966 return sec->consumeDebugMagic();
967 return {};
968}
969
970// OBJ files systematically store critical information in a .debug$S stream,
971// even if the TU was compiled with no debug info. At least two records are
972// always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
973// PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
974// currently used to initialize the hotPatchable member.
975void ObjFile::initializeFlags() {
976 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$S");
977 if (data.empty())
978 return;
979
980 DebugSubsectionArray subsections;
981
982 BinaryStreamReader reader(data, llvm::endianness::little);
983 ExitOnError exitOnErr;
984 exitOnErr(reader.readArray(Array&: subsections, Size: data.size()));
985
986 for (const DebugSubsectionRecord &ss : subsections) {
987 if (ss.kind() != DebugSubsectionKind::Symbols)
988 continue;
989
990 unsigned offset = 0;
991
992 // Only parse the first two records. We are only looking for S_OBJNAME
993 // and S_COMPILE3, and they usually appear at the beginning of the
994 // stream.
995 for (unsigned i = 0; i < 2; ++i) {
996 Expected<CVSymbol> sym = readSymbolFromStream(Stream: ss.getRecordData(), Offset: offset);
997 if (!sym) {
998 consumeError(Err: sym.takeError());
999 return;
1000 }
1001 if (sym->kind() == SymbolKind::S_COMPILE3) {
1002 auto cs =
1003 cantFail(ValOrErr: SymbolDeserializer::deserializeAs<Compile3Sym>(Symbol: sym.get()));
1004 hotPatchable =
1005 (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None;
1006 }
1007 if (sym->kind() == SymbolKind::S_OBJNAME) {
1008 auto objName = cantFail(ValOrErr: SymbolDeserializer::deserializeAs<ObjNameSym>(
1009 Symbol: sym.get()));
1010 if (objName.Signature)
1011 pchSignature = objName.Signature;
1012 }
1013 offset += sym->length();
1014 }
1015 }
1016}
1017
1018// Depending on the compilation flags, OBJs can refer to external files,
1019// necessary to merge this OBJ into the final PDB. We currently support two
1020// types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
1021// And PDB type servers, when compiling with /Zi. This function extracts these
1022// dependencies and makes them available as a TpiSource interface (see
1023// DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
1024// output even with /Yc and /Yu and with /Zi.
1025void ObjFile::initializeDependencies() {
1026 COFFLinkerContext &ctx = symtab.ctx;
1027 if (!ctx.config.debug)
1028 return;
1029
1030 bool isPCH = false;
1031
1032 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$P");
1033 if (!data.empty())
1034 isPCH = true;
1035 else
1036 data = getDebugSection(secName: ".debug$T");
1037
1038 // symbols but no types, make a plain, empty TpiSource anyway, because it
1039 // simplifies adding the symbols later.
1040 if (data.empty()) {
1041 if (!debugChunks.empty())
1042 debugTypesObj = makeTpiSource(ctx, f: this);
1043 return;
1044 }
1045
1046 // Get the first type record. It will indicate if this object uses a type
1047 // server (/Zi) or a PCH file (/Yu).
1048 CVTypeArray types;
1049 BinaryStreamReader reader(data, llvm::endianness::little);
1050 cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
1051 CVTypeArray::Iterator firstType = types.begin();
1052 if (firstType == types.end())
1053 return;
1054
1055 // Remember the .debug$T or .debug$P section.
1056 debugTypes = data;
1057
1058 // This object file is a PCH file that others will depend on.
1059 if (isPCH) {
1060 debugTypesObj = makePrecompSource(ctx, file: this);
1061 return;
1062 }
1063
1064 // This object file was compiled with /Zi. Enqueue the PDB dependency.
1065 if (firstType->kind() == LF_TYPESERVER2) {
1066 TypeServer2Record ts = cantFail(
1067 ValOrErr: TypeDeserializer::deserializeAs<TypeServer2Record>(Data: firstType->data()));
1068 debugTypesObj = makeUseTypeServerSource(ctx, file: this, ts);
1069 enqueuePdbFile(path: ts.getName(), fromFile: this);
1070 return;
1071 }
1072
1073 // This object was compiled with /Yu. It uses types from another object file
1074 // with a matching signature.
1075 if (firstType->kind() == LF_PRECOMP) {
1076 PrecompRecord precomp = cantFail(
1077 ValOrErr: TypeDeserializer::deserializeAs<PrecompRecord>(Data: firstType->data()));
1078 // We're better off trusting the LF_PRECOMP signature. In some cases the
1079 // S_OBJNAME record doesn't contain a valid PCH signature.
1080 if (precomp.Signature)
1081 pchSignature = precomp.Signature;
1082 debugTypesObj = makeUsePrecompSource(ctx, file: this, ts: precomp);
1083 // Drop the LF_PRECOMP record from the input stream.
1084 debugTypes = debugTypes.drop_front(N: firstType->RecordData.size());
1085 return;
1086 }
1087
1088 // This is a plain old object file.
1089 debugTypesObj = makeTpiSource(ctx, f: this);
1090}
1091
1092// The casing of the PDB path stamped in the OBJ can differ from the actual path
1093// on disk. With this, we ensure to always use lowercase as a key for the
1094// pdbInputFileInstances map, at least on Windows.
1095static std::string normalizePdbPath(StringRef path) {
1096#if defined(_WIN32)
1097 return path.lower();
1098#else // LINUX
1099 return std::string(path);
1100#endif
1101}
1102
1103// If existing, return the actual PDB path on disk.
1104static std::optional<std::string>
1105findPdbPath(StringRef pdbPath, ObjFile *dependentFile, StringRef outputPath) {
1106 // Ensure the file exists before anything else. In some cases, if the path
1107 // points to a removable device, Driver::enqueuePath() would fail with an
1108 // error (EAGAIN, "resource unavailable try again") which we want to skip
1109 // silently.
1110 if (llvm::sys::fs::exists(Path: pdbPath))
1111 return normalizePdbPath(path: pdbPath);
1112
1113 StringRef objPath = !dependentFile->parentName.empty()
1114 ? dependentFile->parentName
1115 : dependentFile->getName();
1116
1117 // Currently, type server PDBs are only created by MSVC cl, which only runs
1118 // on Windows, so we can assume type server paths are Windows style.
1119 StringRef pdbName = sys::path::filename(path: pdbPath, style: sys::path::Style::windows);
1120
1121 // Check if the PDB is in the same folder as the OBJ.
1122 SmallString<128> path;
1123 sys::path::append(path, a: sys::path::parent_path(path: objPath), b: pdbName);
1124 if (llvm::sys::fs::exists(Path: path))
1125 return normalizePdbPath(path);
1126
1127 // Check if the PDB is in the output folder.
1128 path.clear();
1129 sys::path::append(path, a: sys::path::parent_path(path: outputPath), b: pdbName);
1130 if (llvm::sys::fs::exists(Path: path))
1131 return normalizePdbPath(path);
1132
1133 return std::nullopt;
1134}
1135
1136PDBInputFile::PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1137 : InputFile(ctx.symtab, PDBKind, m) {}
1138
1139PDBInputFile::~PDBInputFile() = default;
1140
1141PDBInputFile *PDBInputFile::findFromRecordPath(const COFFLinkerContext &ctx,
1142 StringRef path,
1143 ObjFile *fromFile) {
1144 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: ctx.config.outputFile);
1145 if (!p)
1146 return nullptr;
1147 auto it = ctx.pdbInputFileInstances.find(x: *p);
1148 if (it != ctx.pdbInputFileInstances.end())
1149 return it->second;
1150 return nullptr;
1151}
1152
1153void PDBInputFile::parse() {
1154 symtab.ctx.pdbInputFileInstances[mb.getBufferIdentifier().str()] = this;
1155
1156 std::unique_ptr<pdb::IPDBSession> thisSession;
1157 Error E = pdb::NativeSession::createFromPdb(
1158 MB: MemoryBuffer::getMemBuffer(Ref: mb, RequiresNullTerminator: false), Session&: thisSession);
1159 if (E) {
1160 loadErrorStr.emplace(args: toString(E: std::move(E)));
1161 return; // fail silently at this point - the error will be handled later,
1162 // when merging the debug type stream
1163 }
1164
1165 session.reset(p: static_cast<pdb::NativeSession *>(thisSession.release()));
1166
1167 pdb::PDBFile &pdbFile = session->getPDBFile();
1168 auto expectedInfo = pdbFile.getPDBInfoStream();
1169 // All PDB Files should have an Info stream.
1170 if (!expectedInfo) {
1171 loadErrorStr.emplace(args: toString(E: expectedInfo.takeError()));
1172 return;
1173 }
1174 debugTypesObj = makeTypeServerSource(ctx&: symtab.ctx, pdbInputFile: this);
1175}
1176
1177// Used only for DWARF debug info, which is not common (except in MinGW
1178// environments). This returns an optional pair of file name and line
1179// number for where the variable was defined.
1180std::optional<std::pair<StringRef, uint32_t>>
1181ObjFile::getVariableLocation(StringRef var) {
1182 if (!dwarf) {
1183 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1184 if (!dwarf)
1185 return std::nullopt;
1186 }
1187 if (symtab.machine == I386)
1188 var.consume_front(Prefix: "_");
1189 std::optional<std::pair<std::string, unsigned>> ret =
1190 dwarf->getVariableLoc(name: var);
1191 if (!ret)
1192 return std::nullopt;
1193 return std::make_pair(x: saver().save(S: ret->first), y&: ret->second);
1194}
1195
1196// Used only for DWARF debug info, which is not common (except in MinGW
1197// environments).
1198std::optional<DILineInfo> ObjFile::getDILineInfo(uint32_t offset,
1199 uint32_t sectionIndex) {
1200 if (!dwarf) {
1201 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1202 if (!dwarf)
1203 return std::nullopt;
1204 }
1205
1206 return dwarf->getDILineInfo(offset, sectionIndex);
1207}
1208
1209void ObjFile::enqueuePdbFile(StringRef path, ObjFile *fromFile) {
1210 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: symtab.ctx.config.outputFile);
1211 if (!p)
1212 return;
1213 auto it = symtab.ctx.pdbInputFileInstances.emplace(args&: *p, args: nullptr);
1214 if (!it.second)
1215 return; // already scheduled for load
1216 symtab.ctx.driver.enqueuePDB(Path: *p);
1217}
1218
1219ImportFile::ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1220 : InputFile(ctx.getSymtab(machine: getMachineType(m)), ImportKind, m),
1221 live(!ctx.config.doGC) {}
1222
1223MachineTypes ImportFile::getMachineType(MemoryBufferRef m) {
1224 uint16_t machine =
1225 reinterpret_cast<const coff_import_header *>(m.getBufferStart())->Machine;
1226 return MachineTypes(machine);
1227}
1228
1229bool ImportFile::isSameImport(const ImportFile *other) const {
1230 if (!externalName.empty())
1231 return other->externalName == externalName;
1232 return hdr->OrdinalHint == other->hdr->OrdinalHint;
1233}
1234
1235ImportThunkChunk *ImportFile::makeImportThunk() {
1236 switch (hdr->Machine) {
1237 case AMD64:
1238 return make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym);
1239 case I386:
1240 return make<ImportThunkChunkX86>(args&: symtab.ctx, args&: impSym);
1241 case ARM64:
1242 return make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impSym, args: ARM64);
1243 case ARMNT:
1244 return make<ImportThunkChunkARM>(args&: symtab.ctx, args&: impSym);
1245 }
1246 llvm_unreachable("unknown machine type");
1247}
1248
1249void ImportFile::parse() {
1250 const auto *hdr =
1251 reinterpret_cast<const coff_import_header *>(mb.getBufferStart());
1252
1253 // Check if the total size is valid.
1254 if (mb.getBufferSize() < sizeof(*hdr) ||
1255 mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData)
1256 Fatal(ctx&: symtab.ctx) << "broken import library";
1257
1258 // Read names and create an __imp_ symbol.
1259 StringRef buf = mb.getBuffer().substr(Start: sizeof(*hdr));
1260 auto split = buf.split(Separator: '\0');
1261 buf = split.second;
1262 StringRef name;
1263 if (isArm64EC(Machine: hdr->Machine)) {
1264 if (std::optional<std::string> demangledName =
1265 getArm64ECDemangledFunctionName(Name: split.first))
1266 name = saver().save(S: *demangledName);
1267 }
1268 if (name.empty())
1269 name = saver().save(S: split.first);
1270 StringRef impName = saver().save(S: "__imp_" + name);
1271 dllName = buf.split(Separator: '\0').first;
1272 StringRef extName;
1273 switch (hdr->getNameType()) {
1274 case IMPORT_ORDINAL:
1275 extName = "";
1276 break;
1277 case IMPORT_NAME:
1278 extName = name;
1279 break;
1280 case IMPORT_NAME_NOPREFIX:
1281 extName = ltrim1(s: name, chars: "?@_");
1282 break;
1283 case IMPORT_NAME_UNDECORATE:
1284 extName = ltrim1(s: name, chars: "?@_");
1285 extName = extName.substr(Start: 0, N: extName.find(C: '@'));
1286 break;
1287 case IMPORT_NAME_EXPORTAS:
1288 extName = buf.substr(Start: dllName.size() + 1).split(Separator: '\0').first;
1289 break;
1290 }
1291
1292 this->hdr = hdr;
1293 externalName = extName;
1294
1295 bool isCode = hdr->getType() == llvm::COFF::IMPORT_CODE;
1296
1297 if (!symtab.isEC()) {
1298 impSym = symtab.addImportData(n: impName, f: this, location);
1299 } else {
1300 // In addition to the regular IAT, ARM64EC also contains an auxiliary IAT,
1301 // which holds addresses that are guaranteed to be callable directly from
1302 // ARM64 code. Function symbol naming is swapped: __imp_ symbols refer to
1303 // the auxiliary IAT, while __imp_aux_ symbols refer to the regular IAT. For
1304 // data imports, the naming is reversed.
1305 StringRef auxImpName = saver().save(S: "__imp_aux_" + name);
1306 if (isCode) {
1307 impSym = symtab.addImportData(n: auxImpName, f: this, location);
1308 impECSym = symtab.addImportData(n: impName, f: this, location&: auxLocation);
1309 } else {
1310 impSym = symtab.addImportData(n: impName, f: this, location);
1311 impECSym = symtab.addImportData(n: auxImpName, f: this, location&: auxLocation);
1312 }
1313 if (!impECSym)
1314 return;
1315
1316 StringRef auxImpCopyName = saver().save(S: "__auximpcopy_" + name);
1317 auxImpCopySym = symtab.addImportData(n: auxImpCopyName, f: this, location&: auxCopyLocation);
1318 if (!auxImpCopySym)
1319 return;
1320 }
1321 // If this was a duplicate, we logged an error but may continue;
1322 // in this case, impSym is nullptr.
1323 if (!impSym)
1324 return;
1325
1326 if (hdr->getType() == llvm::COFF::IMPORT_CONST)
1327 static_cast<void>(symtab.addImportData(n: name, f: this, location));
1328
1329 // If type is function, we need to create a thunk which jump to an
1330 // address pointed by the __imp_ symbol. (This allows you to call
1331 // DLL functions just like regular non-DLL functions.)
1332 if (isCode) {
1333 if (!symtab.isEC()) {
1334 thunkSym = symtab.addImportThunk(name, s: impSym, chunk: makeImportThunk());
1335 } else {
1336 thunkSym = symtab.addImportThunk(
1337 name, s: impSym, chunk: make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym));
1338
1339 if (std::optional<std::string> mangledName =
1340 getArm64ECMangledFunctionName(Name: name)) {
1341 StringRef auxThunkName = saver().save(S: *mangledName);
1342 auxThunkSym = symtab.addImportThunk(
1343 name: auxThunkName, s: impECSym,
1344 chunk: make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impECSym, args: ARM64EC));
1345 }
1346
1347 StringRef impChkName = saver().save(S: "__impchk_" + name);
1348 impchkThunk = make<ImportThunkChunkARM64EC>(args: this);
1349 impchkThunk->sym = symtab.addImportThunk(name: impChkName, s: impSym, chunk: impchkThunk);
1350 symtab.ctx.driver.pullArm64ECIcallHelper();
1351 }
1352 }
1353}
1354
1355BitcodeFile::BitcodeFile(SymbolTable &symtab, MemoryBufferRef mb,
1356 std::unique_ptr<lto::InputFile> &o, bool lazy)
1357 : InputFile(symtab, BitcodeKind, mb, lazy) {
1358 obj.swap(u&: o);
1359}
1360
1361BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
1362 StringRef archiveName,
1363 uint64_t offsetInArchive, bool lazy) {
1364 std::string path = mb.getBufferIdentifier().str();
1365 if (ctx.config.thinLTOIndexOnly)
1366 path = replaceThinLTOSuffix(path: mb.getBufferIdentifier(),
1367 suffix: ctx.config.thinLTOObjectSuffixReplace.first,
1368 repl: ctx.config.thinLTOObjectSuffixReplace.second);
1369
1370 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1371 // name. If two archives define two members with the same name, this
1372 // causes a collision which result in only one of the objects being taken
1373 // into consideration at LTO time (which very likely causes undefined
1374 // symbols later in the link stage). So we append file offset to make
1375 // filename unique.
1376 MemoryBufferRef mbref(mb.getBuffer(),
1377 saver().save(S: archiveName.empty()
1378 ? path
1379 : archiveName +
1380 sys::path::filename(path) +
1381 utostr(X: offsetInArchive)));
1382
1383 std::unique_ptr<lto::InputFile> obj = check(e: lto::InputFile::create(Object: mbref));
1384 obj->setArchivePathAndName(Path: archiveName, Name: mb.getBufferIdentifier());
1385 return make<BitcodeFile>(args&: ctx.getSymtab(machine: getMachineType(obj: obj.get())), args&: mb, args&: obj,
1386 args&: lazy);
1387}
1388
1389BitcodeFile::~BitcodeFile() = default;
1390
1391void BitcodeFile::parse() {
1392 llvm::StringSaver &saver = lld::saver();
1393
1394 std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
1395 for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
1396 // FIXME: Check nodeduplicate
1397 comdat[i] =
1398 symtab.addComdat(f: this, n: saver.save(S: obj->getComdatTable()[i].first));
1399 Triple tt(obj->getTargetTriple());
1400 RTLIB::RuntimeLibcallsInfo libcalls(tt);
1401 for (const lto::InputFile::Symbol &objSym : obj->symbols()) {
1402 StringRef symName = saver.save(S: objSym.getName());
1403 int comdatIndex = objSym.getComdatIndex();
1404 Symbol *sym;
1405 SectionChunk *fakeSC = nullptr;
1406 if (objSym.isExecutable())
1407 fakeSC = &symtab.ctx.ltoTextSectionChunk.chunk;
1408 else
1409 fakeSC = &symtab.ctx.ltoDataSectionChunk.chunk;
1410 if (objSym.isUndefined()) {
1411 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1412 if (objSym.isWeak())
1413 sym->deferUndefined = true;
1414 // If one LTO object file references (i.e. has an undefined reference to)
1415 // a symbol with an __imp_ prefix, the LTO compilation itself sees it
1416 // as unprefixed but with a dllimport attribute instead, and doesn't
1417 // understand the relation to a concrete IR symbol with the __imp_ prefix.
1418 //
1419 // For such cases, mark the symbol as used in a regular object (i.e. the
1420 // symbol must be retained) so that the linker can associate the
1421 // references in the end. If the symbol is defined in an import library
1422 // or in a regular object file, this has no effect, but if it is defined
1423 // in another LTO object file, this makes sure it is kept, to fulfill
1424 // the reference when linking the output of the LTO compilation.
1425 if (symName.starts_with(Prefix: "__imp_"))
1426 sym->isUsedInRegularObj = true;
1427 } else if (objSym.isCommon()) {
1428 sym = symtab.addCommon(f: this, n: symName, size: objSym.getCommonSize());
1429 } else if (objSym.isWeak() && objSym.isIndirect()) {
1430 // Weak external.
1431 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: true);
1432 std::string fallback = std::string(objSym.getCOFFWeakExternalFallback());
1433 Symbol *alias = symtab.addUndefined(name: saver.save(S: fallback));
1434 checkAndSetWeakAlias(symtab, f: this, source: sym, target: alias, isAntiDep: false);
1435 } else if (comdatIndex != -1) {
1436 if (symName == obj->getComdatTable()[comdatIndex].first) {
1437 sym = comdat[comdatIndex].first;
1438 if (cast<DefinedRegular>(Val: sym)->data == nullptr)
1439 cast<DefinedRegular>(Val: sym)->data = &fakeSC->repl;
1440 } else if (comdat[comdatIndex].second) {
1441 sym = symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC);
1442 } else {
1443 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1444 }
1445 } else {
1446 sym =
1447 symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC, sectionOffset: 0, isWeak: objSym.isWeak());
1448 }
1449 symbols.push_back(x: sym);
1450 if (objSym.isUsed() || objSym.isLibcall(Libcalls: libcalls))
1451 symtab.ctx.config.gcroot.push_back(x: sym);
1452 }
1453 directives = saver.save(S: obj->getCOFFLinkerOpts());
1454}
1455
1456void BitcodeFile::parseLazy() {
1457 for (const lto::InputFile::Symbol &sym : obj->symbols())
1458 if (!sym.isUndefined()) {
1459 symtab.addLazyObject(f: this, n: sym.getName());
1460 if (!lazy)
1461 return;
1462 }
1463}
1464
1465MachineTypes BitcodeFile::getMachineType(const llvm::lto::InputFile *obj) {
1466 Triple t(obj->getTargetTriple());
1467 switch (t.getArch()) {
1468 case Triple::x86_64:
1469 return AMD64;
1470 case Triple::x86:
1471 return I386;
1472 case Triple::arm:
1473 case Triple::thumb:
1474 return ARMNT;
1475 case Triple::aarch64:
1476 return t.isWindowsArm64EC() ? ARM64EC : ARM64;
1477 default:
1478 return IMAGE_FILE_MACHINE_UNKNOWN;
1479 }
1480}
1481
1482std::string lld::coff::replaceThinLTOSuffix(StringRef path, StringRef suffix,
1483 StringRef repl) {
1484 if (path.consume_back(Suffix: suffix))
1485 return (path + repl).str();
1486 return std::string(path);
1487}
1488
1489static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) {
1490 for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) {
1491 const coff_section *sec = CHECK(coffObj->getSection(i), file);
1492 if (rva >= sec->VirtualAddress &&
1493 rva <= sec->VirtualAddress + sec->VirtualSize) {
1494 return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0;
1495 }
1496 }
1497 return false;
1498}
1499
1500void DLLFile::parse() {
1501 // Parse a memory buffer as a PE-COFF executable.
1502 std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
1503
1504 if (auto *obj = dyn_cast<COFFObjectFile>(Val: bin.get())) {
1505 bin.release();
1506 coffObj.reset(p: obj);
1507 } else {
1508 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a COFF file";
1509 return;
1510 }
1511
1512 if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) {
1513 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a PE-COFF executable";
1514 return;
1515 }
1516
1517 for (const auto &exp : coffObj->export_directories()) {
1518 StringRef dllName, symbolName;
1519 uint32_t exportRVA;
1520 checkError(e: exp.getDllName(Result&: dllName));
1521 checkError(e: exp.getSymbolName(Result&: symbolName));
1522 checkError(e: exp.getExportRVA(Result&: exportRVA));
1523
1524 if (symbolName.empty())
1525 continue;
1526
1527 bool code = isRVACode(coffObj: coffObj.get(), rva: exportRVA, file: this);
1528
1529 Symbol *s = make<Symbol>();
1530 s->dllName = dllName;
1531 s->symbolName = symbolName;
1532 s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA;
1533 s->nameType = ImportNameType::IMPORT_NAME;
1534
1535 if (coffObj->getMachine() == I386) {
1536 s->symbolName = symbolName = saver().save(S: "_" + symbolName);
1537 s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
1538 }
1539
1540 StringRef impName = saver().save(S: "__imp_" + symbolName);
1541 symtab.addLazyDLLSymbol(f: this, sym: s, n: impName);
1542 if (code)
1543 symtab.addLazyDLLSymbol(f: this, sym: s, n: symbolName);
1544 if (symtab.isEC()) {
1545 StringRef impAuxName = saver().save(S: "__imp_aux_" + symbolName);
1546 symtab.addLazyDLLSymbol(f: this, sym: s, n: impAuxName);
1547
1548 if (code) {
1549 std::optional<std::string> mangledName =
1550 getArm64ECMangledFunctionName(Name: symbolName);
1551 if (mangledName)
1552 symtab.addLazyDLLSymbol(f: this, sym: s, n: *mangledName);
1553 }
1554 }
1555 }
1556}
1557
1558MachineTypes DLLFile::getMachineType() const {
1559 if (coffObj)
1560 return static_cast<MachineTypes>(coffObj->getMachine());
1561 return IMAGE_FILE_MACHINE_UNKNOWN;
1562}
1563
1564void DLLFile::makeImport(DLLFile::Symbol *s) {
1565 if (!seen.insert(key: s->symbolName).second)
1566 return;
1567
1568 size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
1569 size_t size = sizeof(coff_import_header) + impSize;
1570 char *buf = bAlloc().Allocate<char>(Num: size);
1571 memset(s: buf, c: 0, n: size);
1572 char *p = buf;
1573 auto *imp = reinterpret_cast<coff_import_header *>(p);
1574 p += sizeof(*imp);
1575 imp->Sig2 = 0xFFFF;
1576 imp->Machine = coffObj->getMachine();
1577 imp->SizeOfData = impSize;
1578 imp->OrdinalHint = 0; // Only linking by name
1579 imp->TypeInfo = (s->nameType << 2) | s->importType;
1580
1581 // Write symbol name and DLL name.
1582 memcpy(dest: p, src: s->symbolName.data(), n: s->symbolName.size());
1583 p += s->symbolName.size() + 1;
1584 memcpy(dest: p, src: s->dllName.data(), n: s->dllName.size());
1585 MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName);
1586 ImportFile *impFile = make<ImportFile>(args&: symtab.ctx, args&: mbref);
1587 symtab.ctx.driver.addFile(file: impFile);
1588}
1589