1//===- InputFiles.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "COFFLinkerContext.h"
11#include "Chunks.h"
12#include "Config.h"
13#include "DebugTypes.h"
14#include "Driver.h"
15#include "SymbolTable.h"
16#include "Symbols.h"
17#include "lld/Common/DWARF.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/BinaryFormat/COFF.h"
21#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
22#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
23#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
24#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
25#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27#include "llvm/IR/Mangler.h"
28#include "llvm/LTO/LTO.h"
29#include "llvm/Object/Binary.h"
30#include "llvm/Object/COFF.h"
31#include "llvm/Object/COFFImportFile.h"
32#include "llvm/Support/Casting.h"
33#include "llvm/Support/Endian.h"
34#include "llvm/Support/Error.h"
35#include "llvm/Support/FileSystem.h"
36#include "llvm/Support/Path.h"
37#include "llvm/TargetParser/Triple.h"
38#include <cstring>
39#include <optional>
40#include <utility>
41
42using namespace llvm;
43using namespace llvm::COFF;
44using namespace llvm::codeview;
45using namespace llvm::object;
46using namespace llvm::support::endian;
47using namespace lld;
48using namespace lld::coff;
49
50using llvm::Triple;
51using llvm::support::ulittle32_t;
52
53// Returns the last element of a path, which is supposed to be a filename.
54static StringRef getBasename(StringRef path) {
55 return sys::path::filename(path, style: sys::path::Style::windows);
56}
57
58// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
59std::string lld::toString(const coff::InputFile *file) {
60 if (!file)
61 return "<internal>";
62 if (file->parentName.empty())
63 return std::string(file->getName());
64
65 return (getBasename(path: file->parentName) + "(" + getBasename(path: file->getName()) +
66 ")")
67 .str();
68}
69
70const COFFSyncStream &coff::operator<<(const COFFSyncStream &s,
71 const InputFile *f) {
72 return s << toString(file: f);
73}
74
75/// Checks that Source is compatible with being a weak alias to Target.
76/// If Source is Undefined and has no weak alias set, makes it a weak
77/// alias to Target.
78static void checkAndSetWeakAlias(SymbolTable &symtab, InputFile *f,
79 Symbol *source, Symbol *target,
80 bool isAntiDep) {
81 if (auto *u = dyn_cast<Undefined>(Val: source)) {
82 if (u->weakAlias && u->weakAlias != target) {
83 // Ignore duplicated anti-dependency symbols.
84 if (isAntiDep)
85 return;
86 if (!u->isAntiDep) {
87 // Weak aliases as produced by GCC are named in the form
88 // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
89 // of another symbol emitted near the weak symbol.
90 if (symtab.ctx.config.allowDuplicateWeak) {
91 auto isAbsZero = [](Symbol *sym) -> bool {
92 return isa<DefinedAbsolute>(Val: sym) &&
93 dyn_cast<DefinedAbsolute>(Val: sym)->getVA() == 0;
94 };
95 // If the alias we had points at absolute zero, and we get another
96 // weak symbol which isn't absolute zero, prefer that one.
97 if (isAbsZero(u->weakAlias) && !isAbsZero(target)) {
98 u->setWeakAlias(sym: target, antiDep: isAntiDep);
99 }
100 return;
101 }
102 symtab.reportDuplicate(existing: source, newFile: f);
103 }
104 }
105 u->setWeakAlias(sym: target, antiDep: isAntiDep);
106 }
107}
108
109static bool ignoredSymbolName(StringRef name) {
110 return name == "@feat.00" || name == "@comp.id";
111}
112
113static coff_symbol_generic *cloneSymbol(COFFSymbolRef sym) {
114 if (sym.isBigObj()) {
115 auto *copy = make<coff_symbol32>(
116 args: *reinterpret_cast<const coff_symbol32 *>(sym.getRawPtr()));
117 return reinterpret_cast<coff_symbol_generic *>(copy);
118 } else {
119 auto *copy = make<coff_symbol16>(
120 args: *reinterpret_cast<const coff_symbol16 *>(sym.getRawPtr()));
121 return reinterpret_cast<coff_symbol_generic *>(copy);
122 }
123}
124
125// Skip importing DllMain thunks from import libraries.
126static bool fixupDllMain(COFFLinkerContext &ctx, llvm::object::Archive *file,
127 const Archive::Symbol &sym, bool &skipDllMain) {
128 const Archive::Child &c =
129 CHECK(sym.getMember(), file->getFileName() +
130 ": could not get the member for symbol " +
131 toCOFFString(ctx, sym));
132 MemoryBufferRef mb =
133 CHECK(c.getMemoryBufferRef(),
134 file->getFileName() +
135 ": could not get the buffer for a child buffer of the archive");
136 if (identify_magic(magic: mb.getBuffer()) == file_magic::coff_import_library) {
137 if (ctx.config.warnImportedDllMain) {
138 // We won't place DllMain symbols in the symbol table if they are
139 // coming from a import library. This message can be ignored with the flag
140 // '/ignore:importeddllmain'
141 Warn(ctx)
142 << file->getFileName()
143 << ": skipping imported DllMain symbol [importeddllmain]\nNOTE: this "
144 "might be a mistake when the DLL/library was produced.";
145 }
146 skipDllMain = true;
147 return true;
148 }
149 return false;
150}
151
152ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m,
153 std::unique_ptr<Archive> &f)
154 : InputFile(ctx.symtab, ArchiveKind, m) {
155 file.swap(u&: f);
156}
157
// Registers the symbols listed in this archive's symbol table(s) as lazy
// archive symbols. `archiveSymtab` starts out as this file's own symbol table
// and may be redirected on EC targets (see below). DllMain symbols that are
// provided by an import library member are not registered.
158void ArchiveFile::parse() {
159 COFFLinkerContext &ctx = symtab.ctx;
160 SymbolTable *archiveSymtab = &symtab;
161
162 // Try to read symbols from ECSYMBOLS section on ARM64EC.
163 if (ctx.symtab.isEC()) {
164 iterator_range<Archive::symbol_iterator> symbols =
165 CHECK(file->ec_symbols(), this);
166 if (!symbols.empty()) {
 // EC symbols are added to ctx.symtab right away; the regular symbol table
 // processed by the final loop of this function then goes to
 // ctx.hybridSymtab.
167 for (const Archive::Symbol &sym : symbols)
168 ctx.symtab.addLazyArchive(f: this, sym);
169
170 // Read both EC and native symbols on ARM64X.
171 archiveSymtab = &*ctx.hybridSymtab;
172 } else {
173 // If the ECSYMBOLS section is missing in the archive, the archive could
174 // be either a native-only ARM64 or x86_64 archive. Check the machine type
175 // of the object containing a symbol to determine which symbol table to
176 // use.
177 Archive::symbol_iterator sym = file->symbol_begin();
178 if (sym != file->symbol_end()) {
 // Only the member providing the first symbol is inspected. Member kinds
 // other than COFF object, import library and bitcode leave `machine` as
 // IMAGE_FILE_MACHINE_UNKNOWN.
179 MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN;
180 Archive::Child child =
181 CHECK(sym->getMember(),
182 file->getFileName() +
183 ": could not get the buffer for a child of the archive");
184 MemoryBufferRef mb = CHECK(
185 child.getMemoryBufferRef(),
186 file->getFileName() +
187 ": could not get the buffer for a child buffer of the archive");
188 switch (identify_magic(magic: mb.getBuffer())) {
189 case file_magic::coff_object: {
 // NOTE(review): the message below concatenates ":" and ": not a valid
 // COFF file", producing "<name>:: not a valid COFF file" -- the double
 // colon looks unintentional; confirm before changing the string.
190 std::unique_ptr<COFFObjectFile> obj =
191 CHECK(COFFObjectFile::create(mb),
192 check(child.getName()) + ":" + ": not a valid COFF file");
193 machine = MachineTypes(obj->getMachine());
194 break;
195 }
196 case file_magic::coff_import_library:
197 machine = MachineTypes(COFFImportFile(mb).getMachine());
198 break;
199 case file_magic::bitcode: {
200 std::unique_ptr<lto::InputFile> obj =
201 check(e: lto::InputFile::create(Object: mb));
202 machine = BitcodeFile::getMachineType(obj: obj.get());
203 break;
204 }
205 default:
206 break;
207 }
208 archiveSymtab = &ctx.getSymtab(machine);
209 }
210 }
211 }
212
 // `skipDllMain` is set by fixupDllMain() once an imported DllMain has been
 // found, so later DllMain symbols are skipped without re-inspecting the
 // member. Both mangled names stay empty when the check is disabled.
213 bool skipDllMain = false;
214 StringRef mangledDllMain, impMangledDllMain;
215
216 // The calls below will fail if we haven't set the machine type yet. Instead
217 // of failing, it is preferable to skip this "imported DllMain" check if we
218 // don't know the machine type at this point.
219 if (!file->isEmpty() && ctx.config.machine != IMAGE_FILE_MACHINE_UNKNOWN) {
220 mangledDllMain = archiveSymtab->mangle(sym: "DllMain");
221 impMangledDllMain = uniqueSaver().save(S: "__imp_" + mangledDllMain);
222 }
223
224 // Read the symbol table to construct Lazy objects.
225 for (const Archive::Symbol &sym : file->symbols()) {
226 // If an import library provides the DllMain symbol, skip importing it, as
227 // we should be using our own DllMain, not another DLL's DllMain.
228 if (!mangledDllMain.empty() && (sym.getName() == mangledDllMain ||
229 sym.getName() == impMangledDllMain)) {
230 if (skipDllMain || fixupDllMain(ctx, file: file.get(), sym, skipDllMain))
231 continue;
232 }
233 archiveSymtab->addLazyArchive(f: this, sym);
234 }
235}
236
237// Returns a buffer pointing to a member file containing a given symbol.
238void ArchiveFile::addMember(const Archive::Symbol &sym) {
239 const Archive::Child &c =
240 CHECK(sym.getMember(), "could not get the member for symbol " +
241 toCOFFString(symtab.ctx, sym));
242
243 // Return an empty buffer if we have already returned the same buffer.
244 // FIXME: Remove this once we resolve all defineds before all undefineds in
245 // ObjFile::initializeSymbols().
246 if (!seen.insert(V: c.getChildOffset()).second)
247 return;
248
249 symtab.ctx.driver.enqueueArchiveMember(c, sym, parentName: getName());
250}
251
252std::vector<MemoryBufferRef>
253lld::coff::getArchiveMembers(COFFLinkerContext &ctx, Archive *file) {
254 std::vector<MemoryBufferRef> v;
255 Error err = Error::success();
256
257 // Thin archives refer to .o files, so --reproduces needs the .o files too.
258 bool addToTar = file->isThin() && ctx.driver.tar;
259
260 for (const Archive::Child &c : file->children(Err&: err)) {
261 MemoryBufferRef mbref =
262 CHECK(c.getMemoryBufferRef(),
263 file->getFileName() +
264 ": could not get the buffer for a child of the archive");
265 if (addToTar) {
266 ctx.driver.tar->append(Path: relativeToRoot(path: check(e: c.getFullName())),
267 Data: mbref.getBuffer());
268 }
269 v.push_back(x: mbref);
270 }
271 if (err)
272 Fatal(ctx) << file->getFileName()
273 << ": Archive::children failed: " << toString(E: std::move(err));
274 return v;
275}
276
277ObjFile::ObjFile(SymbolTable &symtab, COFFObjectFile *coffObj, bool lazy)
278 : InputFile(symtab, ObjectKind, coffObj->getMemoryBufferRef(), lazy),
279 coffObj(coffObj) {}
280
281ObjFile *ObjFile::create(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy) {
282 // Parse a memory buffer as a COFF file.
283 Expected<std::unique_ptr<Binary>> bin = createBinary(Source: m);
284 if (!bin)
285 Fatal(ctx) << "Could not parse " << m.getBufferIdentifier();
286
287 auto *obj = dyn_cast<COFFObjectFile>(Val: bin->get());
288 if (!obj)
289 Fatal(ctx) << m.getBufferIdentifier() << " is not a COFF file";
290
291 bin->release();
292 return make<ObjFile>(args&: ctx.getSymtab(machine: MachineTypes(obj->getMachine())), args&: obj,
293 args&: lazy);
294}
295
296void ObjFile::parseLazy() {
297 // Native object file.
298 uint32_t numSymbols = coffObj->getNumberOfSymbols();
299 for (uint32_t i = 0; i < numSymbols; ++i) {
300 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
301 if (coffSym.isUndefined() || !coffSym.isExternal() ||
302 coffSym.isWeakExternal())
303 continue;
304 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
305 if (coffSym.isAbsolute() && ignoredSymbolName(name))
306 continue;
307 symtab.addLazyObject(f: this, n: name);
308 if (!lazy)
309 return;
310 i += coffSym.getNumberOfAuxSymbols();
311 }
312}
313
// One record of a .hybmp chunk, as read by ObjFile::initializeECThunks().
// Chunk contents are reinterpreted as an array of these records, so the field
// order and sizes must not change.
314struct ECMapEntry {
 // Symbol index passed as the `from` side of the thunk.
315 ulittle32_t src;
 // Symbol index passed as the `to` side of the thunk.
316 ulittle32_t dst;
 // Thunk kind; compared against Arm64ECThunkType values.
317 ulittle32_t type;
318};
319
320void ObjFile::initializeECThunks() {
321 for (SectionChunk *chunk : hybmpChunks) {
322 if (chunk->getContents().size() % sizeof(ECMapEntry)) {
323 Err(ctx&: symtab.ctx) << "Invalid .hybmp chunk size "
324 << chunk->getContents().size();
325 continue;
326 }
327
328 const uint8_t *end =
329 chunk->getContents().data() + chunk->getContents().size();
330 for (const uint8_t *iter = chunk->getContents().data(); iter != end;
331 iter += sizeof(ECMapEntry)) {
332 auto entry = reinterpret_cast<const ECMapEntry *>(iter);
333 switch (entry->type) {
334 case Arm64ECThunkType::Entry:
335 symtab.addEntryThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
336 break;
337 case Arm64ECThunkType::Exit:
338 symtab.addExitThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
339 break;
340 case Arm64ECThunkType::GuestExit:
341 break;
342 default:
343 Warn(ctx&: symtab.ctx) << "Ignoring unknown EC thunk type " << entry->type;
344 }
345 }
346 }
347}
348
349void ObjFile::parse() {
350 // Read section and symbol tables.
351 initializeChunks();
352 initializeSymbols();
353 initializeFlags();
354 initializeDependencies();
355 initializeECThunks();
356}
357
358const coff_section *ObjFile::getSection(uint32_t i) {
359 auto sec = coffObj->getSection(index: i);
360 if (!sec)
361 Fatal(ctx&: symtab.ctx) << "getSection failed: #" << i << ": " << sec.takeError();
362 return *sec;
363}
364
365// We set SectionChunk pointers in the SparseChunks vector to this value
366// temporarily to mark comdat sections as having an unknown resolution. As we
367// walk the object file's symbol table, once we visit either a leader symbol or
368// an associative section definition together with the parent comdat's leader,
369// we set the pointer to either nullptr (to mark the section as discarded) or a
370// valid SectionChunk for that section.
// The marker is the address value 1, which keeps it distinct from nullptr. It
// is a sentinel only: it is stored and compared against, never dereferenced.
371static SectionChunk *const pendingComdat = reinterpret_cast<SectionChunk *>(1);
372
373void ObjFile::initializeChunks() {
374 uint32_t numSections = coffObj->getNumberOfSections();
375 sparseChunks.resize(new_size: numSections + 1);
376 for (uint32_t i = 1; i < numSections + 1; ++i) {
377 const coff_section *sec = getSection(i);
378 if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
379 sparseChunks[i] = pendingComdat;
380 else
381 sparseChunks[i] = readSection(sectionNumber: i, def: nullptr, leaderName: "");
382 }
383}
384
// Creates the SectionChunk for section `sectionNumber` and files it into the
// appropriate per-file list.
//
// `def` is the section's auxiliary section definition, if any; when present
// its CheckSum is copied to the chunk. `leaderName` is only consulted to
// recognize string-literal sections that are eligible for tail merging.
//
// Returns nullptr (no chunk is created) for sections handled specially or
// dropped: .drectve, .llvm_addrsig, .llvm.call-graph-profile, sections listed
// in config.discardSection, .debug_* sections unless config.includeDwarfChunks
// is set, and sections flagged IMAGE_SCN_LNK_REMOVE. Failing to read the
// section name is fatal.
385SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
386 const coff_aux_section_definition *def,
387 StringRef leaderName) {
388 const coff_section *sec = getSection(i: sectionNumber);
389
390 StringRef name;
391 if (Expected<StringRef> e = coffObj->getSectionName(Sec: sec))
392 name = *e;
393 else
394 Fatal(ctx&: symtab.ctx) << "getSectionName failed: #" << sectionNumber << ": "
395 << e.takeError();
396
 // The contents of .drectve are kept as the file's `directives` string rather
 // than as a chunk.
397 if (name == ".drectve") {
398 ArrayRef<uint8_t> data;
399 cantFail(Err: coffObj->getSectionContents(Sec: sec, Res&: data));
400 directives = StringRef((const char *)data.data(), data.size());
401 return nullptr;
402 }
403
 // For the next two sections only the section header is remembered.
404 if (name == ".llvm_addrsig") {
405 addrsigSec = sec;
406 return nullptr;
407 }
408
409 if (name == ".llvm.call-graph-profile") {
410 callgraphSec = sec;
411 return nullptr;
412 }
413
414 if (symtab.ctx.config.discardSection.contains(key: name))
415 return nullptr;
416
417 // Object files may have DWARF debug info or MS CodeView debug info
418 // (or both).
419 //
420 // DWARF sections don't need any special handling from the perspective
421 // of the linker; they are just a data section containing relocations.
422 // We can just link them to complete debug info.
423 //
424 // CodeView needs linker support. We need to interpret debug info,
425 // and then write it to a separate .pdb file.
426
427 // Ignore DWARF debug info unless requested to be included.
428 if (!symtab.ctx.config.includeDwarfChunks && name.starts_with(Prefix: ".debug_"))
429 return nullptr;
430
431 if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
432 return nullptr;
 // ARM64EC objects get the SectionChunkEC flavor of the chunk.
433 SectionChunk *c;
434 if (isArm64EC(Machine: getMachineType()))
435 c = make<SectionChunkEC>(args: this, args&: sec);
436 else
437 c = make<SectionChunk>(args: this, args&: sec);
438 if (def)
439 c->checksum = def->CheckSum;
440
 // Exactly one branch of the chain below applies, so its order matters. A
 // section flagged IMAGE_SCN_LNK_INFO that matches none of the named cases is
 // not added to any list, but its chunk is still returned.
441 // CodeView sections are stored to a different vector because they are not
442 // linked in the regular manner.
443 if (c->isCodeView())
444 debugChunks.push_back(x: c);
445 else if (name == ".gfids$y")
446 guardFidChunks.push_back(x: c);
447 else if (name == ".giats$y")
448 guardIATChunks.push_back(x: c);
449 else if (name == ".gljmp$y")
450 guardLJmpChunks.push_back(x: c);
451 else if (name == ".gehcont$y")
452 guardEHContChunks.push_back(x: c);
453 else if (name == ".sxdata")
454 sxDataChunks.push_back(x: c);
455 else if (isArm64EC(Machine: getMachineType()) && name == ".hybmp$x")
456 hybmpChunks.push_back(x: c);
457 else if (symtab.ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
458 name == ".rdata" && leaderName.starts_with(Prefix: "??_C@"))
459 // COFF sections that look like string literal sections (i.e. no
460 // relocations, in .rdata, leader symbol name matches the MSVC name mangling
461 // for string literals) are subject to string tail merging.
462 MergeChunk::addSection(ctx&: symtab.ctx, c);
463 else if (name == ".rsrc" || name.starts_with(Prefix: ".rsrc$"))
464 resourceChunks.push_back(x: c);
465 else if (!(sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_INFO))
466 chunks.push_back(x: c);
467
468 return c;
469}
470
471void ObjFile::includeResourceChunks() {
472 chunks.insert(position: chunks.end(), first: resourceChunks.begin(), last: resourceChunks.end());
473}
474
475void ObjFile::readAssociativeDefinition(
476 COFFSymbolRef sym, const coff_aux_section_definition *def) {
477 readAssociativeDefinition(coffSym: sym, def, parentSection: def->getNumber(IsBigObj: sym.isBigObj()));
478}
479
480void ObjFile::readAssociativeDefinition(COFFSymbolRef sym,
481 const coff_aux_section_definition *def,
482 uint32_t parentIndex) {
483 SectionChunk *parent = sparseChunks[parentIndex];
484 int32_t sectionNumber = sym.getSectionNumber();
485
486 auto diag = [&]() {
487 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
488
489 StringRef parentName;
490 const coff_section *parentSec = getSection(i: parentIndex);
491 if (Expected<StringRef> e = coffObj->getSectionName(Sec: parentSec))
492 parentName = *e;
493 Err(ctx&: symtab.ctx) << toString(file: this) << ": associative comdat " << name
494 << " (sec " << sectionNumber
495 << ") has invalid reference to section " << parentName
496 << " (sec " << parentIndex << ")";
497 };
498
499 if (parent == pendingComdat) {
500 // This can happen if an associative comdat refers to another associative
501 // comdat that appears after it (invalid per COFF spec) or to a section
502 // without any symbols.
503 diag();
504 return;
505 }
506
507 // Check whether the parent is prevailing. If it is, so are we, and we read
508 // the section; otherwise mark it as discarded.
509 if (parent) {
510 SectionChunk *c = readSection(sectionNumber, def, leaderName: "");
511 sparseChunks[sectionNumber] = c;
512 if (c) {
513 c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE;
514 parent->addAssociative(child: c);
515 }
516 } else {
517 sparseChunks[sectionNumber] = nullptr;
518 }
519}
520
521void ObjFile::recordPrevailingSymbolForMingw(
522 COFFSymbolRef sym, DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
523 // For comdat symbols in executable sections, where this is the copy
524 // of the section chunk we actually include instead of discarding it,
525 // add the symbol to a map to allow using it for implicitly
526 // associating .[px]data$<func> sections to it.
527 // Use the suffix from the .text$<func> instead of the leader symbol
528 // name, for cases where the names differ (i386 mangling/decorations,
529 // cases where the leader is a weak symbol named .weak.func.default*).
530 int32_t sectionNumber = sym.getSectionNumber();
531 SectionChunk *sc = sparseChunks[sectionNumber];
532 if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
533 StringRef name = sc->getSectionName().split(Separator: '$').second;
534 prevailingSectionMap[name] = sectionNumber;
535 }
536}
537
538void ObjFile::maybeAssociateSEHForMingw(
539 COFFSymbolRef sym, const coff_aux_section_definition *def,
540 const DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
541 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
542 if (name.consume_front(Prefix: ".pdata$") || name.consume_front(Prefix: ".xdata$") ||
543 name.consume_front(Prefix: ".eh_frame$")) {
544 // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
545 // associative to the symbol <func>.
546 auto parentSym = prevailingSectionMap.find(Val: name);
547 if (parentSym != prevailingSectionMap.end())
548 readAssociativeDefinition(sym, def, parentIndex: parentSym->second);
549 }
550}
551
552Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
553 SectionChunk *sc = sparseChunks[sym.getSectionNumber()];
554 if (sym.isExternal()) {
555 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
556 if (sc)
557 return symtab.addRegular(f: this, n: name, s: sym.getGeneric(), c: sc,
558 sectionOffset: sym.getValue());
559 // For MinGW symbols named .weak.* that point to a discarded section,
560 // don't create an Undefined symbol. If nothing ever refers to the symbol,
561 // everything should be fine. If something actually refers to the symbol
562 // (e.g. the undefined weak alias), linking will fail due to undefined
563 // references at the end.
564 if (symtab.ctx.config.mingw && name.starts_with(Prefix: ".weak."))
565 return nullptr;
566 return symtab.addUndefined(name, f: this, overrideLazy: false);
567 }
568 if (sc) {
569 const coff_symbol_generic *symGen = sym.getGeneric();
570 if (sym.isSection()) {
571 auto *customSymGen = cloneSymbol(sym);
572 customSymGen->Value = 0;
573 symGen = customSymGen;
574 }
575 return make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
576 /*IsExternal*/ args: false, args&: symGen, args&: sc);
577 }
578 return nullptr;
579}
580
// Builds the `symbols` vector (one slot per symbol table index) in three
// passes:
//  1. walk the symbol table, creating undefined, weak external and defined
//     symbols; symbols whose section is still pending are queued;
//  2. resolve the queued symbols (associative comdats, MinGW implicit
//     associations) and create their symbols;
//  3. wire up the weak aliases collected in pass 1.
// sparseChunks is released at the end, so it must not be used afterwards.
581void ObjFile::initializeSymbols() {
582 uint32_t numSymbols = coffObj->getNumberOfSymbols();
583 symbols.resize(new_size: numSymbols);
584
585 SmallVector<std::pair<Symbol *, const coff_aux_weak_external *>, 8>
586 weakAliases;
587 std::vector<uint32_t> pendingIndexes;
588 pendingIndexes.reserve(n: numSymbols);
589
590 DenseMap<StringRef, uint32_t> prevailingSectionMap;
 // Indexed by section number, hence the extra slot.
591 std::vector<const coff_aux_section_definition *> comdatDefs(
592 coffObj->getNumberOfSections() + 1);
593 COFFLinkerContext &ctx = symtab.ctx;
594
 // Pass 1: classify every symbol. Slots belonging to auxiliary records are
 // skipped (see the increment at the end of the loop body) and stay null.
595 for (uint32_t i = 0; i < numSymbols; ++i) {
596 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
597 bool prevailingComdat;
598 if (coffSym.isUndefined()) {
599 symbols[i] = createUndefined(sym: coffSym, overrideLazy: false);
600 } else if (coffSym.isWeakExternal()) {
601 auto aux = coffSym.getAux<coff_aux_weak_external>();
602 bool overrideLazy = true;
603
604 // On ARM64EC, external function calls emit a pair of weak-dependency
605 // aliases: func to #func and #func to the func guest exit thunk
606 // (instead of a single undefined func symbol, which would be emitted on
607 // other targets). Allow such aliases to be overridden by lazy archive
608 // symbols, just as we would for undefined symbols.
609 if (isArm64EC(Machine: getMachineType()) &&
610 aux->Characteristics == IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY) {
611 COFFSymbolRef targetSym = check(e: coffObj->getSymbol(index: aux->TagIndex));
612 if (!targetSym.isAnyUndefined()) {
613 // If the target is defined, it may be either a guest exit thunk or
614 // the actual implementation. If it's the latter, consider the alias
615 // to be part of the implementation and override potential lazy
616 // archive symbols.
617 StringRef targetName = check(e: coffObj->getSymbolName(Symbol: targetSym));
618 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
619 std::optional<std::string> mangledName =
620 getArm64ECMangledFunctionName(Name: name);
621 overrideLazy = mangledName == targetName;
622 } else {
623 overrideLazy = false;
624 }
625 }
626 symbols[i] = createUndefined(sym: coffSym, overrideLazy);
 // The alias target may not have been created yet, so the link is made in
 // pass 3.
627 weakAliases.emplace_back(Args&: symbols[i], Args&: aux);
628 } else if (std::optional<Symbol *> optSym =
629 createDefined(sym: coffSym, comdatDefs, prevailingComdat)) {
630 symbols[i] = *optSym;
631 if (ctx.config.mingw && prevailingComdat)
632 recordPrevailingSymbolForMingw(sym: coffSym, prevailingSectionMap);
633 } else {
634 // createDefined() returns std::nullopt if a symbol belongs to a section
635 // that was pending at the point when the symbol was read. This can happen
636 // in two cases:
637 // 1) section definition symbol for a comdat leader;
638 // 2) symbol belongs to a comdat section associated with another section.
639 // In both of these cases, we can expect the section to be resolved by
640 // the time we finish visiting the remaining symbols in the symbol
641 // table. So we postpone the handling of this symbol until that time.
642 pendingIndexes.push_back(x: i);
643 }
644 i += coffSym.getNumberOfAuxSymbols();
645 }
646
 // Pass 2: resolve the symbols postponed above. A section that is still
 // pending after this attempt is logged and its symbol slot stays null.
647 for (uint32_t i : pendingIndexes) {
648 COFFSymbolRef sym = check(e: coffObj->getSymbol(index: i));
649 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
650 if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
651 readAssociativeDefinition(sym, def);
652 else if (ctx.config.mingw)
653 maybeAssociateSEHForMingw(sym, def, prevailingSectionMap);
654 }
655 if (sparseChunks[sym.getSectionNumber()] == pendingComdat) {
656 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
657 Log(ctx) << "comdat section " << name
658 << " without leader and unassociated, discarding";
659 continue;
660 }
661 symbols[i] = createRegular(sym);
662 }
663
 // Pass 3: connect each weak external to the symbol at its TagIndex.
664 for (auto &kv : weakAliases) {
665 Symbol *sym = kv.first;
666 const coff_aux_weak_external *aux = kv.second;
667 checkAndSetWeakAlias(symtab, f: this, source: sym, target: symbols[aux->TagIndex],
668 isAntiDep: aux->Characteristics ==
669 IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY);
670 }
671
672 // Free the memory used by sparseChunks now that symbol loading is finished.
673 decltype(sparseChunks)().swap(x&: sparseChunks);
674}
675
676Symbol *ObjFile::createUndefined(COFFSymbolRef sym, bool overrideLazy) {
677 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
678 Symbol *s = symtab.addUndefined(name, f: this, overrideLazy);
679
680 // Add an anti-dependency alias for undefined AMD64 symbols on the ARM64EC
681 // target.
682 if (symtab.isEC() && getMachineType() == AMD64) {
683 auto u = dyn_cast<Undefined>(Val: s);
684 if (u && !u->weakAlias) {
685 if (std::optional<std::string> mangledName =
686 getArm64ECMangledFunctionName(Name: name)) {
687 Symbol *m = symtab.addUndefined(name: saver().save(S: *mangledName), f: this,
688 /*overrideLazy=*/false);
689 u->setWeakAlias(sym: m, /*antiDep=*/true);
690 }
691 }
692 }
693 return s;
694}
695
696static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj,
697 int32_t section) {
698 uint32_t numSymbols = obj->getNumberOfSymbols();
699 for (uint32_t i = 0; i < numSymbols; ++i) {
700 COFFSymbolRef sym = check(e: obj->getSymbol(index: i));
701 if (sym.getSectionNumber() != section)
702 continue;
703 if (const coff_aux_section_definition *def = sym.getSectionDefinition())
704 return def;
705 }
706 return nullptr;
707}
708
// Applies the comdat selection rules when `sym` collides with an existing
// comdat leader.
//
// Does nothing if `prevailing` is already true. Otherwise `selection` (the new
// symbol's selection type, which may be rewritten here) is reconciled with the
// leader's selection, and depending on the result a duplicate symbol is
// reported, nothing happens, or -- for "largest" when the new section is
// bigger -- the leader is replaced and `prevailing` is set to true. `def` is
// the new symbol's section definition; it is only consulted for the MinGW
// "same size" comparison.
709void ObjFile::handleComdatSelection(
710 COFFSymbolRef sym, COMDATType &selection, bool &prevailing,
711 DefinedRegular *leader,
712 const llvm::object::coff_aux_section_definition *def) {
713 if (prevailing)
714 return;
715 // There's already an existing comdat for this symbol: `Leader`.
716 // Use the comdat's selection field to determine if the new
717 // symbol in `Sym` should be discarded, produce a duplicate symbol
718 // error, etc.
719
720 SectionChunk *leaderChunk = leader->getChunk();
721 COMDATType leaderSelection = leaderChunk->selection;
722 COFFLinkerContext &ctx = symtab.ctx;
723
724 assert(leader->data && "Comdat leader without SectionChunk?");
725 if (isa<BitcodeFile>(Val: leader->file)) {
726 // If the leader is only a LTO symbol, we don't know e.g. its final size
727 // yet, so we can't do the full strict comdat selection checking yet.
728 selection = leaderSelection = IMAGE_COMDAT_SELECT_ANY;
729 }
730
731 if ((selection == IMAGE_COMDAT_SELECT_ANY &&
732 leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) ||
733 (selection == IMAGE_COMDAT_SELECT_LARGEST &&
734 leaderSelection == IMAGE_COMDAT_SELECT_ANY)) {
735 // cl.exe picks "any" for vftables when building with /GR- and
736 // "largest" when building with /GR. To be able to link object files
737 // compiled with each flag, "any" and "largest" are merged as "largest".
738 leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST;
739 }
740
741 // GCC's __declspec(selectany) doesn't actually pick "any" but "same size as".
742 // Clang on the other hand picks "any". To be able to link two object files
743 // with a __declspec(selectany) declaration, one compiled with gcc and the
744 // other with clang, we merge them as proper "same size as"
745 if (ctx.config.mingw && ((selection == IMAGE_COMDAT_SELECT_ANY &&
746 leaderSelection == IMAGE_COMDAT_SELECT_SAME_SIZE) ||
747 (selection == IMAGE_COMDAT_SELECT_SAME_SIZE &&
748 leaderSelection == IMAGE_COMDAT_SELECT_ANY))) {
749 leaderSelection = selection = IMAGE_COMDAT_SELECT_SAME_SIZE;
750 }
751
752 // Other than that, comdat selections must match. This is a bit more
753 // strict than link.exe which allows merging "any" and "largest" if "any"
754 // is the first symbol the linker sees, and it allows merging "largest"
755 // with everything (!) if "largest" is the first symbol the linker sees.
756 // Making this symmetric independent of which selection is seen first
757 // seems better though.
758 // (This behavior matches ModuleLinker::getComdatResult().)
759 if (selection != leaderSelection) {
760 Log(ctx) << "conflicting comdat type for " << symtab.printSymbol(sym: leader)
761 << ": " << (int)leaderSelection << " in " << leader->getFile()
762 << " and " << (int)selection << " in " << this;
763 symtab.reportDuplicate(existing: leader, newFile: this);
764 return;
765 }
766
 // From here on both sides agree on the selection type.
767 switch (selection) {
768 case IMAGE_COMDAT_SELECT_NODUPLICATES:
769 symtab.reportDuplicate(existing: leader, newFile: this);
770 break;
771
772 case IMAGE_COMDAT_SELECT_ANY:
773 // Nothing to do.
774 break;
775
776 case IMAGE_COMDAT_SELECT_SAME_SIZE:
777 if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) {
778 if (!ctx.config.mingw) {
779 symtab.reportDuplicate(existing: leader, newFile: this);
780 } else {
 // For MinGW, a raw size mismatch is tolerated as long as the Length
 // fields of the two section definitions match; the leader's definition
 // is looked up in its own object file.
781 const coff_aux_section_definition *leaderDef = nullptr;
782 if (leaderChunk->file)
783 leaderDef = findSectionDef(obj: leaderChunk->file->getCOFFObj(),
784 section: leaderChunk->getSectionNumber());
785 if (!leaderDef || leaderDef->Length != def->Length)
786 symtab.reportDuplicate(existing: leader, newFile: this);
787 }
788 }
789 break;
790
791 case IMAGE_COMDAT_SELECT_EXACT_MATCH: {
 // A temporary chunk is built only to read the new section's contents.
792 SectionChunk newChunk(this, getSection(sym));
793 // link.exe only compares section contents here and doesn't complain
794 // if the two comdat sections have e.g. different alignment.
795 // Match that.
796 if (leaderChunk->getContents() != newChunk.getContents())
797 symtab.reportDuplicate(existing: leader, newFile: this, newSc: &newChunk, newSectionOffset: sym.getValue());
798 break;
799 }
800
801 case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
802 // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
803 // (This means lld-link doesn't produce duplicate symbol errors for
804 // associative comdats while link.exe does, but associative comdats
805 // are never extern in practice.)
806 llvm_unreachable("createDefined not called for associative comdats");
807
808 case IMAGE_COMDAT_SELECT_LARGEST:
809 if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) {
810 // Replace the existing comdat symbol with the new one.
811 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
812 // FIXME: This is incorrect: With /opt:noref, the previous sections
813 // make it into the final executable as well. Correct handling would
814 // be to undo reading of the whole old section that's being replaced,
815 // or doing one pass that determines what the final largest comdat
816 // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
817 // only the largest one.
 // The replacement symbol is created with a null chunk here.
818 replaceSymbol<DefinedRegular>(s: leader, arg: this, arg&: name, /*IsCOMDAT*/ arg: true,
819 /*IsExternal*/ arg: true, arg: sym.getGeneric(),
820 arg: nullptr);
821 prevailing = true;
822 }
823 break;
824
825 case IMAGE_COMDAT_SELECT_NEWEST:
826 llvm_unreachable("should have been rejected earlier");
827 }
828}
829
830std::optional<Symbol *> ObjFile::createDefined(
831 COFFSymbolRef sym,
832 std::vector<const coff_aux_section_definition *> &comdatDefs,
833 bool &prevailing) {
834 prevailing = false;
835 auto getName = [&]() { return check(e: coffObj->getSymbolName(Symbol: sym)); };
836
837 if (sym.isCommon()) {
838 auto *c = make<CommonChunk>(args&: sym);
839 chunks.push_back(x: c);
840 return symtab.addCommon(f: this, n: getName(), size: sym.getValue(), s: sym.getGeneric(),
841 c);
842 }
843
844 COFFLinkerContext &ctx = symtab.ctx;
845 if (sym.isAbsolute()) {
846 StringRef name = getName();
847
848 if (name == "@feat.00")
849 feat00Flags = sym.getValue();
850 // Skip special symbols.
851 if (ignoredSymbolName(name))
852 return nullptr;
853
854 if (sym.isExternal())
855 return symtab.addAbsolute(n: name, s: sym);
856 return make<DefinedAbsolute>(args&: ctx, args&: name, args&: sym);
857 }
858
859 int32_t sectionNumber = sym.getSectionNumber();
860 if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
861 return nullptr;
862
863 if (sym.isEmptySectionDeclaration()) {
864 // As there is no coff_section in the object file for these, make a
865 // new virtual one, with everything zeroed out (i.e. an empty section),
866 // with only the name and characteristics set.
867 StringRef name = getName();
868 auto *hdr = make<coff_section>();
869 memset(s: hdr, c: 0, n: sizeof(*hdr));
870 strncpy(dest: hdr->Name, src: name.data(),
871 n: std::min(a: name.size(), b: (size_t)COFF::NameSize));
872 // The Value field in a section symbol may contain the characteristics,
873 // or it may be zero, where we make something up (that matches what is
874 // used in .idata sections in the regular object files in import libraries).
875 if (sym.getValue())
876 hdr->Characteristics = sym.getValue() | IMAGE_SCN_ALIGN_4BYTES;
877 else
878 hdr->Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
879 IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE |
880 IMAGE_SCN_ALIGN_4BYTES;
881 auto *sc = make<SectionChunk>(args: this, args&: hdr);
882 chunks.push_back(x: sc);
883
884 auto *symGen = cloneSymbol(sym);
885 // Ignore the Value offset of these symbols, as it may be a bitmask.
886 symGen->Value = 0;
887 return make<DefinedRegular>(args: this, /*name=*/args: "", /*isCOMDAT=*/args: false,
888 /*isExternal=*/args: false, args&: symGen, args&: sc);
889 }
890
891 if (llvm::COFF::isReservedSectionNumber(SectionNumber: sectionNumber))
892 Fatal(ctx) << toString(file: this) << ": " << getName()
893 << " should not refer to special section "
894 << Twine(sectionNumber);
895
896 if ((uint32_t)sectionNumber >= sparseChunks.size())
897 Fatal(ctx) << toString(file: this) << ": " << getName()
898 << " should not refer to non-existent section "
899 << Twine(sectionNumber);
900
901 // Comdat handling.
902 // A comdat symbol consists of two symbol table entries.
903 // The first symbol entry has the name of the section (e.g. .text), fixed
904 // values for the other fields, and one auxiliary record.
905 // The second symbol entry has the name of the comdat symbol, called the
906 // "comdat leader".
907 // When this function is called for the first symbol entry of a comdat,
908 // it sets comdatDefs and returns std::nullopt, and when it's called for the
909 // second symbol entry it reads comdatDefs and then sets it back to nullptr.
910
911 // Handle comdat leader.
912 if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) {
913 comdatDefs[sectionNumber] = nullptr;
914 DefinedRegular *leader;
915
916 if (sym.isExternal()) {
917 std::tie(args&: leader, args&: prevailing) =
918 symtab.addComdat(f: this, n: getName(), s: sym.getGeneric());
919 } else {
920 leader = make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
921 /*IsExternal*/ args: false, args: sym.getGeneric());
922 prevailing = true;
923 }
924
925 if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES ||
926 // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
927 // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
928 def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) {
929 Fatal(ctx) << "unknown comdat type "
930 << std::to_string(val: (int)def->Selection) << " for " << getName()
931 << " in " << toString(file: this);
932 }
933 COMDATType selection = (COMDATType)def->Selection;
934
935 if (leader->isCOMDAT)
936 handleComdatSelection(sym, selection, prevailing, leader, def);
937
938 if (prevailing) {
939 SectionChunk *c = readSection(sectionNumber, def, leaderName: getName());
940 sparseChunks[sectionNumber] = c;
941 if (!c)
942 return nullptr;
943 c->sym = cast<DefinedRegular>(Val: leader);
944 c->selection = selection;
945 cast<DefinedRegular>(Val: leader)->data = &c->repl;
946 } else {
947 sparseChunks[sectionNumber] = nullptr;
948 }
949 return leader;
950 }
951
952 // Prepare to handle the comdat leader symbol by setting the section's
953 // ComdatDefs pointer if we encounter a non-associative comdat.
954 if (sparseChunks[sectionNumber] == pendingComdat) {
955 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
956 if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE)
957 comdatDefs[sectionNumber] = def;
958 }
959 return std::nullopt;
960 }
961
962 return createRegular(sym);
963}
964
965MachineTypes ObjFile::getMachineType() const {
966 return static_cast<MachineTypes>(coffObj->getMachine());
967}
968
969ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
970 if (SectionChunk *sec = SectionChunk::findByName(sections: debugChunks, name: secName))
971 return sec->consumeDebugMagic();
972 return {};
973}
974
975// OBJ files systematically store critical information in a .debug$S stream,
976// even if the TU was compiled with no debug info. At least two records are
977// always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
978// PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
979// currently used to initialize the hotPatchable member.
980void ObjFile::initializeFlags() {
981 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$S");
982 if (data.empty())
983 return;
984
985 DebugSubsectionArray subsections;
986
987 BinaryStreamReader reader(data, llvm::endianness::little);
988 ExitOnError exitOnErr;
989 exitOnErr(reader.readArray(Array&: subsections, Size: data.size()));
990
991 for (const DebugSubsectionRecord &ss : subsections) {
992 if (ss.kind() != DebugSubsectionKind::Symbols)
993 continue;
994
995 unsigned offset = 0;
996
997 // Only parse the first two records. We are only looking for S_OBJNAME
998 // and S_COMPILE3, and they usually appear at the beginning of the
999 // stream.
1000 for (unsigned i = 0; i < 2; ++i) {
1001 Expected<CVSymbol> sym = readSymbolFromStream(Stream: ss.getRecordData(), Offset: offset);
1002 if (!sym) {
1003 consumeError(Err: sym.takeError());
1004 return;
1005 }
1006 if (sym->kind() == SymbolKind::S_COMPILE3) {
1007 auto cs =
1008 cantFail(ValOrErr: SymbolDeserializer::deserializeAs<Compile3Sym>(Symbol: sym.get()));
1009 hotPatchable =
1010 (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None;
1011 }
1012 if (sym->kind() == SymbolKind::S_OBJNAME) {
1013 auto objName = cantFail(ValOrErr: SymbolDeserializer::deserializeAs<ObjNameSym>(
1014 Symbol: sym.get()));
1015 if (objName.Signature)
1016 pchSignature = objName.Signature;
1017 }
1018 offset += sym->length();
1019 }
1020 }
1021}
1022
1023// Depending on the compilation flags, OBJs can refer to external files,
1024// necessary to merge this OBJ into the final PDB. We currently support two
1025// types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
1026// And PDB type servers, when compiling with /Zi. This function extracts these
1027// dependencies and makes them available as a TpiSource interface (see
1028// DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
1029// output even with /Yc and /Yu and with /Zi.
1030void ObjFile::initializeDependencies() {
1031 COFFLinkerContext &ctx = symtab.ctx;
1032 if (!ctx.config.debug)
1033 return;
1034
1035 bool isPCH = false;
1036
1037 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$P");
1038 if (!data.empty())
1039 isPCH = true;
1040 else
1041 data = getDebugSection(secName: ".debug$T");
1042
1043 // symbols but no types, make a plain, empty TpiSource anyway, because it
1044 // simplifies adding the symbols later.
1045 if (data.empty()) {
1046 if (!debugChunks.empty())
1047 debugTypesObj = makeTpiSource(ctx, f: this);
1048 return;
1049 }
1050
1051 // Get the first type record. It will indicate if this object uses a type
1052 // server (/Zi) or a PCH file (/Yu).
1053 CVTypeArray types;
1054 BinaryStreamReader reader(data, llvm::endianness::little);
1055 cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
1056 CVTypeArray::Iterator firstType = types.begin();
1057 if (firstType == types.end())
1058 return;
1059
1060 // Remember the .debug$T or .debug$P section.
1061 debugTypes = data;
1062
1063 // This object file is a PCH file that others will depend on.
1064 if (isPCH) {
1065 debugTypesObj = makePrecompSource(ctx, file: this);
1066 return;
1067 }
1068
1069 // This object file was compiled with /Zi. Enqueue the PDB dependency.
1070 if (firstType->kind() == LF_TYPESERVER2) {
1071 TypeServer2Record ts = cantFail(
1072 ValOrErr: TypeDeserializer::deserializeAs<TypeServer2Record>(Data: firstType->data()));
1073 debugTypesObj = makeUseTypeServerSource(ctx, file: this, ts);
1074 enqueuePdbFile(path: ts.getName(), fromFile: this);
1075 return;
1076 }
1077
1078 // This object was compiled with /Yu. It uses types from another object file
1079 // with a matching signature.
1080 if (firstType->kind() == LF_PRECOMP) {
1081 PrecompRecord precomp = cantFail(
1082 ValOrErr: TypeDeserializer::deserializeAs<PrecompRecord>(Data: firstType->data()));
1083 // We're better off trusting the LF_PRECOMP signature. In some cases the
1084 // S_OBJNAME record doesn't contain a valid PCH signature.
1085 if (precomp.Signature)
1086 pchSignature = precomp.Signature;
1087 debugTypesObj = makeUsePrecompSource(ctx, file: this, ts: precomp);
1088 // Drop the LF_PRECOMP record from the input stream.
1089 debugTypes = debugTypes.drop_front(N: firstType->RecordData.size());
1090 return;
1091 }
1092
1093 // This is a plain old object file.
1094 debugTypesObj = makeTpiSource(ctx, f: this);
1095}
1096
1097// The casing of the PDB path stamped in the OBJ can differ from the actual path
1098// on disk. With this, we ensure to always use lowercase as a key for the
1099// pdbInputFileInstances map, at least on Windows.
1100static std::string normalizePdbPath(StringRef path) {
1101#if defined(_WIN32)
1102 return path.lower();
1103#else // LINUX
1104 return std::string(path);
1105#endif
1106}
1107
1108// If existing, return the actual PDB path on disk.
1109static std::optional<std::string>
1110findPdbPath(StringRef pdbPath, ObjFile *dependentFile, StringRef outputPath) {
1111 // Ensure the file exists before anything else. In some cases, if the path
1112 // points to a removable device, Driver::enqueuePath() would fail with an
1113 // error (EAGAIN, "resource unavailable try again") which we want to skip
1114 // silently.
1115 if (llvm::sys::fs::exists(Path: pdbPath))
1116 return normalizePdbPath(path: pdbPath);
1117
1118 StringRef objPath = !dependentFile->parentName.empty()
1119 ? dependentFile->parentName
1120 : dependentFile->getName();
1121
1122 // Currently, type server PDBs are only created by MSVC cl, which only runs
1123 // on Windows, so we can assume type server paths are Windows style.
1124 StringRef pdbName = sys::path::filename(path: pdbPath, style: sys::path::Style::windows);
1125
1126 // Check if the PDB is in the same folder as the OBJ.
1127 SmallString<128> path;
1128 sys::path::append(path, a: sys::path::parent_path(path: objPath), b: pdbName);
1129 if (llvm::sys::fs::exists(Path: path))
1130 return normalizePdbPath(path);
1131
1132 // Check if the PDB is in the output folder.
1133 path.clear();
1134 sys::path::append(path, a: sys::path::parent_path(path: outputPath), b: pdbName);
1135 if (llvm::sys::fs::exists(Path: path))
1136 return normalizePdbPath(path);
1137
1138 return std::nullopt;
1139}
1140
1141PDBInputFile::PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1142 : InputFile(ctx.symtab, PDBKind, m) {}
1143
1144PDBInputFile::~PDBInputFile() = default;
1145
1146PDBInputFile *PDBInputFile::findFromRecordPath(const COFFLinkerContext &ctx,
1147 StringRef path,
1148 ObjFile *fromFile) {
1149 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: ctx.config.outputFile);
1150 if (!p)
1151 return nullptr;
1152 auto it = ctx.pdbInputFileInstances.find(x: *p);
1153 if (it != ctx.pdbInputFileInstances.end())
1154 return it->second;
1155 return nullptr;
1156}
1157
1158void PDBInputFile::parse() {
1159 symtab.ctx.pdbInputFileInstances[mb.getBufferIdentifier().str()] = this;
1160
1161 std::unique_ptr<pdb::IPDBSession> thisSession;
1162 Error E = pdb::NativeSession::createFromPdb(
1163 MB: MemoryBuffer::getMemBuffer(Ref: mb, RequiresNullTerminator: false), Session&: thisSession);
1164 if (E) {
1165 loadErrorStr.emplace(args: toString(E: std::move(E)));
1166 return; // fail silently at this point - the error will be handled later,
1167 // when merging the debug type stream
1168 }
1169
1170 session.reset(p: static_cast<pdb::NativeSession *>(thisSession.release()));
1171
1172 pdb::PDBFile &pdbFile = session->getPDBFile();
1173 auto expectedInfo = pdbFile.getPDBInfoStream();
1174 // All PDB Files should have an Info stream.
1175 if (!expectedInfo) {
1176 loadErrorStr.emplace(args: toString(E: expectedInfo.takeError()));
1177 return;
1178 }
1179 debugTypesObj = makeTypeServerSource(ctx&: symtab.ctx, pdbInputFile: this);
1180}
1181
1182// Used only for DWARF debug info, which is not common (except in MinGW
1183// environments). This returns an optional pair of file name and line
1184// number for where the variable was defined.
1185std::optional<std::pair<StringRef, uint32_t>>
1186ObjFile::getVariableLocation(StringRef var) {
1187 if (!dwarf) {
1188 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1189 if (!dwarf)
1190 return std::nullopt;
1191 }
1192 if (symtab.machine == I386)
1193 var.consume_front(Prefix: "_");
1194 std::optional<std::pair<std::string, unsigned>> ret =
1195 dwarf->getVariableLoc(name: var);
1196 if (!ret)
1197 return std::nullopt;
1198 return std::make_pair(x: saver().save(S: ret->first), y&: ret->second);
1199}
1200
1201// Used only for DWARF debug info, which is not common (except in MinGW
1202// environments).
1203std::optional<DILineInfo> ObjFile::getDILineInfo(uint32_t offset,
1204 uint32_t sectionIndex) {
1205 if (!dwarf) {
1206 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1207 if (!dwarf)
1208 return std::nullopt;
1209 }
1210
1211 return dwarf->getDILineInfo(offset, sectionIndex);
1212}
1213
1214void ObjFile::enqueuePdbFile(StringRef path, ObjFile *fromFile) {
1215 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: symtab.ctx.config.outputFile);
1216 if (!p)
1217 return;
1218 auto it = symtab.ctx.pdbInputFileInstances.emplace(args&: *p, args: nullptr);
1219 if (!it.second)
1220 return; // already scheduled for load
1221 symtab.ctx.driver.enqueuePDB(Path: *p);
1222}
1223
1224ImportFile::ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1225 : InputFile(ctx.getSymtab(machine: getMachineType(m)), ImportKind, m),
1226 live(!ctx.config.doGC) {}
1227
1228MachineTypes ImportFile::getMachineType(MemoryBufferRef m) {
1229 uint16_t machine =
1230 reinterpret_cast<const coff_import_header *>(m.getBufferStart())->Machine;
1231 return MachineTypes(machine);
1232}
1233
1234bool ImportFile::isSameImport(const ImportFile *other) const {
1235 if (!externalName.empty())
1236 return other->externalName == externalName;
1237 return hdr->OrdinalHint == other->hdr->OrdinalHint;
1238}
1239
1240ImportThunkChunk *ImportFile::makeImportThunk() {
1241 switch (hdr->Machine) {
1242 case AMD64:
1243 return make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym);
1244 case I386:
1245 return make<ImportThunkChunkX86>(args&: symtab.ctx, args&: impSym);
1246 case ARM64:
1247 return make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impSym, args: ARM64);
1248 case ARMNT:
1249 return make<ImportThunkChunkARM>(args&: symtab.ctx, args&: impSym);
1250 }
1251 llvm_unreachable("unknown machine type");
1252}
1253
1254void ImportFile::parse() {
1255 const auto *hdr =
1256 reinterpret_cast<const coff_import_header *>(mb.getBufferStart());
1257
1258 // Check if the total size is valid.
1259 if (mb.getBufferSize() < sizeof(*hdr) ||
1260 mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData)
1261 Fatal(ctx&: symtab.ctx) << "broken import library";
1262
1263 // Read names and create an __imp_ symbol.
1264 StringRef buf = mb.getBuffer().substr(Start: sizeof(*hdr));
1265 auto split = buf.split(Separator: '\0');
1266 buf = split.second;
1267 StringRef name;
1268 if (isArm64EC(Machine: hdr->Machine)) {
1269 if (std::optional<std::string> demangledName =
1270 getArm64ECDemangledFunctionName(Name: split.first))
1271 name = saver().save(S: *demangledName);
1272 }
1273 if (name.empty())
1274 name = saver().save(S: split.first);
1275 StringRef impName = saver().save(S: "__imp_" + name);
1276 dllName = buf.split(Separator: '\0').first;
1277 StringRef extName;
1278 switch (hdr->getNameType()) {
1279 case IMPORT_ORDINAL:
1280 extName = "";
1281 break;
1282 case IMPORT_NAME:
1283 extName = name;
1284 break;
1285 case IMPORT_NAME_NOPREFIX:
1286 extName = ltrim1(s: name, chars: "?@_");
1287 break;
1288 case IMPORT_NAME_UNDECORATE:
1289 extName = ltrim1(s: name, chars: "?@_");
1290 extName = extName.substr(Start: 0, N: extName.find(C: '@'));
1291 break;
1292 case IMPORT_NAME_EXPORTAS:
1293 extName = buf.substr(Start: dllName.size() + 1).split(Separator: '\0').first;
1294 break;
1295 }
1296
1297 this->hdr = hdr;
1298 externalName = extName;
1299
1300 bool isCode = hdr->getType() == llvm::COFF::IMPORT_CODE;
1301
1302 if (!symtab.isEC()) {
1303 impSym = symtab.addImportData(n: impName, f: this, location);
1304 } else {
1305 // In addition to the regular IAT, ARM64EC also contains an auxiliary IAT,
1306 // which holds addresses that are guaranteed to be callable directly from
1307 // ARM64 code. Function symbol naming is swapped: __imp_ symbols refer to
1308 // the auxiliary IAT, while __imp_aux_ symbols refer to the regular IAT. For
1309 // data imports, the naming is reversed.
1310 StringRef auxImpName = saver().save(S: "__imp_aux_" + name);
1311 if (isCode) {
1312 impSym = symtab.addImportData(n: auxImpName, f: this, location);
1313 impECSym = symtab.addImportData(n: impName, f: this, location&: auxLocation);
1314 } else {
1315 impSym = symtab.addImportData(n: impName, f: this, location);
1316 impECSym = symtab.addImportData(n: auxImpName, f: this, location&: auxLocation);
1317 }
1318 if (!impECSym)
1319 return;
1320
1321 StringRef auxImpCopyName = saver().save(S: "__auximpcopy_" + name);
1322 auxImpCopySym = symtab.addImportData(n: auxImpCopyName, f: this, location&: auxCopyLocation);
1323 if (!auxImpCopySym)
1324 return;
1325 }
1326 // If this was a duplicate, we logged an error but may continue;
1327 // in this case, impSym is nullptr.
1328 if (!impSym)
1329 return;
1330
1331 if (hdr->getType() == llvm::COFF::IMPORT_CONST)
1332 static_cast<void>(symtab.addImportData(n: name, f: this, location));
1333
1334 // If type is function, we need to create a thunk which jump to an
1335 // address pointed by the __imp_ symbol. (This allows you to call
1336 // DLL functions just like regular non-DLL functions.)
1337 if (isCode) {
1338 if (!symtab.isEC()) {
1339 thunkSym = symtab.addImportThunk(name, s: impSym, chunk: makeImportThunk());
1340 } else {
1341 thunkSym = symtab.addImportThunk(
1342 name, s: impSym, chunk: make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym));
1343
1344 if (std::optional<std::string> mangledName =
1345 getArm64ECMangledFunctionName(Name: name)) {
1346 StringRef auxThunkName = saver().save(S: *mangledName);
1347 auxThunkSym = symtab.addImportThunk(
1348 name: auxThunkName, s: impECSym,
1349 chunk: make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impECSym, args: ARM64EC));
1350 }
1351
1352 StringRef impChkName = saver().save(S: "__impchk_" + name);
1353 impchkThunk = make<ImportThunkChunkARM64EC>(args: this);
1354 impchkThunk->sym = symtab.addImportThunk(name: impChkName, s: impSym, chunk: impchkThunk);
1355 symtab.ctx.driver.pullArm64ECIcallHelper();
1356 }
1357 }
1358}
1359
1360BitcodeFile::BitcodeFile(SymbolTable &symtab, MemoryBufferRef mb,
1361 std::unique_ptr<lto::InputFile> &o, bool lazy)
1362 : InputFile(symtab, BitcodeKind, mb, lazy) {
1363 obj.swap(u&: o);
1364}
1365
1366BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
1367 StringRef archiveName,
1368 uint64_t offsetInArchive, bool lazy) {
1369 std::string path = mb.getBufferIdentifier().str();
1370 if (ctx.config.thinLTOIndexOnly)
1371 path = replaceThinLTOSuffix(path: mb.getBufferIdentifier(),
1372 suffix: ctx.config.thinLTOObjectSuffixReplace.first,
1373 repl: ctx.config.thinLTOObjectSuffixReplace.second);
1374
1375 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1376 // name. If two archives define two members with the same name, this
1377 // causes a collision which result in only one of the objects being taken
1378 // into consideration at LTO time (which very likely causes undefined
1379 // symbols later in the link stage). So we append file offset to make
1380 // filename unique.
1381 MemoryBufferRef mbref(mb.getBuffer(),
1382 saver().save(S: archiveName.empty()
1383 ? path
1384 : archiveName +
1385 sys::path::filename(path) +
1386 utostr(X: offsetInArchive)));
1387
1388 std::unique_ptr<lto::InputFile> obj = check(e: lto::InputFile::create(Object: mbref));
1389 obj->setArchivePathAndName(Path: archiveName, Name: mb.getBufferIdentifier());
1390 return make<BitcodeFile>(args&: ctx.getSymtab(machine: getMachineType(obj: obj.get())), args&: mb, args&: obj,
1391 args&: lazy);
1392}
1393
1394BitcodeFile::~BitcodeFile() = default;
1395
1396void BitcodeFile::parse() {
1397 llvm::StringSaver &saver = lld::saver();
1398
1399 std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
1400 for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
1401 // FIXME: Check nodeduplicate
1402 comdat[i] =
1403 symtab.addComdat(f: this, n: saver.save(S: obj->getComdatTable()[i].first));
1404 for (const lto::InputFile::Symbol &objSym : obj->symbols()) {
1405 StringRef symName = saver.save(S: objSym.getName());
1406 int comdatIndex = objSym.getComdatIndex();
1407 Symbol *sym;
1408 SectionChunk *fakeSC = nullptr;
1409 if (objSym.isExecutable())
1410 fakeSC = &symtab.ctx.ltoTextSectionChunk.chunk;
1411 else
1412 fakeSC = &symtab.ctx.ltoDataSectionChunk.chunk;
1413 if (objSym.isUndefined()) {
1414 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1415 if (objSym.isWeak())
1416 sym->deferUndefined = true;
1417 // If one LTO object file references (i.e. has an undefined reference to)
1418 // a symbol with an __imp_ prefix, the LTO compilation itself sees it
1419 // as unprefixed but with a dllimport attribute instead, and doesn't
1420 // understand the relation to a concrete IR symbol with the __imp_ prefix.
1421 //
1422 // For such cases, mark the symbol as used in a regular object (i.e. the
1423 // symbol must be retained) so that the linker can associate the
1424 // references in the end. If the symbol is defined in an import library
1425 // or in a regular object file, this has no effect, but if it is defined
1426 // in another LTO object file, this makes sure it is kept, to fulfill
1427 // the reference when linking the output of the LTO compilation.
1428 if (symName.starts_with(Prefix: "__imp_"))
1429 sym->isUsedInRegularObj = true;
1430 } else if (objSym.isCommon()) {
1431 sym = symtab.addCommon(f: this, n: symName, size: objSym.getCommonSize());
1432 } else if (objSym.isWeak() && objSym.isIndirect()) {
1433 // Weak external.
1434 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: true);
1435 std::string fallback = std::string(objSym.getCOFFWeakExternalFallback());
1436 Symbol *alias = symtab.addUndefined(name: saver.save(S: fallback));
1437 checkAndSetWeakAlias(symtab, f: this, source: sym, target: alias, isAntiDep: false);
1438 } else if (comdatIndex != -1) {
1439 if (symName == obj->getComdatTable()[comdatIndex].first) {
1440 sym = comdat[comdatIndex].first;
1441 if (cast<DefinedRegular>(Val: sym)->data == nullptr)
1442 cast<DefinedRegular>(Val: sym)->data = &fakeSC->repl;
1443 } else if (comdat[comdatIndex].second) {
1444 sym = symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC);
1445 } else {
1446 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1447 }
1448 } else {
1449 sym =
1450 symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC, sectionOffset: 0, isWeak: objSym.isWeak());
1451 }
1452 symbols.push_back(x: sym);
1453 if (objSym.isUsed())
1454 symtab.ctx.config.gcroot.push_back(x: sym);
1455 }
1456 directives = saver.save(S: obj->getCOFFLinkerOpts());
1457}
1458
1459void BitcodeFile::parseLazy() {
1460 for (const lto::InputFile::Symbol &sym : obj->symbols())
1461 if (!sym.isUndefined()) {
1462 symtab.addLazyObject(f: this, n: sym.getName());
1463 if (!lazy)
1464 return;
1465 }
1466}
1467
1468MachineTypes BitcodeFile::getMachineType(const llvm::lto::InputFile *obj) {
1469 Triple t(obj->getTargetTriple());
1470 switch (t.getArch()) {
1471 case Triple::x86_64:
1472 return AMD64;
1473 case Triple::x86:
1474 return I386;
1475 case Triple::arm:
1476 case Triple::thumb:
1477 return ARMNT;
1478 case Triple::aarch64:
1479 return t.isWindowsArm64EC() ? ARM64EC : ARM64;
1480 default:
1481 return IMAGE_FILE_MACHINE_UNKNOWN;
1482 }
1483}
1484
1485std::string lld::coff::replaceThinLTOSuffix(StringRef path, StringRef suffix,
1486 StringRef repl) {
1487 if (path.consume_back(Suffix: suffix))
1488 return (path + repl).str();
1489 return std::string(path);
1490}
1491
1492static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) {
1493 for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) {
1494 const coff_section *sec = CHECK(coffObj->getSection(i), file);
1495 if (rva >= sec->VirtualAddress &&
1496 rva <= sec->VirtualAddress + sec->VirtualSize) {
1497 return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0;
1498 }
1499 }
1500 return false;
1501}
1502
1503void DLLFile::parse() {
1504 // Parse a memory buffer as a PE-COFF executable.
1505 std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
1506
1507 if (auto *obj = dyn_cast<COFFObjectFile>(Val: bin.get())) {
1508 bin.release();
1509 coffObj.reset(p: obj);
1510 } else {
1511 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a COFF file";
1512 return;
1513 }
1514
1515 if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) {
1516 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a PE-COFF executable";
1517 return;
1518 }
1519
1520 for (const auto &exp : coffObj->export_directories()) {
1521 StringRef dllName, symbolName;
1522 uint32_t exportRVA;
1523 checkError(e: exp.getDllName(Result&: dllName));
1524 checkError(e: exp.getSymbolName(Result&: symbolName));
1525 checkError(e: exp.getExportRVA(Result&: exportRVA));
1526
1527 if (symbolName.empty())
1528 continue;
1529
1530 bool code = isRVACode(coffObj: coffObj.get(), rva: exportRVA, file: this);
1531
1532 Symbol *s = make<Symbol>();
1533 s->dllName = dllName;
1534 s->symbolName = symbolName;
1535 s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA;
1536 s->nameType = ImportNameType::IMPORT_NAME;
1537
1538 if (coffObj->getMachine() == I386) {
1539 s->symbolName = symbolName = saver().save(S: "_" + symbolName);
1540 s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
1541 }
1542
1543 StringRef impName = saver().save(S: "__imp_" + symbolName);
1544 symtab.addLazyDLLSymbol(f: this, sym: s, n: impName);
1545 if (code)
1546 symtab.addLazyDLLSymbol(f: this, sym: s, n: symbolName);
1547 if (symtab.isEC()) {
1548 StringRef impAuxName = saver().save(S: "__imp_aux_" + symbolName);
1549 symtab.addLazyDLLSymbol(f: this, sym: s, n: impAuxName);
1550
1551 if (code) {
1552 std::optional<std::string> mangledName =
1553 getArm64ECMangledFunctionName(Name: symbolName);
1554 if (mangledName)
1555 symtab.addLazyDLLSymbol(f: this, sym: s, n: *mangledName);
1556 }
1557 }
1558 }
1559}
1560
1561MachineTypes DLLFile::getMachineType() const {
1562 if (coffObj)
1563 return static_cast<MachineTypes>(coffObj->getMachine());
1564 return IMAGE_FILE_MACHINE_UNKNOWN;
1565}
1566
1567void DLLFile::makeImport(DLLFile::Symbol *s) {
1568 if (!seen.insert(key: s->symbolName).second)
1569 return;
1570
1571 size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
1572 size_t size = sizeof(coff_import_header) + impSize;
1573 char *buf = bAlloc().Allocate<char>(Num: size);
1574 memset(s: buf, c: 0, n: size);
1575 char *p = buf;
1576 auto *imp = reinterpret_cast<coff_import_header *>(p);
1577 p += sizeof(*imp);
1578 imp->Sig2 = 0xFFFF;
1579 imp->Machine = coffObj->getMachine();
1580 imp->SizeOfData = impSize;
1581 imp->OrdinalHint = 0; // Only linking by name
1582 imp->TypeInfo = (s->nameType << 2) | s->importType;
1583
1584 // Write symbol name and DLL name.
1585 memcpy(dest: p, src: s->symbolName.data(), n: s->symbolName.size());
1586 p += s->symbolName.size() + 1;
1587 memcpy(dest: p, src: s->dllName.data(), n: s->dllName.size());
1588 MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName);
1589 ImportFile *impFile = make<ImportFile>(args&: symtab.ctx, args&: mbref);
1590 symtab.ctx.driver.addFile(file: impFile);
1591}
1592