1//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_COFF_INPUT_FILES_H
10#define LLD_COFF_INPUT_FILES_H
11
12#include "Config.h"
13#include "lld/Common/LLVM.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/StringSet.h"
18#include "llvm/BinaryFormat/Magic.h"
19#include "llvm/Object/Archive.h"
20#include "llvm/Object/COFF.h"
21#include "llvm/Support/StringSaver.h"
22#include <memory>
23#include <set>
24#include <vector>
25
// Forward declarations of LLVM types. This header only names them in
// declarations, pointers, and smart-pointer members, so their complete
// definitions are not required here.
namespace llvm {
struct DILineInfo;
} // namespace llvm

namespace llvm::pdb {
class DbiModuleDescriptorBuilder;
class NativeSession;
} // namespace llvm::pdb

namespace llvm::lto {
class InputFile;
} // namespace llvm::lto
36
37namespace lld {
// Forward declaration only: this header stores a plain pointer to the cache,
// so the complete type is not needed here.
class DWARFCache;
39
40namespace coff {
41class COFFLinkerContext;
42
43const COFFSyncStream &operator<<(const COFFSyncStream &, const InputFile *);
44
45std::vector<MemoryBufferRef> getArchiveMembers(COFFLinkerContext &,
46 llvm::object::Archive *file);
47
48using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
49using llvm::COFF::MachineTypes;
50using llvm::object::Archive;
51using llvm::object::COFFObjectFile;
52using llvm::object::COFFSymbolRef;
53using llvm::object::coff_import_header;
54using llvm::object::coff_section;
55
// Forward declarations of linker classes that are defined in other headers.
class Chunk;
class ImportThunkChunk;
class ImportThunkChunkARM64EC;
class SectionChunk;

class Symbol;
class Defined;
class DefinedImportData;
class DefinedImportThunk;
class DefinedRegular;
class Undefined;

class SymbolTable;
class TpiSource;
68
69// The root class of input files.
70class InputFile {
71public:
72 enum Kind {
73 ArchiveKind,
74 ObjectKind,
75 PDBKind,
76 ImportKind,
77 BitcodeKind,
78 DLLKind
79 };
80 Kind kind() const { return fileKind; }
81 virtual ~InputFile() {}
82
83 // Returns the filename.
84 StringRef getName() const { return mb.getBufferIdentifier(); }
85
86 // Reads a file (the constructor doesn't do that).
87 virtual void parse() = 0;
88
89 // Returns the CPU type this file was compiled to.
90 virtual MachineTypes getMachineType() const {
91 return IMAGE_FILE_MACHINE_UNKNOWN;
92 }
93
94 MemoryBufferRef mb;
95
96 // An archive file name if this file is created from an archive.
97 StringRef parentName;
98
99 // Returns .drectve section contents if exist.
100 StringRef getDirectives() { return directives; }
101
102 SymbolTable &symtab;
103
104protected:
105 InputFile(SymbolTable &s, Kind k, MemoryBufferRef m, bool lazy = false)
106 : mb(m), symtab(s), fileKind(k), lazy(lazy) {}
107
108 StringRef directives;
109
110private:
111 const Kind fileKind;
112
113public:
114 // True if this is a lazy ObjFile or BitcodeFile.
115 bool lazy = false;
116};
117
118// .lib or .a file.
119class ArchiveFile : public InputFile {
120public:
121 explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
122 std::unique_ptr<Archive> &f);
123 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
124 void parse() override;
125
126 // Enqueues an archive member load for the given symbol. If we've already
127 // enqueued a load for the same archive member, this function does nothing,
128 // which ensures that we don't load the same member more than once.
129 void addMember(const Archive::Symbol &sym);
130
131private:
132 std::unique_ptr<Archive> file;
133 llvm::DenseSet<uint64_t> seen;
134};
135
136// .obj or .o file. This may be a member of an archive file.
137class ObjFile : public InputFile {
138public:
139 static ObjFile *create(COFFLinkerContext &ctx, MemoryBufferRef mb,
140 bool lazy = false);
141 explicit ObjFile(SymbolTable &symtab, COFFObjectFile *coffObj, bool lazy);
142
143 static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
144 void parse() override;
145 void parseLazy();
146 MachineTypes getMachineType() const override;
147 ArrayRef<Chunk *> getChunks() { return chunks; }
148 ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
149 ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
150 ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
151 ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; }
152 ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
153 ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; }
154 ArrayRef<Symbol *> getSymbols() { return symbols; }
155
156 MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
157
158 ArrayRef<uint8_t> getDebugSection(StringRef secName);
159
160 // Returns a Symbol object for the symbolIndex'th symbol in the
161 // underlying object file.
162 Symbol *getSymbol(uint32_t symbolIndex) {
163 return symbols[symbolIndex];
164 }
165
166 // Returns the underlying COFF file.
167 COFFObjectFile *getCOFFObj() { return coffObj.get(); }
168
169 // Add a symbol for a range extension thunk. Return the new symbol table
170 // index. This index can be used to modify a relocation.
171 uint32_t addRangeThunkSymbol(Symbol *thunk) {
172 symbols.push_back(x: thunk);
173 return symbols.size() - 1;
174 }
175
176 void includeResourceChunks();
177
178 bool isResourceObjFile() const { return !resourceChunks.empty(); }
179
180 // Flags in the absolute @feat.00 symbol if it is present. These usually
181 // indicate if an object was compiled with certain security features enabled
182 // like stack guard, safeseh, /guard:cf, or other things.
183 uint32_t feat00Flags = 0;
184
185 // True if this object file is compatible with SEH. COFF-specific and
186 // x86-only. COFF spec 5.10.1. The .sxdata section.
187 bool hasSafeSEH() { return feat00Flags & 0x1; }
188
189 // True if this file was compiled with /guard:cf.
190 bool hasGuardCF() { return feat00Flags & 0x800; }
191
192 // True if this file was compiled with /guard:ehcont.
193 bool hasGuardEHCont() { return feat00Flags & 0x4000; }
194
195 // Pointer to the PDB module descriptor builder. Various debug info records
196 // will reference object files by "module index", which is here. Things like
197 // source files and section contributions are also recorded here. Will be null
198 // if we are not producing a PDB.
199 llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
200
201 const coff_section *addrsigSec = nullptr;
202
203 const coff_section *callgraphSec = nullptr;
204
205 // When using Microsoft precompiled headers, this is the PCH's key.
206 // The same key is used by both the precompiled object, and objects using the
207 // precompiled object. Any difference indicates out-of-date objects.
208 std::optional<uint32_t> pchSignature;
209
210 // Whether this file was compiled with /hotpatch.
211 bool hotPatchable = false;
212
213 // Whether the object was already merged into the final PDB.
214 bool mergedIntoPDB = false;
215
216 // If the OBJ has a .debug$T stream, this tells how it will be handled.
217 TpiSource *debugTypesObj = nullptr;
218
219 // The .debug$P or .debug$T section data if present. Empty otherwise.
220 ArrayRef<uint8_t> debugTypes;
221
222 std::optional<std::pair<StringRef, uint32_t>>
223 getVariableLocation(StringRef var);
224
225 std::optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
226 uint32_t sectionIndex);
227
228private:
229 const coff_section* getSection(uint32_t i);
230 const coff_section *getSection(COFFSymbolRef sym) {
231 return getSection(i: sym.getSectionNumber());
232 }
233
234 void enqueuePdbFile(StringRef path, ObjFile *fromFile);
235
236 void initializeChunks();
237 void initializeSymbols();
238 void initializeFlags();
239 void initializeDependencies();
240 void initializeECThunks();
241
242 SectionChunk *
243 readSection(uint32_t sectionNumber,
244 const llvm::object::coff_aux_section_definition *def,
245 StringRef leaderName);
246
247 void readAssociativeDefinition(
248 COFFSymbolRef coffSym,
249 const llvm::object::coff_aux_section_definition *def);
250
251 void readAssociativeDefinition(
252 COFFSymbolRef coffSym,
253 const llvm::object::coff_aux_section_definition *def,
254 uint32_t parentSection);
255
256 void recordPrevailingSymbolForMingw(
257 COFFSymbolRef coffSym,
258 llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
259
260 void maybeAssociateSEHForMingw(
261 COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
262 const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
263
264 // Given a new symbol Sym with comdat selection Selection, if the new
265 // symbol is not (yet) Prevailing and the existing comdat leader set to
266 // Leader, emits a diagnostic if the new symbol and its selection doesn't
267 // match the existing symbol and its selection. If either old or new
268 // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
269 // the existing leader. In that case, Prevailing is set to true.
270 void
271 handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection,
272 bool &prevailing, DefinedRegular *leader,
273 const llvm::object::coff_aux_section_definition *def);
274
275 std::optional<Symbol *>
276 createDefined(COFFSymbolRef sym,
277 std::vector<const llvm::object::coff_aux_section_definition *>
278 &comdatDefs,
279 bool &prevailingComdat);
280 Symbol *createRegular(COFFSymbolRef sym);
281 Symbol *createUndefined(COFFSymbolRef sym, bool overrideLazy);
282
283 std::unique_ptr<COFFObjectFile> coffObj;
284
285 // List of all chunks defined by this file. This includes both section
286 // chunks and non-section chunks for common symbols.
287 std::vector<Chunk *> chunks;
288
289 std::vector<SectionChunk *> resourceChunks;
290
291 // CodeView debug info sections.
292 std::vector<SectionChunk *> debugChunks;
293
294 // Chunks containing symbol table indices of exception handlers. Only used for
295 // 32-bit x86.
296 std::vector<SectionChunk *> sxDataChunks;
297
298 // Chunks containing symbol table indices of address taken symbols, address
299 // taken IAT entries, longjmp and ehcont targets. These are not linked into
300 // the final binary when /guard:cf is set.
301 std::vector<SectionChunk *> guardFidChunks;
302 std::vector<SectionChunk *> guardIATChunks;
303 std::vector<SectionChunk *> guardLJmpChunks;
304 std::vector<SectionChunk *> guardEHContChunks;
305
306 std::vector<SectionChunk *> hybmpChunks;
307
308 // This vector contains a list of all symbols defined or referenced by this
309 // file. They are indexed such that you can get a Symbol by symbol
310 // index. Nonexistent indices (which are occupied by auxiliary
311 // symbols in the real symbol table) are filled with null pointers.
312 std::vector<Symbol *> symbols;
313
314 // This vector contains the same chunks as Chunks, but they are
315 // indexed such that you can get a SectionChunk by section index.
316 // Nonexistent section indices are filled with null pointers.
317 // (Because section number is 1-based, the first slot is always a
318 // null pointer.) This vector is only valid during initialization.
319 std::vector<SectionChunk *> sparseChunks;
320
321 DWARFCache *dwarf = nullptr;
322};
323
324// This is a PDB type server dependency, that is not a input file per se, but
325// needs to be treated like one. Such files are discovered from the debug type
326// stream.
327class PDBInputFile : public InputFile {
328public:
329 explicit PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m);
330 ~PDBInputFile();
331 static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
332 void parse() override;
333
334 static PDBInputFile *findFromRecordPath(const COFFLinkerContext &ctx,
335 StringRef path, ObjFile *fromFile);
336
337 // Record possible errors while opening the PDB file
338 std::optional<std::string> loadErrorStr;
339
340 // This is the actual interface to the PDB (if it was opened successfully)
341 std::unique_ptr<llvm::pdb::NativeSession> session;
342
343 // If the PDB has a .debug$T stream, this tells how it will be handled.
344 TpiSource *debugTypesObj = nullptr;
345};
346
347// This type represents import library members that contain DLL names
348// and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
349// for details about the format.
350class ImportFile : public InputFile {
351public:
352 explicit ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m);
353
354 static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
355 MachineTypes getMachineType() const override { return getMachineType(m: mb); }
356 static MachineTypes getMachineType(MemoryBufferRef m);
357 bool isSameImport(const ImportFile *other) const;
358 bool isEC() const { return impECSym != nullptr; }
359
360 DefinedImportData *impSym = nullptr;
361 Defined *thunkSym = nullptr;
362 ImportThunkChunkARM64EC *impchkThunk = nullptr;
363 ImportFile *hybridFile = nullptr;
364 std::string dllName;
365
366private:
367 void parse() override;
368 ImportThunkChunk *makeImportThunk();
369
370public:
371 StringRef externalName;
372 const coff_import_header *hdr;
373 Chunk *location = nullptr;
374
375 // Auxiliary IAT symbols and chunks on ARM64EC.
376 DefinedImportData *impECSym = nullptr;
377 Chunk *auxLocation = nullptr;
378 Defined *auxThunkSym = nullptr;
379 DefinedImportData *auxImpCopySym = nullptr;
380 Chunk *auxCopyLocation = nullptr;
381
382 // We want to eliminate dllimported symbols if no one actually refers to them.
383 // These "Live" bits are used to keep track of which import library members
384 // are actually in use.
385 //
386 // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
387 // symbols provided by this import library member.
388 bool live;
389};
390
391// Used for LTO.
392class BitcodeFile : public InputFile {
393public:
394 explicit BitcodeFile(SymbolTable &symtab, MemoryBufferRef mb,
395 std::unique_ptr<llvm::lto::InputFile> &obj, bool lazy);
396 ~BitcodeFile();
397
398 static BitcodeFile *create(COFFLinkerContext &ctx, MemoryBufferRef mb,
399 StringRef archiveName, uint64_t offsetInArchive,
400 bool lazy);
401 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
402 ArrayRef<Symbol *> getSymbols() { return symbols; }
403 MachineTypes getMachineType() const override {
404 return getMachineType(obj: obj.get());
405 }
406 static MachineTypes getMachineType(const llvm::lto::InputFile *obj);
407 void parseLazy();
408 std::unique_ptr<llvm::lto::InputFile> obj;
409
410private:
411 void parse() override;
412
413 std::vector<Symbol *> symbols;
414};
415
416// .dll file. MinGW only.
417class DLLFile : public InputFile {
418public:
419 explicit DLLFile(SymbolTable &symtab, MemoryBufferRef m)
420 : InputFile(symtab, DLLKind, m) {}
421 static bool classof(const InputFile *f) { return f->kind() == DLLKind; }
422 void parse() override;
423 MachineTypes getMachineType() const override;
424
425 struct Symbol {
426 StringRef dllName;
427 StringRef symbolName;
428 llvm::COFF::ImportNameType nameType;
429 llvm::COFF::ImportType importType;
430 };
431
432 void makeImport(Symbol *s);
433
434private:
435 std::unique_ptr<COFFObjectFile> coffObj;
436 llvm::StringSet<> seen;
437};
438
439inline bool isBitcode(MemoryBufferRef mb) {
440 return identify_magic(magic: mb.getBuffer()) == llvm::file_magic::bitcode;
441}
442
443std::string replaceThinLTOSuffix(StringRef path, StringRef suffix,
444 StringRef repl);
445} // namespace coff
446
447std::string toString(const coff::InputFile *file);
448} // namespace lld
449
450#endif
451