1//===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/PDB/Native/InputFile.h"
10
11#include "llvm/ADT/StringExtras.h"
12#include "llvm/BinaryFormat/Magic.h"
13#include "llvm/DebugInfo/CodeView/CodeView.h"
14#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
15#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
16#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
17#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
18#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
19#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
20#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
21#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
22#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
23#include "llvm/DebugInfo/PDB/Native/RawError.h"
24#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
25#include "llvm/DebugInfo/PDB/PDB.h"
26#include "llvm/Object/COFF.h"
27#include "llvm/Support/FileSystem.h"
28#include "llvm/Support/FormatVariadic.h"
29
30using namespace llvm;
31using namespace llvm::codeview;
32using namespace llvm::object;
33using namespace llvm::pdb;
34
35InputFile::InputFile() = default;
36InputFile::~InputFile() = default;
37
38Expected<ModuleDebugStreamRef>
39llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName,
40 uint32_t Index) {
41 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
42 if (!DbiOrErr)
43 return DbiOrErr.takeError();
44 DbiStream &Dbi = *DbiOrErr;
45 const auto &Modules = Dbi.modules();
46 if (Index >= Modules.getModuleCount())
47 return make_error<RawError>(Args: raw_error_code::index_out_of_bounds,
48 Args: "Invalid module index");
49
50 auto Modi = Modules.getModuleDescriptor(Modi: Index);
51
52 ModuleName = Modi.getModuleName();
53
54 uint16_t ModiStream = Modi.getModuleStreamIndex();
55 if (ModiStream == kInvalidStreamIndex)
56 return make_error<RawError>(Args: raw_error_code::no_stream,
57 Args: "Module stream not present");
58
59 auto ModStreamData = File.createIndexedStream(SN: ModiStream);
60
61 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
62 if (auto EC = ModS.reload())
63 return make_error<RawError>(Args: raw_error_code::corrupt_file,
64 Args: "Invalid module stream");
65
66 return std::move(ModS);
67}
68
69Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File,
70 uint32_t Index) {
71 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
72 if (!DbiOrErr)
73 return DbiOrErr.takeError();
74 DbiStream &Dbi = *DbiOrErr;
75 const auto &Modules = Dbi.modules();
76 auto Modi = Modules.getModuleDescriptor(Modi: Index);
77
78 uint16_t ModiStream = Modi.getModuleStreamIndex();
79 if (ModiStream == kInvalidStreamIndex)
80 return make_error<RawError>(Args: raw_error_code::no_stream,
81 Args: "Module stream not present");
82
83 auto ModStreamData = File.createIndexedStream(SN: ModiStream);
84
85 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
86 if (Error Err = ModS.reload())
87 return make_error<RawError>(Args: raw_error_code::corrupt_file,
88 Args: "Invalid module stream");
89
90 return std::move(ModS);
91}
92
93static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
94 StringRef Name,
95 BinaryStreamReader &Reader) {
96 if (Expected<StringRef> NameOrErr = Section.getName()) {
97 if (*NameOrErr != Name)
98 return false;
99 } else {
100 consumeError(Err: NameOrErr.takeError());
101 return false;
102 }
103
104 Expected<StringRef> ContentsOrErr = Section.getContents();
105 if (!ContentsOrErr) {
106 consumeError(Err: ContentsOrErr.takeError());
107 return false;
108 }
109
110 Reader = BinaryStreamReader(*ContentsOrErr, llvm::endianness::little);
111 uint32_t Magic;
112 if (Reader.bytesRemaining() < sizeof(uint32_t))
113 return false;
114 cantFail(Err: Reader.readInteger(Dest&: Magic));
115 if (Magic != COFF::DEBUG_SECTION_MAGIC)
116 return false;
117 return true;
118}
119
120static inline bool isDebugSSection(object::SectionRef Section,
121 DebugSubsectionArray &Subsections) {
122 BinaryStreamReader Reader;
123 if (!isCodeViewDebugSubsection(Section, Name: ".debug$S", Reader))
124 return false;
125
126 cantFail(Err: Reader.readArray(Array&: Subsections, Size: Reader.bytesRemaining()));
127 return true;
128}
129
130static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
131 BinaryStreamReader Reader;
132 if (!isCodeViewDebugSubsection(Section, Name: ".debug$T", Reader) &&
133 !isCodeViewDebugSubsection(Section, Name: ".debug$P", Reader))
134 return false;
135 cantFail(Err: Reader.readArray(Array&: Types, Size: Reader.bytesRemaining()));
136 return true;
137}
138
139static std::string formatChecksumKind(FileChecksumKind Kind) {
140 switch (Kind) {
141 RETURN_CASE(FileChecksumKind, None, "None");
142 RETURN_CASE(FileChecksumKind, MD5, "MD5");
143 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
144 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
145 }
146 return formatUnknownEnum(Value: Kind);
147}
148
149template <typename... Args>
150static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {
151 if (Append)
152 Printer.format(std::forward<Args>(args)...);
153 else
154 Printer.formatLine(std::forward<Args>(args)...);
155}
156
157SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
158 if (!File)
159 return;
160
161 if (File->isPdb())
162 initializeForPdb(Modi: GroupIndex);
163 else {
164 Name = ".debug$S";
165 uint32_t I = 0;
166 for (const auto &S : File->obj().sections()) {
167 DebugSubsectionArray SS;
168 if (!isDebugSSection(Section: S, Subsections&: SS))
169 continue;
170
171 if (!SC.hasChecksums() || !SC.hasStrings())
172 SC.initialize(FragmentRange&: SS);
173
174 if (I == GroupIndex)
175 Subsections = SS;
176
177 if (SC.hasChecksums() && SC.hasStrings())
178 break;
179 }
180 rebuildChecksumMap();
181 }
182}
183
184StringRef SymbolGroup::name() const { return Name; }
185
186void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
187 Subsections = SS;
188}
189
190void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
191
192void SymbolGroup::initializeForPdb(uint32_t Modi) {
193 assert(File && File->isPdb());
194
195 // PDB always uses the same string table, but each module has its own
196 // checksums. So we only set the strings if they're not already set.
197 if (!SC.hasStrings()) {
198 auto StringTable = File->pdb().getStringTable();
199 if (StringTable)
200 SC.setStrings(StringTable->getStringTable());
201 else
202 consumeError(Err: StringTable.takeError());
203 }
204
205 SC.resetChecksums();
206 auto MDS = getModuleDebugStream(File&: File->pdb(), ModuleName&: Name, Index: Modi);
207 if (!MDS) {
208 consumeError(Err: MDS.takeError());
209 return;
210 }
211
212 DebugStream = std::make_shared<ModuleDebugStreamRef>(args: std::move(*MDS));
213 Subsections = DebugStream->getSubsectionsArray();
214 SC.initialize(FragmentRange&: Subsections);
215 rebuildChecksumMap();
216}
217
218void SymbolGroup::rebuildChecksumMap() {
219 if (!SC.hasChecksums())
220 return;
221
222 for (const auto &Entry : SC.checksums()) {
223 auto S = SC.strings().getString(Offset: Entry.FileNameOffset);
224 if (!S)
225 continue;
226 ChecksumsByFile[*S] = Entry;
227 }
228}
229
230const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
231 assert(File && File->isPdb() && DebugStream);
232 return *DebugStream;
233}
234
235Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
236 return SC.strings().getString(Offset);
237}
238
239Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const {
240 StringRef Name;
241 if (!SC.hasChecksums()) {
242 return std::move(Name);
243 }
244
245 auto Iter = SC.checksums().getArray().at(Offset);
246 if (Iter == SC.checksums().getArray().end()) {
247 return std::move(Name);
248 }
249
250 uint32_t FO = Iter->FileNameOffset;
251 auto ExpectedFile = getNameFromStringTable(Offset: FO);
252 if (!ExpectedFile) {
253 return std::move(Name);
254 }
255
256 return *ExpectedFile;
257}
258
259void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
260 bool Append) const {
261 auto FC = ChecksumsByFile.find(Key: File);
262 if (FC == ChecksumsByFile.end()) {
263 formatInternal(Printer, Append, args: "- (no checksum) {0}", args&: File);
264 return;
265 }
266
267 formatInternal(Printer, Append, args: "- ({0}: {1}) {2}",
268 args: formatChecksumKind(Kind: FC->getValue().Kind),
269 args: toHex(Input: FC->getValue().Checksum), args&: File);
270}
271
272void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
273 uint32_t Offset,
274 bool Append) const {
275 if (!SC.hasChecksums()) {
276 formatInternal(Printer, Append, args: "(unknown file name offset {0})", args&: Offset);
277 return;
278 }
279
280 auto Iter = SC.checksums().getArray().at(Offset);
281 if (Iter == SC.checksums().getArray().end()) {
282 formatInternal(Printer, Append, args: "(unknown file name offset {0})", args&: Offset);
283 return;
284 }
285
286 uint32_t FO = Iter->FileNameOffset;
287 auto ExpectedFile = getNameFromStringTable(Offset: FO);
288 if (!ExpectedFile) {
289 formatInternal(Printer, Append, args: "(unknown file name offset {0})", args&: Offset);
290 consumeError(Err: ExpectedFile.takeError());
291 return;
292 }
293 if (Iter->Kind == FileChecksumKind::None) {
294 formatInternal(Printer, Append, args: "{0} (no checksum)", args&: *ExpectedFile);
295 } else {
296 formatInternal(Printer, Append, args: "{0} ({1}: {2})", args&: *ExpectedFile,
297 args: formatChecksumKind(Kind: Iter->Kind), args: toHex(Input: Iter->Checksum));
298 }
299}
300
301Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
302 InputFile IF;
303 if (!llvm::sys::fs::exists(Path))
304 return make_error<StringError>(Args: formatv(Fmt: "File {0} not found", Vals&: Path),
305 Args: inconvertibleErrorCode());
306
307 file_magic Magic;
308 if (auto EC = identify_magic(path: Path, result&: Magic))
309 return make_error<StringError>(
310 Args: formatv(Fmt: "Unable to identify file type for file {0}", Vals&: Path), Args&: EC);
311
312 if (Magic == file_magic::coff_object) {
313 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
314 if (!BinaryOrErr)
315 return BinaryOrErr.takeError();
316
317 IF.CoffObject = std::move(*BinaryOrErr);
318 IF.PdbOrObj = llvm::cast<COFFObjectFile>(Val: IF.CoffObject.getBinary());
319 return std::move(IF);
320 }
321
322 if (Magic == file_magic::pdb) {
323 std::unique_ptr<IPDBSession> Session;
324 if (auto Err = loadDataForPDB(Type: PDB_ReaderType::Native, Path, Session))
325 return std::move(Err);
326
327 IF.PdbSession.reset(p: static_cast<NativeSession *>(Session.release()));
328 IF.PdbOrObj = &IF.PdbSession->getPDBFile();
329
330 return std::move(IF);
331 }
332
333 if (!AllowUnknownFile)
334 return make_error<StringError>(
335 Args: formatv(Fmt: "File {0} is not a supported file type", Vals&: Path),
336 Args: inconvertibleErrorCode());
337
338 auto Result = MemoryBuffer::getFile(Filename: Path, /*IsText=*/false,
339 /*RequiresNullTerminator=*/false);
340 if (!Result)
341 return make_error<StringError>(
342 Args: formatv(Fmt: "File {0} could not be opened", Vals&: Path), Args: Result.getError());
343
344 IF.UnknownFile = std::move(*Result);
345 IF.PdbOrObj = IF.UnknownFile.get();
346 return std::move(IF);
347}
348
349PDBFile &InputFile::pdb() {
350 assert(isPdb());
351 return *cast<PDBFile *>(Val&: PdbOrObj);
352}
353
354const PDBFile &InputFile::pdb() const {
355 assert(isPdb());
356 return *cast<PDBFile *>(Val: PdbOrObj);
357}
358
359object::COFFObjectFile &InputFile::obj() {
360 assert(isObj());
361 return *cast<object::COFFObjectFile *>(Val&: PdbOrObj);
362}
363
364const object::COFFObjectFile &InputFile::obj() const {
365 assert(isObj());
366 return *cast<object::COFFObjectFile *>(Val: PdbOrObj);
367}
368
369MemoryBuffer &InputFile::unknown() {
370 assert(isUnknown());
371 return *cast<MemoryBuffer *>(Val&: PdbOrObj);
372}
373
374const MemoryBuffer &InputFile::unknown() const {
375 assert(isUnknown());
376 return *cast<MemoryBuffer *>(Val: PdbOrObj);
377}
378
379StringRef InputFile::getFilePath() const {
380 if (isPdb())
381 return pdb().getFilePath();
382 if (isObj())
383 return obj().getFileName();
384 assert(isUnknown());
385 return unknown().getBufferIdentifier();
386}
387
388bool InputFile::hasTypes() const {
389 if (isPdb())
390 return pdb().hasPDBTpiStream();
391
392 for (const auto &Section : obj().sections()) {
393 CVTypeArray Types;
394 if (isDebugTSection(Section, Types))
395 return true;
396 }
397 return false;
398}
399
400bool InputFile::hasIds() const {
401 if (isObj())
402 return false;
403 return pdb().hasPDBIpiStream();
404}
405
406bool InputFile::isPdb() const { return isa<PDBFile *>(Val: PdbOrObj); }
407
408bool InputFile::isObj() const {
409 return isa<object::COFFObjectFile *>(Val: PdbOrObj);
410}
411
412bool InputFile::isUnknown() const { return isa<MemoryBuffer *>(Val: PdbOrObj); }
413
414codeview::LazyRandomTypeCollection &
415InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
416 if (Types && Kind == kTypes)
417 return *Types;
418 if (Ids && Kind == kIds)
419 return *Ids;
420
421 if (Kind == kIds) {
422 assert(isPdb() && pdb().hasPDBIpiStream());
423 }
424
425 // If the collection was already initialized, we should have just returned it
426 // in step 1.
427 if (isPdb()) {
428 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
429 auto &Stream = cantFail(ValOrErr: (Kind == kIds) ? pdb().getPDBIpiStream()
430 : pdb().getPDBTpiStream());
431
432 auto &Array = Stream.typeArray();
433 uint32_t Count = Stream.getNumTypeRecords();
434 auto Offsets = Stream.getTypeIndexOffsets();
435 Collection =
436 std::make_unique<LazyRandomTypeCollection>(args: Array, args&: Count, args&: Offsets);
437 return *Collection;
438 }
439
440 assert(isObj());
441 assert(Kind == kTypes);
442 assert(!Types);
443
444 for (const auto &Section : obj().sections()) {
445 CVTypeArray Records;
446 if (!isDebugTSection(Section, Types&: Records))
447 continue;
448
449 Types = std::make_unique<LazyRandomTypeCollection>(args&: Records, args: 100);
450 return *Types;
451 }
452
453 Types = std::make_unique<LazyRandomTypeCollection>(args: 100);
454 return *Types;
455}
456
457codeview::LazyRandomTypeCollection &InputFile::types() {
458 return getOrCreateTypeCollection(Kind: kTypes);
459}
460
461codeview::LazyRandomTypeCollection &InputFile::ids() {
462 // Object files have only one type stream that contains both types and ids.
463 // Similarly, some PDBs don't contain an IPI stream, and for those both types
464 // and IDs are in the same stream.
465 if (isObj() || !pdb().hasPDBIpiStream())
466 return types();
467
468 return getOrCreateTypeCollection(Kind: kIds);
469}
470
471iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
472 return make_range<SymbolGroupIterator>(x: symbol_groups_begin(),
473 y: symbol_groups_end());
474}
475
476SymbolGroupIterator InputFile::symbol_groups_begin() {
477 return SymbolGroupIterator(*this);
478}
479
480SymbolGroupIterator InputFile::symbol_groups_end() {
481 return SymbolGroupIterator();
482}
483
484SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
485
486SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
487 if (File.isObj()) {
488 SectionIter = File.obj().section_begin();
489 scanToNextDebugS();
490 }
491}
492
493bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
494 bool E = isEnd();
495 bool RE = R.isEnd();
496 if (E || RE)
497 return E == RE;
498
499 if (Value.File != R.Value.File)
500 return false;
501 return Index == R.Index;
502}
503
504const SymbolGroup &SymbolGroupIterator::operator*() const {
505 assert(!isEnd());
506 return Value;
507}
508SymbolGroup &SymbolGroupIterator::operator*() {
509 assert(!isEnd());
510 return Value;
511}
512
513SymbolGroupIterator &SymbolGroupIterator::operator++() {
514 assert(Value.File && !isEnd());
515 ++Index;
516 if (isEnd())
517 return *this;
518
519 if (Value.File->isPdb()) {
520 Value.updatePdbModi(Modi: Index);
521 return *this;
522 }
523
524 scanToNextDebugS();
525 return *this;
526}
527
528void SymbolGroupIterator::scanToNextDebugS() {
529 assert(SectionIter);
530 auto End = Value.File->obj().section_end();
531 auto &Iter = *SectionIter;
532 assert(!isEnd());
533
534 while (++Iter != End) {
535 DebugSubsectionArray SS;
536 SectionRef SR = *Iter;
537 if (!isDebugSSection(Section: SR, Subsections&: SS))
538 continue;
539
540 Value.updateDebugS(SS);
541 return;
542 }
543}
544
545bool SymbolGroupIterator::isEnd() const {
546 if (!Value.File)
547 return true;
548 if (Value.File->isPdb()) {
549 DbiStream &Dbi = cantFail(ValOrErr: Value.File->pdb().getPDBDbiStream());
550 uint32_t Count = Dbi.modules().getModuleCount();
551 assert(Index <= Count);
552 return Index == Count;
553 }
554
555 assert(SectionIter);
556 return *SectionIter == Value.File->obj().section_end();
557}
558
559static bool isMyCode(const SymbolGroup &Group) {
560 if (Group.getFile().isObj())
561 return true;
562
563 StringRef Name = Group.name();
564 if (Name.starts_with(Prefix: "Import:"))
565 return false;
566 if (Name.ends_with_insensitive(Suffix: ".dll"))
567 return false;
568 if (Name.equals_insensitive(RHS: "* linker *"))
569 return false;
570 if (Name.starts_with_insensitive(Prefix: "f:\\binaries\\Intermediate\\vctools"))
571 return false;
572 if (Name.starts_with_insensitive(Prefix: "f:\\dd\\vctools\\crt"))
573 return false;
574 return true;
575}
576
577bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,
578 const FilterOptions &Filters) {
579 if (Filters.JustMyCode && !isMyCode(Group))
580 return false;
581
582 // If the arg was not specified on the command line, always dump all modules.
583 if (!Filters.DumpModi)
584 return true;
585
586 // Otherwise, only dump if this is the same module specified.
587 return (Filters.DumpModi == Idx);
588}
589