| 1 | //===- PublicsStream.cpp - PDB Public Symbol Stream -----------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // The data structures defined in this file are based on the reference |
| 10 | // implementation which is available at |
| 11 | // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h |
| 12 | // |
| 13 | // When you are reading the reference source code, you'd find the |
| 14 | // information below useful. |
| 15 | // |
| 16 | // - ppdb1->m_fMinimalDbgInfo seems to be always true. |
| 17 | // - SMALLBUCKETS macro is defined. |
| 18 | // |
| 19 | // The reference doesn't compile, so I learned just by reading code. |
| 20 | // It's not guaranteed to be correct. |
| 21 | // |
| 22 | //===----------------------------------------------------------------------===// |
| 23 | |
| 24 | #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" |
| 25 | #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" |
| 26 | #include "llvm/DebugInfo/CodeView/SymbolRecord.h" |
| 27 | #include "llvm/DebugInfo/MSF/MappedBlockStream.h" |
| 28 | #include "llvm/DebugInfo/PDB/Native/RawError.h" |
| 29 | #include "llvm/DebugInfo/PDB/Native/RawTypes.h" |
| 30 | #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" |
| 31 | #include "llvm/Support/BinaryStreamReader.h" |
| 32 | #include "llvm/Support/Error.h" |
| 33 | #include <cstdint> |
| 34 | |
| 35 | using namespace llvm; |
| 36 | using namespace llvm::msf; |
| 37 | using namespace llvm::support; |
| 38 | using namespace llvm::pdb; |
| 39 | |
| 40 | PublicsStream::PublicsStream(std::unique_ptr<MappedBlockStream> Stream) |
| 41 | : Stream(std::move(Stream)) {} |
| 42 | |
| 43 | PublicsStream::~PublicsStream() = default; |
| 44 | |
| 45 | uint32_t PublicsStream::getSymHash() const { return Header->SymHash; } |
| 46 | uint16_t PublicsStream::getThunkTableSection() const { |
| 47 | return Header->ISectThunkTable; |
| 48 | } |
| 49 | uint32_t PublicsStream::getThunkTableOffset() const { |
| 50 | return Header->OffThunkTable; |
| 51 | } |
| 52 | |
| 53 | // Publics stream contains fixed-size headers and a serialized hash table. |
| 54 | // This implementation is not complete yet. It reads till the end of the |
| 55 | // stream so that we verify the stream is at least not corrupted. However, |
| 56 | // we skip over the hash table which we believe contains information about |
| 57 | // public symbols. |
| 58 | Error PublicsStream::reload() { |
| 59 | BinaryStreamReader Reader(*Stream); |
| 60 | |
| 61 | // Check stream size. |
| 62 | if (Reader.bytesRemaining() < |
| 63 | sizeof(PublicsStreamHeader) + sizeof(GSIHashHeader)) |
| 64 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
| 65 | Args: "Publics Stream does not contain a header." ); |
| 66 | |
| 67 | // Read PSGSIHDR struct. |
| 68 | if (Reader.readObject(Dest&: Header)) |
| 69 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
| 70 | Args: "Publics Stream does not contain a header." ); |
| 71 | |
| 72 | // Read the hash table. |
| 73 | if (auto E = PublicsTable.read(Reader)) |
| 74 | return E; |
| 75 | |
| 76 | // Something called "address map" follows. |
| 77 | uint32_t NumAddressMapEntries = Header->AddrMap / sizeof(uint32_t); |
| 78 | if (auto EC = Reader.readArray(Array&: AddressMap, NumItems: NumAddressMapEntries)) |
| 79 | return joinErrors(E1: std::move(EC), |
| 80 | E2: make_error<RawError>(Args: raw_error_code::corrupt_file, |
| 81 | Args: "Could not read an address map." )); |
| 82 | |
| 83 | // Something called "thunk map" follows. |
| 84 | if (auto EC = Reader.readArray(Array&: ThunkMap, NumItems: Header->NumThunks)) |
| 85 | return joinErrors(E1: std::move(EC), |
| 86 | E2: make_error<RawError>(Args: raw_error_code::corrupt_file, |
| 87 | Args: "Could not read a thunk map." )); |
| 88 | |
| 89 | // Something called "section map" follows. |
| 90 | if (Reader.bytesRemaining() > 0) { |
| 91 | if (auto EC = Reader.readArray(Array&: SectionOffsets, NumItems: Header->NumSections)) |
| 92 | return joinErrors(E1: std::move(EC), |
| 93 | E2: make_error<RawError>(Args: raw_error_code::corrupt_file, |
| 94 | Args: "Could not read a section map." )); |
| 95 | } |
| 96 | |
| 97 | if (Reader.bytesRemaining() > 0) |
| 98 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
| 99 | Args: "Corrupted publics stream." ); |
| 100 | return Error::success(); |
| 101 | } |
| 102 | |
| 103 | // This is a reimplementation of NearestSym: |
| 104 | // https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581 |
| 105 | std::optional<std::pair<codeview::PublicSym32, size_t>> |
| 106 | PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment, |
| 107 | uint32_t Offset) const { |
| 108 | // The address map is sorted by address, so we can use lower_bound to find the |
| 109 | // position. Each element is an offset into the symbols for a public symbol. |
| 110 | auto It = llvm::lower_bound( |
| 111 | Range: AddressMap, Value: std::tuple(Segment, Offset), |
| 112 | C: [&](support::ulittle32_t Cur, auto Addr) { |
| 113 | auto Sym = Symbols.readRecord(Offset: Cur.value()); |
| 114 | if (Sym.kind() != codeview::S_PUB32) |
| 115 | return false; // stop here, this is most likely corrupted debug info |
| 116 | |
| 117 | auto Psym = |
| 118 | codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>( |
| 119 | Symbol: Sym); |
| 120 | if (!Psym) { |
| 121 | consumeError(Err: Psym.takeError()); |
| 122 | return false; |
| 123 | } |
| 124 | |
| 125 | return std::tie(args&: Psym->Segment, args&: Psym->Offset) < Addr; |
| 126 | }); |
| 127 | |
| 128 | if (It == AddressMap.end()) |
| 129 | return std::nullopt; |
| 130 | |
| 131 | auto Sym = Symbols.readRecord(Offset: It->value()); |
| 132 | if (Sym.kind() != codeview::S_PUB32) |
| 133 | return std::nullopt; // this is most likely corrupted debug info |
| 134 | |
| 135 | auto MaybePsym = |
| 136 | codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Symbol: Sym); |
| 137 | if (!MaybePsym) { |
| 138 | consumeError(Err: MaybePsym.takeError()); |
| 139 | return std::nullopt; |
| 140 | } |
| 141 | codeview::PublicSym32 Psym = std::move(*MaybePsym); |
| 142 | |
| 143 | if (std::tuple(Segment, Offset) != std::tuple(Psym.Segment, Psym.Offset)) |
| 144 | return std::nullopt; |
| 145 | |
| 146 | std::ptrdiff_t IterOffset = It - AddressMap.begin(); |
| 147 | return std::pair{Psym, static_cast<size_t>(IterOffset)}; |
| 148 | } |
| 149 | |