1//===- PublicsStream.cpp - PDB Public Symbol Stream -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The data structures defined in this file are based on the reference
10// implementation which is available at
11// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h
12//
13// When you are reading the reference source code, you'd find the
14// information below useful.
15//
16// - ppdb1->m_fMinimalDbgInfo seems to be always true.
17// - SMALLBUCKETS macro is defined.
18//
19// The reference doesn't compile, so I learned just by reading code.
20// It's not guaranteed to be correct.
21//
22//===----------------------------------------------------------------------===//
23
24#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
25#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
26#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
27#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
28#include "llvm/DebugInfo/PDB/Native/RawError.h"
29#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
30#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
31#include "llvm/Support/BinaryStreamReader.h"
32#include "llvm/Support/Error.h"
33#include <cstdint>
34
35using namespace llvm;
36using namespace llvm::msf;
37using namespace llvm::support;
38using namespace llvm::pdb;
39
40PublicsStream::PublicsStream(std::unique_ptr<MappedBlockStream> Stream)
41 : Stream(std::move(Stream)) {}
42
43PublicsStream::~PublicsStream() = default;
44
45uint32_t PublicsStream::getSymHash() const { return Header->SymHash; }
46uint16_t PublicsStream::getThunkTableSection() const {
47 return Header->ISectThunkTable;
48}
49uint32_t PublicsStream::getThunkTableOffset() const {
50 return Header->OffThunkTable;
51}
52
53// Publics stream contains fixed-size headers and a serialized hash table.
54// This implementation is not complete yet. It reads till the end of the
55// stream so that we verify the stream is at least not corrupted. However,
56// we skip over the hash table which we believe contains information about
57// public symbols.
58Error PublicsStream::reload() {
59 BinaryStreamReader Reader(*Stream);
60
61 // Check stream size.
62 if (Reader.bytesRemaining() <
63 sizeof(PublicsStreamHeader) + sizeof(GSIHashHeader))
64 return make_error<RawError>(Args: raw_error_code::corrupt_file,
65 Args: "Publics Stream does not contain a header.");
66
67 // Read PSGSIHDR struct.
68 if (Reader.readObject(Dest&: Header))
69 return make_error<RawError>(Args: raw_error_code::corrupt_file,
70 Args: "Publics Stream does not contain a header.");
71
72 // Read the hash table.
73 if (auto E = PublicsTable.read(Reader))
74 return E;
75
76 // Something called "address map" follows.
77 uint32_t NumAddressMapEntries = Header->AddrMap / sizeof(uint32_t);
78 if (auto EC = Reader.readArray(Array&: AddressMap, NumItems: NumAddressMapEntries))
79 return joinErrors(E1: std::move(EC),
80 E2: make_error<RawError>(Args: raw_error_code::corrupt_file,
81 Args: "Could not read an address map."));
82
83 // Something called "thunk map" follows.
84 if (auto EC = Reader.readArray(Array&: ThunkMap, NumItems: Header->NumThunks))
85 return joinErrors(E1: std::move(EC),
86 E2: make_error<RawError>(Args: raw_error_code::corrupt_file,
87 Args: "Could not read a thunk map."));
88
89 // Something called "section map" follows.
90 if (Reader.bytesRemaining() > 0) {
91 if (auto EC = Reader.readArray(Array&: SectionOffsets, NumItems: Header->NumSections))
92 return joinErrors(E1: std::move(EC),
93 E2: make_error<RawError>(Args: raw_error_code::corrupt_file,
94 Args: "Could not read a section map."));
95 }
96
97 if (Reader.bytesRemaining() > 0)
98 return make_error<RawError>(Args: raw_error_code::corrupt_file,
99 Args: "Corrupted publics stream.");
100 return Error::success();
101}
102
103// This is a reimplementation of NearestSym:
104// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581
105std::optional<std::pair<codeview::PublicSym32, size_t>>
106PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
107 uint32_t Offset) const {
108 // The address map is sorted by address, so we can use lower_bound to find the
109 // position. Each element is an offset into the symbols for a public symbol.
110 auto It = llvm::lower_bound(
111 Range: AddressMap, Value: std::tuple(Segment, Offset),
112 C: [&](support::ulittle32_t Cur, auto Addr) {
113 auto Sym = Symbols.readRecord(Offset: Cur.value());
114 if (Sym.kind() != codeview::S_PUB32)
115 return false; // stop here, this is most likely corrupted debug info
116
117 auto Psym =
118 codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(
119 Symbol: Sym);
120 if (!Psym) {
121 consumeError(Err: Psym.takeError());
122 return false;
123 }
124
125 return std::tie(args&: Psym->Segment, args&: Psym->Offset) < Addr;
126 });
127
128 if (It == AddressMap.end())
129 return std::nullopt;
130
131 auto Sym = Symbols.readRecord(Offset: It->value());
132 if (Sym.kind() != codeview::S_PUB32)
133 return std::nullopt; // this is most likely corrupted debug info
134
135 auto MaybePsym =
136 codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Symbol: Sym);
137 if (!MaybePsym) {
138 consumeError(Err: MaybePsym.takeError());
139 return std::nullopt;
140 }
141 codeview::PublicSym32 Psym = std::move(*MaybePsym);
142
143 if (std::tuple(Segment, Offset) != std::tuple(Psym.Segment, Psym.Offset))
144 return std::nullopt;
145
146 std::ptrdiff_t IterOffset = It - AddressMap.begin();
147 return std::pair{Psym, static_cast<size_t>(IterOffset)};
148}
149