1 | //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" |
10 | |
11 | #include "llvm/ADT/ArrayRef.h" |
12 | #include "llvm/DebugInfo/PDB/Native/Hash.h" |
13 | #include "llvm/DebugInfo/PDB/Native/RawTypes.h" |
14 | #include "llvm/Support/BinaryStreamWriter.h" |
15 | #include "llvm/Support/Endian.h" |
16 | #include "llvm/Support/TimeProfiler.h" |
17 | |
18 | #include <map> |
19 | |
20 | using namespace llvm; |
21 | using namespace llvm::msf; |
22 | using namespace llvm::support; |
23 | using namespace llvm::support::endian; |
24 | using namespace llvm::pdb; |
25 | |
26 | StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table) |
27 | : Table(&Table) {} |
28 | |
29 | uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const { |
30 | // The reference implementation doesn't include code for /src/headerblock |
31 | // handling, but it can only read natvis entries lld's PDB files if |
32 | // this hash function truncates the hash to 16 bit. |
33 | // PDB/include/misc.h in the reference implementation has a hashSz() function |
34 | // that returns an unsigned short, that seems what's being used for |
35 | // /src/headerblock. |
36 | return static_cast<uint16_t>(Table->getIdForString(S)); |
37 | } |
38 | |
39 | StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const { |
40 | return Table->getStringForId(Id: Offset); |
41 | } |
42 | |
43 | uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) { |
44 | return Table->insert(S); |
45 | } |
46 | |
47 | uint32_t PDBStringTableBuilder::insert(StringRef S) { |
48 | return Strings.insert(S); |
49 | } |
50 | |
51 | uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const { |
52 | return Strings.getIdForString(S); |
53 | } |
54 | |
55 | StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const { |
56 | return Strings.getStringForId(Id); |
57 | } |
58 | |
59 | static uint32_t computeBucketCount(uint32_t NumStrings) { |
60 | // This is a precomputed list of Buckets given the specified number of |
61 | // strings. Matching the reference algorithm exactly is not strictly |
62 | // necessary for correctness, but it helps when comparing LLD's PDBs with |
63 | // Microsoft's PDBs so as to eliminate superfluous differences. |
64 | // The reference implementation does (in nmt.h, NMT::grow()): |
65 | // unsigned StringCount = 0; |
66 | // unsigned BucketCount = 1; |
67 | // fn insert() { |
68 | // ++StringCount; |
69 | // if (BucketCount * 3 / 4 < StringCount) |
70 | // BucketCount = BucketCount * 3 / 2 + 1; |
71 | // } |
72 | // This list contains all StringCount, BucketCount pairs where BucketCount was |
73 | // just incremented. It ends before the first BucketCount entry where |
74 | // BucketCount * 3 would overflow a 32-bit unsigned int. |
75 | static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = { |
76 | {0, 1}, |
77 | {1, 2}, |
78 | {2, 4}, |
79 | {4, 7}, |
80 | {6, 11}, |
81 | {9, 17}, |
82 | {13, 26}, |
83 | {20, 40}, |
84 | {31, 61}, |
85 | {46, 92}, |
86 | {70, 139}, |
87 | {105, 209}, |
88 | {157, 314}, |
89 | {236, 472}, |
90 | {355, 709}, |
91 | {532, 1064}, |
92 | {799, 1597}, |
93 | {1198, 2396}, |
94 | {1798, 3595}, |
95 | {2697, 5393}, |
96 | {4045, 8090}, |
97 | {6068, 12136}, |
98 | {9103, 18205}, |
99 | {13654, 27308}, |
100 | {20482, 40963}, |
101 | {30723, 61445}, |
102 | {46084, 92168}, |
103 | {69127, 138253}, |
104 | {103690, 207380}, |
105 | {155536, 311071}, |
106 | {233304, 466607}, |
107 | {349956, 699911}, |
108 | {524934, 1049867}, |
109 | {787401, 1574801}, |
110 | {1181101, 2362202}, |
111 | {1771652, 3543304}, |
112 | {2657479, 5314957}, |
113 | {3986218, 7972436}, |
114 | {5979328, 11958655}, |
115 | {8968992, 17937983}, |
116 | {13453488, 26906975}, |
117 | {20180232, 40360463}, |
118 | {30270348, 60540695}, |
119 | {45405522, 90811043}, |
120 | {68108283, 136216565}, |
121 | {102162424, 204324848}, |
122 | {153243637, 306487273}, |
123 | {229865455, 459730910}, |
124 | {344798183, 689596366}, |
125 | {517197275, 1034394550}, |
126 | {775795913, 1551591826}, |
127 | {1163693870, 2327387740}}; |
128 | const auto *Entry = llvm::lower_bound( |
129 | Range: StringsToBuckets, Value: std::make_pair(x&: NumStrings, y: 0U), C: llvm::less_first()); |
130 | assert(Entry != std::end(StringsToBuckets)); |
131 | return Entry->second; |
132 | } |
133 | |
134 | uint32_t PDBStringTableBuilder::calculateHashTableSize() const { |
135 | uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. |
136 | Size += sizeof(uint32_t) * computeBucketCount(NumStrings: Strings.size()); |
137 | |
138 | return Size; |
139 | } |
140 | |
141 | uint32_t PDBStringTableBuilder::calculateSerializedSize() const { |
142 | uint32_t Size = 0; |
143 | Size += sizeof(PDBStringTableHeader); |
144 | Size += Strings.calculateSerializedSize(); |
145 | Size += calculateHashTableSize(); |
146 | Size += sizeof(uint32_t); // The /names stream ends with the string count. |
147 | return Size; |
148 | } |
149 | |
150 | void PDBStringTableBuilder::setStrings( |
151 | const codeview::DebugStringTableSubsection &Strings) { |
152 | this->Strings = Strings; |
153 | } |
154 | |
155 | Error PDBStringTableBuilder::(BinaryStreamWriter &Writer) const { |
156 | // Write a header |
157 | PDBStringTableHeader H; |
158 | H.Signature = PDBStringTableSignature; |
159 | H.HashVersion = 1; |
160 | H.ByteSize = Strings.calculateSerializedSize(); |
161 | if (auto EC = Writer.writeObject(Obj: H)) |
162 | return EC; |
163 | assert(Writer.bytesRemaining() == 0); |
164 | return Error::success(); |
165 | } |
166 | |
167 | Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { |
168 | if (auto EC = Strings.commit(Writer)) |
169 | return EC; |
170 | |
171 | assert(Writer.bytesRemaining() == 0); |
172 | return Error::success(); |
173 | } |
174 | |
175 | Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { |
176 | // Write a hash table. |
177 | uint32_t BucketCount = computeBucketCount(NumStrings: Strings.size()); |
178 | if (auto EC = Writer.writeInteger(Value: BucketCount)) |
179 | return EC; |
180 | std::vector<ulittle32_t> Buckets(BucketCount); |
181 | |
182 | for (const auto &Pair : Strings) { |
183 | StringRef S = Pair.getKey(); |
184 | uint32_t Offset = Pair.getValue(); |
185 | uint32_t Hash = hashStringV1(Str: S); |
186 | |
187 | for (uint32_t I = 0; I != BucketCount; ++I) { |
188 | uint32_t Slot = (Hash + I) % BucketCount; |
189 | if (Buckets[Slot] != 0) |
190 | continue; |
191 | Buckets[Slot] = Offset; |
192 | break; |
193 | } |
194 | } |
195 | |
196 | if (auto EC = Writer.writeArray(Array: ArrayRef<ulittle32_t>(Buckets))) |
197 | return EC; |
198 | |
199 | assert(Writer.bytesRemaining() == 0); |
200 | return Error::success(); |
201 | } |
202 | |
203 | Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { |
204 | if (auto EC = Writer.writeInteger<uint32_t>(Value: Strings.size())) |
205 | return EC; |
206 | assert(Writer.bytesRemaining() == 0); |
207 | return Error::success(); |
208 | } |
209 | |
210 | Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { |
211 | llvm::TimeTraceScope timeScope("Commit strings table" ); |
212 | BinaryStreamWriter SectionWriter; |
213 | |
214 | std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: sizeof(PDBStringTableHeader)); |
215 | if (auto EC = writeHeader(Writer&: SectionWriter)) |
216 | return EC; |
217 | |
218 | std::tie(args&: SectionWriter, args&: Writer) = |
219 | Writer.split(Off: Strings.calculateSerializedSize()); |
220 | if (auto EC = writeStrings(Writer&: SectionWriter)) |
221 | return EC; |
222 | |
223 | std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: calculateHashTableSize()); |
224 | if (auto EC = writeHashTable(Writer&: SectionWriter)) |
225 | return EC; |
226 | |
227 | std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: sizeof(uint32_t)); |
228 | if (auto EC = writeEpilogue(Writer&: SectionWriter)) |
229 | return EC; |
230 | |
231 | return Error::success(); |
232 | } |
233 | |