1//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/TimeProfiler.h"

#include <algorithm>
#include <iterator>
17
18using namespace llvm;
19using namespace llvm::msf;
20using namespace llvm::support;
21using namespace llvm::support::endian;
22using namespace llvm::pdb;
23
24StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
25 : Table(&Table) {}
26
27uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
28 // The reference implementation doesn't include code for /src/headerblock
29 // handling, but it can only read natvis entries lld's PDB files if
30 // this hash function truncates the hash to 16 bit.
31 // PDB/include/misc.h in the reference implementation has a hashSz() function
32 // that returns an unsigned short, that seems what's being used for
33 // /src/headerblock.
34 return static_cast<uint16_t>(Table->getIdForString(S));
35}
36
37StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
38 return Table->getStringForId(Id: Offset);
39}
40
41uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
42 return Table->insert(S);
43}
44
45uint32_t PDBStringTableBuilder::insert(StringRef S) {
46 return Strings.insert(S);
47}
48
49uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
50 return Strings.getIdForString(S);
51}
52
53StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
54 return Strings.getStringForId(Id);
55}
56
/// Returns the number of hash buckets to emit for a table holding
/// \p NumStrings strings.
///
/// The result is the bucket count of the first table entry whose string count
/// is greater than or equal to \p NumStrings. \p NumStrings must not exceed
/// the string count of the last table entry (checked by assertion only).
static uint32_t computeBucketCount(uint32_t NumStrings) {
  // This is a precomputed list of Buckets given the specified number of
  // strings. Matching the reference algorithm exactly is not strictly
  // necessary for correctness, but it helps when comparing LLD's PDBs with
  // Microsoft's PDBs so as to eliminate superfluous differences.
  // The reference implementation does (in nmt.h, NMT::grow()):
  //   unsigned StringCount = 0;
  //   unsigned BucketCount = 1;
  //   fn insert() {
  //     ++StringCount;
  //     if (BucketCount * 3 / 4 < StringCount)
  //       BucketCount = BucketCount * 3 / 2 + 1;
  //   }
  // This list contains all StringCount, BucketCount pairs where BucketCount
  // was just incremented. It ends before the first BucketCount entry where
  // BucketCount * 3 would overflow a 32-bit unsigned int.
  static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
      {0, 1},
      {1, 2},
      {2, 4},
      {4, 7},
      {6, 11},
      {9, 17},
      {13, 26},
      {20, 40},
      {31, 61},
      {46, 92},
      {70, 139},
      {105, 209},
      {157, 314},
      {236, 472},
      {355, 709},
      {532, 1064},
      {799, 1597},
      {1198, 2396},
      {1798, 3595},
      {2697, 5393},
      {4045, 8090},
      {6068, 12136},
      {9103, 18205},
      {13654, 27308},
      {20482, 40963},
      {30723, 61445},
      {46084, 92168},
      {69127, 138253},
      {103690, 207380},
      {155536, 311071},
      {233304, 466607},
      {349956, 699911},
      {524934, 1049867},
      {787401, 1574801},
      {1181101, 2362202},
      {1771652, 3543304},
      {2657479, 5314957},
      {3986218, 7972436},
      {5979328, 11958655},
      {8968992, 17937983},
      {13453488, 26906975},
      {20180232, 40360463},
      {30270348, 60540695},
      {45405522, 90811043},
      {68108283, 136216565},
      {102162424, 204324848},
      {153243637, 306487273},
      {229865455, 459730910},
      {344798183, 689596366},
      {517197275, 1034394550},
      {775795913, 1551591826},
      {1163693870, 2327387740}};

  // The table is sorted by string count, so a binary search on the first
  // member finds the first entry with string count >= NumStrings.
  const auto *Entry = std::lower_bound(
      std::begin(StringsToBuckets), std::end(StringsToBuckets), NumStrings,
      [](const std::pair<uint32_t, uint32_t> &Candidate, uint32_t Count) {
        return Candidate.first < Count;
      });
  assert(Entry != std::end(StringsToBuckets));
  return Entry->second;
}
131
132uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
133 uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
134 Size += sizeof(uint32_t) * computeBucketCount(NumStrings: Strings.size());
135
136 return Size;
137}
138
139uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
140 uint32_t Size = 0;
141 Size += sizeof(PDBStringTableHeader);
142 Size += Strings.calculateSerializedSize();
143 Size += calculateHashTableSize();
144 Size += sizeof(uint32_t); // The /names stream ends with the string count.
145 return Size;
146}
147
148void PDBStringTableBuilder::setStrings(
149 const codeview::DebugStringTableSubsection &Strings) {
150 this->Strings = Strings;
151}
152
153Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
154 // Write a header
155 PDBStringTableHeader H;
156 H.Signature = PDBStringTableSignature;
157 H.HashVersion = 1;
158 H.ByteSize = Strings.calculateSerializedSize();
159 if (auto EC = Writer.writeObject(Obj: H))
160 return EC;
161 assert(Writer.bytesRemaining() == 0);
162 return Error::success();
163}
164
165Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
166 if (auto EC = Strings.commit(Writer))
167 return EC;
168
169 assert(Writer.bytesRemaining() == 0);
170 return Error::success();
171}
172
173Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
174 // Write a hash table.
175 uint32_t BucketCount = computeBucketCount(NumStrings: Strings.size());
176 if (auto EC = Writer.writeInteger(Value: BucketCount))
177 return EC;
178 std::vector<ulittle32_t> Buckets(BucketCount);
179
180 for (const auto &Pair : Strings) {
181 StringRef S = Pair.getKey();
182 uint32_t Offset = Pair.getValue();
183 uint32_t Hash = hashStringV1(Str: S);
184
185 for (uint32_t I = 0; I != BucketCount; ++I) {
186 uint32_t Slot = (Hash + I) % BucketCount;
187 if (Buckets[Slot] != 0)
188 continue;
189 Buckets[Slot] = Offset;
190 break;
191 }
192 }
193
194 if (auto EC = Writer.writeArray(Array: ArrayRef<ulittle32_t>(Buckets)))
195 return EC;
196
197 assert(Writer.bytesRemaining() == 0);
198 return Error::success();
199}
200
201Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
202 if (auto EC = Writer.writeInteger<uint32_t>(Value: Strings.size()))
203 return EC;
204 assert(Writer.bytesRemaining() == 0);
205 return Error::success();
206}
207
208Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
209 llvm::TimeTraceScope timeScope("Commit strings table");
210 BinaryStreamWriter SectionWriter;
211
212 std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: sizeof(PDBStringTableHeader));
213 if (auto EC = writeHeader(Writer&: SectionWriter))
214 return EC;
215
216 std::tie(args&: SectionWriter, args&: Writer) =
217 Writer.split(Off: Strings.calculateSerializedSize());
218 if (auto EC = writeStrings(Writer&: SectionWriter))
219 return EC;
220
221 std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: calculateHashTableSize());
222 if (auto EC = writeHashTable(Writer&: SectionWriter))
223 return EC;
224
225 std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: sizeof(uint32_t));
226 if (auto EC = writeEpilogue(Writer&: SectionWriter))
227 return EC;
228
229 return Error::success();
230}
231