| 1 | //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" |
| 10 | |
| 11 | #include "llvm/ADT/ArrayRef.h" |
| 12 | #include "llvm/DebugInfo/PDB/Native/Hash.h" |
| 13 | #include "llvm/DebugInfo/PDB/Native/RawTypes.h" |
| 14 | #include "llvm/Support/BinaryStreamWriter.h" |
| 15 | #include "llvm/Support/Endian.h" |
| 16 | #include "llvm/Support/TimeProfiler.h" |
| 17 | |
| 18 | #include <map> |
| 19 | |
| 20 | using namespace llvm; |
| 21 | using namespace llvm::msf; |
| 22 | using namespace llvm::support; |
| 23 | using namespace llvm::support::endian; |
| 24 | using namespace llvm::pdb; |
| 25 | |
| 26 | StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table) |
| 27 | : Table(&Table) {} |
| 28 | |
| 29 | uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const { |
| 30 | // The reference implementation doesn't include code for /src/headerblock |
| 31 | // handling, but it can only read natvis entries lld's PDB files if |
| 32 | // this hash function truncates the hash to 16 bit. |
| 33 | // PDB/include/misc.h in the reference implementation has a hashSz() function |
| 34 | // that returns an unsigned short, that seems what's being used for |
| 35 | // /src/headerblock. |
| 36 | return static_cast<uint16_t>(Table->getIdForString(S)); |
| 37 | } |
| 38 | |
| 39 | StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const { |
| 40 | return Table->getStringForId(Id: Offset); |
| 41 | } |
| 42 | |
| 43 | uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) { |
| 44 | return Table->insert(S); |
| 45 | } |
| 46 | |
| 47 | uint32_t PDBStringTableBuilder::insert(StringRef S) { |
| 48 | return Strings.insert(S); |
| 49 | } |
| 50 | |
| 51 | uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const { |
| 52 | return Strings.getIdForString(S); |
| 53 | } |
| 54 | |
| 55 | StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const { |
| 56 | return Strings.getStringForId(Id); |
| 57 | } |
| 58 | |
| 59 | static uint32_t computeBucketCount(uint32_t NumStrings) { |
| 60 | // This is a precomputed list of Buckets given the specified number of |
| 61 | // strings. Matching the reference algorithm exactly is not strictly |
| 62 | // necessary for correctness, but it helps when comparing LLD's PDBs with |
| 63 | // Microsoft's PDBs so as to eliminate superfluous differences. |
| 64 | // The reference implementation does (in nmt.h, NMT::grow()): |
| 65 | // unsigned StringCount = 0; |
| 66 | // unsigned BucketCount = 1; |
| 67 | // fn insert() { |
| 68 | // ++StringCount; |
| 69 | // if (BucketCount * 3 / 4 < StringCount) |
| 70 | // BucketCount = BucketCount * 3 / 2 + 1; |
| 71 | // } |
| 72 | // This list contains all StringCount, BucketCount pairs where BucketCount was |
| 73 | // just incremented. It ends before the first BucketCount entry where |
| 74 | // BucketCount * 3 would overflow a 32-bit unsigned int. |
| 75 | static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = { |
| 76 | {0, 1}, |
| 77 | {1, 2}, |
| 78 | {2, 4}, |
| 79 | {4, 7}, |
| 80 | {6, 11}, |
| 81 | {9, 17}, |
| 82 | {13, 26}, |
| 83 | {20, 40}, |
| 84 | {31, 61}, |
| 85 | {46, 92}, |
| 86 | {70, 139}, |
| 87 | {105, 209}, |
| 88 | {157, 314}, |
| 89 | {236, 472}, |
| 90 | {355, 709}, |
| 91 | {532, 1064}, |
| 92 | {799, 1597}, |
| 93 | {1198, 2396}, |
| 94 | {1798, 3595}, |
| 95 | {2697, 5393}, |
| 96 | {4045, 8090}, |
| 97 | {6068, 12136}, |
| 98 | {9103, 18205}, |
| 99 | {13654, 27308}, |
| 100 | {20482, 40963}, |
| 101 | {30723, 61445}, |
| 102 | {46084, 92168}, |
| 103 | {69127, 138253}, |
| 104 | {103690, 207380}, |
| 105 | {155536, 311071}, |
| 106 | {233304, 466607}, |
| 107 | {349956, 699911}, |
| 108 | {524934, 1049867}, |
| 109 | {787401, 1574801}, |
| 110 | {1181101, 2362202}, |
| 111 | {1771652, 3543304}, |
| 112 | {2657479, 5314957}, |
| 113 | {3986218, 7972436}, |
| 114 | {5979328, 11958655}, |
| 115 | {8968992, 17937983}, |
| 116 | {13453488, 26906975}, |
| 117 | {20180232, 40360463}, |
| 118 | {30270348, 60540695}, |
| 119 | {45405522, 90811043}, |
| 120 | {68108283, 136216565}, |
| 121 | {102162424, 204324848}, |
| 122 | {153243637, 306487273}, |
| 123 | {229865455, 459730910}, |
| 124 | {344798183, 689596366}, |
| 125 | {517197275, 1034394550}, |
| 126 | {775795913, 1551591826}, |
| 127 | {1163693870, 2327387740}}; |
| 128 | const auto *Entry = llvm::lower_bound( |
| 129 | Range: StringsToBuckets, Value: std::make_pair(x&: NumStrings, y: 0U), C: llvm::less_first()); |
| 130 | assert(Entry != std::end(StringsToBuckets)); |
| 131 | return Entry->second; |
| 132 | } |
| 133 | |
| 134 | uint32_t PDBStringTableBuilder::calculateHashTableSize() const { |
| 135 | uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. |
| 136 | Size += sizeof(uint32_t) * computeBucketCount(NumStrings: Strings.size()); |
| 137 | |
| 138 | return Size; |
| 139 | } |
| 140 | |
| 141 | uint32_t PDBStringTableBuilder::calculateSerializedSize() const { |
| 142 | uint32_t Size = 0; |
| 143 | Size += sizeof(PDBStringTableHeader); |
| 144 | Size += Strings.calculateSerializedSize(); |
| 145 | Size += calculateHashTableSize(); |
| 146 | Size += sizeof(uint32_t); // The /names stream ends with the string count. |
| 147 | return Size; |
| 148 | } |
| 149 | |
| 150 | void PDBStringTableBuilder::setStrings( |
| 151 | const codeview::DebugStringTableSubsection &Strings) { |
| 152 | this->Strings = Strings; |
| 153 | } |
| 154 | |
| 155 | Error PDBStringTableBuilder::(BinaryStreamWriter &Writer) const { |
| 156 | // Write a header |
| 157 | PDBStringTableHeader H; |
| 158 | H.Signature = PDBStringTableSignature; |
| 159 | H.HashVersion = 1; |
| 160 | H.ByteSize = Strings.calculateSerializedSize(); |
| 161 | if (auto EC = Writer.writeObject(Obj: H)) |
| 162 | return EC; |
| 163 | assert(Writer.bytesRemaining() == 0); |
| 164 | return Error::success(); |
| 165 | } |
| 166 | |
| 167 | Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { |
| 168 | if (auto EC = Strings.commit(Writer)) |
| 169 | return EC; |
| 170 | |
| 171 | assert(Writer.bytesRemaining() == 0); |
| 172 | return Error::success(); |
| 173 | } |
| 174 | |
| 175 | Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { |
| 176 | // Write a hash table. |
| 177 | uint32_t BucketCount = computeBucketCount(NumStrings: Strings.size()); |
| 178 | if (auto EC = Writer.writeInteger(Value: BucketCount)) |
| 179 | return EC; |
| 180 | std::vector<ulittle32_t> Buckets(BucketCount); |
| 181 | |
| 182 | for (const auto &Pair : Strings) { |
| 183 | StringRef S = Pair.getKey(); |
| 184 | uint32_t Offset = Pair.getValue(); |
| 185 | uint32_t Hash = hashStringV1(Str: S); |
| 186 | |
| 187 | for (uint32_t I = 0; I != BucketCount; ++I) { |
| 188 | uint32_t Slot = (Hash + I) % BucketCount; |
| 189 | if (Buckets[Slot] != 0) |
| 190 | continue; |
| 191 | Buckets[Slot] = Offset; |
| 192 | break; |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | if (auto EC = Writer.writeArray(Array: ArrayRef<ulittle32_t>(Buckets))) |
| 197 | return EC; |
| 198 | |
| 199 | assert(Writer.bytesRemaining() == 0); |
| 200 | return Error::success(); |
| 201 | } |
| 202 | |
| 203 | Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { |
| 204 | if (auto EC = Writer.writeInteger<uint32_t>(Value: Strings.size())) |
| 205 | return EC; |
| 206 | assert(Writer.bytesRemaining() == 0); |
| 207 | return Error::success(); |
| 208 | } |
| 209 | |
| 210 | Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { |
| 211 | llvm::TimeTraceScope timeScope("Commit strings table" ); |
| 212 | BinaryStreamWriter SectionWriter; |
| 213 | |
| 214 | std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: sizeof(PDBStringTableHeader)); |
| 215 | if (auto EC = writeHeader(Writer&: SectionWriter)) |
| 216 | return EC; |
| 217 | |
| 218 | std::tie(args&: SectionWriter, args&: Writer) = |
| 219 | Writer.split(Off: Strings.calculateSerializedSize()); |
| 220 | if (auto EC = writeStrings(Writer&: SectionWriter)) |
| 221 | return EC; |
| 222 | |
| 223 | std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: calculateHashTableSize()); |
| 224 | if (auto EC = writeHashTable(Writer&: SectionWriter)) |
| 225 | return EC; |
| 226 | |
| 227 | std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: sizeof(uint32_t)); |
| 228 | if (auto EC = writeEpilogue(Writer&: SectionWriter)) |
| 229 | return EC; |
| 230 | |
| 231 | return Error::success(); |
| 232 | } |
| 233 | |