| 1 | //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 |  | 
|---|
| 9 | #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" | 
|---|
| 10 |  | 
|---|
| 11 | #include "llvm/ADT/ArrayRef.h" | 
|---|
| 12 | #include "llvm/DebugInfo/PDB/Native/Hash.h" | 
|---|
| 13 | #include "llvm/DebugInfo/PDB/Native/RawTypes.h" | 
|---|
| 14 | #include "llvm/Support/BinaryStreamWriter.h" | 
|---|
| 15 | #include "llvm/Support/Endian.h" | 
|---|
| 16 | #include "llvm/Support/TimeProfiler.h" | 
|---|
| 17 |  | 
|---|
| 18 | #include <map> | 
|---|
| 19 |  | 
|---|
| 20 | using namespace llvm; | 
|---|
| 21 | using namespace llvm::msf; | 
|---|
| 22 | using namespace llvm::support; | 
|---|
| 23 | using namespace llvm::support::endian; | 
|---|
| 24 | using namespace llvm::pdb; | 
|---|
| 25 |  | 
|---|
| 26 | StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table) | 
|---|
| 27 | : Table(&Table) {} | 
|---|
| 28 |  | 
|---|
| 29 | uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const { | 
|---|
| 30 | // The reference implementation doesn't include code for /src/headerblock | 
|---|
| 31 | // handling, but it can only read natvis entries lld's PDB files if | 
|---|
| 32 | // this hash function truncates the hash to 16 bit. | 
|---|
| 33 | // PDB/include/misc.h in the reference implementation has a hashSz() function | 
|---|
| 34 | // that returns an unsigned short, that seems what's being used for | 
|---|
| 35 | // /src/headerblock. | 
|---|
| 36 | return static_cast<uint16_t>(Table->getIdForString(S)); | 
|---|
| 37 | } | 
|---|
| 38 |  | 
|---|
| 39 | StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const { | 
|---|
| 40 | return Table->getStringForId(Id: Offset); | 
|---|
| 41 | } | 
|---|
| 42 |  | 
|---|
| 43 | uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) { | 
|---|
| 44 | return Table->insert(S); | 
|---|
| 45 | } | 
|---|
| 46 |  | 
|---|
| 47 | uint32_t PDBStringTableBuilder::insert(StringRef S) { | 
|---|
| 48 | return Strings.insert(S); | 
|---|
| 49 | } | 
|---|
| 50 |  | 
|---|
| 51 | uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const { | 
|---|
| 52 | return Strings.getIdForString(S); | 
|---|
| 53 | } | 
|---|
| 54 |  | 
|---|
| 55 | StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const { | 
|---|
| 56 | return Strings.getStringForId(Id); | 
|---|
| 57 | } | 
|---|
| 58 |  | 
|---|
| 59 | static uint32_t computeBucketCount(uint32_t NumStrings) { | 
|---|
| 60 | // This is a precomputed list of Buckets given the specified number of | 
|---|
| 61 | // strings.  Matching the reference algorithm exactly is not strictly | 
|---|
| 62 | // necessary for correctness, but it helps when comparing LLD's PDBs with | 
|---|
| 63 | // Microsoft's PDBs so as to eliminate superfluous differences. | 
|---|
| 64 | // The reference implementation does (in nmt.h, NMT::grow()): | 
|---|
| 65 | //   unsigned StringCount = 0; | 
|---|
| 66 | //   unsigned BucketCount = 1; | 
|---|
| 67 | //   fn insert() { | 
|---|
| 68 | //     ++StringCount; | 
|---|
| 69 | //     if (BucketCount * 3 / 4 < StringCount) | 
|---|
| 70 | //       BucketCount = BucketCount * 3 / 2 + 1; | 
|---|
| 71 | //   } | 
|---|
| 72 | // This list contains all StringCount, BucketCount pairs where BucketCount was | 
|---|
| 73 | // just incremented.  It ends before the first BucketCount entry where | 
|---|
| 74 | // BucketCount * 3 would overflow a 32-bit unsigned int. | 
|---|
| 75 | static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = { | 
|---|
| 76 | {0, 1}, | 
|---|
| 77 | {1, 2}, | 
|---|
| 78 | {2, 4}, | 
|---|
| 79 | {4, 7}, | 
|---|
| 80 | {6, 11}, | 
|---|
| 81 | {9, 17}, | 
|---|
| 82 | {13, 26}, | 
|---|
| 83 | {20, 40}, | 
|---|
| 84 | {31, 61}, | 
|---|
| 85 | {46, 92}, | 
|---|
| 86 | {70, 139}, | 
|---|
| 87 | {105, 209}, | 
|---|
| 88 | {157, 314}, | 
|---|
| 89 | {236, 472}, | 
|---|
| 90 | {355, 709}, | 
|---|
| 91 | {532, 1064}, | 
|---|
| 92 | {799, 1597}, | 
|---|
| 93 | {1198, 2396}, | 
|---|
| 94 | {1798, 3595}, | 
|---|
| 95 | {2697, 5393}, | 
|---|
| 96 | {4045, 8090}, | 
|---|
| 97 | {6068, 12136}, | 
|---|
| 98 | {9103, 18205}, | 
|---|
| 99 | {13654, 27308}, | 
|---|
| 100 | {20482, 40963}, | 
|---|
| 101 | {30723, 61445}, | 
|---|
| 102 | {46084, 92168}, | 
|---|
| 103 | {69127, 138253}, | 
|---|
| 104 | {103690, 207380}, | 
|---|
| 105 | {155536, 311071}, | 
|---|
| 106 | {233304, 466607}, | 
|---|
| 107 | {349956, 699911}, | 
|---|
| 108 | {524934, 1049867}, | 
|---|
| 109 | {787401, 1574801}, | 
|---|
| 110 | {1181101, 2362202}, | 
|---|
| 111 | {1771652, 3543304}, | 
|---|
| 112 | {2657479, 5314957}, | 
|---|
| 113 | {3986218, 7972436}, | 
|---|
| 114 | {5979328, 11958655}, | 
|---|
| 115 | {8968992, 17937983}, | 
|---|
| 116 | {13453488, 26906975}, | 
|---|
| 117 | {20180232, 40360463}, | 
|---|
| 118 | {30270348, 60540695}, | 
|---|
| 119 | {45405522, 90811043}, | 
|---|
| 120 | {68108283, 136216565}, | 
|---|
| 121 | {102162424, 204324848}, | 
|---|
| 122 | {153243637, 306487273}, | 
|---|
| 123 | {229865455, 459730910}, | 
|---|
| 124 | {344798183, 689596366}, | 
|---|
| 125 | {517197275, 1034394550}, | 
|---|
| 126 | {775795913, 1551591826}, | 
|---|
| 127 | {1163693870, 2327387740}}; | 
|---|
| 128 | const auto *Entry = llvm::lower_bound( | 
|---|
| 129 | Range: StringsToBuckets, Value: std::make_pair(x&: NumStrings, y: 0U), C: llvm::less_first()); | 
|---|
| 130 | assert(Entry != std::end(StringsToBuckets)); | 
|---|
| 131 | return Entry->second; | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
| 134 | uint32_t PDBStringTableBuilder::calculateHashTableSize() const { | 
|---|
| 135 | uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. | 
|---|
| 136 | Size += sizeof(uint32_t) * computeBucketCount(NumStrings: Strings.size()); | 
|---|
| 137 |  | 
|---|
| 138 | return Size; | 
|---|
| 139 | } | 
|---|
| 140 |  | 
|---|
| 141 | uint32_t PDBStringTableBuilder::calculateSerializedSize() const { | 
|---|
| 142 | uint32_t Size = 0; | 
|---|
| 143 | Size += sizeof(PDBStringTableHeader); | 
|---|
| 144 | Size += Strings.calculateSerializedSize(); | 
|---|
| 145 | Size += calculateHashTableSize(); | 
|---|
| 146 | Size += sizeof(uint32_t); // The /names stream ends with the string count. | 
|---|
| 147 | return Size; | 
|---|
| 148 | } | 
|---|
| 149 |  | 
|---|
| 150 | void PDBStringTableBuilder::setStrings( | 
|---|
| 151 | const codeview::DebugStringTableSubsection &Strings) { | 
|---|
| 152 | this->Strings = Strings; | 
|---|
| 153 | } | 
|---|
| 154 |  | 
|---|
| 155 | Error PDBStringTableBuilder::(BinaryStreamWriter &Writer) const { | 
|---|
| 156 | // Write a header | 
|---|
| 157 | PDBStringTableHeader H; | 
|---|
| 158 | H.Signature = PDBStringTableSignature; | 
|---|
| 159 | H.HashVersion = 1; | 
|---|
| 160 | H.ByteSize = Strings.calculateSerializedSize(); | 
|---|
| 161 | if (auto EC = Writer.writeObject(Obj: H)) | 
|---|
| 162 | return EC; | 
|---|
| 163 | assert(Writer.bytesRemaining() == 0); | 
|---|
| 164 | return Error::success(); | 
|---|
| 165 | } | 
|---|
| 166 |  | 
|---|
| 167 | Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { | 
|---|
| 168 | if (auto EC = Strings.commit(Writer)) | 
|---|
| 169 | return EC; | 
|---|
| 170 |  | 
|---|
| 171 | assert(Writer.bytesRemaining() == 0); | 
|---|
| 172 | return Error::success(); | 
|---|
| 173 | } | 
|---|
| 174 |  | 
|---|
| 175 | Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { | 
|---|
| 176 | // Write a hash table. | 
|---|
| 177 | uint32_t BucketCount = computeBucketCount(NumStrings: Strings.size()); | 
|---|
| 178 | if (auto EC = Writer.writeInteger(Value: BucketCount)) | 
|---|
| 179 | return EC; | 
|---|
| 180 | std::vector<ulittle32_t> Buckets(BucketCount); | 
|---|
| 181 |  | 
|---|
| 182 | for (const auto &Pair : Strings) { | 
|---|
| 183 | StringRef S = Pair.getKey(); | 
|---|
| 184 | uint32_t Offset = Pair.getValue(); | 
|---|
| 185 | uint32_t Hash = hashStringV1(Str: S); | 
|---|
| 186 |  | 
|---|
| 187 | for (uint32_t I = 0; I != BucketCount; ++I) { | 
|---|
| 188 | uint32_t Slot = (Hash + I) % BucketCount; | 
|---|
| 189 | if (Buckets[Slot] != 0) | 
|---|
| 190 | continue; | 
|---|
| 191 | Buckets[Slot] = Offset; | 
|---|
| 192 | break; | 
|---|
| 193 | } | 
|---|
| 194 | } | 
|---|
| 195 |  | 
|---|
| 196 | if (auto EC = Writer.writeArray(Array: ArrayRef<ulittle32_t>(Buckets))) | 
|---|
| 197 | return EC; | 
|---|
| 198 |  | 
|---|
| 199 | assert(Writer.bytesRemaining() == 0); | 
|---|
| 200 | return Error::success(); | 
|---|
| 201 | } | 
|---|
| 202 |  | 
|---|
| 203 | Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { | 
|---|
| 204 | if (auto EC = Writer.writeInteger<uint32_t>(Value: Strings.size())) | 
|---|
| 205 | return EC; | 
|---|
| 206 | assert(Writer.bytesRemaining() == 0); | 
|---|
| 207 | return Error::success(); | 
|---|
| 208 | } | 
|---|
| 209 |  | 
|---|
| 210 | Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { | 
|---|
| 211 | llvm::TimeTraceScope timeScope( "Commit strings table"); | 
|---|
| 212 | BinaryStreamWriter SectionWriter; | 
|---|
| 213 |  | 
|---|
| 214 | std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: sizeof(PDBStringTableHeader)); | 
|---|
| 215 | if (auto EC = writeHeader(Writer&: SectionWriter)) | 
|---|
| 216 | return EC; | 
|---|
| 217 |  | 
|---|
| 218 | std::tie(args&: SectionWriter, args&: Writer) = | 
|---|
| 219 | Writer.split(Off: Strings.calculateSerializedSize()); | 
|---|
| 220 | if (auto EC = writeStrings(Writer&: SectionWriter)) | 
|---|
| 221 | return EC; | 
|---|
| 222 |  | 
|---|
| 223 | std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: calculateHashTableSize()); | 
|---|
| 224 | if (auto EC = writeHashTable(Writer&: SectionWriter)) | 
|---|
| 225 | return EC; | 
|---|
| 226 |  | 
|---|
| 227 | std::tie(args&: SectionWriter, args&: Writer) = Writer.split(Off: sizeof(uint32_t)); | 
|---|
| 228 | if (auto EC = writeEpilogue(Writer&: SectionWriter)) | 
|---|
| 229 | return EC; | 
|---|
| 230 |  | 
|---|
| 231 | return Error::success(); | 
|---|
| 232 | } | 
|---|
| 233 |  | 
|---|