1 | //===- StringTableBuilder.cpp - String table building utility -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/MC/StringTableBuilder.h" |
10 | #include "llvm/ADT/ArrayRef.h" |
11 | #include "llvm/ADT/CachedHashString.h" |
12 | #include "llvm/ADT/SmallString.h" |
13 | #include "llvm/ADT/StringRef.h" |
14 | #include "llvm/BinaryFormat/COFF.h" |
15 | #include "llvm/Support/Endian.h" |
16 | #include "llvm/Support/MathExtras.h" |
17 | #include "llvm/Support/raw_ostream.h" |
18 | #include <cassert> |
19 | #include <cstddef> |
20 | #include <cstdint> |
21 | #include <cstring> |
22 | #include <utility> |
23 | #include <vector> |
24 | |
25 | using namespace llvm; |
26 | |
27 | StringTableBuilder::~StringTableBuilder() = default; |
28 | |
29 | void StringTableBuilder::initSize() { |
30 | // Account for leading bytes in table so that offsets returned from add are |
31 | // correct. |
32 | switch (K) { |
33 | case RAW: |
34 | case DWARF: |
35 | Size = 0; |
36 | break; |
37 | case MachOLinked: |
38 | case MachO64Linked: |
39 | Size = 2; |
40 | break; |
41 | case MachO: |
42 | case MachO64: |
43 | case ELF: |
44 | case DXContainer: |
45 | // Start the table with a NUL byte. |
46 | Size = 1; |
47 | break; |
48 | case XCOFF: |
49 | case WinCOFF: |
50 | // Make room to write the table size later. |
51 | Size = 4; |
52 | break; |
53 | } |
54 | } |
55 | |
56 | StringTableBuilder::StringTableBuilder(Kind K, Align Alignment) |
57 | : K(K), Alignment(Alignment) { |
58 | initSize(); |
59 | } |
60 | |
61 | void StringTableBuilder::write(raw_ostream &OS) const { |
62 | assert(isFinalized()); |
63 | SmallString<0> Data; |
64 | Data.resize(N: getSize()); |
65 | write(Buf: (uint8_t *)Data.data()); |
66 | OS << Data; |
67 | } |
68 | |
69 | using StringPair = std::pair<CachedHashStringRef, size_t>; |
70 | |
71 | void StringTableBuilder::write(uint8_t *Buf) const { |
72 | assert(isFinalized()); |
73 | for (const StringPair &P : StringIndexMap) { |
74 | StringRef Data = P.first.val(); |
75 | if (!Data.empty()) |
76 | memcpy(dest: Buf + P.second, src: Data.data(), n: Data.size()); |
77 | } |
78 | // The COFF formats store the size of the string table in the first 4 bytes. |
79 | // For Windows, the format is little-endian; for AIX, it is big-endian. |
80 | if (K == WinCOFF) |
81 | support::endian::write32le(P: Buf, V: Size); |
82 | else if (K == XCOFF) |
83 | support::endian::write32be(P: Buf, V: Size); |
84 | } |
85 | |
86 | // Returns the character at Pos from end of a string. |
87 | static int charTailAt(StringPair *P, size_t Pos) { |
88 | StringRef S = P->first.val(); |
89 | if (Pos >= S.size()) |
90 | return -1; |
91 | return (unsigned char)S[S.size() - Pos - 1]; |
92 | } |
93 | |
94 | // Three-way radix quicksort. This is much faster than std::sort with strcmp |
95 | // because it does not compare characters that we already know the same. |
96 | static void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) { |
97 | tailcall: |
98 | if (Vec.size() <= 1) |
99 | return; |
100 | |
101 | // Partition items so that items in [0, I) are greater than the pivot, |
102 | // [I, J) are the same as the pivot, and [J, Vec.size()) are less than |
103 | // the pivot. |
104 | int Pivot = charTailAt(P: Vec[0], Pos); |
105 | size_t I = 0; |
106 | size_t J = Vec.size(); |
107 | for (size_t K = 1; K < J;) { |
108 | int C = charTailAt(P: Vec[K], Pos); |
109 | if (C > Pivot) |
110 | std::swap(a&: Vec[I++], b&: Vec[K++]); |
111 | else if (C < Pivot) |
112 | std::swap(a&: Vec[--J], b&: Vec[K]); |
113 | else |
114 | K++; |
115 | } |
116 | |
117 | multikeySort(Vec: Vec.slice(N: 0, M: I), Pos); |
118 | multikeySort(Vec: Vec.slice(N: J), Pos); |
119 | |
120 | // multikeySort(Vec.slice(I, J - I), Pos + 1), but with |
121 | // tail call optimization. |
122 | if (Pivot != -1) { |
123 | Vec = Vec.slice(N: I, M: J - I); |
124 | ++Pos; |
125 | goto tailcall; |
126 | } |
127 | } |
128 | |
129 | void StringTableBuilder::finalize() { |
130 | assert(K != DWARF); |
131 | finalizeStringTable(/*Optimize=*/true); |
132 | } |
133 | |
134 | void StringTableBuilder::finalizeInOrder() { |
135 | finalizeStringTable(/*Optimize=*/false); |
136 | } |
137 | |
138 | void StringTableBuilder::finalizeStringTable(bool Optimize) { |
139 | Finalized = true; |
140 | |
141 | if (Optimize) { |
142 | std::vector<StringPair *> Strings; |
143 | Strings.reserve(n: StringIndexMap.size()); |
144 | for (StringPair &P : StringIndexMap) |
145 | Strings.push_back(x: &P); |
146 | |
147 | multikeySort(Vec: Strings, Pos: 0); |
148 | initSize(); |
149 | |
150 | StringRef Previous; |
151 | for (StringPair *P : Strings) { |
152 | StringRef S = P->first.val(); |
153 | if (Previous.ends_with(Suffix: S)) { |
154 | size_t Pos = Size - S.size() - (K != RAW); |
155 | if (isAligned(Lhs: Alignment, SizeInBytes: Pos)) { |
156 | P->second = Pos; |
157 | continue; |
158 | } |
159 | } |
160 | |
161 | Size = alignTo(Size, A: Alignment); |
162 | P->second = Size; |
163 | |
164 | Size += S.size(); |
165 | if (K != RAW) |
166 | ++Size; |
167 | Previous = S; |
168 | } |
169 | } |
170 | |
171 | if (K == MachO || K == MachOLinked || K == DXContainer) |
172 | Size = alignTo(Value: Size, Align: 4); // Pad to multiple of 4. |
173 | if (K == MachO64 || K == MachO64Linked) |
174 | Size = alignTo(Value: Size, Align: 8); // Pad to multiple of 8. |
175 | |
176 | // According to ld64 the string table of a final linked Mach-O binary starts |
177 | // with " ", i.e. the first byte is ' ' and the second byte is zero. In |
178 | // 'initSize()' we reserved the first two bytes for holding this string. |
179 | if (K == MachOLinked || K == MachO64Linked) |
180 | StringIndexMap[CachedHashStringRef(" " )] = 0; |
181 | |
182 | // The first byte in an ELF string table must be null, according to the ELF |
183 | // specification. In 'initSize()' we reserved the first byte to hold null for |
184 | // this purpose and here we actually add the string to allow 'getOffset()' to |
185 | // be called on an empty string. |
186 | if (K == ELF) |
187 | StringIndexMap[CachedHashStringRef("" )] = 0; |
188 | } |
189 | |
190 | void StringTableBuilder::clear() { |
191 | Finalized = false; |
192 | StringIndexMap.clear(); |
193 | } |
194 | |
195 | size_t StringTableBuilder::getOffset(CachedHashStringRef S) const { |
196 | assert(isFinalized()); |
197 | auto I = StringIndexMap.find(Val: S); |
198 | assert(I != StringIndexMap.end() && "String is not in table!" ); |
199 | return I->second; |
200 | } |
201 | |
202 | size_t StringTableBuilder::add(CachedHashStringRef S) { |
203 | if (K == WinCOFF) |
204 | assert(S.size() > COFF::NameSize && "Short string in COFF string table!" ); |
205 | |
206 | assert(!isFinalized()); |
207 | auto P = StringIndexMap.insert(KV: std::make_pair(x&: S, y: 0)); |
208 | if (P.second) { |
209 | size_t Start = alignTo(Size, A: Alignment); |
210 | P.first->second = Start; |
211 | Size = Start + S.size() + (K != RAW); |
212 | } |
213 | return P.first->second; |
214 | } |
215 | |