1 | //===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains data definitions and a reader and builder for a symbol |
10 | // table for LLVM IR. Its purpose is to allow linkers and other consumers of |
11 | // bitcode files to efficiently read the symbol table for symbol resolution |
12 | // purposes without needing to construct a module in memory. |
13 | // |
14 | // As with most object files the symbol table has two parts: the symbol table |
15 | // itself and a string table which is referenced by the symbol table. |
16 | // |
17 | // A symbol table corresponds to a single bitcode file, which may consist of |
18 | // multiple modules, so symbol tables may likewise contain symbols for multiple |
19 | // modules. |
20 | // |
21 | //===----------------------------------------------------------------------===// |
22 | |
23 | #ifndef LLVM_OBJECT_IRSYMTAB_H |
24 | #define LLVM_OBJECT_IRSYMTAB_H |
25 | |
26 | #include "llvm/ADT/ArrayRef.h" |
27 | #include "llvm/ADT/StringRef.h" |
28 | #include "llvm/ADT/iterator_range.h" |
29 | #include "llvm/IR/Comdat.h" |
30 | #include "llvm/IR/GlobalValue.h" |
31 | #include "llvm/Object/SymbolicFile.h" |
32 | #include "llvm/Support/Allocator.h" |
33 | #include "llvm/Support/Endian.h" |
34 | #include "llvm/Support/Error.h" |
35 | #include <cassert> |
36 | #include <cstdint> |
37 | #include <vector> |
38 | |
39 | namespace llvm { |
40 | |
41 | struct BitcodeFileContents; |
42 | class StringTableBuilder; |
43 | |
44 | namespace irsymtab { |
45 | |
46 | namespace storage { |
47 | |
48 | // The data structures in this namespace define the low-level serialization |
49 | // format. Clients that just want to read a symbol table should use the |
50 | // irsymtab::Reader class. |
51 | |
52 | using Word = support::ulittle32_t; |
53 | |
54 | /// A reference to a string in the string table. |
55 | struct Str { |
56 | Word Offset, Size; |
57 | |
58 | StringRef get(StringRef Strtab) const { |
59 | return {Strtab.data() + Offset, Size}; |
60 | } |
61 | }; |
62 | |
63 | /// A reference to a range of objects in the symbol table. |
64 | template <typename T> struct Range { |
65 | Word Offset, Size; |
66 | |
67 | ArrayRef<T> get(StringRef Symtab) const { |
68 | return {reinterpret_cast<const T *>(Symtab.data() + Offset), Size}; |
69 | } |
70 | }; |
71 | |
72 | /// Describes the range of a particular module's symbols within the symbol |
73 | /// table. |
74 | struct Module { |
75 | Word Begin, End; |
76 | |
77 | /// The index of the first Uncommon for this Module. |
78 | Word UncBegin; |
79 | }; |
80 | |
81 | /// This is equivalent to an IR comdat. |
82 | struct Comdat { |
83 | Str Name; |
84 | |
85 | // llvm::Comdat::SelectionKind |
86 | Word SelectionKind; |
87 | }; |
88 | |
89 | /// Contains the information needed by linkers for symbol resolution, as well as |
90 | /// by the LTO implementation itself. |
91 | struct Symbol { |
92 | /// The mangled symbol name. |
93 | Str Name; |
94 | |
95 | /// The unmangled symbol name, or the empty string if this is not an IR |
96 | /// symbol. |
97 | Str IRName; |
98 | |
99 | /// The index into Header::Comdats, or -1 if not a comdat member. |
100 | Word ComdatIndex; |
101 | |
102 | Word Flags; |
103 | enum FlagBits { |
104 | FB_visibility, // 2 bits |
105 | FB_has_uncommon = FB_visibility + 2, |
106 | FB_undefined, |
107 | FB_weak, |
108 | FB_common, |
109 | FB_indirect, |
110 | FB_used, |
111 | FB_tls, |
112 | FB_may_omit, |
113 | FB_global, |
114 | FB_format_specific, |
115 | FB_unnamed_addr, |
116 | FB_executable, |
117 | }; |
118 | }; |
119 | |
120 | /// This data structure contains rarely used symbol fields and is optionally |
121 | /// referenced by a Symbol. |
122 | struct Uncommon { |
123 | Word CommonSize, CommonAlign; |
124 | |
125 | /// COFF-specific: the name of the symbol that a weak external resolves to |
126 | /// if not defined. |
127 | Str COFFWeakExternFallbackName; |
128 | |
129 | /// Specified section name, if any. |
130 | Str SectionName; |
131 | }; |
132 | |
133 | |
134 | struct { |
135 | /// Version number of the symtab format. This number should be incremented |
136 | /// when the format changes, but it does not need to be incremented if a |
137 | /// change to LLVM would cause it to create a different symbol table. |
138 | Word ; |
139 | enum { = 3 }; |
140 | |
141 | /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION). |
142 | /// Consumers should rebuild the symbol table from IR if the producer's |
143 | /// version does not match the consumer's version due to potential differences |
144 | /// in symbol table format, symbol enumeration order and so on. |
145 | Str ; |
146 | |
147 | Range<Module> ; |
148 | Range<Comdat> ; |
149 | Range<Symbol> ; |
150 | Range<Uncommon> ; |
151 | |
152 | Str , ; |
153 | |
154 | /// COFF-specific: linker directives. |
155 | Str ; |
156 | |
157 | /// Dependent Library Specifiers |
158 | Range<Str> ; |
159 | }; |
160 | |
161 | } // end namespace storage |
162 | |
163 | /// Fills in Symtab and StrtabBuilder with a valid symbol and string table for |
164 | /// Mods. |
165 | Error build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, |
166 | StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc); |
167 | |
168 | /// This represents a symbol that has been read from a storage::Symbol and |
169 | /// possibly a storage::Uncommon. |
170 | struct Symbol { |
171 | // Copied from storage::Symbol. |
172 | StringRef Name, IRName; |
173 | int ComdatIndex; |
174 | uint32_t Flags; |
175 | |
176 | // Copied from storage::Uncommon. |
177 | uint32_t CommonSize, CommonAlign; |
178 | StringRef COFFWeakExternFallbackName; |
179 | StringRef SectionName; |
180 | |
181 | /// Returns the mangled symbol name. |
182 | StringRef getName() const { return Name; } |
183 | |
184 | /// Returns the unmangled symbol name, or the empty string if this is not an |
185 | /// IR symbol. |
186 | StringRef getIRName() const { return IRName; } |
187 | |
188 | /// Returns the index into the comdat table (see Reader::getComdatTable()), or |
189 | /// -1 if not a comdat member. |
190 | int getComdatIndex() const { return ComdatIndex; } |
191 | |
192 | using S = storage::Symbol; |
193 | |
194 | GlobalValue::VisibilityTypes getVisibility() const { |
195 | return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3); |
196 | } |
197 | |
198 | bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; } |
199 | bool isWeak() const { return (Flags >> S::FB_weak) & 1; } |
200 | bool isCommon() const { return (Flags >> S::FB_common) & 1; } |
201 | bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; } |
202 | bool isUsed() const { return (Flags >> S::FB_used) & 1; } |
203 | bool isTLS() const { return (Flags >> S::FB_tls) & 1; } |
204 | |
205 | bool canBeOmittedFromSymbolTable() const { |
206 | return (Flags >> S::FB_may_omit) & 1; |
207 | } |
208 | |
209 | bool isGlobal() const { return (Flags >> S::FB_global) & 1; } |
210 | bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; } |
211 | bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; } |
212 | bool isExecutable() const { return (Flags >> S::FB_executable) & 1; } |
213 | |
214 | uint64_t getCommonSize() const { |
215 | assert(isCommon()); |
216 | return CommonSize; |
217 | } |
218 | |
219 | uint32_t getCommonAlignment() const { |
220 | assert(isCommon()); |
221 | return CommonAlign; |
222 | } |
223 | |
224 | /// COFF-specific: for weak externals, returns the name of the symbol that is |
225 | /// used as a fallback if the weak external remains undefined. |
226 | StringRef getCOFFWeakExternalFallback() const { |
227 | assert(isWeak() && isIndirect()); |
228 | return COFFWeakExternFallbackName; |
229 | } |
230 | |
231 | StringRef getSectionName() const { return SectionName; } |
232 | }; |
233 | |
234 | /// This class can be used to read a Symtab and Strtab produced by |
235 | /// irsymtab::build. |
236 | class Reader { |
237 | StringRef Symtab, Strtab; |
238 | |
239 | ArrayRef<storage::Module> Modules; |
240 | ArrayRef<storage::Comdat> Comdats; |
241 | ArrayRef<storage::Symbol> Symbols; |
242 | ArrayRef<storage::Uncommon> Uncommons; |
243 | ArrayRef<storage::Str> DependentLibraries; |
244 | |
245 | StringRef str(storage::Str S) const { return S.get(Strtab); } |
246 | |
247 | template <typename T> ArrayRef<T> range(storage::Range<T> R) const { |
248 | return R.get(Symtab); |
249 | } |
250 | |
251 | const storage::Header &() const { |
252 | return *reinterpret_cast<const storage::Header *>(Symtab.data()); |
253 | } |
254 | |
255 | public: |
256 | class SymbolRef; |
257 | |
258 | Reader() = default; |
259 | Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) { |
260 | Modules = range(R: header().Modules); |
261 | Comdats = range(R: header().Comdats); |
262 | Symbols = range(R: header().Symbols); |
263 | Uncommons = range(R: header().Uncommons); |
264 | DependentLibraries = range(R: header().DependentLibraries); |
265 | } |
266 | |
267 | using symbol_range = iterator_range<object::content_iterator<SymbolRef>>; |
268 | |
269 | /// Returns the symbol table for the entire bitcode file. |
270 | /// The symbols enumerated by this method are ephemeral, but they can be |
271 | /// copied into an irsymtab::Symbol object. |
272 | symbol_range symbols() const; |
273 | |
274 | size_t getNumModules() const { return Modules.size(); } |
275 | |
276 | /// Returns a slice of the symbol table for the I'th module in the file. |
277 | /// The symbols enumerated by this method are ephemeral, but they can be |
278 | /// copied into an irsymtab::Symbol object. |
279 | symbol_range module_symbols(unsigned I) const; |
280 | |
281 | StringRef getTargetTriple() const { return str(S: header().TargetTriple); } |
282 | |
283 | /// Returns the source file path specified at compile time. |
284 | StringRef getSourceFileName() const { return str(S: header().SourceFileName); } |
285 | |
286 | /// Returns a table with all the comdats used by this file. |
287 | std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>> |
288 | getComdatTable() const { |
289 | std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>> ComdatTable; |
290 | ComdatTable.reserve(n: Comdats.size()); |
291 | for (auto C : Comdats) |
292 | ComdatTable.push_back(x: {str(S: C.Name), llvm::Comdat::SelectionKind( |
293 | uint32_t(C.SelectionKind))}); |
294 | return ComdatTable; |
295 | } |
296 | |
297 | /// COFF-specific: returns linker options specified in the input file. |
298 | StringRef getCOFFLinkerOpts() const { return str(S: header().COFFLinkerOpts); } |
299 | |
300 | /// Returns dependent library specifiers |
301 | std::vector<StringRef> getDependentLibraries() const { |
302 | std::vector<StringRef> Specifiers; |
303 | Specifiers.reserve(n: DependentLibraries.size()); |
304 | for (auto S : DependentLibraries) { |
305 | Specifiers.push_back(x: str(S)); |
306 | } |
307 | return Specifiers; |
308 | } |
309 | }; |
310 | |
311 | /// Ephemeral symbols produced by Reader::symbols() and |
312 | /// Reader::module_symbols(). |
313 | class Reader::SymbolRef : public Symbol { |
314 | const storage::Symbol *SymI, *SymE; |
315 | const storage::Uncommon *UncI; |
316 | const Reader *R; |
317 | |
318 | void read() { |
319 | if (SymI == SymE) |
320 | return; |
321 | |
322 | Name = R->str(S: SymI->Name); |
323 | IRName = R->str(S: SymI->IRName); |
324 | ComdatIndex = SymI->ComdatIndex; |
325 | Flags = SymI->Flags; |
326 | |
327 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) { |
328 | CommonSize = UncI->CommonSize; |
329 | CommonAlign = UncI->CommonAlign; |
330 | COFFWeakExternFallbackName = R->str(S: UncI->COFFWeakExternFallbackName); |
331 | SectionName = R->str(S: UncI->SectionName); |
332 | } else |
333 | // Reset this field so it can be queried unconditionally for all symbols. |
334 | SectionName = "" ; |
335 | } |
336 | |
337 | public: |
338 | SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE, |
339 | const storage::Uncommon *UncI, const Reader *R) |
340 | : SymI(SymI), SymE(SymE), UncI(UncI), R(R) { |
341 | read(); |
342 | } |
343 | |
344 | void moveNext() { |
345 | ++SymI; |
346 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) |
347 | ++UncI; |
348 | read(); |
349 | } |
350 | |
351 | bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; } |
352 | }; |
353 | |
354 | inline Reader::symbol_range Reader::symbols() const { |
355 | return {SymbolRef(Symbols.begin(), Symbols.end(), Uncommons.begin(), this), |
356 | SymbolRef(Symbols.end(), Symbols.end(), nullptr, this)}; |
357 | } |
358 | |
359 | inline Reader::symbol_range Reader::module_symbols(unsigned I) const { |
360 | const storage::Module &M = Modules[I]; |
361 | const storage::Symbol *MBegin = Symbols.begin() + M.Begin, |
362 | *MEnd = Symbols.begin() + M.End; |
363 | return {SymbolRef(MBegin, MEnd, Uncommons.begin() + M.UncBegin, this), |
364 | SymbolRef(MEnd, MEnd, nullptr, this)}; |
365 | } |
366 | |
367 | /// The contents of the irsymtab in a bitcode file. Any underlying data for the |
368 | /// irsymtab are owned by Symtab and Strtab. |
369 | struct FileContents { |
370 | SmallVector<char, 0> Symtab, Strtab; |
371 | Reader TheReader; |
372 | }; |
373 | |
374 | /// Reads the contents of a bitcode file, creating its irsymtab if necessary. |
375 | Expected<FileContents> readBitcode(const BitcodeFileContents &BFC); |
376 | |
377 | } // end namespace irsymtab |
378 | } // end namespace llvm |
379 | |
380 | #endif // LLVM_OBJECT_IRSYMTAB_H |
381 | |