1 | //===- IRSymtab.cpp - implementation of IR symbol tables ------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/Object/IRSymtab.h" |
10 | #include "llvm/ADT/ArrayRef.h" |
11 | #include "llvm/ADT/DenseMap.h" |
12 | #include "llvm/ADT/SmallPtrSet.h" |
13 | #include "llvm/ADT/SmallString.h" |
14 | #include "llvm/ADT/SmallVector.h" |
15 | #include "llvm/ADT/StringRef.h" |
16 | #include "llvm/Bitcode/BitcodeReader.h" |
17 | #include "llvm/Config/llvm-config.h" |
18 | #include "llvm/IR/Comdat.h" |
19 | #include "llvm/IR/DataLayout.h" |
20 | #include "llvm/IR/GlobalAlias.h" |
21 | #include "llvm/IR/GlobalObject.h" |
22 | #include "llvm/IR/Mangler.h" |
23 | #include "llvm/IR/Metadata.h" |
24 | #include "llvm/IR/Module.h" |
25 | #include "llvm/IR/RuntimeLibcalls.h" |
26 | #include "llvm/MC/StringTableBuilder.h" |
27 | #include "llvm/Object/ModuleSymbolTable.h" |
28 | #include "llvm/Object/SymbolicFile.h" |
29 | #include "llvm/Support/Allocator.h" |
30 | #include "llvm/Support/Casting.h" |
31 | #include "llvm/Support/CommandLine.h" |
32 | #include "llvm/Support/Error.h" |
33 | #include "llvm/Support/StringSaver.h" |
34 | #include "llvm/Support/VCSRevision.h" |
35 | #include "llvm/Support/raw_ostream.h" |
36 | #include "llvm/TargetParser/Triple.h" |
37 | #include <cassert> |
38 | #include <string> |
39 | #include <utility> |
40 | #include <vector> |
41 | |
42 | using namespace llvm; |
43 | using namespace irsymtab; |
44 | |
45 | static cl::opt<bool> DisableBitcodeVersionUpgrade( |
46 | "disable-bitcode-version-upgrade" , cl::Hidden, |
47 | cl::desc("Disable automatic bitcode upgrade for version mismatch" )); |
48 | |
// Symbol names that are always flagged as used in addition to the runtime
// libcall names. The table is never modified, so both the characters and the
// pointers themselves are const.
static const char *const PreservedSymbols[] = {
    // These are global variables rather than functions, so they are listed
    // here instead of in RuntimeLibcalls.def.
    // TODO: Are there other such variables?
    "__ssp_canary_word",
    "__stack_chk_guard",
};
56 | |
57 | namespace { |
58 | |
59 | const char *getExpectedProducerName() { |
60 | static char DefaultName[] = LLVM_VERSION_STRING |
61 | #ifdef LLVM_REVISION |
62 | " " LLVM_REVISION |
63 | #endif |
64 | ; |
65 | // Allows for testing of the irsymtab writer and upgrade mechanism. This |
66 | // environment variable should not be set by users. |
67 | if (char *OverrideName = getenv(name: "LLVM_OVERRIDE_PRODUCER" )) |
68 | return OverrideName; |
69 | return DefaultName; |
70 | } |
71 | |
72 | const char *kExpectedProducerName = getExpectedProducerName(); |
73 | |
74 | /// Stores the temporary state that is required to build an IR symbol table. |
75 | struct Builder { |
76 | SmallVector<char, 0> &Symtab; |
77 | StringTableBuilder &StrtabBuilder; |
78 | StringSaver Saver; |
79 | |
80 | // This ctor initializes a StringSaver using the passed in BumpPtrAllocator. |
81 | // The StringTableBuilder does not create a copy of any strings added to it, |
82 | // so this provides somewhere to store any strings that we create. |
83 | Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder, |
84 | BumpPtrAllocator &Alloc) |
85 | : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {} |
86 | |
87 | DenseMap<const Comdat *, int> ComdatMap; |
88 | Mangler Mang; |
89 | Triple TT; |
90 | |
91 | std::vector<storage::Comdat> Comdats; |
92 | std::vector<storage::Module> Mods; |
93 | std::vector<storage::Symbol> Syms; |
94 | std::vector<storage::Uncommon> Uncommons; |
95 | |
96 | std::string COFFLinkerOpts; |
97 | raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts}; |
98 | |
99 | std::vector<storage::Str> DependentLibraries; |
100 | |
101 | void setStr(storage::Str &S, StringRef Value) { |
102 | S.Offset = StrtabBuilder.add(S: Value); |
103 | S.Size = Value.size(); |
104 | } |
105 | |
106 | template <typename T> |
107 | void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) { |
108 | R.Offset = Symtab.size(); |
109 | R.Size = Objs.size(); |
110 | Symtab.insert(I: Symtab.end(), From: reinterpret_cast<const char *>(Objs.data()), |
111 | To: reinterpret_cast<const char *>(Objs.data() + Objs.size())); |
112 | } |
113 | |
114 | Expected<int> getComdatIndex(const Comdat *C, const Module *M); |
115 | |
116 | Error addModule(Module *M); |
117 | Error addSymbol(const ModuleSymbolTable &Msymtab, |
118 | const SmallPtrSet<GlobalValue *, 4> &Used, |
119 | ModuleSymbolTable::Symbol Sym); |
120 | |
121 | Error build(ArrayRef<Module *> Mods); |
122 | }; |
123 | |
124 | Error Builder::addModule(Module *M) { |
125 | if (M->getDataLayoutStr().empty()) |
126 | return make_error<StringError>(Args: "input module has no datalayout" , |
127 | Args: inconvertibleErrorCode()); |
128 | |
129 | // Symbols in the llvm.used list will get the FB_Used bit and will not be |
130 | // internalized. We do this for llvm.compiler.used as well: |
131 | // |
132 | // IR symbol table tracks module-level asm symbol references but not inline |
133 | // asm. A symbol only referenced by inline asm is not in the IR symbol table, |
134 | // so we may not know that the definition (in another translation unit) is |
135 | // referenced. That definition may have __attribute__((used)) (which lowers to |
136 | // llvm.compiler.used on ELF targets) to communicate to the compiler that it |
137 | // may be used by inline asm. The usage is perfectly fine, so we treat |
138 | // llvm.compiler.used conservatively as llvm.used to work around our own |
139 | // limitation. |
140 | SmallVector<GlobalValue *, 4> UsedV; |
141 | collectUsedGlobalVariables(M: *M, Vec&: UsedV, /*CompilerUsed=*/false); |
142 | collectUsedGlobalVariables(M: *M, Vec&: UsedV, /*CompilerUsed=*/true); |
143 | SmallPtrSet<GlobalValue *, 4> Used(UsedV.begin(), UsedV.end()); |
144 | |
145 | ModuleSymbolTable Msymtab; |
146 | Msymtab.addModule(M); |
147 | |
148 | storage::Module Mod; |
149 | Mod.Begin = Syms.size(); |
150 | Mod.End = Syms.size() + Msymtab.symbols().size(); |
151 | Mod.UncBegin = Uncommons.size(); |
152 | Mods.push_back(x: Mod); |
153 | |
154 | if (TT.isOSBinFormatCOFF()) { |
155 | if (auto E = M->materializeMetadata()) |
156 | return E; |
157 | if (NamedMDNode *LinkerOptions = |
158 | M->getNamedMetadata(Name: "llvm.linker.options" )) { |
159 | for (MDNode *MDOptions : LinkerOptions->operands()) |
160 | for (const MDOperand &MDOption : cast<MDNode>(Val: MDOptions)->operands()) |
161 | COFFLinkerOptsOS << " " << cast<MDString>(Val: MDOption)->getString(); |
162 | } |
163 | } |
164 | |
165 | if (TT.isOSBinFormatELF()) { |
166 | if (auto E = M->materializeMetadata()) |
167 | return E; |
168 | if (NamedMDNode *N = M->getNamedMetadata(Name: "llvm.dependent-libraries" )) { |
169 | for (MDNode *MDOptions : N->operands()) { |
170 | const auto OperandStr = |
171 | cast<MDString>(Val: cast<MDNode>(Val: MDOptions)->getOperand(I: 0))->getString(); |
172 | storage::Str Specifier; |
173 | setStr(S&: Specifier, Value: OperandStr); |
174 | DependentLibraries.emplace_back(args&: Specifier); |
175 | } |
176 | } |
177 | } |
178 | |
179 | for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) |
180 | if (Error Err = addSymbol(Msymtab, Used, Sym: Msym)) |
181 | return Err; |
182 | |
183 | return Error::success(); |
184 | } |
185 | |
186 | Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) { |
187 | auto P = ComdatMap.insert(KV: std::make_pair(x&: C, y: Comdats.size())); |
188 | if (P.second) { |
189 | std::string Name; |
190 | if (TT.isOSBinFormatCOFF()) { |
191 | const GlobalValue *GV = M->getNamedValue(Name: C->getName()); |
192 | if (!GV) |
193 | return make_error<StringError>(Args: "Could not find leader" , |
194 | Args: inconvertibleErrorCode()); |
195 | // Internal leaders do not affect symbol resolution, therefore they do not |
196 | // appear in the symbol table. |
197 | if (GV->hasLocalLinkage()) { |
198 | P.first->second = -1; |
199 | return -1; |
200 | } |
201 | llvm::raw_string_ostream OS(Name); |
202 | Mang.getNameWithPrefix(OS, GV, CannotUsePrivateLabel: false); |
203 | } else { |
204 | Name = std::string(C->getName()); |
205 | } |
206 | |
207 | storage::Comdat Comdat; |
208 | setStr(S&: Comdat.Name, Value: Saver.save(S: Name)); |
209 | Comdat.SelectionKind = C->getSelectionKind(); |
210 | Comdats.push_back(x: Comdat); |
211 | } |
212 | |
213 | return P.first->second; |
214 | } |
215 | |
216 | static DenseSet<StringRef> buildPreservedSymbolsSet(const Triple &TT) { |
217 | DenseSet<StringRef> PreservedSymbolSet(std::begin(arr&: PreservedSymbols), |
218 | std::end(arr&: PreservedSymbols)); |
219 | |
220 | RTLIB::RuntimeLibcallsInfo Libcalls(TT); |
221 | for (const char *Name : Libcalls.getLibcallNames()) { |
222 | if (Name) |
223 | PreservedSymbolSet.insert(V: Name); |
224 | } |
225 | return PreservedSymbolSet; |
226 | } |
227 | |
228 | Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, |
229 | const SmallPtrSet<GlobalValue *, 4> &Used, |
230 | ModuleSymbolTable::Symbol Msym) { |
231 | Syms.emplace_back(); |
232 | storage::Symbol &Sym = Syms.back(); |
233 | Sym = {}; |
234 | |
235 | storage::Uncommon *Unc = nullptr; |
236 | auto Uncommon = [&]() -> storage::Uncommon & { |
237 | if (Unc) |
238 | return *Unc; |
239 | Sym.Flags |= 1 << storage::Symbol::FB_has_uncommon; |
240 | Uncommons.emplace_back(); |
241 | Unc = &Uncommons.back(); |
242 | *Unc = {}; |
243 | setStr(S&: Unc->COFFWeakExternFallbackName, Value: "" ); |
244 | setStr(S&: Unc->SectionName, Value: "" ); |
245 | return *Unc; |
246 | }; |
247 | |
248 | SmallString<64> Name; |
249 | { |
250 | raw_svector_ostream OS(Name); |
251 | Msymtab.printSymbolName(OS, S: Msym); |
252 | } |
253 | setStr(S&: Sym.Name, Value: Saver.save(S: Name.str())); |
254 | |
255 | auto Flags = Msymtab.getSymbolFlags(S: Msym); |
256 | if (Flags & object::BasicSymbolRef::SF_Undefined) |
257 | Sym.Flags |= 1 << storage::Symbol::FB_undefined; |
258 | if (Flags & object::BasicSymbolRef::SF_Weak) |
259 | Sym.Flags |= 1 << storage::Symbol::FB_weak; |
260 | if (Flags & object::BasicSymbolRef::SF_Common) |
261 | Sym.Flags |= 1 << storage::Symbol::FB_common; |
262 | if (Flags & object::BasicSymbolRef::SF_Indirect) |
263 | Sym.Flags |= 1 << storage::Symbol::FB_indirect; |
264 | if (Flags & object::BasicSymbolRef::SF_Global) |
265 | Sym.Flags |= 1 << storage::Symbol::FB_global; |
266 | if (Flags & object::BasicSymbolRef::SF_FormatSpecific) |
267 | Sym.Flags |= 1 << storage::Symbol::FB_format_specific; |
268 | if (Flags & object::BasicSymbolRef::SF_Executable) |
269 | Sym.Flags |= 1 << storage::Symbol::FB_executable; |
270 | |
271 | Sym.ComdatIndex = -1; |
272 | auto *GV = dyn_cast_if_present<GlobalValue *>(Val&: Msym); |
273 | if (!GV) { |
274 | // Undefined module asm symbols act as GC roots and are implicitly used. |
275 | if (Flags & object::BasicSymbolRef::SF_Undefined) |
276 | Sym.Flags |= 1 << storage::Symbol::FB_used; |
277 | setStr(S&: Sym.IRName, Value: "" ); |
278 | return Error::success(); |
279 | } |
280 | |
281 | setStr(S&: Sym.IRName, Value: GV->getName()); |
282 | |
283 | static const DenseSet<StringRef> PreservedSymbolsSet = |
284 | buildPreservedSymbolsSet( |
285 | TT: llvm::Triple(GV->getParent()->getTargetTriple())); |
286 | bool IsPreservedSymbol = PreservedSymbolsSet.contains(V: GV->getName()); |
287 | |
288 | if (Used.count(Ptr: GV) || IsPreservedSymbol) |
289 | Sym.Flags |= 1 << storage::Symbol::FB_used; |
290 | if (GV->isThreadLocal()) |
291 | Sym.Flags |= 1 << storage::Symbol::FB_tls; |
292 | if (GV->hasGlobalUnnamedAddr()) |
293 | Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr; |
294 | if (GV->canBeOmittedFromSymbolTable()) |
295 | Sym.Flags |= 1 << storage::Symbol::FB_may_omit; |
296 | Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility; |
297 | |
298 | if (Flags & object::BasicSymbolRef::SF_Common) { |
299 | auto *GVar = dyn_cast<GlobalVariable>(Val: GV); |
300 | if (!GVar) |
301 | return make_error<StringError>(Args: "Only variables can have common linkage!" , |
302 | Args: inconvertibleErrorCode()); |
303 | Uncommon().CommonSize = |
304 | GV->getDataLayout().getTypeAllocSize(Ty: GV->getValueType()); |
305 | Uncommon().CommonAlign = GVar->getAlign() ? GVar->getAlign()->value() : 0; |
306 | } |
307 | |
308 | const GlobalObject *GO = GV->getAliaseeObject(); |
309 | if (!GO) { |
310 | if (isa<GlobalIFunc>(Val: GV)) |
311 | GO = cast<GlobalIFunc>(Val: GV)->getResolverFunction(); |
312 | if (!GO) |
313 | return make_error<StringError>(Args: "Unable to determine comdat of alias!" , |
314 | Args: inconvertibleErrorCode()); |
315 | } |
316 | if (const Comdat *C = GO->getComdat()) { |
317 | Expected<int> ComdatIndexOrErr = getComdatIndex(C, M: GV->getParent()); |
318 | if (!ComdatIndexOrErr) |
319 | return ComdatIndexOrErr.takeError(); |
320 | Sym.ComdatIndex = *ComdatIndexOrErr; |
321 | } |
322 | |
323 | if (TT.isOSBinFormatCOFF()) { |
324 | emitLinkerFlagsForGlobalCOFF(OS&: COFFLinkerOptsOS, GV, TT, Mangler&: Mang); |
325 | |
326 | if ((Flags & object::BasicSymbolRef::SF_Weak) && |
327 | (Flags & object::BasicSymbolRef::SF_Indirect)) { |
328 | auto *Fallback = dyn_cast<GlobalValue>( |
329 | Val: cast<GlobalAlias>(Val: GV)->getAliasee()->stripPointerCasts()); |
330 | if (!Fallback) |
331 | return make_error<StringError>(Args: "Invalid weak external" , |
332 | Args: inconvertibleErrorCode()); |
333 | std::string FallbackName; |
334 | raw_string_ostream OS(FallbackName); |
335 | Msymtab.printSymbolName(OS, S: Fallback); |
336 | OS.flush(); |
337 | setStr(S&: Uncommon().COFFWeakExternFallbackName, Value: Saver.save(S: FallbackName)); |
338 | } |
339 | } |
340 | |
341 | if (!GO->getSection().empty()) |
342 | setStr(S&: Uncommon().SectionName, Value: Saver.save(S: GO->getSection())); |
343 | |
344 | return Error::success(); |
345 | } |
346 | |
347 | Error Builder::build(ArrayRef<Module *> IRMods) { |
348 | storage::Header Hdr; |
349 | |
350 | assert(!IRMods.empty()); |
351 | Hdr.Version = storage::Header::kCurrentVersion; |
352 | setStr(S&: Hdr.Producer, Value: kExpectedProducerName); |
353 | setStr(S&: Hdr.TargetTriple, Value: IRMods[0]->getTargetTriple()); |
354 | setStr(S&: Hdr.SourceFileName, Value: IRMods[0]->getSourceFileName()); |
355 | TT = Triple(IRMods[0]->getTargetTriple()); |
356 | |
357 | for (auto *M : IRMods) |
358 | if (Error Err = addModule(M)) |
359 | return Err; |
360 | |
361 | COFFLinkerOptsOS.flush(); |
362 | setStr(S&: Hdr.COFFLinkerOpts, Value: Saver.save(S: COFFLinkerOpts)); |
363 | |
364 | // We are about to fill in the header's range fields, so reserve space for it |
365 | // and copy it in afterwards. |
366 | Symtab.resize(N: sizeof(storage::Header)); |
367 | writeRange(R&: Hdr.Modules, Objs: Mods); |
368 | writeRange(R&: Hdr.Comdats, Objs: Comdats); |
369 | writeRange(R&: Hdr.Symbols, Objs: Syms); |
370 | writeRange(R&: Hdr.Uncommons, Objs: Uncommons); |
371 | writeRange(R&: Hdr.DependentLibraries, Objs: DependentLibraries); |
372 | *reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr; |
373 | return Error::success(); |
374 | } |
375 | |
376 | } // end anonymous namespace |
377 | |
378 | Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, |
379 | StringTableBuilder &StrtabBuilder, |
380 | BumpPtrAllocator &Alloc) { |
381 | return Builder(Symtab, StrtabBuilder, Alloc).build(IRMods: Mods); |
382 | } |
383 | |
384 | // Upgrade a vector of bitcode modules created by an old version of LLVM by |
385 | // creating an irsymtab for them in the current format. |
386 | static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) { |
387 | FileContents FC; |
388 | |
389 | LLVMContext Ctx; |
390 | std::vector<Module *> Mods; |
391 | std::vector<std::unique_ptr<Module>> OwnedMods; |
392 | for (auto BM : BMs) { |
393 | Expected<std::unique_ptr<Module>> MOrErr = |
394 | BM.getLazyModule(Context&: Ctx, /*ShouldLazyLoadMetadata*/ true, |
395 | /*IsImporting*/ false); |
396 | if (!MOrErr) |
397 | return MOrErr.takeError(); |
398 | |
399 | Mods.push_back(x: MOrErr->get()); |
400 | OwnedMods.push_back(x: std::move(*MOrErr)); |
401 | } |
402 | |
403 | StringTableBuilder StrtabBuilder(StringTableBuilder::RAW); |
404 | BumpPtrAllocator Alloc; |
405 | if (Error E = build(Mods, Symtab&: FC.Symtab, StrtabBuilder, Alloc)) |
406 | return std::move(E); |
407 | |
408 | StrtabBuilder.finalizeInOrder(); |
409 | FC.Strtab.resize(N: StrtabBuilder.getSize()); |
410 | StrtabBuilder.write(Buf: (uint8_t *)FC.Strtab.data()); |
411 | |
412 | FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()}, |
413 | {FC.Strtab.data(), FC.Strtab.size()}}; |
414 | return std::move(FC); |
415 | } |
416 | |
417 | Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) { |
418 | if (BFC.Mods.empty()) |
419 | return make_error<StringError>(Args: "Bitcode file does not contain any modules" , |
420 | Args: inconvertibleErrorCode()); |
421 | |
422 | if (!DisableBitcodeVersionUpgrade) { |
423 | if (BFC.StrtabForSymtab.empty() || |
424 | BFC.Symtab.size() < sizeof(storage::Header)) |
425 | return upgrade(BMs: BFC.Mods); |
426 | |
427 | // We cannot use the regular reader to read the version and producer, |
428 | // because it will expect the header to be in the current format. The only |
429 | // thing we can rely on is that the version and producer will be present as |
430 | // the first struct elements. |
431 | auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data()); |
432 | unsigned Version = Hdr->Version; |
433 | StringRef Producer = Hdr->Producer.get(Strtab: BFC.StrtabForSymtab); |
434 | if (Version != storage::Header::kCurrentVersion || |
435 | Producer != kExpectedProducerName) |
436 | return upgrade(BMs: BFC.Mods); |
437 | } |
438 | |
439 | FileContents FC; |
440 | FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()}, |
441 | {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}}; |
442 | |
443 | // Finally, make sure that the number of modules in the symbol table matches |
444 | // the number of modules in the bitcode file. If they differ, it may mean that |
445 | // the bitcode file was created by binary concatenation, so we need to create |
446 | // a new symbol table from scratch. |
447 | if (FC.TheReader.getNumModules() != BFC.Mods.size()) |
448 | return upgrade(BMs: std::move(BFC.Mods)); |
449 | |
450 | return std::move(FC); |
451 | } |
452 | |