1//===- SymbolTable.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Symbol table is a bag of all known symbols. We put all symbols of
10// all input files to the symbol table. The symbol table is basically
11// a hash table with the logic to resolve symbol name conflicts using
12// the symbol types.
13//
14//===----------------------------------------------------------------------===//
15
16#include "SymbolTable.h"
17#include "Config.h"
18#include "InputFiles.h"
19#include "Symbols.h"
20#include "lld/Common/Memory.h"
21#include "lld/Common/Strings.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/Demangle/Demangle.h"
24
25using namespace llvm;
26using namespace llvm::object;
27using namespace llvm::ELF;
28using namespace lld;
29using namespace lld::elf;
30
31void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
32 // Redirect __real_foo to the original foo and foo to the original __wrap_foo.
33 int &idx1 = symMap[CachedHashStringRef(sym->getName())];
34 int &idx2 = symMap[CachedHashStringRef(real->getName())];
35 int &idx3 = symMap[CachedHashStringRef(wrap->getName())];
36
37 idx2 = idx1;
38 idx1 = idx3;
39
40 // Propagate symbol usage information to the redirected symbols.
41 if (sym->isUsedInRegularObj)
42 wrap->isUsedInRegularObj = true;
43 if (real->isUsedInRegularObj)
44 sym->isUsedInRegularObj = true;
45 else if (!sym->isDefined())
46 // Now that all references to sym have been redirected to wrap, if there are
47 // no references to real (which has been redirected to sym), we only need to
48 // keep sym if it was defined, otherwise it's unused and can be dropped.
49 sym->isUsedInRegularObj = false;
50
51 // Now renaming is complete, and no one refers to real. We drop real from
52 // .symtab and .dynsym. If real is undefined, it is important that we don't
53 // leave it in .dynsym, because otherwise it might lead to an undefined symbol
54 // error in a subsequent link. If real is defined, we could emit real as an
55 // alias for sym, but that could degrade the user experience of some tools
56 // that can print out only one symbol for each location: sym is a preferred
57 // name than real, but they might print out real instead.
58 memcpy(dest: static_cast<void *>(real), src: sym, n: sizeof(SymbolUnion));
59 real->isUsedInRegularObj = false;
60}
61
62// Find an existing symbol or create a new one.
63Symbol *SymbolTable::insert(StringRef name) {
64 // <name>@@<version> means the symbol is the default version. In that
65 // case <name>@@<version> will be used to resolve references to <name>.
66 //
67 // Since this is a hot path, the following string search code is
68 // optimized for speed. StringRef::find(char) is much faster than
69 // StringRef::find(StringRef).
70 StringRef stem = name;
71 size_t pos = name.find(C: '@');
72 if (pos != StringRef::npos && pos + 1 < name.size() && name[pos + 1] == '@')
73 stem = name.take_front(N: pos);
74
75 auto p = symMap.insert(KV: {CachedHashStringRef(stem), (int)symVector.size()});
76 if (!p.second) {
77 Symbol *sym = symVector[p.first->second];
78 if (stem.size() != name.size()) {
79 sym->setName(name);
80 sym->hasVersionSuffix = true;
81 }
82 return sym;
83 }
84
85 Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
86 symVector.push_back(Elt: sym);
87
88 // make<SymbolUnion>() value-initializes the storage, so the Symbol fields
89 // are zero. Set the ones that need a non-zero value.
90 sym->setName(name);
91 sym->versionId = VER_NDX_GLOBAL;
92 if (pos != StringRef::npos)
93 sym->hasVersionSuffix = true;
94 return sym;
95}
96
97// This variant of addSymbol is used by BinaryFile::parse to check duplicate
98// symbol errors.
99Symbol *SymbolTable::addAndCheckDuplicate(Ctx &ctx, const Defined &newSym) {
100 Symbol *sym = insert(name: newSym.getName());
101 if (sym->isDefined())
102 sym->checkDuplicate(ctx, other: newSym);
103 sym->resolve(ctx, other: newSym);
104 sym->isUsedInRegularObj = true;
105 return sym;
106}
107
108Symbol *SymbolTable::find(StringRef name) {
109 auto it = symMap.find(Val: CachedHashStringRef(name));
110 if (it == symMap.end())
111 return nullptr;
112 return symVector[it->second];
113}
114
115// A version script/dynamic list is only meaningful for a Defined symbol.
116// A CommonSymbol will be converted to a Defined in replaceCommonSymbols().
117// A lazy symbol may be made Defined if an LTO libcall extracts it.
118static bool canBeVersioned(const Symbol &sym) {
119 return sym.isDefined() || sym.isCommon() || sym.isLazy();
120}
121
122// Initialize demangledSyms with a map from demangled symbols to symbol
123// objects. Used to handle "extern C++" directive in version scripts.
124//
125// The map will contain all demangled symbols. That can be very large,
126// and in LLD we generally want to avoid do anything for each symbol.
127// Then, why are we doing this? Here's why.
128//
129// Users can use "extern C++ {}" directive to match against demangled
130// C++ symbols. For example, you can write a pattern such as
131// "llvm::*::foo(int, ?)". Obviously, there's no way to handle this
132// other than trying to match a pattern against all demangled symbols.
133// So, if "extern C++" feature is used, we need to demangle all known
134// symbols.
135StringMap<SmallVector<Symbol *, 0>> &SymbolTable::getDemangledSyms() {
136 if (!demangledSyms) {
137 demangledSyms.emplace();
138 std::string demangled;
139 for (Symbol *sym : symVector)
140 if (canBeVersioned(sym: *sym)) {
141 StringRef name = sym->getName();
142 size_t pos = name.find(C: '@');
143 std::string substr;
144 if (pos == std::string::npos)
145 demangled = demangle(MangledName: name);
146 else if (pos + 1 == name.size() || name[pos + 1] == '@') {
147 substr = name.substr(Start: 0, N: pos);
148 demangled = demangle(MangledName: substr);
149 } else {
150 substr = name.substr(Start: 0, N: pos);
151 demangled = (demangle(MangledName: substr) + name.substr(Start: pos)).str();
152 }
153 (*demangledSyms)[demangled].push_back(Elt: sym);
154 }
155 }
156 return *demangledSyms;
157}
158
159SmallVector<Symbol *, 0> SymbolTable::findByVersion(SymbolVersion ver) {
160 if (ver.isExternCpp)
161 return getDemangledSyms().lookup(Key: ver.name);
162 if (Symbol *sym = find(name: ver.name))
163 if (canBeVersioned(sym: *sym))
164 return {sym};
165 return {};
166}
167
168SmallVector<Symbol *, 0> SymbolTable::findAllByVersion(SymbolVersion ver,
169 bool includeNonDefault) {
170 SmallVector<Symbol *, 0> res;
171 SingleStringMatcher m(ver.name);
172 auto check = [&](const Symbol &sym) -> bool {
173 if (!includeNonDefault)
174 return !sym.hasVersionSuffix;
175 StringRef name = sym.getName();
176 size_t pos = name.find(C: '@');
177 return !(pos + 1 < name.size() && name[pos + 1] == '@');
178 };
179
180 if (ver.isExternCpp) {
181 for (auto &p : getDemangledSyms())
182 if (m.match(s: p.first()))
183 for (Symbol *sym : p.second)
184 if (check(*sym))
185 res.push_back(Elt: sym);
186 return res;
187 }
188
189 for (Symbol *sym : symVector)
190 if (canBeVersioned(sym: *sym) && check(*sym) && m.match(s: sym->getName()))
191 res.push_back(Elt: sym);
192 return res;
193}
194
195void SymbolTable::handleDynamicList() {
196 SmallVector<Symbol *, 0> syms;
197 for (SymbolVersion &ver : ctx.arg.dynamicList) {
198 if (ver.hasWildcard)
199 syms = findAllByVersion(ver, /*includeNonDefault=*/true);
200 else
201 syms = findByVersion(ver);
202
203 for (Symbol *sym : syms)
204 sym->isExported = sym->inDynamicList = true;
205 }
206}
207
208// Set symbol versions to symbols. This function handles patterns containing no
209// wildcard characters. Return false if no symbol definition matches ver.
210bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
211 StringRef versionName,
212 bool includeNonDefault) {
213 // Get a list of symbols which we need to assign the version to.
214 SmallVector<Symbol *, 0> syms = findByVersion(ver);
215
216 auto getName = [&ctx = ctx](uint16_t ver) -> std::string {
217 if (ver == VER_NDX_LOCAL)
218 return "VER_NDX_LOCAL";
219 if (ver == VER_NDX_GLOBAL)
220 return "VER_NDX_GLOBAL";
221 return ("version '" + ctx.arg.versionDefinitions[ver].name + "'").str();
222 };
223
224 // Assign the version.
225 for (Symbol *sym : syms) {
226 // For a non-local versionId, skip symbols containing version info because
227 // symbol versions specified by symbol names take precedence over version
228 // scripts. See parseSymbolVersion(ctx).
229 if (!includeNonDefault && versionId != VER_NDX_LOCAL &&
230 sym->getName().contains(C: '@'))
231 continue;
232
233 // If the version has not been assigned, assign versionId to the symbol.
234 if (!sym->versionScriptAssigned) {
235 sym->versionScriptAssigned = true;
236 sym->versionId = versionId;
237 }
238 if (sym->versionId == versionId)
239 continue;
240
241 Warn(ctx) << "attempt to reassign symbol '" << ver.name << "' of "
242 << getName(sym->versionId) << " to " << getName(versionId);
243 }
244 return !syms.empty();
245}
246
247void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
248 bool includeNonDefault) {
249 // Exact matching takes precedence over fuzzy matching,
250 // so we set a version to a symbol only if no version has been assigned
251 // to the symbol. This behavior is compatible with GNU.
252 for (Symbol *sym : findAllByVersion(ver, includeNonDefault))
253 if (!sym->versionScriptAssigned) {
254 sym->versionScriptAssigned = true;
255 sym->versionId = versionId;
256 }
257}
258
259// This function processes version scripts by updating the versionId
260// member of symbols.
261// If there's only one anonymous version definition in a version
262// script file, the script does not actually define any symbol version,
263// but just specifies symbols visibilities.
264void SymbolTable::scanVersionScript() {
265 SmallString<128> buf;
266 // First, we assign versions to exact matching symbols,
267 // i.e. version definitions not containing any glob meta-characters.
268 for (VersionDefinition &v : ctx.arg.versionDefinitions) {
269 auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
270 bool found =
271 assignExactVersion(ver: pat, versionId: id, versionName: ver, /*includeNonDefault=*/false);
272 buf.clear();
273 found |= assignExactVersion(ver: {.name: (pat.name + "@" + v.name).toStringRef(Out&: buf),
274 .isExternCpp: pat.isExternCpp, /*hasWildCard=*/.hasWildcard: false},
275 versionId: id, versionName: ver, /*includeNonDefault=*/true);
276 if (!found && !ctx.arg.undefinedVersion)
277 Err(ctx) << "version script assignment of '" << ver << "' to symbol '"
278 << pat.name << "' failed: symbol not defined";
279 };
280 for (SymbolVersion &pat : v.nonLocalPatterns)
281 if (!pat.hasWildcard)
282 assignExact(pat, v.id, v.name);
283 for (SymbolVersion pat : v.localPatterns)
284 if (!pat.hasWildcard)
285 assignExact(pat, VER_NDX_LOCAL, "local");
286 }
287
288 // Next, assign versions to wildcards that are not "*". Note that because the
289 // last match takes precedence over previous matches, we iterate over the
290 // definitions in the reverse order.
291 auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
292 assignWildcardVersion(ver: pat, versionId: id, /*includeNonDefault=*/false);
293 buf.clear();
294 assignWildcardVersion(ver: {.name: (pat.name + "@" + ver).toStringRef(Out&: buf),
295 .isExternCpp: pat.isExternCpp, /*hasWildCard=*/.hasWildcard: true},
296 versionId: id,
297 /*includeNonDefault=*/true);
298 };
299 for (VersionDefinition &v : llvm::reverse(C&: ctx.arg.versionDefinitions)) {
300 for (SymbolVersion &pat : v.nonLocalPatterns)
301 if (pat.hasWildcard && pat.name != "*")
302 assignWildcard(pat, v.id, v.name);
303 for (SymbolVersion &pat : v.localPatterns)
304 if (pat.hasWildcard && pat.name != "*")
305 assignWildcard(pat, VER_NDX_LOCAL, v.name);
306 }
307
308 // Then, assign versions to "*". In GNU linkers they have lower priority than
309 // other wildcards.
310 bool globalAsteriskFound = false;
311 bool localAsteriskFound = false;
312 bool asteriskReported = false;
313 auto assignAsterisk = [&](SymbolVersion &pat, VersionDefinition *ver,
314 bool isLocal) {
315 // Avoid issuing a warning if both '--retain-symbol-file' and a version
316 // script with `global: *` are used.
317 //
318 // '--retain-symbol-file' adds a "*" pattern to
319 // 'versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns', see
320 // 'readConfigs()' in 'Driver.cpp'. Note that it is not '.localPatterns',
321 // and may seem counterintuitive, but still works as expected. Here we can
322 // exploit that and skip analyzing the pattern added for this option.
323 if (!asteriskReported && (isLocal || ver->id > VER_NDX_LOCAL)) {
324 if ((isLocal && globalAsteriskFound) ||
325 (!isLocal && localAsteriskFound)) {
326 Warn(ctx)
327 << "wildcard pattern '*' is used for both 'local' and 'global' "
328 "scopes in version script";
329 asteriskReported = true;
330 } else if (!isLocal && globalAsteriskFound) {
331 Warn(ctx) << "wildcard pattern '*' is used for multiple version "
332 "definitions in "
333 "version script";
334 asteriskReported = true;
335 } else {
336 localAsteriskFound = isLocal;
337 globalAsteriskFound = !isLocal;
338 }
339 }
340 assignWildcard(pat, isLocal ? (uint16_t)VER_NDX_LOCAL : ver->id, ver->name);
341 };
342 for (VersionDefinition &v : llvm::reverse(C&: ctx.arg.versionDefinitions)) {
343 for (SymbolVersion &pat : v.nonLocalPatterns)
344 if (pat.hasWildcard && pat.name == "*")
345 assignAsterisk(pat, &v, false);
346 for (SymbolVersion &pat : v.localPatterns)
347 if (pat.hasWildcard && pat.name == "*")
348 assignAsterisk(pat, &v, true);
349 }
350
351 // Handle --dynamic-list. If a specified symbol is also matched by local: in a
352 // version script, the version script takes precedence.
353 handleDynamicList();
354}
355
356Symbol *SymbolTable::addUnusedUndefined(StringRef name, uint8_t binding) {
357 return addSymbol(newSym: Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0});
358}
359