| 1 | //===- SymbolTable.cpp ----------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Symbol table is a bag of all known symbols. We put all symbols of |
| 10 | // all input files to the symbol table. The symbol table is basically |
| 11 | // a hash table with the logic to resolve symbol name conflicts using |
| 12 | // the symbol types. |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #include "SymbolTable.h" |
| 17 | #include "Config.h" |
| 18 | #include "InputFiles.h" |
| 19 | #include "Symbols.h" |
| 20 | #include "lld/Common/Memory.h" |
| 21 | #include "lld/Common/Strings.h" |
| 22 | #include "llvm/ADT/STLExtras.h" |
| 23 | #include "llvm/Demangle/Demangle.h" |
| 24 | |
| 25 | using namespace llvm; |
| 26 | using namespace llvm::object; |
| 27 | using namespace llvm::ELF; |
| 28 | using namespace lld; |
| 29 | using namespace lld::elf; |
| 30 | |
| 31 | void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { |
| 32 | // Redirect __real_foo to the original foo and foo to the original __wrap_foo. |
| 33 | int &idx1 = symMap[CachedHashStringRef(sym->getName())]; |
| 34 | int &idx2 = symMap[CachedHashStringRef(real->getName())]; |
| 35 | int &idx3 = symMap[CachedHashStringRef(wrap->getName())]; |
| 36 | |
| 37 | idx2 = idx1; |
| 38 | idx1 = idx3; |
| 39 | |
| 40 | // Propagate symbol usage information to the redirected symbols. |
| 41 | if (sym->isUsedInRegularObj) |
| 42 | wrap->isUsedInRegularObj = true; |
| 43 | if (real->isUsedInRegularObj) |
| 44 | sym->isUsedInRegularObj = true; |
| 45 | else if (!sym->isDefined()) |
| 46 | // Now that all references to sym have been redirected to wrap, if there are |
| 47 | // no references to real (which has been redirected to sym), we only need to |
| 48 | // keep sym if it was defined, otherwise it's unused and can be dropped. |
| 49 | sym->isUsedInRegularObj = false; |
| 50 | |
| 51 | // Now renaming is complete, and no one refers to real. We drop real from |
| 52 | // .symtab and .dynsym. If real is undefined, it is important that we don't |
| 53 | // leave it in .dynsym, because otherwise it might lead to an undefined symbol |
| 54 | // error in a subsequent link. If real is defined, we could emit real as an |
| 55 | // alias for sym, but that could degrade the user experience of some tools |
| 56 | // that can print out only one symbol for each location: sym is a preferred |
| 57 | // name than real, but they might print out real instead. |
| 58 | memcpy(dest: static_cast<void *>(real), src: sym, n: sizeof(SymbolUnion)); |
| 59 | real->isUsedInRegularObj = false; |
| 60 | } |
| 61 | |
| 62 | // Find an existing symbol or create a new one. |
| 63 | Symbol *SymbolTable::insert(StringRef name) { |
| 64 | // <name>@@<version> means the symbol is the default version. In that |
| 65 | // case <name>@@<version> will be used to resolve references to <name>. |
| 66 | // |
| 67 | // Since this is a hot path, the following string search code is |
| 68 | // optimized for speed. StringRef::find(char) is much faster than |
| 69 | // StringRef::find(StringRef). |
| 70 | StringRef stem = name; |
| 71 | size_t pos = name.find(C: '@'); |
| 72 | if (pos != StringRef::npos && pos + 1 < name.size() && name[pos + 1] == '@') |
| 73 | stem = name.take_front(N: pos); |
| 74 | |
| 75 | auto p = symMap.insert(KV: {CachedHashStringRef(stem), (int)symVector.size()}); |
| 76 | if (!p.second) { |
| 77 | Symbol *sym = symVector[p.first->second]; |
| 78 | if (stem.size() != name.size()) { |
| 79 | sym->setName(name); |
| 80 | sym->hasVersionSuffix = true; |
| 81 | } |
| 82 | return sym; |
| 83 | } |
| 84 | |
| 85 | Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); |
| 86 | symVector.push_back(Elt: sym); |
| 87 | |
| 88 | // *sym was not initialized by a constructor. Initialize all Symbol fields. |
| 89 | memset(s: static_cast<void *>(sym), c: 0, n: sizeof(Symbol)); |
| 90 | sym->setName(name); |
| 91 | sym->partition = 1; |
| 92 | sym->versionId = VER_NDX_GLOBAL; |
| 93 | if (pos != StringRef::npos) |
| 94 | sym->hasVersionSuffix = true; |
| 95 | return sym; |
| 96 | } |
| 97 | |
| 98 | // This variant of addSymbol is used by BinaryFile::parse to check duplicate |
| 99 | // symbol errors. |
| 100 | Symbol *SymbolTable::addAndCheckDuplicate(Ctx &ctx, const Defined &newSym) { |
| 101 | Symbol *sym = insert(name: newSym.getName()); |
| 102 | if (sym->isDefined()) |
| 103 | sym->checkDuplicate(ctx, other: newSym); |
| 104 | sym->resolve(ctx, other: newSym); |
| 105 | sym->isUsedInRegularObj = true; |
| 106 | return sym; |
| 107 | } |
| 108 | |
| 109 | Symbol *SymbolTable::find(StringRef name) { |
| 110 | auto it = symMap.find(Val: CachedHashStringRef(name)); |
| 111 | if (it == symMap.end()) |
| 112 | return nullptr; |
| 113 | return symVector[it->second]; |
| 114 | } |
| 115 | |
| 116 | // A version script/dynamic list is only meaningful for a Defined symbol. |
| 117 | // A CommonSymbol will be converted to a Defined in replaceCommonSymbols(). |
| 118 | // A lazy symbol may be made Defined if an LTO libcall extracts it. |
| 119 | static bool canBeVersioned(const Symbol &sym) { |
| 120 | return sym.isDefined() || sym.isCommon() || sym.isLazy(); |
| 121 | } |
| 122 | |
| 123 | // Initialize demangledSyms with a map from demangled symbols to symbol |
| 124 | // objects. Used to handle "extern C++" directive in version scripts. |
| 125 | // |
| 126 | // The map will contain all demangled symbols. That can be very large, |
| 127 | // and in LLD we generally want to avoid do anything for each symbol. |
| 128 | // Then, why are we doing this? Here's why. |
| 129 | // |
| 130 | // Users can use "extern C++ {}" directive to match against demangled |
| 131 | // C++ symbols. For example, you can write a pattern such as |
| 132 | // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this |
| 133 | // other than trying to match a pattern against all demangled symbols. |
| 134 | // So, if "extern C++" feature is used, we need to demangle all known |
| 135 | // symbols. |
| 136 | StringMap<SmallVector<Symbol *, 0>> &SymbolTable::getDemangledSyms() { |
| 137 | if (!demangledSyms) { |
| 138 | demangledSyms.emplace(); |
| 139 | std::string demangled; |
| 140 | for (Symbol *sym : symVector) |
| 141 | if (canBeVersioned(sym: *sym)) { |
| 142 | StringRef name = sym->getName(); |
| 143 | size_t pos = name.find(C: '@'); |
| 144 | std::string substr; |
| 145 | if (pos == std::string::npos) |
| 146 | demangled = demangle(MangledName: name); |
| 147 | else if (pos + 1 == name.size() || name[pos + 1] == '@') { |
| 148 | substr = name.substr(Start: 0, N: pos); |
| 149 | demangled = demangle(MangledName: substr); |
| 150 | } else { |
| 151 | substr = name.substr(Start: 0, N: pos); |
| 152 | demangled = (demangle(MangledName: substr) + name.substr(Start: pos)).str(); |
| 153 | } |
| 154 | (*demangledSyms)[demangled].push_back(Elt: sym); |
| 155 | } |
| 156 | } |
| 157 | return *demangledSyms; |
| 158 | } |
| 159 | |
| 160 | SmallVector<Symbol *, 0> SymbolTable::findByVersion(SymbolVersion ver) { |
| 161 | if (ver.isExternCpp) |
| 162 | return getDemangledSyms().lookup(Key: ver.name); |
| 163 | if (Symbol *sym = find(name: ver.name)) |
| 164 | if (canBeVersioned(sym: *sym)) |
| 165 | return {sym}; |
| 166 | return {}; |
| 167 | } |
| 168 | |
| 169 | SmallVector<Symbol *, 0> SymbolTable::findAllByVersion(SymbolVersion ver, |
| 170 | bool includeNonDefault) { |
| 171 | SmallVector<Symbol *, 0> res; |
| 172 | SingleStringMatcher m(ver.name); |
| 173 | auto check = [&](const Symbol &sym) -> bool { |
| 174 | if (!includeNonDefault) |
| 175 | return !sym.hasVersionSuffix; |
| 176 | StringRef name = sym.getName(); |
| 177 | size_t pos = name.find(C: '@'); |
| 178 | return !(pos + 1 < name.size() && name[pos + 1] == '@'); |
| 179 | }; |
| 180 | |
| 181 | if (ver.isExternCpp) { |
| 182 | for (auto &p : getDemangledSyms()) |
| 183 | if (m.match(s: p.first())) |
| 184 | for (Symbol *sym : p.second) |
| 185 | if (check(*sym)) |
| 186 | res.push_back(Elt: sym); |
| 187 | return res; |
| 188 | } |
| 189 | |
| 190 | for (Symbol *sym : symVector) |
| 191 | if (canBeVersioned(sym: *sym) && check(*sym) && m.match(s: sym->getName())) |
| 192 | res.push_back(Elt: sym); |
| 193 | return res; |
| 194 | } |
| 195 | |
| 196 | void SymbolTable::handleDynamicList() { |
| 197 | SmallVector<Symbol *, 0> syms; |
| 198 | for (SymbolVersion &ver : ctx.arg.dynamicList) { |
| 199 | if (ver.hasWildcard) |
| 200 | syms = findAllByVersion(ver, /*includeNonDefault=*/true); |
| 201 | else |
| 202 | syms = findByVersion(ver); |
| 203 | |
| 204 | for (Symbol *sym : syms) |
| 205 | sym->isExported = sym->inDynamicList = true; |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | // Set symbol versions to symbols. This function handles patterns containing no |
| 210 | // wildcard characters. Return false if no symbol definition matches ver. |
| 211 | bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, |
| 212 | StringRef versionName, |
| 213 | bool includeNonDefault) { |
| 214 | // Get a list of symbols which we need to assign the version to. |
| 215 | SmallVector<Symbol *, 0> syms = findByVersion(ver); |
| 216 | |
| 217 | auto getName = [&ctx = ctx](uint16_t ver) -> std::string { |
| 218 | if (ver == VER_NDX_LOCAL) |
| 219 | return "VER_NDX_LOCAL" ; |
| 220 | if (ver == VER_NDX_GLOBAL) |
| 221 | return "VER_NDX_GLOBAL" ; |
| 222 | return ("version '" + ctx.arg.versionDefinitions[ver].name + "'" ).str(); |
| 223 | }; |
| 224 | |
| 225 | // Assign the version. |
| 226 | for (Symbol *sym : syms) { |
| 227 | // For a non-local versionId, skip symbols containing version info because |
| 228 | // symbol versions specified by symbol names take precedence over version |
| 229 | // scripts. See parseSymbolVersion(ctx). |
| 230 | if (!includeNonDefault && versionId != VER_NDX_LOCAL && |
| 231 | sym->getName().contains(C: '@')) |
| 232 | continue; |
| 233 | |
| 234 | // If the version has not been assigned, assign versionId to the symbol. |
| 235 | if (!sym->versionScriptAssigned) { |
| 236 | sym->versionScriptAssigned = true; |
| 237 | sym->versionId = versionId; |
| 238 | } |
| 239 | if (sym->versionId == versionId) |
| 240 | continue; |
| 241 | |
| 242 | Warn(ctx) << "attempt to reassign symbol '" << ver.name << "' of " |
| 243 | << getName(sym->versionId) << " to " << getName(versionId); |
| 244 | } |
| 245 | return !syms.empty(); |
| 246 | } |
| 247 | |
| 248 | void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId, |
| 249 | bool includeNonDefault) { |
| 250 | // Exact matching takes precedence over fuzzy matching, |
| 251 | // so we set a version to a symbol only if no version has been assigned |
| 252 | // to the symbol. This behavior is compatible with GNU. |
| 253 | for (Symbol *sym : findAllByVersion(ver, includeNonDefault)) |
| 254 | if (!sym->versionScriptAssigned) { |
| 255 | sym->versionScriptAssigned = true; |
| 256 | sym->versionId = versionId; |
| 257 | } |
| 258 | } |
| 259 | |
| 260 | // This function processes version scripts by updating the versionId |
| 261 | // member of symbols. |
| 262 | // If there's only one anonymous version definition in a version |
| 263 | // script file, the script does not actually define any symbol version, |
| 264 | // but just specifies symbols visibilities. |
| 265 | void SymbolTable::scanVersionScript() { |
| 266 | SmallString<128> buf; |
| 267 | // First, we assign versions to exact matching symbols, |
| 268 | // i.e. version definitions not containing any glob meta-characters. |
| 269 | for (VersionDefinition &v : ctx.arg.versionDefinitions) { |
| 270 | auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) { |
| 271 | bool found = |
| 272 | assignExactVersion(ver: pat, versionId: id, versionName: ver, /*includeNonDefault=*/false); |
| 273 | buf.clear(); |
| 274 | found |= assignExactVersion(ver: {.name: (pat.name + "@" + v.name).toStringRef(Out&: buf), |
| 275 | .isExternCpp: pat.isExternCpp, /*hasWildCard=*/.hasWildcard: false}, |
| 276 | versionId: id, versionName: ver, /*includeNonDefault=*/true); |
| 277 | if (!found && !ctx.arg.undefinedVersion) |
| 278 | Err(ctx) << "version script assignment of '" << ver << "' to symbol '" |
| 279 | << pat.name << "' failed: symbol not defined" ; |
| 280 | }; |
| 281 | for (SymbolVersion &pat : v.nonLocalPatterns) |
| 282 | if (!pat.hasWildcard) |
| 283 | assignExact(pat, v.id, v.name); |
| 284 | for (SymbolVersion pat : v.localPatterns) |
| 285 | if (!pat.hasWildcard) |
| 286 | assignExact(pat, VER_NDX_LOCAL, "local" ); |
| 287 | } |
| 288 | |
| 289 | // Next, assign versions to wildcards that are not "*". Note that because the |
| 290 | // last match takes precedence over previous matches, we iterate over the |
| 291 | // definitions in the reverse order. |
| 292 | auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) { |
| 293 | assignWildcardVersion(ver: pat, versionId: id, /*includeNonDefault=*/false); |
| 294 | buf.clear(); |
| 295 | assignWildcardVersion(ver: {.name: (pat.name + "@" + ver).toStringRef(Out&: buf), |
| 296 | .isExternCpp: pat.isExternCpp, /*hasWildCard=*/.hasWildcard: true}, |
| 297 | versionId: id, |
| 298 | /*includeNonDefault=*/true); |
| 299 | }; |
| 300 | for (VersionDefinition &v : llvm::reverse(C&: ctx.arg.versionDefinitions)) { |
| 301 | for (SymbolVersion &pat : v.nonLocalPatterns) |
| 302 | if (pat.hasWildcard && pat.name != "*" ) |
| 303 | assignWildcard(pat, v.id, v.name); |
| 304 | for (SymbolVersion &pat : v.localPatterns) |
| 305 | if (pat.hasWildcard && pat.name != "*" ) |
| 306 | assignWildcard(pat, VER_NDX_LOCAL, v.name); |
| 307 | } |
| 308 | |
| 309 | // Then, assign versions to "*". In GNU linkers they have lower priority than |
| 310 | // other wildcards. |
| 311 | bool globalAsteriskFound = false; |
| 312 | bool localAsteriskFound = false; |
| 313 | bool asteriskReported = false; |
| 314 | auto assignAsterisk = [&](SymbolVersion &pat, VersionDefinition *ver, |
| 315 | bool isLocal) { |
| 316 | // Avoid issuing a warning if both '--retain-symbol-file' and a version |
| 317 | // script with `global: *` are used. |
| 318 | // |
| 319 | // '--retain-symbol-file' adds a "*" pattern to |
| 320 | // 'versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns', see |
| 321 | // 'readConfigs()' in 'Driver.cpp'. Note that it is not '.localPatterns', |
| 322 | // and may seem counterintuitive, but still works as expected. Here we can |
| 323 | // exploit that and skip analyzing the pattern added for this option. |
| 324 | if (!asteriskReported && (isLocal || ver->id > VER_NDX_LOCAL)) { |
| 325 | if ((isLocal && globalAsteriskFound) || |
| 326 | (!isLocal && localAsteriskFound)) { |
| 327 | Warn(ctx) |
| 328 | << "wildcard pattern '*' is used for both 'local' and 'global' " |
| 329 | "scopes in version script" ; |
| 330 | asteriskReported = true; |
| 331 | } else if (!isLocal && globalAsteriskFound) { |
| 332 | Warn(ctx) << "wildcard pattern '*' is used for multiple version " |
| 333 | "definitions in " |
| 334 | "version script" ; |
| 335 | asteriskReported = true; |
| 336 | } else { |
| 337 | localAsteriskFound = isLocal; |
| 338 | globalAsteriskFound = !isLocal; |
| 339 | } |
| 340 | } |
| 341 | assignWildcard(pat, isLocal ? (uint16_t)VER_NDX_LOCAL : ver->id, ver->name); |
| 342 | }; |
| 343 | for (VersionDefinition &v : llvm::reverse(C&: ctx.arg.versionDefinitions)) { |
| 344 | for (SymbolVersion &pat : v.nonLocalPatterns) |
| 345 | if (pat.hasWildcard && pat.name == "*" ) |
| 346 | assignAsterisk(pat, &v, false); |
| 347 | for (SymbolVersion &pat : v.localPatterns) |
| 348 | if (pat.hasWildcard && pat.name == "*" ) |
| 349 | assignAsterisk(pat, &v, true); |
| 350 | } |
| 351 | |
| 352 | // Handle --dynamic-list. If a specified symbol is also matched by local: in a |
| 353 | // version script, the version script takes precedence. |
| 354 | handleDynamicList(); |
| 355 | } |
| 356 | |
| 357 | Symbol *SymbolTable::addUnusedUndefined(StringRef name, uint8_t binding) { |
| 358 | return addSymbol(newSym: Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0}); |
| 359 | } |
| 360 | |