1//===- LibraryResolver.cpp - Library Resolution of Unresolved Symbols ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Library resolution impl for unresolved symbols
10//
11//===----------------------------------------------------------------------===//
12
#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h"

#include "llvm/ADT/StringSet.h"

#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/DJB.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"

#include <mutex>
28
29#define DEBUG_TYPE "orc-resolver"
30
31namespace llvm::orc {
32
33LibraryResolver::LibraryResolver(const LibraryResolver::Setup &S)
34 : LibMgr(LibraryManager()),
35 LibPathCache(std::make_shared<LibraryPathCache>()),
36 LibPathResolver(std::make_shared<PathResolver>(args&: LibPathCache)),
37 ScanHelper(S.BasePaths, LibPathCache, LibPathResolver),
38 FB(S.FilterBuilder),
39 ShouldScanCall(S.ShouldScanCall ? S.ShouldScanCall
40 : [](StringRef) -> bool { return true; }),
41 scanBatchSize(S.ScanBatchSize) {
42
43 if (!ScanHelper.hasSearchPath()) {
44 LLVM_DEBUG(dbgs() << "Warning: No base paths provided for scanning.\n");
45 }
46}
47
48std::unique_ptr<LibraryResolutionDriver>
49LibraryResolutionDriver::create(const LibraryResolver::Setup &S) {
50 auto LR = std::make_unique<LibraryResolver>(args: S);
51 return std::unique_ptr<LibraryResolutionDriver>(
52 new LibraryResolutionDriver(std::move(LR)));
53}
54
55void LibraryResolutionDriver::addScanPath(const std::string &Path, PathType K) {
56 LR->ScanHelper.addBasePath(P: Path, Kind: K);
57}
58
59void LibraryResolutionDriver::markLibraryLoaded(StringRef Path) {
60 LR->LibMgr.markLoaded(Path);
61}
62
63void LibraryResolutionDriver::markLibraryUnLoaded(StringRef Path) {
64 LR->LibMgr.markUnloaded(Path);
65}
66
67void LibraryResolutionDriver::resolveSymbols(
68 ArrayRef<StringRef> Symbols, LibraryResolver::OnSearchComplete OnCompletion,
69 const SearchConfig &Config) {
70 LR->searchSymbolsInLibraries(SymList: Symbols, OnComplete: std::move(OnCompletion), Config);
71}
72
73static bool shouldIgnoreSymbol(const object::SymbolRef &Sym,
74 uint32_t IgnoreFlags) {
75 Expected<uint32_t> FlagsOrErr = Sym.getFlags();
76 if (!FlagsOrErr) {
77 consumeError(Err: FlagsOrErr.takeError());
78 return true;
79 }
80
81 uint32_t Flags = *FlagsOrErr;
82
83 using Filter = SymbolEnumeratorOptions;
84 if ((IgnoreFlags & Filter::IgnoreUndefined) &&
85 (Flags & object::SymbolRef::SF_Undefined))
86 return true;
87 if ((IgnoreFlags & Filter::IgnoreNonExported) &&
88 !(Flags & object::SymbolRef::SF_Exported))
89 return true;
90 if ((IgnoreFlags & Filter::IgnoreNonGlobal) &&
91 !(Flags & object::SymbolRef::SF_Global))
92 return true;
93 if ((IgnoreFlags & Filter::IgnoreHidden) &&
94 (Flags & object::SymbolRef::SF_Hidden))
95 return true;
96 if ((IgnoreFlags & Filter::IgnoreIndirect) &&
97 (Flags & object::SymbolRef::SF_Indirect))
98 return true;
99 if ((IgnoreFlags & Filter::IgnoreWeak) &&
100 (Flags & object::SymbolRef::SF_Weak))
101 return true;
102
103 return false;
104}
105
106bool SymbolEnumerator::enumerateSymbols(object::ObjectFile *Obj,
107 OnEachSymbolFn OnEach,
108 const SymbolEnumeratorOptions &Opts) {
109 if (!Obj)
110 return false;
111
112 auto processSymbolRange =
113 [&](object::ObjectFile::symbol_iterator_range Range) -> EnumerateResult {
114 for (const auto &Sym : Range) {
115 if (shouldIgnoreSymbol(Sym, IgnoreFlags: Opts.FilterFlags))
116 continue;
117
118 auto NameOrErr = Sym.getName();
119 if (!NameOrErr) {
120 consumeError(Err: NameOrErr.takeError());
121 continue;
122 }
123
124 StringRef Name = *NameOrErr;
125 if (Name.empty())
126 continue;
127
128 EnumerateResult Res = OnEach(Name);
129 if (Res != EnumerateResult::Continue)
130 return Res;
131 }
132 return EnumerateResult::Continue;
133 };
134
135 EnumerateResult Res = processSymbolRange(Obj->symbols());
136 if (Res != EnumerateResult::Continue)
137 return Res == EnumerateResult::Stop;
138
139 if (Obj->isELF()) {
140 const auto *ElfObj = cast<object::ELFObjectFileBase>(Val: Obj);
141 Res = processSymbolRange(ElfObj->getDynamicSymbolIterators());
142 if (Res != EnumerateResult::Continue)
143 return Res == EnumerateResult::Stop;
144 } else if (Obj->isCOFF()) {
145 const auto *CoffObj = cast<object::COFFObjectFile>(Val: Obj);
146 for (auto I = CoffObj->export_directory_begin(),
147 E = CoffObj->export_directory_end();
148 I != E; ++I) {
149 StringRef Name;
150 if (I->getSymbolName(Result&: Name))
151 continue;
152 if (Name.empty())
153 continue;
154
155 EnumerateResult Res = OnEach(Name);
156 if (Res != EnumerateResult::Continue)
157 return Res == EnumerateResult::Stop;
158 }
159 } else if (Obj->isMachO()) {
160 }
161
162 return true;
163}
164
165bool SymbolEnumerator::enumerateSymbols(StringRef Path, OnEachSymbolFn OnEach,
166 const SymbolEnumeratorOptions &Opts) {
167 ObjectFileLoader ObjLoader(Path);
168
169 auto ObjOrErr = ObjLoader.getObjectFile();
170 if (!ObjOrErr) {
171 std::string ErrMsg;
172 handleAllErrors(E: ObjOrErr.takeError(),
173 Handlers: [&](const ErrorInfoBase &EIB) { ErrMsg = EIB.message(); });
174 LLVM_DEBUG(dbgs() << "Failed loading object file: " << Path
175 << "\nError: " << ErrMsg << "\n");
176 return false;
177 }
178
179 return SymbolEnumerator::enumerateSymbols(Obj: &ObjOrErr.get(), OnEach, Opts);
180}
181
182static StringRef GetGnuHashSection(llvm::object::ObjectFile *file) {
183 for (auto S : file->sections()) {
184 StringRef name = llvm::cantFail(ValOrErr: S.getName());
185 if (name == ".gnu.hash") {
186 return llvm::cantFail(ValOrErr: S.getContents());
187 }
188 }
189 return "";
190}
191
192/// Bloom filter is a stochastic data structure which can tell us if a symbol
193/// name does not exist in a library with 100% certainty. If it tells us it
194/// exists this may not be true:
195/// https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2
196///
197/// ELF has this optimization in the new linkers by default, It is stored in the
198/// gnu.hash section of the object file.
199///
200///\returns true if the symbol may be in the library.
201static bool MayExistInElfObjectFile(llvm::object::ObjectFile *soFile,
202 StringRef Sym) {
203 assert(soFile->isELF() && "Not ELF");
204
205 uint32_t hash = djbHash(Buffer: Sym);
206 // Compute the platform bitness -- either 64 or 32.
207 const unsigned bits = 8 * soFile->getBytesInAddress();
208
209 StringRef contents = GetGnuHashSection(file: soFile);
210 if (contents.size() < 16)
211 // We need to search if the library doesn't have .gnu.hash section!
212 return true;
213 const char *hashContent = contents.data();
214
215 // See https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ for .gnu.hash
216 // table layout.
217 uint32_t maskWords = *reinterpret_cast<const uint32_t *>(hashContent + 8);
218 uint32_t shift2 = *reinterpret_cast<const uint32_t *>(hashContent + 12);
219 uint32_t hash2 = hash >> shift2;
220 uint32_t n = (hash / bits) % maskWords;
221
222 const char *bloomfilter = hashContent + 16;
223 const char *hash_pos = bloomfilter + n * (bits / 8); // * (Bits / 8)
224 uint64_t word = *reinterpret_cast<const uint64_t *>(hash_pos);
225 uint64_t bitmask = ((1ULL << (hash % bits)) | (1ULL << (hash2 % bits)));
226 return (bitmask & word) == bitmask;
227}
228
229void LibraryResolver::resolveSymbolsInLibrary(
230 LibraryInfo *Lib, SymbolQuery &Query, const SymbolEnumeratorOptions &Opts) {
231 LLVM_DEBUG(dbgs() << "Checking unresolved symbols "
232 << " in library : " << Lib->getFileName() << "\n";);
233
234 if (!Query.hasUnresolved()) {
235 LLVM_DEBUG(dbgs() << "Skipping library: " << Lib->getFullPath()
236 << " — unresolved symbols exist.\n";);
237 return;
238 }
239
240 bool HadAnySym = false;
241
242 // Build candidate vector
243 SmallVector<StringRef, 24> CandidateVec;
244
245 Query.getUnresolvedSymbols(Unresolved&: CandidateVec, Allow: [&](StringRef S) {
246 return !Lib->hasFilter() || Lib->mayContain(Symbol: S);
247 });
248
249 LLVM_DEBUG(dbgs() << "Total candidate symbols : " << CandidateVec.size()
250 << "\n";);
251 if (CandidateVec.empty()) {
252 LLVM_DEBUG(dbgs() << "No symbol Exist "
253 " in library: "
254 << Lib->getFullPath() << "\n";);
255 return;
256 }
257
258 bool BuildingFilter = !Lib->hasFilter();
259
260 ObjectFileLoader ObjLoader(Lib->getFullPath());
261 auto ObjOrErr = ObjLoader.getObjectFile();
262 if (!ObjOrErr) {
263 std::string ErrMsg;
264 handleAllErrors(E: ObjOrErr.takeError(),
265 Handlers: [&](const ErrorInfoBase &EIB) { ErrMsg = EIB.message(); });
266 LLVM_DEBUG(dbgs() << "Failed loading object file: " << Lib->getFullPath()
267 << "\nError: " << ErrMsg << "\n");
268 return;
269 }
270
271 object::ObjectFile *Obj = &ObjOrErr.get();
272 if (BuildingFilter && Obj->isELF()) {
273
274 erase_if(C&: CandidateVec,
275 P: [&](StringRef C) { return !MayExistInElfObjectFile(soFile: Obj, Sym: C); });
276 if (CandidateVec.empty())
277 return;
278 }
279
280 SmallVector<StringRef, 256> SymbolVec;
281
282 LLVM_DEBUG(dbgs() << "Enumerating symbols in library: " << Lib->getFullPath()
283 << "\n";);
284
285 SymbolEnumerator::enumerateSymbols(
286 Obj,
287 OnEach: [&](StringRef S) {
288 // Collect symbols if we're building a filter
289 if (BuildingFilter)
290 SymbolVec.push_back(Elt: S);
291
292 // auto It = std::lower_bound(CandidateVec.begin(),
293 // CandidateVec.end(), S);
294 auto It = std::find(first: CandidateVec.begin(), last: CandidateVec.end(), val: S);
295 if (It != CandidateVec.end() && *It == S) {
296 // Resolve and remove from CandidateVec
297 LLVM_DEBUG(dbgs() << "Symbol '" << S << "' resolved in library: "
298 << Lib->getFullPath() << "\n";);
299 Query.resolve(Sym: S, LibPath: Lib->getFullPath());
300 HadAnySym = true;
301 *It = CandidateVec.back();
302 CandidateVec.pop_back();
303
304 // Stop — if nothing remains, stop enumeration
305 if (!BuildingFilter && CandidateVec.empty()) {
306 return EnumerateResult::Stop;
307 }
308 // Also stop if SymbolQuery has no more unresolved symbols
309 if (!BuildingFilter && !Query.hasUnresolved())
310 return EnumerateResult::Stop;
311 }
312
313 return EnumerateResult::Continue;
314 },
315 Opts);
316
317 if (BuildingFilter) {
318 LLVM_DEBUG(dbgs() << "Building filter for library: " << Lib->getFullPath()
319 << "\n";);
320 if (SymbolVec.empty()) {
321 LLVM_DEBUG(dbgs() << " Skip : No symbols found in : "
322 << Lib->getFullPath() << "\n";);
323 return;
324 }
325
326 Lib->ensureFilterBuilt(FB, Symbols: SymbolVec);
327 LLVM_DEBUG({
328 dbgs() << "DiscoveredSymbols : " << SymbolVec.size() << "\n";
329 for (const auto &S : SymbolVec)
330 dbgs() << "DiscoveredSymbols : " << S << "\n";
331 });
332 }
333
334 if (HadAnySym && Lib->getState() != LibState::Loaded)
335 Lib->setState(LibState::Queried);
336}
337
338void LibraryResolver::searchSymbolsInLibraries(ArrayRef<StringRef> SymbolList,
339 OnSearchComplete OnComplete,
340 const SearchConfig &Config) {
341 SymbolQuery Q(SymbolList);
342
343 using LibraryType = PathType;
344 auto tryResolveFrom = [&](LibState S, LibraryType K) {
345 LLVM_DEBUG(dbgs() << "Trying resolve from state=" << static_cast<int>(S)
346 << " type=" << static_cast<int>(K) << "\n";);
347
348 LibraryCursor Cur = LibMgr.getCursor(K, S);
349 while (!Q.allResolved()) {
350 const LibraryInfo *Lib = Cur.nextValidLib();
351 // Cursor not valid?
352 if (!Lib) {
353 if (!scanForNewLibraries(K, Cur))
354 break; // nothing new was added
355 continue; // Try to resolve next library
356 }
357
358 // can use Async here?
359 resolveSymbolsInLibrary(Lib: const_cast<LibraryInfo *>(Lib), Query&: Q,
360 Opts: Config.Options);
361 if (Q.allResolved())
362 break;
363 }
364 };
365
366 for (const auto &[St, Ty] : Config.Policy.Plan) {
367 tryResolveFrom(St, Ty);
368 if (Q.allResolved())
369 break;
370 }
371
372 // done:
373 LLVM_DEBUG({
374 dbgs() << "Search complete.\n";
375 for (const auto &r : Q.getAllResults())
376 dbgs() << "Resolved Symbol:" << r->Name << " -> " << r->ResolvedLibPath
377 << "\n";
378 });
379
380 OnComplete(Q);
381}
382
383bool LibraryResolver::scanForNewLibraries(PathType K, LibraryCursor &Cur) {
384 while (ScanHelper.leftToScan(K)) {
385 scanLibrariesIfNeeded(K, BatchSize: scanBatchSize);
386
387 // Check if scanning added new libraries
388 if (Cur.hasMoreValidLib())
389 return true;
390 }
391
392 // No new libraries were added
393 return false;
394}
395
396bool LibraryResolver::scanLibrariesIfNeeded(PathType PK, size_t BatchSize) {
397 LLVM_DEBUG(dbgs() << "LibraryResolver::scanLibrariesIfNeeded: Scanning for "
398 << (PK == PathType::User ? "User" : "System")
399 << " libraries\n";);
400 if (!ScanHelper.leftToScan(K: PK))
401 return false;
402
403 LibraryScanner Scanner(ScanHelper, LibMgr, ShouldScanCall);
404 Scanner.scanNext(Kind: PK, batchSize: BatchSize);
405 return true;
406}
407} // end namespace llvm::orc
408