1 | //===-- LLVMSymbolize.cpp -------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implementation for LLVM symbolization library. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/DebugInfo/Symbolize/Symbolize.h" |
14 | |
15 | #include "llvm/ADT/STLExtras.h" |
16 | #include "llvm/DebugInfo/BTF/BTFContext.h" |
17 | #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
18 | #include "llvm/DebugInfo/GSYM/GsymContext.h" |
19 | #include "llvm/DebugInfo/GSYM/GsymReader.h" |
20 | #include "llvm/DebugInfo/PDB/PDB.h" |
21 | #include "llvm/DebugInfo/PDB/PDBContext.h" |
22 | #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" |
23 | #include "llvm/Demangle/Demangle.h" |
24 | #include "llvm/Object/BuildID.h" |
25 | #include "llvm/Object/COFF.h" |
26 | #include "llvm/Object/ELFObjectFile.h" |
27 | #include "llvm/Object/MachO.h" |
28 | #include "llvm/Object/MachOUniversal.h" |
29 | #include "llvm/Support/CRC.h" |
30 | #include "llvm/Support/Casting.h" |
31 | #include "llvm/Support/DataExtractor.h" |
32 | #include "llvm/Support/Errc.h" |
33 | #include "llvm/Support/FileSystem.h" |
34 | #include "llvm/Support/MemoryBuffer.h" |
35 | #include "llvm/Support/Path.h" |
36 | #include <cassert> |
37 | #include <cstring> |
38 | |
39 | namespace llvm { |
40 | namespace codeview { |
41 | union DebugInfo; |
42 | } |
43 | namespace symbolize { |
44 | |
45 | LLVMSymbolizer::LLVMSymbolizer() = default; |
46 | |
47 | LLVMSymbolizer::LLVMSymbolizer(const Options &Opts) |
48 | : Opts(Opts), |
49 | BIDFetcher(std::make_unique<BuildIDFetcher>(args: Opts.DebugFileDirectory)) {} |
50 | |
51 | LLVMSymbolizer::~LLVMSymbolizer() = default; |
52 | |
53 | template <typename T> |
54 | Expected<DILineInfo> |
55 | LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier, |
56 | object::SectionedAddress ModuleOffset) { |
57 | |
58 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
59 | if (!InfoOrErr) |
60 | return InfoOrErr.takeError(); |
61 | |
62 | SymbolizableModule *Info = *InfoOrErr; |
63 | |
64 | // A null module means an error has already been reported. Return an empty |
65 | // result. |
66 | if (!Info) |
67 | return DILineInfo(); |
68 | |
69 | // If the user is giving us relative addresses, add the preferred base of the |
70 | // object to the offset before we do the query. It's what DIContext expects. |
71 | if (Opts.RelativeAddresses) |
72 | ModuleOffset.Address += Info->getModulePreferredBase(); |
73 | |
74 | DILineInfo LineInfo = Info->symbolizeCode( |
75 | ModuleOffset, |
76 | LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions, |
77 | Opts.SkipLineZero), |
78 | UseSymbolTable: Opts.UseSymbolTable); |
79 | if (Opts.Demangle) |
80 | LineInfo.FunctionName = DemangleName(Name: LineInfo.FunctionName, DbiModuleDescriptor: Info); |
81 | return LineInfo; |
82 | } |
83 | |
84 | Expected<DILineInfo> |
85 | LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj, |
86 | object::SectionedAddress ModuleOffset) { |
87 | return symbolizeCodeCommon(ModuleSpecifier: Obj, ModuleOffset); |
88 | } |
89 | |
90 | Expected<DILineInfo> |
91 | LLVMSymbolizer::symbolizeCode(StringRef ModuleName, |
92 | object::SectionedAddress ModuleOffset) { |
93 | return symbolizeCodeCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
94 | } |
95 | |
96 | Expected<DILineInfo> |
97 | LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID, |
98 | object::SectionedAddress ModuleOffset) { |
99 | return symbolizeCodeCommon(ModuleSpecifier: BuildID, ModuleOffset); |
100 | } |
101 | |
102 | template <typename T> |
103 | Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon( |
104 | const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) { |
105 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
106 | if (!InfoOrErr) |
107 | return InfoOrErr.takeError(); |
108 | |
109 | SymbolizableModule *Info = *InfoOrErr; |
110 | |
111 | // A null module means an error has already been reported. Return an empty |
112 | // result. |
113 | if (!Info) |
114 | return DIInliningInfo(); |
115 | |
116 | // If the user is giving us relative addresses, add the preferred base of the |
117 | // object to the offset before we do the query. It's what DIContext expects. |
118 | if (Opts.RelativeAddresses) |
119 | ModuleOffset.Address += Info->getModulePreferredBase(); |
120 | |
121 | DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( |
122 | ModuleOffset, |
123 | LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions, |
124 | Opts.SkipLineZero), |
125 | UseSymbolTable: Opts.UseSymbolTable); |
126 | if (Opts.Demangle) { |
127 | for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { |
128 | auto *Frame = InlinedContext.getMutableFrame(Index: i); |
129 | Frame->FunctionName = DemangleName(Name: Frame->FunctionName, DbiModuleDescriptor: Info); |
130 | } |
131 | } |
132 | return InlinedContext; |
133 | } |
134 | |
135 | Expected<DIInliningInfo> |
136 | LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj, |
137 | object::SectionedAddress ModuleOffset) { |
138 | return symbolizeInlinedCodeCommon(ModuleSpecifier: Obj, ModuleOffset); |
139 | } |
140 | |
141 | Expected<DIInliningInfo> |
142 | LLVMSymbolizer::symbolizeInlinedCode(StringRef ModuleName, |
143 | object::SectionedAddress ModuleOffset) { |
144 | return symbolizeInlinedCodeCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
145 | } |
146 | |
147 | Expected<DIInliningInfo> |
148 | LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID, |
149 | object::SectionedAddress ModuleOffset) { |
150 | return symbolizeInlinedCodeCommon(ModuleSpecifier: BuildID, ModuleOffset); |
151 | } |
152 | |
153 | template <typename T> |
154 | Expected<DIGlobal> |
155 | LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier, |
156 | object::SectionedAddress ModuleOffset) { |
157 | |
158 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
159 | if (!InfoOrErr) |
160 | return InfoOrErr.takeError(); |
161 | |
162 | SymbolizableModule *Info = *InfoOrErr; |
163 | // A null module means an error has already been reported. Return an empty |
164 | // result. |
165 | if (!Info) |
166 | return DIGlobal(); |
167 | |
168 | // If the user is giving us relative addresses, add the preferred base of |
169 | // the object to the offset before we do the query. It's what DIContext |
170 | // expects. |
171 | if (Opts.RelativeAddresses) |
172 | ModuleOffset.Address += Info->getModulePreferredBase(); |
173 | |
174 | DIGlobal Global = Info->symbolizeData(ModuleOffset); |
175 | if (Opts.Demangle) |
176 | Global.Name = DemangleName(Name: Global.Name, DbiModuleDescriptor: Info); |
177 | return Global; |
178 | } |
179 | |
180 | Expected<DIGlobal> |
181 | LLVMSymbolizer::symbolizeData(const ObjectFile &Obj, |
182 | object::SectionedAddress ModuleOffset) { |
183 | return symbolizeDataCommon(ModuleSpecifier: Obj, ModuleOffset); |
184 | } |
185 | |
186 | Expected<DIGlobal> |
187 | LLVMSymbolizer::symbolizeData(StringRef ModuleName, |
188 | object::SectionedAddress ModuleOffset) { |
189 | return symbolizeDataCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
190 | } |
191 | |
192 | Expected<DIGlobal> |
193 | LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID, |
194 | object::SectionedAddress ModuleOffset) { |
195 | return symbolizeDataCommon(ModuleSpecifier: BuildID, ModuleOffset); |
196 | } |
197 | |
198 | template <typename T> |
199 | Expected<std::vector<DILocal>> |
200 | LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier, |
201 | object::SectionedAddress ModuleOffset) { |
202 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
203 | if (!InfoOrErr) |
204 | return InfoOrErr.takeError(); |
205 | |
206 | SymbolizableModule *Info = *InfoOrErr; |
207 | // A null module means an error has already been reported. Return an empty |
208 | // result. |
209 | if (!Info) |
210 | return std::vector<DILocal>(); |
211 | |
212 | // If the user is giving us relative addresses, add the preferred base of |
213 | // the object to the offset before we do the query. It's what DIContext |
214 | // expects. |
215 | if (Opts.RelativeAddresses) |
216 | ModuleOffset.Address += Info->getModulePreferredBase(); |
217 | |
218 | return Info->symbolizeFrame(ModuleOffset); |
219 | } |
220 | |
221 | Expected<std::vector<DILocal>> |
222 | LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj, |
223 | object::SectionedAddress ModuleOffset) { |
224 | return symbolizeFrameCommon(ModuleSpecifier: Obj, ModuleOffset); |
225 | } |
226 | |
227 | Expected<std::vector<DILocal>> |
228 | LLVMSymbolizer::symbolizeFrame(StringRef ModuleName, |
229 | object::SectionedAddress ModuleOffset) { |
230 | return symbolizeFrameCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
231 | } |
232 | |
233 | Expected<std::vector<DILocal>> |
234 | LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID, |
235 | object::SectionedAddress ModuleOffset) { |
236 | return symbolizeFrameCommon(ModuleSpecifier: BuildID, ModuleOffset); |
237 | } |
238 | |
239 | template <typename T> |
240 | Expected<std::vector<DILineInfo>> |
241 | LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol, |
242 | uint64_t Offset) { |
243 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
244 | if (!InfoOrErr) |
245 | return InfoOrErr.takeError(); |
246 | |
247 | SymbolizableModule *Info = *InfoOrErr; |
248 | std::vector<DILineInfo> Result; |
249 | |
250 | // A null module means an error has already been reported. Return an empty |
251 | // result. |
252 | if (!Info) |
253 | return Result; |
254 | |
255 | for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) { |
256 | DILineInfo LineInfo = Info->symbolizeCode( |
257 | ModuleOffset: A, LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), |
258 | UseSymbolTable: Opts.UseSymbolTable); |
259 | if (LineInfo.FileName != DILineInfo::BadString) { |
260 | if (Opts.Demangle) |
261 | LineInfo.FunctionName = DemangleName(Name: LineInfo.FunctionName, DbiModuleDescriptor: Info); |
262 | Result.push_back(x: std::move(LineInfo)); |
263 | } |
264 | } |
265 | |
266 | return Result; |
267 | } |
268 | |
269 | Expected<std::vector<DILineInfo>> |
270 | LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol, |
271 | uint64_t Offset) { |
272 | return findSymbolCommon(ModuleSpecifier: Obj, Symbol, Offset); |
273 | } |
274 | |
275 | Expected<std::vector<DILineInfo>> |
276 | LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol, |
277 | uint64_t Offset) { |
278 | return findSymbolCommon(ModuleSpecifier: ModuleName, Symbol, Offset); |
279 | } |
280 | |
281 | Expected<std::vector<DILineInfo>> |
282 | LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol, |
283 | uint64_t Offset) { |
284 | return findSymbolCommon(ModuleSpecifier: BuildID, Symbol, Offset); |
285 | } |
286 | |
287 | void LLVMSymbolizer::flush() { |
288 | ObjectForUBPathAndArch.clear(); |
289 | LRUBinaries.clear(); |
290 | CacheSize = 0; |
291 | BinaryForPath.clear(); |
292 | ObjectPairForPathArch.clear(); |
293 | Modules.clear(); |
294 | BuildIDPaths.clear(); |
295 | } |
296 | |
297 | namespace { |
298 | |
299 | // For Path="/path/to/foo" and Basename="foo" assume that debug info is in |
300 | // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. |
301 | // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in |
302 | // /path/to/bar.dSYM/Contents/Resources/DWARF/foo. |
303 | std::string getDarwinDWARFResourceForPath(const std::string &Path, |
304 | const std::string &Basename) { |
305 | SmallString<16> ResourceName = StringRef(Path); |
306 | if (sys::path::extension(path: Path) != ".dSYM" ) { |
307 | ResourceName += ".dSYM" ; |
308 | } |
309 | sys::path::append(path&: ResourceName, a: "Contents" , b: "Resources" , c: "DWARF" ); |
310 | sys::path::append(path&: ResourceName, a: Basename); |
311 | return std::string(ResourceName); |
312 | } |
313 | |
314 | bool checkFileCRC(StringRef Path, uint32_t CRCHash) { |
315 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
316 | MemoryBuffer::getFileOrSTDIN(Filename: Path); |
317 | if (!MB) |
318 | return false; |
319 | return CRCHash == llvm::crc32(Data: arrayRefFromStringRef(Input: MB.get()->getBuffer())); |
320 | } |
321 | |
322 | bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, |
323 | uint32_t &CRCHash) { |
324 | if (!Obj) |
325 | return false; |
326 | for (const SectionRef &Section : Obj->sections()) { |
327 | StringRef Name; |
328 | consumeError(Err: Section.getName().moveInto(Value&: Name)); |
329 | |
330 | Name = Name.substr(Start: Name.find_first_not_of(Chars: "._" )); |
331 | if (Name == "gnu_debuglink" ) { |
332 | Expected<StringRef> ContentsOrErr = Section.getContents(); |
333 | if (!ContentsOrErr) { |
334 | consumeError(Err: ContentsOrErr.takeError()); |
335 | return false; |
336 | } |
337 | DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0); |
338 | uint64_t Offset = 0; |
339 | if (const char *DebugNameStr = DE.getCStr(OffsetPtr: &Offset)) { |
340 | // 4-byte align the offset. |
341 | Offset = (Offset + 3) & ~0x3; |
342 | if (DE.isValidOffsetForDataOfSize(offset: Offset, length: 4)) { |
343 | DebugName = DebugNameStr; |
344 | CRCHash = DE.getU32(offset_ptr: &Offset); |
345 | return true; |
346 | } |
347 | } |
348 | break; |
349 | } |
350 | } |
351 | return false; |
352 | } |
353 | |
354 | bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, |
355 | const MachOObjectFile *Obj) { |
356 | ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); |
357 | ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); |
358 | if (dbg_uuid.empty() || bin_uuid.empty()) |
359 | return false; |
360 | return !memcmp(s1: dbg_uuid.data(), s2: bin_uuid.data(), n: dbg_uuid.size()); |
361 | } |
362 | |
363 | } // end anonymous namespace |
364 | |
365 | ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, |
366 | const MachOObjectFile *MachExeObj, |
367 | const std::string &ArchName) { |
368 | // On Darwin we may find DWARF in separate object file in |
369 | // resource directory. |
370 | std::vector<std::string> DsymPaths; |
371 | StringRef Filename = sys::path::filename(path: ExePath); |
372 | DsymPaths.push_back( |
373 | x: getDarwinDWARFResourceForPath(Path: ExePath, Basename: std::string(Filename))); |
374 | for (const auto &Path : Opts.DsymHints) { |
375 | DsymPaths.push_back( |
376 | x: getDarwinDWARFResourceForPath(Path, Basename: std::string(Filename))); |
377 | } |
378 | for (const auto &Path : DsymPaths) { |
379 | auto DbgObjOrErr = getOrCreateObject(Path, ArchName); |
380 | if (!DbgObjOrErr) { |
381 | // Ignore errors, the file might not exist. |
382 | consumeError(Err: DbgObjOrErr.takeError()); |
383 | continue; |
384 | } |
385 | ObjectFile *DbgObj = DbgObjOrErr.get(); |
386 | if (!DbgObj) |
387 | continue; |
388 | const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(Val: DbgObj); |
389 | if (!MachDbgObj) |
390 | continue; |
391 | if (darwinDsymMatchesBinary(DbgObj: MachDbgObj, Obj: MachExeObj)) |
392 | return DbgObj; |
393 | } |
394 | return nullptr; |
395 | } |
396 | |
397 | ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, |
398 | const ObjectFile *Obj, |
399 | const std::string &ArchName) { |
400 | std::string DebuglinkName; |
401 | uint32_t CRCHash; |
402 | std::string DebugBinaryPath; |
403 | if (!getGNUDebuglinkContents(Obj, DebugName&: DebuglinkName, CRCHash)) |
404 | return nullptr; |
405 | if (!findDebugBinary(OrigPath: Path, DebuglinkName, CRCHash, Result&: DebugBinaryPath)) |
406 | return nullptr; |
407 | auto DbgObjOrErr = getOrCreateObject(Path: DebugBinaryPath, ArchName); |
408 | if (!DbgObjOrErr) { |
409 | // Ignore errors, the file might not exist. |
410 | consumeError(Err: DbgObjOrErr.takeError()); |
411 | return nullptr; |
412 | } |
413 | return DbgObjOrErr.get(); |
414 | } |
415 | |
416 | ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, |
417 | const ELFObjectFileBase *Obj, |
418 | const std::string &ArchName) { |
419 | auto BuildID = getBuildID(Obj); |
420 | if (BuildID.size() < 2) |
421 | return nullptr; |
422 | std::string DebugBinaryPath; |
423 | if (!getOrFindDebugBinary(BuildID, Result&: DebugBinaryPath)) |
424 | return nullptr; |
425 | auto DbgObjOrErr = getOrCreateObject(Path: DebugBinaryPath, ArchName); |
426 | if (!DbgObjOrErr) { |
427 | consumeError(Err: DbgObjOrErr.takeError()); |
428 | return nullptr; |
429 | } |
430 | return DbgObjOrErr.get(); |
431 | } |
432 | |
433 | bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath, |
434 | const std::string &DebuglinkName, |
435 | uint32_t CRCHash, std::string &Result) { |
436 | SmallString<16> OrigDir(OrigPath); |
437 | llvm::sys::path::remove_filename(path&: OrigDir); |
438 | SmallString<16> DebugPath = OrigDir; |
439 | // Try relative/path/to/original_binary/debuglink_name |
440 | llvm::sys::path::append(path&: DebugPath, a: DebuglinkName); |
441 | if (checkFileCRC(Path: DebugPath, CRCHash)) { |
442 | Result = std::string(DebugPath); |
443 | return true; |
444 | } |
445 | // Try relative/path/to/original_binary/.debug/debuglink_name |
446 | DebugPath = OrigDir; |
447 | llvm::sys::path::append(path&: DebugPath, a: ".debug" , b: DebuglinkName); |
448 | if (checkFileCRC(Path: DebugPath, CRCHash)) { |
449 | Result = std::string(DebugPath); |
450 | return true; |
451 | } |
452 | // Make the path absolute so that lookups will go to |
453 | // "/usr/lib/debug/full/path/to/debug", not |
454 | // "/usr/lib/debug/to/debug" |
455 | llvm::sys::fs::make_absolute(path&: OrigDir); |
456 | if (!Opts.FallbackDebugPath.empty()) { |
457 | // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name |
458 | DebugPath = Opts.FallbackDebugPath; |
459 | } else { |
460 | #if defined(__NetBSD__) |
461 | // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name |
462 | DebugPath = "/usr/libdata/debug" ; |
463 | #else |
464 | // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name |
465 | DebugPath = "/usr/lib/debug" ; |
466 | #endif |
467 | } |
468 | llvm::sys::path::append(path&: DebugPath, a: llvm::sys::path::relative_path(path: OrigDir), |
469 | b: DebuglinkName); |
470 | if (checkFileCRC(Path: DebugPath, CRCHash)) { |
471 | Result = std::string(DebugPath); |
472 | return true; |
473 | } |
474 | return false; |
475 | } |
476 | |
477 | static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) { |
478 | return StringRef(reinterpret_cast<const char *>(BuildID.data()), |
479 | BuildID.size()); |
480 | } |
481 | |
482 | bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID, |
483 | std::string &Result) { |
484 | StringRef BuildIDStr = getBuildIDStr(BuildID); |
485 | auto I = BuildIDPaths.find(Key: BuildIDStr); |
486 | if (I != BuildIDPaths.end()) { |
487 | Result = I->second; |
488 | return true; |
489 | } |
490 | if (!BIDFetcher) |
491 | return false; |
492 | if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) { |
493 | Result = *Path; |
494 | auto InsertResult = BuildIDPaths.insert(KV: {BuildIDStr, Result}); |
495 | assert(InsertResult.second); |
496 | (void)InsertResult; |
497 | return true; |
498 | } |
499 | |
500 | return false; |
501 | } |
502 | |
503 | std::string LLVMSymbolizer::lookUpGsymFile(const std::string &Path) { |
504 | if (Opts.DisableGsym) |
505 | return {}; |
506 | |
507 | auto CheckGsymFile = [](const llvm::StringRef &GsymPath) { |
508 | sys::fs::file_status Status; |
509 | std::error_code EC = llvm::sys::fs::status(path: GsymPath, result&: Status); |
510 | return !EC && !llvm::sys::fs::is_directory(status: Status); |
511 | }; |
512 | |
513 | // First, look beside the binary file |
514 | if (const auto GsymPath = Path + ".gsym" ; CheckGsymFile(GsymPath)) |
515 | return GsymPath; |
516 | |
517 | // Then, look in the directories specified by GsymFileDirectory |
518 | |
519 | for (const auto &Directory : Opts.GsymFileDirectory) { |
520 | SmallString<16> GsymPath = llvm::StringRef{Directory}; |
521 | llvm::sys::path::append(path&: GsymPath, |
522 | a: llvm::sys::path::filename(path: Path) + ".gsym" ); |
523 | |
524 | if (CheckGsymFile(GsymPath)) |
525 | return static_cast<std::string>(GsymPath); |
526 | } |
527 | |
528 | return {}; |
529 | } |
530 | |
531 | Expected<LLVMSymbolizer::ObjectPair> |
532 | LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, |
533 | const std::string &ArchName) { |
534 | auto I = ObjectPairForPathArch.find(x: std::make_pair(x: Path, y: ArchName)); |
535 | if (I != ObjectPairForPathArch.end()) { |
536 | recordAccess(Bin&: BinaryForPath.find(x: Path)->second); |
537 | return I->second; |
538 | } |
539 | |
540 | auto ObjOrErr = getOrCreateObject(Path, ArchName); |
541 | if (!ObjOrErr) { |
542 | ObjectPairForPathArch.emplace(args: std::make_pair(x: Path, y: ArchName), |
543 | args: ObjectPair(nullptr, nullptr)); |
544 | return ObjOrErr.takeError(); |
545 | } |
546 | |
547 | ObjectFile *Obj = ObjOrErr.get(); |
548 | assert(Obj != nullptr); |
549 | ObjectFile *DbgObj = nullptr; |
550 | |
551 | if (auto MachObj = dyn_cast<const MachOObjectFile>(Val: Obj)) |
552 | DbgObj = lookUpDsymFile(ExePath: Path, MachExeObj: MachObj, ArchName); |
553 | else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Val: Obj)) |
554 | DbgObj = lookUpBuildIDObject(Path, Obj: ELFObj, ArchName); |
555 | if (!DbgObj) |
556 | DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); |
557 | if (!DbgObj) |
558 | DbgObj = Obj; |
559 | ObjectPair Res = std::make_pair(x&: Obj, y&: DbgObj); |
560 | std::string DbgObjPath = DbgObj->getFileName().str(); |
561 | auto Pair = |
562 | ObjectPairForPathArch.emplace(args: std::make_pair(x: Path, y: ArchName), args&: Res); |
563 | BinaryForPath.find(x: DbgObjPath)->second.pushEvictor(Evictor: [this, I = Pair.first]() { |
564 | ObjectPairForPathArch.erase(position: I); |
565 | }); |
566 | return Res; |
567 | } |
568 | |
569 | Expected<ObjectFile *> |
570 | LLVMSymbolizer::getOrCreateObject(const std::string &Path, |
571 | const std::string &ArchName) { |
572 | Binary *Bin; |
573 | auto Pair = BinaryForPath.emplace(args: Path, args: OwningBinary<Binary>()); |
574 | if (!Pair.second) { |
575 | Bin = Pair.first->second->getBinary(); |
576 | recordAccess(Bin&: Pair.first->second); |
577 | } else { |
578 | Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); |
579 | if (!BinOrErr) |
580 | return BinOrErr.takeError(); |
581 | |
582 | CachedBinary &CachedBin = Pair.first->second; |
583 | CachedBin = std::move(BinOrErr.get()); |
584 | CachedBin.pushEvictor(Evictor: [this, I = Pair.first]() { BinaryForPath.erase(position: I); }); |
585 | LRUBinaries.push_back(Node&: CachedBin); |
586 | CacheSize += CachedBin.size(); |
587 | Bin = CachedBin->getBinary(); |
588 | } |
589 | |
590 | if (!Bin) |
591 | return static_cast<ObjectFile *>(nullptr); |
592 | |
593 | if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Val: Bin)) { |
594 | auto I = ObjectForUBPathAndArch.find(x: std::make_pair(x: Path, y: ArchName)); |
595 | if (I != ObjectForUBPathAndArch.end()) |
596 | return I->second.get(); |
597 | |
598 | Expected<std::unique_ptr<ObjectFile>> ObjOrErr = |
599 | UB->getMachOObjectForArch(ArchName); |
600 | if (!ObjOrErr) { |
601 | ObjectForUBPathAndArch.emplace(args: std::make_pair(x: Path, y: ArchName), |
602 | args: std::unique_ptr<ObjectFile>()); |
603 | return ObjOrErr.takeError(); |
604 | } |
605 | ObjectFile *Res = ObjOrErr->get(); |
606 | auto Pair = ObjectForUBPathAndArch.emplace(args: std::make_pair(x: Path, y: ArchName), |
607 | args: std::move(ObjOrErr.get())); |
608 | BinaryForPath.find(x: Path)->second.pushEvictor( |
609 | Evictor: [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(position: Iter); }); |
610 | return Res; |
611 | } |
612 | if (Bin->isObject()) { |
613 | return cast<ObjectFile>(Val: Bin); |
614 | } |
615 | return errorCodeToError(EC: object_error::arch_not_found); |
616 | } |
617 | |
618 | Expected<SymbolizableModule *> |
619 | LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, |
620 | std::unique_ptr<DIContext> Context, |
621 | StringRef ModuleName) { |
622 | auto InfoOrErr = SymbolizableObjectFile::create(Obj, DICtx: std::move(Context), |
623 | UntagAddresses: Opts.UntagAddresses); |
624 | std::unique_ptr<SymbolizableModule> SymMod; |
625 | if (InfoOrErr) |
626 | SymMod = std::move(*InfoOrErr); |
627 | auto InsertResult = Modules.insert( |
628 | x: std::make_pair(x: std::string(ModuleName), y: std::move(SymMod))); |
629 | assert(InsertResult.second); |
630 | if (!InfoOrErr) |
631 | return InfoOrErr.takeError(); |
632 | return InsertResult.first->second.get(); |
633 | } |
634 | |
635 | Expected<SymbolizableModule *> |
636 | LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) { |
637 | StringRef BinaryName = ModuleName; |
638 | StringRef ArchName = Opts.DefaultArch; |
639 | size_t ColonPos = ModuleName.find_last_of(C: ':'); |
640 | // Verify that substring after colon form a valid arch name. |
641 | if (ColonPos != std::string::npos) { |
642 | StringRef ArchStr = ModuleName.substr(Start: ColonPos + 1); |
643 | if (Triple(ArchStr).getArch() != Triple::UnknownArch) { |
644 | BinaryName = ModuleName.substr(Start: 0, N: ColonPos); |
645 | ArchName = ArchStr; |
646 | } |
647 | } |
648 | |
649 | auto I = Modules.find(x: ModuleName); |
650 | if (I != Modules.end()) { |
651 | recordAccess(Bin&: BinaryForPath.find(x: BinaryName)->second); |
652 | return I->second.get(); |
653 | } |
654 | |
655 | auto ObjectsOrErr = |
656 | getOrCreateObjectPair(Path: std::string{BinaryName}, ArchName: std::string{ArchName}); |
657 | if (!ObjectsOrErr) { |
658 | // Failed to find valid object file. |
659 | Modules.emplace(args&: ModuleName, args: std::unique_ptr<SymbolizableModule>()); |
660 | return ObjectsOrErr.takeError(); |
661 | } |
662 | ObjectPair Objects = ObjectsOrErr.get(); |
663 | |
664 | std::unique_ptr<DIContext> Context; |
665 | // If this is a COFF object containing PDB info and not containing DWARF |
666 | // section, use a PDBContext to symbolize. Otherwise, use DWARF. |
667 | // Create a DIContext to symbolize as follows: |
668 | // - If there is a GSYM file, create a GsymContext. |
669 | // - Otherwise, if this is a COFF object containing PDB info, create a |
670 | // PDBContext. |
671 | // - Otherwise, create a DWARFContext. |
672 | const auto GsymFile = lookUpGsymFile(Path: BinaryName.str()); |
673 | if (!GsymFile.empty()) { |
674 | auto ReaderOrErr = gsym::GsymReader::openFile(Path: GsymFile); |
675 | |
676 | if (ReaderOrErr) { |
677 | std::unique_ptr<gsym::GsymReader> Reader = |
678 | std::make_unique<gsym::GsymReader>(args: std::move(*ReaderOrErr)); |
679 | |
680 | Context = std::make_unique<gsym::GsymContext>(args: std::move(Reader)); |
681 | } |
682 | } |
683 | if (!Context) { |
684 | if (auto CoffObject = dyn_cast<COFFObjectFile>(Val: Objects.first)) { |
685 | const codeview::DebugInfo *DebugInfo; |
686 | StringRef PDBFileName; |
687 | auto EC = CoffObject->getDebugPDBInfo(Info&: DebugInfo, PDBFileName); |
688 | // Use DWARF if there're DWARF sections. |
689 | bool HasDwarf = llvm::any_of( |
690 | Range: Objects.first->sections(), P: [](SectionRef Section) -> bool { |
691 | if (Expected<StringRef> SectionName = Section.getName()) |
692 | return SectionName.get() == ".debug_info" ; |
693 | return false; |
694 | }); |
695 | if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) { |
696 | using namespace pdb; |
697 | std::unique_ptr<IPDBSession> Session; |
698 | |
699 | PDB_ReaderType ReaderType = |
700 | Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native; |
701 | if (auto Err = loadDataForEXE(Type: ReaderType, Path: Objects.first->getFileName(), |
702 | Session)) { |
703 | Modules.emplace(args&: ModuleName, args: std::unique_ptr<SymbolizableModule>()); |
704 | // Return along the PDB filename to provide more context |
705 | return createFileError(F: PDBFileName, E: std::move(Err)); |
706 | } |
707 | Context.reset(p: new PDBContext(*CoffObject, std::move(Session))); |
708 | } |
709 | } |
710 | } |
711 | if (!Context) |
712 | Context = DWARFContext::create( |
713 | Obj: *Objects.second, RelocAction: DWARFContext::ProcessDebugRelocations::Process, |
714 | L: nullptr, DWPName: Opts.DWPName); |
715 | auto ModuleOrErr = |
716 | createModuleInfo(Obj: Objects.first, Context: std::move(Context), ModuleName); |
717 | if (ModuleOrErr) { |
718 | auto I = Modules.find(x: ModuleName); |
719 | BinaryForPath.find(x: BinaryName)->second.pushEvictor(Evictor: [this, I]() { |
720 | Modules.erase(position: I); |
721 | }); |
722 | } |
723 | return ModuleOrErr; |
724 | } |
725 | |
726 | // For BPF programs .BTF.ext section contains line numbers information, |
727 | // use it if regular DWARF is not available (e.g. for stripped binary). |
728 | static bool useBTFContext(const ObjectFile &Obj) { |
729 | return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() && |
730 | BTFParser::hasBTFSections(Obj); |
731 | } |
732 | |
733 | Expected<SymbolizableModule *> |
734 | LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) { |
735 | StringRef ObjName = Obj.getFileName(); |
736 | auto I = Modules.find(x: ObjName); |
737 | if (I != Modules.end()) |
738 | return I->second.get(); |
739 | |
740 | std::unique_ptr<DIContext> Context; |
741 | if (useBTFContext(Obj)) |
742 | Context = BTFContext::create(Obj); |
743 | else |
744 | Context = DWARFContext::create(Obj); |
745 | // FIXME: handle COFF object with PDB info to use PDBContext |
746 | return createModuleInfo(Obj: &Obj, Context: std::move(Context), ModuleName: ObjName); |
747 | } |
748 | |
749 | Expected<SymbolizableModule *> |
750 | LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) { |
751 | std::string Path; |
752 | if (!getOrFindDebugBinary(BuildID, Result&: Path)) { |
753 | return createStringError(EC: errc::no_such_file_or_directory, |
754 | S: "could not find build ID" ); |
755 | } |
756 | return getOrCreateModuleInfo(ModuleName: Path); |
757 | } |
758 | |
759 | namespace { |
760 | |
761 | // Undo these various manglings for Win32 extern "C" functions: |
762 | // cdecl - _foo |
763 | // stdcall - _foo@12 |
764 | // fastcall - @foo@12 |
765 | // vectorcall - foo@@12 |
766 | // These are all different linkage names for 'foo'. |
767 | StringRef demanglePE32ExternCFunc(StringRef SymbolName) { |
768 | char Front = SymbolName.empty() ? '\0' : SymbolName[0]; |
769 | |
770 | // Remove any '@[0-9]+' suffix. |
771 | bool HasAtNumSuffix = false; |
772 | if (Front != '?') { |
773 | size_t AtPos = SymbolName.rfind(C: '@'); |
774 | if (AtPos != StringRef::npos && |
775 | all_of(Range: drop_begin(RangeOrContainer&: SymbolName, N: AtPos + 1), P: isDigit)) { |
776 | SymbolName = SymbolName.substr(Start: 0, N: AtPos); |
777 | HasAtNumSuffix = true; |
778 | } |
779 | } |
780 | |
781 | // Remove any ending '@' for vectorcall. |
782 | bool IsVectorCall = false; |
783 | if (HasAtNumSuffix && SymbolName.ends_with(Suffix: "@" )) { |
784 | SymbolName = SymbolName.drop_back(); |
785 | IsVectorCall = true; |
786 | } |
787 | |
788 | // If not vectorcall, remove any '_' or '@' prefix. |
789 | if (!IsVectorCall && (Front == '_' || Front == '@')) |
790 | SymbolName = SymbolName.drop_front(); |
791 | |
792 | return SymbolName; |
793 | } |
794 | |
795 | } // end anonymous namespace |
796 | |
797 | std::string |
798 | LLVMSymbolizer::DemangleName(StringRef Name, |
799 | const SymbolizableModule *DbiModuleDescriptor) { |
800 | std::string Result; |
801 | if (nonMicrosoftDemangle(MangledName: Name, Result)) |
802 | return Result; |
803 | |
804 | if (Name.starts_with(Prefix: '?')) { |
805 | // Only do MSVC C++ demangling on symbols starting with '?'. |
806 | int status = 0; |
807 | char *DemangledName = microsoftDemangle( |
808 | mangled_name: Name, n_read: nullptr, status: &status, |
809 | Flags: MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | |
810 | MSDF_NoMemberType | MSDF_NoReturnType)); |
811 | if (status != 0) |
812 | return std::string{Name}; |
813 | Result = DemangledName; |
814 | free(ptr: DemangledName); |
815 | return Result; |
816 | } |
817 | |
818 | if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) { |
819 | std::string DemangledCName(demanglePE32ExternCFunc(SymbolName: Name)); |
820 | // On i386 Windows, the C name mangling for different calling conventions |
821 | // may also be applied on top of the Itanium or Rust name mangling. |
822 | if (nonMicrosoftDemangle(MangledName: DemangledCName, Result)) |
823 | return Result; |
824 | return DemangledCName; |
825 | } |
826 | return std::string{Name}; |
827 | } |
828 | |
829 | void LLVMSymbolizer::recordAccess(CachedBinary &Bin) { |
830 | if (Bin->getBinary()) |
831 | LRUBinaries.splice(I: LRUBinaries.end(), L2&: LRUBinaries, Node: Bin.getIterator()); |
832 | } |
833 | |
834 | void LLVMSymbolizer::pruneCache() { |
835 | // Evict the LRU binary until the max cache size is reached or there's <= 1 |
836 | // item in the cache. The MRU binary is always kept to avoid thrashing if it's |
837 | // larger than the cache size. |
838 | while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() && |
839 | std::next(x: LRUBinaries.begin()) != LRUBinaries.end()) { |
840 | CachedBinary &Bin = LRUBinaries.front(); |
841 | CacheSize -= Bin.size(); |
842 | LRUBinaries.pop_front(); |
843 | Bin.evict(); |
844 | } |
845 | } |
846 | |
847 | void CachedBinary::pushEvictor(std::function<void()> NewEvictor) { |
848 | if (Evictor) { |
849 | this->Evictor = [OldEvictor = std::move(this->Evictor), |
850 | NewEvictor = std::move(NewEvictor)]() { |
851 | NewEvictor(); |
852 | OldEvictor(); |
853 | }; |
854 | } else { |
855 | this->Evictor = std::move(NewEvictor); |
856 | } |
857 | } |
858 | |
859 | } // namespace symbolize |
860 | } // namespace llvm |
861 | |