1 | //===-- LLVMSymbolize.cpp -------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implementation for LLVM symbolization library. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/DebugInfo/Symbolize/Symbolize.h" |
14 | |
15 | #include "llvm/ADT/STLExtras.h" |
16 | #include "llvm/DebugInfo/BTF/BTFContext.h" |
17 | #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
18 | #include "llvm/DebugInfo/PDB/PDB.h" |
19 | #include "llvm/DebugInfo/PDB/PDBContext.h" |
20 | #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" |
21 | #include "llvm/Demangle/Demangle.h" |
22 | #include "llvm/Object/BuildID.h" |
23 | #include "llvm/Object/COFF.h" |
24 | #include "llvm/Object/ELFObjectFile.h" |
25 | #include "llvm/Object/MachO.h" |
26 | #include "llvm/Object/MachOUniversal.h" |
27 | #include "llvm/Support/CRC.h" |
28 | #include "llvm/Support/Casting.h" |
29 | #include "llvm/Support/DataExtractor.h" |
30 | #include "llvm/Support/Errc.h" |
31 | #include "llvm/Support/FileSystem.h" |
32 | #include "llvm/Support/MemoryBuffer.h" |
33 | #include "llvm/Support/Path.h" |
34 | #include <algorithm> |
35 | #include <cassert> |
36 | #include <cstring> |
37 | |
38 | namespace llvm { |
39 | namespace codeview { |
40 | union DebugInfo; |
41 | } |
42 | namespace symbolize { |
43 | |
44 | LLVMSymbolizer::LLVMSymbolizer() = default; |
45 | |
46 | LLVMSymbolizer::LLVMSymbolizer(const Options &Opts) |
47 | : Opts(Opts), |
48 | BIDFetcher(std::make_unique<BuildIDFetcher>(args: Opts.DebugFileDirectory)) {} |
49 | |
50 | LLVMSymbolizer::~LLVMSymbolizer() = default; |
51 | |
52 | template <typename T> |
53 | Expected<DILineInfo> |
54 | LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier, |
55 | object::SectionedAddress ModuleOffset) { |
56 | |
57 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
58 | if (!InfoOrErr) |
59 | return InfoOrErr.takeError(); |
60 | |
61 | SymbolizableModule *Info = *InfoOrErr; |
62 | |
63 | // A null module means an error has already been reported. Return an empty |
64 | // result. |
65 | if (!Info) |
66 | return DILineInfo(); |
67 | |
68 | // If the user is giving us relative addresses, add the preferred base of the |
69 | // object to the offset before we do the query. It's what DIContext expects. |
70 | if (Opts.RelativeAddresses) |
71 | ModuleOffset.Address += Info->getModulePreferredBase(); |
72 | |
73 | DILineInfo LineInfo = Info->symbolizeCode( |
74 | ModuleOffset, LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), |
75 | UseSymbolTable: Opts.UseSymbolTable); |
76 | if (Opts.Demangle) |
77 | LineInfo.FunctionName = DemangleName(Name: LineInfo.FunctionName, DbiModuleDescriptor: Info); |
78 | return LineInfo; |
79 | } |
80 | |
81 | Expected<DILineInfo> |
82 | LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj, |
83 | object::SectionedAddress ModuleOffset) { |
84 | return symbolizeCodeCommon(ModuleSpecifier: Obj, ModuleOffset); |
85 | } |
86 | |
87 | Expected<DILineInfo> |
88 | LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, |
89 | object::SectionedAddress ModuleOffset) { |
90 | return symbolizeCodeCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
91 | } |
92 | |
93 | Expected<DILineInfo> |
94 | LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID, |
95 | object::SectionedAddress ModuleOffset) { |
96 | return symbolizeCodeCommon(ModuleSpecifier: BuildID, ModuleOffset); |
97 | } |
98 | |
99 | template <typename T> |
100 | Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon( |
101 | const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) { |
102 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
103 | if (!InfoOrErr) |
104 | return InfoOrErr.takeError(); |
105 | |
106 | SymbolizableModule *Info = *InfoOrErr; |
107 | |
108 | // A null module means an error has already been reported. Return an empty |
109 | // result. |
110 | if (!Info) |
111 | return DIInliningInfo(); |
112 | |
113 | // If the user is giving us relative addresses, add the preferred base of the |
114 | // object to the offset before we do the query. It's what DIContext expects. |
115 | if (Opts.RelativeAddresses) |
116 | ModuleOffset.Address += Info->getModulePreferredBase(); |
117 | |
118 | DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( |
119 | ModuleOffset, LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), |
120 | UseSymbolTable: Opts.UseSymbolTable); |
121 | if (Opts.Demangle) { |
122 | for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { |
123 | auto *Frame = InlinedContext.getMutableFrame(Index: i); |
124 | Frame->FunctionName = DemangleName(Name: Frame->FunctionName, DbiModuleDescriptor: Info); |
125 | } |
126 | } |
127 | return InlinedContext; |
128 | } |
129 | |
130 | Expected<DIInliningInfo> |
131 | LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj, |
132 | object::SectionedAddress ModuleOffset) { |
133 | return symbolizeInlinedCodeCommon(ModuleSpecifier: Obj, ModuleOffset); |
134 | } |
135 | |
136 | Expected<DIInliningInfo> |
137 | LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName, |
138 | object::SectionedAddress ModuleOffset) { |
139 | return symbolizeInlinedCodeCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
140 | } |
141 | |
142 | Expected<DIInliningInfo> |
143 | LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID, |
144 | object::SectionedAddress ModuleOffset) { |
145 | return symbolizeInlinedCodeCommon(ModuleSpecifier: BuildID, ModuleOffset); |
146 | } |
147 | |
148 | template <typename T> |
149 | Expected<DIGlobal> |
150 | LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier, |
151 | object::SectionedAddress ModuleOffset) { |
152 | |
153 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
154 | if (!InfoOrErr) |
155 | return InfoOrErr.takeError(); |
156 | |
157 | SymbolizableModule *Info = *InfoOrErr; |
158 | // A null module means an error has already been reported. Return an empty |
159 | // result. |
160 | if (!Info) |
161 | return DIGlobal(); |
162 | |
163 | // If the user is giving us relative addresses, add the preferred base of |
164 | // the object to the offset before we do the query. It's what DIContext |
165 | // expects. |
166 | if (Opts.RelativeAddresses) |
167 | ModuleOffset.Address += Info->getModulePreferredBase(); |
168 | |
169 | DIGlobal Global = Info->symbolizeData(ModuleOffset); |
170 | if (Opts.Demangle) |
171 | Global.Name = DemangleName(Name: Global.Name, DbiModuleDescriptor: Info); |
172 | return Global; |
173 | } |
174 | |
175 | Expected<DIGlobal> |
176 | LLVMSymbolizer::symbolizeData(const ObjectFile &Obj, |
177 | object::SectionedAddress ModuleOffset) { |
178 | return symbolizeDataCommon(ModuleSpecifier: Obj, ModuleOffset); |
179 | } |
180 | |
181 | Expected<DIGlobal> |
182 | LLVMSymbolizer::symbolizeData(const std::string &ModuleName, |
183 | object::SectionedAddress ModuleOffset) { |
184 | return symbolizeDataCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
185 | } |
186 | |
187 | Expected<DIGlobal> |
188 | LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID, |
189 | object::SectionedAddress ModuleOffset) { |
190 | return symbolizeDataCommon(ModuleSpecifier: BuildID, ModuleOffset); |
191 | } |
192 | |
193 | template <typename T> |
194 | Expected<std::vector<DILocal>> |
195 | LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier, |
196 | object::SectionedAddress ModuleOffset) { |
197 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
198 | if (!InfoOrErr) |
199 | return InfoOrErr.takeError(); |
200 | |
201 | SymbolizableModule *Info = *InfoOrErr; |
202 | // A null module means an error has already been reported. Return an empty |
203 | // result. |
204 | if (!Info) |
205 | return std::vector<DILocal>(); |
206 | |
207 | // If the user is giving us relative addresses, add the preferred base of |
208 | // the object to the offset before we do the query. It's what DIContext |
209 | // expects. |
210 | if (Opts.RelativeAddresses) |
211 | ModuleOffset.Address += Info->getModulePreferredBase(); |
212 | |
213 | return Info->symbolizeFrame(ModuleOffset); |
214 | } |
215 | |
216 | Expected<std::vector<DILocal>> |
217 | LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj, |
218 | object::SectionedAddress ModuleOffset) { |
219 | return symbolizeFrameCommon(ModuleSpecifier: Obj, ModuleOffset); |
220 | } |
221 | |
222 | Expected<std::vector<DILocal>> |
223 | LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName, |
224 | object::SectionedAddress ModuleOffset) { |
225 | return symbolizeFrameCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
226 | } |
227 | |
228 | Expected<std::vector<DILocal>> |
229 | LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID, |
230 | object::SectionedAddress ModuleOffset) { |
231 | return symbolizeFrameCommon(ModuleSpecifier: BuildID, ModuleOffset); |
232 | } |
233 | |
234 | template <typename T> |
235 | Expected<std::vector<DILineInfo>> |
236 | LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol, |
237 | uint64_t Offset) { |
238 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
239 | if (!InfoOrErr) |
240 | return InfoOrErr.takeError(); |
241 | |
242 | SymbolizableModule *Info = *InfoOrErr; |
243 | std::vector<DILineInfo> Result; |
244 | |
245 | // A null module means an error has already been reported. Return an empty |
246 | // result. |
247 | if (!Info) |
248 | return Result; |
249 | |
250 | for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) { |
251 | DILineInfo LineInfo = Info->symbolizeCode( |
252 | ModuleOffset: A, LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), |
253 | UseSymbolTable: Opts.UseSymbolTable); |
254 | if (LineInfo.FileName != DILineInfo::BadString) { |
255 | if (Opts.Demangle) |
256 | LineInfo.FunctionName = DemangleName(Name: LineInfo.FunctionName, DbiModuleDescriptor: Info); |
257 | Result.push_back(x: LineInfo); |
258 | } |
259 | } |
260 | |
261 | return Result; |
262 | } |
263 | |
264 | Expected<std::vector<DILineInfo>> |
265 | LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol, |
266 | uint64_t Offset) { |
267 | return findSymbolCommon(ModuleSpecifier: Obj, Symbol, Offset); |
268 | } |
269 | |
270 | Expected<std::vector<DILineInfo>> |
271 | LLVMSymbolizer::findSymbol(const std::string &ModuleName, StringRef Symbol, |
272 | uint64_t Offset) { |
273 | return findSymbolCommon(ModuleSpecifier: ModuleName, Symbol, Offset); |
274 | } |
275 | |
276 | Expected<std::vector<DILineInfo>> |
277 | LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol, |
278 | uint64_t Offset) { |
279 | return findSymbolCommon(ModuleSpecifier: BuildID, Symbol, Offset); |
280 | } |
281 | |
282 | void LLVMSymbolizer::flush() { |
283 | ObjectForUBPathAndArch.clear(); |
284 | LRUBinaries.clear(); |
285 | CacheSize = 0; |
286 | BinaryForPath.clear(); |
287 | ObjectPairForPathArch.clear(); |
288 | Modules.clear(); |
289 | BuildIDPaths.clear(); |
290 | } |
291 | |
292 | namespace { |
293 | |
294 | // For Path="/path/to/foo" and Basename="foo" assume that debug info is in |
295 | // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. |
296 | // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in |
297 | // /path/to/bar.dSYM/Contents/Resources/DWARF/foo. |
298 | std::string getDarwinDWARFResourceForPath(const std::string &Path, |
299 | const std::string &Basename) { |
300 | SmallString<16> ResourceName = StringRef(Path); |
301 | if (sys::path::extension(path: Path) != ".dSYM" ) { |
302 | ResourceName += ".dSYM" ; |
303 | } |
304 | sys::path::append(path&: ResourceName, a: "Contents" , b: "Resources" , c: "DWARF" ); |
305 | sys::path::append(path&: ResourceName, a: Basename); |
306 | return std::string(ResourceName); |
307 | } |
308 | |
309 | bool checkFileCRC(StringRef Path, uint32_t CRCHash) { |
310 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
311 | MemoryBuffer::getFileOrSTDIN(Filename: Path); |
312 | if (!MB) |
313 | return false; |
314 | return CRCHash == llvm::crc32(Data: arrayRefFromStringRef(Input: MB.get()->getBuffer())); |
315 | } |
316 | |
317 | bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, |
318 | uint32_t &CRCHash) { |
319 | if (!Obj) |
320 | return false; |
321 | for (const SectionRef &Section : Obj->sections()) { |
322 | StringRef Name; |
323 | consumeError(Err: Section.getName().moveInto(Value&: Name)); |
324 | |
325 | Name = Name.substr(Start: Name.find_first_not_of(Chars: "._" )); |
326 | if (Name == "gnu_debuglink" ) { |
327 | Expected<StringRef> ContentsOrErr = Section.getContents(); |
328 | if (!ContentsOrErr) { |
329 | consumeError(Err: ContentsOrErr.takeError()); |
330 | return false; |
331 | } |
332 | DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0); |
333 | uint64_t Offset = 0; |
334 | if (const char *DebugNameStr = DE.getCStr(OffsetPtr: &Offset)) { |
335 | // 4-byte align the offset. |
336 | Offset = (Offset + 3) & ~0x3; |
337 | if (DE.isValidOffsetForDataOfSize(offset: Offset, length: 4)) { |
338 | DebugName = DebugNameStr; |
339 | CRCHash = DE.getU32(offset_ptr: &Offset); |
340 | return true; |
341 | } |
342 | } |
343 | break; |
344 | } |
345 | } |
346 | return false; |
347 | } |
348 | |
349 | bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, |
350 | const MachOObjectFile *Obj) { |
351 | ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); |
352 | ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); |
353 | if (dbg_uuid.empty() || bin_uuid.empty()) |
354 | return false; |
355 | return !memcmp(s1: dbg_uuid.data(), s2: bin_uuid.data(), n: dbg_uuid.size()); |
356 | } |
357 | |
358 | } // end anonymous namespace |
359 | |
360 | ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, |
361 | const MachOObjectFile *MachExeObj, |
362 | const std::string &ArchName) { |
363 | // On Darwin we may find DWARF in separate object file in |
364 | // resource directory. |
365 | std::vector<std::string> DsymPaths; |
366 | StringRef Filename = sys::path::filename(path: ExePath); |
367 | DsymPaths.push_back( |
368 | x: getDarwinDWARFResourceForPath(Path: ExePath, Basename: std::string(Filename))); |
369 | for (const auto &Path : Opts.DsymHints) { |
370 | DsymPaths.push_back( |
371 | x: getDarwinDWARFResourceForPath(Path, Basename: std::string(Filename))); |
372 | } |
373 | for (const auto &Path : DsymPaths) { |
374 | auto DbgObjOrErr = getOrCreateObject(Path, ArchName); |
375 | if (!DbgObjOrErr) { |
376 | // Ignore errors, the file might not exist. |
377 | consumeError(Err: DbgObjOrErr.takeError()); |
378 | continue; |
379 | } |
380 | ObjectFile *DbgObj = DbgObjOrErr.get(); |
381 | if (!DbgObj) |
382 | continue; |
383 | const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(Val: DbgObj); |
384 | if (!MachDbgObj) |
385 | continue; |
386 | if (darwinDsymMatchesBinary(DbgObj: MachDbgObj, Obj: MachExeObj)) |
387 | return DbgObj; |
388 | } |
389 | return nullptr; |
390 | } |
391 | |
392 | ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, |
393 | const ObjectFile *Obj, |
394 | const std::string &ArchName) { |
395 | std::string DebuglinkName; |
396 | uint32_t CRCHash; |
397 | std::string DebugBinaryPath; |
398 | if (!getGNUDebuglinkContents(Obj, DebugName&: DebuglinkName, CRCHash)) |
399 | return nullptr; |
400 | if (!findDebugBinary(OrigPath: Path, DebuglinkName, CRCHash, Result&: DebugBinaryPath)) |
401 | return nullptr; |
402 | auto DbgObjOrErr = getOrCreateObject(Path: DebugBinaryPath, ArchName); |
403 | if (!DbgObjOrErr) { |
404 | // Ignore errors, the file might not exist. |
405 | consumeError(Err: DbgObjOrErr.takeError()); |
406 | return nullptr; |
407 | } |
408 | return DbgObjOrErr.get(); |
409 | } |
410 | |
411 | ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, |
412 | const ELFObjectFileBase *Obj, |
413 | const std::string &ArchName) { |
414 | auto BuildID = getBuildID(Obj); |
415 | if (BuildID.size() < 2) |
416 | return nullptr; |
417 | std::string DebugBinaryPath; |
418 | if (!getOrFindDebugBinary(BuildID, Result&: DebugBinaryPath)) |
419 | return nullptr; |
420 | auto DbgObjOrErr = getOrCreateObject(Path: DebugBinaryPath, ArchName); |
421 | if (!DbgObjOrErr) { |
422 | consumeError(Err: DbgObjOrErr.takeError()); |
423 | return nullptr; |
424 | } |
425 | return DbgObjOrErr.get(); |
426 | } |
427 | |
428 | bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath, |
429 | const std::string &DebuglinkName, |
430 | uint32_t CRCHash, std::string &Result) { |
431 | SmallString<16> OrigDir(OrigPath); |
432 | llvm::sys::path::remove_filename(path&: OrigDir); |
433 | SmallString<16> DebugPath = OrigDir; |
434 | // Try relative/path/to/original_binary/debuglink_name |
435 | llvm::sys::path::append(path&: DebugPath, a: DebuglinkName); |
436 | if (checkFileCRC(Path: DebugPath, CRCHash)) { |
437 | Result = std::string(DebugPath); |
438 | return true; |
439 | } |
440 | // Try relative/path/to/original_binary/.debug/debuglink_name |
441 | DebugPath = OrigDir; |
442 | llvm::sys::path::append(path&: DebugPath, a: ".debug" , b: DebuglinkName); |
443 | if (checkFileCRC(Path: DebugPath, CRCHash)) { |
444 | Result = std::string(DebugPath); |
445 | return true; |
446 | } |
447 | // Make the path absolute so that lookups will go to |
448 | // "/usr/lib/debug/full/path/to/debug", not |
449 | // "/usr/lib/debug/to/debug" |
450 | llvm::sys::fs::make_absolute(path&: OrigDir); |
451 | if (!Opts.FallbackDebugPath.empty()) { |
452 | // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name |
453 | DebugPath = Opts.FallbackDebugPath; |
454 | } else { |
455 | #if defined(__NetBSD__) |
456 | // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name |
457 | DebugPath = "/usr/libdata/debug" ; |
458 | #else |
459 | // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name |
460 | DebugPath = "/usr/lib/debug" ; |
461 | #endif |
462 | } |
463 | llvm::sys::path::append(path&: DebugPath, a: llvm::sys::path::relative_path(path: OrigDir), |
464 | b: DebuglinkName); |
465 | if (checkFileCRC(Path: DebugPath, CRCHash)) { |
466 | Result = std::string(DebugPath); |
467 | return true; |
468 | } |
469 | return false; |
470 | } |
471 | |
472 | static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) { |
473 | return StringRef(reinterpret_cast<const char *>(BuildID.data()), |
474 | BuildID.size()); |
475 | } |
476 | |
477 | bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID, |
478 | std::string &Result) { |
479 | StringRef BuildIDStr = getBuildIDStr(BuildID); |
480 | auto I = BuildIDPaths.find(Key: BuildIDStr); |
481 | if (I != BuildIDPaths.end()) { |
482 | Result = I->second; |
483 | return true; |
484 | } |
485 | if (!BIDFetcher) |
486 | return false; |
487 | if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) { |
488 | Result = *Path; |
489 | auto InsertResult = BuildIDPaths.insert(KV: {BuildIDStr, Result}); |
490 | assert(InsertResult.second); |
491 | (void)InsertResult; |
492 | return true; |
493 | } |
494 | |
495 | return false; |
496 | } |
497 | |
498 | Expected<LLVMSymbolizer::ObjectPair> |
499 | LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, |
500 | const std::string &ArchName) { |
501 | auto I = ObjectPairForPathArch.find(x: std::make_pair(x: Path, y: ArchName)); |
502 | if (I != ObjectPairForPathArch.end()) { |
503 | recordAccess(Bin&: BinaryForPath.find(x: Path)->second); |
504 | return I->second; |
505 | } |
506 | |
507 | auto ObjOrErr = getOrCreateObject(Path, ArchName); |
508 | if (!ObjOrErr) { |
509 | ObjectPairForPathArch.emplace(args: std::make_pair(x: Path, y: ArchName), |
510 | args: ObjectPair(nullptr, nullptr)); |
511 | return ObjOrErr.takeError(); |
512 | } |
513 | |
514 | ObjectFile *Obj = ObjOrErr.get(); |
515 | assert(Obj != nullptr); |
516 | ObjectFile *DbgObj = nullptr; |
517 | |
518 | if (auto MachObj = dyn_cast<const MachOObjectFile>(Val: Obj)) |
519 | DbgObj = lookUpDsymFile(ExePath: Path, MachExeObj: MachObj, ArchName); |
520 | else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Val: Obj)) |
521 | DbgObj = lookUpBuildIDObject(Path, Obj: ELFObj, ArchName); |
522 | if (!DbgObj) |
523 | DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); |
524 | if (!DbgObj) |
525 | DbgObj = Obj; |
526 | ObjectPair Res = std::make_pair(x&: Obj, y&: DbgObj); |
527 | std::string DbgObjPath = DbgObj->getFileName().str(); |
528 | auto Pair = |
529 | ObjectPairForPathArch.emplace(args: std::make_pair(x: Path, y: ArchName), args&: Res); |
530 | BinaryForPath.find(x: DbgObjPath)->second.pushEvictor(Evictor: [this, I = Pair.first]() { |
531 | ObjectPairForPathArch.erase(position: I); |
532 | }); |
533 | return Res; |
534 | } |
535 | |
536 | Expected<ObjectFile *> |
537 | LLVMSymbolizer::getOrCreateObject(const std::string &Path, |
538 | const std::string &ArchName) { |
539 | Binary *Bin; |
540 | auto Pair = BinaryForPath.emplace(args: Path, args: OwningBinary<Binary>()); |
541 | if (!Pair.second) { |
542 | Bin = Pair.first->second->getBinary(); |
543 | recordAccess(Bin&: Pair.first->second); |
544 | } else { |
545 | Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); |
546 | if (!BinOrErr) |
547 | return BinOrErr.takeError(); |
548 | |
549 | CachedBinary &CachedBin = Pair.first->second; |
550 | CachedBin = std::move(BinOrErr.get()); |
551 | CachedBin.pushEvictor(Evictor: [this, I = Pair.first]() { BinaryForPath.erase(position: I); }); |
552 | LRUBinaries.push_back(Node&: CachedBin); |
553 | CacheSize += CachedBin.size(); |
554 | Bin = CachedBin->getBinary(); |
555 | } |
556 | |
557 | if (!Bin) |
558 | return static_cast<ObjectFile *>(nullptr); |
559 | |
560 | if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Val: Bin)) { |
561 | auto I = ObjectForUBPathAndArch.find(x: std::make_pair(x: Path, y: ArchName)); |
562 | if (I != ObjectForUBPathAndArch.end()) |
563 | return I->second.get(); |
564 | |
565 | Expected<std::unique_ptr<ObjectFile>> ObjOrErr = |
566 | UB->getMachOObjectForArch(ArchName); |
567 | if (!ObjOrErr) { |
568 | ObjectForUBPathAndArch.emplace(args: std::make_pair(x: Path, y: ArchName), |
569 | args: std::unique_ptr<ObjectFile>()); |
570 | return ObjOrErr.takeError(); |
571 | } |
572 | ObjectFile *Res = ObjOrErr->get(); |
573 | auto Pair = ObjectForUBPathAndArch.emplace(args: std::make_pair(x: Path, y: ArchName), |
574 | args: std::move(ObjOrErr.get())); |
575 | BinaryForPath.find(x: Path)->second.pushEvictor( |
576 | Evictor: [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(position: Iter); }); |
577 | return Res; |
578 | } |
579 | if (Bin->isObject()) { |
580 | return cast<ObjectFile>(Val: Bin); |
581 | } |
582 | return errorCodeToError(EC: object_error::arch_not_found); |
583 | } |
584 | |
585 | Expected<SymbolizableModule *> |
586 | LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, |
587 | std::unique_ptr<DIContext> Context, |
588 | StringRef ModuleName) { |
589 | auto InfoOrErr = SymbolizableObjectFile::create(Obj, DICtx: std::move(Context), |
590 | UntagAddresses: Opts.UntagAddresses); |
591 | std::unique_ptr<SymbolizableModule> SymMod; |
592 | if (InfoOrErr) |
593 | SymMod = std::move(*InfoOrErr); |
594 | auto InsertResult = Modules.insert( |
595 | x: std::make_pair(x: std::string(ModuleName), y: std::move(SymMod))); |
596 | assert(InsertResult.second); |
597 | if (!InfoOrErr) |
598 | return InfoOrErr.takeError(); |
599 | return InsertResult.first->second.get(); |
600 | } |
601 | |
602 | Expected<SymbolizableModule *> |
603 | LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { |
604 | std::string BinaryName = ModuleName; |
605 | std::string ArchName = Opts.DefaultArch; |
606 | size_t ColonPos = ModuleName.find_last_of(c: ':'); |
607 | // Verify that substring after colon form a valid arch name. |
608 | if (ColonPos != std::string::npos) { |
609 | std::string ArchStr = ModuleName.substr(pos: ColonPos + 1); |
610 | if (Triple(ArchStr).getArch() != Triple::UnknownArch) { |
611 | BinaryName = ModuleName.substr(pos: 0, n: ColonPos); |
612 | ArchName = ArchStr; |
613 | } |
614 | } |
615 | |
616 | auto I = Modules.find(x: ModuleName); |
617 | if (I != Modules.end()) { |
618 | recordAccess(Bin&: BinaryForPath.find(x: BinaryName)->second); |
619 | return I->second.get(); |
620 | } |
621 | |
622 | auto ObjectsOrErr = getOrCreateObjectPair(Path: BinaryName, ArchName); |
623 | if (!ObjectsOrErr) { |
624 | // Failed to find valid object file. |
625 | Modules.emplace(args: ModuleName, args: std::unique_ptr<SymbolizableModule>()); |
626 | return ObjectsOrErr.takeError(); |
627 | } |
628 | ObjectPair Objects = ObjectsOrErr.get(); |
629 | |
630 | std::unique_ptr<DIContext> Context; |
631 | // If this is a COFF object containing PDB info and not containing DWARF |
632 | // section, use a PDBContext to symbolize. Otherwise, use DWARF. |
633 | if (auto CoffObject = dyn_cast<COFFObjectFile>(Val: Objects.first)) { |
634 | const codeview::DebugInfo *DebugInfo; |
635 | StringRef PDBFileName; |
636 | auto EC = CoffObject->getDebugPDBInfo(Info&: DebugInfo, PDBFileName); |
637 | // Use DWARF if there're DWARF sections. |
638 | bool HasDwarf = |
639 | llvm::any_of(Range: Objects.first->sections(), P: [](SectionRef Section) -> bool { |
640 | if (Expected<StringRef> SectionName = Section.getName()) |
641 | return SectionName.get() == ".debug_info" ; |
642 | return false; |
643 | }); |
644 | if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) { |
645 | using namespace pdb; |
646 | std::unique_ptr<IPDBSession> Session; |
647 | |
648 | PDB_ReaderType ReaderType = |
649 | Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native; |
650 | if (auto Err = loadDataForEXE(Type: ReaderType, Path: Objects.first->getFileName(), |
651 | Session)) { |
652 | Modules.emplace(args: ModuleName, args: std::unique_ptr<SymbolizableModule>()); |
653 | // Return along the PDB filename to provide more context |
654 | return createFileError(F: PDBFileName, E: std::move(Err)); |
655 | } |
656 | Context.reset(p: new PDBContext(*CoffObject, std::move(Session))); |
657 | } |
658 | } |
659 | if (!Context) |
660 | Context = DWARFContext::create( |
661 | Obj: *Objects.second, RelocAction: DWARFContext::ProcessDebugRelocations::Process, |
662 | L: nullptr, DWPName: Opts.DWPName); |
663 | auto ModuleOrErr = |
664 | createModuleInfo(Obj: Objects.first, Context: std::move(Context), ModuleName); |
665 | if (ModuleOrErr) { |
666 | auto I = Modules.find(x: ModuleName); |
667 | BinaryForPath.find(x: BinaryName)->second.pushEvictor(Evictor: [this, I]() { |
668 | Modules.erase(position: I); |
669 | }); |
670 | } |
671 | return ModuleOrErr; |
672 | } |
673 | |
674 | // For BPF programs .BTF.ext section contains line numbers information, |
675 | // use it if regular DWARF is not available (e.g. for stripped binary). |
676 | static bool useBTFContext(const ObjectFile &Obj) { |
677 | return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() && |
678 | BTFParser::hasBTFSections(Obj); |
679 | } |
680 | |
681 | Expected<SymbolizableModule *> |
682 | LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) { |
683 | StringRef ObjName = Obj.getFileName(); |
684 | auto I = Modules.find(x: ObjName); |
685 | if (I != Modules.end()) |
686 | return I->second.get(); |
687 | |
688 | std::unique_ptr<DIContext> Context; |
689 | if (useBTFContext(Obj)) |
690 | Context = BTFContext::create(Obj); |
691 | else |
692 | Context = DWARFContext::create(Obj); |
693 | // FIXME: handle COFF object with PDB info to use PDBContext |
694 | return createModuleInfo(Obj: &Obj, Context: std::move(Context), ModuleName: ObjName); |
695 | } |
696 | |
697 | Expected<SymbolizableModule *> |
698 | LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) { |
699 | std::string Path; |
700 | if (!getOrFindDebugBinary(BuildID, Result&: Path)) { |
701 | return createStringError(EC: errc::no_such_file_or_directory, |
702 | S: "could not find build ID" ); |
703 | } |
704 | return getOrCreateModuleInfo(ModuleName: Path); |
705 | } |
706 | |
707 | namespace { |
708 | |
709 | // Undo these various manglings for Win32 extern "C" functions: |
710 | // cdecl - _foo |
711 | // stdcall - _foo@12 |
712 | // fastcall - @foo@12 |
713 | // vectorcall - foo@@12 |
714 | // These are all different linkage names for 'foo'. |
715 | StringRef demanglePE32ExternCFunc(StringRef SymbolName) { |
716 | char Front = SymbolName.empty() ? '\0' : SymbolName[0]; |
717 | |
718 | // Remove any '@[0-9]+' suffix. |
719 | bool HasAtNumSuffix = false; |
720 | if (Front != '?') { |
721 | size_t AtPos = SymbolName.rfind(C: '@'); |
722 | if (AtPos != StringRef::npos && |
723 | all_of(Range: drop_begin(RangeOrContainer&: SymbolName, N: AtPos + 1), P: isDigit)) { |
724 | SymbolName = SymbolName.substr(Start: 0, N: AtPos); |
725 | HasAtNumSuffix = true; |
726 | } |
727 | } |
728 | |
729 | // Remove any ending '@' for vectorcall. |
730 | bool IsVectorCall = false; |
731 | if (HasAtNumSuffix && SymbolName.ends_with(Suffix: "@" )) { |
732 | SymbolName = SymbolName.drop_back(); |
733 | IsVectorCall = true; |
734 | } |
735 | |
736 | // If not vectorcall, remove any '_' or '@' prefix. |
737 | if (!IsVectorCall && (Front == '_' || Front == '@')) |
738 | SymbolName = SymbolName.drop_front(); |
739 | |
740 | return SymbolName; |
741 | } |
742 | |
743 | } // end anonymous namespace |
744 | |
745 | std::string |
746 | LLVMSymbolizer::DemangleName(const std::string &Name, |
747 | const SymbolizableModule *DbiModuleDescriptor) { |
748 | std::string Result; |
749 | if (nonMicrosoftDemangle(MangledName: Name, Result)) |
750 | return Result; |
751 | |
752 | if (!Name.empty() && Name.front() == '?') { |
753 | // Only do MSVC C++ demangling on symbols starting with '?'. |
754 | int status = 0; |
755 | char *DemangledName = microsoftDemangle( |
756 | mangled_name: Name, n_read: nullptr, status: &status, |
757 | Flags: MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | |
758 | MSDF_NoMemberType | MSDF_NoReturnType)); |
759 | if (status != 0) |
760 | return Name; |
761 | Result = DemangledName; |
762 | free(ptr: DemangledName); |
763 | return Result; |
764 | } |
765 | |
766 | if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) { |
767 | std::string DemangledCName(demanglePE32ExternCFunc(SymbolName: Name)); |
768 | // On i386 Windows, the C name mangling for different calling conventions |
769 | // may also be applied on top of the Itanium or Rust name mangling. |
770 | if (nonMicrosoftDemangle(MangledName: DemangledCName, Result)) |
771 | return Result; |
772 | return DemangledCName; |
773 | } |
774 | return Name; |
775 | } |
776 | |
777 | void LLVMSymbolizer::recordAccess(CachedBinary &Bin) { |
778 | if (Bin->getBinary()) |
779 | LRUBinaries.splice(I: LRUBinaries.end(), L2&: LRUBinaries, Node: Bin.getIterator()); |
780 | } |
781 | |
782 | void LLVMSymbolizer::pruneCache() { |
783 | // Evict the LRU binary until the max cache size is reached or there's <= 1 |
784 | // item in the cache. The MRU binary is always kept to avoid thrashing if it's |
785 | // larger than the cache size. |
786 | while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() && |
787 | std::next(x: LRUBinaries.begin()) != LRUBinaries.end()) { |
788 | CachedBinary &Bin = LRUBinaries.front(); |
789 | CacheSize -= Bin.size(); |
790 | LRUBinaries.pop_front(); |
791 | Bin.evict(); |
792 | } |
793 | } |
794 | |
795 | void CachedBinary::pushEvictor(std::function<void()> NewEvictor) { |
796 | if (Evictor) { |
797 | this->Evictor = [OldEvictor = std::move(this->Evictor), |
798 | NewEvictor = std::move(NewEvictor)]() { |
799 | NewEvictor(); |
800 | OldEvictor(); |
801 | }; |
802 | } else { |
803 | this->Evictor = std::move(NewEvictor); |
804 | } |
805 | } |
806 | |
807 | } // namespace symbolize |
808 | } // namespace llvm |
809 | |