1//===-- LLVMSymbolize.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation for LLVM symbolization library.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/DebugInfo/Symbolize/Symbolize.h"
14
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/DebugInfo/BTF/BTFContext.h"
17#include "llvm/DebugInfo/DWARF/DWARFContext.h"
18#include "llvm/DebugInfo/GSYM/GsymContext.h"
19#include "llvm/DebugInfo/GSYM/GsymReader.h"
20#include "llvm/DebugInfo/PDB/PDB.h"
21#include "llvm/DebugInfo/PDB/PDBContext.h"
22#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
23#include "llvm/Demangle/Demangle.h"
24#include "llvm/Object/BuildID.h"
25#include "llvm/Object/COFF.h"
26#include "llvm/Object/ELFObjectFile.h"
27#include "llvm/Object/MachO.h"
28#include "llvm/Object/MachOUniversal.h"
29#include "llvm/Support/CRC.h"
30#include "llvm/Support/Casting.h"
31#include "llvm/Support/DataExtractor.h"
32#include "llvm/Support/Errc.h"
33#include "llvm/Support/FileSystem.h"
34#include "llvm/Support/MemoryBuffer.h"
35#include "llvm/Support/Path.h"
36#include <cassert>
37#include <cstring>
38
39namespace llvm {
40namespace codeview {
41union DebugInfo;
42}
43namespace symbolize {
44
45LLVMSymbolizer::LLVMSymbolizer() = default;
46
47LLVMSymbolizer::LLVMSymbolizer(const Options &Opts)
48 : Opts(Opts),
49 BIDFetcher(std::make_unique<BuildIDFetcher>(args: Opts.DebugFileDirectory)) {}
50
51LLVMSymbolizer::~LLVMSymbolizer() = default;
52
53template <typename T>
54Expected<DILineInfo>
55LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier,
56 object::SectionedAddress ModuleOffset) {
57
58 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
59 if (!InfoOrErr)
60 return InfoOrErr.takeError();
61
62 SymbolizableModule *Info = *InfoOrErr;
63
64 // A null module means an error has already been reported. Return an empty
65 // result.
66 if (!Info)
67 return DILineInfo();
68
69 // If the user is giving us relative addresses, add the preferred base of the
70 // object to the offset before we do the query. It's what DIContext expects.
71 if (Opts.RelativeAddresses)
72 ModuleOffset.Address += Info->getModulePreferredBase();
73
74 DILineInfo LineInfo = Info->symbolizeCode(
75 ModuleOffset,
76 LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions,
77 Opts.SkipLineZero),
78 UseSymbolTable: Opts.UseSymbolTable);
79 if (Opts.Demangle)
80 LineInfo.FunctionName = DemangleName(Name: LineInfo.FunctionName, DbiModuleDescriptor: Info);
81 return LineInfo;
82}
83
84Expected<DILineInfo>
85LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj,
86 object::SectionedAddress ModuleOffset) {
87 return symbolizeCodeCommon(ModuleSpecifier: Obj, ModuleOffset);
88}
89
90Expected<DILineInfo>
91LLVMSymbolizer::symbolizeCode(StringRef ModuleName,
92 object::SectionedAddress ModuleOffset) {
93 return symbolizeCodeCommon(ModuleSpecifier: ModuleName, ModuleOffset);
94}
95
96Expected<DILineInfo>
97LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID,
98 object::SectionedAddress ModuleOffset) {
99 return symbolizeCodeCommon(ModuleSpecifier: BuildID, ModuleOffset);
100}
101
102template <typename T>
103Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon(
104 const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) {
105 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
106 if (!InfoOrErr)
107 return InfoOrErr.takeError();
108
109 SymbolizableModule *Info = *InfoOrErr;
110
111 // A null module means an error has already been reported. Return an empty
112 // result.
113 if (!Info)
114 return DIInliningInfo();
115
116 // If the user is giving us relative addresses, add the preferred base of the
117 // object to the offset before we do the query. It's what DIContext expects.
118 if (Opts.RelativeAddresses)
119 ModuleOffset.Address += Info->getModulePreferredBase();
120
121 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
122 ModuleOffset,
123 LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions,
124 Opts.SkipLineZero),
125 UseSymbolTable: Opts.UseSymbolTable);
126 if (Opts.Demangle) {
127 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
128 auto *Frame = InlinedContext.getMutableFrame(Index: i);
129 Frame->FunctionName = DemangleName(Name: Frame->FunctionName, DbiModuleDescriptor: Info);
130 }
131 }
132 return InlinedContext;
133}
134
135Expected<DIInliningInfo>
136LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj,
137 object::SectionedAddress ModuleOffset) {
138 return symbolizeInlinedCodeCommon(ModuleSpecifier: Obj, ModuleOffset);
139}
140
141Expected<DIInliningInfo>
142LLVMSymbolizer::symbolizeInlinedCode(StringRef ModuleName,
143 object::SectionedAddress ModuleOffset) {
144 return symbolizeInlinedCodeCommon(ModuleSpecifier: ModuleName, ModuleOffset);
145}
146
147Expected<DIInliningInfo>
148LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,
149 object::SectionedAddress ModuleOffset) {
150 return symbolizeInlinedCodeCommon(ModuleSpecifier: BuildID, ModuleOffset);
151}
152
153template <typename T>
154Expected<DIGlobal>
155LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier,
156 object::SectionedAddress ModuleOffset) {
157
158 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
159 if (!InfoOrErr)
160 return InfoOrErr.takeError();
161
162 SymbolizableModule *Info = *InfoOrErr;
163 // A null module means an error has already been reported. Return an empty
164 // result.
165 if (!Info)
166 return DIGlobal();
167
168 // If the user is giving us relative addresses, add the preferred base of
169 // the object to the offset before we do the query. It's what DIContext
170 // expects.
171 if (Opts.RelativeAddresses)
172 ModuleOffset.Address += Info->getModulePreferredBase();
173
174 DIGlobal Global = Info->symbolizeData(ModuleOffset);
175 if (Opts.Demangle)
176 Global.Name = DemangleName(Name: Global.Name, DbiModuleDescriptor: Info);
177 return Global;
178}
179
180Expected<DIGlobal>
181LLVMSymbolizer::symbolizeData(const ObjectFile &Obj,
182 object::SectionedAddress ModuleOffset) {
183 return symbolizeDataCommon(ModuleSpecifier: Obj, ModuleOffset);
184}
185
186Expected<DIGlobal>
187LLVMSymbolizer::symbolizeData(StringRef ModuleName,
188 object::SectionedAddress ModuleOffset) {
189 return symbolizeDataCommon(ModuleSpecifier: ModuleName, ModuleOffset);
190}
191
192Expected<DIGlobal>
193LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID,
194 object::SectionedAddress ModuleOffset) {
195 return symbolizeDataCommon(ModuleSpecifier: BuildID, ModuleOffset);
196}
197
198template <typename T>
199Expected<std::vector<DILocal>>
200LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier,
201 object::SectionedAddress ModuleOffset) {
202 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
203 if (!InfoOrErr)
204 return InfoOrErr.takeError();
205
206 SymbolizableModule *Info = *InfoOrErr;
207 // A null module means an error has already been reported. Return an empty
208 // result.
209 if (!Info)
210 return std::vector<DILocal>();
211
212 // If the user is giving us relative addresses, add the preferred base of
213 // the object to the offset before we do the query. It's what DIContext
214 // expects.
215 if (Opts.RelativeAddresses)
216 ModuleOffset.Address += Info->getModulePreferredBase();
217
218 return Info->symbolizeFrame(ModuleOffset);
219}
220
221Expected<std::vector<DILocal>>
222LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj,
223 object::SectionedAddress ModuleOffset) {
224 return symbolizeFrameCommon(ModuleSpecifier: Obj, ModuleOffset);
225}
226
227Expected<std::vector<DILocal>>
228LLVMSymbolizer::symbolizeFrame(StringRef ModuleName,
229 object::SectionedAddress ModuleOffset) {
230 return symbolizeFrameCommon(ModuleSpecifier: ModuleName, ModuleOffset);
231}
232
233Expected<std::vector<DILocal>>
234LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
235 object::SectionedAddress ModuleOffset) {
236 return symbolizeFrameCommon(ModuleSpecifier: BuildID, ModuleOffset);
237}
238
239template <typename T>
240Expected<std::vector<DILineInfo>>
241LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol,
242 uint64_t Offset) {
243 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
244 if (!InfoOrErr)
245 return InfoOrErr.takeError();
246
247 SymbolizableModule *Info = *InfoOrErr;
248 std::vector<DILineInfo> Result;
249
250 // A null module means an error has already been reported. Return an empty
251 // result.
252 if (!Info)
253 return Result;
254
255 for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) {
256 DILineInfo LineInfo = Info->symbolizeCode(
257 ModuleOffset: A, LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
258 UseSymbolTable: Opts.UseSymbolTable);
259 if (LineInfo.FileName != DILineInfo::BadString) {
260 if (Opts.Demangle)
261 LineInfo.FunctionName = DemangleName(Name: LineInfo.FunctionName, DbiModuleDescriptor: Info);
262 Result.push_back(x: std::move(LineInfo));
263 }
264 }
265
266 return Result;
267}
268
269Expected<std::vector<DILineInfo>>
270LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol,
271 uint64_t Offset) {
272 return findSymbolCommon(ModuleSpecifier: Obj, Symbol, Offset);
273}
274
275Expected<std::vector<DILineInfo>>
276LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol,
277 uint64_t Offset) {
278 return findSymbolCommon(ModuleSpecifier: ModuleName, Symbol, Offset);
279}
280
281Expected<std::vector<DILineInfo>>
282LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol,
283 uint64_t Offset) {
284 return findSymbolCommon(ModuleSpecifier: BuildID, Symbol, Offset);
285}
286
287void LLVMSymbolizer::flush() {
288 ObjectForUBPathAndArch.clear();
289 LRUBinaries.clear();
290 CacheSize = 0;
291 BinaryForPath.clear();
292 ObjectPairForPathArch.clear();
293 Modules.clear();
294 BuildIDPaths.clear();
295}
296
297namespace {
298
299// For Path="/path/to/foo" and Basename="foo" assume that debug info is in
300// /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
301// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
302// /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
303std::string getDarwinDWARFResourceForPath(const std::string &Path,
304 const std::string &Basename) {
305 SmallString<16> ResourceName = StringRef(Path);
306 if (sys::path::extension(path: Path) != ".dSYM") {
307 ResourceName += ".dSYM";
308 }
309 sys::path::append(path&: ResourceName, a: "Contents", b: "Resources", c: "DWARF");
310 sys::path::append(path&: ResourceName, a: Basename);
311 return std::string(ResourceName);
312}
313
314bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
315 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
316 MemoryBuffer::getFileOrSTDIN(Filename: Path);
317 if (!MB)
318 return false;
319 return CRCHash == llvm::crc32(Data: arrayRefFromStringRef(Input: MB.get()->getBuffer()));
320}
321
322bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
323 uint32_t &CRCHash) {
324 if (!Obj)
325 return false;
326 for (const SectionRef &Section : Obj->sections()) {
327 StringRef Name;
328 consumeError(Err: Section.getName().moveInto(Value&: Name));
329
330 Name = Name.substr(Start: Name.find_first_not_of(Chars: "._"));
331 if (Name == "gnu_debuglink") {
332 Expected<StringRef> ContentsOrErr = Section.getContents();
333 if (!ContentsOrErr) {
334 consumeError(Err: ContentsOrErr.takeError());
335 return false;
336 }
337 DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0);
338 uint64_t Offset = 0;
339 if (const char *DebugNameStr = DE.getCStr(OffsetPtr: &Offset)) {
340 // 4-byte align the offset.
341 Offset = (Offset + 3) & ~0x3;
342 if (DE.isValidOffsetForDataOfSize(offset: Offset, length: 4)) {
343 DebugName = DebugNameStr;
344 CRCHash = DE.getU32(offset_ptr: &Offset);
345 return true;
346 }
347 }
348 break;
349 }
350 }
351 return false;
352}
353
354bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
355 const MachOObjectFile *Obj) {
356 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
357 ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
358 if (dbg_uuid.empty() || bin_uuid.empty())
359 return false;
360 return !memcmp(s1: dbg_uuid.data(), s2: bin_uuid.data(), n: dbg_uuid.size());
361}
362
363} // end anonymous namespace
364
365ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
366 const MachOObjectFile *MachExeObj,
367 const std::string &ArchName) {
368 // On Darwin we may find DWARF in separate object file in
369 // resource directory.
370 std::vector<std::string> DsymPaths;
371 StringRef Filename = sys::path::filename(path: ExePath);
372 DsymPaths.push_back(
373 x: getDarwinDWARFResourceForPath(Path: ExePath, Basename: std::string(Filename)));
374 for (const auto &Path : Opts.DsymHints) {
375 DsymPaths.push_back(
376 x: getDarwinDWARFResourceForPath(Path, Basename: std::string(Filename)));
377 }
378 for (const auto &Path : DsymPaths) {
379 auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
380 if (!DbgObjOrErr) {
381 // Ignore errors, the file might not exist.
382 consumeError(Err: DbgObjOrErr.takeError());
383 continue;
384 }
385 ObjectFile *DbgObj = DbgObjOrErr.get();
386 if (!DbgObj)
387 continue;
388 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(Val: DbgObj);
389 if (!MachDbgObj)
390 continue;
391 if (darwinDsymMatchesBinary(DbgObj: MachDbgObj, Obj: MachExeObj))
392 return DbgObj;
393 }
394 return nullptr;
395}
396
397ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
398 const ObjectFile *Obj,
399 const std::string &ArchName) {
400 std::string DebuglinkName;
401 uint32_t CRCHash;
402 std::string DebugBinaryPath;
403 if (!getGNUDebuglinkContents(Obj, DebugName&: DebuglinkName, CRCHash))
404 return nullptr;
405 if (!findDebugBinary(OrigPath: Path, DebuglinkName, CRCHash, Result&: DebugBinaryPath))
406 return nullptr;
407 auto DbgObjOrErr = getOrCreateObject(Path: DebugBinaryPath, ArchName);
408 if (!DbgObjOrErr) {
409 // Ignore errors, the file might not exist.
410 consumeError(Err: DbgObjOrErr.takeError());
411 return nullptr;
412 }
413 return DbgObjOrErr.get();
414}
415
416ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
417 const ELFObjectFileBase *Obj,
418 const std::string &ArchName) {
419 auto BuildID = getBuildID(Obj);
420 if (BuildID.size() < 2)
421 return nullptr;
422 std::string DebugBinaryPath;
423 if (!getOrFindDebugBinary(BuildID, Result&: DebugBinaryPath))
424 return nullptr;
425 auto DbgObjOrErr = getOrCreateObject(Path: DebugBinaryPath, ArchName);
426 if (!DbgObjOrErr) {
427 consumeError(Err: DbgObjOrErr.takeError());
428 return nullptr;
429 }
430 return DbgObjOrErr.get();
431}
432
433bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath,
434 const std::string &DebuglinkName,
435 uint32_t CRCHash, std::string &Result) {
436 SmallString<16> OrigDir(OrigPath);
437 llvm::sys::path::remove_filename(path&: OrigDir);
438 SmallString<16> DebugPath = OrigDir;
439 // Try relative/path/to/original_binary/debuglink_name
440 llvm::sys::path::append(path&: DebugPath, a: DebuglinkName);
441 if (checkFileCRC(Path: DebugPath, CRCHash)) {
442 Result = std::string(DebugPath);
443 return true;
444 }
445 // Try relative/path/to/original_binary/.debug/debuglink_name
446 DebugPath = OrigDir;
447 llvm::sys::path::append(path&: DebugPath, a: ".debug", b: DebuglinkName);
448 if (checkFileCRC(Path: DebugPath, CRCHash)) {
449 Result = std::string(DebugPath);
450 return true;
451 }
452 // Make the path absolute so that lookups will go to
453 // "/usr/lib/debug/full/path/to/debug", not
454 // "/usr/lib/debug/to/debug"
455 llvm::sys::fs::make_absolute(path&: OrigDir);
456 if (!Opts.FallbackDebugPath.empty()) {
457 // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
458 DebugPath = Opts.FallbackDebugPath;
459 } else {
460#if defined(__NetBSD__)
461 // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
462 DebugPath = "/usr/libdata/debug";
463#else
464 // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
465 DebugPath = "/usr/lib/debug";
466#endif
467 }
468 llvm::sys::path::append(path&: DebugPath, a: llvm::sys::path::relative_path(path: OrigDir),
469 b: DebuglinkName);
470 if (checkFileCRC(Path: DebugPath, CRCHash)) {
471 Result = std::string(DebugPath);
472 return true;
473 }
474 return false;
475}
476
477static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) {
478 return StringRef(reinterpret_cast<const char *>(BuildID.data()),
479 BuildID.size());
480}
481
482bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
483 std::string &Result) {
484 StringRef BuildIDStr = getBuildIDStr(BuildID);
485 auto I = BuildIDPaths.find(Key: BuildIDStr);
486 if (I != BuildIDPaths.end()) {
487 Result = I->second;
488 return true;
489 }
490 if (!BIDFetcher)
491 return false;
492 if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) {
493 Result = *Path;
494 auto InsertResult = BuildIDPaths.insert(KV: {BuildIDStr, Result});
495 assert(InsertResult.second);
496 (void)InsertResult;
497 return true;
498 }
499
500 return false;
501}
502
503std::string LLVMSymbolizer::lookUpGsymFile(const std::string &Path) {
504 if (Opts.DisableGsym)
505 return {};
506
507 auto CheckGsymFile = [](const llvm::StringRef &GsymPath) {
508 sys::fs::file_status Status;
509 std::error_code EC = llvm::sys::fs::status(path: GsymPath, result&: Status);
510 return !EC && !llvm::sys::fs::is_directory(status: Status);
511 };
512
513 // First, look beside the binary file
514 if (const auto GsymPath = Path + ".gsym"; CheckGsymFile(GsymPath))
515 return GsymPath;
516
517 // Then, look in the directories specified by GsymFileDirectory
518
519 for (const auto &Directory : Opts.GsymFileDirectory) {
520 SmallString<16> GsymPath = llvm::StringRef{Directory};
521 llvm::sys::path::append(path&: GsymPath,
522 a: llvm::sys::path::filename(path: Path) + ".gsym");
523
524 if (CheckGsymFile(GsymPath))
525 return static_cast<std::string>(GsymPath);
526 }
527
528 return {};
529}
530
531Expected<LLVMSymbolizer::ObjectPair>
532LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
533 const std::string &ArchName) {
534 auto I = ObjectPairForPathArch.find(x: std::make_pair(x: Path, y: ArchName));
535 if (I != ObjectPairForPathArch.end()) {
536 recordAccess(Bin&: BinaryForPath.find(x: Path)->second);
537 return I->second;
538 }
539
540 auto ObjOrErr = getOrCreateObject(Path, ArchName);
541 if (!ObjOrErr) {
542 ObjectPairForPathArch.emplace(args: std::make_pair(x: Path, y: ArchName),
543 args: ObjectPair(nullptr, nullptr));
544 return ObjOrErr.takeError();
545 }
546
547 ObjectFile *Obj = ObjOrErr.get();
548 assert(Obj != nullptr);
549 ObjectFile *DbgObj = nullptr;
550
551 if (auto MachObj = dyn_cast<const MachOObjectFile>(Val: Obj))
552 DbgObj = lookUpDsymFile(ExePath: Path, MachExeObj: MachObj, ArchName);
553 else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Val: Obj))
554 DbgObj = lookUpBuildIDObject(Path, Obj: ELFObj, ArchName);
555 if (!DbgObj)
556 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
557 if (!DbgObj)
558 DbgObj = Obj;
559 ObjectPair Res = std::make_pair(x&: Obj, y&: DbgObj);
560 std::string DbgObjPath = DbgObj->getFileName().str();
561 auto Pair =
562 ObjectPairForPathArch.emplace(args: std::make_pair(x: Path, y: ArchName), args&: Res);
563 BinaryForPath.find(x: DbgObjPath)->second.pushEvictor(Evictor: [this, I = Pair.first]() {
564 ObjectPairForPathArch.erase(position: I);
565 });
566 return Res;
567}
568
569Expected<ObjectFile *>
570LLVMSymbolizer::getOrCreateObject(const std::string &Path,
571 const std::string &ArchName) {
572 Binary *Bin;
573 auto Pair = BinaryForPath.emplace(args: Path, args: OwningBinary<Binary>());
574 if (!Pair.second) {
575 Bin = Pair.first->second->getBinary();
576 recordAccess(Bin&: Pair.first->second);
577 } else {
578 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
579 if (!BinOrErr)
580 return BinOrErr.takeError();
581
582 CachedBinary &CachedBin = Pair.first->second;
583 CachedBin = std::move(BinOrErr.get());
584 CachedBin.pushEvictor(Evictor: [this, I = Pair.first]() { BinaryForPath.erase(position: I); });
585 LRUBinaries.push_back(Node&: CachedBin);
586 CacheSize += CachedBin.size();
587 Bin = CachedBin->getBinary();
588 }
589
590 if (!Bin)
591 return static_cast<ObjectFile *>(nullptr);
592
593 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Val: Bin)) {
594 auto I = ObjectForUBPathAndArch.find(x: std::make_pair(x: Path, y: ArchName));
595 if (I != ObjectForUBPathAndArch.end())
596 return I->second.get();
597
598 Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
599 UB->getMachOObjectForArch(ArchName);
600 if (!ObjOrErr) {
601 ObjectForUBPathAndArch.emplace(args: std::make_pair(x: Path, y: ArchName),
602 args: std::unique_ptr<ObjectFile>());
603 return ObjOrErr.takeError();
604 }
605 ObjectFile *Res = ObjOrErr->get();
606 auto Pair = ObjectForUBPathAndArch.emplace(args: std::make_pair(x: Path, y: ArchName),
607 args: std::move(ObjOrErr.get()));
608 BinaryForPath.find(x: Path)->second.pushEvictor(
609 Evictor: [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(position: Iter); });
610 return Res;
611 }
612 if (Bin->isObject()) {
613 return cast<ObjectFile>(Val: Bin);
614 }
615 return errorCodeToError(EC: object_error::arch_not_found);
616}
617
618Expected<SymbolizableModule *>
619LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
620 std::unique_ptr<DIContext> Context,
621 StringRef ModuleName) {
622 auto InfoOrErr = SymbolizableObjectFile::create(Obj, DICtx: std::move(Context),
623 UntagAddresses: Opts.UntagAddresses);
624 std::unique_ptr<SymbolizableModule> SymMod;
625 if (InfoOrErr)
626 SymMod = std::move(*InfoOrErr);
627 auto InsertResult = Modules.insert(
628 x: std::make_pair(x: std::string(ModuleName), y: std::move(SymMod)));
629 assert(InsertResult.second);
630 if (!InfoOrErr)
631 return InfoOrErr.takeError();
632 return InsertResult.first->second.get();
633}
634
635Expected<SymbolizableModule *>
636LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) {
637 StringRef BinaryName = ModuleName;
638 StringRef ArchName = Opts.DefaultArch;
639 size_t ColonPos = ModuleName.find_last_of(C: ':');
640 // Verify that substring after colon form a valid arch name.
641 if (ColonPos != std::string::npos) {
642 StringRef ArchStr = ModuleName.substr(Start: ColonPos + 1);
643 if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
644 BinaryName = ModuleName.substr(Start: 0, N: ColonPos);
645 ArchName = ArchStr;
646 }
647 }
648
649 auto I = Modules.find(x: ModuleName);
650 if (I != Modules.end()) {
651 recordAccess(Bin&: BinaryForPath.find(x: BinaryName)->second);
652 return I->second.get();
653 }
654
655 auto ObjectsOrErr =
656 getOrCreateObjectPair(Path: std::string{BinaryName}, ArchName: std::string{ArchName});
657 if (!ObjectsOrErr) {
658 // Failed to find valid object file.
659 Modules.emplace(args&: ModuleName, args: std::unique_ptr<SymbolizableModule>());
660 return ObjectsOrErr.takeError();
661 }
662 ObjectPair Objects = ObjectsOrErr.get();
663
664 std::unique_ptr<DIContext> Context;
665 // If this is a COFF object containing PDB info and not containing DWARF
666 // section, use a PDBContext to symbolize. Otherwise, use DWARF.
667 // Create a DIContext to symbolize as follows:
668 // - If there is a GSYM file, create a GsymContext.
669 // - Otherwise, if this is a COFF object containing PDB info, create a
670 // PDBContext.
671 // - Otherwise, create a DWARFContext.
672 const auto GsymFile = lookUpGsymFile(Path: BinaryName.str());
673 if (!GsymFile.empty()) {
674 auto ReaderOrErr = gsym::GsymReader::openFile(Path: GsymFile);
675
676 if (ReaderOrErr) {
677 std::unique_ptr<gsym::GsymReader> Reader =
678 std::make_unique<gsym::GsymReader>(args: std::move(*ReaderOrErr));
679
680 Context = std::make_unique<gsym::GsymContext>(args: std::move(Reader));
681 }
682 }
683 if (!Context) {
684 if (auto CoffObject = dyn_cast<COFFObjectFile>(Val: Objects.first)) {
685 const codeview::DebugInfo *DebugInfo;
686 StringRef PDBFileName;
687 auto EC = CoffObject->getDebugPDBInfo(Info&: DebugInfo, PDBFileName);
688 // Use DWARF if there're DWARF sections.
689 bool HasDwarf = llvm::any_of(
690 Range: Objects.first->sections(), P: [](SectionRef Section) -> bool {
691 if (Expected<StringRef> SectionName = Section.getName())
692 return SectionName.get() == ".debug_info";
693 return false;
694 });
695 if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) {
696 using namespace pdb;
697 std::unique_ptr<IPDBSession> Session;
698
699 PDB_ReaderType ReaderType =
700 Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native;
701 if (auto Err = loadDataForEXE(Type: ReaderType, Path: Objects.first->getFileName(),
702 Session)) {
703 Modules.emplace(args&: ModuleName, args: std::unique_ptr<SymbolizableModule>());
704 // Return along the PDB filename to provide more context
705 return createFileError(F: PDBFileName, E: std::move(Err));
706 }
707 Context.reset(p: new PDBContext(*CoffObject, std::move(Session)));
708 }
709 }
710 }
711 if (!Context)
712 Context = DWARFContext::create(
713 Obj: *Objects.second, RelocAction: DWARFContext::ProcessDebugRelocations::Process,
714 L: nullptr, DWPName: Opts.DWPName);
715 auto ModuleOrErr =
716 createModuleInfo(Obj: Objects.first, Context: std::move(Context), ModuleName);
717 if (ModuleOrErr) {
718 auto I = Modules.find(x: ModuleName);
719 BinaryForPath.find(x: BinaryName)->second.pushEvictor(Evictor: [this, I]() {
720 Modules.erase(position: I);
721 });
722 }
723 return ModuleOrErr;
724}
725
726// For BPF programs .BTF.ext section contains line numbers information,
727// use it if regular DWARF is not available (e.g. for stripped binary).
728static bool useBTFContext(const ObjectFile &Obj) {
729 return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() &&
730 BTFParser::hasBTFSections(Obj);
731}
732
733Expected<SymbolizableModule *>
734LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) {
735 StringRef ObjName = Obj.getFileName();
736 auto I = Modules.find(x: ObjName);
737 if (I != Modules.end())
738 return I->second.get();
739
740 std::unique_ptr<DIContext> Context;
741 if (useBTFContext(Obj))
742 Context = BTFContext::create(Obj);
743 else
744 Context = DWARFContext::create(Obj);
745 // FIXME: handle COFF object with PDB info to use PDBContext
746 return createModuleInfo(Obj: &Obj, Context: std::move(Context), ModuleName: ObjName);
747}
748
749Expected<SymbolizableModule *>
750LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) {
751 std::string Path;
752 if (!getOrFindDebugBinary(BuildID, Result&: Path)) {
753 return createStringError(EC: errc::no_such_file_or_directory,
754 S: "could not find build ID");
755 }
756 return getOrCreateModuleInfo(ModuleName: Path);
757}
758
759namespace {
760
761// Undo these various manglings for Win32 extern "C" functions:
762// cdecl - _foo
763// stdcall - _foo@12
764// fastcall - @foo@12
765// vectorcall - foo@@12
766// These are all different linkage names for 'foo'.
767StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
768 char Front = SymbolName.empty() ? '\0' : SymbolName[0];
769
770 // Remove any '@[0-9]+' suffix.
771 bool HasAtNumSuffix = false;
772 if (Front != '?') {
773 size_t AtPos = SymbolName.rfind(C: '@');
774 if (AtPos != StringRef::npos &&
775 all_of(Range: drop_begin(RangeOrContainer&: SymbolName, N: AtPos + 1), P: isDigit)) {
776 SymbolName = SymbolName.substr(Start: 0, N: AtPos);
777 HasAtNumSuffix = true;
778 }
779 }
780
781 // Remove any ending '@' for vectorcall.
782 bool IsVectorCall = false;
783 if (HasAtNumSuffix && SymbolName.ends_with(Suffix: "@")) {
784 SymbolName = SymbolName.drop_back();
785 IsVectorCall = true;
786 }
787
788 // If not vectorcall, remove any '_' or '@' prefix.
789 if (!IsVectorCall && (Front == '_' || Front == '@'))
790 SymbolName = SymbolName.drop_front();
791
792 return SymbolName;
793}
794
795} // end anonymous namespace
796
797std::string
798LLVMSymbolizer::DemangleName(StringRef Name,
799 const SymbolizableModule *DbiModuleDescriptor) {
800 std::string Result;
801 if (nonMicrosoftDemangle(MangledName: Name, Result))
802 return Result;
803
804 if (Name.starts_with(Prefix: '?')) {
805 // Only do MSVC C++ demangling on symbols starting with '?'.
806 int status = 0;
807 char *DemangledName = microsoftDemangle(
808 mangled_name: Name, n_read: nullptr, status: &status,
809 Flags: MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention |
810 MSDF_NoMemberType | MSDF_NoReturnType));
811 if (status != 0)
812 return std::string{Name};
813 Result = DemangledName;
814 free(ptr: DemangledName);
815 return Result;
816 }
817
818 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) {
819 std::string DemangledCName(demanglePE32ExternCFunc(SymbolName: Name));
820 // On i386 Windows, the C name mangling for different calling conventions
821 // may also be applied on top of the Itanium or Rust name mangling.
822 if (nonMicrosoftDemangle(MangledName: DemangledCName, Result))
823 return Result;
824 return DemangledCName;
825 }
826 return std::string{Name};
827}
828
829void LLVMSymbolizer::recordAccess(CachedBinary &Bin) {
830 if (Bin->getBinary())
831 LRUBinaries.splice(I: LRUBinaries.end(), L2&: LRUBinaries, Node: Bin.getIterator());
832}
833
834void LLVMSymbolizer::pruneCache() {
835 // Evict the LRU binary until the max cache size is reached or there's <= 1
836 // item in the cache. The MRU binary is always kept to avoid thrashing if it's
837 // larger than the cache size.
838 while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() &&
839 std::next(x: LRUBinaries.begin()) != LRUBinaries.end()) {
840 CachedBinary &Bin = LRUBinaries.front();
841 CacheSize -= Bin.size();
842 LRUBinaries.pop_front();
843 Bin.evict();
844 }
845}
846
847void CachedBinary::pushEvictor(std::function<void()> NewEvictor) {
848 if (Evictor) {
849 this->Evictor = [OldEvictor = std::move(this->Evictor),
850 NewEvictor = std::move(NewEvictor)]() {
851 NewEvictor();
852 OldEvictor();
853 };
854 } else {
855 this->Evictor = std::move(NewEvictor);
856 }
857}
858
859} // namespace symbolize
860} // namespace llvm
861