| 1 | //===-- LLVMSymbolize.cpp -------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Implementation for LLVM symbolization library. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/DebugInfo/Symbolize/Symbolize.h" |
| 14 | |
| 15 | #include "llvm/ADT/STLExtras.h" |
| 16 | #include "llvm/DebugInfo/BTF/BTFContext.h" |
| 17 | #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
| 18 | #include "llvm/DebugInfo/GSYM/GsymContext.h" |
| 19 | #include "llvm/DebugInfo/GSYM/GsymReader.h" |
| 20 | #include "llvm/DebugInfo/PDB/PDB.h" |
| 21 | #include "llvm/DebugInfo/PDB/PDBContext.h" |
| 22 | #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" |
| 23 | #include "llvm/Demangle/Demangle.h" |
| 24 | #include "llvm/Object/BuildID.h" |
| 25 | #include "llvm/Object/COFF.h" |
| 26 | #include "llvm/Object/ELFObjectFile.h" |
| 27 | #include "llvm/Object/MachO.h" |
| 28 | #include "llvm/Object/MachOUniversal.h" |
| 29 | #include "llvm/Support/CRC.h" |
| 30 | #include "llvm/Support/Casting.h" |
| 31 | #include "llvm/Support/DataExtractor.h" |
| 32 | #include "llvm/Support/Errc.h" |
| 33 | #include "llvm/Support/FileSystem.h" |
| 34 | #include "llvm/Support/MemoryBuffer.h" |
| 35 | #include "llvm/Support/Path.h" |
| 36 | #include <cassert> |
| 37 | #include <cstring> |
| 38 | |
| 39 | namespace llvm { |
| 40 | namespace codeview { |
| 41 | union DebugInfo; |
| 42 | } |
| 43 | namespace symbolize { |
| 44 | |
| 45 | LLVMSymbolizer::LLVMSymbolizer() = default; |
| 46 | |
| 47 | LLVMSymbolizer::LLVMSymbolizer(const Options &Opts) |
| 48 | : Opts(Opts), |
| 49 | BIDFetcher(std::make_unique<BuildIDFetcher>(args: Opts.DebugFileDirectory)) {} |
| 50 | |
| 51 | LLVMSymbolizer::~LLVMSymbolizer() = default; |
| 52 | |
| 53 | template <typename T> |
| 54 | Expected<DILineInfo> |
| 55 | LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier, |
| 56 | object::SectionedAddress ModuleOffset) { |
| 57 | |
| 58 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
| 59 | if (!InfoOrErr) |
| 60 | return InfoOrErr.takeError(); |
| 61 | |
| 62 | SymbolizableModule *Info = *InfoOrErr; |
| 63 | |
| 64 | // A null module means an error has already been reported. Return an empty |
| 65 | // result. |
| 66 | if (!Info) |
| 67 | return DILineInfo(); |
| 68 | |
| 69 | // If the user is giving us relative addresses, add the preferred base of the |
| 70 | // object to the offset before we do the query. It's what DIContext expects. |
| 71 | if (Opts.RelativeAddresses) |
| 72 | ModuleOffset.Address += Info->getModulePreferredBase(); |
| 73 | |
| 74 | DILineInfo LineInfo = Info->symbolizeCode( |
| 75 | ModuleOffset, |
| 76 | LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions, |
| 77 | Opts.SkipLineZero), |
| 78 | UseSymbolTable: Opts.UseSymbolTable); |
| 79 | if (Opts.Demangle) |
| 80 | LineInfo.FunctionName = DemangleName(Name: LineInfo.FunctionName, DbiModuleDescriptor: Info); |
| 81 | return LineInfo; |
| 82 | } |
| 83 | |
| 84 | Expected<DILineInfo> |
| 85 | LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj, |
| 86 | object::SectionedAddress ModuleOffset) { |
| 87 | return symbolizeCodeCommon(ModuleSpecifier: Obj, ModuleOffset); |
| 88 | } |
| 89 | |
| 90 | Expected<DILineInfo> |
| 91 | LLVMSymbolizer::symbolizeCode(StringRef ModuleName, |
| 92 | object::SectionedAddress ModuleOffset) { |
| 93 | return symbolizeCodeCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
| 94 | } |
| 95 | |
| 96 | Expected<DILineInfo> |
| 97 | LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID, |
| 98 | object::SectionedAddress ModuleOffset) { |
| 99 | return symbolizeCodeCommon(ModuleSpecifier: BuildID, ModuleOffset); |
| 100 | } |
| 101 | |
| 102 | template <typename T> |
| 103 | Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon( |
| 104 | const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) { |
| 105 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
| 106 | if (!InfoOrErr) |
| 107 | return InfoOrErr.takeError(); |
| 108 | |
| 109 | SymbolizableModule *Info = *InfoOrErr; |
| 110 | |
| 111 | // A null module means an error has already been reported. Return an empty |
| 112 | // result. |
| 113 | if (!Info) |
| 114 | return DIInliningInfo(); |
| 115 | |
| 116 | // If the user is giving us relative addresses, add the preferred base of the |
| 117 | // object to the offset before we do the query. It's what DIContext expects. |
| 118 | if (Opts.RelativeAddresses) |
| 119 | ModuleOffset.Address += Info->getModulePreferredBase(); |
| 120 | |
| 121 | DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( |
| 122 | ModuleOffset, |
| 123 | LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions, |
| 124 | Opts.SkipLineZero), |
| 125 | UseSymbolTable: Opts.UseSymbolTable); |
| 126 | if (Opts.Demangle) { |
| 127 | for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { |
| 128 | auto *Frame = InlinedContext.getMutableFrame(Index: i); |
| 129 | Frame->FunctionName = DemangleName(Name: Frame->FunctionName, DbiModuleDescriptor: Info); |
| 130 | } |
| 131 | } |
| 132 | return InlinedContext; |
| 133 | } |
| 134 | |
| 135 | Expected<DIInliningInfo> |
| 136 | LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj, |
| 137 | object::SectionedAddress ModuleOffset) { |
| 138 | return symbolizeInlinedCodeCommon(ModuleSpecifier: Obj, ModuleOffset); |
| 139 | } |
| 140 | |
| 141 | Expected<DIInliningInfo> |
| 142 | LLVMSymbolizer::symbolizeInlinedCode(StringRef ModuleName, |
| 143 | object::SectionedAddress ModuleOffset) { |
| 144 | return symbolizeInlinedCodeCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
| 145 | } |
| 146 | |
| 147 | Expected<DIInliningInfo> |
| 148 | LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID, |
| 149 | object::SectionedAddress ModuleOffset) { |
| 150 | return symbolizeInlinedCodeCommon(ModuleSpecifier: BuildID, ModuleOffset); |
| 151 | } |
| 152 | |
| 153 | template <typename T> |
| 154 | Expected<DIGlobal> |
| 155 | LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier, |
| 156 | object::SectionedAddress ModuleOffset) { |
| 157 | |
| 158 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
| 159 | if (!InfoOrErr) |
| 160 | return InfoOrErr.takeError(); |
| 161 | |
| 162 | SymbolizableModule *Info = *InfoOrErr; |
| 163 | // A null module means an error has already been reported. Return an empty |
| 164 | // result. |
| 165 | if (!Info) |
| 166 | return DIGlobal(); |
| 167 | |
| 168 | // If the user is giving us relative addresses, add the preferred base of |
| 169 | // the object to the offset before we do the query. It's what DIContext |
| 170 | // expects. |
| 171 | if (Opts.RelativeAddresses) |
| 172 | ModuleOffset.Address += Info->getModulePreferredBase(); |
| 173 | |
| 174 | DIGlobal Global = Info->symbolizeData(ModuleOffset); |
| 175 | if (Opts.Demangle) |
| 176 | Global.Name = DemangleName(Name: Global.Name, DbiModuleDescriptor: Info); |
| 177 | return Global; |
| 178 | } |
| 179 | |
| 180 | Expected<DIGlobal> |
| 181 | LLVMSymbolizer::symbolizeData(const ObjectFile &Obj, |
| 182 | object::SectionedAddress ModuleOffset) { |
| 183 | return symbolizeDataCommon(ModuleSpecifier: Obj, ModuleOffset); |
| 184 | } |
| 185 | |
| 186 | Expected<DIGlobal> |
| 187 | LLVMSymbolizer::symbolizeData(StringRef ModuleName, |
| 188 | object::SectionedAddress ModuleOffset) { |
| 189 | return symbolizeDataCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
| 190 | } |
| 191 | |
| 192 | Expected<DIGlobal> |
| 193 | LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID, |
| 194 | object::SectionedAddress ModuleOffset) { |
| 195 | return symbolizeDataCommon(ModuleSpecifier: BuildID, ModuleOffset); |
| 196 | } |
| 197 | |
| 198 | template <typename T> |
| 199 | Expected<std::vector<DILocal>> |
| 200 | LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier, |
| 201 | object::SectionedAddress ModuleOffset) { |
| 202 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
| 203 | if (!InfoOrErr) |
| 204 | return InfoOrErr.takeError(); |
| 205 | |
| 206 | SymbolizableModule *Info = *InfoOrErr; |
| 207 | // A null module means an error has already been reported. Return an empty |
| 208 | // result. |
| 209 | if (!Info) |
| 210 | return std::vector<DILocal>(); |
| 211 | |
| 212 | // If the user is giving us relative addresses, add the preferred base of |
| 213 | // the object to the offset before we do the query. It's what DIContext |
| 214 | // expects. |
| 215 | if (Opts.RelativeAddresses) |
| 216 | ModuleOffset.Address += Info->getModulePreferredBase(); |
| 217 | |
| 218 | return Info->symbolizeFrame(ModuleOffset); |
| 219 | } |
| 220 | |
| 221 | Expected<std::vector<DILocal>> |
| 222 | LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj, |
| 223 | object::SectionedAddress ModuleOffset) { |
| 224 | return symbolizeFrameCommon(ModuleSpecifier: Obj, ModuleOffset); |
| 225 | } |
| 226 | |
| 227 | Expected<std::vector<DILocal>> |
| 228 | LLVMSymbolizer::symbolizeFrame(StringRef ModuleName, |
| 229 | object::SectionedAddress ModuleOffset) { |
| 230 | return symbolizeFrameCommon(ModuleSpecifier: ModuleName, ModuleOffset); |
| 231 | } |
| 232 | |
| 233 | Expected<std::vector<DILocal>> |
| 234 | LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID, |
| 235 | object::SectionedAddress ModuleOffset) { |
| 236 | return symbolizeFrameCommon(ModuleSpecifier: BuildID, ModuleOffset); |
| 237 | } |
| 238 | |
| 239 | template <typename T> |
| 240 | Expected<std::vector<DILineInfo>> |
| 241 | LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol, |
| 242 | uint64_t Offset) { |
| 243 | auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); |
| 244 | if (!InfoOrErr) |
| 245 | return InfoOrErr.takeError(); |
| 246 | |
| 247 | SymbolizableModule *Info = *InfoOrErr; |
| 248 | std::vector<DILineInfo> Result; |
| 249 | |
| 250 | // A null module means an error has already been reported. Return an empty |
| 251 | // result. |
| 252 | if (!Info) |
| 253 | return Result; |
| 254 | |
| 255 | for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) { |
| 256 | DILineInfo LineInfo = Info->symbolizeCode( |
| 257 | ModuleOffset: A, LineInfoSpecifier: DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), |
| 258 | UseSymbolTable: Opts.UseSymbolTable); |
| 259 | if (LineInfo.FileName != DILineInfo::BadString) { |
| 260 | if (Opts.Demangle) |
| 261 | LineInfo.FunctionName = DemangleName(Name: LineInfo.FunctionName, DbiModuleDescriptor: Info); |
| 262 | Result.push_back(x: std::move(LineInfo)); |
| 263 | } |
| 264 | } |
| 265 | |
| 266 | return Result; |
| 267 | } |
| 268 | |
| 269 | Expected<std::vector<DILineInfo>> |
| 270 | LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol, |
| 271 | uint64_t Offset) { |
| 272 | return findSymbolCommon(ModuleSpecifier: Obj, Symbol, Offset); |
| 273 | } |
| 274 | |
| 275 | Expected<std::vector<DILineInfo>> |
| 276 | LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol, |
| 277 | uint64_t Offset) { |
| 278 | return findSymbolCommon(ModuleSpecifier: ModuleName, Symbol, Offset); |
| 279 | } |
| 280 | |
| 281 | Expected<std::vector<DILineInfo>> |
| 282 | LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol, |
| 283 | uint64_t Offset) { |
| 284 | return findSymbolCommon(ModuleSpecifier: BuildID, Symbol, Offset); |
| 285 | } |
| 286 | |
| 287 | void LLVMSymbolizer::flush() { |
| 288 | ObjectForUBPathAndArch.clear(); |
| 289 | LRUBinaries.clear(); |
| 290 | CacheSize = 0; |
| 291 | BinaryForPath.clear(); |
| 292 | ObjectPairForPathArch.clear(); |
| 293 | Modules.clear(); |
| 294 | BuildIDPaths.clear(); |
| 295 | } |
| 296 | |
| 297 | namespace { |
| 298 | |
| 299 | // For Path="/path/to/foo" and Basename="foo" assume that debug info is in |
| 300 | // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. |
| 301 | // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in |
| 302 | // /path/to/bar.dSYM/Contents/Resources/DWARF/foo. |
| 303 | std::string getDarwinDWARFResourceForPath(const std::string &Path, |
| 304 | const std::string &Basename) { |
| 305 | SmallString<16> ResourceName = StringRef(Path); |
| 306 | if (sys::path::extension(path: Path) != ".dSYM" ) { |
| 307 | ResourceName += ".dSYM" ; |
| 308 | } |
| 309 | sys::path::append(path&: ResourceName, a: "Contents" , b: "Resources" , c: "DWARF" ); |
| 310 | sys::path::append(path&: ResourceName, a: Basename); |
| 311 | return std::string(ResourceName); |
| 312 | } |
| 313 | |
| 314 | bool checkFileCRC(StringRef Path, uint32_t CRCHash) { |
| 315 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
| 316 | MemoryBuffer::getFileOrSTDIN(Filename: Path); |
| 317 | if (!MB) |
| 318 | return false; |
| 319 | return CRCHash == llvm::crc32(Data: arrayRefFromStringRef(Input: MB.get()->getBuffer())); |
| 320 | } |
| 321 | |
| 322 | bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, |
| 323 | uint32_t &CRCHash) { |
| 324 | if (!Obj) |
| 325 | return false; |
| 326 | for (const SectionRef &Section : Obj->sections()) { |
| 327 | StringRef Name; |
| 328 | consumeError(Err: Section.getName().moveInto(Value&: Name)); |
| 329 | |
| 330 | Name = Name.substr(Start: Name.find_first_not_of(Chars: "._" )); |
| 331 | if (Name == "gnu_debuglink" ) { |
| 332 | Expected<StringRef> ContentsOrErr = Section.getContents(); |
| 333 | if (!ContentsOrErr) { |
| 334 | consumeError(Err: ContentsOrErr.takeError()); |
| 335 | return false; |
| 336 | } |
| 337 | DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0); |
| 338 | uint64_t Offset = 0; |
| 339 | if (const char *DebugNameStr = DE.getCStr(OffsetPtr: &Offset)) { |
| 340 | // 4-byte align the offset. |
| 341 | Offset = (Offset + 3) & ~0x3; |
| 342 | if (DE.isValidOffsetForDataOfSize(offset: Offset, length: 4)) { |
| 343 | DebugName = DebugNameStr; |
| 344 | CRCHash = DE.getU32(offset_ptr: &Offset); |
| 345 | return true; |
| 346 | } |
| 347 | } |
| 348 | break; |
| 349 | } |
| 350 | } |
| 351 | return false; |
| 352 | } |
| 353 | |
| 354 | bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, |
| 355 | const MachOObjectFile *Obj) { |
| 356 | ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); |
| 357 | ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); |
| 358 | if (dbg_uuid.empty() || bin_uuid.empty()) |
| 359 | return false; |
| 360 | return !memcmp(s1: dbg_uuid.data(), s2: bin_uuid.data(), n: dbg_uuid.size()); |
| 361 | } |
| 362 | |
| 363 | } // end anonymous namespace |
| 364 | |
| 365 | ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, |
| 366 | const MachOObjectFile *MachExeObj, |
| 367 | const std::string &ArchName) { |
| 368 | // On Darwin we may find DWARF in separate object file in |
| 369 | // resource directory. |
| 370 | std::vector<std::string> DsymPaths; |
| 371 | StringRef Filename = sys::path::filename(path: ExePath); |
| 372 | DsymPaths.push_back( |
| 373 | x: getDarwinDWARFResourceForPath(Path: ExePath, Basename: std::string(Filename))); |
| 374 | for (const auto &Path : Opts.DsymHints) { |
| 375 | DsymPaths.push_back( |
| 376 | x: getDarwinDWARFResourceForPath(Path, Basename: std::string(Filename))); |
| 377 | } |
| 378 | for (const auto &Path : DsymPaths) { |
| 379 | auto DbgObjOrErr = getOrCreateObject(Path, ArchName); |
| 380 | if (!DbgObjOrErr) { |
| 381 | // Ignore errors, the file might not exist. |
| 382 | consumeError(Err: DbgObjOrErr.takeError()); |
| 383 | continue; |
| 384 | } |
| 385 | ObjectFile *DbgObj = DbgObjOrErr.get(); |
| 386 | if (!DbgObj) |
| 387 | continue; |
| 388 | const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(Val: DbgObj); |
| 389 | if (!MachDbgObj) |
| 390 | continue; |
| 391 | if (darwinDsymMatchesBinary(DbgObj: MachDbgObj, Obj: MachExeObj)) |
| 392 | return DbgObj; |
| 393 | } |
| 394 | return nullptr; |
| 395 | } |
| 396 | |
| 397 | ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, |
| 398 | const ObjectFile *Obj, |
| 399 | const std::string &ArchName) { |
| 400 | std::string DebuglinkName; |
| 401 | uint32_t CRCHash; |
| 402 | std::string DebugBinaryPath; |
| 403 | if (!getGNUDebuglinkContents(Obj, DebugName&: DebuglinkName, CRCHash)) |
| 404 | return nullptr; |
| 405 | if (!findDebugBinary(OrigPath: Path, DebuglinkName, CRCHash, Result&: DebugBinaryPath)) |
| 406 | return nullptr; |
| 407 | auto DbgObjOrErr = getOrCreateObject(Path: DebugBinaryPath, ArchName); |
| 408 | if (!DbgObjOrErr) { |
| 409 | // Ignore errors, the file might not exist. |
| 410 | consumeError(Err: DbgObjOrErr.takeError()); |
| 411 | return nullptr; |
| 412 | } |
| 413 | return DbgObjOrErr.get(); |
| 414 | } |
| 415 | |
| 416 | ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, |
| 417 | const ELFObjectFileBase *Obj, |
| 418 | const std::string &ArchName) { |
| 419 | auto BuildID = getBuildID(Obj); |
| 420 | if (BuildID.size() < 2) |
| 421 | return nullptr; |
| 422 | std::string DebugBinaryPath; |
| 423 | if (!getOrFindDebugBinary(BuildID, Result&: DebugBinaryPath)) |
| 424 | return nullptr; |
| 425 | auto DbgObjOrErr = getOrCreateObject(Path: DebugBinaryPath, ArchName); |
| 426 | if (!DbgObjOrErr) { |
| 427 | consumeError(Err: DbgObjOrErr.takeError()); |
| 428 | return nullptr; |
| 429 | } |
| 430 | return DbgObjOrErr.get(); |
| 431 | } |
| 432 | |
| 433 | bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath, |
| 434 | const std::string &DebuglinkName, |
| 435 | uint32_t CRCHash, std::string &Result) { |
| 436 | SmallString<16> OrigDir(OrigPath); |
| 437 | llvm::sys::path::remove_filename(path&: OrigDir); |
| 438 | SmallString<16> DebugPath = OrigDir; |
| 439 | // Try relative/path/to/original_binary/debuglink_name |
| 440 | llvm::sys::path::append(path&: DebugPath, a: DebuglinkName); |
| 441 | if (checkFileCRC(Path: DebugPath, CRCHash)) { |
| 442 | Result = std::string(DebugPath); |
| 443 | return true; |
| 444 | } |
| 445 | // Try relative/path/to/original_binary/.debug/debuglink_name |
| 446 | DebugPath = OrigDir; |
| 447 | llvm::sys::path::append(path&: DebugPath, a: ".debug" , b: DebuglinkName); |
| 448 | if (checkFileCRC(Path: DebugPath, CRCHash)) { |
| 449 | Result = std::string(DebugPath); |
| 450 | return true; |
| 451 | } |
| 452 | // Make the path absolute so that lookups will go to |
| 453 | // "/usr/lib/debug/full/path/to/debug", not |
| 454 | // "/usr/lib/debug/to/debug" |
| 455 | llvm::sys::fs::make_absolute(path&: OrigDir); |
| 456 | if (!Opts.FallbackDebugPath.empty()) { |
| 457 | // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name |
| 458 | DebugPath = Opts.FallbackDebugPath; |
| 459 | } else { |
| 460 | #if defined(__NetBSD__) |
| 461 | // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name |
| 462 | DebugPath = "/usr/libdata/debug" ; |
| 463 | #else |
| 464 | // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name |
| 465 | DebugPath = "/usr/lib/debug" ; |
| 466 | #endif |
| 467 | } |
| 468 | llvm::sys::path::append(path&: DebugPath, a: llvm::sys::path::relative_path(path: OrigDir), |
| 469 | b: DebuglinkName); |
| 470 | if (checkFileCRC(Path: DebugPath, CRCHash)) { |
| 471 | Result = std::string(DebugPath); |
| 472 | return true; |
| 473 | } |
| 474 | return false; |
| 475 | } |
| 476 | |
| 477 | static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) { |
| 478 | return StringRef(reinterpret_cast<const char *>(BuildID.data()), |
| 479 | BuildID.size()); |
| 480 | } |
| 481 | |
| 482 | bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID, |
| 483 | std::string &Result) { |
| 484 | StringRef BuildIDStr = getBuildIDStr(BuildID); |
| 485 | auto I = BuildIDPaths.find(Key: BuildIDStr); |
| 486 | if (I != BuildIDPaths.end()) { |
| 487 | Result = I->second; |
| 488 | return true; |
| 489 | } |
| 490 | if (!BIDFetcher) |
| 491 | return false; |
| 492 | if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) { |
| 493 | Result = *Path; |
| 494 | auto InsertResult = BuildIDPaths.insert(KV: {BuildIDStr, Result}); |
| 495 | assert(InsertResult.second); |
| 496 | (void)InsertResult; |
| 497 | return true; |
| 498 | } |
| 499 | |
| 500 | return false; |
| 501 | } |
| 502 | |
| 503 | std::string LLVMSymbolizer::lookUpGsymFile(const std::string &Path) { |
| 504 | if (Opts.DisableGsym) |
| 505 | return {}; |
| 506 | |
| 507 | auto CheckGsymFile = [](const llvm::StringRef &GsymPath) { |
| 508 | sys::fs::file_status Status; |
| 509 | std::error_code EC = llvm::sys::fs::status(path: GsymPath, result&: Status); |
| 510 | return !EC && !llvm::sys::fs::is_directory(status: Status); |
| 511 | }; |
| 512 | |
| 513 | // First, look beside the binary file |
| 514 | if (const auto GsymPath = Path + ".gsym" ; CheckGsymFile(GsymPath)) |
| 515 | return GsymPath; |
| 516 | |
| 517 | // Then, look in the directories specified by GsymFileDirectory |
| 518 | |
| 519 | for (const auto &Directory : Opts.GsymFileDirectory) { |
| 520 | SmallString<16> GsymPath = llvm::StringRef{Directory}; |
| 521 | llvm::sys::path::append(path&: GsymPath, |
| 522 | a: llvm::sys::path::filename(path: Path) + ".gsym" ); |
| 523 | |
| 524 | if (CheckGsymFile(GsymPath)) |
| 525 | return static_cast<std::string>(GsymPath); |
| 526 | } |
| 527 | |
| 528 | return {}; |
| 529 | } |
| 530 | |
| 531 | Expected<LLVMSymbolizer::ObjectPair> |
| 532 | LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, |
| 533 | const std::string &ArchName) { |
| 534 | auto I = ObjectPairForPathArch.find(x: std::make_pair(x: Path, y: ArchName)); |
| 535 | if (I != ObjectPairForPathArch.end()) { |
| 536 | recordAccess(Bin&: BinaryForPath.find(x: Path)->second); |
| 537 | return I->second; |
| 538 | } |
| 539 | |
| 540 | auto ObjOrErr = getOrCreateObject(Path, ArchName); |
| 541 | if (!ObjOrErr) { |
| 542 | ObjectPairForPathArch.emplace(args: std::make_pair(x: Path, y: ArchName), |
| 543 | args: ObjectPair(nullptr, nullptr)); |
| 544 | return ObjOrErr.takeError(); |
| 545 | } |
| 546 | |
| 547 | ObjectFile *Obj = ObjOrErr.get(); |
| 548 | assert(Obj != nullptr); |
| 549 | ObjectFile *DbgObj = nullptr; |
| 550 | |
| 551 | if (auto MachObj = dyn_cast<const MachOObjectFile>(Val: Obj)) |
| 552 | DbgObj = lookUpDsymFile(ExePath: Path, MachExeObj: MachObj, ArchName); |
| 553 | else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Val: Obj)) |
| 554 | DbgObj = lookUpBuildIDObject(Path, Obj: ELFObj, ArchName); |
| 555 | if (!DbgObj) |
| 556 | DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); |
| 557 | if (!DbgObj) |
| 558 | DbgObj = Obj; |
| 559 | ObjectPair Res = std::make_pair(x&: Obj, y&: DbgObj); |
| 560 | std::string DbgObjPath = DbgObj->getFileName().str(); |
| 561 | auto Pair = |
| 562 | ObjectPairForPathArch.emplace(args: std::make_pair(x: Path, y: ArchName), args&: Res); |
| 563 | BinaryForPath.find(x: DbgObjPath)->second.pushEvictor(Evictor: [this, I = Pair.first]() { |
| 564 | ObjectPairForPathArch.erase(position: I); |
| 565 | }); |
| 566 | return Res; |
| 567 | } |
| 568 | |
| 569 | Expected<ObjectFile *> |
| 570 | LLVMSymbolizer::getOrCreateObject(const std::string &Path, |
| 571 | const std::string &ArchName) { |
| 572 | Binary *Bin; |
| 573 | auto Pair = BinaryForPath.emplace(args: Path, args: OwningBinary<Binary>()); |
| 574 | if (!Pair.second) { |
| 575 | Bin = Pair.first->second->getBinary(); |
| 576 | recordAccess(Bin&: Pair.first->second); |
| 577 | } else { |
| 578 | Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); |
| 579 | if (!BinOrErr) |
| 580 | return BinOrErr.takeError(); |
| 581 | |
| 582 | CachedBinary &CachedBin = Pair.first->second; |
| 583 | CachedBin = std::move(BinOrErr.get()); |
| 584 | CachedBin.pushEvictor(Evictor: [this, I = Pair.first]() { BinaryForPath.erase(position: I); }); |
| 585 | LRUBinaries.push_back(Node&: CachedBin); |
| 586 | CacheSize += CachedBin.size(); |
| 587 | Bin = CachedBin->getBinary(); |
| 588 | } |
| 589 | |
| 590 | if (!Bin) |
| 591 | return static_cast<ObjectFile *>(nullptr); |
| 592 | |
| 593 | if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Val: Bin)) { |
| 594 | auto I = ObjectForUBPathAndArch.find(x: std::make_pair(x: Path, y: ArchName)); |
| 595 | if (I != ObjectForUBPathAndArch.end()) |
| 596 | return I->second.get(); |
| 597 | |
| 598 | Expected<std::unique_ptr<ObjectFile>> ObjOrErr = |
| 599 | UB->getMachOObjectForArch(ArchName); |
| 600 | if (!ObjOrErr) { |
| 601 | ObjectForUBPathAndArch.emplace(args: std::make_pair(x: Path, y: ArchName), |
| 602 | args: std::unique_ptr<ObjectFile>()); |
| 603 | return ObjOrErr.takeError(); |
| 604 | } |
| 605 | ObjectFile *Res = ObjOrErr->get(); |
| 606 | auto Pair = ObjectForUBPathAndArch.emplace(args: std::make_pair(x: Path, y: ArchName), |
| 607 | args: std::move(ObjOrErr.get())); |
| 608 | BinaryForPath.find(x: Path)->second.pushEvictor( |
| 609 | Evictor: [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(position: Iter); }); |
| 610 | return Res; |
| 611 | } |
| 612 | if (Bin->isObject()) { |
| 613 | return cast<ObjectFile>(Val: Bin); |
| 614 | } |
| 615 | return errorCodeToError(EC: object_error::arch_not_found); |
| 616 | } |
| 617 | |
| 618 | Expected<SymbolizableModule *> |
| 619 | LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, |
| 620 | std::unique_ptr<DIContext> Context, |
| 621 | StringRef ModuleName) { |
| 622 | auto InfoOrErr = SymbolizableObjectFile::create(Obj, DICtx: std::move(Context), |
| 623 | UntagAddresses: Opts.UntagAddresses); |
| 624 | std::unique_ptr<SymbolizableModule> SymMod; |
| 625 | if (InfoOrErr) |
| 626 | SymMod = std::move(*InfoOrErr); |
| 627 | auto InsertResult = Modules.insert( |
| 628 | x: std::make_pair(x: std::string(ModuleName), y: std::move(SymMod))); |
| 629 | assert(InsertResult.second); |
| 630 | if (!InfoOrErr) |
| 631 | return InfoOrErr.takeError(); |
| 632 | return InsertResult.first->second.get(); |
| 633 | } |
| 634 | |
| 635 | Expected<SymbolizableModule *> |
| 636 | LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) { |
| 637 | StringRef BinaryName = ModuleName; |
| 638 | StringRef ArchName = Opts.DefaultArch; |
| 639 | size_t ColonPos = ModuleName.find_last_of(C: ':'); |
| 640 | // Verify that substring after colon form a valid arch name. |
| 641 | if (ColonPos != std::string::npos) { |
| 642 | StringRef ArchStr = ModuleName.substr(Start: ColonPos + 1); |
| 643 | if (Triple(ArchStr).getArch() != Triple::UnknownArch) { |
| 644 | BinaryName = ModuleName.substr(Start: 0, N: ColonPos); |
| 645 | ArchName = ArchStr; |
| 646 | } |
| 647 | } |
| 648 | |
| 649 | auto I = Modules.find(x: ModuleName); |
| 650 | if (I != Modules.end()) { |
| 651 | recordAccess(Bin&: BinaryForPath.find(x: BinaryName)->second); |
| 652 | return I->second.get(); |
| 653 | } |
| 654 | |
| 655 | auto ObjectsOrErr = |
| 656 | getOrCreateObjectPair(Path: std::string{BinaryName}, ArchName: std::string{ArchName}); |
| 657 | if (!ObjectsOrErr) { |
| 658 | // Failed to find valid object file. |
| 659 | Modules.emplace(args&: ModuleName, args: std::unique_ptr<SymbolizableModule>()); |
| 660 | return ObjectsOrErr.takeError(); |
| 661 | } |
| 662 | ObjectPair Objects = ObjectsOrErr.get(); |
| 663 | |
| 664 | std::unique_ptr<DIContext> Context; |
| 665 | // If this is a COFF object containing PDB info and not containing DWARF |
| 666 | // section, use a PDBContext to symbolize. Otherwise, use DWARF. |
| 667 | // Create a DIContext to symbolize as follows: |
| 668 | // - If there is a GSYM file, create a GsymContext. |
| 669 | // - Otherwise, if this is a COFF object containing PDB info, create a |
| 670 | // PDBContext. |
| 671 | // - Otherwise, create a DWARFContext. |
| 672 | const auto GsymFile = lookUpGsymFile(Path: BinaryName.str()); |
| 673 | if (!GsymFile.empty()) { |
| 674 | auto ReaderOrErr = gsym::GsymReader::openFile(Path: GsymFile); |
| 675 | |
| 676 | if (ReaderOrErr) { |
| 677 | std::unique_ptr<gsym::GsymReader> Reader = |
| 678 | std::make_unique<gsym::GsymReader>(args: std::move(*ReaderOrErr)); |
| 679 | |
| 680 | Context = std::make_unique<gsym::GsymContext>(args: std::move(Reader)); |
| 681 | } |
| 682 | } |
| 683 | if (!Context) { |
| 684 | if (auto CoffObject = dyn_cast<COFFObjectFile>(Val: Objects.first)) { |
| 685 | const codeview::DebugInfo *DebugInfo; |
| 686 | StringRef PDBFileName; |
| 687 | auto EC = CoffObject->getDebugPDBInfo(Info&: DebugInfo, PDBFileName); |
| 688 | // Use DWARF if there're DWARF sections. |
| 689 | bool HasDwarf = llvm::any_of( |
| 690 | Range: Objects.first->sections(), P: [](SectionRef Section) -> bool { |
| 691 | if (Expected<StringRef> SectionName = Section.getName()) |
| 692 | return SectionName.get() == ".debug_info" ; |
| 693 | return false; |
| 694 | }); |
| 695 | if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) { |
| 696 | using namespace pdb; |
| 697 | std::unique_ptr<IPDBSession> Session; |
| 698 | |
| 699 | PDB_ReaderType ReaderType = |
| 700 | Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native; |
| 701 | if (auto Err = loadDataForEXE(Type: ReaderType, Path: Objects.first->getFileName(), |
| 702 | Session)) { |
| 703 | Modules.emplace(args&: ModuleName, args: std::unique_ptr<SymbolizableModule>()); |
| 704 | // Return along the PDB filename to provide more context |
| 705 | return createFileError(F: PDBFileName, E: std::move(Err)); |
| 706 | } |
| 707 | Context.reset(p: new PDBContext(*CoffObject, std::move(Session))); |
| 708 | } |
| 709 | } |
| 710 | } |
| 711 | if (!Context) |
| 712 | Context = DWARFContext::create( |
| 713 | Obj: *Objects.second, RelocAction: DWARFContext::ProcessDebugRelocations::Process, |
| 714 | L: nullptr, DWPName: Opts.DWPName); |
| 715 | auto ModuleOrErr = |
| 716 | createModuleInfo(Obj: Objects.first, Context: std::move(Context), ModuleName); |
| 717 | if (ModuleOrErr) { |
| 718 | auto I = Modules.find(x: ModuleName); |
| 719 | BinaryForPath.find(x: BinaryName)->second.pushEvictor(Evictor: [this, I]() { |
| 720 | Modules.erase(position: I); |
| 721 | }); |
| 722 | } |
| 723 | return ModuleOrErr; |
| 724 | } |
| 725 | |
| 726 | // For BPF programs .BTF.ext section contains line numbers information, |
| 727 | // use it if regular DWARF is not available (e.g. for stripped binary). |
| 728 | static bool useBTFContext(const ObjectFile &Obj) { |
| 729 | return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() && |
| 730 | BTFParser::hasBTFSections(Obj); |
| 731 | } |
| 732 | |
| 733 | Expected<SymbolizableModule *> |
| 734 | LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) { |
| 735 | StringRef ObjName = Obj.getFileName(); |
| 736 | auto I = Modules.find(x: ObjName); |
| 737 | if (I != Modules.end()) |
| 738 | return I->second.get(); |
| 739 | |
| 740 | std::unique_ptr<DIContext> Context; |
| 741 | if (useBTFContext(Obj)) |
| 742 | Context = BTFContext::create(Obj); |
| 743 | else |
| 744 | Context = DWARFContext::create(Obj); |
| 745 | // FIXME: handle COFF object with PDB info to use PDBContext |
| 746 | return createModuleInfo(Obj: &Obj, Context: std::move(Context), ModuleName: ObjName); |
| 747 | } |
| 748 | |
| 749 | Expected<SymbolizableModule *> |
| 750 | LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) { |
| 751 | std::string Path; |
| 752 | if (!getOrFindDebugBinary(BuildID, Result&: Path)) { |
| 753 | return createStringError(EC: errc::no_such_file_or_directory, |
| 754 | S: "could not find build ID" ); |
| 755 | } |
| 756 | return getOrCreateModuleInfo(ModuleName: Path); |
| 757 | } |
| 758 | |
| 759 | namespace { |
| 760 | |
| 761 | // Undo these various manglings for Win32 extern "C" functions: |
| 762 | // cdecl - _foo |
| 763 | // stdcall - _foo@12 |
| 764 | // fastcall - @foo@12 |
| 765 | // vectorcall - foo@@12 |
| 766 | // These are all different linkage names for 'foo'. |
| 767 | StringRef demanglePE32ExternCFunc(StringRef SymbolName) { |
| 768 | char Front = SymbolName.empty() ? '\0' : SymbolName[0]; |
| 769 | |
| 770 | // Remove any '@[0-9]+' suffix. |
| 771 | bool HasAtNumSuffix = false; |
| 772 | if (Front != '?') { |
| 773 | size_t AtPos = SymbolName.rfind(C: '@'); |
| 774 | if (AtPos != StringRef::npos && |
| 775 | all_of(Range: drop_begin(RangeOrContainer&: SymbolName, N: AtPos + 1), P: isDigit)) { |
| 776 | SymbolName = SymbolName.substr(Start: 0, N: AtPos); |
| 777 | HasAtNumSuffix = true; |
| 778 | } |
| 779 | } |
| 780 | |
| 781 | // Remove any ending '@' for vectorcall. |
| 782 | bool IsVectorCall = false; |
| 783 | if (HasAtNumSuffix && SymbolName.ends_with(Suffix: "@" )) { |
| 784 | SymbolName = SymbolName.drop_back(); |
| 785 | IsVectorCall = true; |
| 786 | } |
| 787 | |
| 788 | // If not vectorcall, remove any '_' or '@' prefix. |
| 789 | if (!IsVectorCall && (Front == '_' || Front == '@')) |
| 790 | SymbolName = SymbolName.drop_front(); |
| 791 | |
| 792 | return SymbolName; |
| 793 | } |
| 794 | |
| 795 | } // end anonymous namespace |
| 796 | |
| 797 | std::string |
| 798 | LLVMSymbolizer::DemangleName(StringRef Name, |
| 799 | const SymbolizableModule *DbiModuleDescriptor) { |
| 800 | std::string Result; |
| 801 | if (nonMicrosoftDemangle(MangledName: Name, Result)) |
| 802 | return Result; |
| 803 | |
| 804 | if (Name.starts_with(Prefix: '?')) { |
| 805 | // Only do MSVC C++ demangling on symbols starting with '?'. |
| 806 | int status = 0; |
| 807 | char *DemangledName = microsoftDemangle( |
| 808 | mangled_name: Name, n_read: nullptr, status: &status, |
| 809 | Flags: MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | |
| 810 | MSDF_NoMemberType | MSDF_NoReturnType)); |
| 811 | if (status != 0) |
| 812 | return std::string{Name}; |
| 813 | Result = DemangledName; |
| 814 | free(ptr: DemangledName); |
| 815 | return Result; |
| 816 | } |
| 817 | |
| 818 | if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) { |
| 819 | std::string DemangledCName(demanglePE32ExternCFunc(SymbolName: Name)); |
| 820 | // On i386 Windows, the C name mangling for different calling conventions |
| 821 | // may also be applied on top of the Itanium or Rust name mangling. |
| 822 | if (nonMicrosoftDemangle(MangledName: DemangledCName, Result)) |
| 823 | return Result; |
| 824 | return DemangledCName; |
| 825 | } |
| 826 | return std::string{Name}; |
| 827 | } |
| 828 | |
| 829 | void LLVMSymbolizer::recordAccess(CachedBinary &Bin) { |
| 830 | if (Bin->getBinary()) |
| 831 | LRUBinaries.splice(I: LRUBinaries.end(), L2&: LRUBinaries, Node: Bin.getIterator()); |
| 832 | } |
| 833 | |
| 834 | void LLVMSymbolizer::pruneCache() { |
| 835 | // Evict the LRU binary until the max cache size is reached or there's <= 1 |
| 836 | // item in the cache. The MRU binary is always kept to avoid thrashing if it's |
| 837 | // larger than the cache size. |
| 838 | while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() && |
| 839 | std::next(x: LRUBinaries.begin()) != LRUBinaries.end()) { |
| 840 | CachedBinary &Bin = LRUBinaries.front(); |
| 841 | CacheSize -= Bin.size(); |
| 842 | LRUBinaries.pop_front(); |
| 843 | Bin.evict(); |
| 844 | } |
| 845 | } |
| 846 | |
| 847 | void CachedBinary::pushEvictor(std::function<void()> NewEvictor) { |
| 848 | if (Evictor) { |
| 849 | this->Evictor = [OldEvictor = std::move(this->Evictor), |
| 850 | NewEvictor = std::move(NewEvictor)]() { |
| 851 | NewEvictor(); |
| 852 | OldEvictor(); |
| 853 | }; |
| 854 | } else { |
| 855 | this->Evictor = std::move(NewEvictor); |
| 856 | } |
| 857 | } |
| 858 | |
| 859 | } // namespace symbolize |
| 860 | } // namespace llvm |
| 861 | |