1//===- tools/dsymutil/MachODebugMapParser.cpp - Parse STABS debug maps ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BinaryHolder.h"
10#include "DebugMap.h"
11#include "MachOUtils.h"
12#include "RelocationMap.h"
13#include "dsymutil.h"
14#include "llvm/ADT/DenseSet.h"
15#include "llvm/ADT/SmallSet.h"
16#include "llvm/ADT/StringSet.h"
17#include "llvm/Object/MachO.h"
18#include "llvm/Support/Chrono.h"
19#include "llvm/Support/Path.h"
20#include "llvm/Support/WithColor.h"
21#include "llvm/Support/raw_ostream.h"
22#include <optional>
23#include <vector>
24
25namespace {
26using namespace llvm;
27using namespace llvm::dsymutil;
28using namespace llvm::object;
29
30class MachODebugMapParser {
31public:
32 MachODebugMapParser(
33 BinaryHolder &BinHolder, StringRef BinaryPath,
34 ArrayRef<std::string> Archs, ArrayRef<std::string> DSYMSearchPaths,
35 StringRef PathPrefix = "", StringRef VariantSuffix = "",
36 bool Verbose = false,
37 const std::optional<StringSet<>> &ObjectFilter = std::nullopt,
38 ObjectFilterType ObjectFilterType = ObjectFilterType::Allow)
39 : BinaryPath(std::string(BinaryPath)), Archs(Archs),
40 DSYMSearchPaths(DSYMSearchPaths), PathPrefix(std::string(PathPrefix)),
41 VariantSuffix(std::string(VariantSuffix)), BinHolder(BinHolder),
42 CurrentDebugMapObject(nullptr), SkipDebugMapObject(false),
43 ObjectFilter(ObjectFilter), ObjectFilterType(ObjectFilterType) {}
44
45 /// Parses and returns the DebugMaps of the input binary. The binary contains
46 /// multiple maps in case it is a universal binary.
47 /// \returns an error in case the provided BinaryPath doesn't exist
48 /// or isn't of a supported type.
49 ErrorOr<std::vector<std::unique_ptr<DebugMap>>> parse();
50
51 /// Walk the symbol table and dump it.
52 bool dumpStab();
53
54 using OSO = std::pair<llvm::StringRef, uint64_t>;
55
56private:
57 std::string BinaryPath;
58 SmallVector<StringRef, 1> Archs;
59 SmallVector<StringRef, 1> DSYMSearchPaths;
60 std::string PathPrefix;
61 std::string VariantSuffix;
62
63 /// Owns the MemoryBuffer for the main binary.
64 BinaryHolder &BinHolder;
65 /// Map of the binary symbol addresses.
66 StringMap<uint64_t> MainBinarySymbolAddresses;
67 StringRef MainBinaryStrings;
68 /// The constructed DebugMap.
69 std::unique_ptr<DebugMap> Result;
70 /// List of common symbols that need to be added to the debug map.
71 std::vector<std::string> CommonSymbols;
72
73 /// Map of the currently processed object file symbol addresses.
74 StringMap<std::optional<uint64_t>> CurrentObjectAddresses;
75
76 /// Lazily computed map of symbols aliased to the processed object file.
77 StringMap<std::optional<uint64_t>> CurrentObjectAliasMap;
78
79 /// If CurrentObjectAliasMap has been computed for a given address.
80 SmallSet<uint64_t, 4> SeenAliasValues;
81
82 /// Element of the debug map corresponding to the current object file.
83 DebugMapObject *CurrentDebugMapObject;
84
85 /// Whether we need to skip the current debug map object.
86 bool SkipDebugMapObject;
87
88 /// Optional set of object paths to filter on.
89 const std::optional<StringSet<>> &ObjectFilter;
90
91 /// Whether ObjectFilter is an allow list or a disallow list.
92 enum ObjectFilterType ObjectFilterType;
93
94 /// Holds function info while function scope processing.
95 const char *CurrentFunctionName;
96 uint64_t CurrentFunctionAddress;
97
98 std::unique_ptr<DebugMap> parseOneBinary(const MachOObjectFile &MainBinary,
99 StringRef BinaryPath);
100 void handleStabDebugMap(
101 const MachOObjectFile &MainBinary,
102 std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F);
103
104 void
105 switchToNewDebugMapObject(StringRef Filename,
106 sys::TimePoint<std::chrono::seconds> Timestamp);
107 void
108 switchToNewLibDebugMapObject(StringRef Filename,
109 sys::TimePoint<std::chrono::seconds> Timestamp);
110 void resetParserState();
111 uint64_t getMainBinarySymbolAddress(StringRef Name);
112 std::vector<StringRef> getMainBinarySymbolNames(uint64_t Value);
113 void loadMainBinarySymbols(const MachOObjectFile &MainBinary);
114 void loadCurrentObjectFileSymbols(const object::MachOObjectFile &Obj);
115
116 void handleStabOSOEntry(uint32_t StringIndex, uint8_t Type,
117 uint8_t SectionIndex, uint16_t Flags, uint64_t Value,
118 llvm::DenseSet<OSO> &OSOs,
119 llvm::SmallSet<OSO, 4> &Duplicates);
120 void handleStabSymbolTableEntry(uint32_t StringIndex, uint8_t Type,
121 uint8_t SectionIndex, uint16_t Flags,
122 uint64_t Value,
123 const llvm::SmallSet<OSO, 4> &Duplicates);
124
125 template <typename STEType>
126 void handleStabDebugMapEntry(
127 const STEType &STE,
128 std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) {
129 F(STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc, STE.n_value);
130 }
131
132 void addCommonSymbols();
133
134 /// Check if a debug map object should be included based on the
135 /// object filter.
136 bool shouldIncludeObject(StringRef Path) const {
137 if (!ObjectFilter.has_value())
138 return true;
139 bool InSet = ObjectFilter->contains(key: Path);
140 return ObjectFilterType == Allow ? InSet : !InSet;
141 }
142
143 /// Dump the symbol table output header.
144 void dumpSymTabHeader(raw_ostream &OS, StringRef Arch);
145
146 /// Dump the contents of nlist entries.
147 void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, uint32_t StringIndex,
148 uint8_t Type, uint8_t SectionIndex, uint16_t Flags,
149 uint64_t Value);
150
151 template <typename STEType>
152 void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, const STEType &STE) {
153 dumpSymTabEntry(OS, Index, STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc,
154 STE.n_value);
155 }
156 void dumpOneBinaryStab(const MachOObjectFile &MainBinary,
157 StringRef BinaryPath);
158
159 void Warning(const Twine &Msg, StringRef File = StringRef()) {
160 assert(Result &&
161 "The debug map must be initialized before calling this function");
162 WithColor::warning() << "("
163 << MachOUtils::getArchName(
164 Arch: Result->getTriple().getArchName())
165 << ") " << File << " " << Msg << "\n";
166 }
167};
168
169} // anonymous namespace
170
171/// Reset the parser state corresponding to the current object
172/// file. This is to be called after an object file is finished
173/// processing.
174void MachODebugMapParser::resetParserState() {
175 CommonSymbols.clear();
176 CurrentObjectAddresses.clear();
177 CurrentObjectAliasMap.clear();
178 SeenAliasValues.clear();
179 CurrentDebugMapObject = nullptr;
180 SkipDebugMapObject = false;
181}
182
183/// Commons symbols won't show up in the symbol map but might need to be
184/// relocated. We can add them to the symbol table ourselves by combining the
185/// information in the object file (the symbol name) and the main binary (the
186/// address).
187void MachODebugMapParser::addCommonSymbols() {
188 for (auto &CommonSymbol : CommonSymbols) {
189 uint64_t CommonAddr = getMainBinarySymbolAddress(Name: CommonSymbol);
190 if (CommonAddr == 0) {
191 // The main binary doesn't have an address for the given symbol.
192 continue;
193 }
194 if (!CurrentDebugMapObject->addSymbol(SymName: CommonSymbol,
195 ObjectAddress: std::nullopt /*ObjectAddress*/,
196 LinkedAddress: CommonAddr, Size: 0 /*size*/)) {
197 // The symbol is already present.
198 continue;
199 }
200 }
201}
202
203/// Create a new DebugMapObject. This function resets the state of the
204/// parser that was referring to the last object file and sets
205/// everything up to add symbols to the new one.
206void MachODebugMapParser::switchToNewDebugMapObject(
207 StringRef Filename, sys::TimePoint<std::chrono::seconds> Timestamp) {
208 addCommonSymbols();
209 resetParserState();
210
211 SmallString<80> Path(PathPrefix);
212 sys::path::append(path&: Path, a: Filename);
213
214 if (!shouldIncludeObject(Path)) {
215 SkipDebugMapObject = true;
216 return;
217 }
218
219 auto ObjectEntry = BinHolder.getObjectEntry(Filename: Path, Timestamp);
220 if (!ObjectEntry) {
221 auto Err = ObjectEntry.takeError();
222 Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)),
223 File: Path.str());
224 return;
225 }
226
227 auto Object = ObjectEntry->getObjectAs<MachOObjectFile>(T: Result->getTriple());
228 if (!Object) {
229 auto Err = Object.takeError();
230 Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)),
231 File: Path.str());
232 return;
233 }
234
235 CurrentDebugMapObject =
236 &Result->addDebugMapObject(ObjectFilePath: Path, Timestamp, Type: MachO::N_OSO);
237
238 loadCurrentObjectFileSymbols(Obj: *Object);
239}
240
241/// Create a new DebugMapObject of type MachO::N_LIB.
242/// This function resets the state of the parser that was
243/// referring to the last object file and sets everything
244/// up to add symbols to the new one.
245void MachODebugMapParser::switchToNewLibDebugMapObject(
246 StringRef Filename, sys::TimePoint<std::chrono::seconds> Timestamp) {
247
248 if (DSYMSearchPaths.empty()) {
249 Warning(Msg: "no dSYM search path was specified");
250 return;
251 }
252
253 StringRef LeafName = sys::path::filename(path: Filename);
254 SmallString<128> VariantLeafName;
255 SmallString<128> ProductName(LeafName);
256
257 // For Framework.framework/Framework and -build-variant-suffix=_debug,
258 // look in the following order:
259 // 1) Framework.framework.dSYM/Contents/Resources/DWARF/Framework_debug
260 // 2) Framework.framework.dSYM/Contents/Resources/DWARF/Framework
261 //
262 // For libName.dylib and -build-variant-suffix=_debug,
263 // look in the following order:
264 // 1) libName.dylib.dSYM/Contents/Resources/DWARF/libName_debug.dylib
265 // 2) libName.dylib.dSYM/Contents/Resources/DWARF/libName.dylib
266
267 size_t libExt = LeafName.rfind(Str: ".dylib");
268 if (libExt != StringRef::npos) {
269 if (!VariantSuffix.empty()) {
270 VariantLeafName.append(RHS: LeafName.substr(Start: 0, N: libExt));
271 VariantLeafName.append(RHS: VariantSuffix);
272 VariantLeafName.append(RHS: ".dylib");
273 }
274 } else {
275 // Expected to be a framework
276 ProductName.append(RHS: ".framework");
277 if (!VariantSuffix.empty()) {
278 VariantLeafName.append(RHS: LeafName);
279 VariantLeafName.append(RHS: VariantSuffix);
280 }
281 }
282
283 for (auto DSYMSearchPath : DSYMSearchPaths) {
284 SmallString<256> Path(DSYMSearchPath);
285 SmallString<256> FallbackPath(Path);
286
287 SmallString<256> DSYMPath(ProductName);
288 DSYMPath.append(RHS: ".dSYM");
289 sys::path::append(path&: DSYMPath, a: "Contents", b: "Resources", c: "DWARF");
290
291 if (!VariantSuffix.empty()) {
292 sys::path::append(path&: Path, a: DSYMPath, b: VariantLeafName);
293 sys::path::append(path&: FallbackPath, a: DSYMPath, b: LeafName);
294 } else {
295 sys::path::append(path&: Path, a: DSYMPath, b: LeafName);
296 }
297
298 auto ObjectEntry = BinHolder.getObjectEntry(Filename: Path, Timestamp);
299 if (!ObjectEntry) {
300 auto Err = ObjectEntry.takeError();
301 Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)),
302 File: Path.str());
303 if (!VariantSuffix.empty()) {
304 ObjectEntry = BinHolder.getObjectEntry(Filename: FallbackPath, Timestamp);
305 if (!ObjectEntry) {
306 auto Err = ObjectEntry.takeError();
307 Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)),
308 File: FallbackPath.str());
309 continue;
310 }
311 Path.assign(RHS: FallbackPath);
312 } else {
313 continue;
314 }
315 }
316
317 auto Object =
318 ObjectEntry->getObjectAs<MachOObjectFile>(T: Result->getTriple());
319 if (!Object) {
320 auto Err = Object.takeError();
321 Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)),
322 File: Path.str());
323 continue;
324 }
325
326 if (CurrentDebugMapObject &&
327 CurrentDebugMapObject->getType() == MachO::N_LIB &&
328 CurrentDebugMapObject->getObjectFilename() == Path) {
329 return;
330 }
331
332 addCommonSymbols();
333 resetParserState();
334
335 CurrentDebugMapObject =
336 &Result->addDebugMapObject(ObjectFilePath: Path, Timestamp, Type: MachO::N_LIB);
337
338 CurrentDebugMapObject->setInstallName(Filename);
339
340 SmallString<256> RMPath(DSYMSearchPath);
341 sys::path::append(path&: RMPath, a: ProductName);
342 RMPath.append(RHS: ".dSYM");
343 StringRef ArchName = Triple::getArchName(Kind: Result->getTriple().getArch(),
344 SubArch: Result->getTriple().getSubArch());
345 sys::path::append(path&: RMPath, a: "Contents", b: "Resources", c: "Relocations", d: ArchName);
346 sys::path::append(path&: RMPath, a: LeafName);
347 RMPath.append(RHS: ".yml");
348 const auto &RelocMapPtrOrErr =
349 RelocationMap::parseYAMLRelocationMap(InputFile: RMPath, PrependPath: PathPrefix);
350 if (auto EC = RelocMapPtrOrErr.getError()) {
351 Warning(Msg: "cannot parse relocation map file: " + EC.message(),
352 File: RMPath.str());
353 return;
354 }
355 CurrentDebugMapObject->setRelocationMap(*RelocMapPtrOrErr->get());
356
357 loadCurrentObjectFileSymbols(Obj: *Object);
358
359 // Found and loaded new dSYM file
360 return;
361 }
362}
363
364static std::string getArchName(const object::MachOObjectFile &Obj) {
365 Triple T = Obj.getArchTriple();
366 return std::string(T.getArchName());
367}
368
369void MachODebugMapParser::handleStabDebugMap(
370 const MachOObjectFile &MainBinary,
371 std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) {
372 for (const SymbolRef &Symbol : MainBinary.symbols()) {
373 const DataRefImpl &DRI = Symbol.getRawDataRefImpl();
374 if (MainBinary.is64Bit())
375 handleStabDebugMapEntry(STE: MainBinary.getSymbol64TableEntry(DRI), F);
376 else
377 handleStabDebugMapEntry(STE: MainBinary.getSymbolTableEntry(DRI), F);
378 }
379}
380
381std::unique_ptr<DebugMap>
382MachODebugMapParser::parseOneBinary(const MachOObjectFile &MainBinary,
383 StringRef BinaryPath) {
384 Result = std::make_unique<DebugMap>(args: MainBinary.getArchTriple(), args&: BinaryPath,
385 args: MainBinary.getUuid());
386 loadMainBinarySymbols(MainBinary);
387 MainBinaryStrings = MainBinary.getStringTableData();
388
389 // Static archives can contain multiple object files with identical names, in
390 // which case the timestamp is used to disambiguate. However, if both are
391 // identical, there's no way to tell them apart. Detect this and skip
392 // duplicate debug map objects.
393 llvm::DenseSet<OSO> OSOs;
394 llvm::SmallSet<OSO, 4> Duplicates;
395
396 // Iterate over all the STABS to find duplicate OSO entries.
397 handleStabDebugMap(MainBinary,
398 F: [&](uint32_t StringIndex, uint8_t Type,
399 uint8_t SectionIndex, uint16_t Flags, uint64_t Value) {
400 handleStabOSOEntry(StringIndex, Type, SectionIndex,
401 Flags, Value, OSOs, Duplicates);
402 });
403
404 // Print an informative warning with the duplicate object file name and time
405 // stamp.
406 for (const auto &OSO : Duplicates) {
407 std::string Buffer;
408 llvm::raw_string_ostream OS(Buffer);
409 OS << sys::TimePoint<std::chrono::seconds>(sys::toTimePoint(T: OSO.second));
410 Warning(Msg: "skipping debug map object with duplicate name and timestamp: " +
411 Buffer + Twine(" ") + Twine(OSO.first));
412 }
413
414 // Build the debug map by iterating over the STABS again but ignore the
415 // duplicate debug objects.
416 handleStabDebugMap(MainBinary, F: [&](uint32_t StringIndex, uint8_t Type,
417 uint8_t SectionIndex, uint16_t Flags,
418 uint64_t Value) {
419 handleStabSymbolTableEntry(StringIndex, Type, SectionIndex, Flags, Value,
420 Duplicates);
421 });
422
423 resetParserState();
424 return std::move(Result);
425}
426
427// Table that maps Darwin's Mach-O stab constants to strings to allow printing.
428// llvm-nm has very similar code, the strings used here are however slightly
429// different and part of the interface of dsymutil (some project's build-systems
430// parse the ouptut of dsymutil -s), thus they shouldn't be changed.
431struct DarwinStabName {
432 uint8_t NType;
433 const char *Name;
434};
435
436const struct DarwinStabName DarwinStabNames[] = {{.NType: MachO::N_GSYM, .Name: "N_GSYM"},
437 {.NType: MachO::N_FNAME, .Name: "N_FNAME"},
438 {.NType: MachO::N_FUN, .Name: "N_FUN"},
439 {.NType: MachO::N_STSYM, .Name: "N_STSYM"},
440 {.NType: MachO::N_LCSYM, .Name: "N_LCSYM"},
441 {.NType: MachO::N_BNSYM, .Name: "N_BNSYM"},
442 {.NType: MachO::N_PC, .Name: "N_PC"},
443 {.NType: MachO::N_AST, .Name: "N_AST"},
444 {.NType: MachO::N_OPT, .Name: "N_OPT"},
445 {.NType: MachO::N_RSYM, .Name: "N_RSYM"},
446 {.NType: MachO::N_SLINE, .Name: "N_SLINE"},
447 {.NType: MachO::N_ENSYM, .Name: "N_ENSYM"},
448 {.NType: MachO::N_SSYM, .Name: "N_SSYM"},
449 {.NType: MachO::N_SO, .Name: "N_SO"},
450 {.NType: MachO::N_OSO, .Name: "N_OSO"},
451 {.NType: MachO::N_LIB, .Name: "N_LIB"},
452 {.NType: MachO::N_LSYM, .Name: "N_LSYM"},
453 {.NType: MachO::N_BINCL, .Name: "N_BINCL"},
454 {.NType: MachO::N_SOL, .Name: "N_SOL"},
455 {.NType: MachO::N_PARAMS, .Name: "N_PARAM"},
456 {.NType: MachO::N_VERSION, .Name: "N_VERS"},
457 {.NType: MachO::N_OLEVEL, .Name: "N_OLEV"},
458 {.NType: MachO::N_PSYM, .Name: "N_PSYM"},
459 {.NType: MachO::N_EINCL, .Name: "N_EINCL"},
460 {.NType: MachO::N_ENTRY, .Name: "N_ENTRY"},
461 {.NType: MachO::N_LBRAC, .Name: "N_LBRAC"},
462 {.NType: MachO::N_EXCL, .Name: "N_EXCL"},
463 {.NType: MachO::N_RBRAC, .Name: "N_RBRAC"},
464 {.NType: MachO::N_BCOMM, .Name: "N_BCOMM"},
465 {.NType: MachO::N_ECOMM, .Name: "N_ECOMM"},
466 {.NType: MachO::N_ECOML, .Name: "N_ECOML"},
467 {.NType: MachO::N_LENG, .Name: "N_LENG"},
468 {.NType: 0, .Name: nullptr}};
469
470static const char *getDarwinStabString(uint8_t NType) {
471 for (unsigned i = 0; DarwinStabNames[i].Name; i++) {
472 if (DarwinStabNames[i].NType == NType)
473 return DarwinStabNames[i].Name;
474 }
475 return nullptr;
476}
477
478void MachODebugMapParser::dumpSymTabHeader(raw_ostream &OS, StringRef Arch) {
479 OS << "-----------------------------------"
480 "-----------------------------------\n";
481 OS << "Symbol table for: '" << BinaryPath << "' (" << Arch.data() << ")\n";
482 OS << "-----------------------------------"
483 "-----------------------------------\n";
484 OS << "Index n_strx n_type n_sect n_desc n_value\n";
485 OS << "======== -------- ------------------ ------ ------ ----------------\n";
486}
487
488void MachODebugMapParser::dumpSymTabEntry(raw_ostream &OS, uint64_t Index,
489 uint32_t StringIndex, uint8_t Type,
490 uint8_t SectionIndex, uint16_t Flags,
491 uint64_t Value) {
492 // Index
493 OS << '[' << format_decimal(N: Index, Width: 6)
494 << "] "
495 // n_strx
496 << format_hex_no_prefix(N: StringIndex, Width: 8)
497 << ' '
498 // n_type...
499 << format_hex_no_prefix(N: Type, Width: 2) << " (";
500
501 if (Type & MachO::N_STAB)
502 OS << left_justify(Str: getDarwinStabString(NType: Type), Width: 13);
503 else {
504 if (Type & MachO::N_PEXT)
505 OS << "PEXT ";
506 else
507 OS << " ";
508 switch (Type & MachO::N_TYPE) {
509 case MachO::N_UNDF: // 0x0 undefined, n_sect == NO_SECT
510 OS << "UNDF";
511 break;
512 case MachO::N_ABS: // 0x2 absolute, n_sect == NO_SECT
513 OS << "ABS ";
514 break;
515 case MachO::N_SECT: // 0xe defined in section number n_sect
516 OS << "SECT";
517 break;
518 case MachO::N_PBUD: // 0xc prebound undefined (defined in a dylib)
519 OS << "PBUD";
520 break;
521 case MachO::N_INDR: // 0xa indirect
522 OS << "INDR";
523 break;
524 default:
525 OS << format_hex_no_prefix(N: Type, Width: 2) << " ";
526 break;
527 }
528 if (Type & MachO::N_EXT)
529 OS << " EXT";
530 else
531 OS << " ";
532 }
533
534 OS << ") "
535 // n_sect
536 << format_hex_no_prefix(N: SectionIndex, Width: 2)
537 << " "
538 // n_desc
539 << format_hex_no_prefix(N: Flags, Width: 4)
540 << " "
541 // n_value
542 << format_hex_no_prefix(N: Value, Width: 16);
543
544 const char *Name = &MainBinaryStrings.data()[StringIndex];
545 if (Name && Name[0])
546 OS << " '" << Name << "'";
547
548 OS << "\n";
549}
550
551void MachODebugMapParser::dumpOneBinaryStab(const MachOObjectFile &MainBinary,
552 StringRef BinaryPath) {
553 loadMainBinarySymbols(MainBinary);
554 MainBinaryStrings = MainBinary.getStringTableData();
555 raw_ostream &OS(llvm::outs());
556
557 dumpSymTabHeader(OS, Arch: getArchName(Obj: MainBinary));
558 uint64_t Idx = 0;
559 for (const SymbolRef &Symbol : MainBinary.symbols()) {
560 const DataRefImpl &DRI = Symbol.getRawDataRefImpl();
561 if (MainBinary.is64Bit())
562 dumpSymTabEntry(OS, Index: Idx, STE: MainBinary.getSymbol64TableEntry(DRI));
563 else
564 dumpSymTabEntry(OS, Index: Idx, STE: MainBinary.getSymbolTableEntry(DRI));
565 Idx++;
566 }
567
568 OS << "\n\n";
569 resetParserState();
570}
571
572static bool shouldLinkArch(SmallVectorImpl<StringRef> &Archs, StringRef Arch) {
573 if (Archs.empty() || is_contained(Range&: Archs, Element: "all") || is_contained(Range&: Archs, Element: "*"))
574 return true;
575
576 if (Arch.starts_with(Prefix: "arm") && Arch != "arm64" && is_contained(Range&: Archs, Element: "arm"))
577 return true;
578
579 SmallString<16> ArchName = Arch;
580 if (Arch.starts_with(Prefix: "thumb"))
581 ArchName = ("arm" + Arch.substr(Start: 5)).str();
582
583 return is_contained(Range&: Archs, Element: ArchName);
584}
585
586bool MachODebugMapParser::dumpStab() {
587 auto ObjectEntry = BinHolder.getObjectEntry(Filename: BinaryPath);
588 if (!ObjectEntry) {
589 auto Err = ObjectEntry.takeError();
590 WithColor::error() << "cannot load '" << BinaryPath
591 << "': " << toString(E: std::move(Err)) << '\n';
592 return false;
593 }
594
595 auto Objects = ObjectEntry->getObjectsAs<MachOObjectFile>();
596 if (!Objects) {
597 auto Err = Objects.takeError();
598 WithColor::error() << "cannot get '" << BinaryPath
599 << "' as MachO file: " << toString(E: std::move(Err))
600 << "\n";
601 return false;
602 }
603
604 for (const auto *Object : *Objects)
605 if (shouldLinkArch(Archs, Arch: Object->getArchTriple().getArchName()))
606 dumpOneBinaryStab(MainBinary: *Object, BinaryPath);
607
608 return true;
609}
610
611/// This main parsing routine tries to open the main binary and if
612/// successful iterates over the STAB entries. The real parsing is
613/// done in handleStabSymbolTableEntry.
614ErrorOr<std::vector<std::unique_ptr<DebugMap>>> MachODebugMapParser::parse() {
615 auto ObjectEntry = BinHolder.getObjectEntry(Filename: BinaryPath);
616 if (!ObjectEntry) {
617 return errorToErrorCode(Err: ObjectEntry.takeError());
618 }
619
620 auto Objects = ObjectEntry->getObjectsAs<MachOObjectFile>();
621 if (!Objects) {
622 return errorToErrorCode(Err: Objects.takeError());
623 }
624
625 std::vector<std::unique_ptr<DebugMap>> Results;
626 for (const auto *Object : *Objects)
627 if (shouldLinkArch(Archs, Arch: Object->getArchTriple().getArchName()))
628 Results.push_back(x: parseOneBinary(MainBinary: *Object, BinaryPath));
629
630 return std::move(Results);
631}
632
633void MachODebugMapParser::handleStabOSOEntry(
634 uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags,
635 uint64_t Value, llvm::DenseSet<OSO> &OSOs,
636 llvm::SmallSet<OSO, 4> &Duplicates) {
637 if (Type != MachO::N_OSO)
638 return;
639
640 OSO O(&MainBinaryStrings.data()[StringIndex], Value);
641 if (!OSOs.insert(V: O).second)
642 Duplicates.insert(V: O);
643}
644
645/// Interpret the STAB entries to fill the DebugMap.
646void MachODebugMapParser::handleStabSymbolTableEntry(
647 uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags,
648 uint64_t Value, const llvm::SmallSet<OSO, 4> &Duplicates) {
649 if (!(Type & MachO::N_STAB))
650 return;
651
652 const char *Name = &MainBinaryStrings.data()[StringIndex];
653
654 // An N_LIB entry represents the start of a new library file description.
655 if (Type == MachO::N_LIB) {
656 switchToNewLibDebugMapObject(Filename: Name, Timestamp: sys::toTimePoint(T: Value));
657 return;
658 }
659
660 // An N_OSO entry represents the start of a new object file description.
661 // If an N_LIB entry was present, this is parsed only if the library
662 // dSYM file could not be found.
663 if (Type == MachO::N_OSO) {
664 if (!CurrentDebugMapObject ||
665 CurrentDebugMapObject->getType() != MachO::N_LIB) {
666 if (Duplicates.count(V: OSO(Name, Value))) {
667 SkipDebugMapObject = true;
668 return;
669 }
670 switchToNewDebugMapObject(Filename: Name, Timestamp: sys::toTimePoint(T: Value));
671 }
672 return;
673 }
674
675 if (SkipDebugMapObject)
676 return;
677
678 if (Type == MachO::N_AST) {
679 SmallString<80> Path(PathPrefix);
680 sys::path::append(path&: Path, a: Name);
681 Result->addDebugMapObject(ObjectFilePath: Path, Timestamp: sys::toTimePoint(T: Value), Type);
682 return;
683 }
684
685 // If the last N_OSO object file wasn't found, CurrentDebugMapObject will be
686 // null. Do not update anything until we find the next valid N_OSO entry.
687 if (!CurrentDebugMapObject)
688 return;
689
690 uint32_t Size = 0;
691 switch (Type) {
692 case MachO::N_GSYM:
693 // This is a global variable. We need to query the main binary
694 // symbol table to find its address as it might not be in the
695 // debug map (for common symbols).
696 Value = getMainBinarySymbolAddress(Name);
697 break;
698 case MachO::N_FUN:
699 // Functions are scopes in STABS. They have an end marker that
700 // contains the function size.
701 if (Name[0] == '\0') {
702 Size = Value;
703 Value = CurrentFunctionAddress;
704 Name = CurrentFunctionName;
705 break;
706 } else {
707 CurrentFunctionName = Name;
708 CurrentFunctionAddress = Value;
709 return;
710 }
711 case MachO::N_STSYM:
712 break;
713 default:
714 return;
715 }
716
717 auto ObjectSymIt = CurrentObjectAddresses.find(Key: Name);
718
719 // If the name of a (non-static) symbol is not in the current object, we
720 // check all its aliases from the main binary.
721 if (ObjectSymIt == CurrentObjectAddresses.end() && Type != MachO::N_STSYM) {
722 if (SeenAliasValues.count(V: Value) == 0) {
723 auto Aliases = getMainBinarySymbolNames(Value);
724 for (const auto &Alias : Aliases) {
725 auto It = CurrentObjectAddresses.find(Key: Alias);
726 if (It != CurrentObjectAddresses.end()) {
727 auto AliasValue = It->getValue();
728 for (const auto &Alias : Aliases)
729 CurrentObjectAliasMap[Alias] = AliasValue;
730 break;
731 }
732 }
733 SeenAliasValues.insert(V: Value);
734 }
735
736 auto AliasIt = CurrentObjectAliasMap.find(Key: Name);
737 if (AliasIt != CurrentObjectAliasMap.end())
738 ObjectSymIt = AliasIt;
739 }
740
741 // ThinLTO adds a unique suffix to exported private symbols.
742 if (ObjectSymIt == CurrentObjectAddresses.end()) {
743 for (auto Iter = CurrentObjectAddresses.begin();
744 Iter != CurrentObjectAddresses.end(); ++Iter) {
745 llvm::StringRef SymbolName = Iter->getKey();
746 auto Pos = SymbolName.rfind(Str: ".llvm.");
747 if (Pos != llvm::StringRef::npos && SymbolName.substr(Start: 0, N: Pos) == Name) {
748 ObjectSymIt = Iter;
749 break;
750 }
751 }
752 }
753
754 if (ObjectSymIt == CurrentObjectAddresses.end()) {
755 Warning(Msg: "could not find symbol '" + Twine(Name) + "' in object file '" +
756 CurrentDebugMapObject->getObjectFilename() + "'");
757 return;
758 }
759
760 if (!CurrentDebugMapObject->addSymbol(SymName: Name, ObjectAddress: ObjectSymIt->getValue(), LinkedAddress: Value,
761 Size)) {
762 Warning(Msg: Twine("failed to insert symbol '") + Name + "' in the debug map.");
763 return;
764 }
765}
766
767/// Load the current object file symbols into CurrentObjectAddresses.
768void MachODebugMapParser::loadCurrentObjectFileSymbols(
769 const object::MachOObjectFile &Obj) {
770 CurrentObjectAddresses.clear();
771
772 for (auto Sym : Obj.symbols()) {
773 uint64_t Addr = cantFail(ValOrErr: Sym.getValue());
774 Expected<StringRef> Name = Sym.getName();
775 if (!Name) {
776 auto Err = Name.takeError();
777 Warning(Msg: "failed to get symbol name: " + toString(E: std::move(Err)),
778 File: Obj.getFileName());
779 continue;
780 }
781 // The value of some categories of symbols isn't meaningful. For
782 // example common symbols store their size in the value field, not
783 // their address. Absolute symbols have a fixed address that can
784 // conflict with standard symbols. These symbols (especially the
785 // common ones), might still be referenced by relocations. These
786 // relocations will use the symbol itself, and won't need an
787 // object file address. The object file address field is optional
788 // in the DebugMap, leave it unassigned for these symbols.
789 uint32_t Flags = cantFail(ValOrErr: Sym.getFlags());
790 if (Flags & SymbolRef::SF_Absolute) {
791 CurrentObjectAddresses[*Name] = std::nullopt;
792 } else if (Flags & SymbolRef::SF_Common) {
793 CurrentObjectAddresses[*Name] = std::nullopt;
794 CommonSymbols.push_back(x: std::string(*Name));
795 } else {
796 CurrentObjectAddresses[*Name] = Addr;
797 }
798 }
799}
800
801/// Lookup a symbol address in the main binary symbol table. The
802/// parser only needs to query common symbols, thus not every symbol's
803/// address is available through this function.
804uint64_t MachODebugMapParser::getMainBinarySymbolAddress(StringRef Name) {
805 auto Sym = MainBinarySymbolAddresses.find(Key: Name);
806 if (Sym == MainBinarySymbolAddresses.end())
807 return 0;
808 return Sym->second;
809}
810
811/// Get all symbol names in the main binary for the given value.
812std::vector<StringRef>
813MachODebugMapParser::getMainBinarySymbolNames(uint64_t Value) {
814 std::vector<StringRef> Names;
815 for (const auto &Entry : MainBinarySymbolAddresses) {
816 if (Entry.second == Value)
817 Names.push_back(x: Entry.first());
818 }
819 return Names;
820}
821
822/// Load the interesting main binary symbols' addresses into
823/// MainBinarySymbolAddresses.
824void MachODebugMapParser::loadMainBinarySymbols(
825 const MachOObjectFile &MainBinary) {
826 section_iterator Section = MainBinary.section_end();
827 MainBinarySymbolAddresses.clear();
828 for (const auto &Sym : MainBinary.symbols()) {
829 Expected<SymbolRef::Type> TypeOrErr = Sym.getType();
830 if (!TypeOrErr) {
831 auto Err = TypeOrErr.takeError();
832 Warning(Msg: "failed to get symbol type: " + toString(E: std::move(Err)),
833 File: MainBinary.getFileName());
834 continue;
835 }
836 SymbolRef::Type Type = *TypeOrErr;
837 // Skip undefined and STAB entries.
838 if ((Type == SymbolRef::ST_Debug) || (Type == SymbolRef::ST_Unknown))
839 continue;
840 // In theory, the only symbols of interest are the global variables. These
841 // are the only ones that need to be queried because the address of common
842 // data won't be described in the debug map. All other addresses should be
843 // fetched for the debug map. In reality, by playing with 'ld -r' and
844 // export lists, you can get symbols described as N_GSYM in the debug map,
845 // but associated with a local symbol. Gather all the symbols, but prefer
846 // the global ones.
847 uint8_t SymType =
848 MainBinary.getSymbolTableEntry(DRI: Sym.getRawDataRefImpl()).n_type;
849 bool Extern = SymType & (MachO::N_EXT | MachO::N_PEXT);
850 Expected<section_iterator> SectionOrErr = Sym.getSection();
851 if (!SectionOrErr) {
852 auto Err = TypeOrErr.takeError();
853 Warning(Msg: "failed to get symbol section: " + toString(E: std::move(Err)),
854 File: MainBinary.getFileName());
855 continue;
856 }
857 Section = *SectionOrErr;
858 if ((Section == MainBinary.section_end() || Section->isText()) && !Extern)
859 continue;
860 uint64_t Addr = cantFail(ValOrErr: Sym.getValue());
861 Expected<StringRef> NameOrErr = Sym.getName();
862 if (!NameOrErr) {
863 auto Err = NameOrErr.takeError();
864 Warning(Msg: "failed to get symbol name: " + toString(E: std::move(Err)),
865 File: MainBinary.getFileName());
866 continue;
867 }
868 StringRef Name = *NameOrErr;
869 if (Name.size() == 0 || Name[0] == '\0')
870 continue;
871 // Override only if the new key is global.
872 if (Extern)
873 MainBinarySymbolAddresses[Name] = Addr;
874 else
875 MainBinarySymbolAddresses.try_emplace(Key: Name, Args&: Addr);
876 }
877}
878
879namespace llvm {
880namespace dsymutil {
881llvm::ErrorOr<std::vector<std::unique_ptr<DebugMap>>>
882parseDebugMap(BinaryHolder &BinHolder, StringRef InputFile,
883 ArrayRef<std::string> Archs,
884 ArrayRef<std::string> DSYMSearchPaths, StringRef PrependPath,
885 StringRef VariantSuffix, bool Verbose, bool InputIsYAML,
886 const std::optional<StringSet<>> &ObjectFilter,
887 enum ObjectFilterType ObjectFilterType) {
888 if (InputIsYAML)
889 return DebugMap::parseYAMLDebugMap(BinHolder, InputFile, PrependPath,
890 Verbose);
891
892 MachODebugMapParser Parser(BinHolder, InputFile, Archs, DSYMSearchPaths,
893 PrependPath, VariantSuffix, Verbose, ObjectFilter,
894 ObjectFilterType);
895
896 return Parser.parse();
897}
898
899bool dumpStab(BinaryHolder &BinHolder, StringRef InputFile,
900 ArrayRef<std::string> Archs,
901 ArrayRef<std::string> DSYMSearchPaths, StringRef PrependPath,
902 StringRef VariantSuffix) {
903 MachODebugMapParser Parser(BinHolder, InputFile, Archs, DSYMSearchPaths,
904 PrependPath, VariantSuffix, false);
905 return Parser.dumpStab();
906}
907} // namespace dsymutil
908} // namespace llvm
909