1//===- tools/dsymutil/MachODebugMapParser.cpp - Parse STABS debug maps ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BinaryHolder.h"
10#include "DebugMap.h"
11#include "MachOUtils.h"
12#include "RelocationMap.h"
13#include "llvm/ADT/DenseSet.h"
14#include "llvm/ADT/SmallSet.h"
15#include "llvm/Object/MachO.h"
16#include "llvm/Support/Chrono.h"
17#include "llvm/Support/Path.h"
18#include "llvm/Support/WithColor.h"
19#include "llvm/Support/raw_ostream.h"
20#include <optional>
21#include <vector>
22
23namespace {
24using namespace llvm;
25using namespace llvm::dsymutil;
26using namespace llvm::object;
27
28class MachODebugMapParser {
29public:
30 MachODebugMapParser(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
31 StringRef BinaryPath, ArrayRef<std::string> Archs,
32 ArrayRef<std::string> DSYMSearchPaths,
33 StringRef PathPrefix = "", StringRef VariantSuffix = "",
34 bool Verbose = false)
35 : BinaryPath(std::string(BinaryPath)), Archs(Archs.begin(), Archs.end()),
36 DSYMSearchPaths(DSYMSearchPaths.begin(), DSYMSearchPaths.end()),
37 PathPrefix(std::string(PathPrefix)),
38 VariantSuffix(std::string(VariantSuffix)), BinHolder(VFS, Verbose),
39 CurrentDebugMapObject(nullptr), SkipDebugMapObject(false) {}
40
41 /// Parses and returns the DebugMaps of the input binary. The binary contains
42 /// multiple maps in case it is a universal binary.
43 /// \returns an error in case the provided BinaryPath doesn't exist
44 /// or isn't of a supported type.
45 ErrorOr<std::vector<std::unique_ptr<DebugMap>>> parse();
46
47 /// Walk the symbol table and dump it.
48 bool dumpStab();
49
50 using OSO = std::pair<llvm::StringRef, uint64_t>;
51
52private:
53 std::string BinaryPath;
54 SmallVector<StringRef, 1> Archs;
55 SmallVector<StringRef, 1> DSYMSearchPaths;
56 std::string PathPrefix;
57 std::string VariantSuffix;
58
59 /// Owns the MemoryBuffer for the main binary.
60 BinaryHolder BinHolder;
61 /// Map of the binary symbol addresses.
62 StringMap<uint64_t> MainBinarySymbolAddresses;
63 StringRef MainBinaryStrings;
64 /// The constructed DebugMap.
65 std::unique_ptr<DebugMap> Result;
66 /// List of common symbols that need to be added to the debug map.
67 std::vector<std::string> CommonSymbols;
68
69 /// Map of the currently processed object file symbol addresses.
70 StringMap<std::optional<uint64_t>> CurrentObjectAddresses;
71
72 /// Lazily computed map of symbols aliased to the processed object file.
73 StringMap<std::optional<uint64_t>> CurrentObjectAliasMap;
74
75 /// If CurrentObjectAliasMap has been computed for a given address.
76 SmallSet<uint64_t, 4> SeenAliasValues;
77
78 /// Element of the debug map corresponding to the current object file.
79 DebugMapObject *CurrentDebugMapObject;
80
81 /// Whether we need to skip the current debug map object.
82 bool SkipDebugMapObject;
83
84 /// Holds function info while function scope processing.
85 const char *CurrentFunctionName;
86 uint64_t CurrentFunctionAddress;
87
88 std::unique_ptr<DebugMap> parseOneBinary(const MachOObjectFile &MainBinary,
89 StringRef BinaryPath);
90 void handleStabDebugMap(
91 const MachOObjectFile &MainBinary,
92 std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F);
93
94 void
95 switchToNewDebugMapObject(StringRef Filename,
96 sys::TimePoint<std::chrono::seconds> Timestamp);
97 void
98 switchToNewLibDebugMapObject(StringRef Filename,
99 sys::TimePoint<std::chrono::seconds> Timestamp);
100 void resetParserState();
101 uint64_t getMainBinarySymbolAddress(StringRef Name);
102 std::vector<StringRef> getMainBinarySymbolNames(uint64_t Value);
103 void loadMainBinarySymbols(const MachOObjectFile &MainBinary);
104 void loadCurrentObjectFileSymbols(const object::MachOObjectFile &Obj);
105
106 void handleStabOSOEntry(uint32_t StringIndex, uint8_t Type,
107 uint8_t SectionIndex, uint16_t Flags, uint64_t Value,
108 llvm::DenseSet<OSO> &OSOs,
109 llvm::SmallSet<OSO, 4> &Duplicates);
110 void handleStabSymbolTableEntry(uint32_t StringIndex, uint8_t Type,
111 uint8_t SectionIndex, uint16_t Flags,
112 uint64_t Value,
113 const llvm::SmallSet<OSO, 4> &Duplicates);
114
115 template <typename STEType>
116 void handleStabDebugMapEntry(
117 const STEType &STE,
118 std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) {
119 F(STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc, STE.n_value);
120 }
121
122 void addCommonSymbols();
123
124 /// Dump the symbol table output header.
125 void dumpSymTabHeader(raw_ostream &OS, StringRef Arch);
126
127 /// Dump the contents of nlist entries.
128 void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, uint32_t StringIndex,
129 uint8_t Type, uint8_t SectionIndex, uint16_t Flags,
130 uint64_t Value);
131
132 template <typename STEType>
133 void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, const STEType &STE) {
134 dumpSymTabEntry(OS, Index, STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc,
135 STE.n_value);
136 }
137 void dumpOneBinaryStab(const MachOObjectFile &MainBinary,
138 StringRef BinaryPath);
139
140 void Warning(const Twine &Msg, StringRef File = StringRef()) {
141 assert(Result &&
142 "The debug map must be initialized before calling this function");
143 WithColor::warning() << "("
144 << MachOUtils::getArchName(
145 Arch: Result->getTriple().getArchName())
146 << ") " << File << " " << Msg << "\n";
147 }
148};
149
150} // anonymous namespace
151
152/// Reset the parser state corresponding to the current object
153/// file. This is to be called after an object file is finished
154/// processing.
155void MachODebugMapParser::resetParserState() {
156 CommonSymbols.clear();
157 CurrentObjectAddresses.clear();
158 CurrentObjectAliasMap.clear();
159 SeenAliasValues.clear();
160 CurrentDebugMapObject = nullptr;
161 SkipDebugMapObject = false;
162}
163
164/// Commons symbols won't show up in the symbol map but might need to be
165/// relocated. We can add them to the symbol table ourselves by combining the
166/// information in the object file (the symbol name) and the main binary (the
167/// address).
168void MachODebugMapParser::addCommonSymbols() {
169 for (auto &CommonSymbol : CommonSymbols) {
170 uint64_t CommonAddr = getMainBinarySymbolAddress(Name: CommonSymbol);
171 if (CommonAddr == 0) {
172 // The main binary doesn't have an address for the given symbol.
173 continue;
174 }
175 if (!CurrentDebugMapObject->addSymbol(SymName: CommonSymbol,
176 ObjectAddress: std::nullopt /*ObjectAddress*/,
177 LinkedAddress: CommonAddr, Size: 0 /*size*/)) {
178 // The symbol is already present.
179 continue;
180 }
181 }
182}
183
184/// Create a new DebugMapObject. This function resets the state of the
185/// parser that was referring to the last object file and sets
186/// everything up to add symbols to the new one.
187void MachODebugMapParser::switchToNewDebugMapObject(
188 StringRef Filename, sys::TimePoint<std::chrono::seconds> Timestamp) {
189 addCommonSymbols();
190 resetParserState();
191
192 SmallString<80> Path(PathPrefix);
193 sys::path::append(path&: Path, a: Filename);
194
195 auto ObjectEntry = BinHolder.getObjectEntry(Filename: Path, Timestamp);
196 if (!ObjectEntry) {
197 auto Err = ObjectEntry.takeError();
198 Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)),
199 File: Path.str());
200 return;
201 }
202
203 auto Object = ObjectEntry->getObjectAs<MachOObjectFile>(T: Result->getTriple());
204 if (!Object) {
205 auto Err = Object.takeError();
206 Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)),
207 File: Path.str());
208 return;
209 }
210
211 CurrentDebugMapObject =
212 &Result->addDebugMapObject(ObjectFilePath: Path, Timestamp, Type: MachO::N_OSO);
213
214 loadCurrentObjectFileSymbols(Obj: *Object);
215}
216
217/// Create a new DebugMapObject of type MachO::N_LIB.
218/// This function resets the state of the parser that was
219/// referring to the last object file and sets everything
220/// up to add symbols to the new one.
221void MachODebugMapParser::switchToNewLibDebugMapObject(
222 StringRef Filename, sys::TimePoint<std::chrono::seconds> Timestamp) {
223
224 if (DSYMSearchPaths.empty()) {
225 Warning(Msg: "no dSYM search path was specified");
226 return;
227 }
228
229 StringRef LeafName = sys::path::filename(path: Filename);
230 SmallString<128> VariantLeafName;
231 SmallString<128> ProductName(LeafName);
232
233 // For Framework.framework/Framework and -build-variant-suffix=_debug,
234 // look in the following order:
235 // 1) Framework.framework.dSYM/Contents/Resources/DWARF/Framework_debug
236 // 2) Framework.framework.dSYM/Contents/Resources/DWARF/Framework
237 //
238 // For libName.dylib and -build-variant-suffix=_debug,
239 // look in the following order:
240 // 1) libName.dylib.dSYM/Contents/Resources/DWARF/libName_debug.dylib
241 // 2) libName.dylib.dSYM/Contents/Resources/DWARF/libName.dylib
242
243 size_t libExt = LeafName.rfind(Str: ".dylib");
244 if (libExt != StringRef::npos) {
245 if (!VariantSuffix.empty()) {
246 VariantLeafName.append(RHS: LeafName.substr(Start: 0, N: libExt));
247 VariantLeafName.append(RHS: VariantSuffix);
248 VariantLeafName.append(RHS: ".dylib");
249 }
250 } else {
251 // Expected to be a framework
252 ProductName.append(RHS: ".framework");
253 if (!VariantSuffix.empty()) {
254 VariantLeafName.append(RHS: LeafName);
255 VariantLeafName.append(RHS: VariantSuffix);
256 }
257 }
258
259 for (auto DSYMSearchPath : DSYMSearchPaths) {
260 SmallString<256> Path(DSYMSearchPath);
261 SmallString<256> FallbackPath(Path);
262
263 SmallString<256> DSYMPath(ProductName);
264 DSYMPath.append(RHS: ".dSYM");
265 sys::path::append(path&: DSYMPath, a: "Contents", b: "Resources", c: "DWARF");
266
267 if (!VariantSuffix.empty()) {
268 sys::path::append(path&: Path, a: DSYMPath, b: VariantLeafName);
269 sys::path::append(path&: FallbackPath, a: DSYMPath, b: LeafName);
270 } else {
271 sys::path::append(path&: Path, a: DSYMPath, b: LeafName);
272 }
273
274 auto ObjectEntry = BinHolder.getObjectEntry(Filename: Path, Timestamp);
275 if (!ObjectEntry) {
276 auto Err = ObjectEntry.takeError();
277 Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)),
278 File: Path.str());
279 if (!VariantSuffix.empty()) {
280 ObjectEntry = BinHolder.getObjectEntry(Filename: FallbackPath, Timestamp);
281 if (!ObjectEntry) {
282 auto Err = ObjectEntry.takeError();
283 Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)),
284 File: FallbackPath.str());
285 continue;
286 }
287 Path.assign(RHS: FallbackPath);
288 } else {
289 continue;
290 }
291 }
292
293 auto Object =
294 ObjectEntry->getObjectAs<MachOObjectFile>(T: Result->getTriple());
295 if (!Object) {
296 auto Err = Object.takeError();
297 Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)),
298 File: Path.str());
299 continue;
300 }
301
302 if (CurrentDebugMapObject &&
303 CurrentDebugMapObject->getType() == MachO::N_LIB &&
304 CurrentDebugMapObject->getObjectFilename() == Path) {
305 return;
306 }
307
308 addCommonSymbols();
309 resetParserState();
310
311 CurrentDebugMapObject =
312 &Result->addDebugMapObject(ObjectFilePath: Path, Timestamp, Type: MachO::N_LIB);
313
314 CurrentDebugMapObject->setInstallName(Filename);
315
316 SmallString<256> RMPath(DSYMSearchPath);
317 sys::path::append(path&: RMPath, a: ProductName);
318 RMPath.append(RHS: ".dSYM");
319 StringRef ArchName = Triple::getArchName(Kind: Result->getTriple().getArch(),
320 SubArch: Result->getTriple().getSubArch());
321 sys::path::append(path&: RMPath, a: "Contents", b: "Resources", c: "Relocations", d: ArchName);
322 sys::path::append(path&: RMPath, a: LeafName);
323 RMPath.append(RHS: ".yml");
324 const auto &RelocMapPtrOrErr =
325 RelocationMap::parseYAMLRelocationMap(InputFile: RMPath, PrependPath: PathPrefix);
326 if (auto EC = RelocMapPtrOrErr.getError()) {
327 Warning(Msg: "cannot parse relocation map file: " + EC.message(),
328 File: RMPath.str());
329 return;
330 }
331 CurrentDebugMapObject->setRelocationMap(*RelocMapPtrOrErr->get());
332
333 loadCurrentObjectFileSymbols(Obj: *Object);
334
335 // Found and loaded new dSYM file
336 return;
337 }
338}
339
340static std::string getArchName(const object::MachOObjectFile &Obj) {
341 Triple T = Obj.getArchTriple();
342 return std::string(T.getArchName());
343}
344
345void MachODebugMapParser::handleStabDebugMap(
346 const MachOObjectFile &MainBinary,
347 std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) {
348 for (const SymbolRef &Symbol : MainBinary.symbols()) {
349 const DataRefImpl &DRI = Symbol.getRawDataRefImpl();
350 if (MainBinary.is64Bit())
351 handleStabDebugMapEntry(STE: MainBinary.getSymbol64TableEntry(DRI), F);
352 else
353 handleStabDebugMapEntry(STE: MainBinary.getSymbolTableEntry(DRI), F);
354 }
355}
356
357std::unique_ptr<DebugMap>
358MachODebugMapParser::parseOneBinary(const MachOObjectFile &MainBinary,
359 StringRef BinaryPath) {
360 Result = std::make_unique<DebugMap>(args: MainBinary.getArchTriple(), args&: BinaryPath,
361 args: MainBinary.getUuid());
362 loadMainBinarySymbols(MainBinary);
363 MainBinaryStrings = MainBinary.getStringTableData();
364
365 // Static archives can contain multiple object files with identical names, in
366 // which case the timestamp is used to disambiguate. However, if both are
367 // identical, there's no way to tell them apart. Detect this and skip
368 // duplicate debug map objects.
369 llvm::DenseSet<OSO> OSOs;
370 llvm::SmallSet<OSO, 4> Duplicates;
371
372 // Iterate over all the STABS to find duplicate OSO entries.
373 handleStabDebugMap(MainBinary,
374 F: [&](uint32_t StringIndex, uint8_t Type,
375 uint8_t SectionIndex, uint16_t Flags, uint64_t Value) {
376 handleStabOSOEntry(StringIndex, Type, SectionIndex,
377 Flags, Value, OSOs, Duplicates);
378 });
379
380 // Print an informative warning with the duplicate object file name and time
381 // stamp.
382 for (const auto &OSO : Duplicates) {
383 std::string Buffer;
384 llvm::raw_string_ostream OS(Buffer);
385 OS << sys::TimePoint<std::chrono::seconds>(sys::toTimePoint(T: OSO.second));
386 Warning(Msg: "skipping debug map object with duplicate name and timestamp: " +
387 OS.str() + Twine(" ") + Twine(OSO.first));
388 }
389
390 // Build the debug map by iterating over the STABS again but ignore the
391 // duplicate debug objects.
392 handleStabDebugMap(MainBinary, F: [&](uint32_t StringIndex, uint8_t Type,
393 uint8_t SectionIndex, uint16_t Flags,
394 uint64_t Value) {
395 handleStabSymbolTableEntry(StringIndex, Type, SectionIndex, Flags, Value,
396 Duplicates);
397 });
398
399 resetParserState();
400 return std::move(Result);
401}
402
// Table that maps Darwin's Mach-O stab constants to strings to allow printing.
// llvm-nm has very similar code, the strings used here are however slightly
// different and part of the interface of dsymutil (some projects' build systems
// parse the output of dsymutil -s), thus they shouldn't be changed.
struct DarwinStabName {
  uint8_t NType;    ///< Stab n_type value this row describes.
  const char *Name; ///< Spelling printed for it; null in the sentinel row.
};
411
412const struct DarwinStabName DarwinStabNames[] = {{.NType: MachO::N_GSYM, .Name: "N_GSYM"},
413 {.NType: MachO::N_FNAME, .Name: "N_FNAME"},
414 {.NType: MachO::N_FUN, .Name: "N_FUN"},
415 {.NType: MachO::N_STSYM, .Name: "N_STSYM"},
416 {.NType: MachO::N_LCSYM, .Name: "N_LCSYM"},
417 {.NType: MachO::N_BNSYM, .Name: "N_BNSYM"},
418 {.NType: MachO::N_PC, .Name: "N_PC"},
419 {.NType: MachO::N_AST, .Name: "N_AST"},
420 {.NType: MachO::N_OPT, .Name: "N_OPT"},
421 {.NType: MachO::N_RSYM, .Name: "N_RSYM"},
422 {.NType: MachO::N_SLINE, .Name: "N_SLINE"},
423 {.NType: MachO::N_ENSYM, .Name: "N_ENSYM"},
424 {.NType: MachO::N_SSYM, .Name: "N_SSYM"},
425 {.NType: MachO::N_SO, .Name: "N_SO"},
426 {.NType: MachO::N_OSO, .Name: "N_OSO"},
427 {.NType: MachO::N_LIB, .Name: "N_LIB"},
428 {.NType: MachO::N_LSYM, .Name: "N_LSYM"},
429 {.NType: MachO::N_BINCL, .Name: "N_BINCL"},
430 {.NType: MachO::N_SOL, .Name: "N_SOL"},
431 {.NType: MachO::N_PARAMS, .Name: "N_PARAM"},
432 {.NType: MachO::N_VERSION, .Name: "N_VERS"},
433 {.NType: MachO::N_OLEVEL, .Name: "N_OLEV"},
434 {.NType: MachO::N_PSYM, .Name: "N_PSYM"},
435 {.NType: MachO::N_EINCL, .Name: "N_EINCL"},
436 {.NType: MachO::N_ENTRY, .Name: "N_ENTRY"},
437 {.NType: MachO::N_LBRAC, .Name: "N_LBRAC"},
438 {.NType: MachO::N_EXCL, .Name: "N_EXCL"},
439 {.NType: MachO::N_RBRAC, .Name: "N_RBRAC"},
440 {.NType: MachO::N_BCOMM, .Name: "N_BCOMM"},
441 {.NType: MachO::N_ECOMM, .Name: "N_ECOMM"},
442 {.NType: MachO::N_ECOML, .Name: "N_ECOML"},
443 {.NType: MachO::N_LENG, .Name: "N_LENG"},
444 {.NType: 0, .Name: nullptr}};
445
446static const char *getDarwinStabString(uint8_t NType) {
447 for (unsigned i = 0; DarwinStabNames[i].Name; i++) {
448 if (DarwinStabNames[i].NType == NType)
449 return DarwinStabNames[i].Name;
450 }
451 return nullptr;
452}
453
454void MachODebugMapParser::dumpSymTabHeader(raw_ostream &OS, StringRef Arch) {
455 OS << "-----------------------------------"
456 "-----------------------------------\n";
457 OS << "Symbol table for: '" << BinaryPath << "' (" << Arch.data() << ")\n";
458 OS << "-----------------------------------"
459 "-----------------------------------\n";
460 OS << "Index n_strx n_type n_sect n_desc n_value\n";
461 OS << "======== -------- ------------------ ------ ------ ----------------\n";
462}
463
464void MachODebugMapParser::dumpSymTabEntry(raw_ostream &OS, uint64_t Index,
465 uint32_t StringIndex, uint8_t Type,
466 uint8_t SectionIndex, uint16_t Flags,
467 uint64_t Value) {
468 // Index
469 OS << '[' << format_decimal(N: Index, Width: 6)
470 << "] "
471 // n_strx
472 << format_hex_no_prefix(N: StringIndex, Width: 8)
473 << ' '
474 // n_type...
475 << format_hex_no_prefix(N: Type, Width: 2) << " (";
476
477 if (Type & MachO::N_STAB)
478 OS << left_justify(Str: getDarwinStabString(NType: Type), Width: 13);
479 else {
480 if (Type & MachO::N_PEXT)
481 OS << "PEXT ";
482 else
483 OS << " ";
484 switch (Type & MachO::N_TYPE) {
485 case MachO::N_UNDF: // 0x0 undefined, n_sect == NO_SECT
486 OS << "UNDF";
487 break;
488 case MachO::N_ABS: // 0x2 absolute, n_sect == NO_SECT
489 OS << "ABS ";
490 break;
491 case MachO::N_SECT: // 0xe defined in section number n_sect
492 OS << "SECT";
493 break;
494 case MachO::N_PBUD: // 0xc prebound undefined (defined in a dylib)
495 OS << "PBUD";
496 break;
497 case MachO::N_INDR: // 0xa indirect
498 OS << "INDR";
499 break;
500 default:
501 OS << format_hex_no_prefix(N: Type, Width: 2) << " ";
502 break;
503 }
504 if (Type & MachO::N_EXT)
505 OS << " EXT";
506 else
507 OS << " ";
508 }
509
510 OS << ") "
511 // n_sect
512 << format_hex_no_prefix(N: SectionIndex, Width: 2)
513 << " "
514 // n_desc
515 << format_hex_no_prefix(N: Flags, Width: 4)
516 << " "
517 // n_value
518 << format_hex_no_prefix(N: Value, Width: 16);
519
520 const char *Name = &MainBinaryStrings.data()[StringIndex];
521 if (Name && Name[0])
522 OS << " '" << Name << "'";
523
524 OS << "\n";
525}
526
527void MachODebugMapParser::dumpOneBinaryStab(const MachOObjectFile &MainBinary,
528 StringRef BinaryPath) {
529 loadMainBinarySymbols(MainBinary);
530 MainBinaryStrings = MainBinary.getStringTableData();
531 raw_ostream &OS(llvm::outs());
532
533 dumpSymTabHeader(OS, Arch: getArchName(Obj: MainBinary));
534 uint64_t Idx = 0;
535 for (const SymbolRef &Symbol : MainBinary.symbols()) {
536 const DataRefImpl &DRI = Symbol.getRawDataRefImpl();
537 if (MainBinary.is64Bit())
538 dumpSymTabEntry(OS, Index: Idx, STE: MainBinary.getSymbol64TableEntry(DRI));
539 else
540 dumpSymTabEntry(OS, Index: Idx, STE: MainBinary.getSymbolTableEntry(DRI));
541 Idx++;
542 }
543
544 OS << "\n\n";
545 resetParserState();
546}
547
548static bool shouldLinkArch(SmallVectorImpl<StringRef> &Archs, StringRef Arch) {
549 if (Archs.empty() || is_contained(Range&: Archs, Element: "all") || is_contained(Range&: Archs, Element: "*"))
550 return true;
551
552 if (Arch.starts_with(Prefix: "arm") && Arch != "arm64" && is_contained(Range&: Archs, Element: "arm"))
553 return true;
554
555 SmallString<16> ArchName = Arch;
556 if (Arch.starts_with(Prefix: "thumb"))
557 ArchName = ("arm" + Arch.substr(Start: 5)).str();
558
559 return is_contained(Range&: Archs, Element: ArchName);
560}
561
562bool MachODebugMapParser::dumpStab() {
563 auto ObjectEntry = BinHolder.getObjectEntry(Filename: BinaryPath);
564 if (!ObjectEntry) {
565 auto Err = ObjectEntry.takeError();
566 WithColor::error() << "cannot load '" << BinaryPath
567 << "': " << toString(E: std::move(Err)) << '\n';
568 return false;
569 }
570
571 auto Objects = ObjectEntry->getObjectsAs<MachOObjectFile>();
572 if (!Objects) {
573 auto Err = Objects.takeError();
574 WithColor::error() << "cannot get '" << BinaryPath
575 << "' as MachO file: " << toString(E: std::move(Err))
576 << "\n";
577 return false;
578 }
579
580 for (const auto *Object : *Objects)
581 if (shouldLinkArch(Archs, Arch: Object->getArchTriple().getArchName()))
582 dumpOneBinaryStab(MainBinary: *Object, BinaryPath);
583
584 return true;
585}
586
587/// This main parsing routine tries to open the main binary and if
588/// successful iterates over the STAB entries. The real parsing is
589/// done in handleStabSymbolTableEntry.
590ErrorOr<std::vector<std::unique_ptr<DebugMap>>> MachODebugMapParser::parse() {
591 auto ObjectEntry = BinHolder.getObjectEntry(Filename: BinaryPath);
592 if (!ObjectEntry) {
593 return errorToErrorCode(Err: ObjectEntry.takeError());
594 }
595
596 auto Objects = ObjectEntry->getObjectsAs<MachOObjectFile>();
597 if (!Objects) {
598 return errorToErrorCode(Err: Objects.takeError());
599 }
600
601 std::vector<std::unique_ptr<DebugMap>> Results;
602 for (const auto *Object : *Objects)
603 if (shouldLinkArch(Archs, Arch: Object->getArchTriple().getArchName()))
604 Results.push_back(x: parseOneBinary(MainBinary: *Object, BinaryPath));
605
606 return std::move(Results);
607}
608
609void MachODebugMapParser::handleStabOSOEntry(
610 uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags,
611 uint64_t Value, llvm::DenseSet<OSO> &OSOs,
612 llvm::SmallSet<OSO, 4> &Duplicates) {
613 if (Type != MachO::N_OSO)
614 return;
615
616 OSO O(&MainBinaryStrings.data()[StringIndex], Value);
617 if (!OSOs.insert(V: O).second)
618 Duplicates.insert(V: O);
619}
620
621/// Interpret the STAB entries to fill the DebugMap.
622void MachODebugMapParser::handleStabSymbolTableEntry(
623 uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags,
624 uint64_t Value, const llvm::SmallSet<OSO, 4> &Duplicates) {
625 if (!(Type & MachO::N_STAB))
626 return;
627
628 const char *Name = &MainBinaryStrings.data()[StringIndex];
629
630 // An N_LIB entry represents the start of a new library file description.
631 if (Type == MachO::N_LIB) {
632 switchToNewLibDebugMapObject(Filename: Name, Timestamp: sys::toTimePoint(T: Value));
633 return;
634 }
635
636 // An N_OSO entry represents the start of a new object file description.
637 // If an N_LIB entry was present, this is parsed only if the library
638 // dSYM file could not be found.
639 if (Type == MachO::N_OSO) {
640 if (!CurrentDebugMapObject ||
641 CurrentDebugMapObject->getType() != MachO::N_LIB) {
642 if (Duplicates.count(V: OSO(Name, Value))) {
643 SkipDebugMapObject = true;
644 return;
645 }
646 switchToNewDebugMapObject(Filename: Name, Timestamp: sys::toTimePoint(T: Value));
647 }
648 return;
649 }
650
651 if (SkipDebugMapObject)
652 return;
653
654 if (Type == MachO::N_AST) {
655 SmallString<80> Path(PathPrefix);
656 sys::path::append(path&: Path, a: Name);
657 Result->addDebugMapObject(ObjectFilePath: Path, Timestamp: sys::toTimePoint(T: Value), Type);
658 return;
659 }
660
661 // If the last N_OSO object file wasn't found, CurrentDebugMapObject will be
662 // null. Do not update anything until we find the next valid N_OSO entry.
663 if (!CurrentDebugMapObject)
664 return;
665
666 uint32_t Size = 0;
667 switch (Type) {
668 case MachO::N_GSYM:
669 // This is a global variable. We need to query the main binary
670 // symbol table to find its address as it might not be in the
671 // debug map (for common symbols).
672 Value = getMainBinarySymbolAddress(Name);
673 break;
674 case MachO::N_FUN:
675 // Functions are scopes in STABS. They have an end marker that
676 // contains the function size.
677 if (Name[0] == '\0') {
678 Size = Value;
679 Value = CurrentFunctionAddress;
680 Name = CurrentFunctionName;
681 break;
682 } else {
683 CurrentFunctionName = Name;
684 CurrentFunctionAddress = Value;
685 return;
686 }
687 case MachO::N_STSYM:
688 break;
689 default:
690 return;
691 }
692
693 auto ObjectSymIt = CurrentObjectAddresses.find(Key: Name);
694
695 // If the name of a (non-static) symbol is not in the current object, we
696 // check all its aliases from the main binary.
697 if (ObjectSymIt == CurrentObjectAddresses.end() && Type != MachO::N_STSYM) {
698 if (SeenAliasValues.count(V: Value) == 0) {
699 auto Aliases = getMainBinarySymbolNames(Value);
700 for (const auto &Alias : Aliases) {
701 auto It = CurrentObjectAddresses.find(Key: Alias);
702 if (It != CurrentObjectAddresses.end()) {
703 auto AliasValue = It->getValue();
704 for (const auto &Alias : Aliases)
705 CurrentObjectAliasMap[Alias] = AliasValue;
706 break;
707 }
708 }
709 SeenAliasValues.insert(V: Value);
710 }
711
712 auto AliasIt = CurrentObjectAliasMap.find(Key: Name);
713 if (AliasIt != CurrentObjectAliasMap.end())
714 ObjectSymIt = AliasIt;
715 }
716
717 // ThinLTO adds a unique suffix to exported private symbols.
718 if (ObjectSymIt == CurrentObjectAddresses.end()) {
719 for (auto Iter = CurrentObjectAddresses.begin();
720 Iter != CurrentObjectAddresses.end(); ++Iter) {
721 llvm::StringRef SymbolName = Iter->getKey();
722 auto Pos = SymbolName.rfind(Str: ".llvm.");
723 if (Pos != llvm::StringRef::npos && SymbolName.substr(Start: 0, N: Pos) == Name) {
724 ObjectSymIt = Iter;
725 break;
726 }
727 }
728 }
729
730 if (ObjectSymIt == CurrentObjectAddresses.end()) {
731 Warning(Msg: "could not find symbol '" + Twine(Name) + "' in object file '" +
732 CurrentDebugMapObject->getObjectFilename() + "'");
733 return;
734 }
735
736 if (!CurrentDebugMapObject->addSymbol(SymName: Name, ObjectAddress: ObjectSymIt->getValue(), LinkedAddress: Value,
737 Size)) {
738 Warning(Msg: Twine("failed to insert symbol '") + Name + "' in the debug map.");
739 return;
740 }
741}
742
743/// Load the current object file symbols into CurrentObjectAddresses.
744void MachODebugMapParser::loadCurrentObjectFileSymbols(
745 const object::MachOObjectFile &Obj) {
746 CurrentObjectAddresses.clear();
747
748 for (auto Sym : Obj.symbols()) {
749 uint64_t Addr = cantFail(ValOrErr: Sym.getValue());
750 Expected<StringRef> Name = Sym.getName();
751 if (!Name) {
752 auto Err = Name.takeError();
753 Warning(Msg: "failed to get symbol name: " + toString(E: std::move(Err)),
754 File: Obj.getFileName());
755 continue;
756 }
757 // The value of some categories of symbols isn't meaningful. For
758 // example common symbols store their size in the value field, not
759 // their address. Absolute symbols have a fixed address that can
760 // conflict with standard symbols. These symbols (especially the
761 // common ones), might still be referenced by relocations. These
762 // relocations will use the symbol itself, and won't need an
763 // object file address. The object file address field is optional
764 // in the DebugMap, leave it unassigned for these symbols.
765 uint32_t Flags = cantFail(ValOrErr: Sym.getFlags());
766 if (Flags & SymbolRef::SF_Absolute) {
767 CurrentObjectAddresses[*Name] = std::nullopt;
768 } else if (Flags & SymbolRef::SF_Common) {
769 CurrentObjectAddresses[*Name] = std::nullopt;
770 CommonSymbols.push_back(x: std::string(*Name));
771 } else {
772 CurrentObjectAddresses[*Name] = Addr;
773 }
774 }
775}
776
777/// Lookup a symbol address in the main binary symbol table. The
778/// parser only needs to query common symbols, thus not every symbol's
779/// address is available through this function.
780uint64_t MachODebugMapParser::getMainBinarySymbolAddress(StringRef Name) {
781 auto Sym = MainBinarySymbolAddresses.find(Key: Name);
782 if (Sym == MainBinarySymbolAddresses.end())
783 return 0;
784 return Sym->second;
785}
786
787/// Get all symbol names in the main binary for the given value.
788std::vector<StringRef>
789MachODebugMapParser::getMainBinarySymbolNames(uint64_t Value) {
790 std::vector<StringRef> Names;
791 for (const auto &Entry : MainBinarySymbolAddresses) {
792 if (Entry.second == Value)
793 Names.push_back(x: Entry.first());
794 }
795 return Names;
796}
797
798/// Load the interesting main binary symbols' addresses into
799/// MainBinarySymbolAddresses.
800void MachODebugMapParser::loadMainBinarySymbols(
801 const MachOObjectFile &MainBinary) {
802 section_iterator Section = MainBinary.section_end();
803 MainBinarySymbolAddresses.clear();
804 for (const auto &Sym : MainBinary.symbols()) {
805 Expected<SymbolRef::Type> TypeOrErr = Sym.getType();
806 if (!TypeOrErr) {
807 auto Err = TypeOrErr.takeError();
808 Warning(Msg: "failed to get symbol type: " + toString(E: std::move(Err)),
809 File: MainBinary.getFileName());
810 continue;
811 }
812 SymbolRef::Type Type = *TypeOrErr;
813 // Skip undefined and STAB entries.
814 if ((Type == SymbolRef::ST_Debug) || (Type == SymbolRef::ST_Unknown))
815 continue;
816 // In theory, the only symbols of interest are the global variables. These
817 // are the only ones that need to be queried because the address of common
818 // data won't be described in the debug map. All other addresses should be
819 // fetched for the debug map. In reality, by playing with 'ld -r' and
820 // export lists, you can get symbols described as N_GSYM in the debug map,
821 // but associated with a local symbol. Gather all the symbols, but prefer
822 // the global ones.
823 uint8_t SymType =
824 MainBinary.getSymbolTableEntry(DRI: Sym.getRawDataRefImpl()).n_type;
825 bool Extern = SymType & (MachO::N_EXT | MachO::N_PEXT);
826 Expected<section_iterator> SectionOrErr = Sym.getSection();
827 if (!SectionOrErr) {
828 auto Err = TypeOrErr.takeError();
829 Warning(Msg: "failed to get symbol section: " + toString(E: std::move(Err)),
830 File: MainBinary.getFileName());
831 continue;
832 }
833 Section = *SectionOrErr;
834 if ((Section == MainBinary.section_end() || Section->isText()) && !Extern)
835 continue;
836 uint64_t Addr = cantFail(ValOrErr: Sym.getValue());
837 Expected<StringRef> NameOrErr = Sym.getName();
838 if (!NameOrErr) {
839 auto Err = NameOrErr.takeError();
840 Warning(Msg: "failed to get symbol name: " + toString(E: std::move(Err)),
841 File: MainBinary.getFileName());
842 continue;
843 }
844 StringRef Name = *NameOrErr;
845 if (Name.size() == 0 || Name[0] == '\0')
846 continue;
847 // Override only if the new key is global.
848 if (Extern)
849 MainBinarySymbolAddresses[Name] = Addr;
850 else
851 MainBinarySymbolAddresses.try_emplace(Key: Name, Args&: Addr);
852 }
853}
854
855namespace llvm {
856namespace dsymutil {
857llvm::ErrorOr<std::vector<std::unique_ptr<DebugMap>>>
858parseDebugMap(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
859 StringRef InputFile, ArrayRef<std::string> Archs,
860 ArrayRef<std::string> DSYMSearchPaths, StringRef PrependPath,
861 StringRef VariantSuffix, bool Verbose, bool InputIsYAML) {
862 if (InputIsYAML)
863 return DebugMap::parseYAMLDebugMap(InputFile, PrependPath, Verbose);
864
865 MachODebugMapParser Parser(VFS, InputFile, Archs, DSYMSearchPaths,
866 PrependPath, VariantSuffix, Verbose);
867
868 return Parser.parse();
869}
870
871bool dumpStab(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
872 StringRef InputFile, ArrayRef<std::string> Archs,
873 ArrayRef<std::string> DSYMSearchPaths, StringRef PrependPath,
874 StringRef VariantSuffix) {
875 MachODebugMapParser Parser(VFS, InputFile, Archs, DSYMSearchPaths,
876 PrependPath, VariantSuffix, false);
877 return Parser.dumpStab();
878}
879} // namespace dsymutil
880} // namespace llvm
881