1 | //===- tools/dsymutil/MachODebugMapParser.cpp - Parse STABS debug maps ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "BinaryHolder.h" |
10 | #include "DebugMap.h" |
11 | #include "MachOUtils.h" |
12 | #include "RelocationMap.h" |
13 | #include "llvm/ADT/DenseSet.h" |
14 | #include "llvm/ADT/SmallSet.h" |
15 | #include "llvm/Object/MachO.h" |
16 | #include "llvm/Support/Chrono.h" |
17 | #include "llvm/Support/Path.h" |
18 | #include "llvm/Support/WithColor.h" |
19 | #include "llvm/Support/raw_ostream.h" |
20 | #include <optional> |
21 | #include <vector> |
22 | |
23 | namespace { |
24 | using namespace llvm; |
25 | using namespace llvm::dsymutil; |
26 | using namespace llvm::object; |
27 | |
28 | class MachODebugMapParser { |
29 | public: |
30 | MachODebugMapParser(BinaryHolder &BinHolder, StringRef BinaryPath, |
31 | ArrayRef<std::string> Archs, |
32 | ArrayRef<std::string> DSYMSearchPaths, |
33 | StringRef PathPrefix = "" , StringRef VariantSuffix = "" , |
34 | bool Verbose = false) |
35 | : BinaryPath(std::string(BinaryPath)), Archs(Archs), |
36 | DSYMSearchPaths(DSYMSearchPaths), PathPrefix(std::string(PathPrefix)), |
37 | VariantSuffix(std::string(VariantSuffix)), BinHolder(BinHolder), |
38 | CurrentDebugMapObject(nullptr), SkipDebugMapObject(false) {} |
39 | |
40 | /// Parses and returns the DebugMaps of the input binary. The binary contains |
41 | /// multiple maps in case it is a universal binary. |
42 | /// \returns an error in case the provided BinaryPath doesn't exist |
43 | /// or isn't of a supported type. |
44 | ErrorOr<std::vector<std::unique_ptr<DebugMap>>> parse(); |
45 | |
46 | /// Walk the symbol table and dump it. |
47 | bool dumpStab(); |
48 | |
49 | using OSO = std::pair<llvm::StringRef, uint64_t>; |
50 | |
51 | private: |
52 | std::string BinaryPath; |
53 | SmallVector<StringRef, 1> Archs; |
54 | SmallVector<StringRef, 1> DSYMSearchPaths; |
55 | std::string PathPrefix; |
56 | std::string VariantSuffix; |
57 | |
58 | /// Owns the MemoryBuffer for the main binary. |
59 | BinaryHolder &BinHolder; |
60 | /// Map of the binary symbol addresses. |
61 | StringMap<uint64_t> MainBinarySymbolAddresses; |
62 | StringRef MainBinaryStrings; |
63 | /// The constructed DebugMap. |
64 | std::unique_ptr<DebugMap> Result; |
65 | /// List of common symbols that need to be added to the debug map. |
66 | std::vector<std::string> CommonSymbols; |
67 | |
68 | /// Map of the currently processed object file symbol addresses. |
69 | StringMap<std::optional<uint64_t>> CurrentObjectAddresses; |
70 | |
71 | /// Lazily computed map of symbols aliased to the processed object file. |
72 | StringMap<std::optional<uint64_t>> CurrentObjectAliasMap; |
73 | |
74 | /// If CurrentObjectAliasMap has been computed for a given address. |
75 | SmallSet<uint64_t, 4> SeenAliasValues; |
76 | |
77 | /// Element of the debug map corresponding to the current object file. |
78 | DebugMapObject *CurrentDebugMapObject; |
79 | |
80 | /// Whether we need to skip the current debug map object. |
81 | bool SkipDebugMapObject; |
82 | |
83 | /// Holds function info while function scope processing. |
84 | const char *CurrentFunctionName; |
85 | uint64_t CurrentFunctionAddress; |
86 | |
87 | std::unique_ptr<DebugMap> parseOneBinary(const MachOObjectFile &MainBinary, |
88 | StringRef BinaryPath); |
89 | void handleStabDebugMap( |
90 | const MachOObjectFile &MainBinary, |
91 | std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F); |
92 | |
93 | void |
94 | switchToNewDebugMapObject(StringRef Filename, |
95 | sys::TimePoint<std::chrono::seconds> Timestamp); |
96 | void |
97 | switchToNewLibDebugMapObject(StringRef Filename, |
98 | sys::TimePoint<std::chrono::seconds> Timestamp); |
99 | void resetParserState(); |
100 | uint64_t getMainBinarySymbolAddress(StringRef Name); |
101 | std::vector<StringRef> getMainBinarySymbolNames(uint64_t Value); |
102 | void loadMainBinarySymbols(const MachOObjectFile &MainBinary); |
103 | void loadCurrentObjectFileSymbols(const object::MachOObjectFile &Obj); |
104 | |
105 | void handleStabOSOEntry(uint32_t StringIndex, uint8_t Type, |
106 | uint8_t SectionIndex, uint16_t Flags, uint64_t Value, |
107 | llvm::DenseSet<OSO> &OSOs, |
108 | llvm::SmallSet<OSO, 4> &Duplicates); |
109 | void handleStabSymbolTableEntry(uint32_t StringIndex, uint8_t Type, |
110 | uint8_t SectionIndex, uint16_t Flags, |
111 | uint64_t Value, |
112 | const llvm::SmallSet<OSO, 4> &Duplicates); |
113 | |
114 | template <typename STEType> |
115 | void handleStabDebugMapEntry( |
116 | const STEType &STE, |
117 | std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) { |
118 | F(STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc, STE.n_value); |
119 | } |
120 | |
121 | void addCommonSymbols(); |
122 | |
123 | /// Dump the symbol table output header. |
124 | void dumpSymTabHeader(raw_ostream &OS, StringRef Arch); |
125 | |
126 | /// Dump the contents of nlist entries. |
127 | void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, uint32_t StringIndex, |
128 | uint8_t Type, uint8_t SectionIndex, uint16_t Flags, |
129 | uint64_t Value); |
130 | |
131 | template <typename STEType> |
132 | void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, const STEType &STE) { |
133 | dumpSymTabEntry(OS, Index, STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc, |
134 | STE.n_value); |
135 | } |
136 | void dumpOneBinaryStab(const MachOObjectFile &MainBinary, |
137 | StringRef BinaryPath); |
138 | |
139 | void Warning(const Twine &Msg, StringRef File = StringRef()) { |
140 | assert(Result && |
141 | "The debug map must be initialized before calling this function" ); |
142 | WithColor::warning() << "(" |
143 | << MachOUtils::getArchName( |
144 | Arch: Result->getTriple().getArchName()) |
145 | << ") " << File << " " << Msg << "\n" ; |
146 | } |
147 | }; |
148 | |
149 | } // anonymous namespace |
150 | |
151 | /// Reset the parser state corresponding to the current object |
152 | /// file. This is to be called after an object file is finished |
153 | /// processing. |
154 | void MachODebugMapParser::resetParserState() { |
155 | CommonSymbols.clear(); |
156 | CurrentObjectAddresses.clear(); |
157 | CurrentObjectAliasMap.clear(); |
158 | SeenAliasValues.clear(); |
159 | CurrentDebugMapObject = nullptr; |
160 | SkipDebugMapObject = false; |
161 | } |
162 | |
163 | /// Commons symbols won't show up in the symbol map but might need to be |
164 | /// relocated. We can add them to the symbol table ourselves by combining the |
165 | /// information in the object file (the symbol name) and the main binary (the |
166 | /// address). |
167 | void MachODebugMapParser::addCommonSymbols() { |
168 | for (auto &CommonSymbol : CommonSymbols) { |
169 | uint64_t CommonAddr = getMainBinarySymbolAddress(Name: CommonSymbol); |
170 | if (CommonAddr == 0) { |
171 | // The main binary doesn't have an address for the given symbol. |
172 | continue; |
173 | } |
174 | if (!CurrentDebugMapObject->addSymbol(SymName: CommonSymbol, |
175 | ObjectAddress: std::nullopt /*ObjectAddress*/, |
176 | LinkedAddress: CommonAddr, Size: 0 /*size*/)) { |
177 | // The symbol is already present. |
178 | continue; |
179 | } |
180 | } |
181 | } |
182 | |
183 | /// Create a new DebugMapObject. This function resets the state of the |
184 | /// parser that was referring to the last object file and sets |
185 | /// everything up to add symbols to the new one. |
186 | void MachODebugMapParser::switchToNewDebugMapObject( |
187 | StringRef Filename, sys::TimePoint<std::chrono::seconds> Timestamp) { |
188 | addCommonSymbols(); |
189 | resetParserState(); |
190 | |
191 | SmallString<80> Path(PathPrefix); |
192 | sys::path::append(path&: Path, a: Filename); |
193 | |
194 | auto ObjectEntry = BinHolder.getObjectEntry(Filename: Path, Timestamp); |
195 | if (!ObjectEntry) { |
196 | auto Err = ObjectEntry.takeError(); |
197 | Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)), |
198 | File: Path.str()); |
199 | return; |
200 | } |
201 | |
202 | auto Object = ObjectEntry->getObjectAs<MachOObjectFile>(T: Result->getTriple()); |
203 | if (!Object) { |
204 | auto Err = Object.takeError(); |
205 | Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)), |
206 | File: Path.str()); |
207 | return; |
208 | } |
209 | |
210 | CurrentDebugMapObject = |
211 | &Result->addDebugMapObject(ObjectFilePath: Path, Timestamp, Type: MachO::N_OSO); |
212 | |
213 | loadCurrentObjectFileSymbols(Obj: *Object); |
214 | } |
215 | |
216 | /// Create a new DebugMapObject of type MachO::N_LIB. |
217 | /// This function resets the state of the parser that was |
218 | /// referring to the last object file and sets everything |
219 | /// up to add symbols to the new one. |
220 | void MachODebugMapParser::switchToNewLibDebugMapObject( |
221 | StringRef Filename, sys::TimePoint<std::chrono::seconds> Timestamp) { |
222 | |
223 | if (DSYMSearchPaths.empty()) { |
224 | Warning(Msg: "no dSYM search path was specified" ); |
225 | return; |
226 | } |
227 | |
228 | StringRef LeafName = sys::path::filename(path: Filename); |
229 | SmallString<128> VariantLeafName; |
230 | SmallString<128> ProductName(LeafName); |
231 | |
232 | // For Framework.framework/Framework and -build-variant-suffix=_debug, |
233 | // look in the following order: |
234 | // 1) Framework.framework.dSYM/Contents/Resources/DWARF/Framework_debug |
235 | // 2) Framework.framework.dSYM/Contents/Resources/DWARF/Framework |
236 | // |
237 | // For libName.dylib and -build-variant-suffix=_debug, |
238 | // look in the following order: |
239 | // 1) libName.dylib.dSYM/Contents/Resources/DWARF/libName_debug.dylib |
240 | // 2) libName.dylib.dSYM/Contents/Resources/DWARF/libName.dylib |
241 | |
242 | size_t libExt = LeafName.rfind(Str: ".dylib" ); |
243 | if (libExt != StringRef::npos) { |
244 | if (!VariantSuffix.empty()) { |
245 | VariantLeafName.append(RHS: LeafName.substr(Start: 0, N: libExt)); |
246 | VariantLeafName.append(RHS: VariantSuffix); |
247 | VariantLeafName.append(RHS: ".dylib" ); |
248 | } |
249 | } else { |
250 | // Expected to be a framework |
251 | ProductName.append(RHS: ".framework" ); |
252 | if (!VariantSuffix.empty()) { |
253 | VariantLeafName.append(RHS: LeafName); |
254 | VariantLeafName.append(RHS: VariantSuffix); |
255 | } |
256 | } |
257 | |
258 | for (auto DSYMSearchPath : DSYMSearchPaths) { |
259 | SmallString<256> Path(DSYMSearchPath); |
260 | SmallString<256> FallbackPath(Path); |
261 | |
262 | SmallString<256> DSYMPath(ProductName); |
263 | DSYMPath.append(RHS: ".dSYM" ); |
264 | sys::path::append(path&: DSYMPath, a: "Contents" , b: "Resources" , c: "DWARF" ); |
265 | |
266 | if (!VariantSuffix.empty()) { |
267 | sys::path::append(path&: Path, a: DSYMPath, b: VariantLeafName); |
268 | sys::path::append(path&: FallbackPath, a: DSYMPath, b: LeafName); |
269 | } else { |
270 | sys::path::append(path&: Path, a: DSYMPath, b: LeafName); |
271 | } |
272 | |
273 | auto ObjectEntry = BinHolder.getObjectEntry(Filename: Path, Timestamp); |
274 | if (!ObjectEntry) { |
275 | auto Err = ObjectEntry.takeError(); |
276 | Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)), |
277 | File: Path.str()); |
278 | if (!VariantSuffix.empty()) { |
279 | ObjectEntry = BinHolder.getObjectEntry(Filename: FallbackPath, Timestamp); |
280 | if (!ObjectEntry) { |
281 | auto Err = ObjectEntry.takeError(); |
282 | Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)), |
283 | File: FallbackPath.str()); |
284 | continue; |
285 | } |
286 | Path.assign(RHS: FallbackPath); |
287 | } else { |
288 | continue; |
289 | } |
290 | } |
291 | |
292 | auto Object = |
293 | ObjectEntry->getObjectAs<MachOObjectFile>(T: Result->getTriple()); |
294 | if (!Object) { |
295 | auto Err = Object.takeError(); |
296 | Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)), |
297 | File: Path.str()); |
298 | continue; |
299 | } |
300 | |
301 | if (CurrentDebugMapObject && |
302 | CurrentDebugMapObject->getType() == MachO::N_LIB && |
303 | CurrentDebugMapObject->getObjectFilename() == Path) { |
304 | return; |
305 | } |
306 | |
307 | addCommonSymbols(); |
308 | resetParserState(); |
309 | |
310 | CurrentDebugMapObject = |
311 | &Result->addDebugMapObject(ObjectFilePath: Path, Timestamp, Type: MachO::N_LIB); |
312 | |
313 | CurrentDebugMapObject->setInstallName(Filename); |
314 | |
315 | SmallString<256> RMPath(DSYMSearchPath); |
316 | sys::path::append(path&: RMPath, a: ProductName); |
317 | RMPath.append(RHS: ".dSYM" ); |
318 | StringRef ArchName = Triple::getArchName(Kind: Result->getTriple().getArch(), |
319 | SubArch: Result->getTriple().getSubArch()); |
320 | sys::path::append(path&: RMPath, a: "Contents" , b: "Resources" , c: "Relocations" , d: ArchName); |
321 | sys::path::append(path&: RMPath, a: LeafName); |
322 | RMPath.append(RHS: ".yml" ); |
323 | const auto &RelocMapPtrOrErr = |
324 | RelocationMap::parseYAMLRelocationMap(InputFile: RMPath, PrependPath: PathPrefix); |
325 | if (auto EC = RelocMapPtrOrErr.getError()) { |
326 | Warning(Msg: "cannot parse relocation map file: " + EC.message(), |
327 | File: RMPath.str()); |
328 | return; |
329 | } |
330 | CurrentDebugMapObject->setRelocationMap(*RelocMapPtrOrErr->get()); |
331 | |
332 | loadCurrentObjectFileSymbols(Obj: *Object); |
333 | |
334 | // Found and loaded new dSYM file |
335 | return; |
336 | } |
337 | } |
338 | |
339 | static std::string getArchName(const object::MachOObjectFile &Obj) { |
340 | Triple T = Obj.getArchTriple(); |
341 | return std::string(T.getArchName()); |
342 | } |
343 | |
344 | void MachODebugMapParser::handleStabDebugMap( |
345 | const MachOObjectFile &MainBinary, |
346 | std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) { |
347 | for (const SymbolRef &Symbol : MainBinary.symbols()) { |
348 | const DataRefImpl &DRI = Symbol.getRawDataRefImpl(); |
349 | if (MainBinary.is64Bit()) |
350 | handleStabDebugMapEntry(STE: MainBinary.getSymbol64TableEntry(DRI), F); |
351 | else |
352 | handleStabDebugMapEntry(STE: MainBinary.getSymbolTableEntry(DRI), F); |
353 | } |
354 | } |
355 | |
356 | std::unique_ptr<DebugMap> |
357 | MachODebugMapParser::parseOneBinary(const MachOObjectFile &MainBinary, |
358 | StringRef BinaryPath) { |
359 | Result = std::make_unique<DebugMap>(args: MainBinary.getArchTriple(), args&: BinaryPath, |
360 | args: MainBinary.getUuid()); |
361 | loadMainBinarySymbols(MainBinary); |
362 | MainBinaryStrings = MainBinary.getStringTableData(); |
363 | |
364 | // Static archives can contain multiple object files with identical names, in |
365 | // which case the timestamp is used to disambiguate. However, if both are |
366 | // identical, there's no way to tell them apart. Detect this and skip |
367 | // duplicate debug map objects. |
368 | llvm::DenseSet<OSO> OSOs; |
369 | llvm::SmallSet<OSO, 4> Duplicates; |
370 | |
371 | // Iterate over all the STABS to find duplicate OSO entries. |
372 | handleStabDebugMap(MainBinary, |
373 | F: [&](uint32_t StringIndex, uint8_t Type, |
374 | uint8_t SectionIndex, uint16_t Flags, uint64_t Value) { |
375 | handleStabOSOEntry(StringIndex, Type, SectionIndex, |
376 | Flags, Value, OSOs, Duplicates); |
377 | }); |
378 | |
379 | // Print an informative warning with the duplicate object file name and time |
380 | // stamp. |
381 | for (const auto &OSO : Duplicates) { |
382 | std::string Buffer; |
383 | llvm::raw_string_ostream OS(Buffer); |
384 | OS << sys::TimePoint<std::chrono::seconds>(sys::toTimePoint(T: OSO.second)); |
385 | Warning(Msg: "skipping debug map object with duplicate name and timestamp: " + |
386 | Buffer + Twine(" " ) + Twine(OSO.first)); |
387 | } |
388 | |
389 | // Build the debug map by iterating over the STABS again but ignore the |
390 | // duplicate debug objects. |
391 | handleStabDebugMap(MainBinary, F: [&](uint32_t StringIndex, uint8_t Type, |
392 | uint8_t SectionIndex, uint16_t Flags, |
393 | uint64_t Value) { |
394 | handleStabSymbolTableEntry(StringIndex, Type, SectionIndex, Flags, Value, |
395 | Duplicates); |
396 | }); |
397 | |
398 | resetParserState(); |
399 | return std::move(Result); |
400 | } |
401 | |
// Table that maps Darwin's Mach-O stab constants to strings to allow printing.
// llvm-nm has very similar code; the strings used here are, however, slightly
// different and part of the interface of dsymutil (some projects' build systems
// parse the output of dsymutil -s), thus they shouldn't be changed.
406 | struct DarwinStabName { |
407 | uint8_t NType; |
408 | const char *Name; |
409 | }; |
410 | |
411 | const struct DarwinStabName DarwinStabNames[] = {{.NType: MachO::N_GSYM, .Name: "N_GSYM" }, |
412 | {.NType: MachO::N_FNAME, .Name: "N_FNAME" }, |
413 | {.NType: MachO::N_FUN, .Name: "N_FUN" }, |
414 | {.NType: MachO::N_STSYM, .Name: "N_STSYM" }, |
415 | {.NType: MachO::N_LCSYM, .Name: "N_LCSYM" }, |
416 | {.NType: MachO::N_BNSYM, .Name: "N_BNSYM" }, |
417 | {.NType: MachO::N_PC, .Name: "N_PC" }, |
418 | {.NType: MachO::N_AST, .Name: "N_AST" }, |
419 | {.NType: MachO::N_OPT, .Name: "N_OPT" }, |
420 | {.NType: MachO::N_RSYM, .Name: "N_RSYM" }, |
421 | {.NType: MachO::N_SLINE, .Name: "N_SLINE" }, |
422 | {.NType: MachO::N_ENSYM, .Name: "N_ENSYM" }, |
423 | {.NType: MachO::N_SSYM, .Name: "N_SSYM" }, |
424 | {.NType: MachO::N_SO, .Name: "N_SO" }, |
425 | {.NType: MachO::N_OSO, .Name: "N_OSO" }, |
426 | {.NType: MachO::N_LIB, .Name: "N_LIB" }, |
427 | {.NType: MachO::N_LSYM, .Name: "N_LSYM" }, |
428 | {.NType: MachO::N_BINCL, .Name: "N_BINCL" }, |
429 | {.NType: MachO::N_SOL, .Name: "N_SOL" }, |
430 | {.NType: MachO::N_PARAMS, .Name: "N_PARAM" }, |
431 | {.NType: MachO::N_VERSION, .Name: "N_VERS" }, |
432 | {.NType: MachO::N_OLEVEL, .Name: "N_OLEV" }, |
433 | {.NType: MachO::N_PSYM, .Name: "N_PSYM" }, |
434 | {.NType: MachO::N_EINCL, .Name: "N_EINCL" }, |
435 | {.NType: MachO::N_ENTRY, .Name: "N_ENTRY" }, |
436 | {.NType: MachO::N_LBRAC, .Name: "N_LBRAC" }, |
437 | {.NType: MachO::N_EXCL, .Name: "N_EXCL" }, |
438 | {.NType: MachO::N_RBRAC, .Name: "N_RBRAC" }, |
439 | {.NType: MachO::N_BCOMM, .Name: "N_BCOMM" }, |
440 | {.NType: MachO::N_ECOMM, .Name: "N_ECOMM" }, |
441 | {.NType: MachO::N_ECOML, .Name: "N_ECOML" }, |
442 | {.NType: MachO::N_LENG, .Name: "N_LENG" }, |
443 | {.NType: 0, .Name: nullptr}}; |
444 | |
445 | static const char *getDarwinStabString(uint8_t NType) { |
446 | for (unsigned i = 0; DarwinStabNames[i].Name; i++) { |
447 | if (DarwinStabNames[i].NType == NType) |
448 | return DarwinStabNames[i].Name; |
449 | } |
450 | return nullptr; |
451 | } |
452 | |
453 | void MachODebugMapParser::(raw_ostream &OS, StringRef Arch) { |
454 | OS << "-----------------------------------" |
455 | "-----------------------------------\n" ; |
456 | OS << "Symbol table for: '" << BinaryPath << "' (" << Arch.data() << ")\n" ; |
457 | OS << "-----------------------------------" |
458 | "-----------------------------------\n" ; |
459 | OS << "Index n_strx n_type n_sect n_desc n_value\n" ; |
460 | OS << "======== -------- ------------------ ------ ------ ----------------\n" ; |
461 | } |
462 | |
463 | void MachODebugMapParser::dumpSymTabEntry(raw_ostream &OS, uint64_t Index, |
464 | uint32_t StringIndex, uint8_t Type, |
465 | uint8_t SectionIndex, uint16_t Flags, |
466 | uint64_t Value) { |
467 | // Index |
468 | OS << '[' << format_decimal(N: Index, Width: 6) |
469 | << "] " |
470 | // n_strx |
471 | << format_hex_no_prefix(N: StringIndex, Width: 8) |
472 | << ' ' |
473 | // n_type... |
474 | << format_hex_no_prefix(N: Type, Width: 2) << " (" ; |
475 | |
476 | if (Type & MachO::N_STAB) |
477 | OS << left_justify(Str: getDarwinStabString(NType: Type), Width: 13); |
478 | else { |
479 | if (Type & MachO::N_PEXT) |
480 | OS << "PEXT " ; |
481 | else |
482 | OS << " " ; |
483 | switch (Type & MachO::N_TYPE) { |
484 | case MachO::N_UNDF: // 0x0 undefined, n_sect == NO_SECT |
485 | OS << "UNDF" ; |
486 | break; |
487 | case MachO::N_ABS: // 0x2 absolute, n_sect == NO_SECT |
488 | OS << "ABS " ; |
489 | break; |
490 | case MachO::N_SECT: // 0xe defined in section number n_sect |
491 | OS << "SECT" ; |
492 | break; |
493 | case MachO::N_PBUD: // 0xc prebound undefined (defined in a dylib) |
494 | OS << "PBUD" ; |
495 | break; |
496 | case MachO::N_INDR: // 0xa indirect |
497 | OS << "INDR" ; |
498 | break; |
499 | default: |
500 | OS << format_hex_no_prefix(N: Type, Width: 2) << " " ; |
501 | break; |
502 | } |
503 | if (Type & MachO::N_EXT) |
504 | OS << " EXT" ; |
505 | else |
506 | OS << " " ; |
507 | } |
508 | |
509 | OS << ") " |
510 | // n_sect |
511 | << format_hex_no_prefix(N: SectionIndex, Width: 2) |
512 | << " " |
513 | // n_desc |
514 | << format_hex_no_prefix(N: Flags, Width: 4) |
515 | << " " |
516 | // n_value |
517 | << format_hex_no_prefix(N: Value, Width: 16); |
518 | |
519 | const char *Name = &MainBinaryStrings.data()[StringIndex]; |
520 | if (Name && Name[0]) |
521 | OS << " '" << Name << "'" ; |
522 | |
523 | OS << "\n" ; |
524 | } |
525 | |
526 | void MachODebugMapParser::dumpOneBinaryStab(const MachOObjectFile &MainBinary, |
527 | StringRef BinaryPath) { |
528 | loadMainBinarySymbols(MainBinary); |
529 | MainBinaryStrings = MainBinary.getStringTableData(); |
530 | raw_ostream &OS(llvm::outs()); |
531 | |
532 | dumpSymTabHeader(OS, Arch: getArchName(Obj: MainBinary)); |
533 | uint64_t Idx = 0; |
534 | for (const SymbolRef &Symbol : MainBinary.symbols()) { |
535 | const DataRefImpl &DRI = Symbol.getRawDataRefImpl(); |
536 | if (MainBinary.is64Bit()) |
537 | dumpSymTabEntry(OS, Index: Idx, STE: MainBinary.getSymbol64TableEntry(DRI)); |
538 | else |
539 | dumpSymTabEntry(OS, Index: Idx, STE: MainBinary.getSymbolTableEntry(DRI)); |
540 | Idx++; |
541 | } |
542 | |
543 | OS << "\n\n" ; |
544 | resetParserState(); |
545 | } |
546 | |
547 | static bool shouldLinkArch(SmallVectorImpl<StringRef> &Archs, StringRef Arch) { |
548 | if (Archs.empty() || is_contained(Range&: Archs, Element: "all" ) || is_contained(Range&: Archs, Element: "*" )) |
549 | return true; |
550 | |
551 | if (Arch.starts_with(Prefix: "arm" ) && Arch != "arm64" && is_contained(Range&: Archs, Element: "arm" )) |
552 | return true; |
553 | |
554 | SmallString<16> ArchName = Arch; |
555 | if (Arch.starts_with(Prefix: "thumb" )) |
556 | ArchName = ("arm" + Arch.substr(Start: 5)).str(); |
557 | |
558 | return is_contained(Range&: Archs, Element: ArchName); |
559 | } |
560 | |
561 | bool MachODebugMapParser::dumpStab() { |
562 | auto ObjectEntry = BinHolder.getObjectEntry(Filename: BinaryPath); |
563 | if (!ObjectEntry) { |
564 | auto Err = ObjectEntry.takeError(); |
565 | WithColor::error() << "cannot load '" << BinaryPath |
566 | << "': " << toString(E: std::move(Err)) << '\n'; |
567 | return false; |
568 | } |
569 | |
570 | auto Objects = ObjectEntry->getObjectsAs<MachOObjectFile>(); |
571 | if (!Objects) { |
572 | auto Err = Objects.takeError(); |
573 | WithColor::error() << "cannot get '" << BinaryPath |
574 | << "' as MachO file: " << toString(E: std::move(Err)) |
575 | << "\n" ; |
576 | return false; |
577 | } |
578 | |
579 | for (const auto *Object : *Objects) |
580 | if (shouldLinkArch(Archs, Arch: Object->getArchTriple().getArchName())) |
581 | dumpOneBinaryStab(MainBinary: *Object, BinaryPath); |
582 | |
583 | return true; |
584 | } |
585 | |
586 | /// This main parsing routine tries to open the main binary and if |
587 | /// successful iterates over the STAB entries. The real parsing is |
588 | /// done in handleStabSymbolTableEntry. |
589 | ErrorOr<std::vector<std::unique_ptr<DebugMap>>> MachODebugMapParser::parse() { |
590 | auto ObjectEntry = BinHolder.getObjectEntry(Filename: BinaryPath); |
591 | if (!ObjectEntry) { |
592 | return errorToErrorCode(Err: ObjectEntry.takeError()); |
593 | } |
594 | |
595 | auto Objects = ObjectEntry->getObjectsAs<MachOObjectFile>(); |
596 | if (!Objects) { |
597 | return errorToErrorCode(Err: Objects.takeError()); |
598 | } |
599 | |
600 | std::vector<std::unique_ptr<DebugMap>> Results; |
601 | for (const auto *Object : *Objects) |
602 | if (shouldLinkArch(Archs, Arch: Object->getArchTriple().getArchName())) |
603 | Results.push_back(x: parseOneBinary(MainBinary: *Object, BinaryPath)); |
604 | |
605 | return std::move(Results); |
606 | } |
607 | |
608 | void MachODebugMapParser::handleStabOSOEntry( |
609 | uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags, |
610 | uint64_t Value, llvm::DenseSet<OSO> &OSOs, |
611 | llvm::SmallSet<OSO, 4> &Duplicates) { |
612 | if (Type != MachO::N_OSO) |
613 | return; |
614 | |
615 | OSO O(&MainBinaryStrings.data()[StringIndex], Value); |
616 | if (!OSOs.insert(V: O).second) |
617 | Duplicates.insert(V: O); |
618 | } |
619 | |
620 | /// Interpret the STAB entries to fill the DebugMap. |
621 | void MachODebugMapParser::handleStabSymbolTableEntry( |
622 | uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags, |
623 | uint64_t Value, const llvm::SmallSet<OSO, 4> &Duplicates) { |
624 | if (!(Type & MachO::N_STAB)) |
625 | return; |
626 | |
627 | const char *Name = &MainBinaryStrings.data()[StringIndex]; |
628 | |
629 | // An N_LIB entry represents the start of a new library file description. |
630 | if (Type == MachO::N_LIB) { |
631 | switchToNewLibDebugMapObject(Filename: Name, Timestamp: sys::toTimePoint(T: Value)); |
632 | return; |
633 | } |
634 | |
635 | // An N_OSO entry represents the start of a new object file description. |
636 | // If an N_LIB entry was present, this is parsed only if the library |
637 | // dSYM file could not be found. |
638 | if (Type == MachO::N_OSO) { |
639 | if (!CurrentDebugMapObject || |
640 | CurrentDebugMapObject->getType() != MachO::N_LIB) { |
641 | if (Duplicates.count(V: OSO(Name, Value))) { |
642 | SkipDebugMapObject = true; |
643 | return; |
644 | } |
645 | switchToNewDebugMapObject(Filename: Name, Timestamp: sys::toTimePoint(T: Value)); |
646 | } |
647 | return; |
648 | } |
649 | |
650 | if (SkipDebugMapObject) |
651 | return; |
652 | |
653 | if (Type == MachO::N_AST) { |
654 | SmallString<80> Path(PathPrefix); |
655 | sys::path::append(path&: Path, a: Name); |
656 | Result->addDebugMapObject(ObjectFilePath: Path, Timestamp: sys::toTimePoint(T: Value), Type); |
657 | return; |
658 | } |
659 | |
660 | // If the last N_OSO object file wasn't found, CurrentDebugMapObject will be |
661 | // null. Do not update anything until we find the next valid N_OSO entry. |
662 | if (!CurrentDebugMapObject) |
663 | return; |
664 | |
665 | uint32_t Size = 0; |
666 | switch (Type) { |
667 | case MachO::N_GSYM: |
668 | // This is a global variable. We need to query the main binary |
669 | // symbol table to find its address as it might not be in the |
670 | // debug map (for common symbols). |
671 | Value = getMainBinarySymbolAddress(Name); |
672 | break; |
673 | case MachO::N_FUN: |
674 | // Functions are scopes in STABS. They have an end marker that |
675 | // contains the function size. |
676 | if (Name[0] == '\0') { |
677 | Size = Value; |
678 | Value = CurrentFunctionAddress; |
679 | Name = CurrentFunctionName; |
680 | break; |
681 | } else { |
682 | CurrentFunctionName = Name; |
683 | CurrentFunctionAddress = Value; |
684 | return; |
685 | } |
686 | case MachO::N_STSYM: |
687 | break; |
688 | default: |
689 | return; |
690 | } |
691 | |
692 | auto ObjectSymIt = CurrentObjectAddresses.find(Key: Name); |
693 | |
694 | // If the name of a (non-static) symbol is not in the current object, we |
695 | // check all its aliases from the main binary. |
696 | if (ObjectSymIt == CurrentObjectAddresses.end() && Type != MachO::N_STSYM) { |
697 | if (SeenAliasValues.count(V: Value) == 0) { |
698 | auto Aliases = getMainBinarySymbolNames(Value); |
699 | for (const auto &Alias : Aliases) { |
700 | auto It = CurrentObjectAddresses.find(Key: Alias); |
701 | if (It != CurrentObjectAddresses.end()) { |
702 | auto AliasValue = It->getValue(); |
703 | for (const auto &Alias : Aliases) |
704 | CurrentObjectAliasMap[Alias] = AliasValue; |
705 | break; |
706 | } |
707 | } |
708 | SeenAliasValues.insert(V: Value); |
709 | } |
710 | |
711 | auto AliasIt = CurrentObjectAliasMap.find(Key: Name); |
712 | if (AliasIt != CurrentObjectAliasMap.end()) |
713 | ObjectSymIt = AliasIt; |
714 | } |
715 | |
716 | // ThinLTO adds a unique suffix to exported private symbols. |
717 | if (ObjectSymIt == CurrentObjectAddresses.end()) { |
718 | for (auto Iter = CurrentObjectAddresses.begin(); |
719 | Iter != CurrentObjectAddresses.end(); ++Iter) { |
720 | llvm::StringRef SymbolName = Iter->getKey(); |
721 | auto Pos = SymbolName.rfind(Str: ".llvm." ); |
722 | if (Pos != llvm::StringRef::npos && SymbolName.substr(Start: 0, N: Pos) == Name) { |
723 | ObjectSymIt = Iter; |
724 | break; |
725 | } |
726 | } |
727 | } |
728 | |
729 | if (ObjectSymIt == CurrentObjectAddresses.end()) { |
730 | Warning(Msg: "could not find symbol '" + Twine(Name) + "' in object file '" + |
731 | CurrentDebugMapObject->getObjectFilename() + "'" ); |
732 | return; |
733 | } |
734 | |
735 | if (!CurrentDebugMapObject->addSymbol(SymName: Name, ObjectAddress: ObjectSymIt->getValue(), LinkedAddress: Value, |
736 | Size)) { |
737 | Warning(Msg: Twine("failed to insert symbol '" ) + Name + "' in the debug map." ); |
738 | return; |
739 | } |
740 | } |
741 | |
742 | /// Load the current object file symbols into CurrentObjectAddresses. |
743 | void MachODebugMapParser::loadCurrentObjectFileSymbols( |
744 | const object::MachOObjectFile &Obj) { |
745 | CurrentObjectAddresses.clear(); |
746 | |
747 | for (auto Sym : Obj.symbols()) { |
748 | uint64_t Addr = cantFail(ValOrErr: Sym.getValue()); |
749 | Expected<StringRef> Name = Sym.getName(); |
750 | if (!Name) { |
751 | auto Err = Name.takeError(); |
752 | Warning(Msg: "failed to get symbol name: " + toString(E: std::move(Err)), |
753 | File: Obj.getFileName()); |
754 | continue; |
755 | } |
756 | // The value of some categories of symbols isn't meaningful. For |
757 | // example common symbols store their size in the value field, not |
758 | // their address. Absolute symbols have a fixed address that can |
759 | // conflict with standard symbols. These symbols (especially the |
760 | // common ones), might still be referenced by relocations. These |
761 | // relocations will use the symbol itself, and won't need an |
762 | // object file address. The object file address field is optional |
763 | // in the DebugMap, leave it unassigned for these symbols. |
764 | uint32_t Flags = cantFail(ValOrErr: Sym.getFlags()); |
765 | if (Flags & SymbolRef::SF_Absolute) { |
766 | CurrentObjectAddresses[*Name] = std::nullopt; |
767 | } else if (Flags & SymbolRef::SF_Common) { |
768 | CurrentObjectAddresses[*Name] = std::nullopt; |
769 | CommonSymbols.push_back(x: std::string(*Name)); |
770 | } else { |
771 | CurrentObjectAddresses[*Name] = Addr; |
772 | } |
773 | } |
774 | } |
775 | |
776 | /// Lookup a symbol address in the main binary symbol table. The |
777 | /// parser only needs to query common symbols, thus not every symbol's |
778 | /// address is available through this function. |
779 | uint64_t MachODebugMapParser::getMainBinarySymbolAddress(StringRef Name) { |
780 | auto Sym = MainBinarySymbolAddresses.find(Key: Name); |
781 | if (Sym == MainBinarySymbolAddresses.end()) |
782 | return 0; |
783 | return Sym->second; |
784 | } |
785 | |
786 | /// Get all symbol names in the main binary for the given value. |
787 | std::vector<StringRef> |
788 | MachODebugMapParser::getMainBinarySymbolNames(uint64_t Value) { |
789 | std::vector<StringRef> Names; |
790 | for (const auto &Entry : MainBinarySymbolAddresses) { |
791 | if (Entry.second == Value) |
792 | Names.push_back(x: Entry.first()); |
793 | } |
794 | return Names; |
795 | } |
796 | |
797 | /// Load the interesting main binary symbols' addresses into |
798 | /// MainBinarySymbolAddresses. |
799 | void MachODebugMapParser::loadMainBinarySymbols( |
800 | const MachOObjectFile &MainBinary) { |
801 | section_iterator Section = MainBinary.section_end(); |
802 | MainBinarySymbolAddresses.clear(); |
803 | for (const auto &Sym : MainBinary.symbols()) { |
804 | Expected<SymbolRef::Type> TypeOrErr = Sym.getType(); |
805 | if (!TypeOrErr) { |
806 | auto Err = TypeOrErr.takeError(); |
807 | Warning(Msg: "failed to get symbol type: " + toString(E: std::move(Err)), |
808 | File: MainBinary.getFileName()); |
809 | continue; |
810 | } |
811 | SymbolRef::Type Type = *TypeOrErr; |
812 | // Skip undefined and STAB entries. |
813 | if ((Type == SymbolRef::ST_Debug) || (Type == SymbolRef::ST_Unknown)) |
814 | continue; |
815 | // In theory, the only symbols of interest are the global variables. These |
816 | // are the only ones that need to be queried because the address of common |
817 | // data won't be described in the debug map. All other addresses should be |
818 | // fetched for the debug map. In reality, by playing with 'ld -r' and |
819 | // export lists, you can get symbols described as N_GSYM in the debug map, |
820 | // but associated with a local symbol. Gather all the symbols, but prefer |
821 | // the global ones. |
822 | uint8_t SymType = |
823 | MainBinary.getSymbolTableEntry(DRI: Sym.getRawDataRefImpl()).n_type; |
824 | bool Extern = SymType & (MachO::N_EXT | MachO::N_PEXT); |
825 | Expected<section_iterator> SectionOrErr = Sym.getSection(); |
826 | if (!SectionOrErr) { |
827 | auto Err = TypeOrErr.takeError(); |
828 | Warning(Msg: "failed to get symbol section: " + toString(E: std::move(Err)), |
829 | File: MainBinary.getFileName()); |
830 | continue; |
831 | } |
832 | Section = *SectionOrErr; |
833 | if ((Section == MainBinary.section_end() || Section->isText()) && !Extern) |
834 | continue; |
835 | uint64_t Addr = cantFail(ValOrErr: Sym.getValue()); |
836 | Expected<StringRef> NameOrErr = Sym.getName(); |
837 | if (!NameOrErr) { |
838 | auto Err = NameOrErr.takeError(); |
839 | Warning(Msg: "failed to get symbol name: " + toString(E: std::move(Err)), |
840 | File: MainBinary.getFileName()); |
841 | continue; |
842 | } |
843 | StringRef Name = *NameOrErr; |
844 | if (Name.size() == 0 || Name[0] == '\0') |
845 | continue; |
846 | // Override only if the new key is global. |
847 | if (Extern) |
848 | MainBinarySymbolAddresses[Name] = Addr; |
849 | else |
850 | MainBinarySymbolAddresses.try_emplace(Key: Name, Args&: Addr); |
851 | } |
852 | } |
853 | |
854 | namespace llvm { |
855 | namespace dsymutil { |
856 | llvm::ErrorOr<std::vector<std::unique_ptr<DebugMap>>> |
857 | parseDebugMap(BinaryHolder &BinHolder, StringRef InputFile, |
858 | ArrayRef<std::string> Archs, |
859 | ArrayRef<std::string> DSYMSearchPaths, StringRef PrependPath, |
860 | StringRef VariantSuffix, bool Verbose, bool InputIsYAML) { |
861 | if (InputIsYAML) |
862 | return DebugMap::parseYAMLDebugMap(BinHolder, InputFile, PrependPath, |
863 | Verbose); |
864 | |
865 | MachODebugMapParser Parser(BinHolder, InputFile, Archs, DSYMSearchPaths, |
866 | PrependPath, VariantSuffix, Verbose); |
867 | |
868 | return Parser.parse(); |
869 | } |
870 | |
871 | bool dumpStab(BinaryHolder &BinHolder, StringRef InputFile, |
872 | ArrayRef<std::string> Archs, |
873 | ArrayRef<std::string> DSYMSearchPaths, StringRef PrependPath, |
874 | StringRef VariantSuffix) { |
875 | MachODebugMapParser Parser(BinHolder, InputFile, Archs, DSYMSearchPaths, |
876 | PrependPath, VariantSuffix, false); |
877 | return Parser.dumpStab(); |
878 | } |
879 | } // namespace dsymutil |
880 | } // namespace llvm |
881 | |