1 | //===- tools/dsymutil/MachODebugMapParser.cpp - Parse STABS debug maps ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "BinaryHolder.h" |
10 | #include "DebugMap.h" |
11 | #include "MachOUtils.h" |
12 | #include "RelocationMap.h" |
13 | #include "llvm/ADT/DenseSet.h" |
14 | #include "llvm/ADT/SmallSet.h" |
15 | #include "llvm/Object/MachO.h" |
16 | #include "llvm/Support/Chrono.h" |
17 | #include "llvm/Support/Path.h" |
18 | #include "llvm/Support/WithColor.h" |
19 | #include "llvm/Support/raw_ostream.h" |
20 | #include <optional> |
21 | #include <vector> |
22 | |
23 | namespace { |
24 | using namespace llvm; |
25 | using namespace llvm::dsymutil; |
26 | using namespace llvm::object; |
27 | |
28 | class MachODebugMapParser { |
29 | public: |
30 | MachODebugMapParser(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, |
31 | StringRef BinaryPath, ArrayRef<std::string> Archs, |
32 | ArrayRef<std::string> DSYMSearchPaths, |
33 | StringRef PathPrefix = "" , StringRef VariantSuffix = "" , |
34 | bool Verbose = false) |
35 | : BinaryPath(std::string(BinaryPath)), Archs(Archs.begin(), Archs.end()), |
36 | DSYMSearchPaths(DSYMSearchPaths.begin(), DSYMSearchPaths.end()), |
37 | PathPrefix(std::string(PathPrefix)), |
38 | VariantSuffix(std::string(VariantSuffix)), BinHolder(VFS, Verbose), |
39 | CurrentDebugMapObject(nullptr), SkipDebugMapObject(false) {} |
40 | |
41 | /// Parses and returns the DebugMaps of the input binary. The binary contains |
42 | /// multiple maps in case it is a universal binary. |
43 | /// \returns an error in case the provided BinaryPath doesn't exist |
44 | /// or isn't of a supported type. |
45 | ErrorOr<std::vector<std::unique_ptr<DebugMap>>> parse(); |
46 | |
47 | /// Walk the symbol table and dump it. |
48 | bool dumpStab(); |
49 | |
50 | using OSO = std::pair<llvm::StringRef, uint64_t>; |
51 | |
52 | private: |
53 | std::string BinaryPath; |
54 | SmallVector<StringRef, 1> Archs; |
55 | SmallVector<StringRef, 1> DSYMSearchPaths; |
56 | std::string PathPrefix; |
57 | std::string VariantSuffix; |
58 | |
59 | /// Owns the MemoryBuffer for the main binary. |
60 | BinaryHolder BinHolder; |
61 | /// Map of the binary symbol addresses. |
62 | StringMap<uint64_t> MainBinarySymbolAddresses; |
63 | StringRef MainBinaryStrings; |
64 | /// The constructed DebugMap. |
65 | std::unique_ptr<DebugMap> Result; |
66 | /// List of common symbols that need to be added to the debug map. |
67 | std::vector<std::string> CommonSymbols; |
68 | |
69 | /// Map of the currently processed object file symbol addresses. |
70 | StringMap<std::optional<uint64_t>> CurrentObjectAddresses; |
71 | |
72 | /// Lazily computed map of symbols aliased to the processed object file. |
73 | StringMap<std::optional<uint64_t>> CurrentObjectAliasMap; |
74 | |
75 | /// If CurrentObjectAliasMap has been computed for a given address. |
76 | SmallSet<uint64_t, 4> SeenAliasValues; |
77 | |
78 | /// Element of the debug map corresponding to the current object file. |
79 | DebugMapObject *CurrentDebugMapObject; |
80 | |
81 | /// Whether we need to skip the current debug map object. |
82 | bool SkipDebugMapObject; |
83 | |
84 | /// Holds function info while function scope processing. |
85 | const char *CurrentFunctionName; |
86 | uint64_t CurrentFunctionAddress; |
87 | |
88 | std::unique_ptr<DebugMap> parseOneBinary(const MachOObjectFile &MainBinary, |
89 | StringRef BinaryPath); |
90 | void handleStabDebugMap( |
91 | const MachOObjectFile &MainBinary, |
92 | std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F); |
93 | |
94 | void |
95 | switchToNewDebugMapObject(StringRef Filename, |
96 | sys::TimePoint<std::chrono::seconds> Timestamp); |
97 | void |
98 | switchToNewLibDebugMapObject(StringRef Filename, |
99 | sys::TimePoint<std::chrono::seconds> Timestamp); |
100 | void resetParserState(); |
101 | uint64_t getMainBinarySymbolAddress(StringRef Name); |
102 | std::vector<StringRef> getMainBinarySymbolNames(uint64_t Value); |
103 | void loadMainBinarySymbols(const MachOObjectFile &MainBinary); |
104 | void loadCurrentObjectFileSymbols(const object::MachOObjectFile &Obj); |
105 | |
106 | void handleStabOSOEntry(uint32_t StringIndex, uint8_t Type, |
107 | uint8_t SectionIndex, uint16_t Flags, uint64_t Value, |
108 | llvm::DenseSet<OSO> &OSOs, |
109 | llvm::SmallSet<OSO, 4> &Duplicates); |
110 | void handleStabSymbolTableEntry(uint32_t StringIndex, uint8_t Type, |
111 | uint8_t SectionIndex, uint16_t Flags, |
112 | uint64_t Value, |
113 | const llvm::SmallSet<OSO, 4> &Duplicates); |
114 | |
115 | template <typename STEType> |
116 | void handleStabDebugMapEntry( |
117 | const STEType &STE, |
118 | std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) { |
119 | F(STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc, STE.n_value); |
120 | } |
121 | |
122 | void addCommonSymbols(); |
123 | |
124 | /// Dump the symbol table output header. |
125 | void dumpSymTabHeader(raw_ostream &OS, StringRef Arch); |
126 | |
127 | /// Dump the contents of nlist entries. |
128 | void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, uint32_t StringIndex, |
129 | uint8_t Type, uint8_t SectionIndex, uint16_t Flags, |
130 | uint64_t Value); |
131 | |
132 | template <typename STEType> |
133 | void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, const STEType &STE) { |
134 | dumpSymTabEntry(OS, Index, STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc, |
135 | STE.n_value); |
136 | } |
137 | void dumpOneBinaryStab(const MachOObjectFile &MainBinary, |
138 | StringRef BinaryPath); |
139 | |
140 | void Warning(const Twine &Msg, StringRef File = StringRef()) { |
141 | assert(Result && |
142 | "The debug map must be initialized before calling this function" ); |
143 | WithColor::warning() << "(" |
144 | << MachOUtils::getArchName( |
145 | Arch: Result->getTriple().getArchName()) |
146 | << ") " << File << " " << Msg << "\n" ; |
147 | } |
148 | }; |
149 | |
150 | } // anonymous namespace |
151 | |
152 | /// Reset the parser state corresponding to the current object |
153 | /// file. This is to be called after an object file is finished |
154 | /// processing. |
155 | void MachODebugMapParser::resetParserState() { |
156 | CommonSymbols.clear(); |
157 | CurrentObjectAddresses.clear(); |
158 | CurrentObjectAliasMap.clear(); |
159 | SeenAliasValues.clear(); |
160 | CurrentDebugMapObject = nullptr; |
161 | SkipDebugMapObject = false; |
162 | } |
163 | |
164 | /// Commons symbols won't show up in the symbol map but might need to be |
165 | /// relocated. We can add them to the symbol table ourselves by combining the |
166 | /// information in the object file (the symbol name) and the main binary (the |
167 | /// address). |
168 | void MachODebugMapParser::addCommonSymbols() { |
169 | for (auto &CommonSymbol : CommonSymbols) { |
170 | uint64_t CommonAddr = getMainBinarySymbolAddress(Name: CommonSymbol); |
171 | if (CommonAddr == 0) { |
172 | // The main binary doesn't have an address for the given symbol. |
173 | continue; |
174 | } |
175 | if (!CurrentDebugMapObject->addSymbol(SymName: CommonSymbol, |
176 | ObjectAddress: std::nullopt /*ObjectAddress*/, |
177 | LinkedAddress: CommonAddr, Size: 0 /*size*/)) { |
178 | // The symbol is already present. |
179 | continue; |
180 | } |
181 | } |
182 | } |
183 | |
184 | /// Create a new DebugMapObject. This function resets the state of the |
185 | /// parser that was referring to the last object file and sets |
186 | /// everything up to add symbols to the new one. |
187 | void MachODebugMapParser::switchToNewDebugMapObject( |
188 | StringRef Filename, sys::TimePoint<std::chrono::seconds> Timestamp) { |
189 | addCommonSymbols(); |
190 | resetParserState(); |
191 | |
192 | SmallString<80> Path(PathPrefix); |
193 | sys::path::append(path&: Path, a: Filename); |
194 | |
195 | auto ObjectEntry = BinHolder.getObjectEntry(Filename: Path, Timestamp); |
196 | if (!ObjectEntry) { |
197 | auto Err = ObjectEntry.takeError(); |
198 | Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)), |
199 | File: Path.str()); |
200 | return; |
201 | } |
202 | |
203 | auto Object = ObjectEntry->getObjectAs<MachOObjectFile>(T: Result->getTriple()); |
204 | if (!Object) { |
205 | auto Err = Object.takeError(); |
206 | Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)), |
207 | File: Path.str()); |
208 | return; |
209 | } |
210 | |
211 | CurrentDebugMapObject = |
212 | &Result->addDebugMapObject(ObjectFilePath: Path, Timestamp, Type: MachO::N_OSO); |
213 | |
214 | loadCurrentObjectFileSymbols(Obj: *Object); |
215 | } |
216 | |
217 | /// Create a new DebugMapObject of type MachO::N_LIB. |
218 | /// This function resets the state of the parser that was |
219 | /// referring to the last object file and sets everything |
220 | /// up to add symbols to the new one. |
221 | void MachODebugMapParser::switchToNewLibDebugMapObject( |
222 | StringRef Filename, sys::TimePoint<std::chrono::seconds> Timestamp) { |
223 | |
224 | if (DSYMSearchPaths.empty()) { |
225 | Warning(Msg: "no dSYM search path was specified" ); |
226 | return; |
227 | } |
228 | |
229 | StringRef LeafName = sys::path::filename(path: Filename); |
230 | SmallString<128> VariantLeafName; |
231 | SmallString<128> ProductName(LeafName); |
232 | |
233 | // For Framework.framework/Framework and -build-variant-suffix=_debug, |
234 | // look in the following order: |
235 | // 1) Framework.framework.dSYM/Contents/Resources/DWARF/Framework_debug |
236 | // 2) Framework.framework.dSYM/Contents/Resources/DWARF/Framework |
237 | // |
238 | // For libName.dylib and -build-variant-suffix=_debug, |
239 | // look in the following order: |
240 | // 1) libName.dylib.dSYM/Contents/Resources/DWARF/libName_debug.dylib |
241 | // 2) libName.dylib.dSYM/Contents/Resources/DWARF/libName.dylib |
242 | |
243 | size_t libExt = LeafName.rfind(Str: ".dylib" ); |
244 | if (libExt != StringRef::npos) { |
245 | if (!VariantSuffix.empty()) { |
246 | VariantLeafName.append(RHS: LeafName.substr(Start: 0, N: libExt)); |
247 | VariantLeafName.append(RHS: VariantSuffix); |
248 | VariantLeafName.append(RHS: ".dylib" ); |
249 | } |
250 | } else { |
251 | // Expected to be a framework |
252 | ProductName.append(RHS: ".framework" ); |
253 | if (!VariantSuffix.empty()) { |
254 | VariantLeafName.append(RHS: LeafName); |
255 | VariantLeafName.append(RHS: VariantSuffix); |
256 | } |
257 | } |
258 | |
259 | for (auto DSYMSearchPath : DSYMSearchPaths) { |
260 | SmallString<256> Path(DSYMSearchPath); |
261 | SmallString<256> FallbackPath(Path); |
262 | |
263 | SmallString<256> DSYMPath(ProductName); |
264 | DSYMPath.append(RHS: ".dSYM" ); |
265 | sys::path::append(path&: DSYMPath, a: "Contents" , b: "Resources" , c: "DWARF" ); |
266 | |
267 | if (!VariantSuffix.empty()) { |
268 | sys::path::append(path&: Path, a: DSYMPath, b: VariantLeafName); |
269 | sys::path::append(path&: FallbackPath, a: DSYMPath, b: LeafName); |
270 | } else { |
271 | sys::path::append(path&: Path, a: DSYMPath, b: LeafName); |
272 | } |
273 | |
274 | auto ObjectEntry = BinHolder.getObjectEntry(Filename: Path, Timestamp); |
275 | if (!ObjectEntry) { |
276 | auto Err = ObjectEntry.takeError(); |
277 | Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)), |
278 | File: Path.str()); |
279 | if (!VariantSuffix.empty()) { |
280 | ObjectEntry = BinHolder.getObjectEntry(Filename: FallbackPath, Timestamp); |
281 | if (!ObjectEntry) { |
282 | auto Err = ObjectEntry.takeError(); |
283 | Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)), |
284 | File: FallbackPath.str()); |
285 | continue; |
286 | } |
287 | Path.assign(RHS: FallbackPath); |
288 | } else { |
289 | continue; |
290 | } |
291 | } |
292 | |
293 | auto Object = |
294 | ObjectEntry->getObjectAs<MachOObjectFile>(T: Result->getTriple()); |
295 | if (!Object) { |
296 | auto Err = Object.takeError(); |
297 | Warning(Msg: "unable to open object file: " + toString(E: std::move(Err)), |
298 | File: Path.str()); |
299 | continue; |
300 | } |
301 | |
302 | if (CurrentDebugMapObject && |
303 | CurrentDebugMapObject->getType() == MachO::N_LIB && |
304 | CurrentDebugMapObject->getObjectFilename() == Path) { |
305 | return; |
306 | } |
307 | |
308 | addCommonSymbols(); |
309 | resetParserState(); |
310 | |
311 | CurrentDebugMapObject = |
312 | &Result->addDebugMapObject(ObjectFilePath: Path, Timestamp, Type: MachO::N_LIB); |
313 | |
314 | CurrentDebugMapObject->setInstallName(Filename); |
315 | |
316 | SmallString<256> RMPath(DSYMSearchPath); |
317 | sys::path::append(path&: RMPath, a: ProductName); |
318 | RMPath.append(RHS: ".dSYM" ); |
319 | StringRef ArchName = Triple::getArchName(Kind: Result->getTriple().getArch(), |
320 | SubArch: Result->getTriple().getSubArch()); |
321 | sys::path::append(path&: RMPath, a: "Contents" , b: "Resources" , c: "Relocations" , d: ArchName); |
322 | sys::path::append(path&: RMPath, a: LeafName); |
323 | RMPath.append(RHS: ".yml" ); |
324 | const auto &RelocMapPtrOrErr = |
325 | RelocationMap::parseYAMLRelocationMap(InputFile: RMPath, PrependPath: PathPrefix); |
326 | if (auto EC = RelocMapPtrOrErr.getError()) { |
327 | Warning(Msg: "cannot parse relocation map file: " + EC.message(), |
328 | File: RMPath.str()); |
329 | return; |
330 | } |
331 | CurrentDebugMapObject->setRelocationMap(*RelocMapPtrOrErr->get()); |
332 | |
333 | loadCurrentObjectFileSymbols(Obj: *Object); |
334 | |
335 | // Found and loaded new dSYM file |
336 | return; |
337 | } |
338 | } |
339 | |
340 | static std::string getArchName(const object::MachOObjectFile &Obj) { |
341 | Triple T = Obj.getArchTriple(); |
342 | return std::string(T.getArchName()); |
343 | } |
344 | |
345 | void MachODebugMapParser::handleStabDebugMap( |
346 | const MachOObjectFile &MainBinary, |
347 | std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) { |
348 | for (const SymbolRef &Symbol : MainBinary.symbols()) { |
349 | const DataRefImpl &DRI = Symbol.getRawDataRefImpl(); |
350 | if (MainBinary.is64Bit()) |
351 | handleStabDebugMapEntry(STE: MainBinary.getSymbol64TableEntry(DRI), F); |
352 | else |
353 | handleStabDebugMapEntry(STE: MainBinary.getSymbolTableEntry(DRI), F); |
354 | } |
355 | } |
356 | |
357 | std::unique_ptr<DebugMap> |
358 | MachODebugMapParser::parseOneBinary(const MachOObjectFile &MainBinary, |
359 | StringRef BinaryPath) { |
360 | Result = std::make_unique<DebugMap>(args: MainBinary.getArchTriple(), args&: BinaryPath, |
361 | args: MainBinary.getUuid()); |
362 | loadMainBinarySymbols(MainBinary); |
363 | MainBinaryStrings = MainBinary.getStringTableData(); |
364 | |
365 | // Static archives can contain multiple object files with identical names, in |
366 | // which case the timestamp is used to disambiguate. However, if both are |
367 | // identical, there's no way to tell them apart. Detect this and skip |
368 | // duplicate debug map objects. |
369 | llvm::DenseSet<OSO> OSOs; |
370 | llvm::SmallSet<OSO, 4> Duplicates; |
371 | |
372 | // Iterate over all the STABS to find duplicate OSO entries. |
373 | handleStabDebugMap(MainBinary, |
374 | F: [&](uint32_t StringIndex, uint8_t Type, |
375 | uint8_t SectionIndex, uint16_t Flags, uint64_t Value) { |
376 | handleStabOSOEntry(StringIndex, Type, SectionIndex, |
377 | Flags, Value, OSOs, Duplicates); |
378 | }); |
379 | |
380 | // Print an informative warning with the duplicate object file name and time |
381 | // stamp. |
382 | for (const auto &OSO : Duplicates) { |
383 | std::string Buffer; |
384 | llvm::raw_string_ostream OS(Buffer); |
385 | OS << sys::TimePoint<std::chrono::seconds>(sys::toTimePoint(T: OSO.second)); |
386 | Warning(Msg: "skipping debug map object with duplicate name and timestamp: " + |
387 | OS.str() + Twine(" " ) + Twine(OSO.first)); |
388 | } |
389 | |
390 | // Build the debug map by iterating over the STABS again but ignore the |
391 | // duplicate debug objects. |
392 | handleStabDebugMap(MainBinary, F: [&](uint32_t StringIndex, uint8_t Type, |
393 | uint8_t SectionIndex, uint16_t Flags, |
394 | uint64_t Value) { |
395 | handleStabSymbolTableEntry(StringIndex, Type, SectionIndex, Flags, Value, |
396 | Duplicates); |
397 | }); |
398 | |
399 | resetParserState(); |
400 | return std::move(Result); |
401 | } |
402 | |
403 | // Table that maps Darwin's Mach-O stab constants to strings to allow printing. |
404 | // llvm-nm has very similar code, the strings used here are however slightly |
405 | // different and part of the interface of dsymutil (some project's build-systems |
406 | // parse the ouptut of dsymutil -s), thus they shouldn't be changed. |
407 | struct DarwinStabName { |
408 | uint8_t NType; |
409 | const char *Name; |
410 | }; |
411 | |
412 | const struct DarwinStabName DarwinStabNames[] = {{.NType: MachO::N_GSYM, .Name: "N_GSYM" }, |
413 | {.NType: MachO::N_FNAME, .Name: "N_FNAME" }, |
414 | {.NType: MachO::N_FUN, .Name: "N_FUN" }, |
415 | {.NType: MachO::N_STSYM, .Name: "N_STSYM" }, |
416 | {.NType: MachO::N_LCSYM, .Name: "N_LCSYM" }, |
417 | {.NType: MachO::N_BNSYM, .Name: "N_BNSYM" }, |
418 | {.NType: MachO::N_PC, .Name: "N_PC" }, |
419 | {.NType: MachO::N_AST, .Name: "N_AST" }, |
420 | {.NType: MachO::N_OPT, .Name: "N_OPT" }, |
421 | {.NType: MachO::N_RSYM, .Name: "N_RSYM" }, |
422 | {.NType: MachO::N_SLINE, .Name: "N_SLINE" }, |
423 | {.NType: MachO::N_ENSYM, .Name: "N_ENSYM" }, |
424 | {.NType: MachO::N_SSYM, .Name: "N_SSYM" }, |
425 | {.NType: MachO::N_SO, .Name: "N_SO" }, |
426 | {.NType: MachO::N_OSO, .Name: "N_OSO" }, |
427 | {.NType: MachO::N_LIB, .Name: "N_LIB" }, |
428 | {.NType: MachO::N_LSYM, .Name: "N_LSYM" }, |
429 | {.NType: MachO::N_BINCL, .Name: "N_BINCL" }, |
430 | {.NType: MachO::N_SOL, .Name: "N_SOL" }, |
431 | {.NType: MachO::N_PARAMS, .Name: "N_PARAM" }, |
432 | {.NType: MachO::N_VERSION, .Name: "N_VERS" }, |
433 | {.NType: MachO::N_OLEVEL, .Name: "N_OLEV" }, |
434 | {.NType: MachO::N_PSYM, .Name: "N_PSYM" }, |
435 | {.NType: MachO::N_EINCL, .Name: "N_EINCL" }, |
436 | {.NType: MachO::N_ENTRY, .Name: "N_ENTRY" }, |
437 | {.NType: MachO::N_LBRAC, .Name: "N_LBRAC" }, |
438 | {.NType: MachO::N_EXCL, .Name: "N_EXCL" }, |
439 | {.NType: MachO::N_RBRAC, .Name: "N_RBRAC" }, |
440 | {.NType: MachO::N_BCOMM, .Name: "N_BCOMM" }, |
441 | {.NType: MachO::N_ECOMM, .Name: "N_ECOMM" }, |
442 | {.NType: MachO::N_ECOML, .Name: "N_ECOML" }, |
443 | {.NType: MachO::N_LENG, .Name: "N_LENG" }, |
444 | {.NType: 0, .Name: nullptr}}; |
445 | |
446 | static const char *getDarwinStabString(uint8_t NType) { |
447 | for (unsigned i = 0; DarwinStabNames[i].Name; i++) { |
448 | if (DarwinStabNames[i].NType == NType) |
449 | return DarwinStabNames[i].Name; |
450 | } |
451 | return nullptr; |
452 | } |
453 | |
454 | void MachODebugMapParser::(raw_ostream &OS, StringRef Arch) { |
455 | OS << "-----------------------------------" |
456 | "-----------------------------------\n" ; |
457 | OS << "Symbol table for: '" << BinaryPath << "' (" << Arch.data() << ")\n" ; |
458 | OS << "-----------------------------------" |
459 | "-----------------------------------\n" ; |
460 | OS << "Index n_strx n_type n_sect n_desc n_value\n" ; |
461 | OS << "======== -------- ------------------ ------ ------ ----------------\n" ; |
462 | } |
463 | |
464 | void MachODebugMapParser::dumpSymTabEntry(raw_ostream &OS, uint64_t Index, |
465 | uint32_t StringIndex, uint8_t Type, |
466 | uint8_t SectionIndex, uint16_t Flags, |
467 | uint64_t Value) { |
468 | // Index |
469 | OS << '[' << format_decimal(N: Index, Width: 6) |
470 | << "] " |
471 | // n_strx |
472 | << format_hex_no_prefix(N: StringIndex, Width: 8) |
473 | << ' ' |
474 | // n_type... |
475 | << format_hex_no_prefix(N: Type, Width: 2) << " (" ; |
476 | |
477 | if (Type & MachO::N_STAB) |
478 | OS << left_justify(Str: getDarwinStabString(NType: Type), Width: 13); |
479 | else { |
480 | if (Type & MachO::N_PEXT) |
481 | OS << "PEXT " ; |
482 | else |
483 | OS << " " ; |
484 | switch (Type & MachO::N_TYPE) { |
485 | case MachO::N_UNDF: // 0x0 undefined, n_sect == NO_SECT |
486 | OS << "UNDF" ; |
487 | break; |
488 | case MachO::N_ABS: // 0x2 absolute, n_sect == NO_SECT |
489 | OS << "ABS " ; |
490 | break; |
491 | case MachO::N_SECT: // 0xe defined in section number n_sect |
492 | OS << "SECT" ; |
493 | break; |
494 | case MachO::N_PBUD: // 0xc prebound undefined (defined in a dylib) |
495 | OS << "PBUD" ; |
496 | break; |
497 | case MachO::N_INDR: // 0xa indirect |
498 | OS << "INDR" ; |
499 | break; |
500 | default: |
501 | OS << format_hex_no_prefix(N: Type, Width: 2) << " " ; |
502 | break; |
503 | } |
504 | if (Type & MachO::N_EXT) |
505 | OS << " EXT" ; |
506 | else |
507 | OS << " " ; |
508 | } |
509 | |
510 | OS << ") " |
511 | // n_sect |
512 | << format_hex_no_prefix(N: SectionIndex, Width: 2) |
513 | << " " |
514 | // n_desc |
515 | << format_hex_no_prefix(N: Flags, Width: 4) |
516 | << " " |
517 | // n_value |
518 | << format_hex_no_prefix(N: Value, Width: 16); |
519 | |
520 | const char *Name = &MainBinaryStrings.data()[StringIndex]; |
521 | if (Name && Name[0]) |
522 | OS << " '" << Name << "'" ; |
523 | |
524 | OS << "\n" ; |
525 | } |
526 | |
527 | void MachODebugMapParser::dumpOneBinaryStab(const MachOObjectFile &MainBinary, |
528 | StringRef BinaryPath) { |
529 | loadMainBinarySymbols(MainBinary); |
530 | MainBinaryStrings = MainBinary.getStringTableData(); |
531 | raw_ostream &OS(llvm::outs()); |
532 | |
533 | dumpSymTabHeader(OS, Arch: getArchName(Obj: MainBinary)); |
534 | uint64_t Idx = 0; |
535 | for (const SymbolRef &Symbol : MainBinary.symbols()) { |
536 | const DataRefImpl &DRI = Symbol.getRawDataRefImpl(); |
537 | if (MainBinary.is64Bit()) |
538 | dumpSymTabEntry(OS, Index: Idx, STE: MainBinary.getSymbol64TableEntry(DRI)); |
539 | else |
540 | dumpSymTabEntry(OS, Index: Idx, STE: MainBinary.getSymbolTableEntry(DRI)); |
541 | Idx++; |
542 | } |
543 | |
544 | OS << "\n\n" ; |
545 | resetParserState(); |
546 | } |
547 | |
548 | static bool shouldLinkArch(SmallVectorImpl<StringRef> &Archs, StringRef Arch) { |
549 | if (Archs.empty() || is_contained(Range&: Archs, Element: "all" ) || is_contained(Range&: Archs, Element: "*" )) |
550 | return true; |
551 | |
552 | if (Arch.starts_with(Prefix: "arm" ) && Arch != "arm64" && is_contained(Range&: Archs, Element: "arm" )) |
553 | return true; |
554 | |
555 | SmallString<16> ArchName = Arch; |
556 | if (Arch.starts_with(Prefix: "thumb" )) |
557 | ArchName = ("arm" + Arch.substr(Start: 5)).str(); |
558 | |
559 | return is_contained(Range&: Archs, Element: ArchName); |
560 | } |
561 | |
562 | bool MachODebugMapParser::dumpStab() { |
563 | auto ObjectEntry = BinHolder.getObjectEntry(Filename: BinaryPath); |
564 | if (!ObjectEntry) { |
565 | auto Err = ObjectEntry.takeError(); |
566 | WithColor::error() << "cannot load '" << BinaryPath |
567 | << "': " << toString(E: std::move(Err)) << '\n'; |
568 | return false; |
569 | } |
570 | |
571 | auto Objects = ObjectEntry->getObjectsAs<MachOObjectFile>(); |
572 | if (!Objects) { |
573 | auto Err = Objects.takeError(); |
574 | WithColor::error() << "cannot get '" << BinaryPath |
575 | << "' as MachO file: " << toString(E: std::move(Err)) |
576 | << "\n" ; |
577 | return false; |
578 | } |
579 | |
580 | for (const auto *Object : *Objects) |
581 | if (shouldLinkArch(Archs, Arch: Object->getArchTriple().getArchName())) |
582 | dumpOneBinaryStab(MainBinary: *Object, BinaryPath); |
583 | |
584 | return true; |
585 | } |
586 | |
587 | /// This main parsing routine tries to open the main binary and if |
588 | /// successful iterates over the STAB entries. The real parsing is |
589 | /// done in handleStabSymbolTableEntry. |
590 | ErrorOr<std::vector<std::unique_ptr<DebugMap>>> MachODebugMapParser::parse() { |
591 | auto ObjectEntry = BinHolder.getObjectEntry(Filename: BinaryPath); |
592 | if (!ObjectEntry) { |
593 | return errorToErrorCode(Err: ObjectEntry.takeError()); |
594 | } |
595 | |
596 | auto Objects = ObjectEntry->getObjectsAs<MachOObjectFile>(); |
597 | if (!Objects) { |
598 | return errorToErrorCode(Err: Objects.takeError()); |
599 | } |
600 | |
601 | std::vector<std::unique_ptr<DebugMap>> Results; |
602 | for (const auto *Object : *Objects) |
603 | if (shouldLinkArch(Archs, Arch: Object->getArchTriple().getArchName())) |
604 | Results.push_back(x: parseOneBinary(MainBinary: *Object, BinaryPath)); |
605 | |
606 | return std::move(Results); |
607 | } |
608 | |
609 | void MachODebugMapParser::handleStabOSOEntry( |
610 | uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags, |
611 | uint64_t Value, llvm::DenseSet<OSO> &OSOs, |
612 | llvm::SmallSet<OSO, 4> &Duplicates) { |
613 | if (Type != MachO::N_OSO) |
614 | return; |
615 | |
616 | OSO O(&MainBinaryStrings.data()[StringIndex], Value); |
617 | if (!OSOs.insert(V: O).second) |
618 | Duplicates.insert(V: O); |
619 | } |
620 | |
621 | /// Interpret the STAB entries to fill the DebugMap. |
622 | void MachODebugMapParser::handleStabSymbolTableEntry( |
623 | uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags, |
624 | uint64_t Value, const llvm::SmallSet<OSO, 4> &Duplicates) { |
625 | if (!(Type & MachO::N_STAB)) |
626 | return; |
627 | |
628 | const char *Name = &MainBinaryStrings.data()[StringIndex]; |
629 | |
630 | // An N_LIB entry represents the start of a new library file description. |
631 | if (Type == MachO::N_LIB) { |
632 | switchToNewLibDebugMapObject(Filename: Name, Timestamp: sys::toTimePoint(T: Value)); |
633 | return; |
634 | } |
635 | |
636 | // An N_OSO entry represents the start of a new object file description. |
637 | // If an N_LIB entry was present, this is parsed only if the library |
638 | // dSYM file could not be found. |
639 | if (Type == MachO::N_OSO) { |
640 | if (!CurrentDebugMapObject || |
641 | CurrentDebugMapObject->getType() != MachO::N_LIB) { |
642 | if (Duplicates.count(V: OSO(Name, Value))) { |
643 | SkipDebugMapObject = true; |
644 | return; |
645 | } |
646 | switchToNewDebugMapObject(Filename: Name, Timestamp: sys::toTimePoint(T: Value)); |
647 | } |
648 | return; |
649 | } |
650 | |
651 | if (SkipDebugMapObject) |
652 | return; |
653 | |
654 | if (Type == MachO::N_AST) { |
655 | SmallString<80> Path(PathPrefix); |
656 | sys::path::append(path&: Path, a: Name); |
657 | Result->addDebugMapObject(ObjectFilePath: Path, Timestamp: sys::toTimePoint(T: Value), Type); |
658 | return; |
659 | } |
660 | |
661 | // If the last N_OSO object file wasn't found, CurrentDebugMapObject will be |
662 | // null. Do not update anything until we find the next valid N_OSO entry. |
663 | if (!CurrentDebugMapObject) |
664 | return; |
665 | |
666 | uint32_t Size = 0; |
667 | switch (Type) { |
668 | case MachO::N_GSYM: |
669 | // This is a global variable. We need to query the main binary |
670 | // symbol table to find its address as it might not be in the |
671 | // debug map (for common symbols). |
672 | Value = getMainBinarySymbolAddress(Name); |
673 | break; |
674 | case MachO::N_FUN: |
675 | // Functions are scopes in STABS. They have an end marker that |
676 | // contains the function size. |
677 | if (Name[0] == '\0') { |
678 | Size = Value; |
679 | Value = CurrentFunctionAddress; |
680 | Name = CurrentFunctionName; |
681 | break; |
682 | } else { |
683 | CurrentFunctionName = Name; |
684 | CurrentFunctionAddress = Value; |
685 | return; |
686 | } |
687 | case MachO::N_STSYM: |
688 | break; |
689 | default: |
690 | return; |
691 | } |
692 | |
693 | auto ObjectSymIt = CurrentObjectAddresses.find(Key: Name); |
694 | |
695 | // If the name of a (non-static) symbol is not in the current object, we |
696 | // check all its aliases from the main binary. |
697 | if (ObjectSymIt == CurrentObjectAddresses.end() && Type != MachO::N_STSYM) { |
698 | if (SeenAliasValues.count(V: Value) == 0) { |
699 | auto Aliases = getMainBinarySymbolNames(Value); |
700 | for (const auto &Alias : Aliases) { |
701 | auto It = CurrentObjectAddresses.find(Key: Alias); |
702 | if (It != CurrentObjectAddresses.end()) { |
703 | auto AliasValue = It->getValue(); |
704 | for (const auto &Alias : Aliases) |
705 | CurrentObjectAliasMap[Alias] = AliasValue; |
706 | break; |
707 | } |
708 | } |
709 | SeenAliasValues.insert(V: Value); |
710 | } |
711 | |
712 | auto AliasIt = CurrentObjectAliasMap.find(Key: Name); |
713 | if (AliasIt != CurrentObjectAliasMap.end()) |
714 | ObjectSymIt = AliasIt; |
715 | } |
716 | |
717 | // ThinLTO adds a unique suffix to exported private symbols. |
718 | if (ObjectSymIt == CurrentObjectAddresses.end()) { |
719 | for (auto Iter = CurrentObjectAddresses.begin(); |
720 | Iter != CurrentObjectAddresses.end(); ++Iter) { |
721 | llvm::StringRef SymbolName = Iter->getKey(); |
722 | auto Pos = SymbolName.rfind(Str: ".llvm." ); |
723 | if (Pos != llvm::StringRef::npos && SymbolName.substr(Start: 0, N: Pos) == Name) { |
724 | ObjectSymIt = Iter; |
725 | break; |
726 | } |
727 | } |
728 | } |
729 | |
730 | if (ObjectSymIt == CurrentObjectAddresses.end()) { |
731 | Warning(Msg: "could not find symbol '" + Twine(Name) + "' in object file '" + |
732 | CurrentDebugMapObject->getObjectFilename() + "'" ); |
733 | return; |
734 | } |
735 | |
736 | if (!CurrentDebugMapObject->addSymbol(SymName: Name, ObjectAddress: ObjectSymIt->getValue(), LinkedAddress: Value, |
737 | Size)) { |
738 | Warning(Msg: Twine("failed to insert symbol '" ) + Name + "' in the debug map." ); |
739 | return; |
740 | } |
741 | } |
742 | |
743 | /// Load the current object file symbols into CurrentObjectAddresses. |
744 | void MachODebugMapParser::loadCurrentObjectFileSymbols( |
745 | const object::MachOObjectFile &Obj) { |
746 | CurrentObjectAddresses.clear(); |
747 | |
748 | for (auto Sym : Obj.symbols()) { |
749 | uint64_t Addr = cantFail(ValOrErr: Sym.getValue()); |
750 | Expected<StringRef> Name = Sym.getName(); |
751 | if (!Name) { |
752 | auto Err = Name.takeError(); |
753 | Warning(Msg: "failed to get symbol name: " + toString(E: std::move(Err)), |
754 | File: Obj.getFileName()); |
755 | continue; |
756 | } |
757 | // The value of some categories of symbols isn't meaningful. For |
758 | // example common symbols store their size in the value field, not |
759 | // their address. Absolute symbols have a fixed address that can |
760 | // conflict with standard symbols. These symbols (especially the |
761 | // common ones), might still be referenced by relocations. These |
762 | // relocations will use the symbol itself, and won't need an |
763 | // object file address. The object file address field is optional |
764 | // in the DebugMap, leave it unassigned for these symbols. |
765 | uint32_t Flags = cantFail(ValOrErr: Sym.getFlags()); |
766 | if (Flags & SymbolRef::SF_Absolute) { |
767 | CurrentObjectAddresses[*Name] = std::nullopt; |
768 | } else if (Flags & SymbolRef::SF_Common) { |
769 | CurrentObjectAddresses[*Name] = std::nullopt; |
770 | CommonSymbols.push_back(x: std::string(*Name)); |
771 | } else { |
772 | CurrentObjectAddresses[*Name] = Addr; |
773 | } |
774 | } |
775 | } |
776 | |
777 | /// Lookup a symbol address in the main binary symbol table. The |
778 | /// parser only needs to query common symbols, thus not every symbol's |
779 | /// address is available through this function. |
780 | uint64_t MachODebugMapParser::getMainBinarySymbolAddress(StringRef Name) { |
781 | auto Sym = MainBinarySymbolAddresses.find(Key: Name); |
782 | if (Sym == MainBinarySymbolAddresses.end()) |
783 | return 0; |
784 | return Sym->second; |
785 | } |
786 | |
787 | /// Get all symbol names in the main binary for the given value. |
788 | std::vector<StringRef> |
789 | MachODebugMapParser::getMainBinarySymbolNames(uint64_t Value) { |
790 | std::vector<StringRef> Names; |
791 | for (const auto &Entry : MainBinarySymbolAddresses) { |
792 | if (Entry.second == Value) |
793 | Names.push_back(x: Entry.first()); |
794 | } |
795 | return Names; |
796 | } |
797 | |
798 | /// Load the interesting main binary symbols' addresses into |
799 | /// MainBinarySymbolAddresses. |
800 | void MachODebugMapParser::loadMainBinarySymbols( |
801 | const MachOObjectFile &MainBinary) { |
802 | section_iterator Section = MainBinary.section_end(); |
803 | MainBinarySymbolAddresses.clear(); |
804 | for (const auto &Sym : MainBinary.symbols()) { |
805 | Expected<SymbolRef::Type> TypeOrErr = Sym.getType(); |
806 | if (!TypeOrErr) { |
807 | auto Err = TypeOrErr.takeError(); |
808 | Warning(Msg: "failed to get symbol type: " + toString(E: std::move(Err)), |
809 | File: MainBinary.getFileName()); |
810 | continue; |
811 | } |
812 | SymbolRef::Type Type = *TypeOrErr; |
813 | // Skip undefined and STAB entries. |
814 | if ((Type == SymbolRef::ST_Debug) || (Type == SymbolRef::ST_Unknown)) |
815 | continue; |
816 | // In theory, the only symbols of interest are the global variables. These |
817 | // are the only ones that need to be queried because the address of common |
818 | // data won't be described in the debug map. All other addresses should be |
819 | // fetched for the debug map. In reality, by playing with 'ld -r' and |
820 | // export lists, you can get symbols described as N_GSYM in the debug map, |
821 | // but associated with a local symbol. Gather all the symbols, but prefer |
822 | // the global ones. |
823 | uint8_t SymType = |
824 | MainBinary.getSymbolTableEntry(DRI: Sym.getRawDataRefImpl()).n_type; |
825 | bool Extern = SymType & (MachO::N_EXT | MachO::N_PEXT); |
826 | Expected<section_iterator> SectionOrErr = Sym.getSection(); |
827 | if (!SectionOrErr) { |
828 | auto Err = TypeOrErr.takeError(); |
829 | Warning(Msg: "failed to get symbol section: " + toString(E: std::move(Err)), |
830 | File: MainBinary.getFileName()); |
831 | continue; |
832 | } |
833 | Section = *SectionOrErr; |
834 | if ((Section == MainBinary.section_end() || Section->isText()) && !Extern) |
835 | continue; |
836 | uint64_t Addr = cantFail(ValOrErr: Sym.getValue()); |
837 | Expected<StringRef> NameOrErr = Sym.getName(); |
838 | if (!NameOrErr) { |
839 | auto Err = NameOrErr.takeError(); |
840 | Warning(Msg: "failed to get symbol name: " + toString(E: std::move(Err)), |
841 | File: MainBinary.getFileName()); |
842 | continue; |
843 | } |
844 | StringRef Name = *NameOrErr; |
845 | if (Name.size() == 0 || Name[0] == '\0') |
846 | continue; |
847 | // Override only if the new key is global. |
848 | if (Extern) |
849 | MainBinarySymbolAddresses[Name] = Addr; |
850 | else |
851 | MainBinarySymbolAddresses.try_emplace(Key: Name, Args&: Addr); |
852 | } |
853 | } |
854 | |
855 | namespace llvm { |
856 | namespace dsymutil { |
857 | llvm::ErrorOr<std::vector<std::unique_ptr<DebugMap>>> |
858 | parseDebugMap(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, |
859 | StringRef InputFile, ArrayRef<std::string> Archs, |
860 | ArrayRef<std::string> DSYMSearchPaths, StringRef PrependPath, |
861 | StringRef VariantSuffix, bool Verbose, bool InputIsYAML) { |
862 | if (InputIsYAML) |
863 | return DebugMap::parseYAMLDebugMap(InputFile, PrependPath, Verbose); |
864 | |
865 | MachODebugMapParser Parser(VFS, InputFile, Archs, DSYMSearchPaths, |
866 | PrependPath, VariantSuffix, Verbose); |
867 | |
868 | return Parser.parse(); |
869 | } |
870 | |
871 | bool dumpStab(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, |
872 | StringRef InputFile, ArrayRef<std::string> Archs, |
873 | ArrayRef<std::string> DSYMSearchPaths, StringRef PrependPath, |
874 | StringRef VariantSuffix) { |
875 | MachODebugMapParser Parser(VFS, InputFile, Archs, DSYMSearchPaths, |
876 | PrependPath, VariantSuffix, false); |
877 | return Parser.dumpStab(); |
878 | } |
879 | } // namespace dsymutil |
880 | } // namespace llvm |
881 | |