1//===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "obj2yaml.h"
10#include "llvm/DebugInfo/DWARF/DWARFContext.h"
11#include "llvm/Object/MachOUniversal.h"
12#include "llvm/ObjectYAML/DWARFYAML.h"
13#include "llvm/ObjectYAML/ObjectYAML.h"
14#include "llvm/Support/Errc.h"
15#include "llvm/Support/Error.h"
16#include "llvm/Support/ErrorHandling.h"
17#include "llvm/Support/LEB128.h"
18#include "llvm/Support/SystemZ/zOSSupport.h"
19
20#include <string.h> // for memcpy
21
22using namespace llvm;
23
24class MachODumper {
25
26 template <typename StructType>
27 Expected<const char *> processLoadCommandData(
28 MachOYAML::LoadCommand &LC,
29 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
30 MachOYAML::Object &Y);
31
32 const object::MachOObjectFile &Obj;
33 std::unique_ptr<DWARFContext> DWARFCtx;
34 unsigned RawSegment;
35 void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
36 Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
37 void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
38 void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
39 void dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y);
40 void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
41 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
42 void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
43 void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y);
44 void dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y);
45 void dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y);
46 void dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y);
47
48 template <typename SectionType>
49 Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec,
50 size_t SecIndex);
51 template <typename SectionType>
52 Expected<MachOYAML::Section> constructSection(SectionType Sec,
53 size_t SecIndex);
54 template <typename SectionType, typename SegmentType>
55 Expected<const char *>
56 extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
57 std::vector<MachOYAML::Section> &Sections,
58 MachOYAML::Object &Y);
59
60public:
61 MachODumper(const object::MachOObjectFile &O,
62 std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments)
63 : Obj(O), DWARFCtx(std::move(DCtx)), RawSegment(RawSegments) {}
64 Expected<std::unique_ptr<MachOYAML::Object>> dump();
65};
66
// Case body for one known load-command kind. It is meant to be expanded inside
// the switch in MachODumper::dumpLoadCommands (presumably once per entry of
// MachO.def, which is included there) and relies on that function's locals
// LC, LoadCmd, EndPtr and Y. It copies the fixed-size struct out of the file,
// byte-swaps it when the file and host endianness differ, and then lets
// processLoadCommandData consume the command-specific payload.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName: {                                                        \
    memcpy((void *)&(LC.Data.LCStruct##_data), LoadCmd.Ptr,                    \
           sizeof(MachO::LCStruct));                                           \
    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)                       \
      MachO::swapStruct(LC.Data.LCStruct##_data);                              \
    Expected<const char *> ExpectedEndPtr =                                    \
        processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y);              \
    if (!ExpectedEndPtr)                                                       \
      return ExpectedEndPtr.takeError();                                       \
    EndPtr = *ExpectedEndPtr;                                                  \
    break;                                                                     \
  }
79
80template <typename SectionType>
81Expected<MachOYAML::Section>
82MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) {
83 MachOYAML::Section TempSec;
84 memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
85 memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
86 TempSec.addr = Sec.addr;
87 TempSec.size = Sec.size;
88 TempSec.offset = Sec.offset;
89 TempSec.align = Sec.align;
90 TempSec.reloff = Sec.reloff;
91 TempSec.nreloc = Sec.nreloc;
92 TempSec.flags = Sec.flags;
93 TempSec.reserved1 = Sec.reserved1;
94 TempSec.reserved2 = Sec.reserved2;
95 TempSec.reserved3 = 0;
96 if (!MachO::isVirtualSection(type: Sec.flags & MachO::SECTION_TYPE))
97 TempSec.content =
98 yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
99
100 if (Expected<object::SectionRef> SecRef = Obj.getSection(SectionIndex: SecIndex)) {
101 TempSec.relocations.reserve(n: TempSec.nreloc);
102 for (const object::RelocationRef &Reloc : SecRef->relocations()) {
103 const object::DataRefImpl Rel = Reloc.getRawDataRefImpl();
104 const MachO::any_relocation_info RE = Obj.getRelocation(Rel);
105 MachOYAML::Relocation R;
106 R.address = Obj.getAnyRelocationAddress(RE);
107 R.is_pcrel = Obj.getAnyRelocationPCRel(RE);
108 R.length = Obj.getAnyRelocationLength(RE);
109 R.type = Obj.getAnyRelocationType(RE);
110 R.is_scattered = Obj.isRelocationScattered(RE);
111 R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE));
112 R.is_extern =
113 (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE));
114 R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0);
115 TempSec.relocations.push_back(x: R);
116 }
117 } else {
118 return SecRef.takeError();
119 }
120 return TempSec;
121}
122
123template <>
124Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec,
125 size_t SecIndex) {
126 Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
127 if (TempSec)
128 TempSec->reserved3 = 0;
129 return TempSec;
130}
131
132template <>
133Expected<MachOYAML::Section>
134MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) {
135 Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
136 if (TempSec)
137 TempSec->reserved3 = Sec.reserved3;
138 return TempSec;
139}
140
141static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx,
142 DWARFYAML::Data &DWARF) {
143 if (SecName == "__debug_abbrev")
144 return dumpDebugAbbrev(DCtx, Y&: DWARF);
145 if (SecName == "__debug_aranges")
146 return dumpDebugARanges(DCtx, Y&: DWARF);
147 if (SecName == "__debug_info") {
148 dumpDebugInfo(DCtx, Y&: DWARF);
149 return Error::success();
150 }
151 if (SecName == "__debug_line") {
152 dumpDebugLines(DCtx, Y&: DWARF);
153 return Error::success();
154 }
155 if (SecName.starts_with(Prefix: "__debug_pub")) {
156 // FIXME: We should extract pub-section dumpers from this function.
157 dumpDebugPubSections(DCtx, Y&: DWARF);
158 return Error::success();
159 }
160 if (SecName == "__debug_ranges")
161 return dumpDebugRanges(DCtx, Y&: DWARF);
162 if (SecName == "__debug_str")
163 return dumpDebugStrings(DCtx, Y&: DWARF);
164 return createStringError(EC: errc::not_supported,
165 S: "dumping " + SecName + " section is not supported");
166}
167
168template <typename SectionType, typename SegmentType>
169Expected<const char *> MachODumper::extractSections(
170 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
171 std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) {
172 auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
173 const SectionType *Curr =
174 reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
175 for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
176 SectionType Sec;
177 memcpy((void *)&Sec, Curr, sizeof(SectionType));
178 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
179 MachO::swapStruct(Sec);
180 // For MachO section indices start from 1.
181 if (Expected<MachOYAML::Section> S =
182 constructSection(Sec, Sections.size() + 1)) {
183 StringRef SecName(S->sectname);
184
185 // Copy data sections if requested.
186 if ((RawSegment & ::RawSegments::data) &&
187 StringRef(S->segname).starts_with(Prefix: "__DATA"))
188 S->content =
189 yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
190
191 if (SecName.starts_with(Prefix: "__debug_")) {
192 // If the DWARF section cannot be successfully parsed, emit raw content
193 // instead of an entry in the DWARF section of the YAML.
194 if (Error Err = dumpDebugSection(SecName, DCtx&: *DWARFCtx, DWARF&: Y.DWARF))
195 consumeError(Err: std::move(Err));
196 else
197 S->content.reset();
198 }
199 Sections.push_back(x: std::move(*S));
200 } else
201 return S.takeError();
202 }
203 return reinterpret_cast<const char *>(Curr);
204}
205
206template <typename StructType>
207Expected<const char *> MachODumper::processLoadCommandData(
208 MachOYAML::LoadCommand &LC,
209 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
210 MachOYAML::Object &Y) {
211 return LoadCmd.Ptr + sizeof(StructType);
212}
213
214template <>
215Expected<const char *>
216MachODumper::processLoadCommandData<MachO::segment_command>(
217 MachOYAML::LoadCommand &LC,
218 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
219 MachOYAML::Object &Y) {
220 return extractSections<MachO::section, MachO::segment_command>(
221 LoadCmd, Sections&: LC.Sections, Y);
222}
223
224template <>
225Expected<const char *>
226MachODumper::processLoadCommandData<MachO::segment_command_64>(
227 MachOYAML::LoadCommand &LC,
228 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
229 MachOYAML::Object &Y) {
230 return extractSections<MachO::section_64, MachO::segment_command_64>(
231 LoadCmd, Sections&: LC.Sections, Y);
232}
233
234template <typename StructType>
235const char *
236readString(MachOYAML::LoadCommand &LC,
237 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
238 auto Start = LoadCmd.Ptr + sizeof(StructType);
239 auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
240 auto Size = strnlen(string: Start, maxlen: MaxSize);
241 LC.Content = StringRef(Start, Size).str();
242 return Start + Size;
243}
244
245template <>
246Expected<const char *>
247MachODumper::processLoadCommandData<MachO::dylib_command>(
248 MachOYAML::LoadCommand &LC,
249 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
250 MachOYAML::Object &Y) {
251 return readString<MachO::dylib_command>(LC, LoadCmd);
252}
253
254template <>
255Expected<const char *>
256MachODumper::processLoadCommandData<MachO::dylinker_command>(
257 MachOYAML::LoadCommand &LC,
258 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
259 MachOYAML::Object &Y) {
260 return readString<MachO::dylinker_command>(LC, LoadCmd);
261}
262
263template <>
264Expected<const char *>
265MachODumper::processLoadCommandData<MachO::rpath_command>(
266 MachOYAML::LoadCommand &LC,
267 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
268 MachOYAML::Object &Y) {
269 return readString<MachO::rpath_command>(LC, LoadCmd);
270}
271
272template <>
273Expected<const char *>
274MachODumper::processLoadCommandData<MachO::build_version_command>(
275 MachOYAML::LoadCommand &LC,
276 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
277 MachOYAML::Object &Y) {
278 auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command);
279 auto NTools = LC.Data.build_version_command_data.ntools;
280 for (unsigned i = 0; i < NTools; ++i) {
281 auto Curr = Start + i * sizeof(MachO::build_tool_version);
282 MachO::build_tool_version BV;
283 memcpy(dest: (void *)&BV, src: Curr, n: sizeof(MachO::build_tool_version));
284 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
285 MachO::swapStruct(C&: BV);
286 LC.Tools.push_back(x: BV);
287 }
288 return Start + NTools * sizeof(MachO::build_tool_version);
289}
290
291Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
292 auto Y = std::make_unique<MachOYAML::Object>();
293 Y->IsLittleEndian = Obj.isLittleEndian();
294 dumpHeader(Y);
295 if (Error Err = dumpLoadCommands(Y))
296 return std::move(Err);
297 if (RawSegment & ::RawSegments::linkedit)
298 Y->RawLinkEditSegment =
299 yaml::BinaryRef(Obj.getSegmentContents(SegmentName: "__LINKEDIT"));
300 else
301 dumpLinkEdit(Y);
302
303 return std::move(Y);
304}
305
306void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) {
307 Y->Header.magic = Obj.getHeader().magic;
308 Y->Header.cputype = Obj.getHeader().cputype;
309 Y->Header.cpusubtype = Obj.getHeader().cpusubtype;
310 Y->Header.filetype = Obj.getHeader().filetype;
311 Y->Header.ncmds = Obj.getHeader().ncmds;
312 Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds;
313 Y->Header.flags = Obj.getHeader().flags;
314 Y->Header.reserved = 0;
315}
316
317Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
318 for (auto LoadCmd : Obj.load_commands()) {
319 MachOYAML::LoadCommand LC;
320 const char *EndPtr = LoadCmd.Ptr;
321 switch (LoadCmd.C.cmd) {
322 default:
323 memcpy(dest: (void *)&(LC.Data.load_command_data), src: LoadCmd.Ptr,
324 n: sizeof(MachO::load_command));
325 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
326 MachO::swapStruct(lc&: LC.Data.load_command_data);
327 if (Expected<const char *> ExpectedEndPtr =
328 processLoadCommandData<MachO::load_command>(LC, LoadCmd, Y&: *Y))
329 EndPtr = *ExpectedEndPtr;
330 else
331 return ExpectedEndPtr.takeError();
332 break;
333#include "llvm/BinaryFormat/MachO.def"
334 }
335 auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
336 if (!std::all_of(first: EndPtr, last: &EndPtr[RemainingBytes],
337 pred: [](const char C) { return C == 0; })) {
338 LC.PayloadBytes.insert(position: LC.PayloadBytes.end(), first: EndPtr,
339 last: &EndPtr[RemainingBytes]);
340 RemainingBytes = 0;
341 }
342 LC.ZeroPadBytes = RemainingBytes;
343 Y->LoadCommands.push_back(x: std::move(LC));
344 }
345 return Error::success();
346}
347
348void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
349 dumpRebaseOpcodes(Y);
350 dumpBindOpcodes(BindOpcodes&: Y->LinkEdit.BindOpcodes, OpcodeBuffer: Obj.getDyldInfoBindOpcodes());
351 dumpBindOpcodes(BindOpcodes&: Y->LinkEdit.WeakBindOpcodes,
352 OpcodeBuffer: Obj.getDyldInfoWeakBindOpcodes());
353 dumpBindOpcodes(BindOpcodes&: Y->LinkEdit.LazyBindOpcodes, OpcodeBuffer: Obj.getDyldInfoLazyBindOpcodes(),
354 Lazy: true);
355 dumpExportTrie(Y);
356 dumpSymbols(Y);
357 dumpIndirectSymbols(Y);
358 dumpFunctionStarts(Y);
359 dumpChainedFixups(Y);
360 dumpDataInCode(Y);
361}
362
363void MachODumper::dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y) {
364 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
365
366 auto FunctionStarts = Obj.getFunctionStarts();
367 for (auto Addr : FunctionStarts)
368 LEData.FunctionStarts.push_back(x: Addr);
369}
370
371void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
372 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
373
374 auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes();
375 for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end();
376 ++OpCode) {
377 MachOYAML::RebaseOpcode RebaseOp;
378 RebaseOp.Opcode =
379 static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK);
380 RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK;
381
382 unsigned Count;
383 uint64_t ULEB = 0;
384
385 switch (RebaseOp.Opcode) {
386 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
387
388 ULEB = decodeULEB128(p: OpCode + 1, n: &Count);
389 RebaseOp.ExtraData.push_back(x: ULEB);
390 OpCode += Count;
391 [[fallthrough]];
392 // Intentionally no break here -- This opcode has two ULEB values
393 case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
394 case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
395 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
396 case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
397
398 ULEB = decodeULEB128(p: OpCode + 1, n: &Count);
399 RebaseOp.ExtraData.push_back(x: ULEB);
400 OpCode += Count;
401 break;
402 default:
403 break;
404 }
405
406 LEData.RebaseOpcodes.push_back(x: RebaseOp);
407
408 if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE)
409 break;
410 }
411}
412
413StringRef ReadStringRef(const uint8_t *Start) {
414 const uint8_t *Itr = Start;
415 for (; *Itr; ++Itr)
416 ;
417 return StringRef(reinterpret_cast<const char *>(Start), Itr - Start);
418}
419
420void MachODumper::dumpBindOpcodes(
421 std::vector<MachOYAML::BindOpcode> &BindOpcodes,
422 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
423 for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end();
424 ++OpCode) {
425 MachOYAML::BindOpcode BindOp;
426 BindOp.Opcode =
427 static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
428 BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
429
430 unsigned Count;
431 uint64_t ULEB = 0;
432 int64_t SLEB = 0;
433
434 switch (BindOp.Opcode) {
435 case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
436 ULEB = decodeULEB128(p: OpCode + 1, n: &Count);
437 BindOp.ULEBExtraData.push_back(x: ULEB);
438 OpCode += Count;
439 [[fallthrough]];
440 // Intentionally no break here -- this opcode has two ULEB values
441
442 case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
443 case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
444 case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
445 case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
446 ULEB = decodeULEB128(p: OpCode + 1, n: &Count);
447 BindOp.ULEBExtraData.push_back(x: ULEB);
448 OpCode += Count;
449 break;
450
451 case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
452 SLEB = decodeSLEB128(p: OpCode + 1, n: &Count);
453 BindOp.SLEBExtraData.push_back(x: SLEB);
454 OpCode += Count;
455 break;
456
457 case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
458 BindOp.Symbol = ReadStringRef(Start: OpCode + 1);
459 OpCode += BindOp.Symbol.size() + 1;
460 break;
461 default:
462 break;
463 }
464
465 BindOpcodes.push_back(x: BindOp);
466
467 // Lazy bindings have DONE opcodes between operations, so we need to keep
468 // processing after a DONE.
469 if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE)
470 break;
471 }
472}
473
474/*!
 * \brief Processes a node from the export trie, and its children.
476 *
477 * To my knowledge there is no documentation of the encoded format of this data
478 * other than in the heads of the Apple linker engineers. To that end hopefully
479 * this comment and the implementation below can serve to light the way for
480 * anyone crazy enough to come down this path in the future.
481 *
482 * This function reads and preserves the trie structure of the export trie. To
483 * my knowledge there is no code anywhere else that reads the data and preserves
484 * the Trie. LD64 (sources available at opensource.apple.com) has a similar
485 * implementation that parses the export trie into a vector. That code as well
486 * as LLVM's libObject MachO implementation were the basis for this.
487 *
488 * The export trie is an encoded trie. The node serialization is a bit awkward.
489 * The below pseudo-code is the best description I've come up with for it.
490 *
491 * struct SerializedNode {
492 * ULEB128 TerminalSize;
493 * struct TerminalData { <-- This is only present if TerminalSize > 0
494 * ULEB128 Flags;
 * ULEB128 Address; <-- Present if ( !(Flags & REEXPORT) )
496 * ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
497 * Flags & STUB_AND_RESOLVER )
498 * char[] ImportName; <-- Present if ( Flags & REEXPORT )
499 * }
500 * uint8_t ChildrenCount;
501 * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
502 * SerializedNode Children[ChildrenCount]
503 * }
504 *
505 * Terminal nodes are nodes that represent actual exports. They can appear
506 * anywhere in the tree other than at the root; they do not need to be leaf
507 * nodes. When reading the data out of the trie this routine reads it in-order,
508 * but it puts the child names and offsets directly into the child nodes. This
509 * results in looping over the children twice during serialization and
510 * de-serialization, but it makes the YAML representation more human readable.
511 *
512 * Below is an example of the graph from a "Hello World" executable:
513 *
514 * -------
515 * | '' |
516 * -------
517 * |
518 * -------
519 * | '_' |
520 * -------
521 * |
522 * |----------------------------------------|
523 * | |
524 * ------------------------ ---------------------
525 * | '_mh_execute_header' | | 'main' |
526 * | Flags: 0x00000000 | | Flags: 0x00000000 |
527 * | Addr: 0x00000000 | | Addr: 0x00001160 |
528 * ------------------------ ---------------------
529 *
530 * This graph represents the trie for the exports "__mh_execute_header" and
531 * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
532 * terminal.
533*/
534
535const uint8_t *processExportNode(const uint8_t *Start, const uint8_t *CurrPtr,
536 const uint8_t *const End,
537 MachOYAML::ExportEntry &Entry) {
538 if (CurrPtr >= End)
539 return CurrPtr;
540 unsigned Count = 0;
541 Entry.TerminalSize = decodeULEB128(p: CurrPtr, n: &Count);
542 CurrPtr += Count;
543 if (Entry.TerminalSize != 0) {
544 Entry.Flags = decodeULEB128(p: CurrPtr, n: &Count);
545 CurrPtr += Count;
546 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
547 Entry.Address = 0;
548 Entry.Other = decodeULEB128(p: CurrPtr, n: &Count);
549 CurrPtr += Count;
550 Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
551 } else {
552 Entry.Address = decodeULEB128(p: CurrPtr, n: &Count);
553 CurrPtr += Count;
554 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
555 Entry.Other = decodeULEB128(p: CurrPtr, n: &Count);
556 CurrPtr += Count;
557 } else
558 Entry.Other = 0;
559 }
560 }
561 uint8_t childrenCount = *CurrPtr++;
562 if (childrenCount == 0)
563 return CurrPtr;
564
565 Entry.Children.insert(position: Entry.Children.begin(), n: (size_t)childrenCount,
566 x: MachOYAML::ExportEntry());
567 for (auto &Child : Entry.Children) {
568 Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
569 CurrPtr += Child.Name.length() + 1;
570 Child.NodeOffset = decodeULEB128(p: CurrPtr, n: &Count);
571 CurrPtr += Count;
572 }
573 for (auto &Child : Entry.Children) {
574 CurrPtr = processExportNode(Start, CurrPtr: Start + Child.NodeOffset, End, Entry&: Child);
575 }
576 return CurrPtr;
577}
578
579void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
580 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
581 // The exports trie can be in LC_DYLD_INFO or LC_DYLD_EXPORTS_TRIE
582 auto ExportsTrie = Obj.getDyldInfoExportsTrie();
583 if (ExportsTrie.empty())
584 ExportsTrie = Obj.getDyldExportsTrie();
585 processExportNode(Start: ExportsTrie.begin(), CurrPtr: ExportsTrie.begin(), End: ExportsTrie.end(),
586 Entry&: LEData.ExportTrie);
587}
588
589template <typename nlist_t>
590MachOYAML::NListEntry constructNameList(const nlist_t &nlist) {
591 MachOYAML::NListEntry NL;
592 NL.n_strx = nlist.n_strx;
593 NL.n_type = nlist.n_type;
594 NL.n_sect = nlist.n_sect;
595 NL.n_desc = nlist.n_desc;
596 NL.n_value = nlist.n_value;
597 return NL;
598}
599
600void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
601 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
602
603 for (auto Symbol : Obj.symbols()) {
604 MachOYAML::NListEntry NLE =
605 Obj.is64Bit()
606 ? constructNameList<MachO::nlist_64>(
607 nlist: Obj.getSymbol64TableEntry(DRI: Symbol.getRawDataRefImpl()))
608 : constructNameList<MachO::nlist>(
609 nlist: Obj.getSymbolTableEntry(DRI: Symbol.getRawDataRefImpl()));
610 LEData.NameList.push_back(x: NLE);
611 }
612
613 StringRef RemainingTable = Obj.getStringTableData();
614 while (RemainingTable.size() > 0) {
615 auto SymbolPair = RemainingTable.split(Separator: '\0');
616 RemainingTable = SymbolPair.second;
617 LEData.StringTable.push_back(x: SymbolPair.first);
618 }
619}
620
621void MachODumper::dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
622 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
623
624 MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand();
625 for (unsigned i = 0; i < DLC.nindirectsyms; ++i)
626 LEData.IndirectSymbols.push_back(x: Obj.getIndirectSymbolTableEntry(DLC, Index: i));
627}
628
629void MachODumper::dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y) {
630 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
631
632 for (const auto &LC : Y->LoadCommands) {
633 if (LC.Data.load_command_data.cmd == llvm::MachO::LC_DYLD_CHAINED_FIXUPS) {
634 const MachO::linkedit_data_command &DC =
635 LC.Data.linkedit_data_command_data;
636 if (DC.dataoff) {
637 assert(DC.dataoff < Obj.getData().size());
638 assert(DC.dataoff + DC.datasize <= Obj.getData().size());
639 const char *Bytes = Obj.getData().data() + DC.dataoff;
640 for (size_t Idx = 0; Idx < DC.datasize; Idx++) {
641 LEData.ChainedFixups.push_back(x: Bytes[Idx]);
642 }
643 }
644 break;
645 }
646 }
647}
648
649void MachODumper::dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y) {
650 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
651
652 MachO::linkedit_data_command DIC = Obj.getDataInCodeLoadCommand();
653 uint32_t NumEntries = DIC.datasize / sizeof(MachO::data_in_code_entry);
654 for (uint32_t Idx = 0; Idx < NumEntries; ++Idx) {
655 MachO::data_in_code_entry DICE =
656 Obj.getDataInCodeTableEntry(DataOffset: DIC.dataoff, Index: Idx);
657 MachOYAML::DataInCodeEntry Entry{.Offset: DICE.offset, .Length: DICE.length, .Kind: DICE.kind};
658 LEData.DataInCode.emplace_back(args&: Entry);
659 }
660}
661
662Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj,
663 unsigned RawSegments) {
664 std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
665 MachODumper Dumper(Obj, std::move(DCtx), RawSegments);
666 Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
667 if (!YAML)
668 return YAML.takeError();
669
670 yaml::YamlObjectFile YAMLFile;
671 YAMLFile.MachO = std::move(YAML.get());
672
673 yaml::Output Yout(Out);
674 Yout << YAMLFile;
675 return Error::success();
676}
677
678Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj,
679 unsigned RawSegments) {
680 yaml::YamlObjectFile YAMLFile;
681 YAMLFile.FatMachO.reset(p: new MachOYAML::UniversalBinary());
682 MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
683 YAML.Header.magic = Obj.getMagic();
684 YAML.Header.nfat_arch = Obj.getNumberOfObjects();
685
686 for (auto Slice : Obj.objects()) {
687 MachOYAML::FatArch arch;
688 arch.cputype = Slice.getCPUType();
689 arch.cpusubtype = Slice.getCPUSubType();
690 arch.offset = Slice.getOffset();
691 arch.size = Slice.getSize();
692 arch.align = Slice.getAlign();
693 arch.reserved = Slice.getReserved();
694 YAML.FatArchs.push_back(x: arch);
695
696 auto SliceObj = Slice.getAsObjectFile();
697 if (!SliceObj)
698 return SliceObj.takeError();
699
700 std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj: *SliceObj.get());
701 MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments);
702 Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
703 if (!YAMLObj)
704 return YAMLObj.takeError();
705 YAML.Slices.push_back(x: *YAMLObj.get());
706 }
707
708 yaml::Output Yout(Out);
709 Yout << YAML;
710 return Error::success();
711}
712
713Error macho2yaml(raw_ostream &Out, const object::Binary &Binary,
714 unsigned RawSegments) {
715 if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(Val: &Binary))
716 return macho2yaml(Out, Obj: *MachOObj, RawSegments);
717
718 if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(Val: &Binary))
719 return macho2yaml(Out, Obj: *MachOObj, RawSegments);
720
721 llvm_unreachable("unexpected Mach-O file format");
722}
723