1 | //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "obj2yaml.h" |
10 | #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
11 | #include "llvm/Object/MachOUniversal.h" |
12 | #include "llvm/ObjectYAML/DWARFYAML.h" |
13 | #include "llvm/ObjectYAML/ObjectYAML.h" |
14 | #include "llvm/Support/Errc.h" |
15 | #include "llvm/Support/Error.h" |
16 | #include "llvm/Support/ErrorHandling.h" |
17 | #include "llvm/Support/LEB128.h" |
18 | #include "llvm/Support/SystemZ/zOSSupport.h" |
19 | |
20 | #include <string.h> // for memcpy |
21 | |
22 | using namespace llvm; |
23 | |
24 | class MachODumper { |
25 | |
26 | template <typename StructType> |
27 | Expected<const char *> processLoadCommandData( |
28 | MachOYAML::LoadCommand &LC, |
29 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
30 | MachOYAML::Object &Y); |
31 | |
32 | const object::MachOObjectFile &Obj; |
33 | std::unique_ptr<DWARFContext> DWARFCtx; |
34 | unsigned RawSegment; |
35 | void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y); |
36 | Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y); |
37 | void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y); |
38 | void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y); |
39 | void dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y); |
40 | void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes, |
41 | ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false); |
42 | void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y); |
43 | void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y); |
44 | void dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y); |
45 | void dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y); |
46 | void dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y); |
47 | |
48 | template <typename SectionType> |
49 | Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec, |
50 | size_t SecIndex); |
51 | template <typename SectionType> |
52 | Expected<MachOYAML::Section> constructSection(SectionType Sec, |
53 | size_t SecIndex); |
54 | template <typename SectionType, typename SegmentType> |
55 | Expected<const char *> |
56 | extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
57 | std::vector<MachOYAML::Section> &Sections, |
58 | MachOYAML::Object &Y); |
59 | |
60 | public: |
61 | MachODumper(const object::MachOObjectFile &O, |
62 | std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments) |
63 | : Obj(O), DWARFCtx(std::move(DCtx)), RawSegment(RawSegments) {} |
64 | Expected<std::unique_ptr<MachOYAML::Object>> dump(); |
65 | }; |
66 | |
// Expands to one `case` of the load-command switch in
// MachODumper::dumpLoadCommands(). It copies the command's fixed-size struct
// into the matching member of LC.Data, byte-swaps it when the file and host
// endianness differ, then lets processLoadCommandData<> consume any trailing
// data. Relies on `LC`, `LoadCmd`, `Obj`, `Y` and `EndPtr` being in scope at
// the expansion site; returns the Error from the enclosing function on
// failure.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName: {                                                        \
    memcpy(static_cast<void *>(&(LC.Data.LCStruct##_data)), LoadCmd.Ptr,       \
           sizeof(MachO::LCStruct));                                           \
    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)                       \
      MachO::swapStruct(LC.Data.LCStruct##_data);                              \
    Expected<const char *> ExpectedEndPtr =                                    \
        processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y);              \
    if (!ExpectedEndPtr)                                                       \
      return ExpectedEndPtr.takeError();                                       \
    EndPtr = *ExpectedEndPtr;                                                  \
    break;                                                                     \
  }
79 | |
80 | template <typename SectionType> |
81 | Expected<MachOYAML::Section> |
82 | MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) { |
83 | MachOYAML::Section TempSec; |
84 | memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16); |
85 | memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16); |
86 | TempSec.addr = Sec.addr; |
87 | TempSec.size = Sec.size; |
88 | TempSec.offset = Sec.offset; |
89 | TempSec.align = Sec.align; |
90 | TempSec.reloff = Sec.reloff; |
91 | TempSec.nreloc = Sec.nreloc; |
92 | TempSec.flags = Sec.flags; |
93 | TempSec.reserved1 = Sec.reserved1; |
94 | TempSec.reserved2 = Sec.reserved2; |
95 | TempSec.reserved3 = 0; |
96 | if (!MachO::isVirtualSection(type: Sec.flags & MachO::SECTION_TYPE)) |
97 | TempSec.content = |
98 | yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size)); |
99 | |
100 | if (Expected<object::SectionRef> SecRef = Obj.getSection(SectionIndex: SecIndex)) { |
101 | TempSec.relocations.reserve(n: TempSec.nreloc); |
102 | for (const object::RelocationRef &Reloc : SecRef->relocations()) { |
103 | const object::DataRefImpl Rel = Reloc.getRawDataRefImpl(); |
104 | const MachO::any_relocation_info RE = Obj.getRelocation(Rel); |
105 | MachOYAML::Relocation R; |
106 | R.address = Obj.getAnyRelocationAddress(RE); |
107 | R.is_pcrel = Obj.getAnyRelocationPCRel(RE); |
108 | R.length = Obj.getAnyRelocationLength(RE); |
109 | R.type = Obj.getAnyRelocationType(RE); |
110 | R.is_scattered = Obj.isRelocationScattered(RE); |
111 | R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE)); |
112 | R.is_extern = |
113 | (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE)); |
114 | R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0); |
115 | TempSec.relocations.push_back(x: R); |
116 | } |
117 | } else { |
118 | return SecRef.takeError(); |
119 | } |
120 | return TempSec; |
121 | } |
122 | |
123 | template <> |
124 | Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec, |
125 | size_t SecIndex) { |
126 | Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex); |
127 | if (TempSec) |
128 | TempSec->reserved3 = 0; |
129 | return TempSec; |
130 | } |
131 | |
132 | template <> |
133 | Expected<MachOYAML::Section> |
134 | MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) { |
135 | Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex); |
136 | if (TempSec) |
137 | TempSec->reserved3 = Sec.reserved3; |
138 | return TempSec; |
139 | } |
140 | |
141 | static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx, |
142 | DWARFYAML::Data &DWARF) { |
143 | if (SecName == "__debug_abbrev" ) |
144 | return dumpDebugAbbrev(DCtx, Y&: DWARF); |
145 | if (SecName == "__debug_aranges" ) |
146 | return dumpDebugARanges(DCtx, Y&: DWARF); |
147 | if (SecName == "__debug_info" ) { |
148 | dumpDebugInfo(DCtx, Y&: DWARF); |
149 | return Error::success(); |
150 | } |
151 | if (SecName == "__debug_line" ) { |
152 | dumpDebugLines(DCtx, Y&: DWARF); |
153 | return Error::success(); |
154 | } |
155 | if (SecName.starts_with(Prefix: "__debug_pub" )) { |
156 | // FIXME: We should extract pub-section dumpers from this function. |
157 | dumpDebugPubSections(DCtx, Y&: DWARF); |
158 | return Error::success(); |
159 | } |
160 | if (SecName == "__debug_ranges" ) |
161 | return dumpDebugRanges(DCtx, Y&: DWARF); |
162 | if (SecName == "__debug_str" ) |
163 | return dumpDebugStrings(DCtx, Y&: DWARF); |
164 | return createStringError(EC: errc::not_supported, |
165 | S: "dumping " + SecName + " section is not supported" ); |
166 | } |
167 | |
168 | template <typename SectionType, typename SegmentType> |
169 | Expected<const char *> MachODumper::extractSections( |
170 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
171 | std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) { |
172 | auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; |
173 | const SectionType *Curr = |
174 | reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType)); |
175 | for (; reinterpret_cast<const void *>(Curr) < End; Curr++) { |
176 | SectionType Sec; |
177 | memcpy((void *)&Sec, Curr, sizeof(SectionType)); |
178 | if (Obj.isLittleEndian() != sys::IsLittleEndianHost) |
179 | MachO::swapStruct(Sec); |
180 | // For MachO section indices start from 1. |
181 | if (Expected<MachOYAML::Section> S = |
182 | constructSection(Sec, Sections.size() + 1)) { |
183 | StringRef SecName(S->sectname); |
184 | |
185 | // Copy data sections if requested. |
186 | if ((RawSegment & ::RawSegments::data) && |
187 | StringRef(S->segname).starts_with(Prefix: "__DATA" )) |
188 | S->content = |
189 | yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size)); |
190 | |
191 | if (SecName.starts_with(Prefix: "__debug_" )) { |
192 | // If the DWARF section cannot be successfully parsed, emit raw content |
193 | // instead of an entry in the DWARF section of the YAML. |
194 | if (Error Err = dumpDebugSection(SecName, DCtx&: *DWARFCtx, DWARF&: Y.DWARF)) |
195 | consumeError(Err: std::move(Err)); |
196 | else |
197 | S->content.reset(); |
198 | } |
199 | Sections.push_back(x: std::move(*S)); |
200 | } else |
201 | return S.takeError(); |
202 | } |
203 | return reinterpret_cast<const char *>(Curr); |
204 | } |
205 | |
206 | template <typename StructType> |
207 | Expected<const char *> MachODumper::processLoadCommandData( |
208 | MachOYAML::LoadCommand &LC, |
209 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
210 | MachOYAML::Object &Y) { |
211 | return LoadCmd.Ptr + sizeof(StructType); |
212 | } |
213 | |
214 | template <> |
215 | Expected<const char *> |
216 | MachODumper::processLoadCommandData<MachO::segment_command>( |
217 | MachOYAML::LoadCommand &LC, |
218 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
219 | MachOYAML::Object &Y) { |
220 | return extractSections<MachO::section, MachO::segment_command>( |
221 | LoadCmd, Sections&: LC.Sections, Y); |
222 | } |
223 | |
224 | template <> |
225 | Expected<const char *> |
226 | MachODumper::processLoadCommandData<MachO::segment_command_64>( |
227 | MachOYAML::LoadCommand &LC, |
228 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
229 | MachOYAML::Object &Y) { |
230 | return extractSections<MachO::section_64, MachO::segment_command_64>( |
231 | LoadCmd, Sections&: LC.Sections, Y); |
232 | } |
233 | |
234 | template <typename StructType> |
235 | const char * |
236 | readString(MachOYAML::LoadCommand &LC, |
237 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { |
238 | auto Start = LoadCmd.Ptr + sizeof(StructType); |
239 | auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType); |
240 | auto Size = strnlen(string: Start, maxlen: MaxSize); |
241 | LC.Content = StringRef(Start, Size).str(); |
242 | return Start + Size; |
243 | } |
244 | |
245 | template <> |
246 | Expected<const char *> |
247 | MachODumper::processLoadCommandData<MachO::dylib_command>( |
248 | MachOYAML::LoadCommand &LC, |
249 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
250 | MachOYAML::Object &Y) { |
251 | return readString<MachO::dylib_command>(LC, LoadCmd); |
252 | } |
253 | |
254 | template <> |
255 | Expected<const char *> |
256 | MachODumper::processLoadCommandData<MachO::dylinker_command>( |
257 | MachOYAML::LoadCommand &LC, |
258 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
259 | MachOYAML::Object &Y) { |
260 | return readString<MachO::dylinker_command>(LC, LoadCmd); |
261 | } |
262 | |
263 | template <> |
264 | Expected<const char *> |
265 | MachODumper::processLoadCommandData<MachO::rpath_command>( |
266 | MachOYAML::LoadCommand &LC, |
267 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
268 | MachOYAML::Object &Y) { |
269 | return readString<MachO::rpath_command>(LC, LoadCmd); |
270 | } |
271 | |
272 | template <> |
273 | Expected<const char *> |
274 | MachODumper::processLoadCommandData<MachO::build_version_command>( |
275 | MachOYAML::LoadCommand &LC, |
276 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
277 | MachOYAML::Object &Y) { |
278 | auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command); |
279 | auto NTools = LC.Data.build_version_command_data.ntools; |
280 | for (unsigned i = 0; i < NTools; ++i) { |
281 | auto Curr = Start + i * sizeof(MachO::build_tool_version); |
282 | MachO::build_tool_version BV; |
283 | memcpy(dest: (void *)&BV, src: Curr, n: sizeof(MachO::build_tool_version)); |
284 | if (Obj.isLittleEndian() != sys::IsLittleEndianHost) |
285 | MachO::swapStruct(C&: BV); |
286 | LC.Tools.push_back(x: BV); |
287 | } |
288 | return Start + NTools * sizeof(MachO::build_tool_version); |
289 | } |
290 | |
291 | Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() { |
292 | auto Y = std::make_unique<MachOYAML::Object>(); |
293 | Y->IsLittleEndian = Obj.isLittleEndian(); |
294 | dumpHeader(Y); |
295 | if (Error Err = dumpLoadCommands(Y)) |
296 | return std::move(Err); |
297 | if (RawSegment & ::RawSegments::linkedit) |
298 | Y->RawLinkEditSegment = |
299 | yaml::BinaryRef(Obj.getSegmentContents(SegmentName: "__LINKEDIT" )); |
300 | else |
301 | dumpLinkEdit(Y); |
302 | |
303 | return std::move(Y); |
304 | } |
305 | |
306 | void MachODumper::(std::unique_ptr<MachOYAML::Object> &Y) { |
307 | Y->Header.magic = Obj.getHeader().magic; |
308 | Y->Header.cputype = Obj.getHeader().cputype; |
309 | Y->Header.cpusubtype = Obj.getHeader().cpusubtype; |
310 | Y->Header.filetype = Obj.getHeader().filetype; |
311 | Y->Header.ncmds = Obj.getHeader().ncmds; |
312 | Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds; |
313 | Y->Header.flags = Obj.getHeader().flags; |
314 | Y->Header.reserved = 0; |
315 | } |
316 | |
317 | Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) { |
318 | for (auto LoadCmd : Obj.load_commands()) { |
319 | MachOYAML::LoadCommand LC; |
320 | const char *EndPtr = LoadCmd.Ptr; |
321 | switch (LoadCmd.C.cmd) { |
322 | default: |
323 | memcpy(dest: (void *)&(LC.Data.load_command_data), src: LoadCmd.Ptr, |
324 | n: sizeof(MachO::load_command)); |
325 | if (Obj.isLittleEndian() != sys::IsLittleEndianHost) |
326 | MachO::swapStruct(lc&: LC.Data.load_command_data); |
327 | if (Expected<const char *> ExpectedEndPtr = |
328 | processLoadCommandData<MachO::load_command>(LC, LoadCmd, Y&: *Y)) |
329 | EndPtr = *ExpectedEndPtr; |
330 | else |
331 | return ExpectedEndPtr.takeError(); |
332 | break; |
333 | #include "llvm/BinaryFormat/MachO.def" |
334 | } |
335 | auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr); |
336 | if (!std::all_of(first: EndPtr, last: &EndPtr[RemainingBytes], |
337 | pred: [](const char C) { return C == 0; })) { |
338 | LC.PayloadBytes.insert(position: LC.PayloadBytes.end(), first: EndPtr, |
339 | last: &EndPtr[RemainingBytes]); |
340 | RemainingBytes = 0; |
341 | } |
342 | LC.ZeroPadBytes = RemainingBytes; |
343 | Y->LoadCommands.push_back(x: std::move(LC)); |
344 | } |
345 | return Error::success(); |
346 | } |
347 | |
348 | void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) { |
349 | dumpRebaseOpcodes(Y); |
350 | dumpBindOpcodes(BindOpcodes&: Y->LinkEdit.BindOpcodes, OpcodeBuffer: Obj.getDyldInfoBindOpcodes()); |
351 | dumpBindOpcodes(BindOpcodes&: Y->LinkEdit.WeakBindOpcodes, |
352 | OpcodeBuffer: Obj.getDyldInfoWeakBindOpcodes()); |
353 | dumpBindOpcodes(BindOpcodes&: Y->LinkEdit.LazyBindOpcodes, OpcodeBuffer: Obj.getDyldInfoLazyBindOpcodes(), |
354 | Lazy: true); |
355 | dumpExportTrie(Y); |
356 | dumpSymbols(Y); |
357 | dumpIndirectSymbols(Y); |
358 | dumpFunctionStarts(Y); |
359 | dumpChainedFixups(Y); |
360 | dumpDataInCode(Y); |
361 | } |
362 | |
363 | void MachODumper::dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y) { |
364 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
365 | |
366 | auto FunctionStarts = Obj.getFunctionStarts(); |
367 | for (auto Addr : FunctionStarts) |
368 | LEData.FunctionStarts.push_back(x: Addr); |
369 | } |
370 | |
371 | void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) { |
372 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
373 | |
374 | auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes(); |
375 | for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end(); |
376 | ++OpCode) { |
377 | MachOYAML::RebaseOpcode RebaseOp; |
378 | RebaseOp.Opcode = |
379 | static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK); |
380 | RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK; |
381 | |
382 | unsigned Count; |
383 | uint64_t ULEB = 0; |
384 | |
385 | switch (RebaseOp.Opcode) { |
386 | case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: |
387 | |
388 | ULEB = decodeULEB128(p: OpCode + 1, n: &Count); |
389 | RebaseOp.ExtraData.push_back(x: ULEB); |
390 | OpCode += Count; |
391 | [[fallthrough]]; |
392 | // Intentionally no break here -- This opcode has two ULEB values |
393 | case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: |
394 | case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: |
395 | case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: |
396 | case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: |
397 | |
398 | ULEB = decodeULEB128(p: OpCode + 1, n: &Count); |
399 | RebaseOp.ExtraData.push_back(x: ULEB); |
400 | OpCode += Count; |
401 | break; |
402 | default: |
403 | break; |
404 | } |
405 | |
406 | LEData.RebaseOpcodes.push_back(x: RebaseOp); |
407 | |
408 | if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE) |
409 | break; |
410 | } |
411 | } |
412 | |
413 | StringRef ReadStringRef(const uint8_t *Start) { |
414 | const uint8_t *Itr = Start; |
415 | for (; *Itr; ++Itr) |
416 | ; |
417 | return StringRef(reinterpret_cast<const char *>(Start), Itr - Start); |
418 | } |
419 | |
420 | void MachODumper::dumpBindOpcodes( |
421 | std::vector<MachOYAML::BindOpcode> &BindOpcodes, |
422 | ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) { |
423 | for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end(); |
424 | ++OpCode) { |
425 | MachOYAML::BindOpcode BindOp; |
426 | BindOp.Opcode = |
427 | static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK); |
428 | BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK; |
429 | |
430 | unsigned Count; |
431 | uint64_t ULEB = 0; |
432 | int64_t SLEB = 0; |
433 | |
434 | switch (BindOp.Opcode) { |
435 | case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: |
436 | ULEB = decodeULEB128(p: OpCode + 1, n: &Count); |
437 | BindOp.ULEBExtraData.push_back(x: ULEB); |
438 | OpCode += Count; |
439 | [[fallthrough]]; |
440 | // Intentionally no break here -- this opcode has two ULEB values |
441 | |
442 | case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: |
443 | case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: |
444 | case MachO::BIND_OPCODE_ADD_ADDR_ULEB: |
445 | case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: |
446 | ULEB = decodeULEB128(p: OpCode + 1, n: &Count); |
447 | BindOp.ULEBExtraData.push_back(x: ULEB); |
448 | OpCode += Count; |
449 | break; |
450 | |
451 | case MachO::BIND_OPCODE_SET_ADDEND_SLEB: |
452 | SLEB = decodeSLEB128(p: OpCode + 1, n: &Count); |
453 | BindOp.SLEBExtraData.push_back(x: SLEB); |
454 | OpCode += Count; |
455 | break; |
456 | |
457 | case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: |
458 | BindOp.Symbol = ReadStringRef(Start: OpCode + 1); |
459 | OpCode += BindOp.Symbol.size() + 1; |
460 | break; |
461 | default: |
462 | break; |
463 | } |
464 | |
465 | BindOpcodes.push_back(x: BindOp); |
466 | |
467 | // Lazy bindings have DONE opcodes between operations, so we need to keep |
468 | // processing after a DONE. |
469 | if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE) |
470 | break; |
471 | } |
472 | } |
473 | |
474 | /*! |
 * \brief Processes a node from the export trie, and its children.
476 | * |
477 | * To my knowledge there is no documentation of the encoded format of this data |
478 | * other than in the heads of the Apple linker engineers. To that end hopefully |
479 | * this comment and the implementation below can serve to light the way for |
480 | * anyone crazy enough to come down this path in the future. |
481 | * |
482 | * This function reads and preserves the trie structure of the export trie. To |
483 | * my knowledge there is no code anywhere else that reads the data and preserves |
484 | * the Trie. LD64 (sources available at opensource.apple.com) has a similar |
485 | * implementation that parses the export trie into a vector. That code as well |
486 | * as LLVM's libObject MachO implementation were the basis for this. |
487 | * |
488 | * The export trie is an encoded trie. The node serialization is a bit awkward. |
489 | * The below pseudo-code is the best description I've come up with for it. |
490 | * |
491 | * struct SerializedNode { |
492 | * ULEB128 TerminalSize; |
493 | * struct TerminalData { <-- This is only present if TerminalSize > 0 |
494 | * ULEB128 Flags; |
 *     ULEB128 Address;     <-- Present if ( !(Flags & REEXPORT) )
496 | * ULEB128 Other; <-- Present if ( Flags & REEXPORT || |
497 | * Flags & STUB_AND_RESOLVER ) |
498 | * char[] ImportName; <-- Present if ( Flags & REEXPORT ) |
499 | * } |
500 | * uint8_t ChildrenCount; |
501 | * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount]; |
502 | * SerializedNode Children[ChildrenCount] |
503 | * } |
504 | * |
505 | * Terminal nodes are nodes that represent actual exports. They can appear |
506 | * anywhere in the tree other than at the root; they do not need to be leaf |
507 | * nodes. When reading the data out of the trie this routine reads it in-order, |
508 | * but it puts the child names and offsets directly into the child nodes. This |
509 | * results in looping over the children twice during serialization and |
510 | * de-serialization, but it makes the YAML representation more human readable. |
511 | * |
512 | * Below is an example of the graph from a "Hello World" executable: |
513 | * |
514 | * ------- |
515 | * | '' | |
516 | * ------- |
517 | * | |
518 | * ------- |
519 | * | '_' | |
520 | * ------- |
521 | * | |
522 | * |----------------------------------------| |
523 | * | | |
524 | * ------------------------ --------------------- |
525 | * | '_mh_execute_header' | | 'main' | |
526 | * | Flags: 0x00000000 | | Flags: 0x00000000 | |
527 | * | Addr: 0x00000000 | | Addr: 0x00001160 | |
528 | * ------------------------ --------------------- |
529 | * |
530 | * This graph represents the trie for the exports "__mh_execute_header" and |
531 | * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are |
532 | * terminal. |
533 | */ |
534 | |
535 | const uint8_t *processExportNode(const uint8_t *Start, const uint8_t *CurrPtr, |
536 | const uint8_t *const End, |
537 | MachOYAML::ExportEntry &Entry) { |
538 | if (CurrPtr >= End) |
539 | return CurrPtr; |
540 | unsigned Count = 0; |
541 | Entry.TerminalSize = decodeULEB128(p: CurrPtr, n: &Count); |
542 | CurrPtr += Count; |
543 | if (Entry.TerminalSize != 0) { |
544 | Entry.Flags = decodeULEB128(p: CurrPtr, n: &Count); |
545 | CurrPtr += Count; |
546 | if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { |
547 | Entry.Address = 0; |
548 | Entry.Other = decodeULEB128(p: CurrPtr, n: &Count); |
549 | CurrPtr += Count; |
550 | Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr)); |
551 | } else { |
552 | Entry.Address = decodeULEB128(p: CurrPtr, n: &Count); |
553 | CurrPtr += Count; |
554 | if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { |
555 | Entry.Other = decodeULEB128(p: CurrPtr, n: &Count); |
556 | CurrPtr += Count; |
557 | } else |
558 | Entry.Other = 0; |
559 | } |
560 | } |
561 | uint8_t childrenCount = *CurrPtr++; |
562 | if (childrenCount == 0) |
563 | return CurrPtr; |
564 | |
565 | Entry.Children.insert(position: Entry.Children.begin(), n: (size_t)childrenCount, |
566 | x: MachOYAML::ExportEntry()); |
567 | for (auto &Child : Entry.Children) { |
568 | Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr)); |
569 | CurrPtr += Child.Name.length() + 1; |
570 | Child.NodeOffset = decodeULEB128(p: CurrPtr, n: &Count); |
571 | CurrPtr += Count; |
572 | } |
573 | for (auto &Child : Entry.Children) { |
574 | CurrPtr = processExportNode(Start, CurrPtr: Start + Child.NodeOffset, End, Entry&: Child); |
575 | } |
576 | return CurrPtr; |
577 | } |
578 | |
579 | void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) { |
580 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
581 | // The exports trie can be in LC_DYLD_INFO or LC_DYLD_EXPORTS_TRIE |
582 | auto ExportsTrie = Obj.getDyldInfoExportsTrie(); |
583 | if (ExportsTrie.empty()) |
584 | ExportsTrie = Obj.getDyldExportsTrie(); |
585 | processExportNode(Start: ExportsTrie.begin(), CurrPtr: ExportsTrie.begin(), End: ExportsTrie.end(), |
586 | Entry&: LEData.ExportTrie); |
587 | } |
588 | |
589 | template <typename nlist_t> |
590 | MachOYAML::NListEntry constructNameList(const nlist_t &nlist) { |
591 | MachOYAML::NListEntry NL; |
592 | NL.n_strx = nlist.n_strx; |
593 | NL.n_type = nlist.n_type; |
594 | NL.n_sect = nlist.n_sect; |
595 | NL.n_desc = nlist.n_desc; |
596 | NL.n_value = nlist.n_value; |
597 | return NL; |
598 | } |
599 | |
600 | void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) { |
601 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
602 | |
603 | for (auto Symbol : Obj.symbols()) { |
604 | MachOYAML::NListEntry NLE = |
605 | Obj.is64Bit() |
606 | ? constructNameList<MachO::nlist_64>( |
607 | nlist: Obj.getSymbol64TableEntry(DRI: Symbol.getRawDataRefImpl())) |
608 | : constructNameList<MachO::nlist>( |
609 | nlist: Obj.getSymbolTableEntry(DRI: Symbol.getRawDataRefImpl())); |
610 | LEData.NameList.push_back(x: NLE); |
611 | } |
612 | |
613 | StringRef RemainingTable = Obj.getStringTableData(); |
614 | while (RemainingTable.size() > 0) { |
615 | auto SymbolPair = RemainingTable.split(Separator: '\0'); |
616 | RemainingTable = SymbolPair.second; |
617 | LEData.StringTable.push_back(x: SymbolPair.first); |
618 | } |
619 | } |
620 | |
621 | void MachODumper::dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y) { |
622 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
623 | |
624 | MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand(); |
625 | for (unsigned i = 0; i < DLC.nindirectsyms; ++i) |
626 | LEData.IndirectSymbols.push_back(x: Obj.getIndirectSymbolTableEntry(DLC, Index: i)); |
627 | } |
628 | |
629 | void MachODumper::dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y) { |
630 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
631 | |
632 | for (const auto &LC : Y->LoadCommands) { |
633 | if (LC.Data.load_command_data.cmd == llvm::MachO::LC_DYLD_CHAINED_FIXUPS) { |
634 | const MachO::linkedit_data_command &DC = |
635 | LC.Data.linkedit_data_command_data; |
636 | if (DC.dataoff) { |
637 | assert(DC.dataoff < Obj.getData().size()); |
638 | assert(DC.dataoff + DC.datasize <= Obj.getData().size()); |
639 | const char *Bytes = Obj.getData().data() + DC.dataoff; |
640 | for (size_t Idx = 0; Idx < DC.datasize; Idx++) { |
641 | LEData.ChainedFixups.push_back(x: Bytes[Idx]); |
642 | } |
643 | } |
644 | break; |
645 | } |
646 | } |
647 | } |
648 | |
649 | void MachODumper::dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y) { |
650 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
651 | |
652 | MachO::linkedit_data_command DIC = Obj.getDataInCodeLoadCommand(); |
653 | uint32_t NumEntries = DIC.datasize / sizeof(MachO::data_in_code_entry); |
654 | for (uint32_t Idx = 0; Idx < NumEntries; ++Idx) { |
655 | MachO::data_in_code_entry DICE = |
656 | Obj.getDataInCodeTableEntry(DataOffset: DIC.dataoff, Index: Idx); |
657 | MachOYAML::DataInCodeEntry Entry{.Offset: DICE.offset, .Length: DICE.length, .Kind: DICE.kind}; |
658 | LEData.DataInCode.emplace_back(args&: Entry); |
659 | } |
660 | } |
661 | |
662 | Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj, |
663 | unsigned RawSegments) { |
664 | std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj); |
665 | MachODumper Dumper(Obj, std::move(DCtx), RawSegments); |
666 | Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump(); |
667 | if (!YAML) |
668 | return YAML.takeError(); |
669 | |
670 | yaml::YamlObjectFile YAMLFile; |
671 | YAMLFile.MachO = std::move(YAML.get()); |
672 | |
673 | yaml::Output Yout(Out); |
674 | Yout << YAMLFile; |
675 | return Error::success(); |
676 | } |
677 | |
678 | Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj, |
679 | unsigned RawSegments) { |
680 | yaml::YamlObjectFile YAMLFile; |
681 | YAMLFile.FatMachO.reset(p: new MachOYAML::UniversalBinary()); |
682 | MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO; |
683 | YAML.Header.magic = Obj.getMagic(); |
684 | YAML.Header.nfat_arch = Obj.getNumberOfObjects(); |
685 | |
686 | for (auto Slice : Obj.objects()) { |
687 | MachOYAML::FatArch arch; |
688 | arch.cputype = Slice.getCPUType(); |
689 | arch.cpusubtype = Slice.getCPUSubType(); |
690 | arch.offset = Slice.getOffset(); |
691 | arch.size = Slice.getSize(); |
692 | arch.align = Slice.getAlign(); |
693 | arch.reserved = Slice.getReserved(); |
694 | YAML.FatArchs.push_back(x: arch); |
695 | |
696 | auto SliceObj = Slice.getAsObjectFile(); |
697 | if (!SliceObj) |
698 | return SliceObj.takeError(); |
699 | |
700 | std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj: *SliceObj.get()); |
701 | MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments); |
702 | Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump(); |
703 | if (!YAMLObj) |
704 | return YAMLObj.takeError(); |
705 | YAML.Slices.push_back(x: *YAMLObj.get()); |
706 | } |
707 | |
708 | yaml::Output Yout(Out); |
709 | Yout << YAML; |
710 | return Error::success(); |
711 | } |
712 | |
713 | Error macho2yaml(raw_ostream &Out, const object::Binary &Binary, |
714 | unsigned RawSegments) { |
715 | if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(Val: &Binary)) |
716 | return macho2yaml(Out, Obj: *MachOObj, RawSegments); |
717 | |
718 | if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(Val: &Binary)) |
719 | return macho2yaml(Out, Obj: *MachOObj, RawSegments); |
720 | |
721 | llvm_unreachable("unexpected Mach-O file format" ); |
722 | } |
723 | |