| 1 | //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "obj2yaml.h" |
| 10 | #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
| 11 | #include "llvm/Object/MachOUniversal.h" |
| 12 | #include "llvm/ObjectYAML/DWARFYAML.h" |
| 13 | #include "llvm/ObjectYAML/ObjectYAML.h" |
| 14 | #include "llvm/Support/Errc.h" |
| 15 | #include "llvm/Support/Error.h" |
| 16 | #include "llvm/Support/ErrorHandling.h" |
| 17 | #include "llvm/Support/LEB128.h" |
| 18 | #include "llvm/Support/SystemZ/zOSSupport.h" |
| 19 | |
| 20 | #include <string.h> // for memcpy |
| 21 | |
| 22 | using namespace llvm; |
| 23 | |
| 24 | class MachODumper { |
| 25 | |
| 26 | template <typename StructType> |
| 27 | Expected<const char *> processLoadCommandData( |
| 28 | MachOYAML::LoadCommand &LC, |
| 29 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
| 30 | MachOYAML::Object &Y); |
| 31 | |
| 32 | const object::MachOObjectFile &Obj; |
| 33 | std::unique_ptr<DWARFContext> DWARFCtx; |
| 34 | unsigned RawSegment; |
| 35 | void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y); |
| 36 | Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y); |
| 37 | void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y); |
| 38 | void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y); |
| 39 | void dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y); |
| 40 | void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes, |
| 41 | ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false); |
| 42 | void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y); |
| 43 | void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y); |
| 44 | void dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y); |
| 45 | void dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y); |
| 46 | void dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y); |
| 47 | |
| 48 | template <typename SectionType> |
| 49 | Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec, |
| 50 | size_t SecIndex); |
| 51 | template <typename SectionType> |
| 52 | Expected<MachOYAML::Section> constructSection(SectionType Sec, |
| 53 | size_t SecIndex); |
| 54 | template <typename SectionType, typename SegmentType> |
| 55 | Expected<const char *> |
| 56 | extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
| 57 | std::vector<MachOYAML::Section> &Sections, |
| 58 | MachOYAML::Object &Y); |
| 59 | |
| 60 | public: |
| 61 | MachODumper(const object::MachOObjectFile &O, |
| 62 | std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments) |
| 63 | : Obj(O), DWARFCtx(std::move(DCtx)), RawSegment(RawSegments) {} |
| 64 | Expected<std::unique_ptr<MachOYAML::Object>> dump(); |
| 65 | }; |
| 66 | |
// Expanded once per entry of llvm/BinaryFormat/MachO.def inside the switch of
// MachODumper::dumpLoadCommands. Each expansion is one `case` that copies the
// fixed-size command struct out of the file, byte-swaps it when file and host
// endianness differ, and lets processLoadCommandData consume whatever follows.
// It relies on the locals LC, LoadCmd, Y and EndPtr of the enclosing function
// and returns from that function on error.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName: {                                                        \
    memcpy(static_cast<void *>(&LC.Data.LCStruct##_data), LoadCmd.Ptr,         \
           sizeof(MachO::LCStruct));                                           \
    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)                       \
      MachO::swapStruct(LC.Data.LCStruct##_data);                              \
    Expected<const char *> ExpectedEndPtr =                                    \
        processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y);              \
    if (!ExpectedEndPtr)                                                       \
      return ExpectedEndPtr.takeError();                                       \
    EndPtr = *ExpectedEndPtr;                                                  \
    break;                                                                     \
  }
| 79 | |
| 80 | template <typename SectionType> |
| 81 | Expected<MachOYAML::Section> |
| 82 | MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) { |
| 83 | MachOYAML::Section TempSec; |
| 84 | memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16); |
| 85 | memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16); |
| 86 | TempSec.addr = Sec.addr; |
| 87 | TempSec.size = Sec.size; |
| 88 | TempSec.offset = Sec.offset; |
| 89 | TempSec.align = Sec.align; |
| 90 | TempSec.reloff = Sec.reloff; |
| 91 | TempSec.nreloc = Sec.nreloc; |
| 92 | TempSec.flags = Sec.flags; |
| 93 | TempSec.reserved1 = Sec.reserved1; |
| 94 | TempSec.reserved2 = Sec.reserved2; |
| 95 | TempSec.reserved3 = 0; |
| 96 | if (!MachO::isVirtualSection(type: Sec.flags & MachO::SECTION_TYPE)) |
| 97 | TempSec.content = |
| 98 | yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size)); |
| 99 | |
| 100 | if (Expected<object::SectionRef> SecRef = Obj.getSection(SectionIndex: SecIndex)) { |
| 101 | TempSec.relocations.reserve(n: TempSec.nreloc); |
| 102 | for (const object::RelocationRef &Reloc : SecRef->relocations()) { |
| 103 | const object::DataRefImpl Rel = Reloc.getRawDataRefImpl(); |
| 104 | const MachO::any_relocation_info RE = Obj.getRelocation(Rel); |
| 105 | MachOYAML::Relocation R; |
| 106 | R.address = Obj.getAnyRelocationAddress(RE); |
| 107 | R.is_pcrel = Obj.getAnyRelocationPCRel(RE); |
| 108 | R.length = Obj.getAnyRelocationLength(RE); |
| 109 | R.type = Obj.getAnyRelocationType(RE); |
| 110 | R.is_scattered = Obj.isRelocationScattered(RE); |
| 111 | R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE)); |
| 112 | R.is_extern = |
| 113 | (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE)); |
| 114 | R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0); |
| 115 | TempSec.relocations.push_back(x: R); |
| 116 | } |
| 117 | } else { |
| 118 | return SecRef.takeError(); |
| 119 | } |
| 120 | return TempSec; |
| 121 | } |
| 122 | |
| 123 | template <> |
| 124 | Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec, |
| 125 | size_t SecIndex) { |
| 126 | Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex); |
| 127 | if (TempSec) |
| 128 | TempSec->reserved3 = 0; |
| 129 | return TempSec; |
| 130 | } |
| 131 | |
| 132 | template <> |
| 133 | Expected<MachOYAML::Section> |
| 134 | MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) { |
| 135 | Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex); |
| 136 | if (TempSec) |
| 137 | TempSec->reserved3 = Sec.reserved3; |
| 138 | return TempSec; |
| 139 | } |
| 140 | |
| 141 | static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx, |
| 142 | DWARFYAML::Data &DWARF) { |
| 143 | if (SecName == "__debug_abbrev" ) |
| 144 | return dumpDebugAbbrev(DCtx, Y&: DWARF); |
| 145 | if (SecName == "__debug_aranges" ) |
| 146 | return dumpDebugARanges(DCtx, Y&: DWARF); |
| 147 | if (SecName == "__debug_info" ) { |
| 148 | dumpDebugInfo(DCtx, Y&: DWARF); |
| 149 | return Error::success(); |
| 150 | } |
| 151 | if (SecName == "__debug_line" ) { |
| 152 | dumpDebugLines(DCtx, Y&: DWARF); |
| 153 | return Error::success(); |
| 154 | } |
| 155 | if (SecName.starts_with(Prefix: "__debug_pub" )) { |
| 156 | // FIXME: We should extract pub-section dumpers from this function. |
| 157 | dumpDebugPubSections(DCtx, Y&: DWARF); |
| 158 | return Error::success(); |
| 159 | } |
| 160 | if (SecName == "__debug_ranges" ) |
| 161 | return dumpDebugRanges(DCtx, Y&: DWARF); |
| 162 | if (SecName == "__debug_str" ) |
| 163 | return dumpDebugStrings(DCtx, Y&: DWARF); |
| 164 | return createStringError(EC: errc::not_supported, |
| 165 | S: "dumping " + SecName + " section is not supported" ); |
| 166 | } |
| 167 | |
| 168 | template <typename SectionType, typename SegmentType> |
| 169 | Expected<const char *> MachODumper::extractSections( |
| 170 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
| 171 | std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) { |
| 172 | auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; |
| 173 | const SectionType *Curr = |
| 174 | reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType)); |
| 175 | for (; reinterpret_cast<const void *>(Curr) < End; Curr++) { |
| 176 | SectionType Sec; |
| 177 | memcpy((void *)&Sec, Curr, sizeof(SectionType)); |
| 178 | if (Obj.isLittleEndian() != sys::IsLittleEndianHost) |
| 179 | MachO::swapStruct(Sec); |
| 180 | // For MachO section indices start from 1. |
| 181 | if (Expected<MachOYAML::Section> S = |
| 182 | constructSection(Sec, Sections.size() + 1)) { |
| 183 | StringRef SecName(S->sectname); |
| 184 | |
| 185 | // Copy data sections if requested. |
| 186 | if ((RawSegment & ::RawSegments::data) && |
| 187 | StringRef(S->segname).starts_with(Prefix: "__DATA" )) |
| 188 | S->content = |
| 189 | yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size)); |
| 190 | |
| 191 | if (SecName.starts_with(Prefix: "__debug_" )) { |
| 192 | // If the DWARF section cannot be successfully parsed, emit raw content |
| 193 | // instead of an entry in the DWARF section of the YAML. |
| 194 | if (Error Err = dumpDebugSection(SecName, DCtx&: *DWARFCtx, DWARF&: Y.DWARF)) |
| 195 | consumeError(Err: std::move(Err)); |
| 196 | else |
| 197 | S->content.reset(); |
| 198 | } |
| 199 | Sections.push_back(x: std::move(*S)); |
| 200 | } else |
| 201 | return S.takeError(); |
| 202 | } |
| 203 | return reinterpret_cast<const char *>(Curr); |
| 204 | } |
| 205 | |
| 206 | template <typename StructType> |
| 207 | Expected<const char *> MachODumper::processLoadCommandData( |
| 208 | MachOYAML::LoadCommand &LC, |
| 209 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
| 210 | MachOYAML::Object &Y) { |
| 211 | return LoadCmd.Ptr + sizeof(StructType); |
| 212 | } |
| 213 | |
| 214 | template <> |
| 215 | Expected<const char *> |
| 216 | MachODumper::processLoadCommandData<MachO::segment_command>( |
| 217 | MachOYAML::LoadCommand &LC, |
| 218 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
| 219 | MachOYAML::Object &Y) { |
| 220 | return extractSections<MachO::section, MachO::segment_command>( |
| 221 | LoadCmd, Sections&: LC.Sections, Y); |
| 222 | } |
| 223 | |
| 224 | template <> |
| 225 | Expected<const char *> |
| 226 | MachODumper::processLoadCommandData<MachO::segment_command_64>( |
| 227 | MachOYAML::LoadCommand &LC, |
| 228 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
| 229 | MachOYAML::Object &Y) { |
| 230 | return extractSections<MachO::section_64, MachO::segment_command_64>( |
| 231 | LoadCmd, Sections&: LC.Sections, Y); |
| 232 | } |
| 233 | |
| 234 | template <typename StructType> |
| 235 | const char * |
| 236 | readString(MachOYAML::LoadCommand &LC, |
| 237 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { |
| 238 | auto Start = LoadCmd.Ptr + sizeof(StructType); |
| 239 | auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType); |
| 240 | auto Size = strnlen(string: Start, maxlen: MaxSize); |
| 241 | LC.Content = StringRef(Start, Size).str(); |
| 242 | return Start + Size; |
| 243 | } |
| 244 | |
| 245 | template <> |
| 246 | Expected<const char *> |
| 247 | MachODumper::processLoadCommandData<MachO::dylib_command>( |
| 248 | MachOYAML::LoadCommand &LC, |
| 249 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
| 250 | MachOYAML::Object &Y) { |
| 251 | return readString<MachO::dylib_command>(LC, LoadCmd); |
| 252 | } |
| 253 | |
| 254 | template <> |
| 255 | Expected<const char *> |
| 256 | MachODumper::processLoadCommandData<MachO::dylinker_command>( |
| 257 | MachOYAML::LoadCommand &LC, |
| 258 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
| 259 | MachOYAML::Object &Y) { |
| 260 | return readString<MachO::dylinker_command>(LC, LoadCmd); |
| 261 | } |
| 262 | |
| 263 | template <> |
| 264 | Expected<const char *> |
| 265 | MachODumper::processLoadCommandData<MachO::rpath_command>( |
| 266 | MachOYAML::LoadCommand &LC, |
| 267 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
| 268 | MachOYAML::Object &Y) { |
| 269 | return readString<MachO::rpath_command>(LC, LoadCmd); |
| 270 | } |
| 271 | |
| 272 | template <> |
| 273 | Expected<const char *> |
| 274 | MachODumper::processLoadCommandData<MachO::build_version_command>( |
| 275 | MachOYAML::LoadCommand &LC, |
| 276 | const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
| 277 | MachOYAML::Object &Y) { |
| 278 | auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command); |
| 279 | auto NTools = LC.Data.build_version_command_data.ntools; |
| 280 | for (unsigned i = 0; i < NTools; ++i) { |
| 281 | auto Curr = Start + i * sizeof(MachO::build_tool_version); |
| 282 | MachO::build_tool_version BV; |
| 283 | memcpy(dest: (void *)&BV, src: Curr, n: sizeof(MachO::build_tool_version)); |
| 284 | if (Obj.isLittleEndian() != sys::IsLittleEndianHost) |
| 285 | MachO::swapStruct(C&: BV); |
| 286 | LC.Tools.push_back(x: BV); |
| 287 | } |
| 288 | return Start + NTools * sizeof(MachO::build_tool_version); |
| 289 | } |
| 290 | |
| 291 | Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() { |
| 292 | auto Y = std::make_unique<MachOYAML::Object>(); |
| 293 | Y->IsLittleEndian = Obj.isLittleEndian(); |
| 294 | dumpHeader(Y); |
| 295 | if (Error Err = dumpLoadCommands(Y)) |
| 296 | return std::move(Err); |
| 297 | if (RawSegment & ::RawSegments::linkedit) |
| 298 | Y->RawLinkEditSegment = |
| 299 | yaml::BinaryRef(Obj.getSegmentContents(SegmentName: "__LINKEDIT" )); |
| 300 | else |
| 301 | dumpLinkEdit(Y); |
| 302 | |
| 303 | return std::move(Y); |
| 304 | } |
| 305 | |
| 306 | void MachODumper::(std::unique_ptr<MachOYAML::Object> &Y) { |
| 307 | Y->Header.magic = Obj.getHeader().magic; |
| 308 | Y->Header.cputype = Obj.getHeader().cputype; |
| 309 | Y->Header.cpusubtype = Obj.getHeader().cpusubtype; |
| 310 | Y->Header.filetype = Obj.getHeader().filetype; |
| 311 | Y->Header.ncmds = Obj.getHeader().ncmds; |
| 312 | Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds; |
| 313 | Y->Header.flags = Obj.getHeader().flags; |
| 314 | Y->Header.reserved = 0; |
| 315 | } |
| 316 | |
| 317 | Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) { |
| 318 | for (auto LoadCmd : Obj.load_commands()) { |
| 319 | MachOYAML::LoadCommand LC; |
| 320 | const char *EndPtr = LoadCmd.Ptr; |
| 321 | switch (LoadCmd.C.cmd) { |
| 322 | default: |
| 323 | memcpy(dest: (void *)&(LC.Data.load_command_data), src: LoadCmd.Ptr, |
| 324 | n: sizeof(MachO::load_command)); |
| 325 | if (Obj.isLittleEndian() != sys::IsLittleEndianHost) |
| 326 | MachO::swapStruct(lc&: LC.Data.load_command_data); |
| 327 | if (Expected<const char *> ExpectedEndPtr = |
| 328 | processLoadCommandData<MachO::load_command>(LC, LoadCmd, Y&: *Y)) |
| 329 | EndPtr = *ExpectedEndPtr; |
| 330 | else |
| 331 | return ExpectedEndPtr.takeError(); |
| 332 | break; |
| 333 | #include "llvm/BinaryFormat/MachO.def" |
| 334 | } |
| 335 | auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr); |
| 336 | if (!std::all_of(first: EndPtr, last: &EndPtr[RemainingBytes], |
| 337 | pred: [](const char C) { return C == 0; })) { |
| 338 | LC.PayloadBytes.insert(position: LC.PayloadBytes.end(), first: EndPtr, |
| 339 | last: &EndPtr[RemainingBytes]); |
| 340 | RemainingBytes = 0; |
| 341 | } |
| 342 | LC.ZeroPadBytes = RemainingBytes; |
| 343 | Y->LoadCommands.push_back(x: std::move(LC)); |
| 344 | } |
| 345 | return Error::success(); |
| 346 | } |
| 347 | |
| 348 | void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) { |
| 349 | dumpRebaseOpcodes(Y); |
| 350 | dumpBindOpcodes(BindOpcodes&: Y->LinkEdit.BindOpcodes, OpcodeBuffer: Obj.getDyldInfoBindOpcodes()); |
| 351 | dumpBindOpcodes(BindOpcodes&: Y->LinkEdit.WeakBindOpcodes, |
| 352 | OpcodeBuffer: Obj.getDyldInfoWeakBindOpcodes()); |
| 353 | dumpBindOpcodes(BindOpcodes&: Y->LinkEdit.LazyBindOpcodes, OpcodeBuffer: Obj.getDyldInfoLazyBindOpcodes(), |
| 354 | Lazy: true); |
| 355 | dumpExportTrie(Y); |
| 356 | dumpSymbols(Y); |
| 357 | dumpIndirectSymbols(Y); |
| 358 | dumpFunctionStarts(Y); |
| 359 | dumpChainedFixups(Y); |
| 360 | dumpDataInCode(Y); |
| 361 | } |
| 362 | |
| 363 | void MachODumper::dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y) { |
| 364 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
| 365 | |
| 366 | auto FunctionStarts = Obj.getFunctionStarts(); |
| 367 | llvm::append_range(C&: LEData.FunctionStarts, R&: FunctionStarts); |
| 368 | } |
| 369 | |
| 370 | void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) { |
| 371 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
| 372 | |
| 373 | auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes(); |
| 374 | for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end(); |
| 375 | ++OpCode) { |
| 376 | MachOYAML::RebaseOpcode RebaseOp; |
| 377 | RebaseOp.Opcode = |
| 378 | static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK); |
| 379 | RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK; |
| 380 | |
| 381 | unsigned Count; |
| 382 | uint64_t ULEB = 0; |
| 383 | |
| 384 | switch (RebaseOp.Opcode) { |
| 385 | case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: |
| 386 | |
| 387 | ULEB = decodeULEB128(p: OpCode + 1, n: &Count); |
| 388 | RebaseOp.ExtraData.push_back(x: ULEB); |
| 389 | OpCode += Count; |
| 390 | [[fallthrough]]; |
| 391 | // Intentionally no break here -- This opcode has two ULEB values |
| 392 | case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: |
| 393 | case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: |
| 394 | case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: |
| 395 | case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: |
| 396 | |
| 397 | ULEB = decodeULEB128(p: OpCode + 1, n: &Count); |
| 398 | RebaseOp.ExtraData.push_back(x: ULEB); |
| 399 | OpCode += Count; |
| 400 | break; |
| 401 | default: |
| 402 | break; |
| 403 | } |
| 404 | |
| 405 | LEData.RebaseOpcodes.push_back(x: RebaseOp); |
| 406 | |
| 407 | if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE) |
| 408 | break; |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | StringRef ReadStringRef(const uint8_t *Start) { |
| 413 | const uint8_t *Itr = Start; |
| 414 | for (; *Itr; ++Itr) |
| 415 | ; |
| 416 | return StringRef(reinterpret_cast<const char *>(Start), Itr - Start); |
| 417 | } |
| 418 | |
| 419 | void MachODumper::dumpBindOpcodes( |
| 420 | std::vector<MachOYAML::BindOpcode> &BindOpcodes, |
| 421 | ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) { |
| 422 | for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end(); |
| 423 | ++OpCode) { |
| 424 | MachOYAML::BindOpcode BindOp; |
| 425 | BindOp.Opcode = |
| 426 | static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK); |
| 427 | BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK; |
| 428 | |
| 429 | unsigned Count; |
| 430 | uint64_t ULEB = 0; |
| 431 | int64_t SLEB = 0; |
| 432 | |
| 433 | switch (BindOp.Opcode) { |
| 434 | case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: |
| 435 | ULEB = decodeULEB128(p: OpCode + 1, n: &Count); |
| 436 | BindOp.ULEBExtraData.push_back(x: ULEB); |
| 437 | OpCode += Count; |
| 438 | [[fallthrough]]; |
| 439 | // Intentionally no break here -- this opcode has two ULEB values |
| 440 | |
| 441 | case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: |
| 442 | case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: |
| 443 | case MachO::BIND_OPCODE_ADD_ADDR_ULEB: |
| 444 | case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: |
| 445 | ULEB = decodeULEB128(p: OpCode + 1, n: &Count); |
| 446 | BindOp.ULEBExtraData.push_back(x: ULEB); |
| 447 | OpCode += Count; |
| 448 | break; |
| 449 | |
| 450 | case MachO::BIND_OPCODE_SET_ADDEND_SLEB: |
| 451 | SLEB = decodeSLEB128(p: OpCode + 1, n: &Count); |
| 452 | BindOp.SLEBExtraData.push_back(x: SLEB); |
| 453 | OpCode += Count; |
| 454 | break; |
| 455 | |
| 456 | case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: |
| 457 | BindOp.Symbol = ReadStringRef(Start: OpCode + 1); |
| 458 | OpCode += BindOp.Symbol.size() + 1; |
| 459 | break; |
| 460 | default: |
| 461 | break; |
| 462 | } |
| 463 | |
| 464 | BindOpcodes.push_back(x: BindOp); |
| 465 | |
| 466 | // Lazy bindings have DONE opcodes between operations, so we need to keep |
| 467 | // processing after a DONE. |
| 468 | if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE) |
| 469 | break; |
| 470 | } |
| 471 | } |
| 472 | |
| 473 | /*! |
| 474 | * /brief processes a node from the export trie, and its children. |
| 475 | * |
| 476 | * To my knowledge there is no documentation of the encoded format of this data |
| 477 | * other than in the heads of the Apple linker engineers. To that end hopefully |
| 478 | * this comment and the implementation below can serve to light the way for |
| 479 | * anyone crazy enough to come down this path in the future. |
| 480 | * |
| 481 | * This function reads and preserves the trie structure of the export trie. To |
| 482 | * my knowledge there is no code anywhere else that reads the data and preserves |
| 483 | * the Trie. LD64 (sources available at opensource.apple.com) has a similar |
| 484 | * implementation that parses the export trie into a vector. That code as well |
| 485 | * as LLVM's libObject MachO implementation were the basis for this. |
| 486 | * |
| 487 | * The export trie is an encoded trie. The node serialization is a bit awkward. |
| 488 | * The below pseudo-code is the best description I've come up with for it. |
| 489 | * |
| 490 | * struct SerializedNode { |
| 491 | * ULEB128 TerminalSize; |
| 492 | * struct TerminalData { <-- This is only present if TerminalSize > 0 |
| 493 | * ULEB128 Flags; |
| 494 | * ULEB128 Address; <-- Present if (! Flags & REEXPORT ) |
| 495 | * ULEB128 Other; <-- Present if ( Flags & REEXPORT || |
| 496 | * Flags & STUB_AND_RESOLVER ) |
| 497 | * char[] ImportName; <-- Present if ( Flags & REEXPORT ) |
| 498 | * } |
| 499 | * uint8_t ChildrenCount; |
| 500 | * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount]; |
| 501 | * SerializedNode Children[ChildrenCount] |
| 502 | * } |
| 503 | * |
| 504 | * Terminal nodes are nodes that represent actual exports. They can appear |
| 505 | * anywhere in the tree other than at the root; they do not need to be leaf |
| 506 | * nodes. When reading the data out of the trie this routine reads it in-order, |
| 507 | * but it puts the child names and offsets directly into the child nodes. This |
| 508 | * results in looping over the children twice during serialization and |
| 509 | * de-serialization, but it makes the YAML representation more human readable. |
| 510 | * |
| 511 | * Below is an example of the graph from a "Hello World" executable: |
| 512 | * |
| 513 | * ------- |
| 514 | * | '' | |
| 515 | * ------- |
| 516 | * | |
| 517 | * ------- |
| 518 | * | '_' | |
| 519 | * ------- |
| 520 | * | |
| 521 | * |----------------------------------------| |
| 522 | * | | |
| 523 | * ------------------------ --------------------- |
| 524 | * | '_mh_execute_header' | | 'main' | |
| 525 | * | Flags: 0x00000000 | | Flags: 0x00000000 | |
| 526 | * | Addr: 0x00000000 | | Addr: 0x00001160 | |
| 527 | * ------------------------ --------------------- |
| 528 | * |
| 529 | * This graph represents the trie for the exports "__mh_execute_header" and |
| 530 | * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are |
| 531 | * terminal. |
| 532 | */ |
| 533 | |
| 534 | const uint8_t *processExportNode(const uint8_t *Start, const uint8_t *CurrPtr, |
| 535 | const uint8_t *const End, |
| 536 | MachOYAML::ExportEntry &Entry) { |
| 537 | if (CurrPtr >= End) |
| 538 | return CurrPtr; |
| 539 | unsigned Count = 0; |
| 540 | Entry.TerminalSize = decodeULEB128(p: CurrPtr, n: &Count); |
| 541 | CurrPtr += Count; |
| 542 | if (Entry.TerminalSize != 0) { |
| 543 | Entry.Flags = decodeULEB128(p: CurrPtr, n: &Count); |
| 544 | CurrPtr += Count; |
| 545 | if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { |
| 546 | Entry.Address = 0; |
| 547 | Entry.Other = decodeULEB128(p: CurrPtr, n: &Count); |
| 548 | CurrPtr += Count; |
| 549 | Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr)); |
| 550 | } else { |
| 551 | Entry.Address = decodeULEB128(p: CurrPtr, n: &Count); |
| 552 | CurrPtr += Count; |
| 553 | if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { |
| 554 | Entry.Other = decodeULEB128(p: CurrPtr, n: &Count); |
| 555 | CurrPtr += Count; |
| 556 | } else |
| 557 | Entry.Other = 0; |
| 558 | } |
| 559 | } |
| 560 | uint8_t childrenCount = *CurrPtr++; |
| 561 | if (childrenCount == 0) |
| 562 | return CurrPtr; |
| 563 | |
| 564 | Entry.Children.insert(position: Entry.Children.begin(), n: (size_t)childrenCount, |
| 565 | x: MachOYAML::ExportEntry()); |
| 566 | for (auto &Child : Entry.Children) { |
| 567 | Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr)); |
| 568 | CurrPtr += Child.Name.length() + 1; |
| 569 | Child.NodeOffset = decodeULEB128(p: CurrPtr, n: &Count); |
| 570 | CurrPtr += Count; |
| 571 | } |
| 572 | for (auto &Child : Entry.Children) { |
| 573 | CurrPtr = processExportNode(Start, CurrPtr: Start + Child.NodeOffset, End, Entry&: Child); |
| 574 | } |
| 575 | return CurrPtr; |
| 576 | } |
| 577 | |
| 578 | void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) { |
| 579 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
| 580 | // The exports trie can be in LC_DYLD_INFO or LC_DYLD_EXPORTS_TRIE |
| 581 | auto ExportsTrie = Obj.getDyldInfoExportsTrie(); |
| 582 | if (ExportsTrie.empty()) |
| 583 | ExportsTrie = Obj.getDyldExportsTrie(); |
| 584 | processExportNode(Start: ExportsTrie.begin(), CurrPtr: ExportsTrie.begin(), End: ExportsTrie.end(), |
| 585 | Entry&: LEData.ExportTrie); |
| 586 | } |
| 587 | |
| 588 | template <typename nlist_t> |
| 589 | MachOYAML::NListEntry constructNameList(const nlist_t &nlist) { |
| 590 | MachOYAML::NListEntry NL; |
| 591 | NL.n_strx = nlist.n_strx; |
| 592 | NL.n_type = nlist.n_type; |
| 593 | NL.n_sect = nlist.n_sect; |
| 594 | NL.n_desc = nlist.n_desc; |
| 595 | NL.n_value = nlist.n_value; |
| 596 | return NL; |
| 597 | } |
| 598 | |
| 599 | void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) { |
| 600 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
| 601 | |
| 602 | for (auto Symbol : Obj.symbols()) { |
| 603 | MachOYAML::NListEntry NLE = |
| 604 | Obj.is64Bit() |
| 605 | ? constructNameList<MachO::nlist_64>( |
| 606 | nlist: Obj.getSymbol64TableEntry(DRI: Symbol.getRawDataRefImpl())) |
| 607 | : constructNameList<MachO::nlist>( |
| 608 | nlist: Obj.getSymbolTableEntry(DRI: Symbol.getRawDataRefImpl())); |
| 609 | LEData.NameList.push_back(x: NLE); |
| 610 | } |
| 611 | |
| 612 | StringRef RemainingTable = Obj.getStringTableData(); |
| 613 | while (RemainingTable.size() > 0) { |
| 614 | auto SymbolPair = RemainingTable.split(Separator: '\0'); |
| 615 | RemainingTable = SymbolPair.second; |
| 616 | LEData.StringTable.push_back(x: SymbolPair.first); |
| 617 | } |
| 618 | } |
| 619 | |
| 620 | void MachODumper::dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y) { |
| 621 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
| 622 | |
| 623 | MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand(); |
| 624 | for (unsigned i = 0; i < DLC.nindirectsyms; ++i) |
| 625 | LEData.IndirectSymbols.push_back(x: Obj.getIndirectSymbolTableEntry(DLC, Index: i)); |
| 626 | } |
| 627 | |
| 628 | void MachODumper::dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y) { |
| 629 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
| 630 | |
| 631 | for (const auto &LC : Y->LoadCommands) { |
| 632 | if (LC.Data.load_command_data.cmd == llvm::MachO::LC_DYLD_CHAINED_FIXUPS) { |
| 633 | const MachO::linkedit_data_command &DC = |
| 634 | LC.Data.linkedit_data_command_data; |
| 635 | if (DC.dataoff) { |
| 636 | assert(DC.dataoff < Obj.getData().size()); |
| 637 | assert(DC.dataoff + DC.datasize <= Obj.getData().size()); |
| 638 | const char *Bytes = Obj.getData().data() + DC.dataoff; |
| 639 | llvm::append_range(C&: LEData.ChainedFixups, R: ArrayRef(Bytes, DC.datasize)); |
| 640 | } |
| 641 | break; |
| 642 | } |
| 643 | } |
| 644 | } |
| 645 | |
| 646 | void MachODumper::dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y) { |
| 647 | MachOYAML::LinkEditData &LEData = Y->LinkEdit; |
| 648 | |
| 649 | MachO::linkedit_data_command DIC = Obj.getDataInCodeLoadCommand(); |
| 650 | uint32_t NumEntries = DIC.datasize / sizeof(MachO::data_in_code_entry); |
| 651 | for (uint32_t Idx = 0; Idx < NumEntries; ++Idx) { |
| 652 | MachO::data_in_code_entry DICE = |
| 653 | Obj.getDataInCodeTableEntry(DataOffset: DIC.dataoff, Index: Idx); |
| 654 | MachOYAML::DataInCodeEntry Entry{.Offset: DICE.offset, .Length: DICE.length, .Kind: DICE.kind}; |
| 655 | LEData.DataInCode.emplace_back(args&: Entry); |
| 656 | } |
| 657 | } |
| 658 | |
| 659 | Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj, |
| 660 | unsigned RawSegments) { |
| 661 | std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj); |
| 662 | MachODumper Dumper(Obj, std::move(DCtx), RawSegments); |
| 663 | Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump(); |
| 664 | if (!YAML) |
| 665 | return YAML.takeError(); |
| 666 | |
| 667 | yaml::YamlObjectFile YAMLFile; |
| 668 | YAMLFile.MachO = std::move(YAML.get()); |
| 669 | |
| 670 | yaml::Output Yout(Out); |
| 671 | Yout << YAMLFile; |
| 672 | return Error::success(); |
| 673 | } |
| 674 | |
| 675 | Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj, |
| 676 | unsigned RawSegments) { |
| 677 | yaml::YamlObjectFile YAMLFile; |
| 678 | YAMLFile.FatMachO.reset(p: new MachOYAML::UniversalBinary()); |
| 679 | MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO; |
| 680 | YAML.Header.magic = Obj.getMagic(); |
| 681 | YAML.Header.nfat_arch = Obj.getNumberOfObjects(); |
| 682 | |
| 683 | for (auto Slice : Obj.objects()) { |
| 684 | MachOYAML::FatArch arch; |
| 685 | arch.cputype = Slice.getCPUType(); |
| 686 | arch.cpusubtype = Slice.getCPUSubType(); |
| 687 | arch.offset = Slice.getOffset(); |
| 688 | arch.size = Slice.getSize(); |
| 689 | arch.align = Slice.getAlign(); |
| 690 | arch.reserved = Slice.getReserved(); |
| 691 | YAML.FatArchs.push_back(x: arch); |
| 692 | |
| 693 | auto SliceObj = Slice.getAsObjectFile(); |
| 694 | if (!SliceObj) |
| 695 | return SliceObj.takeError(); |
| 696 | |
| 697 | std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj: *SliceObj.get()); |
| 698 | MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments); |
| 699 | Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump(); |
| 700 | if (!YAMLObj) |
| 701 | return YAMLObj.takeError(); |
| 702 | YAML.Slices.push_back(x: *YAMLObj.get()); |
| 703 | } |
| 704 | |
| 705 | yaml::Output Yout(Out); |
| 706 | Yout << YAML; |
| 707 | return Error::success(); |
| 708 | } |
| 709 | |
| 710 | Error macho2yaml(raw_ostream &Out, const object::Binary &Binary, |
| 711 | unsigned RawSegments) { |
| 712 | if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(Val: &Binary)) |
| 713 | return macho2yaml(Out, Obj: *MachOObj, RawSegments); |
| 714 | |
| 715 | if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(Val: &Binary)) |
| 716 | return macho2yaml(Out, Obj: *MachOObj, RawSegments); |
| 717 | |
| 718 | llvm_unreachable("unexpected Mach-O file format" ); |
| 719 | } |
| 720 | |