//===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
| 8 | |
#include "MachOReader.h"
#include "MachOObject.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Object/MachO.h"
#include "llvm/Support/SystemZ/zOSSupport.h"
#include <cstring>
#include <memory>

using namespace llvm;
using namespace llvm::objcopy;
using namespace llvm::objcopy::macho;
| 19 | |
| 20 | void MachOReader::(Object &O) const { |
| 21 | O.Header.Magic = MachOObj.getHeader().magic; |
| 22 | O.Header.CPUType = MachOObj.getHeader().cputype; |
| 23 | O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; |
| 24 | O.Header.FileType = MachOObj.getHeader().filetype; |
| 25 | O.Header.NCmds = MachOObj.getHeader().ncmds; |
| 26 | O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; |
| 27 | O.Header.Flags = MachOObj.getHeader().flags; |
| 28 | } |
| 29 | |
| 30 | template <typename SectionType> |
| 31 | static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { |
| 32 | StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); |
| 33 | StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); |
| 34 | Section S(SegName, SectName); |
| 35 | S.Index = Index; |
| 36 | S.Addr = Sec.addr; |
| 37 | S.Size = Sec.size; |
| 38 | S.OriginalOffset = Sec.offset; |
| 39 | S.Align = Sec.align; |
| 40 | S.RelOff = Sec.reloff; |
| 41 | S.NReloc = Sec.nreloc; |
| 42 | S.Flags = Sec.flags; |
| 43 | S.Reserved1 = Sec.reserved1; |
| 44 | S.Reserved2 = Sec.reserved2; |
| 45 | S.Reserved3 = 0; |
| 46 | return S; |
| 47 | } |
| 48 | |
| 49 | Section constructSection(const MachO::section &Sec, uint32_t Index) { |
| 50 | return constructSectionCommon(Sec, Index); |
| 51 | } |
| 52 | |
| 53 | Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { |
| 54 | Section S = constructSectionCommon(Sec, Index); |
| 55 | S.Reserved3 = Sec.reserved3; |
| 56 | return S; |
| 57 | } |
| 58 | |
| 59 | template <typename SectionType, typename SegmentType> |
| 60 | Expected<std::vector<std::unique_ptr<Section>>> static extractSections( |
| 61 | const object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
| 62 | const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) { |
| 63 | std::vector<std::unique_ptr<Section>> Sections; |
| 64 | for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + |
| 65 | sizeof(SegmentType)), |
| 66 | End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + |
| 67 | LoadCmd.C.cmdsize); |
| 68 | Curr < End; ++Curr) { |
| 69 | SectionType Sec; |
| 70 | memcpy(dest: (void *)&Sec, src: reinterpret_cast<const char *>(Curr), |
| 71 | n: sizeof(SectionType)); |
| 72 | |
| 73 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) |
| 74 | MachO::swapStruct(Sec); |
| 75 | |
| 76 | Sections.push_back( |
| 77 | std::make_unique<Section>(constructSection(Sec, NextSectionIndex))); |
| 78 | |
| 79 | Section &S = *Sections.back(); |
| 80 | |
| 81 | Expected<object::SectionRef> SecRef = |
| 82 | MachOObj.getSection(SectionIndex: NextSectionIndex++); |
| 83 | if (!SecRef) |
| 84 | return SecRef.takeError(); |
| 85 | |
| 86 | Expected<ArrayRef<uint8_t>> Data = |
| 87 | MachOObj.getSectionContents(Sec: SecRef->getRawDataRefImpl()); |
| 88 | if (!Data) |
| 89 | return Data.takeError(); |
| 90 | |
| 91 | S.Content = |
| 92 | StringRef(reinterpret_cast<const char *>(Data->data()), Data->size()); |
| 93 | |
| 94 | const uint32_t CPUType = MachOObj.getHeader().cputype; |
| 95 | S.Relocations.reserve(n: S.NReloc); |
| 96 | for (auto RI = MachOObj.section_rel_begin(Sec: SecRef->getRawDataRefImpl()), |
| 97 | RE = MachOObj.section_rel_end(Sec: SecRef->getRawDataRefImpl()); |
| 98 | RI != RE; ++RI) { |
| 99 | RelocationInfo R; |
| 100 | R.Symbol = nullptr; // We'll fill this field later. |
| 101 | R.Info = MachOObj.getRelocation(Rel: RI->getRawDataRefImpl()); |
| 102 | R.Scattered = MachOObj.isRelocationScattered(RE: R.Info); |
| 103 | unsigned Type = MachOObj.getAnyRelocationType(RE: R.Info); |
| 104 | // TODO Support CPU_TYPE_ARM. |
| 105 | R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 && |
| 106 | Type == MachO::ARM64_RELOC_ADDEND); |
| 107 | R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(RE: R.Info); |
| 108 | S.Relocations.push_back(x: R); |
| 109 | } |
| 110 | |
| 111 | assert(S.NReloc == S.Relocations.size() && |
| 112 | "Incorrect number of relocations" ); |
| 113 | } |
| 114 | return std::move(Sections); |
| 115 | } |
| 116 | |
| 117 | Error MachOReader::readLoadCommands(Object &O) const { |
| 118 | // For MachO sections indices start from 1. |
| 119 | uint32_t NextSectionIndex = 1; |
| 120 | static constexpr char TextSegmentName[] = "__TEXT" ; |
| 121 | for (auto LoadCmd : MachOObj.load_commands()) { |
| 122 | LoadCommand LC; |
| 123 | switch (LoadCmd.C.cmd) { |
| 124 | case MachO::LC_CODE_SIGNATURE: |
| 125 | O.CodeSignatureCommandIndex = O.LoadCommands.size(); |
| 126 | break; |
| 127 | case MachO::LC_SEGMENT: |
| 128 | // LoadCmd.Ptr might not be aligned temporarily as |
| 129 | // MachO::segment_command requires, but the segname char pointer do not |
| 130 | // have alignment restrictions. |
| 131 | if (StringRef(reinterpret_cast<const char *>( |
| 132 | LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) == |
| 133 | TextSegmentName) |
| 134 | O.TextSegmentCommandIndex = O.LoadCommands.size(); |
| 135 | |
| 136 | if (Expected<std::vector<std::unique_ptr<Section>>> Sections = |
| 137 | extractSections<MachO::section, MachO::segment_command>( |
| 138 | LoadCmd, MachOObj, NextSectionIndex)) |
| 139 | LC.Sections = std::move(*Sections); |
| 140 | else |
| 141 | return Sections.takeError(); |
| 142 | break; |
| 143 | case MachO::LC_SEGMENT_64: |
| 144 | // LoadCmd.Ptr might not be aligned temporarily as |
| 145 | // MachO::segment_command_64 requires, but the segname char pointer do |
| 146 | // not have alignment restrictions. |
| 147 | if (StringRef(reinterpret_cast<const char *>( |
| 148 | LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) == |
| 149 | TextSegmentName) |
| 150 | O.TextSegmentCommandIndex = O.LoadCommands.size(); |
| 151 | |
| 152 | if (Expected<std::vector<std::unique_ptr<Section>>> Sections = |
| 153 | extractSections<MachO::section_64, MachO::segment_command_64>( |
| 154 | LoadCmd, MachOObj, NextSectionIndex)) |
| 155 | LC.Sections = std::move(*Sections); |
| 156 | else |
| 157 | return Sections.takeError(); |
| 158 | break; |
| 159 | case MachO::LC_SYMTAB: |
| 160 | O.SymTabCommandIndex = O.LoadCommands.size(); |
| 161 | break; |
| 162 | case MachO::LC_DYSYMTAB: |
| 163 | O.DySymTabCommandIndex = O.LoadCommands.size(); |
| 164 | break; |
| 165 | case MachO::LC_DYLD_INFO: |
| 166 | case MachO::LC_DYLD_INFO_ONLY: |
| 167 | O.DyLdInfoCommandIndex = O.LoadCommands.size(); |
| 168 | break; |
| 169 | case MachO::LC_DATA_IN_CODE: |
| 170 | O.DataInCodeCommandIndex = O.LoadCommands.size(); |
| 171 | break; |
| 172 | case MachO::LC_LINKER_OPTIMIZATION_HINT: |
| 173 | O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size(); |
| 174 | break; |
| 175 | case MachO::LC_FUNCTION_STARTS: |
| 176 | O.FunctionStartsCommandIndex = O.LoadCommands.size(); |
| 177 | break; |
| 178 | case MachO::LC_DYLIB_CODE_SIGN_DRS: |
| 179 | O.DylibCodeSignDRsIndex = O.LoadCommands.size(); |
| 180 | break; |
| 181 | case MachO::LC_DYLD_EXPORTS_TRIE: |
| 182 | O.ExportsTrieCommandIndex = O.LoadCommands.size(); |
| 183 | break; |
| 184 | case MachO::LC_DYLD_CHAINED_FIXUPS: |
| 185 | O.ChainedFixupsCommandIndex = O.LoadCommands.size(); |
| 186 | break; |
| 187 | } |
| 188 | #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ |
| 189 | case MachO::LCName: \ |
| 190 | memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ |
| 191 | sizeof(MachO::LCStruct)); \ |
| 192 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ |
| 193 | MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ |
| 194 | if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \ |
| 195 | LC.Payload = ArrayRef<uint8_t>( \ |
| 196 | reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \ |
| 197 | sizeof(MachO::LCStruct), \ |
| 198 | LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ |
| 199 | break; |
| 200 | |
| 201 | switch (LoadCmd.C.cmd) { |
| 202 | default: |
| 203 | memcpy(dest: (void *)&(LC.MachOLoadCommand.load_command_data), src: LoadCmd.Ptr, |
| 204 | n: sizeof(MachO::load_command)); |
| 205 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) |
| 206 | MachO::swapStruct(lc&: LC.MachOLoadCommand.load_command_data); |
| 207 | if (LoadCmd.C.cmdsize > sizeof(MachO::load_command)) |
| 208 | LC.Payload = ArrayRef<uint8_t>( |
| 209 | reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + |
| 210 | sizeof(MachO::load_command), |
| 211 | LoadCmd.C.cmdsize - sizeof(MachO::load_command)); |
| 212 | break; |
| 213 | #include "llvm/BinaryFormat/MachO.def" |
| 214 | } |
| 215 | O.LoadCommands.push_back(x: std::move(LC)); |
| 216 | } |
| 217 | return Error::success(); |
| 218 | } |
| 219 | |
| 220 | template <typename nlist_t> |
| 221 | SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { |
| 222 | assert(nlist.n_strx < StrTable.size() && |
| 223 | "n_strx exceeds the size of the string table" ); |
| 224 | SymbolEntry SE; |
| 225 | SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); |
| 226 | SE.n_type = nlist.n_type; |
| 227 | SE.n_sect = nlist.n_sect; |
| 228 | SE.n_desc = nlist.n_desc; |
| 229 | SE.n_value = nlist.n_value; |
| 230 | return SE; |
| 231 | } |
| 232 | |
| 233 | void MachOReader::readSymbolTable(Object &O) const { |
| 234 | StringRef StrTable = MachOObj.getStringTableData(); |
| 235 | for (auto Symbol : MachOObj.symbols()) { |
| 236 | SymbolEntry SE = |
| 237 | (MachOObj.is64Bit() |
| 238 | ? constructSymbolEntry(StrTable, nlist: MachOObj.getSymbol64TableEntry( |
| 239 | DRI: Symbol.getRawDataRefImpl())) |
| 240 | : constructSymbolEntry(StrTable, nlist: MachOObj.getSymbolTableEntry( |
| 241 | DRI: Symbol.getRawDataRefImpl()))); |
| 242 | |
| 243 | O.SymTable.Symbols.push_back(x: std::make_unique<SymbolEntry>(args&: SE)); |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | void MachOReader::setSymbolInRelocationInfo(Object &O) const { |
| 248 | std::vector<const Section *> Sections; |
| 249 | for (auto &LC : O.LoadCommands) |
| 250 | for (std::unique_ptr<Section> &Sec : LC.Sections) |
| 251 | Sections.push_back(x: Sec.get()); |
| 252 | |
| 253 | for (LoadCommand &LC : O.LoadCommands) |
| 254 | for (std::unique_ptr<Section> &Sec : LC.Sections) |
| 255 | for (auto &Reloc : Sec->Relocations) |
| 256 | if (!Reloc.Scattered && !Reloc.IsAddend) { |
| 257 | const uint32_t SymbolNum = |
| 258 | Reloc.getPlainRelocationSymbolNum(IsLittleEndian: MachOObj.isLittleEndian()); |
| 259 | if (Reloc.Extern) { |
| 260 | Reloc.Symbol = O.SymTable.getSymbolByIndex(Index: SymbolNum); |
| 261 | } else { |
| 262 | // FIXME: Refactor error handling in MachOReader and report an error |
| 263 | // if we encounter an invalid relocation. |
| 264 | assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && |
| 265 | "Invalid section index." ); |
| 266 | Reloc.Sec = Sections[SymbolNum - 1]; |
| 267 | } |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | void MachOReader::readRebaseInfo(Object &O) const { |
| 272 | O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); |
| 273 | } |
| 274 | |
| 275 | void MachOReader::readBindInfo(Object &O) const { |
| 276 | O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); |
| 277 | } |
| 278 | |
| 279 | void MachOReader::readWeakBindInfo(Object &O) const { |
| 280 | O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); |
| 281 | } |
| 282 | |
| 283 | void MachOReader::readLazyBindInfo(Object &O) const { |
| 284 | O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); |
| 285 | } |
| 286 | |
| 287 | void MachOReader::readExportInfo(Object &O) const { |
| 288 | // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE |
| 289 | ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie(); |
| 290 | if (Trie.empty()) |
| 291 | Trie = MachOObj.getDyldExportsTrie(); |
| 292 | O.Exports.Trie = Trie; |
| 293 | } |
| 294 | |
| 295 | void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex, |
| 296 | LinkData &LD) const { |
| 297 | if (!LCIndex) |
| 298 | return; |
| 299 | const MachO::linkedit_data_command &LC = |
| 300 | O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; |
| 301 | LD.Data = |
| 302 | arrayRefFromStringRef(Input: MachOObj.getData().substr(Start: LC.dataoff, N: LC.datasize)); |
| 303 | } |
| 304 | |
| 305 | void MachOReader::readDataInCodeData(Object &O) const { |
| 306 | return readLinkData(O, LCIndex: O.DataInCodeCommandIndex, LD&: O.DataInCode); |
| 307 | } |
| 308 | |
| 309 | void MachOReader::readLinkerOptimizationHint(Object &O) const { |
| 310 | return readLinkData(O, LCIndex: O.LinkerOptimizationHintCommandIndex, |
| 311 | LD&: O.LinkerOptimizationHint); |
| 312 | } |
| 313 | |
| 314 | void MachOReader::readFunctionStartsData(Object &O) const { |
| 315 | return readLinkData(O, LCIndex: O.FunctionStartsCommandIndex, LD&: O.FunctionStarts); |
| 316 | } |
| 317 | |
| 318 | void MachOReader::readDylibCodeSignDRs(Object &O) const { |
| 319 | return readLinkData(O, LCIndex: O.DylibCodeSignDRsIndex, LD&: O.DylibCodeSignDRs); |
| 320 | } |
| 321 | |
| 322 | void MachOReader::readExportsTrie(Object &O) const { |
| 323 | return readLinkData(O, LCIndex: O.ExportsTrieCommandIndex, LD&: O.ExportsTrie); |
| 324 | } |
| 325 | |
| 326 | void MachOReader::readChainedFixups(Object &O) const { |
| 327 | return readLinkData(O, LCIndex: O.ChainedFixupsCommandIndex, LD&: O.ChainedFixups); |
| 328 | } |
| 329 | |
| 330 | void MachOReader::readIndirectSymbolTable(Object &O) const { |
| 331 | MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); |
| 332 | constexpr uint32_t AbsOrLocalMask = |
| 333 | MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS; |
| 334 | for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { |
| 335 | uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DLC: DySymTab, Index: i); |
| 336 | if ((Index & AbsOrLocalMask) != 0) |
| 337 | O.IndirectSymTable.Symbols.emplace_back(args&: Index, args: std::nullopt); |
| 338 | else |
| 339 | O.IndirectSymTable.Symbols.emplace_back( |
| 340 | args&: Index, args: O.SymTable.getSymbolByIndex(Index)); |
| 341 | } |
| 342 | } |
| 343 | |
| 344 | void MachOReader::readSwiftVersion(Object &O) const { |
| 345 | struct ObjCImageInfo { |
| 346 | uint32_t Version; |
| 347 | uint32_t Flags; |
| 348 | } ImageInfo; |
| 349 | |
| 350 | for (const LoadCommand &LC : O.LoadCommands) |
| 351 | for (const std::unique_ptr<Section> &Sec : LC.Sections) |
| 352 | if (Sec->Sectname == "__objc_imageinfo" && |
| 353 | (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || |
| 354 | Sec->Segname == "__DATA_DIRTY" ) && |
| 355 | Sec->Content.size() >= sizeof(ObjCImageInfo)) { |
| 356 | memcpy(dest: &ImageInfo, src: Sec->Content.data(), n: sizeof(ObjCImageInfo)); |
| 357 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { |
| 358 | sys::swapByteOrder(Value&: ImageInfo.Version); |
| 359 | sys::swapByteOrder(Value&: ImageInfo.Flags); |
| 360 | } |
| 361 | O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; |
| 362 | return; |
| 363 | } |
| 364 | } |
| 365 | |
| 366 | Expected<std::unique_ptr<Object>> MachOReader::create() const { |
| 367 | auto Obj = std::make_unique<Object>(); |
| 368 | readHeader(O&: *Obj); |
| 369 | if (Error E = readLoadCommands(O&: *Obj)) |
| 370 | return std::move(E); |
| 371 | readSymbolTable(O&: *Obj); |
| 372 | setSymbolInRelocationInfo(*Obj); |
| 373 | readRebaseInfo(O&: *Obj); |
| 374 | readBindInfo(O&: *Obj); |
| 375 | readWeakBindInfo(O&: *Obj); |
| 376 | readLazyBindInfo(O&: *Obj); |
| 377 | readExportInfo(O&: *Obj); |
| 378 | readDataInCodeData(O&: *Obj); |
| 379 | readLinkerOptimizationHint(O&: *Obj); |
| 380 | readFunctionStartsData(O&: *Obj); |
| 381 | readDylibCodeSignDRs(O&: *Obj); |
| 382 | readExportsTrie(O&: *Obj); |
| 383 | readChainedFixups(O&: *Obj); |
| 384 | readIndirectSymbolTable(O&: *Obj); |
| 385 | readSwiftVersion(O&: *Obj); |
| 386 | return std::move(Obj); |
| 387 | } |
| 388 | |