1//===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOReader.h"
10#include "MachOObject.h"
11#include "llvm/BinaryFormat/MachO.h"
12#include "llvm/Object/MachO.h"
13#include "llvm/Support/Errc.h"
14#include "llvm/Support/SystemZ/zOSSupport.h"
15#include <memory>
16
17using namespace llvm;
18using namespace llvm::objcopy;
19using namespace llvm::objcopy::macho;
20
21void MachOReader::readHeader(Object &O) const {
22 O.Header.Magic = MachOObj.getHeader().magic;
23 O.Header.CPUType = MachOObj.getHeader().cputype;
24 O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
25 O.Header.FileType = MachOObj.getHeader().filetype;
26 O.Header.NCmds = MachOObj.getHeader().ncmds;
27 O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
28 O.Header.Flags = MachOObj.getHeader().flags;
29}
30
31template <typename SectionType>
32static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) {
33 StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
34 StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
35 Section S(SegName, SectName);
36 S.Index = Index;
37 S.Addr = Sec.addr;
38 S.Size = Sec.size;
39 S.OriginalOffset = Sec.offset;
40 S.Align = Sec.align;
41 S.RelOff = Sec.reloff;
42 S.NReloc = Sec.nreloc;
43 S.Flags = Sec.flags;
44 S.Reserved1 = Sec.reserved1;
45 S.Reserved2 = Sec.reserved2;
46 S.Reserved3 = 0;
47 return S;
48}
49
50Section constructSection(const MachO::section &Sec, uint32_t Index) {
51 return constructSectionCommon(Sec, Index);
52}
53
54Section constructSection(const MachO::section_64 &Sec, uint32_t Index) {
55 Section S = constructSectionCommon(Sec, Index);
56 S.Reserved3 = Sec.reserved3;
57 return S;
58}
59
60template <typename SectionType, typename SegmentType>
61Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
62 const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
63 const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) {
64 std::vector<std::unique_ptr<Section>> Sections;
65 for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
66 sizeof(SegmentType)),
67 End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
68 LoadCmd.C.cmdsize);
69 Curr < End; ++Curr) {
70 SectionType Sec;
71 memcpy(dest: (void *)&Sec, src: reinterpret_cast<const char *>(Curr),
72 n: sizeof(SectionType));
73
74 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
75 MachO::swapStruct(Sec);
76
77 Sections.push_back(
78 std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
79
80 Section &S = *Sections.back();
81
82 Expected<object::SectionRef> SecRef =
83 MachOObj.getSection(SectionIndex: NextSectionIndex++);
84 if (!SecRef)
85 return SecRef.takeError();
86
87 Expected<ArrayRef<uint8_t>> Data =
88 MachOObj.getSectionContents(Sec: SecRef->getRawDataRefImpl());
89 if (!Data)
90 return Data.takeError();
91
92 S.Content =
93 StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());
94
95 const uint32_t CPUType = MachOObj.getHeader().cputype;
96 S.Relocations.reserve(n: S.NReloc);
97 for (auto RI = MachOObj.section_rel_begin(Sec: SecRef->getRawDataRefImpl()),
98 RE = MachOObj.section_rel_end(Sec: SecRef->getRawDataRefImpl());
99 RI != RE; ++RI) {
100 RelocationInfo R;
101 R.Symbol = nullptr; // We'll fill this field later.
102 R.Info = MachOObj.getRelocation(Rel: RI->getRawDataRefImpl());
103 R.Scattered = MachOObj.isRelocationScattered(RE: R.Info);
104 unsigned Type = MachOObj.getAnyRelocationType(RE: R.Info);
105 // TODO Support CPU_TYPE_ARM.
106 R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 &&
107 Type == MachO::ARM64_RELOC_ADDEND);
108 R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(RE: R.Info);
109 S.Relocations.push_back(x: R);
110 }
111
112 assert(S.NReloc == S.Relocations.size() &&
113 "Incorrect number of relocations");
114 }
115 return std::move(Sections);
116}
117
118Error MachOReader::readLoadCommands(Object &O) const {
119 // For MachO sections indices start from 1.
120 uint32_t NextSectionIndex = 1;
121 static constexpr char TextSegmentName[] = "__TEXT";
122 for (auto LoadCmd : MachOObj.load_commands()) {
123 LoadCommand LC;
124 switch (LoadCmd.C.cmd) {
125 case MachO::LC_CODE_SIGNATURE:
126 O.CodeSignatureCommandIndex = O.LoadCommands.size();
127 break;
128 case MachO::LC_SEGMENT:
129 // LoadCmd.Ptr might not be aligned temporarily as
130 // MachO::segment_command requires, but the segname char pointer do not
131 // have alignment restrictions.
132 if (StringRef(reinterpret_cast<const char *>(
133 LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==
134 TextSegmentName)
135 O.TextSegmentCommandIndex = O.LoadCommands.size();
136
137 if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
138 extractSections<MachO::section, MachO::segment_command>(
139 LoadCmd, MachOObj, NextSectionIndex))
140 LC.Sections = std::move(*Sections);
141 else
142 return Sections.takeError();
143 break;
144 case MachO::LC_SEGMENT_64:
145 // LoadCmd.Ptr might not be aligned temporarily as
146 // MachO::segment_command_64 requires, but the segname char pointer do
147 // not have alignment restrictions.
148 if (StringRef(reinterpret_cast<const char *>(
149 LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==
150 TextSegmentName)
151 O.TextSegmentCommandIndex = O.LoadCommands.size();
152
153 if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
154 extractSections<MachO::section_64, MachO::segment_command_64>(
155 LoadCmd, MachOObj, NextSectionIndex))
156 LC.Sections = std::move(*Sections);
157 else
158 return Sections.takeError();
159 break;
160 case MachO::LC_SYMTAB:
161 O.SymTabCommandIndex = O.LoadCommands.size();
162 break;
163 case MachO::LC_DYSYMTAB:
164 O.DySymTabCommandIndex = O.LoadCommands.size();
165 break;
166 case MachO::LC_DYLD_INFO:
167 case MachO::LC_DYLD_INFO_ONLY:
168 O.DyLdInfoCommandIndex = O.LoadCommands.size();
169 break;
170 case MachO::LC_DATA_IN_CODE:
171 O.DataInCodeCommandIndex = O.LoadCommands.size();
172 break;
173 case MachO::LC_LINKER_OPTIMIZATION_HINT:
174 O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size();
175 break;
176 case MachO::LC_FUNCTION_STARTS:
177 O.FunctionStartsCommandIndex = O.LoadCommands.size();
178 break;
179 case MachO::LC_DYLIB_CODE_SIGN_DRS:
180 O.DylibCodeSignDRsIndex = O.LoadCommands.size();
181 break;
182 case MachO::LC_DYLD_EXPORTS_TRIE:
183 O.ExportsTrieCommandIndex = O.LoadCommands.size();
184 break;
185 case MachO::LC_DYLD_CHAINED_FIXUPS:
186 O.ChainedFixupsCommandIndex = O.LoadCommands.size();
187 break;
188 }
189#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
190 case MachO::LCName: \
191 memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \
192 sizeof(MachO::LCStruct)); \
193 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \
194 MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \
195 if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \
196 LC.Payload = ArrayRef<uint8_t>( \
197 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \
198 sizeof(MachO::LCStruct), \
199 LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \
200 break;
201
202 switch (LoadCmd.C.cmd) {
203 default:
204 memcpy(dest: (void *)&(LC.MachOLoadCommand.load_command_data), src: LoadCmd.Ptr,
205 n: sizeof(MachO::load_command));
206 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
207 MachO::swapStruct(lc&: LC.MachOLoadCommand.load_command_data);
208 if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
209 LC.Payload = ArrayRef<uint8_t>(
210 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
211 sizeof(MachO::load_command),
212 LoadCmd.C.cmdsize - sizeof(MachO::load_command));
213 break;
214#include "llvm/BinaryFormat/MachO.def"
215 }
216 O.LoadCommands.push_back(x: std::move(LC));
217 }
218 return Error::success();
219}
220
221template <typename nlist_t>
222SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
223 assert(nlist.n_strx < StrTable.size() &&
224 "n_strx exceeds the size of the string table");
225 SymbolEntry SE;
226 SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
227 SE.n_type = nlist.n_type;
228 SE.n_sect = nlist.n_sect;
229 SE.n_desc = nlist.n_desc;
230 SE.n_value = nlist.n_value;
231 return SE;
232}
233
234void MachOReader::readSymbolTable(Object &O) const {
235 StringRef StrTable = MachOObj.getStringTableData();
236 for (auto Symbol : MachOObj.symbols()) {
237 SymbolEntry SE =
238 (MachOObj.is64Bit()
239 ? constructSymbolEntry(StrTable, nlist: MachOObj.getSymbol64TableEntry(
240 DRI: Symbol.getRawDataRefImpl()))
241 : constructSymbolEntry(StrTable, nlist: MachOObj.getSymbolTableEntry(
242 DRI: Symbol.getRawDataRefImpl())));
243
244 O.SymTable.Symbols.push_back(x: std::make_unique<SymbolEntry>(args&: SE));
245 }
246}
247
248void MachOReader::setSymbolInRelocationInfo(Object &O) const {
249 std::vector<const Section *> Sections;
250 for (auto &LC : O.LoadCommands)
251 for (std::unique_ptr<Section> &Sec : LC.Sections)
252 Sections.push_back(x: Sec.get());
253
254 for (LoadCommand &LC : O.LoadCommands)
255 for (std::unique_ptr<Section> &Sec : LC.Sections)
256 for (auto &Reloc : Sec->Relocations)
257 if (!Reloc.Scattered && !Reloc.IsAddend) {
258 const uint32_t SymbolNum =
259 Reloc.getPlainRelocationSymbolNum(IsLittleEndian: MachOObj.isLittleEndian());
260 if (Reloc.Extern) {
261 Reloc.Symbol = O.SymTable.getSymbolByIndex(Index: SymbolNum);
262 } else {
263 // FIXME: Refactor error handling in MachOReader and report an error
264 // if we encounter an invalid relocation.
265 assert(SymbolNum >= 1 && SymbolNum <= Sections.size() &&
266 "Invalid section index.");
267 Reloc.Sec = Sections[SymbolNum - 1];
268 }
269 }
270}
271
272void MachOReader::readRebaseInfo(Object &O) const {
273 O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
274}
275
276void MachOReader::readBindInfo(Object &O) const {
277 O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
278}
279
280void MachOReader::readWeakBindInfo(Object &O) const {
281 O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
282}
283
284void MachOReader::readLazyBindInfo(Object &O) const {
285 O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
286}
287
288void MachOReader::readExportInfo(Object &O) const {
289 // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE
290 ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie();
291 if (Trie.empty())
292 Trie = MachOObj.getDyldExportsTrie();
293 O.Exports.Trie = Trie;
294}
295
296void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex,
297 LinkData &LD) const {
298 if (!LCIndex)
299 return;
300 const MachO::linkedit_data_command &LC =
301 O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
302 LD.Data =
303 arrayRefFromStringRef(Input: MachOObj.getData().substr(Start: LC.dataoff, N: LC.datasize));
304}
305
306void MachOReader::readDataInCodeData(Object &O) const {
307 return readLinkData(O, LCIndex: O.DataInCodeCommandIndex, LD&: O.DataInCode);
308}
309
310void MachOReader::readLinkerOptimizationHint(Object &O) const {
311 return readLinkData(O, LCIndex: O.LinkerOptimizationHintCommandIndex,
312 LD&: O.LinkerOptimizationHint);
313}
314
315void MachOReader::readFunctionStartsData(Object &O) const {
316 return readLinkData(O, LCIndex: O.FunctionStartsCommandIndex, LD&: O.FunctionStarts);
317}
318
319void MachOReader::readDylibCodeSignDRs(Object &O) const {
320 return readLinkData(O, LCIndex: O.DylibCodeSignDRsIndex, LD&: O.DylibCodeSignDRs);
321}
322
323void MachOReader::readExportsTrie(Object &O) const {
324 return readLinkData(O, LCIndex: O.ExportsTrieCommandIndex, LD&: O.ExportsTrie);
325}
326
327void MachOReader::readChainedFixups(Object &O) const {
328 return readLinkData(O, LCIndex: O.ChainedFixupsCommandIndex, LD&: O.ChainedFixups);
329}
330
331void MachOReader::readIndirectSymbolTable(Object &O) const {
332 MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
333 constexpr uint32_t AbsOrLocalMask =
334 MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
335 for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
336 uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DLC: DySymTab, Index: i);
337 if ((Index & AbsOrLocalMask) != 0)
338 O.IndirectSymTable.Symbols.emplace_back(args&: Index, args: std::nullopt);
339 else
340 O.IndirectSymTable.Symbols.emplace_back(
341 args&: Index, args: O.SymTable.getSymbolByIndex(Index));
342 }
343}
344
345void MachOReader::readSwiftVersion(Object &O) const {
346 struct ObjCImageInfo {
347 uint32_t Version;
348 uint32_t Flags;
349 } ImageInfo;
350
351 for (const LoadCommand &LC : O.LoadCommands)
352 for (const std::unique_ptr<Section> &Sec : LC.Sections)
353 if (Sec->Sectname == "__objc_imageinfo" &&
354 (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" ||
355 Sec->Segname == "__DATA_DIRTY") &&
356 Sec->Content.size() >= sizeof(ObjCImageInfo)) {
357 memcpy(dest: &ImageInfo, src: Sec->Content.data(), n: sizeof(ObjCImageInfo));
358 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
359 sys::swapByteOrder(Value&: ImageInfo.Version);
360 sys::swapByteOrder(Value&: ImageInfo.Flags);
361 }
362 O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff;
363 return;
364 }
365}
366
367Expected<std::unique_ptr<Object>> MachOReader::create() const {
368 auto Obj = std::make_unique<Object>();
369 readHeader(O&: *Obj);
370 if (Error E = readLoadCommands(O&: *Obj))
371 return std::move(E);
372 readSymbolTable(O&: *Obj);
373 setSymbolInRelocationInfo(*Obj);
374 readRebaseInfo(O&: *Obj);
375 readBindInfo(O&: *Obj);
376 readWeakBindInfo(O&: *Obj);
377 readLazyBindInfo(O&: *Obj);
378 readExportInfo(O&: *Obj);
379 readDataInCodeData(O&: *Obj);
380 readLinkerOptimizationHint(O&: *Obj);
381 readFunctionStartsData(O&: *Obj);
382 readDylibCodeSignDRs(O&: *Obj);
383 readExportsTrie(O&: *Obj);
384 readChainedFixups(O&: *Obj);
385 readIndirectSymbolTable(O&: *Obj);
386 readSwiftVersion(O&: *Obj);
387 return std::move(Obj);
388}
389