1//===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOReader.h"
10#include "MachOObject.h"
11#include "llvm/BinaryFormat/MachO.h"
12#include "llvm/Object/MachO.h"
13#include "llvm/Support/SystemZ/zOSSupport.h"
14#include <memory>
15
16using namespace llvm;
17using namespace llvm::objcopy;
18using namespace llvm::objcopy::macho;
19
20void MachOReader::readHeader(Object &O) const {
21 O.Header.Magic = MachOObj.getHeader().magic;
22 O.Header.CPUType = MachOObj.getHeader().cputype;
23 O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
24 O.Header.FileType = MachOObj.getHeader().filetype;
25 O.Header.NCmds = MachOObj.getHeader().ncmds;
26 O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
27 O.Header.Flags = MachOObj.getHeader().flags;
28}
29
30template <typename SectionType>
31static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) {
32 StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
33 StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
34 Section S(SegName, SectName);
35 S.Index = Index;
36 S.Addr = Sec.addr;
37 S.Size = Sec.size;
38 S.OriginalOffset = Sec.offset;
39 S.Align = Sec.align;
40 S.RelOff = Sec.reloff;
41 S.NReloc = Sec.nreloc;
42 S.Flags = Sec.flags;
43 S.Reserved1 = Sec.reserved1;
44 S.Reserved2 = Sec.reserved2;
45 S.Reserved3 = 0;
46 return S;
47}
48
49Section constructSection(const MachO::section &Sec, uint32_t Index) {
50 return constructSectionCommon(Sec, Index);
51}
52
53Section constructSection(const MachO::section_64 &Sec, uint32_t Index) {
54 Section S = constructSectionCommon(Sec, Index);
55 S.Reserved3 = Sec.reserved3;
56 return S;
57}
58
59template <typename SectionType, typename SegmentType>
60Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
61 const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
62 const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) {
63 std::vector<std::unique_ptr<Section>> Sections;
64 for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
65 sizeof(SegmentType)),
66 End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
67 LoadCmd.C.cmdsize);
68 Curr < End; ++Curr) {
69 SectionType Sec;
70 memcpy(dest: (void *)&Sec, src: reinterpret_cast<const char *>(Curr),
71 n: sizeof(SectionType));
72
73 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
74 MachO::swapStruct(Sec);
75
76 Sections.push_back(
77 std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
78
79 Section &S = *Sections.back();
80
81 Expected<object::SectionRef> SecRef =
82 MachOObj.getSection(SectionIndex: NextSectionIndex++);
83 if (!SecRef)
84 return SecRef.takeError();
85
86 Expected<ArrayRef<uint8_t>> Data =
87 MachOObj.getSectionContents(Sec: SecRef->getRawDataRefImpl());
88 if (!Data)
89 return Data.takeError();
90
91 S.Content =
92 StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());
93
94 const uint32_t CPUType = MachOObj.getHeader().cputype;
95 S.Relocations.reserve(n: S.NReloc);
96 for (auto RI = MachOObj.section_rel_begin(Sec: SecRef->getRawDataRefImpl()),
97 RE = MachOObj.section_rel_end(Sec: SecRef->getRawDataRefImpl());
98 RI != RE; ++RI) {
99 RelocationInfo R;
100 R.Symbol = nullptr; // We'll fill this field later.
101 R.Info = MachOObj.getRelocation(Rel: RI->getRawDataRefImpl());
102 R.Scattered = MachOObj.isRelocationScattered(RE: R.Info);
103 unsigned Type = MachOObj.getAnyRelocationType(RE: R.Info);
104 // TODO Support CPU_TYPE_ARM.
105 R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 &&
106 Type == MachO::ARM64_RELOC_ADDEND);
107 R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(RE: R.Info);
108 S.Relocations.push_back(x: R);
109 }
110
111 assert(S.NReloc == S.Relocations.size() &&
112 "Incorrect number of relocations");
113 }
114 return std::move(Sections);
115}
116
117Error MachOReader::readLoadCommands(Object &O) const {
118 // For MachO sections indices start from 1.
119 uint32_t NextSectionIndex = 1;
120 static constexpr char TextSegmentName[] = "__TEXT";
121 for (auto LoadCmd : MachOObj.load_commands()) {
122 LoadCommand LC;
123 switch (LoadCmd.C.cmd) {
124 case MachO::LC_CODE_SIGNATURE:
125 O.CodeSignatureCommandIndex = O.LoadCommands.size();
126 break;
127 case MachO::LC_SEGMENT:
128 // LoadCmd.Ptr might not be aligned temporarily as
129 // MachO::segment_command requires, but the segname char pointer do not
130 // have alignment restrictions.
131 if (StringRef(reinterpret_cast<const char *>(
132 LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==
133 TextSegmentName)
134 O.TextSegmentCommandIndex = O.LoadCommands.size();
135
136 if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
137 extractSections<MachO::section, MachO::segment_command>(
138 LoadCmd, MachOObj, NextSectionIndex))
139 LC.Sections = std::move(*Sections);
140 else
141 return Sections.takeError();
142 break;
143 case MachO::LC_SEGMENT_64:
144 // LoadCmd.Ptr might not be aligned temporarily as
145 // MachO::segment_command_64 requires, but the segname char pointer do
146 // not have alignment restrictions.
147 if (StringRef(reinterpret_cast<const char *>(
148 LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==
149 TextSegmentName)
150 O.TextSegmentCommandIndex = O.LoadCommands.size();
151
152 if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
153 extractSections<MachO::section_64, MachO::segment_command_64>(
154 LoadCmd, MachOObj, NextSectionIndex))
155 LC.Sections = std::move(*Sections);
156 else
157 return Sections.takeError();
158 break;
159 case MachO::LC_SYMTAB:
160 O.SymTabCommandIndex = O.LoadCommands.size();
161 break;
162 case MachO::LC_DYSYMTAB:
163 O.DySymTabCommandIndex = O.LoadCommands.size();
164 break;
165 case MachO::LC_DYLD_INFO:
166 case MachO::LC_DYLD_INFO_ONLY:
167 O.DyLdInfoCommandIndex = O.LoadCommands.size();
168 break;
169 case MachO::LC_DATA_IN_CODE:
170 O.DataInCodeCommandIndex = O.LoadCommands.size();
171 break;
172 case MachO::LC_LINKER_OPTIMIZATION_HINT:
173 O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size();
174 break;
175 case MachO::LC_FUNCTION_STARTS:
176 O.FunctionStartsCommandIndex = O.LoadCommands.size();
177 break;
178 case MachO::LC_DYLIB_CODE_SIGN_DRS:
179 O.DylibCodeSignDRsIndex = O.LoadCommands.size();
180 break;
181 case MachO::LC_DYLD_EXPORTS_TRIE:
182 O.ExportsTrieCommandIndex = O.LoadCommands.size();
183 break;
184 case MachO::LC_DYLD_CHAINED_FIXUPS:
185 O.ChainedFixupsCommandIndex = O.LoadCommands.size();
186 break;
187 }
188#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
189 case MachO::LCName: \
190 memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \
191 sizeof(MachO::LCStruct)); \
192 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \
193 MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \
194 if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \
195 LC.Payload = ArrayRef<uint8_t>( \
196 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \
197 sizeof(MachO::LCStruct), \
198 LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \
199 break;
200
201 switch (LoadCmd.C.cmd) {
202 default:
203 memcpy(dest: (void *)&(LC.MachOLoadCommand.load_command_data), src: LoadCmd.Ptr,
204 n: sizeof(MachO::load_command));
205 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
206 MachO::swapStruct(lc&: LC.MachOLoadCommand.load_command_data);
207 if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
208 LC.Payload = ArrayRef<uint8_t>(
209 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
210 sizeof(MachO::load_command),
211 LoadCmd.C.cmdsize - sizeof(MachO::load_command));
212 break;
213#include "llvm/BinaryFormat/MachO.def"
214 }
215 O.LoadCommands.push_back(x: std::move(LC));
216 }
217 return Error::success();
218}
219
220template <typename nlist_t>
221SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
222 assert(nlist.n_strx < StrTable.size() &&
223 "n_strx exceeds the size of the string table");
224 SymbolEntry SE;
225 SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
226 SE.n_type = nlist.n_type;
227 SE.n_sect = nlist.n_sect;
228 SE.n_desc = nlist.n_desc;
229 SE.n_value = nlist.n_value;
230 return SE;
231}
232
233void MachOReader::readSymbolTable(Object &O) const {
234 StringRef StrTable = MachOObj.getStringTableData();
235 for (auto Symbol : MachOObj.symbols()) {
236 SymbolEntry SE =
237 (MachOObj.is64Bit()
238 ? constructSymbolEntry(StrTable, nlist: MachOObj.getSymbol64TableEntry(
239 DRI: Symbol.getRawDataRefImpl()))
240 : constructSymbolEntry(StrTable, nlist: MachOObj.getSymbolTableEntry(
241 DRI: Symbol.getRawDataRefImpl())));
242
243 O.SymTable.Symbols.push_back(x: std::make_unique<SymbolEntry>(args&: SE));
244 }
245}
246
247void MachOReader::setSymbolInRelocationInfo(Object &O) const {
248 std::vector<const Section *> Sections;
249 for (auto &LC : O.LoadCommands)
250 for (std::unique_ptr<Section> &Sec : LC.Sections)
251 Sections.push_back(x: Sec.get());
252
253 for (LoadCommand &LC : O.LoadCommands)
254 for (std::unique_ptr<Section> &Sec : LC.Sections)
255 for (auto &Reloc : Sec->Relocations)
256 if (!Reloc.Scattered && !Reloc.IsAddend) {
257 const uint32_t SymbolNum =
258 Reloc.getPlainRelocationSymbolNum(IsLittleEndian: MachOObj.isLittleEndian());
259 if (Reloc.Extern) {
260 Reloc.Symbol = O.SymTable.getSymbolByIndex(Index: SymbolNum);
261 } else {
262 // FIXME: Refactor error handling in MachOReader and report an error
263 // if we encounter an invalid relocation.
264 assert(SymbolNum >= 1 && SymbolNum <= Sections.size() &&
265 "Invalid section index.");
266 Reloc.Sec = Sections[SymbolNum - 1];
267 }
268 }
269}
270
271void MachOReader::readRebaseInfo(Object &O) const {
272 O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
273}
274
275void MachOReader::readBindInfo(Object &O) const {
276 O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
277}
278
279void MachOReader::readWeakBindInfo(Object &O) const {
280 O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
281}
282
283void MachOReader::readLazyBindInfo(Object &O) const {
284 O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
285}
286
287void MachOReader::readExportInfo(Object &O) const {
288 // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE
289 ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie();
290 if (Trie.empty())
291 Trie = MachOObj.getDyldExportsTrie();
292 O.Exports.Trie = Trie;
293}
294
295void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex,
296 LinkData &LD) const {
297 if (!LCIndex)
298 return;
299 const MachO::linkedit_data_command &LC =
300 O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
301 LD.Data =
302 arrayRefFromStringRef(Input: MachOObj.getData().substr(Start: LC.dataoff, N: LC.datasize));
303}
304
305void MachOReader::readDataInCodeData(Object &O) const {
306 return readLinkData(O, LCIndex: O.DataInCodeCommandIndex, LD&: O.DataInCode);
307}
308
309void MachOReader::readLinkerOptimizationHint(Object &O) const {
310 return readLinkData(O, LCIndex: O.LinkerOptimizationHintCommandIndex,
311 LD&: O.LinkerOptimizationHint);
312}
313
314void MachOReader::readFunctionStartsData(Object &O) const {
315 return readLinkData(O, LCIndex: O.FunctionStartsCommandIndex, LD&: O.FunctionStarts);
316}
317
318void MachOReader::readDylibCodeSignDRs(Object &O) const {
319 return readLinkData(O, LCIndex: O.DylibCodeSignDRsIndex, LD&: O.DylibCodeSignDRs);
320}
321
322void MachOReader::readExportsTrie(Object &O) const {
323 return readLinkData(O, LCIndex: O.ExportsTrieCommandIndex, LD&: O.ExportsTrie);
324}
325
326void MachOReader::readChainedFixups(Object &O) const {
327 return readLinkData(O, LCIndex: O.ChainedFixupsCommandIndex, LD&: O.ChainedFixups);
328}
329
330void MachOReader::readIndirectSymbolTable(Object &O) const {
331 MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
332 constexpr uint32_t AbsOrLocalMask =
333 MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
334 for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
335 uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DLC: DySymTab, Index: i);
336 if ((Index & AbsOrLocalMask) != 0)
337 O.IndirectSymTable.Symbols.emplace_back(args&: Index, args: std::nullopt);
338 else
339 O.IndirectSymTable.Symbols.emplace_back(
340 args&: Index, args: O.SymTable.getSymbolByIndex(Index));
341 }
342}
343
344void MachOReader::readSwiftVersion(Object &O) const {
345 struct ObjCImageInfo {
346 uint32_t Version;
347 uint32_t Flags;
348 } ImageInfo;
349
350 for (const LoadCommand &LC : O.LoadCommands)
351 for (const std::unique_ptr<Section> &Sec : LC.Sections)
352 if (Sec->Sectname == "__objc_imageinfo" &&
353 (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" ||
354 Sec->Segname == "__DATA_DIRTY") &&
355 Sec->Content.size() >= sizeof(ObjCImageInfo)) {
356 memcpy(dest: &ImageInfo, src: Sec->Content.data(), n: sizeof(ObjCImageInfo));
357 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
358 sys::swapByteOrder(Value&: ImageInfo.Version);
359 sys::swapByteOrder(Value&: ImageInfo.Flags);
360 }
361 O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff;
362 return;
363 }
364}
365
366Expected<std::unique_ptr<Object>> MachOReader::create() const {
367 auto Obj = std::make_unique<Object>();
368 readHeader(O&: *Obj);
369 if (Error E = readLoadCommands(O&: *Obj))
370 return std::move(E);
371 readSymbolTable(O&: *Obj);
372 setSymbolInRelocationInfo(*Obj);
373 readRebaseInfo(O&: *Obj);
374 readBindInfo(O&: *Obj);
375 readWeakBindInfo(O&: *Obj);
376 readLazyBindInfo(O&: *Obj);
377 readExportInfo(O&: *Obj);
378 readDataInCodeData(O&: *Obj);
379 readLinkerOptimizationHint(O&: *Obj);
380 readFunctionStartsData(O&: *Obj);
381 readDylibCodeSignDRs(O&: *Obj);
382 readExportsTrie(O&: *Obj);
383 readChainedFixups(O&: *Obj);
384 readIndirectSymbolTable(O&: *Obj);
385 readSwiftVersion(O&: *Obj);
386 return std::move(Obj);
387}
388