1 | //===- MachOReader.cpp ------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "MachOReader.h" |
10 | #include "MachOObject.h" |
11 | #include "llvm/BinaryFormat/MachO.h" |
12 | #include "llvm/Object/MachO.h" |
13 | #include "llvm/Support/Errc.h" |
14 | #include "llvm/Support/SystemZ/zOSSupport.h" |
15 | #include <memory> |
16 | |
17 | using namespace llvm; |
18 | using namespace llvm::objcopy; |
19 | using namespace llvm::objcopy::macho; |
20 | |
21 | void MachOReader::(Object &O) const { |
22 | O.Header.Magic = MachOObj.getHeader().magic; |
23 | O.Header.CPUType = MachOObj.getHeader().cputype; |
24 | O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; |
25 | O.Header.FileType = MachOObj.getHeader().filetype; |
26 | O.Header.NCmds = MachOObj.getHeader().ncmds; |
27 | O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; |
28 | O.Header.Flags = MachOObj.getHeader().flags; |
29 | } |
30 | |
31 | template <typename SectionType> |
32 | static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { |
33 | StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); |
34 | StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); |
35 | Section S(SegName, SectName); |
36 | S.Index = Index; |
37 | S.Addr = Sec.addr; |
38 | S.Size = Sec.size; |
39 | S.OriginalOffset = Sec.offset; |
40 | S.Align = Sec.align; |
41 | S.RelOff = Sec.reloff; |
42 | S.NReloc = Sec.nreloc; |
43 | S.Flags = Sec.flags; |
44 | S.Reserved1 = Sec.reserved1; |
45 | S.Reserved2 = Sec.reserved2; |
46 | S.Reserved3 = 0; |
47 | return S; |
48 | } |
49 | |
50 | Section constructSection(const MachO::section &Sec, uint32_t Index) { |
51 | return constructSectionCommon(Sec, Index); |
52 | } |
53 | |
54 | Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { |
55 | Section S = constructSectionCommon(Sec, Index); |
56 | S.Reserved3 = Sec.reserved3; |
57 | return S; |
58 | } |
59 | |
60 | template <typename SectionType, typename SegmentType> |
61 | Expected<std::vector<std::unique_ptr<Section>>> static extractSections( |
62 | const object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
63 | const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) { |
64 | std::vector<std::unique_ptr<Section>> Sections; |
65 | for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + |
66 | sizeof(SegmentType)), |
67 | End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + |
68 | LoadCmd.C.cmdsize); |
69 | Curr < End; ++Curr) { |
70 | SectionType Sec; |
71 | memcpy(dest: (void *)&Sec, src: reinterpret_cast<const char *>(Curr), |
72 | n: sizeof(SectionType)); |
73 | |
74 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) |
75 | MachO::swapStruct(Sec); |
76 | |
77 | Sections.push_back( |
78 | std::make_unique<Section>(constructSection(Sec, NextSectionIndex))); |
79 | |
80 | Section &S = *Sections.back(); |
81 | |
82 | Expected<object::SectionRef> SecRef = |
83 | MachOObj.getSection(SectionIndex: NextSectionIndex++); |
84 | if (!SecRef) |
85 | return SecRef.takeError(); |
86 | |
87 | Expected<ArrayRef<uint8_t>> Data = |
88 | MachOObj.getSectionContents(Sec: SecRef->getRawDataRefImpl()); |
89 | if (!Data) |
90 | return Data.takeError(); |
91 | |
92 | S.Content = |
93 | StringRef(reinterpret_cast<const char *>(Data->data()), Data->size()); |
94 | |
95 | const uint32_t CPUType = MachOObj.getHeader().cputype; |
96 | S.Relocations.reserve(n: S.NReloc); |
97 | for (auto RI = MachOObj.section_rel_begin(Sec: SecRef->getRawDataRefImpl()), |
98 | RE = MachOObj.section_rel_end(Sec: SecRef->getRawDataRefImpl()); |
99 | RI != RE; ++RI) { |
100 | RelocationInfo R; |
101 | R.Symbol = nullptr; // We'll fill this field later. |
102 | R.Info = MachOObj.getRelocation(Rel: RI->getRawDataRefImpl()); |
103 | R.Scattered = MachOObj.isRelocationScattered(RE: R.Info); |
104 | unsigned Type = MachOObj.getAnyRelocationType(RE: R.Info); |
105 | // TODO Support CPU_TYPE_ARM. |
106 | R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 && |
107 | Type == MachO::ARM64_RELOC_ADDEND); |
108 | R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(RE: R.Info); |
109 | S.Relocations.push_back(x: R); |
110 | } |
111 | |
112 | assert(S.NReloc == S.Relocations.size() && |
113 | "Incorrect number of relocations" ); |
114 | } |
115 | return std::move(Sections); |
116 | } |
117 | |
118 | Error MachOReader::readLoadCommands(Object &O) const { |
119 | // For MachO sections indices start from 1. |
120 | uint32_t NextSectionIndex = 1; |
121 | static constexpr char TextSegmentName[] = "__TEXT" ; |
122 | for (auto LoadCmd : MachOObj.load_commands()) { |
123 | LoadCommand LC; |
124 | switch (LoadCmd.C.cmd) { |
125 | case MachO::LC_CODE_SIGNATURE: |
126 | O.CodeSignatureCommandIndex = O.LoadCommands.size(); |
127 | break; |
128 | case MachO::LC_SEGMENT: |
129 | // LoadCmd.Ptr might not be aligned temporarily as |
130 | // MachO::segment_command requires, but the segname char pointer do not |
131 | // have alignment restrictions. |
132 | if (StringRef(reinterpret_cast<const char *>( |
133 | LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) == |
134 | TextSegmentName) |
135 | O.TextSegmentCommandIndex = O.LoadCommands.size(); |
136 | |
137 | if (Expected<std::vector<std::unique_ptr<Section>>> Sections = |
138 | extractSections<MachO::section, MachO::segment_command>( |
139 | LoadCmd, MachOObj, NextSectionIndex)) |
140 | LC.Sections = std::move(*Sections); |
141 | else |
142 | return Sections.takeError(); |
143 | break; |
144 | case MachO::LC_SEGMENT_64: |
145 | // LoadCmd.Ptr might not be aligned temporarily as |
146 | // MachO::segment_command_64 requires, but the segname char pointer do |
147 | // not have alignment restrictions. |
148 | if (StringRef(reinterpret_cast<const char *>( |
149 | LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) == |
150 | TextSegmentName) |
151 | O.TextSegmentCommandIndex = O.LoadCommands.size(); |
152 | |
153 | if (Expected<std::vector<std::unique_ptr<Section>>> Sections = |
154 | extractSections<MachO::section_64, MachO::segment_command_64>( |
155 | LoadCmd, MachOObj, NextSectionIndex)) |
156 | LC.Sections = std::move(*Sections); |
157 | else |
158 | return Sections.takeError(); |
159 | break; |
160 | case MachO::LC_SYMTAB: |
161 | O.SymTabCommandIndex = O.LoadCommands.size(); |
162 | break; |
163 | case MachO::LC_DYSYMTAB: |
164 | O.DySymTabCommandIndex = O.LoadCommands.size(); |
165 | break; |
166 | case MachO::LC_DYLD_INFO: |
167 | case MachO::LC_DYLD_INFO_ONLY: |
168 | O.DyLdInfoCommandIndex = O.LoadCommands.size(); |
169 | break; |
170 | case MachO::LC_DATA_IN_CODE: |
171 | O.DataInCodeCommandIndex = O.LoadCommands.size(); |
172 | break; |
173 | case MachO::LC_LINKER_OPTIMIZATION_HINT: |
174 | O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size(); |
175 | break; |
176 | case MachO::LC_FUNCTION_STARTS: |
177 | O.FunctionStartsCommandIndex = O.LoadCommands.size(); |
178 | break; |
179 | case MachO::LC_DYLIB_CODE_SIGN_DRS: |
180 | O.DylibCodeSignDRsIndex = O.LoadCommands.size(); |
181 | break; |
182 | case MachO::LC_DYLD_EXPORTS_TRIE: |
183 | O.ExportsTrieCommandIndex = O.LoadCommands.size(); |
184 | break; |
185 | case MachO::LC_DYLD_CHAINED_FIXUPS: |
186 | O.ChainedFixupsCommandIndex = O.LoadCommands.size(); |
187 | break; |
188 | } |
189 | #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ |
190 | case MachO::LCName: \ |
191 | memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ |
192 | sizeof(MachO::LCStruct)); \ |
193 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ |
194 | MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ |
195 | if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \ |
196 | LC.Payload = ArrayRef<uint8_t>( \ |
197 | reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \ |
198 | sizeof(MachO::LCStruct), \ |
199 | LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ |
200 | break; |
201 | |
202 | switch (LoadCmd.C.cmd) { |
203 | default: |
204 | memcpy(dest: (void *)&(LC.MachOLoadCommand.load_command_data), src: LoadCmd.Ptr, |
205 | n: sizeof(MachO::load_command)); |
206 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) |
207 | MachO::swapStruct(lc&: LC.MachOLoadCommand.load_command_data); |
208 | if (LoadCmd.C.cmdsize > sizeof(MachO::load_command)) |
209 | LC.Payload = ArrayRef<uint8_t>( |
210 | reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + |
211 | sizeof(MachO::load_command), |
212 | LoadCmd.C.cmdsize - sizeof(MachO::load_command)); |
213 | break; |
214 | #include "llvm/BinaryFormat/MachO.def" |
215 | } |
216 | O.LoadCommands.push_back(x: std::move(LC)); |
217 | } |
218 | return Error::success(); |
219 | } |
220 | |
221 | template <typename nlist_t> |
222 | SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { |
223 | assert(nlist.n_strx < StrTable.size() && |
224 | "n_strx exceeds the size of the string table" ); |
225 | SymbolEntry SE; |
226 | SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); |
227 | SE.n_type = nlist.n_type; |
228 | SE.n_sect = nlist.n_sect; |
229 | SE.n_desc = nlist.n_desc; |
230 | SE.n_value = nlist.n_value; |
231 | return SE; |
232 | } |
233 | |
234 | void MachOReader::readSymbolTable(Object &O) const { |
235 | StringRef StrTable = MachOObj.getStringTableData(); |
236 | for (auto Symbol : MachOObj.symbols()) { |
237 | SymbolEntry SE = |
238 | (MachOObj.is64Bit() |
239 | ? constructSymbolEntry(StrTable, nlist: MachOObj.getSymbol64TableEntry( |
240 | DRI: Symbol.getRawDataRefImpl())) |
241 | : constructSymbolEntry(StrTable, nlist: MachOObj.getSymbolTableEntry( |
242 | DRI: Symbol.getRawDataRefImpl()))); |
243 | |
244 | O.SymTable.Symbols.push_back(x: std::make_unique<SymbolEntry>(args&: SE)); |
245 | } |
246 | } |
247 | |
248 | void MachOReader::setSymbolInRelocationInfo(Object &O) const { |
249 | std::vector<const Section *> Sections; |
250 | for (auto &LC : O.LoadCommands) |
251 | for (std::unique_ptr<Section> &Sec : LC.Sections) |
252 | Sections.push_back(x: Sec.get()); |
253 | |
254 | for (LoadCommand &LC : O.LoadCommands) |
255 | for (std::unique_ptr<Section> &Sec : LC.Sections) |
256 | for (auto &Reloc : Sec->Relocations) |
257 | if (!Reloc.Scattered && !Reloc.IsAddend) { |
258 | const uint32_t SymbolNum = |
259 | Reloc.getPlainRelocationSymbolNum(IsLittleEndian: MachOObj.isLittleEndian()); |
260 | if (Reloc.Extern) { |
261 | Reloc.Symbol = O.SymTable.getSymbolByIndex(Index: SymbolNum); |
262 | } else { |
263 | // FIXME: Refactor error handling in MachOReader and report an error |
264 | // if we encounter an invalid relocation. |
265 | assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && |
266 | "Invalid section index." ); |
267 | Reloc.Sec = Sections[SymbolNum - 1]; |
268 | } |
269 | } |
270 | } |
271 | |
272 | void MachOReader::readRebaseInfo(Object &O) const { |
273 | O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); |
274 | } |
275 | |
276 | void MachOReader::readBindInfo(Object &O) const { |
277 | O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); |
278 | } |
279 | |
280 | void MachOReader::readWeakBindInfo(Object &O) const { |
281 | O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); |
282 | } |
283 | |
284 | void MachOReader::readLazyBindInfo(Object &O) const { |
285 | O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); |
286 | } |
287 | |
288 | void MachOReader::readExportInfo(Object &O) const { |
289 | // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE |
290 | ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie(); |
291 | if (Trie.empty()) |
292 | Trie = MachOObj.getDyldExportsTrie(); |
293 | O.Exports.Trie = Trie; |
294 | } |
295 | |
296 | void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex, |
297 | LinkData &LD) const { |
298 | if (!LCIndex) |
299 | return; |
300 | const MachO::linkedit_data_command &LC = |
301 | O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; |
302 | LD.Data = |
303 | arrayRefFromStringRef(Input: MachOObj.getData().substr(Start: LC.dataoff, N: LC.datasize)); |
304 | } |
305 | |
306 | void MachOReader::readDataInCodeData(Object &O) const { |
307 | return readLinkData(O, LCIndex: O.DataInCodeCommandIndex, LD&: O.DataInCode); |
308 | } |
309 | |
310 | void MachOReader::readLinkerOptimizationHint(Object &O) const { |
311 | return readLinkData(O, LCIndex: O.LinkerOptimizationHintCommandIndex, |
312 | LD&: O.LinkerOptimizationHint); |
313 | } |
314 | |
315 | void MachOReader::readFunctionStartsData(Object &O) const { |
316 | return readLinkData(O, LCIndex: O.FunctionStartsCommandIndex, LD&: O.FunctionStarts); |
317 | } |
318 | |
319 | void MachOReader::readDylibCodeSignDRs(Object &O) const { |
320 | return readLinkData(O, LCIndex: O.DylibCodeSignDRsIndex, LD&: O.DylibCodeSignDRs); |
321 | } |
322 | |
323 | void MachOReader::readExportsTrie(Object &O) const { |
324 | return readLinkData(O, LCIndex: O.ExportsTrieCommandIndex, LD&: O.ExportsTrie); |
325 | } |
326 | |
327 | void MachOReader::readChainedFixups(Object &O) const { |
328 | return readLinkData(O, LCIndex: O.ChainedFixupsCommandIndex, LD&: O.ChainedFixups); |
329 | } |
330 | |
331 | void MachOReader::readIndirectSymbolTable(Object &O) const { |
332 | MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); |
333 | constexpr uint32_t AbsOrLocalMask = |
334 | MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS; |
335 | for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { |
336 | uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DLC: DySymTab, Index: i); |
337 | if ((Index & AbsOrLocalMask) != 0) |
338 | O.IndirectSymTable.Symbols.emplace_back(args&: Index, args: std::nullopt); |
339 | else |
340 | O.IndirectSymTable.Symbols.emplace_back( |
341 | args&: Index, args: O.SymTable.getSymbolByIndex(Index)); |
342 | } |
343 | } |
344 | |
345 | void MachOReader::readSwiftVersion(Object &O) const { |
346 | struct ObjCImageInfo { |
347 | uint32_t Version; |
348 | uint32_t Flags; |
349 | } ImageInfo; |
350 | |
351 | for (const LoadCommand &LC : O.LoadCommands) |
352 | for (const std::unique_ptr<Section> &Sec : LC.Sections) |
353 | if (Sec->Sectname == "__objc_imageinfo" && |
354 | (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || |
355 | Sec->Segname == "__DATA_DIRTY" ) && |
356 | Sec->Content.size() >= sizeof(ObjCImageInfo)) { |
357 | memcpy(dest: &ImageInfo, src: Sec->Content.data(), n: sizeof(ObjCImageInfo)); |
358 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { |
359 | sys::swapByteOrder(Value&: ImageInfo.Version); |
360 | sys::swapByteOrder(Value&: ImageInfo.Flags); |
361 | } |
362 | O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; |
363 | return; |
364 | } |
365 | } |
366 | |
367 | Expected<std::unique_ptr<Object>> MachOReader::create() const { |
368 | auto Obj = std::make_unique<Object>(); |
369 | readHeader(O&: *Obj); |
370 | if (Error E = readLoadCommands(O&: *Obj)) |
371 | return std::move(E); |
372 | readSymbolTable(O&: *Obj); |
373 | setSymbolInRelocationInfo(*Obj); |
374 | readRebaseInfo(O&: *Obj); |
375 | readBindInfo(O&: *Obj); |
376 | readWeakBindInfo(O&: *Obj); |
377 | readLazyBindInfo(O&: *Obj); |
378 | readExportInfo(O&: *Obj); |
379 | readDataInCodeData(O&: *Obj); |
380 | readLinkerOptimizationHint(O&: *Obj); |
381 | readFunctionStartsData(O&: *Obj); |
382 | readDylibCodeSignDRs(O&: *Obj); |
383 | readExportsTrie(O&: *Obj); |
384 | readChainedFixups(O&: *Obj); |
385 | readIndirectSymbolTable(O&: *Obj); |
386 | readSwiftVersion(O&: *Obj); |
387 | return std::move(Obj); |
388 | } |
389 | |