1 | //===- MachOReader.cpp ------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "MachOReader.h" |
10 | #include "MachOObject.h" |
11 | #include "llvm/BinaryFormat/MachO.h" |
12 | #include "llvm/Object/MachO.h" |
13 | #include "llvm/Support/SystemZ/zOSSupport.h" |
14 | #include <memory> |
15 | |
16 | using namespace llvm; |
17 | using namespace llvm::objcopy; |
18 | using namespace llvm::objcopy::macho; |
19 | |
20 | void MachOReader::(Object &O) const { |
21 | O.Header.Magic = MachOObj.getHeader().magic; |
22 | O.Header.CPUType = MachOObj.getHeader().cputype; |
23 | O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; |
24 | O.Header.FileType = MachOObj.getHeader().filetype; |
25 | O.Header.NCmds = MachOObj.getHeader().ncmds; |
26 | O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; |
27 | O.Header.Flags = MachOObj.getHeader().flags; |
28 | } |
29 | |
30 | template <typename SectionType> |
31 | static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { |
32 | StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); |
33 | StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); |
34 | Section S(SegName, SectName); |
35 | S.Index = Index; |
36 | S.Addr = Sec.addr; |
37 | S.Size = Sec.size; |
38 | S.OriginalOffset = Sec.offset; |
39 | S.Align = Sec.align; |
40 | S.RelOff = Sec.reloff; |
41 | S.NReloc = Sec.nreloc; |
42 | S.Flags = Sec.flags; |
43 | S.Reserved1 = Sec.reserved1; |
44 | S.Reserved2 = Sec.reserved2; |
45 | S.Reserved3 = 0; |
46 | return S; |
47 | } |
48 | |
49 | Section constructSection(const MachO::section &Sec, uint32_t Index) { |
50 | return constructSectionCommon(Sec, Index); |
51 | } |
52 | |
53 | Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { |
54 | Section S = constructSectionCommon(Sec, Index); |
55 | S.Reserved3 = Sec.reserved3; |
56 | return S; |
57 | } |
58 | |
59 | template <typename SectionType, typename SegmentType> |
60 | Expected<std::vector<std::unique_ptr<Section>>> static extractSections( |
61 | const object::MachOObjectFile::LoadCommandInfo &LoadCmd, |
62 | const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) { |
63 | std::vector<std::unique_ptr<Section>> Sections; |
64 | for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + |
65 | sizeof(SegmentType)), |
66 | End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + |
67 | LoadCmd.C.cmdsize); |
68 | Curr < End; ++Curr) { |
69 | SectionType Sec; |
70 | memcpy(dest: (void *)&Sec, src: reinterpret_cast<const char *>(Curr), |
71 | n: sizeof(SectionType)); |
72 | |
73 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) |
74 | MachO::swapStruct(Sec); |
75 | |
76 | Sections.push_back( |
77 | std::make_unique<Section>(constructSection(Sec, NextSectionIndex))); |
78 | |
79 | Section &S = *Sections.back(); |
80 | |
81 | Expected<object::SectionRef> SecRef = |
82 | MachOObj.getSection(SectionIndex: NextSectionIndex++); |
83 | if (!SecRef) |
84 | return SecRef.takeError(); |
85 | |
86 | Expected<ArrayRef<uint8_t>> Data = |
87 | MachOObj.getSectionContents(Sec: SecRef->getRawDataRefImpl()); |
88 | if (!Data) |
89 | return Data.takeError(); |
90 | |
91 | S.Content = |
92 | StringRef(reinterpret_cast<const char *>(Data->data()), Data->size()); |
93 | |
94 | const uint32_t CPUType = MachOObj.getHeader().cputype; |
95 | S.Relocations.reserve(n: S.NReloc); |
96 | for (auto RI = MachOObj.section_rel_begin(Sec: SecRef->getRawDataRefImpl()), |
97 | RE = MachOObj.section_rel_end(Sec: SecRef->getRawDataRefImpl()); |
98 | RI != RE; ++RI) { |
99 | RelocationInfo R; |
100 | R.Symbol = nullptr; // We'll fill this field later. |
101 | R.Info = MachOObj.getRelocation(Rel: RI->getRawDataRefImpl()); |
102 | R.Scattered = MachOObj.isRelocationScattered(RE: R.Info); |
103 | unsigned Type = MachOObj.getAnyRelocationType(RE: R.Info); |
104 | // TODO Support CPU_TYPE_ARM. |
105 | R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 && |
106 | Type == MachO::ARM64_RELOC_ADDEND); |
107 | R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(RE: R.Info); |
108 | S.Relocations.push_back(x: R); |
109 | } |
110 | |
111 | assert(S.NReloc == S.Relocations.size() && |
112 | "Incorrect number of relocations" ); |
113 | } |
114 | return std::move(Sections); |
115 | } |
116 | |
117 | Error MachOReader::readLoadCommands(Object &O) const { |
118 | // For MachO sections indices start from 1. |
119 | uint32_t NextSectionIndex = 1; |
120 | static constexpr char TextSegmentName[] = "__TEXT" ; |
121 | for (auto LoadCmd : MachOObj.load_commands()) { |
122 | LoadCommand LC; |
123 | switch (LoadCmd.C.cmd) { |
124 | case MachO::LC_CODE_SIGNATURE: |
125 | O.CodeSignatureCommandIndex = O.LoadCommands.size(); |
126 | break; |
127 | case MachO::LC_SEGMENT: |
128 | // LoadCmd.Ptr might not be aligned temporarily as |
129 | // MachO::segment_command requires, but the segname char pointer do not |
130 | // have alignment restrictions. |
131 | if (StringRef(reinterpret_cast<const char *>( |
132 | LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) == |
133 | TextSegmentName) |
134 | O.TextSegmentCommandIndex = O.LoadCommands.size(); |
135 | |
136 | if (Expected<std::vector<std::unique_ptr<Section>>> Sections = |
137 | extractSections<MachO::section, MachO::segment_command>( |
138 | LoadCmd, MachOObj, NextSectionIndex)) |
139 | LC.Sections = std::move(*Sections); |
140 | else |
141 | return Sections.takeError(); |
142 | break; |
143 | case MachO::LC_SEGMENT_64: |
144 | // LoadCmd.Ptr might not be aligned temporarily as |
145 | // MachO::segment_command_64 requires, but the segname char pointer do |
146 | // not have alignment restrictions. |
147 | if (StringRef(reinterpret_cast<const char *>( |
148 | LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) == |
149 | TextSegmentName) |
150 | O.TextSegmentCommandIndex = O.LoadCommands.size(); |
151 | |
152 | if (Expected<std::vector<std::unique_ptr<Section>>> Sections = |
153 | extractSections<MachO::section_64, MachO::segment_command_64>( |
154 | LoadCmd, MachOObj, NextSectionIndex)) |
155 | LC.Sections = std::move(*Sections); |
156 | else |
157 | return Sections.takeError(); |
158 | break; |
159 | case MachO::LC_SYMTAB: |
160 | O.SymTabCommandIndex = O.LoadCommands.size(); |
161 | break; |
162 | case MachO::LC_DYSYMTAB: |
163 | O.DySymTabCommandIndex = O.LoadCommands.size(); |
164 | break; |
165 | case MachO::LC_DYLD_INFO: |
166 | case MachO::LC_DYLD_INFO_ONLY: |
167 | O.DyLdInfoCommandIndex = O.LoadCommands.size(); |
168 | break; |
169 | case MachO::LC_DATA_IN_CODE: |
170 | O.DataInCodeCommandIndex = O.LoadCommands.size(); |
171 | break; |
172 | case MachO::LC_LINKER_OPTIMIZATION_HINT: |
173 | O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size(); |
174 | break; |
175 | case MachO::LC_FUNCTION_STARTS: |
176 | O.FunctionStartsCommandIndex = O.LoadCommands.size(); |
177 | break; |
178 | case MachO::LC_DYLIB_CODE_SIGN_DRS: |
179 | O.DylibCodeSignDRsIndex = O.LoadCommands.size(); |
180 | break; |
181 | case MachO::LC_DYLD_EXPORTS_TRIE: |
182 | O.ExportsTrieCommandIndex = O.LoadCommands.size(); |
183 | break; |
184 | case MachO::LC_DYLD_CHAINED_FIXUPS: |
185 | O.ChainedFixupsCommandIndex = O.LoadCommands.size(); |
186 | break; |
187 | } |
188 | #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ |
189 | case MachO::LCName: \ |
190 | memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ |
191 | sizeof(MachO::LCStruct)); \ |
192 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ |
193 | MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ |
194 | if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \ |
195 | LC.Payload = ArrayRef<uint8_t>( \ |
196 | reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \ |
197 | sizeof(MachO::LCStruct), \ |
198 | LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ |
199 | break; |
200 | |
201 | switch (LoadCmd.C.cmd) { |
202 | default: |
203 | memcpy(dest: (void *)&(LC.MachOLoadCommand.load_command_data), src: LoadCmd.Ptr, |
204 | n: sizeof(MachO::load_command)); |
205 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) |
206 | MachO::swapStruct(lc&: LC.MachOLoadCommand.load_command_data); |
207 | if (LoadCmd.C.cmdsize > sizeof(MachO::load_command)) |
208 | LC.Payload = ArrayRef<uint8_t>( |
209 | reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + |
210 | sizeof(MachO::load_command), |
211 | LoadCmd.C.cmdsize - sizeof(MachO::load_command)); |
212 | break; |
213 | #include "llvm/BinaryFormat/MachO.def" |
214 | } |
215 | O.LoadCommands.push_back(x: std::move(LC)); |
216 | } |
217 | return Error::success(); |
218 | } |
219 | |
220 | template <typename nlist_t> |
221 | SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { |
222 | assert(nlist.n_strx < StrTable.size() && |
223 | "n_strx exceeds the size of the string table" ); |
224 | SymbolEntry SE; |
225 | SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); |
226 | SE.n_type = nlist.n_type; |
227 | SE.n_sect = nlist.n_sect; |
228 | SE.n_desc = nlist.n_desc; |
229 | SE.n_value = nlist.n_value; |
230 | return SE; |
231 | } |
232 | |
233 | void MachOReader::readSymbolTable(Object &O) const { |
234 | StringRef StrTable = MachOObj.getStringTableData(); |
235 | for (auto Symbol : MachOObj.symbols()) { |
236 | SymbolEntry SE = |
237 | (MachOObj.is64Bit() |
238 | ? constructSymbolEntry(StrTable, nlist: MachOObj.getSymbol64TableEntry( |
239 | DRI: Symbol.getRawDataRefImpl())) |
240 | : constructSymbolEntry(StrTable, nlist: MachOObj.getSymbolTableEntry( |
241 | DRI: Symbol.getRawDataRefImpl()))); |
242 | |
243 | O.SymTable.Symbols.push_back(x: std::make_unique<SymbolEntry>(args&: SE)); |
244 | } |
245 | } |
246 | |
247 | void MachOReader::setSymbolInRelocationInfo(Object &O) const { |
248 | std::vector<const Section *> Sections; |
249 | for (auto &LC : O.LoadCommands) |
250 | for (std::unique_ptr<Section> &Sec : LC.Sections) |
251 | Sections.push_back(x: Sec.get()); |
252 | |
253 | for (LoadCommand &LC : O.LoadCommands) |
254 | for (std::unique_ptr<Section> &Sec : LC.Sections) |
255 | for (auto &Reloc : Sec->Relocations) |
256 | if (!Reloc.Scattered && !Reloc.IsAddend) { |
257 | const uint32_t SymbolNum = |
258 | Reloc.getPlainRelocationSymbolNum(IsLittleEndian: MachOObj.isLittleEndian()); |
259 | if (Reloc.Extern) { |
260 | Reloc.Symbol = O.SymTable.getSymbolByIndex(Index: SymbolNum); |
261 | } else { |
262 | // FIXME: Refactor error handling in MachOReader and report an error |
263 | // if we encounter an invalid relocation. |
264 | assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && |
265 | "Invalid section index." ); |
266 | Reloc.Sec = Sections[SymbolNum - 1]; |
267 | } |
268 | } |
269 | } |
270 | |
271 | void MachOReader::readRebaseInfo(Object &O) const { |
272 | O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); |
273 | } |
274 | |
275 | void MachOReader::readBindInfo(Object &O) const { |
276 | O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); |
277 | } |
278 | |
279 | void MachOReader::readWeakBindInfo(Object &O) const { |
280 | O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); |
281 | } |
282 | |
283 | void MachOReader::readLazyBindInfo(Object &O) const { |
284 | O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); |
285 | } |
286 | |
287 | void MachOReader::readExportInfo(Object &O) const { |
288 | // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE |
289 | ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie(); |
290 | if (Trie.empty()) |
291 | Trie = MachOObj.getDyldExportsTrie(); |
292 | O.Exports.Trie = Trie; |
293 | } |
294 | |
295 | void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex, |
296 | LinkData &LD) const { |
297 | if (!LCIndex) |
298 | return; |
299 | const MachO::linkedit_data_command &LC = |
300 | O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; |
301 | LD.Data = |
302 | arrayRefFromStringRef(Input: MachOObj.getData().substr(Start: LC.dataoff, N: LC.datasize)); |
303 | } |
304 | |
305 | void MachOReader::readDataInCodeData(Object &O) const { |
306 | return readLinkData(O, LCIndex: O.DataInCodeCommandIndex, LD&: O.DataInCode); |
307 | } |
308 | |
309 | void MachOReader::readLinkerOptimizationHint(Object &O) const { |
310 | return readLinkData(O, LCIndex: O.LinkerOptimizationHintCommandIndex, |
311 | LD&: O.LinkerOptimizationHint); |
312 | } |
313 | |
314 | void MachOReader::readFunctionStartsData(Object &O) const { |
315 | return readLinkData(O, LCIndex: O.FunctionStartsCommandIndex, LD&: O.FunctionStarts); |
316 | } |
317 | |
318 | void MachOReader::readDylibCodeSignDRs(Object &O) const { |
319 | return readLinkData(O, LCIndex: O.DylibCodeSignDRsIndex, LD&: O.DylibCodeSignDRs); |
320 | } |
321 | |
322 | void MachOReader::readExportsTrie(Object &O) const { |
323 | return readLinkData(O, LCIndex: O.ExportsTrieCommandIndex, LD&: O.ExportsTrie); |
324 | } |
325 | |
326 | void MachOReader::readChainedFixups(Object &O) const { |
327 | return readLinkData(O, LCIndex: O.ChainedFixupsCommandIndex, LD&: O.ChainedFixups); |
328 | } |
329 | |
330 | void MachOReader::readIndirectSymbolTable(Object &O) const { |
331 | MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); |
332 | constexpr uint32_t AbsOrLocalMask = |
333 | MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS; |
334 | for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { |
335 | uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DLC: DySymTab, Index: i); |
336 | if ((Index & AbsOrLocalMask) != 0) |
337 | O.IndirectSymTable.Symbols.emplace_back(args&: Index, args: std::nullopt); |
338 | else |
339 | O.IndirectSymTable.Symbols.emplace_back( |
340 | args&: Index, args: O.SymTable.getSymbolByIndex(Index)); |
341 | } |
342 | } |
343 | |
344 | void MachOReader::readSwiftVersion(Object &O) const { |
345 | struct ObjCImageInfo { |
346 | uint32_t Version; |
347 | uint32_t Flags; |
348 | } ImageInfo; |
349 | |
350 | for (const LoadCommand &LC : O.LoadCommands) |
351 | for (const std::unique_ptr<Section> &Sec : LC.Sections) |
352 | if (Sec->Sectname == "__objc_imageinfo" && |
353 | (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || |
354 | Sec->Segname == "__DATA_DIRTY" ) && |
355 | Sec->Content.size() >= sizeof(ObjCImageInfo)) { |
356 | memcpy(dest: &ImageInfo, src: Sec->Content.data(), n: sizeof(ObjCImageInfo)); |
357 | if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { |
358 | sys::swapByteOrder(Value&: ImageInfo.Version); |
359 | sys::swapByteOrder(Value&: ImageInfo.Flags); |
360 | } |
361 | O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; |
362 | return; |
363 | } |
364 | } |
365 | |
366 | Expected<std::unique_ptr<Object>> MachOReader::create() const { |
367 | auto Obj = std::make_unique<Object>(); |
368 | readHeader(O&: *Obj); |
369 | if (Error E = readLoadCommands(O&: *Obj)) |
370 | return std::move(E); |
371 | readSymbolTable(O&: *Obj); |
372 | setSymbolInRelocationInfo(*Obj); |
373 | readRebaseInfo(O&: *Obj); |
374 | readBindInfo(O&: *Obj); |
375 | readWeakBindInfo(O&: *Obj); |
376 | readLazyBindInfo(O&: *Obj); |
377 | readExportInfo(O&: *Obj); |
378 | readDataInCodeData(O&: *Obj); |
379 | readLinkerOptimizationHint(O&: *Obj); |
380 | readFunctionStartsData(O&: *Obj); |
381 | readDylibCodeSignDRs(O&: *Obj); |
382 | readExportsTrie(O&: *Obj); |
383 | readChainedFixups(O&: *Obj); |
384 | readIndirectSymbolTable(O&: *Obj); |
385 | readSwiftVersion(O&: *Obj); |
386 | return std::move(Obj); |
387 | } |
388 | |