1 | //===- MachOWriter.cpp ------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "MachOWriter.h" |
10 | #include "MachOLayoutBuilder.h" |
11 | #include "MachOObject.h" |
12 | #include "llvm/ADT/STLExtras.h" |
13 | #include "llvm/BinaryFormat/MachO.h" |
14 | #include "llvm/Support/Errc.h" |
15 | #include "llvm/Support/ErrorHandling.h" |
16 | #include "llvm/Support/SHA256.h" |
17 | #include <memory> |
18 | |
19 | #if defined(__APPLE__) |
20 | #include <sys/mman.h> |
21 | #endif |
22 | |
23 | using namespace llvm; |
24 | using namespace llvm::objcopy::macho; |
25 | using namespace llvm::support::endian; |
26 | |
27 | size_t MachOWriter::() const { |
28 | return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); |
29 | } |
30 | |
31 | size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; } |
32 | |
33 | size_t MachOWriter::symTableSize() const { |
34 | return O.SymTable.Symbols.size() * |
35 | (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist)); |
36 | } |
37 | |
38 | size_t MachOWriter::totalSize() const { |
39 | // Going from tail to head and looking for an appropriate "anchor" to |
40 | // calculate the total size assuming that all the offsets are either valid |
41 | // ("true") or 0 (0 indicates that the corresponding part is missing). |
42 | |
43 | SmallVector<size_t, 7> Ends; |
44 | if (O.SymTabCommandIndex) { |
45 | const MachO::symtab_command &SymTabCommand = |
46 | O.LoadCommands[*O.SymTabCommandIndex] |
47 | .MachOLoadCommand.symtab_command_data; |
48 | if (SymTabCommand.symoff) |
49 | Ends.push_back(Elt: SymTabCommand.symoff + symTableSize()); |
50 | if (SymTabCommand.stroff) |
51 | Ends.push_back(Elt: SymTabCommand.stroff + SymTabCommand.strsize); |
52 | } |
53 | if (O.DyLdInfoCommandIndex) { |
54 | const MachO::dyld_info_command &DyLdInfoCommand = |
55 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
56 | .MachOLoadCommand.dyld_info_command_data; |
57 | if (DyLdInfoCommand.rebase_off) { |
58 | assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && |
59 | "Incorrect rebase opcodes size" ); |
60 | Ends.push_back(Elt: DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size); |
61 | } |
62 | if (DyLdInfoCommand.bind_off) { |
63 | assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && |
64 | "Incorrect bind opcodes size" ); |
65 | Ends.push_back(Elt: DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size); |
66 | } |
67 | if (DyLdInfoCommand.weak_bind_off) { |
68 | assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && |
69 | "Incorrect weak bind opcodes size" ); |
70 | Ends.push_back(Elt: DyLdInfoCommand.weak_bind_off + |
71 | DyLdInfoCommand.weak_bind_size); |
72 | } |
73 | if (DyLdInfoCommand.lazy_bind_off) { |
74 | assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && |
75 | "Incorrect lazy bind opcodes size" ); |
76 | Ends.push_back(Elt: DyLdInfoCommand.lazy_bind_off + |
77 | DyLdInfoCommand.lazy_bind_size); |
78 | } |
79 | if (DyLdInfoCommand.export_off) { |
80 | assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && |
81 | "Incorrect trie size" ); |
82 | Ends.push_back(Elt: DyLdInfoCommand.export_off + DyLdInfoCommand.export_size); |
83 | } |
84 | } |
85 | |
86 | if (O.DySymTabCommandIndex) { |
87 | const MachO::dysymtab_command &DySymTabCommand = |
88 | O.LoadCommands[*O.DySymTabCommandIndex] |
89 | .MachOLoadCommand.dysymtab_command_data; |
90 | |
91 | if (DySymTabCommand.indirectsymoff) |
92 | Ends.push_back(Elt: DySymTabCommand.indirectsymoff + |
93 | sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); |
94 | } |
95 | |
96 | for (std::optional<size_t> LinkEditDataCommandIndex : |
97 | {O.CodeSignatureCommandIndex, O.DylibCodeSignDRsIndex, |
98 | O.DataInCodeCommandIndex, O.LinkerOptimizationHintCommandIndex, |
99 | O.FunctionStartsCommandIndex, O.ChainedFixupsCommandIndex, |
100 | O.ExportsTrieCommandIndex}) |
101 | if (LinkEditDataCommandIndex) { |
102 | const MachO::linkedit_data_command &LinkEditDataCommand = |
103 | O.LoadCommands[*LinkEditDataCommandIndex] |
104 | .MachOLoadCommand.linkedit_data_command_data; |
105 | if (LinkEditDataCommand.dataoff) |
106 | Ends.push_back(Elt: LinkEditDataCommand.dataoff + |
107 | LinkEditDataCommand.datasize); |
108 | } |
109 | |
110 | // Otherwise, use the last section / reloction. |
111 | for (const LoadCommand &LC : O.LoadCommands) |
112 | for (const std::unique_ptr<Section> &S : LC.Sections) { |
113 | if (!S->hasValidOffset()) { |
114 | assert((S->Offset == 0) && "Skipped section's offset must be zero" ); |
115 | assert((S->isVirtualSection() || S->Size == 0) && |
116 | "Non-zero-fill sections with zero offset must have zero size" ); |
117 | continue; |
118 | } |
119 | assert((S->Offset != 0) && |
120 | "Non-zero-fill section's offset cannot be zero" ); |
121 | Ends.push_back(Elt: S->Offset + S->Size); |
122 | if (S->RelOff) |
123 | Ends.push_back(Elt: S->RelOff + |
124 | S->NReloc * sizeof(MachO::any_relocation_info)); |
125 | } |
126 | |
127 | if (!Ends.empty()) |
128 | return *llvm::max_element(Range&: Ends); |
129 | |
130 | // Otherwise, we have only Mach header and load commands. |
131 | return headerSize() + loadCommandsSize(); |
132 | } |
133 | |
134 | void MachOWriter::() { |
135 | MachO::mach_header_64 ; |
136 | |
137 | Header.magic = O.Header.Magic; |
138 | Header.cputype = O.Header.CPUType; |
139 | Header.cpusubtype = O.Header.CPUSubType; |
140 | Header.filetype = O.Header.FileType; |
141 | Header.ncmds = O.Header.NCmds; |
142 | Header.sizeofcmds = O.Header.SizeOfCmds; |
143 | Header.flags = O.Header.Flags; |
144 | Header.reserved = O.Header.Reserved; |
145 | |
146 | if (IsLittleEndian != sys::IsLittleEndianHost) |
147 | MachO::swapStruct(H&: Header); |
148 | |
149 | auto = |
150 | Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); |
151 | memcpy(dest: Buf->getBufferStart(), src: &Header, n: HeaderSize); |
152 | } |
153 | |
154 | void MachOWriter::writeLoadCommands() { |
155 | uint8_t *Begin = |
156 | reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + headerSize(); |
157 | for (const LoadCommand &LC : O.LoadCommands) { |
158 | // Construct a load command. |
159 | MachO::macho_load_command MLC = LC.MachOLoadCommand; |
160 | switch (MLC.load_command_data.cmd) { |
161 | case MachO::LC_SEGMENT: |
162 | if (IsLittleEndian != sys::IsLittleEndianHost) |
163 | MachO::swapStruct(seg&: MLC.segment_command_data); |
164 | memcpy(dest: Begin, src: &MLC.segment_command_data, n: sizeof(MachO::segment_command)); |
165 | Begin += sizeof(MachO::segment_command); |
166 | |
167 | for (const std::unique_ptr<Section> &Sec : LC.Sections) |
168 | writeSectionInLoadCommand<MachO::section>(Sec: *Sec, Out&: Begin); |
169 | continue; |
170 | case MachO::LC_SEGMENT_64: |
171 | if (IsLittleEndian != sys::IsLittleEndianHost) |
172 | MachO::swapStruct(seg&: MLC.segment_command_64_data); |
173 | memcpy(dest: Begin, src: &MLC.segment_command_64_data, |
174 | n: sizeof(MachO::segment_command_64)); |
175 | Begin += sizeof(MachO::segment_command_64); |
176 | |
177 | for (const std::unique_ptr<Section> &Sec : LC.Sections) |
178 | writeSectionInLoadCommand<MachO::section_64>(Sec: *Sec, Out&: Begin); |
179 | continue; |
180 | } |
181 | |
182 | #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ |
183 | case MachO::LCName: \ |
184 | assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \ |
185 | MLC.load_command_data.cmdsize); \ |
186 | if (IsLittleEndian != sys::IsLittleEndianHost) \ |
187 | MachO::swapStruct(MLC.LCStruct##_data); \ |
188 | memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \ |
189 | Begin += sizeof(MachO::LCStruct); \ |
190 | if (!LC.Payload.empty()) \ |
191 | memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \ |
192 | Begin += LC.Payload.size(); \ |
193 | break; |
194 | |
195 | // Copy the load command as it is. |
196 | switch (MLC.load_command_data.cmd) { |
197 | default: |
198 | assert(sizeof(MachO::load_command) + LC.Payload.size() == |
199 | MLC.load_command_data.cmdsize); |
200 | if (IsLittleEndian != sys::IsLittleEndianHost) |
201 | MachO::swapStruct(lc&: MLC.load_command_data); |
202 | memcpy(dest: Begin, src: &MLC.load_command_data, n: sizeof(MachO::load_command)); |
203 | Begin += sizeof(MachO::load_command); |
204 | if (!LC.Payload.empty()) |
205 | memcpy(dest: Begin, src: LC.Payload.data(), n: LC.Payload.size()); |
206 | Begin += LC.Payload.size(); |
207 | break; |
208 | #include "llvm/BinaryFormat/MachO.def" |
209 | } |
210 | } |
211 | } |
212 | |
213 | template <typename StructType> |
214 | void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) { |
215 | StructType Temp; |
216 | assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name" ); |
217 | assert(Sec.Sectname.size() <= sizeof(Temp.sectname) && |
218 | "too long section name" ); |
219 | memset(&Temp, 0, sizeof(StructType)); |
220 | memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size()); |
221 | memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size()); |
222 | Temp.addr = Sec.Addr; |
223 | Temp.size = Sec.Size; |
224 | Temp.offset = Sec.Offset; |
225 | Temp.align = Sec.Align; |
226 | Temp.reloff = Sec.RelOff; |
227 | Temp.nreloc = Sec.NReloc; |
228 | Temp.flags = Sec.Flags; |
229 | Temp.reserved1 = Sec.Reserved1; |
230 | Temp.reserved2 = Sec.Reserved2; |
231 | |
232 | if (IsLittleEndian != sys::IsLittleEndianHost) |
233 | MachO::swapStruct(Temp); |
234 | memcpy(Out, &Temp, sizeof(StructType)); |
235 | Out += sizeof(StructType); |
236 | } |
237 | |
238 | void MachOWriter::writeSections() { |
239 | for (const LoadCommand &LC : O.LoadCommands) |
240 | for (const std::unique_ptr<Section> &Sec : LC.Sections) { |
241 | if (!Sec->hasValidOffset()) { |
242 | assert((Sec->Offset == 0) && "Skipped section's offset must be zero" ); |
243 | assert((Sec->isVirtualSection() || Sec->Size == 0) && |
244 | "Non-zero-fill sections with zero offset must have zero size" ); |
245 | continue; |
246 | } |
247 | |
248 | assert(Sec->Offset && "Section offset can not be zero" ); |
249 | assert((Sec->Size == Sec->Content.size()) && "Incorrect section size" ); |
250 | memcpy(dest: Buf->getBufferStart() + Sec->Offset, src: Sec->Content.data(), |
251 | n: Sec->Content.size()); |
252 | for (size_t Index = 0; Index < Sec->Relocations.size(); ++Index) { |
253 | RelocationInfo RelocInfo = Sec->Relocations[Index]; |
254 | if (!RelocInfo.Scattered && !RelocInfo.IsAddend) { |
255 | const uint32_t SymbolNum = RelocInfo.Extern |
256 | ? (*RelocInfo.Symbol)->Index |
257 | : (*RelocInfo.Sec)->Index; |
258 | RelocInfo.setPlainRelocationSymbolNum(SymbolNum, IsLittleEndian); |
259 | } |
260 | if (IsLittleEndian != sys::IsLittleEndianHost) |
261 | MachO::swapStruct( |
262 | reloc&: reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info)); |
263 | memcpy(dest: Buf->getBufferStart() + Sec->RelOff + |
264 | Index * sizeof(MachO::any_relocation_info), |
265 | src: &RelocInfo.Info, n: sizeof(RelocInfo.Info)); |
266 | } |
267 | } |
268 | } |
269 | |
270 | template <typename NListType> |
271 | void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out, |
272 | uint32_t Nstrx) { |
273 | NListType ListEntry; |
274 | ListEntry.n_strx = Nstrx; |
275 | ListEntry.n_type = SE.n_type; |
276 | ListEntry.n_sect = SE.n_sect; |
277 | ListEntry.n_desc = SE.n_desc; |
278 | ListEntry.n_value = SE.n_value; |
279 | |
280 | if (IsLittleEndian != sys::IsLittleEndianHost) |
281 | MachO::swapStruct(ListEntry); |
282 | memcpy(dest: Out, src: reinterpret_cast<const char *>(&ListEntry), n: sizeof(NListType)); |
283 | Out += sizeof(NListType); |
284 | } |
285 | |
286 | void MachOWriter::writeStringTable() { |
287 | if (!O.SymTabCommandIndex) |
288 | return; |
289 | const MachO::symtab_command &SymTabCommand = |
290 | O.LoadCommands[*O.SymTabCommandIndex] |
291 | .MachOLoadCommand.symtab_command_data; |
292 | |
293 | uint8_t *StrTable = (uint8_t *)Buf->getBufferStart() + SymTabCommand.stroff; |
294 | LayoutBuilder.getStringTableBuilder().write(Buf: StrTable); |
295 | } |
296 | |
297 | void MachOWriter::writeSymbolTable() { |
298 | if (!O.SymTabCommandIndex) |
299 | return; |
300 | const MachO::symtab_command &SymTabCommand = |
301 | O.LoadCommands[*O.SymTabCommandIndex] |
302 | .MachOLoadCommand.symtab_command_data; |
303 | |
304 | char *SymTable = (char *)Buf->getBufferStart() + SymTabCommand.symoff; |
305 | for (auto &Symbol : O.SymTable.Symbols) { |
306 | SymbolEntry *Sym = Symbol.get(); |
307 | uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(S: Sym->Name); |
308 | |
309 | if (Is64Bit) |
310 | writeNListEntry<MachO::nlist_64>(SE: *Sym, IsLittleEndian, Out&: SymTable, Nstrx); |
311 | else |
312 | writeNListEntry<MachO::nlist>(SE: *Sym, IsLittleEndian, Out&: SymTable, Nstrx); |
313 | } |
314 | } |
315 | |
316 | void MachOWriter::writeRebaseInfo() { |
317 | if (!O.DyLdInfoCommandIndex) |
318 | return; |
319 | const MachO::dyld_info_command &DyLdInfoCommand = |
320 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
321 | .MachOLoadCommand.dyld_info_command_data; |
322 | char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.rebase_off; |
323 | assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && |
324 | "Incorrect rebase opcodes size" ); |
325 | memcpy(dest: Out, src: O.Rebases.Opcodes.data(), n: O.Rebases.Opcodes.size()); |
326 | } |
327 | |
328 | void MachOWriter::writeBindInfo() { |
329 | if (!O.DyLdInfoCommandIndex) |
330 | return; |
331 | const MachO::dyld_info_command &DyLdInfoCommand = |
332 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
333 | .MachOLoadCommand.dyld_info_command_data; |
334 | char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.bind_off; |
335 | assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && |
336 | "Incorrect bind opcodes size" ); |
337 | memcpy(dest: Out, src: O.Binds.Opcodes.data(), n: O.Binds.Opcodes.size()); |
338 | } |
339 | |
340 | void MachOWriter::writeWeakBindInfo() { |
341 | if (!O.DyLdInfoCommandIndex) |
342 | return; |
343 | const MachO::dyld_info_command &DyLdInfoCommand = |
344 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
345 | .MachOLoadCommand.dyld_info_command_data; |
346 | char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.weak_bind_off; |
347 | assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && |
348 | "Incorrect weak bind opcodes size" ); |
349 | memcpy(dest: Out, src: O.WeakBinds.Opcodes.data(), n: O.WeakBinds.Opcodes.size()); |
350 | } |
351 | |
352 | void MachOWriter::writeLazyBindInfo() { |
353 | if (!O.DyLdInfoCommandIndex) |
354 | return; |
355 | const MachO::dyld_info_command &DyLdInfoCommand = |
356 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
357 | .MachOLoadCommand.dyld_info_command_data; |
358 | char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.lazy_bind_off; |
359 | assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && |
360 | "Incorrect lazy bind opcodes size" ); |
361 | memcpy(dest: Out, src: O.LazyBinds.Opcodes.data(), n: O.LazyBinds.Opcodes.size()); |
362 | } |
363 | |
364 | void MachOWriter::writeExportInfo() { |
365 | if (!O.DyLdInfoCommandIndex) |
366 | return; |
367 | const MachO::dyld_info_command &DyLdInfoCommand = |
368 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
369 | .MachOLoadCommand.dyld_info_command_data; |
370 | char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.export_off; |
371 | assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && |
372 | "Incorrect export trie size" ); |
373 | memcpy(dest: Out, src: O.Exports.Trie.data(), n: O.Exports.Trie.size()); |
374 | } |
375 | |
376 | void MachOWriter::writeIndirectSymbolTable() { |
377 | if (!O.DySymTabCommandIndex) |
378 | return; |
379 | |
380 | const MachO::dysymtab_command &DySymTabCommand = |
381 | O.LoadCommands[*O.DySymTabCommandIndex] |
382 | .MachOLoadCommand.dysymtab_command_data; |
383 | |
384 | uint32_t *Out = |
385 | (uint32_t *)(Buf->getBufferStart() + DySymTabCommand.indirectsymoff); |
386 | for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) { |
387 | uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex; |
388 | if (IsLittleEndian != sys::IsLittleEndianHost) |
389 | sys::swapByteOrder(Value&: Entry); |
390 | *Out++ = Entry; |
391 | } |
392 | } |
393 | |
394 | void MachOWriter::writeLinkData(std::optional<size_t> LCIndex, |
395 | const LinkData &LD) { |
396 | if (!LCIndex) |
397 | return; |
398 | const MachO::linkedit_data_command &LinkEditDataCommand = |
399 | O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; |
400 | char *Out = (char *)Buf->getBufferStart() + LinkEditDataCommand.dataoff; |
401 | assert((LinkEditDataCommand.datasize == LD.Data.size()) && |
402 | "Incorrect data size" ); |
403 | memcpy(dest: Out, src: LD.Data.data(), n: LD.Data.size()); |
404 | } |
405 | |
406 | static uint64_t |
407 | getSegmentFileOffset(const LoadCommand &TextSegmentLoadCommand) { |
408 | const MachO::macho_load_command &MLC = |
409 | TextSegmentLoadCommand.MachOLoadCommand; |
410 | switch (MLC.load_command_data.cmd) { |
411 | case MachO::LC_SEGMENT: |
412 | return MLC.segment_command_data.fileoff; |
413 | case MachO::LC_SEGMENT_64: |
414 | return MLC.segment_command_64_data.fileoff; |
415 | default: |
416 | return 0; |
417 | } |
418 | } |
419 | |
420 | static uint64_t getSegmentFileSize(const LoadCommand &TextSegmentLoadCommand) { |
421 | const MachO::macho_load_command &MLC = |
422 | TextSegmentLoadCommand.MachOLoadCommand; |
423 | switch (MLC.load_command_data.cmd) { |
424 | case MachO::LC_SEGMENT: |
425 | return MLC.segment_command_data.filesize; |
426 | case MachO::LC_SEGMENT_64: |
427 | return MLC.segment_command_64_data.filesize; |
428 | default: |
429 | return 0; |
430 | } |
431 | } |
432 | |
433 | void MachOWriter::writeCodeSignatureData() { |
434 | // NOTE: This CodeSignature section behaviour must be kept in sync with that |
435 | // performed in LLD's CodeSignatureSection::write / |
436 | // CodeSignatureSection::writeHashes. Furthermore, this call must occur only |
437 | // after the rest of the binary has already been written to the buffer. This |
438 | // is because the buffer is read from to perform the necessary hashing. |
439 | |
440 | // The CodeSignature section is the last section in the MachO binary and |
441 | // contains a hash of all content in the binary before it. Since llvm-objcopy |
442 | // has likely modified the target binary, the hash must be regenerated |
443 | // entirely. To generate this hash, we must read from the start of the binary |
444 | // (HashReadStart) to just before the start of the CodeSignature section |
445 | // (HashReadEnd). |
446 | |
447 | const CodeSignatureInfo &CodeSignature = LayoutBuilder.getCodeSignature(); |
448 | |
449 | uint8_t *BufferStart = reinterpret_cast<uint8_t *>(Buf->getBufferStart()); |
450 | uint8_t *HashReadStart = BufferStart; |
451 | uint8_t *HashReadEnd = BufferStart + CodeSignature.StartOffset; |
452 | |
453 | // The CodeSignature section begins with a header, after which the hashes |
454 | // of each page of the binary are written. |
455 | uint8_t *HashWriteStart = HashReadEnd + CodeSignature.AllHeadersSize; |
456 | |
457 | uint32_t TextSegmentFileOff = 0; |
458 | uint32_t TextSegmentFileSize = 0; |
459 | if (O.TextSegmentCommandIndex) { |
460 | const LoadCommand &TextSegmentLoadCommand = |
461 | O.LoadCommands[*O.TextSegmentCommandIndex]; |
462 | assert(TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd == |
463 | MachO::LC_SEGMENT || |
464 | TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd == |
465 | MachO::LC_SEGMENT_64); |
466 | assert(StringRef(TextSegmentLoadCommand.MachOLoadCommand |
467 | .segment_command_data.segname) == "__TEXT" ); |
468 | TextSegmentFileOff = getSegmentFileOffset(TextSegmentLoadCommand); |
469 | TextSegmentFileSize = getSegmentFileSize(TextSegmentLoadCommand); |
470 | } |
471 | |
472 | const uint32_t FileNamePad = CodeSignature.AllHeadersSize - |
473 | CodeSignature.FixedHeadersSize - |
474 | CodeSignature.OutputFileName.size(); |
475 | |
476 | // Write code section header. |
477 | auto *SuperBlob = reinterpret_cast<MachO::CS_SuperBlob *>(HashReadEnd); |
478 | write32be(P: &SuperBlob->magic, V: MachO::CSMAGIC_EMBEDDED_SIGNATURE); |
479 | write32be(P: &SuperBlob->length, V: CodeSignature.Size); |
480 | write32be(P: &SuperBlob->count, V: 1); |
481 | auto *BlobIndex = reinterpret_cast<MachO::CS_BlobIndex *>(&SuperBlob[1]); |
482 | write32be(P: &BlobIndex->type, V: MachO::CSSLOT_CODEDIRECTORY); |
483 | write32be(P: &BlobIndex->offset, V: CodeSignature.BlobHeadersSize); |
484 | auto *CodeDirectory = reinterpret_cast<MachO::CS_CodeDirectory *>( |
485 | HashReadEnd + CodeSignature.BlobHeadersSize); |
486 | write32be(P: &CodeDirectory->magic, V: MachO::CSMAGIC_CODEDIRECTORY); |
487 | write32be(P: &CodeDirectory->length, |
488 | V: CodeSignature.Size - CodeSignature.BlobHeadersSize); |
489 | write32be(P: &CodeDirectory->version, V: MachO::CS_SUPPORTSEXECSEG); |
490 | write32be(P: &CodeDirectory->flags, V: MachO::CS_ADHOC | MachO::CS_LINKER_SIGNED); |
491 | write32be(P: &CodeDirectory->hashOffset, |
492 | V: sizeof(MachO::CS_CodeDirectory) + |
493 | CodeSignature.OutputFileName.size() + FileNamePad); |
494 | write32be(P: &CodeDirectory->identOffset, V: sizeof(MachO::CS_CodeDirectory)); |
495 | CodeDirectory->nSpecialSlots = 0; |
496 | write32be(P: &CodeDirectory->nCodeSlots, V: CodeSignature.BlockCount); |
497 | write32be(P: &CodeDirectory->codeLimit, V: CodeSignature.StartOffset); |
498 | CodeDirectory->hashSize = static_cast<uint8_t>(CodeSignature.HashSize); |
499 | CodeDirectory->hashType = MachO::kSecCodeSignatureHashSHA256; |
500 | CodeDirectory->platform = 0; |
501 | CodeDirectory->pageSize = CodeSignature.BlockSizeShift; |
502 | CodeDirectory->spare2 = 0; |
503 | CodeDirectory->scatterOffset = 0; |
504 | CodeDirectory->teamOffset = 0; |
505 | CodeDirectory->spare3 = 0; |
506 | CodeDirectory->codeLimit64 = 0; |
507 | write64be(P: &CodeDirectory->execSegBase, V: TextSegmentFileOff); |
508 | write64be(P: &CodeDirectory->execSegLimit, V: TextSegmentFileSize); |
509 | write64be(P: &CodeDirectory->execSegFlags, V: O.Header.FileType == MachO::MH_EXECUTE |
510 | ? MachO::CS_EXECSEG_MAIN_BINARY |
511 | : 0); |
512 | |
513 | auto *Id = reinterpret_cast<char *>(&CodeDirectory[1]); |
514 | memcpy(dest: Id, src: CodeSignature.OutputFileName.begin(), |
515 | n: CodeSignature.OutputFileName.size()); |
516 | memset(s: Id + CodeSignature.OutputFileName.size(), c: 0, n: FileNamePad); |
517 | |
518 | // Write the hashes. |
519 | uint8_t *CurrHashReadPosition = HashReadStart; |
520 | uint8_t *CurrHashWritePosition = HashWriteStart; |
521 | while (CurrHashReadPosition < HashReadEnd) { |
522 | StringRef Block(reinterpret_cast<char *>(CurrHashReadPosition), |
523 | std::min(a: static_cast<size_t>(HashReadEnd |
524 | - CurrHashReadPosition), |
525 | b: static_cast<size_t>(CodeSignature.BlockSize))); |
526 | SHA256 Hasher; |
527 | Hasher.update(Str: Block); |
528 | std::array<uint8_t, 32> Hash = Hasher.final(); |
529 | assert(Hash.size() == CodeSignature.HashSize); |
530 | memcpy(dest: CurrHashWritePosition, src: Hash.data(), n: CodeSignature.HashSize); |
531 | CurrHashReadPosition += CodeSignature.BlockSize; |
532 | CurrHashWritePosition += CodeSignature.HashSize; |
533 | } |
534 | #if defined(__APPLE__) |
535 | // This is macOS-specific work-around and makes no sense for any |
536 | // other host OS. See https://openradar.appspot.com/FB8914231 |
537 | // |
538 | // The macOS kernel maintains a signature-verification cache to |
539 | // quickly validate applications at time of execve(2). The trouble |
540 | // is that for the kernel creates the cache entry at the time of the |
541 | // mmap(2) call, before we have a chance to write either the code to |
542 | // sign or the signature header+hashes. The fix is to invalidate |
543 | // all cached data associated with the output file, thus discarding |
544 | // the bogus prematurely-cached signature. |
545 | msync(BufferStart, CodeSignature.StartOffset + CodeSignature.Size, |
546 | MS_INVALIDATE); |
547 | #endif |
548 | } |
549 | |
550 | void MachOWriter::writeDataInCodeData() { |
551 | return writeLinkData(LCIndex: O.DataInCodeCommandIndex, LD: O.DataInCode); |
552 | } |
553 | |
554 | void MachOWriter::writeLinkerOptimizationHint() { |
555 | return writeLinkData(LCIndex: O.LinkerOptimizationHintCommandIndex, |
556 | LD: O.LinkerOptimizationHint); |
557 | } |
558 | |
559 | void MachOWriter::writeFunctionStartsData() { |
560 | return writeLinkData(LCIndex: O.FunctionStartsCommandIndex, LD: O.FunctionStarts); |
561 | } |
562 | |
563 | void MachOWriter::writeDylibCodeSignDRsData() { |
564 | return writeLinkData(LCIndex: O.DylibCodeSignDRsIndex, LD: O.DylibCodeSignDRs); |
565 | } |
566 | |
567 | void MachOWriter::writeChainedFixupsData() { |
568 | return writeLinkData(LCIndex: O.ChainedFixupsCommandIndex, LD: O.ChainedFixups); |
569 | } |
570 | |
571 | void MachOWriter::writeExportsTrieData() { |
572 | if (!O.ExportsTrieCommandIndex) |
573 | return; |
574 | const MachO::linkedit_data_command &ExportsTrieCmd = |
575 | O.LoadCommands[*O.ExportsTrieCommandIndex] |
576 | .MachOLoadCommand.linkedit_data_command_data; |
577 | char *Out = (char *)Buf->getBufferStart() + ExportsTrieCmd.dataoff; |
578 | assert((ExportsTrieCmd.datasize == O.Exports.Trie.size()) && |
579 | "Incorrect export trie size" ); |
580 | memcpy(dest: Out, src: O.Exports.Trie.data(), n: O.Exports.Trie.size()); |
581 | } |
582 | |
583 | void MachOWriter::writeTail() { |
584 | typedef void (MachOWriter::*WriteHandlerType)(); |
585 | typedef std::pair<uint64_t, WriteHandlerType> WriteOperation; |
586 | SmallVector<WriteOperation, 7> Queue; |
587 | |
588 | if (O.SymTabCommandIndex) { |
589 | const MachO::symtab_command &SymTabCommand = |
590 | O.LoadCommands[*O.SymTabCommandIndex] |
591 | .MachOLoadCommand.symtab_command_data; |
592 | if (SymTabCommand.symoff) |
593 | Queue.push_back(Elt: {SymTabCommand.symoff, &MachOWriter::writeSymbolTable}); |
594 | if (SymTabCommand.stroff) |
595 | Queue.push_back(Elt: {SymTabCommand.stroff, &MachOWriter::writeStringTable}); |
596 | } |
597 | |
598 | if (O.DyLdInfoCommandIndex) { |
599 | const MachO::dyld_info_command &DyLdInfoCommand = |
600 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
601 | .MachOLoadCommand.dyld_info_command_data; |
602 | if (DyLdInfoCommand.rebase_off) |
603 | Queue.push_back( |
604 | Elt: {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo}); |
605 | if (DyLdInfoCommand.bind_off) |
606 | Queue.push_back(Elt: {DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo}); |
607 | if (DyLdInfoCommand.weak_bind_off) |
608 | Queue.push_back( |
609 | Elt: {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo}); |
610 | if (DyLdInfoCommand.lazy_bind_off) |
611 | Queue.push_back( |
612 | Elt: {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo}); |
613 | if (DyLdInfoCommand.export_off) |
614 | Queue.push_back( |
615 | Elt: {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo}); |
616 | } |
617 | |
618 | if (O.DySymTabCommandIndex) { |
619 | const MachO::dysymtab_command &DySymTabCommand = |
620 | O.LoadCommands[*O.DySymTabCommandIndex] |
621 | .MachOLoadCommand.dysymtab_command_data; |
622 | |
623 | if (DySymTabCommand.indirectsymoff) |
624 | Queue.emplace_back(Args: DySymTabCommand.indirectsymoff, |
625 | Args: &MachOWriter::writeIndirectSymbolTable); |
626 | } |
627 | |
628 | std::initializer_list<std::pair<std::optional<size_t>, WriteHandlerType>> |
629 | LinkEditDataCommandWriters = { |
630 | {O.CodeSignatureCommandIndex, &MachOWriter::writeCodeSignatureData}, |
631 | {O.DylibCodeSignDRsIndex, &MachOWriter::writeDylibCodeSignDRsData}, |
632 | {O.DataInCodeCommandIndex, &MachOWriter::writeDataInCodeData}, |
633 | {O.LinkerOptimizationHintCommandIndex, |
634 | &MachOWriter::writeLinkerOptimizationHint}, |
635 | {O.FunctionStartsCommandIndex, &MachOWriter::writeFunctionStartsData}, |
636 | {O.ChainedFixupsCommandIndex, &MachOWriter::writeChainedFixupsData}, |
637 | {O.ExportsTrieCommandIndex, &MachOWriter::writeExportsTrieData}}; |
638 | for (const auto &W : LinkEditDataCommandWriters) { |
639 | std::optional<size_t> LinkEditDataCommandIndex; |
640 | WriteHandlerType WriteHandler; |
641 | std::tie(args&: LinkEditDataCommandIndex, args&: WriteHandler) = W; |
642 | if (LinkEditDataCommandIndex) { |
643 | const MachO::linkedit_data_command &LinkEditDataCommand = |
644 | O.LoadCommands[*LinkEditDataCommandIndex] |
645 | .MachOLoadCommand.linkedit_data_command_data; |
646 | if (LinkEditDataCommand.dataoff) |
647 | Queue.emplace_back(Args: LinkEditDataCommand.dataoff, Args&: WriteHandler); |
648 | } |
649 | } |
650 | |
651 | llvm::sort(C&: Queue, Comp: llvm::less_first()); |
652 | |
653 | for (auto WriteOp : Queue) |
654 | (this->*WriteOp.second)(); |
655 | } |
656 | |
657 | Error MachOWriter::finalize() { return LayoutBuilder.layout(); } |
658 | |
659 | Error MachOWriter::write() { |
660 | size_t TotalSize = totalSize(); |
661 | Buf = WritableMemoryBuffer::getNewMemBuffer(Size: TotalSize); |
662 | if (!Buf) |
663 | return createStringError(EC: errc::not_enough_memory, |
664 | S: "failed to allocate memory buffer of " + |
665 | Twine::utohexstr(Val: TotalSize) + " bytes" ); |
666 | writeHeader(); |
667 | writeLoadCommands(); |
668 | writeSections(); |
669 | writeTail(); |
670 | |
671 | // TODO: Implement direct writing to the output stream (without intermediate |
672 | // memory buffer Buf). |
673 | Out.write(Ptr: Buf->getBufferStart(), Size: Buf->getBufferSize()); |
674 | return Error::success(); |
675 | } |
676 | |