| 1 | //===- MachOWriter.cpp ------------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "MachOWriter.h" |
| 10 | #include "MachOLayoutBuilder.h" |
| 11 | #include "MachOObject.h" |
| 12 | #include "llvm/ADT/STLExtras.h" |
| 13 | #include "llvm/BinaryFormat/MachO.h" |
| 14 | #include "llvm/Support/Errc.h" |
| 15 | #include "llvm/Support/ErrorHandling.h" |
| 16 | #include "llvm/Support/SHA256.h" |
| 17 | #include <memory> |
| 18 | |
| 19 | #if defined(__APPLE__) |
| 20 | #include <sys/mman.h> |
| 21 | #endif |
| 22 | |
| 23 | using namespace llvm; |
| 24 | using namespace llvm::objcopy::macho; |
| 25 | using namespace llvm::support::endian; |
| 26 | |
| 27 | #ifndef NDEBUG |
| 28 | static uint64_t paddedLinkEditEntrySize(uint64_t Size, bool Is64Bit) { |
| 29 | return alignToPowerOf2(Size, Is64Bit ? 8 : 4); |
| 30 | } |
| 31 | #endif |
| 32 | |
| 33 | size_t MachOWriter::() const { |
| 34 | return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); |
| 35 | } |
| 36 | |
| 37 | size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; } |
| 38 | |
| 39 | size_t MachOWriter::symTableSize() const { |
| 40 | return O.SymTable.Symbols.size() * |
| 41 | (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist)); |
| 42 | } |
| 43 | |
| 44 | size_t MachOWriter::totalSize() const { |
| 45 | // Going from tail to head and looking for an appropriate "anchor" to |
| 46 | // calculate the total size assuming that all the offsets are either valid |
| 47 | // ("true") or 0 (0 indicates that the corresponding part is missing). |
| 48 | |
| 49 | SmallVector<size_t, 7> Ends; |
| 50 | if (O.SymTabCommandIndex) { |
| 51 | const MachO::symtab_command &SymTabCommand = |
| 52 | O.LoadCommands[*O.SymTabCommandIndex] |
| 53 | .MachOLoadCommand.symtab_command_data; |
| 54 | if (SymTabCommand.symoff) |
| 55 | Ends.push_back(Elt: SymTabCommand.symoff + symTableSize()); |
| 56 | if (SymTabCommand.stroff) |
| 57 | Ends.push_back(Elt: SymTabCommand.stroff + SymTabCommand.strsize); |
| 58 | } |
| 59 | if (O.DyLdInfoCommandIndex) { |
| 60 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 61 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 62 | .MachOLoadCommand.dyld_info_command_data; |
| 63 | if (DyLdInfoCommand.rebase_off) { |
| 64 | assert((DyLdInfoCommand.rebase_size == |
| 65 | paddedLinkEditEntrySize(O.Rebases.Opcodes.size(), Is64Bit)) && |
| 66 | "Incorrect rebase opcodes size" ); |
| 67 | Ends.push_back(Elt: DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size); |
| 68 | } |
| 69 | if (DyLdInfoCommand.bind_off) { |
| 70 | assert((DyLdInfoCommand.bind_size == |
| 71 | paddedLinkEditEntrySize(O.Binds.Opcodes.size(), Is64Bit)) && |
| 72 | "Incorrect bind opcodes size" ); |
| 73 | Ends.push_back(Elt: DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size); |
| 74 | } |
| 75 | if (DyLdInfoCommand.weak_bind_off) { |
| 76 | assert((DyLdInfoCommand.weak_bind_size == |
| 77 | paddedLinkEditEntrySize(O.WeakBinds.Opcodes.size(), Is64Bit)) && |
| 78 | "Incorrect weak bind opcodes size" ); |
| 79 | Ends.push_back(Elt: DyLdInfoCommand.weak_bind_off + |
| 80 | DyLdInfoCommand.weak_bind_size); |
| 81 | } |
| 82 | if (DyLdInfoCommand.lazy_bind_off) { |
| 83 | assert((DyLdInfoCommand.lazy_bind_size == |
| 84 | paddedLinkEditEntrySize(O.LazyBinds.Opcodes.size(), Is64Bit)) && |
| 85 | "Incorrect lazy bind opcodes size" ); |
| 86 | Ends.push_back(Elt: DyLdInfoCommand.lazy_bind_off + |
| 87 | DyLdInfoCommand.lazy_bind_size); |
| 88 | } |
| 89 | if (DyLdInfoCommand.export_off) { |
| 90 | assert((DyLdInfoCommand.export_size == |
| 91 | paddedLinkEditEntrySize(O.Exports.Trie.size(), Is64Bit)) && |
| 92 | "Incorrect trie size" ); |
| 93 | Ends.push_back(Elt: DyLdInfoCommand.export_off + DyLdInfoCommand.export_size); |
| 94 | } |
| 95 | } |
| 96 | |
| 97 | if (O.DySymTabCommandIndex) { |
| 98 | const MachO::dysymtab_command &DySymTabCommand = |
| 99 | O.LoadCommands[*O.DySymTabCommandIndex] |
| 100 | .MachOLoadCommand.dysymtab_command_data; |
| 101 | |
| 102 | if (DySymTabCommand.indirectsymoff) |
| 103 | Ends.push_back(Elt: DySymTabCommand.indirectsymoff + |
| 104 | sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); |
| 105 | } |
| 106 | |
| 107 | for (std::optional<size_t> LinkEditDataCommandIndex : |
| 108 | {O.CodeSignatureCommandIndex, O.DylibCodeSignDRsIndex, |
| 109 | O.DataInCodeCommandIndex, O.LinkerOptimizationHintCommandIndex, |
| 110 | O.FunctionStartsCommandIndex, O.ChainedFixupsCommandIndex, |
| 111 | O.ExportsTrieCommandIndex}) |
| 112 | if (LinkEditDataCommandIndex) { |
| 113 | const MachO::linkedit_data_command &LinkEditDataCommand = |
| 114 | O.LoadCommands[*LinkEditDataCommandIndex] |
| 115 | .MachOLoadCommand.linkedit_data_command_data; |
| 116 | if (LinkEditDataCommand.dataoff) |
| 117 | Ends.push_back(Elt: LinkEditDataCommand.dataoff + |
| 118 | LinkEditDataCommand.datasize); |
| 119 | } |
| 120 | |
| 121 | // Otherwise, use the last section / reloction. |
| 122 | for (const LoadCommand &LC : O.LoadCommands) |
| 123 | for (const std::unique_ptr<Section> &S : LC.Sections) { |
| 124 | if (!S->hasValidOffset()) { |
| 125 | assert((S->Offset == 0) && "Skipped section's offset must be zero" ); |
| 126 | assert((S->isBssSection() || S->Size == 0) && |
| 127 | "Non-zero-fill sections with zero offset must have zero size" ); |
| 128 | continue; |
| 129 | } |
| 130 | assert((S->Offset != 0) && |
| 131 | "Non-zero-fill section's offset cannot be zero" ); |
| 132 | Ends.push_back(Elt: S->Offset + S->Size); |
| 133 | if (S->RelOff) |
| 134 | Ends.push_back(Elt: S->RelOff + |
| 135 | S->NReloc * sizeof(MachO::any_relocation_info)); |
| 136 | } |
| 137 | |
| 138 | if (!Ends.empty()) |
| 139 | return *llvm::max_element(Range&: Ends); |
| 140 | |
| 141 | // Otherwise, we have only Mach header and load commands. |
| 142 | return headerSize() + loadCommandsSize(); |
| 143 | } |
| 144 | |
| 145 | void MachOWriter::() { |
| 146 | MachO::mach_header_64 ; |
| 147 | |
| 148 | Header.magic = O.Header.Magic; |
| 149 | Header.cputype = O.Header.CPUType; |
| 150 | Header.cpusubtype = O.Header.CPUSubType; |
| 151 | Header.filetype = O.Header.FileType; |
| 152 | Header.ncmds = O.Header.NCmds; |
| 153 | Header.sizeofcmds = O.Header.SizeOfCmds; |
| 154 | Header.flags = O.Header.Flags; |
| 155 | Header.reserved = O.Header.Reserved; |
| 156 | |
| 157 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 158 | MachO::swapStruct(H&: Header); |
| 159 | |
| 160 | auto = |
| 161 | Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); |
| 162 | memcpy(dest: Buf->getBufferStart(), src: &Header, n: HeaderSize); |
| 163 | } |
| 164 | |
| 165 | void MachOWriter::writeLoadCommands() { |
| 166 | uint8_t *Begin = |
| 167 | reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + headerSize(); |
| 168 | for (const LoadCommand &LC : O.LoadCommands) { |
| 169 | // Construct a load command. |
| 170 | MachO::macho_load_command MLC = LC.MachOLoadCommand; |
| 171 | switch (MLC.load_command_data.cmd) { |
| 172 | case MachO::LC_SEGMENT: |
| 173 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 174 | MachO::swapStruct(seg&: MLC.segment_command_data); |
| 175 | memcpy(dest: Begin, src: &MLC.segment_command_data, n: sizeof(MachO::segment_command)); |
| 176 | Begin += sizeof(MachO::segment_command); |
| 177 | |
| 178 | for (const std::unique_ptr<Section> &Sec : LC.Sections) |
| 179 | writeSectionInLoadCommand<MachO::section>(Sec: *Sec, Out&: Begin); |
| 180 | continue; |
| 181 | case MachO::LC_SEGMENT_64: |
| 182 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 183 | MachO::swapStruct(seg&: MLC.segment_command_64_data); |
| 184 | memcpy(dest: Begin, src: &MLC.segment_command_64_data, |
| 185 | n: sizeof(MachO::segment_command_64)); |
| 186 | Begin += sizeof(MachO::segment_command_64); |
| 187 | |
| 188 | for (const std::unique_ptr<Section> &Sec : LC.Sections) |
| 189 | writeSectionInLoadCommand<MachO::section_64>(Sec: *Sec, Out&: Begin); |
| 190 | continue; |
| 191 | } |
| 192 | |
| 193 | #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ |
| 194 | case MachO::LCName: \ |
| 195 | assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \ |
| 196 | MLC.load_command_data.cmdsize); \ |
| 197 | if (IsLittleEndian != sys::IsLittleEndianHost) \ |
| 198 | MachO::swapStruct(MLC.LCStruct##_data); \ |
| 199 | memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \ |
| 200 | Begin += sizeof(MachO::LCStruct); \ |
| 201 | if (!LC.Payload.empty()) \ |
| 202 | memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \ |
| 203 | Begin += LC.Payload.size(); \ |
| 204 | break; |
| 205 | |
| 206 | // Copy the load command as it is. |
| 207 | switch (MLC.load_command_data.cmd) { |
| 208 | default: |
| 209 | assert(sizeof(MachO::load_command) + LC.Payload.size() == |
| 210 | MLC.load_command_data.cmdsize); |
| 211 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 212 | MachO::swapStruct(lc&: MLC.load_command_data); |
| 213 | memcpy(dest: Begin, src: &MLC.load_command_data, n: sizeof(MachO::load_command)); |
| 214 | Begin += sizeof(MachO::load_command); |
| 215 | if (!LC.Payload.empty()) |
| 216 | memcpy(dest: Begin, src: LC.Payload.data(), n: LC.Payload.size()); |
| 217 | Begin += LC.Payload.size(); |
| 218 | break; |
| 219 | #include "llvm/BinaryFormat/MachO.def" |
| 220 | } |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | template <typename StructType> |
| 225 | void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) { |
| 226 | StructType Temp; |
| 227 | assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name" ); |
| 228 | assert(Sec.Sectname.size() <= sizeof(Temp.sectname) && |
| 229 | "too long section name" ); |
| 230 | memset(&Temp, 0, sizeof(StructType)); |
| 231 | memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size()); |
| 232 | memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size()); |
| 233 | Temp.addr = Sec.Addr; |
| 234 | Temp.size = Sec.Size; |
| 235 | Temp.offset = Sec.Offset; |
| 236 | Temp.align = Sec.Align; |
| 237 | Temp.reloff = Sec.RelOff; |
| 238 | Temp.nreloc = Sec.NReloc; |
| 239 | Temp.flags = Sec.Flags; |
| 240 | Temp.reserved1 = Sec.Reserved1; |
| 241 | Temp.reserved2 = Sec.Reserved2; |
| 242 | |
| 243 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 244 | MachO::swapStruct(Temp); |
| 245 | memcpy(Out, &Temp, sizeof(StructType)); |
| 246 | Out += sizeof(StructType); |
| 247 | } |
| 248 | |
| 249 | void MachOWriter::writeSections() { |
| 250 | for (const LoadCommand &LC : O.LoadCommands) |
| 251 | for (const std::unique_ptr<Section> &Sec : LC.Sections) { |
| 252 | if (!Sec->hasValidOffset()) { |
| 253 | assert((Sec->Offset == 0) && "Skipped section's offset must be zero" ); |
| 254 | assert((Sec->isBssSection() || Sec->Size == 0) && |
| 255 | "Non-zero-fill sections with zero offset must have zero size" ); |
| 256 | continue; |
| 257 | } |
| 258 | |
| 259 | assert(Sec->Offset && "Section offset can not be zero" ); |
| 260 | assert((Sec->Size == Sec->Content.size()) && "Incorrect section size" ); |
| 261 | memcpy(dest: Buf->getBufferStart() + Sec->Offset, src: Sec->Content.data(), |
| 262 | n: Sec->Content.size()); |
| 263 | for (size_t Index = 0; Index < Sec->Relocations.size(); ++Index) { |
| 264 | RelocationInfo RelocInfo = Sec->Relocations[Index]; |
| 265 | if (!RelocInfo.Scattered && !RelocInfo.IsAddend) { |
| 266 | const uint32_t SymbolNum = RelocInfo.Extern |
| 267 | ? (*RelocInfo.Symbol)->Index |
| 268 | : (*RelocInfo.Sec)->Index; |
| 269 | RelocInfo.setPlainRelocationSymbolNum(SymbolNum, IsLittleEndian); |
| 270 | } |
| 271 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 272 | MachO::swapStruct( |
| 273 | reloc&: reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info)); |
| 274 | memcpy(dest: Buf->getBufferStart() + Sec->RelOff + |
| 275 | Index * sizeof(MachO::any_relocation_info), |
| 276 | src: &RelocInfo.Info, n: sizeof(RelocInfo.Info)); |
| 277 | } |
| 278 | } |
| 279 | } |
| 280 | |
| 281 | template <typename NListType> |
| 282 | void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out, |
| 283 | uint32_t Nstrx) { |
| 284 | NListType ListEntry; |
| 285 | ListEntry.n_strx = Nstrx; |
| 286 | ListEntry.n_type = SE.n_type; |
| 287 | ListEntry.n_sect = SE.n_sect; |
| 288 | ListEntry.n_desc = SE.n_desc; |
| 289 | ListEntry.n_value = SE.n_value; |
| 290 | |
| 291 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 292 | MachO::swapStruct(ListEntry); |
| 293 | memcpy(dest: Out, src: reinterpret_cast<const char *>(&ListEntry), n: sizeof(NListType)); |
| 294 | Out += sizeof(NListType); |
| 295 | } |
| 296 | |
| 297 | void MachOWriter::writeStringTable() { |
| 298 | if (!O.SymTabCommandIndex) |
| 299 | return; |
| 300 | const MachO::symtab_command &SymTabCommand = |
| 301 | O.LoadCommands[*O.SymTabCommandIndex] |
| 302 | .MachOLoadCommand.symtab_command_data; |
| 303 | |
| 304 | uint8_t *StrTable = (uint8_t *)Buf->getBufferStart() + SymTabCommand.stroff; |
| 305 | LayoutBuilder.getStringTableBuilder().write(Buf: StrTable); |
| 306 | } |
| 307 | |
| 308 | void MachOWriter::writeSymbolTable() { |
| 309 | if (!O.SymTabCommandIndex) |
| 310 | return; |
| 311 | const MachO::symtab_command &SymTabCommand = |
| 312 | O.LoadCommands[*O.SymTabCommandIndex] |
| 313 | .MachOLoadCommand.symtab_command_data; |
| 314 | |
| 315 | char *SymTable = Buf->getBufferStart() + SymTabCommand.symoff; |
| 316 | for (auto &Symbol : O.SymTable.Symbols) { |
| 317 | SymbolEntry *Sym = Symbol.get(); |
| 318 | uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(S: Sym->Name); |
| 319 | |
| 320 | if (Is64Bit) |
| 321 | writeNListEntry<MachO::nlist_64>(SE: *Sym, IsLittleEndian, Out&: SymTable, Nstrx); |
| 322 | else |
| 323 | writeNListEntry<MachO::nlist>(SE: *Sym, IsLittleEndian, Out&: SymTable, Nstrx); |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | void MachOWriter::writeRebaseInfo() { |
| 328 | if (!O.DyLdInfoCommandIndex) |
| 329 | return; |
| 330 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 331 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 332 | .MachOLoadCommand.dyld_info_command_data; |
| 333 | char *Out = Buf->getBufferStart() + DyLdInfoCommand.rebase_off; |
| 334 | assert((DyLdInfoCommand.rebase_size == |
| 335 | paddedLinkEditEntrySize(O.Rebases.Opcodes.size(), Is64Bit)) && |
| 336 | "Incorrect rebase opcodes size" ); |
| 337 | memcpy(dest: Out, src: O.Rebases.Opcodes.data(), n: O.Rebases.Opcodes.size()); |
| 338 | } |
| 339 | |
| 340 | void MachOWriter::writeBindInfo() { |
| 341 | if (!O.DyLdInfoCommandIndex) |
| 342 | return; |
| 343 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 344 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 345 | .MachOLoadCommand.dyld_info_command_data; |
| 346 | char *Out = Buf->getBufferStart() + DyLdInfoCommand.bind_off; |
| 347 | assert((DyLdInfoCommand.bind_size == |
| 348 | paddedLinkEditEntrySize(O.Binds.Opcodes.size(), Is64Bit)) && |
| 349 | "Incorrect bind opcodes size" ); |
| 350 | memcpy(dest: Out, src: O.Binds.Opcodes.data(), n: O.Binds.Opcodes.size()); |
| 351 | } |
| 352 | |
| 353 | void MachOWriter::writeWeakBindInfo() { |
| 354 | if (!O.DyLdInfoCommandIndex) |
| 355 | return; |
| 356 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 357 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 358 | .MachOLoadCommand.dyld_info_command_data; |
| 359 | char *Out = Buf->getBufferStart() + DyLdInfoCommand.weak_bind_off; |
| 360 | assert((DyLdInfoCommand.weak_bind_size == |
| 361 | paddedLinkEditEntrySize(O.WeakBinds.Opcodes.size(), Is64Bit)) && |
| 362 | "Incorrect weak bind opcodes size" ); |
| 363 | memcpy(dest: Out, src: O.WeakBinds.Opcodes.data(), n: O.WeakBinds.Opcodes.size()); |
| 364 | } |
| 365 | |
| 366 | void MachOWriter::writeLazyBindInfo() { |
| 367 | if (!O.DyLdInfoCommandIndex) |
| 368 | return; |
| 369 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 370 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 371 | .MachOLoadCommand.dyld_info_command_data; |
| 372 | char *Out = Buf->getBufferStart() + DyLdInfoCommand.lazy_bind_off; |
| 373 | assert((DyLdInfoCommand.lazy_bind_size == |
| 374 | paddedLinkEditEntrySize(O.LazyBinds.Opcodes.size(), Is64Bit)) && |
| 375 | "Incorrect lazy bind opcodes size" ); |
| 376 | memcpy(dest: Out, src: O.LazyBinds.Opcodes.data(), n: O.LazyBinds.Opcodes.size()); |
| 377 | } |
| 378 | |
| 379 | void MachOWriter::writeExportInfo() { |
| 380 | if (!O.DyLdInfoCommandIndex) |
| 381 | return; |
| 382 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 383 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 384 | .MachOLoadCommand.dyld_info_command_data; |
| 385 | char *Out = Buf->getBufferStart() + DyLdInfoCommand.export_off; |
| 386 | assert((DyLdInfoCommand.export_size == |
| 387 | paddedLinkEditEntrySize(O.Exports.Trie.size(), Is64Bit)) && |
| 388 | "Incorrect export trie size" ); |
| 389 | memcpy(dest: Out, src: O.Exports.Trie.data(), n: O.Exports.Trie.size()); |
| 390 | } |
| 391 | |
| 392 | void MachOWriter::writeIndirectSymbolTable() { |
| 393 | if (!O.DySymTabCommandIndex) |
| 394 | return; |
| 395 | |
| 396 | const MachO::dysymtab_command &DySymTabCommand = |
| 397 | O.LoadCommands[*O.DySymTabCommandIndex] |
| 398 | .MachOLoadCommand.dysymtab_command_data; |
| 399 | |
| 400 | uint32_t *Out = |
| 401 | (uint32_t *)(Buf->getBufferStart() + DySymTabCommand.indirectsymoff); |
| 402 | for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) { |
| 403 | uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex; |
| 404 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 405 | sys::swapByteOrder(Value&: Entry); |
| 406 | *Out++ = Entry; |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | void MachOWriter::writeLinkData(std::optional<size_t> LCIndex, |
| 411 | const LinkData &LD) { |
| 412 | if (!LCIndex) |
| 413 | return; |
| 414 | const MachO::linkedit_data_command &LinkEditDataCommand = |
| 415 | O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; |
| 416 | char *Out = Buf->getBufferStart() + LinkEditDataCommand.dataoff; |
| 417 | assert((LinkEditDataCommand.datasize == |
| 418 | paddedLinkEditEntrySize(LD.Data.size(), Is64Bit)) && |
| 419 | "Incorrect data size" ); |
| 420 | memcpy(dest: Out, src: LD.Data.data(), n: LD.Data.size()); |
| 421 | } |
| 422 | |
| 423 | static uint64_t |
| 424 | getSegmentFileOffset(const LoadCommand &TextSegmentLoadCommand) { |
| 425 | const MachO::macho_load_command &MLC = |
| 426 | TextSegmentLoadCommand.MachOLoadCommand; |
| 427 | switch (MLC.load_command_data.cmd) { |
| 428 | case MachO::LC_SEGMENT: |
| 429 | return MLC.segment_command_data.fileoff; |
| 430 | case MachO::LC_SEGMENT_64: |
| 431 | return MLC.segment_command_64_data.fileoff; |
| 432 | default: |
| 433 | return 0; |
| 434 | } |
| 435 | } |
| 436 | |
| 437 | static uint64_t getSegmentFileSize(const LoadCommand &TextSegmentLoadCommand) { |
| 438 | const MachO::macho_load_command &MLC = |
| 439 | TextSegmentLoadCommand.MachOLoadCommand; |
| 440 | switch (MLC.load_command_data.cmd) { |
| 441 | case MachO::LC_SEGMENT: |
| 442 | return MLC.segment_command_data.filesize; |
| 443 | case MachO::LC_SEGMENT_64: |
| 444 | return MLC.segment_command_64_data.filesize; |
| 445 | default: |
| 446 | return 0; |
| 447 | } |
| 448 | } |
| 449 | |
| 450 | void MachOWriter::writeCodeSignatureData() { |
| 451 | // NOTE: This CodeSignature section behaviour must be kept in sync with that |
| 452 | // performed in LLD's CodeSignatureSection::write / |
| 453 | // CodeSignatureSection::writeHashes. Furthermore, this call must occur only |
| 454 | // after the rest of the binary has already been written to the buffer. This |
| 455 | // is because the buffer is read from to perform the necessary hashing. |
| 456 | |
| 457 | // The CodeSignature section is the last section in the MachO binary and |
| 458 | // contains a hash of all content in the binary before it. Since llvm-objcopy |
| 459 | // has likely modified the target binary, the hash must be regenerated |
| 460 | // entirely. To generate this hash, we must read from the start of the binary |
| 461 | // (HashReadStart) to just before the start of the CodeSignature section |
| 462 | // (HashReadEnd). |
| 463 | |
| 464 | const CodeSignatureInfo &CodeSignature = LayoutBuilder.getCodeSignature(); |
| 465 | |
| 466 | uint8_t *BufferStart = reinterpret_cast<uint8_t *>(Buf->getBufferStart()); |
| 467 | uint8_t *HashReadStart = BufferStart; |
| 468 | uint8_t *HashReadEnd = BufferStart + CodeSignature.StartOffset; |
| 469 | |
| 470 | // The CodeSignature section begins with a header, after which the hashes |
| 471 | // of each page of the binary are written. |
| 472 | uint8_t *HashWriteStart = HashReadEnd + CodeSignature.AllHeadersSize; |
| 473 | |
| 474 | uint32_t TextSegmentFileOff = 0; |
| 475 | uint32_t TextSegmentFileSize = 0; |
| 476 | if (O.TextSegmentCommandIndex) { |
| 477 | const LoadCommand &TextSegmentLoadCommand = |
| 478 | O.LoadCommands[*O.TextSegmentCommandIndex]; |
| 479 | assert(TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd == |
| 480 | MachO::LC_SEGMENT || |
| 481 | TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd == |
| 482 | MachO::LC_SEGMENT_64); |
| 483 | assert(StringRef(TextSegmentLoadCommand.MachOLoadCommand |
| 484 | .segment_command_data.segname) == "__TEXT" ); |
| 485 | TextSegmentFileOff = getSegmentFileOffset(TextSegmentLoadCommand); |
| 486 | TextSegmentFileSize = getSegmentFileSize(TextSegmentLoadCommand); |
| 487 | } |
| 488 | |
| 489 | const uint32_t FileNamePad = CodeSignature.AllHeadersSize - |
| 490 | CodeSignature.FixedHeadersSize - |
| 491 | CodeSignature.OutputFileName.size(); |
| 492 | |
| 493 | // Write code section header. |
| 494 | auto *SuperBlob = reinterpret_cast<MachO::CS_SuperBlob *>(HashReadEnd); |
| 495 | write32be(P: &SuperBlob->magic, V: MachO::CSMAGIC_EMBEDDED_SIGNATURE); |
| 496 | write32be(P: &SuperBlob->length, V: CodeSignature.Size); |
| 497 | write32be(P: &SuperBlob->count, V: 1); |
| 498 | auto *BlobIndex = reinterpret_cast<MachO::CS_BlobIndex *>(&SuperBlob[1]); |
| 499 | write32be(P: &BlobIndex->type, V: MachO::CSSLOT_CODEDIRECTORY); |
| 500 | write32be(P: &BlobIndex->offset, V: CodeSignature.BlobHeadersSize); |
| 501 | auto *CodeDirectory = reinterpret_cast<MachO::CS_CodeDirectory *>( |
| 502 | HashReadEnd + CodeSignature.BlobHeadersSize); |
| 503 | write32be(P: &CodeDirectory->magic, V: MachO::CSMAGIC_CODEDIRECTORY); |
| 504 | write32be(P: &CodeDirectory->length, |
| 505 | V: CodeSignature.Size - CodeSignature.BlobHeadersSize); |
| 506 | write32be(P: &CodeDirectory->version, V: MachO::CS_SUPPORTSEXECSEG); |
| 507 | write32be(P: &CodeDirectory->flags, V: MachO::CS_ADHOC | MachO::CS_LINKER_SIGNED); |
| 508 | write32be(P: &CodeDirectory->hashOffset, |
| 509 | V: sizeof(MachO::CS_CodeDirectory) + |
| 510 | CodeSignature.OutputFileName.size() + FileNamePad); |
| 511 | write32be(P: &CodeDirectory->identOffset, V: sizeof(MachO::CS_CodeDirectory)); |
| 512 | CodeDirectory->nSpecialSlots = 0; |
| 513 | write32be(P: &CodeDirectory->nCodeSlots, V: CodeSignature.BlockCount); |
| 514 | write32be(P: &CodeDirectory->codeLimit, V: CodeSignature.StartOffset); |
| 515 | CodeDirectory->hashSize = static_cast<uint8_t>(CodeSignature.HashSize); |
| 516 | CodeDirectory->hashType = MachO::kSecCodeSignatureHashSHA256; |
| 517 | CodeDirectory->platform = 0; |
| 518 | CodeDirectory->pageSize = CodeSignature.BlockSizeShift; |
| 519 | CodeDirectory->spare2 = 0; |
| 520 | CodeDirectory->scatterOffset = 0; |
| 521 | CodeDirectory->teamOffset = 0; |
| 522 | CodeDirectory->spare3 = 0; |
| 523 | CodeDirectory->codeLimit64 = 0; |
| 524 | write64be(P: &CodeDirectory->execSegBase, V: TextSegmentFileOff); |
| 525 | write64be(P: &CodeDirectory->execSegLimit, V: TextSegmentFileSize); |
| 526 | write64be(P: &CodeDirectory->execSegFlags, V: O.Header.FileType == MachO::MH_EXECUTE |
| 527 | ? MachO::CS_EXECSEG_MAIN_BINARY |
| 528 | : 0); |
| 529 | |
| 530 | auto *Id = reinterpret_cast<char *>(&CodeDirectory[1]); |
| 531 | memcpy(dest: Id, src: CodeSignature.OutputFileName.begin(), |
| 532 | n: CodeSignature.OutputFileName.size()); |
| 533 | memset(s: Id + CodeSignature.OutputFileName.size(), c: 0, n: FileNamePad); |
| 534 | |
| 535 | // Write the hashes. |
| 536 | uint8_t *CurrHashReadPosition = HashReadStart; |
| 537 | uint8_t *CurrHashWritePosition = HashWriteStart; |
| 538 | while (CurrHashReadPosition < HashReadEnd) { |
| 539 | StringRef Block(reinterpret_cast<char *>(CurrHashReadPosition), |
| 540 | std::min(a: static_cast<size_t>(HashReadEnd |
| 541 | - CurrHashReadPosition), |
| 542 | b: static_cast<size_t>(CodeSignature.BlockSize))); |
| 543 | SHA256 Hasher; |
| 544 | Hasher.update(Str: Block); |
| 545 | std::array<uint8_t, 32> Hash = Hasher.final(); |
| 546 | assert(Hash.size() == CodeSignature.HashSize); |
| 547 | memcpy(dest: CurrHashWritePosition, src: Hash.data(), n: CodeSignature.HashSize); |
| 548 | CurrHashReadPosition += CodeSignature.BlockSize; |
| 549 | CurrHashWritePosition += CodeSignature.HashSize; |
| 550 | } |
| 551 | #if defined(__APPLE__) |
| 552 | // This is macOS-specific work-around and makes no sense for any |
| 553 | // other host OS. See https://openradar.appspot.com/FB8914231 |
| 554 | // |
| 555 | // The macOS kernel maintains a signature-verification cache to |
| 556 | // quickly validate applications at time of execve(2). The trouble |
| 557 | // is that for the kernel creates the cache entry at the time of the |
| 558 | // mmap(2) call, before we have a chance to write either the code to |
| 559 | // sign or the signature header+hashes. The fix is to invalidate |
| 560 | // all cached data associated with the output file, thus discarding |
| 561 | // the bogus prematurely-cached signature. |
| 562 | msync(BufferStart, CodeSignature.StartOffset + CodeSignature.Size, |
| 563 | MS_INVALIDATE); |
| 564 | #endif |
| 565 | } |
| 566 | |
| 567 | void MachOWriter::writeDataInCodeData() { |
| 568 | return writeLinkData(LCIndex: O.DataInCodeCommandIndex, LD: O.DataInCode); |
| 569 | } |
| 570 | |
| 571 | void MachOWriter::writeLinkerOptimizationHint() { |
| 572 | return writeLinkData(LCIndex: O.LinkerOptimizationHintCommandIndex, |
| 573 | LD: O.LinkerOptimizationHint); |
| 574 | } |
| 575 | |
| 576 | void MachOWriter::writeFunctionStartsData() { |
| 577 | return writeLinkData(LCIndex: O.FunctionStartsCommandIndex, LD: O.FunctionStarts); |
| 578 | } |
| 579 | |
| 580 | void MachOWriter::writeDylibCodeSignDRsData() { |
| 581 | return writeLinkData(LCIndex: O.DylibCodeSignDRsIndex, LD: O.DylibCodeSignDRs); |
| 582 | } |
| 583 | |
| 584 | void MachOWriter::writeChainedFixupsData() { |
| 585 | return writeLinkData(LCIndex: O.ChainedFixupsCommandIndex, LD: O.ChainedFixups); |
| 586 | } |
| 587 | |
| 588 | void MachOWriter::writeExportsTrieData() { |
| 589 | if (!O.ExportsTrieCommandIndex) |
| 590 | return; |
| 591 | const MachO::linkedit_data_command &ExportsTrieCmd = |
| 592 | O.LoadCommands[*O.ExportsTrieCommandIndex] |
| 593 | .MachOLoadCommand.linkedit_data_command_data; |
| 594 | char *Out = Buf->getBufferStart() + ExportsTrieCmd.dataoff; |
| 595 | assert((ExportsTrieCmd.datasize == |
| 596 | paddedLinkEditEntrySize(O.Exports.Trie.size(), Is64Bit)) && |
| 597 | "Incorrect export trie size" ); |
| 598 | memcpy(dest: Out, src: O.Exports.Trie.data(), n: O.Exports.Trie.size()); |
| 599 | } |
| 600 | |
| 601 | void MachOWriter::writeTail() { |
| 602 | typedef void (MachOWriter::*WriteHandlerType)(); |
| 603 | typedef std::pair<uint64_t, WriteHandlerType> WriteOperation; |
| 604 | SmallVector<WriteOperation, 7> Queue; |
| 605 | |
| 606 | if (O.SymTabCommandIndex) { |
| 607 | const MachO::symtab_command &SymTabCommand = |
| 608 | O.LoadCommands[*O.SymTabCommandIndex] |
| 609 | .MachOLoadCommand.symtab_command_data; |
| 610 | if (SymTabCommand.symoff) |
| 611 | Queue.push_back(Elt: {SymTabCommand.symoff, &MachOWriter::writeSymbolTable}); |
| 612 | if (SymTabCommand.stroff) |
| 613 | Queue.push_back(Elt: {SymTabCommand.stroff, &MachOWriter::writeStringTable}); |
| 614 | } |
| 615 | |
| 616 | if (O.DyLdInfoCommandIndex) { |
| 617 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 618 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 619 | .MachOLoadCommand.dyld_info_command_data; |
| 620 | if (DyLdInfoCommand.rebase_off) |
| 621 | Queue.push_back( |
| 622 | Elt: {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo}); |
| 623 | if (DyLdInfoCommand.bind_off) |
| 624 | Queue.push_back(Elt: {DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo}); |
| 625 | if (DyLdInfoCommand.weak_bind_off) |
| 626 | Queue.push_back( |
| 627 | Elt: {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo}); |
| 628 | if (DyLdInfoCommand.lazy_bind_off) |
| 629 | Queue.push_back( |
| 630 | Elt: {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo}); |
| 631 | if (DyLdInfoCommand.export_off) |
| 632 | Queue.push_back( |
| 633 | Elt: {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo}); |
| 634 | } |
| 635 | |
| 636 | if (O.DySymTabCommandIndex) { |
| 637 | const MachO::dysymtab_command &DySymTabCommand = |
| 638 | O.LoadCommands[*O.DySymTabCommandIndex] |
| 639 | .MachOLoadCommand.dysymtab_command_data; |
| 640 | |
| 641 | if (DySymTabCommand.indirectsymoff) |
| 642 | Queue.emplace_back(Args: DySymTabCommand.indirectsymoff, |
| 643 | Args: &MachOWriter::writeIndirectSymbolTable); |
| 644 | } |
| 645 | |
| 646 | std::initializer_list<std::pair<std::optional<size_t>, WriteHandlerType>> |
| 647 | LinkEditDataCommandWriters = { |
| 648 | {O.CodeSignatureCommandIndex, &MachOWriter::writeCodeSignatureData}, |
| 649 | {O.DylibCodeSignDRsIndex, &MachOWriter::writeDylibCodeSignDRsData}, |
| 650 | {O.DataInCodeCommandIndex, &MachOWriter::writeDataInCodeData}, |
| 651 | {O.LinkerOptimizationHintCommandIndex, |
| 652 | &MachOWriter::writeLinkerOptimizationHint}, |
| 653 | {O.FunctionStartsCommandIndex, &MachOWriter::writeFunctionStartsData}, |
| 654 | {O.ChainedFixupsCommandIndex, &MachOWriter::writeChainedFixupsData}, |
| 655 | {O.ExportsTrieCommandIndex, &MachOWriter::writeExportsTrieData}}; |
| 656 | for (const auto &W : LinkEditDataCommandWriters) { |
| 657 | std::optional<size_t> LinkEditDataCommandIndex; |
| 658 | WriteHandlerType WriteHandler; |
| 659 | std::tie(args&: LinkEditDataCommandIndex, args&: WriteHandler) = W; |
| 660 | if (LinkEditDataCommandIndex) { |
| 661 | const MachO::linkedit_data_command &LinkEditDataCommand = |
| 662 | O.LoadCommands[*LinkEditDataCommandIndex] |
| 663 | .MachOLoadCommand.linkedit_data_command_data; |
| 664 | if (LinkEditDataCommand.dataoff) |
| 665 | Queue.emplace_back(Args: LinkEditDataCommand.dataoff, Args&: WriteHandler); |
| 666 | } |
| 667 | } |
| 668 | |
| 669 | llvm::sort(C&: Queue, Comp: llvm::less_first()); |
| 670 | |
| 671 | for (auto WriteOp : Queue) |
| 672 | (this->*WriteOp.second)(); |
| 673 | } |
| 674 | |
| 675 | Error MachOWriter::finalize() { return LayoutBuilder.layout(); } |
| 676 | |
| 677 | Error MachOWriter::write() { |
| 678 | size_t TotalSize = totalSize(); |
| 679 | Buf = WritableMemoryBuffer::getNewMemBuffer(Size: TotalSize); |
| 680 | if (!Buf) |
| 681 | return createStringError(EC: errc::not_enough_memory, |
| 682 | S: "failed to allocate memory buffer of " + |
| 683 | Twine::utohexstr(Val: TotalSize) + " bytes" ); |
| 684 | writeHeader(); |
| 685 | writeLoadCommands(); |
| 686 | writeSections(); |
| 687 | writeTail(); |
| 688 | |
| 689 | // TODO: Implement direct writing to the output stream (without intermediate |
| 690 | // memory buffer Buf). |
| 691 | Out.write(Ptr: Buf->getBufferStart(), Size: Buf->getBufferSize()); |
| 692 | return Error::success(); |
| 693 | } |
| 694 | |