| 1 | //===- MachOWriter.cpp ------------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "MachOWriter.h" |
| 10 | #include "MachOLayoutBuilder.h" |
| 11 | #include "MachOObject.h" |
| 12 | #include "llvm/ADT/STLExtras.h" |
| 13 | #include "llvm/BinaryFormat/MachO.h" |
| 14 | #include "llvm/Support/Errc.h" |
| 15 | #include "llvm/Support/ErrorHandling.h" |
| 16 | #include "llvm/Support/SHA256.h" |
| 17 | #include <memory> |
| 18 | |
| 19 | #if defined(__APPLE__) |
| 20 | #include <sys/mman.h> |
| 21 | #endif |
| 22 | |
| 23 | using namespace llvm; |
| 24 | using namespace llvm::objcopy::macho; |
| 25 | using namespace llvm::support::endian; |
| 26 | |
| 27 | size_t MachOWriter::() const { |
| 28 | return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); |
| 29 | } |
| 30 | |
| 31 | size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; } |
| 32 | |
| 33 | size_t MachOWriter::symTableSize() const { |
| 34 | return O.SymTable.Symbols.size() * |
| 35 | (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist)); |
| 36 | } |
| 37 | |
| 38 | size_t MachOWriter::totalSize() const { |
| 39 | // Going from tail to head and looking for an appropriate "anchor" to |
| 40 | // calculate the total size assuming that all the offsets are either valid |
| 41 | // ("true") or 0 (0 indicates that the corresponding part is missing). |
| 42 | |
| 43 | SmallVector<size_t, 7> Ends; |
| 44 | if (O.SymTabCommandIndex) { |
| 45 | const MachO::symtab_command &SymTabCommand = |
| 46 | O.LoadCommands[*O.SymTabCommandIndex] |
| 47 | .MachOLoadCommand.symtab_command_data; |
| 48 | if (SymTabCommand.symoff) |
| 49 | Ends.push_back(Elt: SymTabCommand.symoff + symTableSize()); |
| 50 | if (SymTabCommand.stroff) |
| 51 | Ends.push_back(Elt: SymTabCommand.stroff + SymTabCommand.strsize); |
| 52 | } |
| 53 | if (O.DyLdInfoCommandIndex) { |
| 54 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 55 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 56 | .MachOLoadCommand.dyld_info_command_data; |
| 57 | if (DyLdInfoCommand.rebase_off) { |
| 58 | assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && |
| 59 | "Incorrect rebase opcodes size" ); |
| 60 | Ends.push_back(Elt: DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size); |
| 61 | } |
| 62 | if (DyLdInfoCommand.bind_off) { |
| 63 | assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && |
| 64 | "Incorrect bind opcodes size" ); |
| 65 | Ends.push_back(Elt: DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size); |
| 66 | } |
| 67 | if (DyLdInfoCommand.weak_bind_off) { |
| 68 | assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && |
| 69 | "Incorrect weak bind opcodes size" ); |
| 70 | Ends.push_back(Elt: DyLdInfoCommand.weak_bind_off + |
| 71 | DyLdInfoCommand.weak_bind_size); |
| 72 | } |
| 73 | if (DyLdInfoCommand.lazy_bind_off) { |
| 74 | assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && |
| 75 | "Incorrect lazy bind opcodes size" ); |
| 76 | Ends.push_back(Elt: DyLdInfoCommand.lazy_bind_off + |
| 77 | DyLdInfoCommand.lazy_bind_size); |
| 78 | } |
| 79 | if (DyLdInfoCommand.export_off) { |
| 80 | assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && |
| 81 | "Incorrect trie size" ); |
| 82 | Ends.push_back(Elt: DyLdInfoCommand.export_off + DyLdInfoCommand.export_size); |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | if (O.DySymTabCommandIndex) { |
| 87 | const MachO::dysymtab_command &DySymTabCommand = |
| 88 | O.LoadCommands[*O.DySymTabCommandIndex] |
| 89 | .MachOLoadCommand.dysymtab_command_data; |
| 90 | |
| 91 | if (DySymTabCommand.indirectsymoff) |
| 92 | Ends.push_back(Elt: DySymTabCommand.indirectsymoff + |
| 93 | sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); |
| 94 | } |
| 95 | |
| 96 | for (std::optional<size_t> LinkEditDataCommandIndex : |
| 97 | {O.CodeSignatureCommandIndex, O.DylibCodeSignDRsIndex, |
| 98 | O.DataInCodeCommandIndex, O.LinkerOptimizationHintCommandIndex, |
| 99 | O.FunctionStartsCommandIndex, O.ChainedFixupsCommandIndex, |
| 100 | O.ExportsTrieCommandIndex}) |
| 101 | if (LinkEditDataCommandIndex) { |
| 102 | const MachO::linkedit_data_command &LinkEditDataCommand = |
| 103 | O.LoadCommands[*LinkEditDataCommandIndex] |
| 104 | .MachOLoadCommand.linkedit_data_command_data; |
| 105 | if (LinkEditDataCommand.dataoff) |
| 106 | Ends.push_back(Elt: LinkEditDataCommand.dataoff + |
| 107 | LinkEditDataCommand.datasize); |
| 108 | } |
| 109 | |
| 110 | // Otherwise, use the last section / reloction. |
| 111 | for (const LoadCommand &LC : O.LoadCommands) |
| 112 | for (const std::unique_ptr<Section> &S : LC.Sections) { |
| 113 | if (!S->hasValidOffset()) { |
| 114 | assert((S->Offset == 0) && "Skipped section's offset must be zero" ); |
| 115 | assert((S->isVirtualSection() || S->Size == 0) && |
| 116 | "Non-zero-fill sections with zero offset must have zero size" ); |
| 117 | continue; |
| 118 | } |
| 119 | assert((S->Offset != 0) && |
| 120 | "Non-zero-fill section's offset cannot be zero" ); |
| 121 | Ends.push_back(Elt: S->Offset + S->Size); |
| 122 | if (S->RelOff) |
| 123 | Ends.push_back(Elt: S->RelOff + |
| 124 | S->NReloc * sizeof(MachO::any_relocation_info)); |
| 125 | } |
| 126 | |
| 127 | if (!Ends.empty()) |
| 128 | return *llvm::max_element(Range&: Ends); |
| 129 | |
| 130 | // Otherwise, we have only Mach header and load commands. |
| 131 | return headerSize() + loadCommandsSize(); |
| 132 | } |
| 133 | |
| 134 | void MachOWriter::() { |
| 135 | MachO::mach_header_64 ; |
| 136 | |
| 137 | Header.magic = O.Header.Magic; |
| 138 | Header.cputype = O.Header.CPUType; |
| 139 | Header.cpusubtype = O.Header.CPUSubType; |
| 140 | Header.filetype = O.Header.FileType; |
| 141 | Header.ncmds = O.Header.NCmds; |
| 142 | Header.sizeofcmds = O.Header.SizeOfCmds; |
| 143 | Header.flags = O.Header.Flags; |
| 144 | Header.reserved = O.Header.Reserved; |
| 145 | |
| 146 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 147 | MachO::swapStruct(H&: Header); |
| 148 | |
| 149 | auto = |
| 150 | Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); |
| 151 | memcpy(dest: Buf->getBufferStart(), src: &Header, n: HeaderSize); |
| 152 | } |
| 153 | |
| 154 | void MachOWriter::writeLoadCommands() { |
| 155 | uint8_t *Begin = |
| 156 | reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + headerSize(); |
| 157 | for (const LoadCommand &LC : O.LoadCommands) { |
| 158 | // Construct a load command. |
| 159 | MachO::macho_load_command MLC = LC.MachOLoadCommand; |
| 160 | switch (MLC.load_command_data.cmd) { |
| 161 | case MachO::LC_SEGMENT: |
| 162 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 163 | MachO::swapStruct(seg&: MLC.segment_command_data); |
| 164 | memcpy(dest: Begin, src: &MLC.segment_command_data, n: sizeof(MachO::segment_command)); |
| 165 | Begin += sizeof(MachO::segment_command); |
| 166 | |
| 167 | for (const std::unique_ptr<Section> &Sec : LC.Sections) |
| 168 | writeSectionInLoadCommand<MachO::section>(Sec: *Sec, Out&: Begin); |
| 169 | continue; |
| 170 | case MachO::LC_SEGMENT_64: |
| 171 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 172 | MachO::swapStruct(seg&: MLC.segment_command_64_data); |
| 173 | memcpy(dest: Begin, src: &MLC.segment_command_64_data, |
| 174 | n: sizeof(MachO::segment_command_64)); |
| 175 | Begin += sizeof(MachO::segment_command_64); |
| 176 | |
| 177 | for (const std::unique_ptr<Section> &Sec : LC.Sections) |
| 178 | writeSectionInLoadCommand<MachO::section_64>(Sec: *Sec, Out&: Begin); |
| 179 | continue; |
| 180 | } |
| 181 | |
| 182 | #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ |
| 183 | case MachO::LCName: \ |
| 184 | assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \ |
| 185 | MLC.load_command_data.cmdsize); \ |
| 186 | if (IsLittleEndian != sys::IsLittleEndianHost) \ |
| 187 | MachO::swapStruct(MLC.LCStruct##_data); \ |
| 188 | memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \ |
| 189 | Begin += sizeof(MachO::LCStruct); \ |
| 190 | if (!LC.Payload.empty()) \ |
| 191 | memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \ |
| 192 | Begin += LC.Payload.size(); \ |
| 193 | break; |
| 194 | |
| 195 | // Copy the load command as it is. |
| 196 | switch (MLC.load_command_data.cmd) { |
| 197 | default: |
| 198 | assert(sizeof(MachO::load_command) + LC.Payload.size() == |
| 199 | MLC.load_command_data.cmdsize); |
| 200 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 201 | MachO::swapStruct(lc&: MLC.load_command_data); |
| 202 | memcpy(dest: Begin, src: &MLC.load_command_data, n: sizeof(MachO::load_command)); |
| 203 | Begin += sizeof(MachO::load_command); |
| 204 | if (!LC.Payload.empty()) |
| 205 | memcpy(dest: Begin, src: LC.Payload.data(), n: LC.Payload.size()); |
| 206 | Begin += LC.Payload.size(); |
| 207 | break; |
| 208 | #include "llvm/BinaryFormat/MachO.def" |
| 209 | } |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | template <typename StructType> |
| 214 | void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) { |
| 215 | StructType Temp; |
| 216 | assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name" ); |
| 217 | assert(Sec.Sectname.size() <= sizeof(Temp.sectname) && |
| 218 | "too long section name" ); |
| 219 | memset(&Temp, 0, sizeof(StructType)); |
| 220 | memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size()); |
| 221 | memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size()); |
| 222 | Temp.addr = Sec.Addr; |
| 223 | Temp.size = Sec.Size; |
| 224 | Temp.offset = Sec.Offset; |
| 225 | Temp.align = Sec.Align; |
| 226 | Temp.reloff = Sec.RelOff; |
| 227 | Temp.nreloc = Sec.NReloc; |
| 228 | Temp.flags = Sec.Flags; |
| 229 | Temp.reserved1 = Sec.Reserved1; |
| 230 | Temp.reserved2 = Sec.Reserved2; |
| 231 | |
| 232 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 233 | MachO::swapStruct(Temp); |
| 234 | memcpy(Out, &Temp, sizeof(StructType)); |
| 235 | Out += sizeof(StructType); |
| 236 | } |
| 237 | |
| 238 | void MachOWriter::writeSections() { |
| 239 | for (const LoadCommand &LC : O.LoadCommands) |
| 240 | for (const std::unique_ptr<Section> &Sec : LC.Sections) { |
| 241 | if (!Sec->hasValidOffset()) { |
| 242 | assert((Sec->Offset == 0) && "Skipped section's offset must be zero" ); |
| 243 | assert((Sec->isVirtualSection() || Sec->Size == 0) && |
| 244 | "Non-zero-fill sections with zero offset must have zero size" ); |
| 245 | continue; |
| 246 | } |
| 247 | |
| 248 | assert(Sec->Offset && "Section offset can not be zero" ); |
| 249 | assert((Sec->Size == Sec->Content.size()) && "Incorrect section size" ); |
| 250 | memcpy(dest: Buf->getBufferStart() + Sec->Offset, src: Sec->Content.data(), |
| 251 | n: Sec->Content.size()); |
| 252 | for (size_t Index = 0; Index < Sec->Relocations.size(); ++Index) { |
| 253 | RelocationInfo RelocInfo = Sec->Relocations[Index]; |
| 254 | if (!RelocInfo.Scattered && !RelocInfo.IsAddend) { |
| 255 | const uint32_t SymbolNum = RelocInfo.Extern |
| 256 | ? (*RelocInfo.Symbol)->Index |
| 257 | : (*RelocInfo.Sec)->Index; |
| 258 | RelocInfo.setPlainRelocationSymbolNum(SymbolNum, IsLittleEndian); |
| 259 | } |
| 260 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 261 | MachO::swapStruct( |
| 262 | reloc&: reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info)); |
| 263 | memcpy(dest: Buf->getBufferStart() + Sec->RelOff + |
| 264 | Index * sizeof(MachO::any_relocation_info), |
| 265 | src: &RelocInfo.Info, n: sizeof(RelocInfo.Info)); |
| 266 | } |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | template <typename NListType> |
| 271 | void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out, |
| 272 | uint32_t Nstrx) { |
| 273 | NListType ListEntry; |
| 274 | ListEntry.n_strx = Nstrx; |
| 275 | ListEntry.n_type = SE.n_type; |
| 276 | ListEntry.n_sect = SE.n_sect; |
| 277 | ListEntry.n_desc = SE.n_desc; |
| 278 | ListEntry.n_value = SE.n_value; |
| 279 | |
| 280 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 281 | MachO::swapStruct(ListEntry); |
| 282 | memcpy(dest: Out, src: reinterpret_cast<const char *>(&ListEntry), n: sizeof(NListType)); |
| 283 | Out += sizeof(NListType); |
| 284 | } |
| 285 | |
| 286 | void MachOWriter::writeStringTable() { |
| 287 | if (!O.SymTabCommandIndex) |
| 288 | return; |
| 289 | const MachO::symtab_command &SymTabCommand = |
| 290 | O.LoadCommands[*O.SymTabCommandIndex] |
| 291 | .MachOLoadCommand.symtab_command_data; |
| 292 | |
| 293 | uint8_t *StrTable = (uint8_t *)Buf->getBufferStart() + SymTabCommand.stroff; |
| 294 | LayoutBuilder.getStringTableBuilder().write(Buf: StrTable); |
| 295 | } |
| 296 | |
| 297 | void MachOWriter::writeSymbolTable() { |
| 298 | if (!O.SymTabCommandIndex) |
| 299 | return; |
| 300 | const MachO::symtab_command &SymTabCommand = |
| 301 | O.LoadCommands[*O.SymTabCommandIndex] |
| 302 | .MachOLoadCommand.symtab_command_data; |
| 303 | |
| 304 | char *SymTable = (char *)Buf->getBufferStart() + SymTabCommand.symoff; |
| 305 | for (auto &Symbol : O.SymTable.Symbols) { |
| 306 | SymbolEntry *Sym = Symbol.get(); |
| 307 | uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(S: Sym->Name); |
| 308 | |
| 309 | if (Is64Bit) |
| 310 | writeNListEntry<MachO::nlist_64>(SE: *Sym, IsLittleEndian, Out&: SymTable, Nstrx); |
| 311 | else |
| 312 | writeNListEntry<MachO::nlist>(SE: *Sym, IsLittleEndian, Out&: SymTable, Nstrx); |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | void MachOWriter::writeRebaseInfo() { |
| 317 | if (!O.DyLdInfoCommandIndex) |
| 318 | return; |
| 319 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 320 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 321 | .MachOLoadCommand.dyld_info_command_data; |
| 322 | char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.rebase_off; |
| 323 | assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && |
| 324 | "Incorrect rebase opcodes size" ); |
| 325 | memcpy(dest: Out, src: O.Rebases.Opcodes.data(), n: O.Rebases.Opcodes.size()); |
| 326 | } |
| 327 | |
| 328 | void MachOWriter::writeBindInfo() { |
| 329 | if (!O.DyLdInfoCommandIndex) |
| 330 | return; |
| 331 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 332 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 333 | .MachOLoadCommand.dyld_info_command_data; |
| 334 | char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.bind_off; |
| 335 | assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && |
| 336 | "Incorrect bind opcodes size" ); |
| 337 | memcpy(dest: Out, src: O.Binds.Opcodes.data(), n: O.Binds.Opcodes.size()); |
| 338 | } |
| 339 | |
| 340 | void MachOWriter::writeWeakBindInfo() { |
| 341 | if (!O.DyLdInfoCommandIndex) |
| 342 | return; |
| 343 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 344 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 345 | .MachOLoadCommand.dyld_info_command_data; |
| 346 | char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.weak_bind_off; |
| 347 | assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && |
| 348 | "Incorrect weak bind opcodes size" ); |
| 349 | memcpy(dest: Out, src: O.WeakBinds.Opcodes.data(), n: O.WeakBinds.Opcodes.size()); |
| 350 | } |
| 351 | |
| 352 | void MachOWriter::writeLazyBindInfo() { |
| 353 | if (!O.DyLdInfoCommandIndex) |
| 354 | return; |
| 355 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 356 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 357 | .MachOLoadCommand.dyld_info_command_data; |
| 358 | char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.lazy_bind_off; |
| 359 | assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && |
| 360 | "Incorrect lazy bind opcodes size" ); |
| 361 | memcpy(dest: Out, src: O.LazyBinds.Opcodes.data(), n: O.LazyBinds.Opcodes.size()); |
| 362 | } |
| 363 | |
| 364 | void MachOWriter::writeExportInfo() { |
| 365 | if (!O.DyLdInfoCommandIndex) |
| 366 | return; |
| 367 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 368 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 369 | .MachOLoadCommand.dyld_info_command_data; |
| 370 | char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.export_off; |
| 371 | assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && |
| 372 | "Incorrect export trie size" ); |
| 373 | memcpy(dest: Out, src: O.Exports.Trie.data(), n: O.Exports.Trie.size()); |
| 374 | } |
| 375 | |
| 376 | void MachOWriter::writeIndirectSymbolTable() { |
| 377 | if (!O.DySymTabCommandIndex) |
| 378 | return; |
| 379 | |
| 380 | const MachO::dysymtab_command &DySymTabCommand = |
| 381 | O.LoadCommands[*O.DySymTabCommandIndex] |
| 382 | .MachOLoadCommand.dysymtab_command_data; |
| 383 | |
| 384 | uint32_t *Out = |
| 385 | (uint32_t *)(Buf->getBufferStart() + DySymTabCommand.indirectsymoff); |
| 386 | for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) { |
| 387 | uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex; |
| 388 | if (IsLittleEndian != sys::IsLittleEndianHost) |
| 389 | sys::swapByteOrder(Value&: Entry); |
| 390 | *Out++ = Entry; |
| 391 | } |
| 392 | } |
| 393 | |
| 394 | void MachOWriter::writeLinkData(std::optional<size_t> LCIndex, |
| 395 | const LinkData &LD) { |
| 396 | if (!LCIndex) |
| 397 | return; |
| 398 | const MachO::linkedit_data_command &LinkEditDataCommand = |
| 399 | O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; |
| 400 | char *Out = (char *)Buf->getBufferStart() + LinkEditDataCommand.dataoff; |
| 401 | assert((LinkEditDataCommand.datasize == LD.Data.size()) && |
| 402 | "Incorrect data size" ); |
| 403 | memcpy(dest: Out, src: LD.Data.data(), n: LD.Data.size()); |
| 404 | } |
| 405 | |
| 406 | static uint64_t |
| 407 | getSegmentFileOffset(const LoadCommand &TextSegmentLoadCommand) { |
| 408 | const MachO::macho_load_command &MLC = |
| 409 | TextSegmentLoadCommand.MachOLoadCommand; |
| 410 | switch (MLC.load_command_data.cmd) { |
| 411 | case MachO::LC_SEGMENT: |
| 412 | return MLC.segment_command_data.fileoff; |
| 413 | case MachO::LC_SEGMENT_64: |
| 414 | return MLC.segment_command_64_data.fileoff; |
| 415 | default: |
| 416 | return 0; |
| 417 | } |
| 418 | } |
| 419 | |
| 420 | static uint64_t getSegmentFileSize(const LoadCommand &TextSegmentLoadCommand) { |
| 421 | const MachO::macho_load_command &MLC = |
| 422 | TextSegmentLoadCommand.MachOLoadCommand; |
| 423 | switch (MLC.load_command_data.cmd) { |
| 424 | case MachO::LC_SEGMENT: |
| 425 | return MLC.segment_command_data.filesize; |
| 426 | case MachO::LC_SEGMENT_64: |
| 427 | return MLC.segment_command_64_data.filesize; |
| 428 | default: |
| 429 | return 0; |
| 430 | } |
| 431 | } |
| 432 | |
| 433 | void MachOWriter::writeCodeSignatureData() { |
| 434 | // NOTE: This CodeSignature section behaviour must be kept in sync with that |
| 435 | // performed in LLD's CodeSignatureSection::write / |
| 436 | // CodeSignatureSection::writeHashes. Furthermore, this call must occur only |
| 437 | // after the rest of the binary has already been written to the buffer. This |
| 438 | // is because the buffer is read from to perform the necessary hashing. |
| 439 | |
| 440 | // The CodeSignature section is the last section in the MachO binary and |
| 441 | // contains a hash of all content in the binary before it. Since llvm-objcopy |
| 442 | // has likely modified the target binary, the hash must be regenerated |
| 443 | // entirely. To generate this hash, we must read from the start of the binary |
| 444 | // (HashReadStart) to just before the start of the CodeSignature section |
| 445 | // (HashReadEnd). |
| 446 | |
| 447 | const CodeSignatureInfo &CodeSignature = LayoutBuilder.getCodeSignature(); |
| 448 | |
| 449 | uint8_t *BufferStart = reinterpret_cast<uint8_t *>(Buf->getBufferStart()); |
| 450 | uint8_t *HashReadStart = BufferStart; |
| 451 | uint8_t *HashReadEnd = BufferStart + CodeSignature.StartOffset; |
| 452 | |
| 453 | // The CodeSignature section begins with a header, after which the hashes |
| 454 | // of each page of the binary are written. |
| 455 | uint8_t *HashWriteStart = HashReadEnd + CodeSignature.AllHeadersSize; |
| 456 | |
| 457 | uint32_t TextSegmentFileOff = 0; |
| 458 | uint32_t TextSegmentFileSize = 0; |
| 459 | if (O.TextSegmentCommandIndex) { |
| 460 | const LoadCommand &TextSegmentLoadCommand = |
| 461 | O.LoadCommands[*O.TextSegmentCommandIndex]; |
| 462 | assert(TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd == |
| 463 | MachO::LC_SEGMENT || |
| 464 | TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd == |
| 465 | MachO::LC_SEGMENT_64); |
| 466 | assert(StringRef(TextSegmentLoadCommand.MachOLoadCommand |
| 467 | .segment_command_data.segname) == "__TEXT" ); |
| 468 | TextSegmentFileOff = getSegmentFileOffset(TextSegmentLoadCommand); |
| 469 | TextSegmentFileSize = getSegmentFileSize(TextSegmentLoadCommand); |
| 470 | } |
| 471 | |
| 472 | const uint32_t FileNamePad = CodeSignature.AllHeadersSize - |
| 473 | CodeSignature.FixedHeadersSize - |
| 474 | CodeSignature.OutputFileName.size(); |
| 475 | |
| 476 | // Write code section header. |
| 477 | auto *SuperBlob = reinterpret_cast<MachO::CS_SuperBlob *>(HashReadEnd); |
| 478 | write32be(P: &SuperBlob->magic, V: MachO::CSMAGIC_EMBEDDED_SIGNATURE); |
| 479 | write32be(P: &SuperBlob->length, V: CodeSignature.Size); |
| 480 | write32be(P: &SuperBlob->count, V: 1); |
| 481 | auto *BlobIndex = reinterpret_cast<MachO::CS_BlobIndex *>(&SuperBlob[1]); |
| 482 | write32be(P: &BlobIndex->type, V: MachO::CSSLOT_CODEDIRECTORY); |
| 483 | write32be(P: &BlobIndex->offset, V: CodeSignature.BlobHeadersSize); |
| 484 | auto *CodeDirectory = reinterpret_cast<MachO::CS_CodeDirectory *>( |
| 485 | HashReadEnd + CodeSignature.BlobHeadersSize); |
| 486 | write32be(P: &CodeDirectory->magic, V: MachO::CSMAGIC_CODEDIRECTORY); |
| 487 | write32be(P: &CodeDirectory->length, |
| 488 | V: CodeSignature.Size - CodeSignature.BlobHeadersSize); |
| 489 | write32be(P: &CodeDirectory->version, V: MachO::CS_SUPPORTSEXECSEG); |
| 490 | write32be(P: &CodeDirectory->flags, V: MachO::CS_ADHOC | MachO::CS_LINKER_SIGNED); |
| 491 | write32be(P: &CodeDirectory->hashOffset, |
| 492 | V: sizeof(MachO::CS_CodeDirectory) + |
| 493 | CodeSignature.OutputFileName.size() + FileNamePad); |
| 494 | write32be(P: &CodeDirectory->identOffset, V: sizeof(MachO::CS_CodeDirectory)); |
| 495 | CodeDirectory->nSpecialSlots = 0; |
| 496 | write32be(P: &CodeDirectory->nCodeSlots, V: CodeSignature.BlockCount); |
| 497 | write32be(P: &CodeDirectory->codeLimit, V: CodeSignature.StartOffset); |
| 498 | CodeDirectory->hashSize = static_cast<uint8_t>(CodeSignature.HashSize); |
| 499 | CodeDirectory->hashType = MachO::kSecCodeSignatureHashSHA256; |
| 500 | CodeDirectory->platform = 0; |
| 501 | CodeDirectory->pageSize = CodeSignature.BlockSizeShift; |
| 502 | CodeDirectory->spare2 = 0; |
| 503 | CodeDirectory->scatterOffset = 0; |
| 504 | CodeDirectory->teamOffset = 0; |
| 505 | CodeDirectory->spare3 = 0; |
| 506 | CodeDirectory->codeLimit64 = 0; |
| 507 | write64be(P: &CodeDirectory->execSegBase, V: TextSegmentFileOff); |
| 508 | write64be(P: &CodeDirectory->execSegLimit, V: TextSegmentFileSize); |
| 509 | write64be(P: &CodeDirectory->execSegFlags, V: O.Header.FileType == MachO::MH_EXECUTE |
| 510 | ? MachO::CS_EXECSEG_MAIN_BINARY |
| 511 | : 0); |
| 512 | |
| 513 | auto *Id = reinterpret_cast<char *>(&CodeDirectory[1]); |
| 514 | memcpy(dest: Id, src: CodeSignature.OutputFileName.begin(), |
| 515 | n: CodeSignature.OutputFileName.size()); |
| 516 | memset(s: Id + CodeSignature.OutputFileName.size(), c: 0, n: FileNamePad); |
| 517 | |
| 518 | // Write the hashes. |
| 519 | uint8_t *CurrHashReadPosition = HashReadStart; |
| 520 | uint8_t *CurrHashWritePosition = HashWriteStart; |
| 521 | while (CurrHashReadPosition < HashReadEnd) { |
| 522 | StringRef Block(reinterpret_cast<char *>(CurrHashReadPosition), |
| 523 | std::min(a: static_cast<size_t>(HashReadEnd |
| 524 | - CurrHashReadPosition), |
| 525 | b: static_cast<size_t>(CodeSignature.BlockSize))); |
| 526 | SHA256 Hasher; |
| 527 | Hasher.update(Str: Block); |
| 528 | std::array<uint8_t, 32> Hash = Hasher.final(); |
| 529 | assert(Hash.size() == CodeSignature.HashSize); |
| 530 | memcpy(dest: CurrHashWritePosition, src: Hash.data(), n: CodeSignature.HashSize); |
| 531 | CurrHashReadPosition += CodeSignature.BlockSize; |
| 532 | CurrHashWritePosition += CodeSignature.HashSize; |
| 533 | } |
| 534 | #if defined(__APPLE__) |
| 535 | // This is macOS-specific work-around and makes no sense for any |
| 536 | // other host OS. See https://openradar.appspot.com/FB8914231 |
| 537 | // |
| 538 | // The macOS kernel maintains a signature-verification cache to |
| 539 | // quickly validate applications at time of execve(2). The trouble |
| 540 | // is that for the kernel creates the cache entry at the time of the |
| 541 | // mmap(2) call, before we have a chance to write either the code to |
| 542 | // sign or the signature header+hashes. The fix is to invalidate |
| 543 | // all cached data associated with the output file, thus discarding |
| 544 | // the bogus prematurely-cached signature. |
| 545 | msync(BufferStart, CodeSignature.StartOffset + CodeSignature.Size, |
| 546 | MS_INVALIDATE); |
| 547 | #endif |
| 548 | } |
| 549 | |
| 550 | void MachOWriter::writeDataInCodeData() { |
| 551 | return writeLinkData(LCIndex: O.DataInCodeCommandIndex, LD: O.DataInCode); |
| 552 | } |
| 553 | |
| 554 | void MachOWriter::writeLinkerOptimizationHint() { |
| 555 | return writeLinkData(LCIndex: O.LinkerOptimizationHintCommandIndex, |
| 556 | LD: O.LinkerOptimizationHint); |
| 557 | } |
| 558 | |
| 559 | void MachOWriter::writeFunctionStartsData() { |
| 560 | return writeLinkData(LCIndex: O.FunctionStartsCommandIndex, LD: O.FunctionStarts); |
| 561 | } |
| 562 | |
| 563 | void MachOWriter::writeDylibCodeSignDRsData() { |
| 564 | return writeLinkData(LCIndex: O.DylibCodeSignDRsIndex, LD: O.DylibCodeSignDRs); |
| 565 | } |
| 566 | |
| 567 | void MachOWriter::writeChainedFixupsData() { |
| 568 | return writeLinkData(LCIndex: O.ChainedFixupsCommandIndex, LD: O.ChainedFixups); |
| 569 | } |
| 570 | |
| 571 | void MachOWriter::writeExportsTrieData() { |
| 572 | if (!O.ExportsTrieCommandIndex) |
| 573 | return; |
| 574 | const MachO::linkedit_data_command &ExportsTrieCmd = |
| 575 | O.LoadCommands[*O.ExportsTrieCommandIndex] |
| 576 | .MachOLoadCommand.linkedit_data_command_data; |
| 577 | char *Out = (char *)Buf->getBufferStart() + ExportsTrieCmd.dataoff; |
| 578 | assert((ExportsTrieCmd.datasize == O.Exports.Trie.size()) && |
| 579 | "Incorrect export trie size" ); |
| 580 | memcpy(dest: Out, src: O.Exports.Trie.data(), n: O.Exports.Trie.size()); |
| 581 | } |
| 582 | |
| 583 | void MachOWriter::writeTail() { |
| 584 | typedef void (MachOWriter::*WriteHandlerType)(); |
| 585 | typedef std::pair<uint64_t, WriteHandlerType> WriteOperation; |
| 586 | SmallVector<WriteOperation, 7> Queue; |
| 587 | |
| 588 | if (O.SymTabCommandIndex) { |
| 589 | const MachO::symtab_command &SymTabCommand = |
| 590 | O.LoadCommands[*O.SymTabCommandIndex] |
| 591 | .MachOLoadCommand.symtab_command_data; |
| 592 | if (SymTabCommand.symoff) |
| 593 | Queue.push_back(Elt: {SymTabCommand.symoff, &MachOWriter::writeSymbolTable}); |
| 594 | if (SymTabCommand.stroff) |
| 595 | Queue.push_back(Elt: {SymTabCommand.stroff, &MachOWriter::writeStringTable}); |
| 596 | } |
| 597 | |
| 598 | if (O.DyLdInfoCommandIndex) { |
| 599 | const MachO::dyld_info_command &DyLdInfoCommand = |
| 600 | O.LoadCommands[*O.DyLdInfoCommandIndex] |
| 601 | .MachOLoadCommand.dyld_info_command_data; |
| 602 | if (DyLdInfoCommand.rebase_off) |
| 603 | Queue.push_back( |
| 604 | Elt: {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo}); |
| 605 | if (DyLdInfoCommand.bind_off) |
| 606 | Queue.push_back(Elt: {DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo}); |
| 607 | if (DyLdInfoCommand.weak_bind_off) |
| 608 | Queue.push_back( |
| 609 | Elt: {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo}); |
| 610 | if (DyLdInfoCommand.lazy_bind_off) |
| 611 | Queue.push_back( |
| 612 | Elt: {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo}); |
| 613 | if (DyLdInfoCommand.export_off) |
| 614 | Queue.push_back( |
| 615 | Elt: {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo}); |
| 616 | } |
| 617 | |
| 618 | if (O.DySymTabCommandIndex) { |
| 619 | const MachO::dysymtab_command &DySymTabCommand = |
| 620 | O.LoadCommands[*O.DySymTabCommandIndex] |
| 621 | .MachOLoadCommand.dysymtab_command_data; |
| 622 | |
| 623 | if (DySymTabCommand.indirectsymoff) |
| 624 | Queue.emplace_back(Args: DySymTabCommand.indirectsymoff, |
| 625 | Args: &MachOWriter::writeIndirectSymbolTable); |
| 626 | } |
| 627 | |
| 628 | std::initializer_list<std::pair<std::optional<size_t>, WriteHandlerType>> |
| 629 | LinkEditDataCommandWriters = { |
| 630 | {O.CodeSignatureCommandIndex, &MachOWriter::writeCodeSignatureData}, |
| 631 | {O.DylibCodeSignDRsIndex, &MachOWriter::writeDylibCodeSignDRsData}, |
| 632 | {O.DataInCodeCommandIndex, &MachOWriter::writeDataInCodeData}, |
| 633 | {O.LinkerOptimizationHintCommandIndex, |
| 634 | &MachOWriter::writeLinkerOptimizationHint}, |
| 635 | {O.FunctionStartsCommandIndex, &MachOWriter::writeFunctionStartsData}, |
| 636 | {O.ChainedFixupsCommandIndex, &MachOWriter::writeChainedFixupsData}, |
| 637 | {O.ExportsTrieCommandIndex, &MachOWriter::writeExportsTrieData}}; |
| 638 | for (const auto &W : LinkEditDataCommandWriters) { |
| 639 | std::optional<size_t> LinkEditDataCommandIndex; |
| 640 | WriteHandlerType WriteHandler; |
| 641 | std::tie(args&: LinkEditDataCommandIndex, args&: WriteHandler) = W; |
| 642 | if (LinkEditDataCommandIndex) { |
| 643 | const MachO::linkedit_data_command &LinkEditDataCommand = |
| 644 | O.LoadCommands[*LinkEditDataCommandIndex] |
| 645 | .MachOLoadCommand.linkedit_data_command_data; |
| 646 | if (LinkEditDataCommand.dataoff) |
| 647 | Queue.emplace_back(Args: LinkEditDataCommand.dataoff, Args&: WriteHandler); |
| 648 | } |
| 649 | } |
| 650 | |
| 651 | llvm::sort(C&: Queue, Comp: llvm::less_first()); |
| 652 | |
| 653 | for (auto WriteOp : Queue) |
| 654 | (this->*WriteOp.second)(); |
| 655 | } |
| 656 | |
| 657 | Error MachOWriter::finalize() { return LayoutBuilder.layout(); } |
| 658 | |
| 659 | Error MachOWriter::write() { |
| 660 | size_t TotalSize = totalSize(); |
| 661 | Buf = WritableMemoryBuffer::getNewMemBuffer(Size: TotalSize); |
| 662 | if (!Buf) |
| 663 | return createStringError(EC: errc::not_enough_memory, |
| 664 | S: "failed to allocate memory buffer of " + |
| 665 | Twine::utohexstr(Val: TotalSize) + " bytes" ); |
| 666 | writeHeader(); |
| 667 | writeLoadCommands(); |
| 668 | writeSections(); |
| 669 | writeTail(); |
| 670 | |
| 671 | // TODO: Implement direct writing to the output stream (without intermediate |
| 672 | // memory buffer Buf). |
| 673 | Out.write(Ptr: Buf->getBufferStart(), Size: Buf->getBufferSize()); |
| 674 | return Error::success(); |
| 675 | } |
| 676 | |