1//===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOObject.h"
10#include "llvm/ADT/SmallPtrSet.h"
11
12using namespace llvm;
13using namespace llvm::objcopy::macho;
14
15Section::Section(StringRef SegName, StringRef SectName)
16 : Segname(SegName), Sectname(SectName),
17 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
18
19Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
20 : Segname(SegName), Sectname(SectName),
21 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
22 Content(Content) {}
23
24const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
25 assert(Index < Symbols.size() && "invalid symbol index");
26 return Symbols[Index].get();
27}
28
29SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
30 return const_cast<SymbolEntry *>(
31 static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
32}
33
34void SymbolTable::updateSymbols(function_ref<void(SymbolEntry &)> Callable) {
35 for (auto &Sym : Symbols)
36 Callable(*Sym);
37
38 // Partition symbols: local < defined external < undefined external.
39 auto ExternalBegin = std::stable_partition(
40 first: std::begin(cont&: Symbols), last: std::end(cont&: Symbols),
41 pred: [](const auto &Sym) { return Sym->isLocalSymbol(); });
42 std::stable_partition(first: ExternalBegin, last: std::end(cont&: Symbols), pred: [](const auto &Sym) {
43 return !Sym->isUndefinedSymbol();
44 });
45}
46
47void SymbolTable::removeSymbols(
48 function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
49 llvm::erase_if(C&: Symbols, P: ToRemove);
50}
51
52void Object::updateLoadCommandIndexes() {
53 static constexpr char TextSegmentName[] = "__TEXT";
54 // Update indices of special load commands
55 for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
56 LoadCommand &LC = LoadCommands[Index];
57 switch (LC.MachOLoadCommand.load_command_data.cmd) {
58 case MachO::LC_CODE_SIGNATURE:
59 CodeSignatureCommandIndex = Index;
60 break;
61 case MachO::LC_SEGMENT:
62 if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
63 TextSegmentName)
64 TextSegmentCommandIndex = Index;
65 break;
66 case MachO::LC_SEGMENT_64:
67 if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
68 TextSegmentName)
69 TextSegmentCommandIndex = Index;
70 break;
71 case MachO::LC_SYMTAB:
72 SymTabCommandIndex = Index;
73 break;
74 case MachO::LC_DYSYMTAB:
75 DySymTabCommandIndex = Index;
76 break;
77 case MachO::LC_DYLD_INFO:
78 case MachO::LC_DYLD_INFO_ONLY:
79 DyLdInfoCommandIndex = Index;
80 break;
81 case MachO::LC_DATA_IN_CODE:
82 DataInCodeCommandIndex = Index;
83 break;
84 case MachO::LC_LINKER_OPTIMIZATION_HINT:
85 LinkerOptimizationHintCommandIndex = Index;
86 break;
87 case MachO::LC_FUNCTION_STARTS:
88 FunctionStartsCommandIndex = Index;
89 break;
90 case MachO::LC_DYLIB_CODE_SIGN_DRS:
91 DylibCodeSignDRsIndex = Index;
92 break;
93 case MachO::LC_DYLD_CHAINED_FIXUPS:
94 ChainedFixupsCommandIndex = Index;
95 break;
96 case MachO::LC_DYLD_EXPORTS_TRIE:
97 ExportsTrieCommandIndex = Index;
98 break;
99 }
100 }
101}
102
103Error Object::removeLoadCommands(
104 function_ref<bool(const LoadCommand &)> ToRemove) {
105 auto It = std::stable_partition(
106 first: LoadCommands.begin(), last: LoadCommands.end(),
107 pred: [&](const LoadCommand &LC) { return !ToRemove(LC); });
108 LoadCommands.erase(first: It, last: LoadCommands.end());
109
110 updateLoadCommandIndexes();
111 return Error::success();
112}
113
114Error Object::removeSections(
115 function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
116 DenseMap<uint32_t, const Section *> OldIndexToSection;
117 uint32_t NextSectionIndex = 1;
118 for (LoadCommand &LC : LoadCommands) {
119 auto It = std::stable_partition(
120 first: std::begin(cont&: LC.Sections), last: std::end(cont&: LC.Sections),
121 pred: [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
122 for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
123 OldIndexToSection[(*I)->Index] = I->get();
124 (*I)->Index = NextSectionIndex++;
125 }
126 LC.Sections.erase(first: It, last: LC.Sections.end());
127 }
128
129 auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
130 std::optional<uint32_t> Section = S->section();
131 return (Section && !OldIndexToSection.count(Val: *Section));
132 };
133
134 SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
135 for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
136 if (IsDead(Sym))
137 DeadSymbols.insert(Ptr: Sym.get());
138
139 for (const LoadCommand &LC : LoadCommands)
140 for (const std::unique_ptr<Section> &Sec : LC.Sections)
141 for (const RelocationInfo &R : Sec->Relocations)
142 if (R.Symbol && *R.Symbol && DeadSymbols.count(Ptr: *R.Symbol))
143 return createStringError(EC: std::errc::invalid_argument,
144 Fmt: "symbol '%s' defined in section with index "
145 "'%u' cannot be removed because it is "
146 "referenced by a relocation in section '%s'",
147 Vals: (*R.Symbol)->Name.c_str(),
148 Vals: *((*R.Symbol)->section()),
149 Vals: Sec->CanonicalName.c_str());
150 SymTable.removeSymbols(ToRemove: IsDead);
151 for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
152 if (S->section())
153 S->n_sect = OldIndexToSection[S->n_sect]->Index;
154 return Error::success();
155}
156
157uint64_t Object::nextAvailableSegmentAddress() const {
158 uint64_t HeaderSize =
159 is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
160 uint64_t Addr = HeaderSize + Header.SizeOfCmds;
161 for (const LoadCommand &LC : LoadCommands) {
162 const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
163 switch (MLC.load_command_data.cmd) {
164 case MachO::LC_SEGMENT:
165 Addr = std::max(a: Addr,
166 b: static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
167 MLC.segment_command_data.vmsize);
168 break;
169 case MachO::LC_SEGMENT_64:
170 Addr = std::max(a: Addr, b: MLC.segment_command_64_data.vmaddr +
171 MLC.segment_command_64_data.vmsize);
172 break;
173 default:
174 continue;
175 }
176 }
177 return Addr;
178}
179
180template <typename SegmentType>
181static void
182constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
183 StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
184 assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
185 memset(&Seg, 0, sizeof(SegmentType));
186 Seg.cmd = CmdType;
187 strncpy(Seg.segname, SegName.data(), SegName.size());
188 Seg.maxprot |=
189 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
190 Seg.initprot |=
191 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
192 Seg.vmaddr = SegVMAddr;
193 Seg.vmsize = SegVMSize;
194}
195
196LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
197 LoadCommand LC;
198 const uint64_t SegVMAddr = nextAvailableSegmentAddress();
199 if (is64Bit())
200 constructSegment(Seg&: LC.MachOLoadCommand.segment_command_64_data,
201 CmdType: MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
202 else
203 constructSegment(Seg&: LC.MachOLoadCommand.segment_command_data,
204 CmdType: MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
205
206 LoadCommands.push_back(x: std::move(LC));
207 return LoadCommands.back();
208}
209
210/// Extracts a segment name from a string which is possibly non-null-terminated.
211static StringRef extractSegmentName(const char *SegName) {
212 return StringRef(SegName,
213 strnlen(string: SegName, maxlen: sizeof(MachO::segment_command::segname)));
214}
215
216std::optional<StringRef> LoadCommand::getSegmentName() const {
217 const MachO::macho_load_command &MLC = MachOLoadCommand;
218 switch (MLC.load_command_data.cmd) {
219 case MachO::LC_SEGMENT:
220 return extractSegmentName(SegName: MLC.segment_command_data.segname);
221 case MachO::LC_SEGMENT_64:
222 return extractSegmentName(SegName: MLC.segment_command_64_data.segname);
223 default:
224 return std::nullopt;
225 }
226}
227
228std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
229 const MachO::macho_load_command &MLC = MachOLoadCommand;
230 switch (MLC.load_command_data.cmd) {
231 case MachO::LC_SEGMENT:
232 return MLC.segment_command_data.vmaddr;
233 case MachO::LC_SEGMENT_64:
234 return MLC.segment_command_64_data.vmaddr;
235 default:
236 return std::nullopt;
237 }
238}
239