//===- ARM.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

9 | #include "InputFiles.h" |
10 | #include "OutputSections.h" |
11 | #include "SymbolTable.h" |
12 | #include "Symbols.h" |
13 | #include "SyntheticSections.h" |
14 | #include "Target.h" |
15 | #include "lld/Common/ErrorHandler.h" |
16 | #include "lld/Common/Filesystem.h" |
17 | #include "llvm/BinaryFormat/ELF.h" |
18 | #include "llvm/Support/Endian.h" |
19 | |
20 | using namespace llvm; |
21 | using namespace llvm::support::endian; |
22 | using namespace llvm::support; |
23 | using namespace llvm::ELF; |
24 | using namespace lld; |
25 | using namespace lld::elf; |
26 | using namespace llvm::object; |
27 | |
28 | namespace { |
29 | class ARM final : public TargetInfo { |
30 | public: |
31 | ARM(); |
32 | uint32_t calcEFlags() const override; |
33 | RelExpr getRelExpr(RelType type, const Symbol &s, |
34 | const uint8_t *loc) const override; |
35 | RelType getDynRel(RelType type) const override; |
36 | int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; |
37 | void writeGotPlt(uint8_t *buf, const Symbol &s) const override; |
38 | void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; |
39 | void writePltHeader(uint8_t *buf) const override; |
40 | void writePlt(uint8_t *buf, const Symbol &sym, |
41 | uint64_t pltEntryAddr) const override; |
42 | void addPltSymbols(InputSection &isec, uint64_t off) const override; |
43 | void addPltHeaderSymbols(InputSection &isd) const override; |
44 | bool needsThunk(RelExpr expr, RelType type, const InputFile *file, |
45 | uint64_t branchAddr, const Symbol &s, |
46 | int64_t a) const override; |
47 | uint32_t getThunkSectionSpacing() const override; |
48 | bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; |
49 | void relocate(uint8_t *loc, const Relocation &rel, |
50 | uint64_t val) const override; |
51 | }; |
52 | enum class CodeState { Data = 0, Thumb = 2, Arm = 4 }; |
53 | } // namespace |
54 | |
55 | static DenseMap<InputSection *, SmallVector<const Defined *, 0>> sectionMap{}; |
56 | |
57 | ARM::ARM() { |
58 | copyRel = R_ARM_COPY; |
59 | relativeRel = R_ARM_RELATIVE; |
60 | iRelativeRel = R_ARM_IRELATIVE; |
61 | gotRel = R_ARM_GLOB_DAT; |
62 | pltRel = R_ARM_JUMP_SLOT; |
63 | symbolicRel = R_ARM_ABS32; |
64 | tlsGotRel = R_ARM_TLS_TPOFF32; |
65 | tlsModuleIndexRel = R_ARM_TLS_DTPMOD32; |
66 | tlsOffsetRel = R_ARM_TLS_DTPOFF32; |
67 | pltHeaderSize = 32; |
68 | pltEntrySize = 16; |
69 | ipltEntrySize = 16; |
70 | trapInstr = {0xd4, 0xd4, 0xd4, 0xd4}; |
71 | needsThunks = true; |
72 | defaultMaxPageSize = 65536; |
73 | } |
74 | |
75 | uint32_t ARM::calcEFlags() const { |
76 | // The ABIFloatType is used by loaders to detect the floating point calling |
77 | // convention. |
78 | uint32_t abiFloatType = 0; |
79 | |
80 | // Set the EF_ARM_BE8 flag in the ELF header, if ELF file is big-endian |
81 | // with BE-8 code. |
82 | uint32_t armBE8 = 0; |
83 | |
84 | if (config->armVFPArgs == ARMVFPArgKind::Base || |
85 | config->armVFPArgs == ARMVFPArgKind::Default) |
86 | abiFloatType = EF_ARM_ABI_FLOAT_SOFT; |
87 | else if (config->armVFPArgs == ARMVFPArgKind::VFP) |
88 | abiFloatType = EF_ARM_ABI_FLOAT_HARD; |
89 | |
90 | if (!config->isLE && config->armBe8) |
91 | armBE8 = EF_ARM_BE8; |
92 | |
93 | // We don't currently use any features incompatible with EF_ARM_EABI_VER5, |
94 | // but we don't have any firm guarantees of conformance. Linux AArch64 |
95 | // kernels (as of 2016) require an EABI version to be set. |
96 | return EF_ARM_EABI_VER5 | abiFloatType | armBE8; |
97 | } |
98 | |
99 | RelExpr ARM::getRelExpr(RelType type, const Symbol &s, |
100 | const uint8_t *loc) const { |
101 | switch (type) { |
102 | case R_ARM_ABS32: |
103 | case R_ARM_MOVW_ABS_NC: |
104 | case R_ARM_MOVT_ABS: |
105 | case R_ARM_THM_MOVW_ABS_NC: |
106 | case R_ARM_THM_MOVT_ABS: |
107 | case R_ARM_THM_ALU_ABS_G0_NC: |
108 | case R_ARM_THM_ALU_ABS_G1_NC: |
109 | case R_ARM_THM_ALU_ABS_G2_NC: |
110 | case R_ARM_THM_ALU_ABS_G3: |
111 | return R_ABS; |
112 | case R_ARM_THM_JUMP8: |
113 | case R_ARM_THM_JUMP11: |
114 | return R_PC; |
115 | case R_ARM_CALL: |
116 | case R_ARM_JUMP24: |
117 | case R_ARM_PC24: |
118 | case R_ARM_PLT32: |
119 | case R_ARM_PREL31: |
120 | case R_ARM_THM_JUMP19: |
121 | case R_ARM_THM_JUMP24: |
122 | case R_ARM_THM_CALL: |
123 | return R_PLT_PC; |
124 | case R_ARM_GOTOFF32: |
125 | // (S + A) - GOT_ORG |
126 | return R_GOTREL; |
127 | case R_ARM_GOT_BREL: |
128 | // GOT(S) + A - GOT_ORG |
129 | return R_GOT_OFF; |
130 | case R_ARM_GOT_PREL: |
131 | case R_ARM_TLS_IE32: |
132 | // GOT(S) + A - P |
133 | return R_GOT_PC; |
134 | case R_ARM_SBREL32: |
135 | return R_ARM_SBREL; |
136 | case R_ARM_TARGET1: |
137 | return config->target1Rel ? R_PC : R_ABS; |
138 | case R_ARM_TARGET2: |
139 | if (config->target2 == Target2Policy::Rel) |
140 | return R_PC; |
141 | if (config->target2 == Target2Policy::Abs) |
142 | return R_ABS; |
143 | return R_GOT_PC; |
144 | case R_ARM_TLS_GD32: |
145 | return R_TLSGD_PC; |
146 | case R_ARM_TLS_LDM32: |
147 | return R_TLSLD_PC; |
148 | case R_ARM_TLS_LDO32: |
149 | return R_DTPREL; |
150 | case R_ARM_BASE_PREL: |
151 | // B(S) + A - P |
152 | // FIXME: currently B(S) assumed to be .got, this may not hold for all |
153 | // platforms. |
154 | return R_GOTONLY_PC; |
155 | case R_ARM_MOVW_PREL_NC: |
156 | case R_ARM_MOVT_PREL: |
157 | case R_ARM_REL32: |
158 | case R_ARM_THM_MOVW_PREL_NC: |
159 | case R_ARM_THM_MOVT_PREL: |
160 | return R_PC; |
161 | case R_ARM_ALU_PC_G0: |
162 | case R_ARM_ALU_PC_G0_NC: |
163 | case R_ARM_ALU_PC_G1: |
164 | case R_ARM_ALU_PC_G1_NC: |
165 | case R_ARM_ALU_PC_G2: |
166 | case R_ARM_LDR_PC_G0: |
167 | case R_ARM_LDR_PC_G1: |
168 | case R_ARM_LDR_PC_G2: |
169 | case R_ARM_LDRS_PC_G0: |
170 | case R_ARM_LDRS_PC_G1: |
171 | case R_ARM_LDRS_PC_G2: |
172 | case R_ARM_THM_ALU_PREL_11_0: |
173 | case R_ARM_THM_PC8: |
174 | case R_ARM_THM_PC12: |
175 | return R_ARM_PCA; |
176 | case R_ARM_MOVW_BREL_NC: |
177 | case R_ARM_MOVW_BREL: |
178 | case R_ARM_MOVT_BREL: |
179 | case R_ARM_THM_MOVW_BREL_NC: |
180 | case R_ARM_THM_MOVW_BREL: |
181 | case R_ARM_THM_MOVT_BREL: |
182 | return R_ARM_SBREL; |
183 | case R_ARM_NONE: |
184 | return R_NONE; |
185 | case R_ARM_TLS_LE32: |
186 | return R_TPREL; |
187 | case R_ARM_V4BX: |
188 | // V4BX is just a marker to indicate there's a "bx rN" instruction at the |
189 | // given address. It can be used to implement a special linker mode which |
190 | // rewrites ARMv4T inputs to ARMv4. Since we support only ARMv4 input and |
191 | // not ARMv4 output, we can just ignore it. |
192 | return R_NONE; |
193 | default: |
194 | error(msg: getErrorLocation(loc) + "unknown relocation (" + Twine(type) + |
195 | ") against symbol " + toString(s)); |
196 | return R_NONE; |
197 | } |
198 | } |
199 | |
200 | RelType ARM::getDynRel(RelType type) const { |
201 | if ((type == R_ARM_ABS32) || (type == R_ARM_TARGET1 && !config->target1Rel)) |
202 | return R_ARM_ABS32; |
203 | return R_ARM_NONE; |
204 | } |
205 | |
206 | void ARM::writeGotPlt(uint8_t *buf, const Symbol &) const { |
207 | write32(p: buf, v: in.plt->getVA()); |
208 | } |
209 | |
210 | void ARM::writeIgotPlt(uint8_t *buf, const Symbol &s) const { |
211 | // An ARM entry is the address of the ifunc resolver function. |
212 | write32(p: buf, v: s.getVA()); |
213 | } |
214 | |
215 | // Long form PLT Header that does not have any restrictions on the displacement |
216 | // of the .plt from the .got.plt. |
217 | static void (uint8_t *buf) { |
218 | write32(p: buf + 0, v: 0xe52de004); // str lr, [sp,#-4]! |
219 | write32(p: buf + 4, v: 0xe59fe004); // ldr lr, L2 |
220 | write32(p: buf + 8, v: 0xe08fe00e); // L1: add lr, pc, lr |
221 | write32(p: buf + 12, v: 0xe5bef008); // ldr pc, [lr, #8] |
222 | write32(p: buf + 16, v: 0x00000000); // L2: .word &(.got.plt) - L1 - 8 |
223 | write32(p: buf + 20, v: 0xd4d4d4d4); // Pad to 32-byte boundary |
224 | write32(p: buf + 24, v: 0xd4d4d4d4); // Pad to 32-byte boundary |
225 | write32(p: buf + 28, v: 0xd4d4d4d4); |
226 | uint64_t gotPlt = in.gotPlt->getVA(); |
227 | uint64_t l1 = in.plt->getVA() + 8; |
228 | write32(p: buf + 16, v: gotPlt - l1 - 8); |
229 | } |
230 | |
231 | // True if we should use Thumb PLTs, which currently require Thumb2, and are |
232 | // only used if the target does not have the ARM ISA. |
233 | static bool useThumbPLTs() { |
234 | return config->armHasThumb2ISA && !config->armHasArmISA; |
235 | } |
236 | |
237 | // The default PLT header requires the .got.plt to be within 128 Mb of the |
238 | // .plt in the positive direction. |
239 | void ARM::(uint8_t *buf) const { |
240 | if (useThumbPLTs()) { |
241 | // The instruction sequence for thumb: |
242 | // |
243 | // 0: b500 push {lr} |
244 | // 2: f8df e008 ldr.w lr, [pc, #0x8] @ 0xe <func+0xe> |
245 | // 6: 44fe add lr, pc |
246 | // 8: f85e ff08 ldr pc, [lr, #8]! |
247 | // e: .word .got.plt - .plt - 16 |
248 | // |
249 | // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from |
250 | // `pc` in the add instruction and 8 bytes for the `lr` adjustment. |
251 | // |
252 | uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16; |
253 | assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset" ); |
254 | write16(p: buf + 0, v: 0xb500); |
255 | // Split into two halves to support endianness correctly. |
256 | write16(p: buf + 2, v: 0xf8df); |
257 | write16(p: buf + 4, v: 0xe008); |
258 | write16(p: buf + 6, v: 0x44fe); |
259 | // Split into two halves to support endianness correctly. |
260 | write16(p: buf + 8, v: 0xf85e); |
261 | write16(p: buf + 10, v: 0xff08); |
262 | write32(p: buf + 12, v: offset); |
263 | |
264 | memcpy(dest: buf + 16, src: trapInstr.data(), n: 4); // Pad to 32-byte boundary |
265 | memcpy(dest: buf + 20, src: trapInstr.data(), n: 4); |
266 | memcpy(dest: buf + 24, src: trapInstr.data(), n: 4); |
267 | memcpy(dest: buf + 28, src: trapInstr.data(), n: 4); |
268 | } else { |
269 | // Use a similar sequence to that in writePlt(), the difference is the |
270 | // calling conventions mean we use lr instead of ip. The PLT entry is |
271 | // responsible for saving lr on the stack, the dynamic loader is responsible |
272 | // for reloading it. |
273 | const uint32_t pltData[] = { |
274 | 0xe52de004, // L1: str lr, [sp,#-4]! |
275 | 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4) |
276 | 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4) |
277 | 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4) |
278 | }; |
279 | |
280 | uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4; |
281 | if (!llvm::isUInt<27>(x: offset)) { |
282 | // We cannot encode the Offset, use the long form. |
283 | writePltHeaderLong(buf); |
284 | return; |
285 | } |
286 | write32(p: buf + 0, v: pltData[0]); |
287 | write32(p: buf + 4, v: pltData[1] | ((offset >> 20) & 0xff)); |
288 | write32(p: buf + 8, v: pltData[2] | ((offset >> 12) & 0xff)); |
289 | write32(p: buf + 12, v: pltData[3] | (offset & 0xfff)); |
290 | memcpy(dest: buf + 16, src: trapInstr.data(), n: 4); // Pad to 32-byte boundary |
291 | memcpy(dest: buf + 20, src: trapInstr.data(), n: 4); |
292 | memcpy(dest: buf + 24, src: trapInstr.data(), n: 4); |
293 | memcpy(dest: buf + 28, src: trapInstr.data(), n: 4); |
294 | } |
295 | } |
296 | |
297 | void ARM::(InputSection &isec) const { |
298 | if (useThumbPLTs()) { |
299 | addSyntheticLocal(name: "$t" , type: STT_NOTYPE, value: 0, size: 0, section&: isec); |
300 | addSyntheticLocal(name: "$d" , type: STT_NOTYPE, value: 12, size: 0, section&: isec); |
301 | } else { |
302 | addSyntheticLocal(name: "$a" , type: STT_NOTYPE, value: 0, size: 0, section&: isec); |
303 | addSyntheticLocal(name: "$d" , type: STT_NOTYPE, value: 16, size: 0, section&: isec); |
304 | } |
305 | } |
306 | |
307 | // Long form PLT entries that do not have any restrictions on the displacement |
308 | // of the .plt from the .got.plt. |
309 | static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr, |
310 | uint64_t pltEntryAddr) { |
311 | write32(p: buf + 0, v: 0xe59fc004); // ldr ip, L2 |
312 | write32(p: buf + 4, v: 0xe08cc00f); // L1: add ip, ip, pc |
313 | write32(p: buf + 8, v: 0xe59cf000); // ldr pc, [ip] |
314 | write32(p: buf + 12, v: 0x00000000); // L2: .word Offset(&(.got.plt) - L1 - 8 |
315 | uint64_t l1 = pltEntryAddr + 4; |
316 | write32(p: buf + 12, v: gotPltEntryAddr - l1 - 8); |
317 | } |
318 | |
319 | // The default PLT entries require the .got.plt to be within 128 Mb of the |
320 | // .plt in the positive direction. |
321 | void ARM::writePlt(uint8_t *buf, const Symbol &sym, |
322 | uint64_t pltEntryAddr) const { |
323 | |
324 | if (!useThumbPLTs()) { |
325 | uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8; |
326 | |
327 | // The PLT entry is similar to the example given in Appendix A of ELF for |
328 | // the Arm Architecture. Instead of using the Group Relocations to find the |
329 | // optimal rotation for the 8-bit immediate used in the add instructions we |
330 | // hard code the most compact rotations for simplicity. This saves a load |
331 | // instruction over the long plt sequences. |
332 | const uint32_t pltData[] = { |
333 | 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8 |
334 | 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8 |
335 | 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8 |
336 | }; |
337 | if (!llvm::isUInt<27>(x: offset)) { |
338 | // We cannot encode the Offset, use the long form. |
339 | writePltLong(buf, gotPltEntryAddr: sym.getGotPltVA(), pltEntryAddr); |
340 | return; |
341 | } |
342 | write32(p: buf + 0, v: pltData[0] | ((offset >> 20) & 0xff)); |
343 | write32(p: buf + 4, v: pltData[1] | ((offset >> 12) & 0xff)); |
344 | write32(p: buf + 8, v: pltData[2] | (offset & 0xfff)); |
345 | memcpy(dest: buf + 12, src: trapInstr.data(), n: 4); // Pad to 16-byte boundary |
346 | } else { |
347 | uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12; |
348 | assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset" ); |
349 | |
350 | // A PLT entry will be: |
351 | // |
352 | // movw ip, #<lower 16 bits> |
353 | // movt ip, #<upper 16 bits> |
354 | // add ip, pc |
355 | // L1: ldr.w pc, [ip] |
356 | // b L1 |
357 | // |
358 | // where ip = r12 = 0xc |
359 | |
360 | // movw ip, #<lower 16 bits> |
361 | write16(p: buf + 2, v: 0x0c00); // use `ip` |
362 | relocateNoSym(loc: buf, type: R_ARM_THM_MOVW_ABS_NC, val: offset); |
363 | |
364 | // movt ip, #<upper 16 bits> |
365 | write16(p: buf + 6, v: 0x0c00); // use `ip` |
366 | relocateNoSym(loc: buf + 4, type: R_ARM_THM_MOVT_ABS, val: offset); |
367 | |
368 | write16(p: buf + 8, v: 0x44fc); // add ip, pc |
369 | write16(p: buf + 10, v: 0xf8dc); // ldr.w pc, [ip] (bottom half) |
370 | write16(p: buf + 12, v: 0xf000); // ldr.w pc, [ip] (upper half) |
371 | write16(p: buf + 14, v: 0xe7fc); // Branch to previous instruction |
372 | } |
373 | } |
374 | |
375 | void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { |
376 | if (useThumbPLTs()) { |
377 | addSyntheticLocal(name: "$t" , type: STT_NOTYPE, value: off, size: 0, section&: isec); |
378 | } else { |
379 | addSyntheticLocal(name: "$a" , type: STT_NOTYPE, value: off, size: 0, section&: isec); |
380 | addSyntheticLocal(name: "$d" , type: STT_NOTYPE, value: off + 12, size: 0, section&: isec); |
381 | } |
382 | } |
383 | |
384 | bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, |
385 | uint64_t branchAddr, const Symbol &s, |
386 | int64_t a) const { |
387 | // If s is an undefined weak symbol and does not have a PLT entry then it will |
388 | // be resolved as a branch to the next instruction. If it is hidden, its |
389 | // binding has been converted to local, so we just check isUndefined() here. A |
390 | // undefined non-weak symbol will have been errored. |
391 | if (s.isUndefined() && !s.isInPlt()) |
392 | return false; |
393 | // A state change from ARM to Thumb and vice versa must go through an |
394 | // interworking thunk if the relocation type is not R_ARM_CALL or |
395 | // R_ARM_THM_CALL. |
396 | switch (type) { |
397 | case R_ARM_PC24: |
398 | case R_ARM_PLT32: |
399 | case R_ARM_JUMP24: |
400 | // Source is ARM, all PLT entries are ARM so no interworking required. |
401 | // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). |
402 | assert(!useThumbPLTs() && |
403 | "If the source is ARM, we should not need Thumb PLTs" ); |
404 | if (s.isFunc() && expr == R_PC && (s.getVA() & 1)) |
405 | return true; |
406 | [[fallthrough]]; |
407 | case R_ARM_CALL: { |
408 | uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA(); |
409 | return !inBranchRange(type, src: branchAddr, dst: dst + a) || |
410 | (!config->armHasBlx && (s.getVA() & 1)); |
411 | } |
412 | case R_ARM_THM_JUMP19: |
413 | case R_ARM_THM_JUMP24: |
414 | // Source is Thumb, when all PLT entries are ARM interworking is required. |
415 | // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). |
416 | if ((expr == R_PLT_PC && !useThumbPLTs()) || |
417 | (s.isFunc() && (s.getVA() & 1) == 0)) |
418 | return true; |
419 | [[fallthrough]]; |
420 | case R_ARM_THM_CALL: { |
421 | uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA(); |
422 | return !inBranchRange(type, src: branchAddr, dst: dst + a) || |
423 | (!config->armHasBlx && (s.getVA() & 1) == 0);; |
424 | } |
425 | } |
426 | return false; |
427 | } |
428 | |
429 | uint32_t ARM::getThunkSectionSpacing() const { |
430 | // The placing of pre-created ThunkSections is controlled by the value |
431 | // thunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to |
432 | // place the ThunkSection such that all branches from the InputSections |
433 | // prior to the ThunkSection can reach a Thunk placed at the end of the |
434 | // ThunkSection. Graphically: |
435 | // | up to thunkSectionSpacing .text input sections | |
436 | // | ThunkSection | |
437 | // | up to thunkSectionSpacing .text input sections | |
438 | // | ThunkSection | |
439 | |
440 | // Pre-created ThunkSections are spaced roughly 16MiB apart on ARMv7. This |
441 | // is to match the most common expected case of a Thumb 2 encoded BL, BLX or |
442 | // B.W: |
443 | // ARM B, BL, BLX range +/- 32MiB |
444 | // Thumb B.W, BL, BLX range +/- 16MiB |
445 | // Thumb B<cc>.W range +/- 1MiB |
446 | // If a branch cannot reach a pre-created ThunkSection a new one will be |
447 | // created so we can handle the rare cases of a Thumb 2 conditional branch. |
448 | // We intentionally use a lower size for thunkSectionSpacing than the maximum |
449 | // branch range so the end of the ThunkSection is more likely to be within |
450 | // range of the branch instruction that is furthest away. The value we shorten |
451 | // thunkSectionSpacing by is set conservatively to allow us to create 16,384 |
452 | // 12 byte Thunks at any offset in a ThunkSection without risk of a branch to |
453 | // one of the Thunks going out of range. |
454 | |
455 | // On Arm the thunkSectionSpacing depends on the range of the Thumb Branch |
456 | // range. On earlier Architectures such as ARMv4, ARMv5 and ARMv6 (except |
457 | // ARMv6T2) the range is +/- 4MiB. |
458 | |
459 | return (config->armJ1J2BranchEncoding) ? 0x1000000 - 0x30000 |
460 | : 0x400000 - 0x7500; |
461 | } |
462 | |
463 | bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { |
464 | if ((dst & 0x1) == 0) |
465 | // Destination is ARM, if ARM caller then Src is already 4-byte aligned. |
466 | // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure |
467 | // destination will be 4 byte aligned. |
468 | src &= ~0x3; |
469 | else |
470 | // Bit 0 == 1 denotes Thumb state, it is not part of the range. |
471 | dst &= ~0x1; |
472 | |
473 | int64_t offset = dst - src; |
474 | switch (type) { |
475 | case R_ARM_PC24: |
476 | case R_ARM_PLT32: |
477 | case R_ARM_JUMP24: |
478 | case R_ARM_CALL: |
479 | return llvm::isInt<26>(x: offset); |
480 | case R_ARM_THM_JUMP19: |
481 | return llvm::isInt<21>(x: offset); |
482 | case R_ARM_THM_JUMP24: |
483 | case R_ARM_THM_CALL: |
484 | return config->armJ1J2BranchEncoding ? llvm::isInt<25>(x: offset) |
485 | : llvm::isInt<23>(x: offset); |
486 | default: |
487 | return true; |
488 | } |
489 | } |
490 | |
491 | // Helper to produce message text when LLD detects that a CALL relocation to |
492 | // a non STT_FUNC symbol that may result in incorrect interworking between ARM |
493 | // or Thumb. |
494 | static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) { |
495 | assert(!s.isFunc()); |
496 | const ErrorPlace place = getErrorPlace(loc); |
497 | std::string hint; |
498 | if (!place.srcLoc.empty()) |
499 | hint = "; " + place.srcLoc; |
500 | if (s.isSection()) { |
501 | // Section symbols must be defined and in a section. Users cannot change |
502 | // the type. Use the section name as getName() returns an empty string. |
503 | warn(msg: place.loc + "branch and link relocation: " + toString(type: relt) + |
504 | " to STT_SECTION symbol " + cast<Defined>(Val: s).section->name + |
505 | " ; interworking not performed" + hint); |
506 | } else { |
507 | // Warn with hint on how to alter the symbol type. |
508 | warn(msg: getErrorLocation(loc) + "branch and link relocation: " + |
509 | toString(type: relt) + " to non STT_FUNC symbol: " + s.getName() + |
510 | " interworking not performed; consider using directive '.type " + |
511 | s.getName() + |
512 | ", %function' to give symbol type STT_FUNC if interworking between " |
513 | "ARM and Thumb is required" + |
514 | hint); |
515 | } |
516 | } |
517 | |
// Rotate a 32-bit unsigned value right by a specified amt of bits.
// The `& 31` in the left-shift count makes amt == 0 well-defined
// (avoids an undefined shift by 32).
static uint32_t rotr32(uint32_t val, uint32_t amt) {
  assert(amt < 32 && "Invalid rotate amount");
  return (val >> amt) | (val << ((32 - amt) & 31));
}
523 | |
524 | static std::pair<uint32_t, uint32_t> getRemAndLZForGroup(unsigned group, |
525 | uint32_t val) { |
526 | uint32_t rem, lz; |
527 | do { |
528 | lz = llvm::countl_zero(Val: val) & ~1; |
529 | rem = val; |
530 | if (lz == 32) // implies rem == 0 |
531 | break; |
532 | val &= 0xffffff >> lz; |
533 | } while (group--); |
534 | return {rem, lz}; |
535 | } |
536 | |
537 | static void encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val, |
538 | int group, bool check) { |
539 | // ADD/SUB (immediate) add = bit23, sub = bit22 |
540 | // immediate field carries is a 12-bit modified immediate, made up of a 4-bit |
541 | // even rotate right and an 8-bit immediate. |
542 | uint32_t opcode = 0x00800000; |
543 | if (val >> 63) { |
544 | opcode = 0x00400000; |
545 | val = -val; |
546 | } |
547 | uint32_t imm, lz; |
548 | std::tie(args&: imm, args&: lz) = getRemAndLZForGroup(group, val); |
549 | uint32_t rot = 0; |
550 | if (lz < 24) { |
551 | imm = rotr32(val: imm, amt: 24 - lz); |
552 | rot = (lz + 8) << 7; |
553 | } |
554 | if (check && imm > 0xff) |
555 | error(msg: getErrorLocation(loc) + "unencodeable immediate " + Twine(val).str() + |
556 | " for relocation " + toString(type: rel.type)); |
557 | write32(p: loc, v: (read32(p: loc) & 0xff3ff000) | opcode | rot | (imm & 0xff)); |
558 | } |
559 | |
560 | static void encodeLdrGroup(uint8_t *loc, const Relocation &rel, uint64_t val, |
561 | int group) { |
562 | // R_ARM_LDR_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a |
563 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
564 | // bottom bit to recover S + A - P. |
565 | if (rel.sym->isFunc()) |
566 | val &= ~0x1; |
567 | // LDR (literal) u = bit23 |
568 | uint32_t opcode = 0x00800000; |
569 | if (val >> 63) { |
570 | opcode = 0x0; |
571 | val = -val; |
572 | } |
573 | uint32_t imm = getRemAndLZForGroup(group, val).first; |
574 | checkUInt(loc, v: imm, n: 12, rel); |
575 | write32(p: loc, v: (read32(p: loc) & 0xff7ff000) | opcode | imm); |
576 | } |
577 | |
578 | static void encodeLdrsGroup(uint8_t *loc, const Relocation &rel, uint64_t val, |
579 | int group) { |
580 | // R_ARM_LDRS_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a |
581 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
582 | // bottom bit to recover S + A - P. |
583 | if (rel.sym->isFunc()) |
584 | val &= ~0x1; |
585 | // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 |
586 | uint32_t opcode = 0x00800000; |
587 | if (val >> 63) { |
588 | opcode = 0x0; |
589 | val = -val; |
590 | } |
591 | uint32_t imm = getRemAndLZForGroup(group, val).first; |
592 | checkUInt(loc, v: imm, n: 8, rel); |
593 | write32(p: loc, v: (read32(p: loc) & 0xff7ff0f0) | opcode | ((imm & 0xf0) << 4) | |
594 | (imm & 0xf)); |
595 | } |
596 | |
597 | void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { |
598 | switch (rel.type) { |
599 | case R_ARM_ABS32: |
600 | case R_ARM_BASE_PREL: |
601 | case R_ARM_GOTOFF32: |
602 | case R_ARM_GOT_BREL: |
603 | case R_ARM_GOT_PREL: |
604 | case R_ARM_REL32: |
605 | case R_ARM_RELATIVE: |
606 | case R_ARM_SBREL32: |
607 | case R_ARM_TARGET1: |
608 | case R_ARM_TARGET2: |
609 | case R_ARM_TLS_GD32: |
610 | case R_ARM_TLS_IE32: |
611 | case R_ARM_TLS_LDM32: |
612 | case R_ARM_TLS_LDO32: |
613 | case R_ARM_TLS_LE32: |
614 | case R_ARM_TLS_TPOFF32: |
615 | case R_ARM_TLS_DTPOFF32: |
616 | write32(p: loc, v: val); |
617 | break; |
618 | case R_ARM_PREL31: |
619 | checkInt(loc, v: val, n: 31, rel); |
620 | write32(p: loc, v: (read32(p: loc) & 0x80000000) | (val & ~0x80000000)); |
621 | break; |
622 | case R_ARM_CALL: { |
623 | // R_ARM_CALL is used for BL and BLX instructions, for symbols of type |
624 | // STT_FUNC we choose whether to write a BL or BLX depending on the |
625 | // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is |
626 | // not of type STT_FUNC then we must preserve the original instruction. |
627 | assert(rel.sym); // R_ARM_CALL is always reached via relocate(). |
628 | bool bit0Thumb = val & 1; |
629 | bool isBlx = (read32(p: loc) & 0xfe000000) == 0xfa000000; |
630 | // lld 10.0 and before always used bit0Thumb when deciding to write a BLX |
631 | // even when type not STT_FUNC. |
632 | if (!rel.sym->isFunc() && isBlx != bit0Thumb) |
633 | stateChangeWarning(loc, relt: rel.type, s: *rel.sym); |
634 | if (rel.sym->isFunc() ? bit0Thumb : isBlx) { |
635 | // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1' |
636 | checkInt(loc, v: val, n: 26, rel); |
637 | write32(p: loc, v: 0xfa000000 | // opcode |
638 | ((val & 2) << 23) | // H |
639 | ((val >> 2) & 0x00ffffff)); // imm24 |
640 | break; |
641 | } |
642 | // BLX (always unconditional) instruction to an ARM Target, select an |
643 | // unconditional BL. |
644 | write32(p: loc, v: 0xeb000000 | (read32(p: loc) & 0x00ffffff)); |
645 | // fall through as BL encoding is shared with B |
646 | } |
647 | [[fallthrough]]; |
648 | case R_ARM_JUMP24: |
649 | case R_ARM_PC24: |
650 | case R_ARM_PLT32: |
651 | checkInt(loc, v: val, n: 26, rel); |
652 | write32(p: loc, v: (read32(p: loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff)); |
653 | break; |
654 | case R_ARM_THM_JUMP8: |
655 | // We do a 9 bit check because val is right-shifted by 1 bit. |
656 | checkInt(loc, v: val, n: 9, rel); |
657 | write16(p: loc, v: (read32(p: loc) & 0xff00) | ((val >> 1) & 0x00ff)); |
658 | break; |
659 | case R_ARM_THM_JUMP11: |
660 | // We do a 12 bit check because val is right-shifted by 1 bit. |
661 | checkInt(loc, v: val, n: 12, rel); |
662 | write16(p: loc, v: (read32(p: loc) & 0xf800) | ((val >> 1) & 0x07ff)); |
663 | break; |
664 | case R_ARM_THM_JUMP19: |
665 | // Encoding T3: Val = S:J2:J1:imm6:imm11:0 |
666 | checkInt(loc, v: val, n: 21, rel); |
667 | write16(p: loc, |
668 | v: (read16(p: loc) & 0xfbc0) | // opcode cond |
669 | ((val >> 10) & 0x0400) | // S |
670 | ((val >> 12) & 0x003f)); // imm6 |
671 | write16(p: loc + 2, |
672 | v: 0x8000 | // opcode |
673 | ((val >> 8) & 0x0800) | // J2 |
674 | ((val >> 5) & 0x2000) | // J1 |
675 | ((val >> 1) & 0x07ff)); // imm11 |
676 | break; |
677 | case R_ARM_THM_CALL: { |
678 | // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type |
679 | // STT_FUNC we choose whether to write a BL or BLX depending on the |
680 | // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is |
681 | // not of type STT_FUNC then we must preserve the original instruction. |
682 | // PLT entries are always ARM state so we know we need to interwork. |
683 | assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate(). |
684 | bool bit0Thumb = val & 1; |
685 | bool useThumb = bit0Thumb || useThumbPLTs(); |
686 | bool isBlx = (read16(p: loc + 2) & 0x1000) == 0; |
687 | // lld 10.0 and before always used bit0Thumb when deciding to write a BLX |
688 | // even when type not STT_FUNC. |
689 | if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb) |
690 | stateChangeWarning(loc, relt: rel.type, s: *rel.sym); |
691 | if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) { |
692 | // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As |
693 | // the BLX instruction may only be two byte aligned. This must be done |
694 | // before overflow check. |
695 | val = alignTo(Value: val, Align: 4); |
696 | write16(p: loc + 2, v: read16(p: loc + 2) & ~0x1000); |
697 | } else { |
698 | write16(p: loc + 2, v: (read16(p: loc + 2) & ~0x1000) | 1 << 12); |
699 | } |
700 | if (!config->armJ1J2BranchEncoding) { |
701 | // Older Arm architectures do not support R_ARM_THM_JUMP24 and have |
702 | // different encoding rules and range due to J1 and J2 always being 1. |
703 | checkInt(loc, v: val, n: 23, rel); |
704 | write16(p: loc, |
705 | v: 0xf000 | // opcode |
706 | ((val >> 12) & 0x07ff)); // imm11 |
707 | write16(p: loc + 2, |
708 | v: (read16(p: loc + 2) & 0xd000) | // opcode |
709 | 0x2800 | // J1 == J2 == 1 |
710 | ((val >> 1) & 0x07ff)); // imm11 |
711 | break; |
712 | } |
713 | } |
714 | // Fall through as rest of encoding is the same as B.W |
715 | [[fallthrough]]; |
716 | case R_ARM_THM_JUMP24: |
717 | // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0 |
718 | checkInt(loc, v: val, n: 25, rel); |
719 | write16(p: loc, |
720 | v: 0xf000 | // opcode |
721 | ((val >> 14) & 0x0400) | // S |
722 | ((val >> 12) & 0x03ff)); // imm10 |
723 | write16(p: loc + 2, |
724 | v: (read16(p: loc + 2) & 0xd000) | // opcode |
725 | (((~(val >> 10)) ^ (val >> 11)) & 0x2000) | // J1 |
726 | (((~(val >> 11)) ^ (val >> 13)) & 0x0800) | // J2 |
727 | ((val >> 1) & 0x07ff)); // imm11 |
728 | break; |
729 | case R_ARM_MOVW_ABS_NC: |
730 | case R_ARM_MOVW_PREL_NC: |
731 | case R_ARM_MOVW_BREL_NC: |
732 | write32(p: loc, v: (read32(p: loc) & ~0x000f0fff) | ((val & 0xf000) << 4) | |
733 | (val & 0x0fff)); |
734 | break; |
735 | case R_ARM_MOVT_ABS: |
736 | case R_ARM_MOVT_PREL: |
737 | case R_ARM_MOVT_BREL: |
738 | write32(p: loc, v: (read32(p: loc) & ~0x000f0fff) | |
739 | (((val >> 16) & 0xf000) << 4) | ((val >> 16) & 0xfff)); |
740 | break; |
741 | case R_ARM_THM_MOVT_ABS: |
742 | case R_ARM_THM_MOVT_PREL: |
743 | case R_ARM_THM_MOVT_BREL: |
744 | // Encoding T1: A = imm4:i:imm3:imm8 |
745 | |
746 | write16(p: loc, |
747 | v: 0xf2c0 | // opcode |
748 | ((val >> 17) & 0x0400) | // i |
749 | ((val >> 28) & 0x000f)); // imm4 |
750 | |
751 | write16(p: loc + 2, |
752 | v: (read16(p: loc + 2) & 0x8f00) | // opcode |
753 | ((val >> 12) & 0x7000) | // imm3 |
754 | ((val >> 16) & 0x00ff)); // imm8 |
755 | break; |
756 | case R_ARM_THM_MOVW_ABS_NC: |
757 | case R_ARM_THM_MOVW_PREL_NC: |
758 | case R_ARM_THM_MOVW_BREL_NC: |
759 | // Encoding T3: A = imm4:i:imm3:imm8 |
760 | write16(p: loc, |
761 | v: 0xf240 | // opcode |
762 | ((val >> 1) & 0x0400) | // i |
763 | ((val >> 12) & 0x000f)); // imm4 |
764 | write16(p: loc + 2, |
765 | v: (read16(p: loc + 2) & 0x8f00) | // opcode |
766 | ((val << 4) & 0x7000) | // imm3 |
767 | (val & 0x00ff)); // imm8 |
768 | break; |
769 | case R_ARM_THM_ALU_ABS_G3: |
770 | write16(p: loc, v: (read16(p: loc) &~ 0x00ff) | ((val >> 24) & 0x00ff)); |
771 | break; |
772 | case R_ARM_THM_ALU_ABS_G2_NC: |
773 | write16(p: loc, v: (read16(p: loc) &~ 0x00ff) | ((val >> 16) & 0x00ff)); |
774 | break; |
775 | case R_ARM_THM_ALU_ABS_G1_NC: |
776 | write16(p: loc, v: (read16(p: loc) &~ 0x00ff) | ((val >> 8) & 0x00ff)); |
777 | break; |
778 | case R_ARM_THM_ALU_ABS_G0_NC: |
779 | write16(p: loc, v: (read16(p: loc) &~ 0x00ff) | (val & 0x00ff)); |
780 | break; |
781 | case R_ARM_ALU_PC_G0: |
782 | encodeAluGroup(loc, rel, val, group: 0, check: true); |
783 | break; |
784 | case R_ARM_ALU_PC_G0_NC: |
785 | encodeAluGroup(loc, rel, val, group: 0, check: false); |
786 | break; |
787 | case R_ARM_ALU_PC_G1: |
788 | encodeAluGroup(loc, rel, val, group: 1, check: true); |
789 | break; |
790 | case R_ARM_ALU_PC_G1_NC: |
791 | encodeAluGroup(loc, rel, val, group: 1, check: false); |
792 | break; |
793 | case R_ARM_ALU_PC_G2: |
794 | encodeAluGroup(loc, rel, val, group: 2, check: true); |
795 | break; |
796 | case R_ARM_LDR_PC_G0: |
797 | encodeLdrGroup(loc, rel, val, group: 0); |
798 | break; |
799 | case R_ARM_LDR_PC_G1: |
800 | encodeLdrGroup(loc, rel, val, group: 1); |
801 | break; |
802 | case R_ARM_LDR_PC_G2: |
803 | encodeLdrGroup(loc, rel, val, group: 2); |
804 | break; |
805 | case R_ARM_LDRS_PC_G0: |
806 | encodeLdrsGroup(loc, rel, val, group: 0); |
807 | break; |
808 | case R_ARM_LDRS_PC_G1: |
809 | encodeLdrsGroup(loc, rel, val, group: 1); |
810 | break; |
811 | case R_ARM_LDRS_PC_G2: |
812 | encodeLdrsGroup(loc, rel, val, group: 2); |
813 | break; |
814 | case R_ARM_THM_ALU_PREL_11_0: { |
815 | // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 |
816 | int64_t imm = val; |
817 | uint16_t sub = 0; |
818 | if (imm < 0) { |
819 | imm = -imm; |
820 | sub = 0x00a0; |
821 | } |
822 | checkUInt(loc, v: imm, n: 12, rel); |
823 | write16(p: loc, v: (read16(p: loc) & 0xfb0f) | sub | (imm & 0x800) >> 1); |
824 | write16(p: loc + 2, |
825 | v: (read16(p: loc + 2) & 0x8f00) | (imm & 0x700) << 4 | (imm & 0xff)); |
826 | break; |
827 | } |
828 | case R_ARM_THM_PC8: |
829 | // ADR and LDR literal encoding T1 positive offset only imm8:00 |
830 | // R_ARM_THM_PC8 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a |
831 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
832 | // bottom bit to recover S + A - Pa. |
833 | if (rel.sym->isFunc()) |
834 | val &= ~0x1; |
835 | checkUInt(loc, v: val, n: 10, rel); |
836 | checkAlignment(loc, v: val, n: 4, rel); |
837 | write16(p: loc, v: (read16(p: loc) & 0xff00) | (val & 0x3fc) >> 2); |
838 | break; |
839 | case R_ARM_THM_PC12: { |
840 | // LDR (literal) encoding T2, add = (U == '1') imm12 |
841 | // imm12 is unsigned |
842 | // R_ARM_THM_PC12 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a |
843 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
844 | // bottom bit to recover S + A - Pa. |
845 | if (rel.sym->isFunc()) |
846 | val &= ~0x1; |
847 | int64_t imm12 = val; |
848 | uint16_t u = 0x0080; |
849 | if (imm12 < 0) { |
850 | imm12 = -imm12; |
851 | u = 0; |
852 | } |
853 | checkUInt(loc, v: imm12, n: 12, rel); |
854 | write16(p: loc, v: read16(p: loc) | u); |
855 | write16(p: loc + 2, v: (read16(p: loc + 2) & 0xf000) | imm12); |
856 | break; |
857 | } |
858 | default: |
859 | llvm_unreachable("unknown relocation" ); |
860 | } |
861 | } |
862 | |
// Decodes the addend stored in the instruction or data word at `buf` for a
// REL-format relocation of the given `type`. This is the inverse of the
// encodings applied in ARM::relocate(): each case extracts the immediate
// fields back out of the instruction and sign-extends to a byte offset.
int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  default:
    internalLinkerError(loc: getErrorLocation(loc: buf),
                        msg: "cannot read addend for relocation " + toString(type));
    return 0;
  case R_ARM_ABS32:
  case R_ARM_BASE_PREL:
  case R_ARM_GLOB_DAT:
  case R_ARM_GOTOFF32:
  case R_ARM_GOT_BREL:
  case R_ARM_GOT_PREL:
  case R_ARM_IRELATIVE:
  case R_ARM_REL32:
  case R_ARM_RELATIVE:
  case R_ARM_SBREL32:
  case R_ARM_TARGET1:
  case R_ARM_TARGET2:
  case R_ARM_TLS_DTPMOD32:
  case R_ARM_TLS_DTPOFF32:
  case R_ARM_TLS_GD32:
  case R_ARM_TLS_IE32:
  case R_ARM_TLS_LDM32:
  case R_ARM_TLS_LE32:
  case R_ARM_TLS_LDO32:
  case R_ARM_TLS_TPOFF32:
    // Data relocations: the addend is a plain 32-bit word.
    return SignExtend64<32>(x: read32(p: buf));
  case R_ARM_PREL31:
    return SignExtend64<31>(x: read32(p: buf));
  case R_ARM_CALL:
  case R_ARM_JUMP24:
  case R_ARM_PC24:
  case R_ARM_PLT32:
    // Arm branch instructions: signed 24-bit immediate scaled by 4.
    return SignExtend64<26>(x: read32(p: buf) << 2);
  case R_ARM_THM_JUMP8:
    return SignExtend64<9>(x: read16(p: buf) << 1);
  case R_ARM_THM_JUMP11:
    return SignExtend64<12>(x: read16(p: buf) << 1);
  case R_ARM_THM_JUMP19: {
    // Encoding T3: A = S:J2:J1:imm10:imm6:0
    uint16_t hi = read16(p: buf);
    uint16_t lo = read16(p: buf + 2);
    return SignExtend64<20>(x: ((hi & 0x0400) << 10) | // S
                            ((lo & 0x0800) << 8) |    // J2
                            ((lo & 0x2000) << 5) |    // J1
                            ((hi & 0x003f) << 12) |   // imm6
                            ((lo & 0x07ff) << 1));    // imm11:0
  }
  case R_ARM_THM_CALL:
    if (!config->armJ1J2BranchEncoding) {
      // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
      // different encoding rules and range due to J1 and J2 always being 1.
      uint16_t hi = read16(p: buf);
      uint16_t lo = read16(p: buf + 2);
      return SignExtend64<22>(x: ((hi & 0x7ff) << 12) | // imm11
                              ((lo & 0x7ff) << 1));    // imm11:0
      break; // (not reached; return above)
    }
    [[fallthrough]];
  case R_ARM_THM_JUMP24: {
    // Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0
    // I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S)
    uint16_t hi = read16(p: buf);
    uint16_t lo = read16(p: buf + 2);
    return SignExtend64<24>(x: ((hi & 0x0400) << 14) |              // S
                            (~((lo ^ (hi << 3)) << 10) & 0x00800000) | // I1
                            (~((lo ^ (hi << 1)) << 11) & 0x00400000) | // I2
                            ((hi & 0x003ff) << 12) |               // imm0
                            ((lo & 0x007ff) << 1));                // imm11:0
  }
  // ELF for the ARM Architecture 4.6.1.1 the implicit addend for MOVW and
  // MOVT is in the range -32768 <= A < 32768
  case R_ARM_MOVW_ABS_NC:
  case R_ARM_MOVT_ABS:
  case R_ARM_MOVW_PREL_NC:
  case R_ARM_MOVT_PREL:
  case R_ARM_MOVW_BREL_NC:
  case R_ARM_MOVT_BREL: {
    uint64_t val = read32(p: buf) & 0x000f0fff;
    return SignExtend64<16>(x: ((val & 0x000f0000) >> 4) | (val & 0x00fff));
  }
  case R_ARM_THM_MOVW_ABS_NC:
  case R_ARM_THM_MOVT_ABS:
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVT_PREL:
  case R_ARM_THM_MOVW_BREL_NC:
  case R_ARM_THM_MOVT_BREL: {
    // Encoding T3: A = imm4:i:imm3:imm8
    uint16_t hi = read16(p: buf);
    uint16_t lo = read16(p: buf + 2);
    return SignExtend64<16>(x: ((hi & 0x000f) << 12) | // imm4
                            ((hi & 0x0400) << 1) |    // i
                            ((lo & 0x7000) >> 4) |    // imm3
                            (lo & 0x00ff));           // imm8
  }
  case R_ARM_THM_ALU_ABS_G0_NC:
  case R_ARM_THM_ALU_ABS_G1_NC:
  case R_ARM_THM_ALU_ABS_G2_NC:
  case R_ARM_THM_ALU_ABS_G3:
    // 8-bit immediate in the low byte of the instruction.
    return read16(p: buf) & 0xff;
  case R_ARM_ALU_PC_G0:
  case R_ARM_ALU_PC_G0_NC:
  case R_ARM_ALU_PC_G1:
  case R_ARM_ALU_PC_G1_NC:
  case R_ARM_ALU_PC_G2: {
    // 12-bit immediate is a modified immediate made up of a 4-bit even
    // right rotation and 8-bit constant. After the rotation the value
    // is zero-extended. When bit 23 is set the instruction is an add, when
    // bit 22 is set it is a sub.
    uint32_t instr = read32(p: buf);
    uint32_t val = rotr32(val: instr & 0xff, amt: ((instr & 0xf00) >> 8) * 2);
    return (instr & 0x00400000) ? -val : val;
  }
  case R_ARM_LDR_PC_G0:
  case R_ARM_LDR_PC_G1:
  case R_ARM_LDR_PC_G2: {
    // ADR (literal) add = bit23, sub = bit22
    // LDR (literal) u = bit23 unsigned imm12
    bool u = read32(p: buf) & 0x00800000;
    uint32_t imm12 = read32(p: buf) & 0xfff;
    return u ? imm12 : -imm12;
  }
  case R_ARM_LDRS_PC_G0:
  case R_ARM_LDRS_PC_G1:
  case R_ARM_LDRS_PC_G2: {
    // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 unsigned imm8
    uint32_t opcode = read32(p: buf);
    bool u = opcode & 0x00800000;
    uint32_t imm4l = opcode & 0xf;
    uint32_t imm4h = (opcode & 0xf00) >> 4;
    return u ? (imm4h | imm4l) : -(imm4h | imm4l);
  }
  case R_ARM_THM_ALU_PREL_11_0: {
    // Thumb2 ADR, which is an alias for a sub or add instruction with an
    // unsigned immediate.
    // ADR encoding T2 (sub), T3 (add) i:imm3:imm8
    uint16_t hi = read16(p: buf);
    uint16_t lo = read16(p: buf + 2);
    uint64_t imm = (hi & 0x0400) << 1 | // i
                   (lo & 0x7000) >> 4 | // imm3
                   (lo & 0x00ff);       // imm8
    // For sub, addend is negative, add is positive.
    return (hi & 0x00f0) ? -imm : imm;
  }
  case R_ARM_THM_PC8:
    // ADR and LDR (literal) encoding T1
    // From ELF for the ARM Architecture the initial signed addend is formed
    // from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) - 4)
    // this trick permits the PC bias of -4 to be encoded using imm8 = 0xff
    return ((((read16(p: buf) & 0xff) << 2) + 4) & 0x3ff) - 4;
  case R_ARM_THM_PC12: {
    // LDR (literal) encoding T2, add = (U == '1') imm12
    bool u = read16(p: buf) & 0x0080;
    uint64_t imm12 = read16(p: buf + 2) & 0x0fff;
    return u ? imm12 : -imm12;
  }
  case R_ARM_NONE:
  case R_ARM_V4BX:
  case R_ARM_JUMP_SLOT:
    // These relocations are defined as not having an implicit addend.
    return 0;
  }
}
1026 | |
1027 | static bool isArmMapSymbol(const Symbol *b) { |
1028 | return b->getName() == "$a" || b->getName().starts_with(Prefix: "$a." ); |
1029 | } |
1030 | |
1031 | static bool isThumbMapSymbol(const Symbol *s) { |
1032 | return s->getName() == "$t" || s->getName().starts_with(Prefix: "$t." ); |
1033 | } |
1034 | |
1035 | static bool isDataMapSymbol(const Symbol *b) { |
1036 | return b->getName() == "$d" || b->getName().starts_with(Prefix: "$d." ); |
1037 | } |
1038 | |
1039 | void elf::sortArmMappingSymbols() { |
1040 | // For each input section make sure the mapping symbols are sorted in |
1041 | // ascending order. |
1042 | for (auto &kv : sectionMap) { |
1043 | SmallVector<const Defined *, 0> &mapSyms = kv.second; |
1044 | llvm::stable_sort(Range&: mapSyms, C: [](const Defined *a, const Defined *b) { |
1045 | return a->value < b->value; |
1046 | }); |
1047 | } |
1048 | } |
1049 | |
1050 | void elf::addArmInputSectionMappingSymbols() { |
1051 | // Collect mapping symbols for every executable input sections. |
1052 | // The linker generated mapping symbols for all the synthetic |
1053 | // sections are adding into the sectionmap through the function |
1054 | // addArmSyntheitcSectionMappingSymbol. |
1055 | for (ELFFileBase *file : ctx.objectFiles) { |
1056 | for (Symbol *sym : file->getLocalSymbols()) { |
1057 | auto *def = dyn_cast<Defined>(Val: sym); |
1058 | if (!def) |
1059 | continue; |
1060 | if (!isArmMapSymbol(b: def) && !isDataMapSymbol(b: def) && |
1061 | !isThumbMapSymbol(s: def)) |
1062 | continue; |
1063 | if (auto *sec = cast_if_present<InputSection>(Val: def->section)) |
1064 | if (sec->flags & SHF_EXECINSTR) |
1065 | sectionMap[sec].push_back(Elt: def); |
1066 | } |
1067 | } |
1068 | } |
1069 | |
1070 | // Synthetic sections are not backed by an ELF file where we can access the |
1071 | // symbol table, instead mapping symbols added to synthetic sections are stored |
1072 | // in the synthetic symbol table. Due to the presence of strip (--strip-all), |
1073 | // we can not rely on the synthetic symbol table retaining the mapping symbols. |
1074 | // Instead we record the mapping symbols locally. |
1075 | void elf::addArmSyntheticSectionMappingSymbol(Defined *sym) { |
1076 | if (!isArmMapSymbol(b: sym) && !isDataMapSymbol(b: sym) && !isThumbMapSymbol(s: sym)) |
1077 | return; |
1078 | if (auto *sec = cast_if_present<InputSection>(Val: sym->section)) |
1079 | if (sec->flags & SHF_EXECINSTR) |
1080 | sectionMap[sec].push_back(Elt: sym); |
1081 | } |
1082 | |
1083 | static void toLittleEndianInstructions(uint8_t *buf, uint64_t start, |
1084 | uint64_t end, uint64_t width) { |
1085 | CodeState curState = static_cast<CodeState>(width); |
1086 | if (curState == CodeState::Arm) |
1087 | for (uint64_t i = start; i < end; i += width) |
1088 | write32le(P: buf + i, V: read32(p: buf + i)); |
1089 | |
1090 | if (curState == CodeState::Thumb) |
1091 | for (uint64_t i = start; i < end; i += width) |
1092 | write16le(P: buf + i, V: read16(p: buf + i)); |
1093 | } |
1094 | |
1095 | // Arm BE8 big endian format requires instructions to be little endian, with |
1096 | // the initial contents big-endian. Convert the big-endian instructions to |
1097 | // little endian leaving literal data untouched. We use mapping symbols to |
1098 | // identify half open intervals of Arm code [$a, non $a) and Thumb code |
1099 | // [$t, non $t) and convert these to little endian a word or half word at a |
1100 | // time respectively. |
1101 | void elf::convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf) { |
1102 | if (!sectionMap.contains(Val: sec)) |
1103 | return; |
1104 | |
1105 | SmallVector<const Defined *, 0> &mapSyms = sectionMap[sec]; |
1106 | |
1107 | if (mapSyms.empty()) |
1108 | return; |
1109 | |
1110 | CodeState curState = CodeState::Data; |
1111 | uint64_t start = 0, width = 0, size = sec->getSize(); |
1112 | for (auto &msym : mapSyms) { |
1113 | CodeState newState = CodeState::Data; |
1114 | if (isThumbMapSymbol(s: msym)) |
1115 | newState = CodeState::Thumb; |
1116 | else if (isArmMapSymbol(b: msym)) |
1117 | newState = CodeState::Arm; |
1118 | |
1119 | if (newState == curState) |
1120 | continue; |
1121 | |
1122 | if (curState != CodeState::Data) { |
1123 | width = static_cast<uint64_t>(curState); |
1124 | toLittleEndianInstructions(buf, start, end: msym->value, width); |
1125 | } |
1126 | start = msym->value; |
1127 | curState = newState; |
1128 | } |
1129 | |
1130 | // Passed last mapping symbol, may need to reverse |
1131 | // up to end of section. |
1132 | if (curState != CodeState::Data) { |
1133 | width = static_cast<uint64_t>(curState); |
1134 | toLittleEndianInstructions(buf, start, end: size, width); |
1135 | } |
1136 | } |
1137 | |
1138 | // The Arm Cortex-M Security Extensions (CMSE) splits a system into two parts; |
1139 | // the non-secure and secure states with the secure state inaccessible from the |
1140 | // non-secure state, apart from an area of memory in secure state called the |
1141 | // secure gateway which is accessible from non-secure state. The secure gateway |
1142 | // contains one or more entry points which must start with a landing pad |
1143 | // instruction SG. Arm recommends that the secure gateway consists only of |
1144 | // secure gateway veneers, which are made up of a SG instruction followed by a |
1145 | // branch to the destination in secure state. Full details can be found in Arm |
1146 | // v8-M Security Extensions Requirements on Development Tools. |
1147 | // |
1148 | // The CMSE model of software development requires the non-secure and secure |
1149 | // states to be developed as two separate programs. The non-secure developer is |
1150 | // provided with an import library defining symbols describing the entry points |
1151 | // in the secure gateway. No additional linker support is required for the |
1152 | // non-secure state. |
1153 | // |
1154 | // Development of the secure state requires linker support to manage the secure |
1155 | // gateway veneers. The management consists of: |
1156 | // - Creation of new secure gateway veneers based on symbol conventions. |
1157 | // - Checking the address of existing secure gateway veneers. |
1158 | // - Warning when existing secure gateway veneers removed. |
1159 | // |
1160 | // The secure gateway veneers are created in an import library, which is just an |
1161 | // ELF object with a symbol table. The import library is controlled by two |
1162 | // command line options: |
1163 | // --in-implib (specify an input import library from a previous revision of the |
1164 | // program). |
1165 | // --out-implib (specify an output import library to be created by the linker). |
1166 | // |
1167 | // The input import library is used to manage consistency of the secure entry |
1168 | // points. The output import library is for new and updated secure entry points. |
1169 | // |
1170 | // The symbol convention that identifies secure entry functions is the prefix |
1171 | // __acle_se_ for a symbol called name the linker is expected to create a secure |
1172 | // gateway veneer if symbols __acle_se_name and name have the same address. |
1173 | // After creating a secure gateway veneer the symbol name labels the secure |
1174 | // gateway veneer and the __acle_se_name labels the function definition. |
1175 | // |
1176 | // The LLD implementation: |
1177 | // - Reads an existing import library with importCmseSymbols(). |
1178 | // - Determines which new secure gateway veneers to create and redirects calls |
1179 | // within the secure state to the __acle_se_ prefixed symbol with |
1180 | // processArmCmseSymbols(). |
1181 | // - Models the SG veneers as a synthetic section. |
1182 | |
1183 | // Initialize symbols. symbols is a parallel array to the corresponding ELF |
1184 | // symbol table. |
template <class ELFT> void ObjFile<ELFT>::importCmseSymbols() {
  ArrayRef<Elf_Sym> eSyms = getELFSyms<ELFT>();
  // Error for local symbols. The symbol at index 0 is LOCAL. So skip it.
  for (size_t i = 1, end = firstGlobal; i != end; ++i) {
    errorOrWarn("CMSE symbol '" + CHECK(eSyms[i].getName(stringTable), this) +
                "' in import library '" + toString(this) + "' is not global" );
  }

  // Validate each global symbol and register it in symtab.cmseImportLib so
  // the SG veneer layout can reuse the addresses from the previous link.
  for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
    const Elf_Sym &eSym = eSyms[i];
    Defined *sym = reinterpret_cast<Defined *>(make<SymbolUnion>());

    // Initialize symbol fields.
    memset(s: sym, c: 0, n: sizeof(Symbol));
    sym->setName(CHECK(eSyms[i].getName(stringTable), this));
    sym->value = eSym.st_value;
    sym->size = eSym.st_size;
    sym->type = eSym.getType();
    sym->binding = eSym.getBinding();
    sym->stOther = eSym.st_other;

    // Import-library entries describe secure gateway veneer addresses, so
    // they must be absolute symbols.
    if (eSym.st_shndx != SHN_ABS) {
      error("CMSE symbol '" + sym->getName() + "' in import library '" +
            toString(this) + "' is not absolute" );
      continue;
    }

    // Thumb function addresses have bit 0 set; entry points must be Thumb
    // functions.
    if (!(eSym.st_value & 1) || (eSym.getType() != STT_FUNC)) {
      error("CMSE symbol '" + sym->getName() + "' in import library '" +
            toString(this) + "' is not a Thumb function definition" );
      continue;
    }

    if (symtab.cmseImportLib.count(Key: sym->getName())) {
      error("CMSE symbol '" + sym->getName() +
            "' is multiply defined in import library '" + toString(this) + "'" );
      continue;
    }

    // An SG veneer is expected to be exactly ACLESESYM_SIZE bytes; warn (not
    // error) so hand-crafted import libraries still work.
    if (eSym.st_size != ACLESESYM_SIZE) {
      warn("CMSE symbol '" + sym->getName() + "' in import library '" +
           toString(this) + "' does not have correct size of " +
           Twine(ACLESESYM_SIZE) + " bytes" );
    }

    symtab.cmseImportLib[sym->getName()] = sym;
  }
}
1233 | |
1234 | // Check symbol attributes of the acleSeSym, sym pair. |
1235 | // Both symbols should be global/weak Thumb code symbol definitions. |
1236 | static std::string checkCmseSymAttributes(Symbol *acleSeSym, Symbol *sym) { |
1237 | auto check = [](Symbol *s, StringRef type) -> std::optional<std::string> { |
1238 | auto d = dyn_cast_or_null<Defined>(Val: s); |
1239 | if (!(d && d->isFunc() && (d->value & 1))) |
1240 | return (Twine(toString(f: s->file)) + ": cmse " + type + " symbol '" + |
1241 | s->getName() + "' is not a Thumb function definition" ) |
1242 | .str(); |
1243 | if (!d->section) |
1244 | return (Twine(toString(f: s->file)) + ": cmse " + type + " symbol '" + |
1245 | s->getName() + "' cannot be an absolute symbol" ) |
1246 | .str(); |
1247 | return std::nullopt; |
1248 | }; |
1249 | for (auto [sym, type] : |
1250 | {std::make_pair(x&: acleSeSym, y: "special" ), std::make_pair(x&: sym, y: "entry" )}) |
1251 | if (auto err = check(sym, type)) |
1252 | return *err; |
1253 | return "" ; |
1254 | } |
1255 | |
1256 | // Look for [__acle_se_<sym>, <sym>] pairs, as specified in the Cortex-M |
1257 | // Security Extensions specification. |
1258 | // 1) <sym> : A standard function name. |
1259 | // 2) __acle_se_<sym> : A special symbol that prefixes the standard function |
1260 | // name with __acle_se_. |
1261 | // Both these symbols are Thumb function symbols with external linkage. |
1262 | // <sym> may be redefined in .gnu.sgstubs. |
void elf::processArmCmseSymbols() {
  if (!config->cmseImplib)
    return;
  // Only symbols with external linkage end up in symtab, so no need to do
  // linkage checks. Only check symbol type.
  for (Symbol *acleSeSym : symtab.getSymbols()) {
    if (!acleSeSym->getName().starts_with(Prefix: ACLESESYM_PREFIX))
      continue;
    // If input object build attributes do not support CMSE, error and disable
    // further scanning for <sym>, __acle_se_<sym> pairs.
    if (!config->armCMSESupport) {
      error(msg: "CMSE is only supported by ARMv8-M architecture or later" );
      config->cmseImplib = false;
      break;
    }

    // Try to find the associated symbol definition.
    // Symbol must have external linkage.
    StringRef name = acleSeSym->getName().substr(Start: std::strlen(s: ACLESESYM_PREFIX));
    Symbol *sym = symtab.find(name);
    if (!sym) {
      error(msg: toString(f: acleSeSym->file) + ": cmse special symbol '" +
            acleSeSym->getName() +
            "' detected, but no associated entry function definition '" + name +
            "' with external linkage found" );
      continue;
    }

    // Both symbols must be Thumb function definitions (see
    // checkCmseSymAttributes); diagnose and skip the pair otherwise.
    std::string errMsg = checkCmseSymAttributes(acleSeSym, sym);
    if (!errMsg.empty()) {
      error(msg: errMsg);
      continue;
    }

    // <sym> may be redefined later in the link in .gnu.sgstubs
    symtab.cmseSymMap[name] = {.acleSeSym: acleSeSym, .sym: sym};
  }

  // If this is an Arm CMSE secure app, replace references to entry symbol <sym>
  // with its corresponding special symbol __acle_se_<sym>.
  parallelForEach(R&: ctx.objectFiles, Fn: [&](InputFile *file) {
    MutableArrayRef<Symbol *> syms = file->getMutableSymbols();
    for (size_t i = 0, e = syms.size(); i != e; ++i) {
      StringRef symName = syms[i]->getName();
      if (symtab.cmseSymMap.count(Key: symName))
        syms[i] = symtab.cmseSymMap[symName].acleSeSym;
    }
  });
}
1312 | |
// Models one secure gateway (SG) veneer in .gnu.sgstubs: pairs the entry
// function symbol <sym> with its special symbol __acle_se_<sym>, and records
// where the veneer is placed.
class elf::ArmCmseSGVeneer {
public:
  ArmCmseSGVeneer(Symbol *sym, Symbol *acleSeSym,
                  std::optional<uint64_t> addr = std::nullopt)
      : sym(sym), acleSeSym(acleSeSym), entAddr{addr} {}
  // All SG veneers have the same fixed size.
  static const size_t size{ACLESESYM_SIZE};
  // Fixed address carried over from the input import library, or
  // std::nullopt for a newly created veneer.
  const std::optional<uint64_t> getAddr() const { return entAddr; };

  Symbol *sym;       // Entry function symbol <sym>.
  Symbol *acleSeSym; // Special symbol __acle_se_<sym>.
  uint64_t offset = 0; // Byte offset of this veneer within .gnu.sgstubs.

private:
  const std::optional<uint64_t> entAddr;
};
1328 | |
// Builds the .gnu.sgstubs synthetic section from the <__acle_se_sym, sym>
// pairs collected in symtab.cmseSymMap, cross-checking them against the
// input import library (symtab.cmseImportLib).
ArmCmseSGSection::ArmCmseSGSection()
    : SyntheticSection(llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_EXECINSTR,
                       llvm::ELF::SHT_PROGBITS,
                       /*alignment=*/32, ".gnu.sgstubs" ) {
  entsize = ACLESESYM_SIZE;
  // The range of addresses used in the CMSE import library should be fixed.
  for (auto &[_, sym] : symtab.cmseImportLib) {
    if (impLibMaxAddr <= sym->value)
      impLibMaxAddr = sym->value + sym->size;
  }
  if (symtab.cmseSymMap.empty())
    return;
  addMappingSymbol();
  // Create an SG veneer entry for every entry-function pair in this link.
  for (auto &[_, entryFunc] : symtab.cmseSymMap)
    addSGVeneer(sym: cast<Defined>(Val: entryFunc.acleSeSym),
                ext_sym: cast<Defined>(Val: entryFunc.sym));
  // Warn about entry functions recorded by a previous link (via the import
  // library) that are absent from this application.
  for (auto &[_, sym] : symtab.cmseImportLib) {
    if (!symtab.inCMSEOutImpLib.count(Key: sym->getName()))
      warn(msg: "entry function '" + sym->getName() +
           "' from CMSE import library is not present in secure application" );
  }

  if (!symtab.cmseImportLib.empty() && config->cmseOutputLib.empty()) {
    // Warn about new entry functions that will not be recorded anywhere
    // because no output import library (--out-implib) was requested.
    for (auto &[_, entryFunc] : symtab.cmseSymMap) {
      Symbol *sym = entryFunc.sym;
      if (!symtab.inCMSEOutImpLib.count(Key: sym->getName()))
        warn(msg: "new entry function '" + sym->getName() +
             "' introduced but no output import library specified" );
    }
  }
}
1360 | |
// Registers the <__acle_se_sym, sym> pair and, when the two symbols share an
// address, creates an SG veneer for it (reusing the import-library address
// if one exists).
void ArmCmseSGSection::addSGVeneer(Symbol *acleSeSym, Symbol *sym) {
  entries.emplace_back(Args&: acleSeSym, Args&: sym);
  // Note that this entry function is also present in the import library.
  if (symtab.cmseImportLib.count(Key: sym->getName()))
    symtab.inCMSEOutImpLib[sym->getName()] = true;
  // Symbol addresses different, nothing to do.
  if (acleSeSym->file != sym->file ||
      cast<Defined>(Val&: *acleSeSym).value != cast<Defined>(Val&: *sym).value)
    return;
  // Only secure symbols with values equal to that of its non-secure
  // counterpart need to be in the .gnu.sgstubs section.
  ArmCmseSGVeneer *ss = nullptr;
  if (symtab.cmseImportLib.count(Key: sym->getName())) {
    // Pin the veneer to the address assigned by the previous link.
    Defined *impSym = symtab.cmseImportLib[sym->getName()];
    ss = make<ArmCmseSGVeneer>(args&: sym, args&: acleSeSym, args&: impSym->value);
  } else {
    ss = make<ArmCmseSGVeneer>(args&: sym, args&: acleSeSym);
    ++newEntries;
  }
  sgVeneers.emplace_back(Args&: ss);
}
1381 | |
// Emits each SG veneer: an SG instruction followed by a B.W to the
// __acle_se_ function in secure state.
void ArmCmseSGSection::writeTo(uint8_t *buf) {
  for (ArmCmseSGVeneer *s : sgVeneers) {
    uint8_t *p = buf + s->offset;
    write16(p: p + 0, v: 0xe97f); // SG
    write16(p: p + 2, v: 0xe97f);
    write16(p: p + 4, v: 0xf000); // B.W S
    write16(p: p + 6, v: 0xb000);
    // Resolve the B.W branch to the secure entry function, relative to the
    // end of this veneer.
    target->relocateNoSym(loc: p + 4, type: R_ARM_THM_JUMP24,
                          val: s->acleSeSym->getVA() -
                              (getVA() + s->offset + s->size));
  }
}
1394 | |
// .gnu.sgstubs holds only Thumb instructions, so mark the start of the
// section with a "$t" Thumb mapping symbol.
void ArmCmseSGSection::addMappingSymbol() {
  addSyntheticLocal(name: "$t" , type: STT_NOTYPE, /*off=*/value: 0, /*size=*/0, section&: *this);
}
1398 | |
size_t ArmCmseSGSection::getSize() const {
  // With no veneers to emit, the section still reserves the address range
  // covered by the import library plus room for the new entries.
  if (sgVeneers.empty())
    return (impLibMaxAddr ? impLibMaxAddr - getVA() : 0) + newEntries * entsize;

  // Otherwise one fixed-size stub per registered entry.
  return entries.size() * entsize;
}
1405 | |
void ArmCmseSGSection::finalizeContents() {
  if (sgVeneers.empty())
    return;

  // Lay out veneers whose addresses are fixed by the input import library
  // first, in ascending address order; newly created veneers follow.
  auto it =
      std::stable_partition(first: sgVeneers.begin(), last: sgVeneers.end(),
                            pred: [](auto *i) { return i->getAddr().has_value(); });
  std::sort(first: sgVeneers.begin(), last: it, comp: [](auto *a, auto *b) {
    return a->getAddr().value() < b->getAddr().value();
  });
  // This is the partition of the veneers with fixed addresses.
  uint64_t addr = (*sgVeneers.begin())->getAddr().has_value()
                      ? (*sgVeneers.begin())->getAddr().value()
                      : getVA();
  // Check if the start address of '.gnu.sgstubs' corresponds to the
  // linker-synthesized veneer with the lowest address. Bit 0 (the Thumb bit)
  // is masked off for the comparison.
  if ((getVA() & ~1) != (addr & ~1)) {
    error(msg: "start address of '.gnu.sgstubs' is different from previous link" );
    return;
  }

  // Assign each veneer its offset and redefine <sym> to label the veneer in
  // this section; bit 0 of the value marks it as Thumb code.
  for (size_t i = 0; i < sgVeneers.size(); ++i) {
    ArmCmseSGVeneer *s = sgVeneers[i];
    s->offset = i * s->size;
    Defined(file, StringRef(), s->sym->binding, s->sym->stOther, s->sym->type,
            s->offset | 1, s->size, this)
        .overwrite(sym&: *s->sym);
  }
}
1435 | |
1436 | // Write the CMSE import library to disk. |
1437 | // The CMSE import library is a relocatable object with only a symbol table. |
1438 | // The symbols are copies of the (absolute) symbols of the secure gateways |
1439 | // in the executable output by this link. |
// See Arm® v8-M Security Extensions: Requirements on Development Tools
1441 | // https://developer.arm.com/documentation/ecm0359818/latest |
1442 | template <typename ELFT> void elf::writeARMCmseImportLib() { |
1443 | StringTableSection *shstrtab = |
1444 | make<StringTableSection>(args: ".shstrtab" , /*dynamic=*/args: false); |
1445 | StringTableSection *strtab = |
1446 | make<StringTableSection>(args: ".strtab" , /*dynamic=*/args: false); |
1447 | SymbolTableBaseSection *impSymTab = make<SymbolTableSection<ELFT>>(*strtab); |
1448 | |
1449 | SmallVector<std::pair<OutputSection *, SyntheticSection *>, 0> osIsPairs; |
1450 | osIsPairs.emplace_back(Args: make<OutputSection>(args&: strtab->name, args: 0, args: 0), Args&: strtab); |
1451 | osIsPairs.emplace_back(Args: make<OutputSection>(args&: impSymTab->name, args: 0, args: 0), Args&: impSymTab); |
1452 | osIsPairs.emplace_back(Args: make<OutputSection>(args&: shstrtab->name, args: 0, args: 0), Args&: shstrtab); |
1453 | |
1454 | std::sort(symtab.cmseSymMap.begin(), symtab.cmseSymMap.end(), |
1455 | [](const auto &a, const auto &b) -> bool { |
1456 | return a.second.sym->getVA() < b.second.sym->getVA(); |
1457 | }); |
1458 | // Copy the secure gateway entry symbols to the import library symbol table. |
1459 | for (auto &p : symtab.cmseSymMap) { |
1460 | Defined *d = cast<Defined>(Val: p.second.sym); |
1461 | impSymTab->addSymbol(sym: makeDefined( |
1462 | args&: ctx.internalFile, args: d->getName(), args: d->computeBinding(), |
1463 | /*stOther=*/args: 0, args: STT_FUNC, args: d->getVA(), args: d->getSize(), args: nullptr)); |
1464 | } |
1465 | |
1466 | size_t idx = 0; |
1467 | uint64_t off = sizeof(typename ELFT::Ehdr); |
1468 | for (auto &[osec, isec] : osIsPairs) { |
1469 | osec->sectionIndex = ++idx; |
1470 | osec->recordSection(isec); |
1471 | osec->finalizeInputSections(); |
1472 | osec->shName = shstrtab->addString(s: osec->name); |
1473 | osec->size = isec->getSize(); |
1474 | isec->finalizeContents(); |
1475 | osec->offset = alignToPowerOf2(Value: off, Align: osec->addralign); |
1476 | off = osec->offset + osec->size; |
1477 | } |
1478 | |
1479 | const uint64_t = alignToPowerOf2(Value: off, Align: config->wordsize); |
1480 | const auto shnum = osIsPairs.size() + 1; |
1481 | const uint64_t fileSize = |
1482 | sectionHeaderOff + shnum * sizeof(typename ELFT::Shdr); |
1483 | const unsigned flags = |
1484 | config->mmapOutputFile ? 0 : (unsigned)FileOutputBuffer::F_no_mmap; |
1485 | unlinkAsync(path: config->cmseOutputLib); |
1486 | Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr = |
1487 | FileOutputBuffer::create(FilePath: config->cmseOutputLib, Size: fileSize, Flags: flags); |
1488 | if (!bufferOrErr) { |
1489 | error(msg: "failed to open " + config->cmseOutputLib + ": " + |
1490 | llvm::toString(E: bufferOrErr.takeError())); |
1491 | return; |
1492 | } |
1493 | |
1494 | // Write the ELF Header |
1495 | std::unique_ptr<FileOutputBuffer> &buffer = *bufferOrErr; |
1496 | uint8_t *const buf = buffer->getBufferStart(); |
1497 | memcpy(dest: buf, src: "\177ELF" , n: 4); |
1498 | auto *eHdr = reinterpret_cast<typename ELFT::Ehdr *>(buf); |
1499 | eHdr->e_type = ET_REL; |
1500 | eHdr->e_entry = 0; |
1501 | eHdr->e_shoff = sectionHeaderOff; |
1502 | eHdr->e_ident[EI_CLASS] = ELFCLASS32; |
1503 | eHdr->e_ident[EI_DATA] = config->isLE ? ELFDATA2LSB : ELFDATA2MSB; |
1504 | eHdr->e_ident[EI_VERSION] = EV_CURRENT; |
1505 | eHdr->e_ident[EI_OSABI] = config->osabi; |
1506 | eHdr->e_ident[EI_ABIVERSION] = 0; |
1507 | eHdr->e_machine = EM_ARM; |
1508 | eHdr->e_version = EV_CURRENT; |
1509 | eHdr->e_flags = config->eflags; |
1510 | eHdr->e_ehsize = sizeof(typename ELFT::Ehdr); |
1511 | eHdr->e_phnum = 0; |
1512 | eHdr->e_shentsize = sizeof(typename ELFT::Shdr); |
1513 | eHdr->e_phoff = 0; |
1514 | eHdr->e_phentsize = 0; |
1515 | eHdr->e_shnum = shnum; |
1516 | eHdr->e_shstrndx = shstrtab->getParent()->sectionIndex; |
1517 | |
1518 | // Write the section header table. |
1519 | auto *sHdrs = reinterpret_cast<typename ELFT::Shdr *>(buf + eHdr->e_shoff); |
1520 | for (auto &[osec, _] : osIsPairs) |
1521 | osec->template writeHeaderTo<ELFT>(++sHdrs); |
1522 | |
1523 | // Write section contents to a mmap'ed file. |
1524 | { |
1525 | parallel::TaskGroup tg; |
1526 | for (auto &[osec, _] : osIsPairs) |
1527 | osec->template writeTo<ELFT>(buf + osec->offset, tg); |
1528 | } |
1529 | |
1530 | if (auto e = buffer->commit()) |
1531 | fatal(msg: "failed to write output '" + buffer->getPath() + |
1532 | "': " + toString(E: std::move(e))); |
1533 | } |
1534 | |
1535 | TargetInfo *elf::getARMTargetInfo() { |
1536 | static ARM target; |
1537 | return ⌖ |
1538 | } |
1539 | |
// Explicit instantiations of the CMSE import-library writer for all four
// ELF layouts (32/64-bit, little/big-endian) supported here.
template void elf::writeARMCmseImportLib<ELF32LE>();
template void elf::writeARMCmseImportLib<ELF32BE>();
template void elf::writeARMCmseImportLib<ELF64LE>();
template void elf::writeARMCmseImportLib<ELF64BE>();

// Explicit instantiations of ObjFile's CMSE symbol importer for the same
// four ELF layouts.
template void ObjFile<ELF32LE>::importCmseSymbols();
template void ObjFile<ELF32BE>::importCmseSymbols();
template void ObjFile<ELF64LE>::importCmseSymbols();
template void ObjFile<ELF64BE>::importCmseSymbols();
1549 | |