| 1 | //===- ARM.cpp ------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "InputFiles.h" |
| 10 | #include "OutputSections.h" |
| 11 | #include "SymbolTable.h" |
| 12 | #include "Symbols.h" |
| 13 | #include "SyntheticSections.h" |
| 14 | #include "Target.h" |
| 15 | #include "lld/Common/Filesystem.h" |
| 16 | #include "llvm/BinaryFormat/ELF.h" |
| 17 | #include "llvm/Support/Endian.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | using namespace llvm::support::endian; |
| 21 | using namespace llvm::support; |
| 22 | using namespace llvm::ELF; |
| 23 | using namespace lld; |
| 24 | using namespace lld::elf; |
| 25 | using namespace llvm::object; |
| 26 | |
namespace {
// TargetInfo implementation for 32-bit Arm (ARM and Thumb instruction sets).
class ARM final : public TargetInfo {
public:
  ARM(Ctx &);
  // Computes the ELF header e_flags (EABI version, float ABI, BE8).
  uint32_t calcEFlags() const override;
  // Maps an ELF relocation type to the linker's internal RelExpr.
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  // Writers for .got.plt/.igot.plt entries and PLT code sequences.
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void addPltSymbols(InputSection &isec, uint64_t off) const override;
  void addPltHeaderSymbols(InputSection &isd) const override;
  // Range-extension / ARM-Thumb interworking thunk support.
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;

  // Maps an InputSection to mapping symbols defined in it; presumably used to
  // track ARM/Thumb/data state transitions (e.g. for BE8 byte swapping) —
  // NOTE(review): the users of this field are outside this chunk, confirm.
  DenseMap<InputSection *, SmallVector<const Defined *, 0>> sectionMap;

private:
  void encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val,
                      int group, bool check) const;
};
// State of a byte range as described by mapping symbols. The numeric values
// presumably reflect the minimum unit size (Thumb = 2 bytes, Arm = 4 bytes) —
// NOTE(review): confirm at the use sites, which are not visible in this chunk.
enum class CodeState { Data = 0, Thumb = 2, Arm = 4 };
} // namespace
| 59 | |
// Configure the generic TargetInfo knobs with the Arm-specific dynamic
// relocation types and PLT geometry used by the rest of the linker.
ARM::ARM(Ctx &ctx) : TargetInfo(ctx) {
  // Dynamic relocation types emitted for this target.
  copyRel = R_ARM_COPY;
  relativeRel = R_ARM_RELATIVE;
  iRelativeRel = R_ARM_IRELATIVE;
  gotRel = R_ARM_GLOB_DAT;
  pltRel = R_ARM_JUMP_SLOT;
  symbolicRel = R_ARM_ABS32;
  // TLS dynamic relocation types.
  tlsGotRel = R_ARM_TLS_TPOFF32;
  tlsModuleIndexRel = R_ARM_TLS_DTPMOD32;
  tlsOffsetRel = R_ARM_TLS_DTPOFF32;
  // Must match the code sequences produced by writePltHeader()/writePlt().
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  // 0xd4d4d4d4 is also used below as padding in the PLT writers.
  trapInstr = {0xd4, 0xd4, 0xd4, 0xd4};
  needsThunks = true;
  defaultMaxPageSize = 65536;
}
| 77 | |
| 78 | uint32_t ARM::calcEFlags() const { |
| 79 | // The ABIFloatType is used by loaders to detect the floating point calling |
| 80 | // convention. |
| 81 | uint32_t abiFloatType = 0; |
| 82 | |
| 83 | // Set the EF_ARM_BE8 flag in the ELF header, if ELF file is big-endian |
| 84 | // with BE-8 code. |
| 85 | uint32_t armBE8 = 0; |
| 86 | |
| 87 | if (ctx.arg.armVFPArgs == ARMVFPArgKind::Base || |
| 88 | ctx.arg.armVFPArgs == ARMVFPArgKind::Default) |
| 89 | abiFloatType = EF_ARM_ABI_FLOAT_SOFT; |
| 90 | else if (ctx.arg.armVFPArgs == ARMVFPArgKind::VFP) |
| 91 | abiFloatType = EF_ARM_ABI_FLOAT_HARD; |
| 92 | |
| 93 | if (!ctx.arg.isLE && ctx.arg.armBe8) |
| 94 | armBE8 = EF_ARM_BE8; |
| 95 | |
| 96 | // We don't currently use any features incompatible with EF_ARM_EABI_VER5, |
| 97 | // but we don't have any firm guarantees of conformance. Linux AArch64 |
| 98 | // kernels (as of 2016) require an EABI version to be set. |
| 99 | return EF_ARM_EABI_VER5 | abiFloatType | armBE8; |
| 100 | } |
| 101 | |
// Classify each ELF relocation type into the internal RelExpr describing how
// its value is computed (absolute, PC-relative, GOT-relative, ...).
RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
                        const uint8_t *loc) const {
  switch (type) {
  case R_ARM_ABS32:
  case R_ARM_MOVW_ABS_NC:
  case R_ARM_MOVT_ABS:
  case R_ARM_THM_MOVW_ABS_NC:
  case R_ARM_THM_MOVT_ABS:
  case R_ARM_THM_ALU_ABS_G0_NC:
  case R_ARM_THM_ALU_ABS_G1_NC:
  case R_ARM_THM_ALU_ABS_G2_NC:
  case R_ARM_THM_ALU_ABS_G3:
    // S + A: plain absolute address.
    return R_ABS;
  case R_ARM_THM_JUMP8:
  case R_ARM_THM_JUMP11:
    // Short Thumb branches cannot reach a PLT entry; plain PC-relative.
    return R_PC;
  case R_ARM_CALL:
  case R_ARM_JUMP24:
  case R_ARM_PC24:
  case R_ARM_PLT32:
  case R_ARM_PREL31:
  case R_ARM_THM_JUMP19:
  case R_ARM_THM_JUMP24:
  case R_ARM_THM_CALL:
    // Branch/call relocations may be redirected through a PLT entry.
    return R_PLT_PC;
  case R_ARM_GOTOFF32:
    // (S + A) - GOT_ORG
    return R_GOTREL;
  case R_ARM_GOT_BREL:
    // GOT(S) + A - GOT_ORG
    return R_GOT_OFF;
  case R_ARM_GOT_PREL:
  case R_ARM_TLS_IE32:
    // GOT(S) + A - P
    return R_GOT_PC;
  case R_ARM_SBREL32:
    return RE_ARM_SBREL;
  case R_ARM_TARGET1:
    // R_ARM_TARGET1 semantics are chosen on the command line.
    return ctx.arg.target1Rel ? R_PC : R_ABS;
  case R_ARM_TARGET2:
    // R_ARM_TARGET2 semantics are chosen on the command line.
    if (ctx.arg.target2 == Target2Policy::Rel)
      return R_PC;
    if (ctx.arg.target2 == Target2Policy::Abs)
      return R_ABS;
    return R_GOT_PC;
  case R_ARM_TLS_GD32:
    return R_TLSGD_PC;
  case R_ARM_TLS_LDM32:
    return R_TLSLD_PC;
  case R_ARM_TLS_LDO32:
    return R_DTPREL;
  case R_ARM_BASE_PREL:
    // B(S) + A - P
    // FIXME: currently B(S) assumed to be .got, this may not hold for all
    // platforms.
    return R_GOTONLY_PC;
  case R_ARM_MOVW_PREL_NC:
  case R_ARM_MOVT_PREL:
  case R_ARM_REL32:
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVT_PREL:
    return R_PC;
  case R_ARM_ALU_PC_G0:
  case R_ARM_ALU_PC_G0_NC:
  case R_ARM_ALU_PC_G1:
  case R_ARM_ALU_PC_G1_NC:
  case R_ARM_ALU_PC_G2:
  case R_ARM_LDR_PC_G0:
  case R_ARM_LDR_PC_G1:
  case R_ARM_LDR_PC_G2:
  case R_ARM_LDRS_PC_G0:
  case R_ARM_LDRS_PC_G1:
  case R_ARM_LDRS_PC_G2:
  case R_ARM_THM_ALU_PREL_11_0:
  case R_ARM_THM_PC8:
  case R_ARM_THM_PC12:
    // Group relocations: PC-relative with Arm-specific P adjustment.
    return RE_ARM_PCA;
  case R_ARM_MOVW_BREL_NC:
  case R_ARM_MOVW_BREL:
  case R_ARM_MOVT_BREL:
  case R_ARM_THM_MOVW_BREL_NC:
  case R_ARM_THM_MOVW_BREL:
  case R_ARM_THM_MOVT_BREL:
    // Static-base-relative relocations.
    return RE_ARM_SBREL;
  case R_ARM_NONE:
    return R_NONE;
  case R_ARM_TLS_LE32:
    return R_TPREL;
  case R_ARM_V4BX:
    // V4BX is just a marker to indicate there's a "bx rN" instruction at the
    // given address. It can be used to implement a special linker mode which
    // rewrites ARMv4T inputs to ARMv4. Since we support only ARMv4T input and
    // not ARMv4 output, we can just ignore it.
    return R_NONE;
  default:
    Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
             << ") against symbol " << &s;
    return R_NONE;
  }
}
| 202 | |
| 203 | RelType ARM::getDynRel(RelType type) const { |
| 204 | if ((type == R_ARM_ABS32) || (type == R_ARM_TARGET1 && !ctx.arg.target1Rel)) |
| 205 | return R_ARM_ABS32; |
| 206 | return R_ARM_NONE; |
| 207 | } |
| 208 | |
| 209 | void ARM::writeGotPlt(uint8_t *buf, const Symbol &) const { |
| 210 | write32(ctx, p: buf, v: ctx.in.plt->getVA()); |
| 211 | } |
| 212 | |
| 213 | void ARM::writeIgotPlt(uint8_t *buf, const Symbol &s) const { |
| 214 | // An ARM entry is the address of the ifunc resolver function. |
| 215 | write32(ctx, p: buf, v: s.getVA(ctx)); |
| 216 | } |
| 217 | |
| 218 | // Long form PLT Header that does not have any restrictions on the displacement |
| 219 | // of the .plt from the .got.plt. |
| 220 | static void (Ctx &ctx, uint8_t *buf) { |
| 221 | write32(ctx, p: buf + 0, v: 0xe52de004); // str lr, [sp,#-4]! |
| 222 | write32(ctx, p: buf + 4, v: 0xe59fe004); // ldr lr, L2 |
| 223 | write32(ctx, p: buf + 8, v: 0xe08fe00e); // L1: add lr, pc, lr |
| 224 | write32(ctx, p: buf + 12, v: 0xe5bef008); // ldr pc, [lr, #8] |
| 225 | write32(ctx, p: buf + 16, v: 0x00000000); // L2: .word &(.got.plt) - L1 - 8 |
| 226 | write32(ctx, p: buf + 20, v: 0xd4d4d4d4); // Pad to 32-byte boundary |
| 227 | write32(ctx, p: buf + 24, v: 0xd4d4d4d4); // Pad to 32-byte boundary |
| 228 | write32(ctx, p: buf + 28, v: 0xd4d4d4d4); |
| 229 | uint64_t gotPlt = ctx.in.gotPlt->getVA(); |
| 230 | uint64_t l1 = ctx.in.plt->getVA() + 8; |
| 231 | write32(ctx, p: buf + 16, v: gotPlt - l1 - 8); |
| 232 | } |
| 233 | |
| 234 | // True if we should use Thumb PLTs, which currently require Thumb2, and are |
| 235 | // only used if the target does not have the ARM ISA. |
| 236 | static bool useThumbPLTs(Ctx &ctx) { |
| 237 | return ctx.arg.armHasThumb2ISA && !ctx.arg.armHasArmISA; |
| 238 | } |
| 239 | |
| 240 | // The default PLT header requires the .got.plt to be within 128 Mb of the |
| 241 | // .plt in the positive direction. |
| 242 | void ARM::(uint8_t *buf) const { |
| 243 | if (useThumbPLTs(ctx)) { |
| 244 | // The instruction sequence for thumb: |
| 245 | // |
| 246 | // 0: b500 push {lr} |
| 247 | // 2: f8df e008 ldr.w lr, [pc, #0x8] @ 0xe <func+0xe> |
| 248 | // 6: 44fe add lr, pc |
| 249 | // 8: f85e ff08 ldr pc, [lr, #8]! |
| 250 | // e: .word .got.plt - .plt - 16 |
| 251 | // |
| 252 | // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from |
| 253 | // `pc` in the add instruction and 8 bytes for the `lr` adjustment. |
| 254 | // |
| 255 | uint64_t offset = ctx.in.gotPlt->getVA() - ctx.in.plt->getVA() - 16; |
| 256 | assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset" ); |
| 257 | write16(ctx, p: buf + 0, v: 0xb500); |
| 258 | // Split into two halves to support endianness correctly. |
| 259 | write16(ctx, p: buf + 2, v: 0xf8df); |
| 260 | write16(ctx, p: buf + 4, v: 0xe008); |
| 261 | write16(ctx, p: buf + 6, v: 0x44fe); |
| 262 | // Split into two halves to support endianness correctly. |
| 263 | write16(ctx, p: buf + 8, v: 0xf85e); |
| 264 | write16(ctx, p: buf + 10, v: 0xff08); |
| 265 | write32(ctx, p: buf + 12, v: offset); |
| 266 | |
| 267 | memcpy(dest: buf + 16, src: trapInstr.data(), n: 4); // Pad to 32-byte boundary |
| 268 | memcpy(dest: buf + 20, src: trapInstr.data(), n: 4); |
| 269 | memcpy(dest: buf + 24, src: trapInstr.data(), n: 4); |
| 270 | memcpy(dest: buf + 28, src: trapInstr.data(), n: 4); |
| 271 | } else { |
| 272 | // Use a similar sequence to that in writePlt(), the difference is the |
| 273 | // calling conventions mean we use lr instead of ip. The PLT entry is |
| 274 | // responsible for saving lr on the stack, the dynamic loader is responsible |
| 275 | // for reloading it. |
| 276 | const uint32_t pltData[] = { |
| 277 | 0xe52de004, // L1: str lr, [sp,#-4]! |
| 278 | 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4) |
| 279 | 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4) |
| 280 | 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4) |
| 281 | }; |
| 282 | |
| 283 | uint64_t offset = ctx.in.gotPlt->getVA() - ctx.in.plt->getVA() - 4; |
| 284 | if (!llvm::isUInt<27>(x: offset)) { |
| 285 | // We cannot encode the Offset, use the long form. |
| 286 | writePltHeaderLong(ctx, buf); |
| 287 | return; |
| 288 | } |
| 289 | write32(ctx, p: buf + 0, v: pltData[0]); |
| 290 | write32(ctx, p: buf + 4, v: pltData[1] | ((offset >> 20) & 0xff)); |
| 291 | write32(ctx, p: buf + 8, v: pltData[2] | ((offset >> 12) & 0xff)); |
| 292 | write32(ctx, p: buf + 12, v: pltData[3] | (offset & 0xfff)); |
| 293 | memcpy(dest: buf + 16, src: trapInstr.data(), n: 4); // Pad to 32-byte boundary |
| 294 | memcpy(dest: buf + 20, src: trapInstr.data(), n: 4); |
| 295 | memcpy(dest: buf + 24, src: trapInstr.data(), n: 4); |
| 296 | memcpy(dest: buf + 28, src: trapInstr.data(), n: 4); |
| 297 | } |
| 298 | } |
| 299 | |
| 300 | void ARM::(InputSection &isec) const { |
| 301 | if (useThumbPLTs(ctx)) { |
| 302 | addSyntheticLocal(ctx, name: "$t" , type: STT_NOTYPE, value: 0, size: 0, section&: isec); |
| 303 | addSyntheticLocal(ctx, name: "$d" , type: STT_NOTYPE, value: 12, size: 0, section&: isec); |
| 304 | } else { |
| 305 | addSyntheticLocal(ctx, name: "$a" , type: STT_NOTYPE, value: 0, size: 0, section&: isec); |
| 306 | addSyntheticLocal(ctx, name: "$d" , type: STT_NOTYPE, value: 16, size: 0, section&: isec); |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | // Long form PLT entries that do not have any restrictions on the displacement |
| 311 | // of the .plt from the .got.plt. |
| 312 | static void writePltLong(Ctx &ctx, uint8_t *buf, uint64_t gotPltEntryAddr, |
| 313 | uint64_t pltEntryAddr) { |
| 314 | write32(ctx, p: buf + 0, v: 0xe59fc004); // ldr ip, L2 |
| 315 | write32(ctx, p: buf + 4, v: 0xe08cc00f); // L1: add ip, ip, pc |
| 316 | write32(ctx, p: buf + 8, v: 0xe59cf000); // ldr pc, [ip] |
| 317 | write32(ctx, p: buf + 12, v: 0x00000000); // L2: .word Offset(&(.got.plt) - L1 - 8 |
| 318 | uint64_t l1 = pltEntryAddr + 4; |
| 319 | write32(ctx, p: buf + 12, v: gotPltEntryAddr - l1 - 8); |
| 320 | } |
| 321 | |
| 322 | // The default PLT entries require the .got.plt to be within 128 Mb of the |
| 323 | // .plt in the positive direction. |
| 324 | void ARM::writePlt(uint8_t *buf, const Symbol &sym, |
| 325 | uint64_t pltEntryAddr) const { |
| 326 | if (!useThumbPLTs(ctx)) { |
| 327 | uint64_t offset = sym.getGotPltVA(ctx) - pltEntryAddr - 8; |
| 328 | |
| 329 | // The PLT entry is similar to the example given in Appendix A of ELF for |
| 330 | // the Arm Architecture. Instead of using the Group Relocations to find the |
| 331 | // optimal rotation for the 8-bit immediate used in the add instructions we |
| 332 | // hard code the most compact rotations for simplicity. This saves a load |
| 333 | // instruction over the long plt sequences. |
| 334 | const uint32_t pltData[] = { |
| 335 | 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8 |
| 336 | 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8 |
| 337 | 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8 |
| 338 | }; |
| 339 | if (!llvm::isUInt<27>(x: offset)) { |
| 340 | // We cannot encode the Offset, use the long form. |
| 341 | writePltLong(ctx, buf, gotPltEntryAddr: sym.getGotPltVA(ctx), pltEntryAddr); |
| 342 | return; |
| 343 | } |
| 344 | write32(ctx, p: buf + 0, v: pltData[0] | ((offset >> 20) & 0xff)); |
| 345 | write32(ctx, p: buf + 4, v: pltData[1] | ((offset >> 12) & 0xff)); |
| 346 | write32(ctx, p: buf + 8, v: pltData[2] | (offset & 0xfff)); |
| 347 | memcpy(dest: buf + 12, src: trapInstr.data(), n: 4); // Pad to 16-byte boundary |
| 348 | } else { |
| 349 | uint64_t offset = sym.getGotPltVA(ctx) - pltEntryAddr - 12; |
| 350 | assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset" ); |
| 351 | |
| 352 | // A PLT entry will be: |
| 353 | // |
| 354 | // movw ip, #<lower 16 bits> |
| 355 | // movt ip, #<upper 16 bits> |
| 356 | // add ip, pc |
| 357 | // L1: ldr.w pc, [ip] |
| 358 | // b L1 |
| 359 | // |
| 360 | // where ip = r12 = 0xc |
| 361 | |
| 362 | // movw ip, #<lower 16 bits> |
| 363 | write16(ctx, p: buf + 2, v: 0x0c00); // use `ip` |
| 364 | relocateNoSym(loc: buf, type: R_ARM_THM_MOVW_ABS_NC, val: offset); |
| 365 | |
| 366 | // movt ip, #<upper 16 bits> |
| 367 | write16(ctx, p: buf + 6, v: 0x0c00); // use `ip` |
| 368 | relocateNoSym(loc: buf + 4, type: R_ARM_THM_MOVT_ABS, val: offset); |
| 369 | |
| 370 | write16(ctx, p: buf + 8, v: 0x44fc); // add ip, pc |
| 371 | write16(ctx, p: buf + 10, v: 0xf8dc); // ldr.w pc, [ip] (bottom half) |
| 372 | write16(ctx, p: buf + 12, v: 0xf000); // ldr.w pc, [ip] (upper half) |
| 373 | write16(ctx, p: buf + 14, v: 0xe7fc); // Branch to previous instruction |
| 374 | } |
| 375 | } |
| 376 | |
| 377 | void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { |
| 378 | if (useThumbPLTs(ctx)) { |
| 379 | addSyntheticLocal(ctx, name: "$t" , type: STT_NOTYPE, value: off, size: 0, section&: isec); |
| 380 | } else { |
| 381 | addSyntheticLocal(ctx, name: "$a" , type: STT_NOTYPE, value: off, size: 0, section&: isec); |
| 382 | addSyntheticLocal(ctx, name: "$d" , type: STT_NOTYPE, value: off + 12, size: 0, section&: isec); |
| 383 | } |
| 384 | } |
| 385 | |
| 386 | bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, |
| 387 | uint64_t branchAddr, const Symbol &s, |
| 388 | int64_t a) const { |
| 389 | // If s is an undefined weak symbol and does not have a PLT entry then it will |
| 390 | // be resolved as a branch to the next instruction. If it is hidden, its |
| 391 | // binding has been converted to local, so we just check isUndefined() here. A |
| 392 | // undefined non-weak symbol will have been errored. |
| 393 | if (s.isUndefined() && !s.isInPlt(ctx)) |
| 394 | return false; |
| 395 | // A state change from ARM to Thumb and vice versa must go through an |
| 396 | // interworking thunk if the relocation type is not R_ARM_CALL or |
| 397 | // R_ARM_THM_CALL. |
| 398 | switch (type) { |
| 399 | case R_ARM_PC24: |
| 400 | case R_ARM_PLT32: |
| 401 | case R_ARM_JUMP24: |
| 402 | // Source is ARM, all PLT entries are ARM so no interworking required. |
| 403 | // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). |
| 404 | assert(!useThumbPLTs(ctx) && |
| 405 | "If the source is ARM, we should not need Thumb PLTs" ); |
| 406 | if (s.isFunc() && expr == R_PC && (s.getVA(ctx) & 1)) |
| 407 | return true; |
| 408 | [[fallthrough]]; |
| 409 | case R_ARM_CALL: { |
| 410 | uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA(ctx) : s.getVA(ctx); |
| 411 | return !inBranchRange(type, src: branchAddr, dst: dst + a) || |
| 412 | (!ctx.arg.armHasBlx && (s.getVA(ctx) & 1)); |
| 413 | } |
| 414 | case R_ARM_THM_JUMP19: |
| 415 | case R_ARM_THM_JUMP24: |
| 416 | // Source is Thumb, when all PLT entries are ARM interworking is required. |
| 417 | // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). |
| 418 | if ((expr == R_PLT_PC && !useThumbPLTs(ctx)) || |
| 419 | (s.isFunc() && (s.getVA(ctx) & 1) == 0)) |
| 420 | return true; |
| 421 | [[fallthrough]]; |
| 422 | case R_ARM_THM_CALL: { |
| 423 | uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA(ctx) : s.getVA(ctx); |
| 424 | return !inBranchRange(type, src: branchAddr, dst: dst + a) || |
| 425 | (!ctx.arg.armHasBlx && (s.getVA(ctx) & 1) == 0); |
| 426 | } |
| 427 | } |
| 428 | return false; |
| 429 | } |
| 430 | |
| 431 | uint32_t ARM::getThunkSectionSpacing() const { |
| 432 | // The placing of pre-created ThunkSections is controlled by the value |
| 433 | // thunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to |
| 434 | // place the ThunkSection such that all branches from the InputSections |
| 435 | // prior to the ThunkSection can reach a Thunk placed at the end of the |
| 436 | // ThunkSection. Graphically: |
| 437 | // | up to thunkSectionSpacing .text input sections | |
| 438 | // | ThunkSection | |
| 439 | // | up to thunkSectionSpacing .text input sections | |
| 440 | // | ThunkSection | |
| 441 | |
| 442 | // Pre-created ThunkSections are spaced roughly 16MiB apart on ARMv7. This |
| 443 | // is to match the most common expected case of a Thumb 2 encoded BL, BLX or |
| 444 | // B.W: |
| 445 | // ARM B, BL, BLX range +/- 32MiB |
| 446 | // Thumb B.W, BL, BLX range +/- 16MiB |
| 447 | // Thumb B<cc>.W range +/- 1MiB |
| 448 | // If a branch cannot reach a pre-created ThunkSection a new one will be |
| 449 | // created so we can handle the rare cases of a Thumb 2 conditional branch. |
| 450 | // We intentionally use a lower size for thunkSectionSpacing than the maximum |
| 451 | // branch range so the end of the ThunkSection is more likely to be within |
| 452 | // range of the branch instruction that is furthest away. The value we shorten |
| 453 | // thunkSectionSpacing by is set conservatively to allow us to create 16,384 |
| 454 | // 12 byte Thunks at any offset in a ThunkSection without risk of a branch to |
| 455 | // one of the Thunks going out of range. |
| 456 | |
| 457 | // On Arm the thunkSectionSpacing depends on the range of the Thumb Branch |
| 458 | // range. On earlier Architectures such as ARMv4, ARMv5 and ARMv6 (except |
| 459 | // ARMv6T2) the range is +/- 4MiB. |
| 460 | |
| 461 | return (ctx.arg.armJ1J2BranchEncoding) ? 0x1000000 - 0x30000 |
| 462 | : 0x400000 - 0x7500; |
| 463 | } |
| 464 | |
| 465 | bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { |
| 466 | if ((dst & 0x1) == 0) |
| 467 | // Destination is ARM, if ARM caller then Src is already 4-byte aligned. |
| 468 | // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure |
| 469 | // destination will be 4 byte aligned. |
| 470 | src &= ~0x3; |
| 471 | else |
| 472 | // Bit 0 == 1 denotes Thumb state, it is not part of the range. |
| 473 | dst &= ~0x1; |
| 474 | |
| 475 | int64_t offset = dst - src; |
| 476 | switch (type) { |
| 477 | case R_ARM_PC24: |
| 478 | case R_ARM_PLT32: |
| 479 | case R_ARM_JUMP24: |
| 480 | case R_ARM_CALL: |
| 481 | return llvm::isInt<26>(x: offset); |
| 482 | case R_ARM_THM_JUMP19: |
| 483 | return llvm::isInt<21>(x: offset); |
| 484 | case R_ARM_THM_JUMP24: |
| 485 | case R_ARM_THM_CALL: |
| 486 | return ctx.arg.armJ1J2BranchEncoding ? llvm::isInt<25>(x: offset) |
| 487 | : llvm::isInt<23>(x: offset); |
| 488 | default: |
| 489 | return true; |
| 490 | } |
| 491 | } |
| 492 | |
| 493 | // Helper to produce message text when LLD detects that a CALL relocation to |
| 494 | // a non STT_FUNC symbol that may result in incorrect interworking between ARM |
| 495 | // or Thumb. |
| 496 | static void stateChangeWarning(Ctx &ctx, uint8_t *loc, RelType relt, |
| 497 | const Symbol &s) { |
| 498 | assert(!s.isFunc()); |
| 499 | const ErrorPlace place = getErrorPlace(ctx, loc); |
| 500 | std::string hint; |
| 501 | if (!place.srcLoc.empty()) |
| 502 | hint = "; " + place.srcLoc; |
| 503 | if (s.isSection()) { |
| 504 | // Section symbols must be defined and in a section. Users cannot change |
| 505 | // the type. Use the section name as getName() returns an empty string. |
| 506 | Warn(ctx) << place.loc << "branch and link relocation: " << relt |
| 507 | << " to STT_SECTION symbol " << cast<Defined>(Val: s).section->name |
| 508 | << " ; interworking not performed" << hint; |
| 509 | } else { |
| 510 | // Warn with hint on how to alter the symbol type. |
| 511 | Warn(ctx) |
| 512 | << getErrorLoc(ctx, loc) << "branch and link relocation: " << relt |
| 513 | << " to non STT_FUNC symbol: " << s.getName() |
| 514 | << " interworking not performed; consider using directive '.type " |
| 515 | << s.getName() |
| 516 | << ", %function' to give symbol type STT_FUNC if interworking between " |
| 517 | "ARM and Thumb is required" |
| 518 | << hint; |
| 519 | } |
| 520 | } |
| 521 | |
// Rotate a 32-bit unsigned value right by `amt` bits (0 <= amt < 32).
static uint32_t rotr32(uint32_t val, uint32_t amt) {
  assert(amt < 32 && "Invalid rotate amount");
  // Masking the left-shift count with 31 keeps amt == 0 well-defined.
  uint32_t low = val >> amt;
  uint32_t high = val << ((32 - amt) & 31);
  return low | high;
}
| 527 | |
| 528 | static std::pair<uint32_t, uint32_t> getRemAndLZForGroup(unsigned group, |
| 529 | uint32_t val) { |
| 530 | uint32_t rem, lz; |
| 531 | do { |
| 532 | lz = llvm::countl_zero(Val: val) & ~1; |
| 533 | rem = val; |
| 534 | if (lz == 32) // implies rem == 0 |
| 535 | break; |
| 536 | val &= 0xffffff >> lz; |
| 537 | } while (group--); |
| 538 | return {rem, lz}; |
| 539 | } |
| 540 | |
| 541 | void ARM::encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val, |
| 542 | int group, bool check) const { |
| 543 | // ADD/SUB (immediate) add = bit23, sub = bit22 |
| 544 | // immediate field carries is a 12-bit modified immediate, made up of a 4-bit |
| 545 | // even rotate right and an 8-bit immediate. |
| 546 | uint32_t opcode = 0x00800000; |
| 547 | if (val >> 63) { |
| 548 | opcode = 0x00400000; |
| 549 | val = -val; |
| 550 | } |
| 551 | uint32_t imm, lz; |
| 552 | std::tie(args&: imm, args&: lz) = getRemAndLZForGroup(group, val); |
| 553 | uint32_t rot = 0; |
| 554 | if (lz < 24) { |
| 555 | imm = rotr32(val: imm, amt: 24 - lz); |
| 556 | rot = (lz + 8) << 7; |
| 557 | } |
| 558 | if (check && imm > 0xff) |
| 559 | Err(ctx) << getErrorLoc(ctx, loc) << "unencodeable immediate " << val |
| 560 | << " for relocation " << rel.type; |
| 561 | write32(ctx, p: loc, |
| 562 | v: (read32(ctx, p: loc) & 0xff3ff000) | opcode | rot | (imm & 0xff)); |
| 563 | } |
| 564 | |
| 565 | static void encodeLdrGroup(Ctx &ctx, uint8_t *loc, const Relocation &rel, |
| 566 | uint64_t val, int group) { |
| 567 | // R_ARM_LDR_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a |
| 568 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
| 569 | // bottom bit to recover S + A - P. |
| 570 | if (rel.sym->isFunc()) |
| 571 | val &= ~0x1; |
| 572 | // LDR (literal) u = bit23 |
| 573 | uint32_t opcode = 0x00800000; |
| 574 | if (val >> 63) { |
| 575 | opcode = 0x0; |
| 576 | val = -val; |
| 577 | } |
| 578 | uint32_t imm = getRemAndLZForGroup(group, val).first; |
| 579 | checkUInt(ctx, loc, v: imm, n: 12, rel); |
| 580 | write32(ctx, p: loc, v: (read32(ctx, p: loc) & 0xff7ff000) | opcode | imm); |
| 581 | } |
| 582 | |
| 583 | static void encodeLdrsGroup(Ctx &ctx, uint8_t *loc, const Relocation &rel, |
| 584 | uint64_t val, int group) { |
| 585 | // R_ARM_LDRS_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a |
| 586 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
| 587 | // bottom bit to recover S + A - P. |
| 588 | if (rel.sym->isFunc()) |
| 589 | val &= ~0x1; |
| 590 | // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 |
| 591 | uint32_t opcode = 0x00800000; |
| 592 | if (val >> 63) { |
| 593 | opcode = 0x0; |
| 594 | val = -val; |
| 595 | } |
| 596 | uint32_t imm = getRemAndLZForGroup(group, val).first; |
| 597 | checkUInt(ctx, loc, v: imm, n: 8, rel); |
| 598 | write32(ctx, p: loc, |
| 599 | v: (read32(ctx, p: loc) & 0xff7ff0f0) | opcode | ((imm & 0xf0) << 4) | |
| 600 | (imm & 0xf)); |
| 601 | } |
| 602 | |
| 603 | void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { |
| 604 | switch (rel.type) { |
| 605 | case R_ARM_ABS32: |
| 606 | case R_ARM_BASE_PREL: |
| 607 | case R_ARM_GOTOFF32: |
| 608 | case R_ARM_GOT_BREL: |
| 609 | case R_ARM_GOT_PREL: |
| 610 | case R_ARM_REL32: |
| 611 | case R_ARM_RELATIVE: |
| 612 | case R_ARM_SBREL32: |
| 613 | case R_ARM_TARGET1: |
| 614 | case R_ARM_TARGET2: |
| 615 | case R_ARM_TLS_GD32: |
| 616 | case R_ARM_TLS_IE32: |
| 617 | case R_ARM_TLS_LDM32: |
| 618 | case R_ARM_TLS_LDO32: |
| 619 | case R_ARM_TLS_LE32: |
| 620 | case R_ARM_TLS_TPOFF32: |
| 621 | case R_ARM_TLS_DTPOFF32: |
| 622 | write32(ctx, p: loc, v: val); |
| 623 | break; |
| 624 | case R_ARM_PREL31: |
| 625 | checkInt(ctx, loc, v: val, n: 31, rel); |
| 626 | write32(ctx, p: loc, v: (read32(ctx, p: loc) & 0x80000000) | (val & ~0x80000000)); |
| 627 | break; |
| 628 | case R_ARM_CALL: { |
| 629 | // R_ARM_CALL is used for BL and BLX instructions, for symbols of type |
| 630 | // STT_FUNC we choose whether to write a BL or BLX depending on the |
| 631 | // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is |
| 632 | // not of type STT_FUNC then we must preserve the original instruction. |
| 633 | assert(rel.sym); // R_ARM_CALL is always reached via relocate(). |
| 634 | bool bit0Thumb = val & 1; |
| 635 | bool isBlx = (read32(ctx, p: loc) & 0xfe000000) == 0xfa000000; |
| 636 | // lld 10.0 and before always used bit0Thumb when deciding to write a BLX |
| 637 | // even when type not STT_FUNC. |
| 638 | if (!rel.sym->isFunc() && isBlx != bit0Thumb) |
| 639 | stateChangeWarning(ctx, loc, relt: rel.type, s: *rel.sym); |
| 640 | if (rel.sym->isFunc() ? bit0Thumb : isBlx) { |
| 641 | // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1' |
| 642 | checkInt(ctx, loc, v: val, n: 26, rel); |
| 643 | write32(ctx, p: loc, |
| 644 | v: 0xfa000000 | // opcode |
| 645 | ((val & 2) << 23) | // H |
| 646 | ((val >> 2) & 0x00ffffff)); // imm24 |
| 647 | break; |
| 648 | } |
| 649 | // BLX (always unconditional) instruction to an ARM Target, select an |
| 650 | // unconditional BL. |
| 651 | write32(ctx, p: loc, v: 0xeb000000 | (read32(ctx, p: loc) & 0x00ffffff)); |
| 652 | // fall through as BL encoding is shared with B |
| 653 | } |
| 654 | [[fallthrough]]; |
| 655 | case R_ARM_JUMP24: |
| 656 | case R_ARM_PC24: |
| 657 | case R_ARM_PLT32: |
| 658 | checkInt(ctx, loc, v: val, n: 26, rel); |
| 659 | write32(ctx, p: loc, |
| 660 | v: (read32(ctx, p: loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff)); |
| 661 | break; |
| 662 | case R_ARM_THM_JUMP8: |
| 663 | // We do a 9 bit check because val is right-shifted by 1 bit. |
| 664 | checkInt(ctx, loc, v: val, n: 9, rel); |
| 665 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & 0xff00) | ((val >> 1) & 0x00ff)); |
| 666 | break; |
| 667 | case R_ARM_THM_JUMP11: |
| 668 | // We do a 12 bit check because val is right-shifted by 1 bit. |
| 669 | checkInt(ctx, loc, v: val, n: 12, rel); |
| 670 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & 0xf800) | ((val >> 1) & 0x07ff)); |
| 671 | break; |
| 672 | case R_ARM_THM_JUMP19: |
| 673 | // Encoding T3: Val = S:J2:J1:imm6:imm11:0 |
| 674 | checkInt(ctx, loc, v: val, n: 21, rel); |
| 675 | write16(ctx, p: loc, |
| 676 | v: (read16(ctx, p: loc) & 0xfbc0) | // opcode cond |
| 677 | ((val >> 10) & 0x0400) | // S |
| 678 | ((val >> 12) & 0x003f)); // imm6 |
| 679 | write16(ctx, p: loc + 2, |
| 680 | v: 0x8000 | // opcode |
| 681 | ((val >> 8) & 0x0800) | // J2 |
| 682 | ((val >> 5) & 0x2000) | // J1 |
| 683 | ((val >> 1) & 0x07ff)); // imm11 |
| 684 | break; |
| 685 | case R_ARM_THM_CALL: { |
| 686 | // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type |
| 687 | // STT_FUNC we choose whether to write a BL or BLX depending on the |
| 688 | // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is |
| 689 | // not of type STT_FUNC then we must preserve the original instruction. |
| 690 | // PLT entries are always ARM state so we know we need to interwork. |
| 691 | assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate(). |
| 692 | bool bit0Thumb = val & 1; |
| 693 | bool useThumb = bit0Thumb || useThumbPLTs(ctx); |
| 694 | bool isBlx = (read16(ctx, p: loc + 2) & 0x1000) == 0; |
| 695 | // lld 10.0 and before always used bit0Thumb when deciding to write a BLX |
| 696 | // even when type not STT_FUNC. |
| 697 | if (!rel.sym->isFunc() && !rel.sym->isInPlt(ctx) && isBlx == useThumb) |
| 698 | stateChangeWarning(ctx, loc, relt: rel.type, s: *rel.sym); |
| 699 | if ((rel.sym->isFunc() || rel.sym->isInPlt(ctx)) ? !useThumb : isBlx) { |
| 700 | // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As |
| 701 | // the BLX instruction may only be two byte aligned. This must be done |
| 702 | // before overflow check. |
| 703 | val = alignTo(Value: val, Align: 4); |
| 704 | write16(ctx, p: loc + 2, v: read16(ctx, p: loc + 2) & ~0x1000); |
| 705 | } else { |
| 706 | write16(ctx, p: loc + 2, v: (read16(ctx, p: loc + 2) & ~0x1000) | 1 << 12); |
| 707 | } |
| 708 | if (!ctx.arg.armJ1J2BranchEncoding) { |
| 709 | // Older Arm architectures do not support R_ARM_THM_JUMP24 and have |
| 710 | // different encoding rules and range due to J1 and J2 always being 1. |
| 711 | checkInt(ctx, loc, v: val, n: 23, rel); |
| 712 | write16(ctx, p: loc, |
| 713 | v: 0xf000 | // opcode |
| 714 | ((val >> 12) & 0x07ff)); // imm11 |
| 715 | write16(ctx, p: loc + 2, |
| 716 | v: (read16(ctx, p: loc + 2) & 0xd000) | // opcode |
| 717 | 0x2800 | // J1 == J2 == 1 |
| 718 | ((val >> 1) & 0x07ff)); // imm11 |
| 719 | break; |
| 720 | } |
| 721 | } |
| 722 | // Fall through as rest of encoding is the same as B.W |
| 723 | [[fallthrough]]; |
| 724 | case R_ARM_THM_JUMP24: |
| 725 | // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0 |
| 726 | checkInt(ctx, loc, v: val, n: 25, rel); |
| 727 | write16(ctx, p: loc, |
| 728 | v: 0xf000 | // opcode |
| 729 | ((val >> 14) & 0x0400) | // S |
| 730 | ((val >> 12) & 0x03ff)); // imm10 |
| 731 | write16(ctx, p: loc + 2, |
| 732 | v: (read16(ctx, p: loc + 2) & 0xd000) | // opcode |
| 733 | (((~(val >> 10)) ^ (val >> 11)) & 0x2000) | // J1 |
| 734 | (((~(val >> 11)) ^ (val >> 13)) & 0x0800) | // J2 |
| 735 | ((val >> 1) & 0x07ff)); // imm11 |
| 736 | break; |
| 737 | case R_ARM_MOVW_ABS_NC: |
| 738 | case R_ARM_MOVW_PREL_NC: |
| 739 | case R_ARM_MOVW_BREL_NC: |
| 740 | write32(ctx, p: loc, |
| 741 | v: (read32(ctx, p: loc) & ~0x000f0fff) | ((val & 0xf000) << 4) | |
| 742 | (val & 0x0fff)); |
| 743 | break; |
| 744 | case R_ARM_MOVT_ABS: |
| 745 | case R_ARM_MOVT_PREL: |
| 746 | case R_ARM_MOVT_BREL: |
| 747 | write32(ctx, p: loc, |
| 748 | v: (read32(ctx, p: loc) & ~0x000f0fff) | (((val >> 16) & 0xf000) << 4) | |
| 749 | ((val >> 16) & 0xfff)); |
| 750 | break; |
| 751 | case R_ARM_THM_MOVT_ABS: |
| 752 | case R_ARM_THM_MOVT_PREL: |
| 753 | case R_ARM_THM_MOVT_BREL: |
| 754 | // Encoding T1: A = imm4:i:imm3:imm8 |
| 755 | |
| 756 | write16(ctx, p: loc, |
| 757 | v: 0xf2c0 | // opcode |
| 758 | ((val >> 17) & 0x0400) | // i |
| 759 | ((val >> 28) & 0x000f)); // imm4 |
| 760 | |
| 761 | write16(ctx, p: loc + 2, |
| 762 | v: (read16(ctx, p: loc + 2) & 0x8f00) | // opcode |
| 763 | ((val >> 12) & 0x7000) | // imm3 |
| 764 | ((val >> 16) & 0x00ff)); // imm8 |
| 765 | break; |
| 766 | case R_ARM_THM_MOVW_ABS_NC: |
| 767 | case R_ARM_THM_MOVW_PREL_NC: |
| 768 | case R_ARM_THM_MOVW_BREL_NC: |
| 769 | // Encoding T3: A = imm4:i:imm3:imm8 |
| 770 | write16(ctx, p: loc, |
| 771 | v: 0xf240 | // opcode |
| 772 | ((val >> 1) & 0x0400) | // i |
| 773 | ((val >> 12) & 0x000f)); // imm4 |
| 774 | write16(ctx, p: loc + 2, |
| 775 | v: (read16(ctx, p: loc + 2) & 0x8f00) | // opcode |
| 776 | ((val << 4) & 0x7000) | // imm3 |
| 777 | (val & 0x00ff)); // imm8 |
| 778 | break; |
| 779 | case R_ARM_THM_ALU_ABS_G3: |
| 780 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & ~0x00ff) | ((val >> 24) & 0x00ff)); |
| 781 | break; |
| 782 | case R_ARM_THM_ALU_ABS_G2_NC: |
| 783 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & ~0x00ff) | ((val >> 16) & 0x00ff)); |
| 784 | break; |
| 785 | case R_ARM_THM_ALU_ABS_G1_NC: |
| 786 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & ~0x00ff) | ((val >> 8) & 0x00ff)); |
| 787 | break; |
| 788 | case R_ARM_THM_ALU_ABS_G0_NC: |
| 789 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & ~0x00ff) | (val & 0x00ff)); |
| 790 | break; |
| 791 | case R_ARM_ALU_PC_G0: |
| 792 | encodeAluGroup(loc, rel, val, group: 0, check: true); |
| 793 | break; |
| 794 | case R_ARM_ALU_PC_G0_NC: |
| 795 | encodeAluGroup(loc, rel, val, group: 0, check: false); |
| 796 | break; |
| 797 | case R_ARM_ALU_PC_G1: |
| 798 | encodeAluGroup(loc, rel, val, group: 1, check: true); |
| 799 | break; |
| 800 | case R_ARM_ALU_PC_G1_NC: |
| 801 | encodeAluGroup(loc, rel, val, group: 1, check: false); |
| 802 | break; |
| 803 | case R_ARM_ALU_PC_G2: |
| 804 | encodeAluGroup(loc, rel, val, group: 2, check: true); |
| 805 | break; |
| 806 | case R_ARM_LDR_PC_G0: |
| 807 | encodeLdrGroup(ctx, loc, rel, val, group: 0); |
| 808 | break; |
| 809 | case R_ARM_LDR_PC_G1: |
| 810 | encodeLdrGroup(ctx, loc, rel, val, group: 1); |
| 811 | break; |
| 812 | case R_ARM_LDR_PC_G2: |
| 813 | encodeLdrGroup(ctx, loc, rel, val, group: 2); |
| 814 | break; |
| 815 | case R_ARM_LDRS_PC_G0: |
| 816 | encodeLdrsGroup(ctx, loc, rel, val, group: 0); |
| 817 | break; |
| 818 | case R_ARM_LDRS_PC_G1: |
| 819 | encodeLdrsGroup(ctx, loc, rel, val, group: 1); |
| 820 | break; |
| 821 | case R_ARM_LDRS_PC_G2: |
| 822 | encodeLdrsGroup(ctx, loc, rel, val, group: 2); |
| 823 | break; |
| 824 | case R_ARM_THM_ALU_PREL_11_0: { |
| 825 | // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 |
| 826 | int64_t imm = val; |
| 827 | uint16_t sub = 0; |
| 828 | if (imm < 0) { |
| 829 | imm = -imm; |
| 830 | sub = 0x00a0; |
| 831 | } |
| 832 | checkUInt(ctx, loc, v: imm, n: 12, rel); |
| 833 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & 0xfb0f) | sub | (imm & 0x800) >> 1); |
| 834 | write16(ctx, p: loc + 2, |
| 835 | v: (read16(ctx, p: loc + 2) & 0x8f00) | (imm & 0x700) << 4 | |
| 836 | (imm & 0xff)); |
| 837 | break; |
| 838 | } |
| 839 | case R_ARM_THM_PC8: |
| 840 | // ADR and LDR literal encoding T1 positive offset only imm8:00 |
| 841 | // R_ARM_THM_PC8 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a |
| 842 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
| 843 | // bottom bit to recover S + A - Pa. |
| 844 | if (rel.sym->isFunc()) |
| 845 | val &= ~0x1; |
| 846 | checkUInt(ctx, loc, v: val, n: 10, rel); |
| 847 | checkAlignment(ctx, loc, v: val, n: 4, rel); |
| 848 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & 0xff00) | (val & 0x3fc) >> 2); |
| 849 | break; |
| 850 | case R_ARM_THM_PC12: { |
| 851 | // LDR (literal) encoding T2, add = (U == '1') imm12 |
| 852 | // imm12 is unsigned |
| 853 | // R_ARM_THM_PC12 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a |
| 854 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
| 855 | // bottom bit to recover S + A - Pa. |
| 856 | if (rel.sym->isFunc()) |
| 857 | val &= ~0x1; |
| 858 | int64_t imm12 = val; |
| 859 | uint16_t u = 0x0080; |
| 860 | if (imm12 < 0) { |
| 861 | imm12 = -imm12; |
| 862 | u = 0; |
| 863 | } |
| 864 | checkUInt(ctx, loc, v: imm12, n: 12, rel); |
| 865 | write16(ctx, p: loc, v: read16(ctx, p: loc) | u); |
| 866 | write16(ctx, p: loc + 2, v: (read16(ctx, p: loc + 2) & 0xf000) | imm12); |
| 867 | break; |
| 868 | } |
| 869 | default: |
| 870 | llvm_unreachable("unknown relocation" ); |
| 871 | } |
| 872 | } |
| 873 | |
// Extract the implicit addend stored at `buf` for relocation `type`. This is
// the inverse of the corresponding encodings written by ARM::relocate(): the
// immediate fields of each instruction encoding are gathered back into a
// sign-extended byte offset. Relocation types not listed here do not support
// implicit addends in this implementation.
int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  default:
    InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
    return 0;
  // Data relocations: the addend is the whole 32-bit word at the location.
  case R_ARM_ABS32:
  case R_ARM_BASE_PREL:
  case R_ARM_GLOB_DAT:
  case R_ARM_GOTOFF32:
  case R_ARM_GOT_BREL:
  case R_ARM_GOT_PREL:
  case R_ARM_IRELATIVE:
  case R_ARM_REL32:
  case R_ARM_RELATIVE:
  case R_ARM_SBREL32:
  case R_ARM_TARGET1:
  case R_ARM_TARGET2:
  case R_ARM_TLS_DTPMOD32:
  case R_ARM_TLS_DTPOFF32:
  case R_ARM_TLS_GD32:
  case R_ARM_TLS_IE32:
  case R_ARM_TLS_LDM32:
  case R_ARM_TLS_LE32:
  case R_ARM_TLS_LDO32:
  case R_ARM_TLS_TPOFF32:
    return SignExtend64<32>(x: read32(ctx, p: buf));
  case R_ARM_PREL31:
    // Top bit of the word is not part of the addend.
    return SignExtend64<31>(x: read32(ctx, p: buf));
  // Arm-state branches: 24-bit word offset, shifted left 2 to a byte offset.
  case R_ARM_CALL:
  case R_ARM_JUMP24:
  case R_ARM_PC24:
  case R_ARM_PLT32:
    return SignExtend64<26>(x: read32(ctx, p: buf) << 2);
  // Thumb-state short branches: halfword offsets, shifted left 1.
  case R_ARM_THM_JUMP8:
    return SignExtend64<9>(x: read16(ctx, p: buf) << 1);
  case R_ARM_THM_JUMP11:
    return SignExtend64<12>(x: read16(ctx, p: buf) << 1);
  case R_ARM_THM_JUMP19: {
    // Encoding T3: A = S:J2:J1:imm6:imm11:0
    uint16_t hi = read16(ctx, p: buf);
    uint16_t lo = read16(ctx, p: buf + 2);
    return SignExtend64<20>(x: ((hi & 0x0400) << 10) | // S
                               ((lo & 0x0800) << 8) |  // J2
                               ((lo & 0x2000) << 5) |  // J1
                               ((hi & 0x003f) << 12) | // imm6
                               ((lo & 0x07ff) << 1));  // imm11:0
  }
  case R_ARM_THM_CALL:
    if (!ctx.arg.armJ1J2BranchEncoding) {
      // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
      // different encoding rules and range due to J1 and J2 always being 1.
      uint16_t hi = read16(ctx, p: buf);
      uint16_t lo = read16(ctx, p: buf + 2);
      return SignExtend64<22>(x: ((hi & 0x7ff) << 12) | // imm11
                                 ((lo & 0x7ff) << 1));  // imm11:0
      break; // NOTE: unreachable after the return above.
    }
    [[fallthrough]];
  case R_ARM_THM_JUMP24: {
    // Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0
    // I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S)
    uint16_t hi = read16(ctx, p: buf);
    uint16_t lo = read16(ctx, p: buf + 2);
    return SignExtend64<24>(x: ((hi & 0x0400) << 14) |                    // S
                               (~((lo ^ (hi << 3)) << 10) & 0x00800000) | // I1
                               (~((lo ^ (hi << 1)) << 11) & 0x00400000) | // I2
                               ((hi & 0x003ff) << 12) |                   // imm10
                               ((lo & 0x007ff) << 1));                    // imm11:0
  }
  // ELF for the ARM Architecture 4.6.1.1 the implicit addend for MOVW and
  // MOVT is in the range -32768 <= A < 32768
  case R_ARM_MOVW_ABS_NC:
  case R_ARM_MOVT_ABS:
  case R_ARM_MOVW_PREL_NC:
  case R_ARM_MOVT_PREL:
  case R_ARM_MOVW_BREL_NC:
  case R_ARM_MOVT_BREL: {
    // Arm MOVW/MOVT: A = imm4:imm12, stored in bits [19:16] and [11:0].
    uint64_t val = read32(ctx, p: buf) & 0x000f0fff;
    return SignExtend64<16>(x: ((val & 0x000f0000) >> 4) | (val & 0x00fff));
  }
  case R_ARM_THM_MOVW_ABS_NC:
  case R_ARM_THM_MOVT_ABS:
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVT_PREL:
  case R_ARM_THM_MOVW_BREL_NC:
  case R_ARM_THM_MOVT_BREL: {
    // Encoding T3: A = imm4:i:imm3:imm8
    uint16_t hi = read16(ctx, p: buf);
    uint16_t lo = read16(ctx, p: buf + 2);
    return SignExtend64<16>(x: ((hi & 0x000f) << 12) | // imm4
                               ((hi & 0x0400) << 1) |  // i
                               ((lo & 0x7000) >> 4) |  // imm3
                               (lo & 0x00ff));         // imm8
  }
  // Thumb ADD/SUB (immediate): the addend is the low 8 immediate bits.
  case R_ARM_THM_ALU_ABS_G0_NC:
  case R_ARM_THM_ALU_ABS_G1_NC:
  case R_ARM_THM_ALU_ABS_G2_NC:
  case R_ARM_THM_ALU_ABS_G3:
    return read16(ctx, p: buf) & 0xff;
  case R_ARM_ALU_PC_G0:
  case R_ARM_ALU_PC_G0_NC:
  case R_ARM_ALU_PC_G1:
  case R_ARM_ALU_PC_G1_NC:
  case R_ARM_ALU_PC_G2: {
    // 12-bit immediate is a modified immediate made up of a 4-bit even
    // right rotation and 8-bit constant. After the rotation the value
    // is zero-extended. When bit 23 is set the instruction is an add, when
    // bit 22 is set it is a sub.
    uint32_t instr = read32(ctx, p: buf);
    uint32_t val = rotr32(val: instr & 0xff, amt: ((instr & 0xf00) >> 8) * 2);
    return (instr & 0x00400000) ? -val : val;
  }
  case R_ARM_LDR_PC_G0:
  case R_ARM_LDR_PC_G1:
  case R_ARM_LDR_PC_G2: {
    // ADR (literal) add = bit23, sub = bit22
    // LDR (literal) u = bit23 unsigned imm12
    bool u = read32(ctx, p: buf) & 0x00800000;
    uint32_t imm12 = read32(ctx, p: buf) & 0xfff;
    return u ? imm12 : -imm12;
  }
  case R_ARM_LDRS_PC_G0:
  case R_ARM_LDRS_PC_G1:
  case R_ARM_LDRS_PC_G2: {
    // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 unsigned imm8
    // The 8-bit immediate is split as imm4h:imm4l across bits [11:8], [3:0].
    uint32_t opcode = read32(ctx, p: buf);
    bool u = opcode & 0x00800000;
    uint32_t imm4l = opcode & 0xf;
    uint32_t imm4h = (opcode & 0xf00) >> 4;
    return u ? (imm4h | imm4l) : -(imm4h | imm4l);
  }
  case R_ARM_THM_ALU_PREL_11_0: {
    // Thumb2 ADR, which is an alias for a sub or add instruction with an
    // unsigned immediate.
    // ADR encoding T2 (sub), T3 (add) i:imm3:imm8
    uint16_t hi = read16(ctx, p: buf);
    uint16_t lo = read16(ctx, p: buf + 2);
    uint64_t imm = (hi & 0x0400) << 1 | // i
                   (lo & 0x7000) >> 4 | // imm3
                   (lo & 0x00ff);       // imm8
    // For sub, addend is negative, add is positive.
    return (hi & 0x00f0) ? -imm : imm;
  }
  case R_ARM_THM_PC8:
    // ADR and LDR (literal) encoding T1
    // From ELF for the ARM Architecture the initial signed addend is formed
    // from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) ā 4)
    // this trick permits the PC bias of -4 to be encoded using imm8 = 0xff
    return ((((read16(ctx, p: buf) & 0xff) << 2) + 4) & 0x3ff) - 4;
  case R_ARM_THM_PC12: {
    // LDR (literal) encoding T2, add = (U == '1') imm12
    bool u = read16(ctx, p: buf) & 0x0080;
    uint64_t imm12 = read16(ctx, p: buf + 2) & 0x0fff;
    return u ? imm12 : -imm12;
  }
  case R_ARM_NONE:
  case R_ARM_V4BX:
  case R_ARM_JUMP_SLOT:
    // These relocations are defined as not having an implicit addend.
    return 0;
  }
}
| 1036 | |
| 1037 | static bool isArmMapSymbol(const Symbol *b) { |
| 1038 | return b->getName() == "$a" || b->getName().starts_with(Prefix: "$a." ); |
| 1039 | } |
| 1040 | |
| 1041 | static bool isThumbMapSymbol(const Symbol *s) { |
| 1042 | return s->getName() == "$t" || s->getName().starts_with(Prefix: "$t." ); |
| 1043 | } |
| 1044 | |
| 1045 | static bool isDataMapSymbol(const Symbol *b) { |
| 1046 | return b->getName() == "$d" || b->getName().starts_with(Prefix: "$d." ); |
| 1047 | } |
| 1048 | |
| 1049 | void elf::sortArmMappingSymbols(Ctx &ctx) { |
| 1050 | // For each input section make sure the mapping symbols are sorted in |
| 1051 | // ascending order. |
| 1052 | for (auto &kv : static_cast<ARM &>(*ctx.target).sectionMap) { |
| 1053 | SmallVector<const Defined *, 0> &mapSyms = kv.second; |
| 1054 | llvm::stable_sort(Range&: mapSyms, C: [](const Defined *a, const Defined *b) { |
| 1055 | return a->value < b->value; |
| 1056 | }); |
| 1057 | } |
| 1058 | } |
| 1059 | |
| 1060 | void elf::addArmInputSectionMappingSymbols(Ctx &ctx) { |
| 1061 | // Collect mapping symbols for every executable input sections. |
| 1062 | // The linker generated mapping symbols for all the synthetic |
| 1063 | // sections are adding into the sectionmap through the function |
| 1064 | // addArmSyntheitcSectionMappingSymbol. |
| 1065 | auto §ionMap = static_cast<ARM &>(*ctx.target).sectionMap; |
| 1066 | for (ELFFileBase *file : ctx.objectFiles) { |
| 1067 | for (Symbol *sym : file->getLocalSymbols()) { |
| 1068 | auto *def = dyn_cast<Defined>(Val: sym); |
| 1069 | if (!def) |
| 1070 | continue; |
| 1071 | if (!isArmMapSymbol(b: def) && !isDataMapSymbol(b: def) && |
| 1072 | !isThumbMapSymbol(s: def)) |
| 1073 | continue; |
| 1074 | if (auto *sec = cast_if_present<InputSection>(Val: def->section)) |
| 1075 | if (sec->flags & SHF_EXECINSTR) |
| 1076 | sectionMap[sec].push_back(Elt: def); |
| 1077 | } |
| 1078 | } |
| 1079 | } |
| 1080 | |
| 1081 | // Synthetic sections are not backed by an ELF file where we can access the |
| 1082 | // symbol table, instead mapping symbols added to synthetic sections are stored |
| 1083 | // in the synthetic symbol table. Due to the presence of strip (--strip-all), |
| 1084 | // we can not rely on the synthetic symbol table retaining the mapping symbols. |
| 1085 | // Instead we record the mapping symbols locally. |
| 1086 | void elf::addArmSyntheticSectionMappingSymbol(Defined *sym) { |
| 1087 | if (!isArmMapSymbol(b: sym) && !isDataMapSymbol(b: sym) && !isThumbMapSymbol(s: sym)) |
| 1088 | return; |
| 1089 | if (auto *sec = cast_if_present<InputSection>(Val: sym->section)) |
| 1090 | if (sec->flags & SHF_EXECINSTR) |
| 1091 | static_cast<ARM &>(*sec->file->ctx.target).sectionMap[sec].push_back(Elt: sym); |
| 1092 | } |
| 1093 | |
| 1094 | static void toLittleEndianInstructions(uint8_t *buf, uint64_t start, |
| 1095 | uint64_t end, uint64_t width) { |
| 1096 | CodeState curState = static_cast<CodeState>(width); |
| 1097 | if (curState == CodeState::Arm) |
| 1098 | for (uint64_t i = start; i < end; i += width) |
| 1099 | write32le(P: buf + i, V: read32be(P: buf + i)); |
| 1100 | |
| 1101 | if (curState == CodeState::Thumb) |
| 1102 | for (uint64_t i = start; i < end; i += width) |
| 1103 | write16le(P: buf + i, V: read16be(P: buf + i)); |
| 1104 | } |
| 1105 | |
| 1106 | // Arm BE8 big endian format requires instructions to be little endian, with |
| 1107 | // the initial contents big-endian. Convert the big-endian instructions to |
| 1108 | // little endian leaving literal data untouched. We use mapping symbols to |
| 1109 | // identify half open intervals of Arm code [$a, non $a) and Thumb code |
| 1110 | // [$t, non $t) and convert these to little endian a word or half word at a |
| 1111 | // time respectively. |
void elf::convertArmInstructionstoBE8(Ctx &ctx, InputSection *sec,
                                      uint8_t *buf) {
  // Find the mapping symbols recorded for this section; sections without any
  // mapping symbols contain nothing we need to convert.
  auto &sectionMap = static_cast<ARM &>(*ctx.target).sectionMap;
  auto it = sectionMap.find(Val: sec);
  if (it == sectionMap.end())
    return;

  SmallVector<const Defined *, 0> &mapSyms = it->second;

  if (mapSyms.empty())
    return;

  // Walk the (address-sorted) mapping symbols, converting each closed-out
  // Arm/Thumb interval as soon as the state changes. Data intervals are left
  // untouched.
  CodeState curState = CodeState::Data;
  uint64_t start = 0, width = 0, size = sec->getSize();
  for (auto &msym : mapSyms) {
    CodeState newState = CodeState::Data;
    if (isThumbMapSymbol(s: msym))
      newState = CodeState::Thumb;
    else if (isArmMapSymbol(b: msym))
      newState = CodeState::Arm;

    // Consecutive symbols with the same state extend the current interval.
    if (newState == curState)
      continue;

    if (curState != CodeState::Data) {
      // CodeState's numeric value is the swap width (4 for Arm, 2 for Thumb).
      width = static_cast<uint64_t>(curState);
      toLittleEndianInstructions(buf, start, end: msym->value, width);
    }
    start = msym->value;
    curState = newState;
  }

  // Passed last mapping symbol, may need to reverse
  // up to end of section.
  if (curState != CodeState::Data) {
    width = static_cast<uint64_t>(curState);
    toLittleEndianInstructions(buf, start, end: size, width);
  }
}
| 1151 | |
| 1152 | // The Arm Cortex-M Security Extensions (CMSE) splits a system into two parts; |
| 1153 | // the non-secure and secure states with the secure state inaccessible from the |
| 1154 | // non-secure state, apart from an area of memory in secure state called the |
| 1155 | // secure gateway which is accessible from non-secure state. The secure gateway |
| 1156 | // contains one or more entry points which must start with a landing pad |
| 1157 | // instruction SG. Arm recommends that the secure gateway consists only of |
| 1158 | // secure gateway veneers, which are made up of a SG instruction followed by a |
| 1159 | // branch to the destination in secure state. Full details can be found in Arm |
| 1160 | // v8-M Security Extensions Requirements on Development Tools. |
| 1161 | // |
| 1162 | // The CMSE model of software development requires the non-secure and secure |
| 1163 | // states to be developed as two separate programs. The non-secure developer is |
| 1164 | // provided with an import library defining symbols describing the entry points |
| 1165 | // in the secure gateway. No additional linker support is required for the |
| 1166 | // non-secure state. |
| 1167 | // |
| 1168 | // Development of the secure state requires linker support to manage the secure |
| 1169 | // gateway veneers. The management consists of: |
| 1170 | // - Creation of new secure gateway veneers based on symbol conventions. |
| 1171 | // - Checking the address of existing secure gateway veneers. |
// - Warning when existing secure gateway veneers are removed.
| 1173 | // |
| 1174 | // The secure gateway veneers are created in an import library, which is just an |
| 1175 | // ELF object with a symbol table. The import library is controlled by two |
| 1176 | // command line options: |
| 1177 | // --in-implib (specify an input import library from a previous revision of the |
| 1178 | // program). |
| 1179 | // --out-implib (specify an output import library to be created by the linker). |
| 1180 | // |
| 1181 | // The input import library is used to manage consistency of the secure entry |
| 1182 | // points. The output import library is for new and updated secure entry points. |
| 1183 | // |
// The symbol convention that identifies secure entry functions is the prefix
// __acle_se_: for a symbol called name, the linker is expected to create a
// secure gateway veneer if symbols __acle_se_name and name have the same
// address.
| 1187 | // After creating a secure gateway veneer the symbol name labels the secure |
| 1188 | // gateway veneer and the __acle_se_name labels the function definition. |
| 1189 | // |
| 1190 | // The LLD implementation: |
| 1191 | // - Reads an existing import library with importCmseSymbols(). |
| 1192 | // - Determines which new secure gateway veneers to create and redirects calls |
| 1193 | // within the secure state to the __acle_se_ prefixed symbol with |
| 1194 | // processArmCmseSymbols(). |
| 1195 | // - Models the SG veneers as a synthetic section. |
| 1196 | |
// Read the symbols of a CMSE import library (--in-implib) produced by a
// previous link. Each valid entry -- a global, SHN_ABS, Thumb STT_FUNC symbol
// -- is recorded in ctx.symtab->cmseImportLib so that the addresses of
// existing secure gateway veneers can be kept stable across links. Invalid
// entries are diagnosed and skipped.
template <class ELFT> void ObjFile<ELFT>::importCmseSymbols() {
  ArrayRef<Elf_Sym> eSyms = getELFSyms<ELFT>();
  // Error for local symbols. The symbol at index 0 is LOCAL. So skip it.
  for (size_t i = 1, end = firstGlobal; i != end; ++i) {
    Err(ctx) << "CMSE symbol '" << CHECK2(eSyms[i].getName(stringTable), this)
             << "' in import library '" << this << "' is not global";
  }

  for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
    const Elf_Sym &eSym = eSyms[i];
    // Allocate storage large enough for any symbol kind without running a
    // constructor; the fields are filled in manually below.
    Defined *sym = reinterpret_cast<Defined *>(make<SymbolUnion>());

    // Initialize symbol fields.
    memset(s: static_cast<void *>(sym), c: 0, n: sizeof(Symbol));
    sym->setName(CHECK2(eSyms[i].getName(stringTable), this));
    sym->value = eSym.st_value;
    sym->size = eSym.st_size;
    sym->type = eSym.getType();
    sym->binding = eSym.getBinding();
    sym->stOther = eSym.st_other;

    // Import-library symbols must be absolute: they name fixed veneer
    // addresses, not section-relative definitions.
    if (eSym.st_shndx != SHN_ABS) {
      Err(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '"
               << this << "' is not absolute";
      continue;
    }

    // Bit 0 set on the value marks a Thumb function address.
    if (!(eSym.st_value & 1) || (eSym.getType() != STT_FUNC)) {
      Err(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '"
               << this << "' is not a Thumb function definition";
      continue;
    }

    if (ctx.symtab->cmseImportLib.count(Key: sym->getName())) {
      Err(ctx) << "CMSE symbol '" << sym->getName()
               << "' is multiply defined in import library '" << this << "'";
      continue;
    }

    // A size mismatch is suspicious but not fatal: warn and keep the symbol.
    if (eSym.st_size != ACLESESYM_SIZE) {
      Warn(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '"
                << this << "' does not have correct size of " << ACLESESYM_SIZE
                << " bytes";
    }

    ctx.symtab->cmseImportLib[sym->getName()] = sym;
  }
}
| 1247 | |
| 1248 | // Check symbol attributes of the acleSeSym, sym pair. |
| 1249 | // Both symbols should be global/weak Thumb code symbol definitions. |
| 1250 | static std::string checkCmseSymAttributes(Ctx &ctx, Symbol *acleSeSym, |
| 1251 | Symbol *sym) { |
| 1252 | auto check = [&](Symbol *s, StringRef type) -> std::optional<std::string> { |
| 1253 | auto d = dyn_cast_or_null<Defined>(Val: s); |
| 1254 | if (!(d && d->isFunc() && (d->value & 1))) |
| 1255 | return (Twine(toStr(ctx, f: s->file)) + ": cmse " + type + " symbol '" + |
| 1256 | s->getName() + "' is not a Thumb function definition" ) |
| 1257 | .str(); |
| 1258 | if (!d->section) |
| 1259 | return (Twine(toStr(ctx, f: s->file)) + ": cmse " + type + " symbol '" + |
| 1260 | s->getName() + "' cannot be an absolute symbol" ) |
| 1261 | .str(); |
| 1262 | return std::nullopt; |
| 1263 | }; |
| 1264 | for (auto [sym, type] : |
| 1265 | {std::make_pair(x&: acleSeSym, y: "special" ), std::make_pair(x&: sym, y: "entry" )}) |
| 1266 | if (auto err = check(sym, type)) |
| 1267 | return *err; |
| 1268 | return "" ; |
| 1269 | } |
| 1270 | |
// Look for [__acle_se_<sym>, <sym>] pairs, as specified in the Cortex-M
// Security Extensions specification.
// 1) <sym> : A standard function name.
// 2) __acle_se_<sym> : A special symbol that prefixes the standard function
// name with __acle_se_.
// Both these symbols are Thumb function symbols with external linkage.
// <sym> may be redefined in .gnu.sgstubs.
void elf::processArmCmseSymbols(Ctx &ctx) {
  if (!ctx.arg.cmseImplib)
    return;
  // Only symbols with external linkage end up in ctx.symtab, so no need to do
  // linkage checks. Only check symbol type.
  for (Symbol *acleSeSym : ctx.symtab->getSymbols()) {
    if (!acleSeSym->getName().starts_with(Prefix: ACLESESYM_PREFIX))
      continue;
    // If input object build attributes do not support CMSE, error and disable
    // further scanning for <sym>, __acle_se_<sym> pairs.
    if (!ctx.arg.armCMSESupport) {
      Err(ctx) << "CMSE is only supported by ARMv8-M architecture or later";
      ctx.arg.cmseImplib = false;
      break;
    }

    // Try to find the associated symbol definition.
    // Symbol must have external linkage.
    StringRef name = acleSeSym->getName().substr(Start: std::strlen(s: ACLESESYM_PREFIX));
    Symbol *sym = ctx.symtab->find(name);
    if (!sym) {
      Err(ctx) << acleSeSym->file << ": cmse special symbol '"
               << acleSeSym->getName()
               << "' detected, but no associated entry function definition '"
               << name << "' with external linkage found";
      continue;
    }

    // Both symbols must be Thumb function definitions backed by a section.
    std::string errMsg = checkCmseSymAttributes(ctx, acleSeSym, sym);
    if (!errMsg.empty()) {
      Err(ctx) << errMsg;
      continue;
    }

    // <sym> may be redefined later in the link in .gnu.sgstubs
    ctx.symtab->cmseSymMap[name] = {.acleSeSym: acleSeSym, .sym: sym};
  }

  // If this is an Arm CMSE secure app, replace references to entry symbol <sym>
  // with its corresponding special symbol __acle_se_<sym>.
  parallelForEach(R&: ctx.objectFiles, Fn: [&](InputFile *file) {
    MutableArrayRef<Symbol *> syms = file->getMutableSymbols();
    for (Symbol *&sym : syms) {
      StringRef symName = sym->getName();
      auto it = ctx.symtab->cmseSymMap.find(Key: symName);
      if (it != ctx.symtab->cmseSymMap.end())
        sym = it->second.acleSeSym;
    }
  });
}
| 1328 | |
// Construct the .gnu.sgstubs synthetic section holding the secure gateway
// veneers. Records the highest address used by the input import library,
// creates a veneer for every <sym>/__acle_se_<sym> pair found earlier, and
// warns about entry functions that disappeared or that will not be exported.
ArmCmseSGSection::ArmCmseSGSection(Ctx &ctx)
    : SyntheticSection(ctx, ".gnu.sgstubs", SHT_PROGBITS,
                       SHF_ALLOC | SHF_EXECINSTR,
                       /*addralign=*/32) {
  entsize = ACLESESYM_SIZE;
  // The range of addresses used in the CMSE import library should be fixed.
  for (auto &[_, sym] : ctx.symtab->cmseImportLib) {
    if (impLibMaxAddr <= sym->value)
      impLibMaxAddr = sym->value + sym->size;
  }
  if (ctx.symtab->cmseSymMap.empty())
    return;
  addMappingSymbol();
  for (auto &[_, entryFunc] : ctx.symtab->cmseSymMap)
    addSGVeneer(sym: cast<Defined>(Val: entryFunc.acleSeSym),
                ext_sym: cast<Defined>(Val: entryFunc.sym));
  // Every entry in the input import library is expected to still exist in the
  // current secure application; a missing one breaks non-secure callers built
  // against the old library.
  for (auto &[_, sym] : ctx.symtab->cmseImportLib) {
    if (!ctx.symtab->inCMSEOutImpLib.count(Key: sym->getName()))
      Warn(ctx)
          << "entry function '" << sym->getName()
          << "' from CMSE import library is not present in secure application";
  }

  // New entry functions cannot be consumed by non-secure code unless an
  // output import library (--out-implib) records them.
  if (!ctx.symtab->cmseImportLib.empty() && ctx.arg.cmseOutputLib.empty()) {
    for (auto &[_, entryFunc] : ctx.symtab->cmseSymMap) {
      Symbol *sym = entryFunc.sym;
      if (!ctx.symtab->inCMSEOutImpLib.count(Key: sym->getName()))
        Warn(ctx) << "new entry function '" << sym->getName()
                  << "' introduced but no output import library specified";
    }
  }
}
| 1361 | |
// Record the (acleSeSym, sym) entry-function pair and, when the two symbols
// share an address (i.e. no veneer exists yet in the input), create a secure
// gateway veneer for it. Veneers matching an import-library entry reuse that
// entry's fixed address; others are counted as new entries.
void ArmCmseSGSection::addSGVeneer(Symbol *acleSeSym, Symbol *sym) {
  entries.emplace_back(Args&: acleSeSym, Args&: sym);
  if (ctx.symtab->cmseImportLib.count(Key: sym->getName()))
    ctx.symtab->inCMSEOutImpLib[sym->getName()] = true;
  // Symbol addresses different, nothing to do.
  if (acleSeSym->file != sym->file ||
      cast<Defined>(Val&: *acleSeSym).value != cast<Defined>(Val&: *sym).value)
    return;
  // Only secure symbols with values equal to that of its non-secure
  // counterpart needs to be in the .gnu.sgstubs section.
  std::unique_ptr<ArmCmseSGVeneer> ss;
  auto it = ctx.symtab->cmseImportLib.find(Key: sym->getName());
  if (it != ctx.symtab->cmseImportLib.end()) {
    // Previously-exported veneer: pin it to the address from the import
    // library so non-secure images keep working.
    Defined *impSym = it->second;
    ss = std::make_unique<ArmCmseSGVeneer>(args&: sym, args&: acleSeSym, args&: impSym->value);
  } else {
    ss = std::make_unique<ArmCmseSGVeneer>(args&: sym, args&: acleSeSym);
    ++newEntries;
  }
  sgVeneers.emplace_back(Args: std::move(ss));
}
| 1383 | |
// Emit each secure gateway veneer: an SG instruction (encoded as the two
// halfwords 0xe97f 0xe97f) followed by a Thumb2 B.W to the __acle_se_ entry
// point, whose immediate is patched in via R_ARM_THM_JUMP24.
void ArmCmseSGSection::writeTo(uint8_t *buf) {
  for (std::unique_ptr<ArmCmseSGVeneer> &s : sgVeneers) {
    uint8_t *p = buf + s->offset;
    write16(ctx, p: p + 0, v: 0xe97f); // SG
    write16(ctx, p: p + 2, v: 0xe97f);
    write16(ctx, p: p + 4, v: 0xf000); // B.W S
    write16(ctx, p: p + 6, v: 0xb000);
    // Branch displacement is relative to the end of the veneer.
    ctx.target->relocateNoSym(loc: p + 4, type: R_ARM_THM_JUMP24,
                              val: s->acleSeSym->getVA(ctx) -
                                  (getVA() + s->offset + s->size));
  }
}
| 1396 | |
// Place a "$t" mapping symbol at the start of the section: the veneers are
// Thumb code.
void ArmCmseSGSection::addMappingSymbol() {
  addSyntheticLocal(ctx, name: "$t", type: STT_NOTYPE, /*off=*/value: 0, /*size=*/0, section&: *this);
}
| 1400 | |
// Size of .gnu.sgstubs. With no veneers to emit, reserve the fixed address
// range used by the input import library (if any) plus room for new entries;
// otherwise the size is simply one entry per recorded pair.
size_t ArmCmseSGSection::getSize() const {
  if (sgVeneers.empty())
    return (impLibMaxAddr ? impLibMaxAddr - getVA() : 0) + newEntries * entsize;

  return entries.size() * entsize;
}
| 1407 | |
// Lay out the veneers: those with fixed addresses (from the input import
// library) come first, sorted by address, followed by the new ones. Verify
// that the section's start still matches the previous link, then assign each
// veneer its offset and redefine the entry symbol <sym> to label the veneer.
void ArmCmseSGSection::finalizeContents() {
  if (sgVeneers.empty())
    return;

  // Move fixed-address veneers to the front, preserving relative order of the
  // rest, then sort the fixed partition by address.
  auto it =
      std::stable_partition(first: sgVeneers.begin(), last: sgVeneers.end(),
                            pred: [](auto &i) { return i->getAddr().has_value(); });
  std::sort(first: sgVeneers.begin(), last: it, comp: [](auto &a, auto &b) {
    return a->getAddr().value() < b->getAddr().value();
  });
  // This is the partition of the veneers with fixed addresses.
  uint64_t addr = (*sgVeneers.begin())->getAddr().has_value()
                      ? (*sgVeneers.begin())->getAddr().value()
                      : getVA();
  // Check if the start address of '.gnu.sgstubs' correspond to the
  // linker-synthesized veneer with the lowest address. Bit 0 (the Thumb bit)
  // is ignored in the comparison.
  if ((getVA() & ~1) != (addr & ~1)) {
    Err(ctx)
        << "start address of '.gnu.sgstubs' is different from previous link";
    return;
  }

  // Assign consecutive offsets and rebind each entry symbol <sym> to the
  // veneer (offset | 1 keeps the Thumb bit set on the symbol value).
  for (auto [i, s] : enumerate(First&: sgVeneers)) {
    s->offset = i * s->size;
    Defined(ctx, file, StringRef(), s->sym->binding, s->sym->stOther,
            s->sym->type, s->offset | 1, s->size, this)
        .overwrite(sym&: *s->sym);
  }
}
| 1437 | |
| 1438 | // Write the CMSE import library to disk. |
| 1439 | // The CMSE import library is a relocatable object with only a symbol table. |
| 1440 | // The symbols are copies of the (absolute) symbols of the secure gateways |
| 1441 | // in the executable output by this link. |
// See Arm® v8-M Security Extensions: Requirements on Development Tools
| 1443 | // https://developer.arm.com/documentation/ecm0359818/latest |
| 1444 | template <typename ELFT> void elf::writeARMCmseImportLib(Ctx &ctx) { |
| 1445 | auto shstrtab = |
| 1446 | std::make_unique<StringTableSection>(args&: ctx, args: ".shstrtab" , /*dynamic=*/args: false); |
| 1447 | auto strtab = |
| 1448 | std::make_unique<StringTableSection>(args&: ctx, args: ".strtab" , /*dynamic=*/args: false); |
| 1449 | auto impSymTab = std::make_unique<SymbolTableSection<ELFT>>(ctx, *strtab); |
| 1450 | |
| 1451 | SmallVector<std::pair<std::unique_ptr<OutputSection>, SyntheticSection *>, 0> |
| 1452 | osIsPairs; |
| 1453 | osIsPairs.emplace_back( |
| 1454 | Args: std::make_unique<OutputSection>(args&: ctx, args&: strtab->name, args: 0, args: 0), Args: strtab.get()); |
| 1455 | osIsPairs.emplace_back( |
| 1456 | std::make_unique<OutputSection>(ctx, impSymTab->name, 0, 0), |
| 1457 | impSymTab.get()); |
| 1458 | osIsPairs.emplace_back( |
| 1459 | Args: std::make_unique<OutputSection>(args&: ctx, args&: shstrtab->name, args: 0, args: 0), |
| 1460 | Args: shstrtab.get()); |
| 1461 | |
| 1462 | llvm::sort(ctx.symtab->cmseSymMap, [&](const auto &a, const auto &b) { |
| 1463 | return a.second.sym->getVA(ctx) < b.second.sym->getVA(ctx); |
| 1464 | }); |
| 1465 | // Copy the secure gateway entry symbols to the import library symbol table. |
| 1466 | for (auto &p : ctx.symtab->cmseSymMap) { |
| 1467 | Defined *d = cast<Defined>(Val: p.second.sym); |
| 1468 | impSymTab->addSymbol(makeDefined( |
| 1469 | args&: ctx, args&: ctx.internalFile, args: d->getName(), args: d->computeBinding(ctx), |
| 1470 | /*stOther=*/args: 0, args: STT_FUNC, args: d->getVA(ctx), args: d->getSize(), args: nullptr)); |
| 1471 | } |
| 1472 | |
| 1473 | size_t idx = 0; |
| 1474 | uint64_t off = sizeof(typename ELFT::Ehdr); |
| 1475 | for (auto &[osec, isec] : osIsPairs) { |
| 1476 | osec->sectionIndex = ++idx; |
| 1477 | osec->recordSection(isec); |
| 1478 | osec->finalizeInputSections(); |
| 1479 | osec->shName = shstrtab->addString(s: osec->name); |
| 1480 | osec->size = isec->getSize(); |
| 1481 | isec->finalizeContents(); |
| 1482 | osec->offset = alignToPowerOf2(Value: off, Align: osec->addralign); |
| 1483 | off = osec->offset + osec->size; |
| 1484 | } |
| 1485 | |
| 1486 | const uint64_t = alignToPowerOf2(Value: off, Align: ctx.arg.wordsize); |
| 1487 | const auto shnum = osIsPairs.size() + 1; |
| 1488 | const uint64_t fileSize = |
| 1489 | sectionHeaderOff + shnum * sizeof(typename ELFT::Shdr); |
| 1490 | const unsigned flags = |
| 1491 | ctx.arg.mmapOutputFile ? (unsigned)FileOutputBuffer::F_mmap : 0; |
| 1492 | unlinkAsync(path: ctx.arg.cmseOutputLib); |
| 1493 | Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr = |
| 1494 | FileOutputBuffer::create(FilePath: ctx.arg.cmseOutputLib, Size: fileSize, Flags: flags); |
| 1495 | if (!bufferOrErr) { |
| 1496 | Err(ctx) << "failed to open " << ctx.arg.cmseOutputLib << ": " |
| 1497 | << bufferOrErr.takeError(); |
| 1498 | return; |
| 1499 | } |
| 1500 | |
| 1501 | // Write the ELF Header |
| 1502 | std::unique_ptr<FileOutputBuffer> &buffer = *bufferOrErr; |
| 1503 | uint8_t *const buf = buffer->getBufferStart(); |
| 1504 | memcpy(dest: buf, src: "\177ELF" , n: 4); |
| 1505 | auto *eHdr = reinterpret_cast<typename ELFT::Ehdr *>(buf); |
| 1506 | eHdr->e_type = ET_REL; |
| 1507 | eHdr->e_entry = 0; |
| 1508 | eHdr->e_shoff = sectionHeaderOff; |
| 1509 | eHdr->e_ident[EI_CLASS] = ELFCLASS32; |
| 1510 | eHdr->e_ident[EI_DATA] = ctx.arg.isLE ? ELFDATA2LSB : ELFDATA2MSB; |
| 1511 | eHdr->e_ident[EI_VERSION] = EV_CURRENT; |
| 1512 | eHdr->e_ident[EI_OSABI] = ctx.arg.osabi; |
| 1513 | eHdr->e_ident[EI_ABIVERSION] = 0; |
| 1514 | eHdr->e_machine = EM_ARM; |
| 1515 | eHdr->e_version = EV_CURRENT; |
| 1516 | eHdr->e_flags = ctx.arg.eflags; |
| 1517 | eHdr->e_ehsize = sizeof(typename ELFT::Ehdr); |
| 1518 | eHdr->e_phnum = 0; |
| 1519 | eHdr->e_shentsize = sizeof(typename ELFT::Shdr); |
| 1520 | eHdr->e_phoff = 0; |
| 1521 | eHdr->e_phentsize = 0; |
| 1522 | eHdr->e_shnum = shnum; |
| 1523 | eHdr->e_shstrndx = shstrtab->getParent()->sectionIndex; |
| 1524 | |
| 1525 | // Write the section header table. |
| 1526 | auto *sHdrs = reinterpret_cast<typename ELFT::Shdr *>(buf + eHdr->e_shoff); |
| 1527 | for (auto &[osec, _] : osIsPairs) |
| 1528 | osec->template writeHeaderTo<ELFT>(++sHdrs); |
| 1529 | |
| 1530 | // Write section contents to a mmap'ed file. |
| 1531 | { |
| 1532 | parallel::TaskGroup tg; |
| 1533 | for (auto &[osec, _] : osIsPairs) |
| 1534 | osec->template writeTo<ELFT>(ctx, buf + osec->offset, tg); |
| 1535 | } |
| 1536 | |
| 1537 | if (auto e = buffer->commit()) |
| 1538 | Err(ctx) << "failed to write output '" << buffer->getPath() |
| 1539 | << "': " << std::move(e); |
| 1540 | } |
| 1541 | |
| 1542 | void elf::setARMTargetInfo(Ctx &ctx) { ctx.target.reset(p: new ARM(ctx)); } |
| 1543 | |
// Explicit instantiations for every supported ELF flavor so the template
// definitions above can live in this .cpp file.
template void elf::writeARMCmseImportLib<ELF32LE>(Ctx &);
template void elf::writeARMCmseImportLib<ELF32BE>(Ctx &);
template void elf::writeARMCmseImportLib<ELF64LE>(Ctx &);
template void elf::writeARMCmseImportLib<ELF64BE>(Ctx &);

template void ObjFile<ELF32LE>::importCmseSymbols();
template void ObjFile<ELF32BE>::importCmseSymbols();
template void ObjFile<ELF64LE>::importCmseSymbols();
template void ObjFile<ELF64BE>::importCmseSymbols();
| 1553 | |