1//===- AArch64.cpp --------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "OutputSections.h"
11#include "RelocScan.h"
12#include "Symbols.h"
13#include "SyntheticSections.h"
14#include "Target.h"
15#include "TargetImpl.h"
16#include "llvm/BinaryFormat/ELF.h"
17#include "llvm/Support/Endian.h"
18
19using namespace llvm;
20using namespace llvm::support::endian;
21using namespace llvm::ELF;
22using namespace lld;
23using namespace lld::elf;
24
25// Page(Expr) is the page address of the expression Expr, defined
26// as (Expr & ~0xFFF). (This applies even if the machine page size
27// supported by the platform has a different value.)
28uint64_t elf::getAArch64Page(uint64_t expr) {
29 return expr & ~static_cast<uint64_t>(0xFFF);
30}
31
32// A BTI landing pad is a valid target for an indirect branch when the Branch
33// Target Identification has been enabled. As linker generated branches are
34// via x16 the BTI landing pads are defined as: BTI C, BTI J, BTI JC, PACIASP,
35// PACIBSP.
36bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) {
37 // PLT entries accessed indirectly have a BTI c.
38 if (s.isInPlt(ctx))
39 return true;
40 Defined *d = dyn_cast<Defined>(Val: &s);
41 if (!isa_and_nonnull<InputSection>(Val: d->section))
42 // All places that we cannot disassemble are responsible for making
43 // the target a BTI landing pad.
44 return true;
45 InputSection *isec = cast<InputSection>(Val: d->section);
46 uint64_t off = d->value + a;
47 // Likely user error, but protect ourselves against out of bounds
48 // access.
49 if (off >= isec->getSize())
50 return true;
51 const uint8_t *buf = isec->content().begin();
52 // Synthetic sections may have a size but empty data - Assume that they won't
53 // contain a landing pad
54 if (buf == nullptr && isa<SyntheticSection>(Val: isec))
55 return false;
56
57 const uint32_t instr = read32le(P: buf + off);
58 // All BTI instructions are HINT instructions which all have same encoding
59 // apart from bits [11:5]
60 if ((instr & 0xd503201f) == 0xd503201f &&
61 is_contained(Set: {/*PACIASP*/ 0xd503233f, /*PACIBSP*/ 0xd503237f,
62 /*BTI C*/ 0xd503245f, /*BTI J*/ 0xd503249f,
63 /*BTI JC*/ 0xd50324df},
64 Element: instr))
65 return true;
66 return false;
67}
68
namespace {
// Target hooks for AArch64 (ELF64, little or big endian; the endianness is
// dispatched on ctx.arg.ekind in scanSection below).
class AArch64 : public TargetInfo {
public:
  AArch64(Ctx &);
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  // Relocation scanning, templated over the ELF type and REL/RELA form.
  template <class ELFT, class RelTy>
  void scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels);
  void scanSection(InputSectionBase &sec) override {
    if (ctx.arg.ekind == ELF64BEKind)
      elf::scanSection1<AArch64, ELF64BE>(target&: *this, sec);
    else
      elf::scanSection1<AArch64, ELF64LE>(target&: *this, sec);
  }
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  bool usesOnlyLowPageBits(RelType type) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  void relocateAlloc(InputSection &sec, uint8_t *buf) const override;
  void applyBranchToBranchOpt() const override;

private:
  // TLS relaxation helpers used during relocation application.
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
};

// Helper deciding whether ADRP+LDR / ADRP+ADD pairs can be rewritten to
// shorter sequences when the target is in direct-addressing range.
struct AArch64Relaxer {
  Ctx &ctx;
  // Set by the constructor: true when every R_AARCH64_ADR_GOT_PAGE is
  // immediately followed by its matching R_AARCH64_LD64_GOT_LO12_NC.
  bool safeToRelaxAdrpLdr = false;

  AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs);
  bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
                       uint64_t secAddr, uint8_t *buf) const;
  bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
                       uint64_t secAddr, uint8_t *buf) const;
};
} // namespace
118
// Extract the inclusive bit range [start, end] of val, shifted down so the
// lowest extracted bit lands at bit 0. For instance, getBits(0xF0, 4, 8)
// returns 0xF. Requires end - start + 1 < 64.
static uint64_t getBits(uint64_t val, int start, int end) {
  int width = end - start + 1;
  uint64_t fieldMask = (UINT64_C(1) << width) - 1;
  return (val >> start) & fieldMask;
}
125
// Populate the generic TargetInfo fields with the AArch64 dynamic relocation
// types and the PLT layout constants used by the synthetic sections.
AArch64::AArch64(Ctx &ctx) : TargetInfo(ctx) {
  copyRel = R_AARCH64_COPY;
  relativeRel = R_AARCH64_RELATIVE;
  iRelativeRel = R_AARCH64_IRELATIVE;
  iRelSymbolicRel = R_AARCH64_FUNCINIT64;
  gotRel = R_AARCH64_GLOB_DAT;
  pltRel = R_AARCH64_JUMP_SLOT;
  symbolicRel = R_AARCH64_ABS64;
  tlsDescRel = R_AARCH64_TLSDESC;
  tlsGotRel = R_AARCH64_TLS_TPREL64;
  // Eight 4-byte instructions in the header, four per entry; see
  // writePltHeader() and writePlt() below.
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  defaultMaxPageSize = 65536;

  // Align to the 2 MiB page size (known as a superpage or huge page).
  // FreeBSD automatically promotes 2 MiB-aligned allocations.
  defaultImageBase = 0x200000;

  needsThunks = true;
}
147
148// Only needed to support relocations used by relocateNonAlloc and
149// preprocessRelocs.
150RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
151 const uint8_t *loc) const {
152 switch (type) {
153 case R_AARCH64_ABS32:
154 case R_AARCH64_ABS64:
155 return R_ABS;
156 case R_AARCH64_PREL32:
157 case R_AARCH64_PREL64:
158 return R_PC;
159 case R_AARCH64_TLS_DTPREL64:
160 return R_DTPREL;
161 case R_AARCH64_NONE:
162 return R_NONE;
163 default:
164 Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
165 << ") against symbol " << &s;
166 return R_NONE;
167 }
168}
169
170bool AArch64::usesOnlyLowPageBits(RelType type) const {
171 switch (type) {
172 default:
173 return false;
174 case R_AARCH64_ADD_ABS_LO12_NC:
175 case R_AARCH64_LD64_GOT_LO12_NC:
176 case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
177 case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
178 case R_AARCH64_LDST128_ABS_LO12_NC:
179 case R_AARCH64_LDST16_ABS_LO12_NC:
180 case R_AARCH64_LDST32_ABS_LO12_NC:
181 case R_AARCH64_LDST64_ABS_LO12_NC:
182 case R_AARCH64_LDST8_ABS_LO12_NC:
183 case R_AARCH64_TLSDESC_ADD_LO12:
184 case R_AARCH64_TLSDESC_LD64_LO12:
185 case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
186 return true;
187 }
188}
189
// Scan all relocations of `sec`: map each relocation type to a RelExpr and
// record the GOT/PLT/TLS requirements of the referenced symbols via the
// RelocScan helper. The resulting Relocation entries are appended to
// sec.relocations.
template <class ELFT, class RelTy>
void AArch64::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
  RelocScan rs(ctx, &sec);
  sec.relocations.reserve(N: rels.size());

  for (auto it = rels.begin(); it != rels.end(); ++it) {
    const RelTy &rel = *it;
    uint32_t symIdx = rel.getSymbol(false);
    Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx);
    uint64_t offset = rel.r_offset;
    RelType type = rel.getType(false);
    // Diagnose references to undefined symbols (index 0 is the null symbol
    // and is skipped).
    if (sym.isUndefined() && symIdx != 0 &&
        rs.maybeReportUndefined(sym&: cast<Undefined>(Val&: sym), offset))
      continue;
    int64_t addend = rs.getAddend<ELFT>(rel, type);
    RelExpr expr;
    // Relocation types that only need a RelExpr set `expr` and break out of
    // the switch to reach rs.process(). Types that need special handling
    // (fast-path helpers, TLS) call a handler and use `continue`.
    switch (type) {
    case R_AARCH64_NONE:
      continue;

    // Absolute relocations:
    case R_AARCH64_ABS16:
    case R_AARCH64_ABS32:
    case R_AARCH64_ABS64:
    case R_AARCH64_FUNCINIT64:
    case R_AARCH64_ADD_ABS_LO12_NC:
    case R_AARCH64_LDST128_ABS_LO12_NC:
    case R_AARCH64_LDST16_ABS_LO12_NC:
    case R_AARCH64_LDST32_ABS_LO12_NC:
    case R_AARCH64_LDST64_ABS_LO12_NC:
    case R_AARCH64_LDST8_ABS_LO12_NC:
    case R_AARCH64_MOVW_SABS_G0:
    case R_AARCH64_MOVW_SABS_G1:
    case R_AARCH64_MOVW_SABS_G2:
    case R_AARCH64_MOVW_UABS_G0:
    case R_AARCH64_MOVW_UABS_G0_NC:
    case R_AARCH64_MOVW_UABS_G1:
    case R_AARCH64_MOVW_UABS_G1_NC:
    case R_AARCH64_MOVW_UABS_G2:
    case R_AARCH64_MOVW_UABS_G2_NC:
    case R_AARCH64_MOVW_UABS_G3:
      expr = R_ABS;
      break;

    case R_AARCH64_AUTH_ABS64:
      expr = RE_AARCH64_AUTH;
      break;

    // PATCHINST patches an instruction word; it is only meaningful against
    // absolute symbols, so anything else is diagnosed here.
    case R_AARCH64_PATCHINST:
      if (!isAbsolute(sym))
        Err(ctx) << getErrorLoc(ctx, loc: sec.content().data() + offset)
                 << "R_AARCH64_PATCHINST relocation against non-absolute "
                    "symbol "
                 << &sym;
      expr = R_ABS;
      break;

    // PC-relative relocations:
    case R_AARCH64_PREL16:
    case R_AARCH64_PREL32:
    case R_AARCH64_PREL64:
    case R_AARCH64_ADR_PREL_LO21:
    case R_AARCH64_LD_PREL_LO19:
    case R_AARCH64_MOVW_PREL_G0:
    case R_AARCH64_MOVW_PREL_G0_NC:
    case R_AARCH64_MOVW_PREL_G1:
    case R_AARCH64_MOVW_PREL_G1_NC:
    case R_AARCH64_MOVW_PREL_G2:
    case R_AARCH64_MOVW_PREL_G2_NC:
    case R_AARCH64_MOVW_PREL_G3:
      rs.processR_PC(type, offset, addend, sym);
      continue;

    // Page-PC relocations:
    case R_AARCH64_ADR_PREL_PG_HI21:
    case R_AARCH64_ADR_PREL_PG_HI21_NC:
      expr = RE_AARCH64_PAGE_PC;
      break;

    // PLT-generating relocations:
    case R_AARCH64_PLT32:
      // PLT32 may need a range-extension thunk; record that the symbol is
      // reached through a thunk-capable relocation.
      sym.thunkAccessed = true;
      [[fallthrough]];
    case R_AARCH64_CALL26:
    case R_AARCH64_CONDBR19:
    case R_AARCH64_JUMP26:
    case R_AARCH64_TSTBR14:
      rs.processR_PLT_PC(type, offset, addend, sym);
      continue;

    // GOT relocations:
    case R_AARCH64_ADR_GOT_PAGE:
      expr = RE_AARCH64_GOT_PAGE_PC;
      break;
    case R_AARCH64_LD64_GOT_LO12_NC:
      expr = R_GOT;
      break;
    case R_AARCH64_LD64_GOTPAGE_LO15:
      expr = RE_AARCH64_GOT_PAGE;
      break;
    case R_AARCH64_GOTPCREL32:
    case R_AARCH64_GOT_LD_PREL19:
      expr = R_GOT_PC;
      break;

    // AUTH GOT relocations. Set NEEDS_GOT_AUTH to detect incompatibility with
    // NEEDS_GOT_NONAUTH. rs.process does not set the flag.
    case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
    case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
      sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH);
      rs.processAux(expr: R_GOT, type, offset, sym, addend);
      continue;
    case R_AARCH64_AUTH_GOT_LD_PREL19:
    case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
      sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH);
      rs.processAux(expr: R_GOT_PC, type, offset, sym, addend);
      continue;
    case R_AARCH64_AUTH_ADR_GOT_PAGE:
      sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH);
      rs.processAux(expr: RE_AARCH64_GOT_PAGE_PC, type, offset, sym, addend);
      continue;

    // TLS LE relocations:
    case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_MOVW_TPREL_G0:
    case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
    case R_AARCH64_TLSLE_MOVW_TPREL_G1:
    case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
    case R_AARCH64_TLSLE_MOVW_TPREL_G2:
      if (rs.checkTlsLe(offset, sym, type))
        continue;
      expr = R_TPREL;
      break;

    // TLS IE relocations:
    case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
      rs.handleTlsIe(ieExpr: RE_AARCH64_GOT_PAGE_PC, type, offset, addend, sym);
      continue;
    case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
      rs.handleTlsIe(ieExpr: R_GOT, type, offset, addend, sym);
      continue;

    // TLSDESC relocations:
    case R_AARCH64_TLSDESC_ADR_PAGE21:
      rs.handleTlsDesc(sharedExpr: RE_AARCH64_TLSDESC_PAGE, ieExpr: RE_AARCH64_GOT_PAGE_PC, type,
                       offset, addend, sym);
      continue;
    case R_AARCH64_TLSDESC_LD64_LO12:
    case R_AARCH64_TLSDESC_ADD_LO12:
      rs.handleTlsDesc(sharedExpr: R_TLSDESC, ieExpr: R_GOT, type, offset, addend, sym);
      continue;
    case R_AARCH64_TLSDESC_CALL:
      sym.setFlags(NEEDS_TLSDESC_NONAUTH);
      // In a non-shared link the descriptor call can later be relaxed to the
      // LE model, so record a R_TPREL relocation for it.
      if (!ctx.arg.shared)
        sec.addReloc(r: {.expr: R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
      continue;

    // AUTH TLSDESC relocations. Do not optimize to LE/IE because PAUTHELF64
    // only supports the descriptor based TLS (TLSDESC).
    // https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst#general-restrictions
    case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
      sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH);
      sec.addReloc(r: {.expr: RE_AARCH64_TLSDESC_PAGE, .type: type, .offset: offset, .addend: addend, .sym: &sym});
      continue;
    case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
    case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
      sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH);
      sec.addReloc(r: {.expr: R_TLSDESC, .type: type, .offset: offset, .addend: addend, .sym: &sym});
      continue;

    default:
      Err(ctx) << getErrorLoc(ctx, loc: sec.content().data() + offset)
               << "unknown relocation (" << type.v << ") against symbol "
               << &sym;
      continue;
    }
    rs.process(expr, type, offset, sym, addend);
  }

  // Keep the recorded relocations sorted by offset for the branch-to-branch
  // optimization pass.
  if (ctx.arg.branchToBranch)
    llvm::stable_sort(sec.relocs(),
                      [](auto &l, auto &r) { return l.offset < r.offset; });
}
382
383RelType AArch64::getDynRel(RelType type) const {
384 if (type == R_AARCH64_ABS64 || type == R_AARCH64_AUTH_ABS64 ||
385 type == R_AARCH64_FUNCINIT64)
386 return type;
387 return R_AARCH64_NONE;
388}
389
// Decode the addend stored in-place in the relocated field itself (the
// REL-style "implicit" addend). For data relocations this is a plain
// 16/32/64-bit read; for instruction relocations the immediate field is
// extracted, rescaled, and sign-extended per AAELF64.
int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_AARCH64_TLSDESC:
    // The addend of a TLSDESC dynamic relocation lives in the second 64-bit
    // word of the two-word descriptor.
    return read64(ctx, p: buf + 8);
  case R_AARCH64_NONE:
  case R_AARCH64_GLOB_DAT:
  case R_AARCH64_AUTH_GLOB_DAT:
  case R_AARCH64_JUMP_SLOT:
    return 0;
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    return SignExtend64<16>(x: read16(ctx, p: buf));
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    return SignExtend64<32>(x: read32(ctx, p: buf));
  case R_AARCH64_ABS64:
  case R_AARCH64_PREL64:
  case R_AARCH64_RELATIVE:
  case R_AARCH64_IRELATIVE:
  case R_AARCH64_TLS_TPREL64:
    return read64(ctx, p: buf);

  // The following relocation types all point at instructions, and
  // relocate an immediate field in the instruction.
  //
  // The general rule, from AAELF64 §5.7.2 "Addends and PC-bias",
  // says: "If the relocation relocates an instruction the immediate
  // field of the instruction is extracted, scaled as required by
  // the instruction field encoding, and sign-extended to 64 bits".

  // The R_AARCH64_MOVW family operates on wide MOV/MOVK/MOVZ
  // instructions, which have a 16-bit immediate field with its low
  // bit in bit 5 of the instruction encoding. When the immediate
  // field is used as an implicit addend for REL-type relocations,
  // it is treated as added to the low bits of the output value, not
  // shifted depending on the relocation type.
  //
  // This allows REL relocations to express the requirement 'please
  // add 12345 to this symbol value and give me the four 16-bit
  // chunks of the result', by putting the same addend 12345 in all
  // four instructions. Carries between the 16-bit chunks are
  // handled correctly, because the whole 64-bit addition is done
  // once per relocation.
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return SignExtend64<16>(x: getBits(val: read32le(P: buf), start: 5, end: 20));

  // R_AARCH64_TSTBR14 points at a TBZ or TBNZ instruction, which
  // has a 14-bit offset measured in instructions, i.e. shifted left
  // by 2.
  case R_AARCH64_TSTBR14:
    return SignExtend64<16>(x: getBits(val: read32le(P: buf), start: 5, end: 18) << 2);

  // R_AARCH64_CONDBR19 operates on the ordinary B.cond instruction,
  // which has a 19-bit offset measured in instructions.
  //
  // R_AARCH64_LD_PREL_LO19 operates on the LDR (literal)
  // instruction, which also has a 19-bit offset, measured in 4-byte
  // chunks. So the calculation is the same as for
  // R_AARCH64_CONDBR19.
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
    return SignExtend64<21>(x: getBits(val: read32le(P: buf), start: 5, end: 23) << 2);

  // R_AARCH64_ADD_ABS_LO12_NC operates on ADD (immediate). The
  // immediate can optionally be shifted left by 12 bits, but this
  // relocation is intended for the case where it is not.
  case R_AARCH64_ADD_ABS_LO12_NC:
    return SignExtend64<12>(x: getBits(val: read32le(P: buf), start: 10, end: 21));

  // R_AARCH64_ADR_PREL_LO21 operates on an ADR instruction, whose
  // 21-bit immediate is split between two bits high up in the word
  // (in fact the two _lowest_ order bits of the value) and 19 bits
  // lower down.
  //
  // R_AARCH64_ADR_PREL_PG_HI21[_NC] operate on an ADRP instruction,
  // which encodes the immediate in the same way, but will shift it
  // left by 12 bits when the instruction executes. For the same
  // reason as the MOVW family, we don't apply that left shift here.
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return SignExtend64<21>(x: (getBits(val: read32le(P: buf), start: 5, end: 23) << 2) |
                            getBits(val: read32le(P: buf), start: 29, end: 30));

  // R_AARCH64_{JUMP,CALL}26 operate on B and BL, which have a
  // 26-bit offset measured in instructions.
  case R_AARCH64_JUMP26:
  case R_AARCH64_CALL26:
    return SignExtend64<28>(x: getBits(val: read32le(P: buf), start: 0, end: 25) << 2);

  default:
    InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
    return 0;
  }
}
491
// Initialize a .got.plt slot with the address of the PLT header, the
// standard ELF lazy-binding convention (the first call through the entry
// jumps back to the header, which enters the resolver).
void AArch64::writeGotPlt(uint8_t *buf, const Symbol &) const {
  write64(ctx, p: buf, v: ctx.in.plt->getVA());
}
495
// Initialize an .igot.plt slot for an ifunc symbol. The symbol's own address
// is written only when the output carries explicit addends
// (ctx.arg.writeAddends); otherwise the IRELATIVE relocation supplies it.
void AArch64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  if (ctx.arg.writeAddends)
    write64(ctx, p: buf, v: s.getVA(ctx));
}
500
// Write the 32-byte lazy-binding PLT header. It pushes x16/x30, loads the
// resolver address from .got.plt[2] (at gotPlt + 16, hence the "+ 16" in the
// relocations below) and branches to it.
void AArch64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  memcpy(dest: buf, src: pltData, n: sizeof(pltData));

  uint64_t got = ctx.in.gotPlt->getVA();
  uint64_t plt = ctx.in.plt->getVA();
  // The ADRP is the second instruction, so its PC is plt + 4.
  relocateNoSym(loc: buf + 4, type: R_AARCH64_ADR_PREL_PG_HI21,
                val: getAArch64Page(expr: got + 16) - getAArch64Page(expr: plt + 4));
  relocateNoSym(loc: buf + 8, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + 16);
  relocateNoSym(loc: buf + 12, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + 16);
}
521
// Write one 16-byte PLT entry: load the target address from the symbol's
// .got.plt slot and branch to it (x16 also keeps the slot address for the
// lazy resolver).
void AArch64::writePlt(uint8_t *buf, const Symbol &sym,
                       uint64_t pltEntryAddr) const {
  const uint8_t inst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[n]))
      0x20, 0x02, 0x1f, 0xd6  // br x17
  };
  memcpy(dest: buf, src: inst, n: sizeof(inst));

  uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
  relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
                val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
  relocateNoSym(loc: buf + 4, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
  relocateNoSym(loc: buf + 8, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);
}
538
539bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
540 uint64_t branchAddr, const Symbol &s,
541 int64_t a) const {
542 // If s is an undefined weak symbol and does not have a PLT entry then it will
543 // be resolved as a branch to the next instruction. If it is hidden, its
544 // binding has been converted to local, so we just check isUndefined() here. A
545 // undefined non-weak symbol will have been errored.
546 if (s.isUndefined() && !s.isInPlt(ctx))
547 return false;
548 // ELF for the ARM 64-bit architecture, section Call and Jump relocations
549 // only permits range extension thunks for R_AARCH64_CALL26 and
550 // R_AARCH64_JUMP26 relocation types.
551 if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
552 type != R_AARCH64_PLT32)
553 return false;
554 uint64_t dst = expr == R_PLT_PC ? s.getPltVA(ctx) : s.getVA(ctx, addend: a);
555 return !inBranchRange(type, src: branchAddr, dst);
556}
557
558uint32_t AArch64::getThunkSectionSpacing() const {
559 // See comment in Arch/ARM.cpp for a more detailed explanation of
560 // getThunkSectionSpacing(). For AArch64 the only branches we are permitted to
561 // Thunk have a range of +/- 128 MiB
562 return (128 * 1024 * 1024) - 0x30000;
563}
564
565bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
566 if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
567 type != R_AARCH64_PLT32)
568 return true;
569 // The AArch64 call and unconditional branch instructions have a range of
570 // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
571 uint64_t range =
572 type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024);
573 if (dst > src) {
574 // Immediate of branch is signed.
575 range -= 4;
576 return dst - src <= range;
577 }
578 return src - dst <= range;
579}
580
581static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
582 uint32_t immLo = (imm & 0x3) << 29;
583 uint32_t immHi = (imm & 0x1FFFFC) << 3;
584 uint64_t mask = (0x3 << 29) | (0x1FFFFC << 3);
585 write32le(P: l, V: (read32le(P: l) & ~mask) | immLo | immHi);
586}
587
// Read-modify-write: replace the bits selected by `mask` in the 32-bit
// little-endian word at p with `v` (v must already be positioned within the
// mask).
static void writeMaskedBits32le(uint8_t *p, int32_t v, uint32_t mask) {
  write32le(P: p, V: (read32le(P: p) & ~mask) | v);
}
591
// Update the immediate field in a AARCH64 ldr, str, and add instruction.
// The unsigned 12-bit immediate occupies bits [21:10] of the encoding.
static void write32Imm12(uint8_t *l, uint64_t imm) {
  writeMaskedBits32le(p: l, v: (imm & 0xFFF) << 10, mask: 0xFFF << 10);
}
596
// Update the immediate field in an AArch64 movk, movn or movz instruction
// for a signed relocation, and update the opcode of a movn or movz
// instruction to match the sign of the operand. Bit 16 of `imm` carries the
// sign of the 16-bit chunk being materialized.
static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
  uint32_t inst = read32le(P: loc);
  // Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
  if (!(inst & (1 << 29))) {
    // movn or movz.
    if (imm & 0x10000) {
      // Change opcode to movn, which takes an inverted operand.
      imm ^= 0xFFFF;
      inst &= ~(1 << 30);
    } else {
      // Change opcode to movz.
      inst |= 1 << 30;
    }
  }
  write32le(P: loc, V: inst | ((imm & 0xFFFF) << 5));
}
616
// Apply relocation `rel` at `loc` with the fully computed value `val`.
// Range/alignment checks (checkInt/checkUInt/checkAlignment) report errors
// through ctx before the (possibly truncated) value is written; _NC ("no
// check") variants skip the range check by design.
void AArch64::relocate(uint8_t *loc, const Relocation &rel,
                       uint64_t val) const {
  switch (rel.type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    checkIntUInt(ctx, loc, v: val, n: 16, rel);
    write16(ctx, p: loc, v: val);
    break;
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    checkIntUInt(ctx, loc, v: val, n: 32, rel);
    write32(ctx, p: loc, v: val);
    break;
  case R_AARCH64_PATCHINST:
    // Patches a full instruction word; skipped when the symbol is undefined.
    if (!rel.sym->isUndefined()) {
      checkUInt(ctx, loc, v: val, n: 32, rel);
      write32le(P: loc, V: val);
    }
    break;
  case R_AARCH64_PLT32:
  case R_AARCH64_GOTPCREL32:
    checkInt(ctx, loc, v: val, n: 32, rel);
    write32(ctx, p: loc, v: val);
    break;
  case R_AARCH64_ABS64:
    write64(ctx, p: loc, v: val);
    break;
  case R_AARCH64_PREL64:
    write64(ctx, p: loc, v: val);
    break;
  case R_AARCH64_AUTH_ABS64:
    // This is used for the addend of a .relr.auth.dyn entry,
    // which is a 32-bit value; the upper 32 bits are used to
    // encode the schema.
    checkInt(ctx, loc, v: val, n: 32, rel);
    write32(ctx, p: loc, v: val);
    break;
  case R_AARCH64_TLS_DTPREL64:
    write64(ctx, p: loc, v: val);
    break;
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
    write32Imm12(l: loc, imm: val);
    break;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_AUTH_ADR_GOT_PAGE:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
  case R_AARCH64_TLSDESC_ADR_PAGE21:
  case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
    // 21-bit page delta, i.e. 33 bits before the >> 12 below.
    checkInt(ctx, loc, v: val, n: 33, rel);
    [[fallthrough]];
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    write32AArch64Addr(l: loc, imm: val >> 12);
    break;
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
    checkInt(ctx, loc, v: val, n: 21, rel);
    write32AArch64Addr(l: loc, imm: val);
    break;
  case R_AARCH64_JUMP26:
    // Normally we would just write the bits of the immediate field, however
    // when patching instructions for the cpu errata fix -fix-cortex-a53-843419
    // we want to replace a non-branch instruction with a branch immediate
    // instruction. By writing all the bits of the instruction including the
    // opcode and the immediate (0 001 | 01 imm26) we can do this
    // transformation by placing a R_AARCH64_JUMP26 relocation at the offset of
    // the instruction we want to patch.
    write32le(P: loc, V: 0x14000000);
    [[fallthrough]];
  case R_AARCH64_CALL26:
    checkInt(ctx, loc, v: val, n: 28, rel);
    writeMaskedBits32le(p: loc, v: (val & 0x0FFFFFFC) >> 2, mask: 0x0FFFFFFC >> 2);
    break;
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_GOT_LD_PREL19:
  case R_AARCH64_AUTH_GOT_LD_PREL19:
    checkAlignment(ctx, loc, v: val, n: 4, rel);
    checkInt(ctx, loc, v: val, n: 21, rel);
    writeMaskedBits32le(p: loc, v: (val & 0x1FFFFC) << 3, mask: 0x1FFFFC << 3);
    break;
  // The LDST*_LO12 family scales the low 12 bits by the access size, so each
  // width drops its low alignment bits below.
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
    write32Imm12(l: loc, imm: getBits(val, start: 0, end: 11));
    break;
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
    checkAlignment(ctx, loc, v: val, n: 2, rel);
    write32Imm12(l: loc, imm: getBits(val, start: 1, end: 11));
    break;
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
    checkAlignment(ctx, loc, v: val, n: 4, rel);
    write32Imm12(l: loc, imm: getBits(val, start: 2, end: 11));
    break;
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
    checkAlignment(ctx, loc, v: val, n: 8, rel);
    write32Imm12(l: loc, imm: getBits(val, start: 3, end: 11));
    break;
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
    checkAlignment(ctx, loc, v: val, n: 16, rel);
    write32Imm12(l: loc, imm: getBits(val, start: 4, end: 11));
    break;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    checkAlignment(ctx, loc, v: val, n: 8, rel);
    write32Imm12(l: loc, imm: getBits(val, start: 3, end: 14));
    break;
  case R_AARCH64_MOVW_UABS_G0:
    checkUInt(ctx, loc, v: val, n: 16, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G0_NC:
    writeMaskedBits32le(p: loc, v: (val & 0xFFFF) << 5, mask: 0xFFFF << 5);
    break;
  case R_AARCH64_MOVW_UABS_G1:
    checkUInt(ctx, loc, v: val, n: 32, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G1_NC:
    writeMaskedBits32le(p: loc, v: (val & 0xFFFF0000) >> 11, mask: 0xFFFF0000 >> 11);
    break;
  case R_AARCH64_MOVW_UABS_G2:
    checkUInt(ctx, loc, v: val, n: 48, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G2_NC:
    writeMaskedBits32le(p: loc, v: (val & 0xFFFF00000000) >> 27,
                        mask: 0xFFFF00000000 >> 27);
    break;
  case R_AARCH64_MOVW_UABS_G3:
    writeMaskedBits32le(p: loc, v: (val & 0xFFFF000000000000) >> 43,
                        mask: 0xFFFF000000000000 >> 43);
    break;
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
    checkInt(ctx, loc, v: val, n: 17, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
    writeSMovWImm(loc, imm: val);
    break;
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
    checkInt(ctx, loc, v: val, n: 33, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
    writeSMovWImm(loc, imm: val >> 16);
    break;
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    checkInt(ctx, loc, v: val, n: 49, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G2_NC:
    writeSMovWImm(loc, imm: val >> 32);
    break;
  case R_AARCH64_MOVW_PREL_G3:
    writeSMovWImm(loc, imm: val >> 48);
    break;
  case R_AARCH64_TSTBR14:
    // 14-bit branch offset scaled by 4: 16 bits of signed range.
    checkInt(ctx, loc, v: val, n: 16, rel);
    writeMaskedBits32le(p: loc, v: (val & 0xFFFC) << 3, mask: 0xFFFC << 3);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    checkUInt(ctx, loc, v: val, n: 24, rel);
    write32Imm12(l: loc, imm: val >> 12);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
    write32Imm12(l: loc, imm: val);
    break;
  case R_AARCH64_TLSDESC:
    // For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
    write64(ctx, p: loc + 8, v: val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}
805
// Relax a TLSDESC sequence to the Local-Exec model, materializing the TP
// offset `val` directly with a movz/movk pair. `val` must fit in 32 bits.
void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocation are in the form:
  //   adrp    x0, :tlsdesc:v             [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr     x1, [x0, #:tlsdesc_lo12:v  [R_AARCH64_TLSDESC_LD64_LO12]
  //   add     x0, x0, :tlsdesc_los:v     [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                       [R_AARCH64_TLSDESC_CALL]
  //   blr     x1
  // And it can optimized to:
  //   movz    x0, #0x0, lsl #16
  //   movk    x0, #0x10
  //   nop
  //   nop
  checkUInt(ctx, loc, v: val, n: 32, rel);

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(P: loc, V: 0xd503201f); // nop
    return;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(P: loc, V: 0xd2a00000 | (((val >> 16) & 0xffff) << 5)); // movz
    return;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(P: loc, V: 0xf2800000 | ((val & 0xffff) << 5)); // movk
    return;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
  }
}
836
// Relax a TLSDESC sequence to the Initial-Exec model: replace the descriptor
// load/call with a GOT load of the TP offset. `val` is the GOT-relative value
// applied through the IE relocations below.
void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocation are in the form:
  //   adrp    x0, :tlsdesc:v             [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr     x1, [x0, #:tlsdesc_lo12:v  [R_AARCH64_TLSDESC_LD64_LO12]
  //   add     x0, x0, :tlsdesc_los:v     [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                       [R_AARCH64_TLSDESC_CALL]
  //   blr     x1
  // And it can optimized to:
  //   adrp    x0, :gottprel:v
  //   ldr     x0, [x0, :gottprel_lo12:v]
  //   nop
  //   nop

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(P: loc, V: 0xd503201f); // nop
    break;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(P: loc, V: 0x90000000); // adrp
    relocateNoSym(loc, type: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
    break;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(P: loc, V: 0xf9400000); // ldr
    relocateNoSym(loc, type: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
  }
}
868
869void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
870 uint64_t val) const {
871 checkUInt(ctx, loc, v: val, n: 32, rel);
872
873 if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
874 // Generate MOVZ.
875 uint32_t regNo = read32le(P: loc) & 0x1f;
876 write32le(P: loc, V: (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5));
877 return;
878 }
879 if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
880 // Generate MOVK.
881 uint32_t regNo = read32le(P: loc) & 0x1f;
882 write32le(P: loc, V: (0xf2800000 | regNo) | ((val & 0xffff) << 5));
883 return;
884 }
885 llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
886}
887
888AArch64Relaxer::AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs)
889 : ctx(ctx) {
890 if (!ctx.arg.relax)
891 return;
892 // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
893 // always appear in pairs.
894 size_t i = 0;
895 const size_t size = relocs.size();
896 for (; i != size; ++i) {
897 if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
898 if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
899 ++i;
900 continue;
901 }
902 break;
903 } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
904 break;
905 }
906 }
907 safeToRelaxAdrpLdr = i == size;
908}
909
910bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
911 const Relocation &addRel, uint64_t secAddr,
912 uint8_t *buf) const {
913 // When the address of sym is within the range of ADR then
914 // we may relax
915 // ADRP xn, sym
916 // ADD xn, xn, :lo12: sym
917 // to
918 // NOP
919 // ADR xn, sym
920 if (!ctx.arg.relax || addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
921 return false;
922 // Check if the relocations apply to consecutive instructions.
923 if (adrpRel.offset + 4 != addRel.offset)
924 return false;
925 if (adrpRel.sym != addRel.sym)
926 return false;
927 if (adrpRel.addend != 0 || addRel.addend != 0)
928 return false;
929
930 uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
931 uint32_t addInstr = read32le(P: buf + addRel.offset);
932 // Check if the first instruction is ADRP and the second instruction is ADD.
933 if ((adrpInstr & 0x9f000000) != 0x90000000 ||
934 (addInstr & 0xffc00000) != 0x91000000)
935 return false;
936 uint32_t adrpDestReg = adrpInstr & 0x1f;
937 uint32_t addDestReg = addInstr & 0x1f;
938 uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
939 if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
940 return false;
941
942 Symbol &sym = *adrpRel.sym;
943 // Check if the address difference is within 1MiB range.
944 int64_t val = sym.getVA(ctx) - (secAddr + addRel.offset);
945 if (val < -1024 * 1024 || val >= 1024 * 1024)
946 return false;
947
948 Relocation adrRel = {.expr: R_ABS, .type: R_AARCH64_ADR_PREL_LO21, .offset: addRel.offset,
949 /*addend=*/0, .sym: &sym};
950 // nop
951 write32le(P: buf + adrpRel.offset, V: 0xd503201f);
952 // adr x_<dest_reg>
953 write32le(P: buf + adrRel.offset, V: 0x10000000 | adrpDestReg);
954 ctx.target->relocate(loc: buf + adrRel.offset, rel: adrRel, val);
955 return true;
956}
957
958bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
959 const Relocation &ldrRel, uint64_t secAddr,
960 uint8_t *buf) const {
961 if (!safeToRelaxAdrpLdr)
962 return false;
963
964 // When the definition of sym is not preemptible then we may
965 // be able to relax
966 // ADRP xn, :got: sym
967 // LDR xn, [ xn :got_lo12: sym]
968 // to
969 // ADRP xn, sym
970 // ADD xn, xn, :lo_12: sym
971
972 if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
973 ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
974 return false;
975 // Check if the relocations apply to consecutive instructions.
976 if (adrpRel.offset + 4 != ldrRel.offset)
977 return false;
978 // Check if the relocations reference the same symbol and
979 // skip undefined, preemptible and STT_GNU_IFUNC symbols.
980 if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
981 adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
982 return false;
983 // Check if the addends of the both relocations are zero.
984 if (adrpRel.addend != 0 || ldrRel.addend != 0)
985 return false;
986 uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
987 uint32_t ldrInstr = read32le(P: buf + ldrRel.offset);
988 // Check if the first instruction is ADRP and the second instruction is LDR.
989 if ((adrpInstr & 0x9f000000) != 0x90000000 ||
990 (ldrInstr & 0x3b000000) != 0x39000000)
991 return false;
992 // Check the value of the sf bit.
993 if (!(ldrInstr >> 31))
994 return false;
995 uint32_t adrpDestReg = adrpInstr & 0x1f;
996 uint32_t ldrDestReg = ldrInstr & 0x1f;
997 uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
998 // Check if ADPR and LDR use the same register.
999 if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
1000 return false;
1001
1002 Symbol &sym = *adrpRel.sym;
1003 // GOT references to absolute symbols can't be relaxed to use ADRP/ADD in
1004 // position-independent code because these instructions produce a relative
1005 // address.
1006 if (ctx.arg.isPic && !cast<Defined>(Val&: sym).section)
1007 return false;
1008 // Check if the address difference is within 4GB range.
1009 int64_t val =
1010 getAArch64Page(expr: sym.getVA(ctx)) - getAArch64Page(expr: secAddr + adrpRel.offset);
1011 if (val != llvm::SignExtend64(X: val, B: 33))
1012 return false;
1013
1014 Relocation adrpSymRel = {.expr: RE_AARCH64_PAGE_PC, .type: R_AARCH64_ADR_PREL_PG_HI21,
1015 .offset: adrpRel.offset, /*addend=*/0, .sym: &sym};
1016 Relocation addRel = {.expr: R_ABS, .type: R_AARCH64_ADD_ABS_LO12_NC, .offset: ldrRel.offset,
1017 /*addend=*/0, .sym: &sym};
1018
1019 // adrp x_<dest_reg>
1020 write32le(P: buf + adrpSymRel.offset, V: 0x90000000 | adrpDestReg);
1021 // add x_<dest reg>, x_<dest reg>
1022 write32le(P: buf + addRel.offset, V: 0x91000000 | adrpDestReg | (adrpDestReg << 5));
1023
1024 ctx.target->relocate(
1025 loc: buf + adrpSymRel.offset, rel: adrpSymRel,
1026 val: SignExtend64(X: getAArch64Page(expr: sym.getVA(ctx)) -
1027 getAArch64Page(expr: secAddr + adrpSymRel.offset),
1028 B: 64));
1029 ctx.target->relocate(loc: buf + addRel.offset, rel: addRel,
1030 val: SignExtend64(X: sym.getVA(ctx), B: 64));
1031 tryRelaxAdrpAdd(adrpRel: adrpSymRel, addRel, secAddr, buf);
1032 return true;
1033}
1034
1035// Tagged symbols have upper address bits that are added by the dynamic loader,
1036// and thus need the full 64-bit GOT entry. Do not relax such symbols.
1037static bool needsGotForMemtag(const Relocation &rel) {
1038 return rel.sym->isTagged() && needsGot(expr: rel.expr);
1039}
1040
1041void AArch64::relocateAlloc(InputSection &sec, uint8_t *buf) const {
1042 uint64_t secAddr = sec.getOutputSection()->addr + sec.outSecOff;
1043 const ArrayRef<Relocation> relocs = sec.relocs();
1044 AArch64Relaxer relaxer(ctx, relocs);
1045 for (size_t i = 0, size = relocs.size(); i != size; ++i) {
1046 const Relocation &rel = relocs[i];
1047 if (rel.expr == R_NONE) // See finalizeAddressDependentContent()
1048 continue;
1049 uint8_t *loc = buf + rel.offset;
1050 const uint64_t val = sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset);
1051
1052 if (needsGotForMemtag(rel)) {
1053 relocate(loc, rel, val);
1054 continue;
1055 }
1056
1057 switch (rel.type) {
1058 case R_AARCH64_ADR_GOT_PAGE:
1059 if (i + 1 < size &&
1060 relaxer.tryRelaxAdrpLdr(adrpRel: rel, ldrRel: relocs[i + 1], secAddr, buf)) {
1061 ++i;
1062 continue;
1063 }
1064 break;
1065 case R_AARCH64_ADR_PREL_PG_HI21:
1066 if (i + 1 < size &&
1067 relaxer.tryRelaxAdrpAdd(adrpRel: rel, addRel: relocs[i + 1], secAddr, buf)) {
1068 ++i;
1069 continue;
1070 }
1071 break;
1072
1073 case R_AARCH64_TLSDESC_ADR_PAGE21:
1074 case R_AARCH64_TLSDESC_LD64_LO12:
1075 case R_AARCH64_TLSDESC_ADD_LO12:
1076 case R_AARCH64_TLSDESC_CALL:
1077 if (rel.expr == R_TPREL)
1078 relaxTlsGdToLe(loc, rel, val);
1079 else if (rel.expr == RE_AARCH64_GOT_PAGE_PC || rel.expr == R_GOT)
1080 relaxTlsGdToIe(loc, rel, val);
1081 else
1082 relocate(loc, rel, val);
1083 continue;
1084 case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
1085 case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
1086 if (rel.expr == R_TPREL)
1087 relaxTlsIeToLe(loc, rel, val);
1088 else
1089 relocate(loc, rel, val);
1090 continue;
1091 default:
1092 break;
1093 }
1094
1095 relocate(loc, rel, val);
1096 }
1097}
1098
1099static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
1100 Relocation &r) {
1101 // Identify a control transfer relocation for the branch-to-branch
1102 // optimization. A "control transfer relocation" means a B or BL
1103 // target but it also includes relative vtable relocations for example.
1104 //
1105 // We require the relocation type to be JUMP26, CALL26 or PLT32. With a
1106 // relocation type of PLT32 the value may be assumed to be used for branching
1107 // directly to the symbol and the addend is only used to produce the relocated
1108 // value (hence the effective addend is always 0). This is because if a PLT is
1109 // needed the addend will be added to the address of the PLT, and it doesn't
1110 // make sense to branch into the middle of a PLT. For example, relative vtable
1111 // relocations use PLT32 and 0 or a positive value as the addend but still are
1112 // used to branch to the symbol.
1113 //
1114 // With JUMP26 or CALL26 the only reasonable interpretation of a non-zero
1115 // addend is that we are branching to symbol+addend so that becomes the
1116 // effective addend.
1117 if (r.type == R_AARCH64_PLT32)
1118 return 0;
1119 if (r.type == R_AARCH64_JUMP26 || r.type == R_AARCH64_CALL26)
1120 return r.addend;
1121 return std::nullopt;
1122}
1123
1124static std::pair<Relocation *, uint64_t>
1125getBranchInfoAtTarget(InputSection &is, uint64_t offset) {
1126 auto *i = llvm::partition_point(
1127 Range&: is.relocations, P: [&](Relocation &r) { return r.offset < offset; });
1128 if (i != is.relocations.end() && i->offset == offset &&
1129 i->type == R_AARCH64_JUMP26) {
1130 return {i, i->addend};
1131 }
1132 return {nullptr, 0};
1133}
1134
1135static void redirectControlTransferRelocations(Relocation &r1,
1136 const Relocation &r2) {
1137 r1.expr = r2.expr;
1138 r1.sym = r2.sym;
1139 // With PLT32 we must respect the original addend as that affects the value's
1140 // interpretation. With the other relocation types the original addend is
1141 // irrelevant because it referred to an offset within the original target
1142 // section so we overwrite it.
1143 if (r1.type == R_AARCH64_PLT32)
1144 r1.addend += r2.addend;
1145 else
1146 r1.addend = r2.addend;
1147}
1148
// Perform the branch-to-branch optimization: branches whose target is itself
// an unconditional branch are redirected to the final destination. The three
// helpers above supply the AArch64-specific relocation knowledge to the
// target-independent implementation.
void AArch64::applyBranchToBranchOpt() const {
  applyBranchToBranchOptImpl(ctx, getControlTransferAddend,
                             getBranchInfoAtTarget,
                             redirectControlTransferRelocations);
}
1154
1155// AArch64 may use security features in variant PLT sequences. These are:
1156// Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
1157// Indicator (BTI) introduced in armv8.5-a. The additional instructions used
1158// in the variant Plt sequences are encoded in the Hint space so they can be
1159// deployed on older architectures, which treat the instructions as a nop.
1160// PAC and BTI can be combined leading to the following combinations:
1161// writePltHeader
1162// writePltHeaderBti (no PAC Header needed)
1163// writePlt
1164// writePltBti (BTI only)
1165// writePltPac (PAC only)
1166// writePltBtiPac (BTI and PAC)
1167//
1168// When PAC is enabled the dynamic loader encrypts the address that it places
1169// in the .got.plt using the pacia1716 instruction which encrypts the value in
1170// x17 using the modifier in x16. The static linker places autia1716 before the
1171// indirect branch to x17 to authenticate the address in x17 with the modifier
1172// in x16. This makes it more difficult for an attacker to modify the value in
1173// the .got.plt.
1174//
1175// When BTI is enabled all indirect branches must land on a bti instruction.
1176// The static linker must place a bti instruction at the start of any PLT entry
1177// that may be the target of an indirect branch. As the PLT entries call the
1178// lazy resolver indirectly this must have a bti instruction at start. In
1179// general a bti instruction is not needed for a PLT entry as indirect calls
1180// are resolved to the function address and not the PLT entry for the function.
1181// There are a small number of cases where the PLT address can escape, such as
1182// taking the address of a function or ifunc via a non got-generating
1183// relocation, and a shared library refers to that symbol.
1184//
1185// We use the bti c variant of the instruction which permits indirect branches
1186// (br) via x16/x17 and indirect function calls (blr) via any register. The ABI
1187// guarantees that all indirect branches from code requiring BTI protection
1188// will go via x16/x17
1189
namespace {
// AArch64 target variant that emits PLT sequences hardened with BTI landing
// pads and/or PAC authenticated branches (see the comment block above for
// the full rationale).
class AArch64BtiPac final : public AArch64 {
public:
  AArch64BtiPac(Ctx &);
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;

private:
  bool btiHeader; // bti instruction needed in PLT Header and Entry
  // How the indirect branch at the end of each PLT entry is authenticated.
  enum {
    PEK_NoAuth,
    PEK_AuthHint, // use autia1716 instr for authenticated branch in PLT entry
    PEK_Auth,     // use braa instr for authenticated branch in PLT entry
  } pacEntryKind;
};
} // namespace
1207
1208AArch64BtiPac::AArch64BtiPac(Ctx &ctx) : AArch64(ctx) {
1209 btiHeader = (ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
1210 // A BTI (Branch Target Indicator) Plt Entry is only required if the
1211 // address of the PLT entry can be taken by the program, which permits an
1212 // indirect jump to the PLT entry. This can happen when the address
1213 // of the PLT entry for a function is canonicalised due to the address of
1214 // the function in an executable being taken by a shared library, or
1215 // non-preemptible ifunc referenced by non-GOT-generating, non-PLT-generating
1216 // relocations.
1217 // The PAC PLT entries require dynamic loader support and this isn't known
1218 // from properties in the objects, so we use the command line flag.
1219 // By default we only use hint-space instructions, but if we detect the
1220 // PAuthABI, which requires v8.3-A, we can use the non-hint space
1221 // instructions.
1222
1223 if (ctx.arg.zPacPlt) {
1224 if (ctx.aarch64PauthAbiCoreInfo && ctx.aarch64PauthAbiCoreInfo->isValid())
1225 pacEntryKind = PEK_Auth;
1226 else
1227 pacEntryKind = PEK_AuthHint;
1228 } else {
1229 pacEntryKind = PEK_NoAuth;
1230 }
1231
1232 if (btiHeader || (pacEntryKind != PEK_NoAuth)) {
1233 pltEntrySize = 24;
1234 ipltEntrySize = 24;
1235 }
1236}
1237
1238void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
1239 const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
1240 const uint8_t pltData[] = {
1241 0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
1242 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
1243 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[2]))]
1244 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[2]))
1245 0x20, 0x02, 0x1f, 0xd6, // br x17
1246 0x1f, 0x20, 0x03, 0xd5, // nop
1247 0x1f, 0x20, 0x03, 0xd5 // nop
1248 };
1249 const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop
1250
1251 uint64_t got = ctx.in.gotPlt->getVA();
1252 uint64_t plt = ctx.in.plt->getVA();
1253
1254 if (btiHeader) {
1255 // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
1256 // instruction.
1257 memcpy(dest: buf, src: btiData, n: sizeof(btiData));
1258 buf += sizeof(btiData);
1259 plt += sizeof(btiData);
1260 }
1261 memcpy(dest: buf, src: pltData, n: sizeof(pltData));
1262
1263 relocateNoSym(loc: buf + 4, type: R_AARCH64_ADR_PREL_PG_HI21,
1264 val: getAArch64Page(expr: got + 16) - getAArch64Page(expr: plt + 4));
1265 relocateNoSym(loc: buf + 8, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + 16);
1266 relocateNoSym(loc: buf + 12, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + 16);
1267 if (!btiHeader)
1268 // We didn't add the BTI c instruction so round out size with NOP.
1269 memcpy(dest: buf + sizeof(pltData), src: nopData, n: sizeof(nopData));
1270}
1271
1272void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
1273 uint64_t pltEntryAddr) const {
1274 // The PLT entry is of the form:
1275 // [btiData] addrInst (pacBr | stdBr) [nopData]
1276 const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
1277 const uint8_t addrInst[] = {
1278 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
1279 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
1280 0x10, 0x02, 0x00, 0x91 // add x16, x16, Offset(&(.got.plt[n]))
1281 };
1282 const uint8_t pacHintBr[] = {
1283 0x9f, 0x21, 0x03, 0xd5, // autia1716
1284 0x20, 0x02, 0x1f, 0xd6 // br x17
1285 };
1286 const uint8_t pacBr[] = {
1287 0x30, 0x0a, 0x1f, 0xd7, // braa x17, x16
1288 0x1f, 0x20, 0x03, 0xd5 // nop
1289 };
1290 const uint8_t stdBr[] = {
1291 0x20, 0x02, 0x1f, 0xd6, // br x17
1292 0x1f, 0x20, 0x03, 0xd5 // nop
1293 };
1294 const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop
1295
1296 // NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may
1297 // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
1298 // address may escape if referenced by a direct relocation. If relative
1299 // vtables are used then if the vtable is in a shared object the offsets will
1300 // be to the PLT entry. The condition is conservative.
1301 bool hasBti = btiHeader &&
1302 (sym.hasFlag(bit: NEEDS_COPY) || sym.isInIplt || sym.thunkAccessed);
1303 if (hasBti) {
1304 memcpy(dest: buf, src: btiData, n: sizeof(btiData));
1305 buf += sizeof(btiData);
1306 pltEntryAddr += sizeof(btiData);
1307 }
1308
1309 uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
1310 memcpy(dest: buf, src: addrInst, n: sizeof(addrInst));
1311 relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
1312 val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
1313 relocateNoSym(loc: buf + 4, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
1314 relocateNoSym(loc: buf + 8, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);
1315
1316 if (pacEntryKind != PEK_NoAuth)
1317 memcpy(dest: buf + sizeof(addrInst),
1318 src: pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr,
1319 n: sizeof(pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr));
1320 else
1321 memcpy(dest: buf + sizeof(addrInst), src: stdBr, n: sizeof(stdBr));
1322 if (!hasBti)
1323 // We didn't add the BTI c instruction so round out size with NOP.
1324 memcpy(dest: buf + sizeof(addrInst) + sizeof(stdBr), src: nopData, n: sizeof(nopData));
1325}
1326
1327template <class ELFT>
1328static void
1329addTaggedSymbolReferences(Ctx &ctx, InputSectionBase &sec,
1330 DenseMap<Symbol *, unsigned> &referenceCount) {
1331 assert(sec.type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC);
1332
1333 const RelsOrRelas<ELFT> rels = sec.relsOrRelas<ELFT>();
1334 if (rels.areRelocsRel())
1335 ErrAlways(ctx)
1336 << "non-RELA relocations are not allowed with memtag globals";
1337
1338 for (const typename ELFT::Rela &rel : rels.relas) {
1339 Symbol &sym = sec.file->getRelocTargetSym(rel);
1340 // Linker-synthesized symbols such as __executable_start may be referenced
1341 // as tagged in input objfiles, and we don't want them to be tagged. A
1342 // cheap way to exclude them is the type check, but their type is
1343 // STT_NOTYPE. In addition, this save us from checking untaggable symbols,
1344 // like functions or TLS symbols.
1345 if (sym.type != STT_OBJECT)
1346 continue;
1347 // STB_LOCAL symbols can't be referenced from outside the object file, and
1348 // thus don't need to be checked for references from other object files.
1349 if (sym.binding == STB_LOCAL) {
1350 sym.setIsTagged(true);
1351 continue;
1352 }
1353 ++referenceCount[&sym];
1354 }
1355 sec.markDead();
1356}
1357
1358// A tagged symbol must be denoted as being tagged by all references and the
1359// chosen definition. For simplicity, here, it must also be denoted as tagged
1360// for all definitions. Otherwise:
1361//
1362// 1. A tagged definition can be used by an untagged declaration, in which case
1363// the untagged access may be PC-relative, causing a tag mismatch at
1364// runtime.
1365// 2. An untagged definition can be used by a tagged declaration, where the
1366// compiler has taken advantage of the increased alignment of the tagged
1367// declaration, but the alignment at runtime is wrong, causing a fault.
1368//
1369// Ideally, this isn't a problem, as any TU that imports or exports tagged
1370// symbols should also be built with tagging. But, to handle these cases, we
1371// demote the symbol to be untagged.
1372void elf::createTaggedSymbols(Ctx &ctx) {
1373 assert(hasMemtag(ctx));
1374
1375 // First, collect all symbols that are marked as tagged, and count how many
1376 // times they're marked as tagged.
1377 DenseMap<Symbol *, unsigned> taggedSymbolReferenceCount;
1378 for (InputFile *file : ctx.objectFiles) {
1379 if (file->kind() != InputFile::ObjKind)
1380 continue;
1381 for (InputSectionBase *section : file->getSections()) {
1382 if (!section || section->type != SHT_AARCH64_MEMTAG_GLOBALS_STATIC ||
1383 section == &InputSection::discarded)
1384 continue;
1385 invokeELFT(addTaggedSymbolReferences, ctx, *section,
1386 taggedSymbolReferenceCount);
1387 }
1388 }
1389
1390 // Now, go through all the symbols. If the number of declarations +
1391 // definitions to a symbol exceeds the amount of times they're marked as
1392 // tagged, it means we have an objfile that uses the untagged variant of the
1393 // symbol.
1394 for (InputFile *file : ctx.objectFiles) {
1395 if (file->kind() != InputFile::BinaryKind &&
1396 file->kind() != InputFile::ObjKind)
1397 continue;
1398
1399 for (Symbol *symbol : file->getSymbols()) {
1400 // See `addTaggedSymbolReferences` for more details.
1401 if (symbol->type != STT_OBJECT ||
1402 symbol->binding == STB_LOCAL)
1403 continue;
1404 auto it = taggedSymbolReferenceCount.find(Val: symbol);
1405 if (it == taggedSymbolReferenceCount.end()) continue;
1406 unsigned &remainingAllowedTaggedRefs = it->second;
1407 if (remainingAllowedTaggedRefs == 0) {
1408 taggedSymbolReferenceCount.erase(I: it);
1409 continue;
1410 }
1411 --remainingAllowedTaggedRefs;
1412 }
1413 }
1414
1415 // `addTaggedSymbolReferences` has already checked that we have RELA
1416 // relocations, the only other way to get written addends is with
1417 // --apply-dynamic-relocs.
1418 if (!taggedSymbolReferenceCount.empty() && ctx.arg.writeAddends)
1419 ErrAlways(ctx) << "--apply-dynamic-relocs cannot be used with MTE globals";
1420
1421 // Now, `taggedSymbolReferenceCount` should only contain symbols that are
1422 // defined as tagged exactly the same amount as it's referenced, meaning all
1423 // uses are tagged.
1424 for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) {
1425 assert(remainingTaggedRefs == 0 &&
1426 "Symbol is defined as tagged more times than it's used");
1427 symbol->setIsTagged(true);
1428 }
1429}
1430
1431void elf::setAArch64TargetInfo(Ctx &ctx) {
1432 if ((ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ||
1433 ctx.arg.zPacPlt)
1434 ctx.target.reset(p: new AArch64BtiPac(ctx));
1435 else
1436 ctx.target.reset(p: new AArch64(ctx));
1437}
1438