1//===- AArch64.cpp --------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "OutputSections.h"
11#include "RelocScan.h"
12#include "Symbols.h"
13#include "SyntheticSections.h"
14#include "Target.h"
15#include "TargetImpl.h"
16#include "llvm/BinaryFormat/ELF.h"
17#include "llvm/Support/Endian.h"
18
19using namespace llvm;
20using namespace llvm::support::endian;
21using namespace llvm::ELF;
22using namespace lld;
23using namespace lld::elf;
24
25// Page(Expr) is the page address of the expression Expr, defined
26// as (Expr & ~0xFFF). (This applies even if the machine page size
27// supported by the platform has a different value.)
28uint64_t elf::getAArch64Page(uint64_t expr) {
29 return expr & ~static_cast<uint64_t>(0xFFF);
30}
31
32// A BTI landing pad is a valid target for an indirect branch when the Branch
33// Target Identification has been enabled. As linker generated branches are
34// via x16 the BTI landing pads are defined as: BTI C, BTI J, BTI JC, PACIASP,
35// PACIBSP.
36bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) {
37 // PLT entries accessed indirectly have a BTI c.
38 if (s.isInPlt(ctx))
39 return true;
40 Defined *d = dyn_cast<Defined>(Val: &s);
41 if (!isa_and_nonnull<InputSection>(Val: d->section))
42 // All places that we cannot disassemble are responsible for making
43 // the target a BTI landing pad.
44 return true;
45 InputSection *isec = cast<InputSection>(Val: d->section);
46 uint64_t off = d->value + a;
47 // Likely user error, but protect ourselves against out of bounds
48 // access.
49 if (off >= isec->getSize())
50 return true;
51 const uint8_t *buf = isec->content().begin();
52 // Synthetic sections may have a size but empty data - Assume that they won't
53 // contain a landing pad
54 if (buf == nullptr && isa<SyntheticSection>(Val: isec))
55 return false;
56
57 const uint32_t instr = read32le(P: buf + off);
58 // All BTI instructions are HINT instructions which all have same encoding
59 // apart from bits [11:5]
60 if ((instr & 0xd503201f) == 0xd503201f &&
61 is_contained(Set: {/*PACIASP*/ 0xd503233f, /*PACIBSP*/ 0xd503237f,
62 /*BTI C*/ 0xd503245f, /*BTI J*/ 0xd503249f,
63 /*BTI JC*/ 0xd50324df},
64 Element: instr))
65 return true;
66 return false;
67}
68
69namespace {
70class AArch64 : public TargetInfo {
71public:
72 AArch64(Ctx &);
73 RelExpr getRelExpr(RelType type, const Symbol &s,
74 const uint8_t *loc) const override;
75 RelType getDynRel(RelType type) const override;
76 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
77 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
78 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
79 void writePltHeader(uint8_t *buf) const override;
80 void writePlt(uint8_t *buf, const Symbol &sym,
81 uint64_t pltEntryAddr) const override;
82 template <class ELFT, class RelTy>
83 void scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels);
84 void scanSection(InputSectionBase &sec) override {
85 if (ctx.arg.ekind == ELF64BEKind)
86 elf::scanSection1<AArch64, ELF64BE>(target&: *this, sec);
87 else
88 elf::scanSection1<AArch64, ELF64LE>(target&: *this, sec);
89 }
90 bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
91 uint64_t branchAddr, const Symbol &s,
92 int64_t a) const override;
93 uint32_t getThunkSectionSpacing() const override;
94 bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
95 bool usesOnlyLowPageBits(RelType type) const override;
96 void relocate(uint8_t *loc, const Relocation &rel,
97 uint64_t val) const override;
98 void relocateAlloc(InputSection &sec, uint8_t *buf) const override;
99 void applyBranchToBranchOpt() const override;
100
101private:
102 void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
103 void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
104 void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
105};
106
107struct AArch64Relaxer {
108 Ctx &ctx;
109 bool safeToRelaxAdrpLdr = false;
110
111 AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs);
112 bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
113 uint64_t secAddr, uint8_t *buf) const;
114 bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
115 uint64_t secAddr, uint8_t *buf) const;
116};
117} // namespace
118
// Return the bits [start, end] (both inclusive) of val, shifted right so that
// bit `start` becomes bit 0.
// For instance, getBits(0xF0, 4, 8) returns 0xF.
//
// A field covering all 64 bits (start == 0, end == 63) is handled explicitly,
// because shifting a 64-bit value by 64 is undefined behaviour.
static uint64_t getBits(uint64_t val, int start, int end) {
  const int width = end + 1 - start;
  const uint64_t mask = width >= 64 ? ~static_cast<uint64_t>(0)
                                    : (static_cast<uint64_t>(1) << width) - 1;
  return (val >> start) & mask;
}
125
126AArch64::AArch64(Ctx &ctx) : TargetInfo(ctx) {
127 copyRel = R_AARCH64_COPY;
128 relativeRel = R_AARCH64_RELATIVE;
129 iRelativeRel = R_AARCH64_IRELATIVE;
130 iRelSymbolicRel = R_AARCH64_FUNCINIT64;
131 gotRel = R_AARCH64_GLOB_DAT;
132 pltRel = R_AARCH64_JUMP_SLOT;
133 symbolicRel = R_AARCH64_ABS64;
134 tlsDescRel = R_AARCH64_TLSDESC;
135 tlsGotRel = R_AARCH64_TLS_TPREL64;
136 pltHeaderSize = 32;
137 pltEntrySize = 16;
138 ipltEntrySize = 16;
139 defaultMaxPageSize = 65536;
140
141 // Align to the 2 MiB page size (known as a superpage or huge page).
142 // FreeBSD automatically promotes 2 MiB-aligned allocations.
143 defaultImageBase = 0x200000;
144
145 needsThunks = true;
146}
147
148// Only needed to support relocations used by relocateNonAlloc and
149// preprocessRelocs.
150RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
151 const uint8_t *loc) const {
152 switch (type) {
153 case R_AARCH64_ABS32:
154 case R_AARCH64_ABS64:
155 return R_ABS;
156 case R_AARCH64_PREL32:
157 case R_AARCH64_PREL64:
158 return R_PC;
159 case R_AARCH64_TLS_DTPREL64:
160 return R_DTPREL;
161 case R_AARCH64_NONE:
162 return R_NONE;
163 default:
164 Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
165 << ") against symbol " << &s;
166 return R_NONE;
167 }
168}
169
170bool AArch64::usesOnlyLowPageBits(RelType type) const {
171 switch (type) {
172 default:
173 return false;
174 case R_AARCH64_ADD_ABS_LO12_NC:
175 case R_AARCH64_LD64_GOT_LO12_NC:
176 case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
177 case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
178 case R_AARCH64_LDST128_ABS_LO12_NC:
179 case R_AARCH64_LDST16_ABS_LO12_NC:
180 case R_AARCH64_LDST32_ABS_LO12_NC:
181 case R_AARCH64_LDST64_ABS_LO12_NC:
182 case R_AARCH64_LDST8_ABS_LO12_NC:
183 case R_AARCH64_TLSDESC_ADD_LO12:
184 case R_AARCH64_TLSDESC_LD64_LO12:
185 case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
186 return true;
187 }
188}
189
190template <class ELFT, class RelTy>
191void AArch64::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
192 RelocScan rs(ctx, &sec);
193 sec.relocations.reserve(N: rels.size());
194
195 for (auto it = rels.begin(); it != rels.end(); ++it) {
196 const RelTy &rel = *it;
197 uint32_t symIdx = rel.getSymbol(false);
198 Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx);
199 uint64_t offset = rel.r_offset;
200 RelType type = rel.getType(false);
201 if (sym.isUndefined() && symIdx != 0 &&
202 rs.maybeReportUndefined(sym&: cast<Undefined>(Val&: sym), offset))
203 continue;
204 int64_t addend = rs.getAddend<ELFT>(rel, type);
205 RelExpr expr;
206 // Relocation types that only need a RelExpr set `expr` and break out of
207 // the switch to reach rs.process(). Types that need special handling
208 // (fast-path helpers, TLS) call a handler and use `continue`.
209 switch (type) {
210 case R_AARCH64_NONE:
211 continue;
212
213 // Absolute relocations:
214 case R_AARCH64_ABS16:
215 case R_AARCH64_ABS32:
216 case R_AARCH64_ABS64:
217 case R_AARCH64_FUNCINIT64:
218 case R_AARCH64_ADD_ABS_LO12_NC:
219 case R_AARCH64_LDST128_ABS_LO12_NC:
220 case R_AARCH64_LDST16_ABS_LO12_NC:
221 case R_AARCH64_LDST32_ABS_LO12_NC:
222 case R_AARCH64_LDST64_ABS_LO12_NC:
223 case R_AARCH64_LDST8_ABS_LO12_NC:
224 case R_AARCH64_MOVW_SABS_G0:
225 case R_AARCH64_MOVW_SABS_G1:
226 case R_AARCH64_MOVW_SABS_G2:
227 case R_AARCH64_MOVW_UABS_G0:
228 case R_AARCH64_MOVW_UABS_G0_NC:
229 case R_AARCH64_MOVW_UABS_G1:
230 case R_AARCH64_MOVW_UABS_G1_NC:
231 case R_AARCH64_MOVW_UABS_G2:
232 case R_AARCH64_MOVW_UABS_G2_NC:
233 case R_AARCH64_MOVW_UABS_G3:
234 expr = R_ABS;
235 break;
236
237 case R_AARCH64_AUTH_ABS64:
238 expr = RE_AARCH64_AUTH;
239 break;
240
241 case R_AARCH64_PATCHINST:
242 if (!isAbsolute(sym))
243 Err(ctx) << getErrorLoc(ctx, loc: sec.content().data() + offset)
244 << "R_AARCH64_PATCHINST relocation against non-absolute "
245 "symbol "
246 << &sym;
247 expr = R_ABS;
248 break;
249
250 // PC-relative relocations:
251 case R_AARCH64_PREL16:
252 case R_AARCH64_PREL32:
253 case R_AARCH64_PREL64:
254 case R_AARCH64_ADR_PREL_LO21:
255 case R_AARCH64_LD_PREL_LO19:
256 case R_AARCH64_MOVW_PREL_G0:
257 case R_AARCH64_MOVW_PREL_G0_NC:
258 case R_AARCH64_MOVW_PREL_G1:
259 case R_AARCH64_MOVW_PREL_G1_NC:
260 case R_AARCH64_MOVW_PREL_G2:
261 case R_AARCH64_MOVW_PREL_G2_NC:
262 case R_AARCH64_MOVW_PREL_G3:
263 rs.processR_PC(type, offset, addend, sym);
264 continue;
265
266 // Page-PC relocations:
267 case R_AARCH64_ADR_PREL_PG_HI21:
268 case R_AARCH64_ADR_PREL_PG_HI21_NC:
269 expr = RE_AARCH64_PAGE_PC;
270 break;
271
272 // PLT-generating relocations:
273 case R_AARCH64_PLT32:
274 sym.thunkAccessed = true;
275 [[fallthrough]];
276 case R_AARCH64_CALL26:
277 case R_AARCH64_CONDBR19:
278 case R_AARCH64_JUMP26:
279 case R_AARCH64_TSTBR14:
280 rs.processR_PLT_PC(type, offset, addend, sym);
281 continue;
282
283 // GOT relocations:
284 case R_AARCH64_ADR_GOT_PAGE:
285 expr = RE_AARCH64_GOT_PAGE_PC;
286 break;
287 case R_AARCH64_LD64_GOT_LO12_NC:
288 expr = R_GOT;
289 break;
290 case R_AARCH64_LD64_GOTPAGE_LO15:
291 expr = RE_AARCH64_GOT_PAGE;
292 break;
293 case R_AARCH64_GOTPCREL32:
294 case R_AARCH64_GOT_LD_PREL19:
295 expr = R_GOT_PC;
296 break;
297
298 // AUTH GOT relocations. Set NEEDS_GOT_AUTH to detect incompatibility with
299 // NEEDS_GOT_NONAUTH. rs.process does not set the flag.
300 case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
301 case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
302 sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH);
303 rs.processAux(expr: R_GOT, type, offset, sym, addend);
304 continue;
305 case R_AARCH64_AUTH_GOT_LD_PREL19:
306 case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
307 sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH);
308 rs.processAux(expr: R_GOT_PC, type, offset, sym, addend);
309 continue;
310 case R_AARCH64_AUTH_ADR_GOT_PAGE:
311 sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH);
312 rs.processAux(expr: RE_AARCH64_GOT_PAGE_PC, type, offset, sym, addend);
313 continue;
314
315 // TLS LE relocations:
316 case R_AARCH64_TLSLE_ADD_TPREL_HI12:
317 case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
318 case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
319 case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
320 case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
321 case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
322 case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
323 case R_AARCH64_TLSLE_MOVW_TPREL_G0:
324 case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
325 case R_AARCH64_TLSLE_MOVW_TPREL_G1:
326 case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
327 case R_AARCH64_TLSLE_MOVW_TPREL_G2:
328 if (rs.checkTlsLe(offset, sym, type))
329 continue;
330 expr = R_TPREL;
331 break;
332
333 // TLS IE relocations:
334 case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
335 rs.handleTlsIe(ieExpr: RE_AARCH64_GOT_PAGE_PC, type, offset, addend, sym);
336 continue;
337 case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
338 rs.handleTlsIe(ieExpr: R_GOT, type, offset, addend, sym);
339 continue;
340
341 // TLSDESC relocations:
342 case R_AARCH64_TLSDESC_ADR_PAGE21:
343 rs.handleTlsDesc(sharedExpr: RE_AARCH64_TLSDESC_PAGE, ieExpr: RE_AARCH64_GOT_PAGE_PC, type,
344 offset, addend, sym);
345 continue;
346 case R_AARCH64_TLSDESC_LD64_LO12:
347 case R_AARCH64_TLSDESC_ADD_LO12:
348 rs.handleTlsDesc(sharedExpr: R_TLSDESC, ieExpr: R_GOT, type, offset, addend, sym);
349 continue;
350 case R_AARCH64_TLSDESC_CALL:
351 sym.setFlags(NEEDS_TLSDESC_NONAUTH);
352 if (!ctx.arg.shared)
353 sec.addReloc(r: {.expr: R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
354 continue;
355
356 // AUTH TLSDESC relocations. Do not optimize to LE/IE because PAUTHELF64
357 // only supports the descriptor based TLS (TLSDESC).
358 // https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst#general-restrictions
359 case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
360 sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH);
361 sec.addReloc(r: {.expr: RE_AARCH64_TLSDESC_PAGE, .type: type, .offset: offset, .addend: addend, .sym: &sym});
362 continue;
363 case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
364 case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
365 sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH);
366 sec.addReloc(r: {.expr: R_TLSDESC, .type: type, .offset: offset, .addend: addend, .sym: &sym});
367 continue;
368
369 default:
370 Err(ctx) << getErrorLoc(ctx, loc: sec.content().data() + offset)
371 << "unknown relocation (" << type.v << ") against symbol "
372 << &sym;
373 continue;
374 }
375 rs.process(expr, type, offset, sym, addend);
376 }
377
378 if (ctx.arg.branchToBranch)
379 llvm::stable_sort(sec.relocs(),
380 [](auto &l, auto &r) { return l.offset < r.offset; });
381}
382
383RelType AArch64::getDynRel(RelType type) const {
384 if (type == R_AARCH64_ABS64 || type == R_AARCH64_AUTH_ABS64 ||
385 type == R_AARCH64_FUNCINIT64)
386 return type;
387 return R_AARCH64_NONE;
388}
389
390int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
391 switch (type) {
392 case R_AARCH64_TLSDESC:
393 return read64(ctx, p: buf + 8);
394 case R_AARCH64_NONE:
395 case R_AARCH64_GLOB_DAT:
396 case R_AARCH64_AUTH_GLOB_DAT:
397 case R_AARCH64_JUMP_SLOT:
398 return 0;
399 case R_AARCH64_ABS16:
400 case R_AARCH64_PREL16:
401 return SignExtend64<16>(x: read16(ctx, p: buf));
402 case R_AARCH64_ABS32:
403 case R_AARCH64_PREL32:
404 return SignExtend64<32>(x: read32(ctx, p: buf));
405 case R_AARCH64_ABS64:
406 case R_AARCH64_PREL64:
407 case R_AARCH64_RELATIVE:
408 case R_AARCH64_IRELATIVE:
409 case R_AARCH64_TLS_TPREL64:
410 return read64(ctx, p: buf);
411
412 // The following relocation types all point at instructions, and
413 // relocate an immediate field in the instruction.
414 //
415 // The general rule, from AAELF64 §5.7.2 "Addends and PC-bias",
416 // says: "If the relocation relocates an instruction the immediate
417 // field of the instruction is extracted, scaled as required by
418 // the instruction field encoding, and sign-extended to 64 bits".
419
420 // The R_AARCH64_MOVW family operates on wide MOV/MOVK/MOVZ
421 // instructions, which have a 16-bit immediate field with its low
422 // bit in bit 5 of the instruction encoding. When the immediate
423 // field is used as an implicit addend for REL-type relocations,
424 // it is treated as added to the low bits of the output value, not
425 // shifted depending on the relocation type.
426 //
427 // This allows REL relocations to express the requirement 'please
428 // add 12345 to this symbol value and give me the four 16-bit
429 // chunks of the result', by putting the same addend 12345 in all
430 // four instructions. Carries between the 16-bit chunks are
431 // handled correctly, because the whole 64-bit addition is done
432 // once per relocation.
433 case R_AARCH64_MOVW_UABS_G0:
434 case R_AARCH64_MOVW_UABS_G0_NC:
435 case R_AARCH64_MOVW_UABS_G1:
436 case R_AARCH64_MOVW_UABS_G1_NC:
437 case R_AARCH64_MOVW_UABS_G2:
438 case R_AARCH64_MOVW_UABS_G2_NC:
439 case R_AARCH64_MOVW_UABS_G3:
440 return SignExtend64<16>(x: getBits(val: read32le(P: buf), start: 5, end: 20));
441
442 // R_AARCH64_TSTBR14 points at a TBZ or TBNZ instruction, which
443 // has a 14-bit offset measured in instructions, i.e. shifted left
444 // by 2.
445 case R_AARCH64_TSTBR14:
446 return SignExtend64<16>(x: getBits(val: read32le(P: buf), start: 5, end: 18) << 2);
447
448 // R_AARCH64_CONDBR19 operates on the ordinary B.cond instruction,
449 // which has a 19-bit offset measured in instructions.
450 //
451 // R_AARCH64_LD_PREL_LO19 operates on the LDR (literal)
452 // instruction, which also has a 19-bit offset, measured in 4-byte
453 // chunks. So the calculation is the same as for
454 // R_AARCH64_CONDBR19.
455 case R_AARCH64_CONDBR19:
456 case R_AARCH64_LD_PREL_LO19:
457 return SignExtend64<21>(x: getBits(val: read32le(P: buf), start: 5, end: 23) << 2);
458
459 // R_AARCH64_ADD_ABS_LO12_NC operates on ADD (immediate). The
460 // immediate can optionally be shifted left by 12 bits, but this
461 // relocation is intended for the case where it is not.
462 case R_AARCH64_ADD_ABS_LO12_NC:
463 return SignExtend64<12>(x: getBits(val: read32le(P: buf), start: 10, end: 21));
464
465 // R_AARCH64_ADR_PREL_LO21 operates on an ADR instruction, whose
466 // 21-bit immediate is split between two bits high up in the word
467 // (in fact the two _lowest_ order bits of the value) and 19 bits
468 // lower down.
469 //
470 // R_AARCH64_ADR_PREL_PG_HI21[_NC] operate on an ADRP instruction,
471 // which encodes the immediate in the same way, but will shift it
472 // left by 12 bits when the instruction executes. For the same
473 // reason as the MOVW family, we don't apply that left shift here.
474 case R_AARCH64_ADR_PREL_LO21:
475 case R_AARCH64_ADR_PREL_PG_HI21:
476 case R_AARCH64_ADR_PREL_PG_HI21_NC:
477 return SignExtend64<21>(x: (getBits(val: read32le(P: buf), start: 5, end: 23) << 2) |
478 getBits(val: read32le(P: buf), start: 29, end: 30));
479
480 // R_AARCH64_{JUMP,CALL}26 operate on B and BL, which have a
481 // 26-bit offset measured in instructions.
482 case R_AARCH64_JUMP26:
483 case R_AARCH64_CALL26:
484 return SignExtend64<28>(x: getBits(val: read32le(P: buf), start: 0, end: 25) << 2);
485
486 default:
487 InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
488 return 0;
489 }
490}
491
492void AArch64::writeGotPlt(uint8_t *buf, const Symbol &) const {
493 write64(ctx, p: buf, v: ctx.in.plt->getVA());
494}
495
496void AArch64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
497 if (ctx.arg.writeAddends)
498 write64(ctx, p: buf, v: s.getVA(ctx));
499}
500
501void AArch64::writePltHeader(uint8_t *buf) const {
502 const uint8_t pltData[] = {
503 0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
504 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
505 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[2]))]
506 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[2]))
507 0x20, 0x02, 0x1f, 0xd6, // br x17
508 0x1f, 0x20, 0x03, 0xd5, // nop
509 0x1f, 0x20, 0x03, 0xd5, // nop
510 0x1f, 0x20, 0x03, 0xd5 // nop
511 };
512 memcpy(dest: buf, src: pltData, n: sizeof(pltData));
513
514 uint64_t got = ctx.in.gotPlt->getVA();
515 uint64_t plt = ctx.in.plt->getVA();
516 relocateNoSym(loc: buf + 4, type: R_AARCH64_ADR_PREL_PG_HI21,
517 val: getAArch64Page(expr: got + 16) - getAArch64Page(expr: plt + 4));
518 relocateNoSym(loc: buf + 8, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + 16);
519 relocateNoSym(loc: buf + 12, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + 16);
520}
521
522void AArch64::writePlt(uint8_t *buf, const Symbol &sym,
523 uint64_t pltEntryAddr) const {
524 const uint8_t inst[] = {
525 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
526 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
527 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[n]))
528 0x20, 0x02, 0x1f, 0xd6 // br x17
529 };
530 memcpy(dest: buf, src: inst, n: sizeof(inst));
531
532 uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
533 relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
534 val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
535 relocateNoSym(loc: buf + 4, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
536 relocateNoSym(loc: buf + 8, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);
537}
538
539bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
540 uint64_t branchAddr, const Symbol &s,
541 int64_t a) const {
542 // If s is an undefined weak symbol and does not have a PLT entry then it will
543 // be resolved as a branch to the next instruction. If it is hidden, its
544 // binding has been converted to local, so we just check isUndefined() here. A
545 // undefined non-weak symbol will have been errored.
546 if (s.isUndefined() && !s.isInPlt(ctx))
547 return false;
548 // ELF for the ARM 64-bit architecture, section Call and Jump relocations
549 // only permits range extension thunks for R_AARCH64_CALL26 and
550 // R_AARCH64_JUMP26 relocation types.
551 if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
552 type != R_AARCH64_PLT32)
553 return false;
554 uint64_t dst = expr == R_PLT_PC ? s.getPltVA(ctx) : s.getVA(ctx, addend: a);
555 return !inBranchRange(type, src: branchAddr, dst);
556}
557
558uint32_t AArch64::getThunkSectionSpacing() const {
559 // See comment in Arch/ARM.cpp for a more detailed explanation of
560 // getThunkSectionSpacing(). For AArch64 the only branches we are permitted to
561 // Thunk have a range of +/- 128 MiB
562 return (128 * 1024 * 1024) - 0x30000;
563}
564
565bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
566 if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
567 type != R_AARCH64_PLT32)
568 return true;
569 // The AArch64 call and unconditional branch instructions have a range of
570 // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
571 uint64_t range =
572 type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024);
573 if (dst > src) {
574 // Immediate of branch is signed.
575 range -= 4;
576 return dst - src <= range;
577 }
578 return src - dst <= range;
579}
580
581static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
582 uint32_t immLo = (imm & 0x3) << 29;
583 uint32_t immHi = (imm & 0x1FFFFC) << 3;
584 uint64_t mask = (0x3 << 29) | (0x1FFFFC << 3);
585 write32le(P: l, V: (read32le(P: l) & ~mask) | immLo | immHi);
586}
587
588static void writeMaskedBits32le(uint8_t *p, int32_t v, uint32_t mask) {
589 write32le(P: p, V: (read32le(P: p) & ~mask) | v);
590}
591
592// Update the immediate field in a AARCH64 ldr, str, and add instruction.
593static void write32Imm12(uint8_t *l, uint64_t imm) {
594 writeMaskedBits32le(p: l, v: (imm & 0xFFF) << 10, mask: 0xFFF << 10);
595}
596
597// Update the immediate field in an AArch64 movk, movn or movz instruction
598// for a signed relocation, and update the opcode of a movn or movz instruction
599// to match the sign of the operand.
600static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
601 uint32_t inst = read32le(P: loc);
602 // Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
603 if (!(inst & (1 << 29))) {
604 // movn or movz.
605 if (imm & 0x10000) {
606 // Change opcode to movn, which takes an inverted operand.
607 imm ^= 0xFFFF;
608 inst &= ~(1 << 30);
609 } else {
610 // Change opcode to movz.
611 inst |= 1 << 30;
612 }
613 }
614 write32le(P: loc, V: inst | ((imm & 0xFFFF) << 5));
615}
616
617void AArch64::relocate(uint8_t *loc, const Relocation &rel,
618 uint64_t val) const {
619 switch (rel.type) {
620 case R_AARCH64_ABS16:
621 case R_AARCH64_PREL16:
622 checkIntUInt(ctx, loc, v: val, n: 16, rel);
623 write16(ctx, p: loc, v: val);
624 break;
625 case R_AARCH64_ABS32:
626 case R_AARCH64_PREL32:
627 checkIntUInt(ctx, loc, v: val, n: 32, rel);
628 write32(ctx, p: loc, v: val);
629 break;
630 case R_AARCH64_PATCHINST:
631 if (!rel.sym->isUndefined()) {
632 checkUInt(ctx, loc, v: val, n: 32, rel);
633 write32le(P: loc, V: val);
634 }
635 break;
636 case R_AARCH64_PLT32:
637 case R_AARCH64_GOTPCREL32:
638 checkInt(ctx, loc, v: val, n: 32, rel);
639 write32(ctx, p: loc, v: val);
640 break;
641 case R_AARCH64_ABS64:
642 write64(ctx, p: loc, v: val);
643 break;
644 case R_AARCH64_PREL64:
645 write64(ctx, p: loc, v: val);
646 break;
647 case R_AARCH64_AUTH_ABS64:
648 // This is used for the addend of a .relr.auth.dyn entry,
649 // which is a 32-bit value; the upper 32 bits are used to
650 // encode the schema.
651 checkInt(ctx, loc, v: val, n: 32, rel);
652 write32(ctx, p: loc, v: val);
653 break;
654 case R_AARCH64_TLS_DTPREL64:
655 write64(ctx, p: loc, v: val);
656 break;
657 case R_AARCH64_ADD_ABS_LO12_NC:
658 case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
659 write32Imm12(l: loc, imm: val);
660 break;
661 case R_AARCH64_ADR_GOT_PAGE:
662 case R_AARCH64_AUTH_ADR_GOT_PAGE:
663 case R_AARCH64_ADR_PREL_PG_HI21:
664 case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
665 case R_AARCH64_TLSDESC_ADR_PAGE21:
666 case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
667 checkInt(ctx, loc, v: val, n: 33, rel);
668 [[fallthrough]];
669 case R_AARCH64_ADR_PREL_PG_HI21_NC:
670 write32AArch64Addr(l: loc, imm: val >> 12);
671 break;
672 case R_AARCH64_ADR_PREL_LO21:
673 case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
674 checkInt(ctx, loc, v: val, n: 21, rel);
675 write32AArch64Addr(l: loc, imm: val);
676 break;
677 case R_AARCH64_JUMP26:
678 // Normally we would just write the bits of the immediate field, however
679 // when patching instructions for the cpu errata fix -fix-cortex-a53-843419
680 // we want to replace a non-branch instruction with a branch immediate
681 // instruction. By writing all the bits of the instruction including the
682 // opcode and the immediate (0 001 | 01 imm26) we can do this
683 // transformation by placing a R_AARCH64_JUMP26 relocation at the offset of
684 // the instruction we want to patch.
685 write32le(P: loc, V: 0x14000000);
686 [[fallthrough]];
687 case R_AARCH64_CALL26:
688 checkInt(ctx, loc, v: val, n: 28, rel);
689 writeMaskedBits32le(p: loc, v: (val & 0x0FFFFFFC) >> 2, mask: 0x0FFFFFFC >> 2);
690 break;
691 case R_AARCH64_CONDBR19:
692 case R_AARCH64_LD_PREL_LO19:
693 case R_AARCH64_GOT_LD_PREL19:
694 case R_AARCH64_AUTH_GOT_LD_PREL19:
695 checkAlignment(ctx, loc, v: val, n: 4, rel);
696 checkInt(ctx, loc, v: val, n: 21, rel);
697 writeMaskedBits32le(p: loc, v: (val & 0x1FFFFC) << 3, mask: 0x1FFFFC << 3);
698 break;
699 case R_AARCH64_LDST8_ABS_LO12_NC:
700 case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
701 write32Imm12(l: loc, imm: getBits(val, start: 0, end: 11));
702 break;
703 case R_AARCH64_LDST16_ABS_LO12_NC:
704 case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
705 checkAlignment(ctx, loc, v: val, n: 2, rel);
706 write32Imm12(l: loc, imm: getBits(val, start: 1, end: 11));
707 break;
708 case R_AARCH64_LDST32_ABS_LO12_NC:
709 case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
710 checkAlignment(ctx, loc, v: val, n: 4, rel);
711 write32Imm12(l: loc, imm: getBits(val, start: 2, end: 11));
712 break;
713 case R_AARCH64_LDST64_ABS_LO12_NC:
714 case R_AARCH64_LD64_GOT_LO12_NC:
715 case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
716 case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
717 case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
718 case R_AARCH64_TLSDESC_LD64_LO12:
719 case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
720 checkAlignment(ctx, loc, v: val, n: 8, rel);
721 write32Imm12(l: loc, imm: getBits(val, start: 3, end: 11));
722 break;
723 case R_AARCH64_LDST128_ABS_LO12_NC:
724 case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
725 checkAlignment(ctx, loc, v: val, n: 16, rel);
726 write32Imm12(l: loc, imm: getBits(val, start: 4, end: 11));
727 break;
728 case R_AARCH64_LD64_GOTPAGE_LO15:
729 checkAlignment(ctx, loc, v: val, n: 8, rel);
730 write32Imm12(l: loc, imm: getBits(val, start: 3, end: 14));
731 break;
732 case R_AARCH64_MOVW_UABS_G0:
733 checkUInt(ctx, loc, v: val, n: 16, rel);
734 [[fallthrough]];
735 case R_AARCH64_MOVW_UABS_G0_NC:
736 writeMaskedBits32le(p: loc, v: (val & 0xFFFF) << 5, mask: 0xFFFF << 5);
737 break;
738 case R_AARCH64_MOVW_UABS_G1:
739 checkUInt(ctx, loc, v: val, n: 32, rel);
740 [[fallthrough]];
741 case R_AARCH64_MOVW_UABS_G1_NC:
742 writeMaskedBits32le(p: loc, v: (val & 0xFFFF0000) >> 11, mask: 0xFFFF0000 >> 11);
743 break;
744 case R_AARCH64_MOVW_UABS_G2:
745 checkUInt(ctx, loc, v: val, n: 48, rel);
746 [[fallthrough]];
747 case R_AARCH64_MOVW_UABS_G2_NC:
748 writeMaskedBits32le(p: loc, v: (val & 0xFFFF00000000) >> 27,
749 mask: 0xFFFF00000000 >> 27);
750 break;
751 case R_AARCH64_MOVW_UABS_G3:
752 writeMaskedBits32le(p: loc, v: (val & 0xFFFF000000000000) >> 43,
753 mask: 0xFFFF000000000000 >> 43);
754 break;
755 case R_AARCH64_MOVW_PREL_G0:
756 case R_AARCH64_MOVW_SABS_G0:
757 case R_AARCH64_TLSLE_MOVW_TPREL_G0:
758 checkInt(ctx, loc, v: val, n: 17, rel);
759 [[fallthrough]];
760 case R_AARCH64_MOVW_PREL_G0_NC:
761 case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
762 writeSMovWImm(loc, imm: val);
763 break;
764 case R_AARCH64_MOVW_PREL_G1:
765 case R_AARCH64_MOVW_SABS_G1:
766 case R_AARCH64_TLSLE_MOVW_TPREL_G1:
767 checkInt(ctx, loc, v: val, n: 33, rel);
768 [[fallthrough]];
769 case R_AARCH64_MOVW_PREL_G1_NC:
770 case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
771 writeSMovWImm(loc, imm: val >> 16);
772 break;
773 case R_AARCH64_MOVW_PREL_G2:
774 case R_AARCH64_MOVW_SABS_G2:
775 case R_AARCH64_TLSLE_MOVW_TPREL_G2:
776 checkInt(ctx, loc, v: val, n: 49, rel);
777 [[fallthrough]];
778 case R_AARCH64_MOVW_PREL_G2_NC:
779 writeSMovWImm(loc, imm: val >> 32);
780 break;
781 case R_AARCH64_MOVW_PREL_G3:
782 writeSMovWImm(loc, imm: val >> 48);
783 break;
784 case R_AARCH64_TSTBR14:
785 checkInt(ctx, loc, v: val, n: 16, rel);
786 writeMaskedBits32le(p: loc, v: (val & 0xFFFC) << 3, mask: 0xFFFC << 3);
787 break;
788 case R_AARCH64_TLSLE_ADD_TPREL_HI12:
789 checkUInt(ctx, loc, v: val, n: 24, rel);
790 if (ctx.arg.relax && (val >> 12) == 0) {
791 uint32_t inst = read32le(P: loc);
792 // The W-form zero-extends Xd, so only the X-form is a nop.
793 if ((inst & (1u << 31)) && (inst & 0x1f) == ((inst >> 5) & 0x1f)) {
794 write32le(P: loc, V: 0xd503201f); // nop
795 break;
796 }
797 }
798 write32Imm12(l: loc, imm: val >> 12);
799 break;
800 case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
801 case R_AARCH64_TLSDESC_ADD_LO12:
802 case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
803 write32Imm12(l: loc, imm: val);
804 break;
805 case R_AARCH64_TLSDESC:
806 // For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
807 write64(ctx, p: loc + 8, v: val);
808 break;
809 default:
810 llvm_unreachable("unknown relocation");
811 }
812}
813
814void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
815 uint64_t val) const {
816 // TLSDESC Global-Dynamic relocation are in the form:
817 // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21]
818 // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12]
819 // add x0, x0, :tlsdesc_los:v [R_AARCH64_TLSDESC_ADD_LO12]
820 // .tlsdesccall [R_AARCH64_TLSDESC_CALL]
821 // blr x1
822 // And it can optimized to:
823 // movz x0, #0x0, lsl #16
824 // movk x0, #0x10
825 // nop
826 // nop
827 checkUInt(ctx, loc, v: val, n: 32, rel);
828
829 switch (rel.type) {
830 case R_AARCH64_TLSDESC_ADD_LO12:
831 case R_AARCH64_TLSDESC_CALL:
832 write32le(P: loc, V: 0xd503201f); // nop
833 return;
834 case R_AARCH64_TLSDESC_ADR_PAGE21:
835 write32le(P: loc, V: 0xd2a00000 | (((val >> 16) & 0xffff) << 5)); // movz
836 return;
837 case R_AARCH64_TLSDESC_LD64_LO12:
838 write32le(P: loc, V: 0xf2800000 | ((val & 0xffff) << 5)); // movk
839 return;
840 default:
841 llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
842 }
843}
844
845void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
846 uint64_t val) const {
847 // TLSDESC Global-Dynamic relocation are in the form:
848 // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21]
849 // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12]
850 // add x0, x0, :tlsdesc_los:v [R_AARCH64_TLSDESC_ADD_LO12]
851 // .tlsdesccall [R_AARCH64_TLSDESC_CALL]
852 // blr x1
853 // And it can optimized to:
854 // adrp x0, :gottprel:v
855 // ldr x0, [x0, :gottprel_lo12:v]
856 // nop
857 // nop
858
859 switch (rel.type) {
860 case R_AARCH64_TLSDESC_ADD_LO12:
861 case R_AARCH64_TLSDESC_CALL:
862 write32le(P: loc, V: 0xd503201f); // nop
863 break;
864 case R_AARCH64_TLSDESC_ADR_PAGE21:
865 write32le(P: loc, V: 0x90000000); // adrp
866 relocateNoSym(loc, type: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
867 break;
868 case R_AARCH64_TLSDESC_LD64_LO12:
869 write32le(P: loc, V: 0xf9400000); // ldr
870 relocateNoSym(loc, type: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
871 break;
872 default:
873 llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
874 }
875}
876
877void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
878 uint64_t val) const {
879 checkUInt(ctx, loc, v: val, n: 32, rel);
880
881 if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
882 // Generate MOVZ.
883 uint32_t regNo = read32le(P: loc) & 0x1f;
884 write32le(P: loc, V: (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5));
885 return;
886 }
887 if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
888 // Generate MOVK.
889 uint32_t regNo = read32le(P: loc) & 0x1f;
890 write32le(P: loc, V: (0xf2800000 | regNo) | ((val & 0xffff) << 5));
891 return;
892 }
893 llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
894}
895
896AArch64Relaxer::AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs)
897 : ctx(ctx) {
898 if (!ctx.arg.relax)
899 return;
900 // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
901 // always appear in pairs.
902 size_t i = 0;
903 const size_t size = relocs.size();
904 for (; i != size; ++i) {
905 if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
906 if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
907 ++i;
908 continue;
909 }
910 break;
911 } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
912 break;
913 }
914 }
915 safeToRelaxAdrpLdr = i == size;
916}
917
918bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
919 const Relocation &addRel, uint64_t secAddr,
920 uint8_t *buf) const {
921 // When the address of sym is within the range of ADR then
922 // we may relax
923 // ADRP xn, sym
924 // ADD xn, xn, :lo12: sym
925 // to
926 // NOP
927 // ADR xn, sym
928 if (!ctx.arg.relax || addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
929 return false;
930 // Check if the relocations apply to consecutive instructions.
931 if (adrpRel.offset + 4 != addRel.offset)
932 return false;
933 if (adrpRel.sym != addRel.sym)
934 return false;
935 if (adrpRel.addend != 0 || addRel.addend != 0)
936 return false;
937
938 uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
939 uint32_t addInstr = read32le(P: buf + addRel.offset);
940 // Check if the first instruction is ADRP and the second instruction is ADD.
941 if ((adrpInstr & 0x9f000000) != 0x90000000 ||
942 (addInstr & 0xffc00000) != 0x91000000)
943 return false;
944 uint32_t adrpDestReg = adrpInstr & 0x1f;
945 uint32_t addDestReg = addInstr & 0x1f;
946 uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
947 if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
948 return false;
949
950 Symbol &sym = *adrpRel.sym;
951 // Check if the address difference is within 1MiB range.
952 int64_t val = sym.getVA(ctx) - (secAddr + addRel.offset);
953 if (val < -1024 * 1024 || val >= 1024 * 1024)
954 return false;
955
956 Relocation adrRel = {.expr: R_ABS, .type: R_AARCH64_ADR_PREL_LO21, .offset: addRel.offset,
957 /*addend=*/0, .sym: &sym};
958 // nop
959 write32le(P: buf + adrpRel.offset, V: 0xd503201f);
960 // adr x_<dest_reg>
961 write32le(P: buf + adrRel.offset, V: 0x10000000 | adrpDestReg);
962 ctx.target->relocate(loc: buf + adrRel.offset, rel: adrRel, val);
963 return true;
964}
965
966bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
967 const Relocation &ldrRel, uint64_t secAddr,
968 uint8_t *buf) const {
969 if (!safeToRelaxAdrpLdr)
970 return false;
971
972 // When the definition of sym is not preemptible then we may
973 // be able to relax
974 // ADRP xn, :got: sym
975 // LDR xn, [ xn :got_lo12: sym]
976 // to
977 // ADRP xn, sym
978 // ADD xn, xn, :lo_12: sym
979
980 if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
981 ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
982 return false;
983 // Check if the relocations apply to consecutive instructions.
984 if (adrpRel.offset + 4 != ldrRel.offset)
985 return false;
986 // Check if the relocations reference the same symbol and
987 // skip undefined, preemptible and STT_GNU_IFUNC symbols.
988 if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
989 adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
990 return false;
991 // Check if the addends of the both relocations are zero.
992 if (adrpRel.addend != 0 || ldrRel.addend != 0)
993 return false;
994 uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
995 uint32_t ldrInstr = read32le(P: buf + ldrRel.offset);
996 // Check if the first instruction is ADRP and the second instruction is LDR.
997 if ((adrpInstr & 0x9f000000) != 0x90000000 ||
998 (ldrInstr & 0x3b000000) != 0x39000000)
999 return false;
1000 // Check the value of the sf bit.
1001 if (!(ldrInstr >> 31))
1002 return false;
1003 uint32_t adrpDestReg = adrpInstr & 0x1f;
1004 uint32_t ldrDestReg = ldrInstr & 0x1f;
1005 uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
1006 // Check if ADPR and LDR use the same register.
1007 if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
1008 return false;
1009
1010 Symbol &sym = *adrpRel.sym;
1011 // GOT references to absolute symbols can't be relaxed to use ADRP/ADD in
1012 // position-independent code because these instructions produce a relative
1013 // address.
1014 if (ctx.arg.isPic && !cast<Defined>(Val&: sym).section)
1015 return false;
1016 // Check if the address difference is within 4GB range.
1017 int64_t val =
1018 getAArch64Page(expr: sym.getVA(ctx)) - getAArch64Page(expr: secAddr + adrpRel.offset);
1019 if (val != llvm::SignExtend64(X: val, B: 33))
1020 return false;
1021
1022 Relocation adrpSymRel = {.expr: RE_AARCH64_PAGE_PC, .type: R_AARCH64_ADR_PREL_PG_HI21,
1023 .offset: adrpRel.offset, /*addend=*/0, .sym: &sym};
1024 Relocation addRel = {.expr: R_ABS, .type: R_AARCH64_ADD_ABS_LO12_NC, .offset: ldrRel.offset,
1025 /*addend=*/0, .sym: &sym};
1026
1027 // adrp x_<dest_reg>
1028 write32le(P: buf + adrpSymRel.offset, V: 0x90000000 | adrpDestReg);
1029 // add x_<dest reg>, x_<dest reg>
1030 write32le(P: buf + addRel.offset, V: 0x91000000 | adrpDestReg | (adrpDestReg << 5));
1031
1032 ctx.target->relocate(
1033 loc: buf + adrpSymRel.offset, rel: adrpSymRel,
1034 val: SignExtend64(X: getAArch64Page(expr: sym.getVA(ctx)) -
1035 getAArch64Page(expr: secAddr + adrpSymRel.offset),
1036 B: 64));
1037 ctx.target->relocate(loc: buf + addRel.offset, rel: addRel,
1038 val: SignExtend64(X: sym.getVA(ctx), B: 64));
1039 tryRelaxAdrpAdd(adrpRel: adrpSymRel, addRel, secAddr, buf);
1040 return true;
1041}
1042
1043// Tagged symbols have upper address bits that are added by the dynamic loader,
1044// and thus need the full 64-bit GOT entry. Do not relax such symbols.
1045static bool needsGotForMemtag(const Relocation &rel) {
1046 return rel.sym->isTagged() && needsGot(expr: rel.expr);
1047}
1048
1049void AArch64::relocateAlloc(InputSection &sec, uint8_t *buf) const {
1050 uint64_t secAddr = sec.getOutputSection()->addr + sec.outSecOff;
1051 const ArrayRef<Relocation> relocs = sec.relocs();
1052 AArch64Relaxer relaxer(ctx, relocs);
1053 for (size_t i = 0, size = relocs.size(); i != size; ++i) {
1054 const Relocation &rel = relocs[i];
1055 if (rel.expr == R_NONE) // See finalizeAddressDependentContent()
1056 continue;
1057 uint8_t *loc = buf + rel.offset;
1058 const uint64_t val = sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset);
1059
1060 if (needsGotForMemtag(rel)) {
1061 relocate(loc, rel, val);
1062 continue;
1063 }
1064
1065 switch (rel.type) {
1066 case R_AARCH64_ADR_GOT_PAGE:
1067 if (i + 1 < size &&
1068 relaxer.tryRelaxAdrpLdr(adrpRel: rel, ldrRel: relocs[i + 1], secAddr, buf)) {
1069 ++i;
1070 continue;
1071 }
1072 break;
1073 case R_AARCH64_ADR_PREL_PG_HI21:
1074 if (i + 1 < size &&
1075 relaxer.tryRelaxAdrpAdd(adrpRel: rel, addRel: relocs[i + 1], secAddr, buf)) {
1076 ++i;
1077 continue;
1078 }
1079 break;
1080
1081 case R_AARCH64_TLSDESC_ADR_PAGE21:
1082 case R_AARCH64_TLSDESC_LD64_LO12:
1083 case R_AARCH64_TLSDESC_ADD_LO12:
1084 case R_AARCH64_TLSDESC_CALL:
1085 if (rel.expr == R_TPREL)
1086 relaxTlsGdToLe(loc, rel, val);
1087 else if (rel.expr == RE_AARCH64_GOT_PAGE_PC || rel.expr == R_GOT)
1088 relaxTlsGdToIe(loc, rel, val);
1089 else
1090 relocate(loc, rel, val);
1091 continue;
1092 case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
1093 case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
1094 if (rel.expr == R_TPREL)
1095 relaxTlsIeToLe(loc, rel, val);
1096 else
1097 relocate(loc, rel, val);
1098 continue;
1099 default:
1100 break;
1101 }
1102
1103 relocate(loc, rel, val);
1104 }
1105}
1106
1107static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
1108 Relocation &r) {
1109 // Identify a control transfer relocation for the branch-to-branch
1110 // optimization. A "control transfer relocation" means a B or BL
1111 // target but it also includes relative vtable relocations for example.
1112 //
1113 // We require the relocation type to be JUMP26, CALL26 or PLT32. With a
1114 // relocation type of PLT32 the value may be assumed to be used for branching
1115 // directly to the symbol and the addend is only used to produce the relocated
1116 // value (hence the effective addend is always 0). This is because if a PLT is
1117 // needed the addend will be added to the address of the PLT, and it doesn't
1118 // make sense to branch into the middle of a PLT. For example, relative vtable
1119 // relocations use PLT32 and 0 or a positive value as the addend but still are
1120 // used to branch to the symbol.
1121 //
1122 // With JUMP26 or CALL26 the only reasonable interpretation of a non-zero
1123 // addend is that we are branching to symbol+addend so that becomes the
1124 // effective addend.
1125 if (r.type == R_AARCH64_PLT32)
1126 return 0;
1127 if (r.type == R_AARCH64_JUMP26 || r.type == R_AARCH64_CALL26)
1128 return r.addend;
1129 return std::nullopt;
1130}
1131
1132static std::pair<Relocation *, uint64_t>
1133getBranchInfoAtTarget(InputSection &is, uint64_t offset) {
1134 auto *i = llvm::partition_point(
1135 Range&: is.relocations, P: [&](Relocation &r) { return r.offset < offset; });
1136 if (i != is.relocations.end() && i->offset == offset &&
1137 i->type == R_AARCH64_JUMP26) {
1138 return {i, i->addend};
1139 }
1140 return {nullptr, 0};
1141}
1142
1143static void redirectControlTransferRelocations(Relocation &r1,
1144 const Relocation &r2) {
1145 r1.expr = r2.expr;
1146 r1.sym = r2.sym;
1147 // With PLT32 we must respect the original addend as that affects the value's
1148 // interpretation. With the other relocation types the original addend is
1149 // irrelevant because it referred to an offset within the original target
1150 // section so we overwrite it.
1151 if (r1.type == R_AARCH64_PLT32)
1152 r1.addend += r2.addend;
1153 else
1154 r1.addend = r2.addend;
1155}
1156
1157void AArch64::applyBranchToBranchOpt() const {
1158 applyBranchToBranchOptImpl(ctx, getControlTransferAddend,
1159 getBranchInfoAtTarget,
1160 redirectControlTransferRelocations);
1161}
1162
// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in Armv8.3-A, and Branch Target
// Identification (BTI), introduced in Armv8.5-A. The additional instructions
// used in the variant PLT sequences are encoded in the Hint space so they can
// be deployed on older architectures, which treat the instructions as a nop.
1168// PAC and BTI can be combined leading to the following combinations:
1169// writePltHeader
1170// writePltHeaderBti (no PAC Header needed)
1171// writePlt
1172// writePltBti (BTI only)
1173// writePltPac (PAC only)
1174// writePltBtiPac (BTI and PAC)
1175//
1176// When PAC is enabled the dynamic loader encrypts the address that it places
1177// in the .got.plt using the pacia1716 instruction which encrypts the value in
1178// x17 using the modifier in x16. The static linker places autia1716 before the
1179// indirect branch to x17 to authenticate the address in x17 with the modifier
1180// in x16. This makes it more difficult for an attacker to modify the value in
1181// the .got.plt.
1182//
1183// When BTI is enabled all indirect branches must land on a bti instruction.
1184// The static linker must place a bti instruction at the start of any PLT entry
1185// that may be the target of an indirect branch. As the PLT entries call the
1186// lazy resolver indirectly this must have a bti instruction at start. In
1187// general a bti instruction is not needed for a PLT entry as indirect calls
1188// are resolved to the function address and not the PLT entry for the function.
1189// There are a small number of cases where the PLT address can escape, such as
1190// taking the address of a function or ifunc via a non got-generating
1191// relocation, and a shared library refers to that symbol.
1192//
1193// We use the bti c variant of the instruction which permits indirect branches
1194// (br) via x16/x17 and indirect function calls (blr) via any register. The ABI
1195// guarantees that all indirect branches from code requiring BTI protection
1196// will go via x16/x17
1197
1198namespace {
1199class AArch64BtiPac final : public AArch64 {
1200public:
1201 AArch64BtiPac(Ctx &);
1202 void writePltHeader(uint8_t *buf) const override;
1203 void writePlt(uint8_t *buf, const Symbol &sym,
1204 uint64_t pltEntryAddr) const override;
1205
1206private:
1207 bool btiHeader; // bti instruction needed in PLT Header and Entry
1208 enum {
1209 PEK_NoAuth,
1210 PEK_AuthHint, // use autia1716 instr for authenticated branch in PLT entry
1211 PEK_Auth, // use braa instr for authenticated branch in PLT entry
1212 } pacEntryKind;
1213};
1214} // namespace
1215
1216AArch64BtiPac::AArch64BtiPac(Ctx &ctx) : AArch64(ctx) {
1217 btiHeader = (ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
1218 // A BTI (Branch Target Indicator) Plt Entry is only required if the
1219 // address of the PLT entry can be taken by the program, which permits an
1220 // indirect jump to the PLT entry. This can happen when the address
1221 // of the PLT entry for a function is canonicalised due to the address of
1222 // the function in an executable being taken by a shared library, or
1223 // non-preemptible ifunc referenced by non-GOT-generating, non-PLT-generating
1224 // relocations.
1225 // The PAC PLT entries require dynamic loader support and this isn't known
1226 // from properties in the objects, so we use the command line flag.
1227 // By default we only use hint-space instructions, but if we detect the
1228 // PAuthABI, which requires v8.3-A, we can use the non-hint space
1229 // instructions.
1230
1231 if (ctx.arg.zPacPlt) {
1232 if (ctx.aarch64PauthAbiCoreInfo && ctx.aarch64PauthAbiCoreInfo->isValid())
1233 pacEntryKind = PEK_Auth;
1234 else
1235 pacEntryKind = PEK_AuthHint;
1236 } else {
1237 pacEntryKind = PEK_NoAuth;
1238 }
1239
1240 if (btiHeader || (pacEntryKind != PEK_NoAuth)) {
1241 pltEntrySize = 24;
1242 ipltEntrySize = 24;
1243 }
1244}
1245
1246void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
1247 const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
1248 const uint8_t pltData[] = {
1249 0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
1250 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
1251 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[2]))]
1252 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[2]))
1253 0x20, 0x02, 0x1f, 0xd6, // br x17
1254 0x1f, 0x20, 0x03, 0xd5, // nop
1255 0x1f, 0x20, 0x03, 0xd5 // nop
1256 };
1257 const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop
1258
1259 uint64_t got = ctx.in.gotPlt->getVA();
1260 uint64_t plt = ctx.in.plt->getVA();
1261
1262 if (btiHeader) {
1263 // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
1264 // instruction.
1265 memcpy(dest: buf, src: btiData, n: sizeof(btiData));
1266 buf += sizeof(btiData);
1267 plt += sizeof(btiData);
1268 }
1269 memcpy(dest: buf, src: pltData, n: sizeof(pltData));
1270
1271 relocateNoSym(loc: buf + 4, type: R_AARCH64_ADR_PREL_PG_HI21,
1272 val: getAArch64Page(expr: got + 16) - getAArch64Page(expr: plt + 4));
1273 relocateNoSym(loc: buf + 8, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + 16);
1274 relocateNoSym(loc: buf + 12, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + 16);
1275 if (!btiHeader)
1276 // We didn't add the BTI c instruction so round out size with NOP.
1277 memcpy(dest: buf + sizeof(pltData), src: nopData, n: sizeof(nopData));
1278}
1279
1280void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
1281 uint64_t pltEntryAddr) const {
1282 // The PLT entry is of the form:
1283 // [btiData] addrInst (pacBr | stdBr) [nopData]
1284 const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
1285 const uint8_t addrInst[] = {
1286 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
1287 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
1288 0x10, 0x02, 0x00, 0x91 // add x16, x16, Offset(&(.got.plt[n]))
1289 };
1290 const uint8_t pacHintBr[] = {
1291 0x9f, 0x21, 0x03, 0xd5, // autia1716
1292 0x20, 0x02, 0x1f, 0xd6 // br x17
1293 };
1294 const uint8_t pacBr[] = {
1295 0x30, 0x0a, 0x1f, 0xd7, // braa x17, x16
1296 0x1f, 0x20, 0x03, 0xd5 // nop
1297 };
1298 const uint8_t stdBr[] = {
1299 0x20, 0x02, 0x1f, 0xd6, // br x17
1300 0x1f, 0x20, 0x03, 0xd5 // nop
1301 };
1302 const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop
1303
1304 // NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may
1305 // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
1306 // address may escape if referenced by a direct relocation. If relative
1307 // vtables are used then if the vtable is in a shared object the offsets will
1308 // be to the PLT entry. The condition is conservative.
1309 bool hasBti = btiHeader &&
1310 (sym.hasFlag(bit: NEEDS_COPY) || sym.isInIplt || sym.thunkAccessed);
1311 if (hasBti) {
1312 memcpy(dest: buf, src: btiData, n: sizeof(btiData));
1313 buf += sizeof(btiData);
1314 pltEntryAddr += sizeof(btiData);
1315 }
1316
1317 uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
1318 memcpy(dest: buf, src: addrInst, n: sizeof(addrInst));
1319 relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
1320 val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
1321 relocateNoSym(loc: buf + 4, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
1322 relocateNoSym(loc: buf + 8, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);
1323
1324 if (pacEntryKind != PEK_NoAuth)
1325 memcpy(dest: buf + sizeof(addrInst),
1326 src: pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr,
1327 n: sizeof(pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr));
1328 else
1329 memcpy(dest: buf + sizeof(addrInst), src: stdBr, n: sizeof(stdBr));
1330 if (!hasBti)
1331 // We didn't add the BTI c instruction so round out size with NOP.
1332 memcpy(dest: buf + sizeof(addrInst) + sizeof(stdBr), src: nopData, n: sizeof(nopData));
1333}
1334
1335template <class ELFT>
1336static void
1337addTaggedSymbolReferences(Ctx &ctx, InputSectionBase &sec,
1338 DenseMap<Symbol *, unsigned> &referenceCount) {
1339 assert(sec.type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC);
1340
1341 const RelsOrRelas<ELFT> rels = sec.relsOrRelas<ELFT>();
1342 if (rels.areRelocsRel())
1343 ErrAlways(ctx)
1344 << "non-RELA relocations are not allowed with memtag globals";
1345
1346 for (const typename ELFT::Rela &rel : rels.relas) {
1347 Symbol &sym = sec.file->getRelocTargetSym(rel);
1348 // Linker-synthesized symbols such as __executable_start may be referenced
1349 // as tagged in input objfiles, and we don't want them to be tagged. A
1350 // cheap way to exclude them is the type check, but their type is
1351 // STT_NOTYPE. In addition, this save us from checking untaggable symbols,
1352 // like functions or TLS symbols.
1353 if (sym.type != STT_OBJECT)
1354 continue;
1355 // STB_LOCAL symbols can't be referenced from outside the object file, and
1356 // thus don't need to be checked for references from other object files.
1357 if (sym.binding == STB_LOCAL) {
1358 sym.setIsTagged(true);
1359 continue;
1360 }
1361 ++referenceCount[&sym];
1362 }
1363 sec.markDead();
1364}
1365
1366// A tagged symbol must be denoted as being tagged by all references and the
1367// chosen definition. For simplicity, here, it must also be denoted as tagged
1368// for all definitions. Otherwise:
1369//
1370// 1. A tagged definition can be used by an untagged declaration, in which case
1371// the untagged access may be PC-relative, causing a tag mismatch at
1372// runtime.
1373// 2. An untagged definition can be used by a tagged declaration, where the
1374// compiler has taken advantage of the increased alignment of the tagged
1375// declaration, but the alignment at runtime is wrong, causing a fault.
1376//
1377// Ideally, this isn't a problem, as any TU that imports or exports tagged
1378// symbols should also be built with tagging. But, to handle these cases, we
1379// demote the symbol to be untagged.
1380void elf::createTaggedSymbols(Ctx &ctx) {
1381 assert(hasMemtag(ctx));
1382
1383 // First, collect all symbols that are marked as tagged, and count how many
1384 // times they're marked as tagged.
1385 DenseMap<Symbol *, unsigned> taggedSymbolReferenceCount;
1386 for (InputFile *file : ctx.objectFiles) {
1387 if (file->kind() != InputFile::ObjKind)
1388 continue;
1389 for (InputSectionBase *section : file->getSections()) {
1390 if (!section || section->type != SHT_AARCH64_MEMTAG_GLOBALS_STATIC ||
1391 section == &InputSection::discarded)
1392 continue;
1393 invokeELFT(addTaggedSymbolReferences, ctx, *section,
1394 taggedSymbolReferenceCount);
1395 }
1396 }
1397
1398 // Now, go through all the symbols. If the number of declarations +
1399 // definitions to a symbol exceeds the amount of times they're marked as
1400 // tagged, it means we have an objfile that uses the untagged variant of the
1401 // symbol.
1402 for (InputFile *file : ctx.objectFiles) {
1403 if (file->kind() != InputFile::BinaryKind &&
1404 file->kind() != InputFile::ObjKind)
1405 continue;
1406
1407 for (Symbol *symbol : file->getSymbols()) {
1408 // See `addTaggedSymbolReferences` for more details.
1409 if (symbol->type != STT_OBJECT ||
1410 symbol->binding == STB_LOCAL)
1411 continue;
1412 auto it = taggedSymbolReferenceCount.find(Val: symbol);
1413 if (it == taggedSymbolReferenceCount.end()) continue;
1414 unsigned &remainingAllowedTaggedRefs = it->second;
1415 if (remainingAllowedTaggedRefs == 0) {
1416 taggedSymbolReferenceCount.erase(I: it);
1417 continue;
1418 }
1419 --remainingAllowedTaggedRefs;
1420 }
1421 }
1422
1423 // `addTaggedSymbolReferences` has already checked that we have RELA
1424 // relocations, the only other way to get written addends is with
1425 // --apply-dynamic-relocs.
1426 if (!taggedSymbolReferenceCount.empty() && ctx.arg.writeAddends)
1427 ErrAlways(ctx) << "--apply-dynamic-relocs cannot be used with MTE globals";
1428
1429 // Now, `taggedSymbolReferenceCount` should only contain symbols that are
1430 // defined as tagged exactly the same amount as it's referenced, meaning all
1431 // uses are tagged.
1432 for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) {
1433 assert(remainingTaggedRefs == 0 &&
1434 "Symbol is defined as tagged more times than it's used");
1435 symbol->setIsTagged(true);
1436 }
1437}
1438
1439void elf::setAArch64TargetInfo(Ctx &ctx) {
1440 if ((ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ||
1441 ctx.arg.zPacPlt)
1442 ctx.target.reset(p: new AArch64BtiPac(ctx));
1443 else
1444 ctx.target.reset(p: new AArch64(ctx));
1445}
1446