1//===- AArch64.cpp --------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "OutputSections.h"
11#include "RelocScan.h"
12#include "Symbols.h"
13#include "SyntheticSections.h"
14#include "Target.h"
15#include "TargetImpl.h"
16#include "llvm/BinaryFormat/ELF.h"
17#include "llvm/Support/Endian.h"
18
19using namespace llvm;
20using namespace llvm::support::endian;
21using namespace llvm::ELF;
22using namespace lld;
23using namespace lld::elf;
24
25// Page(Expr) is the page address of the expression Expr, defined
26// as (Expr & ~0xFFF). (This applies even if the machine page size
27// supported by the platform has a different value.)
28uint64_t elf::getAArch64Page(uint64_t expr) {
29 return expr & ~static_cast<uint64_t>(0xFFF);
30}
31
32// A BTI landing pad is a valid target for an indirect branch when the Branch
33// Target Identification has been enabled. As linker generated branches are
34// via x16 the BTI landing pads are defined as: BTI C, BTI J, BTI JC, PACIASP,
35// PACIBSP.
36bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) {
37 // PLT entries accessed indirectly have a BTI c.
38 if (s.isInPlt(ctx))
39 return true;
40 Defined *d = dyn_cast<Defined>(Val: &s);
41 if (!isa_and_nonnull<InputSection>(Val: d->section))
42 // All places that we cannot disassemble are responsible for making
43 // the target a BTI landing pad.
44 return true;
45 InputSection *isec = cast<InputSection>(Val: d->section);
46 uint64_t off = d->value + a;
47 // Likely user error, but protect ourselves against out of bounds
48 // access.
49 if (off >= isec->getSize())
50 return true;
51 const uint8_t *buf = isec->content().begin();
52 // Synthetic sections may have a size but empty data - Assume that they won't
53 // contain a landing pad
54 if (buf == nullptr && isa<SyntheticSection>(Val: isec))
55 return false;
56
57 const uint32_t instr = read32le(P: buf + off);
58 // All BTI instructions are HINT instructions which all have same encoding
59 // apart from bits [11:5]
60 if ((instr & 0xd503201f) == 0xd503201f &&
61 is_contained(Set: {/*PACIASP*/ 0xd503233f, /*PACIBSP*/ 0xd503237f,
62 /*BTI C*/ 0xd503245f, /*BTI J*/ 0xd503249f,
63 /*BTI JC*/ 0xd50324df},
64 Element: instr))
65 return true;
66 return false;
67}
68
namespace {
// AArch64 (and big-endian AArch64) target: relocation scanning and
// application, GOT/PLT generation, thunk placement and branch-range checks.
class AArch64 : public TargetInfo {
public:
  AArch64(Ctx &);
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  // Per-ELFT/relocation-format worker behind scanSection().
  template <class ELFT, class RelTy>
  void scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels);
  void scanSection(InputSectionBase &sec) override {
    // AArch64 is always 64-bit; only the endianness needs dispatching.
    if (ctx.arg.ekind == ELF64BEKind)
      elf::scanSection1<AArch64, ELF64BE>(target&: *this, sec);
    else
      elf::scanSection1<AArch64, ELF64LE>(target&: *this, sec);
  }
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  bool usesOnlyLowPageBits(RelType type) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  void relocateAlloc(InputSection &sec, uint8_t *buf) const override;
  void applyBranchToBranchOpt() const override;

private:
  // TLS model relaxations (GD->LE, GD->IE, IE->LE) applied when the final
  // value of a TLS symbol is known at link time.
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
};

// Helper for rewriting ADRP-based instruction pairs (ADRP+ADD, ADRP+LDR)
// into cheaper sequences when the target turns out to be in range.
struct AArch64Relaxer {
  Ctx &ctx;
  // Set by the constructor: true when every R_AARCH64_ADR_GOT_PAGE is
  // immediately followed by its matching R_AARCH64_LD64_GOT_LO12_NC.
  bool safeToRelaxAdrpLdr = false;

  AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs);
  bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
                       uint64_t secAddr, uint8_t *buf) const;
  bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
                       uint64_t secAddr, uint8_t *buf) const;
};
} // namespace
118
// Extract the inclusive bit range [start, end] from val, returned
// right-aligned (i.e. shifted down by start bits).
// For instance, getBits(0xF0, 4, 8) returns 0xF.
static uint64_t getBits(uint64_t val, int start, int end) {
  int width = end - start + 1;
  uint64_t fieldMask = (static_cast<uint64_t>(1) << width) - 1;
  return (val >> start) & fieldMask;
}
125
// Configure the target-independent TargetInfo knobs with the AArch64
// dynamic relocation types and PLT geometry.
AArch64::AArch64(Ctx &ctx) : TargetInfo(ctx) {
  // Dynamic relocation types emitted by the writer for this target.
  copyRel = R_AARCH64_COPY;
  relativeRel = R_AARCH64_RELATIVE;
  iRelativeRel = R_AARCH64_IRELATIVE;
  iRelSymbolicRel = R_AARCH64_FUNCINIT64;
  gotRel = R_AARCH64_GLOB_DAT;
  pltRel = R_AARCH64_JUMP_SLOT;
  symbolicRel = R_AARCH64_ABS64;
  tlsDescRel = R_AARCH64_TLSDESC;
  tlsGotRel = R_AARCH64_TLS_TPREL64;
  // PLT layout: 32-byte header followed by 16-byte entries; see
  // writePltHeader()/writePlt() below for the instruction sequences.
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  defaultMaxPageSize = 65536;

  // Align to the 2 MiB page size (known as a superpage or huge page).
  // FreeBSD automatically promotes 2 MiB-aligned allocations.
  defaultImageBase = 0x200000;

  needsThunks = true;
}
147
148// Only needed to support relocations used by relocateNonAlloc and
149// preprocessRelocs.
150RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
151 const uint8_t *loc) const {
152 switch (type) {
153 case R_AARCH64_ABS32:
154 case R_AARCH64_ABS64:
155 return R_ABS;
156 case R_AARCH64_PREL32:
157 case R_AARCH64_PREL64:
158 return R_PC;
159 case R_AARCH64_NONE:
160 return R_NONE;
161 default:
162 Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
163 << ") against symbol " << &s;
164 return R_NONE;
165 }
166}
167
168bool AArch64::usesOnlyLowPageBits(RelType type) const {
169 switch (type) {
170 default:
171 return false;
172 case R_AARCH64_ADD_ABS_LO12_NC:
173 case R_AARCH64_LD64_GOT_LO12_NC:
174 case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
175 case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
176 case R_AARCH64_LDST128_ABS_LO12_NC:
177 case R_AARCH64_LDST16_ABS_LO12_NC:
178 case R_AARCH64_LDST32_ABS_LO12_NC:
179 case R_AARCH64_LDST64_ABS_LO12_NC:
180 case R_AARCH64_LDST8_ABS_LO12_NC:
181 case R_AARCH64_TLSDESC_ADD_LO12:
182 case R_AARCH64_TLSDESC_LD64_LO12:
183 case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
184 return true;
185 }
186}
187
// Scan every relocation in `sec`, classify it into a RelExpr, and forward
// it to the RelocScan helpers which decide on GOT/PLT/TLS entries and
// dynamic relocations. Unknown types are diagnosed and skipped.
template <class ELFT, class RelTy>
void AArch64::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
  RelocScan rs(ctx, &sec);
  sec.relocations.reserve(N: rels.size());

  for (auto it = rels.begin(); it != rels.end(); ++it) {
    const RelTy &rel = *it;
    uint32_t symIdx = rel.getSymbol(false);
    Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx);
    uint64_t offset = rel.r_offset;
    RelType type = rel.getType(false);
    // Report (once) and skip relocations against undefined symbols.
    if (sym.isUndefined() && symIdx != 0 &&
        rs.maybeReportUndefined(sym&: cast<Undefined>(Val&: sym), offset))
      continue;
    int64_t addend = rs.getAddend<ELFT>(rel, type);
    RelExpr expr;
    // Relocation types that only need a RelExpr set `expr` and break out of
    // the switch to reach rs.process(). Types that need special handling
    // (fast-path helpers, TLS) call a handler and use `continue`.
    switch (type) {
    case R_AARCH64_NONE:
      continue;

    // Absolute relocations:
    case R_AARCH64_ABS16:
    case R_AARCH64_ABS32:
    case R_AARCH64_ABS64:
    case R_AARCH64_FUNCINIT64:
    case R_AARCH64_ADD_ABS_LO12_NC:
    case R_AARCH64_LDST128_ABS_LO12_NC:
    case R_AARCH64_LDST16_ABS_LO12_NC:
    case R_AARCH64_LDST32_ABS_LO12_NC:
    case R_AARCH64_LDST64_ABS_LO12_NC:
    case R_AARCH64_LDST8_ABS_LO12_NC:
    case R_AARCH64_MOVW_SABS_G0:
    case R_AARCH64_MOVW_SABS_G1:
    case R_AARCH64_MOVW_SABS_G2:
    case R_AARCH64_MOVW_UABS_G0:
    case R_AARCH64_MOVW_UABS_G0_NC:
    case R_AARCH64_MOVW_UABS_G1:
    case R_AARCH64_MOVW_UABS_G1_NC:
    case R_AARCH64_MOVW_UABS_G2:
    case R_AARCH64_MOVW_UABS_G2_NC:
    case R_AARCH64_MOVW_UABS_G3:
      expr = R_ABS;
      break;

    case R_AARCH64_AUTH_ABS64:
      expr = RE_AARCH64_AUTH;
      break;

    case R_AARCH64_PATCHINST:
      // Only meaningful against absolute symbols; anything else is an error.
      if (!isAbsolute(sym))
        Err(ctx) << getErrorLoc(ctx, loc: sec.content().data() + offset)
                 << "R_AARCH64_PATCHINST relocation against non-absolute "
                    "symbol "
                 << &sym;
      expr = R_ABS;
      break;

    // PC-relative relocations:
    case R_AARCH64_PREL16:
    case R_AARCH64_PREL32:
    case R_AARCH64_PREL64:
    case R_AARCH64_ADR_PREL_LO21:
    case R_AARCH64_LD_PREL_LO19:
    case R_AARCH64_MOVW_PREL_G0:
    case R_AARCH64_MOVW_PREL_G0_NC:
    case R_AARCH64_MOVW_PREL_G1:
    case R_AARCH64_MOVW_PREL_G1_NC:
    case R_AARCH64_MOVW_PREL_G2:
    case R_AARCH64_MOVW_PREL_G2_NC:
    case R_AARCH64_MOVW_PREL_G3:
      rs.processR_PC(type, offset, addend, sym);
      continue;

    // Page-PC relocations:
    case R_AARCH64_ADR_PREL_PG_HI21:
    case R_AARCH64_ADR_PREL_PG_HI21_NC:
      expr = RE_AARCH64_PAGE_PC;
      break;

    // PLT-generating relocations:
    case R_AARCH64_PLT32:
      sym.thunkAccessed = true;
      [[fallthrough]];
    case R_AARCH64_CALL26:
    case R_AARCH64_CONDBR19:
    case R_AARCH64_JUMP26:
    case R_AARCH64_TSTBR14:
      rs.processR_PLT_PC(type, offset, addend, sym);
      continue;

    // GOT relocations:
    case R_AARCH64_ADR_GOT_PAGE:
      expr = RE_AARCH64_GOT_PAGE_PC;
      break;
    case R_AARCH64_LD64_GOT_LO12_NC:
      expr = R_GOT;
      break;
    case R_AARCH64_LD64_GOTPAGE_LO15:
      expr = RE_AARCH64_GOT_PAGE;
      break;
    case R_AARCH64_GOTPCREL32:
    case R_AARCH64_GOT_LD_PREL19:
      expr = R_GOT_PC;
      break;

    // AUTH GOT relocations. Set NEEDS_GOT_AUTH to detect incompatibility with
    // NEEDS_GOT_NONAUTH. rs.process does not set the flag.
    case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
    case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
      sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH);
      rs.processAux(expr: R_GOT, type, offset, sym, addend);
      continue;
    case R_AARCH64_AUTH_GOT_LD_PREL19:
    case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
      sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH);
      rs.processAux(expr: R_GOT_PC, type, offset, sym, addend);
      continue;
    case R_AARCH64_AUTH_ADR_GOT_PAGE:
      sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH);
      rs.processAux(expr: RE_AARCH64_GOT_PAGE_PC, type, offset, sym, addend);
      continue;

    // TLS LE relocations:
    case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
    case R_AARCH64_TLSLE_MOVW_TPREL_G0:
    case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
    case R_AARCH64_TLSLE_MOVW_TPREL_G1:
    case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
    case R_AARCH64_TLSLE_MOVW_TPREL_G2:
      if (rs.checkTlsLe(offset, sym, type))
        continue;
      expr = R_TPREL;
      break;

    // TLS IE relocations:
    case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
      rs.handleTlsIe(ieExpr: RE_AARCH64_GOT_PAGE_PC, type, offset, addend, sym);
      continue;
    case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
      rs.handleTlsIe(ieExpr: R_GOT, type, offset, addend, sym);
      continue;

    // TLSDESC relocations:
    case R_AARCH64_TLSDESC_ADR_PAGE21:
      rs.handleTlsDesc(sharedExpr: RE_AARCH64_TLSDESC_PAGE, ieExpr: RE_AARCH64_GOT_PAGE_PC, type,
                       offset, addend, sym);
      continue;
    case R_AARCH64_TLSDESC_LD64_LO12:
    case R_AARCH64_TLSDESC_ADD_LO12:
      rs.handleTlsDesc(sharedExpr: R_TLSDESC, ieExpr: R_GOT, type, offset, addend, sym);
      continue;
    case R_AARCH64_TLSDESC_CALL:
      sym.setFlags(NEEDS_TLSDESC_NONAUTH);
      if (!ctx.arg.shared)
        sec.addReloc(r: {.expr: R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
      continue;

    // AUTH TLSDESC relocations. Do not optimize to LE/IE because PAUTHELF64
    // only supports the descriptor based TLS (TLSDESC).
    // https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst#general-restrictions
    case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
      sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH);
      sec.addReloc(r: {.expr: RE_AARCH64_TLSDESC_PAGE, .type: type, .offset: offset, .addend: addend, .sym: &sym});
      continue;
    case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
    case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
      sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH);
      sec.addReloc(r: {.expr: R_TLSDESC, .type: type, .offset: offset, .addend: addend, .sym: &sym});
      continue;

    default:
      Err(ctx) << getErrorLoc(ctx, loc: sec.content().data() + offset)
               << "unknown relocation (" << type.v << ") against symbol "
               << &sym;
      continue;
    }
    rs.process(expr, type, offset, sym, addend);
  }

  // The branch-to-branch optimization expects relocations sorted by offset.
  if (ctx.arg.branchToBranch)
    llvm::stable_sort(sec.relocs(),
                      [](auto &l, auto &r) { return l.offset < r.offset; });
}
380
381RelType AArch64::getDynRel(RelType type) const {
382 if (type == R_AARCH64_ABS64 || type == R_AARCH64_AUTH_ABS64 ||
383 type == R_AARCH64_FUNCINIT64)
384 return type;
385 return R_AARCH64_NONE;
386}
387
// Read the implicit (REL-style) addend stored at the relocated location,
// decoding and sign-extending instruction immediate fields where the
// relocation targets an instruction.
int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_AARCH64_TLSDESC:
    // TLSDESC entries store the addend in their second 64-bit word.
    return read64(ctx, p: buf + 8);
  case R_AARCH64_NONE:
  case R_AARCH64_GLOB_DAT:
  case R_AARCH64_AUTH_GLOB_DAT:
  case R_AARCH64_JUMP_SLOT:
    return 0;
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    return SignExtend64<16>(x: read16(ctx, p: buf));
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    return SignExtend64<32>(x: read32(ctx, p: buf));
  case R_AARCH64_ABS64:
  case R_AARCH64_PREL64:
  case R_AARCH64_RELATIVE:
  case R_AARCH64_IRELATIVE:
  case R_AARCH64_TLS_TPREL64:
    return read64(ctx, p: buf);

  // The following relocation types all point at instructions, and
  // relocate an immediate field in the instruction.
  //
  // The general rule, from AAELF64 §5.7.2 "Addends and PC-bias",
  // says: "If the relocation relocates an instruction the immediate
  // field of the instruction is extracted, scaled as required by
  // the instruction field encoding, and sign-extended to 64 bits".

  // The R_AARCH64_MOVW family operates on wide MOV/MOVK/MOVZ
  // instructions, which have a 16-bit immediate field with its low
  // bit in bit 5 of the instruction encoding. When the immediate
  // field is used as an implicit addend for REL-type relocations,
  // it is treated as added to the low bits of the output value, not
  // shifted depending on the relocation type.
  //
  // This allows REL relocations to express the requirement 'please
  // add 12345 to this symbol value and give me the four 16-bit
  // chunks of the result', by putting the same addend 12345 in all
  // four instructions. Carries between the 16-bit chunks are
  // handled correctly, because the whole 64-bit addition is done
  // once per relocation.
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return SignExtend64<16>(x: getBits(val: read32le(P: buf), start: 5, end: 20));

  // R_AARCH64_TSTBR14 points at a TBZ or TBNZ instruction, which
  // has a 14-bit offset measured in instructions, i.e. shifted left
  // by 2.
  case R_AARCH64_TSTBR14:
    return SignExtend64<16>(x: getBits(val: read32le(P: buf), start: 5, end: 18) << 2);

  // R_AARCH64_CONDBR19 operates on the ordinary B.cond instruction,
  // which has a 19-bit offset measured in instructions.
  //
  // R_AARCH64_LD_PREL_LO19 operates on the LDR (literal)
  // instruction, which also has a 19-bit offset, measured in 4-byte
  // chunks. So the calculation is the same as for
  // R_AARCH64_CONDBR19.
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
    return SignExtend64<21>(x: getBits(val: read32le(P: buf), start: 5, end: 23) << 2);

  // R_AARCH64_ADD_ABS_LO12_NC operates on ADD (immediate). The
  // immediate can optionally be shifted left by 12 bits, but this
  // relocation is intended for the case where it is not.
  case R_AARCH64_ADD_ABS_LO12_NC:
    return SignExtend64<12>(x: getBits(val: read32le(P: buf), start: 10, end: 21));

  // R_AARCH64_ADR_PREL_LO21 operates on an ADR instruction, whose
  // 21-bit immediate is split between two bits high up in the word
  // (in fact the two _lowest_ order bits of the value) and 19 bits
  // lower down.
  //
  // R_AARCH64_ADR_PREL_PG_HI21[_NC] operate on an ADRP instruction,
  // which encodes the immediate in the same way, but will shift it
  // left by 12 bits when the instruction executes. For the same
  // reason as the MOVW family, we don't apply that left shift here.
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return SignExtend64<21>(x: (getBits(val: read32le(P: buf), start: 5, end: 23) << 2) |
                            getBits(val: read32le(P: buf), start: 29, end: 30));

  // R_AARCH64_{JUMP,CALL}26 operate on B and BL, which have a
  // 26-bit offset measured in instructions.
  case R_AARCH64_JUMP26:
  case R_AARCH64_CALL26:
    return SignExtend64<28>(x: getBits(val: read32le(P: buf), start: 0, end: 25) << 2);

  default:
    InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
    return 0;
  }
}
489
// Initialize a .got.plt slot with the address of the PLT header.
void AArch64::writeGotPlt(uint8_t *buf, const Symbol &) const {
  write64(ctx, p: buf, v: ctx.in.plt->getVA());
}
493
// Initialize an .igot.plt slot with the ifunc symbol's address, but only
// when the linker writes addends into the output (ctx.arg.writeAddends);
// otherwise the slot is left untouched here.
void AArch64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  if (ctx.arg.writeAddends)
    write64(ctx, p: buf, v: s.getVA(ctx));
}
498
// Write the 32-byte PLT header: it saves x16/x30, loads the contents of
// .got.plt+16 into x17 and branches to it; the trailing nops pad the
// header to pltHeaderSize.
void AArch64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp    x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp    x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr    x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add    x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br    x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  memcpy(dest: buf, src: pltData, n: sizeof(pltData));

  uint64_t got = ctx.in.gotPlt->getVA();
  uint64_t plt = ctx.in.plt->getVA();
  // Patch the adrp/ldr/add immediates to reference .got.plt + 16.
  relocateNoSym(loc: buf + 4, type: R_AARCH64_ADR_PREL_PG_HI21,
                val: getAArch64Page(expr: got + 16) - getAArch64Page(expr: plt + 4));
  relocateNoSym(loc: buf + 8, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + 16);
  relocateNoSym(loc: buf + 12, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + 16);
}
519
// Write one 16-byte PLT entry: load the target address from the symbol's
// .got.plt slot into x17 and branch to it.
void AArch64::writePlt(uint8_t *buf, const Symbol &sym,
                       uint64_t pltEntryAddr) const {
  const uint8_t inst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.got.plt[n]))
      0x20, 0x02, 0x1f, 0xd6  // br   x17
  };
  memcpy(dest: buf, src: inst, n: sizeof(inst));

  // Patch the adrp/ldr/add immediates to reference this symbol's slot.
  uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
  relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
                val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
  relocateNoSym(loc: buf + 4, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
  relocateNoSym(loc: buf + 8, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);
}
536
537bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
538 uint64_t branchAddr, const Symbol &s,
539 int64_t a) const {
540 // If s is an undefined weak symbol and does not have a PLT entry then it will
541 // be resolved as a branch to the next instruction. If it is hidden, its
542 // binding has been converted to local, so we just check isUndefined() here. A
543 // undefined non-weak symbol will have been errored.
544 if (s.isUndefined() && !s.isInPlt(ctx))
545 return false;
546 // ELF for the ARM 64-bit architecture, section Call and Jump relocations
547 // only permits range extension thunks for R_AARCH64_CALL26 and
548 // R_AARCH64_JUMP26 relocation types.
549 if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
550 type != R_AARCH64_PLT32)
551 return false;
552 uint64_t dst = expr == R_PLT_PC ? s.getPltVA(ctx) : s.getVA(ctx, addend: a);
553 return !inBranchRange(type, src: branchAddr, dst);
554}
555
556uint32_t AArch64::getThunkSectionSpacing() const {
557 // See comment in Arch/ARM.cpp for a more detailed explanation of
558 // getThunkSectionSpacing(). For AArch64 the only branches we are permitted to
559 // Thunk have a range of +/- 128 MiB
560 return (128 * 1024 * 1024) - 0x30000;
561}
562
563bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
564 if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
565 type != R_AARCH64_PLT32)
566 return true;
567 // The AArch64 call and unconditional branch instructions have a range of
568 // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
569 uint64_t range =
570 type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024);
571 if (dst > src) {
572 // Immediate of branch is signed.
573 range -= 4;
574 return dst - src <= range;
575 }
576 return src - dst <= range;
577}
578
579static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
580 uint32_t immLo = (imm & 0x3) << 29;
581 uint32_t immHi = (imm & 0x1FFFFC) << 3;
582 uint64_t mask = (0x3 << 29) | (0x1FFFFC << 3);
583 write32le(P: l, V: (read32le(P: l) & ~mask) | immLo | immHi);
584}
585
586static void writeMaskedBits32le(uint8_t *p, int32_t v, uint32_t mask) {
587 write32le(P: p, V: (read32le(P: p) & ~mask) | v);
588}
589
590// Update the immediate field in a AARCH64 ldr, str, and add instruction.
591static void write32Imm12(uint8_t *l, uint64_t imm) {
592 writeMaskedBits32le(p: l, v: (imm & 0xFFF) << 10, mask: 0xFFF << 10);
593}
594
595// Update the immediate field in an AArch64 movk, movn or movz instruction
596// for a signed relocation, and update the opcode of a movn or movz instruction
597// to match the sign of the operand.
598static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
599 uint32_t inst = read32le(P: loc);
600 // Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
601 if (!(inst & (1 << 29))) {
602 // movn or movz.
603 if (imm & 0x10000) {
604 // Change opcode to movn, which takes an inverted operand.
605 imm ^= 0xFFFF;
606 inst &= ~(1 << 30);
607 } else {
608 // Change opcode to movz.
609 inst |= 1 << 30;
610 }
611 }
612 write32le(P: loc, V: inst | ((imm & 0xFFFF) << 5));
613}
614
// Apply relocation `rel` at `loc` using the already-computed value `val`.
// Range/alignment diagnostics are emitted via the check* helpers before
// the data word or instruction immediate is patched.
void AArch64::relocate(uint8_t *loc, const Relocation &rel,
                       uint64_t val) const {
  switch (rel.type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    checkIntUInt(ctx, loc, v: val, n: 16, rel);
    write16(ctx, p: loc, v: val);
    break;
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    checkIntUInt(ctx, loc, v: val, n: 32, rel);
    write32(ctx, p: loc, v: val);
    break;
  case R_AARCH64_PATCHINST:
    // Only patch when the target symbol is defined; undefined targets are
    // left as-is.
    if (!rel.sym->isUndefined()) {
      checkUInt(ctx, loc, v: val, n: 32, rel);
      write32le(P: loc, V: val);
    }
    break;
  case R_AARCH64_PLT32:
  case R_AARCH64_GOTPCREL32:
    checkInt(ctx, loc, v: val, n: 32, rel);
    write32(ctx, p: loc, v: val);
    break;
  case R_AARCH64_ABS64:
    write64(ctx, p: loc, v: val);
    break;
  case R_AARCH64_PREL64:
    write64(ctx, p: loc, v: val);
    break;
  case R_AARCH64_AUTH_ABS64:
    // This is used for the addend of a .relr.auth.dyn entry,
    // which is a 32-bit value; the upper 32 bits are used to
    // encode the schema.
    checkInt(ctx, loc, v: val, n: 32, rel);
    write32(ctx, p: loc, v: val);
    break;
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
    write32Imm12(l: loc, imm: val);
    break;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_AUTH_ADR_GOT_PAGE:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
  case R_AARCH64_TLSDESC_ADR_PAGE21:
  case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
    checkInt(ctx, loc, v: val, n: 33, rel);
    [[fallthrough]];
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    // ADRP: drop the page-offset bits before encoding.
    write32AArch64Addr(l: loc, imm: val >> 12);
    break;
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
    checkInt(ctx, loc, v: val, n: 21, rel);
    write32AArch64Addr(l: loc, imm: val);
    break;
  case R_AARCH64_JUMP26:
    // Normally we would just write the bits of the immediate field, however
    // when patching instructions for the cpu errata fix -fix-cortex-a53-843419
    // we want to replace a non-branch instruction with a branch immediate
    // instruction. By writing all the bits of the instruction including the
    // opcode and the immediate (0 001 | 01 imm26) we can do this
    // transformation by placing a R_AARCH64_JUMP26 relocation at the offset of
    // the instruction we want to patch.
    write32le(P: loc, V: 0x14000000);
    [[fallthrough]];
  case R_AARCH64_CALL26:
    checkInt(ctx, loc, v: val, n: 28, rel);
    writeMaskedBits32le(p: loc, v: (val & 0x0FFFFFFC) >> 2, mask: 0x0FFFFFFC >> 2);
    break;
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_GOT_LD_PREL19:
  case R_AARCH64_AUTH_GOT_LD_PREL19:
    checkAlignment(ctx, loc, v: val, n: 4, rel);
    checkInt(ctx, loc, v: val, n: 21, rel);
    writeMaskedBits32le(p: loc, v: (val & 0x1FFFFC) << 3, mask: 0x1FFFFC << 3);
    break;
  // The LDST relocations scale the immediate by the access size, so each
  // width checks alignment and drops the corresponding low bits.
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
    write32Imm12(l: loc, imm: getBits(val, start: 0, end: 11));
    break;
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
    checkAlignment(ctx, loc, v: val, n: 2, rel);
    write32Imm12(l: loc, imm: getBits(val, start: 1, end: 11));
    break;
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
    checkAlignment(ctx, loc, v: val, n: 4, rel);
    write32Imm12(l: loc, imm: getBits(val, start: 2, end: 11));
    break;
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
    checkAlignment(ctx, loc, v: val, n: 8, rel);
    write32Imm12(l: loc, imm: getBits(val, start: 3, end: 11));
    break;
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
    checkAlignment(ctx, loc, v: val, n: 16, rel);
    write32Imm12(l: loc, imm: getBits(val, start: 4, end: 11));
    break;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    checkAlignment(ctx, loc, v: val, n: 8, rel);
    write32Imm12(l: loc, imm: getBits(val, start: 3, end: 14));
    break;
  // Unsigned MOVW relocations: each Gn selects a 16-bit chunk of val.
  case R_AARCH64_MOVW_UABS_G0:
    checkUInt(ctx, loc, v: val, n: 16, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G0_NC:
    writeMaskedBits32le(p: loc, v: (val & 0xFFFF) << 5, mask: 0xFFFF << 5);
    break;
  case R_AARCH64_MOVW_UABS_G1:
    checkUInt(ctx, loc, v: val, n: 32, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G1_NC:
    writeMaskedBits32le(p: loc, v: (val & 0xFFFF0000) >> 11, mask: 0xFFFF0000 >> 11);
    break;
  case R_AARCH64_MOVW_UABS_G2:
    checkUInt(ctx, loc, v: val, n: 48, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G2_NC:
    writeMaskedBits32le(p: loc, v: (val & 0xFFFF00000000) >> 27,
                        mask: 0xFFFF00000000 >> 27);
    break;
  case R_AARCH64_MOVW_UABS_G3:
    writeMaskedBits32le(p: loc, v: (val & 0xFFFF000000000000) >> 43,
                        mask: 0xFFFF000000000000 >> 43);
    break;
  // Signed MOVW relocations: writeSMovWImm also fixes up the movz/movn
  // opcode to match the operand's sign.
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
    checkInt(ctx, loc, v: val, n: 17, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
    writeSMovWImm(loc, imm: val);
    break;
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
    checkInt(ctx, loc, v: val, n: 33, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
    writeSMovWImm(loc, imm: val >> 16);
    break;
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    checkInt(ctx, loc, v: val, n: 49, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G2_NC:
    writeSMovWImm(loc, imm: val >> 32);
    break;
  case R_AARCH64_MOVW_PREL_G3:
    writeSMovWImm(loc, imm: val >> 48);
    break;
  case R_AARCH64_TSTBR14:
    checkInt(ctx, loc, v: val, n: 16, rel);
    writeMaskedBits32le(p: loc, v: (val & 0xFFFC) << 3, mask: 0xFFFC << 3);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    checkUInt(ctx, loc, v: val, n: 24, rel);
    write32Imm12(l: loc, imm: val >> 12);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
    write32Imm12(l: loc, imm: val);
    break;
  case R_AARCH64_TLSDESC:
    // For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
    write64(ctx, p: loc + 8, v: val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}
800
801void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
802 uint64_t val) const {
803 // TLSDESC Global-Dynamic relocation are in the form:
804 // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21]
805 // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12]
806 // add x0, x0, :tlsdesc_los:v [R_AARCH64_TLSDESC_ADD_LO12]
807 // .tlsdesccall [R_AARCH64_TLSDESC_CALL]
808 // blr x1
809 // And it can optimized to:
810 // movz x0, #0x0, lsl #16
811 // movk x0, #0x10
812 // nop
813 // nop
814 checkUInt(ctx, loc, v: val, n: 32, rel);
815
816 switch (rel.type) {
817 case R_AARCH64_TLSDESC_ADD_LO12:
818 case R_AARCH64_TLSDESC_CALL:
819 write32le(P: loc, V: 0xd503201f); // nop
820 return;
821 case R_AARCH64_TLSDESC_ADR_PAGE21:
822 write32le(P: loc, V: 0xd2a00000 | (((val >> 16) & 0xffff) << 5)); // movz
823 return;
824 case R_AARCH64_TLSDESC_LD64_LO12:
825 write32le(P: loc, V: 0xf2800000 | ((val & 0xffff) << 5)); // movk
826 return;
827 default:
828 llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
829 }
830}
831
void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // Relax a TLSDESC General-Dynamic access to the Initial-Exec model: load
  // the TP-relative offset from its GOT entry instead of calling the TLS
  // descriptor resolver.
  //
  // TLSDESC Global-Dynamic relocations are in the form:
  //   adrp x0, :tlsdesc:v             [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr x1, [x0, #:tlsdesc_lo12:v]  [R_AARCH64_TLSDESC_LD64_LO12]
  //   add x0, x0, :tlsdesc_lo12:v     [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                    [R_AARCH64_TLSDESC_CALL]
  //   blr x1
  // And it can be optimized to:
  //   adrp x0, :gottprel:v
  //   ldr x0, [x0, :gottprel_lo12:v]
  //   nop
  //   nop

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(P: loc, V: 0xd503201f); // nop
    break;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(P: loc, V: 0x90000000); // adrp
    // Patch the page delta to the GOT entry holding the TP offset.
    relocateNoSym(loc, type: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
    break;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(P: loc, V: 0xf9400000); // ldr
    relocateNoSym(loc, type: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
  }
}
863
864void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
865 uint64_t val) const {
866 checkUInt(ctx, loc, v: val, n: 32, rel);
867
868 if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
869 // Generate MOVZ.
870 uint32_t regNo = read32le(P: loc) & 0x1f;
871 write32le(P: loc, V: (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5));
872 return;
873 }
874 if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
875 // Generate MOVK.
876 uint32_t regNo = read32le(P: loc) & 0x1f;
877 write32le(P: loc, V: (0xf2800000 | regNo) | ((val & 0xffff) << 5));
878 return;
879 }
880 llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
881}
882
883AArch64Relaxer::AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs)
884 : ctx(ctx) {
885 if (!ctx.arg.relax)
886 return;
887 // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
888 // always appear in pairs.
889 size_t i = 0;
890 const size_t size = relocs.size();
891 for (; i != size; ++i) {
892 if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
893 if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
894 ++i;
895 continue;
896 }
897 break;
898 } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
899 break;
900 }
901 }
902 safeToRelaxAdrpLdr = i == size;
903}
904
bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
                                     const Relocation &addRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  // When the address of sym is within the range of ADR then
  // we may relax
  // ADRP xn, sym
  // ADD  xn, xn, :lo12: sym
  // to
  // NOP
  // ADR xn, sym
  // Returns true (and rewrites the two instructions in place) on success.
  if (!ctx.arg.relax || addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != addRel.offset)
    return false;
  if (adrpRel.sym != addRel.sym)
    return false;
  if (adrpRel.addend != 0 || addRel.addend != 0)
    return false;

  uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
  uint32_t addInstr = read32le(P: buf + addRel.offset);
  // Check if the first instruction is ADRP and the second instruction is ADD.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (addInstr & 0xffc00000) != 0x91000000)
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t addDestReg = addInstr & 0x1f;
  uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
  // The ADD must consume the ADRP result and write back the same register.
  if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // Check if the address difference is within 1MiB range (the +/-1MiB reach
  // of ADR's 21-bit signed immediate).
  int64_t val = sym.getVA(ctx) - (secAddr + addRel.offset);
  if (val < -1024 * 1024 || val >= 1024 * 1024)
    return false;

  Relocation adrRel = {.expr: R_ABS, .type: R_AARCH64_ADR_PREL_LO21, .offset: addRel.offset,
                       /*addend=*/0, .sym: &sym};
  // nop
  write32le(P: buf + adrpRel.offset, V: 0xd503201f);
  // adr x_<dest_reg>
  write32le(P: buf + adrRel.offset, V: 0x10000000 | adrpDestReg);
  ctx.target->relocate(loc: buf + adrRel.offset, rel: adrRel, val);
  return true;
}
952
bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
                                     const Relocation &ldrRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  if (!safeToRelaxAdrpLdr)
    return false;

  // When the definition of sym is not preemptible then we may
  // be able to relax
  // ADRP xn, :got: sym
  // LDR xn, [ xn :got_lo12: sym]
  // to
  // ADRP xn, sym
  // ADD xn, xn, :lo12: sym
  // Returns true (and rewrites the two instructions in place) on success.

  if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
      ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != ldrRel.offset)
    return false;
  // Check if the relocations reference the same symbol and
  // skip undefined, preemptible and STT_GNU_IFUNC symbols.
  if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
      adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
    return false;
  // Check if the addends of the both relocations are zero.
  if (adrpRel.addend != 0 || ldrRel.addend != 0)
    return false;
  uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
  uint32_t ldrInstr = read32le(P: buf + ldrRel.offset);
  // Check if the first instruction is ADRP and the second instruction is LDR.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (ldrInstr & 0x3b000000) != 0x39000000)
    return false;
  // Check the value of the sf bit: only 64-bit loads (LDR Xt) are eligible.
  if (!(ldrInstr >> 31))
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t ldrDestReg = ldrInstr & 0x1f;
  uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
  // Check if ADRP and LDR use the same register.
  if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // GOT references to absolute symbols can't be relaxed to use ADRP/ADD in
  // position-independent code because these instructions produce a relative
  // address.
  if (ctx.arg.isPic && !cast<Defined>(Val&: sym).section)
    return false;
  // Check if the address difference is within 4GB range (ADRP's +/-4GiB
  // page-granular reach, i.e. a 33-bit signed page delta).
  int64_t val =
      getAArch64Page(expr: sym.getVA(ctx)) - getAArch64Page(expr: secAddr + adrpRel.offset);
  if (val != llvm::SignExtend64(X: val, B: 33))
    return false;

  Relocation adrpSymRel = {.expr: RE_AARCH64_PAGE_PC, .type: R_AARCH64_ADR_PREL_PG_HI21,
                           .offset: adrpRel.offset, /*addend=*/0, .sym: &sym};
  Relocation addRel = {.expr: R_ABS, .type: R_AARCH64_ADD_ABS_LO12_NC, .offset: ldrRel.offset,
                       /*addend=*/0, .sym: &sym};

  // adrp x_<dest_reg>
  write32le(P: buf + adrpSymRel.offset, V: 0x90000000 | adrpDestReg);
  // add x_<dest reg>, x_<dest reg>
  write32le(P: buf + addRel.offset, V: 0x91000000 | adrpDestReg | (adrpDestReg << 5));

  ctx.target->relocate(
      loc: buf + adrpSymRel.offset, rel: adrpSymRel,
      val: SignExtend64(X: getAArch64Page(expr: sym.getVA(ctx)) -
                         getAArch64Page(expr: secAddr + adrpSymRel.offset),
                     B: 64));
  ctx.target->relocate(loc: buf + addRel.offset, rel: addRel,
                       val: SignExtend64(X: sym.getVA(ctx), B: 64));
  // The new ADRP+ADD pair may itself be relaxable to NOP+ADR.
  tryRelaxAdrpAdd(adrpRel: adrpSymRel, addRel, secAddr, buf);
  return true;
}
1029
1030// Tagged symbols have upper address bits that are added by the dynamic loader,
1031// and thus need the full 64-bit GOT entry. Do not relax such symbols.
1032static bool needsGotForMemtag(const Relocation &rel) {
1033 return rel.sym->isTagged() && needsGot(expr: rel.expr);
1034}
1035
void AArch64::relocateAlloc(InputSection &sec, uint8_t *buf) const {
  // Apply all relocations of an allocated section, opportunistically
  // performing linker relaxations (GOT-indirection removal, ADRP+ADD ->
  // NOP+ADR, and TLS model relaxations) where the relocation expressions
  // chosen by the scan permit.
  uint64_t secAddr = sec.getOutputSection()->addr + sec.outSecOff;
  const ArrayRef<Relocation> relocs = sec.relocs();
  AArch64Relaxer relaxer(ctx, relocs);
  for (size_t i = 0, size = relocs.size(); i != size; ++i) {
    const Relocation &rel = relocs[i];
    if (rel.expr == R_NONE) // See finalizeAddressDependentContent()
      continue;
    uint8_t *loc = buf + rel.offset;
    const uint64_t val = sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset);

    // Tagged symbols need the full 64-bit GOT entry at runtime, so never
    // relax away their GOT indirection.
    if (needsGotForMemtag(rel)) {
      relocate(loc, rel, val);
      continue;
    }

    switch (rel.type) {
    case R_AARCH64_ADR_GOT_PAGE:
      // Try GOT load -> direct address; consumes the paired LO12 relocation
      // on success.
      if (i + 1 < size &&
          relaxer.tryRelaxAdrpLdr(adrpRel: rel, ldrRel: relocs[i + 1], secAddr, buf)) {
        ++i;
        continue;
      }
      break;
    case R_AARCH64_ADR_PREL_PG_HI21:
      // Try ADRP+ADD -> NOP+ADR when the target is within +/-1MiB.
      if (i + 1 < size &&
          relaxer.tryRelaxAdrpAdd(adrpRel: rel, addRel: relocs[i + 1], secAddr, buf)) {
        ++i;
        continue;
      }
      break;

    case R_AARCH64_TLSDESC_ADR_PAGE21:
    case R_AARCH64_TLSDESC_LD64_LO12:
    case R_AARCH64_TLSDESC_ADD_LO12:
    case R_AARCH64_TLSDESC_CALL:
      // The relocation expression encodes the TLS model selected by the
      // relocation scan: R_TPREL means GD->LE, a GOT expression means GD->IE,
      // anything else stays a true TLS descriptor access.
      if (rel.expr == R_TPREL)
        relaxTlsGdToLe(loc, rel, val);
      else if (rel.expr == RE_AARCH64_GOT_PAGE_PC || rel.expr == R_GOT)
        relaxTlsGdToIe(loc, rel, val);
      else
        relocate(loc, rel, val);
      continue;
    case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
    case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
      if (rel.expr == R_TPREL)
        relaxTlsIeToLe(loc, rel, val);
      else
        relocate(loc, rel, val);
      continue;
    default:
      break;
    }

    relocate(loc, rel, val);
  }
}
1093
1094static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
1095 Relocation &r) {
1096 // Identify a control transfer relocation for the branch-to-branch
1097 // optimization. A "control transfer relocation" means a B or BL
1098 // target but it also includes relative vtable relocations for example.
1099 //
1100 // We require the relocation type to be JUMP26, CALL26 or PLT32. With a
1101 // relocation type of PLT32 the value may be assumed to be used for branching
1102 // directly to the symbol and the addend is only used to produce the relocated
1103 // value (hence the effective addend is always 0). This is because if a PLT is
1104 // needed the addend will be added to the address of the PLT, and it doesn't
1105 // make sense to branch into the middle of a PLT. For example, relative vtable
1106 // relocations use PLT32 and 0 or a positive value as the addend but still are
1107 // used to branch to the symbol.
1108 //
1109 // With JUMP26 or CALL26 the only reasonable interpretation of a non-zero
1110 // addend is that we are branching to symbol+addend so that becomes the
1111 // effective addend.
1112 if (r.type == R_AARCH64_PLT32)
1113 return 0;
1114 if (r.type == R_AARCH64_JUMP26 || r.type == R_AARCH64_CALL26)
1115 return r.addend;
1116 return std::nullopt;
1117}
1118
1119static std::pair<Relocation *, uint64_t>
1120getBranchInfoAtTarget(InputSection &is, uint64_t offset) {
1121 auto *i = llvm::partition_point(
1122 Range&: is.relocations, P: [&](Relocation &r) { return r.offset < offset; });
1123 if (i != is.relocations.end() && i->offset == offset &&
1124 i->type == R_AARCH64_JUMP26) {
1125 return {i, i->addend};
1126 }
1127 return {nullptr, 0};
1128}
1129
1130static void redirectControlTransferRelocations(Relocation &r1,
1131 const Relocation &r2) {
1132 r1.expr = r2.expr;
1133 r1.sym = r2.sym;
1134 // With PLT32 we must respect the original addend as that affects the value's
1135 // interpretation. With the other relocation types the original addend is
1136 // irrelevant because it referred to an offset within the original target
1137 // section so we overwrite it.
1138 if (r1.type == R_AARCH64_PLT32)
1139 r1.addend += r2.addend;
1140 else
1141 r1.addend = r2.addend;
1142}
1143
// Run the generic branch-to-branch optimization with the AArch64-specific
// hooks defined above (identifying control transfers, finding branches at
// their targets, and redirecting relocations).
void AArch64::applyBranchToBranchOpt() const {
  applyBranchToBranchOptImpl(ctx, getControlTransferAddend,
                             getBranchInfoAtTarget,
                             redirectControlTransferRelocations);
}
1149
1150// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in armv8.3-a, and Branch Target
// Identification (BTI), introduced in armv8.5-a. The additional instructions used
1153// in the variant Plt sequences are encoded in the Hint space so they can be
1154// deployed on older architectures, which treat the instructions as a nop.
1155// PAC and BTI can be combined leading to the following combinations:
1156// writePltHeader
1157// writePltHeaderBti (no PAC Header needed)
1158// writePlt
1159// writePltBti (BTI only)
1160// writePltPac (PAC only)
1161// writePltBtiPac (BTI and PAC)
1162//
1163// When PAC is enabled the dynamic loader encrypts the address that it places
1164// in the .got.plt using the pacia1716 instruction which encrypts the value in
1165// x17 using the modifier in x16. The static linker places autia1716 before the
1166// indirect branch to x17 to authenticate the address in x17 with the modifier
1167// in x16. This makes it more difficult for an attacker to modify the value in
1168// the .got.plt.
1169//
1170// When BTI is enabled all indirect branches must land on a bti instruction.
1171// The static linker must place a bti instruction at the start of any PLT entry
1172// that may be the target of an indirect branch. As the PLT entries call the
1173// lazy resolver indirectly this must have a bti instruction at start. In
1174// general a bti instruction is not needed for a PLT entry as indirect calls
1175// are resolved to the function address and not the PLT entry for the function.
1176// There are a small number of cases where the PLT address can escape, such as
1177// taking the address of a function or ifunc via a non got-generating
1178// relocation, and a shared library refers to that symbol.
1179//
1180// We use the bti c variant of the instruction which permits indirect branches
1181// (br) via x16/x17 and indirect function calls (blr) via any register. The ABI
1182// guarantees that all indirect branches from code requiring BTI protection
1183// will go via x16/x17
1184
namespace {
// Target variant that emits PLT sequences hardened with BTI landing pads
// and/or PAC-authenticated indirect branches. Selected by
// setAArch64TargetInfo() when BTI properties or -z pac-plt are present.
class AArch64BtiPac final : public AArch64 {
public:
  AArch64BtiPac(Ctx &);
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;

private:
  bool btiHeader; // bti instruction needed in PLT Header and Entry
  // How (if at all) the indirect branch in a PLT entry authenticates the
  // target address loaded from .got.plt.
  enum {
    PEK_NoAuth,
    PEK_AuthHint, // use autia1716 instr for authenticated branch in PLT entry
    PEK_Auth,     // use braa instr for authenticated branch in PLT entry
  } pacEntryKind;
};
} // namespace
1202
AArch64BtiPac::AArch64BtiPac(Ctx &ctx) : AArch64(ctx) {
  btiHeader = (ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
  // A BTI (Branch Target Identification) Plt Entry is only required if the
  // address of the PLT entry can be taken by the program, which permits an
  // indirect jump to the PLT entry. This can happen when the address
  // of the PLT entry for a function is canonicalised due to the address of
  // the function in an executable being taken by a shared library, or
  // non-preemptible ifunc referenced by non-GOT-generating, non-PLT-generating
  // relocations.
  // The PAC PLT entries require dynamic loader support and this isn't known
  // from properties in the objects, so we use the command line flag.
  // By default we only use hint-space instructions, but if we detect the
  // PAuthABI, which requires v8.3-A, we can use the non-hint space
  // instructions.

  if (ctx.arg.zPacPlt) {
    if (ctx.aarch64PauthAbiCoreInfo && ctx.aarch64PauthAbiCoreInfo->isValid())
      pacEntryKind = PEK_Auth;
    else
      pacEntryKind = PEK_AuthHint;
  } else {
    pacEntryKind = PEK_NoAuth;
  }

  // BTI and/or PAC sequences need an extra instruction, growing each PLT
  // entry from the default 16 bytes to 24.
  if (btiHeader || (pacEntryKind != PEK_NoAuth)) {
    pltEntrySize = 24;
    ipltEntrySize = 24;
  }
}
1232
void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
  // Write the lazy-resolution PLT header, optionally prefixed with a BTI c
  // landing pad. The header is padded with nops so its size is constant
  // whether or not the BTI prefix is emitted.
  const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp    x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp   x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr    x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add    x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br     x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop

  uint64_t got = ctx.in.gotPlt->getVA();
  uint64_t plt = ctx.in.plt->getVA();

  if (btiHeader) {
    // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
    // instruction.
    memcpy(dest: buf, src: btiData, n: sizeof(btiData));
    buf += sizeof(btiData);
    plt += sizeof(btiData);
  }
  memcpy(dest: buf, src: pltData, n: sizeof(pltData));

  // got + 16 is the address of .got.plt[2], the reserved entry holding the
  // lazy resolver's address.
  relocateNoSym(loc: buf + 4, type: R_AARCH64_ADR_PREL_PG_HI21,
                val: getAArch64Page(expr: got + 16) - getAArch64Page(expr: plt + 4));
  relocateNoSym(loc: buf + 8, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + 16);
  relocateNoSym(loc: buf + 12, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + 16);
  if (!btiHeader)
    // We didn't add the BTI c instruction so round out size with NOP.
    memcpy(dest: buf + sizeof(pltData), src: nopData, n: sizeof(nopData));
}
1266
void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
                             uint64_t pltEntryAddr) const {
  // The PLT entry is of the form:
  // [btiData] addrInst (pacBr | stdBr) [nopData]
  // All three branch sequences are the same size, so the layout is uniform.
  const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
  const uint8_t addrInst[] = {
      0x10, 0x00, 0x00, 0x90,  // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9,  // ldr  x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91   // add  x16, x16, Offset(&(.got.plt[n]))
  };
  const uint8_t pacHintBr[] = {
      0x9f, 0x21, 0x03, 0xd5, // autia1716
      0x20, 0x02, 0x1f, 0xd6  // br   x17
  };
  const uint8_t pacBr[] = {
      0x30, 0x0a, 0x1f, 0xd7, // braa x17, x16
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t stdBr[] = {
      0x20, 0x02, 0x1f, 0xd6,  // br   x17
      0x1f, 0x20, 0x03, 0xd5   // nop
  };
  const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop

  // NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may
  // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
  // address may escape if referenced by a direct relocation. If relative
  // vtables are used then if the vtable is in a shared object the offsets will
  // be to the PLT entry. The condition is conservative.
  bool hasBti = btiHeader &&
                (sym.hasFlag(bit: NEEDS_COPY) || sym.isInIplt || sym.thunkAccessed);
  if (hasBti) {
    memcpy(dest: buf, src: btiData, n: sizeof(btiData));
    buf += sizeof(btiData);
    pltEntryAddr += sizeof(btiData);
  }

  uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
  memcpy(dest: buf, src: addrInst, n: sizeof(addrInst));
  relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
                val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
  relocateNoSym(loc: buf + 4, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
  relocateNoSym(loc: buf + 8, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);

  // Select the branch sequence: authenticated (hint-space or braa) or plain.
  if (pacEntryKind != PEK_NoAuth)
    memcpy(dest: buf + sizeof(addrInst),
           src: pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr,
           n: sizeof(pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr));
  else
    memcpy(dest: buf + sizeof(addrInst), src: stdBr, n: sizeof(stdBr));
  if (!hasBti)
    // We didn't add the BTI c instruction so round out size with NOP.
    memcpy(dest: buf + sizeof(addrInst) + sizeof(stdBr), src: nopData, n: sizeof(nopData));
}
1321
// Scan one SHT_AARCH64_MEMTAG_GLOBALS_STATIC section: mark local symbols it
// references as tagged immediately, and count references to global symbols in
// `referenceCount` for the cross-object consistency check in
// createTaggedSymbols(). The section is discarded afterwards.
template <class ELFT>
static void
addTaggedSymbolReferences(Ctx &ctx, InputSectionBase &sec,
                          DenseMap<Symbol *, unsigned> &referenceCount) {
  assert(sec.type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC);

  const RelsOrRelas<ELFT> rels = sec.relsOrRelas<ELFT>();
  if (rels.areRelocsRel())
    ErrAlways(ctx)
        << "non-RELA relocations are not allowed with memtag globals";

  for (const typename ELFT::Rela &rel : rels.relas) {
    Symbol &sym = sec.file->getRelocTargetSym(rel);
    // Linker-synthesized symbols such as __executable_start may be referenced
    // as tagged in input objfiles, and we don't want them to be tagged. A
    // cheap way to exclude them is the type check, but their type is
    // STT_NOTYPE. In addition, this saves us from checking untaggable symbols,
    // like functions or TLS symbols.
    if (sym.type != STT_OBJECT)
      continue;
    // STB_LOCAL symbols can't be referenced from outside the object file, and
    // thus don't need to be checked for references from other object files.
    if (sym.binding == STB_LOCAL) {
      sym.setIsTagged(true);
      continue;
    }
    ++referenceCount[&sym];
  }
  // The metadata section has served its purpose; don't emit it.
  sec.markDead();
}
1352
1353// A tagged symbol must be denoted as being tagged by all references and the
1354// chosen definition. For simplicity, here, it must also be denoted as tagged
1355// for all definitions. Otherwise:
1356//
1357// 1. A tagged definition can be used by an untagged declaration, in which case
1358// the untagged access may be PC-relative, causing a tag mismatch at
1359// runtime.
1360// 2. An untagged definition can be used by a tagged declaration, where the
1361// compiler has taken advantage of the increased alignment of the tagged
1362// declaration, but the alignment at runtime is wrong, causing a fault.
1363//
1364// Ideally, this isn't a problem, as any TU that imports or exports tagged
1365// symbols should also be built with tagging. But, to handle these cases, we
1366// demote the symbol to be untagged.
void elf::createTaggedSymbols(Ctx &ctx) {
  assert(hasMemtag(ctx));

  // First, collect all symbols that are marked as tagged, and count how many
  // times they're marked as tagged.
  DenseMap<Symbol *, unsigned> taggedSymbolReferenceCount;
  for (InputFile *file : ctx.objectFiles) {
    if (file->kind() != InputFile::ObjKind)
      continue;
    for (InputSectionBase *section : file->getSections()) {
      if (!section || section->type != SHT_AARCH64_MEMTAG_GLOBALS_STATIC ||
          section == &InputSection::discarded)
        continue;
      invokeELFT(addTaggedSymbolReferences, ctx, *section,
                 taggedSymbolReferenceCount);
    }
  }

  // Now, go through all the symbols. If the number of declarations +
  // definitions to a symbol exceeds the amount of times they're marked as
  // tagged, it means we have an objfile that uses the untagged variant of the
  // symbol. Each reference decrements the count; any symbol whose count hits
  // zero before its references run out is demoted (erased from the map).
  for (InputFile *file : ctx.objectFiles) {
    if (file->kind() != InputFile::BinaryKind &&
        file->kind() != InputFile::ObjKind)
      continue;

    for (Symbol *symbol : file->getSymbols()) {
      // See `addTaggedSymbolReferences` for more details.
      if (symbol->type != STT_OBJECT ||
          symbol->binding == STB_LOCAL)
        continue;
      auto it = taggedSymbolReferenceCount.find(Val: symbol);
      if (it == taggedSymbolReferenceCount.end()) continue;
      unsigned &remainingAllowedTaggedRefs = it->second;
      if (remainingAllowedTaggedRefs == 0) {
        taggedSymbolReferenceCount.erase(I: it);
        continue;
      }
      --remainingAllowedTaggedRefs;
    }
  }

  // `addTaggedSymbolReferences` has already checked that we have RELA
  // relocations, the only other way to get written addends is with
  // --apply-dynamic-relocs.
  if (!taggedSymbolReferenceCount.empty() && ctx.arg.writeAddends)
    ErrAlways(ctx) << "--apply-dynamic-relocs cannot be used with MTE globals";

  // Now, `taggedSymbolReferenceCount` should only contain symbols that are
  // defined as tagged exactly the same amount as it's referenced, meaning all
  // uses are tagged.
  for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) {
    assert(remainingTaggedRefs == 0 &&
           "Symbol is defined as tagged more times than it's used");
    symbol->setIsTagged(true);
  }
}
1425
1426void elf::setAArch64TargetInfo(Ctx &ctx) {
1427 if ((ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ||
1428 ctx.arg.zPacPlt)
1429 ctx.target.reset(p: new AArch64BtiPac(ctx));
1430 else
1431 ctx.target.reset(p: new AArch64(ctx));
1432}
1433