1//===- AArch64.cpp --------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "OutputSections.h"
11#include "Symbols.h"
12#include "SyntheticSections.h"
13#include "Target.h"
14#include "TargetImpl.h"
15#include "llvm/BinaryFormat/ELF.h"
16#include "llvm/Support/Endian.h"
17
18using namespace llvm;
19using namespace llvm::support::endian;
20using namespace llvm::ELF;
21using namespace lld;
22using namespace lld::elf;
23
24// Page(Expr) is the page address of the expression Expr, defined
25// as (Expr & ~0xFFF). (This applies even if the machine page size
26// supported by the platform has a different value.)
27uint64_t elf::getAArch64Page(uint64_t expr) {
28 return expr & ~static_cast<uint64_t>(0xFFF);
29}
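// For illustration (not part of the linker logic): getAArch64Page(0x21234)
// returns 0x21000, so an ADRP/ADD pair materialises an address as
// Page(S + A) - Page(P) in the ADRP plus the low 12 bits (here 0x234) in the
// ADD.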
30
// A BTI landing pad is a valid target for an indirect branch when Branch
// Target Identification has been enabled. As linker-generated branches go
// via x16, the BTI landing pads are defined as: BTI C, BTI J, BTI JC,
// PACIASP, PACIBSP.
35bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) {
36 // PLT entries accessed indirectly have a BTI c.
37 if (s.isInPlt(ctx))
38 return true;
  Defined *d = dyn_cast<Defined>(&s);
  if (!isa_and_nonnull<InputSection>(d->section))
    // All places that we cannot disassemble are responsible for making
    // the target a BTI landing pad.
    return true;
  InputSection *isec = cast<InputSection>(d->section);
45 uint64_t off = d->value + a;
46 // Likely user error, but protect ourselves against out of bounds
47 // access.
48 if (off >= isec->getSize())
49 return true;
  const uint8_t *buf = isec->content().begin();
  const uint32_t instr = read32le(buf + off);
  // All BTI instructions are HINT instructions, which all have the same
  // encoding apart from bits [11:5].
  if ((instr & 0xd503201f) == 0xd503201f &&
      is_contained({/*PACIASP*/ 0xd503233f, /*PACIBSP*/ 0xd503237f,
                    /*BTI C*/ 0xd503245f, /*BTI J*/ 0xd503249f,
                    /*BTI JC*/ 0xd50324df},
                   instr))
59 return true;
60 return false;
61}
62
63namespace {
64class AArch64 : public TargetInfo {
65public:
66 AArch64(Ctx &);
67 RelExpr getRelExpr(RelType type, const Symbol &s,
68 const uint8_t *loc) const override;
69 RelType getDynRel(RelType type) const override;
70 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
71 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
72 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
73 void writePltHeader(uint8_t *buf) const override;
74 void writePlt(uint8_t *buf, const Symbol &sym,
75 uint64_t pltEntryAddr) const override;
76 bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
77 uint64_t branchAddr, const Symbol &s,
78 int64_t a) const override;
79 uint32_t getThunkSectionSpacing() const override;
80 bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
81 bool usesOnlyLowPageBits(RelType type) const override;
82 void relocate(uint8_t *loc, const Relocation &rel,
83 uint64_t val) const override;
84 RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
85 void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
86 void applyBranchToBranchOpt() const override;
87
88private:
89 void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
90 void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
91 void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
92};
93
94struct AArch64Relaxer {
95 Ctx &ctx;
96 bool safeToRelaxAdrpLdr = false;
97
98 AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs);
99 bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
100 uint64_t secAddr, uint8_t *buf) const;
101 bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
102 uint64_t secAddr, uint8_t *buf) const;
103};
104} // namespace
105
// Return bits [start, end] (inclusive) of val, shifted right by start bits.
// For instance, getBits(0xF0, 4, 8) returns 0xF.
108static uint64_t getBits(uint64_t val, int start, int end) {
109 uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1;
110 return (val >> start) & mask;
111}
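// For illustration: getBits(read32le(buf), 5, 20) builds the mask
// (1 << 16) - 1 = 0xFFFF and returns the 16-bit field starting at bit 5,
// which is how the MOVW immediates are extracted below.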
112
113AArch64::AArch64(Ctx &ctx) : TargetInfo(ctx) {
114 copyRel = R_AARCH64_COPY;
115 relativeRel = R_AARCH64_RELATIVE;
116 iRelativeRel = R_AARCH64_IRELATIVE;
117 gotRel = R_AARCH64_GLOB_DAT;
118 pltRel = R_AARCH64_JUMP_SLOT;
119 symbolicRel = R_AARCH64_ABS64;
120 tlsDescRel = R_AARCH64_TLSDESC;
121 tlsGotRel = R_AARCH64_TLS_TPREL64;
122 pltHeaderSize = 32;
123 pltEntrySize = 16;
124 ipltEntrySize = 16;
125 defaultMaxPageSize = 65536;
126
127 // Align to the 2 MiB page size (known as a superpage or huge page).
128 // FreeBSD automatically promotes 2 MiB-aligned allocations.
129 defaultImageBase = 0x200000;
130
131 needsThunks = true;
132}
133
134RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
135 const uint8_t *loc) const {
136 switch (type) {
137 case R_AARCH64_ABS16:
138 case R_AARCH64_ABS32:
139 case R_AARCH64_ABS64:
140 case R_AARCH64_ADD_ABS_LO12_NC:
141 case R_AARCH64_LDST128_ABS_LO12_NC:
142 case R_AARCH64_LDST16_ABS_LO12_NC:
143 case R_AARCH64_LDST32_ABS_LO12_NC:
144 case R_AARCH64_LDST64_ABS_LO12_NC:
145 case R_AARCH64_LDST8_ABS_LO12_NC:
146 case R_AARCH64_MOVW_SABS_G0:
147 case R_AARCH64_MOVW_SABS_G1:
148 case R_AARCH64_MOVW_SABS_G2:
149 case R_AARCH64_MOVW_UABS_G0:
150 case R_AARCH64_MOVW_UABS_G0_NC:
151 case R_AARCH64_MOVW_UABS_G1:
152 case R_AARCH64_MOVW_UABS_G1_NC:
153 case R_AARCH64_MOVW_UABS_G2:
154 case R_AARCH64_MOVW_UABS_G2_NC:
155 case R_AARCH64_MOVW_UABS_G3:
156 return R_ABS;
157 case R_AARCH64_AUTH_ABS64:
158 return RE_AARCH64_AUTH;
159 case R_AARCH64_TLSDESC_ADR_PAGE21:
160 return RE_AARCH64_TLSDESC_PAGE;
161 case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
162 return RE_AARCH64_AUTH_TLSDESC_PAGE;
163 case R_AARCH64_TLSDESC_LD64_LO12:
164 case R_AARCH64_TLSDESC_ADD_LO12:
165 return R_TLSDESC;
166 case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
167 case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
168 return RE_AARCH64_AUTH_TLSDESC;
169 case R_AARCH64_TLSDESC_CALL:
170 return R_TLSDESC_CALL;
171 case R_AARCH64_TLSLE_ADD_TPREL_HI12:
172 case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
173 case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
174 case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
175 case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
176 case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
177 case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
178 case R_AARCH64_TLSLE_MOVW_TPREL_G0:
179 case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
180 case R_AARCH64_TLSLE_MOVW_TPREL_G1:
181 case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
182 case R_AARCH64_TLSLE_MOVW_TPREL_G2:
183 return R_TPREL;
184 case R_AARCH64_CALL26:
185 case R_AARCH64_CONDBR19:
186 case R_AARCH64_JUMP26:
187 case R_AARCH64_TSTBR14:
188 return R_PLT_PC;
189 case R_AARCH64_PLT32:
190 const_cast<Symbol &>(s).thunkAccessed = true;
191 return R_PLT_PC;
192 case R_AARCH64_PREL16:
193 case R_AARCH64_PREL32:
194 case R_AARCH64_PREL64:
195 case R_AARCH64_ADR_PREL_LO21:
196 case R_AARCH64_LD_PREL_LO19:
197 case R_AARCH64_MOVW_PREL_G0:
198 case R_AARCH64_MOVW_PREL_G0_NC:
199 case R_AARCH64_MOVW_PREL_G1:
200 case R_AARCH64_MOVW_PREL_G1_NC:
201 case R_AARCH64_MOVW_PREL_G2:
202 case R_AARCH64_MOVW_PREL_G2_NC:
203 case R_AARCH64_MOVW_PREL_G3:
204 return R_PC;
205 case R_AARCH64_ADR_PREL_PG_HI21:
206 case R_AARCH64_ADR_PREL_PG_HI21_NC:
207 return RE_AARCH64_PAGE_PC;
208 case R_AARCH64_LD64_GOT_LO12_NC:
209 case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
210 return R_GOT;
211 case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
212 case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
213 return RE_AARCH64_AUTH_GOT;
214 case R_AARCH64_AUTH_GOT_LD_PREL19:
215 case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
216 return RE_AARCH64_AUTH_GOT_PC;
217 case R_AARCH64_LD64_GOTPAGE_LO15:
218 return RE_AARCH64_GOT_PAGE;
219 case R_AARCH64_ADR_GOT_PAGE:
220 case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
221 return RE_AARCH64_GOT_PAGE_PC;
222 case R_AARCH64_AUTH_ADR_GOT_PAGE:
223 return RE_AARCH64_AUTH_GOT_PAGE_PC;
224 case R_AARCH64_GOTPCREL32:
225 case R_AARCH64_GOT_LD_PREL19:
226 return R_GOT_PC;
227 case R_AARCH64_NONE:
228 return R_NONE;
229 default:
230 Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
231 << ") against symbol " << &s;
232 return R_NONE;
233 }
234}
235
236RelExpr AArch64::adjustTlsExpr(RelType type, RelExpr expr) const {
237 if (expr == R_RELAX_TLS_GD_TO_IE) {
238 if (type == R_AARCH64_TLSDESC_ADR_PAGE21)
239 return RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC;
240 return R_RELAX_TLS_GD_TO_IE_ABS;
241 }
242 return expr;
243}
244
245bool AArch64::usesOnlyLowPageBits(RelType type) const {
246 switch (type) {
247 default:
248 return false;
249 case R_AARCH64_ADD_ABS_LO12_NC:
250 case R_AARCH64_LD64_GOT_LO12_NC:
251 case R_AARCH64_LDST128_ABS_LO12_NC:
252 case R_AARCH64_LDST16_ABS_LO12_NC:
253 case R_AARCH64_LDST32_ABS_LO12_NC:
254 case R_AARCH64_LDST64_ABS_LO12_NC:
255 case R_AARCH64_LDST8_ABS_LO12_NC:
256 case R_AARCH64_TLSDESC_ADD_LO12:
257 case R_AARCH64_TLSDESC_LD64_LO12:
258 case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
259 return true;
260 }
261}
262
263RelType AArch64::getDynRel(RelType type) const {
264 if (type == R_AARCH64_ABS64 || type == R_AARCH64_AUTH_ABS64)
265 return type;
266 return R_AARCH64_NONE;
267}
268
269int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
270 switch (type) {
271 case R_AARCH64_TLSDESC:
272 return read64(ctx, p: buf + 8);
273 case R_AARCH64_NONE:
274 case R_AARCH64_GLOB_DAT:
275 case R_AARCH64_AUTH_GLOB_DAT:
276 case R_AARCH64_JUMP_SLOT:
277 return 0;
278 case R_AARCH64_ABS16:
279 case R_AARCH64_PREL16:
280 return SignExtend64<16>(x: read16(ctx, p: buf));
281 case R_AARCH64_ABS32:
282 case R_AARCH64_PREL32:
283 return SignExtend64<32>(x: read32(ctx, p: buf));
284 case R_AARCH64_ABS64:
285 case R_AARCH64_PREL64:
286 case R_AARCH64_RELATIVE:
287 case R_AARCH64_IRELATIVE:
288 case R_AARCH64_TLS_TPREL64:
289 return read64(ctx, p: buf);
290
291 // The following relocation types all point at instructions, and
292 // relocate an immediate field in the instruction.
293 //
294 // The general rule, from AAELF64 §5.7.2 "Addends and PC-bias",
295 // says: "If the relocation relocates an instruction the immediate
296 // field of the instruction is extracted, scaled as required by
297 // the instruction field encoding, and sign-extended to 64 bits".
298
299 // The R_AARCH64_MOVW family operates on wide MOV/MOVK/MOVZ
300 // instructions, which have a 16-bit immediate field with its low
301 // bit in bit 5 of the instruction encoding. When the immediate
302 // field is used as an implicit addend for REL-type relocations,
303 // it is treated as added to the low bits of the output value, not
304 // shifted depending on the relocation type.
305 //
306 // This allows REL relocations to express the requirement 'please
307 // add 12345 to this symbol value and give me the four 16-bit
308 // chunks of the result', by putting the same addend 12345 in all
309 // four instructions. Carries between the 16-bit chunks are
310 // handled correctly, because the whole 64-bit addition is done
311 // once per relocation.
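  //
  // For example (illustrative): if all four MOVZ/MOVK instructions that
  // build a 64-bit constant carry the same implicit addend 12345, each
  // relocation independently computes S + 12345, and relocate() then stores
  // the appropriate G0/G1/G2/G3 16-bit chunk of that full sum, so carries
  // between chunks come out right.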
312 case R_AARCH64_MOVW_UABS_G0:
313 case R_AARCH64_MOVW_UABS_G0_NC:
314 case R_AARCH64_MOVW_UABS_G1:
315 case R_AARCH64_MOVW_UABS_G1_NC:
316 case R_AARCH64_MOVW_UABS_G2:
317 case R_AARCH64_MOVW_UABS_G2_NC:
318 case R_AARCH64_MOVW_UABS_G3:
319 return SignExtend64<16>(x: getBits(val: read32le(P: buf), start: 5, end: 20));
320
321 // R_AARCH64_TSTBR14 points at a TBZ or TBNZ instruction, which
322 // has a 14-bit offset measured in instructions, i.e. shifted left
323 // by 2.
324 case R_AARCH64_TSTBR14:
325 return SignExtend64<16>(x: getBits(val: read32le(P: buf), start: 5, end: 18) << 2);
326
327 // R_AARCH64_CONDBR19 operates on the ordinary B.cond instruction,
328 // which has a 19-bit offset measured in instructions.
329 //
330 // R_AARCH64_LD_PREL_LO19 operates on the LDR (literal)
331 // instruction, which also has a 19-bit offset, measured in 4-byte
332 // chunks. So the calculation is the same as for
333 // R_AARCH64_CONDBR19.
334 case R_AARCH64_CONDBR19:
335 case R_AARCH64_LD_PREL_LO19:
336 return SignExtend64<21>(x: getBits(val: read32le(P: buf), start: 5, end: 23) << 2);
337
338 // R_AARCH64_ADD_ABS_LO12_NC operates on ADD (immediate). The
339 // immediate can optionally be shifted left by 12 bits, but this
340 // relocation is intended for the case where it is not.
341 case R_AARCH64_ADD_ABS_LO12_NC:
342 return SignExtend64<12>(x: getBits(val: read32le(P: buf), start: 10, end: 21));
343
344 // R_AARCH64_ADR_PREL_LO21 operates on an ADR instruction, whose
345 // 21-bit immediate is split between two bits high up in the word
346 // (in fact the two _lowest_ order bits of the value) and 19 bits
347 // lower down.
348 //
349 // R_AARCH64_ADR_PREL_PG_HI21[_NC] operate on an ADRP instruction,
350 // which encodes the immediate in the same way, but will shift it
351 // left by 12 bits when the instruction executes. For the same
352 // reason as the MOVW family, we don't apply that left shift here.
353 case R_AARCH64_ADR_PREL_LO21:
354 case R_AARCH64_ADR_PREL_PG_HI21:
355 case R_AARCH64_ADR_PREL_PG_HI21_NC:
356 return SignExtend64<21>(x: (getBits(val: read32le(P: buf), start: 5, end: 23) << 2) |
357 getBits(val: read32le(P: buf), start: 29, end: 30));
358
359 // R_AARCH64_{JUMP,CALL}26 operate on B and BL, which have a
360 // 26-bit offset measured in instructions.
361 case R_AARCH64_JUMP26:
362 case R_AARCH64_CALL26:
363 return SignExtend64<28>(x: getBits(val: read32le(P: buf), start: 0, end: 25) << 2);
364
365 default:
366 InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
367 return 0;
368 }
369}
370
371void AArch64::writeGotPlt(uint8_t *buf, const Symbol &) const {
372 write64(ctx, p: buf, v: ctx.in.plt->getVA());
373}
374
375void AArch64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
376 if (ctx.arg.writeAddends)
377 write64(ctx, p: buf, v: s.getVA(ctx));
378}
379
380void AArch64::writePltHeader(uint8_t *buf) const {
381 const uint8_t pltData[] = {
382 0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
383 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
384 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[2]))]
385 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[2]))
386 0x20, 0x02, 0x1f, 0xd6, // br x17
387 0x1f, 0x20, 0x03, 0xd5, // nop
388 0x1f, 0x20, 0x03, 0xd5, // nop
389 0x1f, 0x20, 0x03, 0xd5 // nop
390 };
391 memcpy(dest: buf, src: pltData, n: sizeof(pltData));
392
393 uint64_t got = ctx.in.gotPlt->getVA();
394 uint64_t plt = ctx.in.plt->getVA();
395 relocateNoSym(loc: buf + 4, type: R_AARCH64_ADR_PREL_PG_HI21,
396 val: getAArch64Page(expr: got + 16) - getAArch64Page(expr: plt + 4));
397 relocateNoSym(loc: buf + 8, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + 16);
398 relocateNoSym(loc: buf + 12, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + 16);
399}
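// For illustration only, with hypothetical addresses: if .plt were at 0x210000
// and .got.plt at 0x230000, the adrp above would receive
// Page(0x230010) - Page(0x210004) = 0x20000 and the ldr/add would receive the
// low 12 bits of &(.got.plt[2]), i.e. 0x010.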
400
401void AArch64::writePlt(uint8_t *buf, const Symbol &sym,
402 uint64_t pltEntryAddr) const {
403 const uint8_t inst[] = {
404 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
405 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
406 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[n]))
407 0x20, 0x02, 0x1f, 0xd6 // br x17
408 };
409 memcpy(dest: buf, src: inst, n: sizeof(inst));
410
411 uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
412 relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
413 val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
414 relocateNoSym(loc: buf + 4, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
415 relocateNoSym(loc: buf + 8, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);
416}
417
418bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
419 uint64_t branchAddr, const Symbol &s,
420 int64_t a) const {
  // If s is an undefined weak symbol and does not have a PLT entry then it
  // will be resolved as a branch to the next instruction. If it is hidden, its
  // binding has been converted to local, so we just check isUndefined() here.
  // An undefined non-weak symbol will have been reported as an error already.
425 if (s.isUndefined() && !s.isInPlt(ctx))
426 return false;
  // ELF for the Arm 64-bit Architecture, section "Call and Jump relocations",
  // only permits range extension thunks for the R_AARCH64_CALL26 and
  // R_AARCH64_JUMP26 relocation types.
430 if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
431 type != R_AARCH64_PLT32)
432 return false;
433 uint64_t dst = expr == R_PLT_PC ? s.getPltVA(ctx) : s.getVA(ctx, addend: a);
434 return !inBranchRange(type, src: branchAddr, dst);
435}
436
uint32_t AArch64::getThunkSectionSpacing() const {
  // See comment in Arch/ARM.cpp for a more detailed explanation of
  // getThunkSectionSpacing(). For AArch64 the only branches we are permitted
  // to thunk have a range of +/- 128 MiB.
  return (128 * 1024 * 1024) - 0x30000;
}
443
444bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
445 if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
446 type != R_AARCH64_PLT32)
447 return true;
448 // The AArch64 call and unconditional branch instructions have a range of
449 // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
450 uint64_t range =
451 type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024);
452 if (dst > src) {
453 // Immediate of branch is signed.
454 range -= 4;
455 return dst - src <= range;
456 }
457 return src - dst <= range;
458}
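// For illustration: with R_AARCH64_CALL26, a forward branch is in range when
// dst - src <= 128 MiB - 4 (the positive half of the signed 28-bit offset),
// and a backward branch when src - dst <= 128 MiB; R_AARCH64_PLT32 extends
// this to +/- 2 GiB.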
459
460static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
461 uint32_t immLo = (imm & 0x3) << 29;
462 uint32_t immHi = (imm & 0x1FFFFC) << 3;
463 uint64_t mask = (0x3 << 29) | (0x1FFFFC << 3);
464 write32le(P: l, V: (read32le(P: l) & ~mask) | immLo | immHi);
465}
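// For illustration: for imm = 0x12345, immLo places the low two bits (0b01) in
// bits [30:29] and immHi places bits [20:2] of imm (0x48D1) in bits [23:5],
// matching the split immediate encoding of ADR and ADRP.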
466
467static void writeMaskedBits32le(uint8_t *p, int32_t v, uint32_t mask) {
468 write32le(P: p, V: (read32le(P: p) & ~mask) | v);
469}
470
// Update the immediate field in an AArch64 ldr, str, or add instruction.
472static void write32Imm12(uint8_t *l, uint64_t imm) {
473 writeMaskedBits32le(p: l, v: (imm & 0xFFF) << 10, mask: 0xFFF << 10);
474}
475
476// Update the immediate field in an AArch64 movk, movn or movz instruction
477// for a signed relocation, and update the opcode of a movn or movz instruction
478// to match the sign of the operand.
479static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
480 uint32_t inst = read32le(P: loc);
481 // Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
482 if (!(inst & (1 << 29))) {
483 // movn or movz.
484 if (imm & 0x10000) {
485 // Change opcode to movn, which takes an inverted operand.
486 imm ^= 0xFFFF;
487 inst &= ~(1 << 30);
488 } else {
489 // Change opcode to movz.
490 inst |= 1 << 30;
491 }
492 }
493 write32le(P: loc, V: inst | ((imm & 0xFFFF) << 5));
494}
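// For illustration: relocating R_AARCH64_MOVW_SABS_G0 with val = -3 reaches
// this helper with imm = 0xfffffffd; bit 16 is set, so the opcode is switched
// to MOVN and the stored immediate becomes (imm ^ 0xFFFF) & 0xFFFF = 2, and
// MOVN Xd, #2 materialises ~2 = -3 as required.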
495
496void AArch64::relocate(uint8_t *loc, const Relocation &rel,
497 uint64_t val) const {
498 switch (rel.type) {
499 case R_AARCH64_ABS16:
500 case R_AARCH64_PREL16:
501 checkIntUInt(ctx, loc, v: val, n: 16, rel);
502 write16(ctx, p: loc, v: val);
503 break;
504 case R_AARCH64_ABS32:
505 case R_AARCH64_PREL32:
506 checkIntUInt(ctx, loc, v: val, n: 32, rel);
507 write32(ctx, p: loc, v: val);
508 break;
509 case R_AARCH64_PLT32:
510 case R_AARCH64_GOTPCREL32:
511 checkInt(ctx, loc, v: val, n: 32, rel);
512 write32(ctx, p: loc, v: val);
513 break;
514 case R_AARCH64_ABS64:
    // AArch64 relocations to tagged symbols have extended semantics, as
    // described here:
    // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative.
    // In short: encode the symbol's special addend in the place, as an offset
    // to the point the logical tag is derived from. As a quick hack, if the
    // addend is within the symbol's bounds, there is no need to encode the
    // tag derivation offset.
522 if (rel.sym && rel.sym->isTagged() &&
523 (rel.addend < 0 ||
524 rel.addend >= static_cast<int64_t>(rel.sym->getSize())))
525 write64(ctx, p: loc, v: -rel.addend);
526 else
527 write64(ctx, p: loc, v: val);
528 break;
529 case R_AARCH64_PREL64:
530 write64(ctx, p: loc, v: val);
531 break;
532 case R_AARCH64_AUTH_ABS64:
533 // If val is wider than 32 bits, the relocation must have been moved from
534 // .relr.auth.dyn to .rela.dyn, and the addend write is not needed.
535 //
536 // If val fits in 32 bits, we have two potential scenarios:
537 // * True RELR: Write the 32-bit `val`.
538 // * RELA: Even if the value now fits in 32 bits, it might have been
539 // converted from RELR during an iteration in
540 // finalizeAddressDependentContent(). Writing the value is harmless
541 // because dynamic linking ignores it.
542 if (isInt<32>(x: val))
543 write32(ctx, p: loc, v: val);
544 break;
545 case R_AARCH64_ADD_ABS_LO12_NC:
546 case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
547 write32Imm12(l: loc, imm: val);
548 break;
549 case R_AARCH64_ADR_GOT_PAGE:
550 case R_AARCH64_AUTH_ADR_GOT_PAGE:
551 case R_AARCH64_ADR_PREL_PG_HI21:
552 case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
553 case R_AARCH64_TLSDESC_ADR_PAGE21:
554 case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
555 checkInt(ctx, loc, v: val, n: 33, rel);
556 [[fallthrough]];
557 case R_AARCH64_ADR_PREL_PG_HI21_NC:
558 write32AArch64Addr(l: loc, imm: val >> 12);
559 break;
560 case R_AARCH64_ADR_PREL_LO21:
561 case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
562 checkInt(ctx, loc, v: val, n: 21, rel);
563 write32AArch64Addr(l: loc, imm: val);
564 break;
565 case R_AARCH64_JUMP26:
    // Normally we would just write the bits of the immediate field. However,
    // when patching instructions for the CPU errata fix --fix-cortex-a53-843419
    // we want to replace a non-branch instruction with a branch immediate
    // instruction. By writing all the bits of the instruction, including the
    // opcode and the immediate (0 001 | 01 imm26), we can do this
    // transformation by placing an R_AARCH64_JUMP26 relocation at the offset
    // of the instruction we want to patch.
573 write32le(P: loc, V: 0x14000000);
574 [[fallthrough]];
575 case R_AARCH64_CALL26:
576 checkInt(ctx, loc, v: val, n: 28, rel);
577 writeMaskedBits32le(p: loc, v: (val & 0x0FFFFFFC) >> 2, mask: 0x0FFFFFFC >> 2);
578 break;
579 case R_AARCH64_CONDBR19:
580 case R_AARCH64_LD_PREL_LO19:
581 case R_AARCH64_GOT_LD_PREL19:
582 case R_AARCH64_AUTH_GOT_LD_PREL19:
583 checkAlignment(ctx, loc, v: val, n: 4, rel);
584 checkInt(ctx, loc, v: val, n: 21, rel);
585 writeMaskedBits32le(p: loc, v: (val & 0x1FFFFC) << 3, mask: 0x1FFFFC << 3);
586 break;
587 case R_AARCH64_LDST8_ABS_LO12_NC:
588 case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
589 write32Imm12(l: loc, imm: getBits(val, start: 0, end: 11));
590 break;
591 case R_AARCH64_LDST16_ABS_LO12_NC:
592 case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
593 checkAlignment(ctx, loc, v: val, n: 2, rel);
594 write32Imm12(l: loc, imm: getBits(val, start: 1, end: 11));
595 break;
596 case R_AARCH64_LDST32_ABS_LO12_NC:
597 case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
598 checkAlignment(ctx, loc, v: val, n: 4, rel);
599 write32Imm12(l: loc, imm: getBits(val, start: 2, end: 11));
600 break;
601 case R_AARCH64_LDST64_ABS_LO12_NC:
602 case R_AARCH64_LD64_GOT_LO12_NC:
603 case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
604 case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
605 case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
606 case R_AARCH64_TLSDESC_LD64_LO12:
607 case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
608 checkAlignment(ctx, loc, v: val, n: 8, rel);
609 write32Imm12(l: loc, imm: getBits(val, start: 3, end: 11));
610 break;
611 case R_AARCH64_LDST128_ABS_LO12_NC:
612 case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
613 checkAlignment(ctx, loc, v: val, n: 16, rel);
614 write32Imm12(l: loc, imm: getBits(val, start: 4, end: 11));
615 break;
616 case R_AARCH64_LD64_GOTPAGE_LO15:
617 checkAlignment(ctx, loc, v: val, n: 8, rel);
618 write32Imm12(l: loc, imm: getBits(val, start: 3, end: 14));
619 break;
620 case R_AARCH64_MOVW_UABS_G0:
621 checkUInt(ctx, loc, v: val, n: 16, rel);
622 [[fallthrough]];
623 case R_AARCH64_MOVW_UABS_G0_NC:
624 writeMaskedBits32le(p: loc, v: (val & 0xFFFF) << 5, mask: 0xFFFF << 5);
625 break;
626 case R_AARCH64_MOVW_UABS_G1:
627 checkUInt(ctx, loc, v: val, n: 32, rel);
628 [[fallthrough]];
629 case R_AARCH64_MOVW_UABS_G1_NC:
630 writeMaskedBits32le(p: loc, v: (val & 0xFFFF0000) >> 11, mask: 0xFFFF0000 >> 11);
631 break;
632 case R_AARCH64_MOVW_UABS_G2:
633 checkUInt(ctx, loc, v: val, n: 48, rel);
634 [[fallthrough]];
635 case R_AARCH64_MOVW_UABS_G2_NC:
636 writeMaskedBits32le(p: loc, v: (val & 0xFFFF00000000) >> 27,
637 mask: 0xFFFF00000000 >> 27);
638 break;
639 case R_AARCH64_MOVW_UABS_G3:
640 writeMaskedBits32le(p: loc, v: (val & 0xFFFF000000000000) >> 43,
641 mask: 0xFFFF000000000000 >> 43);
642 break;
643 case R_AARCH64_MOVW_PREL_G0:
644 case R_AARCH64_MOVW_SABS_G0:
645 case R_AARCH64_TLSLE_MOVW_TPREL_G0:
646 checkInt(ctx, loc, v: val, n: 17, rel);
647 [[fallthrough]];
648 case R_AARCH64_MOVW_PREL_G0_NC:
649 case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
650 writeSMovWImm(loc, imm: val);
651 break;
652 case R_AARCH64_MOVW_PREL_G1:
653 case R_AARCH64_MOVW_SABS_G1:
654 case R_AARCH64_TLSLE_MOVW_TPREL_G1:
655 checkInt(ctx, loc, v: val, n: 33, rel);
656 [[fallthrough]];
657 case R_AARCH64_MOVW_PREL_G1_NC:
658 case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
659 writeSMovWImm(loc, imm: val >> 16);
660 break;
661 case R_AARCH64_MOVW_PREL_G2:
662 case R_AARCH64_MOVW_SABS_G2:
663 case R_AARCH64_TLSLE_MOVW_TPREL_G2:
664 checkInt(ctx, loc, v: val, n: 49, rel);
665 [[fallthrough]];
666 case R_AARCH64_MOVW_PREL_G2_NC:
667 writeSMovWImm(loc, imm: val >> 32);
668 break;
669 case R_AARCH64_MOVW_PREL_G3:
670 writeSMovWImm(loc, imm: val >> 48);
671 break;
672 case R_AARCH64_TSTBR14:
673 checkInt(ctx, loc, v: val, n: 16, rel);
674 writeMaskedBits32le(p: loc, v: (val & 0xFFFC) << 3, mask: 0xFFFC << 3);
675 break;
676 case R_AARCH64_TLSLE_ADD_TPREL_HI12:
677 checkUInt(ctx, loc, v: val, n: 24, rel);
678 write32Imm12(l: loc, imm: val >> 12);
679 break;
680 case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
681 case R_AARCH64_TLSDESC_ADD_LO12:
682 case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
683 write32Imm12(l: loc, imm: val);
684 break;
685 case R_AARCH64_TLSDESC:
686 // For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
687 write64(ctx, p: loc + 8, v: val);
688 break;
689 default:
690 llvm_unreachable("unknown relocation");
691 }
692}
693
694void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
695 uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are of the form:
  //   adrp    x0, :tlsdesc:v             [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr     x1, [x0, #:tlsdesc_lo12:v] [R_AARCH64_TLSDESC_LD64_LO12]
  //   add     x0, x0, :tlsdesc_lo12:v    [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                       [R_AARCH64_TLSDESC_CALL]
  //   blr     x1
  // And it can be optimized to:
  //   movz    x0, #0x0, lsl #16
  //   movk    x0, #0x10
  //   nop
  //   nop
707 checkUInt(ctx, loc, v: val, n: 32, rel);
708
709 switch (rel.type) {
710 case R_AARCH64_TLSDESC_ADD_LO12:
711 case R_AARCH64_TLSDESC_CALL:
712 write32le(P: loc, V: 0xd503201f); // nop
713 return;
714 case R_AARCH64_TLSDESC_ADR_PAGE21:
715 write32le(P: loc, V: 0xd2a00000 | (((val >> 16) & 0xffff) << 5)); // movz
716 return;
717 case R_AARCH64_TLSDESC_LD64_LO12:
718 write32le(P: loc, V: 0xf2800000 | ((val & 0xffff) << 5)); // movk
719 return;
720 default:
721 llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
722 }
723}
724
725void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
726 uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are of the form:
  //   adrp    x0, :tlsdesc:v             [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr     x1, [x0, #:tlsdesc_lo12:v] [R_AARCH64_TLSDESC_LD64_LO12]
  //   add     x0, x0, :tlsdesc_lo12:v    [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                       [R_AARCH64_TLSDESC_CALL]
  //   blr     x1
  // And it can be optimized to:
  //   adrp    x0, :gottprel:v
  //   ldr     x0, [x0, :gottprel_lo12:v]
  //   nop
  //   nop
738
739 switch (rel.type) {
740 case R_AARCH64_TLSDESC_ADD_LO12:
741 case R_AARCH64_TLSDESC_CALL:
742 write32le(P: loc, V: 0xd503201f); // nop
743 break;
744 case R_AARCH64_TLSDESC_ADR_PAGE21:
745 write32le(P: loc, V: 0x90000000); // adrp
746 relocateNoSym(loc, type: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
747 break;
748 case R_AARCH64_TLSDESC_LD64_LO12:
749 write32le(P: loc, V: 0xf9400000); // ldr
750 relocateNoSym(loc, type: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
751 break;
752 default:
    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
754 }
755}
756
757void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
758 uint64_t val) const {
759 checkUInt(ctx, loc, v: val, n: 32, rel);
760
761 if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
762 // Generate MOVZ.
763 uint32_t regNo = read32le(P: loc) & 0x1f;
764 write32le(P: loc, V: (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5));
765 return;
766 }
767 if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
768 // Generate MOVK.
769 uint32_t regNo = read32le(P: loc) & 0x1f;
770 write32le(P: loc, V: (0xf2800000 | regNo) | ((val & 0xffff) << 5));
771 return;
772 }
773 llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
774}
775
776AArch64Relaxer::AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs)
777 : ctx(ctx) {
778 if (!ctx.arg.relax)
779 return;
780 // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
781 // always appear in pairs.
782 size_t i = 0;
783 const size_t size = relocs.size();
784 for (; i != size; ++i) {
785 if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
786 if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
787 ++i;
788 continue;
789 }
790 break;
791 } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
792 break;
793 }
794 }
795 safeToRelaxAdrpLdr = i == size;
796}
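// For illustration: a section whose GOT relocations come strictly as
// (R_AARCH64_ADR_GOT_PAGE, R_AARCH64_LD64_GOT_LO12_NC) pairs keeps
// safeToRelaxAdrpLdr true; a stray LO12 relocation without its preceding
// ADR_GOT_PAGE disables the ADRP+LDR relaxation for the whole section.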
797
798bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
799 const Relocation &addRel, uint64_t secAddr,
800 uint8_t *buf) const {
801 // When the address of sym is within the range of ADR then
802 // we may relax
803 // ADRP xn, sym
804 // ADD xn, xn, :lo12: sym
805 // to
806 // NOP
807 // ADR xn, sym
808 if (!ctx.arg.relax || adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 ||
809 addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
810 return false;
811 // Check if the relocations apply to consecutive instructions.
812 if (adrpRel.offset + 4 != addRel.offset)
813 return false;
814 if (adrpRel.sym != addRel.sym)
815 return false;
816 if (adrpRel.addend != 0 || addRel.addend != 0)
817 return false;
818
819 uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
820 uint32_t addInstr = read32le(P: buf + addRel.offset);
821 // Check if the first instruction is ADRP and the second instruction is ADD.
822 if ((adrpInstr & 0x9f000000) != 0x90000000 ||
823 (addInstr & 0xffc00000) != 0x91000000)
824 return false;
825 uint32_t adrpDestReg = adrpInstr & 0x1f;
826 uint32_t addDestReg = addInstr & 0x1f;
827 uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
828 if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
829 return false;
830
831 Symbol &sym = *adrpRel.sym;
832 // Check if the address difference is within 1MiB range.
833 int64_t val = sym.getVA(ctx) - (secAddr + addRel.offset);
834 if (val < -1024 * 1024 || val >= 1024 * 1024)
835 return false;
836
  Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset,
                       /*addend=*/0, &sym};
  // nop
  write32le(buf + adrpRel.offset, 0xd503201f);
  // adr x_<dest_reg>
  write32le(buf + adrRel.offset, 0x10000000 | adrpDestReg);
  ctx.target->relocate(buf + adrRel.offset, adrRel, val);
844 return true;
845}
846
847bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
848 const Relocation &ldrRel, uint64_t secAddr,
849 uint8_t *buf) const {
850 if (!safeToRelaxAdrpLdr)
851 return false;
852
  // When the definition of sym is not preemptible then we may
  // be able to relax
  //   ADRP xn, :got: sym
  //   LDR  xn, [ xn :got_lo12: sym]
  // to
  //   ADRP xn, sym
  //   ADD  xn, xn, :lo12: sym
860
861 if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
862 ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
863 return false;
864 // Check if the relocations apply to consecutive instructions.
865 if (adrpRel.offset + 4 != ldrRel.offset)
866 return false;
867 // Check if the relocations reference the same symbol and
868 // skip undefined, preemptible and STT_GNU_IFUNC symbols.
869 if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
870 adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
871 return false;
  // Check if the addends of both relocations are zero.
873 if (adrpRel.addend != 0 || ldrRel.addend != 0)
874 return false;
875 uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
876 uint32_t ldrInstr = read32le(P: buf + ldrRel.offset);
877 // Check if the first instruction is ADRP and the second instruction is LDR.
878 if ((adrpInstr & 0x9f000000) != 0x90000000 ||
879 (ldrInstr & 0x3b000000) != 0x39000000)
880 return false;
881 // Check the value of the sf bit.
882 if (!(ldrInstr >> 31))
883 return false;
884 uint32_t adrpDestReg = adrpInstr & 0x1f;
885 uint32_t ldrDestReg = ldrInstr & 0x1f;
886 uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
  // Check if ADRP and LDR use the same register.
888 if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
889 return false;
890
891 Symbol &sym = *adrpRel.sym;
892 // GOT references to absolute symbols can't be relaxed to use ADRP/ADD in
893 // position-independent code because these instructions produce a relative
894 // address.
895 if (ctx.arg.isPic && !cast<Defined>(Val&: sym).section)
896 return false;
897 // Check if the address difference is within 4GB range.
898 int64_t val =
899 getAArch64Page(expr: sym.getVA(ctx)) - getAArch64Page(expr: secAddr + adrpRel.offset);
900 if (val != llvm::SignExtend64(X: val, B: 33))
901 return false;
902
  Relocation adrpSymRel = {RE_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
                           adrpRel.offset, /*addend=*/0, &sym};
  Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
                       /*addend=*/0, &sym};

  // adrp x_<dest_reg>
  write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
  // add x_<dest_reg>, x_<dest_reg>
  write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));

  ctx.target->relocate(
      buf + adrpSymRel.offset, adrpSymRel,
      SignExtend64(getAArch64Page(sym.getVA(ctx)) -
                       getAArch64Page(secAddr + adrpSymRel.offset),
                   64));
  ctx.target->relocate(buf + addRel.offset, addRel,
                       SignExtend64(sym.getVA(ctx), 64));
  tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf);
921 return true;
922}
923
924// Tagged symbols have upper address bits that are added by the dynamic loader,
925// and thus need the full 64-bit GOT entry. Do not relax such symbols.
926static bool needsGotForMemtag(const Relocation &rel) {
927 return rel.sym->isTagged() && needsGot(expr: rel.expr);
928}
929
930void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
931 uint64_t secAddr = sec.getOutputSection()->addr;
932 if (auto *s = dyn_cast<InputSection>(Val: &sec))
933 secAddr += s->outSecOff;
934 else if (auto *ehIn = dyn_cast<EhInputSection>(Val: &sec))
935 secAddr += ehIn->getParent()->outSecOff;
936 AArch64Relaxer relaxer(ctx, sec.relocs());
937 for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) {
938 const Relocation &rel = sec.relocs()[i];
939 uint8_t *loc = buf + rel.offset;
940 const uint64_t val = sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset);
941
942 if (needsGotForMemtag(rel)) {
943 relocate(loc, rel, val);
944 continue;
945 }
946
947 switch (rel.expr) {
948 case RE_AARCH64_GOT_PAGE_PC:
949 if (i + 1 < size &&
950 relaxer.tryRelaxAdrpLdr(adrpRel: rel, ldrRel: sec.relocs()[i + 1], secAddr, buf)) {
951 ++i;
952 continue;
953 }
954 break;
955 case RE_AARCH64_PAGE_PC:
956 if (i + 1 < size &&
957 relaxer.tryRelaxAdrpAdd(adrpRel: rel, addRel: sec.relocs()[i + 1], secAddr, buf)) {
958 ++i;
959 continue;
960 }
961 break;
962 case RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC:
963 case R_RELAX_TLS_GD_TO_IE_ABS:
964 relaxTlsGdToIe(loc, rel, val);
965 continue;
966 case R_RELAX_TLS_GD_TO_LE:
967 relaxTlsGdToLe(loc, rel, val);
968 continue;
969 case R_RELAX_TLS_IE_TO_LE:
970 relaxTlsIeToLe(loc, rel, val);
971 continue;
972 default:
973 break;
974 }
975 relocate(loc, rel, val);
976 }
977}
978
979static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
980 Relocation &r) {
  // Identify a control transfer relocation for the branch-to-branch
  // optimization. A "control transfer relocation" usually means a B or BL
  // target, but it also includes relative vtable relocations, for example.
984 //
985 // We require the relocation type to be JUMP26, CALL26 or PLT32. With a
986 // relocation type of PLT32 the value may be assumed to be used for branching
987 // directly to the symbol and the addend is only used to produce the relocated
988 // value (hence the effective addend is always 0). This is because if a PLT is
989 // needed the addend will be added to the address of the PLT, and it doesn't
990 // make sense to branch into the middle of a PLT. For example, relative vtable
991 // relocations use PLT32 and 0 or a positive value as the addend but still are
992 // used to branch to the symbol.
993 //
994 // With JUMP26 or CALL26 the only reasonable interpretation of a non-zero
995 // addend is that we are branching to symbol+addend so that becomes the
996 // effective addend.
997 if (r.type == R_AARCH64_PLT32)
998 return 0;
999 if (r.type == R_AARCH64_JUMP26 || r.type == R_AARCH64_CALL26)
1000 return r.addend;
1001 return std::nullopt;
1002}
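// For illustration: a BL carrying addend 4 transfers control to sym+4, so 4 is
// the effective addend; with R_AARCH64_PLT32 (used, e.g., by relative vtables)
// any addend only adjusts the stored value, and the branch target is taken to
// be the symbol itself, hence an effective addend of 0.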
1003
1004static std::pair<Relocation *, uint64_t>
1005getBranchInfoAtTarget(InputSection &is, uint64_t offset) {
1006 auto *i = llvm::partition_point(
1007 Range&: is.relocations, P: [&](Relocation &r) { return r.offset < offset; });
1008 if (i != is.relocations.end() && i->offset == offset &&
1009 i->type == R_AARCH64_JUMP26) {
1010 return {i, i->addend};
1011 }
1012 return {nullptr, 0};
1013}
1014
1015static void redirectControlTransferRelocations(Relocation &r1,
1016 const Relocation &r2) {
1017 r1.expr = r2.expr;
1018 r1.sym = r2.sym;
1019 // With PLT32 we must respect the original addend as that affects the value's
1020 // interpretation. With the other relocation types the original addend is
1021 // irrelevant because it referred to an offset within the original target
1022 // section so we overwrite it.
1023 if (r1.type == R_AARCH64_PLT32)
1024 r1.addend += r2.addend;
1025 else
1026 r1.addend = r2.addend;
1027}
1028
1029void AArch64::applyBranchToBranchOpt() const {
1030 applyBranchToBranchOptImpl(ctx, getControlTransferAddend,
1031 getBranchInfoAtTarget,
1032 redirectControlTransferRelocations);
1033}
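// For illustration: if the resolved destination of a BL is itself an
// unconditional B (an R_AARCH64_JUMP26 at that exact offset of the target
// section), the BL's relocation is redirected to the B's own target, saving
// one branch at run time.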
1034
// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in armv8.3-a, and Branch Target
// Indicator (BTI), introduced in armv8.5-a. The additional instructions used
// in the variant PLT sequences are encoded in the HINT space so they can be
// deployed on older architectures, which treat the instructions as a NOP.
// PAC and BTI can be combined leading to the following combinations:
// writePltHeader
// writePltHeaderBti (no PAC Header needed)
// writePlt
// writePltBti (BTI only)
// writePltPac (PAC only)
// writePltBtiPac (BTI and PAC)
//
// When PAC is enabled the dynamic loader signs the address that it places
// in the .got.plt using the pacia1716 instruction, which signs the value in
// x17 using the modifier in x16. The static linker places autia1716 before the
// indirect branch to x17 to authenticate the address in x17 with the modifier
// in x16. This makes it more difficult for an attacker to modify the value in
// the .got.plt.
//
// When BTI is enabled all indirect branches must land on a bti instruction.
// The static linker must place a bti instruction at the start of any PLT entry
// that may be the target of an indirect branch. As the PLT entries call the
// lazy resolver indirectly, the PLT header must have a bti instruction at its
// start. In general a bti instruction is not needed for a PLT entry, as
// indirect calls are resolved to the function address and not the PLT entry
// for the function. There are a small number of cases where the PLT address
// can escape, such as taking the address of a function or ifunc via a
// non-GOT-generating relocation when a shared library refers to that symbol.
//
// We use the bti c variant of the instruction, which permits indirect branches
// (br) via x16/x17 and indirect function calls (blr) via any register. The ABI
// guarantees that all indirect branches from code requiring BTI protection
// will go via x16/x17.
1069
1070namespace {
1071class AArch64BtiPac final : public AArch64 {
1072public:
1073 AArch64BtiPac(Ctx &);
1074 void writePltHeader(uint8_t *buf) const override;
1075 void writePlt(uint8_t *buf, const Symbol &sym,
1076 uint64_t pltEntryAddr) const override;
1077
1078private:
1079 bool btiHeader; // bti instruction needed in PLT Header and Entry
1080 enum {
1081 PEK_NoAuth,
1082 PEK_AuthHint, // use autia1716 instr for authenticated branch in PLT entry
1083 PEK_Auth, // use braa instr for authenticated branch in PLT entry
1084 } pacEntryKind;
1085};
1086} // namespace
1087
1088AArch64BtiPac::AArch64BtiPac(Ctx &ctx) : AArch64(ctx) {
1089 btiHeader = (ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
  // A BTI (Branch Target Indicator) PLT entry is only required if the
  // address of the PLT entry can be taken by the program, which permits an
  // indirect jump to the PLT entry. This can happen when the address of the
  // PLT entry for a function is canonicalised due to the address of the
  // function in an executable being taken by a shared library, or when a
  // non-preemptible ifunc is referenced by non-GOT-generating,
  // non-PLT-generating relocations.
1097 // The PAC PLT entries require dynamic loader support and this isn't known
1098 // from properties in the objects, so we use the command line flag.
1099 // By default we only use hint-space instructions, but if we detect the
1100 // PAuthABI, which requires v8.3-A, we can use the non-hint space
1101 // instructions.
1102
1103 if (ctx.arg.zPacPlt) {
1104 if (ctx.aarch64PauthAbiCoreInfo && ctx.aarch64PauthAbiCoreInfo->isValid())
1105 pacEntryKind = PEK_Auth;
1106 else
1107 pacEntryKind = PEK_AuthHint;
1108 } else {
1109 pacEntryKind = PEK_NoAuth;
1110 }
1111
1112 if (btiHeader || (pacEntryKind != PEK_NoAuth)) {
1113 pltEntrySize = 24;
1114 ipltEntrySize = 24;
1115 }
1116}
1117
1118void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
1119 const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
1120 const uint8_t pltData[] = {
1121 0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
1122 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
1123 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[2]))]
1124 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[2]))
1125 0x20, 0x02, 0x1f, 0xd6, // br x17
1126 0x1f, 0x20, 0x03, 0xd5, // nop
1127 0x1f, 0x20, 0x03, 0xd5 // nop
1128 };
1129 const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop
1130
1131 uint64_t got = ctx.in.gotPlt->getVA();
1132 uint64_t plt = ctx.in.plt->getVA();
1133
1134 if (btiHeader) {
1135 // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
1136 // instruction.
1137 memcpy(dest: buf, src: btiData, n: sizeof(btiData));
1138 buf += sizeof(btiData);
1139 plt += sizeof(btiData);
1140 }
1141 memcpy(dest: buf, src: pltData, n: sizeof(pltData));
1142
1143 relocateNoSym(loc: buf + 4, type: R_AARCH64_ADR_PREL_PG_HI21,
1144 val: getAArch64Page(expr: got + 16) - getAArch64Page(expr: plt + 4));
1145 relocateNoSym(loc: buf + 8, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + 16);
1146 relocateNoSym(loc: buf + 12, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + 16);
1147 if (!btiHeader)
1148 // We didn't add the BTI c instruction so round out size with NOP.
1149 memcpy(dest: buf + sizeof(pltData), src: nopData, n: sizeof(nopData));
1150}
1151
1152void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
1153 uint64_t pltEntryAddr) const {
1154 // The PLT entry is of the form:
1155 // [btiData] addrInst (pacBr | stdBr) [nopData]
1156 const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
1157 const uint8_t addrInst[] = {
1158 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
1159 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
1160 0x10, 0x02, 0x00, 0x91 // add x16, x16, Offset(&(.got.plt[n]))
1161 };
1162 const uint8_t pacHintBr[] = {
1163 0x9f, 0x21, 0x03, 0xd5, // autia1716
1164 0x20, 0x02, 0x1f, 0xd6 // br x17
1165 };
1166 const uint8_t pacBr[] = {
1167 0x30, 0x0a, 0x1f, 0xd7, // braa x17, x16
1168 0x1f, 0x20, 0x03, 0xd5 // nop
1169 };
1170 const uint8_t stdBr[] = {
1171 0x20, 0x02, 0x1f, 0xd6, // br x17
1172 0x1f, 0x20, 0x03, 0xd5 // nop
1173 };
1174 const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop
1175
  // NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may
  // escape to shared objects. isInIplt indicates a non-preemptible ifunc,
  // whose address may escape if referenced by a direct relocation. If relative
  // vtables are used and the vtable is in a shared object, the offsets will be
  // to the PLT entry. The condition is conservative.
1181 bool hasBti = btiHeader &&
1182 (sym.hasFlag(bit: NEEDS_COPY) || sym.isInIplt || sym.thunkAccessed);
1183 if (hasBti) {
1184 memcpy(dest: buf, src: btiData, n: sizeof(btiData));
1185 buf += sizeof(btiData);
1186 pltEntryAddr += sizeof(btiData);
1187 }
1188
1189 uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
1190 memcpy(dest: buf, src: addrInst, n: sizeof(addrInst));
1191 relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
1192 val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
1193 relocateNoSym(loc: buf + 4, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
1194 relocateNoSym(loc: buf + 8, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);
1195
1196 if (pacEntryKind != PEK_NoAuth)
1197 memcpy(dest: buf + sizeof(addrInst),
1198 src: pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr,
1199 n: sizeof(pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr));
1200 else
1201 memcpy(dest: buf + sizeof(addrInst), src: stdBr, n: sizeof(stdBr));
1202 if (!hasBti)
1203 // We didn't add the BTI c instruction so round out size with NOP.
1204 memcpy(dest: buf + sizeof(addrInst) + sizeof(stdBr), src: nopData, n: sizeof(nopData));
1205}
1206
1207template <class ELFT>
1208static void
1209addTaggedSymbolReferences(Ctx &ctx, InputSectionBase &sec,
1210 DenseMap<Symbol *, unsigned> &referenceCount) {
1211 assert(sec.type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC);
1212
1213 const RelsOrRelas<ELFT> rels = sec.relsOrRelas<ELFT>();
1214 if (rels.areRelocsRel())
1215 ErrAlways(ctx)
1216 << "non-RELA relocations are not allowed with memtag globals";
1217
1218 for (const typename ELFT::Rela &rel : rels.relas) {
1219 Symbol &sym = sec.file->getRelocTargetSym(rel);
    // Linker-synthesized symbols such as __executable_start may be referenced
    // as tagged in input objfiles, and we don't want them to be tagged. A
    // cheap way to exclude them is the type check, as their type is
    // STT_NOTYPE. In addition, this saves us from checking untaggable symbols,
    // like functions or TLS symbols.
1225 if (sym.type != STT_OBJECT)
1226 continue;
1227 // STB_LOCAL symbols can't be referenced from outside the object file, and
1228 // thus don't need to be checked for references from other object files.
1229 if (sym.binding == STB_LOCAL) {
1230 sym.setIsTagged(true);
1231 continue;
1232 }
1233 ++referenceCount[&sym];
1234 }
1235 sec.markDead();
1236}
1237
1238// A tagged symbol must be denoted as being tagged by all references and the
1239// chosen definition. For simplicity, here, it must also be denoted as tagged
1240// for all definitions. Otherwise:
1241//
1242// 1. A tagged definition can be used by an untagged declaration, in which case
1243// the untagged access may be PC-relative, causing a tag mismatch at
1244// runtime.
1245// 2. An untagged definition can be used by a tagged declaration, where the
1246// compiler has taken advantage of the increased alignment of the tagged
1247// declaration, but the alignment at runtime is wrong, causing a fault.
1248//
1249// Ideally, this isn't a problem, as any TU that imports or exports tagged
1250// symbols should also be built with tagging. But, to handle these cases, we
1251// demote the symbol to be untagged.
1252void elf::createTaggedSymbols(Ctx &ctx) {
1253 assert(hasMemtag(ctx));
1254
1255 // First, collect all symbols that are marked as tagged, and count how many
1256 // times they're marked as tagged.
1257 DenseMap<Symbol *, unsigned> taggedSymbolReferenceCount;
1258 for (InputFile *file : ctx.objectFiles) {
1259 if (file->kind() != InputFile::ObjKind)
1260 continue;
1261 for (InputSectionBase *section : file->getSections()) {
1262 if (!section || section->type != SHT_AARCH64_MEMTAG_GLOBALS_STATIC ||
1263 section == &InputSection::discarded)
1264 continue;
1265 invokeELFT(addTaggedSymbolReferences, ctx, *section,
1266 taggedSymbolReferenceCount);
1267 }
1268 }
1269
  // Now, go through all the symbols. If the number of declarations and
  // definitions of a symbol exceeds the number of times it is marked as
  // tagged, it means we have an objfile that uses the untagged variant of the
  // symbol.
1274 for (InputFile *file : ctx.objectFiles) {
1275 if (file->kind() != InputFile::BinaryKind &&
1276 file->kind() != InputFile::ObjKind)
1277 continue;
1278
1279 for (Symbol *symbol : file->getSymbols()) {
1280 // See `addTaggedSymbolReferences` for more details.
1281 if (symbol->type != STT_OBJECT ||
1282 symbol->binding == STB_LOCAL)
1283 continue;
1284 auto it = taggedSymbolReferenceCount.find(Val: symbol);
1285 if (it == taggedSymbolReferenceCount.end()) continue;
1286 unsigned &remainingAllowedTaggedRefs = it->second;
1287 if (remainingAllowedTaggedRefs == 0) {
1288 taggedSymbolReferenceCount.erase(I: it);
1289 continue;
1290 }
1291 --remainingAllowedTaggedRefs;
1292 }
1293 }
1294
  // `addTaggedSymbolReferences` has already checked that we have RELA
  // relocations; the only other way to get written addends is with
  // --apply-dynamic-relocs.
1298 if (!taggedSymbolReferenceCount.empty() && ctx.arg.writeAddends)
1299 ErrAlways(ctx) << "--apply-dynamic-relocs cannot be used with MTE globals";
1300
  // Now, `taggedSymbolReferenceCount` should only contain symbols that are
  // defined as tagged exactly as many times as they are referenced, meaning
  // all uses are tagged.
1304 for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) {
1305 assert(remainingTaggedRefs == 0 &&
1306 "Symbol is defined as tagged more times than it's used");
1307 symbol->setIsTagged(true);
1308 }
1309}
1310
1311void elf::setAArch64TargetInfo(Ctx &ctx) {
1312 if ((ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ||
1313 ctx.arg.zPacPlt)
1314 ctx.target.reset(p: new AArch64BtiPac(ctx));
1315 else
1316 ctx.target.reset(p: new AArch64(ctx));
1317}
1318