AArch64.cpp source code [llvm_projects/lld/ELF/Arch/AArch64.cpp]

1	//===- AArch64.cpp --------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "InputFiles.h"
10	#include "OutputSections.h"
11	#include "Symbols.h"
12	#include "SyntheticSections.h"
13	#include "Target.h"
14	#include "lld/Common/ErrorHandler.h"
15	#include "llvm/BinaryFormat/ELF.h"
16	#include "llvm/Support/Endian.h"
17
18	using namespace llvm;
19	using namespace llvm::support::endian;
20	using namespace llvm::ELF;
21	using namespace lld;
22	using namespace lld::elf;
23
24	// Page(Expr) is the page address of the expression Expr, defined
25	// as (Expr & ~0xFFF). (This applies even if the machine page size
26	// supported by the platform has a different value.)
27	uint64_t elf::getAArch64Page(uint64_t expr) {
28	return expr & ~static_cast<uint64_t>(`0xFFF`);
29	}
30
31	namespace {
32	class AArch64 : public TargetInfo {
33	public:
34	AArch64();
35	RelExpr getRelExpr(RelType type, const Symbol &s,
36	const uint8_t loc) const* override;
37	RelType getDynRel(RelType type) const override;
38	int64_t getImplicitAddend(const uint8_t buf, RelType type) const* override;
39	void writeGotPlt(uint8_t buf, const* Symbol &s) const override;
40	void writeIgotPlt(uint8_t buf, const* Symbol &s) const override;
41	void writePltHeader(uint8_t buf) const* override;
42	void writePlt(uint8_t buf, const* Symbol &sym,
43	uint64_t pltEntryAddr) const override;
44	bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
45	uint64_t branchAddr, const Symbol &s,
46	int64_t a) const override;
47	uint32_t getThunkSectionSpacing() const override;
48	bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
49	bool usesOnlyLowPageBits(RelType type) const override;
50	void relocate(uint8_t loc, const* Relocation &rel,
51	uint64_t val) const override;
52	RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
53	void relocateAlloc(InputSectionBase &sec, uint8_t buf) const* override;
54
55	private:
56	void relaxTlsGdToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
57	void relaxTlsGdToIe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
58	void relaxTlsIeToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
59	};
60
61	struct AArch64Relaxer {
62	bool safeToRelaxAdrpLdr = false;
63
64	AArch64Relaxer(ArrayRef<Relocation> relocs);
65	bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
66	uint64_t secAddr, uint8_t buf) const*;
67	bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
68	uint64_t secAddr, uint8_t buf) const*;
69	};
70	} // namespace
71
// Return the bits [start, end] (both inclusive) from val, shifted right by
// start bits. For instance, getBits(0xF0, 4, 8) returns 0xF.
//
// A field spanning all 64 bits is handled explicitly, because shifting a
// 64-bit value by 64 is undefined behaviour.
static uint64_t getBits(uint64_t val, int start, int end) {
  const int width = end + 1 - start;
  const uint64_t mask = width >= 64
                            ? ~static_cast<uint64_t>(0)
                            : (static_cast<uint64_t>(1) << width) - 1;
  return (val >> start) & mask;
}
78
79	AArch64::AArch64() {
80	copyRel = R_AARCH64_COPY;
81	relativeRel = R_AARCH64_RELATIVE;
82	iRelativeRel = R_AARCH64_IRELATIVE;
83	gotRel = R_AARCH64_GLOB_DAT;
84	pltRel = R_AARCH64_JUMP_SLOT;
85	symbolicRel = R_AARCH64_ABS64;
86	tlsDescRel = R_AARCH64_TLSDESC;
87	tlsGotRel = R_AARCH64_TLS_TPREL64;
88	pltHeaderSize = `32`;
89	pltEntrySize = `16`;
90	ipltEntrySize = `16`;
91	defaultMaxPageSize = `65536`;
92
93	// Align to the 2 MiB page size (known as a superpage or huge page).
94	// FreeBSD automatically promotes 2 MiB-aligned allocations.
95	defaultImageBase = `0x200000`;
96
97	needsThunks = true;
98	}
99
100	RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
101	const uint8_t loc) const* {
102	switch (type) {
103	case R_AARCH64_ABS16:
104	case R_AARCH64_ABS32:
105	case R_AARCH64_ABS64:
106	case R_AARCH64_ADD_ABS_LO12_NC:
107	case R_AARCH64_LDST128_ABS_LO12_NC:
108	case R_AARCH64_LDST16_ABS_LO12_NC:
109	case R_AARCH64_LDST32_ABS_LO12_NC:
110	case R_AARCH64_LDST64_ABS_LO12_NC:
111	case R_AARCH64_LDST8_ABS_LO12_NC:
112	case R_AARCH64_MOVW_SABS_G0:
113	case R_AARCH64_MOVW_SABS_G1:
114	case R_AARCH64_MOVW_SABS_G2:
115	case R_AARCH64_MOVW_UABS_G0:
116	case R_AARCH64_MOVW_UABS_G0_NC:
117	case R_AARCH64_MOVW_UABS_G1:
118	case R_AARCH64_MOVW_UABS_G1_NC:
119	case R_AARCH64_MOVW_UABS_G2:
120	case R_AARCH64_MOVW_UABS_G2_NC:
121	case R_AARCH64_MOVW_UABS_G3:
122	return R_ABS;
123	case R_AARCH64_AUTH_ABS64:
124	return R_AARCH64_AUTH;
125	case R_AARCH64_TLSDESC_ADR_PAGE21:
126	return R_AARCH64_TLSDESC_PAGE;
127	case R_AARCH64_TLSDESC_LD64_LO12:
128	case R_AARCH64_TLSDESC_ADD_LO12:
129	return R_TLSDESC;
130	case R_AARCH64_TLSDESC_CALL:
131	return R_TLSDESC_CALL;
132	case R_AARCH64_TLSLE_ADD_TPREL_HI12:
133	case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
134	case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
135	case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
136	case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
137	case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
138	case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
139	case R_AARCH64_TLSLE_MOVW_TPREL_G0:
140	case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
141	case R_AARCH64_TLSLE_MOVW_TPREL_G1:
142	case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
143	case R_AARCH64_TLSLE_MOVW_TPREL_G2:
144	return R_TPREL;
145	case R_AARCH64_CALL26:
146	case R_AARCH64_CONDBR19:
147	case R_AARCH64_JUMP26:
148	case R_AARCH64_TSTBR14:
149	return R_PLT_PC;
150	case R_AARCH64_PLT32:
151	const_cast<Symbol &>(s).thunkAccessed = true;
152	return R_PLT_PC;
153	case R_AARCH64_PREL16:
154	case R_AARCH64_PREL32:
155	case R_AARCH64_PREL64:
156	case R_AARCH64_ADR_PREL_LO21:
157	case R_AARCH64_LD_PREL_LO19:
158	case R_AARCH64_MOVW_PREL_G0:
159	case R_AARCH64_MOVW_PREL_G0_NC:
160	case R_AARCH64_MOVW_PREL_G1:
161	case R_AARCH64_MOVW_PREL_G1_NC:
162	case R_AARCH64_MOVW_PREL_G2:
163	case R_AARCH64_MOVW_PREL_G2_NC:
164	case R_AARCH64_MOVW_PREL_G3:
165	return R_PC;
166	case R_AARCH64_ADR_PREL_PG_HI21:
167	case R_AARCH64_ADR_PREL_PG_HI21_NC:
168	return R_AARCH64_PAGE_PC;
169	case R_AARCH64_LD64_GOT_LO12_NC:
170	case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
171	return R_GOT;
172	case R_AARCH64_LD64_GOTPAGE_LO15:
173	return R_AARCH64_GOT_PAGE;
174	case R_AARCH64_ADR_GOT_PAGE:
175	case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
176	return R_AARCH64_GOT_PAGE_PC;
177	case R_AARCH64_GOTPCREL32:
178	case R_AARCH64_GOT_LD_PREL19:
179	return R_GOT_PC;
180	case R_AARCH64_NONE:
181	return R_NONE;
182	default:
183	error(msg: getErrorLocation(loc) + "unknown relocation (" + Twine (type) +
184	") against symbol " + toString(s));
185	return R_NONE;
186	}
187	}
188
189	RelExpr AArch64::adjustTlsExpr(RelType type, RelExpr expr) const {
190	if (expr == R_RELAX_TLS_GD_TO_IE) {
191	if (type == R_AARCH64_TLSDESC_ADR_PAGE21)
192	return R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC;
193	return R_RELAX_TLS_GD_TO_IE_ABS;
194	}
195	return expr;
196	}
197
198	bool AArch64::usesOnlyLowPageBits(RelType type) const {
199	switch (type) {
200	default:
201	return false;
202	case R_AARCH64_ADD_ABS_LO12_NC:
203	case R_AARCH64_LD64_GOT_LO12_NC:
204	case R_AARCH64_LDST128_ABS_LO12_NC:
205	case R_AARCH64_LDST16_ABS_LO12_NC:
206	case R_AARCH64_LDST32_ABS_LO12_NC:
207	case R_AARCH64_LDST64_ABS_LO12_NC:
208	case R_AARCH64_LDST8_ABS_LO12_NC:
209	case R_AARCH64_TLSDESC_ADD_LO12:
210	case R_AARCH64_TLSDESC_LD64_LO12:
211	case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
212	return true;
213	}
214	}
215
216	RelType AArch64::getDynRel(RelType type) const {
217	if (type == R_AARCH64_ABS64 \|\| type == R_AARCH64_AUTH_ABS64)
218	return type;
219	return R_AARCH64_NONE;
220	}
221
222	int64_t AArch64::getImplicitAddend(const uint8_t buf, RelType type) const* {
223	switch (type) {
224	case R_AARCH64_TLSDESC:
225	return read64(p: buf + `8`);
226	case R_AARCH64_NONE:
227	case R_AARCH64_GLOB_DAT:
228	case R_AARCH64_JUMP_SLOT:
229	return `0`;
230	case R_AARCH64_ABS16:
231	case R_AARCH64_PREL16:
232	return SignExtend64<`16`>(x: read16(p: buf));
233	case R_AARCH64_ABS32:
234	case R_AARCH64_PREL32:
235	return SignExtend64<`32`>(x: read32(p: buf));
236	case R_AARCH64_ABS64:
237	case R_AARCH64_PREL64:
238	case R_AARCH64_RELATIVE:
239	case R_AARCH64_IRELATIVE:
240	case R_AARCH64_TLS_TPREL64:
241	return read64(p: buf);
242
243	// The following relocation types all point at instructions, and
244	// relocate an immediate field in the instruction.
245	//
246	// The general rule, from AAELF64 §5.7.2 "Addends and PC-bias",
247	// says: "If the relocation relocates an instruction the immediate
248	// field of the instruction is extracted, scaled as required by
249	// the instruction field encoding, and sign-extended to 64 bits".
250
251	// The R_AARCH64_MOVW family operates on wide MOV/MOVK/MOVZ
252	// instructions, which have a 16-bit immediate field with its low
253	// bit in bit 5 of the instruction encoding. When the immediate
254	// field is used as an implicit addend for REL-type relocations,
255	// it is treated as added to the low bits of the output value, not
256	// shifted depending on the relocation type.
257	//
258	// This allows REL relocations to express the requirement 'please
259	// add 12345 to this symbol value and give me the four 16-bit
260	// chunks of the result', by putting the same addend 12345 in all
261	// four instructions. Carries between the 16-bit chunks are
262	// handled correctly, because the whole 64-bit addition is done
263	// once per relocation.
264	case R_AARCH64_MOVW_UABS_G0:
265	case R_AARCH64_MOVW_UABS_G0_NC:
266	case R_AARCH64_MOVW_UABS_G1:
267	case R_AARCH64_MOVW_UABS_G1_NC:
268	case R_AARCH64_MOVW_UABS_G2:
269	case R_AARCH64_MOVW_UABS_G2_NC:
270	case R_AARCH64_MOVW_UABS_G3:
271	return SignExtend64<`16`>(x: getBits(val: read32(p: buf), start: `5`, end: `20`));
272
273	// R_AARCH64_TSTBR14 points at a TBZ or TBNZ instruction, which
274	// has a 14-bit offset measured in instructions, i.e. shifted left
275	// by 2.
276	case R_AARCH64_TSTBR14:
277	return SignExtend64<`16`>(x: getBits(val: read32(p: buf), start: `5`, end: `18`) << `2`);
278
279	// R_AARCH64_CONDBR19 operates on the ordinary B.cond instruction,
280	// which has a 19-bit offset measured in instructions.
281	//
282	// R_AARCH64_LD_PREL_LO19 operates on the LDR (literal)
283	// instruction, which also has a 19-bit offset, measured in 4-byte
284	// chunks. So the calculation is the same as for
285	// R_AARCH64_CONDBR19.
286	case R_AARCH64_CONDBR19:
287	case R_AARCH64_LD_PREL_LO19:
288	return SignExtend64<`21`>(x: getBits(val: read32(p: buf), start: `5`, end: `23`) << `2`);
289
290	// R_AARCH64_ADD_ABS_LO12_NC operates on ADD (immediate). The
291	// immediate can optionally be shifted left by 12 bits, but this
292	// relocation is intended for the case where it is not.
293	case R_AARCH64_ADD_ABS_LO12_NC:
294	return SignExtend64<`12`>(x: getBits(val: read32(p: buf), start: `10`, end: `21`));
295
296	// R_AARCH64_ADR_PREL_LO21 operates on an ADR instruction, whose
297	// 21-bit immediate is split between two bits high up in the word
298	// (in fact the two _lowest_ order bits of the value) and 19 bits
299	// lower down.
300	//
301	// R_AARCH64_ADR_PREL_PG_HI21[_NC] operate on an ADRP instruction,
302	// which encodes the immediate in the same way, but will shift it
303	// left by 12 bits when the instruction executes. For the same
304	// reason as the MOVW family, we don't apply that left shift here.
305	case R_AARCH64_ADR_PREL_LO21:
306	case R_AARCH64_ADR_PREL_PG_HI21:
307	case R_AARCH64_ADR_PREL_PG_HI21_NC:
308	return SignExtend64<`21`>(x: (getBits(val: read32(p: buf), start: `5`, end: `23`) << `2`) \|
309	getBits(val: read32(p: buf), start: `29`, end: `30`));
310
311	// R_AARCH64_{JUMP,CALL}26 operate on B and BL, which have a
312	// 26-bit offset measured in instructions.
313	case R_AARCH64_JUMP26:
314	case R_AARCH64_CALL26:
315	return SignExtend64<`28`>(x: getBits(val: read32(p: buf), start: `0`, end: `25`) << `2`);
316
317	default:
318	internalLinkerError(loc: getErrorLocation(loc: buf),
319	msg: "cannot read addend for relocation " + toString(type));
320	return `0`;
321	}
322	}
323
324	void AArch64::writeGotPlt(uint8_t buf, const* Symbol &) const {
325	write64(p: buf, v: in.plt ->getVA());
326	}
327
328	void AArch64::writeIgotPlt(uint8_t buf, const* Symbol &s) const {
329	if (config ->writeAddends)
330	write64(p: buf, v: s.getVA());
331	}
332
333	void AArch64::writePltHeader(uint8_t buf) const* {
334	const uint8_t pltData[] = {
335	`0xf0`, `0x7b`, `0xbf`, `0xa9`, // stp x16, x30, [sp,#-16]!
336	`0x10`, `0x00`, `0x00`, `0x90`, // adrp x16, Page(&(.got.plt[2]))
337	`0x11`, `0x02`, `0x40`, `0xf9`, // ldr x17, [x16, Offset(&(.got.plt[2]))]
338	`0x10`, `0x02`, `0x00`, `0x91`, // add x16, x16, Offset(&(.got.plt[2]))
339	`0x20`, `0x02`, `0x1f`, `0xd6`, // br x17
340	`0x1f`, `0x20`, `0x03`, `0xd5`, // nop
341	`0x1f`, `0x20`, `0x03`, `0xd5`, // nop
342	`0x1f`, `0x20`, `0x03`, `0xd5` // nop
343	};
344	memcpy(dest: buf, src: pltData, n: sizeof(pltData));
345
346	uint64_t got = in.gotPlt ->getVA();
347	uint64_t plt = in.plt ->getVA();
348	relocateNoSym(loc: buf + `4`, type: R_AARCH64_ADR_PREL_PG_HI21,
349	val: getAArch64Page(expr: got + `16`) - getAArch64Page(expr: plt + `4`));
350	relocateNoSym(loc: buf + `8`, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + `16`);
351	relocateNoSym(loc: buf + `12`, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + `16`);
352	}
353
354	void AArch64::writePlt(uint8_t buf, const* Symbol &sym,
355	uint64_t pltEntryAddr) const {
356	const uint8_t inst[] = {
357	`0x10`, `0x00`, `0x00`, `0x90`, // adrp x16, Page(&(.got.plt[n]))
358	`0x11`, `0x02`, `0x40`, `0xf9`, // ldr x17, [x16, Offset(&(.got.plt[n]))]
359	`0x10`, `0x02`, `0x00`, `0x91`, // add x16, x16, Offset(&(.got.plt[n]))
360	`0x20`, `0x02`, `0x1f`, `0xd6` // br x17
361	};
362	memcpy(dest: buf, src: inst, n: sizeof(inst));
363
364	uint64_t gotPltEntryAddr = sym.getGotPltVA();
365	relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
366	val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
367	relocateNoSym(loc: buf + `4`, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
368	relocateNoSym(loc: buf + `8`, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);
369	}
370
371	bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
372	uint64_t branchAddr, const Symbol &s,
373	int64_t a) const {
374	// If s is an undefined weak symbol and does not have a PLT entry then it will
375	// be resolved as a branch to the next instruction. If it is hidden, its
376	// binding has been converted to local, so we just check isUndefined() here. A
377	// undefined non-weak symbol will have been errored.
378	if (s.isUndefined() && !s.isInPlt())
379	return false;
380	// ELF for the ARM 64-bit architecture, section Call and Jump relocations
381	// only permits range extension thunks for R_AARCH64_CALL26 and
382	// R_AARCH64_JUMP26 relocation types.
383	if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
384	type != R_AARCH64_PLT32)
385	return false;
386	uint64_t dst = expr == R_PLT_PC ? s.getPltVA() : s.getVA(addend: a);
387	return !inBranchRange(type, src: branchAddr, dst);
388	}
389
390	uint32_t AArch64::getThunkSectionSpacing() const {
391	// See comment in Arch/ARM.cpp for a more detailed explanation of
392	// getThunkSectionSpacing(). For AArch64 the only branches we are permitted to
393	// Thunk have a range of +/- 128 MiB
394	return (`128` * `1024` * `1024`) - `0x30000`;
395	}
396
397	bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
398	if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
399	type != R_AARCH64_PLT32)
400	return true;
401	// The AArch64 call and unconditional branch instructions have a range of
402	// +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
403	uint64_t range =
404	type == R_AARCH64_PLT32 ? (UINT64_C(`1`) << `31`) : (`128` * `1024` * `1024`);
405	if (dst > src) {
406	// Immediate of branch is signed.
407	range -= `4`;
408	return dst - src <= range;
409	}
410	return src - dst <= range;
411	}
412
413	static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
414	uint32_t immLo = (imm & `0x3`) << `29`;
415	uint32_t immHi = (imm & `0x1FFFFC`) << `3`;
416	uint64_t mask = (`0x3` << `29`) \| (`0x1FFFFC` << `3`);
417	write32le(P: l, V: (read32le(P: l) & ~mask) \| immLo \| immHi);
418	}
419
420	static void writeMaskedBits32le(uint8_t *p, int32_t v, uint32_t mask) {
421	write32le(P: p, V: (read32le(P: p) & ~mask) \| v);
422	}
423
424	// Update the immediate field in a AARCH64 ldr, str, and add instruction.
425	static void write32Imm12(uint8_t *l, uint64_t imm) {
426	writeMaskedBits32le(p: l, v: (imm & `0xFFF`) << `10`, mask: `0xFFF` << `10`);
427	}
428
429	// Update the immediate field in an AArch64 movk, movn or movz instruction
430	// for a signed relocation, and update the opcode of a movn or movz instruction
431	// to match the sign of the operand.
432	static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
433	uint32_t inst = read32le(P: loc);
434	// Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
435	if (!(inst & (`1` << `29`))) {
436	// movn or movz.
437	if (imm & `0x10000`) {
438	// Change opcode to movn, which takes an inverted operand.
439	imm ^= `0xFFFF`;
440	inst &= ~(`1` << `30`);
441	} else {
442	// Change opcode to movz.
443	inst \|= `1` << `30`;
444	}
445	}
446	write32le(P: loc, V: inst \| ((imm & `0xFFFF`) << `5`));
447	}
448
449	void AArch64::relocate(uint8_t loc, const* Relocation &rel,
450	uint64_t val) const {
451	switch (rel.type) {
452	case R_AARCH64_ABS16:
453	case R_AARCH64_PREL16:
454	checkIntUInt(loc, v: val, n: `16`, rel);
455	write16(p: loc, v: val);
456	break;
457	case R_AARCH64_ABS32:
458	case R_AARCH64_PREL32:
459	checkIntUInt(loc, v: val, n: `32`, rel);
460	write32(p: loc, v: val);
461	break;
462	case R_AARCH64_PLT32:
463	case R_AARCH64_GOTPCREL32:
464	checkInt(loc, v: val, n: `32`, rel);
465	write32(p: loc, v: val);
466	break;
467	case R_AARCH64_ABS64:
468	// AArch64 relocations to tagged symbols have extended semantics, as
469	// described here:
470	// https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative.
471	// tl;dr: encode the symbol's special addend in the place, which is an
472	// offset to the point where the logical tag is derived from. Quick hack, if
473	// the addend is within the symbol's bounds, no need to encode the tag
474	// derivation offset.
475	if (rel.sym && rel.sym->isTagged() &&
476	(rel.addend < `0` \|\|
477	rel.addend >= static_cast<int64_t>(rel.sym->getSize())))
478	write64(p: loc, v: -rel.addend);
479	else
480	write64(p: loc, v: val);
481	break;
482	case R_AARCH64_PREL64:
483	write64(p: loc, v: val);
484	break;
485	case R_AARCH64_AUTH_ABS64:
486	// If val is wider than 32 bits, the relocation must have been moved from
487	// .relr.auth.dyn to .rela.dyn, and the addend write is not needed.
488	//
489	// If val fits in 32 bits, we have two potential scenarios:
490	// True RELR: Write the 32-bit `val`.*
491	// RELA: Even if the value now fits in 32 bits, it might have been*
492	// converted from RELR during an iteration in
493	// finalizeAddressDependentContent(). Writing the value is harmless
494	// because dynamic linking ignores it.
495	if (isInt<`32`>(x: val))
496	write32(p: loc, v: val);
497	break;
498	case R_AARCH64_ADD_ABS_LO12_NC:
499	write32Imm12(l: loc, imm: val);
500	break;
501	case R_AARCH64_ADR_GOT_PAGE:
502	case R_AARCH64_ADR_PREL_PG_HI21:
503	case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
504	case R_AARCH64_TLSDESC_ADR_PAGE21:
505	checkInt(loc, v: val, n: `33`, rel);
506	[[fallthrough]];
507	case R_AARCH64_ADR_PREL_PG_HI21_NC:
508	write32AArch64Addr(l: loc, imm: val >> `12`);
509	break;
510	case R_AARCH64_ADR_PREL_LO21:
511	checkInt(loc, v: val, n: `21`, rel);
512	write32AArch64Addr(l: loc, imm: val);
513	break;
514	case R_AARCH64_JUMP26:
515	// Normally we would just write the bits of the immediate field, however
516	// when patching instructions for the cpu errata fix -fix-cortex-a53-843419
517	// we want to replace a non-branch instruction with a branch immediate
518	// instruction. By writing all the bits of the instruction including the
519	// opcode and the immediate (0 001 \| 01 imm26) we can do this
520	// transformation by placing a R_AARCH64_JUMP26 relocation at the offset of
521	// the instruction we want to patch.
522	write32le(P: loc, V: `0x14000000`);
523	[[fallthrough]];
524	case R_AARCH64_CALL26:
525	checkInt(loc, v: val, n: `28`, rel);
526	writeMaskedBits32le(p: loc, v: (val & `0x0FFFFFFC`) >> `2`, mask: `0x0FFFFFFC` >> `2`);
527	break;
528	case R_AARCH64_CONDBR19:
529	case R_AARCH64_LD_PREL_LO19:
530	case R_AARCH64_GOT_LD_PREL19:
531	checkAlignment(loc, v: val, n: `4`, rel);
532	checkInt(loc, v: val, n: `21`, rel);
533	writeMaskedBits32le(p: loc, v: (val & `0x1FFFFC`) << `3`, mask: `0x1FFFFC` << `3`);
534	break;
535	case R_AARCH64_LDST8_ABS_LO12_NC:
536	case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
537	write32Imm12(l: loc, imm: getBits(val, start: `0`, end: `11`));
538	break;
539	case R_AARCH64_LDST16_ABS_LO12_NC:
540	case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
541	checkAlignment(loc, v: val, n: `2`, rel);
542	write32Imm12(l: loc, imm: getBits(val, start: `1`, end: `11`));
543	break;
544	case R_AARCH64_LDST32_ABS_LO12_NC:
545	case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
546	checkAlignment(loc, v: val, n: `4`, rel);
547	write32Imm12(l: loc, imm: getBits(val, start: `2`, end: `11`));
548	break;
549	case R_AARCH64_LDST64_ABS_LO12_NC:
550	case R_AARCH64_LD64_GOT_LO12_NC:
551	case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
552	case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
553	case R_AARCH64_TLSDESC_LD64_LO12:
554	checkAlignment(loc, v: val, n: `8`, rel);
555	write32Imm12(l: loc, imm: getBits(val, start: `3`, end: `11`));
556	break;
557	case R_AARCH64_LDST128_ABS_LO12_NC:
558	case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
559	checkAlignment(loc, v: val, n: `16`, rel);
560	write32Imm12(l: loc, imm: getBits(val, start: `4`, end: `11`));
561	break;
562	case R_AARCH64_LD64_GOTPAGE_LO15:
563	checkAlignment(loc, v: val, n: `8`, rel);
564	write32Imm12(l: loc, imm: getBits(val, start: `3`, end: `14`));
565	break;
566	case R_AARCH64_MOVW_UABS_G0:
567	checkUInt(loc, v: val, n: `16`, rel);
568	[[fallthrough]];
569	case R_AARCH64_MOVW_UABS_G0_NC:
570	writeMaskedBits32le(p: loc, v: (val & `0xFFFF`) << `5`, mask: `0xFFFF` << `5`);
571	break;
572	case R_AARCH64_MOVW_UABS_G1:
573	checkUInt(loc, v: val, n: `32`, rel);
574	[[fallthrough]];
575	case R_AARCH64_MOVW_UABS_G1_NC:
576	writeMaskedBits32le(p: loc, v: (val & `0xFFFF0000`) >> `11`, mask: `0xFFFF0000` >> `11`);
577	break;
578	case R_AARCH64_MOVW_UABS_G2:
579	checkUInt(loc, v: val, n: `48`, rel);
580	[[fallthrough]];
581	case R_AARCH64_MOVW_UABS_G2_NC:
582	writeMaskedBits32le(p: loc, v: (val & `0xFFFF00000000`) >> `27`,
583	mask: `0xFFFF00000000` >> `27`);
584	break;
585	case R_AARCH64_MOVW_UABS_G3:
586	writeMaskedBits32le(p: loc, v: (val & `0xFFFF000000000000`) >> `43`,
587	mask: `0xFFFF000000000000` >> `43`);
588	break;
589	case R_AARCH64_MOVW_PREL_G0:
590	case R_AARCH64_MOVW_SABS_G0:
591	case R_AARCH64_TLSLE_MOVW_TPREL_G0:
592	checkInt(loc, v: val, n: `17`, rel);
593	[[fallthrough]];
594	case R_AARCH64_MOVW_PREL_G0_NC:
595	case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
596	writeSMovWImm(loc, imm: val);
597	break;
598	case R_AARCH64_MOVW_PREL_G1:
599	case R_AARCH64_MOVW_SABS_G1:
600	case R_AARCH64_TLSLE_MOVW_TPREL_G1:
601	checkInt(loc, v: val, n: `33`, rel);
602	[[fallthrough]];
603	case R_AARCH64_MOVW_PREL_G1_NC:
604	case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
605	writeSMovWImm(loc, imm: val >> `16`);
606	break;
607	case R_AARCH64_MOVW_PREL_G2:
608	case R_AARCH64_MOVW_SABS_G2:
609	case R_AARCH64_TLSLE_MOVW_TPREL_G2:
610	checkInt(loc, v: val, n: `49`, rel);
611	[[fallthrough]];
612	case R_AARCH64_MOVW_PREL_G2_NC:
613	writeSMovWImm(loc, imm: val >> `32`);
614	break;
615	case R_AARCH64_MOVW_PREL_G3:
616	writeSMovWImm(loc, imm: val >> `48`);
617	break;
618	case R_AARCH64_TSTBR14:
619	checkInt(loc, v: val, n: `16`, rel);
620	writeMaskedBits32le(p: loc, v: (val & `0xFFFC`) << `3`, mask: `0xFFFC` << `3`);
621	break;
622	case R_AARCH64_TLSLE_ADD_TPREL_HI12:
623	checkUInt(loc, v: val, n: `24`, rel);
624	write32Imm12(l: loc, imm: val >> `12`);
625	break;
626	case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
627	case R_AARCH64_TLSDESC_ADD_LO12:
628	write32Imm12(l: loc, imm: val);
629	break;
630	case R_AARCH64_TLSDESC:
631	// For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
632	write64(p: loc + `8`, v: val);
633	break;
634	default:
635	llvm_unreachable("unknown relocation");
636	}
637	}
638
639	void AArch64::relaxTlsGdToLe(uint8_t loc, const* Relocation &rel,
640	uint64_t val) const {
641	// TLSDESC Global-Dynamic relocation are in the form:
642	// adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21]
643	// ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12]
644	// add x0, x0, :tlsdesc_los:v [R_AARCH64_TLSDESC_ADD_LO12]
645	// .tlsdesccall [R_AARCH64_TLSDESC_CALL]
646	// blr x1
647	// And it can optimized to:
648	// movz x0, #0x0, lsl #16
649	// movk x0, #0x10
650	// nop
651	// nop
652	checkUInt(loc, v: val, n: `32`, rel);
653
654	switch (rel.type) {
655	case R_AARCH64_TLSDESC_ADD_LO12:
656	case R_AARCH64_TLSDESC_CALL:
657	write32le(P: loc, V: `0xd503201f`); // nop
658	return;
659	case R_AARCH64_TLSDESC_ADR_PAGE21:
660	write32le(P: loc, V: `0xd2a00000` \| (((val >> `16`) & `0xffff`) << `5`)); // movz
661	return;
662	case R_AARCH64_TLSDESC_LD64_LO12:
663	write32le(P: loc, V: `0xf2800000` \| ((val & `0xffff`) << `5`)); // movk
664	return;
665	default:
666	llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
667	}
668	}
669
670	void AArch64::relaxTlsGdToIe(uint8_t loc, const* Relocation &rel,
671	uint64_t val) const {
672	// TLSDESC Global-Dynamic relocation are in the form:
673	// adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21]
674	// ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12]
675	// add x0, x0, :tlsdesc_los:v [R_AARCH64_TLSDESC_ADD_LO12]
676	// .tlsdesccall [R_AARCH64_TLSDESC_CALL]
677	// blr x1
678	// And it can optimized to:
679	// adrp x0, :gottprel:v
680	// ldr x0, [x0, :gottprel_lo12:v]
681	// nop
682	// nop
683
684	switch (rel.type) {
685	case R_AARCH64_TLSDESC_ADD_LO12:
686	case R_AARCH64_TLSDESC_CALL:
687	write32le(P: loc, V: `0xd503201f`); // nop
688	break;
689	case R_AARCH64_TLSDESC_ADR_PAGE21:
690	write32le(P: loc, V: `0x90000000`); // adrp
691	relocateNoSym(loc, type: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
692	break;
693	case R_AARCH64_TLSDESC_LD64_LO12:
694	write32le(P: loc, V: `0xf9400000`); // ldr
695	relocateNoSym(loc, type: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
696	break;
697	default:
698	llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
699	}
700	}
701
702	void AArch64::relaxTlsIeToLe(uint8_t loc, const* Relocation &rel,
703	uint64_t val) const {
704	checkUInt(loc, v: val, n: `32`, rel);
705
706	if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
707	// Generate MOVZ.
708	uint32_t regNo = read32le(P: loc) & `0x1f`;
709	write32le(P: loc, V: (`0xd2a00000` \| regNo) \| (((val >> `16`) & `0xffff`) << `5`));
710	return;
711	}
712	if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
713	// Generate MOVK.
714	uint32_t regNo = read32le(P: loc) & `0x1f`;
715	write32le(P: loc, V: (`0xf2800000` \| regNo) \| ((val & `0xffff`) << `5`));
716	return;
717	}
718	llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
719	}
720
721	AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
722	if (!config ->relax)
723	return;
724	// Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
725	// always appear in pairs.
726	size_t i = `0`;
727	const size_t size = relocs.size();
728	for (; i != size; ++i) {
729	if (relocs [i].type == R_AARCH64_ADR_GOT_PAGE) {
730	if (i + `1` < size && relocs [i + `1`].type == R_AARCH64_LD64_GOT_LO12_NC) {
731	++i;
732	continue;
733	}
734	break;
735	} else if (relocs [i].type == R_AARCH64_LD64_GOT_LO12_NC) {
736	break;
737	}
738	}
739	safeToRelaxAdrpLdr = i == size;
740	}
741
742	bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
743	const Relocation &addRel, uint64_t secAddr,
744	uint8_t buf) const* {
745	// When the address of sym is within the range of ADR then
746	// we may relax
747	// ADRP xn, sym
748	// ADD xn, xn, :lo12: sym
749	// to
750	// NOP
751	// ADR xn, sym
752	if (!config ->relax \|\| adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 \|\|
753	addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
754	return false;
755	// Check if the relocations apply to consecutive instructions.
756	if (adrpRel.offset + `4` != addRel.offset)
757	return false;
758	if (adrpRel.sym != addRel.sym)
759	return false;
760	if (adrpRel.addend != `0` \|\| addRel.addend != `0`)
761	return false;
762
763	uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
764	uint32_t addInstr = read32le(P: buf + addRel.offset);
765	// Check if the first instruction is ADRP and the second instruction is ADD.
766	if ((adrpInstr & `0x9f000000`) != `0x90000000` \|\|
767	(addInstr & `0xffc00000`) != `0x91000000`)
768	return false;
769	uint32_t adrpDestReg = adrpInstr & `0x1f`;
770	uint32_t addDestReg = addInstr & `0x1f`;
771	uint32_t addSrcReg = (addInstr >> `5`) & `0x1f`;
772	if (adrpDestReg != addDestReg \|\| adrpDestReg != addSrcReg)
773	return false;
774
775	Symbol &sym = *adrpRel.sym;
776	// Check if the address difference is within 1MiB range.
777	int64_t val = sym.getVA() - (secAddr + addRel.offset);
778	if (val < -`1024` * `1024` \|\| val >= `1024` * `1024`)
779	return false;
780
781	Relocation adrRel = {.expr: R_ABS, .type: R_AARCH64_ADR_PREL_LO21, .offset: addRel.offset,
782	/addend=/`0`, .sym: &sym};
783	// nop
784	write32le(P: buf + adrpRel.offset, V: `0xd503201f`);
785	// adr x_<dest_reg>
786	write32le(P: buf + adrRel.offset, V: `0x10000000` \| adrpDestReg);
787	target->relocate(loc: buf + adrRel.offset, rel: adrRel, val);
788	return true;
789	}
790
791	bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
792	const Relocation &ldrRel, uint64_t secAddr,
793	uint8_t buf) const* {
794	if (!safeToRelaxAdrpLdr)
795	return false;
796
797	// When the definition of sym is not preemptible then we may
798	// be able to relax
799	// ADRP xn, :got: sym
800	// LDR xn, [ xn :got_lo12: sym]
801	// to
802	// ADRP xn, sym
803	// ADD xn, xn, :lo_12: sym
804
805	if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE \|\|
806	ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
807	return false;
808	// Check if the relocations apply to consecutive instructions.
809	if (adrpRel.offset + `4` != ldrRel.offset)
810	return false;
811	// Check if the relocations reference the same symbol and
812	// skip undefined, preemptible and STT_GNU_IFUNC symbols.
813	if (!adrpRel.sym \|\| adrpRel.sym != ldrRel.sym \|\| !adrpRel.sym->isDefined() \|\|
814	adrpRel.sym->isPreemptible \|\| adrpRel.sym->isGnuIFunc())
815	return false;
816	// Check if the addends of the both relocations are zero.
817	if (adrpRel.addend != `0` \|\| ldrRel.addend != `0`)
818	return false;
819	uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
820	uint32_t ldrInstr = read32le(P: buf + ldrRel.offset);
821	// Check if the first instruction is ADRP and the second instruction is LDR.
822	if ((adrpInstr & `0x9f000000`) != `0x90000000` \|\|
823	(ldrInstr & `0x3b000000`) != `0x39000000`)
824	return false;
825	// Check the value of the sf bit.
826	if (!(ldrInstr >> `31`))
827	return false;
828	uint32_t adrpDestReg = adrpInstr & `0x1f`;
829	uint32_t ldrDestReg = ldrInstr & `0x1f`;
830	uint32_t ldrSrcReg = (ldrInstr >> `5`) & `0x1f`;
831	// Check if ADPR and LDR use the same register.
832	if (adrpDestReg != ldrDestReg \|\| adrpDestReg != ldrSrcReg)
833	return false;
834
835	Symbol &sym = *adrpRel.sym;
836	// GOT references to absolute symbols can't be relaxed to use ADRP/ADD in
837	// position-independent code because these instructions produce a relative
838	// address.
839	if (config ->isPic && !cast<Defined>(Val&: sym).section)
840	return false;
841	// Check if the address difference is within 4GB range.
842	int64_t val =
843	getAArch64Page(expr: sym.getVA()) - getAArch64Page(expr: secAddr + adrpRel.offset);
844	if (val != llvm::SignExtend64(X: val, B: `33`))
845	return false;
846
847	Relocation adrpSymRel = {.expr: R_AARCH64_PAGE_PC, .type: R_AARCH64_ADR_PREL_PG_HI21,
848	.offset: adrpRel.offset, /addend=/`0`, .sym: &sym};
849	Relocation addRel = {.expr: R_ABS, .type: R_AARCH64_ADD_ABS_LO12_NC, .offset: ldrRel.offset,
850	/addend=/`0`, .sym: &sym};
851
852	// adrp x_<dest_reg>
853	write32le(P: buf + adrpSymRel.offset, V: `0x90000000` \| adrpDestReg);
854	// add x_<dest reg>, x_<dest reg>
855	write32le(P: buf + addRel.offset, V: `0x91000000` \| adrpDestReg \| (adrpDestReg << `5`));
856
857	target->relocate(loc: buf + adrpSymRel.offset, rel: adrpSymRel,
858	val: SignExtend64(X: getAArch64Page(expr: sym.getVA()) -
859	getAArch64Page(expr: secAddr + adrpSymRel.offset),
860	B: `64`));
861	target->relocate(loc: buf + addRel.offset, rel: addRel, val: SignExtend64(X: sym.getVA(), B: `64`));
862	tryRelaxAdrpAdd(adrpRel: adrpSymRel, addRel, secAddr, buf);
863	return true;
864	}
865
866	// Tagged symbols have upper address bits that are added by the dynamic loader,
867	// and thus need the full 64-bit GOT entry. Do not relax such symbols.
868	static bool needsGotForMemtag(const Relocation &rel) {
869	return rel.sym->isTagged() && needsGot(expr: rel.expr);
870	}
871
872	void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t buf) const* {
873	uint64_t secAddr = sec.getOutputSection()->addr;
874	if (auto *s = dyn_cast<InputSection>(Val: &sec))
875	secAddr += s->outSecOff;
876	else if (auto *ehIn = dyn_cast<EhInputSection>(Val: &sec))
877	secAddr += ehIn->getParent()->outSecOff;
878	AArch64Relaxer relaxer(sec.relocs());
879	for (size_t i = `0`, size = sec.relocs().size(); i != size; ++i) {
880	const Relocation &rel = sec.relocs()[i];
881	uint8_t *loc = buf + rel.offset;
882	const uint64_t val =
883	sec.getRelocTargetVA(File: sec.file, Type: rel.type, A: rel.addend,
884	P: secAddr + rel.offset, Sym: *rel.sym, Expr: rel.expr);
885
886	if (needsGotForMemtag(rel)) {
887	relocate(loc, rel, val);
888	continue;
889	}
890
891	switch (rel.expr) {
892	case R_AARCH64_GOT_PAGE_PC:
893	if (i + `1` < size &&
894	relaxer.tryRelaxAdrpLdr(adrpRel: rel, ldrRel: sec.relocs()[i + `1`], secAddr, buf)) {
895	++i;
896	continue;
897	}
898	break;
899	case R_AARCH64_PAGE_PC:
900	if (i + `1` < size &&
901	relaxer.tryRelaxAdrpAdd(adrpRel: rel, addRel: sec.relocs()[i + `1`], secAddr, buf)) {
902	++i;
903	continue;
904	}
905	break;
906	case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC:
907	case R_RELAX_TLS_GD_TO_IE_ABS:
908	relaxTlsGdToIe(loc, rel, val);
909	continue;
910	case R_RELAX_TLS_GD_TO_LE:
911	relaxTlsGdToLe(loc, rel, val);
912	continue;
913	case R_RELAX_TLS_IE_TO_LE:
914	relaxTlsIeToLe(loc, rel, val);
915	continue;
916	default:
917	break;
918	}
919	relocate(loc, rel, val);
920	}
921	}
922
923	// AArch64 may use security features in variant PLT sequences. These are:
924	// Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
925	// Indicator (BTI) introduced in armv8.5-a. The additional instructions used
926	// in the variant Plt sequences are encoded in the Hint space so they can be
927	// deployed on older architectures, which treat the instructions as a nop.
928	// PAC and BTI can be combined leading to the following combinations:
929	// writePltHeader
930	// writePltHeaderBti (no PAC Header needed)
931	// writePlt
932	// writePltBti (BTI only)
933	// writePltPac (PAC only)
934	// writePltBtiPac (BTI and PAC)
935	//
936	// When PAC is enabled the dynamic loader encrypts the address that it places
937	// in the .got.plt using the pacia1716 instruction which encrypts the value in
938	// x17 using the modifier in x16. The static linker places autia1716 before the
939	// indirect branch to x17 to authenticate the address in x17 with the modifier
940	// in x16. This makes it more difficult for an attacker to modify the value in
941	// the .got.plt.
942	//
943	// When BTI is enabled all indirect branches must land on a bti instruction.
944	// The static linker must place a bti instruction at the start of any PLT entry
945	// that may be the target of an indirect branch. As the PLT entries call the
946	// lazy resolver indirectly this must have a bti instruction at start. In
947	// general a bti instruction is not needed for a PLT entry as indirect calls
948	// are resolved to the function address and not the PLT entry for the function.
949	// There are a small number of cases where the PLT address can escape, such as
950	// taking the address of a function or ifunc via a non got-generating
951	// relocation, and a shared library refers to that symbol.
952	//
953	// We use the bti c variant of the instruction which permits indirect branches
954	// (br) via x16/x17 and indirect function calls (blr) via any register. The ABI
955	// guarantees that all indirect branches from code requiring BTI protection
956	// will go via x16/x17
957
958	namespace {
959	class AArch64BtiPac final : public AArch64 {
960	public:
961	AArch64BtiPac();
962	void writePltHeader(uint8_t buf) const* override;
963	void writePlt(uint8_t buf, const* Symbol &sym,
964	uint64_t pltEntryAddr) const override;
965
966	private:
967	bool btiHeader; // bti instruction needed in PLT Header and Entry
968	bool pacEntry; // autia1716 instruction needed in PLT Entry
969	};
970	} // namespace
971
972	AArch64BtiPac::AArch64BtiPac() {
973	btiHeader = (config ->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
974	// A BTI (Branch Target Indicator) Plt Entry is only required if the
975	// address of the PLT entry can be taken by the program, which permits an
976	// indirect jump to the PLT entry. This can happen when the address
977	// of the PLT entry for a function is canonicalised due to the address of
978	// the function in an executable being taken by a shared library, or
979	// non-preemptible ifunc referenced by non-GOT-generating, non-PLT-generating
980	// relocations.
981	// The PAC PLT entries require dynamic loader support and this isn't known
982	// from properties in the objects, so we use the command line flag.
983	pacEntry = config ->zPacPlt;
984
985	if (btiHeader \|\| pacEntry) {
986	pltEntrySize = `24`;
987	ipltEntrySize = `24`;
988	}
989	}
990
991	void AArch64BtiPac::writePltHeader(uint8_t buf) const* {
992	const uint8_t btiData[] = { `0x5f`, `0x24`, `0x03`, `0xd5` }; // bti c
993	const uint8_t pltData[] = {
994	`0xf0`, `0x7b`, `0xbf`, `0xa9`, // stp x16, x30, [sp,#-16]!
995	`0x10`, `0x00`, `0x00`, `0x90`, // adrp x16, Page(&(.got.plt[2]))
996	`0x11`, `0x02`, `0x40`, `0xf9`, // ldr x17, [x16, Offset(&(.got.plt[2]))]
997	`0x10`, `0x02`, `0x00`, `0x91`, // add x16, x16, Offset(&(.got.plt[2]))
998	`0x20`, `0x02`, `0x1f`, `0xd6`, // br x17
999	`0x1f`, `0x20`, `0x03`, `0xd5`, // nop
1000	`0x1f`, `0x20`, `0x03`, `0xd5` // nop
1001	};
1002	const uint8_t nopData[] = { `0x1f`, `0x20`, `0x03`, `0xd5` }; // nop
1003
1004	uint64_t got = in.gotPlt ->getVA();
1005	uint64_t plt = in.plt ->getVA();
1006
1007	if (btiHeader) {
1008	// PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
1009	// instruction.
1010	memcpy(dest: buf, src: btiData, n: sizeof(btiData));
1011	buf += sizeof(btiData);
1012	plt += sizeof(btiData);
1013	}
1014	memcpy(dest: buf, src: pltData, n: sizeof(pltData));
1015
1016	relocateNoSym(loc: buf + `4`, type: R_AARCH64_ADR_PREL_PG_HI21,
1017	val: getAArch64Page(expr: got + `16`) - getAArch64Page(expr: plt + `8`));
1018	relocateNoSym(loc: buf + `8`, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + `16`);
1019	relocateNoSym(loc: buf + `12`, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + `16`);
1020	if (!btiHeader)
1021	// We didn't add the BTI c instruction so round out size with NOP.
1022	memcpy(dest: buf + sizeof(pltData), src: nopData, n: sizeof(nopData));
1023	}
1024
1025	void AArch64BtiPac::writePlt(uint8_t buf, const* Symbol &sym,
1026	uint64_t pltEntryAddr) const {
1027	// The PLT entry is of the form:
1028	// [btiData] addrInst (pacBr \| stdBr) [nopData]
1029	const uint8_t btiData[] = { `0x5f`, `0x24`, `0x03`, `0xd5` }; // bti c
1030	const uint8_t addrInst[] = {
1031	`0x10`, `0x00`, `0x00`, `0x90`, // adrp x16, Page(&(.got.plt[n]))
1032	`0x11`, `0x02`, `0x40`, `0xf9`, // ldr x17, [x16, Offset(&(.got.plt[n]))]
1033	`0x10`, `0x02`, `0x00`, `0x91` // add x16, x16, Offset(&(.got.plt[n]))
1034	};
1035	const uint8_t pacBr[] = {
1036	`0x9f`, `0x21`, `0x03`, `0xd5`, // autia1716
1037	`0x20`, `0x02`, `0x1f`, `0xd6` // br x17
1038	};
1039	const uint8_t stdBr[] = {
1040	`0x20`, `0x02`, `0x1f`, `0xd6`, // br x17
1041	`0x1f`, `0x20`, `0x03`, `0xd5` // nop
1042	};
1043	const uint8_t nopData[] = { `0x1f`, `0x20`, `0x03`, `0xd5` }; // nop
1044
1045	// NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may
1046	// escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
1047	// address may escape if referenced by a direct relocation. If relative
1048	// vtables are used then if the vtable is in a shared object the offsets will
1049	// be to the PLT entry. The condition is conservative.
1050	bool hasBti = btiHeader &&
1051	(sym.hasFlag(bit: NEEDS_COPY) \|\| sym.isInIplt \|\| sym.thunkAccessed);
1052	if (hasBti) {
1053	memcpy(dest: buf, src: btiData, n: sizeof(btiData));
1054	buf += sizeof(btiData);
1055	pltEntryAddr += sizeof(btiData);
1056	}
1057
1058	uint64_t gotPltEntryAddr = sym.getGotPltVA();
1059	memcpy(dest: buf, src: addrInst, n: sizeof(addrInst));
1060	relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
1061	val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
1062	relocateNoSym(loc: buf + `4`, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
1063	relocateNoSym(loc: buf + `8`, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);
1064
1065	if (pacEntry)
1066	memcpy(dest: buf + sizeof(addrInst), src: pacBr, n: sizeof(pacBr));
1067	else
1068	memcpy(dest: buf + sizeof(addrInst), src: stdBr, n: sizeof(stdBr));
1069	if (!hasBti)
1070	// We didn't add the BTI c instruction so round out size with NOP.
1071	memcpy(dest: buf + sizeof(addrInst) + sizeof(stdBr), src: nopData, n: sizeof(nopData));
1072	}
1073
1074	static TargetInfo *getTargetInfo() {
1075	if ((config ->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) \|\|
1076	config ->zPacPlt) {
1077	static AArch64BtiPac t;
1078	return &t;
1079	}
1080	static AArch64 t;
1081	return &t;
1082	}
1083
1084	TargetInfo elf::getAArch64TargetInfo() { return* getTargetInfo(); }
1085
1086	template <class ELFT>
1087	static void
1088	addTaggedSymbolReferences(InputSectionBase &sec,
1089	DenseMap<Symbol , unsigned*> &referenceCount) {
1090	assert(sec.type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC);
1091
1092	const RelsOrRelas<ELFT> rels = sec.relsOrRelas<ELFT>();
1093	if (rels.areRelocsRel())
1094	error(msg: "non-RELA relocations are not allowed with memtag globals");
1095
1096	for (const typename ELFT::Rela &rel : rels.relas) {
1097	Symbol &sym = sec.file->getRelocTargetSym(rel);
1098	// Linker-synthesized symbols such as __executable_start may be referenced
1099	// as tagged in input objfiles, and we don't want them to be tagged. A
1100	// cheap way to exclude them is the type check, but their type is
1101	// STT_NOTYPE. In addition, this save us from checking untaggable symbols,
1102	// like functions or TLS symbols.
1103	if (sym.type != STT_OBJECT)
1104	continue;
1105	// STB_LOCAL symbols can't be referenced from outside the object file, and
1106	// thus don't need to be checked for references from other object files.
1107	if (sym.binding == STB_LOCAL) {
1108	sym.setIsTagged(true);
1109	continue;
1110	}
1111	++referenceCount [&sym];
1112	}
1113	sec.markDead();
1114	}
1115
1116	// A tagged symbol must be denoted as being tagged by all references and the
1117	// chosen definition. For simplicity, here, it must also be denoted as tagged
1118	// for all definitions. Otherwise:
1119	//
1120	// 1. A tagged definition can be used by an untagged declaration, in which case
1121	// the untagged access may be PC-relative, causing a tag mismatch at
1122	// runtime.
1123	// 2. An untagged definition can be used by a tagged declaration, where the
1124	// compiler has taken advantage of the increased alignment of the tagged
1125	// declaration, but the alignment at runtime is wrong, causing a fault.
1126	//
1127	// Ideally, this isn't a problem, as any TU that imports or exports tagged
1128	// symbols should also be built with tagging. But, to handle these cases, we
1129	// demote the symbol to be untagged.
1130	void lld::elf::createTaggedSymbols(const SmallVector<ELFFileBase *, `0`> &files) {
1131	assert(hasMemtag());
1132
1133	// First, collect all symbols that are marked as tagged, and count how many
1134	// times they're marked as tagged.
1135	DenseMap<Symbol , unsigned*> taggedSymbolReferenceCount;
1136	for (InputFile* file : files) {
1137	if (file->kind() != InputFile::ObjKind)
1138	continue;
1139	for (InputSectionBase *section : file->getSections()) {
1140	if (!section \|\| section->type != SHT_AARCH64_MEMTAG_GLOBALS_STATIC \|\|
1141	section == &InputSection::discarded)
1142	continue;
1143	invokeELFT(addTaggedSymbolReferences, *section,
1144	taggedSymbolReferenceCount);
1145	}
1146	}
1147
1148	// Now, go through all the symbols. If the number of declarations +
1149	// definitions to a symbol exceeds the amount of times they're marked as
1150	// tagged, it means we have an objfile that uses the untagged variant of the
1151	// symbol.
1152	for (InputFile *file : files) {
1153	if (file->kind() != InputFile::BinaryKind &&
1154	file->kind() != InputFile::ObjKind)
1155	continue;
1156
1157	for (Symbol *symbol : file->getSymbols()) {
1158	// See `addTaggedSymbolReferences` for more details.
1159	if (symbol->type != STT_OBJECT \|\|
1160	symbol->binding == STB_LOCAL)
1161	continue;
1162	auto it = taggedSymbolReferenceCount.find(Val: symbol);
1163	if (it == taggedSymbolReferenceCount.end()) continue;
1164	unsigned &remainingAllowedTaggedRefs = it ->second;
1165	if (remainingAllowedTaggedRefs == `0`) {
1166	taggedSymbolReferenceCount.erase(I: it);
1167	continue;
1168	}
1169	--remainingAllowedTaggedRefs;
1170	}
1171	}
1172
1173	// `addTaggedSymbolReferences` has already checked that we have RELA
1174	// relocations, the only other way to get written addends is with
1175	// --apply-dynamic-relocs.
1176	if (!taggedSymbolReferenceCount.empty() && config ->writeAddends)
1177	error(msg: "--apply-dynamic-relocs cannot be used with MTE globals");
1178
1179	// Now, `taggedSymbolReferenceCount` should only contain symbols that are
1180	// defined as tagged exactly the same amount as it's referenced, meaning all
1181	// uses are tagged.
1182	for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) {
1183	assert(remainingTaggedRefs == `0` &&
1184	"Symbol is defined as tagged more times than it's used");
1185	symbol->setIsTagged(true);
1186	}
1187	}
1188

Browse the source code of llvm_projects/lld/ELF/Arch/AArch64.cpp