LoongArch.cpp source code [llvm_projects/lld/ELF/Arch/LoongArch.cpp]

1	//===- LoongArch.cpp ------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "InputFiles.h"
10	#include "OutputSections.h"
11	#include "Symbols.h"
12	#include "SyntheticSections.h"
13	#include "Target.h"
14	#include "llvm/BinaryFormat/ELF.h"
15	#include "llvm/Support/LEB128.h"
16
17	using namespace llvm;
18	using namespace llvm::object;
19	using namespace llvm::support::endian;
20	using namespace llvm::ELF;
21	using namespace lld;
22	using namespace lld::elf;
23
24	namespace {
25	class LoongArch final : public TargetInfo {
26	public:
27	LoongArch();
28	uint32_t calcEFlags() const override;
29	int64_t getImplicitAddend(const uint8_t buf, RelType type) const* override;
30	void writeGotPlt(uint8_t buf, const* Symbol &s) const override;
31	void writeIgotPlt(uint8_t buf, const* Symbol &s) const override;
32	void writePltHeader(uint8_t buf) const* override;
33	void writePlt(uint8_t buf, const* Symbol &sym,
34	uint64_t pltEntryAddr) const override;
35	RelType getDynRel(RelType type) const override;
36	RelExpr getRelExpr(RelType type, const Symbol &s,
37	const uint8_t loc) const* override;
38	bool usesOnlyLowPageBits(RelType type) const override;
39	void relocate(uint8_t loc, const* Relocation &rel,
40	uint64_t val) const override;
41	bool relaxOnce(int pass) const override;
42	void finalizeRelax(int passes) const override;
43	};
44	} // end anonymous namespace
45
46	namespace {
// Base encodings (all operand fields zero) of the instructions this file
// synthesizes or recognizes. Operands are OR-ed in by insn().
enum Op {
  SUB_W = 0x00110000,
  SUB_D = 0x00118000,
  BREAK = 0x002a0000,
  SRLI_W = 0x00448000,
  SRLI_D = 0x00450000,
  ADDI_W = 0x02800000,
  ADDI_D = 0x02c00000,
  ANDI = 0x03400000,
  PCADDU12I = 0x1c000000,
  LD_W = 0x28800000,
  LD_D = 0x28c00000,
  JIRL = 0x4c000000,
};
61
// General-purpose register numbers used when encoding PLT instructions.
enum Reg {
  R_ZERO = 0,
  R_RA = 1,
  R_TP = 2,
  R_T0 = 12,
  R_T1 = 13,
  R_T2 = 14,
  R_T3 = 15,
};
71	} // namespace
72
// Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences
// like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i`
// produces a PC-relative intermediate value with the lowest 12 bits zeroed (the
// "page") for the next instruction to add in the "page offset". (`pcalau12i`
// stands for something like "PC ALigned Add Upper that starts from the 12th
// bit, Immediate".)
//
// Here a "page" is in fact just another way to refer to the 12-bit range
// allowed by the immediate field of the addi/ld/st instructions, and not
// related to the system or the kernel's actual page size. The semantics happen
// to match the AArch64 `adrp`, so the concept of "page" is borrowed here.
static uint64_t getLoongArchPage(uint64_t p) {
  return p & ~static_cast<uint64_t>(0xfff);
}
87
// Return the lowest 12 bits of `val`.
static uint32_t lo12(uint32_t val) { return val & 0xfff; }
89
90	// Calculate the adjusted page delta between dest and PC.
91	uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) {
92	// Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d
93	// + lu52i.d`, they must be adjacent so that we can infer the PC of
94	// `pcalau12i` when calculating the page delta for the other two instructions
95	// (lu32i.d and lu52i.d). Compensate all the sign-extensions is a bit
96	// complicated. Just use psABI recommended algorithm.
97	uint64_t pcalau12i_pc;
98	switch (type) {
99	case R_LARCH_PCALA64_LO20:
100	case R_LARCH_GOT64_PC_LO20:
101	case R_LARCH_TLS_IE64_PC_LO20:
102	case R_LARCH_TLS_DESC64_PC_LO20:
103	pcalau12i_pc = pc - `8`;
104	break;
105	case R_LARCH_PCALA64_HI12:
106	case R_LARCH_GOT64_PC_HI12:
107	case R_LARCH_TLS_IE64_PC_HI12:
108	case R_LARCH_TLS_DESC64_PC_HI12:
109	pcalau12i_pc = pc - `12`;
110	break;
111	default:
112	pcalau12i_pc = pc;
113	break;
114	}
115	uint64_t result = getLoongArchPage(p: dest) - getLoongArchPage(p: pcalau12i_pc);
116	if (dest & `0x800`)
117	result += `0x1000` - `0x1'0000'0000`;
118	if (result & `0x8000'0000`)
119	result += `0x1'0000'0000`;
120	return result;
121	}
122
// Return bits [31:12] of `val` after adding 0x800, i.e. rounded so that it
// pairs with a 12-bit low part that is later interpreted as signed.
static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; }
124
// Assemble an instruction word from a base opcode and three operand fields:
// `d` is placed at bit 0, `j` at bit 5 and `k` at bit 10. The fields are not
// masked, so callers must pass values that fit their slots.
static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) {
  return op | d | (j << 5) | (k << 10);
}
128
// Extract bits v[begin:end], where range is inclusive.
// `begin` is the most significant bit index and `end` the least significant.
// begin == 63 is special-cased because shifting 1ULL by 64 would be undefined.
static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
  return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end;
}
133
134	static uint32_t setD5k16(uint32_t insn, uint32_t imm) {
135	uint32_t immLo = extractBits(v: imm, begin: `15`, end: `0`);
136	uint32_t immHi = extractBits(v: imm, begin: `20`, end: `16`);
137	return (insn & `0xfc0003e0`) \| (immLo << `10`) \| immHi;
138	}
139
140	static uint32_t setD10k16(uint32_t insn, uint32_t imm) {
141	uint32_t immLo = extractBits(v: imm, begin: `15`, end: `0`);
142	uint32_t immHi = extractBits(v: imm, begin: `25`, end: `16`);
143	return (insn & `0xfc000000`) \| (immLo << `10`) \| immHi;
144	}
145
146	static uint32_t setJ20(uint32_t insn, uint32_t imm) {
147	return (insn & `0xfe00001f`) \| (extractBits(v: imm, begin: `19`, end: `0`) << `5`);
148	}
149
150	static uint32_t setK12(uint32_t insn, uint32_t imm) {
151	return (insn & `0xffc003ff`) \| (extractBits(v: imm, begin: `11`, end: `0`) << `10`);
152	}
153
154	static uint32_t setK16(uint32_t insn, uint32_t imm) {
155	return (insn & `0xfc0003ff`) \| (extractBits(v: imm, begin: `15`, end: `0`) << `10`);
156	}
157
158	static bool isJirl(uint32_t insn) {
159	return (insn & `0xfc000000`) == JIRL;
160	}
161
162	static void handleUleb128(uint8_t *loc, uint64_t val) {
163	const uint32_t maxcount = `1` + `64` / `7`;
164	uint32_t count;
165	const char error = nullptr*;
166	uint64_t orig = decodeULEB128(p: loc, n: &count, end: nullptr, error: &error);
167	if (count > maxcount \|\| (count == maxcount && error))
168	errorOrWarn(msg: getErrorLocation(loc) + "extra space for uleb128");
169	uint64_t mask = count < maxcount ? (`1ULL` << `7` * count) - `1` : -`1ULL`;
170	encodeULEB128(Value: (orig + val) & mask, p: loc, PadTo: count);
171	}
172
173	LoongArch::LoongArch() {
174	// The LoongArch ISA itself does not have a limit on page sizes. According to
175	// the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is
176	// 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to
177	// "unlimited".
178	// However, practically the maximum usable page size is constrained by the
179	// kernel implementation, and 64KiB is the biggest non-huge page size
180	// supported by Linux as of v6.4. The most widespread page size in use,
181	// though, is 16KiB.
182	defaultCommonPageSize = `16384`;
183	defaultMaxPageSize = `65536`;
184	write32le(P: trapInstr.data(), V: BREAK); // break 0
185
186	copyRel = R_LARCH_COPY;
187	pltRel = R_LARCH_JUMP_SLOT;
188	relativeRel = R_LARCH_RELATIVE;
189	iRelativeRel = R_LARCH_IRELATIVE;
190
191	if (config ->is64) {
192	symbolicRel = R_LARCH_64;
193	tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64;
194	tlsOffsetRel = R_LARCH_TLS_DTPREL64;
195	tlsGotRel = R_LARCH_TLS_TPREL64;
196	tlsDescRel = R_LARCH_TLS_DESC64;
197	} else {
198	symbolicRel = R_LARCH_32;
199	tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32;
200	tlsOffsetRel = R_LARCH_TLS_DTPREL32;
201	tlsGotRel = R_LARCH_TLS_TPREL32;
202	tlsDescRel = R_LARCH_TLS_DESC32;
203	}
204
205	gotRel = symbolicRel;
206
207	// .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map
208	gotPltHeaderEntriesNum = `2`;
209
210	pltHeaderSize = `32`;
211	pltEntrySize = `16`;
212	ipltEntrySize = `16`;
213	}
214
215	static uint32_t getEFlags(const InputFile *f) {
216	if (config ->is64)
217	return cast<ObjFile<ELF64LE>>(Val: f)->getObj().getHeader().e_flags;
218	return cast<ObjFile<ELF32LE>>(Val: f)->getObj().getHeader().e_flags;
219	}
220
221	static bool inputFileHasCode(const InputFile *f) {
222	for (const auto *sec : f->getSections())
223	if (sec && sec->flags & SHF_EXECINSTR)
224	return true;
225
226	return false;
227	}
228
229	uint32_t LoongArch::calcEFlags() const {
230	// If there are only binary input files (from -b binary), use a
231	// value of 0 for the ELF header flags.
232	if (ctx.objectFiles.empty())
233	return `0`;
234
235	uint32_t target = `0`;
236	const InputFile *targetFile;
237	for (const InputFile *f : ctx.objectFiles) {
238	// Do not enforce ABI compatibility if the input file does not contain code.
239	// This is useful for allowing linkage with data-only object files produced
240	// with tools like objcopy, that have zero e_flags.
241	if (!inputFileHasCode(f))
242	continue;
243
244	// Take the first non-zero e_flags as the reference.
245	uint32_t flags = getEFlags(f);
246	if (target == `0` && flags != `0`) {
247	target = flags;
248	targetFile = f;
249	}
250
251	if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) !=
252	(target & EF_LOONGARCH_ABI_MODIFIER_MASK))
253	error(msg: toString(f) +
254	": cannot link object files with different ABI from " +
255	toString(f: targetFile));
256
257	// We cannot process psABI v1.x / object ABI v0 files (containing stack
258	// relocations), unlike ld.bfd.
259	//
260	// Instead of blindly accepting every v0 object and only failing at
261	// relocation processing time, just disallow interlink altogether. We
262	// don't expect significant usage of object ABI v0 in the wild (the old
263	// world may continue using object ABI v0 for a while, but as it's not
264	// binary-compatible with the upstream i.e. new-world ecosystem, it's not
265	// being considered here).
266	//
267	// There are briefly some new-world systems with object ABI v0 binaries too.
268	// It is because these systems were built before the new ABI was finalized.
269	// These are not supported either due to the extremely small number of them,
270	// and the few impacted users are advised to simply rebuild world or
271	// reinstall a recent system.
272	if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1)
273	error(msg: toString(f) + ": unsupported object file ABI version");
274	}
275
276	return target;
277	}
278
279	int64_t LoongArch::getImplicitAddend(const uint8_t buf, RelType type) const* {
280	switch (type) {
281	default:
282	internalLinkerError(loc: getErrorLocation(loc: buf),
283	msg: "cannot read addend for relocation " + toString(type));
284	return `0`;
285	case R_LARCH_32:
286	case R_LARCH_TLS_DTPMOD32:
287	case R_LARCH_TLS_DTPREL32:
288	case R_LARCH_TLS_TPREL32:
289	return SignExtend64<`32`>(x: read32le(P: buf));
290	case R_LARCH_64:
291	case R_LARCH_TLS_DTPMOD64:
292	case R_LARCH_TLS_DTPREL64:
293	case R_LARCH_TLS_TPREL64:
294	return read64le(P: buf);
295	case R_LARCH_RELATIVE:
296	case R_LARCH_IRELATIVE:
297	return config ->is64 ? read64le(P: buf) : read32le(P: buf);
298	case R_LARCH_NONE:
299	case R_LARCH_JUMP_SLOT:
300	// These relocations are defined as not having an implicit addend.
301	return `0`;
302	case R_LARCH_TLS_DESC32:
303	return read32le(P: buf + `4`);
304	case R_LARCH_TLS_DESC64:
305	return read64le(P: buf + `8`);
306	}
307	}
308
309	void LoongArch::writeGotPlt(uint8_t buf, const* Symbol &s) const {
310	if (config ->is64)
311	write64le(P: buf, V: in.plt ->getVA());
312	else
313	write32le(P: buf, V: in.plt ->getVA());
314	}
315
316	void LoongArch::writeIgotPlt(uint8_t buf, const* Symbol &s) const {
317	if (config ->writeAddends) {
318	if (config ->is64)
319	write64le(P: buf, V: s.getVA());
320	else
321	write32le(P: buf, V: s.getVA());
322	}
323	}
324
325	void LoongArch::writePltHeader(uint8_t buf) const* {
326	// The LoongArch PLT is currently structured just like that of RISCV.
327	// Annoyingly, this means the PLT is still using `pcaddu12i` to perform
328	// PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`),
329	// in contrast to the AArch64-like page-offset scheme with `pcalau12i` that
330	// is used everywhere else involving PC-relative operations in the LoongArch
331	// ELF psABI v2.00.
332	//
333	// The `pcrel_{hi20,lo12}` operators are illustrative only and not really
334	// supported by LoongArch assemblers.
335	//
336	// pcaddu12i $t2, %pcrel_hi20(.got.plt)
337	// sub.[wd] $t1, $t1, $t3
338	// ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve
339	// addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0]
340	// addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt)
341	// srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0]
342	// ld.[wd] $t0, $t0, Wordsize ; t0 = link_map
343	// jr $t3
344	uint32_t offset = in.gotPlt ->getVA() - in.plt ->getVA();
345	uint32_t sub = config ->is64 ? SUB_D : SUB_W;
346	uint32_t ld = config ->is64 ? LD_D : LD_W;
347	uint32_t addi = config ->is64 ? ADDI_D : ADDI_W;
348	uint32_t srli = config ->is64 ? SRLI_D : SRLI_W;
349	write32le(P: buf + `0`, V: insn(op: PCADDU12I, d: R_T2, j: hi20(val: offset), k: `0`));
350	write32le(P: buf + `4`, V: insn(op: sub, d: R_T1, j: R_T1, k: R_T3));
351	write32le(P: buf + `8`, V: insn(op: ld, d: R_T3, j: R_T2, k: lo12(val: offset)));
352	write32le(P: buf + `12`, V: insn(op: addi, d: R_T1, j: R_T1, k: lo12(val: -target->pltHeaderSize - `12`)));
353	write32le(P: buf + `16`, V: insn(op: addi, d: R_T0, j: R_T2, k: lo12(val: offset)));
354	write32le(P: buf + `20`, V: insn(op: srli, d: R_T1, j: R_T1, k: config ->is64 ? `1` : `2`));
355	write32le(P: buf + `24`, V: insn(op: ld, d: R_T0, j: R_T0, k: config ->wordsize));
356	write32le(P: buf + `28`, V: insn(op: JIRL, d: R_ZERO, j: R_T3, k: `0`));
357	}
358
359	void LoongArch::writePlt(uint8_t buf, const* Symbol &sym,
360	uint64_t pltEntryAddr) const {
361	// See the comment in writePltHeader for reason why pcaddu12i is used instead
362	// of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days.
363	//
364	// pcaddu12i $t3, %pcrel_hi20(f@.got.plt)
365	// ld.[wd] $t3, $t3, %pcrel_lo12(f@.got.plt)
366	// jirl $t1, $t3, 0
367	// nop
368	uint32_t offset = sym.getGotPltVA() - pltEntryAddr;
369	write32le(P: buf + `0`, V: insn(op: PCADDU12I, d: R_T3, j: hi20(val: offset), k: `0`));
370	write32le(P: buf + `4`,
371	V: insn(op: config ->is64 ? LD_D : LD_W, d: R_T3, j: R_T3, k: lo12(val: offset)));
372	write32le(P: buf + `8`, V: insn(op: JIRL, d: R_T1, j: R_T3, k: `0`));
373	write32le(P: buf + `12`, V: insn(op: ANDI, d: R_ZERO, j: R_ZERO, k: `0`));
374	}
375
376	RelType LoongArch::getDynRel(RelType type) const {
377	return type == target->symbolicRel ? type
378	: static_cast<RelType>(R_LARCH_NONE);
379	}
380
381	RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
382	const uint8_t loc) const* {
383	switch (type) {
384	case R_LARCH_NONE:
385	case R_LARCH_MARK_LA:
386	case R_LARCH_MARK_PCREL:
387	return R_NONE;
388	case R_LARCH_32:
389	case R_LARCH_64:
390	case R_LARCH_ABS_HI20:
391	case R_LARCH_ABS_LO12:
392	case R_LARCH_ABS64_LO20:
393	case R_LARCH_ABS64_HI12:
394	return R_ABS;
395	case R_LARCH_PCALA_LO12:
396	// We could just R_ABS, but the JIRL instruction reuses the relocation type
397	// for a different purpose. The questionable usage is part of glibc 2.37
398	// libc_nonshared.a [1], which is linked into user programs, so we have to
399	// work around it for a while, even if a new relocation type may be
400	// introduced in the future [2].
401	//
402	// [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a
403	// [2]: https://github.com/loongson/la-abi-specs/pull/3
404	return isJirl(insn: read32le(P: loc)) ? R_PLT : R_ABS;
405	case R_LARCH_TLS_DTPREL32:
406	case R_LARCH_TLS_DTPREL64:
407	return R_DTPREL;
408	case R_LARCH_TLS_TPREL32:
409	case R_LARCH_TLS_TPREL64:
410	case R_LARCH_TLS_LE_HI20:
411	case R_LARCH_TLS_LE_HI20_R:
412	case R_LARCH_TLS_LE_LO12:
413	case R_LARCH_TLS_LE_LO12_R:
414	case R_LARCH_TLS_LE64_LO20:
415	case R_LARCH_TLS_LE64_HI12:
416	return R_TPREL;
417	case R_LARCH_ADD6:
418	case R_LARCH_ADD8:
419	case R_LARCH_ADD16:
420	case R_LARCH_ADD32:
421	case R_LARCH_ADD64:
422	case R_LARCH_ADD_ULEB128:
423	case R_LARCH_SUB6:
424	case R_LARCH_SUB8:
425	case R_LARCH_SUB16:
426	case R_LARCH_SUB32:
427	case R_LARCH_SUB64:
428	case R_LARCH_SUB_ULEB128:
429	// The LoongArch add/sub relocs behave like the RISCV counterparts; reuse
430	// the RelExpr to avoid code duplication.
431	return R_RISCV_ADD;
432	case R_LARCH_32_PCREL:
433	case R_LARCH_64_PCREL:
434	case R_LARCH_PCREL20_S2:
435	return R_PC;
436	case R_LARCH_B16:
437	case R_LARCH_B21:
438	case R_LARCH_B26:
439	case R_LARCH_CALL36:
440	return R_PLT_PC;
441	case R_LARCH_GOT_PC_HI20:
442	case R_LARCH_GOT64_PC_LO20:
443	case R_LARCH_GOT64_PC_HI12:
444	case R_LARCH_TLS_IE_PC_HI20:
445	case R_LARCH_TLS_IE64_PC_LO20:
446	case R_LARCH_TLS_IE64_PC_HI12:
447	return R_LOONGARCH_GOT_PAGE_PC;
448	case R_LARCH_GOT_PC_LO12:
449	case R_LARCH_TLS_IE_PC_LO12:
450	return R_LOONGARCH_GOT;
451	case R_LARCH_TLS_LD_PC_HI20:
452	case R_LARCH_TLS_GD_PC_HI20:
453	return R_LOONGARCH_TLSGD_PAGE_PC;
454	case R_LARCH_PCALA_HI20:
455	// Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT
456	// anyway so why waste time checking only to get everything relaxed back to
457	// it?
458	//
459	// This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want
460	// both the HI20 and LO12 to potentially refer to the PLT. But in reality
461	// the HI20 reloc appears earlier, and the relocs don't contain enough
462	// information to let us properly resolve semantics per symbol.
463	// Unlike RISCV, our LO12 relocs do not* point to their corresponding HI20*
464	// relocs, hence it is nearly impossible to 100% accurately determine each
465	// HI20's "flavor" without taking big performance hits, in the presence of
466	// edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far
467	// apart that relationship is not certain anymore), and programmer mistakes
468	// (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3).
469	//
470	// Ideally we would scan in an extra pass for all LO12s on JIRL, then mark
471	// every HI20 reloc referring to the same symbol differently; this is not
472	// feasible with the current function signature of getRelExpr that doesn't
473	// allow for such inter-pass state.
474	//
475	// So, unfortunately we have to again workaround this quirk the same way as
476	// BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only
477	// relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later
478	// stage.
479	return R_LOONGARCH_PLT_PAGE_PC;
480	case R_LARCH_PCALA64_LO20:
481	case R_LARCH_PCALA64_HI12:
482	return R_LOONGARCH_PAGE_PC;
483	case R_LARCH_GOT_HI20:
484	case R_LARCH_GOT_LO12:
485	case R_LARCH_GOT64_LO20:
486	case R_LARCH_GOT64_HI12:
487	case R_LARCH_TLS_IE_HI20:
488	case R_LARCH_TLS_IE_LO12:
489	case R_LARCH_TLS_IE64_LO20:
490	case R_LARCH_TLS_IE64_HI12:
491	return R_GOT;
492	case R_LARCH_TLS_LD_HI20:
493	return R_TLSLD_GOT;
494	case R_LARCH_TLS_GD_HI20:
495	return R_TLSGD_GOT;
496	case R_LARCH_TLS_LE_ADD_R:
497	case R_LARCH_RELAX:
498	return config ->relax ? R_RELAX_HINT : R_NONE;
499	case R_LARCH_ALIGN:
500	return R_RELAX_HINT;
501	case R_LARCH_TLS_DESC_PC_HI20:
502	case R_LARCH_TLS_DESC64_PC_LO20:
503	case R_LARCH_TLS_DESC64_PC_HI12:
504	return R_LOONGARCH_TLSDESC_PAGE_PC;
505	case R_LARCH_TLS_DESC_PC_LO12:
506	case R_LARCH_TLS_DESC_LD:
507	case R_LARCH_TLS_DESC_HI20:
508	case R_LARCH_TLS_DESC_LO12:
509	case R_LARCH_TLS_DESC64_LO20:
510	case R_LARCH_TLS_DESC64_HI12:
511	return R_TLSDESC;
512	case R_LARCH_TLS_DESC_CALL:
513	return R_TLSDESC_CALL;
514	case R_LARCH_TLS_LD_PCREL20_S2:
515	return R_TLSLD_PC;
516	case R_LARCH_TLS_GD_PCREL20_S2:
517	return R_TLSGD_PC;
518	case R_LARCH_TLS_DESC_PCREL20_S2:
519	return R_TLSDESC_PC;
520
521	// Other known relocs that are explicitly unimplemented:
522	//
523	// - psABI v1 relocs that need a stateful stack machine to work, and not
524	// required when implementing psABI v2;
525	// - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the
526	// two GNU vtable-related relocs).
527	//
528	// [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51
529	default:
530	error(msg: getErrorLocation(loc) + "unknown relocation (" + Twine (type) +
531	") against symbol " + toString(s));
532	return R_NONE;
533	}
534	}
535
536	bool LoongArch::usesOnlyLowPageBits(RelType type) const {
537	switch (type) {
538	default:
539	return false;
540	case R_LARCH_PCALA_LO12:
541	case R_LARCH_GOT_LO12:
542	case R_LARCH_GOT_PC_LO12:
543	case R_LARCH_TLS_IE_PC_LO12:
544	case R_LARCH_TLS_DESC_LO12:
545	case R_LARCH_TLS_DESC_PC_LO12:
546	return true;
547	}
548	}
549
550	void LoongArch::relocate(uint8_t loc, const* Relocation &rel,
551	uint64_t val) const {
552	switch (rel.type) {
553	case R_LARCH_32_PCREL:
554	checkInt(loc, v: val, n: `32`, rel);
555	[[fallthrough]];
556	case R_LARCH_32:
557	case R_LARCH_TLS_DTPREL32:
558	write32le(P: loc, V: val);
559	return;
560	case R_LARCH_64:
561	case R_LARCH_TLS_DTPREL64:
562	case R_LARCH_64_PCREL:
563	write64le(P: loc, V: val);
564	return;
565
566	// Relocs intended for `pcaddi`.
567	case R_LARCH_PCREL20_S2:
568	case R_LARCH_TLS_LD_PCREL20_S2:
569	case R_LARCH_TLS_GD_PCREL20_S2:
570	case R_LARCH_TLS_DESC_PCREL20_S2:
571	checkInt(loc, v: val, n: `22`, rel);
572	checkAlignment(loc, v: val, n: `4`, rel);
573	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: val >> `2`));
574	return;
575
576	case R_LARCH_B16:
577	checkInt(loc, v: val, n: `18`, rel);
578	checkAlignment(loc, v: val, n: `4`, rel);
579	write32le(P: loc, V: setK16(insn: read32le(P: loc), imm: val >> `2`));
580	return;
581
582	case R_LARCH_B21:
583	checkInt(loc, v: val, n: `23`, rel);
584	checkAlignment(loc, v: val, n: `4`, rel);
585	write32le(P: loc, V: setD5k16(insn: read32le(P: loc), imm: val >> `2`));
586	return;
587
588	case R_LARCH_B26:
589	checkInt(loc, v: val, n: `28`, rel);
590	checkAlignment(loc, v: val, n: `4`, rel);
591	write32le(P: loc, V: setD10k16(insn: read32le(P: loc), imm: val >> `2`));
592	return;
593
594	case R_LARCH_CALL36: {
595	// This relocation is designed for adjacent pcaddu18i+jirl pairs that
596	// are patched in one time. Because of sign extension of these insns'
597	// immediate fields, the relocation range is [-128G - 0x20000, +128G -
598	// 0x20000) (of course must be 4-byte aligned).
599	if (((int64_t)val + `0x20000`) != llvm::SignExtend64(X: val + `0x20000`, B: `38`))
600	reportRangeError(loc, rel, v: Twine (val), min: llvm::minIntN(N: `38`) - `0x20000`,
601	max: llvm::maxIntN(N: `38`) - `0x20000`);
602	checkAlignment(loc, v: val, n: `4`, rel);
603	// Since jirl performs sign extension on the offset immediate, adds (1<<17)
604	// to original val to get the correct hi20.
605	uint32_t hi20 = extractBits(v: val + (`1` << `17`), begin: `37`, end: `18`);
606	// Despite the name, the lower part is actually 18 bits with 4-byte aligned.
607	uint32_t lo16 = extractBits(v: val, begin: `17`, end: `2`);
608	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: hi20));
609	write32le(P: loc + `4`, V: setK16(insn: read32le(P: loc + `4`), imm: lo16));
610	return;
611	}
612
613	// Relocs intended for `addi`, `ld` or `st`.
614	case R_LARCH_PCALA_LO12:
615	// We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12
616	// on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes
617	// removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly
618	// its immediate slot width is different too (16, not 12).
619	// In this case, process like an R_LARCH_B16, but without overflow checking
620	// and only taking the value's lowest 12 bits.
621	if (isJirl(insn: read32le(P: loc))) {
622	checkAlignment(loc, v: val, n: `4`, rel);
623	val = SignExtend64<`12`>(x: val);
624	write32le(P: loc, V: setK16(insn: read32le(P: loc), imm: val >> `2`));
625	return;
626	}
627	[[fallthrough]];
628	case R_LARCH_ABS_LO12:
629	case R_LARCH_GOT_PC_LO12:
630	case R_LARCH_GOT_LO12:
631	case R_LARCH_TLS_LE_LO12:
632	case R_LARCH_TLS_IE_PC_LO12:
633	case R_LARCH_TLS_IE_LO12:
634	case R_LARCH_TLS_LE_LO12_R:
635	case R_LARCH_TLS_DESC_PC_LO12:
636	case R_LARCH_TLS_DESC_LO12:
637	write32le(P: loc, V: setK12(insn: read32le(P: loc), imm: extractBits(v: val, begin: `11`, end: `0`)));
638	return;
639
640	// Relocs intended for `lu12i.w` or `pcalau12i`.
641	case R_LARCH_ABS_HI20:
642	case R_LARCH_PCALA_HI20:
643	case R_LARCH_GOT_PC_HI20:
644	case R_LARCH_GOT_HI20:
645	case R_LARCH_TLS_LE_HI20:
646	case R_LARCH_TLS_IE_PC_HI20:
647	case R_LARCH_TLS_IE_HI20:
648	case R_LARCH_TLS_LD_PC_HI20:
649	case R_LARCH_TLS_LD_HI20:
650	case R_LARCH_TLS_GD_PC_HI20:
651	case R_LARCH_TLS_GD_HI20:
652	case R_LARCH_TLS_DESC_PC_HI20:
653	case R_LARCH_TLS_DESC_HI20:
654	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: extractBits(v: val, begin: `31`, end: `12`)));
655	return;
656	case R_LARCH_TLS_LE_HI20_R:
657	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: extractBits(v: val + `0x800`, begin: `31`, end: `12`)));
658	return;
659
660	// Relocs intended for `lu32i.d`.
661	case R_LARCH_ABS64_LO20:
662	case R_LARCH_PCALA64_LO20:
663	case R_LARCH_GOT64_PC_LO20:
664	case R_LARCH_GOT64_LO20:
665	case R_LARCH_TLS_LE64_LO20:
666	case R_LARCH_TLS_IE64_PC_LO20:
667	case R_LARCH_TLS_IE64_LO20:
668	case R_LARCH_TLS_DESC64_PC_LO20:
669	case R_LARCH_TLS_DESC64_LO20:
670	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: extractBits(v: val, begin: `51`, end: `32`)));
671	return;
672
673	// Relocs intended for `lu52i.d`.
674	case R_LARCH_ABS64_HI12:
675	case R_LARCH_PCALA64_HI12:
676	case R_LARCH_GOT64_PC_HI12:
677	case R_LARCH_GOT64_HI12:
678	case R_LARCH_TLS_LE64_HI12:
679	case R_LARCH_TLS_IE64_PC_HI12:
680	case R_LARCH_TLS_IE64_HI12:
681	case R_LARCH_TLS_DESC64_PC_HI12:
682	case R_LARCH_TLS_DESC64_HI12:
683	write32le(P: loc, V: setK12(insn: read32le(P: loc), imm: extractBits(v: val, begin: `63`, end: `52`)));
684	return;
685
686	case R_LARCH_ADD6:
687	loc = (loc & `0xc0`) \| ((*loc + val) & `0x3f`);
688	return;
689	case R_LARCH_ADD8:
690	*loc += val;
691	return;
692	case R_LARCH_ADD16:
693	write16le(P: loc, V: read16le(P: loc) + val);
694	return;
695	case R_LARCH_ADD32:
696	write32le(P: loc, V: read32le(P: loc) + val);
697	return;
698	case R_LARCH_ADD64:
699	write64le(P: loc, V: read64le(P: loc) + val);
700	return;
701	case R_LARCH_ADD_ULEB128:
702	handleUleb128(loc, val);
703	return;
704	case R_LARCH_SUB6:
705	loc = (loc & `0xc0`) \| ((*loc - val) & `0x3f`);
706	return;
707	case R_LARCH_SUB8:
708	*loc -= val;
709	return;
710	case R_LARCH_SUB16:
711	write16le(P: loc, V: read16le(P: loc) - val);
712	return;
713	case R_LARCH_SUB32:
714	write32le(P: loc, V: read32le(P: loc) - val);
715	return;
716	case R_LARCH_SUB64:
717	write64le(P: loc, V: read64le(P: loc) - val);
718	return;
719	case R_LARCH_SUB_ULEB128:
720	handleUleb128(loc, val: -val);
721	return;
722
723	case R_LARCH_MARK_LA:
724	case R_LARCH_MARK_PCREL:
725	// no-op
726	return;
727
728	case R_LARCH_TLS_LE_ADD_R:
729	case R_LARCH_RELAX:
730	return; // Ignored (for now)
731
732	case R_LARCH_TLS_DESC_LD:
733	return; // nothing to do.
734	case R_LARCH_TLS_DESC32:
735	write32le(P: loc + `4`, V: val);
736	return;
737	case R_LARCH_TLS_DESC64:
738	write64le(P: loc + `8`, V: val);
739	return;
740
741	default:
742	llvm_unreachable("unknown relocation");
743	}
744	}
745
746	static bool relax(InputSection &sec) {
747	const uint64_t secAddr = sec.getVA();
748	const MutableArrayRef<Relocation> relocs = sec.relocs();
749	auto &aux = *sec.relaxAux;
750	bool changed = false;
751	ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);
752	uint64_t delta = `0`;
753
754	std::fill_n(aux.relocTypes.get(), relocs.size(), R_LARCH_NONE);
755	aux.writes.clear();
756	for (auto [i, r] : llvm::enumerate(First: relocs)) {
757	const uint64_t loc = secAddr + r.offset - delta;
758	uint32_t &cur = aux.relocDeltas [i], remove = `0`;
759	switch (r.type) {
760	case R_LARCH_ALIGN: {
761	const uint64_t addend =
762	r.sym->isUndefined() ? Log2_64(Value: r.addend) + `1` : r.addend;
763	const uint64_t allBytes = (`1ULL` << (addend & `0xff`)) - `4`;
764	const uint64_t align = `1ULL` << (addend & `0xff`);
765	const uint64_t maxBytes = addend >> `8`;
766	const uint64_t off = loc & (align - `1`);
767	const uint64_t curBytes = off == `0` ? `0` : align - off;
768	// All bytes beyond the alignment boundary should be removed.
769	// If emit bytes more than max bytes to emit, remove all.
770	if (maxBytes != `0` && curBytes > maxBytes)
771	remove = allBytes;
772	else
773	remove = allBytes - curBytes;
774	// If we can't satisfy this alignment, we've found a bad input.
775	if (LLVM_UNLIKELY(static_cast<int32_t>(remove) < `0`)) {
776	errorOrWarn(msg: getErrorLocation(loc: (const uint8_t *)loc) +
777	"insufficient padding bytes for " + lld::toString(type: r.type) +
778	": " + Twine (allBytes) + " bytes available for " +
779	"requested alignment of " + Twine (align) + " bytes");
780	remove = `0`;
781	}
782	break;
783	}
784	}
785
786	// For all anchors whose offsets are <= r.offset, they are preceded by
787	// the previous relocation whose `relocDeltas` value equals `delta`.
788	// Decrease their st_value and update their st_size.
789	for (; sa.size() && sa [`0`].offset <= r.offset; sa = sa.slice(N: `1`)) {
790	if (sa [`0`].end)
791	sa [`0`].d->size = sa [`0`].offset - delta - sa [`0`].d->value;
792	else
793	sa [`0`].d->value = sa [`0`].offset - delta;
794	}
795	delta += remove;
796	if (delta != cur) {
797	cur = delta;
798	changed = true;
799	}
800	}
801
802	for (const SymbolAnchor &a : sa) {
803	if (a.end)
804	a.d->size = a.offset - delta - a.d->value;
805	else
806	a.d->value = a.offset - delta;
807	}
808	// Inform assignAddresses that the size has changed.
809	if (!isUInt<`32`>(x: delta))
810	fatal(msg: "section size decrease is too large: " + Twine (delta));
811	sec.bytesDropped = delta;
812	return changed;
813	}
814
815	// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
816	// the absence of a linker script. For call and load/store R_LARCH_RELAX, code
817	// shrinkage may reduce displacement and make more relocations eligible for
818	// relaxation. Code shrinkage may increase displacement to a call/load/store
819	// target at a higher fixed address, invalidating an earlier relaxation. Any
820	// change in section sizes can have cascading effect and require another
821	// relaxation pass.
822	bool LoongArch::relaxOnce(int pass) const {
823	if (config ->relocatable)
824	return false;
825
826	if (pass == `0`)
827	initSymbolAnchors();
828
829	SmallVector<InputSection *, `0`> storage;
830	bool changed = false;
831	for (OutputSection *osec : outputSections) {
832	if (!(osec->flags & SHF_EXECINSTR))
833	continue;
834	for (InputSection sec : getInputSections(os: osec, storage))
835	changed \|= relax(sec&: *sec);
836	}
837	return changed;
838	}
839
840	void LoongArch::finalizeRelax(int passes) const {
841	log(msg: "relaxation passes: " + Twine (passes));
842	SmallVector<InputSection *, `0`> storage;
843	for (OutputSection *osec : outputSections) {
844	if (!(osec->flags & SHF_EXECINSTR))
845	continue;
846	for (InputSection sec : getInputSections(os: osec, storage)) {
847	RelaxAux &aux = *sec->relaxAux;
848	if (!aux.relocDeltas)
849	continue;
850
851	MutableArrayRef<Relocation> rels = sec->relocs();
852	ArrayRef<uint8_t> old = sec->content();
853	size_t newSize = old.size() - aux.relocDeltas [rels.size() - `1`];
854	uint8_t *p = context().bAlloc.Allocate<uint8_t>(Num: newSize);
855	uint64_t offset = `0`;
856	int64_t delta = `0`;
857	sec->content_ = p;
858	sec->size = newSize;
859	sec->bytesDropped = `0`;
860
861	// Update section content: remove NOPs for R_LARCH_ALIGN and rewrite
862	// instructions for relaxed relocations.
863	for (size_t i = `0`, e = rels.size(); i != e; ++i) {
864	uint32_t remove = aux.relocDeltas [i] - delta;
865	delta = aux.relocDeltas [i];
866	if (remove == `0` && aux.relocTypes [i] == R_LARCH_NONE)
867	continue;
868
869	// Copy from last location to the current relocated location.
870	const Relocation &r = rels [i];
871	uint64_t size = r.offset - offset;
872	memcpy(dest: p, src: old.data() + offset, n: size);
873	p += size;
874	offset = r.offset + remove;
875	}
876	memcpy(dest: p, src: old.data() + offset, n: old.size() - offset);
877
878	// Subtract the previous relocDeltas value from the relocation offset.
879	// For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease
880	// their r_offset by the same delta.
881	delta = `0`;
882	for (size_t i = `0`, e = rels.size(); i != e;) {
883	uint64_t cur = rels [i].offset;
884	do {
885	rels [i].offset -= delta;
886	if (aux.relocTypes [i] != R_LARCH_NONE)
887	rels [i].type = aux.relocTypes [i];
888	} while (++i != e && rels [i].offset == cur);
889	delta = aux.relocDeltas [i - `1`];
890	}
891	}
892	}
893	}
894
895	TargetInfo *elf::getLoongArchTargetInfo() {
896	static LoongArch target;
897	return &target;
898	}
899

Browse the source code of llvm_projects/lld/ELF/Arch/LoongArch.cpp