LoongArch.cpp source code [llvm_projects/lld/ELF/Arch/LoongArch.cpp]

1	//===- LoongArch.cpp ------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "InputFiles.h"
10	#include "OutputSections.h"
11	#include "RelocScan.h"
12	#include "Symbols.h"
13	#include "SyntheticSections.h"
14	#include "Target.h"
15	#include "llvm/BinaryFormat/ELF.h"
16	#include "llvm/Support/LEB128.h"
17
18	using namespace llvm;
19	using namespace llvm::object;
20	using namespace llvm::support::endian;
21	using namespace llvm::ELF;
22	using namespace lld;
23	using namespace lld::elf;
24
25	namespace {
26	class LoongArch final : public TargetInfo {
27	public:
28	LoongArch(Ctx &);
29	uint32_t calcEFlags() const override;
30	int64_t getImplicitAddend(const uint8_t buf, RelType type) const* override;
31	void writeGotPlt(uint8_t buf, const* Symbol &s) const override;
32	void writeIgotPlt(uint8_t buf, const* Symbol &s) const override;
33	void writePltHeader(uint8_t buf) const* override;
34	void writePlt(uint8_t buf, const* Symbol &sym,
35	uint64_t pltEntryAddr) const override;
36	RelType getDynRel(RelType type) const override;
37	RelExpr getRelExpr(RelType type, const Symbol &s,
38	const uint8_t loc) const* override;
39	bool usesOnlyLowPageBits(RelType type) const override;
40	template <class ELFT, class RelTy>
41	void scanSectionImpl(InputSectionBase &, Relocs<RelTy>);
42	void scanSection(InputSectionBase &sec) override {
43	if (ctx.arg.is64)
44	elf::scanSection1<LoongArch, ELF64LE>(target&: *this, sec);
45	else
46	elf::scanSection1<LoongArch, ELF32LE>(target&: *this, sec);
47	}
48	void relocate(uint8_t loc, const* Relocation &rel,
49	uint64_t val) const override;
50	bool relaxOnce(int pass) const override;
51	bool synthesizeAlign(uint64_t &dot, InputSection *sec) override;
52	void relocateAlloc(InputSection &sec, uint8_t buf) const* override;
53	void finalizeRelax(int passes) const override;
54
55	private:
56	void tlsdescToIe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
57	void tlsdescToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
58	bool tryGotToPCRel(uint8_t loc, const* Relocation &rHi20,
59	const Relocation &rLo12, uint64_t secAddr) const;
60	template <class ELFT, class RelTy>
61	bool synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
62	Relocs<RelTy> rels);
63	template <class ELFT, class RelTy>
64	void finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
65	Relocs<RelTy> rels);
66	template <class ELFT>
67	bool synthesizeAlignAux(uint64_t &dot, InputSection *sec);
68
69	// The following two variables are used by synthesized ALIGN relocations.
70	InputSection baseSec = nullptr*;
71	// r_offset and r_addend pairs.
72	SmallVector<std::pair<uint64_t, uint64_t>, `0`> synthesizedAligns;
73	};
74	} // end anonymous namespace
75
76	namespace {
77	enum Op {
78	SUB_W = `0x00110000`,
79	SUB_D = `0x00118000`,
80	BREAK = `0x002a0000`,
81	SRLI_W = `0x00448000`,
82	SRLI_D = `0x00450000`,
83	ADDI_W = `0x02800000`,
84	ADDI_D = `0x02c00000`,
85	ANDI = `0x03400000`,
86	ORI = `0x03800000`,
87	LU12I_W = `0x14000000`,
88	PCADDI = `0x18000000`,
89	PCADDU12I = `0x1c000000`,
90	PCALAU12I = `0x1a000000`,
91	LD_W = `0x28800000`,
92	LD_D = `0x28c00000`,
93	JIRL = `0x4c000000`,
94	B = `0x50000000`,
95	BL = `0x54000000`,
96	};
97
98	enum Reg {
99	R_ZERO = `0`,
100	R_RA = `1`,
101	R_TP = `2`,
102	R_A0 = `4`,
103	R_T0 = `12`,
104	R_T1 = `13`,
105	R_T2 = `14`,
106	R_T3 = `15`,
107	};
108	} // namespace
109
110	// Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences
111	// like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.` where the `pcalau12i`*
112	// produces a PC-relative intermediate value with the lowest 12 bits zeroed (the
113	// "page") for the next instruction to add in the "page offset". (`pcalau12i`
114	// stands for something like "PC ALigned Add Upper that starts from the 12th
115	// bit, Immediate".)
116	//
117	// Here a "page" is in fact just another way to refer to the 12-bit range
118	// allowed by the immediate field of the addi/ld/st instructions, and not
119	// related to the system or the kernel's actual page size. The semantics happen
120	// to match the AArch64 `adrp`, so the concept of "page" is borrowed here.
121	static uint64_t getLoongArchPage(uint64_t p) {
122	return p & ~static_cast<uint64_t>(`0xfff`);
123	}
124
125	static uint32_t lo12(uint32_t val) { return val & `0xfff`; }
126
127	// Calculate the adjusted page delta between dest and PC.
128	uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) {
129	// Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d
130	// + lu52i.d`, they must be adjacent so that we can infer the PC of
131	// `pcalau12i` when calculating the page delta for the other two instructions
132	// (lu32i.d and lu52i.d). Compensate all the sign-extensions is a bit
133	// complicated. Just use psABI recommended algorithm.
134	uint64_t pcalau12i_pc;
135	switch (type) {
136	case R_LARCH_PCALA64_LO20:
137	case R_LARCH_GOT64_PC_LO20:
138	case R_LARCH_TLS_IE64_PC_LO20:
139	case R_LARCH_TLS_DESC64_PC_LO20:
140	pcalau12i_pc = pc - `8`;
141	break;
142	case R_LARCH_PCALA64_HI12:
143	case R_LARCH_GOT64_PC_HI12:
144	case R_LARCH_TLS_IE64_PC_HI12:
145	case R_LARCH_TLS_DESC64_PC_HI12:
146	pcalau12i_pc = pc - `12`;
147	break;
148	default:
149	pcalau12i_pc = pc;
150	break;
151	}
152	uint64_t result = getLoongArchPage(p: dest) - getLoongArchPage(p: pcalau12i_pc);
153	if (dest & `0x800`)
154	result += `0x1000` - `0x1'0000'0000`;
155	if (result & `0x8000'0000`)
156	result += `0x1'0000'0000`;
157	return result;
158	}
159
160	static uint32_t hi20(uint32_t val) { return (val + `0x800`) >> `12`; }
161
162	static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) {
163	return op \| d \| (j << `5`) \| (k << `10`);
164	}
165
166	// Extract bits v[begin:end], where range is inclusive.
167	static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
168	return begin == `63` ? v >> end : (v & ((`1ULL` << (begin + `1`)) - `1`)) >> end;
169	}
170
171	static uint32_t getD5(uint64_t v) { return extractBits(v, begin: `4`, end: `0`); }
172
173	static uint32_t getJ5(uint64_t v) { return extractBits(v, begin: `9`, end: `5`); }
174
175	static uint32_t setD5k16(uint32_t insn, uint32_t imm) {
176	uint32_t immLo = extractBits(v: imm, begin: `15`, end: `0`);
177	uint32_t immHi = extractBits(v: imm, begin: `20`, end: `16`);
178	return (insn & `0xfc0003e0`) \| (immLo << `10`) \| immHi;
179	}
180
181	static uint32_t setD10k16(uint32_t insn, uint32_t imm) {
182	uint32_t immLo = extractBits(v: imm, begin: `15`, end: `0`);
183	uint32_t immHi = extractBits(v: imm, begin: `25`, end: `16`);
184	return (insn & `0xfc000000`) \| (immLo << `10`) \| immHi;
185	}
186
187	static uint32_t setJ20(uint32_t insn, uint32_t imm) {
188	return (insn & `0xfe00001f`) \| (extractBits(v: imm, begin: `19`, end: `0`) << `5`);
189	}
190
191	static uint32_t setJ5(uint32_t insn, uint32_t imm) {
192	return (insn & `0xfffffc1f`) \| (extractBits(v: imm, begin: `4`, end: `0`) << `5`);
193	}
194
195	static uint32_t setK12(uint32_t insn, uint32_t imm) {
196	return (insn & `0xffc003ff`) \| (extractBits(v: imm, begin: `11`, end: `0`) << `10`);
197	}
198
199	static uint32_t setK16(uint32_t insn, uint32_t imm) {
200	return (insn & `0xfc0003ff`) \| (extractBits(v: imm, begin: `15`, end: `0`) << `10`);
201	}
202
203	static bool isJirl(uint32_t insn) {
204	return (insn & `0xfc000000`) == JIRL;
205	}
206
207	static void handleUleb128(Ctx &ctx, uint8_t *loc, uint64_t val) {
208	const uint32_t maxcount = `1` + `64` / `7`;
209	uint32_t count;
210	const char error = nullptr*;
211	uint64_t orig = decodeULEB128(p: loc, n: &count, end: nullptr, error: &error);
212	if (count > maxcount \|\| (count == maxcount && error))
213	Err(ctx) << getErrorLoc(ctx, loc) << "extra space for uleb128";
214	uint64_t mask = count < maxcount ? (`1ULL` << `7` * count) - `1` : -`1ULL`;
215	encodeULEB128(Value: (orig + val) & mask, p: loc, PadTo: count);
216	}
217
218	LoongArch::LoongArch(Ctx &ctx) : TargetInfo (ctx) {
219	// The LoongArch ISA itself does not have a limit on page sizes. According to
220	// the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is
221	// 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to
222	// "unlimited".
223	// However, practically the maximum usable page size is constrained by the
224	// kernel implementation, and 64KiB is the biggest non-huge page size
225	// supported by Linux as of v6.4. The most widespread page size in use,
226	// though, is 16KiB.
227	defaultCommonPageSize = `16384`;
228	defaultMaxPageSize = `65536`;
229	write32le(P: trapInstr.data(), V: BREAK); // break 0
230
231	copyRel = R_LARCH_COPY;
232	pltRel = R_LARCH_JUMP_SLOT;
233	relativeRel = R_LARCH_RELATIVE;
234	iRelativeRel = R_LARCH_IRELATIVE;
235
236	if (ctx.arg.is64) {
237	symbolicRel = R_LARCH_64;
238	tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64;
239	tlsOffsetRel = R_LARCH_TLS_DTPREL64;
240	tlsGotRel = R_LARCH_TLS_TPREL64;
241	tlsDescRel = R_LARCH_TLS_DESC64;
242	} else {
243	symbolicRel = R_LARCH_32;
244	tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32;
245	tlsOffsetRel = R_LARCH_TLS_DTPREL32;
246	tlsGotRel = R_LARCH_TLS_TPREL32;
247	tlsDescRel = R_LARCH_TLS_DESC32;
248	}
249
250	gotRel = symbolicRel;
251
252	// .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map
253	gotPltHeaderEntriesNum = `2`;
254
255	pltHeaderSize = `32`;
256	pltEntrySize = `16`;
257	ipltEntrySize = `16`;
258	}
259
260	static uint32_t getEFlags(Ctx &ctx, const InputFile *f) {
261	if (ctx.arg.is64)
262	return cast<ObjFile<ELF64LE>>(Val: f)->getObj().getHeader().e_flags;
263	return cast<ObjFile<ELF32LE>>(Val: f)->getObj().getHeader().e_flags;
264	}
265
266	static bool inputFileHasCode(const InputFile *f) {
267	for (const auto *sec : f->getSections())
268	if (sec && sec->flags & SHF_EXECINSTR)
269	return true;
270
271	return false;
272	}
273
274	uint32_t LoongArch::calcEFlags() const {
275	// If there are only binary input files (from -b binary), use a
276	// value of 0 for the ELF header flags.
277	if (ctx.objectFiles.empty())
278	return `0`;
279
280	uint32_t target = `0`;
281	const InputFile *targetFile;
282	for (const InputFile *f : ctx.objectFiles) {
283	// Do not enforce ABI compatibility if the input file does not contain code.
284	// This is useful for allowing linkage with data-only object files produced
285	// with tools like objcopy, that have zero e_flags.
286	if (!inputFileHasCode(f))
287	continue;
288
289	// Take the first non-zero e_flags as the reference.
290	uint32_t flags = getEFlags(ctx, f);
291	if (target == `0` && flags != `0`) {
292	target = flags;
293	targetFile = f;
294	}
295
296	if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) !=
297	(target & EF_LOONGARCH_ABI_MODIFIER_MASK))
298	ErrAlways(ctx) << f
299	<< ": cannot link object files with different ABI from "
300	<< targetFile;
301
302	// We cannot process psABI v1.x / object ABI v0 files (containing stack
303	// relocations), unlike ld.bfd.
304	//
305	// Instead of blindly accepting every v0 object and only failing at
306	// relocation processing time, just disallow interlink altogether. We
307	// don't expect significant usage of object ABI v0 in the wild (the old
308	// world may continue using object ABI v0 for a while, but as it's not
309	// binary-compatible with the upstream i.e. new-world ecosystem, it's not
310	// being considered here).
311	//
312	// There are briefly some new-world systems with object ABI v0 binaries too.
313	// It is because these systems were built before the new ABI was finalized.
314	// These are not supported either due to the extremely small number of them,
315	// and the few impacted users are advised to simply rebuild world or
316	// reinstall a recent system.
317	if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1)
318	ErrAlways(ctx) << f << ": unsupported object file ABI version";
319	}
320
321	return target;
322	}
323
324	int64_t LoongArch::getImplicitAddend(const uint8_t buf, RelType type) const* {
325	switch (type) {
326	default:
327	InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
328	return `0`;
329	case R_LARCH_32:
330	case R_LARCH_TLS_DTPMOD32:
331	case R_LARCH_TLS_DTPREL32:
332	case R_LARCH_TLS_TPREL32:
333	return SignExtend64<`32`>(x: read32le(P: buf));
334	case R_LARCH_64:
335	case R_LARCH_TLS_DTPMOD64:
336	case R_LARCH_TLS_DTPREL64:
337	case R_LARCH_TLS_TPREL64:
338	return read64le(P: buf);
339	case R_LARCH_RELATIVE:
340	case R_LARCH_IRELATIVE:
341	return ctx.arg.is64 ? read64le(P: buf) : read32le(P: buf);
342	case R_LARCH_NONE:
343	case R_LARCH_JUMP_SLOT:
344	// These relocations are defined as not having an implicit addend.
345	return `0`;
346	case R_LARCH_TLS_DESC32:
347	return read32le(P: buf + `4`);
348	case R_LARCH_TLS_DESC64:
349	return read64le(P: buf + `8`);
350	}
351	}
352
353	void LoongArch::writeGotPlt(uint8_t buf, const* Symbol &s) const {
354	if (ctx.arg.is64)
355	write64le(P: buf, V: ctx.in.plt ->getVA());
356	else
357	write32le(P: buf, V: ctx.in.plt ->getVA());
358	}
359
360	void LoongArch::writeIgotPlt(uint8_t buf, const* Symbol &s) const {
361	if (ctx.arg.writeAddends) {
362	if (ctx.arg.is64)
363	write64le(P: buf, V: s.getVA(ctx));
364	else
365	write32le(P: buf, V: s.getVA(ctx));
366	}
367	}
368
369	void LoongArch::writePltHeader(uint8_t buf) const* {
370	// The LoongArch PLT is currently structured just like that of RISCV.
371	// Annoyingly, this means the PLT is still using `pcaddu12i` to perform
372	// PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`),
373	// in contrast to the AArch64-like page-offset scheme with `pcalau12i` that
374	// is used everywhere else involving PC-relative operations in the LoongArch
375	// ELF psABI v2.00.
376	//
377	// The `pcrel_{hi20,lo12}` operators are illustrative only and not really
378	// supported by LoongArch assemblers.
379	//
380	// pcaddu12i $t2, %pcrel_hi20(.got.plt)
381	// sub.[wd] $t1, $t1, $t3
382	// ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve
383	// addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0]
384	// addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt)
385	// srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0]
386	// ld.[wd] $t0, $t0, Wordsize ; t0 = link_map
387	// jr $t3
388	uint32_t offset = ctx.in.gotPlt ->getVA() - ctx.in.plt ->getVA();
389	uint32_t sub = ctx.arg.is64 ? SUB_D : SUB_W;
390	uint32_t ld = ctx.arg.is64 ? LD_D : LD_W;
391	uint32_t addi = ctx.arg.is64 ? ADDI_D : ADDI_W;
392	uint32_t srli = ctx.arg.is64 ? SRLI_D : SRLI_W;
393	write32le(P: buf + `0`, V: insn(op: PCADDU12I, d: R_T2, j: hi20(val: offset), k: `0`));
394	write32le(P: buf + `4`, V: insn(op: sub, d: R_T1, j: R_T1, k: R_T3));
395	write32le(P: buf + `8`, V: insn(op: ld, d: R_T3, j: R_T2, k: lo12(val: offset)));
396	write32le(P: buf + `12`,
397	V: insn(op: addi, d: R_T1, j: R_T1, k: lo12(val: -ctx.target ->pltHeaderSize - `12`)));
398	write32le(P: buf + `16`, V: insn(op: addi, d: R_T0, j: R_T2, k: lo12(val: offset)));
399	write32le(P: buf + `20`, V: insn(op: srli, d: R_T1, j: R_T1, k: ctx.arg.is64 ? `1` : `2`));
400	write32le(P: buf + `24`, V: insn(op: ld, d: R_T0, j: R_T0, k: ctx.arg.wordsize));
401	write32le(P: buf + `28`, V: insn(op: JIRL, d: R_ZERO, j: R_T3, k: `0`));
402	}
403
404	void LoongArch::writePlt(uint8_t buf, const* Symbol &sym,
405	uint64_t pltEntryAddr) const {
406	// See the comment in writePltHeader for reason why pcaddu12i is used instead
407	// of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days.
408	//
409	// pcaddu12i $t3, %pcrel_hi20(f@.got.plt)
410	// ld.[wd] $t3, $t3, %pcrel_lo12(f@.got.plt)
411	// jirl $t1, $t3, 0
412	// nop
413	uint32_t offset = sym.getGotPltVA(ctx) - pltEntryAddr;
414	write32le(P: buf + `0`, V: insn(op: PCADDU12I, d: R_T3, j: hi20(val: offset), k: `0`));
415	write32le(P: buf + `4`,
416	V: insn(op: ctx.arg.is64 ? LD_D : LD_W, d: R_T3, j: R_T3, k: lo12(val: offset)));
417	write32le(P: buf + `8`, V: insn(op: JIRL, d: R_T1, j: R_T3, k: `0`));
418	write32le(P: buf + `12`, V: insn(op: ANDI, d: R_ZERO, j: R_ZERO, k: `0`));
419	}
420
421	RelType LoongArch::getDynRel(RelType type) const {
422	return type == ctx.target ->symbolicRel ? type
423	: static_cast<RelType>(R_LARCH_NONE);
424	}
425
426	// Used by relocateNonAlloc(), scanEhSection(), and the extreme code model
427	// fallback in relocateAlloc(). For alloc sections, scanSectionImpl() is the
428	// primary relocation classifier.
429	RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
430	const uint8_t loc) const* {
431	switch (type) {
432	case R_LARCH_NONE:
433	return R_NONE;
434	case R_LARCH_32:
435	case R_LARCH_64:
436	return R_ABS;
437	case R_LARCH_ADD6:
438	case R_LARCH_ADD8:
439	case R_LARCH_ADD16:
440	case R_LARCH_ADD32:
441	case R_LARCH_ADD64:
442	case R_LARCH_ADD_ULEB128:
443	case R_LARCH_SUB6:
444	case R_LARCH_SUB8:
445	case R_LARCH_SUB16:
446	case R_LARCH_SUB32:
447	case R_LARCH_SUB64:
448	case R_LARCH_SUB_ULEB128:
449	// The LoongArch add/sub relocs behave like the RISCV counterparts; reuse
450	// the RelExpr to avoid code duplication.
451	return RE_RISCV_ADD;
452	case R_LARCH_32_PCREL:
453	case R_LARCH_64_PCREL:
454	case R_LARCH_PCREL20_S2:
455	case R_LARCH_PCADD_HI20:
456	return R_PC;
457	default:
458	Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
459	<< ") against symbol " << &s;
460	return R_NONE;
461	}
462	}
463
464	bool LoongArch::usesOnlyLowPageBits(RelType type) const {
465	switch (type) {
466	default:
467	return false;
468	case R_LARCH_PCALA_LO12:
469	case R_LARCH_GOT_LO12:
470	case R_LARCH_GOT_PC_LO12:
471	case R_LARCH_TLS_IE_PC_LO12:
472	case R_LARCH_TLS_DESC_LO12:
473	case R_LARCH_TLS_DESC_PC_LO12:
474	return true;
475	}
476	}
477
478	template <class ELFT, class RelTy>
479	void LoongArch::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
480	RelocScan rs(ctx, &sec);
481	sec.relocations.reserve(N: rels.size());
482	for (auto it = rels.begin(); it != rels.end(); ++it) {
483	RelType type = it->getType(false);
484	uint32_t symIndex = it->getSymbol(false);
485	Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIndex);
486	uint64_t offset = it->r_offset;
487	if (sym.isUndefined() && symIndex != `0` &&
488	rs.maybeReportUndefined(sym&: cast<Undefined>(Val&: sym), offset))
489	continue;
490	int64_t addend = rs.getAddend<ELFT>(*it, type);
491	RelExpr expr;
492	// Relocation types that only need a RelExpr set `expr` and break out of
493	// the switch to reach rs.process(). Types that need special handling
494	// (fast-path helpers, TLS) call a handler and use `continue`.
495	switch (type) {
496	case R_LARCH_NONE:
497	case R_LARCH_MARK_LA:
498	case R_LARCH_MARK_PCREL:
499	continue;
500
501	// Absolute relocations:
502	case R_LARCH_32:
503	case R_LARCH_64:
504	case R_LARCH_ABS_HI20:
505	case R_LARCH_ABS_LO12:
506	case R_LARCH_ABS64_LO20:
507	case R_LARCH_ABS64_HI12:
508	expr = R_ABS;
509	break;
510
511	case R_LARCH_PCALA_LO12:
512	// R_LARCH_PCALA_LO12 on JIRL is used for function calls (glibc 2.37).
513	expr = isJirl(insn: read32le(P: sec.content().data() + offset)) ? R_PLT : R_ABS;
514	break;
515
516	// PC-indirect relocations (lo12 paired with a preceding hi20 pcadd):
517	case R_LARCH_PCADD_LO12:
518	case R_LARCH_GOT_PCADD_LO12:
519	case R_LARCH_TLS_IE_PCADD_LO12:
520	case R_LARCH_TLS_LD_PCADD_LO12:
521	case R_LARCH_TLS_GD_PCADD_LO12:
522	case R_LARCH_TLS_DESC_PCADD_LO12:
523	expr = RE_LOONGARCH_PC_INDIRECT;
524	break;
525
526	// PC-relative relocations:
527	case R_LARCH_32_PCREL:
528	case R_LARCH_64_PCREL:
529	case R_LARCH_PCREL20_S2:
530	case R_LARCH_PCADD_HI20:
531	rs.processR_PC(type, offset, addend, sym);
532	continue;
533
534	// PLT-generating relocations:
535	case R_LARCH_B16:
536	case R_LARCH_B21:
537	case R_LARCH_B26:
538	case R_LARCH_CALL30:
539	case R_LARCH_CALL36:
540	rs.processR_PLT_PC(type, offset, addend, sym);
541	continue;
542
543	// Page-PC relocations:
544	case R_LARCH_PCALA_HI20:
545	// Why not RE_LOONGARCH_PAGE_PC, majority of references don't go through
546	// PLT anyway so why waste time checking only to get everything relaxed
547	// back to it?
548	//
549	// This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want
550	// both the HI20 and LO12 to potentially refer to the PLT. But in reality
551	// the HI20 reloc appears earlier, and the relocs don't contain enough
552	// information to let us properly resolve semantics per symbol.
553	// Unlike RISCV, our LO12 relocs do not* point to their corresponding*
554	// HI20 relocs, hence it is nearly impossible to 100% accurately determine
555	// each HI20's "flavor" without taking big performance hits, in the
556	// presence of edge cases (e.g. HI20 without pairing LO12; paired LO12
557	// placed so far apart that relationship is not certain anymore), and
558	// programmer mistakes (e.g. as outlined in
559	// https://github.com/loongson/la-abi-specs/pull/3).
560	//
561	// Ideally we would scan in an extra pass for all LO12s on JIRL, then mark
562	// every HI20 reloc referring to the same symbol differently; this is not
563	// feasible with the current function signature of getRelExpr that doesn't
564	// allow for such inter-pass state.
565	//
566	// So, unfortunately we have to again workaround this quirk the same way
567	// as BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing,
568	// only relaxing back to RE_LOONGARCH_PAGE_PC if it's known not so at a
569	// later stage.
570	expr = RE_LOONGARCH_PLT_PAGE_PC;
571	break;
572	case R_LARCH_PCALA64_LO20:
573	case R_LARCH_PCALA64_HI12:
574	expr = RE_LOONGARCH_PAGE_PC;
575	break;
576
577	// GOT-generating relocations:
578	case R_LARCH_GOT_PC_HI20:
579	case R_LARCH_GOT64_PC_LO20:
580	case R_LARCH_GOT64_PC_HI12:
581	expr = RE_LOONGARCH_GOT_PAGE_PC;
582	break;
583	case R_LARCH_GOT_PCADD_HI20:
584	expr = R_GOT_PC;
585	break;
586	case R_LARCH_GOT_PC_LO12:
587	expr = RE_LOONGARCH_GOT;
588	break;
589	case R_LARCH_GOT_HI20:
590	case R_LARCH_GOT_LO12:
591	case R_LARCH_GOT64_LO20:
592	case R_LARCH_GOT64_HI12:
593	expr = R_GOT;
594	break;
595
596	// DTPREL relocations:
597	case R_LARCH_TLS_DTPREL32:
598	case R_LARCH_TLS_DTPREL64:
599	expr = R_DTPREL;
600	break;
601
602	// TLS LE relocations:
603	case R_LARCH_TLS_TPREL32:
604	case R_LARCH_TLS_TPREL64:
605	case R_LARCH_TLS_LE_HI20:
606	case R_LARCH_TLS_LE_HI20_R:
607	case R_LARCH_TLS_LE_LO12:
608	case R_LARCH_TLS_LE_LO12_R:
609	case R_LARCH_TLS_LE64_LO20:
610	case R_LARCH_TLS_LE64_HI12:
611	if (rs.checkTlsLe(offset, sym, type))
612	continue;
613	expr = R_TPREL;
614	break;
615	// TLS IE relocations (optimizable to LE in non-extreme code model):
616	case R_LARCH_TLS_IE_PC_HI20:
617	rs.handleTlsIe(ieExpr: RE_LOONGARCH_GOT_PAGE_PC, type, offset, addend, sym);
618	continue;
619	case R_LARCH_TLS_IE_PC_LO12:
620	rs.handleTlsIe(ieExpr: RE_LOONGARCH_GOT, type, offset, addend, sym);
621	continue;
622	// TLS IE relocations (extreme code model, no IE->LE optimization):
623	case R_LARCH_TLS_IE64_PC_LO20:
624	case R_LARCH_TLS_IE64_PC_HI12:
625	rs.handleTlsIe<false>(ieExpr: RE_LOONGARCH_GOT_PAGE_PC, type, offset, addend,
626	sym);
627	continue;
628	// TLS IE relocations (pcadd/absolute, no IE->LE optimization):
629	case R_LARCH_TLS_IE_PCADD_HI20:
630	rs.handleTlsIe<false>(ieExpr: R_GOT_PC, type, offset, addend, sym);
631	continue;
632	case R_LARCH_TLS_IE_HI20:
633	case R_LARCH_TLS_IE_LO12:
634	case R_LARCH_TLS_IE64_LO20:
635	case R_LARCH_TLS_IE64_HI12:
636	rs.handleTlsIe<false>(ieExpr: R_GOT, type, offset, addend, sym);
637	continue;
638	// TLS GD/LD relocations (no GD/LD->IE/LE optimization):
639	case R_LARCH_TLS_LD_PC_HI20:
640	case R_LARCH_TLS_GD_PC_HI20:
641	sym.setFlags(NEEDS_TLSGD);
642	sec.addReloc(r: {.expr: RE_LOONGARCH_TLSGD_PAGE_PC, .type: type, .offset: offset, .addend: addend, .sym: &sym});
643	continue;
644	case R_LARCH_TLS_LD_HI20:
645	ctx.needsTlsLd.store(i: true, m: std::memory_order_relaxed);
646	sec.addReloc(r: {.expr: R_TLSLD_GOT, .type: type, .offset: offset, .addend: addend, .sym: &sym});
647	continue;
648	case R_LARCH_TLS_GD_HI20:
649	sym.setFlags(NEEDS_TLSGD);
650	sec.addReloc(r: {.expr: R_TLSGD_GOT, .type: type, .offset: offset, .addend: addend, .sym: &sym});
651	continue;
652	case R_LARCH_TLS_LD_PCREL20_S2:
653	case R_LARCH_TLS_LD_PCADD_HI20:
654	ctx.needsTlsLd.store(i: true, m: std::memory_order_relaxed);
655	sec.addReloc(r: {.expr: R_TLSLD_PC, .type: type, .offset: offset, .addend: addend, .sym: &sym});
656	continue;
657	case R_LARCH_TLS_GD_PCREL20_S2:
658	case R_LARCH_TLS_GD_PCADD_HI20:
659	sym.setFlags(NEEDS_TLSGD);
660	sec.addReloc(r: {.expr: R_TLSGD_PC, .type: type, .offset: offset, .addend: addend, .sym: &sym});
661	continue;
662
663	// TLSDESC relocations (optimizable to IE/LE in non-extreme code model):
664	case R_LARCH_TLS_DESC_PC_HI20:
665	rs.handleTlsDesc(sharedExpr: RE_LOONGARCH_TLSDESC_PAGE_PC, ieExpr: RE_LOONGARCH_GOT_PAGE_PC,
666	type, offset, addend, sym);
667	continue;
668	case R_LARCH_TLS_DESC_PC_LO12:
669	case R_LARCH_TLS_DESC_LD:
670	rs.handleTlsDesc(sharedExpr: R_TLSDESC, ieExpr: RE_LOONGARCH_GOT_PAGE_PC, type, offset,
671	addend, sym);
672	continue;
673	case R_LARCH_TLS_DESC_PCREL20_S2:
674	rs.handleTlsDesc(sharedExpr: R_TLSDESC_PC, ieExpr: RE_LOONGARCH_GOT_PAGE_PC, type, offset,
675	addend, sym);
676	continue;
677	case R_LARCH_TLS_DESC_CALL:
678	if (!ctx.arg.shared)
679	sec.addReloc(
680	r: {.expr: sym.isPreemptible ? R_GOT : R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
681	continue;
682	// TLSDESC relocations (extreme code model, no optimization):
683	case R_LARCH_TLS_DESC64_PC_LO20:
684	case R_LARCH_TLS_DESC64_PC_HI12:
685	sym.setFlags(NEEDS_TLSDESC);
686	sec.addReloc(r: {.expr: RE_LOONGARCH_TLSDESC_PAGE_PC, .type: type, .offset: offset, .addend: addend, .sym: &sym});
687	continue;
688	// TLSDESC relocations (absolute/pcadd, no optimization):
689	case R_LARCH_TLS_DESC_HI20:
690	case R_LARCH_TLS_DESC_LO12:
691	case R_LARCH_TLS_DESC64_LO20:
692	case R_LARCH_TLS_DESC64_HI12:
693	case R_LARCH_TLS_DESC_PCADD_HI20:
694	sym.setFlags(NEEDS_TLSDESC);
695	sec.addReloc(r: {.expr: R_TLSDESC, .type: type, .offset: offset, .addend: addend, .sym: &sym});
696	continue;
697
698	// Relaxation hints:
699	case R_LARCH_TLS_LE_ADD_R:
700	case R_LARCH_RELAX:
701	if (ctx.arg.relax)
702	sec.addReloc(r: {.expr: R_RELAX_HINT, .type: type, .offset: offset, .addend: addend, .sym: &sym});
703	continue;
704	case R_LARCH_ALIGN:
705	sec.addReloc(r: {.expr: R_RELAX_HINT, .type: type, .offset: offset, .addend: addend, .sym: &sym});
706	continue;
707
708	// Misc relocations:
709	case R_LARCH_ADD6:
710	case R_LARCH_ADD8:
711	case R_LARCH_ADD16:
712	case R_LARCH_ADD32:
713	case R_LARCH_ADD64:
714	case R_LARCH_ADD_ULEB128:
715	case R_LARCH_SUB6:
716	case R_LARCH_SUB8:
717	case R_LARCH_SUB16:
718	case R_LARCH_SUB32:
719	case R_LARCH_SUB64:
720	case R_LARCH_SUB_ULEB128:
721	expr = RE_RISCV_ADD;
722	break;
723
724	default:
725	Err(ctx) << getErrorLoc(ctx, loc: sec.content().data() + offset)
726	<< "unknown relocation (" << type.v << ") against symbol "
727	<< &sym;
728	continue;
729	}
730	rs.process(expr, type, offset, sym, addend);
731	}
732
733	llvm::stable_sort(sec.relocs(),
734	[](const Relocation &lhs, const Relocation &rhs) {
735	return lhs.offset < rhs.offset;
736	});
737	}
738
739	void LoongArch::relocate(uint8_t loc, const* Relocation &rel,
740	uint64_t val) const {
741	switch (rel.type) {
742	case R_LARCH_32_PCREL:
743	checkInt(ctx, loc, v: val, n: `32`, rel);
744	[[fallthrough]];
745	case R_LARCH_32:
746	case R_LARCH_TLS_DTPREL32:
747	write32le(P: loc, V: val);
748	return;
749	case R_LARCH_64:
750	case R_LARCH_TLS_DTPREL64:
751	case R_LARCH_64_PCREL:
752	write64le(P: loc, V: val);
753	return;
754
755	// Relocs intended for `pcaddi`.
756	case R_LARCH_PCREL20_S2:
757	case R_LARCH_TLS_LD_PCREL20_S2:
758	case R_LARCH_TLS_GD_PCREL20_S2:
759	case R_LARCH_TLS_DESC_PCREL20_S2:
760	checkInt(ctx, loc, v: val, n: `22`, rel);
761	checkAlignment(ctx, loc, v: val, n: `4`, rel);
762	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: val >> `2`));
763	return;
764
765	case R_LARCH_B16:
766	checkInt(ctx, loc, v: val, n: `18`, rel);
767	checkAlignment(ctx, loc, v: val, n: `4`, rel);
768	write32le(P: loc, V: setK16(insn: read32le(P: loc), imm: val >> `2`));
769	return;
770
771	case R_LARCH_B21:
772	checkInt(ctx, loc, v: val, n: `23`, rel);
773	checkAlignment(ctx, loc, v: val, n: `4`, rel);
774	write32le(P: loc, V: setD5k16(insn: read32le(P: loc), imm: val >> `2`));
775	return;
776
777	case R_LARCH_B26:
778	checkInt(ctx, loc, v: val, n: `28`, rel);
779	checkAlignment(ctx, loc, v: val, n: `4`, rel);
780	write32le(P: loc, V: setD10k16(insn: read32le(P: loc), imm: val >> `2`));
781	return;
782
783	case R_LARCH_CALL30: {
784	// This relocation is designed for adjacent pcaddu12i+jirl pairs that
785	// are patched in one time.
786	// The relocation range is [-2G, +2G) (of course must be 4-byte aligned).
787	checkInt(ctx, loc, v: val, n: `32`, rel);
788	checkAlignment(ctx, loc, v: val, n: `4`, rel);
789	// Although jirl adds the immediate as a signed value, it is always positive
790	// in this case, so no adjustment is needed, unlike CALL36.
791	uint32_t hi20 = extractBits(v: val, begin: `31`, end: `12`);
792	// Despite the name, the lower part is actually 12 bits with 4-byte aligned.
793	uint32_t lo10 = extractBits(v: val, begin: `11`, end: `2`);
794	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: hi20));
795	write32le(P: loc + `4`, V: setK16(insn: read32le(P: loc + `4`), imm: lo10));
796	return;
797	}
798
799	case R_LARCH_CALL36: {
800	// This relocation is designed for adjacent pcaddu18i+jirl pairs that
801	// are patched in one time. Because of sign extension of these insns'
802	// immediate fields, the relocation range is [-128G - 0x20000, +128G -
803	// 0x20000) (of course must be 4-byte aligned).
804	if (((int64_t)val + `0x20000`) != llvm::SignExtend64(X: val + `0x20000`, B: `38`))
805	reportRangeError(ctx, loc, rel, v: Twine (val), min: llvm::minIntN(N: `38`) - `0x20000`,
806	max: llvm::maxIntN(N: `38`) - `0x20000`);
807	checkAlignment(ctx, loc, v: val, n: `4`, rel);
808	// Since jirl performs sign extension on the offset immediate, adds (1<<17)
809	// to original val to get the correct hi20.
810	uint32_t hi20 = extractBits(v: val + (`1` << `17`), begin: `37`, end: `18`);
811	// Despite the name, the lower part is actually 18 bits with 4-byte aligned.
812	uint32_t lo16 = extractBits(v: val, begin: `17`, end: `2`);
813	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: hi20));
814	write32le(P: loc + `4`, V: setK16(insn: read32le(P: loc + `4`), imm: lo16));
815	return;
816	}
817
818	// Relocs intended for `addi`, `ld` or `st`.
819	case R_LARCH_PCALA_LO12:
820	// We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12
821	// on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes
822	// removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly
823	// its immediate slot width is different too (16, not 12).
824	// In this case, process like an R_LARCH_B16, but without overflow checking
825	// and only taking the value's lowest 12 bits.
826	if (isJirl(insn: read32le(P: loc))) {
827	checkAlignment(ctx, loc, v: val, n: `4`, rel);
828	val = SignExtend64<`12`>(x: val);
829	write32le(P: loc, V: setK16(insn: read32le(P: loc), imm: val >> `2`));
830	return;
831	}
832	[[fallthrough]];
833	case R_LARCH_ABS_LO12:
834	case R_LARCH_GOT_PC_LO12:
835	case R_LARCH_GOT_LO12:
836	case R_LARCH_TLS_LE_LO12:
837	case R_LARCH_TLS_IE_PC_LO12:
838	case R_LARCH_TLS_IE_LO12:
839	case R_LARCH_TLS_LE_LO12_R:
840	case R_LARCH_TLS_DESC_PC_LO12:
841	case R_LARCH_TLS_DESC_LO12:
842	case R_LARCH_PCADD_LO12:
843	case R_LARCH_GOT_PCADD_LO12:
844	case R_LARCH_TLS_IE_PCADD_LO12:
845	case R_LARCH_TLS_LD_PCADD_LO12:
846	case R_LARCH_TLS_GD_PCADD_LO12:
847	case R_LARCH_TLS_DESC_PCADD_LO12:
848	write32le(P: loc, V: setK12(insn: read32le(P: loc), imm: extractBits(v: val, begin: `11`, end: `0`)));
849	return;
850
851	// Relocs intended for `lu12i.w` or `pcalau12i`.
852	case R_LARCH_ABS_HI20:
853	case R_LARCH_PCALA_HI20:
854	case R_LARCH_GOT_PC_HI20:
855	case R_LARCH_GOT_HI20:
856	case R_LARCH_TLS_LE_HI20:
857	case R_LARCH_TLS_IE_PC_HI20:
858	case R_LARCH_TLS_IE_HI20:
859	case R_LARCH_TLS_LD_PC_HI20:
860	case R_LARCH_TLS_LD_HI20:
861	case R_LARCH_TLS_GD_PC_HI20:
862	case R_LARCH_TLS_GD_HI20:
863	case R_LARCH_TLS_DESC_PC_HI20:
864	case R_LARCH_TLS_DESC_HI20:
865	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: extractBits(v: val, begin: `31`, end: `12`)));
866	return;
867	case R_LARCH_PCADD_HI20:
868	case R_LARCH_GOT_PCADD_HI20:
869	case R_LARCH_TLS_IE_PCADD_HI20:
870	case R_LARCH_TLS_LD_PCADD_HI20:
871	case R_LARCH_TLS_GD_PCADD_HI20:
872	case R_LARCH_TLS_DESC_PCADD_HI20: {
873	uint64_t hi = val + `0x800`;
874	checkInt(ctx, loc, v: val, n: `32`, rel);
875	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: extractBits(v: hi, begin: `31`, end: `12`)));
876	return;
877	}
878	case R_LARCH_TLS_LE_HI20_R:
879	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: extractBits(v: val + `0x800`, begin: `31`, end: `12`)));
880	return;
881
882	// Relocs intended for `lu32i.d`.
883	case R_LARCH_ABS64_LO20:
884	case R_LARCH_PCALA64_LO20:
885	case R_LARCH_GOT64_PC_LO20:
886	case R_LARCH_GOT64_LO20:
887	case R_LARCH_TLS_LE64_LO20:
888	case R_LARCH_TLS_IE64_PC_LO20:
889	case R_LARCH_TLS_IE64_LO20:
890	case R_LARCH_TLS_DESC64_PC_LO20:
891	case R_LARCH_TLS_DESC64_LO20:
892	write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: extractBits(v: val, begin: `51`, end: `32`)));
893	return;
894
895	// Relocs intended for `lu52i.d`.
896	case R_LARCH_ABS64_HI12:
897	case R_LARCH_PCALA64_HI12:
898	case R_LARCH_GOT64_PC_HI12:
899	case R_LARCH_GOT64_HI12:
900	case R_LARCH_TLS_LE64_HI12:
901	case R_LARCH_TLS_IE64_PC_HI12:
902	case R_LARCH_TLS_IE64_HI12:
903	case R_LARCH_TLS_DESC64_PC_HI12:
904	case R_LARCH_TLS_DESC64_HI12:
905	write32le(P: loc, V: setK12(insn: read32le(P: loc), imm: extractBits(v: val, begin: `63`, end: `52`)));
906	return;
907
908	case R_LARCH_ADD6:
909	loc = (loc & `0xc0`) \| ((*loc + val) & `0x3f`);
910	return;
911	case R_LARCH_ADD8:
912	*loc += val;
913	return;
914	case R_LARCH_ADD16:
915	write16le(P: loc, V: read16le(P: loc) + val);
916	return;
917	case R_LARCH_ADD32:
918	write32le(P: loc, V: read32le(P: loc) + val);
919	return;
920	case R_LARCH_ADD64:
921	write64le(P: loc, V: read64le(P: loc) + val);
922	return;
923	case R_LARCH_ADD_ULEB128:
924	handleUleb128(ctx, loc, val);
925	return;
926	case R_LARCH_SUB6:
927	loc = (loc & `0xc0`) \| ((*loc - val) & `0x3f`);
928	return;
929	case R_LARCH_SUB8:
930	*loc -= val;
931	return;
932	case R_LARCH_SUB16:
933	write16le(P: loc, V: read16le(P: loc) - val);
934	return;
935	case R_LARCH_SUB32:
936	write32le(P: loc, V: read32le(P: loc) - val);
937	return;
938	case R_LARCH_SUB64:
939	write64le(P: loc, V: read64le(P: loc) - val);
940	return;
941	case R_LARCH_SUB_ULEB128:
942	handleUleb128(ctx, loc, val: -val);
943	return;
944
945	case R_LARCH_MARK_LA:
946	case R_LARCH_MARK_PCREL:
947	// no-op
948	return;
949
950	case R_LARCH_TLS_LE_ADD_R:
951	case R_LARCH_RELAX:
952	return; // Ignored (for now)
953
954	case R_LARCH_TLS_DESC_LD:
955	return; // nothing to do.
956	case R_LARCH_TLS_DESC32:
957	write32le(P: loc + `4`, V: val);
958	return;
959	case R_LARCH_TLS_DESC64:
960	write64le(P: loc + `8`, V: val);
961	return;
962
963	default:
964	llvm_unreachable("unknown relocation");
965	}
966	}
967
968	// If the section alignment is > 4, advance `dot` to insert NOPs and synthesize
969	// an ALIGN relocation. Otherwise, return false to use default handling.
970	template <class ELFT, class RelTy>
971	bool LoongArch::synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
972	Relocs<RelTy> rels) {
973	if (!baseSec) {
974	// Record the first input section with RELAX relocations. We will synthesize
975	// ALIGN relocations here.
976	for (auto rel : rels) {
977	if (rel.getType(false) == R_LARCH_RELAX) {
978	baseSec = sec;
979	break;
980	}
981	}
982	} else if (sec->addralign > `4`) {
983	// If the alignment is > 4 and the section does not start with an ALIGN
984	// relocation, synthesize one.
985	bool hasAlignRel = llvm::any_of(rels, [](const RelTy &rel) {
986	return rel.r_offset == `0` && rel.getType(false) == R_LARCH_ALIGN;
987	});
988	if (!hasAlignRel) {
989	synthesizedAligns.emplace_back(Args: dot - baseSec->getVA(),
990	Args: sec->addralign - `4`);
991	dot += sec->addralign - `4`;
992	return true;
993	}
994	}
995	return false;
996	}
997
998	// Finalize the relocation section by appending synthesized ALIGN relocations
999	// after processing all input sections.
1000	template <class ELFT, class RelTy>
1001	void LoongArch::finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
1002	Relocs<RelTy> rels) {
1003	auto *f = cast<ObjFile<ELFT>>(baseSec->file);
1004	auto shdr = f->template getELFShdrs<ELFT>()[baseSec->relSecIdx];
1005	// Create a copy of InputSection.
1006	sec = make<InputSection>(*f, shdr, baseSec->name);
1007	auto *baseRelSec = cast<InputSection>(f->getSections()[baseSec->relSecIdx]);
1008	sec = baseRelSec;
1009	baseSec = nullptr;
1010
1011	// Allocate buffer for original and synthesized relocations in RELA format.
1012	// If CREL is used, OutputSection::finalizeNonAllocCrel will convert RELA to
1013	// CREL.
1014	auto newSize = rels.size() + synthesizedAligns.size();
1015	auto relas = makeThreadLocalN<typename* ELFT::Rela>(newSize);
1016	sec->size = newSize * sizeof(typename ELFT::Rela);
1017	sec->content_ = reinterpret_cast<uint8_t *>(relas);
1018	sec->type = SHT_RELA;
1019	// Copy original relocations to the new buffer, potentially converting CREL to
1020	// RELA.
1021	for (auto [i, r] : llvm::enumerate(rels)) {
1022	relas[i].r_offset = r.r_offset;
1023	relas[i].setSymbolAndType(r.getSymbol(`0`), r.getType(`0`), false);
1024	if constexpr (RelTy::HasAddend)
1025	relas[i].r_addend = r.r_addend;
1026	}
1027	// Append synthesized ALIGN relocations to the buffer.
1028	for (auto [i, r] : llvm::enumerate(First&: synthesizedAligns)) {
1029	auto &rela = relas[rels.size() + i];
1030	rela.r_offset = r.first;
1031	rela.setSymbolAndType(`0`, R_LARCH_ALIGN, false);
1032	rela.r_addend = r.second;
1033	}
1034	synthesizedAligns.clear();
1035	// Replace the old relocation section with the new one in the output section.
1036	// addOrphanSections ensures that the output relocation section is processed
1037	// after osec.
1038	for (SectionCommand *cmd : sec->getParent()->commands) {
1039	auto *isd = dyn_cast<InputSectionDescription>(Val: cmd);
1040	if (!isd)
1041	continue;
1042	for (auto *&isec : isd->sections)
1043	if (isec == baseRelSec)
1044	isec = sec;
1045	}
1046	}
1047
1048	template <class ELFT>
1049	bool LoongArch::synthesizeAlignAux(uint64_t &dot, InputSection *sec) {
1050	bool ret = false;
1051	if (sec) {
1052	invokeOnRelocs(*sec, ret = synthesizeAlignForInput<ELFT>, dot, sec);
1053	} else if (baseSec) {
1054	invokeOnRelocs(*baseSec, finalizeSynthesizeAligns<ELFT>, dot, sec);
1055	}
1056	return ret;
1057	}
1058
1059	// Without linker relaxation enabled for a particular relocatable file or
1060	// section, the assembler will not generate R_LARCH_ALIGN relocations for
1061	// alignment directives. This becomes problematic in a two-stage linking
1062	// process: ld -r a.o b.o -o ab.o; ld ab.o -o ab. This function synthesizes an
1063	// R_LARCH_ALIGN relocation at section start when needed.
1064	//
1065	// When called with an input section (`sec` is not null): If the section
1066	// alignment is > 4, advance `dot` to insert NOPs and synthesize an ALIGN
1067	// relocation.
1068	//
1069	// When called after all input sections are processed (`sec` is null): The
1070	// output relocation section is updated with all the newly synthesized ALIGN
1071	// relocations.
1072	bool LoongArch::synthesizeAlign(uint64_t &dot, InputSection *sec) {
1073	assert(ctx.arg.relocatable);
1074	if (ctx.arg.is64)
1075	return synthesizeAlignAux<ELF64LE>(dot, sec);
1076	return synthesizeAlignAux<ELF32LE>(dot, sec);
1077	}
1078
1079	static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
1080	return i + `1` < relocs.size() && relocs [i + `1`].type == R_LARCH_RELAX;
1081	}
1082
1083	static bool isPairRelaxable(ArrayRef<Relocation> relocs, size_t i) {
1084	return relaxable(relocs, i) && relaxable(relocs, i: i + `2`) &&
1085	relocs [i].offset + `4` == relocs [i + `2`].offset;
1086	}
1087
1088	// Relax code sequence.
1089	// From:
1090	// pcalau12i $a0, %pc_hi20(sym) \| %ld_pc_hi20(sym) \| %gd_pc_hi20(sym)
1091	// \| %desc_pc_hi20(sym)
1092	// addi.w/d $a0, $a0, %pc_lo12(sym) \| %got_pc_lo12(sym) \| %got_pc_lo12(sym)
1093	// \| %desc_pc_lo12(sym)
1094	// To:
1095	// pcaddi $a0, %pc_lo12(sym) \| %got_pc_lo12(sym) \| %got_pc_lo12(sym)
1096	// \| %desc_pcrel_20(sym)
1097	//
1098	// From:
1099	// pcalau12i $a0, %got_pc_hi20(sym_got)
1100	// ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
1101	// To:
1102	// pcaddi $a0, %got_pc_hi20(sym_got)
1103	static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
1104	uint64_t loc, Relocation &rHi20, Relocation &rLo12,
1105	uint32_t &remove) {
1106	// check if the relocations are relaxable sequences.
1107	if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
1108	rLo12.type == R_LARCH_PCALA_LO12) \|\|
1109	(rHi20.type == R_LARCH_GOT_PC_HI20 &&
1110	rLo12.type == R_LARCH_GOT_PC_LO12) \|\|
1111	(rHi20.type == R_LARCH_TLS_GD_PC_HI20 &&
1112	rLo12.type == R_LARCH_GOT_PC_LO12) \|\|
1113	(rHi20.type == R_LARCH_TLS_LD_PC_HI20 &&
1114	rLo12.type == R_LARCH_GOT_PC_LO12) \|\|
1115	(rHi20.type == R_LARCH_TLS_DESC_PC_HI20 &&
1116	rLo12.type == R_LARCH_TLS_DESC_PC_LO12)))
1117	return;
1118
1119	// GOT references to absolute symbols can't be relaxed to use pcaddi in
1120	// position-independent code, because these instructions produce a relative
1121	// address.
1122	// Meanwhile skip undefined, preemptible and STT_GNU_IFUNC symbols, because
1123	// these symbols may be resolve in runtime.
1124	// Moreover, relaxation can only occur if the addends of both relocations are
1125	// zero for GOT references.
1126	if (rHi20.type == R_LARCH_GOT_PC_HI20 &&
1127	(!rHi20.sym \|\| rHi20.sym != rLo12.sym \|\| !rHi20.sym->isDefined() \|\|
1128	rHi20.sym->isPreemptible \|\| rHi20.sym->isGnuIFunc() \|\|
1129	(ctx.arg.isPic && !cast<Defined>(Val&: *rHi20.sym).section) \|\|
1130	rHi20.addend != `0` \|\| rLo12.addend != `0`))
1131	return;
1132
1133	uint64_t dest = `0`;
1134	if (rHi20.expr == RE_LOONGARCH_PLT_PAGE_PC)
1135	dest = rHi20.sym->getPltVA(ctx);
1136	else if (rHi20.expr == RE_LOONGARCH_PAGE_PC \|\|
1137	rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
1138	dest = rHi20.sym->getVA(ctx);
1139	else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC)
1140	dest = ctx.in.got ->getGlobalDynAddr(b: *rHi20.sym);
1141	else if (rHi20.expr == RE_LOONGARCH_TLSDESC_PAGE_PC)
1142	dest = ctx.in.got ->getTlsDescAddr(sym: *rHi20.sym);
1143	else {
1144	Err(ctx) << getErrorLoc(ctx, loc: (const uint8_t *)loc) << "unknown expr ("
1145	<< rHi20.expr << ") against symbol " << rHi20.sym
1146	<< "in relaxPCHi20Lo12";
1147	return;
1148	}
1149	dest += rHi20.addend;
1150
1151	const int64_t displace = dest - loc;
1152	// Check if the displace aligns 4 bytes or exceeds the range of pcaddi.
1153	if ((displace & `0x3`) != `0` \|\| !isInt<`22`>(x: displace))
1154	return;
1155
1156	// Note: If we can ensure that the .o files generated by LLVM only contain
1157	// relaxable instruction sequences with R_LARCH_RELAX, then we do not need to
1158	// decode instructions. The relaxable instruction sequences imply the
1159	// following constraints:
1160	// For relocation pairs related to got_pc, the opcodes of instructions*
1161	// must be pcalau12i + ld.w/d. In other cases, the opcodes must be pcalau12i +
1162	// addi.w/d.
1163	// The destination register of pcalau12i is guaranteed to be used only by*
1164	// the immediately following instruction.
1165	const uint32_t currInsn = read32le(P: sec.content().data() + rHi20.offset);
1166	const uint32_t nextInsn = read32le(P: sec.content().data() + rLo12.offset);
1167	// Check if use the same register.
1168	if (getD5(v: currInsn) != getJ5(v: nextInsn) \|\| getJ5(v: nextInsn) != getD5(v: nextInsn))
1169	return;
1170
1171	sec.relaxAux->relocTypes [i] = R_LARCH_RELAX;
1172	if (rHi20.type == R_LARCH_TLS_GD_PC_HI20)
1173	sec.relaxAux->relocTypes [i + `2`] = R_LARCH_TLS_GD_PCREL20_S2;
1174	else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20)
1175	sec.relaxAux->relocTypes [i + `2`] = R_LARCH_TLS_LD_PCREL20_S2;
1176	else if (rHi20.type == R_LARCH_TLS_DESC_PC_HI20)
1177	sec.relaxAux->relocTypes [i + `2`] = R_LARCH_TLS_DESC_PCREL20_S2;
1178	else
1179	sec.relaxAux->relocTypes [i + `2`] = R_LARCH_PCREL20_S2;
1180	sec.relaxAux->writes.push_back(Elt: insn(op: PCADDI, d: getD5(v: nextInsn), j: `0`, k: `0`));
1181	remove = `4`;
1182	}
1183
1184	// Relax code sequence.
1185	// From:
1186	// la32r:
1187	// pcaddu12i $ra, %call30(foo)
1188	// jirl $ra, $ra, 0
1189	// la32s/la64:
1190	// pcaddu18i $ra, %call36(foo)
1191	// jirl $ra, $ra, 0
1192	// To:
1193	// b/bl foo
1194	static void relaxMediumCall(Ctx &ctx, const InputSection &sec, size_t i,
1195	uint64_t loc, Relocation &r, uint32_t &remove) {
1196	const uint64_t dest =
1197	(r.expr == R_PLT_PC ? r.sym->getPltVA(ctx) : r.sym->getVA(ctx)) +
1198	r.addend;
1199
1200	const int64_t displace = dest - loc;
1201	// Check if the displace aligns 4 bytes or exceeds the range of b[l].
1202	if ((displace & `0x3`) != `0` \|\| !isInt<`28`>(x: displace))
1203	return;
1204
1205	const uint32_t nextInsn = read32le(P: sec.content().data() + r.offset + `4`);
1206	if (getD5(v: nextInsn) == R_RA) {
1207	// convert jirl to bl
1208	sec.relaxAux->relocTypes [i] = R_LARCH_B26;
1209	sec.relaxAux->writes.push_back(Elt: insn(op: BL, d: `0`, j: `0`, k: `0`));
1210	remove = `4`;
1211	} else if (getD5(v: nextInsn) == R_ZERO) {
1212	// convert jirl to b
1213	sec.relaxAux->relocTypes [i] = R_LARCH_B26;
1214	sec.relaxAux->writes.push_back(Elt: insn(op: B, d: `0`, j: `0`, k: `0`));
1215	remove = `4`;
1216	}
1217	}
1218
1219	// Relax code sequence.
1220	// From:
1221	// lu12i.w $rd, %le_hi20_r(sym)
1222	// add.w/d $rd, $rd, $tp, %le_add_r(sym)
1223	// addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
1224	// To:
1225	// addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
1226	static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i,
1227	uint64_t loc, Relocation &r, uint32_t &remove) {
1228	uint64_t val = r.sym->getVA(ctx, addend: r.addend);
1229	// Check if the val exceeds the range of addi/ld/st.
1230	if (!isInt<`12`>(x: val))
1231	return;
1232	uint32_t currInsn = read32le(P: sec.content().data() + r.offset);
1233	switch (r.type) {
1234	case R_LARCH_TLS_LE_HI20_R:
1235	case R_LARCH_TLS_LE_ADD_R:
1236	sec.relaxAux->relocTypes [i] = R_LARCH_RELAX;
1237	remove = `4`;
1238	break;
1239	case R_LARCH_TLS_LE_LO12_R:
1240	sec.relaxAux->writes.push_back(Elt: setJ5(insn: currInsn, imm: R_TP));
1241	sec.relaxAux->relocTypes [i] = R_LARCH_TLS_LE_LO12_R;
1242	break;
1243	}
1244	}
1245
1246	static bool relax(Ctx &ctx, InputSection &sec) {
1247	const uint64_t secAddr = sec.getVA();
1248	const MutableArrayRef<Relocation> relocs = sec.relocs();
1249	auto &aux = *sec.relaxAux;
1250	bool changed = false;
1251	ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);
1252	uint64_t delta = `0`;
1253
1254	std::fill_n(first: aux.relocTypes.get(), n: relocs.size(), value: R_LARCH_NONE);
1255	aux.writes.clear();
1256	for (auto [i, r] : llvm::enumerate(First: relocs)) {
1257	const uint64_t loc = secAddr + r.offset - delta;
1258	uint32_t &cur = aux.relocDeltas [i], remove = `0`;
1259	switch (r.type) {
1260	case R_LARCH_ALIGN: {
1261	const uint64_t addend =
1262	r.sym->isUndefined() ? Log2_64(Value: r.addend) + `1` : r.addend;
1263	const uint64_t allBytes = (`1ULL` << (addend & `0xff`)) - `4`;
1264	const uint64_t align = `1ULL` << (addend & `0xff`);
1265	const uint64_t maxBytes = addend >> `8`;
1266	const uint64_t off = loc & (align - `1`);
1267	const uint64_t curBytes = off == `0` ? `0` : align - off;
1268	// All bytes beyond the alignment boundary should be removed.
1269	// If emit bytes more than max bytes to emit, remove all.
1270	if (maxBytes != `0` && curBytes > maxBytes)
1271	remove = allBytes;
1272	else
1273	remove = allBytes - curBytes;
1274	// If we can't satisfy this alignment, we've found a bad input.
1275	if (LLVM_UNLIKELY(static_cast<int32_t>(remove) < `0`)) {
1276	Err(ctx) << getErrorLoc(ctx, loc: (const uint8_t *)loc)
1277	<< "insufficient padding bytes for " << r.type << ": "
1278	<< allBytes << " bytes available for "
1279	<< "requested alignment of " << align << " bytes";
1280	remove = `0`;
1281	}
1282	break;
1283	}
1284	case R_LARCH_PCALA_HI20:
1285	case R_LARCH_GOT_PC_HI20:
1286	case R_LARCH_TLS_GD_PC_HI20:
1287	case R_LARCH_TLS_LD_PC_HI20:
1288	// The overflow check for i+2 will be carried out in isPairRelaxable.
1289	if (isPairRelaxable(relocs, i))
1290	relaxPCHi20Lo12(ctx, sec, i, loc, rHi20&: r, rLo12&: relocs [i + `2`], remove);
1291	break;
1292	case R_LARCH_TLS_DESC_PC_HI20:
1293	if (r.expr == RE_LOONGARCH_GOT_PAGE_PC \|\| r.expr == R_TPREL) {
1294	if (relaxable(relocs, i))
1295	remove = `4`;
1296	} else if (isPairRelaxable(relocs, i))
1297	relaxPCHi20Lo12(ctx, sec, i, loc, rHi20&: r, rLo12&: relocs [i + `2`], remove);
1298	break;
1299	case R_LARCH_CALL30:
1300	case R_LARCH_CALL36:
1301	if (relaxable(relocs, i))
1302	relaxMediumCall(ctx, sec, i, loc, r, remove);
1303	break;
1304	case R_LARCH_TLS_LE_HI20_R:
1305	case R_LARCH_TLS_LE_ADD_R:
1306	case R_LARCH_TLS_LE_LO12_R:
1307	if (relaxable(relocs, i))
1308	relaxTlsLe(ctx, sec, i, loc, r, remove);
1309	break;
1310	case R_LARCH_TLS_IE_PC_HI20:
1311	if (relaxable(relocs, i) && r.expr == R_TPREL &&
1312	isUInt<`12`>(x: r.sym->getVA(ctx, addend: r.addend)))
1313	remove = `4`;
1314	break;
1315	case R_LARCH_TLS_DESC_PC_LO12:
1316	if (relaxable(relocs, i) &&
1317	(r.expr == RE_LOONGARCH_GOT_PAGE_PC \|\| r.expr == R_TPREL))
1318	remove = `4`;
1319	break;
1320	case R_LARCH_TLS_DESC_LD:
1321	if (relaxable(relocs, i) && r.expr == R_TPREL &&
1322	isUInt<`12`>(x: r.sym->getVA(ctx, addend: r.addend)))
1323	remove = `4`;
1324	break;
1325	}
1326
1327	// For all anchors whose offsets are <= r.offset, they are preceded by
1328	// the previous relocation whose `relocDeltas` value equals `delta`.
1329	// Decrease their st_value and update their st_size.
1330	for (; sa.size() && sa [`0`].offset <= r.offset; sa = sa.slice(N: `1`)) {
1331	if (sa [`0`].end)
1332	sa [`0`].d->size = sa [`0`].offset - delta - sa [`0`].d->value;
1333	else
1334	sa [`0`].d->value = sa [`0`].offset - delta;
1335	}
1336	delta += remove;
1337	if (delta != cur) {
1338	cur = delta;
1339	changed = true;
1340	}
1341	}
1342
1343	for (const SymbolAnchor &a : sa) {
1344	if (a.end)
1345	a.d->size = a.offset - delta - a.d->value;
1346	else
1347	a.d->value = a.offset - delta;
1348	}
1349	// Inform assignAddresses that the size has changed.
1350	if (!isUInt<`32`>(x: delta))
1351	Fatal(ctx) << "section size decrease is too large: " << delta;
1352	sec.bytesDropped = delta;
1353	return changed;
1354	}
1355
1356	// Convert TLS IE to LE in the normal or medium code model.
1357	// Original code sequence:
1358	// pcalau12i $a0, %ie_pc_hi20(sym)*
1359	// ld.d $a0, $a0, %ie_pc_lo12(sym)*
1360	//
1361	// The code sequence converted is as follows:
1362	// lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP*
1363	// ori $a0, src, %le_lo12(sym) # le_hi20 != 0, src = $a0,*
1364	// # otherwise, src = $zero
1365	//
1366	// When relaxation enables, redundant NOPs can be removed.
1367	static void tlsIeToLe(uint8_t loc, const* Relocation &rel, uint64_t val) {
1368	assert(isInt<`32`>(val) &&
1369	"val exceeds the range of medium code model in tlsIeToLe");
1370
1371	bool isUInt12 = isUInt<`12`>(x: val);
1372	const uint32_t currInsn = read32le(P: loc);
1373	switch (rel.type) {
1374	case R_LARCH_TLS_IE_PC_HI20:
1375	if (isUInt12)
1376	write32le(P: loc, V: insn(op: ANDI, d: R_ZERO, j: R_ZERO, k: `0`)); // nop
1377	else
1378	write32le(P: loc, V: insn(op: LU12I_W, d: getD5(v: currInsn), j: extractBits(v: val, begin: `31`, end: `12`),
1379	k: `0`)); // lu12i.w $a0, %le_hi20
1380	break;
1381	case R_LARCH_TLS_IE_PC_LO12:
1382	if (isUInt12)
1383	write32le(P: loc, V: insn(op: ORI, d: getD5(v: currInsn), j: R_ZERO,
1384	k: val)); // ori $a0, $zero, %le_lo12
1385	else
1386	write32le(P: loc, V: insn(op: ORI, d: getD5(v: currInsn), j: getJ5(v: currInsn),
1387	k: lo12(val))); // ori $a0, $a0, %le_lo12
1388	break;
1389	}
1390	}
1391
1392	// Convert TLSDESC GD/LD to IE.
1393	// In normal or medium code model, there are two forms of code sequences:
1394	// pcalau12i $a0, %desc_pc_hi20(sym_desc)*
1395	// addi.d $a0, $a0, %desc_pc_lo12(sym_desc)*
1396	// ld.d $ra, $a0, %desc_ld(sym_desc)*
1397	// jirl $ra, $ra, %desc_call(sym_desc)*
1398	// ------
1399	// pcaddi $a0, %desc_pcrel_20(a)*
1400	// load $ra, $a0, %desc_ld(a)*
1401	// jirl $ra, $ra, %desc_call(a)*
1402	//
1403	// The code sequence obtained is as follows:
1404	// pcalau12i $a0, %ie_pc_hi20(sym_ie)*
1405	// ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie)*
1406	//
1407	// Simplicity, whether tlsdescToIe or tlsdescToLe, we always tend to convert the
1408	// preceding instructions to NOPs, due to both forms of code sequence
1409	// (corresponding to relocation combinations:
1410	// R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and
1411	// R_LARCH_TLS_DESC_PCREL20_S2) have same process.
1412	//
1413	// When relaxation enables, redundant NOPs can be removed.
1414	void LoongArch::tlsdescToIe(uint8_t loc, const* Relocation &rel,
1415	uint64_t val) const {
1416	switch (rel.type) {
1417	case R_LARCH_TLS_DESC_PC_HI20:
1418	case R_LARCH_TLS_DESC_PC_LO12:
1419	case R_LARCH_TLS_DESC_PCREL20_S2:
1420	write32le(P: loc, V: insn(op: ANDI, d: R_ZERO, j: R_ZERO, k: `0`)); // nop
1421	break;
1422	case R_LARCH_TLS_DESC_LD:
1423	write32le(P: loc, V: insn(op: PCALAU12I, d: R_A0, j: `0`, k: `0`)); // pcalau12i $a0, %ie_pc_hi20
1424	relocateNoSym(loc, type: R_LARCH_TLS_IE_PC_HI20, val);
1425	break;
1426	case R_LARCH_TLS_DESC_CALL:
1427	write32le(P: loc, V: insn(op: ctx.arg.is64 ? LD_D : LD_W, d: R_A0, j: R_A0,
1428	k: `0`)); // ld.[wd] $a0, $a0, %ie_pc_lo12
1429	relocateNoSym(loc, type: R_LARCH_TLS_IE_PC_LO12, val);
1430	break;
1431	default:
1432	llvm_unreachable("unsupported relocation for TLSDESC to IE");
1433	}
1434	}
1435
1436	// Convert TLSDESC GD/LD to LE.
1437	// The code sequence obtained in the normal or medium code model is as follows:
1438	// lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP*
1439	// ori $a0, src, %le_lo12(sym) # le_hi20 != 0, src = $a0,*
1440	// # otherwise, src = $zero
1441	// See the comment in tlsdescToIe for detailed information.
1442	void LoongArch::tlsdescToLe(uint8_t loc, const* Relocation &rel,
1443	uint64_t val) const {
1444	assert(isInt<`32`>(val) &&
1445	"val exceeds the range of medium code model in tlsdescToLe");
1446
1447	bool isUInt12 = isUInt<`12`>(x: val);
1448	switch (rel.type) {
1449	case R_LARCH_TLS_DESC_PC_HI20:
1450	case R_LARCH_TLS_DESC_PC_LO12:
1451	case R_LARCH_TLS_DESC_PCREL20_S2:
1452	write32le(P: loc, V: insn(op: ANDI, d: R_ZERO, j: R_ZERO, k: `0`)); // nop
1453	break;
1454	case R_LARCH_TLS_DESC_LD:
1455	if (isUInt12)
1456	write32le(P: loc, V: insn(op: ANDI, d: R_ZERO, j: R_ZERO, k: `0`)); // nop
1457	else
1458	write32le(P: loc, V: insn(op: LU12I_W, d: R_A0, j: extractBits(v: val, begin: `31`, end: `12`),
1459	k: `0`)); // lu12i.w $a0, %le_hi20
1460	break;
1461	case R_LARCH_TLS_DESC_CALL:
1462	if (isUInt12)
1463	write32le(P: loc, V: insn(op: ORI, d: R_A0, j: R_ZERO, k: val)); // ori $a0, $zero, %le_lo12
1464	else
1465	write32le(P: loc,
1466	V: insn(op: ORI, d: R_A0, j: R_A0, k: lo12(val))); // ori $a0, $a0, %le_lo12
1467	break;
1468	default:
1469	llvm_unreachable("unsupported relocation for TLSDESC to LE");
1470	}
1471	}
1472
1473	// Try GOT indirection to PC relative optimization.
1474	// From:
1475	// pcalau12i $a0, %got_pc_hi20(sym_got)*
1476	// ld.w/d $a0, $a0, %got_pc_lo12(sym_got)*
1477	// To:
1478	// pcalau12i $a0, %pc_hi20(sym)*
1479	// addi.w/d $a0, $a0, %pc_lo12(sym)*
1480	//
1481	// Note: Althouth the optimization has been performed, the GOT entries still
1482	// exists, similarly to AArch64. Eliminating the entries will increase code
1483	// complexity.
1484	bool LoongArch::tryGotToPCRel(uint8_t loc, const* Relocation &rHi20,
1485	const Relocation &rLo12, uint64_t secAddr) const {
1486	// Check if the relocations apply to consecutive instructions.
1487	if (rHi20.offset + `4` != rLo12.offset)
1488	return false;
1489
1490	// Check if the relocations reference the same symbol and skip undefined,
1491	// preemptible and STT_GNU_IFUNC symbols.
1492	if (!rHi20.sym \|\| rHi20.sym != rLo12.sym \|\| !rHi20.sym->isDefined() \|\|
1493	rHi20.sym->isPreemptible \|\| rHi20.sym->isGnuIFunc())
1494	return false;
1495
1496	// GOT references to absolute symbols can't be relaxed to use PCALAU12I/ADDI
1497	// in position-independent code because these instructions produce a relative
1498	// address.
1499	if ((ctx.arg.isPic && !cast<Defined>(Val&: *rHi20.sym).section))
1500	return false;
1501
1502	// Check if the addends of the both relocations are zero.
1503	if (rHi20.addend != `0` \|\| rLo12.addend != `0`)
1504	return false;
1505
1506	const uint32_t currInsn = read32le(P: loc);
1507	const uint32_t nextInsn = read32le(P: loc + `4`);
1508	const uint32_t ldOpcode = ctx.arg.is64 ? LD_D : LD_W;
1509	// Check if the first instruction is PCALAU12I and the second instruction is
1510	// LD.
1511	if ((currInsn & `0xfe000000`) != PCALAU12I \|\|
1512	(nextInsn & `0xffc00000`) != ldOpcode)
1513	return false;
1514
1515	// Check if use the same register.
1516	if (getD5(v: currInsn) != getJ5(v: nextInsn) \|\| getJ5(v: nextInsn) != getD5(v: nextInsn))
1517	return false;
1518
1519	Symbol &sym = *rHi20.sym;
1520	uint64_t symLocal = sym.getVA(ctx);
1521	const int64_t displace = symLocal - getLoongArchPage(p: secAddr + rHi20.offset);
1522	// Check if the symbol address is in
1523	// [(PC & ~0xfff) - 2GiB - 0x800, (PC & ~0xfff) + 2GiB - 0x800).
1524	const int64_t underflow = -`0x80000000LL` - `0x800`;
1525	const int64_t overflow = `0x80000000LL` - `0x800`;
1526	if (!(displace >= underflow && displace < overflow))
1527	return false;
1528
1529	Relocation newRHi20 = {.expr: RE_LOONGARCH_PAGE_PC, .type: R_LARCH_PCALA_HI20, .offset: rHi20.offset,
1530	.addend: rHi20.addend, .sym: &sym};
1531	Relocation newRLo12 = {.expr: R_ABS, .type: R_LARCH_PCALA_LO12, .offset: rLo12.offset, .addend: rLo12.addend,
1532	.sym: &sym};
1533	uint64_t pageDelta =
1534	getLoongArchPageDelta(dest: symLocal, pc: secAddr + rHi20.offset, type: rHi20.type);
1535	// pcalau12i $a0, %pc_hi20
1536	write32le(P: loc, V: insn(op: PCALAU12I, d: getD5(v: currInsn), j: `0`, k: `0`));
1537	relocate(loc, rel: newRHi20, val: pageDelta);
1538	// addi.w/d $a0, $a0, %pc_lo12
1539	write32le(P: loc + `4`, V: insn(op: ctx.arg.is64 ? ADDI_D : ADDI_W, d: getD5(v: nextInsn),
1540	j: getJ5(v: nextInsn), k: `0`));
1541	relocate(loc: loc + `4`, rel: newRLo12, val: SignExtend64(X: symLocal, B: `64`));
1542	return true;
1543	}
1544
1545	// During TLSDESC to IE, the converted code sequence always includes an
1546	// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
1547	// in `getRelocTargetVA`, expr of this instruction should be adjusted to R_GOT,
1548	// while expr of other instructions related to the Hi20 relocation (pcalau12i)
1549	// should be adjusted to RE_LOONGARCH_GOT_PAGE_PC. Specifically, in the normal
1550	// or medium code model, the instruction with relocation R_LARCH_TLS_DESC_CALL
1551	// is the candidate of Lo12 relocation.
1552
1553	static bool pairForGotRels(ArrayRef<Relocation> relocs) {
1554	// Check if R_LARCH_GOT_PC_HI20 and R_LARCH_GOT_PC_LO12 always appear in
1555	// pairs.
1556	size_t i = `0`;
1557	const size_t size = relocs.size();
1558	for (; i != size; ++i) {
1559	if (relocs [i].type == R_LARCH_GOT_PC_HI20) {
1560	if (i + `1` < size && relocs [i + `1`].type == R_LARCH_GOT_PC_LO12) {
1561	++i;
1562	continue;
1563	}
1564	if (relaxable(relocs, i) && i + `2` < size &&
1565	relocs [i + `2`].type == R_LARCH_GOT_PC_LO12) {
1566	i += `2`;
1567	continue;
1568	}
1569	break;
1570	} else if (relocs [i].type == R_LARCH_GOT_PC_LO12) {
1571	break;
1572	}
1573	}
1574	return i == size;
1575	}
1576
1577	void LoongArch::relocateAlloc(InputSection &sec, uint8_t buf) const* {
1578	const unsigned bits = ctx.arg.is64 ? `64` : `32`;
1579	uint64_t secAddr = sec.getOutputSection()->addr + sec.outSecOff;
1580	bool isExtreme = false;
1581	const MutableArrayRef<Relocation> relocs = sec.relocs();
1582	const bool isPairForGotRels = pairForGotRels(relocs);
1583	for (size_t i = `0`, size = relocs.size(); i != size; ++i) {
1584	Relocation &rel = relocs [i];
1585	if (rel.expr == R_RELAX_HINT)
1586	continue;
1587	uint8_t *loc = buf + rel.offset;
1588	uint64_t val = SignExtend64(
1589	X: sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset), B: bits);
1590	switch (rel.type) {
1591	case R_LARCH_TLS_IE_PC_HI20:
1592	case R_LARCH_TLS_IE_PC_LO12:
1593	// IE to LE. Not supported in extreme code model.
1594	if (rel.expr != R_TPREL)
1595	break;
1596	if (rel.type == R_LARCH_TLS_IE_PC_HI20)
1597	isExtreme =
1598	i + `2` < size && relocs [i + `2`].type == R_LARCH_TLS_IE64_PC_LO20;
1599	if (isExtreme) {
1600	rel.expr = getRelExpr(type: rel.type, s: *rel.sym, loc);
1601	val = SignExtend64(X: sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset),
1602	B: bits);
1603	break;
1604	}
1605	if (relaxable(relocs, i) && rel.type == R_LARCH_TLS_IE_PC_HI20 &&
1606	isUInt<`12`>(x: val))
1607	continue;
1608	tlsIeToLe(loc, rel, val);
1609	continue;
1610
1611	case R_LARCH_TLS_DESC_PC_HI20:
1612	case R_LARCH_TLS_DESC_PC_LO12:
1613	case R_LARCH_TLS_DESC_LD:
1614	case R_LARCH_TLS_DESC_PCREL20_S2:
1615	// TLSDESC to LE/IE. Not supported in extreme code model.
1616	if (rel.expr != R_TPREL && rel.expr != RE_LOONGARCH_GOT_PAGE_PC)
1617	break;
1618	if (rel.type == R_LARCH_TLS_DESC_PC_HI20)
1619	isExtreme =
1620	i + `2` < size && relocs [i + `2`].type == R_LARCH_TLS_DESC64_PC_LO20;
1621	if (isExtreme) {
1622	rel.expr = getRelExpr(type: rel.type, s: *rel.sym, loc);
1623	val = SignExtend64(X: sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset),
1624	B: bits);
1625	break;
1626	}
1627	if (relaxable(relocs, i) && (rel.type == R_LARCH_TLS_DESC_PC_HI20 \|\|
1628	rel.type == R_LARCH_TLS_DESC_PC_LO12))
1629	continue;
1630	if (rel.expr == R_TPREL) {
1631	if (relaxable(relocs, i) && rel.type == R_LARCH_TLS_DESC_LD &&
1632	isUInt<`12`>(x: val))
1633	continue;
1634	tlsdescToLe(loc, rel, val);
1635	} else {
1636	tlsdescToIe(loc, rel, val);
1637	}
1638	continue;
1639
1640	case R_LARCH_TLS_DESC_CALL:
1641	if (isExtreme)
1642	continue;
1643	if (rel.expr == R_TPREL)
1644	tlsdescToLe(loc, rel, val);
1645	else
1646	tlsdescToIe(loc, rel, val);
1647	continue;
1648
1649	case R_LARCH_GOT_PC_HI20:
1650	// GOT indirection to PC relative optimization in normal or medium code
1651	// model, whether or not with R_LARCH_RELAX. If the code sequence can be
1652	// relaxed to a single pcaddi, the first instruction will be removed and
1653	// it will not reach here.
1654	if (isPairForGotRels) {
1655	bool isRelax = relaxable(relocs, i);
1656	const Relocation lo12Rel = isRelax ? relocs [i + `2`] : relocs [i + `1`];
1657	if (lo12Rel.type == R_LARCH_GOT_PC_LO12 &&
1658	tryGotToPCRel(loc, rHi20: rel, rLo12: lo12Rel, secAddr)) {
1659	i += isRelax ? `2` : `1`;
1660	continue;
1661	}
1662	}
1663	break;
1664
1665	default:
1666	break;
1667	}
1668	relocate(loc, rel, val);
1669	}
1670	}
1671
1672	// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
1673	// the absence of a linker script. For call and load/store R_LARCH_RELAX, code
1674	// shrinkage may reduce displacement and make more relocations eligible for
1675	// relaxation. Code shrinkage may increase displacement to a call/load/store
1676	// target at a higher fixed address, invalidating an earlier relaxation. Any
1677	// change in section sizes can have cascading effect and require another
1678	// relaxation pass.
1679	bool LoongArch::relaxOnce(int pass) const {
1680	if (pass == `0`)
1681	initSymbolAnchors(ctx);
1682
1683	SmallVector<InputSection *, `0`> storage;
1684	bool changed = false;
1685	for (OutputSection *osec : ctx.outputSections) {
1686	if (!(osec->flags & SHF_EXECINSTR))
1687	continue;
1688	for (InputSection sec : getInputSections(os: osec, storage))
1689	if (sec->relaxAux)
1690	changed \|= relax(ctx, sec&: *sec);
1691	}
1692	return changed;
1693	}
1694
1695	void LoongArch::finalizeRelax(int passes) const {
1696	Log(ctx) << "relaxation passes: " << passes;
1697	SmallVector<InputSection *, `0`> storage;
1698	for (OutputSection *osec : ctx.outputSections) {
1699	if (!(osec->flags & SHF_EXECINSTR))
1700	continue;
1701	for (InputSection sec : getInputSections(os: osec, storage)) {
1702	if (!sec->relaxAux)
1703	continue;
1704	RelaxAux &aux = *sec->relaxAux;
1705	if (!aux.relocDeltas)
1706	continue;
1707
1708	MutableArrayRef<Relocation> rels = sec->relocs();
1709	ArrayRef<uint8_t> old = sec->content();
1710	size_t newSize = old.size() - aux.relocDeltas [rels.size() - `1`];
1711	size_t writesIdx = `0`;
1712	uint8_t *p = ctx.bAlloc.Allocate<uint8_t>(Num: newSize);
1713	uint64_t offset = `0`;
1714	int64_t delta = `0`;
1715	sec->content_ = p;
1716	sec->size = newSize;
1717	sec->bytesDropped = `0`;
1718
1719	// Update section content: remove NOPs for R_LARCH_ALIGN and rewrite
1720	// instructions for relaxed relocations.
1721	for (size_t i = `0`, e = rels.size(); i != e; ++i) {
1722	uint32_t remove = aux.relocDeltas [i] - delta;
1723	delta = aux.relocDeltas [i];
1724	if (remove == `0` && aux.relocTypes [i] == R_LARCH_NONE)
1725	continue;
1726
1727	// Copy from last location to the current relocated location.
1728	Relocation &r = rels [i];
1729	uint64_t size = r.offset - offset;
1730	memcpy(dest: p, src: old.data() + offset, n: size);
1731	p += size;
1732
1733	int64_t skip = `0`;
1734	if (RelType newType = aux.relocTypes [i]) {
1735	switch (newType) {
1736	case R_LARCH_RELAX:
1737	break;
1738	case R_LARCH_PCREL20_S2:
1739	skip = `4`;
1740	write32le(P: p, V: aux.writes [writesIdx++]);
1741	// RelExpr is needed for relocating.
1742	r.expr = r.sym->hasFlag(bit: NEEDS_PLT) ? R_PLT_PC : R_PC;
1743	break;
1744	case R_LARCH_B26:
1745	case R_LARCH_TLS_LE_LO12_R:
1746	skip = `4`;
1747	write32le(P: p, V: aux.writes [writesIdx++]);
1748	break;
1749	case R_LARCH_TLS_GD_PCREL20_S2:
1750	// Note: R_LARCH_TLS_LD_PCREL20_S2 must also use R_TLSGD_PC instead
1751	// of R_TLSLD_PC due to historical reasons. In fact, right now TLSLD
1752	// behaves exactly like TLSGD on LoongArch.
1753	//
1754	// This reason has also been mentioned in mold commit:
1755	// https://github.com/rui314/mold/commit/5dfa1cf07c03bd57cb3d493b652ef22441bcd71c
1756	case R_LARCH_TLS_LD_PCREL20_S2:
1757	skip = `4`;
1758	write32le(P: p, V: aux.writes [writesIdx++]);
1759	r.expr = R_TLSGD_PC;
1760	break;
1761	case R_LARCH_TLS_DESC_PCREL20_S2:
1762	skip = `4`;
1763	write32le(P: p, V: aux.writes [writesIdx++]);
1764	r.expr = R_TLSDESC_PC;
1765	break;
1766	default:
1767	llvm_unreachable("unsupported type");
1768	}
1769	}
1770
1771	p += skip;
1772	offset = r.offset + skip + remove;
1773	}
1774	memcpy(dest: p, src: old.data() + offset, n: old.size() - offset);
1775
1776	// Subtract the previous relocDeltas value from the relocation offset.
1777	// For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease
1778	// their r_offset by the same delta.
1779	delta = `0`;
1780	for (size_t i = `0`, e = rels.size(); i != e;) {
1781	uint64_t cur = rels [i].offset;
1782	do {
1783	rels [i].offset -= delta;
1784	if (aux.relocTypes [i] != R_LARCH_NONE)
1785	rels [i].type = aux.relocTypes [i];
1786	} while (++i != e && rels [i].offset == cur);
1787	delta = aux.relocDeltas [i - `1`];
1788	}
1789	}
1790	}
1791	}
1792
1793	void elf::setLoongArchTargetInfo(Ctx &ctx) {
1794	ctx.target.reset(p: new LoongArch (ctx));
1795	}
1796

Browse the source code of llvm_projects/lld/ELF/Arch/LoongArch.cpp