PPC64.cpp source code [llvm_projects/lld/ELF/Arch/PPC64.cpp]

1	//===- PPC64.cpp ----------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "InputFiles.h"
10	#include "OutputSections.h"
11	#include "RelocScan.h"
12	#include "SymbolTable.h"
13	#include "Symbols.h"
14	#include "SyntheticSections.h"
15	#include "Target.h"
16	#include "Thunks.h"
17
18	using namespace llvm;
19	using namespace llvm::object;
20	using namespace llvm::support::endian;
21	using namespace llvm::ELF;
22	using namespace lld;
23	using namespace lld::elf;
24
// Bias added to the start address of the TOC to obtain the TOC base; see
// getPPC64TocBase().
constexpr uint64_t ppc64TocOffset = 0x8000;
// NOTE(review): not referenced in the visible part of this file; presumably
// the bias applied to dynamic-thread-pointer-relative TLS offsets -- confirm
// against its uses.
constexpr uint64_t dynamicThreadPointerOffset = 0x8000;
27
28	namespace {
// The instruction encoding of bits 21-30 from the ISA for the X-form
// instructions that can be used as part of the initial exec TLS sequence.
// These are the values getPPCDFormOp()/getPPCDSFormOp() switch on to pick the
// matching D-form or DS-form replacement.
enum XFormOpcd {
  LBZX = 87,
  LHZX = 279,
  LWZX = 23,
  LDX = 21,
  STBX = 215,
  STHX = 407,
  STWX = 151,
  STDX = 149,
  LHAX = 343,
  LWAX = 341,
  LFSX = 535,
  LFDX = 599,
  STFSX = 663,
  STFDX = 727,
  ADD = 266,
};
48
// Primary opcodes (the value getPrimaryOpCode() extracts, i.e. the top six
// bits of the instruction word) of the D-form loads, stores and addi. The
// values are shifted left by 26 when an instruction word is built from them.
enum DFormOpcd {
  LBZ = 34,
  LBZU = 35,
  LHZ = 40,
  LHZU = 41,
  LHAU = 43,
  LWZ = 32,
  LWZU = 33,
  LFSU = 49,
  LFDU = 51,
  STB = 38,
  STBU = 39,
  STH = 44,
  STHU = 45,
  STW = 36,
  STWU = 37,
  STFSU = 53,
  STFDU = 55,
  LHA = 42,
  LFS = 48,
  LFD = 50,
  STFS = 52,
  STFD = 54,
  ADDI = 14
};
74
// Primary opcodes of the DS-form instructions. LWA deliberately has the same
// primary opcode as LD; the two are told apart by the low two bits of the
// instruction word (getPPCDSFormOp() ORs in 0x2 for LWA).
enum DSFormOpcd {
  LD = 58,
  LWA = 58,
  STD = 62
};
80
// Encoding of the `nop` instruction that the relaxation routines write over
// instructions they remove.
constexpr uint32_t NOP = 0x60000000;
82
// Encodings of the non-prefixed ("legacy") loads and stores recognised by
// getPPCLegacyInsn(). Each value is the instruction word with only the
// identifying bits set: the primary opcode in the top six bits plus, where
// several instructions share a primary opcode, the low-order bits that tell
// them apart. NOINSN (0) means "not a recognised instruction".
enum class PPCLegacyInsn : uint32_t {
  NOINSN = 0,
  // Loads.
  LBZ = 0x88000000,
  LHZ = 0xa0000000,
  LWZ = 0x80000000,
  LHA = 0xa8000000,
  LWA = 0xe8000002,
  LD = 0xe8000000,
  LFS = 0xC0000000,
  LXSSP = 0xe4000003,
  LFD = 0xc8000000,
  LXSD = 0xe4000002,
  LXV = 0xf4000001,
  LXVP = 0x18000000,

  // Stores.
  STB = 0x98000000,
  STH = 0xb0000000,
  STW = 0x90000000,
  STD = 0xf8000000,
  STFS = 0xd0000000,
  STXSSP = 0xf4000003,
  STFD = 0xd8000000,
  STXSD = 0xf4000002,
  STXV = 0xf4000005,
  STXVP = 0x18000001
};
// 64-bit templates for the prefixed counterparts of PPCLegacyInsn. The prefix
// word sits in the upper 32 bits (PREFIX_MLS or PREFIX_8LS); the lower 32 bits
// hold the suffix opcode when it differs from the legacy instruction's.
// Entries that are just PREFIX_MLS carry no suffix opcode of their own:
// getPCRelativeForm() ORs in bits taken from the legacy encoding.
// NOINSN (0) means "no prefixed form".
enum class PPCPrefixedInsn : uint64_t {
  NOINSN = 0,
  PREFIX_MLS = 0x0610000000000000,
  PREFIX_8LS = 0x0410000000000000,

  // Loads.
  PLBZ = PREFIX_MLS,
  PLHZ = PREFIX_MLS,
  PLWZ = PREFIX_MLS,
  PLHA = PREFIX_MLS,
  PLWA = PREFIX_8LS | 0xa4000000,
  PLD = PREFIX_8LS | 0xe4000000,
  PLFS = PREFIX_MLS,
  PLXSSP = PREFIX_8LS | 0xac000000,
  PLFD = PREFIX_MLS,
  PLXSD = PREFIX_8LS | 0xa8000000,
  PLXV = PREFIX_8LS | 0xc8000000,
  PLXVP = PREFIX_8LS | 0xe8000000,

  // Stores.
  PSTB = PREFIX_MLS,
  PSTH = PREFIX_MLS,
  PSTW = PREFIX_MLS,
  PSTD = PREFIX_8LS | 0xf4000000,
  PSTFS = PREFIX_MLS,
  PSTXSSP = PREFIX_8LS | 0xbc000000,
  PSTFD = PREFIX_MLS,
  PSTXSD = PREFIX_8LS | 0xb8000000,
  PSTXV = PREFIX_8LS | 0xd8000000,
  PSTXVP = PREFIX_8LS | 0xf8000000
};
142
143	static bool checkPPCLegacyInsn(uint32_t encoding) {
144	PPCLegacyInsn insn = static_cast<PPCLegacyInsn>(encoding);
145	if (insn == PPCLegacyInsn::NOINSN)
146	return false;
147	#define PCREL_OPT(Legacy, PCRel, InsnMask) \
148	if (insn == PPCLegacyInsn::Legacy) \
149	return true;
150	#include "PPCInsns.def"
151	#undef PCREL_OPT
152	return false;
153	}
154
// Masks to apply to legacy instructions when converting them to prefixed,
// pc-relative versions. For the most part, the primary opcode is shared
// between the legacy instruction and the suffix of its prefixed version.
// However, there are some instances where that isn't the case (DS-Form and
// DQ-form instructions).
//
// The mask is ANDed with a 32-bit encoding, so bit 63 of ST_STX28_TO5 never
// selects anything; it only makes that enumerator distinct from ONLY_RST so
// that getPCRelativeForm() knows to relocate the [S/T]X bit itself.
enum class LegacyToPrefixMask : uint64_t {
  NOMASK = 0x0,
  OPC_AND_RST = 0xffe00000, // Primary opc (0-5) and R[ST] (6-10).
  ONLY_RST = 0x3e00000,     // [RS]T (6-10).
  ST_STX28_TO5 =
      0x8000000003e00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5.
};
167
168	class PPC64 final : public TargetInfo {
169	public:
170	PPC64(Ctx &);
171	uint32_t calcEFlags() const override;
172	void initTargetSpecificSections() override;
173	RelExpr getRelExpr(RelType type, const Symbol &s,
174	const uint8_t loc) const* override;
175	RelType getDynRel(RelType type) const override;
176	int64_t getImplicitAddend(const uint8_t buf, RelType type) const* override;
177	void writePltHeader(uint8_t buf) const* override;
178	void writePlt(uint8_t buf, const* Symbol &sym,
179	uint64_t pltEntryAddr) const override;
180	void writeIplt(uint8_t buf, const* Symbol &sym,
181	uint64_t pltEntryAddr) const override;
182	template <class ELFT, class RelTy>
183	void scanSectionImpl(InputSectionBase &, Relocs<RelTy>);
184	void scanSection(InputSectionBase &) override;
185	void relocate(uint8_t loc, const* Relocation &rel,
186	uint64_t val) const override;
187	void writeGotHeader(uint8_t buf) const* override;
188	bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
189	uint64_t branchAddr, const Symbol &s,
190	int64_t a) const override;
191	uint32_t getThunkSectionSpacing() const override;
192	bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
193	RelExpr adjustGotPcExpr(RelType type, int64_t addend,
194	const uint8_t loc) const* override;
195	void relaxGot(uint8_t loc, const* Relocation &rel, uint64_t val) const;
196	void relocateAlloc(InputSection &sec, uint8_t buf) const* override;
197
198	bool adjustPrologueForCrossSplitStack(uint8_t loc, uint8_t end,
199	uint8_t stOther) const override;
200
201	private:
202	void relaxTlsGdToIe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
203	void relaxTlsGdToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
204	void relaxTlsLdToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
205	void relaxTlsIeToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
206	};
207	} // namespace
208
209	uint64_t elf::getPPC64TocBase(Ctx &ctx) {
210	// The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
211	// TOC starts where the first of these sections starts. We always create a
212	// .got when we see a relocation that uses it, so for us the start is always
213	// the .got.
214	uint64_t tocVA = ctx.in.got ->getVA();
215
216	// Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000
217	// thus permitting a full 64 Kbytes segment. Note that the glibc startup
218	// code (crt1.o) assumes that you can get from the TOC base to the
219	// start of the .toc section with only a single (signed) 16-bit relocation.
220	return tocVA + ppc64TocOffset;
221	}
222
223	unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(Ctx &ctx, uint8_t stOther) {
224	// The offset is encoded into the 3 most significant bits of the st_other
225	// field, with some special values described in section 3.4.1 of the ABI:
226	// 0 --> Zero offset between the GEP and LEP, and the function does NOT use
227	// the TOC pointer (r2). r2 will hold the same value on returning from
228	// the function as it did on entering the function.
229	// 1 --> Zero offset between the GEP and LEP, and r2 should be treated as a
230	// caller-saved register for all callers.
231	// 2-6 --> The binary logarithm of the offset eg:
232	// 2 --> 2^2 = 4 bytes --> 1 instruction.
233	// 6 --> 2^6 = 64 bytes --> 16 instructions.
234	// 7 --> Reserved.
235	uint8_t gepToLep = (stOther >> `5`) & `7`;
236	if (gepToLep < `2`)
237	return `0`;
238
239	// The value encoded in the st_other bits is the
240	// log-base-2(offset).
241	if (gepToLep < `7`)
242	return `1` << gepToLep;
243
244	ErrAlways(ctx)
245	<< "reserved value of 7 in the 3 most-significant-bits of st_other";
246	return `0`;
247	}
248
249	void elf::writePrefixedInst(Ctx &ctx, uint8_t *loc, uint64_t insn) {
250	insn = ctx.arg.isLE ? insn << `32` \| insn >> `32` : insn;
251	write64(ctx, p: loc, v: insn);
252	}
253
254	static bool addOptional(Ctx &ctx, StringRef name, uint64_t value,
255	std::vector<Defined *> &defined) {
256	Symbol *sym = ctx.symtab ->find(name);
257	if (!sym \|\| sym->isDefined())
258	return false;
259	sym->resolve(ctx, other: Defined {ctx, ctx.internalFile, StringRef (), STB_GLOBAL,
260	STV_HIDDEN, STT_FUNC, value,
261	/size=/`0`, /section=/nullptr});
262	defined.push_back(x: cast<Defined>(Val: sym));
263	return true;
264	}
265
266	// If from is 14, write ${prefix}14: firstInsn; ${prefix}15:
267	// firstInsn+0x200008; ...; ${prefix}31: firstInsn+(31-14)0x200008; $tail*
268	// The labels are defined only if they exist in the symbol table.
269	static void writeSequence(Ctx &ctx, const char prefix, int* from,
270	uint32_t firstInsn, ArrayRef<uint32_t> tail) {
271	std::vector<Defined *> defined;
272	char name[`16`];
273	int first;
274	const size_t size = `32` - from + tail.size();
275	MutableArrayRef<uint32_t> buf(ctx.bAlloc.Allocate<uint32_t>(Num: size), size);
276	uint32_t *ptr = buf.data();
277	for (int r = from; r < `32`; ++r) {
278	format(Fmt: "%s%d", Vals: prefix, Vals: r).snprint(Buffer: name, BufferSize: sizeof(name));
279	if (addOptional(ctx, name, value: `4` * (r - from), defined) && defined.size() == `1`)
280	first = r - from;
281	write32(ctx, p: ptr++, v: firstInsn + `0x200008` * (r - from));
282	}
283	for (uint32_t insn : tail)
284	write32(ctx, p: ptr++, v: insn);
285	assert(ptr == &*buf.end());
286
287	if (defined.empty())
288	return;
289	// The full section content has the extent of [begin, end). We drop unused
290	// instructions and write [first,end).
291	auto *sec = make<InputSection>(
292	args&: ctx.internalFile, args: ".text", args: SHT_PROGBITS, args: SHF_ALLOC, /addralign=/args: `4`,
293	/entsize=/args: `0`,
294	args: ArrayRef(reinterpret_cast<uint8_t *>(buf.data() + first),
295	`4` * (buf.size() - first)));
296	ctx.inputSections.push_back(Elt: sec);
297	for (Defined *sym : defined) {
298	sym->section = sec;
299	sym->value -= `4` * first;
300	}
301	}
302
303	// Implements some save and restore functions as described by ELF V2 ABI to be
304	// compatible with GCC. With GCC -Os, when the number of call-saved registers
305	// exceeds a certain threshold, GCC generates _savegpr0_ _restgpr0_* calls and*
306	// expects the linker to define them. See
307	// https://sourceware.org/pipermail/binutils/2002-February/017444.html and
308	// https://sourceware.org/pipermail/binutils/2004-August/036765.html . This is
309	// weird because libgcc.a would be the natural place. The linker generation
310	// approach has the advantage that the linker can generate multiple copies to
311	// avoid long branch thunks. However, we don't consider the advantage
312	// significant enough to complicate our trunk implementation, so we take the
313	// simple approach and synthesize .text sections providing the implementation.
314	void elf::addPPC64SaveRestore(Ctx &ctx) {
315	constexpr uint32_t blr = `0x4e800020`, mtlr_0 = `0x7c0803a6`;
316
317	// _restgpr0_14: ld 14, -144(1); _restgpr0_15: ld 15, -136(1); ...
318	// Tail: ld 0, 16(1); mtlr 0; blr
319	writeSequence(ctx, prefix: "_restgpr0_", from: `14`, firstInsn: `0xe9c1ff70`, tail: {`0xe8010010`, mtlr_0, blr});
320	// _restgpr1_14: ld 14, -144(12); _restgpr1_15: ld 15, -136(12); ...
321	// Tail: blr
322	writeSequence(ctx, prefix: "_restgpr1_", from: `14`, firstInsn: `0xe9ccff70`, tail: {blr});
323	// _savegpr0_14: std 14, -144(1); _savegpr0_15: std 15, -136(1); ...
324	// Tail: std 0, 16(1); blr
325	writeSequence(ctx, prefix: "_savegpr0_", from: `14`, firstInsn: `0xf9c1ff70`, tail: {`0xf8010010`, blr});
326	// _savegpr1_14: std 14, -144(12); _savegpr1_15: std 15, -136(12); ...
327	// Tail: blr
328	writeSequence(ctx, prefix: "_savegpr1_", from: `14`, firstInsn: `0xf9ccff70`, tail: {blr});
329	}
330
331	// Find the R_PPC64_ADDR64 in .rela.toc with matching offset.
332	template <typename ELFT>
333	static std::pair<Defined *, int64_t>
334	getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) {
335	// .rela.toc contains exclusively R_PPC64_ADDR64 relocations sorted by
336	// r_offset: 0, 8, 16, etc. For a given Offset, Offset / 8 gives us the
337	// relocation index in most cases.
338	//
339	// In rare cases a TOC entry may store a constant that doesn't need an
340	// R_PPC64_ADDR64, the corresponding r_offset is therefore missing. Offset / 8
341	// points to a relocation with larger r_offset. Do a linear probe then.
342	// Constants are extremely uncommon in .toc and the extra number of array
343	// accesses can be seen as a small constant.
344	ArrayRef<typename ELFT::Rela> relas =
345	tocSec->template relsOrRelas<ELFT>().relas;
346	if (relas.empty())
347	return {};
348	uint64_t index = std::min<uint64_t>(offset / `8`, relas.size() - `1`);
349	for (;;) {
350	if (relas[index].r_offset == offset) {
351	Symbol &sym = tocSec->file->getRelocTargetSym(relas[index]);
352	return {dyn_cast<Defined>(Val: &sym), getAddend<ELFT>(relas[index])};
353	}
354	if (relas[index].r_offset < offset \|\| index == `0`)
355	break;
356	--index;
357	}
358	return {};
359	}
360
361	// When accessing a symbol defined in another translation unit, compilers
362	// reserve a .toc entry, allocate a local label and generate toc-indirect
363	// instructions:
364	//
365	// addis 3, 2, .LC0@toc@ha # R_PPC64_TOC16_HA
366	// ld 3, .LC0@toc@l(3) # R_PPC64_TOC16_LO_DS, load the address from a .toc entry
367	// ld/lwa 3, 0(3) # load the value from the address
368	//
369	// .section .toc,"aw",@progbits
370	// .LC0: .tc var[TC],var
371	//
372	// If var is defined, non-preemptable and addressable with a 32-bit signed
373	// offset from the toc base, the address of var can be computed by adding an
374	// offset to the toc base, saving a load.
375	//
376	// addis 3,2,var@toc@ha # this may be relaxed to a nop,
377	// addi 3,3,var@toc@l # then this becomes addi 3,2,var@toc
378	// ld/lwa 3, 0(3) # load the value from the address
379	//
380	// Returns true if the relaxation is performed.
381	static bool tryRelaxPPC64TocIndirection(Ctx &ctx, const Relocation &rel,
382	uint8_t *bufLoc) {
383	assert(ctx.arg.tocOptimize);
384	if (rel.addend < `0`)
385	return false;
386
387	// If the symbol is not the .toc section, this isn't a toc-indirection.
388	Defined *defSym = dyn_cast<Defined>(Val: rel.sym);
389	if (!defSym \|\| !defSym->isSection() \|\| defSym->section->name != ".toc")
390	return false;
391
392	Defined *d;
393	int64_t addend;
394	auto *tocISB = cast<InputSectionBase>(Val: defSym->section);
395	std::tie(args&: d, args&: addend) =
396	ctx.arg.isLE ? getRelaTocSymAndAddend<ELF64LE>(tocSec: tocISB, offset: rel.addend)
397	: getRelaTocSymAndAddend<ELF64BE>(tocSec: tocISB, offset: rel.addend);
398
399	// Only non-preemptable defined symbols can be relaxed.
400	if (!d \|\| d->isPreemptible)
401	return false;
402
403	// R_PPC64_ADDR64 should have created a canonical PLT for the non-preemptable
404	// ifunc and changed its type to STT_FUNC.
405	assert(!d->isGnuIFunc());
406
407	// Two instructions can materialize a 32-bit signed offset from the toc base.
408	uint64_t tocRelative = d->getVA(ctx, addend) - getPPC64TocBase(ctx);
409	if (!isInt<`32`>(x: tocRelative))
410	return false;
411
412	// Add PPC64TocOffset that will be subtracted by PPC64::relocate().
413	static_cast<const PPC64 &>(*ctx.target)
414	.relaxGot(loc: bufLoc, rel, val: tocRelative + ppc64TocOffset);
415	return true;
416	}
417
// Relocation masks following the #lo(value), #hi(value), #ha(value),
// #higher(value), #highera(value), #highest(value), and #highesta(value)
// macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi
// document.
//
// Each helper selects one 16-bit slice of `v`; the "a" (adjusted) variants add
// 0x8000 first. Note that ha() is declared to return uint64_t, so unlike its
// siblings its result is NOT truncated to 16 bits.
static uint16_t lo(uint64_t v) { return v; }
static uint16_t hi(uint64_t v) { return v >> 16; }
static uint64_t ha(uint64_t v) { return (v + 0x8000) >> 16; }
static uint16_t higher(uint64_t v) { return v >> 32; }
static uint16_t highera(uint64_t v) { return (v + 0x8000) >> 32; }
static uint16_t highest(uint64_t v) { return v >> 48; }
static uint16_t highesta(uint64_t v) { return (v + 0x8000) >> 48; }
429
// Extracts the 'PO' (primary opcode) field of an instruction encoding, i.e.
// the six most significant bits of the 32-bit word.
static uint8_t getPrimaryOpCode(uint32_t encoding) { return (encoding >> 26); }
432
433	static bool isDQFormInstruction(uint32_t encoding) {
434	switch (getPrimaryOpCode(encoding)) {
435	default:
436	return false;
437	case `6`: // Power10 paired loads/stores (lxvp, stxvp).
438	case `56`:
439	// The only instruction with a primary opcode of 56 is `lq`.
440	return true;
441	case `61`:
442	// There are both DS and DQ instruction forms with this primary opcode.
443	// Namely `lxv` and `stxv` are the DQ-forms that use it.
444	// The DS 'XO' bits being set to 01 is restricted to DQ form.
445	return (encoding & `3`) == `0x1`;
446	}
447	}
448
449	static bool isDSFormInstruction(PPCLegacyInsn insn) {
450	switch (insn) {
451	default:
452	return false;
453	case PPCLegacyInsn::LWA:
454	case PPCLegacyInsn::LD:
455	case PPCLegacyInsn::LXSD:
456	case PPCLegacyInsn::LXSSP:
457	case PPCLegacyInsn::STD:
458	case PPCLegacyInsn::STXSD:
459	case PPCLegacyInsn::STXSSP:
460	return true;
461	}
462	}
463
464	static PPCLegacyInsn getPPCLegacyInsn(uint32_t encoding) {
465	uint32_t opc = encoding & `0xfc000000`;
466
467	// If the primary opcode is shared between multiple instructions, we need to
468	// fix it up to match the actual instruction we are after.
469	if ((opc == `0xe4000000` \|\| opc == `0xe8000000` \|\| opc == `0xf4000000` \|\|
470	opc == `0xf8000000`) &&
471	!isDQFormInstruction(encoding))
472	opc = encoding & `0xfc000003`;
473	else if (opc == `0xf4000000`)
474	opc = encoding & `0xfc000007`;
475	else if (opc == `0x18000000`)
476	opc = encoding & `0xfc00000f`;
477
478	// If the value is not one of the enumerators in PPCLegacyInsn, we want to
479	// return PPCLegacyInsn::NOINSN.
480	if (!checkPPCLegacyInsn(encoding: opc))
481	return PPCLegacyInsn::NOINSN;
482	return static_cast<PPCLegacyInsn>(opc);
483	}
484
485	static PPCPrefixedInsn getPCRelativeForm(PPCLegacyInsn insn) {
486	switch (insn) {
487	#define PCREL_OPT(Legacy, PCRel, InsnMask) \
488	case PPCLegacyInsn::Legacy: \
489	return PPCPrefixedInsn::PCRel
490	#include "PPCInsns.def"
491	#undef PCREL_OPT
492	}
493	return PPCPrefixedInsn::NOINSN;
494	}
495
496	static LegacyToPrefixMask getInsnMask(PPCLegacyInsn insn) {
497	switch (insn) {
498	#define PCREL_OPT(Legacy, PCRel, InsnMask) \
499	case PPCLegacyInsn::Legacy: \
500	return LegacyToPrefixMask::InsnMask
501	#include "PPCInsns.def"
502	#undef PCREL_OPT
503	}
504	return LegacyToPrefixMask::NOMASK;
505	}
506	static uint64_t getPCRelativeForm(uint32_t encoding) {
507	PPCLegacyInsn origInsn = getPPCLegacyInsn(encoding);
508	PPCPrefixedInsn pcrelInsn = getPCRelativeForm(insn: origInsn);
509	if (pcrelInsn == PPCPrefixedInsn::NOINSN)
510	return UINT64_C(-`1`);
511	LegacyToPrefixMask origInsnMask = getInsnMask(insn: origInsn);
512	uint64_t pcrelEncoding =
513	(uint64_t)pcrelInsn \| (encoding & (uint64_t)origInsnMask);
514
515	// If the mask requires moving bit 28 to bit 5, do that now.
516	if (origInsnMask == LegacyToPrefixMask::ST_STX28_TO5)
517	pcrelEncoding \|= (encoding & `0x8`) << `23`;
518	return pcrelEncoding;
519	}
520
521	static bool isInstructionUpdateForm(uint32_t encoding) {
522	switch (getPrimaryOpCode(encoding)) {
523	default:
524	return false;
525	case LBZU:
526	case LHAU:
527	case LHZU:
528	case LWZU:
529	case LFSU:
530	case LFDU:
531	case STBU:
532	case STHU:
533	case STWU:
534	case STFSU:
535	case STFDU:
536	return true;
537	// LWA has the same opcode as LD, and the DS bits is what differentiates
538	// between LD/LDU/LWA
539	case LD:
540	case STD:
541	return (encoding & `3`) == `1`;
542	}
543	}
544
545	// Compute the total displacement between the prefixed instruction that gets
546	// to the start of the data and the load/store instruction that has the offset
547	// into the data structure.
548	// For example:
549	// paddi 3, 0, 1000, 1
550	// lwz 3, 20(3)
551	// Should add up to 1020 for total displacement.
552	static int64_t getTotalDisp(uint64_t prefixedInsn, uint32_t accessInsn) {
553	int64_t disp34 = llvm::SignExtend64(
554	X: ((prefixedInsn & `0x3ffff00000000`) >> `16`) \| (prefixedInsn & `0xffff`), B: `34`);
555	int32_t disp16 = llvm::SignExtend32(X: accessInsn & `0xffff`, B: `16`);
556	// For DS and DQ form instructions, we need to mask out the XO bits.
557	if (isDQFormInstruction(encoding: accessInsn))
558	disp16 &= ~`0xf`;
559	else if (isDSFormInstruction(insn: getPPCLegacyInsn(encoding: accessInsn)))
560	disp16 &= ~`0x3`;
561	return disp34 + disp16;
562	}
563
564	// There are a number of places when we either want to read or write an
565	// instruction when handling a half16 relocation type. On big-endian the buffer
566	// pointer is pointing into the middle of the word we want to extract, and on
567	// little-endian it is pointing to the start of the word. These 2 helpers are to
568	// simplify reading and writing in that context.
569	static void writeFromHalf16(Ctx &ctx, uint8_t *loc, uint32_t insn) {
570	write32(ctx, p: ctx.arg.isLE ? loc : loc - `2`, v: insn);
571	}
572
573	static uint32_t readFromHalf16(Ctx &ctx, const uint8_t *loc) {
574	return read32(ctx, p: ctx.arg.isLE ? loc : loc - `2`);
575	}
576
577	static uint64_t readPrefixedInst(Ctx &ctx, const uint8_t *loc) {
578	uint64_t fullInstr = read64(ctx, p: loc);
579	return ctx.arg.isLE ? (fullInstr << `32` \| fullInstr >> `32`) : fullInstr;
580	}
581
582	PPC64::PPC64(Ctx &ctx) : TargetInfo (ctx) {
583	copyRel = R_PPC64_COPY;
584	gotRel = R_PPC64_GLOB_DAT;
585	pltRel = R_PPC64_JMP_SLOT;
586	relativeRel = R_PPC64_RELATIVE;
587	iRelativeRel = R_PPC64_IRELATIVE;
588	symbolicRel = R_PPC64_ADDR64;
589	pltHeaderSize = `60`;
590	pltEntrySize = `4`;
591	ipltEntrySize = `16`; // PPC64PltCallStub::size
592	gotHeaderEntriesNum = `1`;
593	gotPltHeaderEntriesNum = `2`;
594	needsThunks = true;
595
596	tlsModuleIndexRel = R_PPC64_DTPMOD64;
597	tlsOffsetRel = R_PPC64_DTPREL64;
598
599	tlsGotRel = R_PPC64_TPREL64;
600
601	needsMoreStackNonSplit = false;
602
603	// We need 64K pages (at least under glibc/Linux, the loader won't
604	// set different permissions on a finer granularity than that).
605	defaultMaxPageSize = `65536`;
606
607	// The PPC64 ELF ABI v1 spec, says:
608	//
609	// It is normally desirable to put segments with different characteristics
610	// in separate 256 Mbyte portions of the address space, to give the
611	// operating system full paging flexibility in the 64-bit address space.
612	//
613	// And because the lowest non-zero 256M boundary is 0x10000000, PPC64 linkers
614	// use 0x10000000 as the starting address.
615	defaultImageBase = `0x10000000`;
616
617	write32(ctx, p: trapInstr.data(), v: `0x7fe00008`);
618	}
619
620	static uint32_t getEFlags(InputFile *file) {
621	if (file->ekind == ELF64BEKind)
622	return cast<ObjFile<ELF64BE>>(Val: file)->getObj().getHeader().e_flags;
623	return cast<ObjFile<ELF64LE>>(Val: file)->getObj().getHeader().e_flags;
624	}
625
626	// This file implements v2 ABI. This function makes sure that all
627	// object files have v2 or an unspecified version as an ABI version.
628	uint32_t PPC64::calcEFlags() const {
629	for (InputFile *f : ctx.objectFiles) {
630	uint32_t flag = getEFlags(file: f);
631	if (flag == `1`)
632	ErrAlways(ctx) << f << ": ABI version 1 is not supported";
633	else if (flag > `2`)
634	ErrAlways(ctx) << f << ": unrecognized e_flags: " << flag;
635	}
636	return `2`;
637	}
638
639	void PPC64::relaxGot(uint8_t loc, const* Relocation &rel, uint64_t val) const {
640	switch (rel.type) {
641	case R_PPC64_TOC16_HA:
642	// Convert "addis reg, 2, .LC0@toc@h" to "addis reg, 2, var@toc@h" or "nop".
643	relocate(loc, rel, val);
644	break;
645	case R_PPC64_TOC16_LO_DS: {
646	// Convert "ld reg, .LC0@toc@l(reg)" to "addi reg, reg, var@toc@l" or
647	// "addi reg, 2, var@toc".
648	uint32_t insn = readFromHalf16(ctx, loc);
649	if (getPrimaryOpCode(encoding: insn) != LD)
650	ErrAlways(ctx)
651	<< "expected a 'ld' for got-indirect to toc-relative relaxing";
652	writeFromHalf16(ctx, loc, insn: (insn & `0x03ffffff`) \| `0x38000000`);
653	relocateNoSym(loc, type: R_PPC64_TOC16_LO, val);
654	break;
655	}
656	case R_PPC64_GOT_PCREL34: {
657	// Clear the first 8 bits of the prefix and the first 6 bits of the
658	// instruction (the primary opcode).
659	uint64_t insn = readPrefixedInst(ctx, loc);
660	if ((insn & `0xfc000000`) != `0xe4000000`)
661	ErrAlways(ctx)
662	<< "expected a 'pld' for got-indirect to pc-relative relaxing";
663	insn &= ~`0xff000000fc000000`;
664
665	// Replace the cleared bits with the values for PADDI (0x600000038000000);
666	insn \|= `0x600000038000000`;
667	writePrefixedInst(ctx, loc, insn);
668	relocate(loc, rel, val);
669	break;
670	}
671	case R_PPC64_PCREL_OPT: {
672	// We can only relax this if the R_PPC64_GOT_PCREL34 at this offset can
673	// be relaxed. The eligibility for the relaxation needs to be determined
674	// on that relocation since this one does not relocate a symbol.
675	uint64_t insn = readPrefixedInst(ctx, loc);
676	uint32_t accessInsn = read32(ctx, p: loc + rel.addend);
677	uint64_t pcRelInsn = getPCRelativeForm(encoding: accessInsn);
678
679	// This error is not necessary for correctness but is emitted for now
680	// to ensure we don't miss these opportunities in real code. It can be
681	// removed at a later date.
682	if (pcRelInsn == UINT64_C(-`1`)) {
683	Err(ctx)
684	<< "unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x"
685	<< utohexstr(X: accessInsn, LowerCase: true);
686	break;
687	}
688
689	int64_t totalDisp = getTotalDisp(prefixedInsn: insn, accessInsn);
690	if (!isInt<`34`>(x: totalDisp))
691	break; // Displacement doesn't fit.
692	// Convert the PADDI to the prefixed version of accessInsn and convert
693	// accessInsn to a nop.
694	writePrefixedInst(ctx, loc,
695	insn: pcRelInsn \| ((totalDisp & `0x3ffff0000`) << `16`) \|
696	(totalDisp & `0xffff`));
697	write32(ctx, p: loc + rel.addend, v: NOP); // nop accessInsn.
698	break;
699	}
700	default:
701	llvm_unreachable("unexpected relocation type");
702	}
703	}
704
705	void PPC64::relaxTlsGdToLe(uint8_t loc, const* Relocation &rel,
706	uint64_t val) const {
707	// Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement.
708	// The general dynamic code sequence for a global `x` will look like:
709	// Instruction Relocation Symbol
710	// addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
711	// addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x
712	// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
713	// R_PPC64_REL24 __tls_get_addr
714	// nop None None
715
716	// Relaxing to local exec entails converting:
717	// addis r3, r2, x@got@tlsgd@ha into nop
718	// addi r3, r3, x@got@tlsgd@l into addis r3, r13, x@tprel@ha
719	// bl __tls_get_addr(x@tlsgd) into nop
720	// nop into addi r3, r3, x@tprel@l
721
722	switch (rel.type) {
723	case R_PPC64_GOT_TLSGD16_HA:
724	writeFromHalf16(ctx, loc, insn: NOP);
725	break;
726	case R_PPC64_GOT_TLSGD16:
727	case R_PPC64_GOT_TLSGD16_LO:
728	writeFromHalf16(ctx, loc, insn: `0x3c6d0000`); // addis r3, r13
729	relocateNoSym(loc, type: R_PPC64_TPREL16_HA, val);
730	break;
731	case R_PPC64_GOT_TLSGD_PCREL34:
732	// Relax from paddi r3, 0, x@got@tlsgd@pcrel, 1 to
733	// paddi r3, r13, x@tprel, 0
734	writePrefixedInst(ctx, loc, insn: `0x06000000386d0000`);
735	relocateNoSym(loc, type: R_PPC64_TPREL34, val);
736	break;
737	case R_PPC64_TLSGD: {
738	// PC Relative Relaxation:
739	// Relax from bl __tls_get_addr@notoc(x@tlsgd) to
740	// nop
741	// TOC Relaxation:
742	// Relax from bl __tls_get_addr(x@tlsgd)
743	// nop
744	// to
745	// nop
746	// addi r3, r3, x@tprel@l
747	const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);
748	if (locAsInt % `4` == `0`) {
749	write32(ctx, p: loc, v: NOP); // nop
750	write32(ctx, p: loc + `4`, v: `0x38630000`); // addi r3, r3
751	// Since we are relocating a half16 type relocation and Loc + 4 points to
752	// the start of an instruction we need to advance the buffer by an extra
753	// 2 bytes on BE.
754	relocateNoSym(loc: loc + `4` + (ctx.arg.ekind == ELF64BEKind ? `2` : `0`),
755	type: R_PPC64_TPREL16_LO, val);
756	} else if (locAsInt % `4` == `1`) {
757	write32(ctx, p: loc - `1`, v: NOP);
758	} else {
759	Err(ctx) << "R_PPC64_TLSGD has unexpected byte alignment";
760	}
761	break;
762	}
763	default:
764	llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
765	}
766	}
767
768	void PPC64::relaxTlsLdToLe(uint8_t loc, const* Relocation &rel,
769	uint64_t val) const {
770	// Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement.
771	// The local dynamic code sequence for a global `x` will look like:
772	// Instruction Relocation Symbol
773	// addis r3, r2, x@got@tlsld@ha R_PPC64_GOT_TLSLD16_HA x
774	// addi r3, r3, x@got@tlsld@l R_PPC64_GOT_TLSLD16_LO x
775	// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSLD x
776	// R_PPC64_REL24 __tls_get_addr
777	// nop None None
778
779	// Relaxing to local exec entails converting:
780	// addis r3, r2, x@got@tlsld@ha into nop
781	// addi r3, r3, x@got@tlsld@l into addis r3, r13, 0
782	// bl __tls_get_addr(x@tlsgd) into nop
783	// nop into addi r3, r3, 4096
784
785	switch (rel.type) {
786	case R_PPC64_GOT_TLSLD16_HA:
787	writeFromHalf16(ctx, loc, insn: NOP);
788	break;
789	case R_PPC64_GOT_TLSLD16_LO:
790	writeFromHalf16(ctx, loc, insn: `0x3c6d0000`); // addis r3, r13, 0
791	break;
792	case R_PPC64_GOT_TLSLD_PCREL34:
793	// Relax from paddi r3, 0, x1@got@tlsld@pcrel, 1 to
794	// paddi r3, r13, 0x1000, 0
795	writePrefixedInst(ctx, loc, insn: `0x06000000386d1000`);
796	break;
797	case R_PPC64_TLSLD: {
798	// PC Relative Relaxation:
799	// Relax from bl __tls_get_addr@notoc(x@tlsld)
800	// to
801	// nop
802	// TOC Relaxation:
803	// Relax from bl __tls_get_addr(x@tlsld)
804	// nop
805	// to
806	// nop
807	// addi r3, r3, 4096
808	const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);
809	if (locAsInt % `4` == `0`) {
810	write32(ctx, p: loc, v: NOP);
811	write32(ctx, p: loc + `4`, v: `0x38631000`); // addi r3, r3, 4096
812	} else if (locAsInt % `4` == `1`) {
813	write32(ctx, p: loc - `1`, v: NOP);
814	} else {
815	Err(ctx) << "R_PPC64_TLSLD has unexpected byte alignment";
816	}
817	break;
818	}
819	default:
820	llvm_unreachable("unsupported relocation for TLS LD to LE relaxation");
821	}
822	}
823
824	// Map X-Form instructions to their DS-Form counterparts, if applicable.
825	// The full encoding is returned here to distinguish between the different
826	// DS-Form instructions.
827	unsigned elf::getPPCDSFormOp(unsigned secondaryOp) {
828	switch (secondaryOp) {
829	case LWAX:
830	return (LWA << `26`) \| `0x2`;
831	case LDX:
832	return LD << `26`;
833	case STDX:
834	return STD << `26`;
835	default:
836	return `0`;
837	}
838	}
839
840	unsigned elf::getPPCDFormOp(unsigned secondaryOp) {
841	switch (secondaryOp) {
842	case LBZX:
843	return LBZ << `26`;
844	case LHZX:
845	return LHZ << `26`;
846	case LWZX:
847	return LWZ << `26`;
848	case STBX:
849	return STB << `26`;
850	case STHX:
851	return STH << `26`;
852	case STWX:
853	return STW << `26`;
854	case LHAX:
855	return LHA << `26`;
856	case LFSX:
857	return LFS << `26`;
858	case LFDX:
859	return LFD << `26`;
860	case STFSX:
861	return STFS << `26`;
862	case STFDX:
863	return STFD << `26`;
864	case ADD:
865	return ADDI << `26`;
866	default:
867	return `0`;
868	}
869	}
870
871	void PPC64::relaxTlsIeToLe(uint8_t loc, const* Relocation &rel,
872	uint64_t val) const {
873	// The initial exec code sequence for a global `x` will look like:
874	// Instruction Relocation Symbol
875	// addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x
876	// ld r9, x@got@tprel@l(r9) R_PPC64_GOT_TPREL16_LO_DS x
877	// add r9, r9, x@tls R_PPC64_TLS x
878
879	// Relaxing to local exec entails converting:
880	// addis r9, r2, x@got@tprel@ha into nop
881	// ld r9, x@got@tprel@l(r9) into addis r9, r13, x@tprel@ha
882	// add r9, r9, x@tls into addi r9, r9, x@tprel@l
883
884	// x@tls R_PPC64_TLS is a relocation which does not compute anything,
885	// it is replaced with r13 (thread pointer).
886
887	// The add instruction in the initial exec sequence has multiple variations
888	// that need to be handled. If we are building an address it will use an add
889	// instruction, if we are accessing memory it will use any of the X-form
890	// indexed load or store instructions.
891
892	unsigned offset = (ctx.arg.ekind == ELF64BEKind) ? `2` : `0`;
893	switch (rel.type) {
894	case R_PPC64_GOT_TPREL16_HA:
895	write32(ctx, p: loc - offset, v: NOP);
896	break;
897	case R_PPC64_GOT_TPREL16_LO_DS:
898	case R_PPC64_GOT_TPREL16_DS: {
899	uint32_t regNo = read32(ctx, p: loc - offset) & `0x03e00000`; // bits 6-10
900	write32(ctx, p: loc - offset, v: `0x3c0d0000` \| regNo); // addis RegNo, r13
901	relocateNoSym(loc, type: R_PPC64_TPREL16_HA, val);
902	break;
903	}
904	case R_PPC64_GOT_TPREL_PCREL34: {
905	const uint64_t pldRT = readPrefixedInst(ctx, loc) & `0x0000000003e00000`;
906	// paddi RT(from pld), r13, symbol@tprel, 0
907	writePrefixedInst(ctx, loc, insn: `0x06000000380d0000` \| pldRT);
908	relocateNoSym(loc, type: R_PPC64_TPREL34, val);
909	break;
910	}
911	case R_PPC64_TLS: {
912	const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);
913	if (locAsInt % `4` == `0`) {
914	uint32_t primaryOp = getPrimaryOpCode(encoding: read32(ctx, p: loc));
915	if (primaryOp != `31`)
916	ErrAlways(ctx) << "unrecognized instruction for IE to LE R_PPC64_TLS";
917	uint32_t secondaryOp = (read32(ctx, p: loc) & `0x000007fe`) >> `1`; // bits 21-30
918	uint32_t dFormOp = getPPCDFormOp(secondaryOp);
919	uint32_t finalReloc;
920	if (dFormOp == `0`) { // Expecting a DS-Form instruction.
921	dFormOp = getPPCDSFormOp(secondaryOp);
922	if (dFormOp == `0`)
923	ErrAlways(ctx) << "unrecognized instruction for IE to LE R_PPC64_TLS";
924	finalReloc = R_PPC64_TPREL16_LO_DS;
925	} else
926	finalReloc = R_PPC64_TPREL16_LO;
927	write32(ctx, p: loc, v: dFormOp \| (read32(ctx, p: loc) & `0x03ff0000`));
928	relocateNoSym(loc: loc + offset, type: finalReloc, val);
929	} else if (locAsInt % `4` == `1`) {
930	// If the offset is not 4 byte aligned then we have a PCRel type reloc.
931	// This version of the relocation is offset by one byte from the
932	// instruction it references.
933	uint32_t tlsInstr = read32(ctx, p: loc - `1`);
934	uint32_t primaryOp = getPrimaryOpCode(encoding: tlsInstr);
935	if (primaryOp != `31`)
936	Err(ctx) << "unrecognized instruction for IE to LE R_PPC64_TLS";
937	uint32_t secondaryOp = (tlsInstr & `0x000007FE`) >> `1`; // bits 21-30
938	// The add is a special case and should be turned into a nop. The paddi
939	// that comes before it will already have computed the address of the
940	// symbol.
941	if (secondaryOp == `266`) {
942	// Check if the add uses the same result register as the input register.
943	uint32_t rt = (tlsInstr & `0x03E00000`) >> `21`; // bits 6-10
944	uint32_t ra = (tlsInstr & `0x001F0000`) >> `16`; // bits 11-15
945	if (ra == rt) {
946	write32(ctx, p: loc - `1`, v: NOP);
947	} else {
948	// mr rt, ra
949	write32(ctx, p: loc - `1`,
950	v: `0x7C000378` \| (rt << `16`) \| (ra << `21`) \| (ra << `11`));
951	}
952	} else {
953	uint32_t dFormOp = getPPCDFormOp(secondaryOp);
954	if (dFormOp == `0`) { // Expecting a DS-Form instruction.
955	dFormOp = getPPCDSFormOp(secondaryOp);
956	if (dFormOp == `0`)
957	Err(ctx) << "unrecognized instruction for IE to LE R_PPC64_TLS";
958	}
959	write32(ctx, p: loc - `1`, v: (dFormOp \| (tlsInstr & `0x03ff0000`)));
960	}
961	} else {
962	Err(ctx) << "R_PPC64_TLS must be either 4 byte aligned or one byte "
963	"offset from 4 byte aligned";
964	}
965	break;
966	}
967	default:
968	llvm_unreachable("unknown relocation for IE to LE");
969	break;
970	}
971	}
972
973	void PPC64::initTargetSpecificSections() {
974	ctx.in.ppc64LongBranchTarget =
975	std::make_unique<PPC64LongBranchTargetSection>(args&: ctx);
976	ctx.inputSections.push_back(Elt: ctx.in.ppc64LongBranchTarget.get());
977	}
978
979	// Only needed to support relocations used by relocateNonAlloc and relocateEh.
980	RelExpr PPC64::getRelExpr(RelType type, const Symbol &s,
981	const uint8_t loc) const* {
982	switch (type) {
983	case R_PPC64_NONE:
984	return R_NONE;
985	case R_PPC64_ADDR16:
986	case R_PPC64_ADDR32:
987	case R_PPC64_ADDR64:
988	return R_ABS;
989	case R_PPC64_REL32:
990	case R_PPC64_REL64:
991	return R_PC;
992	case R_PPC64_DTPREL64:
993	return R_DTPREL;
994	default:
995	Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
996	<< ") against symbol " << &s;
997	return R_NONE;
998	}
999	}
1000
1001	RelType PPC64::getDynRel(RelType type) const {
1002	if (type == R_PPC64_ADDR64 \|\| type == R_PPC64_TOC)
1003	return R_PPC64_ADDR64;
1004	return R_PPC64_NONE;
1005	}
1006
1007	int64_t PPC64::getImplicitAddend(const uint8_t buf, RelType type) const* {
1008	switch (type) {
1009	case R_PPC64_NONE:
1010	case R_PPC64_GLOB_DAT:
1011	case R_PPC64_JMP_SLOT:
1012	return `0`;
1013	case R_PPC64_REL32:
1014	return SignExtend64<`32`>(x: read32(ctx, p: buf));
1015	case R_PPC64_ADDR64:
1016	case R_PPC64_REL64:
1017	case R_PPC64_RELATIVE:
1018	case R_PPC64_IRELATIVE:
1019	case R_PPC64_DTPMOD64:
1020	case R_PPC64_DTPREL64:
1021	case R_PPC64_TPREL64:
1022	return read64(ctx, p: buf);
1023	default:
1024	InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
1025	return `0`;
1026	}
1027	}
1028
1029	void PPC64::writeGotHeader(uint8_t buf) const* {
1030	write64(ctx, p: buf, v: getPPC64TocBase(ctx));
1031	}
1032
1033	void PPC64::writePltHeader(uint8_t buf) const* {
1034	// The generic resolver stub goes first.
1035	write32(ctx, p: buf + `0`, v: `0x7c0802a6`); // mflr r0
1036	write32(ctx, p: buf + `4`, v: `0x429f0005`); // bcl 20,4cr7+so,8 <_glink+0x8>*
1037	write32(ctx, p: buf + `8`, v: `0x7d6802a6`); // mflr r11
1038	write32(ctx, p: buf + `12`, v: `0x7c0803a6`); // mtlr r0
1039	write32(ctx, p: buf + `16`, v: `0x7d8b6050`); // subf r12, r11, r12
1040	write32(ctx, p: buf + `20`, v: `0x380cffcc`); // subi r0,r12,52
1041	write32(ctx, p: buf + `24`, v: `0x7800f082`); // srdi r0,r0,62,2
1042	write32(ctx, p: buf + `28`, v: `0xe98b002c`); // ld r12,44(r11)
1043	write32(ctx, p: buf + `32`, v: `0x7d6c5a14`); // add r11,r12,r11
1044	write32(ctx, p: buf + `36`, v: `0xe98b0000`); // ld r12,0(r11)
1045	write32(ctx, p: buf + `40`, v: `0xe96b0008`); // ld r11,8(r11)
1046	write32(ctx, p: buf + `44`, v: `0x7d8903a6`); // mtctr r12
1047	write32(ctx, p: buf + `48`, v: `0x4e800420`); // bctr
1048
1049	// The 'bcl' instruction will set the link register to the address of the
1050	// following instruction ('mflr r11'). Here we store the offset from that
1051	// instruction to the first entry in the GotPlt section.
1052	int64_t gotPltOffset = ctx.in.gotPlt ->getVA() - (ctx.in.plt ->getVA() + `8`);
1053	write64(ctx, p: buf + `52`, v: gotPltOffset);
1054	}
1055
1056	void PPC64::writePlt(uint8_t buf, const* Symbol &sym,
1057	uint64_t /pltEntryAddr/) const {
1058	int32_t offset = pltHeaderSize + sym.getPltIdx(ctx) * pltEntrySize;
1059	// bl __glink_PLTresolve
1060	write32(ctx, p: buf, v: `0x48000000` \| ((-offset) & `0x03fffffc`));
1061	}
1062
1063	void PPC64::writeIplt(uint8_t buf, const* Symbol &sym,
1064	uint64_t /pltEntryAddr/) const {
1065	writePPC64LoadAndBranch(ctx, buf,
1066	offset: sym.getGotPltVA(ctx) - getPPC64TocBase(ctx));
1067	}
1068
1069	static bool isTocOptType(RelType type) {
1070	switch (type) {
1071	case R_PPC64_GOT16_HA:
1072	case R_PPC64_GOT16_LO_DS:
1073	case R_PPC64_TOC16_HA:
1074	case R_PPC64_TOC16_LO_DS:
1075	case R_PPC64_TOC16_LO:
1076	return true;
1077	default:
1078	return false;
1079	}
1080	}
1081
1082	// Return true if the section has GD/LD GOT relocations without
1083	// R_PPC64_TLSGD/R_PPC64_TLSLD markers. Old IBM XL compilers generate GD/LD code
1084	// sequences without markers; disable GD/LD to IE/LE relaxation for the section.
1085	template <class RelTy>
1086	static bool missingTlsGdLdMarker(InputSectionBase &sec, Relocs<RelTy> rels) {
1087	bool hasGotGdLd = false;
1088	for (const RelTy &rel : rels) {
1089	RelType type = rel.getType(false);
1090	switch (type) {
1091	case R_PPC64_TLSGD:
1092	case R_PPC64_TLSLD:
1093	return false; // Found a marker
1094	case R_PPC64_GOT_TLSGD16:
1095	case R_PPC64_GOT_TLSGD16_HA:
1096	case R_PPC64_GOT_TLSGD16_HI:
1097	case R_PPC64_GOT_TLSGD16_LO:
1098	case R_PPC64_GOT_TLSLD16:
1099	case R_PPC64_GOT_TLSLD16_HA:
1100	case R_PPC64_GOT_TLSLD16_HI:
1101	case R_PPC64_GOT_TLSLD16_LO:
1102	hasGotGdLd = true;
1103	break;
1104	}
1105	}
1106	if (hasGotGdLd) {
1107	Warn(ctx&: sec.file->ctx)
1108	<< sec.file
1109	<< ": disable TLS relaxation due to R_PPC64_GOT_TLS* relocations "
1110	"without "
1111	"R_PPC64_TLSGD/R_PPC64_TLSLD relocations";
1112	}
1113	return hasGotGdLd;
1114	}
1115
1116	template <class ELFT, class RelTy>
1117	void PPC64::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
1118	RelocScan rs(ctx, &sec);
1119	sec.relocations.reserve(N: rels.size());
1120	bool optimizeTlsGdLd =
1121	!missingTlsGdLdMarker<RelTy>(sec, rels) && !ctx.arg.shared;
1122	for (auto it = rels.begin(); it != rels.end(); ++it) {
1123	RelType type = it->getType(false);
1124	uint32_t symIdx = it->getSymbol(false);
1125	Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx);
1126	uint64_t offset = it->r_offset;
1127	if (sym.isUndefined() && symIdx != `0` &&
1128	rs.maybeReportUndefined(sym&: cast<Undefined>(Val&: sym), offset))
1129	continue;
1130	int64_t addend = rs.getAddend<ELFT>(*it, type);
1131	if (ctx.arg.isPic && type == R_PPC64_TOC)
1132	addend += getPPC64TocBase(ctx);
1133
1134	RelExpr expr;
1135	// Relocation types that only need a RelExpr set `expr` and break out of
1136	// the switch to reach rs.process(). Types that need special handling
1137	// (fast-path helpers, TLS) call a handler and use `continue`.
1138	switch (type) {
1139	case R_PPC64_NONE:
1140	continue;
1141	// Absolute relocations:
1142	case R_PPC64_ADDR16:
1143	case R_PPC64_ADDR16_DS:
1144	case R_PPC64_ADDR16_HA:
1145	case R_PPC64_ADDR16_HI:
1146	case R_PPC64_ADDR16_HIGH:
1147	case R_PPC64_ADDR16_HIGHER:
1148	case R_PPC64_ADDR16_HIGHERA:
1149	case R_PPC64_ADDR16_HIGHEST:
1150	case R_PPC64_ADDR16_HIGHESTA:
1151	case R_PPC64_ADDR16_LO:
1152	case R_PPC64_ADDR16_LO_DS:
1153	case R_PPC64_ADDR32:
1154	case R_PPC64_ADDR64:
1155	expr = R_ABS;
1156	break;
1157
1158	// PC-relative relocations:
1159	case R_PPC64_REL16_LO:
1160	case R_PPC64_REL16_HA:
1161	case R_PPC64_REL16_HI:
1162	case R_PPC64_REL32:
1163	case R_PPC64_REL64:
1164	case R_PPC64_PCREL34:
1165	rs.processR_PC(type, offset, addend, sym);
1166	continue;
1167
1168	// GOT-generating relocations:
1169	case R_PPC64_GOT16:
1170	case R_PPC64_GOT16_DS:
1171	case R_PPC64_GOT16_HA:
1172	case R_PPC64_GOT16_HI:
1173	case R_PPC64_GOT16_LO:
1174	case R_PPC64_GOT16_LO_DS:
1175	expr = R_GOT_OFF;
1176	break;
1177	case R_PPC64_GOT_PCREL34:
1178	expr = R_GOT_PC;
1179	break;
1180	case R_PPC64_PCREL_OPT:
1181	expr = adjustGotPcExpr(type, addend, loc: sec.content().data() + offset);
1182	if (expr == R_RELAX_GOT_PC)
1183	ctx.in.got ->hasGotOffRel.store(i: true, m: std::memory_order_relaxed);
1184	rs.processAux(expr, type, offset, sym, addend);
1185	continue;
1186
1187	// TOC-relative relocations:
1188	case R_PPC64_TOC16:
1189	case R_PPC64_TOC16_DS:
1190	sec.file->ppc64SmallCodeModelTocRelocs = true;
1191	expr = R_GOTREL;
1192	break;
1193	case R_PPC64_TOC16_HI:
1194	expr = R_GOTREL;
1195	break;
1196	case R_PPC64_TOC16_LO:
1197	// Record the TOC entry (.toc + addend) as not relaxable.
1198	if (sym.isSection() && isa<Defined>(Val: sym) &&
1199	cast<Defined>(Val&: sym).section->name == ".toc")
1200	ctx.ppc64noTocRelax.insert(V: {&sym, addend});
1201	expr = R_GOTREL;
1202	break;
1203	case R_PPC64_TOC16_HA:
1204	case R_PPC64_TOC16_LO_DS:
1205	expr = R_GOTREL;
1206	break;
1207	case R_PPC64_TOC:
1208	expr = RE_PPC64_TOCBASE;
1209	break;
1210
1211	// PLT-generating relocations:
1212	case R_PPC64_REL14:
1213	case R_PPC64_REL24:
1214	expr = RE_PPC64_CALL_PLT;
1215	break;
1216	case R_PPC64_REL24_NOTOC:
1217	rs.processR_PLT_PC(type, offset, addend, sym);
1218	continue;
1219
1220	// TLS relocations:
1221
1222	// TLS LE:
1223	case R_PPC64_TPREL16:
1224	case R_PPC64_TPREL16_HA:
1225	case R_PPC64_TPREL16_LO:
1226	case R_PPC64_TPREL16_HI:
1227	case R_PPC64_TPREL16_DS:
1228	case R_PPC64_TPREL16_LO_DS:
1229	case R_PPC64_TPREL16_HIGHER:
1230	case R_PPC64_TPREL16_HIGHERA:
1231	case R_PPC64_TPREL16_HIGHEST:
1232	case R_PPC64_TPREL16_HIGHESTA:
1233	case R_PPC64_TPREL34:
1234	if (rs.checkTlsLe(offset, sym, type))
1235	continue;
1236	expr = R_TPREL;
1237	break;
1238
1239	// TLS IE:
1240	case R_PPC64_GOT_TPREL16_HA:
1241	case R_PPC64_GOT_TPREL16_LO_DS:
1242	case R_PPC64_GOT_TPREL16_DS:
1243	case R_PPC64_GOT_TPREL16_HI:
1244	rs.handleTlsIe(ieExpr: R_GOT_OFF, type, offset, addend, sym);
1245	continue;
1246	case R_PPC64_GOT_TPREL_PCREL34:
1247	rs.handleTlsIe(ieExpr: R_GOT_PC, type, offset, addend, sym);
1248	continue;
1249	case R_PPC64_TLS:
1250	if (!ctx.arg.shared && !sym.isPreemptible)
1251	sec.addReloc(r: {.expr: R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
1252	continue;
1253
1254	// TLS GD:
1255	case R_PPC64_GOT_TLSGD16:
1256	case R_PPC64_GOT_TLSGD16_HA:
1257	case R_PPC64_GOT_TLSGD16_HI:
1258	case R_PPC64_GOT_TLSGD16_LO:
1259	case R_PPC64_GOT_TLSGD_PCREL34: {
1260	bool isPCRel = type == R_PPC64_GOT_TLSGD_PCREL34;
1261	if (optimizeTlsGdLd) {
1262	if (sym.isPreemptible) {
1263	ctx.hasTlsIe.store(i: true, m: std::memory_order_relaxed);
1264	sym.setFlags(NEEDS_TLSIE);
1265	sec.addReloc(
1266	r: {.expr: isPCRel ? R_GOT_PC : R_GOT_OFF, .type: type, .offset: offset, .addend: addend, .sym: &sym});
1267	} else {
1268	sec.addReloc(r: {.expr: R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
1269	}
1270	} else {
1271	sym.setFlags(NEEDS_TLSGD);
1272	sec.addReloc(
1273	r: {.expr: isPCRel ? R_TLSGD_PC : R_TLSGD_GOT, .type: type, .offset: offset, .addend: addend, .sym: &sym});
1274	}
1275	continue;
1276	}
1277	// bl __tls_get_addr(x@tlsgd) is relocated by R_PPC64_TLSGD and
1278	// R_PPC64_REL24. After optimization we no longer call __tls_get_addr
1279	// and should skip both relocations to avoid a false dependence on
1280	// __tls_get_addr being defined.
1281	case R_PPC64_TLSGD:
1282	case R_PPC64_TLSLD: {
1283	auto it1 = it;
1284	++it1;
1285	if (it1 == rels.end()) {
1286	auto diag = Err(ctx);
1287	diag << "R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last "
1288	"relocation";
1289	printLocation(s&: diag, sec, sym, off: offset);
1290	continue;
1291	}
1292	// Increment the offset for the NOTOC case so that relaxTlsGdToIe
1293	// and relaxTlsGdToLe can distinguish it from the TOC case.
1294	if (it1->getType(false) == R_PPC64_REL24_NOTOC)
1295	++offset;
1296	if (optimizeTlsGdLd) {
1297	sec.addReloc(r: {.expr: sym.isPreemptible ? R_GOT_OFF : R_TPREL, .type: type, .offset: offset,
1298	.addend: addend, .sym: &sym});
1299	++it; // skip REL24
1300	}
1301	continue;
1302	}
1303
1304	// TLS LD:
1305	case R_PPC64_GOT_TLSLD16:
1306	case R_PPC64_GOT_TLSLD16_HA:
1307	case R_PPC64_GOT_TLSLD16_HI:
1308	case R_PPC64_GOT_TLSLD16_LO:
1309	case R_PPC64_GOT_TLSLD_PCREL34:
1310	if (optimizeTlsGdLd) {
1311	sec.addReloc(r: {.expr: R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
1312	} else {
1313	ctx.needsTlsLd.store(i: true, m: std::memory_order_relaxed);
1314	sec.addReloc(
1315	r: {.expr: type == R_PPC64_GOT_TLSLD_PCREL34 ? R_TLSLD_PC : R_TLSLD_GOT, .type: type,
1316	.offset: offset, .addend: addend, .sym: &sym});
1317	}
1318	continue;
1319	case R_PPC64_DTPREL16:
1320	case R_PPC64_DTPREL16_DS:
1321	case R_PPC64_DTPREL16_HA:
1322	case R_PPC64_DTPREL16_HI:
1323	case R_PPC64_DTPREL16_HIGHER:
1324	case R_PPC64_DTPREL16_HIGHERA:
1325	case R_PPC64_DTPREL16_HIGHEST:
1326	case R_PPC64_DTPREL16_HIGHESTA:
1327	case R_PPC64_DTPREL16_LO:
1328	case R_PPC64_DTPREL16_LO_DS:
1329	case R_PPC64_DTPREL64:
1330	case R_PPC64_DTPREL34:
1331	sec.addReloc(r: {.expr: R_DTPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
1332	continue;
1333	case R_PPC64_GOT_DTPREL16_HA:
1334	case R_PPC64_GOT_DTPREL16_LO_DS:
1335	case R_PPC64_GOT_DTPREL16_DS:
1336	case R_PPC64_GOT_DTPREL16_HI:
1337	sym.setFlags(NEEDS_GOT_DTPREL);
1338	sec.addReloc(r: {.expr: R_TLSLD_GOT_OFF, .type: type, .offset: offset, .addend: addend, .sym: &sym});
1339	continue;
1340
1341	default:
1342	Err(ctx) << getErrorLoc(ctx, loc: sec.content().data() + offset)
1343	<< "unknown relocation (" << type.v << ") against symbol "
1344	<< &sym;
1345	continue;
1346	}
1347	if (oneof<R_GOTREL, RE_PPC64_TOCBASE>(expr))
1348	ctx.in.got ->hasGotOffRel.store(i: true, m: std::memory_order_relaxed);
1349	rs.process(expr, type, offset, sym, addend);
1350	}
1351	}
1352
1353	void PPC64::scanSection(InputSectionBase &sec) {
1354	if (ctx.arg.isLE)
1355	elf::scanSection1<PPC64, ELF64LE>(target&: *this, sec);
1356	else
1357	elf::scanSection1<PPC64, ELF64BE>(target&: *this, sec);
1358
1359	// Sort relocations by offset for .toc sections. This is needed so that
1360	// sections addressed with small code model relocations come first.
1361	if (sec.name == ".toc")
1362	llvm::stable_sort(Range: sec.relocs(),
1363	C: [](const Relocation &lhs, const Relocation &rhs) {
1364	return lhs.offset < rhs.offset;
1365	});
1366	}
1367
1368	void PPC64::relocate(uint8_t loc, const* Relocation &rel, uint64_t val) const {
1369	RelType type = rel.type;
1370	bool shouldTocOptimize = isTocOptType(type);
1371
1372	// Handle TLS optimization.
1373	switch (type) {
1374	case R_PPC64_GOT_TLSGD16:
1375	case R_PPC64_GOT_TLSGD16_HA:
1376	case R_PPC64_GOT_TLSGD16_HI:
1377	case R_PPC64_GOT_TLSGD16_LO:
1378	case R_PPC64_GOT_TLSGD_PCREL34:
1379	case R_PPC64_TLSGD:
1380	if (rel.expr == R_TPREL) {
1381	relaxTlsGdToLe(loc, rel, val);
1382	return;
1383	}
1384	if (oneof<R_GOT_OFF, R_GOT_PC>(expr: rel.expr)) {
1385	relaxTlsGdToIe(loc, rel, val);
1386	return;
1387	}
1388	break;
1389	case R_PPC64_GOT_TLSLD16:
1390	case R_PPC64_GOT_TLSLD16_HA:
1391	case R_PPC64_GOT_TLSLD16_HI:
1392	case R_PPC64_GOT_TLSLD16_LO:
1393	case R_PPC64_GOT_TLSLD_PCREL34:
1394	case R_PPC64_TLSLD:
1395	if (rel.expr == R_TPREL) {
1396	relaxTlsLdToLe(loc, rel, val);
1397	return;
1398	}
1399	break;
1400	case R_PPC64_GOT_TPREL16_HA:
1401	case R_PPC64_GOT_TPREL16_LO_DS:
1402	case R_PPC64_GOT_TPREL16_DS:
1403	case R_PPC64_GOT_TPREL16_HI:
1404	case R_PPC64_GOT_TPREL_PCREL34:
1405	case R_PPC64_TLS:
1406	if (rel.expr == R_TPREL) {
1407	relaxTlsIeToLe(loc, rel, val);
1408	return;
1409	}
1410	break;
1411	default:
1412	break;
1413	}
1414
1415	switch (type) {
1416	case R_PPC64_ADDR14: {
1417	checkAlignment(ctx, loc, v: val, n: `4`, rel);
1418	// Preserve the AA/LK bits in the branch instruction
1419	uint8_t aalk = loc[`3`];
1420	write16(ctx, p: loc + `2`, v: (aalk & `3`) \| (val & `0xfffc`));
1421	break;
1422	}
1423	case R_PPC64_GOT16:
1424	case R_PPC64_GOT_TLSGD16:
1425	case R_PPC64_GOT_TLSLD16:
1426	case R_PPC64_TOC16:
1427	case R_PPC64_DTPREL16: // semantically subtracts DTP offset (== tocOffset)
1428	val -= ppc64TocOffset;
1429	[[fallthrough]];
1430	case R_PPC64_ADDR16:
1431	checkIntUInt(ctx, loc, v: val, n: `16`, rel);
1432	write16(ctx, p: loc, v: val);
1433	break;
1434	case R_PPC64_ADDR32:
1435	checkIntUInt(ctx, loc, v: val, n: `32`, rel);
1436	write32(ctx, p: loc, v: val);
1437	break;
1438	case R_PPC64_GOT16_DS:
1439	case R_PPC64_TOC16_DS:
1440	case R_PPC64_GOT_DTPREL16_DS:
1441	case R_PPC64_GOT_TPREL16_DS:
1442	case R_PPC64_DTPREL16_DS:
1443	val -= ppc64TocOffset;
1444	[[fallthrough]];
1445	case R_PPC64_ADDR16_DS:
1446	case R_PPC64_TPREL16_DS: {
1447	checkInt(ctx, loc, v: val, n: `16`, rel);
1448	// DQ-form instructions use bits 28-31 as part of the instruction encoding
1449	// DS-form instructions only use bits 30-31.
1450	uint16_t mask = isDQFormInstruction(encoding: readFromHalf16(ctx, loc)) ? `0xf` : `0x3`;
1451	checkAlignment(ctx, loc, v: lo(v: val), n: mask + `1`, rel);
1452	write16(ctx, p: loc, v: (read16(ctx, p: loc) & mask) \| lo(v: val));
1453	} break;
1454	case R_PPC64_GOT16_HA:
1455	case R_PPC64_GOT_TLSGD16_HA:
1456	case R_PPC64_GOT_TLSLD16_HA:
1457	case R_PPC64_GOT_TPREL16_HA:
1458	case R_PPC64_GOT_DTPREL16_HA:
1459	case R_PPC64_TOC16_HA:
1460	case R_PPC64_DTPREL16_HA:
1461	val -= ppc64TocOffset;
1462	[[fallthrough]];
1463	case R_PPC64_ADDR16_HA:
1464	case R_PPC64_REL16_HA:
1465	case R_PPC64_TPREL16_HA:
1466	if (ctx.arg.tocOptimize && shouldTocOptimize && ha(v: val) == `0`)
1467	writeFromHalf16(ctx, loc, insn: NOP);
1468	else {
1469	checkInt(ctx, loc, v: val + `0x8000`, n: `32`, rel);
1470	write16(ctx, p: loc, v: ha(v: val));
1471	}
1472	break;
1473	case R_PPC64_GOT16_HI:
1474	case R_PPC64_GOT_TLSGD16_HI:
1475	case R_PPC64_GOT_TLSLD16_HI:
1476	case R_PPC64_GOT_TPREL16_HI:
1477	case R_PPC64_GOT_DTPREL16_HI:
1478	case R_PPC64_TOC16_HI:
1479	case R_PPC64_DTPREL16_HI:
1480	val -= ppc64TocOffset;
1481	[[fallthrough]];
1482	case R_PPC64_ADDR16_HI:
1483	case R_PPC64_REL16_HI:
1484	case R_PPC64_TPREL16_HI:
1485	checkInt(ctx, loc, v: val, n: `32`, rel);
1486	write16(ctx, p: loc, v: hi(v: val));
1487	break;
1488	case R_PPC64_ADDR16_HIGH:
1489	write16(ctx, p: loc, v: hi(v: val));
1490	break;
1491	case R_PPC64_DTPREL16_HIGHER:
1492	val -= ppc64TocOffset;
1493	[[fallthrough]];
1494	case R_PPC64_ADDR16_HIGHER:
1495	case R_PPC64_TPREL16_HIGHER:
1496	write16(ctx, p: loc, v: higher(v: val));
1497	break;
1498	case R_PPC64_DTPREL16_HIGHERA:
1499	val -= ppc64TocOffset;
1500	[[fallthrough]];
1501	case R_PPC64_ADDR16_HIGHERA:
1502	case R_PPC64_TPREL16_HIGHERA:
1503	write16(ctx, p: loc, v: highera(v: val));
1504	break;
1505	case R_PPC64_DTPREL16_HIGHEST:
1506	val -= ppc64TocOffset;
1507	[[fallthrough]];
1508	case R_PPC64_ADDR16_HIGHEST:
1509	case R_PPC64_TPREL16_HIGHEST:
1510	write16(ctx, p: loc, v: highest(v: val));
1511	break;
1512	case R_PPC64_DTPREL16_HIGHESTA:
1513	val -= ppc64TocOffset;
1514	[[fallthrough]];
1515	case R_PPC64_ADDR16_HIGHESTA:
1516	case R_PPC64_TPREL16_HIGHESTA:
1517	write16(ctx, p: loc, v: highesta(v: val));
1518	break;
1519	case R_PPC64_GOT16_LO:
1520	case R_PPC64_GOT_TLSGD16_LO:
1521	case R_PPC64_GOT_TLSLD16_LO:
1522	case R_PPC64_TOC16_LO:
1523	case R_PPC64_DTPREL16_LO:
1524	val -= ppc64TocOffset;
1525	[[fallthrough]];
1526	case R_PPC64_ADDR16_LO:
1527	case R_PPC64_REL16_LO:
1528	case R_PPC64_TPREL16_LO:
1529	// When the high-adjusted part of a toc relocation evaluates to 0, it is
1530	// changed into a nop. The lo part then needs to be updated to use the
1531	// toc-pointer register r2, as the base register.
1532	if (ctx.arg.tocOptimize && shouldTocOptimize && ha(v: val) == `0`) {
1533	uint32_t insn = readFromHalf16(ctx, loc);
1534	if (isInstructionUpdateForm(encoding: insn))
1535	Err(ctx) << getErrorLoc(ctx, loc)
1536	<< "can't toc-optimize an update instruction: 0x"
1537	<< utohexstr(X: insn, LowerCase: true);
1538	writeFromHalf16(ctx, loc, insn: (insn & `0xffe00000`) \| `0x00020000` \| lo(v: val));
1539	} else {
1540	write16(ctx, p: loc, v: lo(v: val));
1541	}
1542	break;
1543	case R_PPC64_GOT16_LO_DS:
1544	case R_PPC64_GOT_TPREL16_LO_DS:
1545	case R_PPC64_GOT_DTPREL16_LO_DS:
1546	case R_PPC64_TOC16_LO_DS:
1547	case R_PPC64_DTPREL16_LO_DS:
1548	val -= ppc64TocOffset;
1549	[[fallthrough]];
1550	case R_PPC64_ADDR16_LO_DS:
1551	case R_PPC64_TPREL16_LO_DS: {
1552	// DQ-form instructions use bits 28-31 as part of the instruction encoding
1553	// DS-form instructions only use bits 30-31.
1554	uint32_t insn = readFromHalf16(ctx, loc);
1555	uint16_t mask = isDQFormInstruction(encoding: insn) ? `0xf` : `0x3`;
1556	checkAlignment(ctx, loc, v: lo(v: val), n: mask + `1`, rel);
1557	if (ctx.arg.tocOptimize && shouldTocOptimize && ha(v: val) == `0`) {
1558	// When the high-adjusted part of a toc relocation evaluates to 0, it is
1559	// changed into a nop. The lo part then needs to be updated to use the toc
1560	// pointer register r2, as the base register.
1561	if (isInstructionUpdateForm(encoding: insn))
1562	Err(ctx) << getErrorLoc(ctx, loc)
1563	<< "can't toc-optimize an update instruction: 0x"
1564	<< utohexstr(X: insn, LowerCase: true);
1565	insn &= `0xffe00000` \| mask;
1566	writeFromHalf16(ctx, loc, insn: insn \| `0x00020000` \| lo(v: val));
1567	} else {
1568	write16(ctx, p: loc, v: (read16(ctx, p: loc) & mask) \| lo(v: val));
1569	}
1570	} break;
1571	case R_PPC64_TPREL16:
1572	checkInt(ctx, loc, v: val, n: `16`, rel);
1573	write16(ctx, p: loc, v: val);
1574	break;
1575	case R_PPC64_REL32:
1576	checkInt(ctx, loc, v: val, n: `32`, rel);
1577	write32(ctx, p: loc, v: val);
1578	break;
1579	case R_PPC64_DTPREL64:
1580	val -= dynamicThreadPointerOffset;
1581	[[fallthrough]];
1582	case R_PPC64_ADDR64:
1583	case R_PPC64_REL64:
1584	case R_PPC64_TOC:
1585	write64(ctx, p: loc, v: val);
1586	break;
1587	case R_PPC64_REL14: {
1588	uint32_t mask = `0x0000FFFC`;
1589	checkInt(ctx, loc, v: val, n: `16`, rel);
1590	checkAlignment(ctx, loc, v: val, n: `4`, rel);
1591	write32(ctx, p: loc, v: (read32(ctx, p: loc) & ~mask) \| (val & mask));
1592	break;
1593	}
1594	case R_PPC64_REL24:
1595	case R_PPC64_REL24_NOTOC: {
1596	uint32_t mask = `0x03FFFFFC`;
1597	checkInt(ctx, loc, v: val, n: `26`, rel);
1598	checkAlignment(ctx, loc, v: val, n: `4`, rel);
1599	write32(ctx, p: loc, v: (read32(ctx, p: loc) & ~mask) \| (val & mask));
1600	break;
1601	}
1602	case R_PPC64_DTPREL34:
1603	val -= dynamicThreadPointerOffset;
1604	[[fallthrough]];
1605	case R_PPC64_PCREL34:
1606	case R_PPC64_GOT_PCREL34:
1607	case R_PPC64_GOT_TLSGD_PCREL34:
1608	case R_PPC64_GOT_TLSLD_PCREL34:
1609	case R_PPC64_GOT_TPREL_PCREL34:
1610	case R_PPC64_TPREL34: {
1611	const uint64_t si0Mask = `0x00000003ffff0000`;
1612	const uint64_t si1Mask = `0x000000000000ffff`;
1613	const uint64_t fullMask = `0x0003ffff0000ffff`;
1614	checkInt(ctx, loc, v: val, n: `34`, rel);
1615
1616	uint64_t instr = readPrefixedInst(ctx, loc) & ~fullMask;
1617	writePrefixedInst(ctx, loc,
1618	insn: instr \| ((val & si0Mask) << `16`) \| (val & si1Mask));
1619	break;
1620	}
1621	// If we encounter a PCREL_OPT relocation that we won't optimize.
1622	case R_PPC64_PCREL_OPT:
1623	break;
1624	default:
1625	llvm_unreachable("unknown relocation");
1626	}
1627	}
1628
1629	bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
1630	uint64_t branchAddr, const Symbol &s, int64_t a) const {
1631	if (type != R_PPC64_REL14 && type != R_PPC64_REL24 &&
1632	type != R_PPC64_REL24_NOTOC)
1633	return false;
1634
1635	// If a function is in the Plt it needs to be called with a call-stub.
1636	if (s.isInPlt(ctx))
1637	return true;
1638
1639	// This check looks at the st_other bits of the callee with relocation
1640	// R_PPC64_REL14 or R_PPC64_REL24. If the value is 1, then the callee
1641	// clobbers the TOC and we need an R2 save stub.
1642	if (type != R_PPC64_REL24_NOTOC && (s.stOther >> `5`) == `1`)
1643	return true;
1644
1645	if (type == R_PPC64_REL24_NOTOC && (s.stOther >> `5`) > `1`)
1646	return true;
1647
1648	// An undefined weak symbol not in a PLT does not need a thunk. If it is
1649	// hidden, its binding has been converted to local, so we just check
1650	// isUndefined() here. A undefined non-weak symbol has been errored.
1651	if (s.isUndefined())
1652	return false;
1653
1654	// If the offset exceeds the range of the branch type then it will need
1655	// a range-extending thunk.
1656	// See the comment in getRelocTargetVA() about RE_PPC64_CALL.
1657	return !inBranchRange(
1658	type, src: branchAddr,
1659	dst: s.getVA(ctx, addend: a) + getPPC64GlobalEntryToLocalEntryOffset(ctx, stOther: s.stOther));
1660	}
1661
1662	uint32_t PPC64::getThunkSectionSpacing() const {
1663	// See comment in Arch/ARM.cpp for a more detailed explanation of
1664	// getThunkSectionSpacing(). For PPC64 we pick the constant here based on
1665	// R_PPC64_REL24, which is used by unconditional branch instructions.
1666	// 0x2000000 = (1 << 24-1) 4*
1667	return `0x2000000`;
1668	}
1669
1670	bool PPC64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
1671	int64_t offset = dst - src;
1672	if (type == R_PPC64_REL14)
1673	return isInt<`16`>(x: offset);
1674	if (type == R_PPC64_REL24 \|\| type == R_PPC64_REL24_NOTOC)
1675	return isInt<`26`>(x: offset);
1676	llvm_unreachable("unsupported relocation type used in branch");
1677	}
1678
1679	RelExpr PPC64::adjustGotPcExpr(RelType type, int64_t addend,
1680	const uint8_t loc) const* {
1681	if ((type == R_PPC64_GOT_PCREL34 \|\| type == R_PPC64_PCREL_OPT) &&
1682	ctx.arg.pcRelOptimize) {
1683	// It only makes sense to optimize pld since paddi means that the address
1684	// of the object in the GOT is required rather than the object itself.
1685	if ((readPrefixedInst(ctx, loc) & `0xfc000000`) == `0xe4000000`)
1686	return R_RELAX_GOT_PC;
1687	}
1688	return R_GOT_PC;
1689	}
1690
1691	// Reference: 3.7.4.1 of the 64-bit ELF V2 abi supplement.
1692	// The general dynamic code sequence for a global `x` uses 4 instructions.
1693	// Instruction Relocation Symbol
1694	// addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
1695	// addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x
1696	// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
1697	// R_PPC64_REL24 __tls_get_addr
1698	// nop None None
1699	//
1700	// Relaxing to initial-exec entails:
1701	// 1) Convert the addis/addi pair that builds the address of the tls_index
1702	// struct for 'x' to an addis/ld pair that loads an offset from a got-entry.
1703	// 2) Convert the call to __tls_get_addr to a nop.
1704	// 3) Convert the nop following the call to an add of the loaded offset to the
1705	// thread pointer.
1706	// Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is
1707	// used as the relaxation hint for both steps 2 and 3.
1708	void PPC64::relaxTlsGdToIe(uint8_t loc, const* Relocation &rel,
1709	uint64_t val) const {
1710	switch (rel.type) {
1711	case R_PPC64_GOT_TLSGD16_HA:
1712	// This is relaxed from addis rT, r2, sym@got@tlsgd@ha to
1713	// addis rT, r2, sym@got@tprel@ha.
1714	relocateNoSym(loc, type: R_PPC64_GOT_TPREL16_HA, val);
1715	return;
1716	case R_PPC64_GOT_TLSGD16:
1717	case R_PPC64_GOT_TLSGD16_LO: {
1718	// Relax from addi r3, rA, sym@got@tlsgd@l to
1719	// ld r3, sym@got@tprel@l(rA)
1720	uint32_t ra = (readFromHalf16(ctx, loc) & (`0x1f` << `16`));
1721	writeFromHalf16(ctx, loc, insn: `0xe8600000` \| ra);
1722	relocateNoSym(loc, type: R_PPC64_GOT_TPREL16_LO_DS, val);
1723	return;
1724	}
1725	case R_PPC64_GOT_TLSGD_PCREL34: {
1726	// Relax from paddi r3, 0, sym@got@tlsgd@pcrel, 1 to
1727	// pld r3, sym@got@tprel@pcrel
1728	writePrefixedInst(ctx, loc, insn: `0x04100000e4600000`);
1729	relocateNoSym(loc, type: R_PPC64_GOT_TPREL_PCREL34, val);
1730	return;
1731	}
1732	case R_PPC64_TLSGD: {
1733	// PC Relative Relaxation:
1734	// Relax from bl __tls_get_addr@notoc(x@tlsgd) to
1735	// nop
1736	// TOC Relaxation:
1737	// Relax from bl __tls_get_addr(x@tlsgd)
1738	// nop
1739	// to
1740	// nop
1741	// add r3, r3, r13
1742	const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);
1743	if (locAsInt % `4` == `0`) {
1744	write32(ctx, p: loc, v: NOP); // bl __tls_get_addr(sym@tlsgd) --> nop
1745	write32(ctx, p: loc + `4`, v: `0x7c636a14`); // nop --> add r3, r3, r13
1746	} else if (locAsInt % `4` == `1`) {
1747	// bl __tls_get_addr(sym@tlsgd) --> add r3, r3, r13
1748	write32(ctx, p: loc - `1`, v: `0x7c636a14`);
1749	} else {
1750	Err(ctx) << "R_PPC64_TLSGD has unexpected byte alignment";
1751	}
1752	return;
1753	}
1754	default:
1755	llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
1756	}
1757	}
1758
1759	void PPC64::relocateAlloc(InputSection &sec, uint8_t buf) const* {
1760	uint64_t secAddr = sec.getOutputSection()->addr + sec.outSecOff;
1761	uint64_t lastPPCRelaxedRelocOff = -`1`;
1762	for (const Relocation &rel : sec.relocs()) {
1763	uint8_t *loc = buf + rel.offset;
1764	const uint64_t val = sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset);
1765	switch (rel.type) {
1766	case R_PPC64_GOT_PCREL34:
1767	if (rel.expr == R_RELAX_GOT_PC) {
1768	lastPPCRelaxedRelocOff = rel.offset;
1769	relaxGot(loc, rel, val);
1770	continue;
1771	}
1772	break;
1773	case R_PPC64_PCREL_OPT:
1774	// R_PPC64_PCREL_OPT must appear immediately after R_PPC64_GOT_PCREL34
1775	// at the same offset. Only relax if the associated GOT_PCREL34 was
1776	// relaxed.
1777	if (rel.expr == R_RELAX_GOT_PC && rel.offset == lastPPCRelaxedRelocOff) {
1778	relaxGot(loc, rel, val);
1779	continue;
1780	}
1781	break;
1782	case R_PPC64_TOC16_HA:
1783	case R_PPC64_TOC16_LO_DS:
1784	// rel.sym refers to the STT_SECTION symbol associated to the .toc input
1785	// section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC
1786	// entry, there may be R_PPC64_TOC16_HA not paired with
1787	// R_PPC64_TOC16_LO_DS. Don't relax. This loses some relaxation
1788	// opportunities but is safe.
1789	if (ctx.arg.tocOptimize &&
1790	!ctx.ppc64noTocRelax.contains(V: {rel.sym, rel.addend}) &&
1791	tryRelaxPPC64TocIndirection(ctx, rel, bufLoc: loc))
1792	continue;
1793	break;
1794	case R_PPC64_REL14:
1795	case R_PPC64_REL24:
1796	// If this is a call to __tls_get_addr, it may be part of a TLS
1797	// sequence that has been relaxed and turned into a nop. In this
1798	// case, we don't want to handle it as a call.
1799	if (read32(ctx, p: loc) == NOP)
1800	continue;
1801
1802	// Patch a nop (0x60000000) to a ld.
1803	if (rel.sym->needsTocRestore()) {
1804	// gcc/gfortran 5.4, 6.3 and earlier versions do not add nop for
1805	// recursive calls even if the function is preemptible. This is not
1806	// wrong in the common case where the function is not preempted at
1807	// runtime. Just ignore.
1808	if ((rel.offset + `8` > sec.content().size() \|\|
1809	read32(ctx, p: loc + `4`) != NOP) &&
1810	rel.sym->file != sec.file) {
1811	// Use substr(6) to remove the "__plt_" prefix.
1812	Err(ctx) << getErrorLoc(ctx, loc) << "call to "
1813	<< toStr(ctx, *rel.sym).substr(pos: `6`)
1814	<< " lacks nop, can't restore toc";
1815	continue;
1816	}
1817	write32(ctx, p: loc + `4`, v: `0xe8410018`); // ld %r2, 24(%r1)
1818	}
1819	break;
1820	}
1821	relocate(loc, rel, val);
1822	}
1823	}
1824
1825	// The prologue for a split-stack function is expected to look roughly
1826	// like this:
1827	// .Lglobal_entry_point:
1828	// # TOC pointer initialization.
1829	// ...
1830	// .Llocal_entry_point:
1831	// # load the __private_ss member of the threads tcbhead.
1832	// ld r0,-0x7000-64(r13)
1833	// # subtract the functions stack size from the stack pointer.
1834	// addis r12, r1, ha(-stack-frame size)
1835	// addi r12, r12, l(-stack-frame size)
1836	// # compare needed to actual and branch to allocate_more_stack if more
1837	// # space is needed, otherwise fallthrough to 'normal' function body.
1838	// cmpld cr7,r12,r0
1839	// blt- cr7, .Lallocate_more_stack
1840	//
1841	// -) The allocate_more_stack block might be placed after the split-stack
1842	// prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body`
1843	// instead.
1844	// -) If either the addis or addi is not needed due to the stack size being
1845	// smaller then 32K or a multiple of 64K they will be replaced with a nop,
1846	// but there will always be 2 instructions the linker can overwrite for the
1847	// adjusted stack size.
1848	//
1849	// The linkers job here is to increase the stack size used in the addis/addi
1850	// pair by split-stack-size-adjust.
1851	// addis r12, r1, ha(-stack-frame size - split-stack-adjust-size)
1852	// addi r12, r12, l(-stack-frame size - split-stack-adjust-size)
1853	bool PPC64::adjustPrologueForCrossSplitStack(uint8_t loc, uint8_t end,
1854	uint8_t stOther) const {
1855	// If the caller has a global entry point adjust the buffer past it. The start
1856	// of the split-stack prologue will be at the local entry point.
1857	loc += getPPC64GlobalEntryToLocalEntryOffset(ctx, stOther);
1858
1859	// At the very least we expect to see a load of some split-stack data from the
1860	// tcb, and 2 instructions that calculate the ending stack address this
1861	// function will require. If there is not enough room for at least 3
1862	// instructions it can't be a split-stack prologue.
1863	if (loc + `12` >= end)
1864	return false;
1865
1866	// First instruction must be `ld r0, -0x7000-64(r13)`
1867	if (read32(ctx, p: loc) != `0xe80d8fc0`)
1868	return false;
1869
1870	int16_t hiImm = `0`;
1871	int16_t loImm = `0`;
1872	// First instruction can be either an addis if the frame size is larger then
1873	// 32K, or an addi if the size is less then 32K.
1874	int32_t firstInstr = read32(ctx, p: loc + `4`);
1875	if (getPrimaryOpCode(encoding: firstInstr) == `15`) {
1876	hiImm = firstInstr & `0xFFFF`;
1877	} else if (getPrimaryOpCode(encoding: firstInstr) == `14`) {
1878	loImm = firstInstr & `0xFFFF`;
1879	} else {
1880	return false;
1881	}
1882
1883	// Second instruction is either an addi or a nop. If the first instruction was
1884	// an addi then LoImm is set and the second instruction must be a nop.
1885	uint32_t secondInstr = read32(ctx, p: loc + `8`);
1886	if (!loImm && getPrimaryOpCode(encoding: secondInstr) == `14`) {
1887	loImm = secondInstr & `0xFFFF`;
1888	} else if (secondInstr != NOP) {
1889	return false;
1890	}
1891
1892	// The register operands of the first instruction should be the stack-pointer
1893	// (r1) as the input (RA) and r12 as the output (RT). If the second
1894	// instruction is not a nop, then it should use r12 as both input and output.
1895	auto checkRegOperands = [](uint32_t instr, uint8_t expectedRT,
1896	uint8_t expectedRA) {
1897	return ((instr & `0x3E00000`) >> `21` == expectedRT) &&
1898	((instr & `0x1F0000`) >> `16` == expectedRA);
1899	};
1900	if (!checkRegOperands (firstInstr, `12`, `1`))
1901	return false;
1902	if (secondInstr != NOP && !checkRegOperands (secondInstr, `12`, `12`))
1903	return false;
1904
1905	int32_t stackFrameSize = (hiImm * `65536`) + loImm;
1906	// Check that the adjusted size doesn't overflow what we can represent with 2
1907	// instructions.
1908	if (stackFrameSize < ctx.arg.splitStackAdjustSize + INT32_MIN) {
1909	Err(ctx) << getErrorLoc(ctx, loc)
1910	<< "split-stack prologue adjustment overflows";
1911	return false;
1912	}
1913
1914	int32_t adjustedStackFrameSize =
1915	stackFrameSize - ctx.arg.splitStackAdjustSize;
1916
1917	loImm = adjustedStackFrameSize & `0xFFFF`;
1918	hiImm = (adjustedStackFrameSize + `0x8000`) >> `16`;
1919	if (hiImm) {
1920	write32(ctx, p: loc + `4`, v: `0x3d810000` \| (uint16_t)hiImm);
1921	// If the low immediate is zero the second instruction will be a nop.
1922	secondInstr = loImm ? `0x398C0000` \| (uint16_t)loImm : NOP;
1923	write32(ctx, p: loc + `8`, v: secondInstr);
1924	} else {
1925	// addi r12, r1, imm
1926	write32(ctx, p: loc + `4`, v: (`0x39810000`) \| (uint16_t)loImm);
1927	write32(ctx, p: loc + `8`, v: NOP);
1928	}
1929
1930	return true;
1931	}
1932
1933	void elf::setPPC64TargetInfo(Ctx &ctx) { ctx.target.reset(p: new PPC64 (ctx)); }
1934

Browse the source code of llvm_projects/lld/ELF/Arch/PPC64.cpp