PPC64.cpp source code [llvm_projects/lld/ELF/Arch/PPC64.cpp]

1	//===- PPC64.cpp ----------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "InputFiles.h"
10	#include "OutputSections.h"
11	#include "SymbolTable.h"
12	#include "Symbols.h"
13	#include "SyntheticSections.h"
14	#include "Target.h"
15	#include "Thunks.h"
16
17	using namespace llvm;
18	using namespace llvm::object;
19	using namespace llvm::support::endian;
20	using namespace llvm::ELF;
21	using namespace lld;
22	using namespace lld::elf;
23
// Bias added to the start of the TOC to obtain the TOC base (see
// getPPC64TocBase).
constexpr uint64_t ppc64TocOffset = 0x8000;
// NOTE(review): presumably the bias applied to dynamic-thread-pointer relative
// values; its uses are outside this part of the file -- confirm.
constexpr uint64_t dynamicThreadPointerOffset = 0x8000;
26
27	namespace {
// The instruction encoding of bits 21-30 from the ISA for the Xform and Dform
// instructions that can be used as part of the initial exec TLS sequence.
enum XFormOpcd {
  LBZX = 87,
  LHZX = 279,
  LWZX = 23,
  LDX = 21,
  STBX = 215,
  STHX = 407,
  STWX = 151,
  STDX = 149,
  LHAX = 343,
  LWAX = 341,
  LFSX = 535,
  LFDX = 599,
  STFSX = 663,
  STFDX = 727,
  ADD = 266,
};
47
// Primary opcodes of the D-form instructions handled in this file. The
// values are shifted left by 26 when an encoding is built from them (see
// getPPCDFormOp) and compared against getPrimaryOpCode() results.
enum DFormOpcd {
  LBZ = 34,
  LBZU = 35,
  LHZ = 40,
  LHZU = 41,
  LHAU = 43,
  LWZ = 32,
  LWZU = 33,
  LFSU = 49,
  LFDU = 51,
  STB = 38,
  STBU = 39,
  STH = 44,
  STHU = 45,
  STW = 36,
  STWU = 37,
  STFSU = 53,
  STFDU = 55,
  LHA = 42,
  LFS = 48,
  LFD = 50,
  STFS = 52,
  STFD = 54,
  ADDI = 14
};
73
// Primary opcodes of the DS-form instructions handled in this file. LD and
// LWA share primary opcode 58; callers tell them apart by the low two bits of
// the encoding.
enum DSFormOpcd {
  LD = 58,
  LWA = 58,
  STD = 62
};
79
// Encoding written wherever an instruction is relaxed away to a no-op.
constexpr uint32_t NOP = 0x60000000;
81
// Encodings (primary opcode plus, where the opcode is shared, the low
// distinguishing bits) of the non-prefixed loads and stores recognised by
// getPPCLegacyInsn(). NOINSN marks "not a recognised instruction".
enum class PPCLegacyInsn : uint32_t {
  NOINSN = 0,
  // Loads.
  LBZ = 0x88000000,
  LHZ = 0xa0000000,
  LWZ = 0x80000000,
  LHA = 0xa8000000,
  LWA = 0xe8000002,
  LD = 0xe8000000,
  LFS = 0xC0000000,
  LXSSP = 0xe4000003,
  LFD = 0xc8000000,
  LXSD = 0xe4000002,
  LXV = 0xf4000001,
  LXVP = 0x18000000,

  // Stores.
  STB = 0x98000000,
  STH = 0xb0000000,
  STW = 0x90000000,
  STD = 0xf8000000,
  STFS = 0xd0000000,
  STXSSP = 0xf4000003,
  STFD = 0xd8000000,
  STXSD = 0xf4000002,
  STXV = 0xf4000005,
  STXVP = 0x18000001
};
// 64-bit templates for the prefixed counterparts of PPCLegacyInsn. Entries
// equal to PREFIX_MLS carry only the prefix word; getPCRelativeForm() ORs in
// the bits kept from the legacy instruction. PREFIX_8LS entries also fix the
// primary opcode of the suffix word.
enum class PPCPrefixedInsn : uint64_t {
  NOINSN = 0,
  PREFIX_MLS = 0x0610000000000000,
  PREFIX_8LS = 0x0410000000000000,

  // Loads.
  PLBZ = PREFIX_MLS,
  PLHZ = PREFIX_MLS,
  PLWZ = PREFIX_MLS,
  PLHA = PREFIX_MLS,
  PLWA = PREFIX_8LS | 0xa4000000,
  PLD = PREFIX_8LS | 0xe4000000,
  PLFS = PREFIX_MLS,
  PLXSSP = PREFIX_8LS | 0xac000000,
  PLFD = PREFIX_MLS,
  PLXSD = PREFIX_8LS | 0xa8000000,
  PLXV = PREFIX_8LS | 0xc8000000,
  PLXVP = PREFIX_8LS | 0xe8000000,

  // Stores.
  PSTB = PREFIX_MLS,
  PSTH = PREFIX_MLS,
  PSTW = PREFIX_MLS,
  PSTD = PREFIX_8LS | 0xf4000000,
  PSTFS = PREFIX_MLS,
  PSTXSSP = PREFIX_8LS | 0xbc000000,
  PSTFD = PREFIX_MLS,
  PSTXSD = PREFIX_8LS | 0xb8000000,
  PSTXV = PREFIX_8LS | 0xd8000000,
  PSTXVP = PREFIX_8LS | 0xf8000000
};
141
142	static bool checkPPCLegacyInsn(uint32_t encoding) {
143	PPCLegacyInsn insn = static_cast<PPCLegacyInsn>(encoding);
144	if (insn == PPCLegacyInsn::NOINSN)
145	return false;
146	#define PCREL_OPT(Legacy, PCRel, InsnMask) \
147	if (insn == PPCLegacyInsn::Legacy) \
148	return true;
149	#include "PPCInsns.def"
150	#undef PCREL_OPT
151	return false;
152	}
153
// Masks to apply to legacy instructions when converting them to prefixed,
// pc-relative versions. For the most part, the primary opcode is shared
// between the legacy instruction and the suffix of its prefixed version.
// However, there are some instances where that isn't the case (DS-Form and
// DQ-form instructions).
enum class LegacyToPrefixMask : uint64_t {
  NOMASK = 0x0,
  OPC_AND_RST = 0xffe00000, // Primary opc (0-5) and R[ST] (6-10).
  ONLY_RST = 0x3e00000,     // [RS]T (6-10).
  ST_STX28_TO5 =
      0x8000000003e00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5.
};
166
167	class PPC64 final : public TargetInfo {
168	public:
169	PPC64(Ctx &);
170	int getTlsGdRelaxSkip(RelType type) const override;
171	uint32_t calcEFlags() const override;
172	RelExpr getRelExpr(RelType type, const Symbol &s,
173	const uint8_t loc) const* override;
174	RelType getDynRel(RelType type) const override;
175	int64_t getImplicitAddend(const uint8_t buf, RelType type) const* override;
176	void writePltHeader(uint8_t buf) const* override;
177	void writePlt(uint8_t buf, const* Symbol &sym,
178	uint64_t pltEntryAddr) const override;
179	void writeIplt(uint8_t buf, const* Symbol &sym,
180	uint64_t pltEntryAddr) const override;
181	void relocate(uint8_t loc, const* Relocation &rel,
182	uint64_t val) const override;
183	void writeGotHeader(uint8_t buf) const* override;
184	bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
185	uint64_t branchAddr, const Symbol &s,
186	int64_t a) const override;
187	uint32_t getThunkSectionSpacing() const override;
188	bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
189	RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
190	RelExpr adjustGotPcExpr(RelType type, int64_t addend,
191	const uint8_t loc) const* override;
192	void relaxGot(uint8_t loc, const* Relocation &rel, uint64_t val) const;
193	void relocateAlloc(InputSectionBase &sec, uint8_t buf) const* override;
194
195	bool adjustPrologueForCrossSplitStack(uint8_t loc, uint8_t end,
196	uint8_t stOther) const override;
197
198	private:
199	void relaxTlsGdToIe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
200	void relaxTlsGdToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
201	void relaxTlsLdToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
202	void relaxTlsIeToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
203	};
204	} // namespace
205
206	uint64_t elf::getPPC64TocBase(Ctx &ctx) {
207	// The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
208	// TOC starts where the first of these sections starts. We always create a
209	// .got when we see a relocation that uses it, so for us the start is always
210	// the .got.
211	uint64_t tocVA = ctx.in.got ->getVA();
212
213	// Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000
214	// thus permitting a full 64 Kbytes segment. Note that the glibc startup
215	// code (crt1.o) assumes that you can get from the TOC base to the
216	// start of the .toc section with only a single (signed) 16-bit relocation.
217	return tocVA + ppc64TocOffset;
218	}
219
220	unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(Ctx &ctx, uint8_t stOther) {
221	// The offset is encoded into the 3 most significant bits of the st_other
222	// field, with some special values described in section 3.4.1 of the ABI:
223	// 0 --> Zero offset between the GEP and LEP, and the function does NOT use
224	// the TOC pointer (r2). r2 will hold the same value on returning from
225	// the function as it did on entering the function.
226	// 1 --> Zero offset between the GEP and LEP, and r2 should be treated as a
227	// caller-saved register for all callers.
228	// 2-6 --> The binary logarithm of the offset eg:
229	// 2 --> 2^2 = 4 bytes --> 1 instruction.
230	// 6 --> 2^6 = 64 bytes --> 16 instructions.
231	// 7 --> Reserved.
232	uint8_t gepToLep = (stOther >> `5`) & `7`;
233	if (gepToLep < `2`)
234	return `0`;
235
236	// The value encoded in the st_other bits is the
237	// log-base-2(offset).
238	if (gepToLep < `7`)
239	return `1` << gepToLep;
240
241	ErrAlways(ctx)
242	<< "reserved value of 7 in the 3 most-significant-bits of st_other";
243	return `0`;
244	}
245
246	void elf::writePrefixedInst(Ctx &ctx, uint8_t *loc, uint64_t insn) {
247	insn = ctx.arg.isLE ? insn << `32` \| insn >> `32` : insn;
248	write64(ctx, p: loc, v: insn);
249	}
250
251	static bool addOptional(Ctx &ctx, StringRef name, uint64_t value,
252	std::vector<Defined *> &defined) {
253	Symbol *sym = ctx.symtab ->find(name);
254	if (!sym \|\| sym->isDefined())
255	return false;
256	sym->resolve(ctx, other: Defined {ctx, ctx.internalFile, StringRef (), STB_GLOBAL,
257	STV_HIDDEN, STT_FUNC, value,
258	/size=/`0`, /section=/nullptr});
259	defined.push_back(x: cast<Defined>(Val: sym));
260	return true;
261	}
262
263	// If from is 14, write ${prefix}14: firstInsn; ${prefix}15:
264	// firstInsn+0x200008; ...; ${prefix}31: firstInsn+(31-14)0x200008; $tail*
265	// The labels are defined only if they exist in the symbol table.
266	static void writeSequence(Ctx &ctx, const char prefix, int* from,
267	uint32_t firstInsn, ArrayRef<uint32_t> tail) {
268	std::vector<Defined *> defined;
269	char name[`16`];
270	int first;
271	const size_t size = `32` - from + tail.size();
272	MutableArrayRef<uint32_t> buf(ctx.bAlloc.Allocate<uint32_t>(Num: size), size);
273	uint32_t *ptr = buf.data();
274	for (int r = from; r < `32`; ++r) {
275	format(Fmt: "%s%d", Vals: prefix, Vals: r).snprint(Buffer: name, BufferSize: sizeof(name));
276	if (addOptional(ctx, name, value: `4` * (r - from), defined) && defined.size() == `1`)
277	first = r - from;
278	write32(ctx, p: ptr++, v: firstInsn + `0x200008` * (r - from));
279	}
280	for (uint32_t insn : tail)
281	write32(ctx, p: ptr++, v: insn);
282	assert(ptr == &*buf.end());
283
284	if (defined.empty())
285	return;
286	// The full section content has the extent of [begin, end). We drop unused
287	// instructions and write [first,end).
288	auto *sec = make<InputSection>(
289	args&: ctx.internalFile, args: ".text", args: SHT_PROGBITS, args: SHF_ALLOC, /addralign=/args: `4`,
290	/entsize=/args: `0`,
291	args: ArrayRef(reinterpret_cast<uint8_t *>(buf.data() + first),
292	`4` * (buf.size() - first)));
293	ctx.inputSections.push_back(Elt: sec);
294	for (Defined *sym : defined) {
295	sym->section = sec;
296	sym->value -= `4` * first;
297	}
298	}
299
300	// Implements some save and restore functions as described by ELF V2 ABI to be
301	// compatible with GCC. With GCC -Os, when the number of call-saved registers
302	// exceeds a certain threshold, GCC generates _savegpr0_ _restgpr0_* calls and*
303	// expects the linker to define them. See
304	// https://sourceware.org/pipermail/binutils/2002-February/017444.html and
305	// https://sourceware.org/pipermail/binutils/2004-August/036765.html . This is
306	// weird because libgcc.a would be the natural place. The linker generation
307	// approach has the advantage that the linker can generate multiple copies to
308	// avoid long branch thunks. However, we don't consider the advantage
309	// significant enough to complicate our trunk implementation, so we take the
310	// simple approach and synthesize .text sections providing the implementation.
311	void elf::addPPC64SaveRestore(Ctx &ctx) {
312	constexpr uint32_t blr = `0x4e800020`, mtlr_0 = `0x7c0803a6`;
313
314	// _restgpr0_14: ld 14, -144(1); _restgpr0_15: ld 15, -136(1); ...
315	// Tail: ld 0, 16(1); mtlr 0; blr
316	writeSequence(ctx, prefix: "_restgpr0_", from: `14`, firstInsn: `0xe9c1ff70`, tail: {`0xe8010010`, mtlr_0, blr});
317	// _restgpr1_14: ld 14, -144(12); _restgpr1_15: ld 15, -136(12); ...
318	// Tail: blr
319	writeSequence(ctx, prefix: "_restgpr1_", from: `14`, firstInsn: `0xe9ccff70`, tail: {blr});
320	// _savegpr0_14: std 14, -144(1); _savegpr0_15: std 15, -136(1); ...
321	// Tail: std 0, 16(1); blr
322	writeSequence(ctx, prefix: "_savegpr0_", from: `14`, firstInsn: `0xf9c1ff70`, tail: {`0xf8010010`, blr});
323	// _savegpr1_14: std 14, -144(12); _savegpr1_15: std 15, -136(12); ...
324	// Tail: blr
325	writeSequence(ctx, prefix: "_savegpr1_", from: `14`, firstInsn: `0xf9ccff70`, tail: {blr});
326	}
327
328	// Find the R_PPC64_ADDR64 in .rela.toc with matching offset.
329	template <typename ELFT>
330	static std::pair<Defined *, int64_t>
331	getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) {
332	// .rela.toc contains exclusively R_PPC64_ADDR64 relocations sorted by
333	// r_offset: 0, 8, 16, etc. For a given Offset, Offset / 8 gives us the
334	// relocation index in most cases.
335	//
336	// In rare cases a TOC entry may store a constant that doesn't need an
337	// R_PPC64_ADDR64, the corresponding r_offset is therefore missing. Offset / 8
338	// points to a relocation with larger r_offset. Do a linear probe then.
339	// Constants are extremely uncommon in .toc and the extra number of array
340	// accesses can be seen as a small constant.
341	ArrayRef<typename ELFT::Rela> relas =
342	tocSec->template relsOrRelas<ELFT>().relas;
343	if (relas.empty())
344	return {};
345	uint64_t index = std::min<uint64_t>(offset / `8`, relas.size() - `1`);
346	for (;;) {
347	if (relas[index].r_offset == offset) {
348	Symbol &sym = tocSec->file->getRelocTargetSym(relas[index]);
349	return {dyn_cast<Defined>(Val: &sym), getAddend<ELFT>(relas[index])};
350	}
351	if (relas[index].r_offset < offset \|\| index == `0`)
352	break;
353	--index;
354	}
355	return {};
356	}
357
358	// When accessing a symbol defined in another translation unit, compilers
359	// reserve a .toc entry, allocate a local label and generate toc-indirect
360	// instructions:
361	//
362	// addis 3, 2, .LC0@toc@ha # R_PPC64_TOC16_HA
363	// ld 3, .LC0@toc@l(3) # R_PPC64_TOC16_LO_DS, load the address from a .toc entry
364	// ld/lwa 3, 0(3) # load the value from the address
365	//
366	// .section .toc,"aw",@progbits
367	// .LC0: .tc var[TC],var
368	//
369	// If var is defined, non-preemptable and addressable with a 32-bit signed
370	// offset from the toc base, the address of var can be computed by adding an
371	// offset to the toc base, saving a load.
372	//
373	// addis 3,2,var@toc@ha # this may be relaxed to a nop,
374	// addi 3,3,var@toc@l # then this becomes addi 3,2,var@toc
375	// ld/lwa 3, 0(3) # load the value from the address
376	//
377	// Returns true if the relaxation is performed.
378	static bool tryRelaxPPC64TocIndirection(Ctx &ctx, const Relocation &rel,
379	uint8_t *bufLoc) {
380	assert(ctx.arg.tocOptimize);
381	if (rel.addend < `0`)
382	return false;
383
384	// If the symbol is not the .toc section, this isn't a toc-indirection.
385	Defined *defSym = dyn_cast<Defined>(Val: rel.sym);
386	if (!defSym \|\| !defSym->isSection() \|\| defSym->section->name != ".toc")
387	return false;
388
389	Defined *d;
390	int64_t addend;
391	auto *tocISB = cast<InputSectionBase>(Val: defSym->section);
392	std::tie(args&: d, args&: addend) =
393	ctx.arg.isLE ? getRelaTocSymAndAddend<ELF64LE>(tocSec: tocISB, offset: rel.addend)
394	: getRelaTocSymAndAddend<ELF64BE>(tocSec: tocISB, offset: rel.addend);
395
396	// Only non-preemptable defined symbols can be relaxed.
397	if (!d \|\| d->isPreemptible)
398	return false;
399
400	// R_PPC64_ADDR64 should have created a canonical PLT for the non-preemptable
401	// ifunc and changed its type to STT_FUNC.
402	assert(!d->isGnuIFunc());
403
404	// Two instructions can materialize a 32-bit signed offset from the toc base.
405	uint64_t tocRelative = d->getVA(ctx, addend) - getPPC64TocBase(ctx);
406	if (!isInt<`32`>(x: tocRelative))
407	return false;
408
409	// Add PPC64TocOffset that will be subtracted by PPC64::relocate().
410	static_cast<const PPC64 &>(*ctx.target)
411	.relaxGot(loc: bufLoc, rel, val: tocRelative + ppc64TocOffset);
412	return true;
413	}
414
// Relocation masks following the #lo(value), #hi(value), #ha(value),
// #higher(value), #highera(value), #highest(value), and #highesta(value)
// macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi
// document.
//
// Each helper selects one 16-bit field of `v`; the "a" variants first add
// 0x8000 to compensate for the sign extension of the lower half. ha() alone
// returns the untruncated shifted value as a uint64_t.
static uint16_t lo(uint64_t v) { return v; }
static uint16_t hi(uint64_t v) { return v >> 16; }
static uint64_t ha(uint64_t v) { return (v + 0x8000) >> 16; }
static uint16_t higher(uint64_t v) { return v >> 32; }
static uint16_t highera(uint64_t v) { return (v + 0x8000) >> 32; }
static uint16_t highest(uint64_t v) { return v >> 48; }
static uint16_t highesta(uint64_t v) { return (v + 0x8000) >> 48; }
426
// Extracts the 'PO' field of an instruction encoding, i.e. its top 6 bits.
static uint8_t getPrimaryOpCode(uint32_t encoding) { return (encoding >> 26); }
429
430	static bool isDQFormInstruction(uint32_t encoding) {
431	switch (getPrimaryOpCode(encoding)) {
432	default:
433	return false;
434	case `6`: // Power10 paired loads/stores (lxvp, stxvp).
435	case `56`:
436	// The only instruction with a primary opcode of 56 is `lq`.
437	return true;
438	case `61`:
439	// There are both DS and DQ instruction forms with this primary opcode.
440	// Namely `lxv` and `stxv` are the DQ-forms that use it.
441	// The DS 'XO' bits being set to 01 is restricted to DQ form.
442	return (encoding & `3`) == `0x1`;
443	}
444	}
445
446	static bool isDSFormInstruction(PPCLegacyInsn insn) {
447	switch (insn) {
448	default:
449	return false;
450	case PPCLegacyInsn::LWA:
451	case PPCLegacyInsn::LD:
452	case PPCLegacyInsn::LXSD:
453	case PPCLegacyInsn::LXSSP:
454	case PPCLegacyInsn::STD:
455	case PPCLegacyInsn::STXSD:
456	case PPCLegacyInsn::STXSSP:
457	return true;
458	}
459	}
460
461	static PPCLegacyInsn getPPCLegacyInsn(uint32_t encoding) {
462	uint32_t opc = encoding & `0xfc000000`;
463
464	// If the primary opcode is shared between multiple instructions, we need to
465	// fix it up to match the actual instruction we are after.
466	if ((opc == `0xe4000000` \|\| opc == `0xe8000000` \|\| opc == `0xf4000000` \|\|
467	opc == `0xf8000000`) &&
468	!isDQFormInstruction(encoding))
469	opc = encoding & `0xfc000003`;
470	else if (opc == `0xf4000000`)
471	opc = encoding & `0xfc000007`;
472	else if (opc == `0x18000000`)
473	opc = encoding & `0xfc00000f`;
474
475	// If the value is not one of the enumerators in PPCLegacyInsn, we want to
476	// return PPCLegacyInsn::NOINSN.
477	if (!checkPPCLegacyInsn(encoding: opc))
478	return PPCLegacyInsn::NOINSN;
479	return static_cast<PPCLegacyInsn>(opc);
480	}
481
482	static PPCPrefixedInsn getPCRelativeForm(PPCLegacyInsn insn) {
483	switch (insn) {
484	#define PCREL_OPT(Legacy, PCRel, InsnMask) \
485	case PPCLegacyInsn::Legacy: \
486	return PPCPrefixedInsn::PCRel
487	#include "PPCInsns.def"
488	#undef PCREL_OPT
489	}
490	return PPCPrefixedInsn::NOINSN;
491	}
492
493	static LegacyToPrefixMask getInsnMask(PPCLegacyInsn insn) {
494	switch (insn) {
495	#define PCREL_OPT(Legacy, PCRel, InsnMask) \
496	case PPCLegacyInsn::Legacy: \
497	return LegacyToPrefixMask::InsnMask
498	#include "PPCInsns.def"
499	#undef PCREL_OPT
500	}
501	return LegacyToPrefixMask::NOMASK;
502	}
503	static uint64_t getPCRelativeForm(uint32_t encoding) {
504	PPCLegacyInsn origInsn = getPPCLegacyInsn(encoding);
505	PPCPrefixedInsn pcrelInsn = getPCRelativeForm(insn: origInsn);
506	if (pcrelInsn == PPCPrefixedInsn::NOINSN)
507	return UINT64_C(-`1`);
508	LegacyToPrefixMask origInsnMask = getInsnMask(insn: origInsn);
509	uint64_t pcrelEncoding =
510	(uint64_t)pcrelInsn \| (encoding & (uint64_t)origInsnMask);
511
512	// If the mask requires moving bit 28 to bit 5, do that now.
513	if (origInsnMask == LegacyToPrefixMask::ST_STX28_TO5)
514	pcrelEncoding \|= (encoding & `0x8`) << `23`;
515	return pcrelEncoding;
516	}
517
518	static bool isInstructionUpdateForm(uint32_t encoding) {
519	switch (getPrimaryOpCode(encoding)) {
520	default:
521	return false;
522	case LBZU:
523	case LHAU:
524	case LHZU:
525	case LWZU:
526	case LFSU:
527	case LFDU:
528	case STBU:
529	case STHU:
530	case STWU:
531	case STFSU:
532	case STFDU:
533	return true;
534	// LWA has the same opcode as LD, and the DS bits is what differentiates
535	// between LD/LDU/LWA
536	case LD:
537	case STD:
538	return (encoding & `3`) == `1`;
539	}
540	}
541
542	// Compute the total displacement between the prefixed instruction that gets
543	// to the start of the data and the load/store instruction that has the offset
544	// into the data structure.
545	// For example:
546	// paddi 3, 0, 1000, 1
547	// lwz 3, 20(3)
548	// Should add up to 1020 for total displacement.
549	static int64_t getTotalDisp(uint64_t prefixedInsn, uint32_t accessInsn) {
550	int64_t disp34 = llvm::SignExtend64(
551	X: ((prefixedInsn & `0x3ffff00000000`) >> `16`) \| (prefixedInsn & `0xffff`), B: `34`);
552	int32_t disp16 = llvm::SignExtend32(X: accessInsn & `0xffff`, B: `16`);
553	// For DS and DQ form instructions, we need to mask out the XO bits.
554	if (isDQFormInstruction(encoding: accessInsn))
555	disp16 &= ~`0xf`;
556	else if (isDSFormInstruction(insn: getPPCLegacyInsn(encoding: accessInsn)))
557	disp16 &= ~`0x3`;
558	return disp34 + disp16;
559	}
560
561	// There are a number of places when we either want to read or write an
562	// instruction when handling a half16 relocation type. On big-endian the buffer
563	// pointer is pointing into the middle of the word we want to extract, and on
564	// little-endian it is pointing to the start of the word. These 2 helpers are to
565	// simplify reading and writing in that context.
566	static void writeFromHalf16(Ctx &ctx, uint8_t *loc, uint32_t insn) {
567	write32(ctx, p: ctx.arg.isLE ? loc : loc - `2`, v: insn);
568	}
569
570	static uint32_t readFromHalf16(Ctx &ctx, const uint8_t *loc) {
571	return read32(ctx, p: ctx.arg.isLE ? loc : loc - `2`);
572	}
573
574	static uint64_t readPrefixedInst(Ctx &ctx, const uint8_t *loc) {
575	uint64_t fullInstr = read64(ctx, p: loc);
576	return ctx.arg.isLE ? (fullInstr << `32` \| fullInstr >> `32`) : fullInstr;
577	}
578
579	PPC64::PPC64(Ctx &ctx) : TargetInfo (ctx) {
580	copyRel = R_PPC64_COPY;
581	gotRel = R_PPC64_GLOB_DAT;
582	pltRel = R_PPC64_JMP_SLOT;
583	relativeRel = R_PPC64_RELATIVE;
584	iRelativeRel = R_PPC64_IRELATIVE;
585	symbolicRel = R_PPC64_ADDR64;
586	pltHeaderSize = `60`;
587	pltEntrySize = `4`;
588	ipltEntrySize = `16`; // PPC64PltCallStub::size
589	gotHeaderEntriesNum = `1`;
590	gotPltHeaderEntriesNum = `2`;
591	needsThunks = true;
592
593	tlsModuleIndexRel = R_PPC64_DTPMOD64;
594	tlsOffsetRel = R_PPC64_DTPREL64;
595
596	tlsGotRel = R_PPC64_TPREL64;
597
598	needsMoreStackNonSplit = false;
599
600	// We need 64K pages (at least under glibc/Linux, the loader won't
601	// set different permissions on a finer granularity than that).
602	defaultMaxPageSize = `65536`;
603
604	// The PPC64 ELF ABI v1 spec, says:
605	//
606	// It is normally desirable to put segments with different characteristics
607	// in separate 256 Mbyte portions of the address space, to give the
608	// operating system full paging flexibility in the 64-bit address space.
609	//
610	// And because the lowest non-zero 256M boundary is 0x10000000, PPC64 linkers
611	// use 0x10000000 as the starting address.
612	defaultImageBase = `0x10000000`;
613
614	write32(ctx, p: trapInstr.data(), v: `0x7fe00008`);
615	}
616
617	int PPC64::getTlsGdRelaxSkip(RelType type) const {
618	// A __tls_get_addr call instruction is marked with 2 relocations:
619	//
620	// R_PPC64_TLSGD / R_PPC64_TLSLD: marker relocation
621	// R_PPC64_REL24: __tls_get_addr
622	//
623	// After the relaxation we no longer call __tls_get_addr and should skip both
624	// relocations to not create a false dependence on __tls_get_addr being
625	// defined.
626	if (type == R_PPC64_TLSGD \|\| type == R_PPC64_TLSLD)
627	return `2`;
628	return `1`;
629	}
630
631	static uint32_t getEFlags(InputFile *file) {
632	if (file->ekind == ELF64BEKind)
633	return cast<ObjFile<ELF64BE>>(Val: file)->getObj().getHeader().e_flags;
634	return cast<ObjFile<ELF64LE>>(Val: file)->getObj().getHeader().e_flags;
635	}
636
637	// This file implements v2 ABI. This function makes sure that all
638	// object files have v2 or an unspecified version as an ABI version.
639	uint32_t PPC64::calcEFlags() const {
640	for (InputFile *f : ctx.objectFiles) {
641	uint32_t flag = getEFlags(file: f);
642	if (flag == `1`)
643	ErrAlways(ctx) << f << ": ABI version 1 is not supported";
644	else if (flag > `2`)
645	ErrAlways(ctx) << f << ": unrecognized e_flags: " << flag;
646	}
647	return `2`;
648	}
649
650	void PPC64::relaxGot(uint8_t loc, const* Relocation &rel, uint64_t val) const {
651	switch (rel.type) {
652	case R_PPC64_TOC16_HA:
653	// Convert "addis reg, 2, .LC0@toc@h" to "addis reg, 2, var@toc@h" or "nop".
654	relocate(loc, rel, val);
655	break;
656	case R_PPC64_TOC16_LO_DS: {
657	// Convert "ld reg, .LC0@toc@l(reg)" to "addi reg, reg, var@toc@l" or
658	// "addi reg, 2, var@toc".
659	uint32_t insn = readFromHalf16(ctx, loc);
660	if (getPrimaryOpCode(encoding: insn) != LD)
661	ErrAlways(ctx)
662	<< "expected a 'ld' for got-indirect to toc-relative relaxing";
663	writeFromHalf16(ctx, loc, insn: (insn & `0x03ffffff`) \| `0x38000000`);
664	relocateNoSym(loc, type: R_PPC64_TOC16_LO, val);
665	break;
666	}
667	case R_PPC64_GOT_PCREL34: {
668	// Clear the first 8 bits of the prefix and the first 6 bits of the
669	// instruction (the primary opcode).
670	uint64_t insn = readPrefixedInst(ctx, loc);
671	if ((insn & `0xfc000000`) != `0xe4000000`)
672	ErrAlways(ctx)
673	<< "expected a 'pld' for got-indirect to pc-relative relaxing";
674	insn &= ~`0xff000000fc000000`;
675
676	// Replace the cleared bits with the values for PADDI (0x600000038000000);
677	insn \|= `0x600000038000000`;
678	writePrefixedInst(ctx, loc, insn);
679	relocate(loc, rel, val);
680	break;
681	}
682	case R_PPC64_PCREL_OPT: {
683	// We can only relax this if the R_PPC64_GOT_PCREL34 at this offset can
684	// be relaxed. The eligibility for the relaxation needs to be determined
685	// on that relocation since this one does not relocate a symbol.
686	uint64_t insn = readPrefixedInst(ctx, loc);
687	uint32_t accessInsn = read32(ctx, p: loc + rel.addend);
688	uint64_t pcRelInsn = getPCRelativeForm(encoding: accessInsn);
689
690	// This error is not necessary for correctness but is emitted for now
691	// to ensure we don't miss these opportunities in real code. It can be
692	// removed at a later date.
693	if (pcRelInsn == UINT64_C(-`1`)) {
694	Err(ctx)
695	<< "unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x"
696	<< utohexstr(X: accessInsn, LowerCase: true);
697	break;
698	}
699
700	int64_t totalDisp = getTotalDisp(prefixedInsn: insn, accessInsn);
701	if (!isInt<`34`>(x: totalDisp))
702	break; // Displacement doesn't fit.
703	// Convert the PADDI to the prefixed version of accessInsn and convert
704	// accessInsn to a nop.
705	writePrefixedInst(ctx, loc,
706	insn: pcRelInsn \| ((totalDisp & `0x3ffff0000`) << `16`) \|
707	(totalDisp & `0xffff`));
708	write32(ctx, p: loc + rel.addend, v: NOP); // nop accessInsn.
709	break;
710	}
711	default:
712	llvm_unreachable("unexpected relocation type");
713	}
714	}
715
716	void PPC64::relaxTlsGdToLe(uint8_t loc, const* Relocation &rel,
717	uint64_t val) const {
718	// Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement.
719	// The general dynamic code sequence for a global `x` will look like:
720	// Instruction Relocation Symbol
721	// addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
722	// addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x
723	// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
724	// R_PPC64_REL24 __tls_get_addr
725	// nop None None
726
727	// Relaxing to local exec entails converting:
728	// addis r3, r2, x@got@tlsgd@ha into nop
729	// addi r3, r3, x@got@tlsgd@l into addis r3, r13, x@tprel@ha
730	// bl __tls_get_addr(x@tlsgd) into nop
731	// nop into addi r3, r3, x@tprel@l
732
733	switch (rel.type) {
734	case R_PPC64_GOT_TLSGD16_HA:
735	writeFromHalf16(ctx, loc, insn: NOP);
736	break;
737	case R_PPC64_GOT_TLSGD16:
738	case R_PPC64_GOT_TLSGD16_LO:
739	writeFromHalf16(ctx, loc, insn: `0x3c6d0000`); // addis r3, r13
740	relocateNoSym(loc, type: R_PPC64_TPREL16_HA, val);
741	break;
742	case R_PPC64_GOT_TLSGD_PCREL34:
743	// Relax from paddi r3, 0, x@got@tlsgd@pcrel, 1 to
744	// paddi r3, r13, x@tprel, 0
745	writePrefixedInst(ctx, loc, insn: `0x06000000386d0000`);
746	relocateNoSym(loc, type: R_PPC64_TPREL34, val);
747	break;
748	case R_PPC64_TLSGD: {
749	// PC Relative Relaxation:
750	// Relax from bl __tls_get_addr@notoc(x@tlsgd) to
751	// nop
752	// TOC Relaxation:
753	// Relax from bl __tls_get_addr(x@tlsgd)
754	// nop
755	// to
756	// nop
757	// addi r3, r3, x@tprel@l
758	const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);
759	if (locAsInt % `4` == `0`) {
760	write32(ctx, p: loc, v: NOP); // nop
761	write32(ctx, p: loc + `4`, v: `0x38630000`); // addi r3, r3
762	// Since we are relocating a half16 type relocation and Loc + 4 points to
763	// the start of an instruction we need to advance the buffer by an extra
764	// 2 bytes on BE.
765	relocateNoSym(loc: loc + `4` + (ctx.arg.ekind == ELF64BEKind ? `2` : `0`),
766	type: R_PPC64_TPREL16_LO, val);
767	} else if (locAsInt % `4` == `1`) {
768	write32(ctx, p: loc - `1`, v: NOP);
769	} else {
770	Err(ctx) << "R_PPC64_TLSGD has unexpected byte alignment";
771	}
772	break;
773	}
774	default:
775	llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
776	}
777	}
778
779	void PPC64::relaxTlsLdToLe(uint8_t loc, const* Relocation &rel,
780	uint64_t val) const {
781	// Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement.
782	// The local dynamic code sequence for a global `x` will look like:
783	// Instruction Relocation Symbol
784	// addis r3, r2, x@got@tlsld@ha R_PPC64_GOT_TLSLD16_HA x
785	// addi r3, r3, x@got@tlsld@l R_PPC64_GOT_TLSLD16_LO x
786	// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSLD x
787	// R_PPC64_REL24 __tls_get_addr
788	// nop None None
789
790	// Relaxing to local exec entails converting:
791	// addis r3, r2, x@got@tlsld@ha into nop
792	// addi r3, r3, x@got@tlsld@l into addis r3, r13, 0
793	// bl __tls_get_addr(x@tlsgd) into nop
794	// nop into addi r3, r3, 4096
795
796	switch (rel.type) {
797	case R_PPC64_GOT_TLSLD16_HA:
798	writeFromHalf16(ctx, loc, insn: NOP);
799	break;
800	case R_PPC64_GOT_TLSLD16_LO:
801	writeFromHalf16(ctx, loc, insn: `0x3c6d0000`); // addis r3, r13, 0
802	break;
803	case R_PPC64_GOT_TLSLD_PCREL34:
804	// Relax from paddi r3, 0, x1@got@tlsld@pcrel, 1 to
805	// paddi r3, r13, 0x1000, 0
806	writePrefixedInst(ctx, loc, insn: `0x06000000386d1000`);
807	break;
808	case R_PPC64_TLSLD: {
809	// PC Relative Relaxation:
810	// Relax from bl __tls_get_addr@notoc(x@tlsld)
811	// to
812	// nop
813	// TOC Relaxation:
814	// Relax from bl __tls_get_addr(x@tlsld)
815	// nop
816	// to
817	// nop
818	// addi r3, r3, 4096
819	const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);
820	if (locAsInt % `4` == `0`) {
821	write32(ctx, p: loc, v: NOP);
822	write32(ctx, p: loc + `4`, v: `0x38631000`); // addi r3, r3, 4096
823	} else if (locAsInt % `4` == `1`) {
824	write32(ctx, p: loc - `1`, v: NOP);
825	} else {
826	Err(ctx) << "R_PPC64_TLSLD has unexpected byte alignment";
827	}
828	break;
829	}
830	case R_PPC64_DTPREL16:
831	case R_PPC64_DTPREL16_HA:
832	case R_PPC64_DTPREL16_HI:
833	case R_PPC64_DTPREL16_DS:
834	case R_PPC64_DTPREL16_LO:
835	case R_PPC64_DTPREL16_LO_DS:
836	case R_PPC64_DTPREL34:
837	relocate(loc, rel, val);
838	break;
839	default:
840	llvm_unreachable("unsupported relocation for TLS LD to LE relaxation");
841	}
842	}
843
844	// Map X-Form instructions to their DS-Form counterparts, if applicable.
845	// The full encoding is returned here to distinguish between the different
846	// DS-Form instructions.
847	unsigned elf::getPPCDSFormOp(unsigned secondaryOp) {
848	switch (secondaryOp) {
849	case LWAX:
850	return (LWA << `26`) \| `0x2`;
851	case LDX:
852	return LD << `26`;
853	case STDX:
854	return STD << `26`;
855	default:
856	return `0`;
857	}
858	}
859
860	unsigned elf::getPPCDFormOp(unsigned secondaryOp) {
861	switch (secondaryOp) {
862	case LBZX:
863	return LBZ << `26`;
864	case LHZX:
865	return LHZ << `26`;
866	case LWZX:
867	return LWZ << `26`;
868	case STBX:
869	return STB << `26`;
870	case STHX:
871	return STH << `26`;
872	case STWX:
873	return STW << `26`;
874	case LHAX:
875	return LHA << `26`;
876	case LFSX:
877	return LFS << `26`;
878	case LFDX:
879	return LFD << `26`;
880	case STFSX:
881	return STFS << `26`;
882	case STFDX:
883	return STFD << `26`;
884	case ADD:
885	return ADDI << `26`;
886	default:
887	return `0`;
888	}
889	}
890
891	void PPC64::relaxTlsIeToLe(uint8_t loc, const* Relocation &rel,
892	uint64_t val) const {
893	// The initial exec code sequence for a global `x` will look like:
894	// Instruction Relocation Symbol
895	// addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x
896	// ld r9, x@got@tprel@l(r9) R_PPC64_GOT_TPREL16_LO_DS x
897	// add r9, r9, x@tls R_PPC64_TLS x
898
899	// Relaxing to local exec entails converting:
900	// addis r9, r2, x@got@tprel@ha into nop
901	// ld r9, x@got@tprel@l(r9) into addis r9, r13, x@tprel@ha
902	// add r9, r9, x@tls into addi r9, r9, x@tprel@l
903
904	// x@tls R_PPC64_TLS is a relocation which does not compute anything,
905	// it is replaced with r13 (thread pointer).
906
907	// The add instruction in the initial exec sequence has multiple variations
908	// that need to be handled. If we are building an address it will use an add
909	// instruction, if we are accessing memory it will use any of the X-form
910	// indexed load or store instructions.
911
912	unsigned offset = (ctx.arg.ekind == ELF64BEKind) ? `2` : `0`;
913	switch (rel.type) {
914	case R_PPC64_GOT_TPREL16_HA:
915	write32(ctx, p: loc - offset, v: NOP);
916	break;
917	case R_PPC64_GOT_TPREL16_LO_DS:
918	case R_PPC64_GOT_TPREL16_DS: {
919	uint32_t regNo = read32(ctx, p: loc - offset) & `0x03e00000`; // bits 6-10
920	write32(ctx, p: loc - offset, v: `0x3c0d0000` \| regNo); // addis RegNo, r13
921	relocateNoSym(loc, type: R_PPC64_TPREL16_HA, val);
922	break;
923	}
924	case R_PPC64_GOT_TPREL_PCREL34: {
925	const uint64_t pldRT = readPrefixedInst(ctx, loc) & `0x0000000003e00000`;
926	// paddi RT(from pld), r13, symbol@tprel, 0
927	writePrefixedInst(ctx, loc, insn: `0x06000000380d0000` \| pldRT);
928	relocateNoSym(loc, type: R_PPC64_TPREL34, val);
929	break;
930	}
931	case R_PPC64_TLS: {
932	const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);
933	if (locAsInt % `4` == `0`) {
934	uint32_t primaryOp = getPrimaryOpCode(encoding: read32(ctx, p: loc));
935	if (primaryOp != `31`)
936	ErrAlways(ctx) << "unrecognized instruction for IE to LE R_PPC64_TLS";
937	uint32_t secondaryOp = (read32(ctx, p: loc) & `0x000007fe`) >> `1`; // bits 21-30
938	uint32_t dFormOp = getPPCDFormOp(secondaryOp);
939	uint32_t finalReloc;
940	if (dFormOp == `0`) { // Expecting a DS-Form instruction.
941	dFormOp = getPPCDSFormOp(secondaryOp);
942	if (dFormOp == `0`)
943	ErrAlways(ctx) << "unrecognized instruction for IE to LE R_PPC64_TLS";
944	finalReloc = R_PPC64_TPREL16_LO_DS;
945	} else
946	finalReloc = R_PPC64_TPREL16_LO;
947	write32(ctx, p: loc, v: dFormOp \| (read32(ctx, p: loc) & `0x03ff0000`));
948	relocateNoSym(loc: loc + offset, type: finalReloc, val);
949	} else if (locAsInt % `4` == `1`) {
950	// If the offset is not 4 byte aligned then we have a PCRel type reloc.
951	// This version of the relocation is offset by one byte from the
952	// instruction it references.
953	uint32_t tlsInstr = read32(ctx, p: loc - `1`);
954	uint32_t primaryOp = getPrimaryOpCode(encoding: tlsInstr);
955	if (primaryOp != `31`)
956	Err(ctx) << "unrecognized instruction for IE to LE R_PPC64_TLS";
957	uint32_t secondaryOp = (tlsInstr & `0x000007FE`) >> `1`; // bits 21-30
958	// The add is a special case and should be turned into a nop. The paddi
959	// that comes before it will already have computed the address of the
960	// symbol.
961	if (secondaryOp == `266`) {
962	// Check if the add uses the same result register as the input register.
963	uint32_t rt = (tlsInstr & `0x03E00000`) >> `21`; // bits 6-10
964	uint32_t ra = (tlsInstr & `0x001F0000`) >> `16`; // bits 11-15
965	if (ra == rt) {
966	write32(ctx, p: loc - `1`, v: NOP);
967	} else {
968	// mr rt, ra
969	write32(ctx, p: loc - `1`,
970	v: `0x7C000378` \| (rt << `16`) \| (ra << `21`) \| (ra << `11`));
971	}
972	} else {
973	uint32_t dFormOp = getPPCDFormOp(secondaryOp);
974	if (dFormOp == `0`) { // Expecting a DS-Form instruction.
975	dFormOp = getPPCDSFormOp(secondaryOp);
976	if (dFormOp == `0`)
977	Err(ctx) << "unrecognized instruction for IE to LE R_PPC64_TLS";
978	}
979	write32(ctx, p: loc - `1`, v: (dFormOp \| (tlsInstr & `0x03ff0000`)));
980	}
981	} else {
982	Err(ctx) << "R_PPC64_TLS must be either 4 byte aligned or one byte "
983	"offset from 4 byte aligned";
984	}
985	break;
986	}
987	default:
988	llvm_unreachable("unknown relocation for IE to LE");
989	break;
990	}
991	}
992
993	RelExpr PPC64::getRelExpr(RelType type, const Symbol &s,
994	const uint8_t loc) const* {
995	switch (type) {
996	case R_PPC64_NONE:
997	return R_NONE;
998	case R_PPC64_ADDR16:
999	case R_PPC64_ADDR16_DS:
1000	case R_PPC64_ADDR16_HA:
1001	case R_PPC64_ADDR16_HI:
1002	case R_PPC64_ADDR16_HIGH:
1003	case R_PPC64_ADDR16_HIGHER:
1004	case R_PPC64_ADDR16_HIGHERA:
1005	case R_PPC64_ADDR16_HIGHEST:
1006	case R_PPC64_ADDR16_HIGHESTA:
1007	case R_PPC64_ADDR16_LO:
1008	case R_PPC64_ADDR16_LO_DS:
1009	case R_PPC64_ADDR32:
1010	case R_PPC64_ADDR64:
1011	return R_ABS;
1012	case R_PPC64_GOT16:
1013	case R_PPC64_GOT16_DS:
1014	case R_PPC64_GOT16_HA:
1015	case R_PPC64_GOT16_HI:
1016	case R_PPC64_GOT16_LO:
1017	case R_PPC64_GOT16_LO_DS:
1018	return R_GOT_OFF;
1019	case R_PPC64_TOC16:
1020	case R_PPC64_TOC16_DS:
1021	case R_PPC64_TOC16_HI:
1022	case R_PPC64_TOC16_LO:
1023	return R_GOTREL;
1024	case R_PPC64_GOT_PCREL34:
1025	case R_PPC64_GOT_TPREL_PCREL34:
1026	case R_PPC64_PCREL_OPT:
1027	return R_GOT_PC;
1028	case R_PPC64_TOC16_HA:
1029	case R_PPC64_TOC16_LO_DS:
1030	return ctx.arg.tocOptimize ? RE_PPC64_RELAX_TOC : R_GOTREL;
1031	case R_PPC64_TOC:
1032	return RE_PPC64_TOCBASE;
1033	case R_PPC64_REL14:
1034	case R_PPC64_REL24:
1035	return RE_PPC64_CALL_PLT;
1036	case R_PPC64_REL24_NOTOC:
1037	return R_PLT_PC;
1038	case R_PPC64_REL16_LO:
1039	case R_PPC64_REL16_HA:
1040	case R_PPC64_REL16_HI:
1041	case R_PPC64_REL32:
1042	case R_PPC64_REL64:
1043	case R_PPC64_PCREL34:
1044	return R_PC;
1045	case R_PPC64_GOT_TLSGD16:
1046	case R_PPC64_GOT_TLSGD16_HA:
1047	case R_PPC64_GOT_TLSGD16_HI:
1048	case R_PPC64_GOT_TLSGD16_LO:
1049	return R_TLSGD_GOT;
1050	case R_PPC64_GOT_TLSGD_PCREL34:
1051	return R_TLSGD_PC;
1052	case R_PPC64_GOT_TLSLD16:
1053	case R_PPC64_GOT_TLSLD16_HA:
1054	case R_PPC64_GOT_TLSLD16_HI:
1055	case R_PPC64_GOT_TLSLD16_LO:
1056	return R_TLSLD_GOT;
1057	case R_PPC64_GOT_TLSLD_PCREL34:
1058	return R_TLSLD_PC;
1059	case R_PPC64_GOT_TPREL16_HA:
1060	case R_PPC64_GOT_TPREL16_LO_DS:
1061	case R_PPC64_GOT_TPREL16_DS:
1062	case R_PPC64_GOT_TPREL16_HI:
1063	return R_GOT_OFF;
1064	case R_PPC64_GOT_DTPREL16_HA:
1065	case R_PPC64_GOT_DTPREL16_LO_DS:
1066	case R_PPC64_GOT_DTPREL16_DS:
1067	case R_PPC64_GOT_DTPREL16_HI:
1068	return R_TLSLD_GOT_OFF;
1069	case R_PPC64_TPREL16:
1070	case R_PPC64_TPREL16_HA:
1071	case R_PPC64_TPREL16_LO:
1072	case R_PPC64_TPREL16_HI:
1073	case R_PPC64_TPREL16_DS:
1074	case R_PPC64_TPREL16_LO_DS:
1075	case R_PPC64_TPREL16_HIGHER:
1076	case R_PPC64_TPREL16_HIGHERA:
1077	case R_PPC64_TPREL16_HIGHEST:
1078	case R_PPC64_TPREL16_HIGHESTA:
1079	case R_PPC64_TPREL34:
1080	return R_TPREL;
1081	case R_PPC64_DTPREL16:
1082	case R_PPC64_DTPREL16_DS:
1083	case R_PPC64_DTPREL16_HA:
1084	case R_PPC64_DTPREL16_HI:
1085	case R_PPC64_DTPREL16_HIGHER:
1086	case R_PPC64_DTPREL16_HIGHERA:
1087	case R_PPC64_DTPREL16_HIGHEST:
1088	case R_PPC64_DTPREL16_HIGHESTA:
1089	case R_PPC64_DTPREL16_LO:
1090	case R_PPC64_DTPREL16_LO_DS:
1091	case R_PPC64_DTPREL64:
1092	case R_PPC64_DTPREL34:
1093	return R_DTPREL;
1094	case R_PPC64_TLSGD:
1095	return R_TLSDESC_CALL;
1096	case R_PPC64_TLSLD:
1097	return R_TLSLD_HINT;
1098	case R_PPC64_TLS:
1099	return R_TLSIE_HINT;
1100	default:
1101	Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
1102	<< ") against symbol " << &s;
1103	return R_NONE;
1104	}
1105	}
1106
1107	RelType PPC64::getDynRel(RelType type) const {
1108	if (type == R_PPC64_ADDR64 \|\| type == R_PPC64_TOC)
1109	return R_PPC64_ADDR64;
1110	return R_PPC64_NONE;
1111	}
1112
1113	int64_t PPC64::getImplicitAddend(const uint8_t buf, RelType type) const* {
1114	switch (type) {
1115	case R_PPC64_NONE:
1116	case R_PPC64_GLOB_DAT:
1117	case R_PPC64_JMP_SLOT:
1118	return `0`;
1119	case R_PPC64_REL32:
1120	return SignExtend64<`32`>(x: read32(ctx, p: buf));
1121	case R_PPC64_ADDR64:
1122	case R_PPC64_REL64:
1123	case R_PPC64_RELATIVE:
1124	case R_PPC64_IRELATIVE:
1125	case R_PPC64_DTPMOD64:
1126	case R_PPC64_DTPREL64:
1127	case R_PPC64_TPREL64:
1128	return read64(ctx, p: buf);
1129	default:
1130	InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
1131	return `0`;
1132	}
1133	}
1134
1135	void PPC64::writeGotHeader(uint8_t buf) const* {
1136	write64(ctx, p: buf, v: getPPC64TocBase(ctx));
1137	}
1138
1139	void PPC64::writePltHeader(uint8_t buf) const* {
1140	// The generic resolver stub goes first.
1141	write32(ctx, p: buf + `0`, v: `0x7c0802a6`); // mflr r0
1142	write32(ctx, p: buf + `4`, v: `0x429f0005`); // bcl 20,4cr7+so,8 <_glink+0x8>*
1143	write32(ctx, p: buf + `8`, v: `0x7d6802a6`); // mflr r11
1144	write32(ctx, p: buf + `12`, v: `0x7c0803a6`); // mtlr r0
1145	write32(ctx, p: buf + `16`, v: `0x7d8b6050`); // subf r12, r11, r12
1146	write32(ctx, p: buf + `20`, v: `0x380cffcc`); // subi r0,r12,52
1147	write32(ctx, p: buf + `24`, v: `0x7800f082`); // srdi r0,r0,62,2
1148	write32(ctx, p: buf + `28`, v: `0xe98b002c`); // ld r12,44(r11)
1149	write32(ctx, p: buf + `32`, v: `0x7d6c5a14`); // add r11,r12,r11
1150	write32(ctx, p: buf + `36`, v: `0xe98b0000`); // ld r12,0(r11)
1151	write32(ctx, p: buf + `40`, v: `0xe96b0008`); // ld r11,8(r11)
1152	write32(ctx, p: buf + `44`, v: `0x7d8903a6`); // mtctr r12
1153	write32(ctx, p: buf + `48`, v: `0x4e800420`); // bctr
1154
1155	// The 'bcl' instruction will set the link register to the address of the
1156	// following instruction ('mflr r11'). Here we store the offset from that
1157	// instruction to the first entry in the GotPlt section.
1158	int64_t gotPltOffset = ctx.in.gotPlt ->getVA() - (ctx.in.plt ->getVA() + `8`);
1159	write64(ctx, p: buf + `52`, v: gotPltOffset);
1160	}
1161
1162	void PPC64::writePlt(uint8_t buf, const* Symbol &sym,
1163	uint64_t /pltEntryAddr/) const {
1164	int32_t offset = pltHeaderSize + sym.getPltIdx(ctx) * pltEntrySize;
1165	// bl __glink_PLTresolve
1166	write32(ctx, p: buf, v: `0x48000000` \| ((-offset) & `0x03fffffc`));
1167	}
1168
1169	void PPC64::writeIplt(uint8_t buf, const* Symbol &sym,
1170	uint64_t /pltEntryAddr/) const {
1171	writePPC64LoadAndBranch(ctx, buf,
1172	offset: sym.getGotPltVA(ctx) - getPPC64TocBase(ctx));
1173	}
1174
1175	static std::pair<RelType, uint64_t> toAddr16Rel(RelType type, uint64_t val) {
1176	// Relocations relative to the toc-base need to be adjusted by the Toc offset.
1177	uint64_t tocBiasedVal = val - ppc64TocOffset;
1178	// Relocations relative to dtv[dtpmod] need to be adjusted by the DTP offset.
1179	uint64_t dtpBiasedVal = val - dynamicThreadPointerOffset;
1180
1181	switch (type) {
1182	// TOC biased relocation.
1183	case R_PPC64_GOT16:
1184	case R_PPC64_GOT_TLSGD16:
1185	case R_PPC64_GOT_TLSLD16:
1186	case R_PPC64_TOC16:
1187	return {R_PPC64_ADDR16, tocBiasedVal};
1188	case R_PPC64_GOT16_DS:
1189	case R_PPC64_TOC16_DS:
1190	case R_PPC64_GOT_TPREL16_DS:
1191	case R_PPC64_GOT_DTPREL16_DS:
1192	return {R_PPC64_ADDR16_DS, tocBiasedVal};
1193	case R_PPC64_GOT16_HA:
1194	case R_PPC64_GOT_TLSGD16_HA:
1195	case R_PPC64_GOT_TLSLD16_HA:
1196	case R_PPC64_GOT_TPREL16_HA:
1197	case R_PPC64_GOT_DTPREL16_HA:
1198	case R_PPC64_TOC16_HA:
1199	return {R_PPC64_ADDR16_HA, tocBiasedVal};
1200	case R_PPC64_GOT16_HI:
1201	case R_PPC64_GOT_TLSGD16_HI:
1202	case R_PPC64_GOT_TLSLD16_HI:
1203	case R_PPC64_GOT_TPREL16_HI:
1204	case R_PPC64_GOT_DTPREL16_HI:
1205	case R_PPC64_TOC16_HI:
1206	return {R_PPC64_ADDR16_HI, tocBiasedVal};
1207	case R_PPC64_GOT16_LO:
1208	case R_PPC64_GOT_TLSGD16_LO:
1209	case R_PPC64_GOT_TLSLD16_LO:
1210	case R_PPC64_TOC16_LO:
1211	return {R_PPC64_ADDR16_LO, tocBiasedVal};
1212	case R_PPC64_GOT16_LO_DS:
1213	case R_PPC64_TOC16_LO_DS:
1214	case R_PPC64_GOT_TPREL16_LO_DS:
1215	case R_PPC64_GOT_DTPREL16_LO_DS:
1216	return {R_PPC64_ADDR16_LO_DS, tocBiasedVal};
1217
1218	// Dynamic Thread pointer biased relocation types.
1219	case R_PPC64_DTPREL16:
1220	return {R_PPC64_ADDR16, dtpBiasedVal};
1221	case R_PPC64_DTPREL16_DS:
1222	return {R_PPC64_ADDR16_DS, dtpBiasedVal};
1223	case R_PPC64_DTPREL16_HA:
1224	return {R_PPC64_ADDR16_HA, dtpBiasedVal};
1225	case R_PPC64_DTPREL16_HI:
1226	return {R_PPC64_ADDR16_HI, dtpBiasedVal};
1227	case R_PPC64_DTPREL16_HIGHER:
1228	return {R_PPC64_ADDR16_HIGHER, dtpBiasedVal};
1229	case R_PPC64_DTPREL16_HIGHERA:
1230	return {R_PPC64_ADDR16_HIGHERA, dtpBiasedVal};
1231	case R_PPC64_DTPREL16_HIGHEST:
1232	return {R_PPC64_ADDR16_HIGHEST, dtpBiasedVal};
1233	case R_PPC64_DTPREL16_HIGHESTA:
1234	return {R_PPC64_ADDR16_HIGHESTA, dtpBiasedVal};
1235	case R_PPC64_DTPREL16_LO:
1236	return {R_PPC64_ADDR16_LO, dtpBiasedVal};
1237	case R_PPC64_DTPREL16_LO_DS:
1238	return {R_PPC64_ADDR16_LO_DS, dtpBiasedVal};
1239	case R_PPC64_DTPREL64:
1240	return {R_PPC64_ADDR64, dtpBiasedVal};
1241
1242	default:
1243	return {type, val};
1244	}
1245	}
1246
1247	static bool isTocOptType(RelType type) {
1248	switch (type) {
1249	case R_PPC64_GOT16_HA:
1250	case R_PPC64_GOT16_LO_DS:
1251	case R_PPC64_TOC16_HA:
1252	case R_PPC64_TOC16_LO_DS:
1253	case R_PPC64_TOC16_LO:
1254	return true;
1255	default:
1256	return false;
1257	}
1258	}
1259
1260	void PPC64::relocate(uint8_t loc, const* Relocation &rel, uint64_t val) const {
1261	RelType type = rel.type;
1262	bool shouldTocOptimize = isTocOptType(type);
1263	// For dynamic thread pointer relative, toc-relative, and got-indirect
1264	// relocations, proceed in terms of the corresponding ADDR16 relocation type.
1265	std::tie(args&: type, args&: val) = toAddr16Rel(type, val);
1266
1267	switch (type) {
1268	case R_PPC64_ADDR14: {
1269	checkAlignment(ctx, loc, v: val, n: `4`, rel);
1270	// Preserve the AA/LK bits in the branch instruction
1271	uint8_t aalk = loc[`3`];
1272	write16(ctx, p: loc + `2`, v: (aalk & `3`) \| (val & `0xfffc`));
1273	break;
1274	}
1275	case R_PPC64_ADDR16:
1276	checkIntUInt(ctx, loc, v: val, n: `16`, rel);
1277	write16(ctx, p: loc, v: val);
1278	break;
1279	case R_PPC64_ADDR32:
1280	checkIntUInt(ctx, loc, v: val, n: `32`, rel);
1281	write32(ctx, p: loc, v: val);
1282	break;
1283	case R_PPC64_ADDR16_DS:
1284	case R_PPC64_TPREL16_DS: {
1285	checkInt(ctx, loc, v: val, n: `16`, rel);
1286	// DQ-form instructions use bits 28-31 as part of the instruction encoding
1287	// DS-form instructions only use bits 30-31.
1288	uint16_t mask = isDQFormInstruction(encoding: readFromHalf16(ctx, loc)) ? `0xf` : `0x3`;
1289	checkAlignment(ctx, loc, v: lo(v: val), n: mask + `1`, rel);
1290	write16(ctx, p: loc, v: (read16(ctx, p: loc) & mask) \| lo(v: val));
1291	} break;
1292	case R_PPC64_ADDR16_HA:
1293	case R_PPC64_REL16_HA:
1294	case R_PPC64_TPREL16_HA:
1295	if (ctx.arg.tocOptimize && shouldTocOptimize && ha(v: val) == `0`)
1296	writeFromHalf16(ctx, loc, insn: NOP);
1297	else {
1298	checkInt(ctx, loc, v: val + `0x8000`, n: `32`, rel);
1299	write16(ctx, p: loc, v: ha(v: val));
1300	}
1301	break;
1302	case R_PPC64_ADDR16_HI:
1303	case R_PPC64_REL16_HI:
1304	case R_PPC64_TPREL16_HI:
1305	checkInt(ctx, loc, v: val, n: `32`, rel);
1306	write16(ctx, p: loc, v: hi(v: val));
1307	break;
1308	case R_PPC64_ADDR16_HIGH:
1309	write16(ctx, p: loc, v: hi(v: val));
1310	break;
1311	case R_PPC64_ADDR16_HIGHER:
1312	case R_PPC64_TPREL16_HIGHER:
1313	write16(ctx, p: loc, v: higher(v: val));
1314	break;
1315	case R_PPC64_ADDR16_HIGHERA:
1316	case R_PPC64_TPREL16_HIGHERA:
1317	write16(ctx, p: loc, v: highera(v: val));
1318	break;
1319	case R_PPC64_ADDR16_HIGHEST:
1320	case R_PPC64_TPREL16_HIGHEST:
1321	write16(ctx, p: loc, v: highest(v: val));
1322	break;
1323	case R_PPC64_ADDR16_HIGHESTA:
1324	case R_PPC64_TPREL16_HIGHESTA:
1325	write16(ctx, p: loc, v: highesta(v: val));
1326	break;
1327	case R_PPC64_ADDR16_LO:
1328	case R_PPC64_REL16_LO:
1329	case R_PPC64_TPREL16_LO:
1330	// When the high-adjusted part of a toc relocation evaluates to 0, it is
1331	// changed into a nop. The lo part then needs to be updated to use the
1332	// toc-pointer register r2, as the base register.
1333	if (ctx.arg.tocOptimize && shouldTocOptimize && ha(v: val) == `0`) {
1334	uint32_t insn = readFromHalf16(ctx, loc);
1335	if (isInstructionUpdateForm(encoding: insn))
1336	Err(ctx) << getErrorLoc(ctx, loc)
1337	<< "can't toc-optimize an update instruction: 0x"
1338	<< utohexstr(X: insn, LowerCase: true);
1339	writeFromHalf16(ctx, loc, insn: (insn & `0xffe00000`) \| `0x00020000` \| lo(v: val));
1340	} else {
1341	write16(ctx, p: loc, v: lo(v: val));
1342	}
1343	break;
1344	case R_PPC64_ADDR16_LO_DS:
1345	case R_PPC64_TPREL16_LO_DS: {
1346	// DQ-form instructions use bits 28-31 as part of the instruction encoding
1347	// DS-form instructions only use bits 30-31.
1348	uint32_t insn = readFromHalf16(ctx, loc);
1349	uint16_t mask = isDQFormInstruction(encoding: insn) ? `0xf` : `0x3`;
1350	checkAlignment(ctx, loc, v: lo(v: val), n: mask + `1`, rel);
1351	if (ctx.arg.tocOptimize && shouldTocOptimize && ha(v: val) == `0`) {
1352	// When the high-adjusted part of a toc relocation evaluates to 0, it is
1353	// changed into a nop. The lo part then needs to be updated to use the toc
1354	// pointer register r2, as the base register.
1355	if (isInstructionUpdateForm(encoding: insn))
1356	Err(ctx) << getErrorLoc(ctx, loc)
1357	<< "can't toc-optimize an update instruction: 0x"
1358	<< utohexstr(X: insn, LowerCase: true);
1359	insn &= `0xffe00000` \| mask;
1360	writeFromHalf16(ctx, loc, insn: insn \| `0x00020000` \| lo(v: val));
1361	} else {
1362	write16(ctx, p: loc, v: (read16(ctx, p: loc) & mask) \| lo(v: val));
1363	}
1364	} break;
1365	case R_PPC64_TPREL16:
1366	checkInt(ctx, loc, v: val, n: `16`, rel);
1367	write16(ctx, p: loc, v: val);
1368	break;
1369	case R_PPC64_REL32:
1370	checkInt(ctx, loc, v: val, n: `32`, rel);
1371	write32(ctx, p: loc, v: val);
1372	break;
1373	case R_PPC64_ADDR64:
1374	case R_PPC64_REL64:
1375	case R_PPC64_TOC:
1376	write64(ctx, p: loc, v: val);
1377	break;
1378	case R_PPC64_REL14: {
1379	uint32_t mask = `0x0000FFFC`;
1380	checkInt(ctx, loc, v: val, n: `16`, rel);
1381	checkAlignment(ctx, loc, v: val, n: `4`, rel);
1382	write32(ctx, p: loc, v: (read32(ctx, p: loc) & ~mask) \| (val & mask));
1383	break;
1384	}
1385	case R_PPC64_REL24:
1386	case R_PPC64_REL24_NOTOC: {
1387	uint32_t mask = `0x03FFFFFC`;
1388	checkInt(ctx, loc, v: val, n: `26`, rel);
1389	checkAlignment(ctx, loc, v: val, n: `4`, rel);
1390	write32(ctx, p: loc, v: (read32(ctx, p: loc) & ~mask) \| (val & mask));
1391	break;
1392	}
1393	case R_PPC64_DTPREL64:
1394	write64(ctx, p: loc, v: val - dynamicThreadPointerOffset);
1395	break;
1396	case R_PPC64_DTPREL34:
1397	// The Dynamic Thread Vector actually points 0x8000 bytes past the start
1398	// of the TLS block. Therefore, in the case of R_PPC64_DTPREL34 we first
1399	// need to subtract that value then fallthrough to the general case.
1400	val -= dynamicThreadPointerOffset;
1401	[[fallthrough]];
1402	case R_PPC64_PCREL34:
1403	case R_PPC64_GOT_PCREL34:
1404	case R_PPC64_GOT_TLSGD_PCREL34:
1405	case R_PPC64_GOT_TLSLD_PCREL34:
1406	case R_PPC64_GOT_TPREL_PCREL34:
1407	case R_PPC64_TPREL34: {
1408	const uint64_t si0Mask = `0x00000003ffff0000`;
1409	const uint64_t si1Mask = `0x000000000000ffff`;
1410	const uint64_t fullMask = `0x0003ffff0000ffff`;
1411	checkInt(ctx, loc, v: val, n: `34`, rel);
1412
1413	uint64_t instr = readPrefixedInst(ctx, loc) & ~fullMask;
1414	writePrefixedInst(ctx, loc,
1415	insn: instr \| ((val & si0Mask) << `16`) \| (val & si1Mask));
1416	break;
1417	}
1418	// If we encounter a PCREL_OPT relocation that we won't optimize.
1419	case R_PPC64_PCREL_OPT:
1420	break;
1421	default:
1422	llvm_unreachable("unknown relocation");
1423	}
1424	}
1425
1426	bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
1427	uint64_t branchAddr, const Symbol &s, int64_t a) const {
1428	if (type != R_PPC64_REL14 && type != R_PPC64_REL24 &&
1429	type != R_PPC64_REL24_NOTOC)
1430	return false;
1431
1432	// If a function is in the Plt it needs to be called with a call-stub.
1433	if (s.isInPlt(ctx))
1434	return true;
1435
1436	// This check looks at the st_other bits of the callee with relocation
1437	// R_PPC64_REL14 or R_PPC64_REL24. If the value is 1, then the callee
1438	// clobbers the TOC and we need an R2 save stub.
1439	if (type != R_PPC64_REL24_NOTOC && (s.stOther >> `5`) == `1`)
1440	return true;
1441
1442	if (type == R_PPC64_REL24_NOTOC && (s.stOther >> `5`) > `1`)
1443	return true;
1444
1445	// An undefined weak symbol not in a PLT does not need a thunk. If it is
1446	// hidden, its binding has been converted to local, so we just check
1447	// isUndefined() here. A undefined non-weak symbol has been errored.
1448	if (s.isUndefined())
1449	return false;
1450
1451	// If the offset exceeds the range of the branch type then it will need
1452	// a range-extending thunk.
1453	// See the comment in getRelocTargetVA() about RE_PPC64_CALL.
1454	return !inBranchRange(
1455	type, src: branchAddr,
1456	dst: s.getVA(ctx, addend: a) + getPPC64GlobalEntryToLocalEntryOffset(ctx, stOther: s.stOther));
1457	}
1458
1459	uint32_t PPC64::getThunkSectionSpacing() const {
1460	// See comment in Arch/ARM.cpp for a more detailed explanation of
1461	// getThunkSectionSpacing(). For PPC64 we pick the constant here based on
1462	// R_PPC64_REL24, which is used by unconditional branch instructions.
1463	// 0x2000000 = (1 << 24-1) 4*
1464	return `0x2000000`;
1465	}
1466
1467	bool PPC64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
1468	int64_t offset = dst - src;
1469	if (type == R_PPC64_REL14)
1470	return isInt<`16`>(x: offset);
1471	if (type == R_PPC64_REL24 \|\| type == R_PPC64_REL24_NOTOC)
1472	return isInt<`26`>(x: offset);
1473	llvm_unreachable("unsupported relocation type used in branch");
1474	}
1475
1476	RelExpr PPC64::adjustTlsExpr(RelType type, RelExpr expr) const {
1477	if (type != R_PPC64_GOT_TLSGD_PCREL34 && expr == R_RELAX_TLS_GD_TO_IE)
1478	return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
1479	if (expr == R_RELAX_TLS_LD_TO_LE)
1480	return R_RELAX_TLS_LD_TO_LE_ABS;
1481	return expr;
1482	}
1483
1484	RelExpr PPC64::adjustGotPcExpr(RelType type, int64_t addend,
1485	const uint8_t loc) const* {
1486	if ((type == R_PPC64_GOT_PCREL34 \|\| type == R_PPC64_PCREL_OPT) &&
1487	ctx.arg.pcRelOptimize) {
1488	// It only makes sense to optimize pld since paddi means that the address
1489	// of the object in the GOT is required rather than the object itself.
1490	if ((readPrefixedInst(ctx, loc) & `0xfc000000`) == `0xe4000000`)
1491	return RE_PPC64_RELAX_GOT_PC;
1492	}
1493	return R_GOT_PC;
1494	}
1495
1496	// Reference: 3.7.4.1 of the 64-bit ELF V2 abi supplement.
1497	// The general dynamic code sequence for a global `x` uses 4 instructions.
1498	// Instruction Relocation Symbol
1499	// addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
1500	// addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x
1501	// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
1502	// R_PPC64_REL24 __tls_get_addr
1503	// nop None None
1504	//
1505	// Relaxing to initial-exec entails:
1506	// 1) Convert the addis/addi pair that builds the address of the tls_index
1507	// struct for 'x' to an addis/ld pair that loads an offset from a got-entry.
1508	// 2) Convert the call to __tls_get_addr to a nop.
1509	// 3) Convert the nop following the call to an add of the loaded offset to the
1510	// thread pointer.
1511	// Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is
1512	// used as the relaxation hint for both steps 2 and 3.
1513	void PPC64::relaxTlsGdToIe(uint8_t loc, const* Relocation &rel,
1514	uint64_t val) const {
1515	switch (rel.type) {
1516	case R_PPC64_GOT_TLSGD16_HA:
1517	// This is relaxed from addis rT, r2, sym@got@tlsgd@ha to
1518	// addis rT, r2, sym@got@tprel@ha.
1519	relocateNoSym(loc, type: R_PPC64_GOT_TPREL16_HA, val);
1520	return;
1521	case R_PPC64_GOT_TLSGD16:
1522	case R_PPC64_GOT_TLSGD16_LO: {
1523	// Relax from addi r3, rA, sym@got@tlsgd@l to
1524	// ld r3, sym@got@tprel@l(rA)
1525	uint32_t ra = (readFromHalf16(ctx, loc) & (`0x1f` << `16`));
1526	writeFromHalf16(ctx, loc, insn: `0xe8600000` \| ra);
1527	relocateNoSym(loc, type: R_PPC64_GOT_TPREL16_LO_DS, val);
1528	return;
1529	}
1530	case R_PPC64_GOT_TLSGD_PCREL34: {
1531	// Relax from paddi r3, 0, sym@got@tlsgd@pcrel, 1 to
1532	// pld r3, sym@got@tprel@pcrel
1533	writePrefixedInst(ctx, loc, insn: `0x04100000e4600000`);
1534	relocateNoSym(loc, type: R_PPC64_GOT_TPREL_PCREL34, val);
1535	return;
1536	}
1537	case R_PPC64_TLSGD: {
1538	// PC Relative Relaxation:
1539	// Relax from bl __tls_get_addr@notoc(x@tlsgd) to
1540	// nop
1541	// TOC Relaxation:
1542	// Relax from bl __tls_get_addr(x@tlsgd)
1543	// nop
1544	// to
1545	// nop
1546	// add r3, r3, r13
1547	const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);
1548	if (locAsInt % `4` == `0`) {
1549	write32(ctx, p: loc, v: NOP); // bl __tls_get_addr(sym@tlsgd) --> nop
1550	write32(ctx, p: loc + `4`, v: `0x7c636a14`); // nop --> add r3, r3, r13
1551	} else if (locAsInt % `4` == `1`) {
1552	// bl __tls_get_addr(sym@tlsgd) --> add r3, r3, r13
1553	write32(ctx, p: loc - `1`, v: `0x7c636a14`);
1554	} else {
1555	Err(ctx) << "R_PPC64_TLSGD has unexpected byte alignment";
1556	}
1557	return;
1558	}
1559	default:
1560	llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
1561	}
1562	}
1563
1564	void PPC64::relocateAlloc(InputSectionBase &sec, uint8_t buf) const* {
1565	uint64_t secAddr = sec.getOutputSection()->addr;
1566	if (auto *s = dyn_cast<InputSection>(Val: &sec))
1567	secAddr += s->outSecOff;
1568	else if (auto *ehIn = dyn_cast<EhInputSection>(Val: &sec))
1569	secAddr += ehIn->getParent()->outSecOff;
1570	uint64_t lastPPCRelaxedRelocOff = -`1`;
1571	for (const Relocation &rel : sec.relocs()) {
1572	uint8_t *loc = buf + rel.offset;
1573	const uint64_t val = sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset);
1574	switch (rel.expr) {
1575	case RE_PPC64_RELAX_GOT_PC: {
1576	// The R_PPC64_PCREL_OPT relocation must appear immediately after
1577	// R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
1578	// We can only relax R_PPC64_PCREL_OPT if we have also relaxed
1579	// the associated R_PPC64_GOT_PCREL34 since only the latter has an
1580	// associated symbol. So save the offset when relaxing R_PPC64_GOT_PCREL34
1581	// and only relax the other if the saved offset matches.
1582	if (rel.type == R_PPC64_GOT_PCREL34)
1583	lastPPCRelaxedRelocOff = rel.offset;
1584	if (rel.type == R_PPC64_PCREL_OPT && rel.offset != lastPPCRelaxedRelocOff)
1585	break;
1586	relaxGot(loc, rel, val);
1587	break;
1588	}
1589	case RE_PPC64_RELAX_TOC:
1590	// rel.sym refers to the STT_SECTION symbol associated to the .toc input
1591	// section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC
1592	// entry, there may be R_PPC64_TOC16_HA not paired with
1593	// R_PPC64_TOC16_LO_DS. Don't relax. This loses some relaxation
1594	// opportunities but is safe.
1595	if (ctx.ppc64noTocRelax.count(V: {rel.sym, rel.addend}) \|\|
1596	!tryRelaxPPC64TocIndirection(ctx, rel, bufLoc: loc))
1597	relocate(loc, rel, val);
1598	break;
1599	case RE_PPC64_CALL:
1600	// If this is a call to __tls_get_addr, it may be part of a TLS
1601	// sequence that has been relaxed and turned into a nop. In this
1602	// case, we don't want to handle it as a call.
1603	if (read32(ctx, p: loc) == `0x60000000`) // nop
1604	break;
1605
1606	// Patch a nop (0x60000000) to a ld.
1607	if (rel.sym->needsTocRestore()) {
1608	// gcc/gfortran 5.4, 6.3 and earlier versions do not add nop for
1609	// recursive calls even if the function is preemptible. This is not
1610	// wrong in the common case where the function is not preempted at
1611	// runtime. Just ignore.
1612	if ((rel.offset + `8` > sec.content().size() \|\|
1613	read32(ctx, p: loc + `4`) != `0x60000000`) &&
1614	rel.sym->file != sec.file) {
1615	// Use substr(6) to remove the "__plt_" prefix.
1616	Err(ctx) << getErrorLoc(ctx, loc) << "call to "
1617	<< toStr(ctx, *rel.sym).substr(pos: `6`)
1618	<< " lacks nop, can't restore toc";
1619	break;
1620	}
1621	write32(ctx, p: loc + `4`, v: `0xe8410018`); // ld %r2, 24(%r1)
1622	}
1623	relocate(loc, rel, val);
1624	break;
1625	case R_RELAX_TLS_GD_TO_IE:
1626	case R_RELAX_TLS_GD_TO_IE_GOT_OFF:
1627	relaxTlsGdToIe(loc, rel, val);
1628	break;
1629	case R_RELAX_TLS_GD_TO_LE:
1630	relaxTlsGdToLe(loc, rel, val);
1631	break;
1632	case R_RELAX_TLS_LD_TO_LE_ABS:
1633	relaxTlsLdToLe(loc, rel, val);
1634	break;
1635	case R_RELAX_TLS_IE_TO_LE:
1636	relaxTlsIeToLe(loc, rel, val);
1637	break;
1638	default:
1639	relocate(loc, rel, val);
1640	break;
1641	}
1642	}
1643	}
1644
1645	// The prologue for a split-stack function is expected to look roughly
1646	// like this:
1647	// .Lglobal_entry_point:
1648	// # TOC pointer initialization.
1649	// ...
1650	// .Llocal_entry_point:
1651	// # load the __private_ss member of the threads tcbhead.
1652	// ld r0,-0x7000-64(r13)
1653	// # subtract the functions stack size from the stack pointer.
1654	// addis r12, r1, ha(-stack-frame size)
1655	// addi r12, r12, l(-stack-frame size)
1656	// # compare needed to actual and branch to allocate_more_stack if more
1657	// # space is needed, otherwise fallthrough to 'normal' function body.
1658	// cmpld cr7,r12,r0
1659	// blt- cr7, .Lallocate_more_stack
1660	//
1661	// -) The allocate_more_stack block might be placed after the split-stack
1662	// prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body`
1663	// instead.
1664	// -) If either the addis or addi is not needed due to the stack size being
1665	// smaller then 32K or a multiple of 64K they will be replaced with a nop,
1666	// but there will always be 2 instructions the linker can overwrite for the
1667	// adjusted stack size.
1668	//
1669	// The linkers job here is to increase the stack size used in the addis/addi
1670	// pair by split-stack-size-adjust.
1671	// addis r12, r1, ha(-stack-frame size - split-stack-adjust-size)
1672	// addi r12, r12, l(-stack-frame size - split-stack-adjust-size)
1673	bool PPC64::adjustPrologueForCrossSplitStack(uint8_t loc, uint8_t end,
1674	uint8_t stOther) const {
1675	// If the caller has a global entry point adjust the buffer past it. The start
1676	// of the split-stack prologue will be at the local entry point.
1677	loc += getPPC64GlobalEntryToLocalEntryOffset(ctx, stOther);
1678
1679	// At the very least we expect to see a load of some split-stack data from the
1680	// tcb, and 2 instructions that calculate the ending stack address this
1681	// function will require. If there is not enough room for at least 3
1682	// instructions it can't be a split-stack prologue.
1683	if (loc + `12` >= end)
1684	return false;
1685
1686	// First instruction must be `ld r0, -0x7000-64(r13)`
1687	if (read32(ctx, p: loc) != `0xe80d8fc0`)
1688	return false;
1689
1690	int16_t hiImm = `0`;
1691	int16_t loImm = `0`;
1692	// First instruction can be either an addis if the frame size is larger then
1693	// 32K, or an addi if the size is less then 32K.
1694	int32_t firstInstr = read32(ctx, p: loc + `4`);
1695	if (getPrimaryOpCode(encoding: firstInstr) == `15`) {
1696	hiImm = firstInstr & `0xFFFF`;
1697	} else if (getPrimaryOpCode(encoding: firstInstr) == `14`) {
1698	loImm = firstInstr & `0xFFFF`;
1699	} else {
1700	return false;
1701	}
1702
1703	// Second instruction is either an addi or a nop. If the first instruction was
1704	// an addi then LoImm is set and the second instruction must be a nop.
1705	uint32_t secondInstr = read32(ctx, p: loc + `8`);
1706	if (!loImm && getPrimaryOpCode(encoding: secondInstr) == `14`) {
1707	loImm = secondInstr & `0xFFFF`;
1708	} else if (secondInstr != NOP) {
1709	return false;
1710	}
1711
1712	// The register operands of the first instruction should be the stack-pointer
1713	// (r1) as the input (RA) and r12 as the output (RT). If the second
1714	// instruction is not a nop, then it should use r12 as both input and output.
1715	auto checkRegOperands = [](uint32_t instr, uint8_t expectedRT,
1716	uint8_t expectedRA) {
1717	return ((instr & `0x3E00000`) >> `21` == expectedRT) &&
1718	((instr & `0x1F0000`) >> `16` == expectedRA);
1719	};
1720	if (!checkRegOperands (firstInstr, `12`, `1`))
1721	return false;
1722	if (secondInstr != NOP && !checkRegOperands (secondInstr, `12`, `12`))
1723	return false;
1724
1725	int32_t stackFrameSize = (hiImm * `65536`) + loImm;
1726	// Check that the adjusted size doesn't overflow what we can represent with 2
1727	// instructions.
1728	if (stackFrameSize < ctx.arg.splitStackAdjustSize + INT32_MIN) {
1729	Err(ctx) << getErrorLoc(ctx, loc)
1730	<< "split-stack prologue adjustment overflows";
1731	return false;
1732	}
1733
1734	int32_t adjustedStackFrameSize =
1735	stackFrameSize - ctx.arg.splitStackAdjustSize;
1736
1737	loImm = adjustedStackFrameSize & `0xFFFF`;
1738	hiImm = (adjustedStackFrameSize + `0x8000`) >> `16`;
1739	if (hiImm) {
1740	write32(ctx, p: loc + `4`, v: `0x3d810000` \| (uint16_t)hiImm);
1741	// If the low immediate is zero the second instruction will be a nop.
1742	secondInstr = loImm ? `0x398C0000` \| (uint16_t)loImm : NOP;
1743	write32(ctx, p: loc + `8`, v: secondInstr);
1744	} else {
1745	// addi r12, r1, imm
1746	write32(ctx, p: loc + `4`, v: (`0x39810000`) \| (uint16_t)loImm);
1747	write32(ctx, p: loc + `8`, v: NOP);
1748	}
1749
1750	return true;
1751	}
1752
1753	void elf::setPPC64TargetInfo(Ctx &ctx) { ctx.target.reset(p: new PPC64 (ctx)); }
1754

Browse the source code of llvm_projects/lld/ELF/Arch/PPC64.cpp