X86_64.cpp source code [llvm_projects/lld/ELF/Arch/X86_64.cpp]

1	//===- X86_64.cpp ---------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "OutputSections.h"
10	#include "Relocations.h"
11	#include "Symbols.h"
12	#include "SyntheticSections.h"
13	#include "Target.h"
14	#include "lld/Common/ErrorHandler.h"
15	#include "llvm/BinaryFormat/ELF.h"
16	#include "llvm/Support/Endian.h"
17	#include "llvm/Support/MathExtras.h"
18
19	using namespace llvm;
20	using namespace llvm::object;
21	using namespace llvm::support::endian;
22	using namespace llvm::ELF;
23	using namespace lld;
24	using namespace lld::elf;
25
26	namespace {
27	class X86_64 : public TargetInfo {
28	public:
29	X86_64();
30	int getTlsGdRelaxSkip(RelType type) const override;
31	RelExpr getRelExpr(RelType type, const Symbol &s,
32	const uint8_t loc) const* override;
33	RelType getDynRel(RelType type) const override;
34	void writeGotPltHeader(uint8_t buf) const* override;
35	void writeGotPlt(uint8_t buf, const* Symbol &s) const override;
36	void writeIgotPlt(uint8_t buf, const* Symbol &s) const override;
37	void writePltHeader(uint8_t buf) const* override;
38	void writePlt(uint8_t buf, const* Symbol &sym,
39	uint64_t pltEntryAddr) const override;
40	void relocate(uint8_t loc, const* Relocation &rel,
41	uint64_t val) const override;
42	int64_t getImplicitAddend(const uint8_t buf, RelType type) const* override;
43	void applyJumpInstrMod(uint8_t *loc, JumpModType type,
44	unsigned size) const override;
45	RelExpr adjustGotPcExpr(RelType type, int64_t addend,
46	const uint8_t loc) const* override;
47	void relocateAlloc(InputSectionBase &sec, uint8_t buf) const* override;
48	bool adjustPrologueForCrossSplitStack(uint8_t loc, uint8_t end,
49	uint8_t stOther) const override;
50	bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
51	InputSection nextIS) const* override;
52	bool relaxOnce(int pass) const override;
53	};
54	} // namespace
55
// This is a vector of NOP instructions of sizes from 1 to 9 bytes; the entry
// at index i is a single NOP encoding that is exactly i + 1 bytes long. The
// appropriately sized instructions are used to fill the gaps between sections
// which are executed during fall through.
static const std::vector<std::vector<uint8_t>> nopInstructions = {
    {0x90},
    {0x66, 0x90},
    {0x0f, 0x1f, 0x00},
    {0x0f, 0x1f, 0x40, 0x00},
    {0x0f, 0x1f, 0x44, 0x00, 0x00},
    {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
    {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
    {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
    {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}};
69
70	X86_64::X86_64() {
71	copyRel = R_X86_64_COPY;
72	gotRel = R_X86_64_GLOB_DAT;
73	pltRel = R_X86_64_JUMP_SLOT;
74	relativeRel = R_X86_64_RELATIVE;
75	iRelativeRel = R_X86_64_IRELATIVE;
76	symbolicRel = R_X86_64_64;
77	tlsDescRel = R_X86_64_TLSDESC;
78	tlsGotRel = R_X86_64_TPOFF64;
79	tlsModuleIndexRel = R_X86_64_DTPMOD64;
80	tlsOffsetRel = R_X86_64_DTPOFF64;
81	gotBaseSymInGotPlt = true;
82	gotEntrySize = `8`;
83	pltHeaderSize = `16`;
84	pltEntrySize = `16`;
85	ipltEntrySize = `16`;
86	trapInstr = {`0xcc`, `0xcc`, `0xcc`, `0xcc`}; // 0xcc = INT3
87	nopInstrs = nopInstructions;
88
89	// Align to the large page size (known as a superpage or huge page).
90	// FreeBSD automatically promotes large, superpage-aligned allocations.
91	defaultImageBase = `0x200000`;
92	}
93
94	int X86_64::getTlsGdRelaxSkip(RelType type) const {
95	// TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
96	return type == R_X86_64_GOTPC32_TLSDESC \|\| type == R_X86_64_TLSDESC_CALL ? `1`
97	: `2`;
98	}
99
// Opcodes for the different X86_64 jmp instructions. J_UNKNOWN is the
// sentinel returned when an instruction is not one of the recognized jumps.
enum JmpInsnOpcode : uint32_t {
  J_JMP_32,
  J_JNE_32,
  J_JE_32,
  J_JG_32,
  J_JGE_32,
  J_JB_32,
  J_JBE_32,
  J_JL_32,
  J_JLE_32,
  J_JA_32,
  J_JAE_32,
  J_UNKNOWN,
};
115
116	// Given the first (optional) and second byte of the insn's opcode, this
117	// returns the corresponding enum value.
118	static JmpInsnOpcode getJmpInsnType(const uint8_t *first,
119	const uint8_t *second) {
120	if (*second == `0xe9`)
121	return J_JMP_32;
122
123	if (first == nullptr)
124	return J_UNKNOWN;
125
126	if (*first == `0x0f`) {
127	switch (*second) {
128	case `0x84`:
129	return J_JE_32;
130	case `0x85`:
131	return J_JNE_32;
132	case `0x8f`:
133	return J_JG_32;
134	case `0x8d`:
135	return J_JGE_32;
136	case `0x82`:
137	return J_JB_32;
138	case `0x86`:
139	return J_JBE_32;
140	case `0x8c`:
141	return J_JL_32;
142	case `0x8e`:
143	return J_JLE_32;
144	case `0x87`:
145	return J_JA_32;
146	case `0x83`:
147	return J_JAE_32;
148	}
149	}
150	return J_UNKNOWN;
151	}
152
153	// Return the relocation index for input section IS with a specific Offset.
154	// Returns the maximum size of the vector if no such relocation is found.
155	static unsigned getRelocationWithOffset(const InputSection &is,
156	uint64_t offset) {
157	unsigned size = is.relocs().size();
158	for (unsigned i = size - `1`; i + `1` > `0`; --i) {
159	if (is.relocs()[i].offset == offset && is.relocs()[i].expr != R_NONE)
160	return i;
161	}
162	return size;
163	}
164
165	// Returns true if R corresponds to a relocation used for a jump instruction.
166	// TODO: Once special relocations for relaxable jump instructions are available,
167	// this should be modified to use those relocations.
168	static bool isRelocationForJmpInsn(Relocation &R) {
169	return R.type == R_X86_64_PLT32 \|\| R.type == R_X86_64_PC32 \|\|
170	R.type == R_X86_64_PC8;
171	}
172
173	// Return true if Relocation R points to the first instruction in the
174	// next section.
175	// TODO: Delete this once psABI reserves a new relocation type for fall thru
176	// jumps.
177	static bool isFallThruRelocation(InputSection &is, InputFile *file,
178	InputSection *nextIS, Relocation &r) {
179	if (!isRelocationForJmpInsn(R&: r))
180	return false;
181
182	uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset;
183	uint64_t targetOffset = InputSectionBase::getRelocTargetVA(
184	File: file, Type: r.type, A: r.addend, P: addrLoc, Sym: *r.sym, Expr: r.expr);
185
186	// If this jmp is a fall thru, the target offset is the beginning of the
187	// next section.
188	uint64_t nextSectionOffset =
189	nextIS->getOutputSection()->addr + nextIS->outSecOff;
190	return (addrLoc + `4` + targetOffset) == nextSectionOffset;
191	}
192
193	// Return the jmp instruction opcode that is the inverse of the given
194	// opcode. For example, JE inverted is JNE.
195	static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) {
196	switch (opcode) {
197	case J_JE_32:
198	return J_JNE_32;
199	case J_JNE_32:
200	return J_JE_32;
201	case J_JG_32:
202	return J_JLE_32;
203	case J_JGE_32:
204	return J_JL_32;
205	case J_JB_32:
206	return J_JAE_32;
207	case J_JBE_32:
208	return J_JA_32;
209	case J_JL_32:
210	return J_JGE_32;
211	case J_JLE_32:
212	return J_JG_32;
213	case J_JA_32:
214	return J_JBE_32;
215	case J_JAE_32:
216	return J_JB_32;
217	default:
218	return J_UNKNOWN;
219	}
220	}
221
222	// Deletes direct jump instruction in input sections that jumps to the
223	// following section as it is not required. If there are two consecutive jump
224	// instructions, it checks if they can be flipped and one can be deleted.
225	// For example:
226	// .section .text
227	// a.BB.foo:
228	// ...
229	// 10: jne aa.BB.foo
230	// 16: jmp bar
231	// aa.BB.foo:
232	// ...
233	//
234	// can be converted to:
235	// a.BB.foo:
236	// ...
237	// 10: je bar #jne flipped to je and the jmp is deleted.
238	// aa.BB.foo:
239	// ...
240	bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
241	InputSection nextIS) const* {
242	const unsigned sizeOfDirectJmpInsn = `5`;
243
244	if (nextIS == nullptr)
245	return false;
246
247	if (is.getSize() < sizeOfDirectJmpInsn)
248	return false;
249
250	// If this jmp insn can be removed, it is the last insn and the
251	// relocation is 4 bytes before the end.
252	unsigned rIndex = getRelocationWithOffset(is, offset: is.getSize() - `4`);
253	if (rIndex == is.relocs().size())
254	return false;
255
256	Relocation &r = is.relocs()[rIndex];
257
258	// Check if the relocation corresponds to a direct jmp.
259	const uint8_t *secContents = is.content().data();
260	// If it is not a direct jmp instruction, there is nothing to do here.
261	if (*(secContents + r.offset - `1`) != `0xe9`)
262	return false;
263
264	if (isFallThruRelocation(is, file, nextIS, r)) {
265	// This is a fall thru and can be deleted.
266	r.expr = R_NONE;
267	r.offset = `0`;
268	is.drop_back(num: sizeOfDirectJmpInsn);
269	is.nopFiller = true;
270	return true;
271	}
272
273	// Now, check if flip and delete is possible.
274	const unsigned sizeOfJmpCCInsn = `6`;
275	// To flip, there must be at least one JmpCC and one direct jmp.
276	if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn)
277	return false;
278
279	unsigned rbIndex =
280	getRelocationWithOffset(is, offset: (is.getSize() - sizeOfDirectJmpInsn - `4`));
281	if (rbIndex == is.relocs().size())
282	return false;
283
284	Relocation &rB = is.relocs()[rbIndex];
285
286	const uint8_t *jmpInsnB = secContents + rB.offset - `1`;
287	JmpInsnOpcode jmpOpcodeB = getJmpInsnType(first: jmpInsnB - `1`, second: jmpInsnB);
288	if (jmpOpcodeB == J_UNKNOWN)
289	return false;
290
291	if (!isFallThruRelocation(is, file, nextIS, r&: rB))
292	return false;
293
294	// jmpCC jumps to the fall thru block, the branch can be flipped and the
295	// jmp can be deleted.
296	JmpInsnOpcode jInvert = invertJmpOpcode(opcode: jmpOpcodeB);
297	if (jInvert == J_UNKNOWN)
298	return false;
299	is.jumpInstrMod = make<JumpInstrMod>();
300	*is.jumpInstrMod = {.offset: rB.offset - `1`, .original: jInvert, .size: `4`};
301	// Move R's values to rB except the offset.
302	rB = {.expr: r.expr, .type: r.type, .offset: rB.offset, .addend: r.addend, .sym: r.sym};
303	// Cancel R
304	r.expr = R_NONE;
305	r.offset = `0`;
306	is.drop_back(num: sizeOfDirectJmpInsn);
307	is.nopFiller = true;
308	return true;
309	}
310
311	bool X86_64::relaxOnce(int pass) const {
312	uint64_t minVA = UINT64_MAX, maxVA = `0`;
313	for (OutputSection *osec : outputSections) {
314	minVA = std::min(a: minVA, b: osec->addr);
315	maxVA = std::max(a: maxVA, b: osec->addr + osec->size);
316	}
317	// If the max VA is under 2^31, GOTPCRELX relocations cannot overfow. In
318	// -pie/-shared, the condition can be relaxed to test the max VA difference as
319	// there is no R_RELAX_GOT_PC_NOPIC.
320	if (isUInt<`31`>(x: maxVA) \|\| (isUInt<`31`>(x: maxVA - minVA) && config ->isPic))
321	return false;
322
323	SmallVector<InputSection *, `0`> storage;
324	bool changed = false;
325	for (OutputSection *osec : outputSections) {
326	if (!(osec->flags & SHF_EXECINSTR))
327	continue;
328	for (InputSection sec : getInputSections(os: osec, storage)) {
329	for (Relocation &rel : sec->relocs()) {
330	if (rel.expr != R_RELAX_GOT_PC && rel.expr != R_RELAX_GOT_PC_NOPIC)
331	continue;
332	assert(rel.addend == -`4`);
333
334	uint64_t v = sec->getRelocTargetVA(
335	File: sec->file, Type: rel.type, A: rel.expr == R_RELAX_GOT_PC_NOPIC ? `0` : -`4`,
336	P: sec->getOutputSection()->addr + sec->outSecOff + rel.offset,
337	Sym: *rel.sym, Expr: rel.expr);
338	if (isInt<`32`>(x: v))
339	continue;
340	if (rel.sym->auxIdx == `0`) {
341	rel.sym->allocateAux();
342	addGotEntry(sym&: *rel.sym);
343	changed = true;
344	}
345	rel.expr = R_GOT_PC;
346	}
347	}
348	}
349	return changed;
350	}
351
352	RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
353	const uint8_t loc) const* {
354	switch (type) {
355	case R_X86_64_8:
356	case R_X86_64_16:
357	case R_X86_64_32:
358	case R_X86_64_32S:
359	case R_X86_64_64:
360	return R_ABS;
361	case R_X86_64_DTPOFF32:
362	case R_X86_64_DTPOFF64:
363	return R_DTPREL;
364	case R_X86_64_TPOFF32:
365	case R_X86_64_TPOFF64:
366	return R_TPREL;
367	case R_X86_64_TLSDESC_CALL:
368	return R_TLSDESC_CALL;
369	case R_X86_64_TLSLD:
370	return R_TLSLD_PC;
371	case R_X86_64_TLSGD:
372	return R_TLSGD_PC;
373	case R_X86_64_SIZE32:
374	case R_X86_64_SIZE64:
375	return R_SIZE;
376	case R_X86_64_PLT32:
377	return R_PLT_PC;
378	case R_X86_64_PC8:
379	case R_X86_64_PC16:
380	case R_X86_64_PC32:
381	case R_X86_64_PC64:
382	return R_PC;
383	case R_X86_64_GOT32:
384	case R_X86_64_GOT64:
385	return R_GOTPLT;
386	case R_X86_64_GOTPC32_TLSDESC:
387	return R_TLSDESC_PC;
388	case R_X86_64_GOTPCREL:
389	case R_X86_64_GOTPCRELX:
390	case R_X86_64_REX_GOTPCRELX:
391	case R_X86_64_GOTTPOFF:
392	return R_GOT_PC;
393	case R_X86_64_GOTOFF64:
394	return R_GOTPLTREL;
395	case R_X86_64_PLTOFF64:
396	return R_PLT_GOTPLT;
397	case R_X86_64_GOTPC32:
398	case R_X86_64_GOTPC64:
399	return R_GOTPLTONLY_PC;
400	case R_X86_64_NONE:
401	return R_NONE;
402	default:
403	error(msg: getErrorLocation(loc) + "unknown relocation (" + Twine (type) +
404	") against symbol " + toString(s));
405	return R_NONE;
406	}
407	}
408
409	void X86_64::writeGotPltHeader(uint8_t buf) const* {
410	// The first entry holds the link-time address of _DYNAMIC. It is documented
411	// in the psABI and glibc before Aug 2021 used the entry to compute run-time
412	// load address of the shared object (note that this is relevant for linking
413	// ld.so, not any other program).
414	write64le(P: buf, V: mainPart->dynamic ->getVA());
415	}
416
417	void X86_64::writeGotPlt(uint8_t buf, const* Symbol &s) const {
418	// See comments in X86::writeGotPlt.
419	write64le(P: buf, V: s.getPltVA() + `6`);
420	}
421
422	void X86_64::writeIgotPlt(uint8_t buf, const* Symbol &s) const {
423	// An x86 entry is the address of the ifunc resolver function (for -z rel).
424	if (config ->writeAddends)
425	write64le(P: buf, V: s.getVA());
426	}
427
428	void X86_64::writePltHeader(uint8_t buf) const* {
429	const uint8_t pltData[] = {
430	`0xff`, `0x35`, `0`, `0`, `0`, `0`, // pushq GOTPLT+8(%rip)
431	`0xff`, `0x25`, `0`, `0`, `0`, `0`, // jmp GOTPLT+16(%rip)*
432	`0x0f`, `0x1f`, `0x40`, `0x00`, // nop
433	};
434	memcpy(dest: buf, src: pltData, n: sizeof(pltData));
435	uint64_t gotPlt = in.gotPlt ->getVA();
436	uint64_t plt = in.ibtPlt ? in.ibtPlt ->getVA() : in.plt ->getVA();
437	write32le(P: buf + `2`, V: gotPlt - plt + `2`); // GOTPLT+8
438	write32le(P: buf + `8`, V: gotPlt - plt + `4`); // GOTPLT+16
439	}
440
441	void X86_64::writePlt(uint8_t buf, const* Symbol &sym,
442	uint64_t pltEntryAddr) const {
443	const uint8_t inst[] = {
444	`0xff`, `0x25`, `0`, `0`, `0`, `0`, // jmpq got(%rip)*
445	`0x68`, `0`, `0`, `0`, `0`, // pushq <relocation index>
446	`0xe9`, `0`, `0`, `0`, `0`, // jmpq plt[0]
447	};
448	memcpy(dest: buf, src: inst, n: sizeof(inst));
449
450	write32le(P: buf + `2`, V: sym.getGotPltVA() - pltEntryAddr - `6`);
451	write32le(P: buf + `7`, V: sym.getPltIdx());
452	write32le(P: buf + `12`, V: in.plt ->getVA() - pltEntryAddr - `16`);
453	}
454
455	RelType X86_64::getDynRel(RelType type) const {
456	if (type == R_X86_64_64 \|\| type == R_X86_64_PC64 \|\| type == R_X86_64_SIZE32 \|\|
457	type == R_X86_64_SIZE64)
458	return type;
459	return R_X86_64_NONE;
460	}
461
462	static void relaxTlsGdToLe(uint8_t loc, const* Relocation &rel, uint64_t val) {
463	if (rel.type == R_X86_64_TLSGD) {
464	// Convert
465	// .byte 0x66
466	// leaq x@tlsgd(%rip), %rdi
467	// .word 0x6666
468	// rex64
469	// call __tls_get_addr@plt
470	// to the following two instructions.
471	const uint8_t inst[] = {
472	`0x64`, `0x48`, `0x8b`, `0x04`, `0x25`, `0x00`, `0x00`,
473	`0x00`, `0x00`, // mov %fs:0x0,%rax
474	`0x48`, `0x8d`, `0x80`, `0`, `0`, `0`, `0`, // lea x@tpoff,%rax
475	};
476	memcpy(dest: loc - `4`, src: inst, n: sizeof(inst));
477
478	// The original code used a pc relative relocation and so we have to
479	// compensate for the -4 in had in the addend.
480	write32le(P: loc + `8`, V: val + `4`);
481	} else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {
482	// Convert leaq x@tlsdesc(%rip), %REG to movq $x@tpoff, %REG.
483	if ((loc[-`3`] & `0xfb`) != `0x48` \|\| loc[-`2`] != `0x8d` \|\|
484	(loc[-`1`] & `0xc7`) != `0x05`) {
485	errorOrWarn(msg: getErrorLocation(loc: loc - `3`) +
486	"R_X86_64_GOTPC32_TLSDESC must be used "
487	"in leaq x@tlsdesc(%rip), %REG");
488	return;
489	}
490	loc[-`3`] = `0x48` \| ((loc[-`3`] >> `2`) & `1`);
491	loc[-`2`] = `0xc7`;
492	loc[-`1`] = `0xc0` \| ((loc[-`1`] >> `3`) & `7`);
493	write32le(P: loc, V: val + `4`);
494	} else {
495	// Convert call x@tlsdesc(%REG) to xchg ax, ax.*
496	assert(rel.type == R_X86_64_TLSDESC_CALL);
497	loc[`0`] = `0x66`;
498	loc[`1`] = `0x90`;
499	}
500	}
501
502	static void relaxTlsGdToIe(uint8_t loc, const* Relocation &rel, uint64_t val) {
503	if (rel.type == R_X86_64_TLSGD) {
504	// Convert
505	// .byte 0x66
506	// leaq x@tlsgd(%rip), %rdi
507	// .word 0x6666
508	// rex64
509	// call __tls_get_addr@plt
510	// to the following two instructions.
511	const uint8_t inst[] = {
512	`0x64`, `0x48`, `0x8b`, `0x04`, `0x25`, `0x00`, `0x00`,
513	`0x00`, `0x00`, // mov %fs:0x0,%rax
514	`0x48`, `0x03`, `0x05`, `0`, `0`, `0`, `0`, // addq x@gottpoff(%rip),%rax
515	};
516	memcpy(dest: loc - `4`, src: inst, n: sizeof(inst));
517
518	// Both code sequences are PC relatives, but since we are moving the
519	// constant forward by 8 bytes we have to subtract the value by 8.
520	write32le(P: loc + `8`, V: val - `8`);
521	} else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {
522	// Convert leaq x@tlsdesc(%rip), %REG to movq x@gottpoff(%rip), %REG.
523	assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
524	if ((loc[-`3`] & `0xfb`) != `0x48` \|\| loc[-`2`] != `0x8d` \|\|
525	(loc[-`1`] & `0xc7`) != `0x05`) {
526	errorOrWarn(msg: getErrorLocation(loc: loc - `3`) +
527	"R_X86_64_GOTPC32_TLSDESC must be used "
528	"in leaq x@tlsdesc(%rip), %REG");
529	return;
530	}
531	loc[-`2`] = `0x8b`;
532	write32le(P: loc, V: val);
533	} else {
534	// Convert call x@tlsdesc(%rax) to xchg ax, ax.*
535	assert(rel.type == R_X86_64_TLSDESC_CALL);
536	loc[`0`] = `0x66`;
537	loc[`1`] = `0x90`;
538	}
539	}
540
541	// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
542	// R_X86_64_TPOFF32 so that it does not use GOT.
543	static void relaxTlsIeToLe(uint8_t loc, const* Relocation &, uint64_t val) {
544	uint8_t *inst = loc - `3`;
545	uint8_t reg = loc[-`1`] >> `3`;
546	uint8_t *regSlot = loc - `1`;
547
548	// Note that ADD with RSP or R12 is converted to ADD instead of LEA
549	// because LEA with these registers needs 4 bytes to encode and thus
550	// wouldn't fit the space.
551
552	if (memcmp(s1: inst, s2: "\x48\x03\x25", n: `3`) == `0`) {
553	// "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
554	memcpy(dest: inst, src: "\x48\x81\xc4", n: `3`);
555	} else if (memcmp(s1: inst, s2: "\x4c\x03\x25", n: `3`) == `0`) {
556	// "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
557	memcpy(dest: inst, src: "\x49\x81\xc4", n: `3`);
558	} else if (memcmp(s1: inst, s2: "\x4c\x03", n: `2`) == `0`) {
559	// "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
560	memcpy(dest: inst, src: "\x4d\x8d", n: `2`);
561	*regSlot = `0x80` \| (reg << `3`) \| reg;
562	} else if (memcmp(s1: inst, s2: "\x48\x03", n: `2`) == `0`) {
563	// "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
564	memcpy(dest: inst, src: "\x48\x8d", n: `2`);
565	*regSlot = `0x80` \| (reg << `3`) \| reg;
566	} else if (memcmp(s1: inst, s2: "\x4c\x8b", n: `2`) == `0`) {
567	// "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
568	memcpy(dest: inst, src: "\x49\xc7", n: `2`);
569	*regSlot = `0xc0` \| reg;
570	} else if (memcmp(s1: inst, s2: "\x48\x8b", n: `2`) == `0`) {
571	// "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
572	memcpy(dest: inst, src: "\x48\xc7", n: `2`);
573	*regSlot = `0xc0` \| reg;
574	} else {
575	error(msg: getErrorLocation(loc: loc - `3`) +
576	"R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only");
577	}
578
579	// The original code used a PC relative relocation.
580	// Need to compensate for the -4 it had in the addend.
581	write32le(P: loc, V: val + `4`);
582	}
583
584	static void relaxTlsLdToLe(uint8_t loc, const* Relocation &rel, uint64_t val) {
585	const uint8_t inst[] = {
586	`0x66`, `0x66`, // .word 0x6666
587	`0x66`, // .byte 0x66
588	`0x64`, `0x48`, `0x8b`, `0x04`, `0x25`, `0x00`, `0x00`, `0x00`, `0x00`, // mov %fs:0,%rax
589	};
590
591	if (loc[`4`] == `0xe8`) {
592	// Convert
593	// leaq bar@tlsld(%rip), %rdi # 48 8d 3d <Loc>
594	// callq __tls_get_addr@PLT # e8 <disp32>
595	// leaq bar@dtpoff(%rax), %rcx
596	// to
597	// .word 0x6666
598	// .byte 0x66
599	// mov %fs:0,%rax
600	// leaq bar@tpoff(%rax), %rcx
601	memcpy(dest: loc - `3`, src: inst, n: sizeof(inst));
602	return;
603	}
604
605	if (loc[`4`] == `0xff` && loc[`5`] == `0x15`) {
606	// Convert
607	// leaq x@tlsld(%rip),%rdi # 48 8d 3d <Loc>
608	// call __tls_get_addr@GOTPCREL(%rip) # ff 15 <disp32>*
609	// to
610	// .long 0x66666666
611	// movq %fs:0,%rax
612	// See "Table 11.9: LD -> LE Code Transition (LP64)" in
613	// https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
614	loc[-`3`] = `0x66`;
615	memcpy(dest: loc - `2`, src: inst, n: sizeof(inst));
616	return;
617	}
618
619	error(msg: getErrorLocation(loc: loc - `3`) +
620	"expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
621	}
622
623	// A JumpInstrMod at a specific offset indicates that the jump instruction
624	// opcode at that offset must be modified. This is specifically used to relax
625	// jump instructions with basic block sections. This function looks at the
626	// JumpMod and effects the change.
627	void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type,
628	unsigned size) const {
629	switch (type) {
630	case J_JMP_32:
631	if (size == `4`)
632	*loc = `0xe9`;
633	else
634	*loc = `0xeb`;
635	break;
636	case J_JE_32:
637	if (size == `4`) {
638	loc[-`1`] = `0x0f`;
639	*loc = `0x84`;
640	} else
641	*loc = `0x74`;
642	break;
643	case J_JNE_32:
644	if (size == `4`) {
645	loc[-`1`] = `0x0f`;
646	*loc = `0x85`;
647	} else
648	*loc = `0x75`;
649	break;
650	case J_JG_32:
651	if (size == `4`) {
652	loc[-`1`] = `0x0f`;
653	*loc = `0x8f`;
654	} else
655	*loc = `0x7f`;
656	break;
657	case J_JGE_32:
658	if (size == `4`) {
659	loc[-`1`] = `0x0f`;
660	*loc = `0x8d`;
661	} else
662	*loc = `0x7d`;
663	break;
664	case J_JB_32:
665	if (size == `4`) {
666	loc[-`1`] = `0x0f`;
667	*loc = `0x82`;
668	} else
669	*loc = `0x72`;
670	break;
671	case J_JBE_32:
672	if (size == `4`) {
673	loc[-`1`] = `0x0f`;
674	*loc = `0x86`;
675	} else
676	*loc = `0x76`;
677	break;
678	case J_JL_32:
679	if (size == `4`) {
680	loc[-`1`] = `0x0f`;
681	*loc = `0x8c`;
682	} else
683	*loc = `0x7c`;
684	break;
685	case J_JLE_32:
686	if (size == `4`) {
687	loc[-`1`] = `0x0f`;
688	*loc = `0x8e`;
689	} else
690	*loc = `0x7e`;
691	break;
692	case J_JA_32:
693	if (size == `4`) {
694	loc[-`1`] = `0x0f`;
695	*loc = `0x87`;
696	} else
697	*loc = `0x77`;
698	break;
699	case J_JAE_32:
700	if (size == `4`) {
701	loc[-`1`] = `0x0f`;
702	*loc = `0x83`;
703	} else
704	*loc = `0x73`;
705	break;
706	case J_UNKNOWN:
707	llvm_unreachable("Unknown Jump Relocation");
708	}
709	}
710
711	int64_t X86_64::getImplicitAddend(const uint8_t buf, RelType type) const* {
712	switch (type) {
713	case R_X86_64_8:
714	case R_X86_64_PC8:
715	return SignExtend64<`8`>(x: *buf);
716	case R_X86_64_16:
717	case R_X86_64_PC16:
718	return SignExtend64<`16`>(x: read16le(P: buf));
719	case R_X86_64_32:
720	case R_X86_64_32S:
721	case R_X86_64_TPOFF32:
722	case R_X86_64_GOT32:
723	case R_X86_64_GOTPC32:
724	case R_X86_64_GOTPC32_TLSDESC:
725	case R_X86_64_GOTPCREL:
726	case R_X86_64_GOTPCRELX:
727	case R_X86_64_REX_GOTPCRELX:
728	case R_X86_64_PC32:
729	case R_X86_64_GOTTPOFF:
730	case R_X86_64_PLT32:
731	case R_X86_64_TLSGD:
732	case R_X86_64_TLSLD:
733	case R_X86_64_DTPOFF32:
734	case R_X86_64_SIZE32:
735	return SignExtend64<`32`>(x: read32le(P: buf));
736	case R_X86_64_64:
737	case R_X86_64_TPOFF64:
738	case R_X86_64_DTPOFF64:
739	case R_X86_64_DTPMOD64:
740	case R_X86_64_PC64:
741	case R_X86_64_SIZE64:
742	case R_X86_64_GLOB_DAT:
743	case R_X86_64_GOT64:
744	case R_X86_64_GOTOFF64:
745	case R_X86_64_GOTPC64:
746	case R_X86_64_PLTOFF64:
747	case R_X86_64_IRELATIVE:
748	case R_X86_64_RELATIVE:
749	return read64le(P: buf);
750	case R_X86_64_TLSDESC:
751	return read64le(P: buf + `8`);
752	case R_X86_64_JUMP_SLOT:
753	case R_X86_64_NONE:
754	// These relocations are defined as not having an implicit addend.
755	return `0`;
756	default:
757	internalLinkerError(loc: getErrorLocation(loc: buf),
758	msg: "cannot read addend for relocation " + toString(type));
759	return `0`;
760	}
761	}
762
763	static void relaxGot(uint8_t loc, const* Relocation &rel, uint64_t val);
764
765	void X86_64::relocate(uint8_t loc, const* Relocation &rel, uint64_t val) const {
766	switch (rel.type) {
767	case R_X86_64_8:
768	checkIntUInt(loc, v: val, n: `8`, rel);
769	*loc = val;
770	break;
771	case R_X86_64_PC8:
772	checkInt(loc, v: val, n: `8`, rel);
773	*loc = val;
774	break;
775	case R_X86_64_16:
776	checkIntUInt(loc, v: val, n: `16`, rel);
777	write16le(P: loc, V: val);
778	break;
779	case R_X86_64_PC16:
780	checkInt(loc, v: val, n: `16`, rel);
781	write16le(P: loc, V: val);
782	break;
783	case R_X86_64_32:
784	checkUInt(loc, v: val, n: `32`, rel);
785	write32le(P: loc, V: val);
786	break;
787	case R_X86_64_32S:
788	case R_X86_64_GOT32:
789	case R_X86_64_GOTPC32:
790	case R_X86_64_GOTPCREL:
791	case R_X86_64_PC32:
792	case R_X86_64_PLT32:
793	case R_X86_64_DTPOFF32:
794	case R_X86_64_SIZE32:
795	checkInt(loc, v: val, n: `32`, rel);
796	write32le(P: loc, V: val);
797	break;
798	case R_X86_64_64:
799	case R_X86_64_TPOFF64:
800	case R_X86_64_DTPOFF64:
801	case R_X86_64_PC64:
802	case R_X86_64_SIZE64:
803	case R_X86_64_GOT64:
804	case R_X86_64_GOTOFF64:
805	case R_X86_64_GOTPC64:
806	case R_X86_64_PLTOFF64:
807	write64le(P: loc, V: val);
808	break;
809	case R_X86_64_GOTPCRELX:
810	case R_X86_64_REX_GOTPCRELX:
811	if (rel.expr != R_GOT_PC) {
812	relaxGot(loc, rel, val);
813	} else {
814	checkInt(loc, v: val, n: `32`, rel);
815	write32le(P: loc, V: val);
816	}
817	break;
818	case R_X86_64_GOTPC32_TLSDESC:
819	case R_X86_64_TLSDESC_CALL:
820	case R_X86_64_TLSGD:
821	if (rel.expr == R_RELAX_TLS_GD_TO_LE) {
822	relaxTlsGdToLe(loc, rel, val);
823	} else if (rel.expr == R_RELAX_TLS_GD_TO_IE) {
824	relaxTlsGdToIe(loc, rel, val);
825	} else {
826	checkInt(loc, v: val, n: `32`, rel);
827	write32le(P: loc, V: val);
828	}
829	break;
830	case R_X86_64_TLSLD:
831	if (rel.expr == R_RELAX_TLS_LD_TO_LE) {
832	relaxTlsLdToLe(loc, rel, val);
833	} else {
834	checkInt(loc, v: val, n: `32`, rel);
835	write32le(P: loc, V: val);
836	}
837	break;
838	case R_X86_64_GOTTPOFF:
839	if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
840	relaxTlsIeToLe(loc, rel, val);
841	} else {
842	checkInt(loc, v: val, n: `32`, rel);
843	write32le(P: loc, V: val);
844	}
845	break;
846	case R_X86_64_TPOFF32:
847	checkInt(loc, v: val, n: `32`, rel);
848	write32le(P: loc, V: val);
849	break;
850
851	case R_X86_64_TLSDESC:
852	// The addend is stored in the second 64-bit word.
853	write64le(P: loc + `8`, V: val);
854	break;
855	default:
856	llvm_unreachable("unknown relocation");
857	}
858	}
859
860	RelExpr X86_64::adjustGotPcExpr(RelType type, int64_t addend,
861	const uint8_t loc) const* {
862	// Only R_X86_64_[REX_]GOTPCRELX can be relaxed. GNU as may emit GOTPCRELX
863	// with addend != -4. Such an instruction does not load the full GOT entry, so
864	// we cannot relax the relocation. E.g. movl x@GOTPCREL+4(%rip), %rax
865	// (addend=0) loads the high 32 bits of the GOT entry.
866	if (!config ->relax \|\| addend != -`4` \|\|
867	(type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX))
868	return R_GOT_PC;
869	const uint8_t op = loc[-`2`];
870	const uint8_t modRm = loc[-`1`];
871
872	// FIXME: When PIC is disabled and foo is defined locally in the
873	// lower 32 bit address space, memory operand in mov can be converted into
874	// immediate operand. Otherwise, mov must be changed to lea. We support only
875	// latter relaxation at this moment.
876	if (op == `0x8b`)
877	return R_RELAX_GOT_PC;
878
879	// Relax call and jmp.
880	if (op == `0xff` && (modRm == `0x15` \|\| modRm == `0x25`))
881	return R_RELAX_GOT_PC;
882
883	// We don't support test/binop instructions without a REX prefix.
884	if (type == R_X86_64_GOTPCRELX)
885	return R_GOT_PC;
886
887	// Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor.
888	// If PIC then no relaxation is available.
889	return config ->isPic ? R_GOT_PC : R_RELAX_GOT_PC_NOPIC;
890	}
891
892	// A subset of relaxations can only be applied for no-PIC. This method
893	// handles such relaxations. Instructions encoding information was taken from:
894	// "Intel 64 and IA-32 Architectures Software Developer's Manual V2"
895	// (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/
896	// 64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)
897	static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op,
898	uint8_t modRm) {
899	const uint8_t rex = loc[-`3`];
900	// Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg".
901	if (op == `0x85`) {
902	// See "TEST-Logical Compare" (4-428 Vol. 2B),
903	// TEST r/m64, r64 uses "full" ModR / M byte (no opcode extension).
904
905	// ModR/M byte has form XX YYY ZZZ, where
906	// YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1).
907	// XX has different meanings:
908	// 00: The operand's memory address is in reg1.
909	// 01: The operand's memory address is reg1 + a byte-sized displacement.
910	// 10: The operand's memory address is reg1 + a word-sized displacement.
911	// 11: The operand is reg1 itself.
912	// If an instruction requires only one operand, the unused reg2 field
913	// holds extra opcode bits rather than a register code
914	// 0xC0 == 11 000 000 binary.
915	// 0x38 == 00 111 000 binary.
916	// We transfer reg2 to reg1 here as operand.
917	// See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3).
918	loc[-`1`] = `0xc0` \| (modRm & `0x38`) >> `3`; // ModR/M byte.
919
920	// Change opcode from TEST r/m64, r64 to TEST r/m64, imm32
921	// See "TEST-Logical Compare" (4-428 Vol. 2B).
922	loc[-`2`] = `0xf7`;
923
924	// Move R bit to the B bit in REX byte.
925	// REX byte is encoded as 0100WRXB, where
926	// 0100 is 4bit fixed pattern.
927	// REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the
928	// default operand size is used (which is 32-bit for most but not all
929	// instructions).
930	// REX.R This 1-bit value is an extension to the MODRM.reg field.
931	// REX.X This 1-bit value is an extension to the SIB.index field.
932	// REX.B This 1-bit value is an extension to the MODRM.rm field or the
933	// SIB.base field.
934	// See "2.2.1.2 More on REX Prefix Fields " (2-8 Vol. 2A).
935	loc[-`3`] = (rex & ~`0x4`) \| (rex & `0x4`) >> `2`;
936	write32le(P: loc, V: val);
937	return;
938	}
939
940	// If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub
941	// or xor operations.
942
943	// Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg".
944	// Logic is close to one for test instruction above, but we also
945	// write opcode extension here, see below for details.
946	loc[-`1`] = `0xc0` \| (modRm & `0x38`) >> `3` \| (op & `0x3c`); // ModR/M byte.
947
948	// Primary opcode is 0x81, opcode extension is one of:
949	// 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB,
950	// 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP.
951	// This value was wrote to MODRM.reg in a line above.
952	// See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15),
953	// "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for
954	// descriptions about each operation.
955	loc[-`2`] = `0x81`;
956	loc[-`3`] = (rex & ~`0x4`) \| (rex & `0x4`) >> `2`;
957	write32le(P: loc, V: val);
958	}
959
960	static void relaxGot(uint8_t loc, const* Relocation &rel, uint64_t val) {
961	assert(isInt<`32`>(val) &&
962	"GOTPCRELX should not have been relaxed if it overflows");
963	const uint8_t op = loc[-`2`];
964	const uint8_t modRm = loc[-`1`];
965
966	// Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
967	if (op == `0x8b`) {
968	loc[-`2`] = `0x8d`;
969	write32le(P: loc, V: val);
970	return;
971	}
972
973	if (op != `0xff`) {
974	// We are relaxing a rip relative to an absolute, so compensate
975	// for the old -4 addend.
976	assert(!config->isPic);
977	relaxGotNoPic(loc, val: val + `4`, op, modRm);
978	return;
979	}
980
981	// Convert call/jmp instructions.
982	if (modRm == `0x15`) {
983	// ABI says we can convert "call foo@GOTPCREL(%rip)" to "nop; call foo".*
984	// Instead we convert to "addr32 call foo" where addr32 is an instruction
985	// prefix. That makes result expression to be a single instruction.
986	loc[-`2`] = `0x67`; // addr32 prefix
987	loc[-`1`] = `0xe8`; // call
988	write32le(P: loc, V: val);
989	return;
990	}
991
992	// Convert "jmp foo@GOTPCREL(%rip)" to "jmp foo; nop".*
993	// jmp doesn't return, so it is fine to use nop here, it is just a stub.
994	assert(modRm == `0x25`);
995	loc[-`2`] = `0xe9`; // jmp
996	loc[`3`] = `0x90`; // nop
997	write32le(P: loc - `1`, V: val + `1`);
998	}
999
1000	// A split-stack prologue starts by checking the amount of stack remaining
1001	// in one of two ways:
1002	// A) Comparing of the stack pointer to a field in the tcb.
1003	// B) Or a load of a stack pointer offset with an lea to r10 or r11.
1004	bool X86_64::adjustPrologueForCrossSplitStack(uint8_t loc, uint8_t end,
1005	uint8_t stOther) const {
1006	if (!config ->is64) {
1007	error(msg: "target doesn't support split stacks");
1008	return false;
1009	}
1010
1011	if (loc + `8` >= end)
1012	return false;
1013
1014	// Replace "cmp %fs:0x70,%rsp" and subsequent branch
1015	// with "stc, nopl 0x0(%rax,%rax,1)"
1016	if (memcmp(s1: loc, s2: "\x64\x48\x3b\x24\x25", n: `5`) == `0`) {
1017	memcpy(dest: loc, src: "\xf9\x0f\x1f\x84\x00\x00\x00\x00", n: `8`);
1018	return true;
1019	}
1020
1021	// Adjust "lea X(%rsp),%rYY" to lea "(X - 0x4000)(%rsp),%rYY" where rYY could
1022	// be r10 or r11. The lea instruction feeds a subsequent compare which checks
1023	// if there is X available stack space. Making X larger effectively reserves
1024	// that much additional space. The stack grows downward so subtract the value.
1025	if (memcmp(s1: loc, s2: "\x4c\x8d\x94\x24", n: `4`) == `0` \|\|
1026	memcmp(s1: loc, s2: "\x4c\x8d\x9c\x24", n: `4`) == `0`) {
1027	// The offset bytes are encoded four bytes after the start of the
1028	// instruction.
1029	write32le(P: loc + `4`, V: read32le(P: loc + `4`) - `0x4000`);
1030	return true;
1031	}
1032	return false;
1033	}
1034
1035	void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t buf) const* {
1036	uint64_t secAddr = sec.getOutputSection()->addr;
1037	if (auto *s = dyn_cast<InputSection>(Val: &sec))
1038	secAddr += s->outSecOff;
1039	else if (auto *ehIn = dyn_cast<EhInputSection>(Val: &sec))
1040	secAddr += ehIn->getParent()->outSecOff;
1041	for (const Relocation &rel : sec.relocs()) {
1042	if (rel.expr == R_NONE) // See deleteFallThruJmpInsn
1043	continue;
1044	uint8_t *loc = buf + rel.offset;
1045	const uint64_t val =
1046	sec.getRelocTargetVA(File: sec.file, Type: rel.type, A: rel.addend,
1047	P: secAddr + rel.offset, Sym: *rel.sym, Expr: rel.expr);
1048	relocate(loc, rel, val);
1049	}
1050	if (sec.jumpInstrMod) {
1051	applyJumpInstrMod(loc: buf + sec.jumpInstrMod->offset,
1052	type: sec.jumpInstrMod->original, size: sec.jumpInstrMod->size);
1053	}
1054	}
1055
1056	// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
1057	// entries containing endbr64 instructions. A PLT entry will be split into two
1058	// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
1059	namespace {
1060	class IntelIBT : public X86_64 {
1061	public:
1062	IntelIBT();
1063	void writeGotPlt(uint8_t buf, const* Symbol &s) const override;
1064	void writePlt(uint8_t buf, const* Symbol &sym,
1065	uint64_t pltEntryAddr) const override;
1066	void writeIBTPlt(uint8_t buf, size_t numEntries) const* override;
1067
1068	static const unsigned IBTPltHeaderSize = `16`;
1069	};
1070	} // namespace
1071
1072	IntelIBT::IntelIBT() { pltHeaderSize = `0`; }
1073
1074	void IntelIBT::writeGotPlt(uint8_t buf, const* Symbol &s) const {
1075	uint64_t va =
1076	in.ibtPlt ->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize;
1077	write64le(P: buf, V: va);
1078	}
1079
1080	void IntelIBT::writePlt(uint8_t buf, const* Symbol &sym,
1081	uint64_t pltEntryAddr) const {
1082	const uint8_t Inst[] = {
1083	`0xf3`, `0x0f`, `0x1e`, `0xfa`, // endbr64
1084	`0xff`, `0x25`, `0`, `0`, `0`, `0`, // jmpq got(%rip)*
1085	`0x66`, `0x0f`, `0x1f`, `0x44`, `0`, `0`, // nop
1086	};
1087	memcpy(dest: buf, src: Inst, n: sizeof(Inst));
1088	write32le(P: buf + `6`, V: sym.getGotPltVA() - pltEntryAddr - `10`);
1089	}
1090
1091	void IntelIBT::writeIBTPlt(uint8_t buf, size_t numEntries) const* {
1092	writePltHeader(buf);
1093	buf += IBTPltHeaderSize;
1094
1095	const uint8_t inst[] = {
1096	`0xf3`, `0x0f`, `0x1e`, `0xfa`, // endbr64
1097	`0x68`, `0`, `0`, `0`, `0`, // pushq <relocation index>
1098	`0xe9`, `0`, `0`, `0`, `0`, // jmpq plt[0]
1099	`0x66`, `0x90`, // nop
1100	};
1101
1102	for (size_t i = `0`; i < numEntries; ++i) {
1103	memcpy(dest: buf, src: inst, n: sizeof(inst));
1104	write32le(P: buf + `5`, V: i);
1105	write32le(P: buf + `10`, V: -pltHeaderSize - sizeof(inst) * i - `30`);
1106	buf += sizeof(inst);
1107	}
1108	}
1109
// These nonstandard PLT entries are to mitigate the Spectre v2 security
// vulnerability. In order to mitigate Spectre v2, we want to avoid indirect
// branch instructions such as `jmp *GOTPLT(%rip)`. So, in the following PLT
// entries, we use a CALL followed by MOV and RET to do the same thing as an
// indirect jump. That instruction sequence is the so-called "retpoline".
1115	//
1116	// We have two types of retpoline PLTs as a size optimization. If `-z now`
1117	// is specified, all dynamic symbols are resolved at load-time. Thus, when
1118	// that option is given, we can omit code for symbol lazy resolution.
1119	namespace {
1120	class Retpoline : public X86_64 {
1121	public:
1122	Retpoline();
1123	void writeGotPlt(uint8_t buf, const* Symbol &s) const override;
1124	void writePltHeader(uint8_t buf) const* override;
1125	void writePlt(uint8_t buf, const* Symbol &sym,
1126	uint64_t pltEntryAddr) const override;
1127	};
1128
1129	class RetpolineZNow : public X86_64 {
1130	public:
1131	RetpolineZNow();
1132	void writeGotPlt(uint8_t buf, const* Symbol &s) const override {}
1133	void writePltHeader(uint8_t buf) const* override;
1134	void writePlt(uint8_t buf, const* Symbol &sym,
1135	uint64_t pltEntryAddr) const override;
1136	};
1137	} // namespace
1138
1139	Retpoline::Retpoline() {
1140	pltHeaderSize = `48`;
1141	pltEntrySize = `32`;
1142	ipltEntrySize = `32`;
1143	}
1144
1145	void Retpoline::writeGotPlt(uint8_t buf, const* Symbol &s) const {
1146	write64le(P: buf, V: s.getPltVA() + `17`);
1147	}
1148
1149	void Retpoline::writePltHeader(uint8_t buf) const* {
1150	const uint8_t insn[] = {
1151	`0xff`, `0x35`, `0`, `0`, `0`, `0`, // 0: pushq GOTPLT+8(%rip)
1152	`0x4c`, `0x8b`, `0x1d`, `0`, `0`, `0`, `0`, // 6: mov GOTPLT+16(%rip), %r11
1153	`0xe8`, `0x0e`, `0x00`, `0x00`, `0x00`, // d: callq next
1154	`0xf3`, `0x90`, // 12: loop: pause
1155	`0x0f`, `0xae`, `0xe8`, // 14: lfence
1156	`0xeb`, `0xf9`, // 17: jmp loop
1157	`0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, // 19: int3; .align 16
1158	`0x4c`, `0x89`, `0x1c`, `0x24`, // 20: next: mov %r11, (%rsp)
1159	`0xc3`, // 24: ret
1160	`0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, // 25: int3; padding
1161	`0xcc`, `0xcc`, `0xcc`, `0xcc`, // 2c: int3; padding
1162	};
1163	memcpy(dest: buf, src: insn, n: sizeof(insn));
1164
1165	uint64_t gotPlt = in.gotPlt ->getVA();
1166	uint64_t plt = in.plt ->getVA();
1167	write32le(P: buf + `2`, V: gotPlt - plt - `6` + `8`);
1168	write32le(P: buf + `9`, V: gotPlt - plt - `13` + `16`);
1169	}
1170
1171	void Retpoline::writePlt(uint8_t buf, const* Symbol &sym,
1172	uint64_t pltEntryAddr) const {
1173	const uint8_t insn[] = {
1174	`0x4c`, `0x8b`, `0x1d`, `0`, `0`, `0`, `0`, // 0: mov foo@GOTPLT(%rip), %r11
1175	`0xe8`, `0`, `0`, `0`, `0`, // 7: callq plt+0x20
1176	`0xe9`, `0`, `0`, `0`, `0`, // c: jmp plt+0x12
1177	`0x68`, `0`, `0`, `0`, `0`, // 11: pushq <relocation index>
1178	`0xe9`, `0`, `0`, `0`, `0`, // 16: jmp plt+0
1179	`0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, // 1b: int3; padding
1180	};
1181	memcpy(dest: buf, src: insn, n: sizeof(insn));
1182
1183	uint64_t off = pltEntryAddr - in.plt ->getVA();
1184
1185	write32le(P: buf + `3`, V: sym.getGotPltVA() - pltEntryAddr - `7`);
1186	write32le(P: buf + `8`, V: -off - `12` + `32`);
1187	write32le(P: buf + `13`, V: -off - `17` + `18`);
1188	write32le(P: buf + `18`, V: sym.getPltIdx());
1189	write32le(P: buf + `23`, V: -off - `27`);
1190	}
1191
1192	RetpolineZNow::RetpolineZNow() {
1193	pltHeaderSize = `32`;
1194	pltEntrySize = `16`;
1195	ipltEntrySize = `16`;
1196	}
1197
1198	void RetpolineZNow::writePltHeader(uint8_t buf) const* {
1199	const uint8_t insn[] = {
1200	`0xe8`, `0x0b`, `0x00`, `0x00`, `0x00`, // 0: call next
1201	`0xf3`, `0x90`, // 5: loop: pause
1202	`0x0f`, `0xae`, `0xe8`, // 7: lfence
1203	`0xeb`, `0xf9`, // a: jmp loop
1204	`0xcc`, `0xcc`, `0xcc`, `0xcc`, // c: int3; .align 16
1205	`0x4c`, `0x89`, `0x1c`, `0x24`, // 10: next: mov %r11, (%rsp)
1206	`0xc3`, // 14: ret
1207	`0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, // 15: int3; padding
1208	`0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, // 1a: int3; padding
1209	`0xcc`, // 1f: int3; padding
1210	};
1211	memcpy(dest: buf, src: insn, n: sizeof(insn));
1212	}
1213
1214	void RetpolineZNow::writePlt(uint8_t buf, const* Symbol &sym,
1215	uint64_t pltEntryAddr) const {
1216	const uint8_t insn[] = {
1217	`0x4c`, `0x8b`, `0x1d`, `0`, `0`, `0`, `0`, // mov foo@GOTPLT(%rip), %r11
1218	`0xe9`, `0`, `0`, `0`, `0`, // jmp plt+0
1219	`0xcc`, `0xcc`, `0xcc`, `0xcc`, // int3; padding
1220	};
1221	memcpy(dest: buf, src: insn, n: sizeof(insn));
1222
1223	write32le(P: buf + `3`, V: sym.getGotPltVA() - pltEntryAddr - `7`);
1224	write32le(P: buf + `8`, V: in.plt ->getVA() - pltEntryAddr - `12`);
1225	}
1226
1227	static TargetInfo *getTargetInfo() {
1228	if (config ->zRetpolineplt) {
1229	if (config ->zNow) {
1230	static RetpolineZNow t;
1231	return &t;
1232	}
1233	static Retpoline t;
1234	return &t;
1235	}
1236
1237	if (config ->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
1238	static IntelIBT t;
1239	return &t;
1240	}
1241
1242	static X86_64 t;
1243	return &t;
1244	}
1245
1246	TargetInfo elf::getX86_64TargetInfo() { return* getTargetInfo(); }
1247

Browse the source code of llvm_projects/lld/ELF/Arch/X86_64.cpp