X86.cpp source code [llvm_projects/lld/ELF/Arch/X86.cpp]

1	//===- X86.cpp ------------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "OutputSections.h"
10	#include "Symbols.h"
11	#include "SyntheticSections.h"
12	#include "Target.h"
13	#include "llvm/Support/Endian.h"
14
15	using namespace llvm;
16	using namespace llvm::support::endian;
17	using namespace llvm::ELF;
18	using namespace lld;
19	using namespace lld::elf;
20
21	namespace {
22	class X86 : public TargetInfo {
23	public:
24	X86(Ctx &);
25	int getTlsGdRelaxSkip(RelType type) const override;
26	RelExpr getRelExpr(RelType type, const Symbol &s,
27	const uint8_t loc) const* override;
28	int64_t getImplicitAddend(const uint8_t buf, RelType type) const* override;
29	void writeGotPltHeader(uint8_t buf) const* override;
30	RelType getDynRel(RelType type) const override;
31	void writeGotPlt(uint8_t buf, const* Symbol &s) const override;
32	void writeIgotPlt(uint8_t buf, const* Symbol &s) const override;
33	void writePltHeader(uint8_t buf) const* override;
34	void writePlt(uint8_t buf, const* Symbol &sym,
35	uint64_t pltEntryAddr) const override;
36	void relocate(uint8_t loc, const* Relocation &rel,
37	uint64_t val) const override;
38
39	RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
40	void relocateAlloc(InputSectionBase &sec, uint8_t buf) const* override;
41
42	private:
43	void relaxTlsGdToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
44	void relaxTlsGdToIe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
45	void relaxTlsLdToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
46	void relaxTlsIeToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
47	};
48	} // namespace
49
50	X86::X86(Ctx &ctx) : TargetInfo (ctx) {
51	copyRel = R_386_COPY;
52	gotRel = R_386_GLOB_DAT;
53	pltRel = R_386_JUMP_SLOT;
54	iRelativeRel = R_386_IRELATIVE;
55	relativeRel = R_386_RELATIVE;
56	symbolicRel = R_386_32;
57	tlsDescRel = R_386_TLS_DESC;
58	tlsGotRel = R_386_TLS_TPOFF;
59	tlsModuleIndexRel = R_386_TLS_DTPMOD32;
60	tlsOffsetRel = R_386_TLS_DTPOFF32;
61	gotBaseSymInGotPlt = true;
62	pltHeaderSize = `16`;
63	pltEntrySize = `16`;
64	ipltEntrySize = `16`;
65	trapInstr = {`0xcc`, `0xcc`, `0xcc`, `0xcc`}; // 0xcc = INT3
66
67	// Align to the non-PAE large page size (known as a superpage or huge page).
68	// FreeBSD automatically promotes large, superpage-aligned allocations.
69	defaultImageBase = `0x400000`;
70	}
71
72	int X86::getTlsGdRelaxSkip(RelType type) const {
73	// TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
74	return type == R_386_TLS_GOTDESC \|\| type == R_386_TLS_DESC_CALL ? `1` : `2`;
75	}
76
77	RelExpr X86::getRelExpr(RelType type, const Symbol &s,
78	const uint8_t loc) const* {
79	switch (type) {
80	case R_386_8:
81	case R_386_16:
82	case R_386_32:
83	return R_ABS;
84	case R_386_TLS_LDO_32:
85	return R_DTPREL;
86	case R_386_TLS_GD:
87	return R_TLSGD_GOTPLT;
88	case R_386_TLS_LDM:
89	return R_TLSLD_GOTPLT;
90	case R_386_PLT32:
91	return R_PLT_PC;
92	case R_386_PC8:
93	case R_386_PC16:
94	case R_386_PC32:
95	return R_PC;
96	case R_386_GOTPC:
97	return R_GOTPLTONLY_PC;
98	case R_386_TLS_IE:
99	return R_GOT;
100	case R_386_GOT32:
101	case R_386_GOT32X:
102	// These relocations are arguably mis-designed because their calculations
103	// depend on the instructions they are applied to. This is bad because we
104	// usually don't care about whether the target section contains valid
105	// machine instructions or not. But this is part of the documented ABI, so
106	// we had to implement as the standard requires.
107	//
108	// x86 does not support PC-relative data access. Therefore, in order to
109	// access GOT contents, a GOT address needs to be known at link-time
110	// (which means non-PIC) or compilers have to emit code to get a GOT
111	// address at runtime (which means code is position-independent but
112	// compilers need to emit extra code for each GOT access.) This decision
113	// is made at compile-time. In the latter case, compilers emit code to
114	// load a GOT address to a register, which is usually %ebx.
115	//
116	// So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
117	// foo@GOT(%ebx).
118	//
119	// foo@GOT is not usable in PIC. If we are creating a PIC output and if we
120	// find such relocation, we should report an error. foo@GOT is resolved to
121	// an absolute* address of foo's GOT entry, because both GOT address and*
122	// foo's offset are known. In other words, it's G + A.
123	//
124	// foo@GOT(%ebx) needs to be resolved to a relative* offset from a GOT to*
125	// foo's GOT entry in the table, because GOT address is not known but foo's
126	// offset in the table is known. It's G + A - GOT.
127	//
128	// It's unfortunate that compilers emit the same relocation for these
129	// different use cases. In order to distinguish them, we have to read a
130	// machine instruction.
131	//
132	// The following code implements it. We assume that Loc[0] is the first byte
133	// of a displacement or an immediate field of a valid machine
134	// instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
135	// the byte, we can determine whether the instruction uses the operand as an
136	// absolute address (R_GOT) or a register-relative address (R_GOTPLT).
137	return (loc[-`1`] & `0xc7`) == `0x5` ? R_GOT : R_GOTPLT;
138	case R_386_TLS_GOTDESC:
139	return R_TLSDESC_GOTPLT;
140	case R_386_TLS_DESC_CALL:
141	return R_TLSDESC_CALL;
142	case R_386_TLS_GOTIE:
143	return R_GOTPLT;
144	case R_386_GOTOFF:
145	return R_GOTPLTREL;
146	case R_386_TLS_LE:
147	return R_TPREL;
148	case R_386_TLS_LE_32:
149	return R_TPREL_NEG;
150	case R_386_NONE:
151	return R_NONE;
152	default:
153	Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
154	<< ") against symbol " << &s;
155	return R_NONE;
156	}
157	}
158
159	RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
160	switch (expr) {
161	default:
162	return expr;
163	case R_RELAX_TLS_GD_TO_IE:
164	return R_RELAX_TLS_GD_TO_IE_GOTPLT;
165	case R_RELAX_TLS_GD_TO_LE:
166	return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
167	: R_RELAX_TLS_GD_TO_LE;
168	}
169	}
170
171	void X86::writeGotPltHeader(uint8_t buf) const* {
172	write32le(P: buf, V: ctx.mainPart->dynamic ->getVA());
173	}
174
175	void X86::writeGotPlt(uint8_t buf, const* Symbol &s) const {
176	// Entries in .got.plt initially points back to the corresponding
177	// PLT entries with a fixed offset to skip the first instruction.
178	write32le(P: buf, V: s.getPltVA(ctx) + `6`);
179	}
180
181	void X86::writeIgotPlt(uint8_t buf, const* Symbol &s) const {
182	// An x86 entry is the address of the ifunc resolver function.
183	write32le(P: buf, V: s.getVA(ctx));
184	}
185
186	RelType X86::getDynRel(RelType type) const {
187	if (type == R_386_TLS_LE)
188	return R_386_TLS_TPOFF;
189	if (type == R_386_TLS_LE_32)
190	return R_386_TLS_TPOFF32;
191	return type;
192	}
193
194	void X86::writePltHeader(uint8_t buf) const* {
195	if (ctx.arg.isPic) {
196	const uint8_t v[] = {
197	`0xff`, `0xb3`, `0x04`, `0x00`, `0x00`, `0x00`, // pushl 4(%ebx)
198	`0xff`, `0xa3`, `0x08`, `0x00`, `0x00`, `0x00`, // jmp 8(%ebx)*
199	`0x90`, `0x90`, `0x90`, `0x90` // nop
200	};
201	memcpy(dest: buf, src: v, n: sizeof(v));
202	return;
203	}
204
205	const uint8_t pltData[] = {
206	`0xff`, `0x35`, `0`, `0`, `0`, `0`, // pushl (GOTPLT+4)
207	`0xff`, `0x25`, `0`, `0`, `0`, `0`, // jmp (GOTPLT+8)*
208	`0x90`, `0x90`, `0x90`, `0x90`, // nop
209	};
210	memcpy(dest: buf, src: pltData, n: sizeof(pltData));
211	uint32_t gotPlt = ctx.in.gotPlt ->getVA();
212	write32le(P: buf + `2`, V: gotPlt + `4`);
213	write32le(P: buf + `8`, V: gotPlt + `8`);
214	}
215
216	void X86::writePlt(uint8_t buf, const* Symbol &sym,
217	uint64_t pltEntryAddr) const {
218	unsigned relOff = ctx.in.relaPlt ->entsize * sym.getPltIdx(ctx);
219	if (ctx.arg.isPic) {
220	const uint8_t inst[] = {
221	`0xff`, `0xa3`, `0`, `0`, `0`, `0`, // jmp foo@GOT(%ebx)*
222	`0x68`, `0`, `0`, `0`, `0`, // pushl $reloc_offset
223	`0xe9`, `0`, `0`, `0`, `0`, // jmp .PLT0@PC
224	};
225	memcpy(dest: buf, src: inst, n: sizeof(inst));
226	write32le(P: buf + `2`, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt ->getVA());
227	} else {
228	const uint8_t inst[] = {
229	`0xff`, `0x25`, `0`, `0`, `0`, `0`, // jmp foo@GOT*
230	`0x68`, `0`, `0`, `0`, `0`, // pushl $reloc_offset
231	`0xe9`, `0`, `0`, `0`, `0`, // jmp .PLT0@PC
232	};
233	memcpy(dest: buf, src: inst, n: sizeof(inst));
234	write32le(P: buf + `2`, V: sym.getGotPltVA(ctx));
235	}
236
237	write32le(P: buf + `7`, V: relOff);
238	write32le(P: buf + `12`, V: ctx.in.plt ->getVA() - pltEntryAddr - `16`);
239	}
240
241	int64_t X86::getImplicitAddend(const uint8_t buf, RelType type) const* {
242	switch (type) {
243	case R_386_8:
244	case R_386_PC8:
245	return SignExtend64<`8`>(x: *buf);
246	case R_386_16:
247	case R_386_PC16:
248	return SignExtend64<`16`>(x: read16le(P: buf));
249	case R_386_32:
250	case R_386_GLOB_DAT:
251	case R_386_GOT32:
252	case R_386_GOT32X:
253	case R_386_GOTOFF:
254	case R_386_GOTPC:
255	case R_386_IRELATIVE:
256	case R_386_PC32:
257	case R_386_PLT32:
258	case R_386_RELATIVE:
259	case R_386_TLS_GOTDESC:
260	case R_386_TLS_DESC_CALL:
261	case R_386_TLS_DTPMOD32:
262	case R_386_TLS_DTPOFF32:
263	case R_386_TLS_LDO_32:
264	case R_386_TLS_LDM:
265	case R_386_TLS_IE:
266	case R_386_TLS_IE_32:
267	case R_386_TLS_LE:
268	case R_386_TLS_LE_32:
269	case R_386_TLS_GD:
270	case R_386_TLS_GD_32:
271	case R_386_TLS_GOTIE:
272	case R_386_TLS_TPOFF:
273	case R_386_TLS_TPOFF32:
274	return SignExtend64<`32`>(x: read32le(P: buf));
275	case R_386_TLS_DESC:
276	return SignExtend64<`32`>(x: read32le(P: buf + `4`));
277	case R_386_NONE:
278	case R_386_JUMP_SLOT:
279	// These relocations are defined as not having an implicit addend.
280	return `0`;
281	default:
282	InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
283	return `0`;
284	}
285	}
286
287	void X86::relocate(uint8_t loc, const* Relocation &rel, uint64_t val) const {
288	switch (rel.type) {
289	case R_386_8:
290	// R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
291	// being used for some 16-bit programs such as boot loaders, so
292	// we want to support them.
293	checkIntUInt(ctx, loc, v: val, n: `8`, rel);
294	*loc = val;
295	break;
296	case R_386_PC8:
297	checkInt(ctx, loc, v: val, n: `8`, rel);
298	*loc = val;
299	break;
300	case R_386_16:
301	checkIntUInt(ctx, loc, v: val, n: `16`, rel);
302	write16le(P: loc, V: val);
303	break;
304	case R_386_PC16:
305	// R_386_PC16 is normally used with 16 bit code. In that situation
306	// the PC is 16 bits, just like the addend. This means that it can
307	// point from any 16 bit address to any other if the possibility
308	// of wrapping is included.
309	// The only restriction we have to check then is that the destination
310	// address fits in 16 bits. That is impossible to do here. The problem is
311	// that we are passed the final value, which already had the
312	// current location subtracted from it.
313	// We just check that Val fits in 17 bits. This misses some cases, but
314	// should have no false positives.
315	checkInt(ctx, loc, v: val, n: `17`, rel);
316	write16le(P: loc, V: val);
317	break;
318	case R_386_32:
319	case R_386_GOT32:
320	case R_386_GOT32X:
321	case R_386_GOTOFF:
322	case R_386_GOTPC:
323	case R_386_PC32:
324	case R_386_PLT32:
325	case R_386_RELATIVE:
326	case R_386_TLS_GOTDESC:
327	case R_386_TLS_DESC_CALL:
328	case R_386_TLS_DTPMOD32:
329	case R_386_TLS_DTPOFF32:
330	case R_386_TLS_GD:
331	case R_386_TLS_GOTIE:
332	case R_386_TLS_IE:
333	case R_386_TLS_LDM:
334	case R_386_TLS_LDO_32:
335	case R_386_TLS_LE:
336	case R_386_TLS_LE_32:
337	case R_386_TLS_TPOFF:
338	case R_386_TLS_TPOFF32:
339	checkInt(ctx, loc, v: val, n: `32`, rel);
340	write32le(P: loc, V: val);
341	break;
342	case R_386_TLS_DESC:
343	// The addend is stored in the second 32-bit word.
344	write32le(P: loc + `4`, V: val);
345	break;
346	default:
347	llvm_unreachable("unknown relocation");
348	}
349	}
350
351	void X86::relaxTlsGdToLe(uint8_t loc, const* Relocation &rel,
352	uint64_t val) const {
353	if (rel.type == R_386_TLS_GD) {
354	// Convert (loc[-2] == 0x04)
355	// leal x@tlsgd(, %ebx, 1), %eax
356	// call ___tls_get_addr@plt
357	// or
358	// leal x@tlsgd(%reg), %eax
359	// call ___tls_get_addr@got(%reg)*
360	// to
361	const uint8_t inst[] = {
362	`0x65`, `0xa1`, `0x00`, `0x00`, `0x00`, `0x00`, // movl %gs:0, %eax
363	`0x81`, `0xe8`, `0`, `0`, `0`, `0`, // subl x@ntpoff(%ebx), %eax
364	};
365	uint8_t *w = loc[-`2`] == `0x04` ? loc - `3` : loc - `2`;
366	memcpy(dest: w, src: inst, n: sizeof(inst));
367	write32le(P: w + `8`, V: val);
368	} else if (rel.type == R_386_TLS_GOTDESC) {
369	// Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
370	//
371	// Note: call x@tlsdesc(%eax) may not immediately follow this instruction.*
372	if (memcmp(s1: loc - `2`, s2: "\x8d\x83", n: `2`)) {
373	ErrAlways(ctx)
374	<< getErrorLoc(ctx, loc: loc - `2`)
375	<< "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
376	return;
377	}
378	loc[-`1`] = `0x05`;
379	write32le(P: loc, V: val);
380	} else {
381	// Convert call x@tlsdesc(%eax) to xchg ax, ax.*
382	assert(rel.type == R_386_TLS_DESC_CALL);
383	loc[`0`] = `0x66`;
384	loc[`1`] = `0x90`;
385	}
386	}
387
388	void X86::relaxTlsGdToIe(uint8_t loc, const* Relocation &rel,
389	uint64_t val) const {
390	if (rel.type == R_386_TLS_GD) {
391	// Convert (loc[-2] == 0x04)
392	// leal x@tlsgd(, %ebx, 1), %eax
393	// call ___tls_get_addr@plt
394	// or
395	// leal x@tlsgd(%reg), %eax
396	// call ___tls_get_addr@got(%reg)*
397	const uint8_t inst[] = {
398	`0x65`, `0xa1`, `0x00`, `0x00`, `0x00`, `0x00`, // movl %gs:0, %eax
399	`0x03`, `0x83`, `0`, `0`, `0`, `0`, // addl x@gottpoff(%ebx), %eax
400	};
401	uint8_t *w = loc[-`2`] == `0x04` ? loc - `3` : loc - `2`;
402	memcpy(dest: w, src: inst, n: sizeof(inst));
403	write32le(P: w + `8`, V: val);
404	} else if (rel.type == R_386_TLS_GOTDESC) {
405	// Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
406	if (memcmp(s1: loc - `2`, s2: "\x8d\x83", n: `2`)) {
407	ErrAlways(ctx)
408	<< getErrorLoc(ctx, loc: loc - `2`)
409	<< "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
410	return;
411	}
412	loc[-`2`] = `0x8b`;
413	write32le(P: loc, V: val);
414	} else {
415	// Convert call x@tlsdesc(%eax) to xchg ax, ax.*
416	assert(rel.type == R_386_TLS_DESC_CALL);
417	loc[`0`] = `0x66`;
418	loc[`1`] = `0x90`;
419	}
420	}
421
422	// In some conditions, relocations can be optimized to avoid using GOT.
423	// This function does that for Initial Exec to Local Exec case.
424	void X86::relaxTlsIeToLe(uint8_t loc, const* Relocation &rel,
425	uint64_t val) const {
426	// Ulrich's document section 6.2 says that @gotntpoff can
427	// be used with MOVL or ADDL instructions.
428	// @indntpoff is similar to @gotntpoff, but for use in
429	// position dependent code.
430	uint8_t reg = (loc[-`1`] >> `3`) & `7`;
431
432	if (rel.type == R_386_TLS_IE) {
433	if (loc[-`1`] == `0xa1`) {
434	// "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
435	// This case is different from the generic case below because
436	// this is a 5 byte instruction while below is 6 bytes.
437	loc[-`1`] = `0xb8`;
438	} else if (loc[-`2`] == `0x8b`) {
439	// "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
440	loc[-`2`] = `0xc7`;
441	loc[-`1`] = `0xc0` \| reg;
442	} else {
443	// "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
444	loc[-`2`] = `0x81`;
445	loc[-`1`] = `0xc0` \| reg;
446	}
447	} else {
448	assert(rel.type == R_386_TLS_GOTIE);
449	if (loc[-`2`] == `0x8b`) {
450	// "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
451	loc[-`2`] = `0xc7`;
452	loc[-`1`] = `0xc0` \| reg;
453	} else {
454	// "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
455	loc[-`2`] = `0x8d`;
456	loc[-`1`] = `0x80` \| (reg << `3`) \| reg;
457	}
458	}
459	write32le(P: loc, V: val);
460	}
461
462	void X86::relaxTlsLdToLe(uint8_t loc, const* Relocation &rel,
463	uint64_t val) const {
464	if (rel.type == R_386_TLS_LDO_32) {
465	write32le(P: loc, V: val);
466	return;
467	}
468
469	if (loc[`4`] == `0xe8`) {
470	// Convert
471	// leal x(%reg),%eax
472	// call ___tls_get_addr@plt
473	// to
474	const uint8_t inst[] = {
475	`0x65`, `0xa1`, `0x00`, `0x00`, `0x00`, `0x00`, // movl %gs:0,%eax
476	`0x90`, // nop
477	`0x8d`, `0x74`, `0x26`, `0x00`, // leal 0(%esi,1),%esi
478	};
479	memcpy(dest: loc - `2`, src: inst, n: sizeof(inst));
480	return;
481	}
482
483	// Convert
484	// leal x(%reg),%eax
485	// call ___tls_get_addr@got(%reg)*
486	// to
487	const uint8_t inst[] = {
488	`0x65`, `0xa1`, `0x00`, `0x00`, `0x00`, `0x00`, // movl %gs:0,%eax
489	`0x8d`, `0xb6`, `0x00`, `0x00`, `0x00`, `0x00`, // leal (%esi),%esi
490	};
491	memcpy(dest: loc - `2`, src: inst, n: sizeof(inst));
492	}
493
494	void X86::relocateAlloc(InputSectionBase &sec, uint8_t buf) const* {
495	uint64_t secAddr = sec.getOutputSection()->addr;
496	if (auto *s = dyn_cast<InputSection>(Val: &sec))
497	secAddr += s->outSecOff;
498	for (const Relocation &rel : sec.relocs()) {
499	uint8_t *loc = buf + rel.offset;
500	const uint64_t val =
501	SignExtend64(X: sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset), B: `32`);
502	switch (rel.expr) {
503	case R_RELAX_TLS_GD_TO_IE_GOTPLT:
504	relaxTlsGdToIe(loc, rel, val);
505	continue;
506	case R_RELAX_TLS_GD_TO_LE:
507	case R_RELAX_TLS_GD_TO_LE_NEG:
508	relaxTlsGdToLe(loc, rel, val);
509	continue;
510	case R_RELAX_TLS_LD_TO_LE:
511	relaxTlsLdToLe(loc, rel, val);
512	break;
513	case R_RELAX_TLS_IE_TO_LE:
514	relaxTlsIeToLe(loc, rel, val);
515	continue;
516	default:
517	relocate(loc, rel, val);
518	break;
519	}
520	}
521	}
522
523	// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
524	// entries containing endbr32 instructions. A PLT entry will be split into two
525	// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
526	namespace {
527	class IntelIBT : public X86 {
528	public:
529	IntelIBT(Ctx &ctx) : X86 (ctx) { pltHeaderSize = `0`; }
530	void writeGotPlt(uint8_t buf, const* Symbol &s) const override;
531	void writePlt(uint8_t buf, const* Symbol &sym,
532	uint64_t pltEntryAddr) const override;
533	void writeIBTPlt(uint8_t buf, size_t numEntries) const* override;
534
535	static const unsigned IBTPltHeaderSize = `16`;
536	};
537	} // namespace
538
539	void IntelIBT::writeGotPlt(uint8_t buf, const* Symbol &s) const {
540	uint64_t va = ctx.in.ibtPlt ->getVA() + IBTPltHeaderSize +
541	s.getPltIdx(ctx) * pltEntrySize;
542	write32le(P: buf, V: va);
543	}
544
545	void IntelIBT::writePlt(uint8_t buf, const* Symbol &sym,
546	uint64_t /pltEntryAddr/) const {
547	if (ctx.arg.isPic) {
548	const uint8_t inst[] = {
549	`0xf3`, `0x0f`, `0x1e`, `0xfb`, // endbr32
550	`0xff`, `0xa3`, `0`, `0`, `0`, `0`, // jmp name@GOT(%ebx)*
551	`0x66`, `0x0f`, `0x1f`, `0x44`, `0`, `0`, // nop
552	};
553	memcpy(dest: buf, src: inst, n: sizeof(inst));
554	write32le(P: buf + `6`, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt ->getVA());
555	return;
556	}
557
558	const uint8_t inst[] = {
559	`0xf3`, `0x0f`, `0x1e`, `0xfb`, // endbr32
560	`0xff`, `0x25`, `0`, `0`, `0`, `0`, // jmp foo@GOT*
561	`0x66`, `0x0f`, `0x1f`, `0x44`, `0`, `0`, // nop
562	};
563	memcpy(dest: buf, src: inst, n: sizeof(inst));
564	write32le(P: buf + `6`, V: sym.getGotPltVA(ctx));
565	}
566
567	void IntelIBT::writeIBTPlt(uint8_t buf, size_t numEntries) const* {
568	writePltHeader(buf);
569	buf += IBTPltHeaderSize;
570
571	const uint8_t inst[] = {
572	`0xf3`, `0x0f`, `0x1e`, `0xfb`, // endbr32
573	`0x68`, `0`, `0`, `0`, `0`, // pushl $reloc_offset
574	`0xe9`, `0`, `0`, `0`, `0`, // jmpq .PLT0@PC
575	`0x66`, `0x90`, // nop
576	};
577
578	for (size_t i = `0`; i < numEntries; ++i) {
579	memcpy(dest: buf, src: inst, n: sizeof(inst));
580	write32le(P: buf + `5`, V: i * sizeof(object::ELF32LE::Rel));
581	write32le(P: buf + `10`, V: -pltHeaderSize - sizeof(inst) * i - `30`);
582	buf += sizeof(inst);
583	}
584	}
585
586	namespace {
587	class RetpolinePic : public X86 {
588	public:
589	RetpolinePic(Ctx &);
590	void writeGotPlt(uint8_t buf, const* Symbol &s) const override;
591	void writePltHeader(uint8_t buf) const* override;
592	void writePlt(uint8_t buf, const* Symbol &sym,
593	uint64_t pltEntryAddr) const override;
594	};
595
596	class RetpolineNoPic : public X86 {
597	public:
598	RetpolineNoPic(Ctx &);
599	void writeGotPlt(uint8_t buf, const* Symbol &s) const override;
600	void writePltHeader(uint8_t buf) const* override;
601	void writePlt(uint8_t buf, const* Symbol &sym,
602	uint64_t pltEntryAddr) const override;
603	};
604	} // namespace
605
606	RetpolinePic::RetpolinePic(Ctx &ctx) : X86 (ctx) {
607	pltHeaderSize = `48`;
608	pltEntrySize = `32`;
609	ipltEntrySize = `32`;
610	}
611
612	void RetpolinePic::writeGotPlt(uint8_t buf, const* Symbol &s) const {
613	write32le(P: buf, V: s.getPltVA(ctx) + `17`);
614	}
615
616	void RetpolinePic::writePltHeader(uint8_t buf) const* {
617	const uint8_t insn[] = {
618	`0xff`, `0xb3`, `4`, `0`, `0`, `0`, // 0: pushl 4(%ebx)
619	`0x50`, // 6: pushl %eax
620	`0x8b`, `0x83`, `8`, `0`, `0`, `0`, // 7: mov 8(%ebx), %eax
621	`0xe8`, `0x0e`, `0x00`, `0x00`, `0x00`, // d: call next
622	`0xf3`, `0x90`, // 12: loop: pause
623	`0x0f`, `0xae`, `0xe8`, // 14: lfence
624	`0xeb`, `0xf9`, // 17: jmp loop
625	`0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, // 19: int3; .align 16
626	`0x89`, `0x0c`, `0x24`, // 20: next: mov %ecx, (%esp)
627	`0x8b`, `0x4c`, `0x24`, `0x04`, // 23: mov 0x4(%esp), %ecx
628	`0x89`, `0x44`, `0x24`, `0x04`, // 27: mov %eax ,0x4(%esp)
629	`0x89`, `0xc8`, // 2b: mov %ecx, %eax
630	`0x59`, // 2d: pop %ecx
631	`0xc3`, // 2e: ret
632	`0xcc`, // 2f: int3; padding
633	};
634	memcpy(dest: buf, src: insn, n: sizeof(insn));
635	}
636
637	void RetpolinePic::writePlt(uint8_t buf, const* Symbol &sym,
638	uint64_t pltEntryAddr) const {
639	unsigned relOff = ctx.in.relaPlt ->entsize * sym.getPltIdx(ctx);
640	const uint8_t insn[] = {
641	`0x50`, // pushl %eax
642	`0x8b`, `0x83`, `0`, `0`, `0`, `0`, // mov foo@GOT(%ebx), %eax
643	`0xe8`, `0`, `0`, `0`, `0`, // call plt+0x20
644	`0xe9`, `0`, `0`, `0`, `0`, // jmp plt+0x12
645	`0x68`, `0`, `0`, `0`, `0`, // pushl $reloc_offset
646	`0xe9`, `0`, `0`, `0`, `0`, // jmp plt+0
647	`0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, // int3; padding
648	};
649	memcpy(dest: buf, src: insn, n: sizeof(insn));
650
651	uint32_t ebx = ctx.in.gotPlt ->getVA();
652	unsigned off = pltEntryAddr - ctx.in.plt ->getVA();
653	write32le(P: buf + `3`, V: sym.getGotPltVA(ctx) - ebx);
654	write32le(P: buf + `8`, V: -off - `12` + `32`);
655	write32le(P: buf + `13`, V: -off - `17` + `18`);
656	write32le(P: buf + `18`, V: relOff);
657	write32le(P: buf + `23`, V: -off - `27`);
658	}
659
660	RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86 (ctx) {
661	pltHeaderSize = `48`;
662	pltEntrySize = `32`;
663	ipltEntrySize = `32`;
664	}
665
666	void RetpolineNoPic::writeGotPlt(uint8_t buf, const* Symbol &s) const {
667	write32le(P: buf, V: s.getPltVA(ctx) + `16`);
668	}
669
670	void RetpolineNoPic::writePltHeader(uint8_t buf) const* {
671	const uint8_t insn[] = {
672	`0xff`, `0x35`, `0`, `0`, `0`, `0`, // 0: pushl GOTPLT+4
673	`0x50`, // 6: pushl %eax
674	`0xa1`, `0`, `0`, `0`, `0`, // 7: mov GOTPLT+8, %eax
675	`0xe8`, `0x0f`, `0x00`, `0x00`, `0x00`, // c: call next
676	`0xf3`, `0x90`, // 11: loop: pause
677	`0x0f`, `0xae`, `0xe8`, // 13: lfence
678	`0xeb`, `0xf9`, // 16: jmp loop
679	`0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, // 18: int3
680	`0xcc`, `0xcc`, `0xcc`, // 1f: int3; .align 16
681	`0x89`, `0x0c`, `0x24`, // 20: next: mov %ecx, (%esp)
682	`0x8b`, `0x4c`, `0x24`, `0x04`, // 23: mov 0x4(%esp), %ecx
683	`0x89`, `0x44`, `0x24`, `0x04`, // 27: mov %eax ,0x4(%esp)
684	`0x89`, `0xc8`, // 2b: mov %ecx, %eax
685	`0x59`, // 2d: pop %ecx
686	`0xc3`, // 2e: ret
687	`0xcc`, // 2f: int3; padding
688	};
689	memcpy(dest: buf, src: insn, n: sizeof(insn));
690
691	uint32_t gotPlt = ctx.in.gotPlt ->getVA();
692	write32le(P: buf + `2`, V: gotPlt + `4`);
693	write32le(P: buf + `8`, V: gotPlt + `8`);
694	}
695
696	void RetpolineNoPic::writePlt(uint8_t buf, const* Symbol &sym,
697	uint64_t pltEntryAddr) const {
698	unsigned relOff = ctx.in.relaPlt ->entsize * sym.getPltIdx(ctx);
699	const uint8_t insn[] = {
700	`0x50`, // 0: pushl %eax
701	`0xa1`, `0`, `0`, `0`, `0`, // 1: mov foo_in_GOT, %eax
702	`0xe8`, `0`, `0`, `0`, `0`, // 6: call plt+0x20
703	`0xe9`, `0`, `0`, `0`, `0`, // b: jmp plt+0x11
704	`0x68`, `0`, `0`, `0`, `0`, // 10: pushl $reloc_offset
705	`0xe9`, `0`, `0`, `0`, `0`, // 15: jmp plt+0
706	`0xcc`, `0xcc`, `0xcc`, `0xcc`, `0xcc`, // 1a: int3; padding
707	`0xcc`, // 1f: int3; padding
708	};
709	memcpy(dest: buf, src: insn, n: sizeof(insn));
710
711	unsigned off = pltEntryAddr - ctx.in.plt ->getVA();
712	write32le(P: buf + `2`, V: sym.getGotPltVA(ctx));
713	write32le(P: buf + `7`, V: -off - `11` + `32`);
714	write32le(P: buf + `12`, V: -off - `16` + `17`);
715	write32le(P: buf + `17`, V: relOff);
716	write32le(P: buf + `22`, V: -off - `26`);
717	}
718
719	void elf::setX86TargetInfo(Ctx &ctx) {
720	if (ctx.arg.zRetpolineplt) {
721	if (ctx.arg.isPic)
722	ctx.target.reset(p: new RetpolinePic (ctx));
723	else
724	ctx.target.reset(p: new RetpolineNoPic (ctx));
725	return;
726	}
727
728	if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)
729	ctx.target.reset(p: new IntelIBT (ctx));
730	else
731	ctx.target.reset(p: new X86 (ctx));
732	}
733

Browse the source code of llvm_projects/lld/ELF/Arch/X86.cpp