1//===- X86.cpp ------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "OutputSections.h"
10#include "Symbols.h"
11#include "SyntheticSections.h"
12#include "Target.h"
13#include "llvm/Support/Endian.h"
14
15using namespace llvm;
16using namespace llvm::support::endian;
17using namespace llvm::ELF;
18using namespace lld;
19using namespace lld::elf;
20
21namespace {
22class X86 : public TargetInfo {
23public:
24 X86(Ctx &);
25 int getTlsGdRelaxSkip(RelType type) const override;
26 RelExpr getRelExpr(RelType type, const Symbol &s,
27 const uint8_t *loc) const override;
28 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
29 void writeGotPltHeader(uint8_t *buf) const override;
30 RelType getDynRel(RelType type) const override;
31 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
32 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
33 void writePltHeader(uint8_t *buf) const override;
34 void writePlt(uint8_t *buf, const Symbol &sym,
35 uint64_t pltEntryAddr) const override;
36 void relocate(uint8_t *loc, const Relocation &rel,
37 uint64_t val) const override;
38
39 RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
40 void relocateAlloc(InputSection &sec, uint8_t *buf) const override;
41
42private:
43 void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
44 void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
45 void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
46 void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
47};
48} // namespace
49
50X86::X86(Ctx &ctx) : TargetInfo(ctx) {
51 copyRel = R_386_COPY;
52 gotRel = R_386_GLOB_DAT;
53 pltRel = R_386_JUMP_SLOT;
54 iRelativeRel = R_386_IRELATIVE;
55 relativeRel = R_386_RELATIVE;
56 symbolicRel = R_386_32;
57 tlsDescRel = R_386_TLS_DESC;
58 tlsGotRel = R_386_TLS_TPOFF;
59 tlsModuleIndexRel = R_386_TLS_DTPMOD32;
60 tlsOffsetRel = R_386_TLS_DTPOFF32;
61 gotBaseSymInGotPlt = true;
62 pltHeaderSize = 16;
63 pltEntrySize = 16;
64 ipltEntrySize = 16;
65 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
66
67 // Align to the non-PAE large page size (known as a superpage or huge page).
68 // FreeBSD automatically promotes large, superpage-aligned allocations.
69 defaultImageBase = 0x400000;
70}
71
72int X86::getTlsGdRelaxSkip(RelType type) const {
73 // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
74 return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
75}
76
77RelExpr X86::getRelExpr(RelType type, const Symbol &s,
78 const uint8_t *loc) const {
79 switch (type) {
80 case R_386_8:
81 case R_386_16:
82 case R_386_32:
83 return R_ABS;
84 case R_386_TLS_LDO_32:
85 return R_DTPREL;
86 case R_386_TLS_GD:
87 return R_TLSGD_GOTPLT;
88 case R_386_TLS_LDM:
89 return R_TLSLD_GOTPLT;
90 case R_386_PLT32:
91 return R_PLT_PC;
92 case R_386_PC8:
93 case R_386_PC16:
94 case R_386_PC32:
95 return R_PC;
96 case R_386_GOTPC:
97 return R_GOTPLTONLY_PC;
98 case R_386_TLS_IE:
99 return R_GOT;
100 case R_386_GOT32:
101 case R_386_GOT32X:
102 // These relocations are arguably mis-designed because their calculations
103 // depend on the instructions they are applied to. This is bad because we
104 // usually don't care about whether the target section contains valid
105 // machine instructions or not. But this is part of the documented ABI, so
106 // we had to implement as the standard requires.
107 //
108 // x86 does not support PC-relative data access. Therefore, in order to
109 // access GOT contents, a GOT address needs to be known at link-time
110 // (which means non-PIC) or compilers have to emit code to get a GOT
111 // address at runtime (which means code is position-independent but
112 // compilers need to emit extra code for each GOT access.) This decision
113 // is made at compile-time. In the latter case, compilers emit code to
114 // load a GOT address to a register, which is usually %ebx.
115 //
116 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
117 // foo@GOT(%ebx).
118 //
119 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
120 // find such relocation, we should report an error. foo@GOT is resolved to
121 // an *absolute* address of foo's GOT entry, because both GOT address and
122 // foo's offset are known. In other words, it's G + A.
123 //
124 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
125 // foo's GOT entry in the table, because GOT address is not known but foo's
126 // offset in the table is known. It's G + A - GOT.
127 //
128 // It's unfortunate that compilers emit the same relocation for these
129 // different use cases. In order to distinguish them, we have to read a
130 // machine instruction.
131 //
132 // The following code implements it. We assume that Loc[0] is the first byte
133 // of a displacement or an immediate field of a valid machine
134 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
135 // the byte, we can determine whether the instruction uses the operand as an
136 // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
137 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
138 case R_386_TLS_GOTDESC:
139 return R_TLSDESC_GOTPLT;
140 case R_386_TLS_DESC_CALL:
141 return R_TLSDESC_CALL;
142 case R_386_TLS_GOTIE:
143 return R_GOTPLT;
144 case R_386_GOTOFF:
145 return R_GOTPLTREL;
146 case R_386_TLS_LE:
147 return R_TPREL;
148 case R_386_TLS_LE_32:
149 return R_TPREL_NEG;
150 case R_386_NONE:
151 return R_NONE;
152 default:
153 Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
154 << ") against symbol " << &s;
155 return R_NONE;
156 }
157}
158
159RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
160 switch (expr) {
161 default:
162 return expr;
163 case R_RELAX_TLS_GD_TO_IE:
164 return R_RELAX_TLS_GD_TO_IE_GOTPLT;
165 case R_RELAX_TLS_GD_TO_LE:
166 return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
167 : R_RELAX_TLS_GD_TO_LE;
168 }
169}
170
171void X86::writeGotPltHeader(uint8_t *buf) const {
172 write32le(P: buf, V: ctx.mainPart->dynamic->getVA());
173}
174
175void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
176 // Entries in .got.plt initially points back to the corresponding
177 // PLT entries with a fixed offset to skip the first instruction.
178 write32le(P: buf, V: s.getPltVA(ctx) + 6);
179}
180
181void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
182 // An x86 entry is the address of the ifunc resolver function.
183 write32le(P: buf, V: s.getVA(ctx));
184}
185
186RelType X86::getDynRel(RelType type) const {
187 if (type == R_386_TLS_LE)
188 return R_386_TLS_TPOFF;
189 if (type == R_386_TLS_LE_32)
190 return R_386_TLS_TPOFF32;
191 return type;
192}
193
194void X86::writePltHeader(uint8_t *buf) const {
195 if (ctx.arg.isPic) {
196 const uint8_t v[] = {
197 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
198 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
199 0x90, 0x90, 0x90, 0x90 // nop
200 };
201 memcpy(dest: buf, src: v, n: sizeof(v));
202 return;
203 }
204
205 const uint8_t pltData[] = {
206 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
207 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
208 0x90, 0x90, 0x90, 0x90, // nop
209 };
210 memcpy(dest: buf, src: pltData, n: sizeof(pltData));
211 uint32_t gotPlt = ctx.in.gotPlt->getVA();
212 write32le(P: buf + 2, V: gotPlt + 4);
213 write32le(P: buf + 8, V: gotPlt + 8);
214}
215
216void X86::writePlt(uint8_t *buf, const Symbol &sym,
217 uint64_t pltEntryAddr) const {
218 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
219 if (ctx.arg.isPic) {
220 const uint8_t inst[] = {
221 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
222 0x68, 0, 0, 0, 0, // pushl $reloc_offset
223 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
224 };
225 memcpy(dest: buf, src: inst, n: sizeof(inst));
226 write32le(P: buf + 2, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
227 } else {
228 const uint8_t inst[] = {
229 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
230 0x68, 0, 0, 0, 0, // pushl $reloc_offset
231 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
232 };
233 memcpy(dest: buf, src: inst, n: sizeof(inst));
234 write32le(P: buf + 2, V: sym.getGotPltVA(ctx));
235 }
236
237 write32le(P: buf + 7, V: relOff);
238 write32le(P: buf + 12, V: ctx.in.plt->getVA() - pltEntryAddr - 16);
239}
240
241int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
242 switch (type) {
243 case R_386_8:
244 case R_386_PC8:
245 return SignExtend64<8>(x: *buf);
246 case R_386_16:
247 case R_386_PC16:
248 return SignExtend64<16>(x: read16le(P: buf));
249 case R_386_32:
250 case R_386_GLOB_DAT:
251 case R_386_GOT32:
252 case R_386_GOT32X:
253 case R_386_GOTOFF:
254 case R_386_GOTPC:
255 case R_386_IRELATIVE:
256 case R_386_PC32:
257 case R_386_PLT32:
258 case R_386_RELATIVE:
259 case R_386_TLS_GOTDESC:
260 case R_386_TLS_DESC_CALL:
261 case R_386_TLS_DTPMOD32:
262 case R_386_TLS_DTPOFF32:
263 case R_386_TLS_LDO_32:
264 case R_386_TLS_LDM:
265 case R_386_TLS_IE:
266 case R_386_TLS_IE_32:
267 case R_386_TLS_LE:
268 case R_386_TLS_LE_32:
269 case R_386_TLS_GD:
270 case R_386_TLS_GD_32:
271 case R_386_TLS_GOTIE:
272 case R_386_TLS_TPOFF:
273 case R_386_TLS_TPOFF32:
274 return SignExtend64<32>(x: read32le(P: buf));
275 case R_386_TLS_DESC:
276 return SignExtend64<32>(x: read32le(P: buf + 4));
277 case R_386_NONE:
278 case R_386_JUMP_SLOT:
279 // These relocations are defined as not having an implicit addend.
280 return 0;
281 default:
282 InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
283 return 0;
284 }
285}
286
287void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
288 switch (rel.type) {
289 case R_386_8:
290 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
291 // being used for some 16-bit programs such as boot loaders, so
292 // we want to support them.
293 checkIntUInt(ctx, loc, v: val, n: 8, rel);
294 *loc = val;
295 break;
296 case R_386_PC8:
297 checkInt(ctx, loc, v: val, n: 8, rel);
298 *loc = val;
299 break;
300 case R_386_16:
301 checkIntUInt(ctx, loc, v: val, n: 16, rel);
302 write16le(P: loc, V: val);
303 break;
304 case R_386_PC16:
305 // R_386_PC16 is normally used with 16 bit code. In that situation
306 // the PC is 16 bits, just like the addend. This means that it can
307 // point from any 16 bit address to any other if the possibility
308 // of wrapping is included.
309 // The only restriction we have to check then is that the destination
310 // address fits in 16 bits. That is impossible to do here. The problem is
311 // that we are passed the final value, which already had the
312 // current location subtracted from it.
313 // We just check that Val fits in 17 bits. This misses some cases, but
314 // should have no false positives.
315 checkInt(ctx, loc, v: val, n: 17, rel);
316 write16le(P: loc, V: val);
317 break;
318 case R_386_32:
319 case R_386_GOT32:
320 case R_386_GOT32X:
321 case R_386_GOTOFF:
322 case R_386_GOTPC:
323 case R_386_PC32:
324 case R_386_PLT32:
325 case R_386_RELATIVE:
326 case R_386_TLS_GOTDESC:
327 case R_386_TLS_DESC_CALL:
328 case R_386_TLS_DTPMOD32:
329 case R_386_TLS_DTPOFF32:
330 case R_386_TLS_GD:
331 case R_386_TLS_GOTIE:
332 case R_386_TLS_IE:
333 case R_386_TLS_LDM:
334 case R_386_TLS_LDO_32:
335 case R_386_TLS_LE:
336 case R_386_TLS_LE_32:
337 case R_386_TLS_TPOFF:
338 case R_386_TLS_TPOFF32:
339 checkInt(ctx, loc, v: val, n: 32, rel);
340 write32le(P: loc, V: val);
341 break;
342 case R_386_TLS_DESC:
343 // The addend is stored in the second 32-bit word.
344 write32le(P: loc + 4, V: val);
345 break;
346 default:
347 llvm_unreachable("unknown relocation");
348 }
349}
350
351void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
352 uint64_t val) const {
353 if (rel.type == R_386_TLS_GD) {
354 // Convert (loc[-2] == 0x04)
355 // leal x@tlsgd(, %ebx, 1), %eax
356 // call ___tls_get_addr@plt
357 // or
358 // leal x@tlsgd(%reg), %eax
359 // call *___tls_get_addr@got(%reg)
360 // to
361 const uint8_t inst[] = {
362 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
363 0x81, 0xe8, 0, 0, 0, 0, // subl x@ntpoff(%ebx), %eax
364 };
365 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
366 memcpy(dest: w, src: inst, n: sizeof(inst));
367 write32le(P: w + 8, V: val);
368 } else if (rel.type == R_386_TLS_GOTDESC) {
369 // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
370 //
371 // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
372 if (memcmp(s1: loc - 2, s2: "\x8d\x83", n: 2)) {
373 ErrAlways(ctx)
374 << getErrorLoc(ctx, loc: loc - 2)
375 << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
376 return;
377 }
378 loc[-1] = 0x05;
379 write32le(P: loc, V: val);
380 } else {
381 // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
382 assert(rel.type == R_386_TLS_DESC_CALL);
383 loc[0] = 0x66;
384 loc[1] = 0x90;
385 }
386}
387
388void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
389 uint64_t val) const {
390 if (rel.type == R_386_TLS_GD) {
391 // Convert (loc[-2] == 0x04)
392 // leal x@tlsgd(, %ebx, 1), %eax
393 // call ___tls_get_addr@plt
394 // or
395 // leal x@tlsgd(%reg), %eax
396 // call *___tls_get_addr@got(%reg)
397 const uint8_t inst[] = {
398 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
399 0x03, 0x83, 0, 0, 0, 0, // addl x@gottpoff(%ebx), %eax
400 };
401 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
402 memcpy(dest: w, src: inst, n: sizeof(inst));
403 write32le(P: w + 8, V: val);
404 } else if (rel.type == R_386_TLS_GOTDESC) {
405 // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
406 if (memcmp(s1: loc - 2, s2: "\x8d\x83", n: 2)) {
407 ErrAlways(ctx)
408 << getErrorLoc(ctx, loc: loc - 2)
409 << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
410 return;
411 }
412 loc[-2] = 0x8b;
413 write32le(P: loc, V: val);
414 } else {
415 // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
416 assert(rel.type == R_386_TLS_DESC_CALL);
417 loc[0] = 0x66;
418 loc[1] = 0x90;
419 }
420}
421
422// In some conditions, relocations can be optimized to avoid using GOT.
423// This function does that for Initial Exec to Local Exec case.
424void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
425 uint64_t val) const {
426 // Ulrich's document section 6.2 says that @gotntpoff can
427 // be used with MOVL or ADDL instructions.
428 // @indntpoff is similar to @gotntpoff, but for use in
429 // position dependent code.
430 uint8_t reg = (loc[-1] >> 3) & 7;
431
432 if (rel.type == R_386_TLS_IE) {
433 if (loc[-1] == 0xa1) {
434 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
435 // This case is different from the generic case below because
436 // this is a 5 byte instruction while below is 6 bytes.
437 loc[-1] = 0xb8;
438 } else if (loc[-2] == 0x8b) {
439 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
440 loc[-2] = 0xc7;
441 loc[-1] = 0xc0 | reg;
442 } else {
443 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
444 loc[-2] = 0x81;
445 loc[-1] = 0xc0 | reg;
446 }
447 } else {
448 assert(rel.type == R_386_TLS_GOTIE);
449 if (loc[-2] == 0x8b) {
450 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
451 loc[-2] = 0xc7;
452 loc[-1] = 0xc0 | reg;
453 } else {
454 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
455 loc[-2] = 0x8d;
456 loc[-1] = 0x80 | (reg << 3) | reg;
457 }
458 }
459 write32le(P: loc, V: val);
460}
461
462void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
463 uint64_t val) const {
464 if (rel.type == R_386_TLS_LDO_32) {
465 write32le(P: loc, V: val);
466 return;
467 }
468
469 if (loc[4] == 0xe8) {
470 // Convert
471 // leal x(%reg),%eax
472 // call ___tls_get_addr@plt
473 // to
474 const uint8_t inst[] = {
475 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
476 0x90, // nop
477 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
478 };
479 memcpy(dest: loc - 2, src: inst, n: sizeof(inst));
480 return;
481 }
482
483 // Convert
484 // leal x(%reg),%eax
485 // call *___tls_get_addr@got(%reg)
486 // to
487 const uint8_t inst[] = {
488 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
489 0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
490 };
491 memcpy(dest: loc - 2, src: inst, n: sizeof(inst));
492}
493
494void X86::relocateAlloc(InputSection &sec, uint8_t *buf) const {
495 uint64_t secAddr = sec.getOutputSection()->addr + sec.outSecOff;
496 for (const Relocation &rel : sec.relocs()) {
497 uint8_t *loc = buf + rel.offset;
498 const uint64_t val =
499 SignExtend64(X: sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset), B: 32);
500 switch (rel.expr) {
501 case R_RELAX_TLS_GD_TO_IE_GOTPLT:
502 relaxTlsGdToIe(loc, rel, val);
503 continue;
504 case R_RELAX_TLS_GD_TO_LE:
505 case R_RELAX_TLS_GD_TO_LE_NEG:
506 relaxTlsGdToLe(loc, rel, val);
507 continue;
508 case R_RELAX_TLS_LD_TO_LE:
509 relaxTlsLdToLe(loc, rel, val);
510 break;
511 case R_RELAX_TLS_IE_TO_LE:
512 relaxTlsIeToLe(loc, rel, val);
513 continue;
514 default:
515 relocate(loc, rel, val);
516 break;
517 }
518 }
519}
520
521// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
522// entries containing endbr32 instructions. A PLT entry will be split into two
523// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
524namespace {
525class IntelIBT : public X86 {
526public:
527 IntelIBT(Ctx &ctx) : X86(ctx) { pltHeaderSize = 0; }
528 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
529 void writePlt(uint8_t *buf, const Symbol &sym,
530 uint64_t pltEntryAddr) const override;
531 void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
532
533 static const unsigned IBTPltHeaderSize = 16;
534};
535} // namespace
536
537void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
538 uint64_t va = ctx.in.ibtPlt->getVA() + IBTPltHeaderSize +
539 s.getPltIdx(ctx) * pltEntrySize;
540 write32le(P: buf, V: va);
541}
542
543void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
544 uint64_t /*pltEntryAddr*/) const {
545 if (ctx.arg.isPic) {
546 const uint8_t inst[] = {
547 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
548 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx)
549 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
550 };
551 memcpy(dest: buf, src: inst, n: sizeof(inst));
552 write32le(P: buf + 6, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
553 return;
554 }
555
556 const uint8_t inst[] = {
557 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
558 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
559 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
560 };
561 memcpy(dest: buf, src: inst, n: sizeof(inst));
562 write32le(P: buf + 6, V: sym.getGotPltVA(ctx));
563}
564
565void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
566 writePltHeader(buf);
567 buf += IBTPltHeaderSize;
568
569 const uint8_t inst[] = {
570 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
571 0x68, 0, 0, 0, 0, // pushl $reloc_offset
572 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC
573 0x66, 0x90, // nop
574 };
575
576 for (size_t i = 0; i < numEntries; ++i) {
577 memcpy(dest: buf, src: inst, n: sizeof(inst));
578 write32le(P: buf + 5, V: i * sizeof(object::ELF32LE::Rel));
579 write32le(P: buf + 10, V: -pltHeaderSize - sizeof(inst) * i - 30);
580 buf += sizeof(inst);
581 }
582}
583
584namespace {
585class RetpolinePic : public X86 {
586public:
587 RetpolinePic(Ctx &);
588 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
589 void writePltHeader(uint8_t *buf) const override;
590 void writePlt(uint8_t *buf, const Symbol &sym,
591 uint64_t pltEntryAddr) const override;
592};
593
594class RetpolineNoPic : public X86 {
595public:
596 RetpolineNoPic(Ctx &);
597 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
598 void writePltHeader(uint8_t *buf) const override;
599 void writePlt(uint8_t *buf, const Symbol &sym,
600 uint64_t pltEntryAddr) const override;
601};
602} // namespace
603
604RetpolinePic::RetpolinePic(Ctx &ctx) : X86(ctx) {
605 pltHeaderSize = 48;
606 pltEntrySize = 32;
607 ipltEntrySize = 32;
608}
609
610void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
611 write32le(P: buf, V: s.getPltVA(ctx) + 17);
612}
613
614void RetpolinePic::writePltHeader(uint8_t *buf) const {
615 const uint8_t insn[] = {
616 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx)
617 0x50, // 6: pushl %eax
618 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax
619 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
620 0xf3, 0x90, // 12: loop: pause
621 0x0f, 0xae, 0xe8, // 14: lfence
622 0xeb, 0xf9, // 17: jmp loop
623 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
624 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
625 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
626 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
627 0x89, 0xc8, // 2b: mov %ecx, %eax
628 0x59, // 2d: pop %ecx
629 0xc3, // 2e: ret
630 0xcc, // 2f: int3; padding
631 };
632 memcpy(dest: buf, src: insn, n: sizeof(insn));
633}
634
635void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
636 uint64_t pltEntryAddr) const {
637 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
638 const uint8_t insn[] = {
639 0x50, // pushl %eax
640 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
641 0xe8, 0, 0, 0, 0, // call plt+0x20
642 0xe9, 0, 0, 0, 0, // jmp plt+0x12
643 0x68, 0, 0, 0, 0, // pushl $reloc_offset
644 0xe9, 0, 0, 0, 0, // jmp plt+0
645 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
646 };
647 memcpy(dest: buf, src: insn, n: sizeof(insn));
648
649 uint32_t ebx = ctx.in.gotPlt->getVA();
650 unsigned off = pltEntryAddr - ctx.in.plt->getVA();
651 write32le(P: buf + 3, V: sym.getGotPltVA(ctx) - ebx);
652 write32le(P: buf + 8, V: -off - 12 + 32);
653 write32le(P: buf + 13, V: -off - 17 + 18);
654 write32le(P: buf + 18, V: relOff);
655 write32le(P: buf + 23, V: -off - 27);
656}
657
658RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86(ctx) {
659 pltHeaderSize = 48;
660 pltEntrySize = 32;
661 ipltEntrySize = 32;
662}
663
664void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
665 write32le(P: buf, V: s.getPltVA(ctx) + 16);
666}
667
668void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
669 const uint8_t insn[] = {
670 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
671 0x50, // 6: pushl %eax
672 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
673 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
674 0xf3, 0x90, // 11: loop: pause
675 0x0f, 0xae, 0xe8, // 13: lfence
676 0xeb, 0xf9, // 16: jmp loop
677 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
678 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16
679 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
680 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
681 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
682 0x89, 0xc8, // 2b: mov %ecx, %eax
683 0x59, // 2d: pop %ecx
684 0xc3, // 2e: ret
685 0xcc, // 2f: int3; padding
686 };
687 memcpy(dest: buf, src: insn, n: sizeof(insn));
688
689 uint32_t gotPlt = ctx.in.gotPlt->getVA();
690 write32le(P: buf + 2, V: gotPlt + 4);
691 write32le(P: buf + 8, V: gotPlt + 8);
692}
693
694void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
695 uint64_t pltEntryAddr) const {
696 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
697 const uint8_t insn[] = {
698 0x50, // 0: pushl %eax
699 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
700 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
701 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
702 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
703 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
704 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
705 0xcc, // 1f: int3; padding
706 };
707 memcpy(dest: buf, src: insn, n: sizeof(insn));
708
709 unsigned off = pltEntryAddr - ctx.in.plt->getVA();
710 write32le(P: buf + 2, V: sym.getGotPltVA(ctx));
711 write32le(P: buf + 7, V: -off - 11 + 32);
712 write32le(P: buf + 12, V: -off - 16 + 17);
713 write32le(P: buf + 17, V: relOff);
714 write32le(P: buf + 22, V: -off - 26);
715}
716
717void elf::setX86TargetInfo(Ctx &ctx) {
718 if (ctx.arg.zRetpolineplt) {
719 if (ctx.arg.isPic)
720 ctx.target.reset(p: new RetpolinePic(ctx));
721 else
722 ctx.target.reset(p: new RetpolineNoPic(ctx));
723 return;
724 }
725
726 if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)
727 ctx.target.reset(p: new IntelIBT(ctx));
728 else
729 ctx.target.reset(p: new X86(ctx));
730}
731