1//===- X86.cpp ------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "OutputSections.h"
10#include "RelocScan.h"
11#include "Symbols.h"
12#include "SyntheticSections.h"
13#include "Target.h"
14#include "llvm/Support/Endian.h"
15
16using namespace llvm;
17using namespace llvm::support::endian;
18using namespace llvm::ELF;
19using namespace lld;
20using namespace lld::elf;
21
namespace {
// TargetInfo implementation for 32-bit x86 (i386): GOT/PLT emission,
// relocation scanning/application, and TLS relaxation rewrites.
class X86 : public TargetInfo {
public:
  X86(Ctx &);
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPltHeader(uint8_t *buf) const override;
  RelType getDynRel(RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  template <class ELFT, class RelTy>
  void scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels);
  void scanSection(InputSectionBase &sec) override;
  void relocateAlloc(InputSection &sec, uint8_t *buf) const override;

private:
  // TLS access-model relaxations applied by relocateAlloc when the scan pass
  // decided a cheaper model (IE/LE) can be used.
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
};
} // namespace
50
51X86::X86(Ctx &ctx) : TargetInfo(ctx) {
52 copyRel = R_386_COPY;
53 gotRel = R_386_GLOB_DAT;
54 pltRel = R_386_JUMP_SLOT;
55 iRelativeRel = R_386_IRELATIVE;
56 relativeRel = R_386_RELATIVE;
57 symbolicRel = R_386_32;
58 tlsDescRel = R_386_TLS_DESC;
59 tlsGotRel = R_386_TLS_TPOFF;
60 tlsModuleIndexRel = R_386_TLS_DTPMOD32;
61 tlsOffsetRel = R_386_TLS_DTPOFF32;
62 gotBaseSymInGotPlt = true;
63 pltHeaderSize = 16;
64 pltEntrySize = 16;
65 ipltEntrySize = 16;
66 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
67
68 // Align to the non-PAE large page size (known as a superpage or huge page).
69 // FreeBSD automatically promotes large, superpage-aligned allocations.
70 defaultImageBase = 0x400000;
71}
72
73// Only needed to support relocations used by relocateNonAlloc and relocateEh.
74RelExpr X86::getRelExpr(RelType type, const Symbol &s,
75 const uint8_t *loc) const {
76 switch (type) {
77 case R_386_8:
78 case R_386_16:
79 case R_386_32:
80 return R_ABS;
81 case R_386_TLS_LDO_32:
82 return R_DTPREL;
83 case R_386_PC8:
84 case R_386_PC16:
85 case R_386_PC32:
86 return R_PC;
87 case R_386_GOTPC:
88 return R_GOTPLTONLY_PC;
89 case R_386_GOTOFF:
90 return R_GOTPLTREL;
91 case R_386_NONE:
92 return R_NONE;
93 default:
94 Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
95 << ") against symbol " << &s;
96 return R_NONE;
97 }
98}
99
100void X86::writeGotPltHeader(uint8_t *buf) const {
101 write32le(P: buf, V: ctx.mainPart->dynamic->getVA());
102}
103
104void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
105 // Entries in .got.plt initially points back to the corresponding
106 // PLT entries with a fixed offset to skip the first instruction.
107 write32le(P: buf, V: s.getPltVA(ctx) + 6);
108}
109
110void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
111 // An x86 entry is the address of the ifunc resolver function.
112 write32le(P: buf, V: s.getVA(ctx));
113}
114
115RelType X86::getDynRel(RelType type) const {
116 if (type == R_386_TLS_LE)
117 return R_386_TLS_TPOFF;
118 if (type == R_386_TLS_LE_32)
119 return R_386_TLS_TPOFF32;
120 return type;
121}
122
123void X86::writePltHeader(uint8_t *buf) const {
124 if (ctx.arg.isPic) {
125 const uint8_t v[] = {
126 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
127 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
128 0x90, 0x90, 0x90, 0x90 // nop
129 };
130 memcpy(dest: buf, src: v, n: sizeof(v));
131 return;
132 }
133
134 const uint8_t pltData[] = {
135 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
136 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
137 0x90, 0x90, 0x90, 0x90, // nop
138 };
139 memcpy(dest: buf, src: pltData, n: sizeof(pltData));
140 uint32_t gotPlt = ctx.in.gotPlt->getVA();
141 write32le(P: buf + 2, V: gotPlt + 4);
142 write32le(P: buf + 8, V: gotPlt + 8);
143}
144
145void X86::writePlt(uint8_t *buf, const Symbol &sym,
146 uint64_t pltEntryAddr) const {
147 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
148 if (ctx.arg.isPic) {
149 const uint8_t inst[] = {
150 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
151 0x68, 0, 0, 0, 0, // pushl $reloc_offset
152 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
153 };
154 memcpy(dest: buf, src: inst, n: sizeof(inst));
155 write32le(P: buf + 2, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
156 } else {
157 const uint8_t inst[] = {
158 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
159 0x68, 0, 0, 0, 0, // pushl $reloc_offset
160 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
161 };
162 memcpy(dest: buf, src: inst, n: sizeof(inst));
163 write32le(P: buf + 2, V: sym.getGotPltVA(ctx));
164 }
165
166 write32le(P: buf + 7, V: relOff);
167 write32le(P: buf + 12, V: ctx.in.plt->getVA() - pltEntryAddr - 16);
168}
169
// Scan all relocations of one input section, classifying each into a RelExpr
// and handing it to RelocScan, which records it for the later apply pass and
// creates any GOT/PLT entries or dynamic relocations it requires. Some TLS
// handlers consume the following relocation as well (hence the extra ++it).
template <class ELFT, class RelTy>
void X86::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
  RelocScan rs(ctx, &sec);
  sec.relocations.reserve(N: rels.size());

  for (auto it = rels.begin(); it != rels.end(); ++it) {
    const RelTy &rel = *it;
    uint32_t symIdx = rel.getSymbol(false);
    Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx);
    uint64_t offset = rel.r_offset;
    RelType type = rel.getType(false);
    // Report undefined-symbol errors early; skip the relocation if one was
    // diagnosed.
    if (sym.isUndefined() && symIdx != 0 &&
        rs.maybeReportUndefined(sym&: cast<Undefined>(Val&: sym), offset))
      continue;
    int64_t addend = rs.getAddend<ELFT>(rel, type);
    RelExpr expr;
    switch (type) {
    case R_386_NONE:
      continue;

    // Absolute relocations:
    case R_386_8:
    case R_386_16:
    case R_386_32:
      expr = R_ABS;
      break;

    // PC-relative relocations:
    case R_386_PC8:
    case R_386_PC16:
    case R_386_PC32:
      rs.processR_PC(type, offset, addend, sym);
      continue;

    // PLT-generating relocation:
    case R_386_PLT32:
      rs.processR_PLT_PC(type, offset, addend, sym);
      continue;

    // GOT-related relocations:
    case R_386_GOTPC:
      ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
      expr = R_GOTPLTONLY_PC;
      break;
    case R_386_GOTOFF:
      ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
      expr = R_GOTPLTREL;
      break;
    case R_386_GOT32:
    case R_386_GOT32X:
      // R_386_GOT32(X) is used for both absolute GOT access (foo@GOT,
      // non-PIC, G + A => R_GOT) and register-relative GOT access
      // (foo@GOT(%ebx), PIC, G + A - GOT => R_GOTPLT). Both use the same
      // relocation type, so we check the ModRM byte to distinguish them.
      expr = offset && (sec.content().data()[offset - 1] & 0xc7) == 0x5
                 ? R_GOT
                 : R_GOTPLT;
      if (expr == R_GOTPLT)
        ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
      break;

    // TLS relocations:
    case R_386_TLS_LE:
      if (rs.checkTlsLe(offset, sym, type))
        continue;
      expr = R_TPREL;
      break;
    case R_386_TLS_LE_32:
      if (rs.checkTlsLe(offset, sym, type))
        continue;
      expr = R_TPREL_NEG;
      break;
    case R_386_TLS_IE:
      rs.handleTlsIe(ieExpr: R_GOT, type, offset, addend, sym);
      continue;
    case R_386_TLS_GOTIE:
      ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
      rs.handleTlsIe(ieExpr: R_GOTPLT, type, offset, addend, sym);
      continue;
    case R_386_TLS_GD:
      ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
      // Use R_TPREL_NEG for negative TP offset.
      // handleTlsGd returns true when it also consumed the paired
      // relocation on the following call instruction.
      if (rs.handleTlsGd(sharedExpr: R_TLSGD_GOTPLT, ieExpr: R_GOTPLT, leExpr: R_TPREL_NEG, type, offset,
                         addend, sym))
        ++it;
      continue;
    case R_386_TLS_LDM:
      ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
      if (rs.handleTlsLd(sharedExpr: R_TLSLD_GOTPLT, type, offset, addend, sym))
        ++it;
      continue;
    case R_386_TLS_LDO_32:
      // DTP-relative offset; becomes TP-relative if LD is relaxed to LE
      // (i.e. when not building a shared object).
      sec.addReloc(
          r: {.expr: ctx.arg.shared ? R_DTPREL : R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
      continue;
    case R_386_TLS_GOTDESC:
      ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
      rs.handleTlsDesc(sharedExpr: R_TLSDESC_GOTPLT, ieExpr: R_GOTPLT, type, offset, addend, sym);
      continue;
    case R_386_TLS_DESC_CALL:
      // For executables, TLSDESC is optimized to IE or LE. Use R_TPREL as the
      // rewrites for this relocation are identical.
      if (!ctx.arg.shared)
        sec.addReloc(r: {.expr: R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
      continue;

    default:
      Err(ctx) << getErrorLoc(ctx, loc: sec.content().data() + offset)
               << "unknown relocation (" << type.v << ") against symbol "
               << &sym;
      continue;
    }
    rs.process(expr, type, offset, sym, addend);
  }
}
285
// Entry point for relocation scanning; dispatches to scanSectionImpl
// instantiated for little-endian 32-bit ELF (the only i386 variant).
void X86::scanSection(InputSectionBase &sec) {
  elf::scanSection1<X86, ELF32LE>(target&: *this, sec);
}
289
// i386 uses REL-format relocations, so the addend is stored in the section
// contents at the relocated location. Read and sign-extend it according to
// the relocation's width.
int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  // 8-bit relocations.
  case R_386_8:
  case R_386_PC8:
    return SignExtend64<8>(x: *buf);
  // 16-bit relocations.
  case R_386_16:
  case R_386_PC16:
    return SignExtend64<16>(x: read16le(P: buf));
  // All remaining data relocations are 32 bits wide.
  case R_386_32:
  case R_386_GLOB_DAT:
  case R_386_GOT32:
  case R_386_GOT32X:
  case R_386_GOTOFF:
  case R_386_GOTPC:
  case R_386_IRELATIVE:
  case R_386_PC32:
  case R_386_PLT32:
  case R_386_RELATIVE:
  case R_386_TLS_GOTDESC:
  case R_386_TLS_DESC_CALL:
  case R_386_TLS_DTPMOD32:
  case R_386_TLS_DTPOFF32:
  case R_386_TLS_LDO_32:
  case R_386_TLS_LDM:
  case R_386_TLS_IE:
  case R_386_TLS_IE_32:
  case R_386_TLS_LE:
  case R_386_TLS_LE_32:
  case R_386_TLS_GD:
  case R_386_TLS_GD_32:
  case R_386_TLS_GOTIE:
  case R_386_TLS_TPOFF:
  case R_386_TLS_TPOFF32:
    return SignExtend64<32>(x: read32le(P: buf));
  // A TLS descriptor is two words; the addend lives in the second one.
  case R_386_TLS_DESC:
    return SignExtend64<32>(x: read32le(P: buf + 4));
  case R_386_NONE:
  case R_386_JUMP_SLOT:
    // These relocations are defined as not having an implicit addend.
    return 0;
  default:
    InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
    return 0;
  }
}
335
// Apply a single relocation: range-check `val` for the relocation's width,
// then store it little-endian at `loc`.
void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  switch (rel.type) {
  case R_386_8:
    // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
    // being used for some 16-bit programs such as boot loaders, so
    // we want to support them.
    checkIntUInt(ctx, loc, v: val, n: 8, rel);
    *loc = val;
    break;
  case R_386_PC8:
    checkInt(ctx, loc, v: val, n: 8, rel);
    *loc = val;
    break;
  case R_386_16:
    checkIntUInt(ctx, loc, v: val, n: 16, rel);
    write16le(P: loc, V: val);
    break;
  case R_386_PC16:
    // R_386_PC16 is normally used with 16 bit code. In that situation
    // the PC is 16 bits, just like the addend. This means that it can
    // point from any 16 bit address to any other if the possibility
    // of wrapping is included.
    // The only restriction we have to check then is that the destination
    // address fits in 16 bits. That is impossible to do here. The problem is
    // that we are passed the final value, which already had the
    // current location subtracted from it.
    // We just check that Val fits in 17 bits. This misses some cases, but
    // should have no false positives.
    checkInt(ctx, loc, v: val, n: 17, rel);
    write16le(P: loc, V: val);
    break;
  // All remaining relocations are checked 32-bit stores.
  case R_386_32:
  case R_386_GOT32:
  case R_386_GOT32X:
  case R_386_GOTOFF:
  case R_386_GOTPC:
  case R_386_PC32:
  case R_386_PLT32:
  case R_386_RELATIVE:
  case R_386_TLS_GOTDESC:
  case R_386_TLS_DESC_CALL:
  case R_386_TLS_DTPMOD32:
  case R_386_TLS_DTPOFF32:
  case R_386_TLS_GD:
  case R_386_TLS_GOTIE:
  case R_386_TLS_IE:
  case R_386_TLS_LDM:
  case R_386_TLS_LDO_32:
  case R_386_TLS_LE:
  case R_386_TLS_LE_32:
  case R_386_TLS_TPOFF:
  case R_386_TLS_TPOFF32:
    checkInt(ctx, loc, v: val, n: 32, rel);
    write32le(P: loc, V: val);
    break;
  case R_386_TLS_DESC:
    // The addend is stored in the second 32-bit word.
    write32le(P: loc + 4, V: val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}
399
// Relax a General Dynamic (or TLSDESC) TLS access to Local Exec: the
// __tls_get_addr / descriptor call is replaced by computing the TP-relative
// address directly from %gs:0.
void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  if (rel.type == R_386_TLS_GD) {
    // Convert (loc[-2] == 0x04)
    //   leal x@tlsgd(, %ebx, 1), %eax
    //   call ___tls_get_addr@plt
    // or
    //   leal x@tlsgd(%reg), %eax
    //   call *___tls_get_addr@got(%reg)
    // to
    const uint8_t inst[] = {
        0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
        0x81, 0xe8, 0, 0, 0, 0,             // subl x@ntpoff(%ebx), %eax
    };
    // The SIB-form lea is one byte longer, so the rewrite starts one byte
    // earlier in that case.
    uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
    memcpy(dest: w, src: inst, n: sizeof(inst));
    write32le(P: w + 8, V: val);
  } else if (rel.type == R_386_TLS_GOTDESC) {
    // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
    //
    // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
    if (memcmp(s1: loc - 2, s2: "\x8d\x83", n: 2)) {
      ErrAlways(ctx)
          << getErrorLoc(ctx, loc: loc - 2)
          << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
      return;
    }
    loc[-1] = 0x05;
    write32le(P: loc, V: val);
  } else {
    // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
    assert(rel.type == R_386_TLS_DESC_CALL);
    loc[0] = 0x66;
    loc[1] = 0x90;
  }
}
436
437void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
438 uint64_t val) const {
439 if (rel.type == R_386_TLS_GD) {
440 // Convert (loc[-2] == 0x04)
441 // leal x@tlsgd(, %ebx, 1), %eax
442 // call ___tls_get_addr@plt
443 // or
444 // leal x@tlsgd(%reg), %eax
445 // call *___tls_get_addr@got(%reg)
446 const uint8_t inst[] = {
447 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
448 0x03, 0x83, 0, 0, 0, 0, // addl x@gottpoff(%ebx), %eax
449 };
450 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
451 memcpy(dest: w, src: inst, n: sizeof(inst));
452 write32le(P: w + 8, V: val);
453 } else if (rel.type == R_386_TLS_GOTDESC) {
454 // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
455 if (memcmp(s1: loc - 2, s2: "\x8d\x83", n: 2)) {
456 ErrAlways(ctx)
457 << getErrorLoc(ctx, loc: loc - 2)
458 << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
459 return;
460 }
461 loc[-2] = 0x8b;
462 write32le(P: loc, V: val);
463 }
464}
465
466// In some conditions, relocations can be optimized to avoid using GOT.
467// This function does that for Initial Exec to Local Exec case.
468void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
469 uint64_t val) const {
470 // Ulrich's document section 6.2 says that @gotntpoff can
471 // be used with MOVL or ADDL instructions.
472 // @indntpoff is similar to @gotntpoff, but for use in
473 // position dependent code.
474 uint8_t reg = (loc[-1] >> 3) & 7;
475
476 if (rel.type == R_386_TLS_IE) {
477 if (loc[-1] == 0xa1) {
478 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
479 // This case is different from the generic case below because
480 // this is a 5 byte instruction while below is 6 bytes.
481 loc[-1] = 0xb8;
482 } else if (loc[-2] == 0x8b) {
483 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
484 loc[-2] = 0xc7;
485 loc[-1] = 0xc0 | reg;
486 } else {
487 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
488 loc[-2] = 0x81;
489 loc[-1] = 0xc0 | reg;
490 }
491 } else {
492 assert(rel.type == R_386_TLS_GOTIE);
493 if (loc[-2] == 0x8b) {
494 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
495 loc[-2] = 0xc7;
496 loc[-1] = 0xc0 | reg;
497 } else {
498 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
499 loc[-2] = 0x8d;
500 loc[-1] = 0x80 | (reg << 3) | reg;
501 }
502 }
503 write32le(P: loc, V: val);
504}
505
// Relax Local Dynamic to Local Exec: the __tls_get_addr call that fetches
// the module's TLS block base is replaced by reading %gs:0 directly, padded
// with a same-length nop sequence.
void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  if (rel.type == R_386_TLS_LDO_32) {
    // The LDO offset relocation just stores the (now TP-relative) offset.
    write32le(P: loc, V: val);
    return;
  }

  // 0xe8 is a direct call, i.e. "call ___tls_get_addr@plt" (5 bytes).
  if (loc[4] == 0xe8) {
    // Convert
    //   leal x(%reg),%eax
    //   call ___tls_get_addr@plt
    // to
    const uint8_t inst[] = {
        0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
        0x90,                               // nop
        0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
    };
    memcpy(dest: loc - 2, src: inst, n: sizeof(inst));
    return;
  }

  // Convert
  //   leal x(%reg),%eax
  //   call *___tls_get_addr@got(%reg)
  // to
  const uint8_t inst[] = {
      0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
      0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
  };
  memcpy(dest: loc - 2, src: inst, n: sizeof(inst));
}
537
// Apply all relocations of an allocatable section. TLS relocations whose
// RelExpr shows they were chosen for a cheaper access model during scanning
// are routed to the corresponding relaxation; everything else goes through
// the plain relocate().
void X86::relocateAlloc(InputSection &sec, uint8_t *buf) const {
  uint64_t secAddr = sec.getOutputSection()->addr + sec.outSecOff;
  for (const Relocation &rel : sec.relocs()) {
    uint8_t *loc = buf + rel.offset;
    const uint64_t val =
        SignExtend64(X: sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset), B: 32);
    switch (rel.type) {
    case R_386_TLS_GD:
    case R_386_TLS_GOTDESC:
    case R_386_TLS_DESC_CALL:
      // R_TPREL(_NEG) means GD was fully relaxed to LE; R_GOTPLT means it
      // was relaxed to IE; otherwise apply as-is.
      if (rel.expr == R_TPREL || rel.expr == R_TPREL_NEG)
        relaxTlsGdToLe(loc, rel, val);
      else if (rel.expr == R_GOTPLT)
        relaxTlsGdToIe(loc, rel, val);
      else
        relocate(loc, rel, val);
      continue;
    case R_386_TLS_LDM:
    case R_386_TLS_LDO_32:
      if (rel.expr == R_TPREL)
        relaxTlsLdToLe(loc, rel, val);
      else
        relocate(loc, rel, val);
      continue;
    case R_386_TLS_IE:
    case R_386_TLS_GOTIE:
      if (rel.expr == R_TPREL)
        relaxTlsIeToLe(loc, rel, val);
      else
        relocate(loc, rel, val);
      continue;
    default:
      relocate(loc, rel, val);
      break;
    }
  }
}
575
576// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
577// entries containing endbr32 instructions. A PLT entry will be split into two
578// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
namespace {
// X86 variant used when Intel Indirect Branch Tracking (CET IBT) is enabled:
// PLT entries begin with endbr32 and are split between .plt.sec (writePlt)
// and .plt (writeIBTPlt).
class IntelIBT : public X86 {
public:
  // pltHeaderSize is 0 because the header lives in the IBT .plt, written by
  // writeIBTPlt, not in the section writePlt fills.
  IntelIBT(Ctx &ctx) : X86(ctx) { pltHeaderSize = 0; }
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;

  static const unsigned IBTPltHeaderSize = 16;
};
} // namespace
591
592void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
593 uint64_t va = ctx.in.ibtPlt->getVA() + IBTPltHeaderSize +
594 s.getPltIdx(ctx) * pltEntrySize;
595 write32le(P: buf, V: va);
596}
597
598void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
599 uint64_t /*pltEntryAddr*/) const {
600 if (ctx.arg.isPic) {
601 const uint8_t inst[] = {
602 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
603 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx)
604 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
605 };
606 memcpy(dest: buf, src: inst, n: sizeof(inst));
607 write32le(P: buf + 6, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
608 return;
609 }
610
611 const uint8_t inst[] = {
612 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
613 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
614 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
615 };
616 memcpy(dest: buf, src: inst, n: sizeof(inst));
617 write32le(P: buf + 6, V: sym.getGotPltVA(ctx));
618}
619
620void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
621 writePltHeader(buf);
622 buf += IBTPltHeaderSize;
623
624 const uint8_t inst[] = {
625 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
626 0x68, 0, 0, 0, 0, // pushl $reloc_offset
627 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC
628 0x66, 0x90, // nop
629 };
630
631 for (size_t i = 0; i < numEntries; ++i) {
632 memcpy(dest: buf, src: inst, n: sizeof(inst));
633 write32le(P: buf + 5, V: i * sizeof(object::ELF32LE::Rel));
634 write32le(P: buf + 10, V: -pltHeaderSize - sizeof(inst) * i - 30);
635 buf += sizeof(inst);
636 }
637}
638
namespace {
// X86 variants used with -z retpolineplt: PLT entries route all indirect
// branches through a retpoline thunk in the header to mitigate Spectre v2.
// Separate classes for PIC (GOT accessed via %ebx) and non-PIC (absolute
// GOT addresses) code.
class RetpolinePic : public X86 {
public:
  RetpolinePic(Ctx &);
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};

class RetpolineNoPic : public X86 {
public:
  RetpolineNoPic(Ctx &);
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};
} // namespace
658
RetpolinePic::RetpolinePic(Ctx &ctx) : X86(ctx) {
  // Retpoline thunks need more room than the plain 16-byte x86 PLT slots.
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}
664
665void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
666 write32le(P: buf, V: s.getPltVA(ctx) + 17);
667}
668
// Write the 48-byte PIC retpoline PLT header: the lazy-binding entry point
// followed by the retpoline thunk ("next:" onward) that bounces indirect
// branches off the return stack instead of the indirect branch predictor.
void RetpolinePic::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
      0x50,                                     // 6:    pushl %eax
      0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
      0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
      0xf3, 0x90,                               // 12: loop: pause
      0x0f, 0xae, 0xe8,                         // 14:   lfence
      0xeb, 0xf9,                               // 17:   jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
      0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
      0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
      0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
      0x89, 0xc8,                               // 2b:   mov %ecx, %eax
      0x59,                                     // 2d:   pop %ecx
      0xc3,                                     // 2e:   ret
      0xcc,                                     // 2f:   int3; padding
  };
  memcpy(dest: buf, src: insn, n: sizeof(insn));
}
689
// Write a 32-byte PIC retpoline PLT entry. The entry loads the GOT slot into
// %eax and calls the thunk in the header (plt+0x20); the lazy path at +17
// pushes the relocation offset and jumps to PLT[0].
void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
                            uint64_t pltEntryAddr) const {
  unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
  const uint8_t insn[] = {
      0x50,                            // pushl %eax
      0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
      0xe8, 0,    0,    0,    0,       // call plt+0x20
      0xe9, 0,    0,    0,    0,       // jmp plt+0x12
      0x68, 0,    0,    0,    0,       // pushl $reloc_offset
      0xe9, 0,    0,    0,    0,       // jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
  };
  memcpy(dest: buf, src: insn, n: sizeof(insn));

  uint32_t ebx = ctx.in.gotPlt->getVA();
  // Displacements are relative to the end of each branch instruction; `off`
  // is this entry's offset from the start of .plt.
  unsigned off = pltEntryAddr - ctx.in.plt->getVA();
  write32le(P: buf + 3, V: sym.getGotPltVA(ctx) - ebx);
  write32le(P: buf + 8, V: -off - 12 + 32);
  write32le(P: buf + 13, V: -off - 17 + 18);
  write32le(P: buf + 18, V: relOff);
  write32le(P: buf + 23, V: -off - 27);
}
712
RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86(ctx) {
  // Retpoline thunks need more room than the plain 16-byte x86 PLT slots.
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}
718
719void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
720 write32le(P: buf, V: s.getPltVA(ctx) + 16);
721}
722
// Write the 48-byte non-PIC retpoline PLT header: like the PIC version but
// with absolute GOTPLT+4/GOTPLT+8 addresses patched in below.
void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
      0x50,                            // 6:    pushl %eax
      0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
      0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
      0xf3, 0x90,                      // 11: loop: pause
      0x0f, 0xae, 0xe8,                // 13:   lfence
      0xeb, 0xf9,                      // 16:   jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
      0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
      0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
      0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
      0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
      0x89, 0xc8,                      // 2b:   mov %ecx, %eax
      0x59,                            // 2d:   pop %ecx
      0xc3,                            // 2e:   ret
      0xcc,                            // 2f:   int3; padding
  };
  memcpy(dest: buf, src: insn, n: sizeof(insn));

  // Patch the absolute GOTPLT slot addresses into the pushl/mov above.
  uint32_t gotPlt = ctx.in.gotPlt->getVA();
  write32le(P: buf + 2, V: gotPlt + 4);
  write32le(P: buf + 8, V: gotPlt + 8);
}
748
// Write a 32-byte non-PIC retpoline PLT entry: load the GOT slot with an
// absolute mov, call the thunk in the header (plt+0x20); the lazy path at
// +16 pushes the relocation offset and jumps to PLT[0].
void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
                              uint64_t pltEntryAddr) const {
  unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
  const uint8_t insn[] = {
      0x50,                         // 0:  pushl %eax
      0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
      0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
      0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
      0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
      0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
      0xcc,                         // 1f: int3; padding
  };
  memcpy(dest: buf, src: insn, n: sizeof(insn));

  // Displacements are relative to the end of each branch instruction; `off`
  // is this entry's offset from the start of .plt.
  unsigned off = pltEntryAddr - ctx.in.plt->getVA();
  write32le(P: buf + 2, V: sym.getGotPltVA(ctx));
  write32le(P: buf + 7, V: -off - 11 + 32);
  write32le(P: buf + 12, V: -off - 16 + 17);
  write32le(P: buf + 17, V: relOff);
  write32le(P: buf + 22, V: -off - 26);
}
771
772void elf::setX86TargetInfo(Ctx &ctx) {
773 if (ctx.arg.zRetpolineplt) {
774 if (ctx.arg.isPic)
775 ctx.target.reset(p: new RetpolinePic(ctx));
776 else
777 ctx.target.reset(p: new RetpolineNoPic(ctx));
778 return;
779 }
780
781 if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)
782 ctx.target.reset(p: new IntelIBT(ctx));
783 else
784 ctx.target.reset(p: new X86(ctx));
785}
786