//===- X86.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
#include "OutputSections.h"
#include "RelocScan.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "llvm/Support/Endian.h"
#include <cstring>

using namespace llvm;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
21
22namespace {
23class X86 : public TargetInfo {
24public:
25 X86(Ctx &);
26 void initTargetSpecificSections() override;
27 RelExpr getRelExpr(RelType type, const Symbol &s,
28 const uint8_t *loc) const override;
29 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30 void writeGotPltHeader(uint8_t *buf) const override;
31 RelType getDynRel(RelType type) const override;
32 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34 void writePltHeader(uint8_t *buf) const override;
35 void writePlt(uint8_t *buf, const Symbol &sym,
36 uint64_t pltEntryAddr) const override;
37 void relocate(uint8_t *loc, const Relocation &rel,
38 uint64_t val) const override;
39 template <class ELFT, class RelTy>
40 void scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels);
41 void scanSection(InputSectionBase &sec) override;
42 void relocateAlloc(InputSection &sec, uint8_t *buf) const override;
43
44private:
45 void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
46 void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
47 void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
48 void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
49};
50} // namespace
51
52X86::X86(Ctx &ctx) : TargetInfo(ctx) {
53 copyRel = R_386_COPY;
54 gotRel = R_386_GLOB_DAT;
55 pltRel = R_386_JUMP_SLOT;
56 iRelativeRel = R_386_IRELATIVE;
57 relativeRel = R_386_RELATIVE;
58 symbolicRel = R_386_32;
59 tlsDescRel = R_386_TLS_DESC;
60 tlsGotRel = R_386_TLS_TPOFF;
61 tlsModuleIndexRel = R_386_TLS_DTPMOD32;
62 tlsOffsetRel = R_386_TLS_DTPOFF32;
63 gotBaseSymInGotPlt = true;
64 pltHeaderSize = 16;
65 pltEntrySize = 16;
66 ipltEntrySize = 16;
67 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
68
69 // Align to the non-PAE large page size (known as a superpage or huge page).
70 // FreeBSD automatically promotes large, superpage-aligned allocations.
71 defaultImageBase = 0x400000;
72}
73
74void X86::initTargetSpecificSections() {
75 if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
76 ctx.in.ibtPlt = std::make_unique<IBTPltSection>(args&: ctx);
77 ctx.inputSections.push_back(Elt: ctx.in.ibtPlt.get());
78 }
79}
80
81// Only needed to support relocations used by relocateNonAlloc and relocateEh.
82RelExpr X86::getRelExpr(RelType type, const Symbol &s,
83 const uint8_t *loc) const {
84 switch (type) {
85 case R_386_8:
86 case R_386_16:
87 case R_386_32:
88 return R_ABS;
89 case R_386_TLS_LDO_32:
90 return R_DTPREL;
91 case R_386_PC8:
92 case R_386_PC16:
93 case R_386_PC32:
94 return R_PC;
95 case R_386_GOTPC:
96 return R_GOTPLTONLY_PC;
97 case R_386_GOTOFF:
98 return R_GOTPLTREL;
99 case R_386_NONE:
100 return R_NONE;
101 default:
102 Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
103 << ") against symbol " << &s;
104 return R_NONE;
105 }
106}
107
108void X86::writeGotPltHeader(uint8_t *buf) const {
109 write32le(P: buf, V: ctx.mainPart->dynamic->getVA());
110}
111
112void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
113 // Entries in .got.plt initially points back to the corresponding
114 // PLT entries with a fixed offset to skip the first instruction.
115 write32le(P: buf, V: s.getPltVA(ctx) + 6);
116}
117
118void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
119 // An x86 entry is the address of the ifunc resolver function.
120 write32le(P: buf, V: s.getVA(ctx));
121}
122
123RelType X86::getDynRel(RelType type) const {
124 if (type == R_386_TLS_LE)
125 return R_386_TLS_TPOFF;
126 if (type == R_386_TLS_LE_32)
127 return R_386_TLS_TPOFF32;
128 return type;
129}
130
131void X86::writePltHeader(uint8_t *buf) const {
132 if (ctx.arg.isPic) {
133 const uint8_t v[] = {
134 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
135 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
136 0x90, 0x90, 0x90, 0x90 // nop
137 };
138 memcpy(dest: buf, src: v, n: sizeof(v));
139 return;
140 }
141
142 const uint8_t pltData[] = {
143 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
144 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
145 0x90, 0x90, 0x90, 0x90, // nop
146 };
147 memcpy(dest: buf, src: pltData, n: sizeof(pltData));
148 uint32_t gotPlt = ctx.in.gotPlt->getVA();
149 write32le(P: buf + 2, V: gotPlt + 4);
150 write32le(P: buf + 8, V: gotPlt + 8);
151}
152
153void X86::writePlt(uint8_t *buf, const Symbol &sym,
154 uint64_t pltEntryAddr) const {
155 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
156 if (ctx.arg.isPic) {
157 const uint8_t inst[] = {
158 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
159 0x68, 0, 0, 0, 0, // pushl $reloc_offset
160 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
161 };
162 memcpy(dest: buf, src: inst, n: sizeof(inst));
163 write32le(P: buf + 2, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
164 } else {
165 const uint8_t inst[] = {
166 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
167 0x68, 0, 0, 0, 0, // pushl $reloc_offset
168 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
169 };
170 memcpy(dest: buf, src: inst, n: sizeof(inst));
171 write32le(P: buf + 2, V: sym.getGotPltVA(ctx));
172 }
173
174 write32le(P: buf + 7, V: relOff);
175 write32le(P: buf + 12, V: ctx.in.plt->getVA() - pltEntryAddr - 16);
176}
177
178template <class ELFT, class RelTy>
179void X86::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
180 RelocScan rs(ctx, &sec);
181 sec.relocations.reserve(N: rels.size());
182
183 for (auto it = rels.begin(); it != rels.end(); ++it) {
184 const RelTy &rel = *it;
185 uint32_t symIdx = rel.getSymbol(false);
186 Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx);
187 uint64_t offset = rel.r_offset;
188 RelType type = rel.getType(false);
189 if (sym.isUndefined() && symIdx != 0 &&
190 rs.maybeReportUndefined(sym&: cast<Undefined>(Val&: sym), offset))
191 continue;
192 int64_t addend = rs.getAddend<ELFT>(rel, type);
193 RelExpr expr;
194 switch (type) {
195 case R_386_NONE:
196 continue;
197
198 // Absolute relocations:
199 case R_386_8:
200 case R_386_16:
201 case R_386_32:
202 expr = R_ABS;
203 break;
204
205 // PC-relative relocations:
206 case R_386_PC8:
207 case R_386_PC16:
208 case R_386_PC32:
209 rs.processR_PC(type, offset, addend, sym);
210 continue;
211
212 // PLT-generating relocation:
213 case R_386_PLT32:
214 rs.processR_PLT_PC(type, offset, addend, sym);
215 continue;
216
217 // GOT-related relocations:
218 case R_386_GOTPC:
219 ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
220 expr = R_GOTPLTONLY_PC;
221 break;
222 case R_386_GOTOFF:
223 ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
224 expr = R_GOTPLTREL;
225 break;
226 case R_386_GOT32:
227 case R_386_GOT32X:
228 // R_386_GOT32(X) is used for both absolute GOT access (foo@GOT,
229 // non-PIC, G + A => R_GOT) and register-relative GOT access
230 // (foo@GOT(%ebx), PIC, G + A - GOT => R_GOTPLT). Both use the same
231 // relocation type, so we check the ModRM byte to distinguish them.
232 expr = offset && (sec.content().data()[offset - 1] & 0xc7) == 0x5
233 ? R_GOT
234 : R_GOTPLT;
235 if (expr == R_GOTPLT)
236 ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
237 break;
238
239 // TLS relocations:
240 case R_386_TLS_LE:
241 if (rs.checkTlsLe(offset, sym, type))
242 continue;
243 expr = R_TPREL;
244 break;
245 case R_386_TLS_LE_32:
246 if (rs.checkTlsLe(offset, sym, type))
247 continue;
248 expr = R_TPREL_NEG;
249 break;
250 case R_386_TLS_IE:
251 rs.handleTlsIe(ieExpr: R_GOT, type, offset, addend, sym);
252 continue;
253 case R_386_TLS_GOTIE:
254 ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
255 rs.handleTlsIe(ieExpr: R_GOTPLT, type, offset, addend, sym);
256 continue;
257 case R_386_TLS_GD:
258 ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
259 // Use R_TPREL_NEG for negative TP offset.
260 if (rs.handleTlsGd(sharedExpr: R_TLSGD_GOTPLT, ieExpr: R_GOTPLT, leExpr: R_TPREL_NEG, type, offset,
261 addend, sym))
262 ++it;
263 continue;
264 case R_386_TLS_LDM:
265 ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
266 if (rs.handleTlsLd(sharedExpr: R_TLSLD_GOTPLT, type, offset, addend, sym))
267 ++it;
268 continue;
269 case R_386_TLS_LDO_32:
270 sec.addReloc(
271 r: {.expr: ctx.arg.shared ? R_DTPREL : R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
272 continue;
273 case R_386_TLS_GOTDESC:
274 ctx.in.gotPlt->hasGotPltOffRel.store(i: true, m: std::memory_order_relaxed);
275 rs.handleTlsDesc(sharedExpr: R_TLSDESC_GOTPLT, ieExpr: R_GOTPLT, type, offset, addend, sym);
276 continue;
277 case R_386_TLS_DESC_CALL:
278 // For executables, TLSDESC is optimized to IE or LE. Use R_TPREL as the
279 // rewrites for this relocation are identical.
280 if (!ctx.arg.shared)
281 sec.addReloc(r: {.expr: R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
282 continue;
283
284 default:
285 Err(ctx) << getErrorLoc(ctx, loc: sec.content().data() + offset)
286 << "unknown relocation (" << type.v << ") against symbol "
287 << &sym;
288 continue;
289 }
290 rs.process(expr, type, offset, sym, addend);
291 }
292}
293
294void X86::scanSection(InputSectionBase &sec) {
295 elf::scanSection1<X86, ELF32LE>(target&: *this, sec);
296}
297
298int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
299 switch (type) {
300 case R_386_8:
301 case R_386_PC8:
302 return SignExtend64<8>(x: *buf);
303 case R_386_16:
304 case R_386_PC16:
305 return SignExtend64<16>(x: read16le(P: buf));
306 case R_386_32:
307 case R_386_GLOB_DAT:
308 case R_386_GOT32:
309 case R_386_GOT32X:
310 case R_386_GOTOFF:
311 case R_386_GOTPC:
312 case R_386_IRELATIVE:
313 case R_386_PC32:
314 case R_386_PLT32:
315 case R_386_RELATIVE:
316 case R_386_TLS_GOTDESC:
317 case R_386_TLS_DESC_CALL:
318 case R_386_TLS_DTPMOD32:
319 case R_386_TLS_DTPOFF32:
320 case R_386_TLS_LDO_32:
321 case R_386_TLS_LDM:
322 case R_386_TLS_IE:
323 case R_386_TLS_IE_32:
324 case R_386_TLS_LE:
325 case R_386_TLS_LE_32:
326 case R_386_TLS_GD:
327 case R_386_TLS_GD_32:
328 case R_386_TLS_GOTIE:
329 case R_386_TLS_TPOFF:
330 case R_386_TLS_TPOFF32:
331 return SignExtend64<32>(x: read32le(P: buf));
332 case R_386_TLS_DESC:
333 return SignExtend64<32>(x: read32le(P: buf + 4));
334 case R_386_NONE:
335 case R_386_JUMP_SLOT:
336 // These relocations are defined as not having an implicit addend.
337 return 0;
338 default:
339 InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
340 return 0;
341 }
342}
343
344void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
345 switch (rel.type) {
346 case R_386_8:
347 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
348 // being used for some 16-bit programs such as boot loaders, so
349 // we want to support them.
350 checkIntUInt(ctx, loc, v: val, n: 8, rel);
351 *loc = val;
352 break;
353 case R_386_PC8:
354 checkInt(ctx, loc, v: val, n: 8, rel);
355 *loc = val;
356 break;
357 case R_386_16:
358 checkIntUInt(ctx, loc, v: val, n: 16, rel);
359 write16le(P: loc, V: val);
360 break;
361 case R_386_PC16:
362 // R_386_PC16 is normally used with 16 bit code. In that situation
363 // the PC is 16 bits, just like the addend. This means that it can
364 // point from any 16 bit address to any other if the possibility
365 // of wrapping is included.
366 // The only restriction we have to check then is that the destination
367 // address fits in 16 bits. That is impossible to do here. The problem is
368 // that we are passed the final value, which already had the
369 // current location subtracted from it.
370 // We just check that Val fits in 17 bits. This misses some cases, but
371 // should have no false positives.
372 checkInt(ctx, loc, v: val, n: 17, rel);
373 write16le(P: loc, V: val);
374 break;
375 case R_386_32:
376 case R_386_GOT32:
377 case R_386_GOT32X:
378 case R_386_GOTOFF:
379 case R_386_GOTPC:
380 case R_386_PC32:
381 case R_386_PLT32:
382 case R_386_RELATIVE:
383 case R_386_TLS_GOTDESC:
384 case R_386_TLS_DESC_CALL:
385 case R_386_TLS_DTPMOD32:
386 case R_386_TLS_DTPOFF32:
387 case R_386_TLS_GD:
388 case R_386_TLS_GOTIE:
389 case R_386_TLS_IE:
390 case R_386_TLS_LDM:
391 case R_386_TLS_LDO_32:
392 case R_386_TLS_LE:
393 case R_386_TLS_LE_32:
394 case R_386_TLS_TPOFF:
395 case R_386_TLS_TPOFF32:
396 checkInt(ctx, loc, v: val, n: 32, rel);
397 write32le(P: loc, V: val);
398 break;
399 case R_386_TLS_DESC:
400 // The addend is stored in the second 32-bit word.
401 write32le(P: loc + 4, V: val);
402 break;
403 default:
404 llvm_unreachable("unknown relocation");
405 }
406}
407
408void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
409 uint64_t val) const {
410 if (rel.type == R_386_TLS_GD) {
411 // Convert (loc[-2] == 0x04)
412 // leal x@tlsgd(, %ebx, 1), %eax
413 // call ___tls_get_addr@plt
414 // or
415 // leal x@tlsgd(%reg), %eax
416 // call *___tls_get_addr@got(%reg)
417 // to
418 const uint8_t inst[] = {
419 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
420 0x81, 0xe8, 0, 0, 0, 0, // subl x@ntpoff(%ebx), %eax
421 };
422 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
423 memcpy(dest: w, src: inst, n: sizeof(inst));
424 write32le(P: w + 8, V: val);
425 } else if (rel.type == R_386_TLS_GOTDESC) {
426 // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
427 //
428 // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
429 if (memcmp(s1: loc - 2, s2: "\x8d\x83", n: 2)) {
430 ErrAlways(ctx)
431 << getErrorLoc(ctx, loc: loc - 2)
432 << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
433 return;
434 }
435 loc[-1] = 0x05;
436 write32le(P: loc, V: val);
437 } else {
438 // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
439 assert(rel.type == R_386_TLS_DESC_CALL);
440 loc[0] = 0x66;
441 loc[1] = 0x90;
442 }
443}
444
445void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
446 uint64_t val) const {
447 if (rel.type == R_386_TLS_GD) {
448 // Convert (loc[-2] == 0x04)
449 // leal x@tlsgd(, %ebx, 1), %eax
450 // call ___tls_get_addr@plt
451 // or
452 // leal x@tlsgd(%reg), %eax
453 // call *___tls_get_addr@got(%reg)
454 const uint8_t inst[] = {
455 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
456 0x03, 0x83, 0, 0, 0, 0, // addl x@gottpoff(%ebx), %eax
457 };
458 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
459 memcpy(dest: w, src: inst, n: sizeof(inst));
460 write32le(P: w + 8, V: val);
461 } else if (rel.type == R_386_TLS_GOTDESC) {
462 // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
463 if (memcmp(s1: loc - 2, s2: "\x8d\x83", n: 2)) {
464 ErrAlways(ctx)
465 << getErrorLoc(ctx, loc: loc - 2)
466 << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
467 return;
468 }
469 loc[-2] = 0x8b;
470 write32le(P: loc, V: val);
471 }
472}
473
474// In some conditions, relocations can be optimized to avoid using GOT.
475// This function does that for Initial Exec to Local Exec case.
476void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
477 uint64_t val) const {
478 // Ulrich's document section 6.2 says that @gotntpoff can
479 // be used with MOVL or ADDL instructions.
480 // @indntpoff is similar to @gotntpoff, but for use in
481 // position dependent code.
482 uint8_t reg = (loc[-1] >> 3) & 7;
483
484 if (rel.type == R_386_TLS_IE) {
485 if (loc[-1] == 0xa1) {
486 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
487 // This case is different from the generic case below because
488 // this is a 5 byte instruction while below is 6 bytes.
489 loc[-1] = 0xb8;
490 } else if (loc[-2] == 0x8b) {
491 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
492 loc[-2] = 0xc7;
493 loc[-1] = 0xc0 | reg;
494 } else {
495 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
496 loc[-2] = 0x81;
497 loc[-1] = 0xc0 | reg;
498 }
499 } else {
500 assert(rel.type == R_386_TLS_GOTIE);
501 if (loc[-2] == 0x8b) {
502 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
503 loc[-2] = 0xc7;
504 loc[-1] = 0xc0 | reg;
505 } else {
506 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
507 loc[-2] = 0x8d;
508 loc[-1] = 0x80 | (reg << 3) | reg;
509 }
510 }
511 write32le(P: loc, V: val);
512}
513
514void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
515 uint64_t val) const {
516 if (rel.type == R_386_TLS_LDO_32) {
517 write32le(P: loc, V: val);
518 return;
519 }
520
521 if (loc[4] == 0xe8) {
522 // Convert
523 // leal x(%reg),%eax
524 // call ___tls_get_addr@plt
525 // to
526 const uint8_t inst[] = {
527 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
528 0x90, // nop
529 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
530 };
531 memcpy(dest: loc - 2, src: inst, n: sizeof(inst));
532 return;
533 }
534
535 // Convert
536 // leal x(%reg),%eax
537 // call *___tls_get_addr@got(%reg)
538 // to
539 const uint8_t inst[] = {
540 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
541 0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
542 };
543 memcpy(dest: loc - 2, src: inst, n: sizeof(inst));
544}
545
546void X86::relocateAlloc(InputSection &sec, uint8_t *buf) const {
547 uint64_t secAddr = sec.getOutputSection()->addr + sec.outSecOff;
548 for (const Relocation &rel : sec.relocs()) {
549 uint8_t *loc = buf + rel.offset;
550 const uint64_t val =
551 SignExtend64(X: sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset), B: 32);
552 switch (rel.type) {
553 case R_386_TLS_GD:
554 case R_386_TLS_GOTDESC:
555 case R_386_TLS_DESC_CALL:
556 if (rel.expr == R_TPREL || rel.expr == R_TPREL_NEG)
557 relaxTlsGdToLe(loc, rel, val);
558 else if (rel.expr == R_GOTPLT)
559 relaxTlsGdToIe(loc, rel, val);
560 else
561 relocate(loc, rel, val);
562 continue;
563 case R_386_TLS_LDM:
564 case R_386_TLS_LDO_32:
565 if (rel.expr == R_TPREL)
566 relaxTlsLdToLe(loc, rel, val);
567 else
568 relocate(loc, rel, val);
569 continue;
570 case R_386_TLS_IE:
571 case R_386_TLS_GOTIE:
572 if (rel.expr == R_TPREL)
573 relaxTlsIeToLe(loc, rel, val);
574 else
575 relocate(loc, rel, val);
576 continue;
577 default:
578 relocate(loc, rel, val);
579 break;
580 }
581 }
582}
583
584// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
585// entries containing endbr32 instructions. A PLT entry will be split into two
586// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
587namespace {
588class IntelIBT : public X86 {
589public:
590 IntelIBT(Ctx &ctx) : X86(ctx) { pltHeaderSize = 0; }
591 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
592 void writePlt(uint8_t *buf, const Symbol &sym,
593 uint64_t pltEntryAddr) const override;
594 void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
595
596 static const unsigned IBTPltHeaderSize = 16;
597};
598} // namespace
599
600void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
601 uint64_t va = ctx.in.ibtPlt->getVA() + IBTPltHeaderSize +
602 s.getPltIdx(ctx) * pltEntrySize;
603 write32le(P: buf, V: va);
604}
605
606void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
607 uint64_t /*pltEntryAddr*/) const {
608 if (ctx.arg.isPic) {
609 const uint8_t inst[] = {
610 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
611 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx)
612 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
613 };
614 memcpy(dest: buf, src: inst, n: sizeof(inst));
615 write32le(P: buf + 6, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
616 return;
617 }
618
619 const uint8_t inst[] = {
620 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
621 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
622 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
623 };
624 memcpy(dest: buf, src: inst, n: sizeof(inst));
625 write32le(P: buf + 6, V: sym.getGotPltVA(ctx));
626}
627
628void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
629 writePltHeader(buf);
630 buf += IBTPltHeaderSize;
631
632 const uint8_t inst[] = {
633 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
634 0x68, 0, 0, 0, 0, // pushl $reloc_offset
635 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC
636 0x66, 0x90, // nop
637 };
638
639 for (size_t i = 0; i < numEntries; ++i) {
640 memcpy(dest: buf, src: inst, n: sizeof(inst));
641 write32le(P: buf + 5, V: i * sizeof(object::ELF32LE::Rel));
642 write32le(P: buf + 10, V: -pltHeaderSize - sizeof(inst) * i - 30);
643 buf += sizeof(inst);
644 }
645}
646
647namespace {
648class RetpolinePic : public X86 {
649public:
650 RetpolinePic(Ctx &);
651 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
652 void writePltHeader(uint8_t *buf) const override;
653 void writePlt(uint8_t *buf, const Symbol &sym,
654 uint64_t pltEntryAddr) const override;
655};
656
657class RetpolineNoPic : public X86 {
658public:
659 RetpolineNoPic(Ctx &);
660 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
661 void writePltHeader(uint8_t *buf) const override;
662 void writePlt(uint8_t *buf, const Symbol &sym,
663 uint64_t pltEntryAddr) const override;
664};
665} // namespace
666
667RetpolinePic::RetpolinePic(Ctx &ctx) : X86(ctx) {
668 pltHeaderSize = 48;
669 pltEntrySize = 32;
670 ipltEntrySize = 32;
671}
672
673void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
674 write32le(P: buf, V: s.getPltVA(ctx) + 17);
675}
676
677void RetpolinePic::writePltHeader(uint8_t *buf) const {
678 const uint8_t insn[] = {
679 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx)
680 0x50, // 6: pushl %eax
681 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax
682 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
683 0xf3, 0x90, // 12: loop: pause
684 0x0f, 0xae, 0xe8, // 14: lfence
685 0xeb, 0xf9, // 17: jmp loop
686 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
687 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
688 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
689 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
690 0x89, 0xc8, // 2b: mov %ecx, %eax
691 0x59, // 2d: pop %ecx
692 0xc3, // 2e: ret
693 0xcc, // 2f: int3; padding
694 };
695 memcpy(dest: buf, src: insn, n: sizeof(insn));
696}
697
698void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
699 uint64_t pltEntryAddr) const {
700 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
701 const uint8_t insn[] = {
702 0x50, // pushl %eax
703 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
704 0xe8, 0, 0, 0, 0, // call plt+0x20
705 0xe9, 0, 0, 0, 0, // jmp plt+0x12
706 0x68, 0, 0, 0, 0, // pushl $reloc_offset
707 0xe9, 0, 0, 0, 0, // jmp plt+0
708 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
709 };
710 memcpy(dest: buf, src: insn, n: sizeof(insn));
711
712 uint32_t ebx = ctx.in.gotPlt->getVA();
713 unsigned off = pltEntryAddr - ctx.in.plt->getVA();
714 write32le(P: buf + 3, V: sym.getGotPltVA(ctx) - ebx);
715 write32le(P: buf + 8, V: -off - 12 + 32);
716 write32le(P: buf + 13, V: -off - 17 + 18);
717 write32le(P: buf + 18, V: relOff);
718 write32le(P: buf + 23, V: -off - 27);
719}
720
721RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86(ctx) {
722 pltHeaderSize = 48;
723 pltEntrySize = 32;
724 ipltEntrySize = 32;
725}
726
727void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
728 write32le(P: buf, V: s.getPltVA(ctx) + 16);
729}
730
731void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
732 const uint8_t insn[] = {
733 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
734 0x50, // 6: pushl %eax
735 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
736 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
737 0xf3, 0x90, // 11: loop: pause
738 0x0f, 0xae, 0xe8, // 13: lfence
739 0xeb, 0xf9, // 16: jmp loop
740 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
741 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16
742 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
743 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
744 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
745 0x89, 0xc8, // 2b: mov %ecx, %eax
746 0x59, // 2d: pop %ecx
747 0xc3, // 2e: ret
748 0xcc, // 2f: int3; padding
749 };
750 memcpy(dest: buf, src: insn, n: sizeof(insn));
751
752 uint32_t gotPlt = ctx.in.gotPlt->getVA();
753 write32le(P: buf + 2, V: gotPlt + 4);
754 write32le(P: buf + 8, V: gotPlt + 8);
755}
756
757void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
758 uint64_t pltEntryAddr) const {
759 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
760 const uint8_t insn[] = {
761 0x50, // 0: pushl %eax
762 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
763 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
764 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
765 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
766 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
767 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
768 0xcc, // 1f: int3; padding
769 };
770 memcpy(dest: buf, src: insn, n: sizeof(insn));
771
772 unsigned off = pltEntryAddr - ctx.in.plt->getVA();
773 write32le(P: buf + 2, V: sym.getGotPltVA(ctx));
774 write32le(P: buf + 7, V: -off - 11 + 32);
775 write32le(P: buf + 12, V: -off - 16 + 17);
776 write32le(P: buf + 17, V: relOff);
777 write32le(P: buf + 22, V: -off - 26);
778}
779
780void elf::setX86TargetInfo(Ctx &ctx) {
781 if (ctx.arg.zRetpolineplt) {
782 if (ctx.arg.isPic)
783 ctx.target.reset(p: new RetpolinePic(ctx));
784 else
785 ctx.target.reset(p: new RetpolineNoPic(ctx));
786 return;
787 }
788
789 if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)
790 ctx.target.reset(p: new IntelIBT(ctx));
791 else
792 ctx.target.reset(p: new X86(ctx));
793}
794