1 | //===- X86.cpp ------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "OutputSections.h" |
10 | #include "Symbols.h" |
11 | #include "SyntheticSections.h" |
12 | #include "Target.h" |
13 | #include "llvm/Support/Endian.h" |
14 | |
15 | using namespace llvm; |
16 | using namespace llvm::support::endian; |
17 | using namespace llvm::ELF; |
18 | using namespace lld; |
19 | using namespace lld::elf; |
20 | |
21 | namespace { |
22 | class X86 : public TargetInfo { |
23 | public: |
24 | X86(Ctx &); |
25 | int getTlsGdRelaxSkip(RelType type) const override; |
26 | RelExpr getRelExpr(RelType type, const Symbol &s, |
27 | const uint8_t *loc) const override; |
28 | int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; |
29 | void writeGotPltHeader(uint8_t *buf) const override; |
30 | RelType getDynRel(RelType type) const override; |
31 | void writeGotPlt(uint8_t *buf, const Symbol &s) const override; |
32 | void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; |
33 | void writePltHeader(uint8_t *buf) const override; |
34 | void writePlt(uint8_t *buf, const Symbol &sym, |
35 | uint64_t pltEntryAddr) const override; |
36 | void relocate(uint8_t *loc, const Relocation &rel, |
37 | uint64_t val) const override; |
38 | |
39 | RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; |
40 | void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; |
41 | |
42 | private: |
43 | void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; |
44 | void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; |
45 | void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; |
46 | void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; |
47 | }; |
48 | } // namespace |
49 | |
50 | X86::X86(Ctx &ctx) : TargetInfo(ctx) { |
51 | copyRel = R_386_COPY; |
52 | gotRel = R_386_GLOB_DAT; |
53 | pltRel = R_386_JUMP_SLOT; |
54 | iRelativeRel = R_386_IRELATIVE; |
55 | relativeRel = R_386_RELATIVE; |
56 | symbolicRel = R_386_32; |
57 | tlsDescRel = R_386_TLS_DESC; |
58 | tlsGotRel = R_386_TLS_TPOFF; |
59 | tlsModuleIndexRel = R_386_TLS_DTPMOD32; |
60 | tlsOffsetRel = R_386_TLS_DTPOFF32; |
61 | gotBaseSymInGotPlt = true; |
62 | pltHeaderSize = 16; |
63 | pltEntrySize = 16; |
64 | ipltEntrySize = 16; |
65 | trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3 |
66 | |
67 | // Align to the non-PAE large page size (known as a superpage or huge page). |
68 | // FreeBSD automatically promotes large, superpage-aligned allocations. |
69 | defaultImageBase = 0x400000; |
70 | } |
71 | |
72 | int X86::getTlsGdRelaxSkip(RelType type) const { |
73 | // TLSDESC relocations are processed separately. See relaxTlsGdToLe below. |
74 | return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2; |
75 | } |
76 | |
77 | RelExpr X86::getRelExpr(RelType type, const Symbol &s, |
78 | const uint8_t *loc) const { |
79 | switch (type) { |
80 | case R_386_8: |
81 | case R_386_16: |
82 | case R_386_32: |
83 | return R_ABS; |
84 | case R_386_TLS_LDO_32: |
85 | return R_DTPREL; |
86 | case R_386_TLS_GD: |
87 | return R_TLSGD_GOTPLT; |
88 | case R_386_TLS_LDM: |
89 | return R_TLSLD_GOTPLT; |
90 | case R_386_PLT32: |
91 | return R_PLT_PC; |
92 | case R_386_PC8: |
93 | case R_386_PC16: |
94 | case R_386_PC32: |
95 | return R_PC; |
96 | case R_386_GOTPC: |
97 | return R_GOTPLTONLY_PC; |
98 | case R_386_TLS_IE: |
99 | return R_GOT; |
100 | case R_386_GOT32: |
101 | case R_386_GOT32X: |
102 | // These relocations are arguably mis-designed because their calculations |
103 | // depend on the instructions they are applied to. This is bad because we |
104 | // usually don't care about whether the target section contains valid |
105 | // machine instructions or not. But this is part of the documented ABI, so |
106 | // we had to implement as the standard requires. |
107 | // |
108 | // x86 does not support PC-relative data access. Therefore, in order to |
109 | // access GOT contents, a GOT address needs to be known at link-time |
110 | // (which means non-PIC) or compilers have to emit code to get a GOT |
111 | // address at runtime (which means code is position-independent but |
112 | // compilers need to emit extra code for each GOT access.) This decision |
113 | // is made at compile-time. In the latter case, compilers emit code to |
114 | // load a GOT address to a register, which is usually %ebx. |
115 | // |
116 | // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or |
117 | // foo@GOT(%ebx). |
118 | // |
119 | // foo@GOT is not usable in PIC. If we are creating a PIC output and if we |
120 | // find such relocation, we should report an error. foo@GOT is resolved to |
121 | // an *absolute* address of foo's GOT entry, because both GOT address and |
122 | // foo's offset are known. In other words, it's G + A. |
123 | // |
124 | // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to |
125 | // foo's GOT entry in the table, because GOT address is not known but foo's |
126 | // offset in the table is known. It's G + A - GOT. |
127 | // |
128 | // It's unfortunate that compilers emit the same relocation for these |
129 | // different use cases. In order to distinguish them, we have to read a |
130 | // machine instruction. |
131 | // |
132 | // The following code implements it. We assume that Loc[0] is the first byte |
133 | // of a displacement or an immediate field of a valid machine |
134 | // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at |
135 | // the byte, we can determine whether the instruction uses the operand as an |
136 | // absolute address (R_GOT) or a register-relative address (R_GOTPLT). |
137 | return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT; |
138 | case R_386_TLS_GOTDESC: |
139 | return R_TLSDESC_GOTPLT; |
140 | case R_386_TLS_DESC_CALL: |
141 | return R_TLSDESC_CALL; |
142 | case R_386_TLS_GOTIE: |
143 | return R_GOTPLT; |
144 | case R_386_GOTOFF: |
145 | return R_GOTPLTREL; |
146 | case R_386_TLS_LE: |
147 | return R_TPREL; |
148 | case R_386_TLS_LE_32: |
149 | return R_TPREL_NEG; |
150 | case R_386_NONE: |
151 | return R_NONE; |
152 | default: |
153 | Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v |
154 | << ") against symbol " << &s; |
155 | return R_NONE; |
156 | } |
157 | } |
158 | |
159 | RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const { |
160 | switch (expr) { |
161 | default: |
162 | return expr; |
163 | case R_RELAX_TLS_GD_TO_IE: |
164 | return R_RELAX_TLS_GD_TO_IE_GOTPLT; |
165 | case R_RELAX_TLS_GD_TO_LE: |
166 | return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG |
167 | : R_RELAX_TLS_GD_TO_LE; |
168 | } |
169 | } |
170 | |
171 | void X86::(uint8_t *buf) const { |
172 | write32le(P: buf, V: ctx.mainPart->dynamic->getVA()); |
173 | } |
174 | |
175 | void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const { |
176 | // Entries in .got.plt initially points back to the corresponding |
177 | // PLT entries with a fixed offset to skip the first instruction. |
178 | write32le(P: buf, V: s.getPltVA(ctx) + 6); |
179 | } |
180 | |
181 | void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const { |
182 | // An x86 entry is the address of the ifunc resolver function. |
183 | write32le(P: buf, V: s.getVA(ctx)); |
184 | } |
185 | |
186 | RelType X86::getDynRel(RelType type) const { |
187 | if (type == R_386_TLS_LE) |
188 | return R_386_TLS_TPOFF; |
189 | if (type == R_386_TLS_LE_32) |
190 | return R_386_TLS_TPOFF32; |
191 | return type; |
192 | } |
193 | |
194 | void X86::(uint8_t *buf) const { |
195 | if (ctx.arg.isPic) { |
196 | const uint8_t v[] = { |
197 | 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx) |
198 | 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx) |
199 | 0x90, 0x90, 0x90, 0x90 // nop |
200 | }; |
201 | memcpy(dest: buf, src: v, n: sizeof(v)); |
202 | return; |
203 | } |
204 | |
205 | const uint8_t pltData[] = { |
206 | 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4) |
207 | 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8) |
208 | 0x90, 0x90, 0x90, 0x90, // nop |
209 | }; |
210 | memcpy(dest: buf, src: pltData, n: sizeof(pltData)); |
211 | uint32_t gotPlt = ctx.in.gotPlt->getVA(); |
212 | write32le(P: buf + 2, V: gotPlt + 4); |
213 | write32le(P: buf + 8, V: gotPlt + 8); |
214 | } |
215 | |
216 | void X86::writePlt(uint8_t *buf, const Symbol &sym, |
217 | uint64_t pltEntryAddr) const { |
218 | unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx); |
219 | if (ctx.arg.isPic) { |
220 | const uint8_t inst[] = { |
221 | 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) |
222 | 0x68, 0, 0, 0, 0, // pushl $reloc_offset |
223 | 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC |
224 | }; |
225 | memcpy(dest: buf, src: inst, n: sizeof(inst)); |
226 | write32le(P: buf + 2, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA()); |
227 | } else { |
228 | const uint8_t inst[] = { |
229 | 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT |
230 | 0x68, 0, 0, 0, 0, // pushl $reloc_offset |
231 | 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC |
232 | }; |
233 | memcpy(dest: buf, src: inst, n: sizeof(inst)); |
234 | write32le(P: buf + 2, V: sym.getGotPltVA(ctx)); |
235 | } |
236 | |
237 | write32le(P: buf + 7, V: relOff); |
238 | write32le(P: buf + 12, V: ctx.in.plt->getVA() - pltEntryAddr - 16); |
239 | } |
240 | |
241 | int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const { |
242 | switch (type) { |
243 | case R_386_8: |
244 | case R_386_PC8: |
245 | return SignExtend64<8>(x: *buf); |
246 | case R_386_16: |
247 | case R_386_PC16: |
248 | return SignExtend64<16>(x: read16le(P: buf)); |
249 | case R_386_32: |
250 | case R_386_GLOB_DAT: |
251 | case R_386_GOT32: |
252 | case R_386_GOT32X: |
253 | case R_386_GOTOFF: |
254 | case R_386_GOTPC: |
255 | case R_386_IRELATIVE: |
256 | case R_386_PC32: |
257 | case R_386_PLT32: |
258 | case R_386_RELATIVE: |
259 | case R_386_TLS_GOTDESC: |
260 | case R_386_TLS_DESC_CALL: |
261 | case R_386_TLS_DTPMOD32: |
262 | case R_386_TLS_DTPOFF32: |
263 | case R_386_TLS_LDO_32: |
264 | case R_386_TLS_LDM: |
265 | case R_386_TLS_IE: |
266 | case R_386_TLS_IE_32: |
267 | case R_386_TLS_LE: |
268 | case R_386_TLS_LE_32: |
269 | case R_386_TLS_GD: |
270 | case R_386_TLS_GD_32: |
271 | case R_386_TLS_GOTIE: |
272 | case R_386_TLS_TPOFF: |
273 | case R_386_TLS_TPOFF32: |
274 | return SignExtend64<32>(x: read32le(P: buf)); |
275 | case R_386_TLS_DESC: |
276 | return SignExtend64<32>(x: read32le(P: buf + 4)); |
277 | case R_386_NONE: |
278 | case R_386_JUMP_SLOT: |
279 | // These relocations are defined as not having an implicit addend. |
280 | return 0; |
281 | default: |
282 | InternalErr(ctx, buf) << "cannot read addend for relocation " << type; |
283 | return 0; |
284 | } |
285 | } |
286 | |
287 | void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { |
288 | switch (rel.type) { |
289 | case R_386_8: |
290 | // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are |
291 | // being used for some 16-bit programs such as boot loaders, so |
292 | // we want to support them. |
293 | checkIntUInt(ctx, loc, v: val, n: 8, rel); |
294 | *loc = val; |
295 | break; |
296 | case R_386_PC8: |
297 | checkInt(ctx, loc, v: val, n: 8, rel); |
298 | *loc = val; |
299 | break; |
300 | case R_386_16: |
301 | checkIntUInt(ctx, loc, v: val, n: 16, rel); |
302 | write16le(P: loc, V: val); |
303 | break; |
304 | case R_386_PC16: |
305 | // R_386_PC16 is normally used with 16 bit code. In that situation |
306 | // the PC is 16 bits, just like the addend. This means that it can |
307 | // point from any 16 bit address to any other if the possibility |
308 | // of wrapping is included. |
309 | // The only restriction we have to check then is that the destination |
310 | // address fits in 16 bits. That is impossible to do here. The problem is |
311 | // that we are passed the final value, which already had the |
312 | // current location subtracted from it. |
313 | // We just check that Val fits in 17 bits. This misses some cases, but |
314 | // should have no false positives. |
315 | checkInt(ctx, loc, v: val, n: 17, rel); |
316 | write16le(P: loc, V: val); |
317 | break; |
318 | case R_386_32: |
319 | case R_386_GOT32: |
320 | case R_386_GOT32X: |
321 | case R_386_GOTOFF: |
322 | case R_386_GOTPC: |
323 | case R_386_PC32: |
324 | case R_386_PLT32: |
325 | case R_386_RELATIVE: |
326 | case R_386_TLS_GOTDESC: |
327 | case R_386_TLS_DESC_CALL: |
328 | case R_386_TLS_DTPMOD32: |
329 | case R_386_TLS_DTPOFF32: |
330 | case R_386_TLS_GD: |
331 | case R_386_TLS_GOTIE: |
332 | case R_386_TLS_IE: |
333 | case R_386_TLS_LDM: |
334 | case R_386_TLS_LDO_32: |
335 | case R_386_TLS_LE: |
336 | case R_386_TLS_LE_32: |
337 | case R_386_TLS_TPOFF: |
338 | case R_386_TLS_TPOFF32: |
339 | checkInt(ctx, loc, v: val, n: 32, rel); |
340 | write32le(P: loc, V: val); |
341 | break; |
342 | case R_386_TLS_DESC: |
343 | // The addend is stored in the second 32-bit word. |
344 | write32le(P: loc + 4, V: val); |
345 | break; |
346 | default: |
347 | llvm_unreachable("unknown relocation" ); |
348 | } |
349 | } |
350 | |
351 | void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, |
352 | uint64_t val) const { |
353 | if (rel.type == R_386_TLS_GD) { |
354 | // Convert (loc[-2] == 0x04) |
355 | // leal x@tlsgd(, %ebx, 1), %eax |
356 | // call ___tls_get_addr@plt |
357 | // or |
358 | // leal x@tlsgd(%reg), %eax |
359 | // call *___tls_get_addr@got(%reg) |
360 | // to |
361 | const uint8_t inst[] = { |
362 | 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax |
363 | 0x81, 0xe8, 0, 0, 0, 0, // subl x@ntpoff(%ebx), %eax |
364 | }; |
365 | uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2; |
366 | memcpy(dest: w, src: inst, n: sizeof(inst)); |
367 | write32le(P: w + 8, V: val); |
368 | } else if (rel.type == R_386_TLS_GOTDESC) { |
369 | // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax. |
370 | // |
371 | // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction. |
372 | if (memcmp(s1: loc - 2, s2: "\x8d\x83" , n: 2)) { |
373 | ErrAlways(ctx) |
374 | << getErrorLoc(ctx, loc: loc - 2) |
375 | << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax" ; |
376 | return; |
377 | } |
378 | loc[-1] = 0x05; |
379 | write32le(P: loc, V: val); |
380 | } else { |
381 | // Convert call *x@tlsdesc(%eax) to xchg ax, ax. |
382 | assert(rel.type == R_386_TLS_DESC_CALL); |
383 | loc[0] = 0x66; |
384 | loc[1] = 0x90; |
385 | } |
386 | } |
387 | |
388 | void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, |
389 | uint64_t val) const { |
390 | if (rel.type == R_386_TLS_GD) { |
391 | // Convert (loc[-2] == 0x04) |
392 | // leal x@tlsgd(, %ebx, 1), %eax |
393 | // call ___tls_get_addr@plt |
394 | // or |
395 | // leal x@tlsgd(%reg), %eax |
396 | // call *___tls_get_addr@got(%reg) |
397 | const uint8_t inst[] = { |
398 | 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax |
399 | 0x03, 0x83, 0, 0, 0, 0, // addl x@gottpoff(%ebx), %eax |
400 | }; |
401 | uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2; |
402 | memcpy(dest: w, src: inst, n: sizeof(inst)); |
403 | write32le(P: w + 8, V: val); |
404 | } else if (rel.type == R_386_TLS_GOTDESC) { |
405 | // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax. |
406 | if (memcmp(s1: loc - 2, s2: "\x8d\x83" , n: 2)) { |
407 | ErrAlways(ctx) |
408 | << getErrorLoc(ctx, loc: loc - 2) |
409 | << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax" ; |
410 | return; |
411 | } |
412 | loc[-2] = 0x8b; |
413 | write32le(P: loc, V: val); |
414 | } else { |
415 | // Convert call *x@tlsdesc(%eax) to xchg ax, ax. |
416 | assert(rel.type == R_386_TLS_DESC_CALL); |
417 | loc[0] = 0x66; |
418 | loc[1] = 0x90; |
419 | } |
420 | } |
421 | |
422 | // In some conditions, relocations can be optimized to avoid using GOT. |
423 | // This function does that for Initial Exec to Local Exec case. |
424 | void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, |
425 | uint64_t val) const { |
426 | // Ulrich's document section 6.2 says that @gotntpoff can |
427 | // be used with MOVL or ADDL instructions. |
428 | // @indntpoff is similar to @gotntpoff, but for use in |
429 | // position dependent code. |
430 | uint8_t reg = (loc[-1] >> 3) & 7; |
431 | |
432 | if (rel.type == R_386_TLS_IE) { |
433 | if (loc[-1] == 0xa1) { |
434 | // "movl foo@indntpoff,%eax" -> "movl $foo,%eax" |
435 | // This case is different from the generic case below because |
436 | // this is a 5 byte instruction while below is 6 bytes. |
437 | loc[-1] = 0xb8; |
438 | } else if (loc[-2] == 0x8b) { |
439 | // "movl foo@indntpoff,%reg" -> "movl $foo,%reg" |
440 | loc[-2] = 0xc7; |
441 | loc[-1] = 0xc0 | reg; |
442 | } else { |
443 | // "addl foo@indntpoff,%reg" -> "addl $foo,%reg" |
444 | loc[-2] = 0x81; |
445 | loc[-1] = 0xc0 | reg; |
446 | } |
447 | } else { |
448 | assert(rel.type == R_386_TLS_GOTIE); |
449 | if (loc[-2] == 0x8b) { |
450 | // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg" |
451 | loc[-2] = 0xc7; |
452 | loc[-1] = 0xc0 | reg; |
453 | } else { |
454 | // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg" |
455 | loc[-2] = 0x8d; |
456 | loc[-1] = 0x80 | (reg << 3) | reg; |
457 | } |
458 | } |
459 | write32le(P: loc, V: val); |
460 | } |
461 | |
462 | void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, |
463 | uint64_t val) const { |
464 | if (rel.type == R_386_TLS_LDO_32) { |
465 | write32le(P: loc, V: val); |
466 | return; |
467 | } |
468 | |
469 | if (loc[4] == 0xe8) { |
470 | // Convert |
471 | // leal x(%reg),%eax |
472 | // call ___tls_get_addr@plt |
473 | // to |
474 | const uint8_t inst[] = { |
475 | 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax |
476 | 0x90, // nop |
477 | 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi |
478 | }; |
479 | memcpy(dest: loc - 2, src: inst, n: sizeof(inst)); |
480 | return; |
481 | } |
482 | |
483 | // Convert |
484 | // leal x(%reg),%eax |
485 | // call *___tls_get_addr@got(%reg) |
486 | // to |
487 | const uint8_t inst[] = { |
488 | 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax |
489 | 0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi |
490 | }; |
491 | memcpy(dest: loc - 2, src: inst, n: sizeof(inst)); |
492 | } |
493 | |
494 | void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { |
495 | uint64_t secAddr = sec.getOutputSection()->addr; |
496 | if (auto *s = dyn_cast<InputSection>(Val: &sec)) |
497 | secAddr += s->outSecOff; |
498 | for (const Relocation &rel : sec.relocs()) { |
499 | uint8_t *loc = buf + rel.offset; |
500 | const uint64_t val = |
501 | SignExtend64(X: sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset), B: 32); |
502 | switch (rel.expr) { |
503 | case R_RELAX_TLS_GD_TO_IE_GOTPLT: |
504 | relaxTlsGdToIe(loc, rel, val); |
505 | continue; |
506 | case R_RELAX_TLS_GD_TO_LE: |
507 | case R_RELAX_TLS_GD_TO_LE_NEG: |
508 | relaxTlsGdToLe(loc, rel, val); |
509 | continue; |
510 | case R_RELAX_TLS_LD_TO_LE: |
511 | relaxTlsLdToLe(loc, rel, val); |
512 | break; |
513 | case R_RELAX_TLS_IE_TO_LE: |
514 | relaxTlsIeToLe(loc, rel, val); |
515 | continue; |
516 | default: |
517 | relocate(loc, rel, val); |
518 | break; |
519 | } |
520 | } |
521 | } |
522 | |
523 | // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT |
524 | // entries containing endbr32 instructions. A PLT entry will be split into two |
525 | // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt). |
526 | namespace { |
527 | class IntelIBT : public X86 { |
528 | public: |
529 | IntelIBT(Ctx &ctx) : X86(ctx) { pltHeaderSize = 0; } |
530 | void writeGotPlt(uint8_t *buf, const Symbol &s) const override; |
531 | void writePlt(uint8_t *buf, const Symbol &sym, |
532 | uint64_t pltEntryAddr) const override; |
533 | void writeIBTPlt(uint8_t *buf, size_t numEntries) const override; |
534 | |
535 | static const unsigned = 16; |
536 | }; |
537 | } // namespace |
538 | |
539 | void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const { |
540 | uint64_t va = ctx.in.ibtPlt->getVA() + IBTPltHeaderSize + |
541 | s.getPltIdx(ctx) * pltEntrySize; |
542 | write32le(P: buf, V: va); |
543 | } |
544 | |
545 | void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym, |
546 | uint64_t /*pltEntryAddr*/) const { |
547 | if (ctx.arg.isPic) { |
548 | const uint8_t inst[] = { |
549 | 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 |
550 | 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx) |
551 | 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop |
552 | }; |
553 | memcpy(dest: buf, src: inst, n: sizeof(inst)); |
554 | write32le(P: buf + 6, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA()); |
555 | return; |
556 | } |
557 | |
558 | const uint8_t inst[] = { |
559 | 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 |
560 | 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT |
561 | 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop |
562 | }; |
563 | memcpy(dest: buf, src: inst, n: sizeof(inst)); |
564 | write32le(P: buf + 6, V: sym.getGotPltVA(ctx)); |
565 | } |
566 | |
567 | void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const { |
568 | writePltHeader(buf); |
569 | buf += IBTPltHeaderSize; |
570 | |
571 | const uint8_t inst[] = { |
572 | 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 |
573 | 0x68, 0, 0, 0, 0, // pushl $reloc_offset |
574 | 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC |
575 | 0x66, 0x90, // nop |
576 | }; |
577 | |
578 | for (size_t i = 0; i < numEntries; ++i) { |
579 | memcpy(dest: buf, src: inst, n: sizeof(inst)); |
580 | write32le(P: buf + 5, V: i * sizeof(object::ELF32LE::Rel)); |
581 | write32le(P: buf + 10, V: -pltHeaderSize - sizeof(inst) * i - 30); |
582 | buf += sizeof(inst); |
583 | } |
584 | } |
585 | |
586 | namespace { |
587 | class RetpolinePic : public X86 { |
588 | public: |
589 | RetpolinePic(Ctx &); |
590 | void writeGotPlt(uint8_t *buf, const Symbol &s) const override; |
591 | void writePltHeader(uint8_t *buf) const override; |
592 | void writePlt(uint8_t *buf, const Symbol &sym, |
593 | uint64_t pltEntryAddr) const override; |
594 | }; |
595 | |
596 | class RetpolineNoPic : public X86 { |
597 | public: |
598 | RetpolineNoPic(Ctx &); |
599 | void writeGotPlt(uint8_t *buf, const Symbol &s) const override; |
600 | void writePltHeader(uint8_t *buf) const override; |
601 | void writePlt(uint8_t *buf, const Symbol &sym, |
602 | uint64_t pltEntryAddr) const override; |
603 | }; |
604 | } // namespace |
605 | |
606 | RetpolinePic::RetpolinePic(Ctx &ctx) : X86(ctx) { |
607 | pltHeaderSize = 48; |
608 | pltEntrySize = 32; |
609 | ipltEntrySize = 32; |
610 | } |
611 | |
612 | void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const { |
613 | write32le(P: buf, V: s.getPltVA(ctx) + 17); |
614 | } |
615 | |
616 | void RetpolinePic::(uint8_t *buf) const { |
617 | const uint8_t insn[] = { |
618 | 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx) |
619 | 0x50, // 6: pushl %eax |
620 | 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax |
621 | 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next |
622 | 0xf3, 0x90, // 12: loop: pause |
623 | 0x0f, 0xae, 0xe8, // 14: lfence |
624 | 0xeb, 0xf9, // 17: jmp loop |
625 | 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16 |
626 | 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) |
627 | 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx |
628 | 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) |
629 | 0x89, 0xc8, // 2b: mov %ecx, %eax |
630 | 0x59, // 2d: pop %ecx |
631 | 0xc3, // 2e: ret |
632 | 0xcc, // 2f: int3; padding |
633 | }; |
634 | memcpy(dest: buf, src: insn, n: sizeof(insn)); |
635 | } |
636 | |
637 | void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym, |
638 | uint64_t pltEntryAddr) const { |
639 | unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx); |
640 | const uint8_t insn[] = { |
641 | 0x50, // pushl %eax |
642 | 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax |
643 | 0xe8, 0, 0, 0, 0, // call plt+0x20 |
644 | 0xe9, 0, 0, 0, 0, // jmp plt+0x12 |
645 | 0x68, 0, 0, 0, 0, // pushl $reloc_offset |
646 | 0xe9, 0, 0, 0, 0, // jmp plt+0 |
647 | 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding |
648 | }; |
649 | memcpy(dest: buf, src: insn, n: sizeof(insn)); |
650 | |
651 | uint32_t ebx = ctx.in.gotPlt->getVA(); |
652 | unsigned off = pltEntryAddr - ctx.in.plt->getVA(); |
653 | write32le(P: buf + 3, V: sym.getGotPltVA(ctx) - ebx); |
654 | write32le(P: buf + 8, V: -off - 12 + 32); |
655 | write32le(P: buf + 13, V: -off - 17 + 18); |
656 | write32le(P: buf + 18, V: relOff); |
657 | write32le(P: buf + 23, V: -off - 27); |
658 | } |
659 | |
660 | RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86(ctx) { |
661 | pltHeaderSize = 48; |
662 | pltEntrySize = 32; |
663 | ipltEntrySize = 32; |
664 | } |
665 | |
666 | void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const { |
667 | write32le(P: buf, V: s.getPltVA(ctx) + 16); |
668 | } |
669 | |
670 | void RetpolineNoPic::(uint8_t *buf) const { |
671 | const uint8_t insn[] = { |
672 | 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4 |
673 | 0x50, // 6: pushl %eax |
674 | 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax |
675 | 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next |
676 | 0xf3, 0x90, // 11: loop: pause |
677 | 0x0f, 0xae, 0xe8, // 13: lfence |
678 | 0xeb, 0xf9, // 16: jmp loop |
679 | 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3 |
680 | 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16 |
681 | 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) |
682 | 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx |
683 | 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) |
684 | 0x89, 0xc8, // 2b: mov %ecx, %eax |
685 | 0x59, // 2d: pop %ecx |
686 | 0xc3, // 2e: ret |
687 | 0xcc, // 2f: int3; padding |
688 | }; |
689 | memcpy(dest: buf, src: insn, n: sizeof(insn)); |
690 | |
691 | uint32_t gotPlt = ctx.in.gotPlt->getVA(); |
692 | write32le(P: buf + 2, V: gotPlt + 4); |
693 | write32le(P: buf + 8, V: gotPlt + 8); |
694 | } |
695 | |
696 | void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym, |
697 | uint64_t pltEntryAddr) const { |
698 | unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx); |
699 | const uint8_t insn[] = { |
700 | 0x50, // 0: pushl %eax |
701 | 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax |
702 | 0xe8, 0, 0, 0, 0, // 6: call plt+0x20 |
703 | 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11 |
704 | 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset |
705 | 0xe9, 0, 0, 0, 0, // 15: jmp plt+0 |
706 | 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding |
707 | 0xcc, // 1f: int3; padding |
708 | }; |
709 | memcpy(dest: buf, src: insn, n: sizeof(insn)); |
710 | |
711 | unsigned off = pltEntryAddr - ctx.in.plt->getVA(); |
712 | write32le(P: buf + 2, V: sym.getGotPltVA(ctx)); |
713 | write32le(P: buf + 7, V: -off - 11 + 32); |
714 | write32le(P: buf + 12, V: -off - 16 + 17); |
715 | write32le(P: buf + 17, V: relOff); |
716 | write32le(P: buf + 22, V: -off - 26); |
717 | } |
718 | |
719 | void elf::setX86TargetInfo(Ctx &ctx) { |
720 | if (ctx.arg.zRetpolineplt) { |
721 | if (ctx.arg.isPic) |
722 | ctx.target.reset(p: new RetpolinePic(ctx)); |
723 | else |
724 | ctx.target.reset(p: new RetpolineNoPic(ctx)); |
725 | return; |
726 | } |
727 | |
728 | if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) |
729 | ctx.target.reset(p: new IntelIBT(ctx)); |
730 | else |
731 | ctx.target.reset(p: new X86(ctx)); |
732 | } |
733 | |