1//===- RISCV.cpp ----------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "OutputSections.h"
11#include "RelocScan.h"
12#include "Symbols.h"
13#include "SyntheticSections.h"
14#include "Target.h"
15#include "llvm/Support/ELFAttributes.h"
16#include "llvm/Support/LEB128.h"
17#include "llvm/Support/RISCVAttributeParser.h"
18#include "llvm/Support/RISCVAttributes.h"
19#include "llvm/Support/TimeProfiler.h"
20#include "llvm/TargetParser/RISCVISAInfo.h"
21
22using namespace llvm;
23using namespace llvm::object;
24using namespace llvm::support::endian;
25using namespace llvm::ELF;
26using namespace lld;
27using namespace lld::elf;
28
29namespace {
30
// RISC-V (RV32/RV64 little-endian) target. Implements relocation scanning,
// GOT/PLT synthesis, and linker relaxation driven by R_RISCV_RELAX and
// R_RISCV_ALIGN.
class RISCV final : public TargetInfo {
public:
  RISCV(Ctx &);
  // Merge per-object e_flags (RVC, float ABI, RVE) into the output value.
  uint32_t calcEFlags() const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotHeader(uint8_t *buf) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  // Scan one section's relocations; the ELFT/RelTy combination is selected
  // by scanSection().
  template <class ELFT, class RelTy>
  void scanSectionImpl(InputSectionBase &, Relocs<RelTy>);
  void scanSection(InputSectionBase &) override;
  RelType getDynRel(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  void relocateAlloc(InputSection &sec, uint8_t *buf) const override;
  bool relaxOnce(int pass) const override;
  template <class ELFT, class RelTy>
  bool synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
                               Relocs<RelTy> rels);
  template <class ELFT, class RelTy>
  void finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
                                Relocs<RelTy> rels);
  template <class ELFT>
  bool synthesizeAlignAux(uint64_t &dot, InputSection *sec);
  bool synthesizeAlign(uint64_t &dot, InputSection *sec) override;
  void finalizeRelax(int passes) const override;

  // The following two variables are used by synthesized ALIGN relocations.
  // NOTE(review): definitions of the synthesizeAlign* members are not in this
  // chunk; baseSec presumably tracks the section currently being processed —
  // confirm against the rest of the file.
  InputSection *baseSec = nullptr;
  // r_offset and r_addend pairs.
  SmallVector<std::pair<uint64_t, uint64_t>, 0> synthesizedAligns;
};
68
69} // end anonymous namespace
70
71// These are internal relocation numbers for GP/X0 relaxation. They aren't part
72// of the psABI spec.
73#define INTERNAL_R_RISCV_GPREL_I 256
74#define INTERNAL_R_RISCV_GPREL_S 257
75#define INTERNAL_R_RISCV_X0REL_I 258
76#define INTERNAL_R_RISCV_X0REL_S 259
77
// Bias subtracted from DTP-relative TLS values; see the
// R_RISCV_TLS_DTPREL32/64 cases in RISCV::relocate below.
const uint64_t dtpOffset = 0x800;
79
80namespace {
// Encodings of the instructions the linker materializes. Each value packs the
// 7-bit opcode and, where the format requires it, funct3 in bits 14:12
// (LW = 2, LD = 3, SRLI = 5) and funct7 in bits 31:25 (SUB = 0x20).
enum Op {
  ADDI = 0x13,
  AUIPC = 0x17,
  JALR = 0x67,
  LD = 0x3003,
  LUI = 0x37,
  LW = 0x2003,
  SRLI = 0x5013,
  SUB = 0x40000033,
};
91
// ABI register numbers referenced by the synthesized code sequences.
enum Reg {
  X_X0 = 0,  // zero register (x0-relative relaxation)
  X_RA = 1,  // return address
  X_GP = 3,  // global pointer (GP-relative relaxation)
  X_TP = 4,  // thread pointer (TLS LE relaxation)
  X_T0 = 5,
  X_T1 = 6,
  X_T2 = 7,
  X_A0 = 10, // TLSDESC argument/result register
  X_T3 = 28,
};
103} // namespace
104
105static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; }
106static uint32_t lo12(uint32_t val) { return val & 4095; }
107
108static uint32_t itype(uint32_t op, uint32_t rd, uint32_t rs1, uint32_t imm) {
109 return op | (rd << 7) | (rs1 << 15) | (imm << 20);
110}
111static uint32_t rtype(uint32_t op, uint32_t rd, uint32_t rs1, uint32_t rs2) {
112 return op | (rd << 7) | (rs1 << 15) | (rs2 << 20);
113}
114static uint32_t utype(uint32_t op, uint32_t rd, uint32_t imm) {
115 return op | (rd << 7) | (imm << 12);
116}
117
118// Extract bits v[begin:end], where range is inclusive, and begin must be < 63.
119static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
120 return (v & ((1ULL << (begin + 1)) - 1)) >> end;
121}
122
123static uint32_t setLO12_I(uint32_t insn, uint32_t imm) {
124 return (insn & 0xfffff) | (imm << 20);
125}
126static uint32_t setLO12_S(uint32_t insn, uint32_t imm) {
127 return (insn & 0x1fff07f) | (extractBits(v: imm, begin: 11, end: 5) << 25) |
128 (extractBits(v: imm, begin: 4, end: 0) << 7);
129}
130
// Configure the TargetInfo constants consulted by the generic linker code:
// dynamic relocation types and GOT/PLT header layout for RV32/RV64.
RISCV::RISCV(Ctx &ctx) : TargetInfo(ctx) {
  copyRel = R_RISCV_COPY;
  pltRel = R_RISCV_JUMP_SLOT;
  relativeRel = R_RISCV_RELATIVE;
  iRelativeRel = R_RISCV_IRELATIVE;
  // Word-sized and TLS dynamic relocations come in 32- and 64-bit flavors.
  if (ctx.arg.is64) {
    symbolicRel = R_RISCV_64;
    tlsModuleIndexRel = R_RISCV_TLS_DTPMOD64;
    tlsOffsetRel = R_RISCV_TLS_DTPREL64;
    tlsGotRel = R_RISCV_TLS_TPREL64;
  } else {
    symbolicRel = R_RISCV_32;
    tlsModuleIndexRel = R_RISCV_TLS_DTPMOD32;
    tlsOffsetRel = R_RISCV_TLS_DTPREL32;
    tlsGotRel = R_RISCV_TLS_TPREL32;
  }
  gotRel = symbolicRel;
  tlsDescRel = R_RISCV_TLSDESC;

  // .got[0] = _DYNAMIC
  gotHeaderEntriesNum = 1;

  // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map
  gotPltHeaderEntriesNum = 2;

  // Sizes match the sequences emitted by writePltHeader/writePlt below.
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
}
160
161static uint32_t getEFlags(Ctx &ctx, InputFile *f) {
162 if (ctx.arg.is64)
163 return cast<ObjFile<ELF64LE>>(Val: f)->getObj().getHeader().e_flags;
164 return cast<ObjFile<ELF32LE>>(Val: f)->getObj().getHeader().e_flags;
165}
166
167uint32_t RISCV::calcEFlags() const {
168 // If there are only binary input files (from -b binary), use a
169 // value of 0 for the ELF header flags.
170 if (ctx.objectFiles.empty())
171 return 0;
172
173 uint32_t target = getEFlags(ctx, f: ctx.objectFiles.front());
174 for (InputFile *f : ctx.objectFiles) {
175 uint32_t eflags = getEFlags(ctx, f);
176 if (eflags & EF_RISCV_RVC)
177 target |= EF_RISCV_RVC;
178
179 if ((eflags & EF_RISCV_FLOAT_ABI) != (target & EF_RISCV_FLOAT_ABI))
180 Err(ctx) << f
181 << ": cannot link object files with different "
182 "floating-point ABI from "
183 << ctx.objectFiles[0];
184
185 if ((eflags & EF_RISCV_RVE) != (target & EF_RISCV_RVE))
186 Err(ctx) << f << ": cannot link object files with different EF_RISCV_RVE";
187 }
188
189 return target;
190}
191
// Read the addend stored in the location targeted by a relocation of the
// given type. Only the types listed here carry an in-place addend.
int64_t RISCV::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  default:
    InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
    return 0;
  // 32-bit fields, sign-extended to 64 bits.
  case R_RISCV_32:
  case R_RISCV_TLS_DTPMOD32:
  case R_RISCV_TLS_DTPREL32:
  case R_RISCV_TLS_TPREL32:
    return SignExtend64<32>(x: read32le(P: buf));
  // 64-bit fields.
  case R_RISCV_64:
  case R_RISCV_TLS_DTPMOD64:
  case R_RISCV_TLS_DTPREL64:
  case R_RISCV_TLS_TPREL64:
    return read64le(P: buf);
  // Word-sized fields; width depends on the ELF class.
  case R_RISCV_RELATIVE:
  case R_RISCV_IRELATIVE:
    return ctx.arg.is64 ? read64le(P: buf) : read32le(P: buf);
  case R_RISCV_NONE:
  case R_RISCV_JUMP_SLOT:
    // These relocations are defined as not having an implicit addend.
    return 0;
  case R_RISCV_TLSDESC:
    // The addend lives in the second word of the two-word TLSDESC slot.
    return ctx.arg.is64 ? read64le(P: buf + 8) : read32le(P: buf + 4);
  }
}
218
219void RISCV::writeGotHeader(uint8_t *buf) const {
220 if (ctx.arg.is64)
221 write64le(P: buf, V: ctx.mainPart->dynamic->getVA());
222 else
223 write32le(P: buf, V: ctx.mainPart->dynamic->getVA());
224}
225
226void RISCV::writeGotPlt(uint8_t *buf, const Symbol &s) const {
227 if (ctx.arg.is64)
228 write64le(P: buf, V: ctx.in.plt->getVA());
229 else
230 write32le(P: buf, V: ctx.in.plt->getVA());
231}
232
233void RISCV::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
234 if (ctx.arg.writeAddends) {
235 if (ctx.arg.is64)
236 write64le(P: buf, V: s.getVA(ctx));
237 else
238 write32le(P: buf, V: s.getVA(ctx));
239 }
240}
241
// Write the 32-byte lazy-binding PLT header. On entry from a PLT stub, t1
// holds the stub's address; the header derives the .got.plt index from it
// and jumps to _dl_runtime_resolve (loaded from .got.plt[0]).
void RISCV::writePltHeader(uint8_t *buf) const {
  // 1: auipc t2, %pcrel_hi(.got.plt)
  //    sub t1, t1, t3
  //    l[wd] t3, %pcrel_lo(1b)(t2); t3 = _dl_runtime_resolve
  //    addi t1, t1, -pltHeaderSize-12; t1 = &.plt[i] - &.plt[0]
  //    addi t0, t2, %pcrel_lo(1b)
  //    srli t1, t1, (rv64?1:2); t1 = &.got.plt[i] - &.got.plt[0]
  //    l[wd] t0, Wordsize(t0); t0 = link_map
  //    jr t3
  uint32_t offset = ctx.in.gotPlt->getVA() - ctx.in.plt->getVA();
  uint32_t load = ctx.arg.is64 ? LD : LW;
  write32le(P: buf + 0, V: utype(op: AUIPC, rd: X_T2, imm: hi20(val: offset)));
  write32le(P: buf + 4, V: rtype(op: SUB, rd: X_T1, rs1: X_T1, rs2: X_T3));
  write32le(P: buf + 8, V: itype(op: load, rd: X_T3, rs1: X_T2, imm: lo12(val: offset)));
  write32le(P: buf + 12, V: itype(op: ADDI, rd: X_T1, rs1: X_T1, imm: -ctx.target->pltHeaderSize - 12));
  write32le(P: buf + 16, V: itype(op: ADDI, rd: X_T0, rs1: X_T2, imm: lo12(val: offset)));
  write32le(P: buf + 20, V: itype(op: SRLI, rd: X_T1, rs1: X_T1, imm: ctx.arg.is64 ? 1 : 2));
  write32le(P: buf + 24, V: itype(op: load, rd: X_T0, rs1: X_T0, imm: ctx.arg.wordsize));
  write32le(P: buf + 28, V: itype(op: JALR, rd: 0, rs1: X_T3, imm: 0));
}
262
// Write one 16-byte PLT entry: load the target address from its .got.plt
// slot and jump to it, leaving the stub address in t1 for the lazy resolver.
void RISCV::writePlt(uint8_t *buf, const Symbol &sym,
                     uint64_t pltEntryAddr) const {
  // 1: auipc t3, %pcrel_hi(f@.got.plt)
  //    l[wd] t3, %pcrel_lo(1b)(t3)
  //    jalr t1, t3
  //    nop
  uint32_t offset = sym.getGotPltVA(ctx) - pltEntryAddr;
  write32le(P: buf + 0, V: utype(op: AUIPC, rd: X_T3, imm: hi20(val: offset)));
  write32le(P: buf + 4, V: itype(op: ctx.arg.is64 ? LD : LW, rd: X_T3, rs1: X_T3, imm: lo12(val: offset)));
  write32le(P: buf + 8, V: itype(op: JALR, rd: X_T1, rs1: X_T3, imm: 0));
  write32le(P: buf + 12, V: itype(op: ADDI, rd: 0, rs1: 0, imm: 0));
}
275
276RelType RISCV::getDynRel(RelType type) const {
277 return type == ctx.target->symbolicRel ? type
278 : static_cast<RelType>(R_RISCV_NONE);
279}
280
281// Only needed to support relocations used by relocateNonAlloc and
282// preprocessRelocs.
// Only needed to support relocations used by relocateNonAlloc and
// preprocessRelocs.
RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
                          const uint8_t *loc) const {
  switch (type) {
  case R_RISCV_NONE:
    return R_NONE;
  // Word-sized absolute values.
  case R_RISCV_32:
  case R_RISCV_64:
    return R_ABS;
  // Paired add/set/sub relocations used to compute symbol differences.
  case R_RISCV_ADD8:
  case R_RISCV_ADD16:
  case R_RISCV_ADD32:
  case R_RISCV_ADD64:
  case R_RISCV_SET6:
  case R_RISCV_SET8:
  case R_RISCV_SET16:
  case R_RISCV_SET32:
  case R_RISCV_SUB6:
  case R_RISCV_SUB8:
  case R_RISCV_SUB16:
  case R_RISCV_SUB32:
  case R_RISCV_SUB64:
    return RE_RISCV_ADD;
  case R_RISCV_32_PCREL:
    return R_PC;
  // ULEB128 pairs (e.g. in .debug_* sections).
  case R_RISCV_SET_ULEB128:
  case R_RISCV_SUB_ULEB128:
    return RE_RISCV_LEB128;
  default:
    Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
             << ") against symbol " << &s;
    return R_NONE;
  }
}
316
// Scan all relocations of `sec`, classifying each type into a RelExpr and
// delegating GOT/PLT/TLS bookkeeping to RelocScan. The resulting
// sec.relocations list is sorted by offset at the end.
template <class ELFT, class RelTy>
void RISCV::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
  RelocScan rs(ctx, &sec);
  // Many relocations end up in sec.relocations.
  sec.relocations.reserve(N: rels.size());

  // Vendor namespace established by the most recent R_RISCV_VENDOR; used
  // (and cleared) when diagnosing the following vendor-range relocation.
  StringRef vendor;
  for (auto it = rels.begin(); it != rels.end(); ++it) {
    RelType type = it->getType(false);
    uint32_t symIndex = it->getSymbol(false);
    Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIndex);
    uint64_t offset = it->r_offset;
    // Diagnose references to undefined symbols; index 0 is the null symbol.
    if (sym.isUndefined() && symIndex != 0 &&
        rs.maybeReportUndefined(sym&: cast<Undefined>(Val&: sym), offset))
      continue;
    int64_t addend = rs.getAddend<ELFT>(*it, type);
    RelExpr expr;
    // Relocation types that only need a RelExpr set `expr` and break out of
    // the switch to reach rs.process(). Types that need special handling
    // (fast-path helpers, TLS) call a handler and use `continue`.
    switch (type) {
    case R_RISCV_NONE:
      continue;

    // Absolute relocations:
    case R_RISCV_32:
    case R_RISCV_64:
    case R_RISCV_HI20:
    case R_RISCV_LO12_I:
    case R_RISCV_LO12_S:
      expr = R_ABS;
      break;

    // PC-relative relocations:
    case R_RISCV_JAL:
    case R_RISCV_BRANCH:
    case R_RISCV_PCREL_HI20:
    case R_RISCV_RVC_BRANCH:
    case R_RISCV_RVC_JUMP:
    case R_RISCV_32_PCREL:
      rs.processR_PC(type, offset, addend, sym);
      continue;
    // The PCREL_LO12 halves reference the label of the paired AUIPC rather
    // than the final target.
    case R_RISCV_PCREL_LO12_I:
    case R_RISCV_PCREL_LO12_S:
      expr = RE_RISCV_PC_INDIRECT;
      break;

    // PLT-generating relocations:
    case R_RISCV_CALL:
    case R_RISCV_CALL_PLT:
    case R_RISCV_PLT32:
      rs.processR_PLT_PC(type, offset, addend, sym);
      continue;

    // GOT-generating relocations:
    case R_RISCV_GOT_HI20:
    case R_RISCV_GOT32_PCREL:
      expr = R_GOT_PC;
      break;

    // TLS relocations:
    case R_RISCV_TPREL_HI20:
    case R_RISCV_TPREL_LO12_I:
    case R_RISCV_TPREL_LO12_S:
      if (rs.checkTlsLe(offset, sym, type))
        continue;
      expr = R_TPREL;
      break;
    case R_RISCV_TLS_GOT_HI20:
      // There is no IE to LE optimization.
      rs.handleTlsIe<false>(ieExpr: R_GOT_PC, type, offset, addend, sym);
      continue;
    case R_RISCV_TLS_GD_HI20:
      // There is no GD to IE/LE optimization.
      rs.handleTlsGd(sharedExpr: R_TLSGD_PC, ieExpr: R_NONE, leExpr: R_NONE, type, offset, addend, sym);
      continue;

    // TLSDESC relocations:
    case R_RISCV_TLSDESC_HI20:
      rs.handleTlsDesc(sharedExpr: R_TLSDESC_PC, ieExpr: R_GOT_PC, type, offset, addend, sym);
      continue;
    case R_RISCV_TLSDESC_LOAD_LO12:
    case R_RISCV_TLSDESC_ADD_LO12:
      // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} reference a label, not the
      // TLS symbol, so we cannot use handleTlsDesc (which sets NEEDS_TLSDESC).
      // For TLSDESC->IE, use R_TPREL as well, but relocateAlloc uses isToLe
      // (from HI20) to select the correct transform.
      sec.addReloc(r: {.expr: ctx.arg.shared ? R_TLSDESC_PC : R_TPREL, .type: type, .offset: offset,
                    .addend: addend, .sym: &sym});
      continue;
    case R_RISCV_TLSDESC_CALL:
      if (!ctx.arg.shared)
        sec.addReloc(r: {.expr: R_TPREL, .type: type, .offset: offset, .addend: addend, .sym: &sym});
      continue;

    // Relaxation hints:
    case R_RISCV_ALIGN:
      // R_RISCV_ALIGN is recorded even without --relax.
      sec.addReloc(r: {.expr: R_RELAX_HINT, .type: type, .offset: offset, .addend: addend, .sym: &sym});
      continue;
    case R_RISCV_TPREL_ADD:
    case R_RISCV_RELAX:
      if (ctx.arg.relax)
        sec.addReloc(r: {.expr: R_RELAX_HINT, .type: type, .offset: offset, .addend: addend, .sym: &sym});
      continue;

    // Misc relocations:
    case R_RISCV_ADD8:
    case R_RISCV_ADD16:
    case R_RISCV_ADD32:
    case R_RISCV_ADD64:
    case R_RISCV_SET6:
    case R_RISCV_SET8:
    case R_RISCV_SET16:
    case R_RISCV_SET32:
    case R_RISCV_SUB6:
    case R_RISCV_SUB8:
    case R_RISCV_SUB16:
    case R_RISCV_SUB32:
    case R_RISCV_SUB64:
      expr = RE_RISCV_ADD;
      break;
    case R_RISCV_SET_ULEB128:
    case R_RISCV_SUB_ULEB128:
      expr = RE_RISCV_LEB128;
      break;

    case R_RISCV_VENDOR: {
      // Must be immediately followed by a vendor-specific relocation in the
      // code range [192, 255].
      auto it1 = it;
      ++it1;
      if (it1 == rels.end() || it1->getType(false) - 192u > 63u) {
        Err(ctx) << getErrorLoc(ctx, loc: sec.content().data() + offset)
                 << "R_RISCV_VENDOR is not followed by a relocation of code "
                    "192 to 255";
        continue;
      }
      vendor = sym.getName();
    }
      continue;
    default:
      auto diag = Err(ctx);
      diag << getErrorLoc(ctx, loc: sec.content().data() + offset);
      if (!vendor.empty()) {
        diag << "unknown vendor-specific relocation (" << type.v
             << ") in namespace '" << vendor << "' against symbol '" << &sym
             << "'";
        vendor = "";
      } else {
        diag << "unknown relocation (" << type.v << ") against symbol " << &sym;
      }
      continue;
    }
    rs.process(expr, type, offset, sym, addend);
  }

  // Sort relocations by offset for more efficient searching for
  // R_RISCV_PCREL_HI20.
  llvm::stable_sort(sec.relocs(),
                    [](const Relocation &lhs, const Relocation &rhs) {
                      return lhs.offset < rhs.offset;
                    });
}
478
479void RISCV::scanSection(InputSectionBase &sec) {
480 if (ctx.arg.is64)
481 elf::scanSection1<RISCV, ELF64LE>(target&: *this, sec);
482 else
483 elf::scanSection1<RISCV, ELF32LE>(target&: *this, sec);
484}
485
// Apply relocation `rel` with the already-computed value `val` at `loc`.
// The immediate bit scattering follows the RISC-V instruction formats
// (CB/CJ/J/B for branches and jumps, I/S/U for loads, stores and upper
// immediates).
void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  const unsigned bits = ctx.arg.wordsize * 8;

  switch (rel.type) {
  case R_RISCV_32:
    write32le(P: loc, V: val);
    return;
  case R_RISCV_64:
    write64le(P: loc, V: val);
    return;

  // Compressed conditional branch (CB format): 9-bit signed, 2-byte aligned.
  case R_RISCV_RVC_BRANCH: {
    checkInt(ctx, loc, v: val, n: 9, rel);
    checkAlignment(ctx, loc, v: val, n: 2, rel);
    uint16_t insn = read16le(P: loc) & 0xE383;
    uint16_t imm8 = extractBits(v: val, begin: 8, end: 8) << 12;
    uint16_t imm4_3 = extractBits(v: val, begin: 4, end: 3) << 10;
    uint16_t imm7_6 = extractBits(v: val, begin: 7, end: 6) << 5;
    uint16_t imm2_1 = extractBits(v: val, begin: 2, end: 1) << 3;
    uint16_t imm5 = extractBits(v: val, begin: 5, end: 5) << 2;
    insn |= imm8 | imm4_3 | imm7_6 | imm2_1 | imm5;

    write16le(P: loc, V: insn);
    return;
  }

  // Compressed jump (CJ format): 12-bit signed, 2-byte aligned.
  case R_RISCV_RVC_JUMP: {
    checkInt(ctx, loc, v: val, n: 12, rel);
    checkAlignment(ctx, loc, v: val, n: 2, rel);
    uint16_t insn = read16le(P: loc) & 0xE003;
    uint16_t imm11 = extractBits(v: val, begin: 11, end: 11) << 12;
    uint16_t imm4 = extractBits(v: val, begin: 4, end: 4) << 11;
    uint16_t imm9_8 = extractBits(v: val, begin: 9, end: 8) << 9;
    uint16_t imm10 = extractBits(v: val, begin: 10, end: 10) << 8;
    uint16_t imm6 = extractBits(v: val, begin: 6, end: 6) << 7;
    uint16_t imm7 = extractBits(v: val, begin: 7, end: 7) << 6;
    uint16_t imm3_1 = extractBits(v: val, begin: 3, end: 1) << 3;
    uint16_t imm5 = extractBits(v: val, begin: 5, end: 5) << 2;
    insn |= imm11 | imm4 | imm9_8 | imm10 | imm6 | imm7 | imm3_1 | imm5;

    write16le(P: loc, V: insn);
    return;
  }

  // JAL (J format): 21-bit signed, 2-byte aligned.
  case R_RISCV_JAL: {
    checkInt(ctx, loc, v: val, n: 21, rel);
    checkAlignment(ctx, loc, v: val, n: 2, rel);

    uint32_t insn = read32le(P: loc) & 0xFFF;
    uint32_t imm20 = extractBits(v: val, begin: 20, end: 20) << 31;
    uint32_t imm10_1 = extractBits(v: val, begin: 10, end: 1) << 21;
    uint32_t imm11 = extractBits(v: val, begin: 11, end: 11) << 20;
    uint32_t imm19_12 = extractBits(v: val, begin: 19, end: 12) << 12;
    insn |= imm20 | imm10_1 | imm11 | imm19_12;

    write32le(P: loc, V: insn);
    return;
  }

  // Conditional branch (B format): 13-bit signed, 2-byte aligned.
  case R_RISCV_BRANCH: {
    checkInt(ctx, loc, v: val, n: 13, rel);
    checkAlignment(ctx, loc, v: val, n: 2, rel);

    uint32_t insn = read32le(P: loc) & 0x1FFF07F;
    uint32_t imm12 = extractBits(v: val, begin: 12, end: 12) << 31;
    uint32_t imm10_5 = extractBits(v: val, begin: 10, end: 5) << 25;
    uint32_t imm4_1 = extractBits(v: val, begin: 4, end: 1) << 8;
    uint32_t imm11 = extractBits(v: val, begin: 11, end: 11) << 7;
    insn |= imm12 | imm10_5 | imm4_1 | imm11;

    write32le(P: loc, V: insn);
    return;
  }

  // auipc + jalr pair
  case R_RISCV_CALL:
  case R_RISCV_CALL_PLT: {
    int64_t hi = SignExtend64(X: val + 0x800, B: bits) >> 12;
    checkInt(ctx, loc, v: hi, n: 20, rel);
    // Only patch the pair when the offset is encodable; checkInt has already
    // reported the error otherwise.
    if (isInt<20>(x: hi)) {
      relocateNoSym(loc, type: R_RISCV_PCREL_HI20, val);
      relocateNoSym(loc: loc + 4, type: R_RISCV_PCREL_LO12_I, val);
    }
    return;
  }

  // U-format upper-immediate instructions; the +0x800 rounding compensates
  // for the sign-extension of the paired LO12.
  case R_RISCV_GOT_HI20:
  case R_RISCV_PCREL_HI20:
  case R_RISCV_TLSDESC_HI20:
  case R_RISCV_TLS_GD_HI20:
  case R_RISCV_TLS_GOT_HI20:
  case R_RISCV_TPREL_HI20:
  case R_RISCV_HI20: {
    uint64_t hi = val + 0x800;
    checkInt(ctx, loc, v: SignExtend64(X: hi, B: bits) >> 12, n: 20, rel);
    write32le(P: loc, V: (read32le(P: loc) & 0xFFF) | (hi & 0xFFFFF000));
    return;
  }

  // I-format low halves.
  case R_RISCV_PCREL_LO12_I:
  case R_RISCV_TLSDESC_LOAD_LO12:
  case R_RISCV_TLSDESC_ADD_LO12:
  case R_RISCV_TPREL_LO12_I:
  case R_RISCV_LO12_I: {
    uint64_t hi = (val + 0x800) >> 12;
    uint64_t lo = val - (hi << 12);
    write32le(P: loc, V: setLO12_I(insn: read32le(P: loc), imm: lo & 0xfff));
    return;
  }

  // S-format low halves.
  case R_RISCV_PCREL_LO12_S:
  case R_RISCV_TPREL_LO12_S:
  case R_RISCV_LO12_S: {
    uint64_t hi = (val + 0x800) >> 12;
    uint64_t lo = val - (hi << 12);
    write32le(P: loc, V: setLO12_S(insn: read32le(P: loc), imm: lo));
    return;
  }

  // Internal types produced by relaxHi20Lo12: rewrite the base register to
  // x0 and store the absolute value in the immediate.
  case INTERNAL_R_RISCV_X0REL_I:
  case INTERNAL_R_RISCV_X0REL_S: {
    checkInt(ctx, loc, v: val, n: 12, rel);
    uint32_t insn = (read32le(P: loc) & ~(31 << 15)) | (X_X0 << 15);
    if (rel.type == INTERNAL_R_RISCV_X0REL_I)
      insn = setLO12_I(insn, imm: val);
    else
      insn = setLO12_S(insn, imm: val);
    write32le(P: loc, V: insn);
    return;
  }

  // Internal types produced by relaxHi20Lo12: rewrite the base register to
  // gp and store the gp-relative displacement in the immediate.
  case INTERNAL_R_RISCV_GPREL_I:
  case INTERNAL_R_RISCV_GPREL_S: {
    Defined *gp = ctx.sym.riscvGlobalPointer;
    int64_t displace = SignExtend64(X: val - gp->getVA(ctx), B: bits);
    checkInt(ctx, loc, v: displace, n: 12, rel);
    uint32_t insn = (read32le(P: loc) & ~(31 << 15)) | (X_GP << 15);
    if (rel.type == INTERNAL_R_RISCV_GPREL_I)
      insn = setLO12_I(insn, imm: displace);
    else
      insn = setLO12_S(insn, imm: displace);
    write32le(P: loc, V: insn);
    return;
  }

  // Read-modify-write arithmetic relocations.
  case R_RISCV_ADD8:
    *loc += val;
    return;
  case R_RISCV_ADD16:
    write16le(P: loc, V: read16le(P: loc) + val);
    return;
  case R_RISCV_ADD32:
    write32le(P: loc, V: read32le(P: loc) + val);
    return;
  case R_RISCV_ADD64:
    write64le(P: loc, V: read64le(P: loc) + val);
    return;
  case R_RISCV_SUB6:
    *loc = (*loc & 0xc0) | (((*loc & 0x3f) - val) & 0x3f);
    return;
  case R_RISCV_SUB8:
    *loc -= val;
    return;
  case R_RISCV_SUB16:
    write16le(P: loc, V: read16le(P: loc) - val);
    return;
  case R_RISCV_SUB32:
    write32le(P: loc, V: read32le(P: loc) - val);
    return;
  case R_RISCV_SUB64:
    write64le(P: loc, V: read64le(P: loc) - val);
    return;
  case R_RISCV_SET6:
    *loc = (*loc & 0xc0) | (val & 0x3f);
    return;
  case R_RISCV_SET8:
    *loc = val;
    return;
  case R_RISCV_SET16:
    write16le(P: loc, V: val);
    return;
  case R_RISCV_SET32:
  case R_RISCV_32_PCREL:
  case R_RISCV_PLT32:
  case R_RISCV_GOT32_PCREL:
    checkInt(ctx, loc, v: val, n: 32, rel);
    write32le(P: loc, V: val);
    return;

  // DTP-relative values are biased by dtpOffset (0x800).
  case R_RISCV_TLS_DTPREL32:
    write32le(P: loc, V: val - dtpOffset);
    break;
  case R_RISCV_TLS_DTPREL64:
    write64le(P: loc, V: val - dtpOffset);
    break;

  case R_RISCV_RELAX:
    return;
  case R_RISCV_TLSDESC:
    // The addend is stored in the second word.
    if (ctx.arg.is64)
      write64le(P: loc + 8, V: val);
    else
      write32le(P: loc + 4, V: val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}
695
696static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
697 return i + 1 != relocs.size() && relocs[i + 1].type == R_RISCV_RELAX;
698}
699
// Rewrite one instruction of a TLSDESC sequence into its initial-exec form:
// the HI20 and LOAD_LO12 slots become NOPs, and the ADD_LO12/CALL slots
// become an auipc+l[wd] pair loading the TP offset from the GOT. `val` is the
// PC-relative GOT address prepared by relocateAlloc.
static void tlsdescToIe(Ctx &ctx, uint8_t *loc, const Relocation &rel,
                        uint64_t val) {
  switch (rel.type) {
  case R_RISCV_TLSDESC_HI20:
  case R_RISCV_TLSDESC_LOAD_LO12:
    write32le(P: loc, V: 0x00000013); // nop
    break;
  case R_RISCV_TLSDESC_ADD_LO12:
    write32le(P: loc, V: utype(op: AUIPC, rd: X_A0, imm: hi20(val))); // auipc a0,<hi20>
    break;
  case R_RISCV_TLSDESC_CALL:
    if (ctx.arg.is64)
      write32le(P: loc, V: itype(op: LD, rd: X_A0, rs1: X_A0, imm: lo12(val))); // ld a0,<lo12>(a0)
    else
      write32le(P: loc, V: itype(op: LW, rd: X_A0, rs1: X_A0, imm: lo12(val))); // lw a0,<lo12>(a0)
    break;
  default:
    llvm_unreachable("unsupported relocation for TLSDESC to IE");
  }
}
720
// Rewrite one instruction of a TLSDESC sequence into its local-exec form,
// materializing the TP-relative offset `val` in a0 with lui+addi (or a single
// addi when the offset fits in 12 bits).
static void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
  switch (rel.type) {
  case R_RISCV_TLSDESC_HI20:
  case R_RISCV_TLSDESC_LOAD_LO12:
    write32le(P: loc, V: 0x00000013); // nop
    return;
  case R_RISCV_TLSDESC_ADD_LO12:
    if (isInt<12>(x: val))
      write32le(P: loc, V: 0x00000013); // nop
    else
      write32le(P: loc, V: utype(op: LUI, rd: X_A0, imm: hi20(val))); // lui a0,<hi20>
    return;
  case R_RISCV_TLSDESC_CALL:
    if (isInt<12>(x: val))
      write32le(P: loc, V: itype(op: ADDI, rd: X_A0, rs1: 0, imm: val)); // addi a0,zero,<lo12>
    else
      write32le(P: loc, V: itype(op: ADDI, rd: X_A0, rs1: X_A0, imm: lo12(val))); // addi a0,a0,<lo12>
    return;
  default:
    llvm_unreachable("unsupported relocation for TLSDESC to LE");
  }
}
743
// Apply all scanned relocations of `sec` to the output buffer. A plain
// per-relocation loop does not suffice here: TLSDESC sequences and
// SET/SUB_ULEB128 pairs carry state across consecutive relocations, which is
// tracked in the locals below.
void RISCV::relocateAlloc(InputSection &sec, uint8_t *buf) const {
  uint64_t secAddr = sec.getOutputSection()->addr + sec.outSecOff;
  // State carried from R_RISCV_TLSDESC_HI20 to the LOAD_LO12/ADD_LO12/CALL
  // relocations of the same sequence.
  uint64_t tlsdescVal = 0;
  bool tlsdescRelax = false, isToLe = false;
  const ArrayRef<Relocation> relocs = sec.relocs();
  for (size_t i = 0, size = relocs.size(); i != size; ++i) {
    const Relocation &rel = relocs[i];
    uint8_t *loc = buf + rel.offset;
    uint64_t val = sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset);

    switch (rel.type) {
    case R_RISCV_ALIGN:
    case R_RISCV_RELAX:
    case R_RISCV_TPREL_ADD:
      // Hints only; nothing is written for them here.
      continue;
    case R_RISCV_TLSDESC_HI20:
      if (rel.expr == R_TLSDESC_PC) {
        // Shared object: store &got(sym)-PC for the following L[DW]/ADDI.
        tlsdescVal = val;
        break;
      }
      // Executable: TLSDESC->LE (R_TPREL) or TLSDESC->IE (R_GOT_PC).
      isToLe = rel.expr == R_TPREL;
      if (isToLe) {
        tlsdescVal = val;
      } else {
        // tlsdescVal will be finalized after we see R_RISCV_TLSDESC_ADD_LO12.
        // The net effect is that tlsdescVal will be smaller than `val` to
        // take into account of NOP instructions (in the absence of
        // R_RISCV_RELAX) before AUIPC.
        tlsdescVal = val + rel.offset;
      }
      tlsdescRelax = relaxable(relocs, i);
      if (!tlsdescRelax) {
        if (isToLe)
          tlsdescToLe(loc, rel, val);
        else
          tlsdescToIe(ctx, loc, rel, val);
      }
      continue;
    case R_RISCV_TLSDESC_LOAD_LO12:
    case R_RISCV_TLSDESC_ADD_LO12:
    case R_RISCV_TLSDESC_CALL:
      if (rel.expr == R_TLSDESC_PC) {
        // Shared object: propagate the stored GOT value.
        val = tlsdescVal;
        break;
      }
      // Executable: IE or LE instruction rewrite.
      if (!isToLe && rel.type == R_RISCV_TLSDESC_ADD_LO12)
        tlsdescVal -= rel.offset;
      val = tlsdescVal;
      // When NOP conversion is eligible and relaxation applies, don't write a
      // NOP in case an unrelated instruction follows the current instruction.
      if (tlsdescRelax &&
          (rel.type == R_RISCV_TLSDESC_LOAD_LO12 ||
           (rel.type == R_RISCV_TLSDESC_ADD_LO12 && isToLe && !hi20(val))))
        continue;
      if (isToLe)
        tlsdescToLe(loc, rel, val);
      else
        tlsdescToIe(ctx, loc, rel, val);
      continue;
    case R_RISCV_SET_ULEB128:
      // Must be paired with a SUB_ULEB128 at the same offset; the pair writes
      // a symbol difference as ULEB128 in place.
      if (i + 1 < size) {
        const Relocation &rel1 = relocs[i + 1];
        if (rel1.type == R_RISCV_SUB_ULEB128 && rel.offset == rel1.offset) {
          auto val = rel.sym->getVA(ctx, addend: rel.addend) -
                     rel1.sym->getVA(ctx, addend: rel1.addend);
          if (overwriteULEB128(bufLoc: loc, val) >= 0x80)
            Err(ctx) << sec.getLocation(offset: rel.offset) << ": ULEB128 value " << val
                     << " exceeds available space; references '" << rel.sym
                     << "'";
          ++i;
          continue;
        }
      }
      Err(ctx) << sec.getLocation(offset: rel.offset)
               << ": R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_ULEB128";
      return;
    default:
      break;
    }
    relocate(loc, rel, val);
  }
}
830
// Allocate per-section relaxation bookkeeping (RelaxAux) for executable input
// sections and record symbol anchors so st_value/st_size can be adjusted as
// relaxation deletes bytes.
void elf::initSymbolAnchors(Ctx &ctx) {
  SmallVector<InputSection *, 0> storage;
  for (OutputSection *osec : ctx.outputSections) {
    // Only executable sections participate in relaxation.
    if (!(osec->flags & SHF_EXECINSTR))
      continue;
    for (InputSection *sec : getInputSections(os: *osec, storage)) {
      if (isa<SyntheticSection>(Val: sec))
        continue;
      sec->relaxAux = make<RelaxAux>();
      if (sec->relocs().size()) {
        sec->relaxAux->relocDeltas =
            std::make_unique<uint32_t[]>(num: sec->relocs().size());
        sec->relaxAux->relocTypes =
            std::make_unique<RelType[]>(num: sec->relocs().size());
      }
    }
  }
  // Store symbol anchors for adjusting st_value/st_size during relaxation.
  // We include symbols where d->file == file for the prevailing copies.
  //
  // For a defined symbol foo, we may have `d->file != file` with --wrap=foo.
  // We should process foo, as the defining object file's symbol table may not
  // contain foo after redirectSymbols changed the foo entry to __wrap_foo. Use
  // `d->scriptDefined` to include such symbols.
  //
  // `relaxAux->anchors` may contain duplicate symbols, but that is fine.
  auto addAnchor = [](Defined *d) {
    if (auto *sec = dyn_cast_or_null<InputSection>(Val: d->section))
      if (sec->flags & SHF_EXECINSTR && sec->relaxAux) {
        // If sec is discarded, relaxAux will be nullptr.
        sec->relaxAux->anchors.push_back(Elt: {.offset: d->value, .d: d, .end: false});
        sec->relaxAux->anchors.push_back(Elt: {.offset: d->value + d->size, .d: d, .end: true});
      }
  };
  for (InputFile *file : ctx.objectFiles)
    for (Symbol *sym : file->getSymbols()) {
      auto *d = dyn_cast<Defined>(Val: sym);
      if (d && (d->file == file || d->scriptDefined))
        addAnchor(d);
    }
  // Add anchors for IRELATIVE symbols (see `handleNonPreemptibleIfunc`).
  // Their values must be adjusted so IRELATIVE addends remain correct.
  for (Defined *d : ctx.irelativeSyms)
    addAnchor(d);
  // Sort anchors by offset so that we can find the closest relocation
  // efficiently. For a zero size symbol, ensure that its start anchor precedes
  // its end anchor. For two symbols with anchors at the same offset, their
  // order does not matter.
  for (OutputSection *osec : ctx.outputSections) {
    if (!(osec->flags & SHF_EXECINSTR))
      continue;
    for (InputSection *sec : getInputSections(os: *osec, storage)) {
      if (!sec->relaxAux)
        continue;
      llvm::sort(C&: sec->relaxAux->anchors, Comp: [](auto &a, auto &b) {
        return std::make_pair(a.offset, a.end) <
               std::make_pair(b.offset, b.end);
      });
    }
  }
}
892
893// Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal.
// Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal.
// `remove` is in-out: on entry it bounds how many bytes this pass may delete
// (see the comment below), on exit it is the number of bytes removed (0 if no
// relaxation applies). The rewritten instruction is queued in relaxAux and
// written out by finalizeRelax.
static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
                      Relocation &r, uint32_t &remove) {
  const bool rvc = getEFlags(ctx, f: sec.file) & EF_RISCV_RVC;
  const Symbol &sym = *r.sym;
  const uint64_t insnPair = read64le(P: sec.content().data() + r.offset);
  // rd of the jalr, taken from the second instruction of the pair.
  const uint32_t rd = extractBits(v: insnPair, begin: 32 + 11, end: 32 + 7);
  const uint64_t dest =
      (r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend;
  const int64_t displace = dest - loc;

  // When the caller specifies the old value of `remove`, disallow its
  // increment.
  if (remove >= 6 && rvc && isInt<12>(x: displace) && rd == X_X0) {
    sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
    sec.relaxAux->writes.push_back(Elt: 0xa001); // c.j
    remove = 6;
  } else if (remove >= 6 && rvc && isInt<12>(x: displace) && rd == X_RA &&
             !ctx.arg.is64) { // RV32C only
    sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
    sec.relaxAux->writes.push_back(Elt: 0x2001); // c.jal
    remove = 6;
  } else if (remove >= 4 && isInt<21>(x: displace)) {
    sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
    sec.relaxAux->writes.push_back(Elt: 0x6f | rd << 7); // jal
    remove = 4;
  } else {
    remove = 0;
  }
}
923
924// Relax local-exec TLS when hi20 is zero.
// Relax local-exec TLS when hi20 is zero: the lui/add.tp pair is deleted and
// the lo12 instruction is rewritten to use tp directly with the symbol's full
// TP-relative value.
static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i,
                       uint64_t loc, Relocation &r, uint32_t &remove) {
  uint64_t val = r.sym->getVA(ctx, addend: r.addend);
  // Only applicable when the whole value fits in the low 12 bits.
  if (hi20(val) != 0)
    return;
  uint32_t insn = read32le(P: sec.content().data() + r.offset);
  switch (r.type) {
  case R_RISCV_TPREL_HI20:
  case R_RISCV_TPREL_ADD:
    // Remove lui rd, %tprel_hi(x) and add rd, rd, tp, %tprel_add(x).
    sec.relaxAux->relocTypes[i] = R_RISCV_RELAX;
    remove = 4;
    break;
  case R_RISCV_TPREL_LO12_I:
    // addi rd, rd, %tprel_lo(x) => addi rd, tp, st_value(x)
    sec.relaxAux->relocTypes[i] = R_RISCV_32;
    insn = (insn & ~(31 << 15)) | (X_TP << 15);
    sec.relaxAux->writes.push_back(Elt: setLO12_I(insn, imm: val));
    break;
  case R_RISCV_TPREL_LO12_S:
    // sw rs, %tprel_lo(x)(rd) => sw rs, st_value(x)(rd)
    sec.relaxAux->relocTypes[i] = R_RISCV_32;
    insn = (insn & ~(31 << 15)) | (X_TP << 15);
    sec.relaxAux->writes.push_back(Elt: setLO12_S(insn, imm: val));
    break;
  }
}
952
// Relax R_RISCV_HI20/LO12 pairs: absolute addresses within the 12-bit signed
// range of zero can be materialized from x0, and addresses within 12 bits of
// the global pointer symbol can be materialized from gp; in both cases the
// lui becomes removable and the lo12 instruction is retyped to an internal
// relocation handled later.
static void relaxHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
                          uint64_t loc, Relocation &r, uint32_t &remove) {

  // Fold into use of x0+offset
  if (isInt<12>(x: r.sym->getVA(ctx, addend: r.addend))) {
    switch (r.type) {
    case R_RISCV_HI20:
      // Remove lui rd, %hi20(x).
      sec.relaxAux->relocTypes[i] = R_RISCV_RELAX;
      remove = 4;
      break;
    case R_RISCV_LO12_I:
      sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_X0REL_I;
      break;
    case R_RISCV_LO12_S:
      sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_X0REL_S;
      break;
    }
    return;
  }

  // GP-relative relaxation requires __global_pointer$ to be defined.
  const Defined *gp = ctx.sym.riscvGlobalPointer;
  if (!gp)
    return;

  // The target must be within the 12-bit signed range of gp.
  if (!isInt<12>(x: r.sym->getVA(ctx, addend: r.addend) - gp->getVA(ctx)))
    return;

  switch (r.type) {
  case R_RISCV_HI20:
    // Remove lui rd, %hi20(x).
    sec.relaxAux->relocTypes[i] = R_RISCV_RELAX;
    remove = 4;
    break;
  case R_RISCV_LO12_I:
    sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_GPREL_I;
    break;
  case R_RISCV_LO12_S:
    sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_GPREL_S;
    break;
  }
}
995
// Run one relaxation iteration over `sec`. For each relocation, decide how
// many bytes are deleted at it (`remove`) and accumulate the running total in
// aux.relocDeltas[i]; rewritten instructions are recorded in aux.relocTypes
// and aux.writes for finalizeRelax to apply later. Symbol anchors (st_value /
// st_size bookkeeping) are adjusted as deletions accumulate. Returns true if
// any delta changed, meaning another pass is required.
static bool relax(Ctx &ctx, int pass, InputSection &sec) {
  const uint64_t secAddr = sec.getVA();
  const MutableArrayRef<Relocation> relocs = sec.relocs();
  auto &aux = *sec.relaxAux;
  bool changed = false;
  ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);
  // Running count of bytes deleted so far in this section.
  uint64_t delta = 0;
  bool tlsdescRelax = false, toLeShortForm = false;

  // Reset per-pass state; every pass recomputes types and writes from scratch.
  std::fill_n(first: aux.relocTypes.get(), n: relocs.size(), value: R_RISCV_NONE);
  aux.writes.clear();
  for (auto [i, r] : llvm::enumerate(First: relocs)) {
    // Location of the relocated instruction after prior deletions.
    const uint64_t loc = secAddr + r.offset - delta;
    uint32_t &cur = aux.relocDeltas[i], remove = 0;
    switch (r.type) {
    case R_RISCV_ALIGN: {
      const uint64_t nextLoc = loc + r.addend;
      const uint64_t align = PowerOf2Ceil(A: r.addend + 2);
      // All bytes beyond the alignment boundary should be removed.
      remove = nextLoc - ((loc + align - 1) & -align);
      // If we can't satisfy this alignment, we've found a bad input.
      if (LLVM_UNLIKELY(static_cast<int32_t>(remove) < 0)) {
        Err(ctx) << getErrorLoc(ctx, loc: (const uint8_t *)loc)
                 << "insufficient padding bytes for " << r.type << ": "
                 << r.addend
                 << " bytes available "
                    "for requested alignment of "
                 << align << " bytes";
        remove = 0;
      }
      break;
    }
    case R_RISCV_CALL:
    case R_RISCV_CALL_PLT:
      // Prevent oscillation between states by disallowing the increment of
      // `remove` after a few passes. The previous `remove` value is
      // `cur-delta`.
      if (relaxable(relocs, i)) {
        remove = pass < 4 ? 6 : cur - delta;
        relaxCall(ctx, sec, i, loc, r, remove);
      }
      break;
    case R_RISCV_TPREL_HI20:
    case R_RISCV_TPREL_ADD:
    case R_RISCV_TPREL_LO12_I:
    case R_RISCV_TPREL_LO12_S:
      if (relaxable(relocs, i))
        relaxTlsLe(ctx, sec, i, loc, r, remove);
      break;
    case R_RISCV_HI20:
    case R_RISCV_LO12_I:
    case R_RISCV_LO12_S:
      if (relaxable(relocs, i))
        relaxHi20Lo12(ctx, sec, i, loc, r, remove);
      break;
    case R_RISCV_TLSDESC_HI20:
      // For TLSDESC=>LE, we can use the short form if hi20 is zero.
      tlsdescRelax = relaxable(relocs, i);
      toLeShortForm = tlsdescRelax && r.expr == R_TPREL &&
                      !hi20(val: r.sym->getVA(ctx, addend: r.addend));
      [[fallthrough]];
    case R_RISCV_TLSDESC_LOAD_LO12:
      // For TLSDESC=>LE/IE, AUIPC and L[DW] are removed if relaxable.
      if (tlsdescRelax && r.expr != R_TLSDESC_PC)
        remove = 4;
      break;
    case R_RISCV_TLSDESC_ADD_LO12:
      if (toLeShortForm)
        remove = 4;
      break;
    }

    // For all anchors whose offsets are <= r.offset, they are preceded by
    // the previous relocation whose `relocDeltas` value equals `delta`.
    // Decrease their st_value and update their st_size.
    for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(N: 1)) {
      if (sa[0].end)
        sa[0].d->size = sa[0].offset - delta - sa[0].d->value;
      else
        sa[0].d->value = sa[0].offset - delta;
    }
    delta += remove;
    if (delta != cur) {
      cur = delta;
      changed = true;
    }
  }

  // Adjust any remaining anchors located after the last relocation.
  for (const SymbolAnchor &a : sa) {
    if (a.end)
      a.d->size = a.offset - delta - a.d->value;
    else
      a.d->value = a.offset - delta;
  }
  // Inform assignAddresses that the size has changed.
  if (!isUInt<32>(x: delta))
    Err(ctx) << "section size decrease is too large: " << delta;
  sec.bytesDropped = delta;
  return changed;
}
1096
1097// When relaxing just R_RISCV_ALIGN, relocDeltas is usually changed only once in
1098// the absence of a linker script. For call and load/store R_RISCV_RELAX, code
1099// shrinkage may reduce displacement and make more relocations eligible for
1100// relaxation. Code shrinkage may increase displacement to a call/load/store
1101// target at a higher fixed address, invalidating an earlier relaxation. Any
1102// change in section sizes can have cascading effect and require another
1103// relaxation pass.
1104bool RISCV::relaxOnce(int pass) const {
1105 llvm::TimeTraceScope timeScope("RISC-V relaxOnce");
1106 if (pass == 0)
1107 initSymbolAnchors(ctx);
1108
1109 SmallVector<InputSection *, 0> storage;
1110 bool changed = false;
1111 for (OutputSection *osec : ctx.outputSections) {
1112 if (!(osec->flags & SHF_EXECINSTR))
1113 continue;
1114 for (InputSection *sec : getInputSections(os: *osec, storage))
1115 if (sec->relaxAux)
1116 changed |= relax(ctx, pass, sec&: *sec);
1117 }
1118 return changed;
1119}
1120
1121// If the section alignment is >= 4, advance `dot` to insert NOPs and synthesize
1122// an ALIGN relocation. Otherwise, return false to use default handling.
template <class ELFT, class RelTy>
bool RISCV::synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
                                    Relocs<RelTy> rels) {
  if (!baseSec) {
    // Record the first input section with RELAX relocations. We will synthesize
    // ALIGN relocations here.
    for (auto rel : rels) {
      if (rel.getType(false) == R_RISCV_RELAX) {
        baseSec = sec;
        break;
      }
    }
  } else if (sec->addralign >= 4) {
    // If the alignment is >= 4 and the section does not start with an ALIGN
    // relocation, synthesize one.
    bool hasAlignRel = llvm::any_of(rels, [](const RelTy &rel) {
      return rel.r_offset == 0 && rel.getType(false) == R_RISCV_ALIGN;
    });
    if (!hasAlignRel) {
      // Record (offset relative to baseSec, addend). The addend addralign-2
      // encodes the available padding bytes; relax() recovers the requested
      // alignment as PowerOf2Ceil(addend + 2).
      synthesizedAligns.emplace_back(Args: dot - baseSec->getVA(),
                                     Args: sec->addralign - 2);
      dot += sec->addralign - 2;
      return true;
    }
  }
  return false;
}
1150
1151// Finalize the relocation section by appending synthesized ALIGN relocations
1152// after processing all input sections.
template <class ELFT, class RelTy>
void RISCV::finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
                                     Relocs<RelTy> rels) {
  // `rels` are the relocations of baseSec; clone baseSec's relocation section
  // so the synthesized ALIGN relocations can be appended to it.
  auto *f = cast<ObjFile<ELFT>>(baseSec->file);
  auto shdr = f->template getELFShdrs<ELFT>()[baseSec->relSecIdx];
  // Create a copy of InputSection.
  sec = make<InputSection>(*f, shdr, baseSec->name);
  auto *baseRelSec = cast<InputSection>(f->getSections()[baseSec->relSecIdx]);
  *sec = *baseRelSec;
  // Reset the recording state so a later pass starts fresh.
  baseSec = nullptr;

  // Allocate buffer for original and synthesized relocations in RELA format.
  // If CREL is used, OutputSection::finalizeNonAllocCrel will convert RELA to
  // CREL.
  auto newSize = rels.size() + synthesizedAligns.size();
  auto *relas = makeThreadLocalN<typename ELFT::Rela>(newSize);
  sec->size = newSize * sizeof(typename ELFT::Rela);
  sec->content_ = reinterpret_cast<uint8_t *>(relas);
  sec->type = SHT_RELA;
  // Copy original relocations to the new buffer, potentially converting CREL to
  // RELA.
  for (auto [i, r] : llvm::enumerate(rels)) {
    relas[i].r_offset = r.r_offset;
    relas[i].setSymbolAndType(r.getSymbol(0), r.getType(0), false);
    if constexpr (RelTy::HasAddend)
      relas[i].r_addend = r.r_addend;
  }
  // Append synthesized ALIGN relocations to the buffer. Each entry is an
  // (offset, addend) pair recorded by synthesizeAlignForInput.
  for (auto [i, r] : llvm::enumerate(First&: synthesizedAligns)) {
    auto &rela = relas[rels.size() + i];
    rela.r_offset = r.first;
    rela.setSymbolAndType(0, R_RISCV_ALIGN, false);
    rela.r_addend = r.second;
  }
  synthesizedAligns.clear();
  // Replace the old relocation section with the new one in the output section.
  // addOrphanSections ensures that the output relocation section is processed
  // after osec.
  for (SectionCommand *cmd : sec->getParent()->commands) {
    auto *isd = dyn_cast<InputSectionDescription>(Val: cmd);
    if (!isd)
      continue;
    for (auto *&isec : isd->sections)
      if (isec == baseRelSec)
        isec = sec;
  }
}
1200
// Dispatch helper for synthesizeAlign: with a non-null `sec`, try to
// synthesize an ALIGN relocation for that input section; with a null `sec`
// (end of input) and a recorded baseSec, rewrite the output relocation
// section to include the synthesized relocations.
template <class ELFT>
bool RISCV::synthesizeAlignAux(uint64_t &dot, InputSection *sec) {
  bool ret = false;
  if (sec) {
    invokeOnRelocs(*sec, ret = synthesizeAlignForInput<ELFT>, dot, sec);
  } else if (baseSec) {
    invokeOnRelocs(*baseSec, finalizeSynthesizeAligns<ELFT>, dot, sec);
  }
  return ret;
}
1211
1212// Without linker relaxation enabled for a particular relocatable file or
1213// section, the assembler will not generate R_RISCV_ALIGN relocations for
1214// alignment directives. This becomes problematic in a two-stage linking
1215// process: ld -r a.o b.o -o ab.o; ld ab.o -o ab. This function synthesizes an
1216// R_RISCV_ALIGN relocation at section start when needed.
1217//
1218// When called with an input section (`sec` is not null): If the section
1219// alignment is >= 4, advance `dot` to insert NOPs and synthesize an ALIGN
1220// relocation.
1221//
1222// When called after all input sections are processed (`sec` is null): The
1223// output relocation section is updated with all the newly synthesized ALIGN
1224// relocations.
1225bool RISCV::synthesizeAlign(uint64_t &dot, InputSection *sec) {
1226 assert(ctx.arg.relocatable);
1227 if (ctx.arg.is64)
1228 return synthesizeAlignAux<ELF64LE>(dot, sec);
1229 return synthesizeAlignAux<ELF32LE>(dot, sec);
1230}
1231
// After relaxation has converged, rewrite each relaxed section's contents:
// delete the bytes recorded in relocDeltas, apply the instruction rewrites
// recorded in relocTypes/writes, and shift relocation offsets accordingly.
void RISCV::finalizeRelax(int passes) const {
  llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation");
  Log(ctx) << "relaxation passes: " << passes;
  SmallVector<InputSection *, 0> storage;
  for (OutputSection *osec : ctx.outputSections) {
    if (!(osec->flags & SHF_EXECINSTR))
      continue;
    for (InputSection *sec : getInputSections(os: *osec, storage)) {
      if (!sec->relaxAux)
        continue;
      RelaxAux &aux = *sec->relaxAux;
      if (!aux.relocDeltas)
        continue;

      MutableArrayRef<Relocation> rels = sec->relocs();
      ArrayRef<uint8_t> old = sec->content();
      // The last relocDeltas entry holds the total bytes removed.
      size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
      size_t writesIdx = 0;
      uint8_t *p = ctx.bAlloc.Allocate<uint8_t>(Num: newSize);
      uint64_t offset = 0;
      int64_t delta = 0;
      sec->content_ = p;
      sec->size = newSize;
      sec->bytesDropped = 0;

      // Update section content: remove NOPs for R_RISCV_ALIGN and rewrite
      // instructions for relaxed relocations.
      for (size_t i = 0, e = rels.size(); i != e; ++i) {
        uint32_t remove = aux.relocDeltas[i] - delta;
        delta = aux.relocDeltas[i];
        if (remove == 0 && aux.relocTypes[i] == R_RISCV_NONE)
          continue;

        // Copy from last location to the current relocated location.
        const Relocation &r = rels[i];
        uint64_t size = r.offset - offset;
        memcpy(dest: p, src: old.data() + offset, n: size);
        p += size;

        // For R_RISCV_ALIGN, we will place `offset` in a location (among NOPs)
        // to satisfy the alignment requirement. If both `remove` and r.addend
        // are multiples of 4, it is as if we have skipped some NOPs. Otherwise
        // we are in the middle of a 4-byte NOP, and we need to rewrite the NOP
        // sequence.
        int64_t skip = 0;
        if (r.type == R_RISCV_ALIGN) {
          if (remove % 4 || r.addend % 4) {
            skip = r.addend - remove;
            int64_t j = 0;
            for (; j + 4 <= skip; j += 4)
              write32le(P: p + j, V: 0x00000013); // nop
            if (j != skip) {
              assert(j + 2 == skip);
              write16le(P: p + j, V: 0x0001); // c.nop
            }
          }
        } else if (RelType newType = aux.relocTypes[i]) {
          switch (newType) {
          case INTERNAL_R_RISCV_GPREL_I:
          case INTERNAL_R_RISCV_GPREL_S:
          case INTERNAL_R_RISCV_X0REL_I:
          case INTERNAL_R_RISCV_X0REL_S:
            // Handled by relocateAlloc via the retyped relocation; no bytes
            // are rewritten here.
            break;
          case R_RISCV_RELAX:
            // Used by relaxTlsLe to indicate the relocation is ignored.
            break;
          case R_RISCV_RVC_JUMP:
            skip = 2;
            write16le(P: p, V: aux.writes[writesIdx++]);
            break;
          case R_RISCV_JAL:
            skip = 4;
            write32le(P: p, V: aux.writes[writesIdx++]);
            break;
          case R_RISCV_32:
            // Used by relaxTlsLe to write a uint32_t then suppress the handling
            // in relocateAlloc.
            skip = 4;
            write32le(P: p, V: aux.writes[writesIdx++]);
            aux.relocTypes[i] = R_RISCV_NONE;
            break;
          default:
            llvm_unreachable("unsupported type");
          }
        }

        p += skip;
        offset = r.offset + skip + remove;
      }
      // Copy the tail after the last relocation.
      memcpy(dest: p, src: old.data() + offset, n: old.size() - offset);

      // Subtract the previous relocDeltas value from the relocation offset.
      // For a pair of R_RISCV_CALL/R_RISCV_RELAX with the same offset, decrease
      // their r_offset by the same delta.
      delta = 0;
      for (size_t i = 0, e = rels.size(); i != e;) {
        uint64_t cur = rels[i].offset;
        do {
          rels[i].offset -= delta;
          if (aux.relocTypes[i] != R_RISCV_NONE)
            rels[i].type = aux.relocTypes[i];
        } while (++i != e && rels[i].offset == cur);
        delta = aux.relocDeltas[i - 1];
      }
    }
  }
}
1339
1340namespace {
1341// Representation of the merged .riscv.attributes input sections. The psABI
1342// specifies merge policy for attributes. E.g. if we link an object without an
1343// extension with an object with the extension, the output Tag_RISCV_arch shall
1344// contain the extension. Some tools like objdump parse .riscv.attributes and
1345// disabling some instructions if the first Tag_RISCV_arch does not contain an
1346// extension.
class RISCVAttributesSection final : public SyntheticSection {
public:
  RISCVAttributesSection(Ctx &ctx)
      : SyntheticSection(ctx, ".riscv.attributes", SHT_RISCV_ATTRIBUTES, 0, 1) {
  }

  size_t getSize() const override { return size; }
  void writeTo(uint8_t *buf) override;

  // Vendor subsection name written into the attributes section.
  static constexpr StringRef vendor = "riscv";
  // Merged integer-valued attributes, keyed by tag. Zero values are omitted
  // from the output.
  DenseMap<unsigned, unsigned> intAttr;
  // Merged string-valued attributes, keyed by tag. Empty values are omitted
  // from the output.
  DenseMap<unsigned, StringRef> strAttr;
  // Total encoded size in bytes, computed by mergeAttributesSection.
  size_t size = 0;
};
1361} // namespace
1362
// Merge the normalized ISA string `s` (from section `sec`) into the running
// extension map. The first arch string seen seeds the map and XLEN; later
// strings add their extensions, and when both define the same extension the
// higher (Major, Minor) version wins.
static void mergeArch(Ctx &ctx, RISCVISAUtils::OrderedExtensionMap &mergedExts,
                      unsigned &mergedXlen, const InputSectionBase *sec,
                      StringRef s) {
  auto maybeInfo = RISCVISAInfo::parseNormalizedArchString(Arch: s);
  if (!maybeInfo) {
    // Report malformed arch strings but keep linking with what we have.
    Err(ctx) << sec << ": " << s << ": " << maybeInfo.takeError();
    return;
  }

  // Merge extensions.
  RISCVISAInfo &info = **maybeInfo;
  if (mergedExts.empty()) {
    mergedExts = info.getExtensions();
    mergedXlen = info.getXLen();
  } else {
    for (const auto &ext : info.getExtensions()) {
      auto p = mergedExts.insert(x: ext);
      if (!p.second) {
        // Extension already present: keep the larger version number.
        if (std::tie(args&: p.first->second.Major, args&: p.first->second.Minor) <
            std::tie(args: ext.second.Major, args: ext.second.Minor))
          p.first->second = ext.second;
      }
    }
  }
}
1388
// Merge the atomic_abi tag of `newSection` into the running merged value held
// in `it`. The merge lattice implemented below: UNKNOWN is compatible with
// everything; A6S combines with A6C (to A6C) or A7 (to A7); A6C and A7 are
// mutually incompatible and produce an error. Tags outside the known set are
// reported at the end.
static void mergeAtomic(Ctx &ctx, DenseMap<unsigned, unsigned>::iterator it,
                        const InputSectionBase *oldSection,
                        const InputSectionBase *newSection,
                        RISCVAttrs::RISCVAtomicAbiTag oldTag,
                        RISCVAttrs::RISCVAtomicAbiTag newTag) {
  using RISCVAttrs::RISCVAtomicAbiTag;
  // Same tags stay the same, and UNKNOWN is compatible with anything
  if (oldTag == newTag || newTag == RISCVAtomicAbiTag::UNKNOWN)
    return;

  auto reportAbiError = [&]() {
    Err(ctx) << "atomic abi mismatch for " << oldSection->name << "\n>>> "
             << oldSection << ": atomic_abi=" << static_cast<unsigned>(oldTag)
             << "\n>>> " << newSection
             << ": atomic_abi=" << static_cast<unsigned>(newTag);
  };

  // Diagnose a tag value outside the enumerated set; known tags fall through
  // silently.
  auto reportUnknownAbiError = [&](const InputSectionBase *section,
                                   RISCVAtomicAbiTag tag) {
    switch (tag) {
    case RISCVAtomicAbiTag::UNKNOWN:
    case RISCVAtomicAbiTag::A6C:
    case RISCVAtomicAbiTag::A6S:
    case RISCVAtomicAbiTag::A7:
      return;
    };
    Err(ctx) << "unknown atomic abi for " << section->name << "\n>>> "
             << section << ": atomic_abi=" << static_cast<unsigned>(tag);
  };
  switch (oldTag) {
  case RISCVAtomicAbiTag::UNKNOWN:
    // Anything refines UNKNOWN.
    it->getSecond() = static_cast<unsigned>(newTag);
    return;
  case RISCVAtomicAbiTag::A6C:
    switch (newTag) {
    case RISCVAtomicAbiTag::A6S:
      it->getSecond() = static_cast<unsigned>(RISCVAtomicAbiTag::A6C);
      return;
    case RISCVAtomicAbiTag::A7:
      // A6C and A7 cannot be combined.
      reportAbiError();
      return;
    case RISCVAttrs::RISCVAtomicAbiTag::UNKNOWN:
    case RISCVAttrs::RISCVAtomicAbiTag::A6C:
      return;
    };
    break;

  case RISCVAtomicAbiTag::A6S:
    switch (newTag) {
    case RISCVAtomicAbiTag::A6C:
      it->getSecond() = static_cast<unsigned>(RISCVAtomicAbiTag::A6C);
      return;
    case RISCVAtomicAbiTag::A7:
      it->getSecond() = static_cast<unsigned>(RISCVAtomicAbiTag::A7);
      return;
    case RISCVAttrs::RISCVAtomicAbiTag::UNKNOWN:
    case RISCVAttrs::RISCVAtomicAbiTag::A6S:
      return;
    };
    break;

  case RISCVAtomicAbiTag::A7:
    switch (newTag) {
    case RISCVAtomicAbiTag::A6S:
      it->getSecond() = static_cast<unsigned>(RISCVAtomicAbiTag::A7);
      return;
    case RISCVAtomicAbiTag::A6C:
      // A7 and A6C cannot be combined.
      reportAbiError();
      return;
    case RISCVAttrs::RISCVAtomicAbiTag::UNKNOWN:
    case RISCVAttrs::RISCVAtomicAbiTag::A7:
      return;
    };
    break;
  };

  // If we get here, then we have an invalid tag, so report it.
  // Putting these checks at the end allows us to only do these checks when we
  // need to, since this is expected to be a rare occurrence.
  reportUnknownAbiError(oldSection, oldTag);
  reportUnknownAbiError(newSection, newTag);
}
1471
// Parse every input .riscv.attributes section and merge the attributes into a
// single synthetic section, applying per-tag merge policies: stack_align must
// agree across inputs, unaligned_access is OR'd, Tag_RISCV_arch strings are
// union-merged via mergeArch, atomic_abi uses the mergeAtomic lattice, and
// remaining tags are kept only if all inputs agree. Also computes the encoded
// size of the merged section.
static RISCVAttributesSection *
mergeAttributesSection(Ctx &ctx,
                       const SmallVector<InputSectionBase *, 0> &sections) {
  using RISCVAttrs::RISCVAtomicAbiTag;
  RISCVISAUtils::OrderedExtensionMap exts;
  // Track the first section contributing each conflict-checked attribute so
  // diagnostics can name both sides.
  const InputSectionBase *firstStackAlign = nullptr;
  const InputSectionBase *firstAtomicAbi = nullptr;
  unsigned firstStackAlignValue = 0, xlen = 0;
  bool hasArch = false;

  ctx.in.riscvAttributes = std::make_unique<RISCVAttributesSection>(args&: ctx);
  auto &merged = static_cast<RISCVAttributesSection &>(*ctx.in.riscvAttributes);

  // Collect all tags values from attributes section.
  const auto &attributesTags = RISCVAttrs::getRISCVAttributeTags();
  for (const InputSectionBase *sec : sections) {
    RISCVAttributeParser parser;
    // A malformed attributes section is a warning, not a hard error.
    if (Error e = parser.parse(section: sec->content(), endian: llvm::endianness::little))
      Warn(ctx) << sec << ": " << std::move(e);
    for (const auto &tag : attributesTags) {
      switch (RISCVAttrs::AttrType(tag.attr)) {
      // Integer attributes.
      case RISCVAttrs::STACK_ALIGN:
        // stack_align must be identical across all inputs.
        if (auto i = parser.getAttributeValue(tag: tag.attr)) {
          auto r = merged.intAttr.try_emplace(Key: tag.attr, Args&: *i);
          if (r.second) {
            firstStackAlign = sec;
            firstStackAlignValue = *i;
          } else if (r.first->second != *i) {
            Err(ctx) << sec << " has stack_align=" << *i << " but "
                     << firstStackAlign
                     << " has stack_align=" << firstStackAlignValue;
          }
        }
        continue;
      case RISCVAttrs::UNALIGNED_ACCESS:
        // Any input allowing unaligned access makes the output allow it.
        if (auto i = parser.getAttributeValue(tag: tag.attr))
          merged.intAttr[tag.attr] |= *i;
        continue;

      // String attributes.
      case RISCVAttrs::ARCH:
        if (auto s = parser.getAttributeString(tag: tag.attr)) {
          hasArch = true;
          mergeArch(ctx, mergedExts&: exts, mergedXlen&: xlen, sec, s: *s);
        }
        continue;

      // Attributes which use the default handling.
      case RISCVAttrs::PRIV_SPEC:
      case RISCVAttrs::PRIV_SPEC_MINOR:
      case RISCVAttrs::PRIV_SPEC_REVISION:
        break;

      case RISCVAttrs::AttrType::ATOMIC_ABI:
        if (auto i = parser.getAttributeValue(tag: tag.attr)) {
          auto r = merged.intAttr.try_emplace(Key: tag.attr, Args&: *i);
          if (r.second)
            firstAtomicAbi = sec;
          else
            mergeAtomic(ctx, it: r.first, oldSection: firstAtomicAbi, newSection: sec,
                        oldTag: static_cast<RISCVAtomicAbiTag>(r.first->getSecond()),
                        newTag: static_cast<RISCVAtomicAbiTag>(*i));
        }
        continue;
      }

      // Fallback for deprecated priv_spec* and other unknown attributes: retain
      // the attribute if all input sections agree on the value. GNU ld uses 0
      // and empty strings as default values which are not dumped to the output.
      // TODO Adjust after resolution to
      // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/issues/352
      // Even tags hold integer values here; odd tags hold strings.
      if (tag.attr % 2 == 0) {
        if (auto i = parser.getAttributeValue(tag: tag.attr)) {
          auto r = merged.intAttr.try_emplace(Key: tag.attr, Args&: *i);
          if (!r.second && r.first->second != *i)
            r.first->second = 0;
        }
      } else if (auto s = parser.getAttributeString(tag: tag.attr)) {
        auto r = merged.strAttr.try_emplace(Key: tag.attr, Args&: *s);
        if (!r.second && r.first->second != *s)
          r.first->second = {};
      }
    }
  }

  // Render the merged extension map back into a single arch string.
  if (hasArch && xlen != 0) {
    if (auto result = RISCVISAInfo::createFromExtMap(XLen: xlen, Exts: exts)) {
      merged.strAttr.try_emplace(Key: RISCVAttrs::ARCH,
                                 Args: ctx.saver.save(S: (*result)->toString()));
    } else {
      Err(ctx) << result.takeError();
    }
  }

  // The total size of headers: format-version [ <section-length> "vendor-name"
  // [ <file-tag> <size>.
  size_t size = 5 + merged.vendor.size() + 1 + 5;
  for (auto &attr : merged.intAttr)
    if (attr.second != 0)
      size += getULEB128Size(Value: attr.first) + getULEB128Size(Value: attr.second);
  for (auto &attr : merged.strAttr)
    if (!attr.second.empty())
      size += getULEB128Size(Value: attr.first) + attr.second.size() + 1;
  merged.size = size;
  return &merged;
}
1579
1580void RISCVAttributesSection::writeTo(uint8_t *buf) {
1581 const size_t size = getSize();
1582 uint8_t *const end = buf + size;
1583 *buf = ELFAttrs::Format_Version;
1584 write32(ctx, p: buf + 1, v: size - 1);
1585 buf += 5;
1586
1587 memcpy(dest: buf, src: vendor.data(), n: vendor.size());
1588 buf += vendor.size() + 1;
1589
1590 *buf = ELFAttrs::File;
1591 write32(ctx, p: buf + 1, v: end - buf);
1592 buf += 5;
1593
1594 for (auto &attr : intAttr) {
1595 if (attr.second == 0)
1596 continue;
1597 buf += encodeULEB128(Value: attr.first, p: buf);
1598 buf += encodeULEB128(Value: attr.second, p: buf);
1599 }
1600 for (auto &attr : strAttr) {
1601 if (attr.second.empty())
1602 continue;
1603 buf += encodeULEB128(Value: attr.first, p: buf);
1604 memcpy(dest: buf, src: attr.second.data(), n: attr.second.size());
1605 buf += attr.second.size() + 1;
1606 }
1607}
1608
// Replace all input SHT_RISCV_ATTRIBUTES sections with a single merged
// synthetic section, inserted at the position of the first one.
void elf::mergeRISCVAttributesSections(Ctx &ctx) {
  // Find the first input SHT_RISCV_ATTRIBUTES; return if not found.
  size_t place =
      llvm::find_if(Range&: ctx.inputSections,
                    P: [](auto *s) { return s->type == SHT_RISCV_ATTRIBUTES; }) -
      ctx.inputSections.begin();
  if (place == ctx.inputSections.size())
    return;

  // Extract all SHT_RISCV_ATTRIBUTES sections into `sections`.
  SmallVector<InputSectionBase *, 0> sections;
  llvm::erase_if(C&: ctx.inputSections, P: [&](InputSectionBase *s) {
    if (s->type != SHT_RISCV_ATTRIBUTES)
      return false;
    sections.push_back(Elt: s);
    return true;
  });

  // Add the merged section.
  ctx.inputSections.insert(I: ctx.inputSections.begin() + place,
                           Elt: mergeAttributesSection(ctx, sections));
}
1631
// Install the RISC-V TargetInfo implementation for this link context.
void elf::setRISCVTargetInfo(Ctx &ctx) { ctx.target.reset(p: new RISCV(ctx)); }
1633