//===- InputChunks.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputChunks.h"
#include "Config.h"
#include "OutputSegment.h"
#include "WriterUtils.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/LLVM.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/xxhash.h"

#define DEBUG_TYPE "lld"

using namespace llvm;
using namespace llvm::wasm;
using namespace llvm::support::endian;

namespace lld {
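// Map a numeric relocation type to its name by expanding the WASM_RELOC
// X-macro over llvm/BinaryFormat/WasmRelocs.def.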
StringRef relocTypeToString(uint8_t relocType) {
  switch (relocType) {
#define WASM_RELOC(NAME, REL)                                                  \
  case REL:                                                                    \
    return #NAME;
#include "llvm/BinaryFormat/WasmRelocs.def"
#undef WASM_RELOC
  }
  llvm_unreachable("unknown reloc type");
}

bool relocIs64(uint8_t relocType) {
  switch (relocType) {
  case R_WASM_MEMORY_ADDR_LEB64:
  case R_WASM_MEMORY_ADDR_SLEB64:
  case R_WASM_MEMORY_ADDR_REL_SLEB64:
  case R_WASM_MEMORY_ADDR_I64:
  case R_WASM_TABLE_INDEX_SLEB64:
  case R_WASM_TABLE_INDEX_I64:
  case R_WASM_FUNCTION_OFFSET_I64:
  case R_WASM_TABLE_INDEX_REL_SLEB64:
  case R_WASM_MEMORY_ADDR_TLS_SLEB64:
    return true;
  default:
    return false;
  }
}

std::string toString(const wasm::InputChunk *c) {
  return (toString(c->file) + ":(" + c->name + ")").str();
}

namespace wasm {
StringRef InputChunk::getComdatName() const {
  uint32_t index = getComdat();
  if (index == UINT32_MAX)
    return StringRef();
  return file->getWasmObj()->linkingData().Comdats[index];
}

uint32_t InputChunk::getSize() const {
  if (const auto *ms = dyn_cast<SyntheticMergedChunk>(this))
    return ms->builder.getSize();

  if (const auto *f = dyn_cast<InputFunction>(this)) {
    if (config->compressRelocations && f->file) {
      return f->getCompressedSize();
    }
  }

  return data().size();
}

uint32_t InputChunk::getInputSize() const {
  if (const auto *f = dyn_cast<InputFunction>(this))
    return f->function->Size;
  return getSize();
}

// Copy this input chunk to an mmap'ed output file and apply relocations.
void InputChunk::writeTo(uint8_t *buf) const {
  if (const auto *f = dyn_cast<InputFunction>(this)) {
    if (file && config->compressRelocations)
      return f->writeCompressed(buf);
  } else if (const auto *ms = dyn_cast<SyntheticMergedChunk>(this)) {
    ms->builder.write(buf + outSecOff);
    // Apply relocations
    ms->relocate(buf + outSecOff);
    return;
  }

  // Copy contents
  memcpy(buf + outSecOff, data().data(), data().size());

  // Apply relocations
  relocate(buf + outSecOff);
}

void InputChunk::relocate(uint8_t *buf) const {
  if (relocations.empty())
    return;

  LLVM_DEBUG(dbgs() << "applying relocations: " << toString(this)
                    << " count=" << relocations.size() << "\n");
  int32_t inputSectionOffset = getInputSectionOffset();
  uint64_t tombstone = getTombstone();

  for (const WasmRelocation &rel : relocations) {
    uint8_t *loc = buf + rel.Offset - inputSectionOffset;
    LLVM_DEBUG(dbgs() << "apply reloc: type=" << relocTypeToString(rel.Type));
    if (rel.Type != R_WASM_TYPE_INDEX_LEB)
      LLVM_DEBUG(dbgs() << " sym=" << file->getSymbols()[rel.Index]->getName());
    LLVM_DEBUG(dbgs() << " addend=" << rel.Addend << " index=" << rel.Index
                      << " offset=" << rel.Offset << "\n");
    // TODO(sbc): Check that the value is within the range of the
    // relocation type below. Most likely we must error out here
    // if it's not within range.
    uint64_t value = file->calcNewValue(rel, tombstone, this);

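    // LEB-style relocation sites were emitted with fixed-width padding
    // (5 bytes for 32-bit values, 10 for 64-bit), so the new value can be
    // patched in place without resizing the surrounding code.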
    switch (rel.Type) {
    case R_WASM_TYPE_INDEX_LEB:
    case R_WASM_FUNCTION_INDEX_LEB:
    case R_WASM_GLOBAL_INDEX_LEB:
    case R_WASM_TAG_INDEX_LEB:
    case R_WASM_MEMORY_ADDR_LEB:
    case R_WASM_TABLE_NUMBER_LEB:
      encodeULEB128(static_cast<uint32_t>(value), loc, 5);
      break;
    case R_WASM_MEMORY_ADDR_LEB64:
      encodeULEB128(value, loc, 10);
      break;
    case R_WASM_TABLE_INDEX_SLEB:
    case R_WASM_TABLE_INDEX_REL_SLEB:
    case R_WASM_MEMORY_ADDR_SLEB:
    case R_WASM_MEMORY_ADDR_REL_SLEB:
    case R_WASM_MEMORY_ADDR_TLS_SLEB:
      encodeSLEB128(static_cast<int32_t>(value), loc, 5);
      break;
    case R_WASM_TABLE_INDEX_SLEB64:
    case R_WASM_TABLE_INDEX_REL_SLEB64:
    case R_WASM_MEMORY_ADDR_SLEB64:
    case R_WASM_MEMORY_ADDR_REL_SLEB64:
    case R_WASM_MEMORY_ADDR_TLS_SLEB64:
      encodeSLEB128(static_cast<int64_t>(value), loc, 10);
      break;
    case R_WASM_TABLE_INDEX_I32:
    case R_WASM_MEMORY_ADDR_I32:
    case R_WASM_FUNCTION_OFFSET_I32:
    case R_WASM_FUNCTION_INDEX_I32:
    case R_WASM_SECTION_OFFSET_I32:
    case R_WASM_GLOBAL_INDEX_I32:
    case R_WASM_MEMORY_ADDR_LOCREL_I32:
      write32le(loc, value);
      break;
    case R_WASM_TABLE_INDEX_I64:
    case R_WASM_MEMORY_ADDR_I64:
    case R_WASM_FUNCTION_OFFSET_I64:
      write64le(loc, value);
      break;
    default:
      llvm_unreachable("unknown relocation type");
    }
  }
}

// Copy relocation entries to a given output stream.
// This function is used only when a user passes "-r". For a regular link,
// we consume relocations instead of copying them to an output file.
void InputChunk::writeRelocations(raw_ostream &os) const {
  if (relocations.empty())
    return;

  int32_t off = outSecOff - getInputSectionOffset();
  LLVM_DEBUG(dbgs() << "writeRelocations: " << file->getName()
                    << " offset=" << Twine(off) << "\n");

  for (const WasmRelocation &rel : relocations) {
    writeUleb128(os, rel.Type, "reloc type");
    writeUleb128(os, rel.Offset + off, "reloc offset");
    writeUleb128(os, file->calcNewIndex(rel), "reloc index");

    if (relocTypeHasAddend(rel.Type))
      writeSleb128(os, file->calcNewAddend(rel), "reloc addend");
  }
}

uint64_t InputChunk::getTombstone() const {
  if (const auto *s = dyn_cast<InputSection>(this)) {
    return s->tombstoneValue;
  }

  return 0;
}

void InputFunction::setFunctionIndex(uint32_t index) {
  LLVM_DEBUG(dbgs() << "InputFunction::setFunctionIndex: " << name << " -> "
                    << index << "\n");
  assert(!hasFunctionIndex());
  functionIndex = index;
}

void InputFunction::setTableIndex(uint32_t index) {
  LLVM_DEBUG(dbgs() << "InputFunction::setTableIndex: " << name << " -> "
                    << index << "\n");
  assert(!hasTableIndex());
  tableIndex = index;
}

// Write a relocation value without padding and return the number of bytes
// written.
static unsigned writeCompressedReloc(uint8_t *buf, const WasmRelocation &rel,
                                     uint64_t value) {
  switch (rel.Type) {
  case R_WASM_TYPE_INDEX_LEB:
  case R_WASM_FUNCTION_INDEX_LEB:
  case R_WASM_GLOBAL_INDEX_LEB:
  case R_WASM_TAG_INDEX_LEB:
  case R_WASM_MEMORY_ADDR_LEB:
  case R_WASM_MEMORY_ADDR_LEB64:
  case R_WASM_TABLE_NUMBER_LEB:
    return encodeULEB128(value, buf);
  case R_WASM_TABLE_INDEX_SLEB:
  case R_WASM_TABLE_INDEX_SLEB64:
  case R_WASM_MEMORY_ADDR_SLEB:
  case R_WASM_MEMORY_ADDR_SLEB64:
    return encodeSLEB128(static_cast<int64_t>(value), buf);
  default:
    llvm_unreachable("unexpected relocation type");
  }
}

static unsigned getRelocWidthPadded(const WasmRelocation &rel) {
  switch (rel.Type) {
  case R_WASM_TYPE_INDEX_LEB:
  case R_WASM_FUNCTION_INDEX_LEB:
  case R_WASM_GLOBAL_INDEX_LEB:
  case R_WASM_TAG_INDEX_LEB:
  case R_WASM_MEMORY_ADDR_LEB:
  case R_WASM_TABLE_NUMBER_LEB:
  case R_WASM_TABLE_INDEX_SLEB:
  case R_WASM_MEMORY_ADDR_SLEB:
    return 5;
  case R_WASM_TABLE_INDEX_SLEB64:
  case R_WASM_MEMORY_ADDR_LEB64:
  case R_WASM_MEMORY_ADDR_SLEB64:
    return 10;
  default:
    llvm_unreachable("unexpected relocation type");
  }
}

static unsigned getRelocWidth(const WasmRelocation &rel, uint64_t value) {
  uint8_t buf[10];
  return writeCompressedReloc(buf, rel, value);
}

// Relocations of type LEB and SLEB in the code section are padded to 5 bytes
// so that a fast linker can blindly overwrite them without needing to worry
// about the number of bytes needed to encode the values.
// However, when outputting non-relocatable files, the code section can be
// compressed to remove this padding for a smaller output.
// In this case we need to perform a size calculation based on the value at
// each relocation. At best we end up saving 4 bytes per relocation entry.
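// For example, the value 3 padded to 5 bytes is the LEB128 byte sequence
// 0x83 0x80 0x80 0x80 0x00, while its compressed encoding is the single
// byte 0x03, saving 4 bytes.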
//
// This function only computes the final output size. It must be called
// before getSize() is used to calculate the layout of the code section.
void InputFunction::calculateSize() {
  if (!file || !config->compressRelocations)
    return;

  LLVM_DEBUG(dbgs() << "calculateSize: " << name << "\n");

  const uint8_t *secStart = file->codeSection->Content.data();
  const uint8_t *funcStart = secStart + getInputSectionOffset();
  uint32_t functionSizeLength;
  decodeULEB128(funcStart, &functionSizeLength);
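  // decodeULEB128 stores the number of bytes it consumed into its second
  // argument, so functionSizeLength is the width of the function's
  // LEB128-encoded body-size field, which precedes the code itself.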

  uint32_t start = getInputSectionOffset();
  uint32_t end = start + function->Size;

  uint64_t tombstone = getTombstone();

  uint32_t lastRelocEnd = start + functionSizeLength;
  for (const WasmRelocation &rel : relocations) {
    LLVM_DEBUG(dbgs() << "  region: " << (rel.Offset - lastRelocEnd) << "\n");
    compressedFuncSize += rel.Offset - lastRelocEnd;
    compressedFuncSize +=
        getRelocWidth(rel, file->calcNewValue(rel, tombstone, this));
    lastRelocEnd = rel.Offset + getRelocWidthPadded(rel);
  }
  LLVM_DEBUG(dbgs() << "  final region: " << (end - lastRelocEnd) << "\n");
  compressedFuncSize += end - lastRelocEnd;

  // Now that we know how long the resulting function is, we can add the
  // encoding of its length.
  uint8_t buf[5];
  compressedSize = compressedFuncSize + encodeULEB128(compressedFuncSize, buf);

  LLVM_DEBUG(dbgs() << "  calculateSize orig: " << function->Size << "\n");
  LLVM_DEBUG(dbgs() << "  calculateSize new:  " << compressedSize << "\n");
}

// Override the default writeTo method so that we can (optionally) write the
// compressed version of the function.
void InputFunction::writeCompressed(uint8_t *buf) const {
  buf += outSecOff;
  uint8_t *orig = buf;
  (void)orig;

  const uint8_t *secStart = file->codeSection->Content.data();
  const uint8_t *funcStart = secStart + getInputSectionOffset();
  const uint8_t *end = funcStart + function->Size;
  uint64_t tombstone = getTombstone();
  uint32_t count;
  decodeULEB128(funcStart, &count);
  funcStart += count;

  LLVM_DEBUG(dbgs() << "write func: " << name << "\n");
  buf += encodeULEB128(compressedFuncSize, buf);
  const uint8_t *lastRelocEnd = funcStart;
  for (const WasmRelocation &rel : relocations) {
    unsigned chunkSize = (secStart + rel.Offset) - lastRelocEnd;
    LLVM_DEBUG(dbgs() << "  write chunk: " << chunkSize << "\n");
    memcpy(buf, lastRelocEnd, chunkSize);
    buf += chunkSize;
    buf += writeCompressedReloc(buf, rel,
                                file->calcNewValue(rel, tombstone, this));
    lastRelocEnd = secStart + rel.Offset + getRelocWidthPadded(rel);
  }

  unsigned chunkSize = end - lastRelocEnd;
  LLVM_DEBUG(dbgs() << "  write final chunk: " << chunkSize << "\n");
  memcpy(buf, lastRelocEnd, chunkSize);
  LLVM_DEBUG(dbgs() << "  total: " << (buf + chunkSize - orig) << "\n");
}

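// Translate an offset within this chunk into an offset within its output
// segment, recursing through merged chunks until a non-merged parent is
// reached.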
uint64_t InputChunk::getChunkOffset(uint64_t offset) const {
  if (const auto *ms = dyn_cast<MergeInputChunk>(this)) {
    LLVM_DEBUG(dbgs() << "getChunkOffset(merged): " << name << "\n");
    LLVM_DEBUG(dbgs() << "offset: " << offset << "\n");
    LLVM_DEBUG(dbgs() << "parentOffset: " << ms->getParentOffset(offset)
                      << "\n");
    assert(ms->parent);
    return ms->parent->getChunkOffset(ms->getParentOffset(offset));
  }
  return outputSegmentOffset + offset;
}

uint64_t InputChunk::getOffset(uint64_t offset) const {
  return outSecOff + getChunkOffset(offset);
}

uint64_t InputChunk::getVA(uint64_t offset) const {
  return (outputSeg ? outputSeg->startVA : 0) + getChunkOffset(offset);
}

// Generate code to apply relocations to the data section at runtime.
// This is only called when generating shared libraries (PIC) where addresses
// are not known at static link time.
void InputChunk::generateRelocationCode(raw_ostream &os) const {
  LLVM_DEBUG(dbgs() << "generating runtime relocations: " << name
                    << " count=" << relocations.size() << "\n");

  bool is64 = config->is64.value_or(false);
  unsigned opcode_ptr_const = is64 ? WASM_OPCODE_I64_CONST
                                   : WASM_OPCODE_I32_CONST;
  unsigned opcode_ptr_add = is64 ? WASM_OPCODE_I64_ADD
                                 : WASM_OPCODE_I32_ADD;

  uint64_t tombstone = getTombstone();
  // TODO(sbc): Encode the relocations in the data section and write a loop
  // here to apply them.
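  // Each relocation expands to a short instruction sequence: push the target
  // address (adding __memory_base or __tls_base in PIC mode), push the value
  // to apply (a GOT global and/or a base-relative constant), then store it.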
  for (const WasmRelocation &rel : relocations) {
    uint64_t offset = getVA(rel.Offset) - getInputSectionOffset();

    Symbol *sym = file->getSymbol(rel);
    if (!ctx.isPic && sym->isDefined())
      continue;

    LLVM_DEBUG(dbgs() << "gen reloc: type=" << relocTypeToString(rel.Type)
                      << " addend=" << rel.Addend << " index=" << rel.Index
                      << " output offset=" << offset << "\n");

    // Calculate the address at which to apply the relocation
    writeU8(os, opcode_ptr_const, "CONST");
    writeSleb128(os, offset, "offset");

    // In PIC mode we need to add the __memory_base
    if (ctx.isPic) {
      writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
      if (isTLS())
        writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "tls_base");
      else
        writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base");
      writeU8(os, opcode_ptr_add, "ADD");
    }

    // Now figure out what we want to store at this location
    bool is64 = relocIs64(rel.Type);
    unsigned opcode_reloc_const =
        is64 ? WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST;
    unsigned opcode_reloc_add =
        is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD;
    unsigned opcode_reloc_store =
        is64 ? WASM_OPCODE_I64_STORE : WASM_OPCODE_I32_STORE;

    if (sym->hasGOTIndex()) {
      writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
      writeUleb128(os, sym->getGOTIndex(), "global index");
      if (rel.Addend) {
        writeU8(os, opcode_reloc_const, "CONST");
        writeSleb128(os, rel.Addend, "addend");
        writeU8(os, opcode_reloc_add, "ADD");
      }
    } else {
      assert(ctx.isPic);
      const GlobalSymbol *baseSymbol = WasmSym::memoryBase;
      if (rel.Type == R_WASM_TABLE_INDEX_I32 ||
          rel.Type == R_WASM_TABLE_INDEX_I64)
        baseSymbol = WasmSym::tableBase;
      else if (sym->isTLS())
        baseSymbol = WasmSym::tlsBase;
      writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
      writeUleb128(os, baseSymbol->getGlobalIndex(), "base");
      writeU8(os, opcode_reloc_const, "CONST");
      writeSleb128(os, file->calcNewValue(rel, tombstone, this), "offset");
      writeU8(os, opcode_reloc_add, "ADD");
    }

    // Store that value at the virtual address
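    // (The store's align immediate is a log2 value, so 2 means 4-byte
    // alignment.)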
    writeU8(os, opcode_reloc_store, "I32_STORE");
    writeUleb128(os, 2, "align");
    writeUleb128(os, 0, "offset");
  }
}

// Split WASM_SEG_FLAG_STRINGS section. Such a section is a sequence of
// null-terminated strings.
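// For example, the contents "foo\0barbaz\0" split into two pieces: one at
// offset 0 containing "foo\0" and one at offset 4 containing "barbaz\0".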
void MergeInputChunk::splitStrings(ArrayRef<uint8_t> data) {
  LLVM_DEBUG(llvm::dbgs() << "splitStrings\n");
  size_t off = 0;
  StringRef s = toStringRef(data);

  while (!s.empty()) {
    size_t end = s.find(0);
    if (end == StringRef::npos)
      fatal(toString(this) + ": string is not null terminated");
    size_t size = end + 1;

    pieces.emplace_back(off, xxh3_64bits(s.substr(0, size)), true);
    s = s.substr(size);
    off += size;
  }
}

// This function is called after we obtain a complete list of input sections
// that need to be linked. It is responsible for splitting section contents
// into small chunks for further processing.
//
// Note that this function is called from parallelForEach, so it must be
// thread-safe (i.e. no memory allocation from the pools).
void MergeInputChunk::splitIntoPieces() {
  assert(pieces.empty());
  // As of now we only support WASM_SEG_FLAG_STRINGS, but in the future we
  // could add other types of splitting (see ELF's splitIntoPieces).
  assert(flags & WASM_SEG_FLAG_STRINGS);
  splitStrings(data());
}

SectionPiece *MergeInputChunk::getSectionPiece(uint64_t offset) {
  if (this->data().size() <= offset)
    fatal(toString(this) + ": offset is outside the section");

  // If offset is not at the beginning of a section piece, it is not in the
  // map. In that case we need to do a binary search of the original section
  // piece vector.
  auto it = partition_point(
      pieces, [=](SectionPiece p) { return p.inputOff <= offset; });
  return &it[-1];
}

// Returns the offset in an output section for a given input offset.
// Because the contents of a mergeable section are not contiguous in the
// output, the result is not simply a base output offset plus the input offset.
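// For example, if the piece containing the offset starts at input offset 8
// but was placed at output offset 96, then input offset 10 maps to output
// offset 98.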
uint64_t MergeInputChunk::getParentOffset(uint64_t offset) const {
  // If offset is not at the beginning of a section piece, it is not in the
  // map. In that case we need to search the original section piece vector.
  const SectionPiece *piece = getSectionPiece(offset);
  uint64_t addend = offset - piece->inputOff;
  return piece->outputOff + addend;
}

void SyntheticMergedChunk::finalizeContents() {
  // Add all string pieces to the string table builder to create section
  // contents.
  for (MergeInputChunk *sec : chunks)
    for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
      if (sec->pieces[i].live)
        builder.add(sec->getData(i));

  // Fix the string table content. After this, the contents will never change.
  builder.finalize();

  // finalize() fixed tail-optimized strings, so we can now get
  // offsets of strings. Get an offset for each string and save it
  // to a corresponding SectionPiece for easy access.
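  // (Tail merging means a string such as "bar\0" can be placed inside a
  // longer one such as "foobar\0", so pieces may share output bytes.)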
  for (MergeInputChunk *sec : chunks)
    for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
      if (sec->pieces[i].live)
        sec->pieces[i].outputOff = builder.getOffset(sec->getData(i));
}

uint64_t InputSection::getTombstoneForSection(StringRef name) {
  // When a function is not live, we need to update relocations referring to
  // it. If they occur in DWARF debug symbols, we want to change the pc of the
  // function to -1 to avoid overlapping with a valid range. However, for the
  // debug_ranges and debug_loc sections that would conflict with the existing
  // meaning of -1, so we use -2.
  if (name == ".debug_ranges" || name == ".debug_loc")
    return UINT64_C(-2);
  if (name.starts_with(".debug_"))
    return UINT64_C(-1);
  // If the function occurs in a function attribute section, change it to -1
  // since 0 is a valid function index.
  if (name.starts_with("llvm.func_attr."))
    return UINT64_C(-1);
  // Returning 0 means there is no tombstone value for this section, and the
  // relocation will just use the addend.
  return 0;
}

} // namespace wasm
} // namespace lld