1//===- Chunks.h -------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_COFF_CHUNKS_H
10#define LLD_COFF_CHUNKS_H
11
12#include "Config.h"
13#include "InputFiles.h"
14#include "lld/Common/LLVM.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/PointerIntPair.h"
17#include "llvm/ADT/iterator.h"
18#include "llvm/ADT/iterator_range.h"
19#include "llvm/MC/StringTableBuilder.h"
20#include "llvm/Object/COFF.h"
21#include "llvm/Object/WindowsMachineFlag.h"
22#include <utility>
23#include <vector>
24
25namespace lld::coff {
26
27using llvm::COFF::ImportDirectoryTableEntry;
28using llvm::object::chpe_range_type;
29using llvm::object::coff_relocation;
30using llvm::object::coff_section;
31using llvm::object::COFFSymbolRef;
32using llvm::object::SectionRef;
33
34class Baserel;
35class Defined;
36class DefinedImportData;
37class DefinedRegular;
38class ObjFile;
39class OutputSection;
40class RuntimePseudoReloc;
41class Symbol;
42
// Section-characteristic bits that describe permissions (discardable,
// writable, readable, executable, etc).
constexpr uint32_t permMask = 0xFE000000;

// Section-characteristic bits that describe the section type (code, data,
// bss).
constexpr uint32_t typeMask = 0x000000E0;

// Largest supported section alignment expressed as a base-2 logarithm:
// log2(8192) == 13.
enum : unsigned { Log2MaxSectionAlignment = 13 };
51
52// A Chunk represents a chunk of data that will occupy space in the
53// output (if the resolver chose that). It may or may not be backed by
54// a section of an input file. It could be linker-created data, or
55// doesn't even have actual data (if common or bss).
56class Chunk {
57public:
58 enum Kind : uint8_t {
59 SectionKind,
60 SectionECKind,
61 OtherKind,
62 ImportThunkKind,
63 ECExportThunkKind
64 };
65 Kind kind() const { return chunkKind; }
66
67 // Returns the size of this chunk (even if this is a common or BSS.)
68 size_t getSize() const;
69
70 // Returns chunk alignment in power of two form. Value values are powers of
71 // two from 1 to 8192.
72 uint32_t getAlignment() const { return 1U << p2Align; }
73
74 // Update the chunk section alignment measured in bytes. Internally alignment
75 // is stored in log2.
76 void setAlignment(uint32_t align) {
77 // Treat zero byte alignment as 1 byte alignment.
78 align = align ? align : 1;
79 assert(llvm::isPowerOf2_32(align) && "alignment is not a power of 2");
80 p2Align = llvm::Log2_32(Value: align);
81 assert(p2Align <= Log2MaxSectionAlignment &&
82 "impossible requested alignment");
83 }
84
85 // Write this chunk to a mmap'ed file, assuming Buf is pointing to
86 // beginning of the file. Because this function may use RVA values
87 // of other chunks for relocations, you need to set them properly
88 // before calling this function.
89 void writeTo(uint8_t *buf) const;
90
91 // The writer sets and uses the addresses. In practice, PE images cannot be
92 // larger than 2GB. Chunks are always laid as part of the image, so Chunk RVAs
93 // can be stored with 32 bits.
94 uint32_t getRVA() const { return rva; }
95 void setRVA(uint64_t v) {
96 // This may truncate. The writer checks for overflow later.
97 rva = (uint32_t)v;
98 }
99
100 // Returns readable/writable/executable bits.
101 uint32_t getOutputCharacteristics() const;
102
103 // Returns the section name if this is a section chunk.
104 // It is illegal to call this function on non-section chunks.
105 StringRef getSectionName() const;
106
107 // An output section has pointers to chunks in the section, and each
108 // chunk has a back pointer to an output section.
109 void setOutputSectionIdx(uint16_t o) { osidx = o; }
110 uint16_t getOutputSectionIdx() const { return osidx; }
111
112 // Windows-specific.
113 // Collect all locations that contain absolute addresses for base relocations.
114 void getBaserels(std::vector<Baserel> *res);
115
116 // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
117 // bytes, so this is used only for logging or debugging.
118 StringRef getDebugName() const;
119
120 // Return true if this file has the hotpatch flag set to true in the
121 // S_COMPILE3 record in codeview debug info. Also returns true for some thunks
122 // synthesized by the linker.
123 bool isHotPatchable() const;
124
125 MachineTypes getMachine() const;
126 llvm::Triple::ArchType getArch() const;
127 std::optional<chpe_range_type> getArm64ECRangeType() const;
128
129 // ARM64EC entry thunk associated with the chunk.
130 Defined *getEntryThunk() const;
131 void setEntryThunk(Defined *entryThunk);
132
133protected:
134 Chunk(Kind k = OtherKind) : chunkKind(k), hasData(true), p2Align(0) {}
135
136 const Kind chunkKind;
137
138public:
139 // Returns true if this has non-zero data. BSS chunks return
140 // false. If false is returned, the space occupied by this chunk
141 // will be filled with zeros. Corresponds to the
142 // IMAGE_SCN_CNT_UNINITIALIZED_DATA section characteristic bit.
143 uint8_t hasData : 1;
144
145public:
146 // The alignment of this chunk, stored in log2 form. The writer uses the
147 // value.
148 uint8_t p2Align : 7;
149
150 // The output section index for this chunk. The first valid section number is
151 // one.
152 uint16_t osidx = 0;
153
154 // The RVA of this chunk in the output. The writer sets a value.
155 uint32_t rva = 0;
156};
157
158class NonSectionChunk : public Chunk {
159public:
160 virtual ~NonSectionChunk() = default;
161
162 // Returns the size of this chunk (even if this is a common or BSS.)
163 virtual size_t getSize() const = 0;
164
165 virtual uint32_t getOutputCharacteristics() const { return 0; }
166
167 // Write this chunk to a mmap'ed file, assuming Buf is pointing to
168 // beginning of the file. Because this function may use RVA values
169 // of other chunks for relocations, you need to set them properly
170 // before calling this function.
171 virtual void writeTo(uint8_t *buf) const {}
172
173 // Returns the section name if this is a section chunk.
174 // It is illegal to call this function on non-section chunks.
175 virtual StringRef getSectionName() const {
176 llvm_unreachable("unimplemented getSectionName");
177 }
178
179 // Windows-specific.
180 // Collect all locations that contain absolute addresses for base relocations.
181 virtual void getBaserels(std::vector<Baserel> *res) {}
182
183 virtual MachineTypes getMachine() const { return IMAGE_FILE_MACHINE_UNKNOWN; }
184
185 // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
186 // bytes, so this is used only for logging or debugging.
187 virtual StringRef getDebugName() const { return ""; }
188
189 // Verify that chunk relocations are within their ranges.
190 virtual bool verifyRanges() { return true; };
191
192 // If needed, extend the chunk to ensure all relocations are within the
193 // allowed ranges. Return the additional space required for the extension.
194 virtual uint32_t extendRanges() { return 0; };
195
196 virtual Defined *getEntryThunk() const { return nullptr; };
197
198 static bool classof(const Chunk *c) { return c->kind() >= OtherKind; }
199
200protected:
201 NonSectionChunk(Kind k = OtherKind) : Chunk(k) {}
202};
203
204class NonSectionCodeChunk : public NonSectionChunk {
205public:
206 virtual uint32_t getOutputCharacteristics() const override {
207 return llvm::COFF::IMAGE_SCN_MEM_READ | llvm::COFF::IMAGE_SCN_MEM_EXECUTE;
208 }
209
210protected:
211 NonSectionCodeChunk(Kind k = OtherKind) : NonSectionChunk(k) {}
212};
213
214// MinGW specific; information about one individual location in the image
215// that needs to be fixed up at runtime after loading. This represents
216// one individual element in the PseudoRelocTableChunk table.
217class RuntimePseudoReloc {
218public:
219 RuntimePseudoReloc(Defined *sym, SectionChunk *target, uint32_t targetOffset,
220 int flags)
221 : sym(sym), target(target), targetOffset(targetOffset), flags(flags) {}
222
223 Defined *sym;
224 SectionChunk *target;
225 uint32_t targetOffset;
226 // The Flags field contains the size of the relocation, in bits. No other
227 // flags are currently defined.
228 int flags;
229};
230
231// A chunk corresponding a section of an input file.
232class SectionChunk : public Chunk {
233 // Identical COMDAT Folding feature accesses section internal data.
234 friend class ICF;
235
236public:
237 class symbol_iterator : public llvm::iterator_adaptor_base<
238 symbol_iterator, const coff_relocation *,
239 std::random_access_iterator_tag, Symbol *> {
240 friend SectionChunk;
241
242 ObjFile *file;
243
244 symbol_iterator(ObjFile *file, const coff_relocation *i)
245 : symbol_iterator::iterator_adaptor_base(i), file(file) {}
246
247 public:
248 symbol_iterator() = default;
249
250 Symbol *operator*() const { return file->getSymbol(symbolIndex: I->SymbolTableIndex); }
251 };
252
253 SectionChunk(ObjFile *file, const coff_section *header, Kind k = SectionKind);
254 static bool classof(const Chunk *c) { return c->kind() <= SectionECKind; }
255 size_t getSize() const { return header->SizeOfRawData; }
256 ArrayRef<uint8_t> getContents() const;
257 void writeTo(uint8_t *buf) const;
258 MachineTypes getMachine() const;
259
260 // Defend against unsorted relocations. This may be overly conservative.
261 void sortRelocations();
262
263 // Write and relocate a portion of the section. This is intended to be called
264 // in a loop. Relocations must be sorted first.
265 void writeAndRelocateSubsection(ArrayRef<uint8_t> sec,
266 ArrayRef<uint8_t> subsec,
267 uint32_t &nextRelocIndex, uint8_t *buf) const;
268
269 uint32_t getOutputCharacteristics() const {
270 return header->Characteristics & (permMask | typeMask);
271 }
272 StringRef getSectionName() const {
273 return StringRef(sectionNameData, sectionNameSize);
274 }
275 void getBaserels(std::vector<Baserel> *res);
276 bool isCOMDAT() const;
277 void applyRelocation(uint8_t *off, const coff_relocation &rel) const;
278 void applyRelX64(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
279 uint64_t p, uint64_t imageBase) const;
280 void applyRelX86(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
281 uint64_t p, uint64_t imageBase) const;
282 void applyRelARM(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
283 uint64_t p, uint64_t imageBase) const;
284 void applyRelARM64(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
285 uint64_t p, uint64_t imageBase) const;
286
287 void getRuntimePseudoRelocs(std::vector<RuntimePseudoReloc> &res);
288
289 // Called if the garbage collector decides to not include this chunk
290 // in a final output. It's supposed to print out a log message to stdout.
291 void printDiscardedMessage() const;
292
293 // Adds COMDAT associative sections to this COMDAT section. A chunk
294 // and its children are treated as a group by the garbage collector.
295 void addAssociative(SectionChunk *child);
296
297 StringRef getDebugName() const;
298
299 // True if this is a codeview debug info chunk. These will not be laid out in
300 // the image. Instead they will end up in the PDB, if one is requested.
301 bool isCodeView() const {
302 return getSectionName() == ".debug" || getSectionName().starts_with(Prefix: ".debug$");
303 }
304
305 // True if this is a DWARF debug info or exception handling chunk.
306 bool isDWARF() const {
307 return getSectionName().starts_with(Prefix: ".debug_") || getSectionName() == ".eh_frame";
308 }
309
310 // Allow iteration over the bodies of this chunk's relocated symbols.
311 llvm::iterator_range<symbol_iterator> symbols() const {
312 return llvm::make_range(x: symbol_iterator(file, relocsData),
313 y: symbol_iterator(file, relocsData + relocsSize));
314 }
315
316 ArrayRef<coff_relocation> getRelocs() const {
317 return llvm::ArrayRef(relocsData, relocsSize);
318 }
319
320 // Reloc setter used by ARM range extension thunk insertion.
321 void setRelocs(ArrayRef<coff_relocation> newRelocs) {
322 relocsData = newRelocs.data();
323 relocsSize = newRelocs.size();
324 assert(relocsSize == newRelocs.size() && "reloc size truncation");
325 }
326
327 // Single linked list iterator for associated comdat children.
328 class AssociatedIterator
329 : public llvm::iterator_facade_base<
330 AssociatedIterator, std::forward_iterator_tag, SectionChunk> {
331 public:
332 AssociatedIterator() = default;
333 AssociatedIterator(SectionChunk *head) : cur(head) {}
334 bool operator==(const AssociatedIterator &r) const { return cur == r.cur; }
335 // FIXME: Wrong const-ness, but it makes filter ranges work.
336 SectionChunk &operator*() const { return *cur; }
337 SectionChunk &operator*() { return *cur; }
338 AssociatedIterator &operator++() {
339 cur = cur->assocChildren;
340 return *this;
341 }
342
343 private:
344 SectionChunk *cur = nullptr;
345 };
346
347 // Allow iteration over the associated child chunks for this section.
348 llvm::iterator_range<AssociatedIterator> children() const {
349 // Associated sections do not have children. The assocChildren field is
350 // part of the parent's list of children.
351 bool isAssoc = selection == llvm::COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
352 return llvm::make_range(
353 x: AssociatedIterator(isAssoc ? nullptr : assocChildren),
354 y: AssociatedIterator(nullptr));
355 }
356
357 // The section ID this chunk belongs to in its Obj.
358 uint32_t getSectionNumber() const;
359
360 ArrayRef<uint8_t> consumeDebugMagic();
361
362 static ArrayRef<uint8_t> consumeDebugMagic(ArrayRef<uint8_t> data,
363 StringRef sectionName);
364
365 static SectionChunk *findByName(ArrayRef<SectionChunk *> sections,
366 StringRef name);
367
368 // The file that this chunk was created from.
369 ObjFile *file;
370
371 // Pointer to the COFF section header in the input file.
372 const coff_section *header;
373
374 // The COMDAT leader symbol if this is a COMDAT chunk.
375 DefinedRegular *sym = nullptr;
376
377 // The CRC of the contents as described in the COFF spec 4.5.5.
378 // Auxiliary Format 5: Section Definitions. Used for ICF.
379 uint32_t checksum = 0;
380
381 // Used by the garbage collector.
382 bool live;
383
384 // Whether this section needs to be kept distinct from other sections during
385 // ICF. This is set by the driver using address-significance tables.
386 bool keepUnique = false;
387
388 // The COMDAT selection if this is a COMDAT chunk.
389 llvm::COFF::COMDATType selection = (llvm::COFF::COMDATType)0;
390
391 // A pointer pointing to a replacement for this chunk.
392 // Initially it points to "this" object. If this chunk is merged
393 // with other chunk by ICF, it points to another chunk,
394 // and this chunk is considered as dead.
395 SectionChunk *repl;
396
397private:
398 SectionChunk *assocChildren = nullptr;
399
400 // Used for ICF (Identical COMDAT Folding)
401 void replace(SectionChunk *other);
402 uint32_t eqClass[2] = {0, 0};
403
404 // Relocations for this section. Size is stored below.
405 const coff_relocation *relocsData;
406
407 // Section name string. Size is stored below.
408 const char *sectionNameData;
409
410 uint32_t relocsSize = 0;
411 uint32_t sectionNameSize = 0;
412};
413
414// A section chunk corresponding a section of an EC input file.
415class SectionChunkEC final : public SectionChunk {
416public:
417 static bool classof(const Chunk *c) { return c->kind() == SectionECKind; }
418
419 SectionChunkEC(ObjFile *file, const coff_section *header)
420 : SectionChunk(file, header, SectionECKind) {}
421 Defined *entryThunk = nullptr;
422};
423
424// Inline methods to implement faux-virtual dispatch for SectionChunk.
425
426inline size_t Chunk::getSize() const {
427 if (isa<SectionChunk>(Val: this))
428 return static_cast<const SectionChunk *>(this)->getSize();
429 return static_cast<const NonSectionChunk *>(this)->getSize();
430}
431
432inline uint32_t Chunk::getOutputCharacteristics() const {
433 if (isa<SectionChunk>(Val: this))
434 return static_cast<const SectionChunk *>(this)->getOutputCharacteristics();
435 return static_cast<const NonSectionChunk *>(this)->getOutputCharacteristics();
436}
437
438inline void Chunk::writeTo(uint8_t *buf) const {
439 if (isa<SectionChunk>(Val: this))
440 static_cast<const SectionChunk *>(this)->writeTo(buf);
441 else
442 static_cast<const NonSectionChunk *>(this)->writeTo(buf);
443}
444
445inline StringRef Chunk::getSectionName() const {
446 if (isa<SectionChunk>(Val: this))
447 return static_cast<const SectionChunk *>(this)->getSectionName();
448 return static_cast<const NonSectionChunk *>(this)->getSectionName();
449}
450
451inline void Chunk::getBaserels(std::vector<Baserel> *res) {
452 if (isa<SectionChunk>(Val: this))
453 static_cast<SectionChunk *>(this)->getBaserels(res);
454 else
455 static_cast<NonSectionChunk *>(this)->getBaserels(res);
456}
457
458inline StringRef Chunk::getDebugName() const {
459 if (isa<SectionChunk>(Val: this))
460 return static_cast<const SectionChunk *>(this)->getDebugName();
461 return static_cast<const NonSectionChunk *>(this)->getDebugName();
462}
463
464inline MachineTypes Chunk::getMachine() const {
465 if (isa<SectionChunk>(Val: this))
466 return static_cast<const SectionChunk *>(this)->getMachine();
467 return static_cast<const NonSectionChunk *>(this)->getMachine();
468}
469
470inline llvm::Triple::ArchType Chunk::getArch() const {
471 return llvm::getMachineArchType(machine: getMachine());
472}
473
474inline std::optional<chpe_range_type> Chunk::getArm64ECRangeType() const {
475 // Data sections don't need codemap entries.
476 if (!(getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE))
477 return std::nullopt;
478
479 switch (getMachine()) {
480 case AMD64:
481 return chpe_range_type::Amd64;
482 case ARM64EC:
483 return chpe_range_type::Arm64EC;
484 default:
485 return chpe_range_type::Arm64;
486 }
487}
488
489// This class is used to implement an lld-specific feature (not implemented in
490// MSVC) that minimizes the output size by finding string literals sharing tail
491// parts and merging them.
492//
493// If string tail merging is enabled and a section is identified as containing a
494// string literal, it is added to a MergeChunk with an appropriate alignment.
495// The MergeChunk then tail merges the strings using the StringTableBuilder
496// class and assigns RVAs and section offsets to each of the member chunks based
497// on the offsets assigned by the StringTableBuilder.
498class MergeChunk : public NonSectionChunk {
499public:
500 MergeChunk(uint32_t alignment);
501 static void addSection(COFFLinkerContext &ctx, SectionChunk *c);
502 void finalizeContents();
503 void assignSubsectionRVAs();
504
505 uint32_t getOutputCharacteristics() const override;
506 StringRef getSectionName() const override { return ".rdata"; }
507 size_t getSize() const override;
508 void writeTo(uint8_t *buf) const override;
509
510 std::vector<SectionChunk *> sections;
511
512private:
513 llvm::StringTableBuilder builder;
514 bool finalized = false;
515};
516
517// A chunk for common symbols. Common chunks don't have actual data.
518class CommonChunk : public NonSectionChunk {
519public:
520 CommonChunk(const COFFSymbolRef sym);
521 size_t getSize() const override { return sym.getValue(); }
522 uint32_t getOutputCharacteristics() const override;
523 StringRef getSectionName() const override { return ".bss"; }
524
525 bool live;
526
527private:
528 const COFFSymbolRef sym;
529};
530
531// A chunk for linker-created strings.
532class StringChunk : public NonSectionChunk {
533public:
534 explicit StringChunk(StringRef s) : str(s) {}
535 size_t getSize() const override { return str.size() + 1; }
536 void writeTo(uint8_t *buf) const override;
537
538private:
539 StringRef str;
540};
541
// Machine-code templates for import thunks. The sizeof() of each array is
// what the corresponding ImportThunkChunk* class reports as its size; the
// zeroed operand fields are presumably filled in by the writeTo()
// implementations, which are defined out of line.

// x86 / x64 template (6 bytes).
static const uint8_t importThunkX86[] = {
    0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0
};

// 32-bit ARM (Thumb-2) template (12 bytes).
static const uint8_t importThunkARM[] = {
    0x40, 0xF2, 0x00, 0x0C, // movw ip, #0
    0xC0, 0xF2, 0x00, 0x0C, // movt ip, #0
    0xDC, 0xF8, 0x00, 0xF0, // ldr.w pc, [ip]
};

// ARM64 template (12 bytes).
static const uint8_t importThunkARM64[] = {
    0x10, 0x00, 0x00, 0x90, // adrp x16, #0
    0x10, 0x02, 0x40, 0xF9, // ldr  x16, [x16]
    0x00, 0x02, 0x1F, 0xD6, // br   x16
};

// ARM64EC template (20 bytes).
static const uint8_t importThunkARM64EC[] = {
    0x0B, 0x00, 0x00, 0x90, // adrp x11, 0x0
    0x6B, 0x01, 0x40, 0xF9, // ldr  x11, [x11]
    0x0A, 0x00, 0x00, 0x90, // adrp x10, 0x0
    0x4A, 0x01, 0x00, 0x91, // add  x10, x10, #0x0
    0x00, 0x00, 0x00, 0x14  // b    0x0
};
565
566// Windows-specific.
567// A chunk for DLL import jump table entry. In a final output, its
568// contents will be a JMP instruction to some __imp_ symbol.
569class ImportThunkChunk : public NonSectionCodeChunk {
570public:
571 ImportThunkChunk(COFFLinkerContext &ctx, Defined *s);
572 static bool classof(const Chunk *c) { return c->kind() == ImportThunkKind; }
573
574 // We track the usage of the thunk symbol separately from the import file
575 // to avoid generating unnecessary thunks.
576 bool live;
577
578protected:
579 Defined *impSymbol;
580 COFFLinkerContext &ctx;
581};
582
583class ImportThunkChunkX64 : public ImportThunkChunk {
584public:
585 explicit ImportThunkChunkX64(COFFLinkerContext &ctx, Defined *s);
586 size_t getSize() const override { return sizeof(importThunkX86); }
587 void writeTo(uint8_t *buf) const override;
588 MachineTypes getMachine() const override { return AMD64; }
589};
590
591class ImportThunkChunkX86 : public ImportThunkChunk {
592public:
593 explicit ImportThunkChunkX86(COFFLinkerContext &ctx, Defined *s)
594 : ImportThunkChunk(ctx, s) {}
595 size_t getSize() const override { return sizeof(importThunkX86); }
596 void getBaserels(std::vector<Baserel> *res) override;
597 void writeTo(uint8_t *buf) const override;
598 MachineTypes getMachine() const override { return I386; }
599};
600
601class ImportThunkChunkARM : public ImportThunkChunk {
602public:
603 explicit ImportThunkChunkARM(COFFLinkerContext &ctx, Defined *s)
604 : ImportThunkChunk(ctx, s) {
605 setAlignment(2);
606 }
607 size_t getSize() const override { return sizeof(importThunkARM); }
608 void getBaserels(std::vector<Baserel> *res) override;
609 void writeTo(uint8_t *buf) const override;
610 MachineTypes getMachine() const override { return ARMNT; }
611};
612
613class ImportThunkChunkARM64 : public ImportThunkChunk {
614public:
615 explicit ImportThunkChunkARM64(COFFLinkerContext &ctx, Defined *s,
616 MachineTypes machine)
617 : ImportThunkChunk(ctx, s), machine(machine) {
618 setAlignment(4);
619 }
620 size_t getSize() const override { return sizeof(importThunkARM64); }
621 void writeTo(uint8_t *buf) const override;
622 MachineTypes getMachine() const override { return machine; }
623
624private:
625 MachineTypes machine;
626};
627
628// ARM64EC __impchk_* thunk implementation.
629// Performs an indirect call to an imported function pointer
630// using the __icall_helper_arm64ec helper function.
631class ImportThunkChunkARM64EC : public ImportThunkChunk {
632public:
633 explicit ImportThunkChunkARM64EC(ImportFile *file);
634 size_t getSize() const override;
635 MachineTypes getMachine() const override { return ARM64EC; }
636 void writeTo(uint8_t *buf) const override;
637 bool verifyRanges() override;
638 uint32_t extendRanges() override;
639
640 Defined *exitThunk = nullptr;
641 Defined *sym = nullptr;
642 bool extended = false;
643
644private:
645 ImportFile *file;
646};
647
648class RangeExtensionThunkARM : public NonSectionCodeChunk {
649public:
650 explicit RangeExtensionThunkARM(COFFLinkerContext &ctx, Defined *t)
651 : target(t), ctx(ctx) {
652 setAlignment(2);
653 }
654 size_t getSize() const override;
655 void writeTo(uint8_t *buf) const override;
656 MachineTypes getMachine() const override { return ARMNT; }
657
658 Defined *target;
659
660private:
661 COFFLinkerContext &ctx;
662};
663
664// A ragnge extension thunk used for both ARM64EC and ARM64 machine types.
665class RangeExtensionThunkARM64 : public NonSectionCodeChunk {
666public:
667 explicit RangeExtensionThunkARM64(MachineTypes machine, Defined *t)
668 : target(t), machine(machine) {
669 setAlignment(4);
670 assert(llvm::COFF::isAnyArm64(machine));
671 }
672 size_t getSize() const override;
673 void writeTo(uint8_t *buf) const override;
674 MachineTypes getMachine() const override { return machine; }
675
676 Defined *target;
677
678private:
679 MachineTypes machine;
680};
681
682// A chunk used to guarantee the same address for a function in both views of
683// a hybrid image. Similar to RangeExtensionThunkARM64 chunks, it calls the
684// target symbol using a BR instruction. It also contains an entry thunk for EC
685// compatibility and additional ARM64X relocations that swap targets between
686// views.
687class SameAddressThunkARM64EC : public RangeExtensionThunkARM64 {
688public:
689 explicit SameAddressThunkARM64EC(Defined *t, Defined *hybridTarget,
690 Defined *entryThunk)
691 : RangeExtensionThunkARM64(ARM64EC, t), hybridTarget(hybridTarget),
692 entryThunk(entryThunk) {}
693
694 Defined *getEntryThunk() const override { return entryThunk; }
695 void setDynamicRelocs(COFFLinkerContext &ctx) const;
696
697private:
698 Defined *hybridTarget;
699 Defined *entryThunk;
700};
701
702// Windows-specific.
703// See comments for DefinedLocalImport class.
704class LocalImportChunk : public NonSectionChunk {
705public:
706 explicit LocalImportChunk(COFFLinkerContext &ctx, Defined *s);
707 size_t getSize() const override;
708 void getBaserels(std::vector<Baserel> *res) override;
709 void writeTo(uint8_t *buf) const override;
710
711private:
712 Defined *sym;
713 COFFLinkerContext &ctx;
714};
715
716// Duplicate RVAs are not allowed in RVA tables, so unique symbols by chunk and
717// offset into the chunk. Order does not matter as the RVA table will be sorted
718// later.
719struct ChunkAndOffset {
720 Chunk *inputChunk;
721 uint32_t offset;
722
723 struct DenseMapInfo {
724 static ChunkAndOffset getEmptyKey() {
725 return {.inputChunk: llvm::DenseMapInfo<Chunk *>::getEmptyKey(), .offset: 0};
726 }
727 static ChunkAndOffset getTombstoneKey() {
728 return {.inputChunk: llvm::DenseMapInfo<Chunk *>::getTombstoneKey(), .offset: 0};
729 }
730 static unsigned getHashValue(const ChunkAndOffset &co) {
731 return llvm::DenseMapInfo<std::pair<Chunk *, uint32_t>>::getHashValue(
732 PairVal: {co.inputChunk, co.offset});
733 }
734 static bool isEqual(const ChunkAndOffset &lhs, const ChunkAndOffset &rhs) {
735 return lhs.inputChunk == rhs.inputChunk && lhs.offset == rhs.offset;
736 }
737 };
738};
739
740using SymbolRVASet = llvm::DenseSet<ChunkAndOffset>;
741
742// Table which contains symbol RVAs. Used for /safeseh and /guard:cf.
743class RVATableChunk : public NonSectionChunk {
744public:
745 explicit RVATableChunk(SymbolRVASet s) : syms(std::move(s)) {}
746 size_t getSize() const override { return syms.size() * 4; }
747 void writeTo(uint8_t *buf) const override;
748
749private:
750 SymbolRVASet syms;
751};
752
753// Table which contains symbol RVAs with flags. Used for /guard:ehcont.
754class RVAFlagTableChunk : public NonSectionChunk {
755public:
756 explicit RVAFlagTableChunk(SymbolRVASet s) : syms(std::move(s)) {}
757 size_t getSize() const override { return syms.size() * 5; }
758 void writeTo(uint8_t *buf) const override;
759
760private:
761 SymbolRVASet syms;
762};
763
764// Windows-specific.
765// This class represents a block in .reloc section.
766// See the PE/COFF spec 5.6 for details.
767class BaserelChunk : public NonSectionChunk {
768public:
769 BaserelChunk(uint32_t page, Baserel *begin, Baserel *end);
770 size_t getSize() const override { return data.size(); }
771 void writeTo(uint8_t *buf) const override;
772
773private:
774 std::vector<uint8_t> data;
775};
776
777class Baserel {
778public:
779 Baserel(uint32_t v, uint8_t ty) : rva(v), type(ty) {}
780 explicit Baserel(uint32_t v, llvm::COFF::MachineTypes machine)
781 : Baserel(v, getDefaultType(machine)) {}
782 static uint8_t getDefaultType(llvm::COFF::MachineTypes machine);
783
784 uint32_t rva;
785 uint8_t type;
786};
787
788// This is a placeholder Chunk, to allow attaching a DefinedSynthetic to a
789// specific place in a section, without any data. This is used for the MinGW
790// specific symbol __RUNTIME_PSEUDO_RELOC_LIST_END__, even though the concept
791// of an empty chunk isn't MinGW specific.
792class EmptyChunk : public NonSectionChunk {
793public:
794 EmptyChunk() {}
795 size_t getSize() const override { return 0; }
796 void writeTo(uint8_t *buf) const override {}
797};
798
799class ECCodeMapEntry {
800public:
801 ECCodeMapEntry(Chunk *first, Chunk *last, chpe_range_type type)
802 : first(first), last(last), type(type) {}
803 Chunk *first;
804 Chunk *last;
805 chpe_range_type type;
806};
807
808// This is a chunk containing CHPE code map on EC targets. It's a table
809// of address ranges and their types.
810class ECCodeMapChunk : public NonSectionChunk {
811public:
812 ECCodeMapChunk(std::vector<ECCodeMapEntry> &map) : map(map) {}
813 size_t getSize() const override;
814 void writeTo(uint8_t *buf) const override;
815
816private:
817 std::vector<ECCodeMapEntry> &map;
818};
819
820class CHPECodeRangesChunk : public NonSectionChunk {
821public:
822 CHPECodeRangesChunk(std::vector<std::pair<Chunk *, Defined *>> &exportThunks)
823 : exportThunks(exportThunks) {}
824 size_t getSize() const override;
825 void writeTo(uint8_t *buf) const override;
826
827private:
828 std::vector<std::pair<Chunk *, Defined *>> &exportThunks;
829};
830
831class CHPERedirectionChunk : public NonSectionChunk {
832public:
833 CHPERedirectionChunk(std::vector<std::pair<Chunk *, Defined *>> &exportThunks)
834 : exportThunks(exportThunks) {}
835 size_t getSize() const override;
836 void writeTo(uint8_t *buf) const override;
837
838private:
839 std::vector<std::pair<Chunk *, Defined *>> &exportThunks;
840};
841
// x86-64 code template for EC export thunks (16 bytes). ECExportThunkChunk
// reports sizeof() of this array as its size. The four zero bytes after the
// 0xE9 opcode are the jump displacement.
static const uint8_t ECExportThunkCode[] = {
    0x48, 0x8B, 0xC4,             // movq    %rsp, %rax
    0x48, 0x89, 0x58, 0x20,       // movq    %rbx, 0x20(%rax)
    0x55,                         // pushq   %rbp
    0x5D,                         // popq    %rbp
    0xE9, 0x00, 0x00, 0x00, 0x00, // jmp     *0x0
    0xCC,                         // int3
    0xCC                          // int3
};
851
852class ECExportThunkChunk : public NonSectionCodeChunk {
853public:
854 explicit ECExportThunkChunk(Defined *targetSym)
855 : NonSectionCodeChunk(ECExportThunkKind), target(targetSym) {}
856 static bool classof(const Chunk *c) { return c->kind() == ECExportThunkKind; }
857
858 size_t getSize() const override { return sizeof(ECExportThunkCode); };
859 void writeTo(uint8_t *buf) const override;
860 MachineTypes getMachine() const override { return AMD64; }
861
862 Defined *target;
863};
864
865// ARM64X relocation value, potentially relative to a symbol.
866class Arm64XRelocVal {
867public:
868 Arm64XRelocVal(uint64_t value = 0) : value(value) {}
869 Arm64XRelocVal(Defined *sym, int32_t offset = 0) : sym(sym), value(offset) {}
870 Arm64XRelocVal(const Chunk *chunk, int32_t offset = 0)
871 : chunk(chunk), value(offset) {}
872 uint64_t get() const;
873
874private:
875 Defined *sym = nullptr;
876 const Chunk *chunk = nullptr;
877 uint64_t value;
878};
879
880// ARM64X entry for dynamic relocations.
881class Arm64XDynamicRelocEntry {
882public:
883 Arm64XDynamicRelocEntry(llvm::COFF::Arm64XFixupType type, uint8_t size,
884 Arm64XRelocVal offset, Arm64XRelocVal value)
885 : offset(offset), value(value), type(type), size(size) {}
886
887 size_t getSize() const;
888 void writeTo(uint8_t *buf) const;
889
890 Arm64XRelocVal offset;
891 Arm64XRelocVal value;
892
893private:
894 llvm::COFF::Arm64XFixupType type;
895 uint8_t size;
896};
897
898// Dynamic relocation chunk containing ARM64X relocations for the hybrid image.
899class DynamicRelocsChunk : public NonSectionChunk {
900public:
901 DynamicRelocsChunk() {}
902 size_t getSize() const override { return size; }
903 void writeTo(uint8_t *buf) const override;
904 void finalize();
905
906 void add(llvm::COFF::Arm64XFixupType type, uint8_t size,
907 Arm64XRelocVal offset, Arm64XRelocVal value = Arm64XRelocVal()) {
908 arm64xRelocs.emplace_back(args&: type, args&: size, args&: offset, args&: value);
909 }
910
911 void set(Arm64XRelocVal offset, Arm64XRelocVal value);
912
913private:
914 std::vector<Arm64XDynamicRelocEntry> arm64xRelocs;
915 size_t size;
916};
917
918// MinGW specific, for the "automatic import of variables from DLLs" feature.
919// This provides the table of runtime pseudo relocations, for variable
920// references that turned out to need to be imported from a DLL even though
921// the reference didn't use the dllimport attribute. The MinGW runtime will
922// process this table after loading, before handling control over to user
923// code.
924class PseudoRelocTableChunk : public NonSectionChunk {
925public:
926 PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> &relocs)
927 : relocs(std::move(relocs)) {
928 setAlignment(4);
929 }
930 size_t getSize() const override;
931 void writeTo(uint8_t *buf) const override;
932
933private:
934 std::vector<RuntimePseudoReloc> relocs;
935};
936
937// MinGW specific. A Chunk that contains one pointer-sized absolute value.
938class AbsolutePointerChunk : public NonSectionChunk {
939public:
940 AbsolutePointerChunk(SymbolTable &symtab, uint64_t value)
941 : value(value), symtab(symtab) {
942 setAlignment(getSize());
943 }
944 size_t getSize() const override;
945 void writeTo(uint8_t *buf) const override;
946 MachineTypes getMachine() const override;
947
948private:
949 uint64_t value;
950 SymbolTable &symtab;
951};
952
953// Return true if this file has the hotpatch flag set to true in the S_COMPILE3
954// record in codeview debug info. Also returns true for some thunks synthesized
955// by the linker.
956inline bool Chunk::isHotPatchable() const {
957 if (auto *sc = dyn_cast<SectionChunk>(Val: this))
958 return sc->file->hotPatchable;
959 else if (isa<ImportThunkChunk>(Val: this))
960 return true;
961 return false;
962}
963
964inline Defined *Chunk::getEntryThunk() const {
965 if (auto *c = dyn_cast<const SectionChunkEC>(Val: this))
966 return c->entryThunk;
967 if (auto *c = dyn_cast<const NonSectionChunk>(Val: this))
968 return c->getEntryThunk();
969 return nullptr;
970}
971
972inline void Chunk::setEntryThunk(Defined *entryThunk) {
973 if (auto c = dyn_cast<SectionChunkEC>(Val: this))
974 c->entryThunk = entryThunk;
975}
976
// Relocation-patching helpers; their definitions are not visible in this
// header. Each takes a pointer to the location to patch.
// NOTE(review): judging by the names these patch 32-bit ARM (Thumb)
// encodings; confirm against the definitions.
void applyMOV32T(uint8_t *off, uint32_t v);
void applyBranch24T(uint8_t *off, int32_t v);

// NOTE(review): judging by the names these patch ARM64 encodings; confirm
// against the definitions.
void applyArm64Addr(uint8_t *off, uint64_t s, uint64_t p, int shift);
void applyArm64Imm(uint8_t *off, uint64_t imm, uint32_t rangeLimit);
void applyArm64Branch26(uint8_t *off, int64_t v);
983
984// Convenience class for initializing a coff_section with specific flags.
985class FakeSection {
986public:
987 FakeSection(int c) { section.Characteristics = c; }
988
989 coff_section section;
990};
991
992// Convenience class for initializing a SectionChunk with specific flags.
993class FakeSectionChunk {
994public:
995 FakeSectionChunk(const coff_section *section) : chunk(nullptr, section) {
996 // Comdats from LTO files can't be fully treated as regular comdats
997 // at this point; we don't know what size or contents they are going to
998 // have, so we can't do proper checking of such aspects of them.
999 chunk.selection = llvm::COFF::IMAGE_COMDAT_SELECT_ANY;
1000 }
1001
1002 SectionChunk chunk;
1003};
1004
1005} // namespace lld::coff
1006
1007namespace llvm {
1008template <>
1009struct DenseMapInfo<lld::coff::ChunkAndOffset>
1010 : lld::coff::ChunkAndOffset::DenseMapInfo {};
1011}
1012
1013#endif
1014