//===- ARM64.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Arch/ARM64Common.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"

#include "lld/Common/ErrorHandler.h"
#include "mach-o/compact_unwind_encoding.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::macho;

namespace {

struct ARM64 : ARM64Common {
  ARM64();
  void writeStub(uint8_t *buf, const Symbol &, uint64_t) const override;
  void writeStubHelperHeader(uint8_t *buf) const override;
  void writeStubHelperEntry(uint8_t *buf, const Symbol &,
                            uint64_t entryAddr) const override;

  void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
                            uint64_t &stubOffset, uint64_t selrefVA,
                            Symbol *objcMsgSend) const override;
  void populateThunk(InputSection *thunk, Symbol *funcSym) override;
  void applyOptimizationHints(uint8_t *, const ObjFile &) const override;

  void initICFSafeThunkBody(InputSection *thunk,
                            Symbol *targetSym) const override;
  Symbol *getThunkBranchTarget(InputSection *thunk) const override;
  uint32_t getICFSafeThunkSize() const override;
};

} // namespace

// Random notes on reloc types:
// ADDEND always pairs with BRANCH26, PAGE21, or PAGEOFF12
// POINTER_TO_GOT: ld64 supports a 4-byte pc-relative form as well as an 8-byte
// absolute version of this relocation. The semantics of the absolute relocation
// are weird -- it results in the value of the GOT slot being written, instead
// of the address. Let's not support it unless we find a real-world use case.
static constexpr std::array<RelocAttrs, 11> relocAttrsArray{{
#define B(x) RelocAttrBits::x
    {"UNSIGNED",
     B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)},
    {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)},
    {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
    {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)},
    {"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)},
    {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)},
    {"GOT_LOAD_PAGEOFF12",
     B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
    {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)},
    {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)},
    {"TLVP_LOAD_PAGEOFF12",
     B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)},
    {"ADDEND", B(ADDEND)},
#undef B
}};

static constexpr uint32_t stubCode[] = {
    0x90000010, // 00: adrp x16, __la_symbol_ptr@page
    0xf9400210, // 04: ldr x16, [x16, __la_symbol_ptr@pageoff]
    0xd61f0200, // 08: br x16
};

void ARM64::writeStub(uint8_t *buf8, const Symbol &sym,
                      uint64_t pointerVA) const {
  ::writeStub(buf8, stubCode, sym, pointerVA);
}

static constexpr uint32_t stubHelperHeaderCode[] = {
    0x90000011, // 00: adrp x17, _dyld_private@page
    0x91000231, // 04: add x17, x17, _dyld_private@pageoff
    0xa9bf47f0, // 08: stp x16, x17, [sp, #-16]!
    0x90000010, // 0c: adrp x16, dyld_stub_binder@page
    0xf9400210, // 10: ldr x16, [x16, dyld_stub_binder@pageoff]
    0xd61f0200, // 14: br x16
};

void ARM64::writeStubHelperHeader(uint8_t *buf8) const {
  ::writeStubHelperHeader<LP64>(buf8, stubHelperHeaderCode);
}

static constexpr uint32_t stubHelperEntryCode[] = {
    0x18000050, // 00: ldr w16, l0
    0x14000000, // 04: b stubHelperHeader
    0x00000000, // 08: l0: .long 0
};

void ARM64::writeStubHelperEntry(uint8_t *buf8, const Symbol &sym,
                                 uint64_t entryVA) const {
  ::writeStubHelperEntry(buf8, stubHelperEntryCode, sym, entryVA);
}

static constexpr uint32_t objcStubsFastCode[] = {
    0x90000001, // adrp x1, __objc_selrefs@page
    0xf9400021, // ldr x1, [x1, @selector("foo")@pageoff]
    0x90000010, // adrp x16, _got@page
    0xf9400210, // ldr x16, [x16, _objc_msgSend@pageoff]
    0xd61f0200, // br x16
    0xd4200020, // brk #0x1
    0xd4200020, // brk #0x1
    0xd4200020, // brk #0x1
};

static constexpr uint32_t objcStubsSmallCode[] = {
    0x90000001, // adrp x1, __objc_selrefs@page
    0xf9400021, // ldr x1, [x1, @selector("foo")@pageoff]
    0x14000000, // b _objc_msgSend
};

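// Writes the stub for a method call through _objc_msgSend. In fast mode, the
// stub loads the selector into x1 and branches to _objc_msgSend via its GOT
// entry; in small mode, it loads the selector and branches directly to
// _objc_msgSend (or to _objc_msgSend's own stub if it isn't a defined symbol).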
void ARM64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
                                 uint64_t &stubOffset, uint64_t selrefVA,
                                 Symbol *objcMsgSend) const {
  uint64_t objcMsgSendAddr;
  uint64_t objcStubSize;
  uint64_t objcMsgSendIndex;

  if (config->objcStubsMode == ObjCStubsMode::fast) {
    objcStubSize = target->objcStubsFastSize;
    objcMsgSendAddr = in.got->addr;
    objcMsgSendIndex = objcMsgSend->gotIndex;
    ::writeObjCMsgSendFastStub<LP64>(buf, objcStubsFastCode, sym, stubsAddr,
                                     stubOffset, selrefVA, objcMsgSendAddr,
                                     objcMsgSendIndex);
  } else {
    assert(config->objcStubsMode == ObjCStubsMode::small);
    objcStubSize = target->objcStubsSmallSize;
    if (auto *d = dyn_cast<Defined>(objcMsgSend)) {
      objcMsgSendAddr = d->getVA();
      objcMsgSendIndex = 0;
    } else {
      objcMsgSendAddr = in.stubs->addr;
      objcMsgSendIndex = objcMsgSend->stubsIndex;
    }
    ::writeObjCMsgSendSmallStub<LP64>(buf, objcStubsSmallCode, sym, stubsAddr,
                                      stubOffset, selrefVA, objcMsgSendAddr,
                                      objcMsgSendIndex);
  }
  stubOffset += objcStubSize;
}

// A thunk is the relaxed variation of stubCode. We don't need the
// extra indirection through a lazy pointer because the target address
// is known at link time.
static constexpr uint32_t thunkCode[] = {
    0x90000010, // 00: adrp x16, <thunk.ptr>@page
    0x91000210, // 04: add x16, x16, <thunk.ptr>@pageoff
    0xd61f0200, // 08: br x16
};

void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
  thunk->align = 4;
  thunk->data = {reinterpret_cast<const uint8_t *>(thunkCode),
                 sizeof(thunkCode)};
  thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_PAGEOFF12,
                             /*pcrel=*/false, /*length=*/2,
                             /*offset=*/4, /*addend=*/0,
                             /*referent=*/funcSym);
  thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_PAGE21,
                             /*pcrel=*/true, /*length=*/2,
                             /*offset=*/0, /*addend=*/0,
                             /*referent=*/funcSym);
}

// Just a single direct branch to the target function.
static constexpr uint32_t icfSafeThunkCode[] = {
    0x14000000, // 00: b target
};

void ARM64::initICFSafeThunkBody(InputSection *thunk, Symbol *targetSym) const {
  // The base data here will not itself be modified; we'll just be adding a
  // reloc below. So we can directly use the constexpr above as the data.
  thunk->data = {reinterpret_cast<const uint8_t *>(icfSafeThunkCode),
                 sizeof(icfSafeThunkCode)};

  thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
                             /*pcrel=*/true, /*length=*/2,
                             /*offset=*/0, /*addend=*/0,
                             /*referent=*/targetSym);
}

Symbol *ARM64::getThunkBranchTarget(InputSection *thunk) const {
  assert(thunk->relocs.size() == 1 &&
         "expected a single reloc on ARM64 ICF thunk");
  auto &reloc = thunk->relocs[0];
  assert(isa<Symbol *>(reloc.referent) &&
         "ARM64 thunk reloc is expected to point to a Symbol");

  return cast<Symbol *>(reloc.referent);
}

uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); }

ARM64::ARM64() : ARM64Common(LP64()) {
  cpuType = CPU_TYPE_ARM64;
  cpuSubtype = CPU_SUBTYPE_ARM64_ALL;

  stubSize = sizeof(stubCode);
  thunkSize = sizeof(thunkCode);

  objcStubsFastSize = sizeof(objcStubsFastCode);
  objcStubsFastAlignment = 32;
  objcStubsSmallSize = sizeof(objcStubsSmallCode);
  objcStubsSmallAlignment = 4;

  // The branch immediate is a two's complement 26-bit value, which is
  // implicitly multiplied by 4 (since all functions are 4-byte aligned). The
  // branch range is thus -4*(2**(26-1)) .. 4*(2**(26-1) - 1), i.e.
  // -128 MiB .. 128 MiB - 4.
  backwardBranchRange = 128 * 1024 * 1024;
  forwardBranchRange = backwardBranchRange - 4;

  modeDwarfEncoding = UNWIND_ARM64_MODE_DWARF;
  subtractorRelocType = ARM64_RELOC_SUBTRACTOR;
  unsignedRelocType = ARM64_RELOC_UNSIGNED;

  stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
  stubHelperEntrySize = sizeof(stubHelperEntryCode);

  relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()};
}

namespace {
struct Adrp {
  uint32_t destRegister;
  int64_t addend;
};

struct Add {
  uint8_t destRegister;
  uint8_t srcRegister;
  uint32_t addend;
};

enum ExtendType { ZeroExtend = 1, Sign64 = 2, Sign32 = 3 };

struct Ldr {
  uint8_t destRegister;
  uint8_t baseRegister;
  uint8_t p2Size;
  bool isFloat;
  ExtendType extendType;
  int64_t offset;
};
} // namespace

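// Decode an ADRP instruction. The encoding places the destination register in
// bits 4:0, immlo in bits 30:29, and immhi in bits 23:5; the sign-extended
// 21-bit immhi:immlo value is scaled by the 4 KiB page size to form the
// addend.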
static bool parseAdrp(uint32_t insn, Adrp &adrp) {
  if ((insn & 0x9f000000) != 0x90000000)
    return false;
  adrp.destRegister = insn & 0x1f;
  uint64_t immHi = (insn >> 5) & 0x7ffff;
  uint64_t immLo = (insn >> 29) & 0x3;
  adrp.addend = SignExtend64<21>(immLo | (immHi << 2)) * 4096;
  return true;
}

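// Decode an ADD (immediate) instruction in its 64-bit, LSL-#0 form
// (0x91000000): ADD Xd, Xn, #imm12, with the unsigned immediate in bits 21:10.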
static bool parseAdd(uint32_t insn, Add &add) {
  if ((insn & 0xffc00000) != 0x91000000)
    return false;
  add.destRegister = insn & 0x1f;
  add.srcRegister = (insn >> 5) & 0x1f;
  add.addend = (insn >> 10) & 0xfff;
  return true;
}

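// Decode the unsigned-offset (immediate) forms of integer, sign-extending, and
// SIMD&FP loads. p2Size is the log2 of the access size in bytes; the encoded
// 12-bit immediate offset is scaled by that size.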
static bool parseLdr(uint32_t insn, Ldr &ldr) {
  ldr.destRegister = insn & 0x1f;
  ldr.baseRegister = (insn >> 5) & 0x1f;
  uint8_t size = insn >> 30;
  uint8_t opc = (insn >> 22) & 3;

  if ((insn & 0x3fc00000) == 0x39400000) {
    // LDR (immediate), LDRB (immediate), LDRH (immediate)
    ldr.p2Size = size;
    ldr.extendType = ZeroExtend;
    ldr.isFloat = false;
  } else if ((insn & 0x3f800000) == 0x39800000) {
    // LDRSB (immediate), LDRSH (immediate), LDRSW (immediate)
    ldr.p2Size = size;
    ldr.extendType = static_cast<ExtendType>(opc);
    ldr.isFloat = false;
  } else if ((insn & 0x3f400000) == 0x3d400000) {
    // LDR (immediate, SIMD&FP)
    ldr.extendType = ZeroExtend;
    ldr.isFloat = true;
    if (opc == 1)
      ldr.p2Size = size;
    else if (size == 0 && opc == 3)
      ldr.p2Size = 4;
    else
      return false;
  } else {
    return false;
  }
  ldr.offset = ((insn >> 10) & 0xfff) << ldr.p2Size;
  return true;
}

static bool isValidAdrOffset(int32_t delta) { return isInt<21>(delta); }

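// Encode an ADR instruction: the low two bits of the byte delta go in immlo
// (bits 30:29), the remaining 19 bits in immhi (bits 23:5), and the
// destination register in bits 4:0.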
static void writeAdr(void *loc, uint32_t dest, int32_t delta) {
  assert(isValidAdrOffset(delta));
  uint32_t opcode = 0x10000000;
  uint32_t immHi = (delta & 0x001ffffc) << 3;
  uint32_t immLo = (delta & 0x00000003) << 29;
  write32le(loc, opcode | immHi | immLo | dest);
}

static void writeNop(void *loc) { write32le(loc, 0xd503201f); }

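// A literal (PC-relative) LDR encodes a signed 19-bit word offset, i.e. a
// 4-byte-aligned target within +/- 1 MiB. Byte and halfword loads have no
// literal form, hence the p2Size > 1 requirement.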
static bool isLiteralLdrEligible(const Ldr &ldr) {
  return ldr.p2Size > 1 && isShiftedInt<19, 2>(ldr.offset);
}

static void writeLiteralLdr(void *loc, const Ldr &ldr) {
  assert(isLiteralLdrEligible(ldr));
  uint32_t imm19 = (ldr.offset / 4 & maskTrailingOnes<uint32_t>(19)) << 5;
  uint32_t opcode;
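  // Select the literal-form opcode matching the access size and type: plain
  // LDR (32/64-bit), LDRSW, or the 32/64/128-bit SIMD&FP variants.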
  switch (ldr.p2Size) {
  case 2:
    if (ldr.isFloat)
      opcode = 0x1c000000;
    else
      opcode = ldr.extendType == Sign64 ? 0x98000000 : 0x18000000;
    break;
  case 3:
    opcode = ldr.isFloat ? 0x5c000000 : 0x58000000;
    break;
  case 4:
    opcode = 0x9c000000;
    break;
  default:
    llvm_unreachable("Invalid literal ldr size");
  }
  write32le(loc, opcode | imm19 | ldr.destRegister);
}

static bool isImmediateLdrEligible(const Ldr &ldr) {
  // Note: We deviate from ld64's behavior, which converts to immediate loads
  // only if ldr.offset < 4096, even though the offset is divided by the load's
  // size in the 12-bit immediate operand. Only the unsigned offset variant is
  // supported.

  uint32_t size = 1 << ldr.p2Size;
  return ldr.offset >= 0 && (ldr.offset % size) == 0 &&
         isUInt<12>(ldr.offset >> ldr.p2Size);
}

static void writeImmediateLdr(void *loc, const Ldr &ldr) {
  assert(isImmediateLdrEligible(ldr));
  uint32_t opcode = 0x39000000;
  if (ldr.isFloat) {
    opcode |= 0x04000000;
    assert(ldr.extendType == ZeroExtend);
  }
  opcode |= ldr.destRegister;
  opcode |= ldr.baseRegister << 5;
  uint8_t size, opc;
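  // 128-bit SIMD&FP loads are encoded with size = 0b00 and opc = 0b11; other
  // sizes put the access size in `size` and the extend type in `opc`.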
  if (ldr.p2Size == 4) {
    size = 0;
    opc = 3;
  } else {
    opc = ldr.extendType;
    size = ldr.p2Size;
  }
  uint32_t immBits = ldr.offset >> ldr.p2Size;
  write32le(loc, opcode | (immBits << 10) | (opc << 22) | (size << 30));
}

// Transforms a pair of adrp+add instructions into an adr instruction if the
// target is within the +/- 1 MiB range allowed by the adr's 21-bit signed
// immediate offset.
//
//   adrp xN, _foo@PAGE
//   add  xM, xN, _foo@PAGEOFF
// ->
//   adr  xM, _foo
//   nop
static bool applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
                         uint64_t offset1, uint64_t offset2) {
  uint32_t ins1 = read32le(buf + offset1);
  uint32_t ins2 = read32le(buf + offset2);
  Adrp adrp;
  Add add;
  if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add))
    return false;
  if (adrp.destRegister != add.srcRegister)
    return false;

  uint64_t addr1 = isec->getVA() + offset1;
  uint64_t referent = pageBits(addr1) + adrp.addend + add.addend;
  int64_t delta = referent - addr1;
  if (!isValidAdrOffset(delta))
    return false;

  writeAdr(buf + offset1, add.destRegister, delta);
  writeNop(buf + offset2);
  return true;
}

// Transforms two adrp instructions into a single adrp if their referent
// addresses are located on the same 4096-byte page.
//
//   adrp xN, _foo@PAGE
//   adrp xN, _bar@PAGE
// ->
//   adrp xN, _foo@PAGE
//   nop
static void applyAdrpAdrp(uint8_t *buf, const ConcatInputSection *isec,
                          uint64_t offset1, uint64_t offset2) {
  uint32_t ins1 = read32le(buf + offset1);
  uint32_t ins2 = read32le(buf + offset2);
  Adrp adrp1, adrp2;
  if (!parseAdrp(ins1, adrp1) || !parseAdrp(ins2, adrp2))
    return;
  if (adrp1.destRegister != adrp2.destRegister)
    return;

  uint64_t page1 = pageBits(offset1 + isec->getVA()) + adrp1.addend;
  uint64_t page2 = pageBits(offset2 + isec->getVA()) + adrp2.addend;
  if (page1 != page2)
    return;

  writeNop(buf + offset2);
}

// Transforms a pair of adrp+ldr (immediate) instructions into an ldr (literal)
// load from a PC-relative address if it is 4-byte aligned and within +/- 1 MiB,
// as ldr can encode a signed 19-bit offset that gets multiplied by 4.
//
//   adrp xN, _foo@PAGE
//   ldr  xM, [xN, _foo@PAGEOFF]
// ->
//   nop
//   ldr  xM, _foo
static void applyAdrpLdr(uint8_t *buf, const ConcatInputSection *isec,
                         uint64_t offset1, uint64_t offset2) {
  uint32_t ins1 = read32le(buf + offset1);
  uint32_t ins2 = read32le(buf + offset2);
  Adrp adrp;
  Ldr ldr;
  if (!parseAdrp(ins1, adrp) || !parseLdr(ins2, ldr))
    return;
  if (adrp.destRegister != ldr.baseRegister)
    return;

  uint64_t addr1 = isec->getVA() + offset1;
  uint64_t addr2 = isec->getVA() + offset2;
  uint64_t referent = pageBits(addr1) + adrp.addend + ldr.offset;
  ldr.offset = referent - addr2;
  if (!isLiteralLdrEligible(ldr))
    return;

  writeNop(buf + offset1);
  writeLiteralLdr(buf + offset2, ldr);
}

// GOT loads are emitted by the compiler as a pair of adrp and ldr instructions,
// but they may be changed to adrp+add by relaxGotLoad(). This hint performs
// the AdrpLdr or AdrpAdd transformation depending on whether it was relaxed.
static void applyAdrpLdrGot(uint8_t *buf, const ConcatInputSection *isec,
                            uint64_t offset1, uint64_t offset2) {
  uint32_t ins2 = read32le(buf + offset2);
  Add add;
  Ldr ldr;
  if (parseAdd(ins2, add))
    applyAdrpAdd(buf, isec, offset1, offset2);
  else if (parseLdr(ins2, ldr))
    applyAdrpLdr(buf, isec, offset1, offset2);
}

// Optimizes an adrp+add+ldr sequence used for loading from a local symbol's
// address by loading directly if it's close enough, or to an adr(p)+ldr
// sequence if it's not.
//
//   adrp x0, _foo@PAGE
//   add  x1, x0, _foo@PAGEOFF
//   ldr  x2, [x1, #off]
static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
                            uint64_t offset1, uint64_t offset2,
                            uint64_t offset3) {
  uint32_t ins1 = read32le(buf + offset1);
  uint32_t ins2 = read32le(buf + offset2);
  uint32_t ins3 = read32le(buf + offset3);
  Adrp adrp;
  Add add;
  Ldr ldr;
  if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add) || !parseLdr(ins3, ldr))
    return;
  if (adrp.destRegister != add.srcRegister)
    return;
  if (add.destRegister != ldr.baseRegister)
    return;

  // Load from the target address directly.
  //   nop
  //   nop
  //   ldr x2, [_foo + #off]
  uint64_t addr1 = isec->getVA() + offset1;
  uint64_t addr3 = isec->getVA() + offset3;
  uint64_t referent = pageBits(addr1) + adrp.addend + add.addend;
  Ldr literalLdr = ldr;
  literalLdr.offset += referent - addr3;
  if (isLiteralLdrEligible(literalLdr)) {
    writeNop(buf + offset1);
    writeNop(buf + offset2);
    writeLiteralLdr(buf + offset3, literalLdr);
    return;
  }

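  // The target is too far away for a literal load; try relaxing the adrp+add
  // pair to a single adr instead.
  //   adr x1, _foo
  //   nop
  //   ldr x2, [x1, #off]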
  if (applyAdrpAdd(buf, isec, offset1, offset2))
    return;

  // Move the target's page offset into the ldr's immediate offset.
  //   adrp x0, _foo@PAGE
  //   nop
  //   ldr  x2, [x0, _foo@PAGEOFF + #off]
  Ldr immediateLdr = ldr;
  immediateLdr.baseRegister = adrp.destRegister;
  immediateLdr.offset += add.addend;
  if (isImmediateLdrEligible(immediateLdr)) {
    writeNop(buf + offset2);
    writeImmediateLdr(buf + offset3, immediateLdr);
    return;
  }
}

// Relaxes a GOT-indirect load.
// If the referenced symbol is external and its GOT entry is within +/- 1 MiB,
// the GOT entry can be loaded with a single literal ldr instruction.
// If the referenced symbol is local and thus has been relaxed to adrp+add+ldr,
// we perform the AdrpAddLdr transformation.
static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
                               uint64_t offset1, uint64_t offset2,
                               uint64_t offset3) {
  uint32_t ins2 = read32le(buf + offset2);
  Add add;
  Ldr ldr2;

  if (parseAdd(ins2, add)) {
    applyAdrpAddLdr(buf, isec, offset1, offset2, offset3);
  } else if (parseLdr(ins2, ldr2)) {
    //   adrp x1, _foo@GOTPAGE
    //   ldr  x2, [x1, _foo@GOTPAGEOFF]
    //   ldr  x3, [x2, #off]
    uint32_t ins3 = read32le(buf + offset3);
    Ldr ldr3;
    if (!parseLdr(ins3, ldr3))
      return;
    if (ldr3.baseRegister != ldr2.destRegister)
      return;
    // Loads from the GOT must be pointer sized.
    if (ldr2.p2Size != 3 || ldr2.isFloat)
      return;
    applyAdrpLdr(buf, isec, offset1, offset2);
  }
}

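// The hint stream is a flat sequence of ULEB128-encoded values: a hint kind,
// an argument count, then that many arguments (addresses of the instructions
// involved). A kind of 0 terminates the stream. All currently known hints take
// at most three arguments; records with more are skipped.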
template <typename Callback>
static void forEachHint(ArrayRef<uint8_t> data, Callback callback) {
  std::array<uint64_t, 3> args;

  auto readNext = [&]() -> uint64_t {
    unsigned int n = 0;
    uint64_t value = decodeULEB128(data.data(), &n, data.end());
    data = data.drop_front(n);
    return value;
  };

  while (!data.empty()) {
    uint64_t type = readNext();
    if (type == 0)
      break;

    uint64_t argCount = readNext();
    for (unsigned i = 0; i < argCount; ++i) {
      uint64_t arg = readNext();
      if (i < 3)
        args[i] = arg;
    }
    // All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
    if (argCount > 3)
      continue;
    callback(type, ArrayRef(args.data(), argCount));
  }
}

// On RISC architectures like arm64, materializing a memory address generally
// takes multiple instructions. If the referenced symbol is located close enough
// in memory, fewer instructions are needed.
//
// Linker optimization hints record where addresses are computed. After
// addresses have been assigned, we rewrite the hinted instruction sequences
// into shorter ones where possible. The size of the binary is not modified;
// the eliminated instructions are replaced with NOPs. This still leads to
// faster code as the CPU can skip over NOPs quickly.
//
// LOHs are specified by the LC_LINKER_OPTIMIZATION_HINT load command, which
// points to a sequence of ULEB128-encoded numbers. Each entry specifies a
// transformation kind and 2 or 3 addresses where the instructions are located.
void ARM64::applyOptimizationHints(uint8_t *outBuf, const ObjFile &obj) const {
  ArrayRef<uint8_t> data = obj.getOptimizationHints();
  if (data.empty())
    return;

  const ConcatInputSection *section = nullptr;
  uint64_t sectionAddr = 0;
  uint8_t *buf = nullptr;

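  // Binary-search for the subsection containing addr, caching the result since
  // consecutive hints usually land in the same subsection. On success,
  // `section`, `sectionAddr`, and `buf` are updated to refer to it.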
  auto findSection = [&](uint64_t addr) {
    if (section && addr >= sectionAddr &&
        addr < sectionAddr + section->getSize())
      return true;

    if (obj.sections.empty())
      return false;
    auto secIt = std::prev(llvm::upper_bound(
        obj.sections, addr,
        [](uint64_t off, const Section *sec) { return off < sec->addr; }));
    const Section *sec = *secIt;

    if (sec->subsections.empty())
      return false;
    auto subsecIt = std::prev(llvm::upper_bound(
        sec->subsections, addr - sec->addr,
        [](uint64_t off, Subsection subsec) { return off < subsec.offset; }));
    const Subsection &subsec = *subsecIt;
    const ConcatInputSection *isec =
        dyn_cast_or_null<ConcatInputSection>(subsec.isec);
    if (!isec || isec->shouldOmitFromOutput())
      return false;

    section = isec;
    sectionAddr = subsec.offset + sec->addr;
    buf = outBuf + section->outSecOff + section->parent->fileOff;
    return true;
  };

  auto isValidOffset = [&](uint64_t offset) {
    if (offset < sectionAddr || offset >= sectionAddr + section->getSize()) {
      error(toString(&obj) +
            ": linker optimization hint spans multiple sections");
      return false;
    }
    return true;
  };

  bool hasAdrpAdrp = false;
  forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) {
    if (kind == LOH_ARM64_ADRP_ADRP) {
      hasAdrpAdrp = true;
      return;
    }

    if (!findSection(args[0]))
      return;
    switch (kind) {
    case LOH_ARM64_ADRP_ADD:
      if (isValidOffset(args[1]))
        applyAdrpAdd(buf, section, args[0] - sectionAddr,
                     args[1] - sectionAddr);
      break;
    case LOH_ARM64_ADRP_LDR:
      if (isValidOffset(args[1]))
        applyAdrpLdr(buf, section, args[0] - sectionAddr,
                     args[1] - sectionAddr);
      break;
    case LOH_ARM64_ADRP_LDR_GOT:
      if (isValidOffset(args[1]))
        applyAdrpLdrGot(buf, section, args[0] - sectionAddr,
                        args[1] - sectionAddr);
      break;
    case LOH_ARM64_ADRP_ADD_LDR:
      if (isValidOffset(args[1]) && isValidOffset(args[2]))
        applyAdrpAddLdr(buf, section, args[0] - sectionAddr,
                        args[1] - sectionAddr, args[2] - sectionAddr);
      break;
    case LOH_ARM64_ADRP_LDR_GOT_LDR:
      if (isValidOffset(args[1]) && isValidOffset(args[2]))
        applyAdrpLdrGotLdr(buf, section, args[0] - sectionAddr,
                           args[1] - sectionAddr, args[2] - sectionAddr);
      break;
    case LOH_ARM64_ADRP_ADD_STR:
    case LOH_ARM64_ADRP_LDR_GOT_STR:
      // TODO: Implement these
      break;
    }
  });

  if (!hasAdrpAdrp)
    return;

  // AdrpAdrp optimization hints are performed in a second pass because they
  // might interfere with other transformations. For instance, consider the
  // following input:
  //
  //   adrp x0, _foo@PAGE
  //   add  x1, x0, _foo@PAGEOFF
  //   adrp x0, _bar@PAGE
  //   add  x2, x0, _bar@PAGEOFF
  //
  // If we perform the AdrpAdrp relaxation first, we get:
  //
  //   adrp x0, _foo@PAGE
  //   add  x1, x0, _foo@PAGEOFF
  //   nop
  //   add  x2, x0, _bar@PAGEOFF
  //
  // If we then apply AdrpAdd to the first two instructions, the add will have a
  // garbage value in x0:
  //
  //   adr  x1, _foo
  //   nop
  //   nop
  //   add  x2, x0, _bar@PAGEOFF
  forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) {
    if (kind != LOH_ARM64_ADRP_ADRP)
      return;
    if (!findSection(args[0]))
      return;
    if (isValidOffset(args[1]))
      applyAdrpAdrp(buf, section, args[0] - sectionAddr, args[1] - sectionAddr);
  });
}

TargetInfo *macho::createARM64TargetInfo() {
  static ARM64 t;
  return &t;
}