1 | //===------------- OrcABISupport.cpp - ABI specific support code ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" |
10 | #include "llvm/Support/FormatVariadic.h" |
11 | #include "llvm/Support/Process.h" |
12 | #include "llvm/Support/raw_ostream.h" |
13 | |
14 | #define DEBUG_TYPE "orc" |
15 | |
16 | using namespace llvm; |
17 | using namespace llvm::orc; |
18 | |
19 | template <typename ORCABI> |
20 | static bool stubAndPointerRangesOk(ExecutorAddr StubBlockAddr, |
21 | ExecutorAddr PointerBlockAddr, |
22 | unsigned NumStubs) { |
23 | constexpr unsigned MaxDisp = ORCABI::StubToPointerMaxDisplacement; |
24 | ExecutorAddr FirstStub = StubBlockAddr; |
25 | ExecutorAddr LastStub = FirstStub + ((NumStubs - 1) * ORCABI::StubSize); |
26 | ExecutorAddr FirstPointer = PointerBlockAddr; |
27 | ExecutorAddr LastPointer = FirstPointer + ((NumStubs - 1) * ORCABI::StubSize); |
28 | |
29 | if (FirstStub < FirstPointer) { |
30 | if (LastStub >= FirstPointer) |
31 | return false; // Ranges overlap. |
32 | return (FirstPointer - FirstStub <= MaxDisp) && |
33 | (LastPointer - LastStub <= MaxDisp); // out-of-range. |
34 | } |
35 | |
36 | if (LastPointer >= FirstStub) |
37 | return false; // Ranges overlap. |
38 | |
39 | return (FirstStub - FirstPointer <= MaxDisp) && |
40 | (LastStub - LastPointer <= MaxDisp); |
41 | } |
42 | |
43 | namespace llvm { |
44 | namespace orc { |
45 | |
46 | void OrcAArch64::writeResolverCode(char *ResolverWorkingMem, |
47 | ExecutorAddr ResolverTargetAddress, |
48 | ExecutorAddr ReentryFnAddr, |
49 | ExecutorAddr ReentryCtxAddr) { |
50 | |
51 | const uint32_t ResolverCode[] = { |
52 | // resolver_entry: |
53 | 0xa9bf47fd, // 0x000: stp x29, x17, [sp, #-16]! |
54 | 0x910003fd, // 0x004: mov x29, sp |
55 | 0xa9bf73fb, // 0x008: stp x27, x28, [sp, #-16]! |
56 | 0xa9bf6bf9, // 0x00c: stp x25, x26, [sp, #-16]! |
57 | 0xa9bf63f7, // 0x010: stp x23, x24, [sp, #-16]! |
58 | 0xa9bf5bf5, // 0x014: stp x21, x22, [sp, #-16]! |
59 | 0xa9bf53f3, // 0x018: stp x19, x20, [sp, #-16]! |
60 | 0xa9bf3fee, // 0x01c: stp x14, x15, [sp, #-16]! |
61 | 0xa9bf37ec, // 0x020: stp x12, x13, [sp, #-16]! |
62 | 0xa9bf2fea, // 0x024: stp x10, x11, [sp, #-16]! |
63 | 0xa9bf27e8, // 0x028: stp x8, x9, [sp, #-16]! |
64 | 0xa9bf1fe6, // 0x02c: stp x6, x7, [sp, #-16]! |
65 | 0xa9bf17e4, // 0x030: stp x4, x5, [sp, #-16]! |
66 | 0xa9bf0fe2, // 0x034: stp x2, x3, [sp, #-16]! |
67 | 0xa9bf07e0, // 0x038: stp x0, x1, [sp, #-16]! |
68 | 0xadbf7ffe, // 0x03c: stp q30, q31, [sp, #-32]! |
69 | 0xadbf77fc, // 0x040: stp q28, q29, [sp, #-32]! |
70 | 0xadbf6ffa, // 0x044: stp q26, q27, [sp, #-32]! |
71 | 0xadbf67f8, // 0x048: stp q24, q25, [sp, #-32]! |
72 | 0xadbf5ff6, // 0x04c: stp q22, q23, [sp, #-32]! |
73 | 0xadbf57f4, // 0x050: stp q20, q21, [sp, #-32]! |
74 | 0xadbf4ff2, // 0x054: stp q18, q19, [sp, #-32]! |
75 | 0xadbf47f0, // 0x058: stp q16, q17, [sp, #-32]! |
76 | 0xadbf3fee, // 0x05c: stp q14, q15, [sp, #-32]! |
77 | 0xadbf37ec, // 0x060: stp q12, q13, [sp, #-32]! |
78 | 0xadbf2fea, // 0x064: stp q10, q11, [sp, #-32]! |
79 | 0xadbf27e8, // 0x068: stp q8, q9, [sp, #-32]! |
80 | 0xadbf1fe6, // 0x06c: stp q6, q7, [sp, #-32]! |
81 | 0xadbf17e4, // 0x070: stp q4, q5, [sp, #-32]! |
82 | 0xadbf0fe2, // 0x074: stp q2, q3, [sp, #-32]! |
83 | 0xadbf07e0, // 0x078: stp q0, q1, [sp, #-32]! |
84 | 0x580004e0, // 0x07c: ldr x0, Lreentry_ctx_ptr |
85 | 0xaa1e03e1, // 0x080: mov x1, x30 |
86 | 0xd1003021, // 0x084: sub x1, x1, #12 |
87 | 0x58000442, // 0x088: ldr x2, Lreentry_fn_ptr |
88 | 0xd63f0040, // 0x08c: blr x2 |
89 | 0xaa0003f1, // 0x090: mov x17, x0 |
90 | 0xacc107e0, // 0x094: ldp q0, q1, [sp], #32 |
91 | 0xacc10fe2, // 0x098: ldp q2, q3, [sp], #32 |
92 | 0xacc117e4, // 0x09c: ldp q4, q5, [sp], #32 |
93 | 0xacc11fe6, // 0x0a0: ldp q6, q7, [sp], #32 |
94 | 0xacc127e8, // 0x0a4: ldp q8, q9, [sp], #32 |
95 | 0xacc12fea, // 0x0a8: ldp q10, q11, [sp], #32 |
96 | 0xacc137ec, // 0x0ac: ldp q12, q13, [sp], #32 |
97 | 0xacc13fee, // 0x0b0: ldp q14, q15, [sp], #32 |
98 | 0xacc147f0, // 0x0b4: ldp q16, q17, [sp], #32 |
99 | 0xacc14ff2, // 0x0b8: ldp q18, q19, [sp], #32 |
100 | 0xacc157f4, // 0x0bc: ldp q20, q21, [sp], #32 |
101 | 0xacc15ff6, // 0x0c0: ldp q22, q23, [sp], #32 |
102 | 0xacc167f8, // 0x0c4: ldp q24, q25, [sp], #32 |
103 | 0xacc16ffa, // 0x0c8: ldp q26, q27, [sp], #32 |
104 | 0xacc177fc, // 0x0cc: ldp q28, q29, [sp], #32 |
105 | 0xacc17ffe, // 0x0d0: ldp q30, q31, [sp], #32 |
106 | 0xa8c107e0, // 0x0d4: ldp x0, x1, [sp], #16 |
107 | 0xa8c10fe2, // 0x0d8: ldp x2, x3, [sp], #16 |
108 | 0xa8c117e4, // 0x0dc: ldp x4, x5, [sp], #16 |
109 | 0xa8c11fe6, // 0x0e0: ldp x6, x7, [sp], #16 |
110 | 0xa8c127e8, // 0x0e4: ldp x8, x9, [sp], #16 |
111 | 0xa8c12fea, // 0x0e8: ldp x10, x11, [sp], #16 |
112 | 0xa8c137ec, // 0x0ec: ldp x12, x13, [sp], #16 |
113 | 0xa8c13fee, // 0x0f0: ldp x14, x15, [sp], #16 |
114 | 0xa8c153f3, // 0x0f4: ldp x19, x20, [sp], #16 |
115 | 0xa8c15bf5, // 0x0f8: ldp x21, x22, [sp], #16 |
116 | 0xa8c163f7, // 0x0fc: ldp x23, x24, [sp], #16 |
117 | 0xa8c16bf9, // 0x100: ldp x25, x26, [sp], #16 |
118 | 0xa8c173fb, // 0x104: ldp x27, x28, [sp], #16 |
119 | 0xa8c17bfd, // 0x108: ldp x29, x30, [sp], #16 |
120 | 0xd65f0220, // 0x10c: ret x17 |
121 | 0x01234567, // 0x110: Lreentry_fn_ptr: |
122 | 0xdeadbeef, // 0x114: .quad 0 |
123 | 0x98765432, // 0x118: Lreentry_ctx_ptr: |
124 | 0xcafef00d // 0x11c: .quad 0 |
125 | }; |
126 | |
127 | const unsigned ReentryFnAddrOffset = 0x110; |
128 | const unsigned ReentryCtxAddrOffset = 0x118; |
129 | |
130 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
131 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
132 | n: sizeof(uint64_t)); |
133 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
134 | n: sizeof(uint64_t)); |
135 | } |
136 | |
137 | void OrcAArch64::writeTrampolines(char *TrampolineBlockWorkingMem, |
138 | ExecutorAddr TrampolineBlockTargetAddress, |
139 | ExecutorAddr ResolverAddr, |
140 | unsigned NumTrampolines) { |
141 | |
142 | unsigned OffsetToPtr = alignTo(Value: NumTrampolines * TrampolineSize, Align: 8); |
143 | |
144 | memcpy(dest: TrampolineBlockWorkingMem + OffsetToPtr, src: &ResolverAddr, |
145 | n: sizeof(uint64_t)); |
146 | |
147 | // OffsetToPtr is actually the offset from the PC for the 2nd instruction, so |
148 | // subtract 32-bits. |
149 | OffsetToPtr -= 4; |
150 | |
151 | uint32_t *Trampolines = |
152 | reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); |
153 | |
154 | for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { |
155 | Trampolines[3 * I + 0] = 0xaa1e03f1; // mov x17, x30 |
156 | Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // adr x16, Lptr |
157 | Trampolines[3 * I + 2] = 0xd63f0200; // blr x16 |
158 | } |
159 | } |
160 | |
161 | void OrcAArch64::writeIndirectStubsBlock( |
162 | char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress, |
163 | ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) { |
164 | // Stub format is: |
165 | // |
166 | // .section __orc_stubs |
167 | // stub1: |
168 | // ldr x16, ptr1 ; PC-rel load of ptr1 |
169 | // br x16 ; Jump to resolver |
170 | // stub2: |
171 | // ldr x16, ptr2 ; PC-rel load of ptr2 |
172 | // br x16 ; Jump to resolver |
173 | // |
174 | // ... |
175 | // |
176 | // .section __orc_ptrs |
177 | // ptr1: |
178 | // .quad 0x0 |
179 | // ptr2: |
180 | // .quad 0x0 |
181 | // |
182 | // ... |
183 | |
184 | static_assert(StubSize == PointerSize, |
185 | "Pointer and stub size must match for algorithm below" ); |
186 | assert(stubAndPointerRangesOk<OrcAArch64>( |
187 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
188 | "PointersBlock is out of range" ); |
189 | uint64_t PtrDisplacement = |
190 | PointersBlockTargetAddress - StubsBlockTargetAddress; |
191 | assert((PtrDisplacement % 8 == 0) && |
192 | "Displacement to pointer is not a multiple of 8" ); |
193 | uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem); |
194 | uint64_t PtrOffsetField = ((PtrDisplacement >> 2) & 0x7ffff) << 5; |
195 | |
196 | for (unsigned I = 0; I < NumStubs; ++I) |
197 | Stub[I] = 0xd61f020058000010 | PtrOffsetField; |
198 | } |
199 | |
200 | void OrcX86_64_Base::writeTrampolines(char *TrampolineBlockWorkingMem, |
201 | ExecutorAddr TrampolineBlockTargetAddress, |
202 | ExecutorAddr ResolverAddr, |
203 | unsigned NumTrampolines) { |
204 | |
205 | unsigned OffsetToPtr = NumTrampolines * TrampolineSize; |
206 | |
207 | memcpy(dest: TrampolineBlockWorkingMem + OffsetToPtr, src: &ResolverAddr, |
208 | n: sizeof(uint64_t)); |
209 | |
210 | uint64_t *Trampolines = |
211 | reinterpret_cast<uint64_t *>(TrampolineBlockWorkingMem); |
212 | uint64_t CallIndirPCRel = 0xf1c40000000015ff; |
213 | |
214 | for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) |
215 | Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16); |
216 | } |
217 | |
218 | void OrcX86_64_Base::writeIndirectStubsBlock( |
219 | char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress, |
220 | ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) { |
221 | // Stub format is: |
222 | // |
223 | // .section __orc_stubs |
224 | // stub1: |
225 | // jmpq *ptr1(%rip) |
226 | // .byte 0xC4 ; <- Invalid opcode padding. |
227 | // .byte 0xF1 |
228 | // stub2: |
229 | // jmpq *ptr2(%rip) |
230 | // |
231 | // ... |
232 | // |
233 | // .section __orc_ptrs |
234 | // ptr1: |
235 | // .quad 0x0 |
236 | // ptr2: |
237 | // .quad 0x0 |
238 | // |
239 | // ... |
240 | |
241 | // Populate the stubs page stubs and mark it executable. |
242 | static_assert(StubSize == PointerSize, |
243 | "Pointer and stub size must match for algorithm below" ); |
244 | assert(stubAndPointerRangesOk<OrcX86_64_Base>( |
245 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
246 | "PointersBlock is out of range" ); |
247 | uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem); |
248 | uint64_t PtrOffsetField = |
249 | (PointersBlockTargetAddress - StubsBlockTargetAddress - 6) << 16; |
250 | for (unsigned I = 0; I < NumStubs; ++I) |
251 | Stub[I] = 0xF1C40000000025ff | PtrOffsetField; |
252 | } |
253 | |
254 | void OrcX86_64_SysV::writeResolverCode(char *ResolverWorkingMem, |
255 | ExecutorAddr ResolverTargetAddress, |
256 | ExecutorAddr ReentryFnAddr, |
257 | ExecutorAddr ReentryCtxAddr) { |
258 | |
259 | LLVM_DEBUG({ |
260 | dbgs() << "Writing resolver code to " |
261 | << formatv("{0:x16}" , ResolverTargetAddress) << "\n" ; |
262 | }); |
263 | |
264 | const uint8_t ResolverCode[] = { |
265 | // resolver_entry: |
266 | 0x55, // 0x00: pushq %rbp |
267 | 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp |
268 | 0x50, // 0x04: pushq %rax |
269 | 0x53, // 0x05: pushq %rbx |
270 | 0x51, // 0x06: pushq %rcx |
271 | 0x52, // 0x07: pushq %rdx |
272 | 0x56, // 0x08: pushq %rsi |
273 | 0x57, // 0x09: pushq %rdi |
274 | 0x41, 0x50, // 0x0a: pushq %r8 |
275 | 0x41, 0x51, // 0x0c: pushq %r9 |
276 | 0x41, 0x52, // 0x0e: pushq %r10 |
277 | 0x41, 0x53, // 0x10: pushq %r11 |
278 | 0x41, 0x54, // 0x12: pushq %r12 |
279 | 0x41, 0x55, // 0x14: pushq %r13 |
280 | 0x41, 0x56, // 0x16: pushq %r14 |
281 | 0x41, 0x57, // 0x18: pushq %r15 |
282 | 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp |
283 | 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) |
284 | 0x48, 0xbf, // 0x26: movabsq <CBMgr>, %rdi |
285 | |
286 | // 0x28: JIT re-entry ctx addr. |
287 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
288 | |
289 | 0x48, 0x8b, 0x75, 0x08, // 0x30: movq 8(%rbp), %rsi |
290 | 0x48, 0x83, 0xee, 0x06, // 0x34: subq $6, %rsi |
291 | 0x48, 0xb8, // 0x38: movabsq <REntry>, %rax |
292 | |
293 | // 0x3a: JIT re-entry fn addr: |
294 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
295 | |
296 | 0xff, 0xd0, // 0x42: callq *%rax |
297 | 0x48, 0x89, 0x45, 0x08, // 0x44: movq %rax, 8(%rbp) |
298 | 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x48: fxrstor64 (%rsp) |
299 | 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq 0x208, %rsp |
300 | 0x41, 0x5f, // 0x54: popq %r15 |
301 | 0x41, 0x5e, // 0x56: popq %r14 |
302 | 0x41, 0x5d, // 0x58: popq %r13 |
303 | 0x41, 0x5c, // 0x5a: popq %r12 |
304 | 0x41, 0x5b, // 0x5c: popq %r11 |
305 | 0x41, 0x5a, // 0x5e: popq %r10 |
306 | 0x41, 0x59, // 0x60: popq %r9 |
307 | 0x41, 0x58, // 0x62: popq %r8 |
308 | 0x5f, // 0x64: popq %rdi |
309 | 0x5e, // 0x65: popq %rsi |
310 | 0x5a, // 0x66: popq %rdx |
311 | 0x59, // 0x67: popq %rcx |
312 | 0x5b, // 0x68: popq %rbx |
313 | 0x58, // 0x69: popq %rax |
314 | 0x5d, // 0x6a: popq %rbp |
315 | 0xc3, // 0x6b: retq |
316 | }; |
317 | |
318 | const unsigned ReentryFnAddrOffset = 0x3a; |
319 | const unsigned ReentryCtxAddrOffset = 0x28; |
320 | |
321 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
322 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
323 | n: sizeof(uint64_t)); |
324 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
325 | n: sizeof(uint64_t)); |
326 | } |
327 | |
328 | void OrcX86_64_Win32::writeResolverCode(char *ResolverWorkingMem, |
329 | ExecutorAddr ResolverTargetAddress, |
330 | ExecutorAddr ReentryFnAddr, |
331 | ExecutorAddr ReentryCtxAddr) { |
332 | |
333 | // resolverCode is similar to OrcX86_64 with differences specific to windows |
334 | // x64 calling convention: arguments go into rcx, rdx and come in reverse |
335 | // order, shadow space allocation on stack |
336 | const uint8_t ResolverCode[] = { |
337 | // resolver_entry: |
338 | 0x55, // 0x00: pushq %rbp |
339 | 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp |
340 | 0x50, // 0x04: pushq %rax |
341 | 0x53, // 0x05: pushq %rbx |
342 | 0x51, // 0x06: pushq %rcx |
343 | 0x52, // 0x07: pushq %rdx |
344 | 0x56, // 0x08: pushq %rsi |
345 | 0x57, // 0x09: pushq %rdi |
346 | 0x41, 0x50, // 0x0a: pushq %r8 |
347 | 0x41, 0x51, // 0x0c: pushq %r9 |
348 | 0x41, 0x52, // 0x0e: pushq %r10 |
349 | 0x41, 0x53, // 0x10: pushq %r11 |
350 | 0x41, 0x54, // 0x12: pushq %r12 |
351 | 0x41, 0x55, // 0x14: pushq %r13 |
352 | 0x41, 0x56, // 0x16: pushq %r14 |
353 | 0x41, 0x57, // 0x18: pushq %r15 |
354 | 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp |
355 | 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) |
356 | |
357 | 0x48, 0xb9, // 0x26: movabsq <CBMgr>, %rcx |
358 | // 0x28: JIT re-entry ctx addr. |
359 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
360 | |
361 | 0x48, 0x8B, 0x55, 0x08, // 0x30: mov rdx, [rbp+0x8] |
362 | 0x48, 0x83, 0xea, 0x06, // 0x34: sub rdx, 0x6 |
363 | |
364 | 0x48, 0xb8, // 0x38: movabsq <REntry>, %rax |
365 | // 0x3a: JIT re-entry fn addr: |
366 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
367 | |
368 | // 0x42: sub rsp, 0x20 (Allocate shadow space) |
369 | 0x48, 0x83, 0xEC, 0x20, |
370 | 0xff, 0xd0, // 0x46: callq *%rax |
371 | |
372 | // 0x48: add rsp, 0x20 (Free shadow space) |
373 | 0x48, 0x83, 0xC4, 0x20, |
374 | |
375 | 0x48, 0x89, 0x45, 0x08, // 0x4C: movq %rax, 8(%rbp) |
376 | 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x50: fxrstor64 (%rsp) |
377 | 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x55: addq 0x208, %rsp |
378 | 0x41, 0x5f, // 0x5C: popq %r15 |
379 | 0x41, 0x5e, // 0x5E: popq %r14 |
380 | 0x41, 0x5d, // 0x60: popq %r13 |
381 | 0x41, 0x5c, // 0x62: popq %r12 |
382 | 0x41, 0x5b, // 0x64: popq %r11 |
383 | 0x41, 0x5a, // 0x66: popq %r10 |
384 | 0x41, 0x59, // 0x68: popq %r9 |
385 | 0x41, 0x58, // 0x6a: popq %r8 |
386 | 0x5f, // 0x6c: popq %rdi |
387 | 0x5e, // 0x6d: popq %rsi |
388 | 0x5a, // 0x6e: popq %rdx |
389 | 0x59, // 0x6f: popq %rcx |
390 | 0x5b, // 0x70: popq %rbx |
391 | 0x58, // 0x71: popq %rax |
392 | 0x5d, // 0x72: popq %rbp |
393 | 0xc3, // 0x73: retq |
394 | }; |
395 | |
396 | const unsigned ReentryFnAddrOffset = 0x3a; |
397 | const unsigned ReentryCtxAddrOffset = 0x28; |
398 | |
399 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
400 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
401 | n: sizeof(uint64_t)); |
402 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
403 | n: sizeof(uint64_t)); |
404 | } |
405 | |
406 | void OrcI386::writeResolverCode(char *ResolverWorkingMem, |
407 | ExecutorAddr ResolverTargetAddress, |
408 | ExecutorAddr ReentryFnAddr, |
409 | ExecutorAddr ReentryCtxAddr) { |
410 | |
411 | assert((ReentryFnAddr.getValue() >> 32) == 0 && "ReentryFnAddr out of range" ); |
412 | assert((ReentryCtxAddr.getValue() >> 32) == 0 && |
413 | "ReentryCtxAddr out of range" ); |
414 | |
415 | const uint8_t ResolverCode[] = { |
416 | // resolver_entry: |
417 | 0x55, // 0x00: pushl %ebp |
418 | 0x89, 0xe5, // 0x01: movl %esp, %ebp |
419 | 0x54, // 0x03: pushl %esp |
420 | 0x83, 0xe4, 0xf0, // 0x04: andl $-0x10, %esp |
421 | 0x50, // 0x07: pushl %eax |
422 | 0x53, // 0x08: pushl %ebx |
423 | 0x51, // 0x09: pushl %ecx |
424 | 0x52, // 0x0a: pushl %edx |
425 | 0x56, // 0x0b: pushl %esi |
426 | 0x57, // 0x0c: pushl %edi |
427 | 0x81, 0xec, 0x18, 0x02, 0x00, 0x00, // 0x0d: subl $0x218, %esp |
428 | 0x0f, 0xae, 0x44, 0x24, 0x10, // 0x13: fxsave 0x10(%esp) |
429 | 0x8b, 0x75, 0x04, // 0x18: movl 0x4(%ebp), %esi |
430 | 0x83, 0xee, 0x05, // 0x1b: subl $0x5, %esi |
431 | 0x89, 0x74, 0x24, 0x04, // 0x1e: movl %esi, 0x4(%esp) |
432 | 0xc7, 0x04, 0x24, 0x00, 0x00, 0x00, |
433 | 0x00, // 0x22: movl <cbmgr>, (%esp) |
434 | 0xb8, 0x00, 0x00, 0x00, 0x00, // 0x29: movl <reentry>, %eax |
435 | 0xff, 0xd0, // 0x2e: calll *%eax |
436 | 0x89, 0x45, 0x04, // 0x30: movl %eax, 0x4(%ebp) |
437 | 0x0f, 0xae, 0x4c, 0x24, 0x10, // 0x33: fxrstor 0x10(%esp) |
438 | 0x81, 0xc4, 0x18, 0x02, 0x00, 0x00, // 0x38: addl $0x218, %esp |
439 | 0x5f, // 0x3e: popl %edi |
440 | 0x5e, // 0x3f: popl %esi |
441 | 0x5a, // 0x40: popl %edx |
442 | 0x59, // 0x41: popl %ecx |
443 | 0x5b, // 0x42: popl %ebx |
444 | 0x58, // 0x43: popl %eax |
445 | 0x8b, 0x65, 0xfc, // 0x44: movl -0x4(%ebp), %esp |
446 | 0x5d, // 0x48: popl %ebp |
447 | 0xc3 // 0x49: retl |
448 | }; |
449 | |
450 | const unsigned ReentryFnAddrOffset = 0x2a; |
451 | const unsigned ReentryCtxAddrOffset = 0x25; |
452 | |
453 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
454 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
455 | n: sizeof(uint32_t)); |
456 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
457 | n: sizeof(uint32_t)); |
458 | } |
459 | |
460 | void OrcI386::writeTrampolines(char *TrampolineWorkingMem, |
461 | ExecutorAddr TrampolineBlockTargetAddress, |
462 | ExecutorAddr ResolverAddr, |
463 | unsigned NumTrampolines) { |
464 | assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range" ); |
465 | |
466 | uint64_t CallRelImm = 0xF1C4C400000000e8; |
467 | uint64_t ResolverRel = ResolverAddr - TrampolineBlockTargetAddress - 5; |
468 | |
469 | uint64_t *Trampolines = reinterpret_cast<uint64_t *>(TrampolineWorkingMem); |
470 | for (unsigned I = 0; I < NumTrampolines; ++I, ResolverRel -= TrampolineSize) |
471 | Trampolines[I] = CallRelImm | (ResolverRel << 8); |
472 | } |
473 | |
474 | void OrcI386::writeIndirectStubsBlock(char *StubsBlockWorkingMem, |
475 | ExecutorAddr StubsBlockTargetAddress, |
476 | ExecutorAddr PointersBlockTargetAddress, |
477 | unsigned NumStubs) { |
478 | assert((StubsBlockTargetAddress.getValue() >> 32) == 0 && |
479 | "StubsBlockTargetAddress is out of range" ); |
480 | assert((PointersBlockTargetAddress.getValue() >> 32) == 0 && |
481 | "PointersBlockTargetAddress is out of range" ); |
482 | |
483 | // Stub format is: |
484 | // |
485 | // .section __orc_stubs |
486 | // stub1: |
487 | // jmpq *ptr1 |
488 | // .byte 0xC4 ; <- Invalid opcode padding. |
489 | // .byte 0xF1 |
490 | // stub2: |
491 | // jmpq *ptr2 |
492 | // |
493 | // ... |
494 | // |
495 | // .section __orc_ptrs |
496 | // ptr1: |
497 | // .quad 0x0 |
498 | // ptr2: |
499 | // .quad 0x0 |
500 | // |
501 | // ... |
502 | |
503 | assert(stubAndPointerRangesOk<OrcI386>( |
504 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
505 | "PointersBlock is out of range" ); |
506 | |
507 | uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem); |
508 | uint64_t PtrAddr = PointersBlockTargetAddress.getValue(); |
509 | for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 4) |
510 | Stub[I] = 0xF1C40000000025ff | (PtrAddr << 16); |
511 | } |
512 | |
513 | void OrcMips32_Base::writeResolverCode(char *ResolverWorkingMem, |
514 | ExecutorAddr ResolverTargetAddress, |
515 | ExecutorAddr ReentryFnAddr, |
516 | ExecutorAddr ReentryCtxAddr, |
517 | bool isBigEndian) { |
518 | |
519 | const uint32_t ResolverCode[] = { |
520 | // resolver_entry: |
521 | 0x27bdff98, // 0x00: addiu $sp,$sp,-104 |
522 | 0xafa20000, // 0x04: sw $v0,0($sp) |
523 | 0xafa30004, // 0x08: sw $v1,4($sp) |
524 | 0xafa40008, // 0x0c: sw $a0,8($sp) |
525 | 0xafa5000c, // 0x10: sw $a1,12($sp) |
526 | 0xafa60010, // 0x14: sw $a2,16($sp) |
527 | 0xafa70014, // 0x18: sw $a3,20($sp) |
528 | 0xafb00018, // 0x1c: sw $s0,24($sp) |
529 | 0xafb1001c, // 0x20: sw $s1,28($sp) |
530 | 0xafb20020, // 0x24: sw $s2,32($sp) |
531 | 0xafb30024, // 0x28: sw $s3,36($sp) |
532 | 0xafb40028, // 0x2c: sw $s4,40($sp) |
533 | 0xafb5002c, // 0x30: sw $s5,44($sp) |
534 | 0xafb60030, // 0x34: sw $s6,48($sp) |
535 | 0xafb70034, // 0x38: sw $s7,52($sp) |
536 | 0xafa80038, // 0x3c: sw $t0,56($sp) |
537 | 0xafa9003c, // 0x40: sw $t1,60($sp) |
538 | 0xafaa0040, // 0x44: sw $t2,64($sp) |
539 | 0xafab0044, // 0x48: sw $t3,68($sp) |
540 | 0xafac0048, // 0x4c: sw $t4,72($sp) |
541 | 0xafad004c, // 0x50: sw $t5,76($sp) |
542 | 0xafae0050, // 0x54: sw $t6,80($sp) |
543 | 0xafaf0054, // 0x58: sw $t7,84($sp) |
544 | 0xafb80058, // 0x5c: sw $t8,88($sp) |
545 | 0xafb9005c, // 0x60: sw $t9,92($sp) |
546 | 0xafbe0060, // 0x64: sw $fp,96($sp) |
547 | 0xafbf0064, // 0x68: sw $ra,100($sp) |
548 | |
549 | // JIT re-entry ctx addr. |
550 | 0x00000000, // 0x6c: lui $a0,ctx |
551 | 0x00000000, // 0x70: addiu $a0,$a0,ctx |
552 | |
553 | 0x03e02825, // 0x74: move $a1, $ra |
554 | 0x24a5ffec, // 0x78: addiu $a1,$a1,-20 |
555 | |
556 | // JIT re-entry fn addr: |
557 | 0x00000000, // 0x7c: lui $t9,reentry |
558 | 0x00000000, // 0x80: addiu $t9,$t9,reentry |
559 | |
560 | 0x0320f809, // 0x84: jalr $t9 |
561 | 0x00000000, // 0x88: nop |
562 | 0x8fbf0064, // 0x8c: lw $ra,100($sp) |
563 | 0x8fbe0060, // 0x90: lw $fp,96($sp) |
564 | 0x8fb9005c, // 0x94: lw $t9,92($sp) |
565 | 0x8fb80058, // 0x98: lw $t8,88($sp) |
566 | 0x8faf0054, // 0x9c: lw $t7,84($sp) |
567 | 0x8fae0050, // 0xa0: lw $t6,80($sp) |
568 | 0x8fad004c, // 0xa4: lw $t5,76($sp) |
569 | 0x8fac0048, // 0xa8: lw $t4,72($sp) |
570 | 0x8fab0044, // 0xac: lw $t3,68($sp) |
571 | 0x8faa0040, // 0xb0: lw $t2,64($sp) |
572 | 0x8fa9003c, // 0xb4: lw $t1,60($sp) |
573 | 0x8fa80038, // 0xb8: lw $t0,56($sp) |
574 | 0x8fb70034, // 0xbc: lw $s7,52($sp) |
575 | 0x8fb60030, // 0xc0: lw $s6,48($sp) |
576 | 0x8fb5002c, // 0xc4: lw $s5,44($sp) |
577 | 0x8fb40028, // 0xc8: lw $s4,40($sp) |
578 | 0x8fb30024, // 0xcc: lw $s3,36($sp) |
579 | 0x8fb20020, // 0xd0: lw $s2,32($sp) |
580 | 0x8fb1001c, // 0xd4: lw $s1,28($sp) |
581 | 0x8fb00018, // 0xd8: lw $s0,24($sp) |
582 | 0x8fa70014, // 0xdc: lw $a3,20($sp) |
583 | 0x8fa60010, // 0xe0: lw $a2,16($sp) |
584 | 0x8fa5000c, // 0xe4: lw $a1,12($sp) |
585 | 0x8fa40008, // 0xe8: lw $a0,8($sp) |
586 | 0x27bd0068, // 0xec: addiu $sp,$sp,104 |
587 | 0x0300f825, // 0xf0: move $ra, $t8 |
588 | 0x03200008, // 0xf4: jr $t9 |
589 | 0x00000000, // 0xf8: move $t9, $v0/v1 |
590 | }; |
591 | |
592 | const unsigned ReentryFnAddrOffset = 0x7c; // JIT re-entry fn addr lui |
593 | const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry context addr lui |
594 | const unsigned Offsett = 0xf8; |
595 | |
596 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
597 | |
598 | // Depending on endian return value will be in v0 or v1. |
599 | uint32_t MoveVxT9 = isBigEndian ? 0x0060c825 : 0x0040c825; |
600 | memcpy(dest: ResolverWorkingMem + Offsett, src: &MoveVxT9, n: sizeof(MoveVxT9)); |
601 | |
602 | uint32_t ReentryCtxLUi = |
603 | 0x3c040000 | (((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF); |
604 | uint32_t ReentryCtxADDiu = 0x24840000 | (ReentryCtxAddr.getValue() & 0xFFFF); |
605 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxLUi, |
606 | n: sizeof(ReentryCtxLUi)); |
607 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset + 4, src: &ReentryCtxADDiu, |
608 | n: sizeof(ReentryCtxADDiu)); |
609 | |
610 | uint32_t ReentryFnLUi = |
611 | 0x3c190000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF); |
612 | uint32_t ReentryFnADDiu = 0x27390000 | (ReentryFnAddr.getValue() & 0xFFFF); |
613 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnLUi, |
614 | n: sizeof(ReentryFnLUi)); |
615 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset + 4, src: &ReentryFnADDiu, |
616 | n: sizeof(ReentryFnADDiu)); |
617 | } |
618 | |
619 | void OrcMips32_Base::writeTrampolines(char *TrampolineBlockWorkingMem, |
620 | ExecutorAddr TrampolineBlockTargetAddress, |
621 | ExecutorAddr ResolverAddr, |
622 | unsigned NumTrampolines) { |
623 | |
624 | assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range" ); |
625 | |
626 | uint32_t *Trampolines = |
627 | reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); |
628 | uint32_t RHiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16); |
629 | |
630 | for (unsigned I = 0; I < NumTrampolines; ++I) { |
631 | // move $t8,$ra |
632 | // lui $t9,ResolverAddr |
633 | // addiu $t9,$t9,ResolverAddr |
634 | // jalr $t9 |
635 | // nop |
636 | Trampolines[5 * I + 0] = 0x03e0c025; |
637 | Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF); |
638 | Trampolines[5 * I + 2] = 0x27390000 | (ResolverAddr.getValue() & 0xFFFF); |
639 | Trampolines[5 * I + 3] = 0x0320f809; |
640 | Trampolines[5 * I + 4] = 0x00000000; |
641 | } |
642 | } |
643 | |
644 | void OrcMips32_Base::writeIndirectStubsBlock( |
645 | char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress, |
646 | ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) { |
647 | assert((StubsBlockTargetAddress.getValue() >> 32) == 0 && |
648 | "InitialPtrVal is out of range" ); |
649 | |
650 | // Stub format is: |
651 | // |
652 | // .section __orc_stubs |
653 | // stub1: |
654 | // lui $t9, ptr1 |
655 | // lw $t9, %lo(ptr1)($t9) |
656 | // jr $t9 |
657 | // stub2: |
658 | // lui $t9, ptr2 |
659 | // lw $t9,%lo(ptr1)($t9) |
660 | // jr $t9 |
661 | // |
662 | // ... |
663 | // |
664 | // .section __orc_ptrs |
665 | // ptr1: |
666 | // .word 0x0 |
667 | // ptr2: |
668 | // .word 0x0 |
669 | // |
670 | // i.. |
671 | |
672 | assert(stubAndPointerRangesOk<OrcMips32_Base>( |
673 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
674 | "PointersBlock is out of range" ); |
675 | |
676 | // Populate the stubs page stubs and mark it executable. |
677 | uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); |
678 | uint64_t PtrAddr = PointersBlockTargetAddress.getValue(); |
679 | |
680 | for (unsigned I = 0; I < NumStubs; ++I) { |
681 | uint32_t HiAddr = ((PtrAddr + 0x8000) >> 16); |
682 | Stub[4 * I + 0] = 0x3c190000 | (HiAddr & 0xFFFF); // lui $t9,ptr1 |
683 | Stub[4 * I + 1] = 0x8f390000 | (PtrAddr & 0xFFFF); // lw $t9,%lo(ptr1)($t9) |
684 | Stub[4 * I + 2] = 0x03200008; // jr $t9 |
685 | Stub[4 * I + 3] = 0x00000000; // nop |
686 | PtrAddr += 4; |
687 | } |
688 | } |
689 | |
690 | void OrcMips64::writeResolverCode(char *ResolverWorkingMem, |
691 | ExecutorAddr ResolverTargetAddress, |
692 | ExecutorAddr ReentryFnAddr, |
693 | ExecutorAddr ReentryCtxAddr) { |
694 | |
695 | const uint32_t ResolverCode[] = { |
696 | //resolver_entry: |
697 | 0x67bdff30, // 0x00: daddiu $sp,$sp,-208 |
698 | 0xffa20000, // 0x04: sd v0,0(sp) |
699 | 0xffa30008, // 0x08: sd v1,8(sp) |
700 | 0xffa40010, // 0x0c: sd a0,16(sp) |
701 | 0xffa50018, // 0x10: sd a1,24(sp) |
702 | 0xffa60020, // 0x14: sd a2,32(sp) |
703 | 0xffa70028, // 0x18: sd a3,40(sp) |
704 | 0xffa80030, // 0x1c: sd a4,48(sp) |
705 | 0xffa90038, // 0x20: sd a5,56(sp) |
706 | 0xffaa0040, // 0x24: sd a6,64(sp) |
707 | 0xffab0048, // 0x28: sd a7,72(sp) |
708 | 0xffac0050, // 0x2c: sd t0,80(sp) |
709 | 0xffad0058, // 0x30: sd t1,88(sp) |
710 | 0xffae0060, // 0x34: sd t2,96(sp) |
711 | 0xffaf0068, // 0x38: sd t3,104(sp) |
712 | 0xffb00070, // 0x3c: sd s0,112(sp) |
713 | 0xffb10078, // 0x40: sd s1,120(sp) |
714 | 0xffb20080, // 0x44: sd s2,128(sp) |
715 | 0xffb30088, // 0x48: sd s3,136(sp) |
716 | 0xffb40090, // 0x4c: sd s4,144(sp) |
717 | 0xffb50098, // 0x50: sd s5,152(sp) |
718 | 0xffb600a0, // 0x54: sd s6,160(sp) |
719 | 0xffb700a8, // 0x58: sd s7,168(sp) |
720 | 0xffb800b0, // 0x5c: sd t8,176(sp) |
721 | 0xffb900b8, // 0x60: sd t9,184(sp) |
722 | 0xffbe00c0, // 0x64: sd fp,192(sp) |
723 | 0xffbf00c8, // 0x68: sd ra,200(sp) |
724 | |
725 | // JIT re-entry ctx addr. |
726 | 0x00000000, // 0x6c: lui $a0,heighest(ctx) |
727 | 0x00000000, // 0x70: daddiu $a0,$a0,heigher(ctx) |
728 | 0x00000000, // 0x74: dsll $a0,$a0,16 |
729 | 0x00000000, // 0x78: daddiu $a0,$a0,hi(ctx) |
730 | 0x00000000, // 0x7c: dsll $a0,$a0,16 |
731 | 0x00000000, // 0x80: daddiu $a0,$a0,lo(ctx) |
732 | |
733 | 0x03e02825, // 0x84: move $a1, $ra |
734 | 0x64a5ffdc, // 0x88: daddiu $a1,$a1,-36 |
735 | |
736 | // JIT re-entry fn addr: |
737 | 0x00000000, // 0x8c: lui $t9,reentry |
738 | 0x00000000, // 0x90: daddiu $t9,$t9,reentry |
739 | 0x00000000, // 0x94: dsll $t9,$t9, |
740 | 0x00000000, // 0x98: daddiu $t9,$t9, |
741 | 0x00000000, // 0x9c: dsll $t9,$t9, |
742 | 0x00000000, // 0xa0: daddiu $t9,$t9, |
743 | 0x0320f809, // 0xa4: jalr $t9 |
744 | 0x00000000, // 0xa8: nop |
745 | 0xdfbf00c8, // 0xac: ld ra, 200(sp) |
746 | 0xdfbe00c0, // 0xb0: ld fp, 192(sp) |
747 | 0xdfb900b8, // 0xb4: ld t9, 184(sp) |
748 | 0xdfb800b0, // 0xb8: ld t8, 176(sp) |
749 | 0xdfb700a8, // 0xbc: ld s7, 168(sp) |
750 | 0xdfb600a0, // 0xc0: ld s6, 160(sp) |
751 | 0xdfb50098, // 0xc4: ld s5, 152(sp) |
752 | 0xdfb40090, // 0xc8: ld s4, 144(sp) |
753 | 0xdfb30088, // 0xcc: ld s3, 136(sp) |
754 | 0xdfb20080, // 0xd0: ld s2, 128(sp) |
755 | 0xdfb10078, // 0xd4: ld s1, 120(sp) |
756 | 0xdfb00070, // 0xd8: ld s0, 112(sp) |
757 | 0xdfaf0068, // 0xdc: ld t3, 104(sp) |
758 | 0xdfae0060, // 0xe0: ld t2, 96(sp) |
759 | 0xdfad0058, // 0xe4: ld t1, 88(sp) |
760 | 0xdfac0050, // 0xe8: ld t0, 80(sp) |
761 | 0xdfab0048, // 0xec: ld a7, 72(sp) |
762 | 0xdfaa0040, // 0xf0: ld a6, 64(sp) |
763 | 0xdfa90038, // 0xf4: ld a5, 56(sp) |
764 | 0xdfa80030, // 0xf8: ld a4, 48(sp) |
765 | 0xdfa70028, // 0xfc: ld a3, 40(sp) |
766 | 0xdfa60020, // 0x100: ld a2, 32(sp) |
767 | 0xdfa50018, // 0x104: ld a1, 24(sp) |
768 | 0xdfa40010, // 0x108: ld a0, 16(sp) |
769 | 0xdfa30008, // 0x10c: ld v1, 8(sp) |
770 | 0x67bd00d0, // 0x110: daddiu $sp,$sp,208 |
771 | 0x0300f825, // 0x114: move $ra, $t8 |
772 | 0x03200008, // 0x118: jr $t9 |
773 | 0x0040c825, // 0x11c: move $t9, $v0 |
774 | }; |
775 | |
776 | const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lui |
777 | const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry ctx addr lui |
778 | |
779 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
780 | |
781 | uint32_t ReentryCtxLUi = |
782 | 0x3c040000 | |
783 | (((ReentryCtxAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF); |
784 | uint32_t ReentryCtxDADDiu = |
785 | 0x64840000 | (((ReentryCtxAddr.getValue() + 0x80008000) >> 32) & 0xFFFF); |
786 | uint32_t ReentryCtxDSLL = 0x00042438; |
787 | uint32_t ReentryCtxDADDiu2 = |
788 | 0x64840000 | ((((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF)); |
789 | uint32_t ReentryCtxDSLL2 = 0x00042438; |
790 | uint32_t ReentryCtxDADDiu3 = |
791 | 0x64840000 | (ReentryCtxAddr.getValue() & 0xFFFF); |
792 | |
793 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxLUi, |
794 | n: sizeof(ReentryCtxLUi)); |
795 | memcpy(dest: ResolverWorkingMem + (ReentryCtxAddrOffset + 4), src: &ReentryCtxDADDiu, |
796 | n: sizeof(ReentryCtxDADDiu)); |
797 | memcpy(dest: ResolverWorkingMem + (ReentryCtxAddrOffset + 8), src: &ReentryCtxDSLL, |
798 | n: sizeof(ReentryCtxDSLL)); |
799 | memcpy(dest: ResolverWorkingMem + (ReentryCtxAddrOffset + 12), src: &ReentryCtxDADDiu2, |
800 | n: sizeof(ReentryCtxDADDiu2)); |
801 | memcpy(dest: ResolverWorkingMem + (ReentryCtxAddrOffset + 16), src: &ReentryCtxDSLL2, |
802 | n: sizeof(ReentryCtxDSLL2)); |
803 | memcpy(dest: ResolverWorkingMem + (ReentryCtxAddrOffset + 20), src: &ReentryCtxDADDiu3, |
804 | n: sizeof(ReentryCtxDADDiu3)); |
805 | |
806 | uint32_t ReentryFnLUi = |
807 | 0x3c190000 | |
808 | (((ReentryFnAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF); |
809 | |
810 | uint32_t ReentryFnDADDiu = |
811 | 0x67390000 | (((ReentryFnAddr.getValue() + 0x80008000) >> 32) & 0xFFFF); |
812 | |
813 | uint32_t ReentryFnDSLL = 0x0019cc38; |
814 | |
815 | uint32_t ReentryFnDADDiu2 = |
816 | 0x67390000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF); |
817 | |
818 | uint32_t ReentryFnDSLL2 = 0x0019cc38; |
819 | |
820 | uint32_t ReentryFnDADDiu3 = 0x67390000 | (ReentryFnAddr.getValue() & 0xFFFF); |
821 | |
822 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnLUi, |
823 | n: sizeof(ReentryFnLUi)); |
824 | memcpy(dest: ResolverWorkingMem + (ReentryFnAddrOffset + 4), src: &ReentryFnDADDiu, |
825 | n: sizeof(ReentryFnDADDiu)); |
826 | memcpy(dest: ResolverWorkingMem + (ReentryFnAddrOffset + 8), src: &ReentryFnDSLL, |
827 | n: sizeof(ReentryFnDSLL)); |
828 | memcpy(dest: ResolverWorkingMem + (ReentryFnAddrOffset + 12), src: &ReentryFnDADDiu2, |
829 | n: sizeof(ReentryFnDADDiu2)); |
830 | memcpy(dest: ResolverWorkingMem + (ReentryFnAddrOffset + 16), src: &ReentryFnDSLL2, |
831 | n: sizeof(ReentryFnDSLL2)); |
832 | memcpy(dest: ResolverWorkingMem + (ReentryFnAddrOffset + 20), src: &ReentryFnDADDiu3, |
833 | n: sizeof(ReentryFnDADDiu3)); |
834 | } |
835 | |
836 | void OrcMips64::writeTrampolines(char *TrampolineBlockWorkingMem, |
837 | ExecutorAddr TrampolineBlockTargetAddress, |
838 | ExecutorAddr ResolverAddr, |
839 | unsigned NumTrampolines) { |
840 | |
841 | uint32_t *Trampolines = |
842 | reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); |
843 | |
844 | uint64_t HeighestAddr = ((ResolverAddr.getValue() + 0x800080008000) >> 48); |
845 | uint64_t HeigherAddr = ((ResolverAddr.getValue() + 0x80008000) >> 32); |
846 | uint64_t HiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16); |
847 | |
848 | for (unsigned I = 0; I < NumTrampolines; ++I) { |
849 | Trampolines[10 * I + 0] = 0x03e0c025; // move $t8,$ra |
850 | Trampolines[10 * I + 1] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,resolveAddr |
851 | Trampolines[10 * I + 2] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(resolveAddr) |
852 | Trampolines[10 * I + 3] = 0x0019cc38; // dsll $t9,$t9,16 |
853 | Trampolines[10 * I + 4] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr) |
854 | Trampolines[10 * I + 5] = 0x0019cc38; // dsll $t9,$t9,16 |
855 | Trampolines[10 * I + 6] = 0x67390000 | (ResolverAddr.getValue() & |
856 | 0xFFFF); // daddiu $t9,$t9,%lo(ptr) |
857 | Trampolines[10 * I + 7] = 0x0320f809; // jalr $t9 |
858 | Trampolines[10 * I + 8] = 0x00000000; // nop |
859 | Trampolines[10 * I + 9] = 0x00000000; // nop |
860 | } |
861 | } |
862 | |
863 | void OrcMips64::writeIndirectStubsBlock(char *StubsBlockWorkingMem, |
864 | ExecutorAddr StubsBlockTargetAddress, |
865 | ExecutorAddr PointersBlockTargetAddress, |
866 | unsigned NumStubs) { |
867 | // Stub format is: |
868 | // |
869 | // .section __orc_stubs |
870 | // stub1: |
871 | // lui $t9,ptr1 |
872 | // dsll $t9,$t9,16 |
873 | // daddiu $t9,$t9,%hi(ptr) |
874 | // dsll $t9,$t9,16 |
875 | // ld $t9,%lo(ptr) |
876 | // jr $t9 |
877 | // stub2: |
878 | // lui $t9,ptr1 |
879 | // dsll $t9,$t9,16 |
880 | // daddiu $t9,$t9,%hi(ptr) |
881 | // dsll $t9,$t9,16 |
882 | // ld $t9,%lo(ptr) |
883 | // jr $t9 |
884 | // |
885 | // ... |
886 | // |
887 | // .section __orc_ptrs |
888 | // ptr1: |
889 | // .dword 0x0 |
890 | // ptr2: |
891 | // .dword 0x0 |
892 | // |
893 | // ... |
894 | |
895 | assert(stubAndPointerRangesOk<OrcMips64>( |
896 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
897 | "PointersBlock is out of range" ); |
898 | |
899 | // Populate the stubs page stubs and mark it executable. |
900 | uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); |
901 | uint64_t PtrAddr = PointersBlockTargetAddress.getValue(); |
902 | |
903 | for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) { |
904 | uint64_t HeighestAddr = ((PtrAddr + 0x800080008000) >> 48); |
905 | uint64_t HeigherAddr = ((PtrAddr + 0x80008000) >> 32); |
906 | uint64_t HiAddr = ((PtrAddr + 0x8000) >> 16); |
907 | Stub[8 * I + 0] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,ptr1 |
908 | Stub[8 * I + 1] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(ptr) |
909 | Stub[8 * I + 2] = 0x0019cc38; // dsll $t9,$t9,16 |
910 | Stub[8 * I + 3] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr) |
911 | Stub[8 * I + 4] = 0x0019cc38; // dsll $t9,$t9,16 |
912 | Stub[8 * I + 5] = 0xdf390000 | (PtrAddr & 0xFFFF); // ld $t9,%lo(ptr) |
913 | Stub[8 * I + 6] = 0x03200008; // jr $t9 |
914 | Stub[8 * I + 7] = 0x00000000; // nop |
915 | } |
916 | } |
917 | |
918 | void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem, |
919 | ExecutorAddr ResolverTargetAddress, |
920 | ExecutorAddr ReentryFnAddr, |
921 | ExecutorAddr ReentryCtxAddr) { |
922 | |
923 | const uint32_t ResolverCode[] = { |
924 | 0xef810113, // 0x00: addi sp,sp,-264 |
925 | 0x00813023, // 0x04: sd s0,0(sp) |
926 | 0x00913423, // 0x08: sd s1,8(sp) |
927 | 0x01213823, // 0x0c: sd s2,16(sp) |
928 | 0x01313c23, // 0x10: sd s3,24(sp) |
929 | 0x03413023, // 0x14: sd s4,32(sp) |
930 | 0x03513423, // 0x18: sd s5,40(sp) |
931 | 0x03613823, // 0x1c: sd s6,48(sp) |
932 | 0x03713c23, // 0x20: sd s7,56(sp) |
933 | 0x05813023, // 0x24: sd s8,64(sp) |
934 | 0x05913423, // 0x28: sd s9,72(sp) |
935 | 0x05a13823, // 0x2c: sd s10,80(sp) |
936 | 0x05b13c23, // 0x30: sd s11,88(sp) |
937 | 0x06113023, // 0x34: sd ra,96(sp) |
938 | 0x06a13423, // 0x38: sd a0,104(sp) |
939 | 0x06b13823, // 0x3c: sd a1,112(sp) |
940 | 0x06c13c23, // 0x40: sd a2,120(sp) |
941 | 0x08d13023, // 0x44: sd a3,128(sp) |
942 | 0x08e13423, // 0x48: sd a4,136(sp) |
943 | 0x08f13823, // 0x4c: sd a5,144(sp) |
944 | 0x09013c23, // 0x50: sd a6,152(sp) |
945 | 0x0b113023, // 0x54: sd a7,160(sp) |
946 | 0x0a813427, // 0x58: fsd fs0,168(sp) |
947 | 0x0a913827, // 0x5c: fsd fs1,176(sp) |
948 | 0x0b213c27, // 0x60: fsd fs2,184(sp) |
949 | 0x0d313027, // 0x64: fsd fs3,192(sp) |
950 | 0x0d413427, // 0x68: fsd fs4,200(sp) |
951 | 0x0d513827, // 0x6c: fsd fs5,208(sp) |
952 | 0x0d613c27, // 0x70: fsd fs6,216(sp) |
953 | 0x0f713027, // 0x74: fsd fs7,224(sp) |
954 | 0x0f813427, // 0x78: fsd fs8,232(sp) |
955 | 0x0f913827, // 0x7c: fsd fs9,240(sp) |
956 | 0x0fa13c27, // 0x80: fsd fs10,248(sp) |
957 | 0x11b13027, // 0x84: fsd fs11,256(sp) |
958 | 0x00000517, // 0x88: auipc a0,0x0 |
959 | 0x0b053503, // 0x8c: ld a0,176(a0) # 0x138 |
960 | 0x00030593, // 0x90: mv a1,t1 |
961 | 0xff458593, // 0x94: addi a1,a1,-12 |
962 | 0x00000617, // 0x98: auipc a2,0x0 |
963 | 0x0a863603, // 0x9c: ld a2,168(a2) # 0x140 |
964 | 0x000600e7, // 0xa0: jalr a2 |
965 | 0x00050293, // 0xa4: mv t0,a0 |
966 | 0x00013403, // 0xa8: ld s0,0(sp) |
967 | 0x00813483, // 0xac: ld s1,8(sp) |
968 | 0x01013903, // 0xb0: ld s2,16(sp) |
969 | 0x01813983, // 0xb4: ld s3,24(sp) |
970 | 0x02013a03, // 0xb8: ld s4,32(sp) |
971 | 0x02813a83, // 0xbc: ld s5,40(sp) |
972 | 0x03013b03, // 0xc0: ld s6,48(sp) |
973 | 0x03813b83, // 0xc4: ld s7,56(sp) |
974 | 0x04013c03, // 0xc8: ld s8,64(sp) |
975 | 0x04813c83, // 0xcc: ld s9,72(sp) |
976 | 0x05013d03, // 0xd0: ld s10,80(sp) |
977 | 0x05813d83, // 0xd4: ld s11,88(sp) |
978 | 0x06013083, // 0xd8: ld ra,96(sp) |
979 | 0x06813503, // 0xdc: ld a0,104(sp) |
980 | 0x07013583, // 0xe0: ld a1,112(sp) |
981 | 0x07813603, // 0xe4: ld a2,120(sp) |
982 | 0x08013683, // 0xe8: ld a3,128(sp) |
983 | 0x08813703, // 0xec: ld a4,136(sp) |
984 | 0x09013783, // 0xf0: ld a5,144(sp) |
985 | 0x09813803, // 0xf4: ld a6,152(sp) |
986 | 0x0a013883, // 0xf8: ld a7,160(sp) |
987 | 0x0a813407, // 0xfc: fld fs0,168(sp) |
988 | 0x0b013487, // 0x100: fld fs1,176(sp) |
989 | 0x0b813907, // 0x104: fld fs2,184(sp) |
990 | 0x0c013987, // 0x108: fld fs3,192(sp) |
991 | 0x0c813a07, // 0x10c: fld fs4,200(sp) |
992 | 0x0d013a87, // 0x110: fld fs5,208(sp) |
993 | 0x0d813b07, // 0x114: fld fs6,216(sp) |
994 | 0x0e013b87, // 0x118: fld fs7,224(sp) |
995 | 0x0e813c07, // 0x11c: fld fs8,232(sp) |
996 | 0x0f013c87, // 0x120: fld fs9,240(sp) |
997 | 0x0f813d07, // 0x124: fld fs10,248(sp) |
998 | 0x10013d87, // 0x128: fld fs11,256(sp) |
999 | 0x10810113, // 0x12c: addi sp,sp,264 |
1000 | 0x00028067, // 0x130: jr t0 |
1001 | 0x12345678, // 0x134: padding to align at 8 byte |
1002 | 0x12345678, // 0x138: Lreentry_ctx_ptr: |
1003 | 0xdeadbeef, // 0x13c: .quad 0 |
1004 | 0x98765432, // 0x140: Lreentry_fn_ptr: |
1005 | 0xcafef00d // 0x144: .quad 0 |
1006 | }; |
1007 | |
1008 | const unsigned ReentryCtxAddrOffset = 0x138; |
1009 | const unsigned ReentryFnAddrOffset = 0x140; |
1010 | |
1011 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
1012 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
1013 | n: sizeof(uint64_t)); |
1014 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
1015 | n: sizeof(uint64_t)); |
1016 | } |
1017 | |
1018 | void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem, |
1019 | ExecutorAddr TrampolineBlockTargetAddress, |
1020 | ExecutorAddr ResolverAddr, |
1021 | unsigned NumTrampolines) { |
1022 | |
1023 | unsigned OffsetToPtr = alignTo(Value: NumTrampolines * TrampolineSize, Align: 8); |
1024 | |
1025 | memcpy(dest: TrampolineBlockWorkingMem + OffsetToPtr, src: &ResolverAddr, |
1026 | n: sizeof(uint64_t)); |
1027 | |
1028 | uint32_t *Trampolines = |
1029 | reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); |
1030 | for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { |
1031 | uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xFFFFF000; |
1032 | uint32_t Lo12 = OffsetToPtr - Hi20; |
1033 | Trampolines[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr) |
1034 | Trampolines[4 * I + 1] = |
1035 | 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr) |
1036 | Trampolines[4 * I + 2] = 0x00028367; // jalr t1, t0 |
1037 | Trampolines[4 * I + 3] = 0xdeadface; // padding |
1038 | } |
1039 | } |
1040 | |
1041 | void OrcRiscv64::writeIndirectStubsBlock( |
1042 | char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress, |
1043 | ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) { |
1044 | // Stub format is: |
1045 | // |
1046 | // .section __orc_stubs |
1047 | // stub1: |
1048 | // auipc t0, %hi(ptr1) ; PC-rel load of ptr1 |
1049 | // ld t0, %lo(t0) |
1050 | // jr t0 ; Jump to resolver |
1051 | // .quad 0 ; Pad to 16 bytes |
1052 | // stub2: |
1053 | // auipc t0, %hi(ptr1) ; PC-rel load of ptr1 |
1054 | // ld t0, %lo(t0) |
1055 | // jr t0 ; Jump to resolver |
1056 | // .quad 0 |
1057 | // |
1058 | // ... |
1059 | // |
1060 | // .section __orc_ptrs |
1061 | // ptr1: |
1062 | // .quad 0x0 |
1063 | // ptr2: |
1064 | // .quad 0x0 |
1065 | // |
1066 | // ... |
1067 | |
1068 | assert(stubAndPointerRangesOk<OrcRiscv64>( |
1069 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
1070 | "PointersBlock is out of range" ); |
1071 | |
1072 | uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); |
1073 | |
1074 | for (unsigned I = 0; I < NumStubs; ++I) { |
1075 | uint64_t PtrDisplacement = |
1076 | PointersBlockTargetAddress - StubsBlockTargetAddress; |
1077 | uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xFFFFF000; |
1078 | uint32_t Lo12 = PtrDisplacement - Hi20; |
1079 | Stub[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr) |
1080 | Stub[4 * I + 1] = 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr) |
1081 | Stub[4 * I + 2] = 0x00028067; // jr t0 |
1082 | Stub[4 * I + 3] = 0xfeedbeef; // padding |
1083 | PointersBlockTargetAddress += PointerSize; |
1084 | StubsBlockTargetAddress += StubSize; |
1085 | } |
1086 | } |
1087 | |
1088 | void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem, |
1089 | ExecutorAddr ResolverTargetAddress, |
1090 | ExecutorAddr ReentryFnAddr, |
1091 | ExecutorAddr ReentryCtxAddr) { |
1092 | |
1093 | LLVM_DEBUG({ |
1094 | dbgs() << "Writing resolver code to " |
1095 | << formatv("{0:x16}" , ResolverTargetAddress) << "\n" ; |
1096 | }); |
1097 | |
1098 | const uint32_t ResolverCode[] = { |
1099 | 0x02fde063, // 0x0: addi.d $sp, $sp, -136(0xf78) |
1100 | 0x29c00061, // 0x4: st.d $ra, $sp, 0 |
1101 | 0x29c02064, // 0x8: st.d $a0, $sp, 8(0x8) |
1102 | 0x29c04065, // 0xc: st.d $a1, $sp, 16(0x10) |
1103 | 0x29c06066, // 0x10: st.d $a2, $sp, 24(0x18) |
1104 | 0x29c08067, // 0x14: st.d $a3, $sp, 32(0x20) |
1105 | 0x29c0a068, // 0x18: st.d $a4, $sp, 40(0x28) |
1106 | 0x29c0c069, // 0x1c: st.d $a5, $sp, 48(0x30) |
1107 | 0x29c0e06a, // 0x20: st.d $a6, $sp, 56(0x38) |
1108 | 0x29c1006b, // 0x24: st.d $a7, $sp, 64(0x40) |
1109 | 0x2bc12060, // 0x28: fst.d $fa0, $sp, 72(0x48) |
1110 | 0x2bc14061, // 0x2c: fst.d $fa1, $sp, 80(0x50) |
1111 | 0x2bc16062, // 0x30: fst.d $fa2, $sp, 88(0x58) |
1112 | 0x2bc18063, // 0x34: fst.d $fa3, $sp, 96(0x60) |
1113 | 0x2bc1a064, // 0x38: fst.d $fa4, $sp, 104(0x68) |
1114 | 0x2bc1c065, // 0x3c: fst.d $fa5, $sp, 112(0x70) |
1115 | 0x2bc1e066, // 0x40: fst.d $fa6, $sp, 120(0x78) |
1116 | 0x2bc20067, // 0x44: fst.d $fa7, $sp, 128(0x80) |
1117 | 0x1c000004, // 0x48: pcaddu12i $a0, 0 |
1118 | 0x28c1c084, // 0x4c: ld.d $a0, $a0, 112(0x70) |
1119 | 0x001501a5, // 0x50: move $a1, $t1 |
1120 | 0x02ffd0a5, // 0x54: addi.d $a1, $a1, -12(0xff4) |
1121 | 0x1c000006, // 0x58: pcaddu12i $a2, 0 |
1122 | 0x28c1a0c6, // 0x5c: ld.d $a2, $a2, 104(0x68) |
1123 | 0x4c0000c1, // 0x60: jirl $ra, $a2, 0 |
1124 | 0x0015008c, // 0x64: move $t0, $a0 |
1125 | 0x2b820067, // 0x68: fld.d $fa7, $sp, 128(0x80) |
1126 | 0x2b81e066, // 0x6c: fld.d $fa6, $sp, 120(0x78) |
1127 | 0x2b81c065, // 0x70: fld.d $fa5, $sp, 112(0x70) |
1128 | 0x2b81a064, // 0x74: fld.d $fa4, $sp, 104(0x68) |
1129 | 0x2b818063, // 0x78: fld.d $fa3, $sp, 96(0x60) |
1130 | 0x2b816062, // 0x7c: fld.d $fa2, $sp, 88(0x58) |
1131 | 0x2b814061, // 0x80: fld.d $fa1, $sp, 80(0x50) |
1132 | 0x2b812060, // 0x84: fld.d $fa0, $sp, 72(0x48) |
1133 | 0x28c1006b, // 0x88: ld.d $a7, $sp, 64(0x40) |
1134 | 0x28c0e06a, // 0x8c: ld.d $a6, $sp, 56(0x38) |
1135 | 0x28c0c069, // 0x90: ld.d $a5, $sp, 48(0x30) |
1136 | 0x28c0a068, // 0x94: ld.d $a4, $sp, 40(0x28) |
1137 | 0x28c08067, // 0x98: ld.d $a3, $sp, 32(0x20) |
1138 | 0x28c06066, // 0x9c: ld.d $a2, $sp, 24(0x18) |
1139 | 0x28c04065, // 0xa0: ld.d $a1, $sp, 16(0x10) |
1140 | 0x28c02064, // 0xa4: ld.d $a0, $sp, 8(0x8) |
1141 | 0x28c00061, // 0xa8: ld.d $ra, $sp, 0 |
1142 | 0x02c22063, // 0xac: addi.d $sp, $sp, 136(0x88) |
1143 | 0x4c000180, // 0xb0: jr $t0 |
1144 | 0x00000000, // 0xb4: padding to align at 8 bytes |
1145 | 0x01234567, // 0xb8: Lreentry_ctx_ptr: |
1146 | 0xdeedbeef, // 0xbc: .dword 0 |
1147 | 0x98765432, // 0xc0: Lreentry_fn_ptr: |
1148 | 0xcafef00d, // 0xc4: .dword 0 |
1149 | }; |
1150 | |
1151 | const unsigned ReentryCtxAddrOffset = 0xb8; |
1152 | const unsigned ReentryFnAddrOffset = 0xc0; |
1153 | |
1154 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
1155 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
1156 | n: sizeof(uint64_t)); |
1157 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
1158 | n: sizeof(uint64_t)); |
1159 | } |
1160 | |
1161 | void OrcLoongArch64::writeTrampolines(char *TrampolineBlockWorkingMem, |
1162 | ExecutorAddr TrampolineBlockTargetAddress, |
1163 | ExecutorAddr ResolverAddr, |
1164 | unsigned NumTrampolines) { |
1165 | |
1166 | LLVM_DEBUG({ |
1167 | dbgs() << "Writing trampoline code to " |
1168 | << formatv("{0:x16}" , TrampolineBlockTargetAddress) << "\n" ; |
1169 | }); |
1170 | |
1171 | unsigned OffsetToPtr = alignTo(Value: NumTrampolines * TrampolineSize, Align: 8); |
1172 | |
1173 | memcpy(dest: TrampolineBlockWorkingMem + OffsetToPtr, src: &ResolverAddr, |
1174 | n: sizeof(uint64_t)); |
1175 | |
1176 | uint32_t *Trampolines = |
1177 | reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); |
1178 | for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { |
1179 | uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xfffff000; |
1180 | uint32_t Lo12 = OffsetToPtr - Hi20; |
1181 | Trampolines[4 * I + 0] = |
1182 | 0x1c00000c | |
1183 | (((Hi20 >> 12) & 0xfffff) << 5); // pcaddu12i $t0, %pc_hi20(Lptr) |
1184 | Trampolines[4 * I + 1] = |
1185 | 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr) |
1186 | Trampolines[4 * I + 2] = 0x4c00018d; // jirl $t1, $t0, 0 |
1187 | Trampolines[4 * I + 3] = 0x0; // padding |
1188 | } |
1189 | } |
1190 | |
1191 | void OrcLoongArch64::writeIndirectStubsBlock( |
1192 | char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress, |
1193 | ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) { |
1194 | // Stub format is: |
1195 | // |
1196 | // .section __orc_stubs |
1197 | // stub1: |
1198 | // pcaddu12i $t0, %pc_hi20(ptr1) ; PC-rel load of ptr1 |
1199 | // ld.d $t0, $t0, %pc_lo12(ptr1) |
1200 | // jr $t0 ; Jump to resolver |
1201 | // .dword 0 ; Pad to 16 bytes |
1202 | // stub2: |
1203 | // pcaddu12i $t0, %pc_hi20(ptr2) ; PC-rel load of ptr2 |
1204 | // ld.d $t0, $t0, %pc_lo12(ptr2) |
1205 | // jr $t0 ; Jump to resolver |
1206 | // .dword 0 ; Pad to 16 bytes |
1207 | // ... |
1208 | // |
1209 | // .section __orc_ptrs |
1210 | // ptr1: |
1211 | // .dword 0x0 |
1212 | // ptr2: |
1213 | // .dword 0x0 |
1214 | // ... |
1215 | LLVM_DEBUG({ |
1216 | dbgs() << "Writing stubs code to " |
1217 | << formatv("{0:x16}" , StubsBlockTargetAddress) << "\n" ; |
1218 | }); |
1219 | assert(stubAndPointerRangesOk<OrcLoongArch64>( |
1220 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
1221 | "PointersBlock is out of range" ); |
1222 | |
1223 | uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); |
1224 | |
1225 | for (unsigned I = 0; I < NumStubs; ++I) { |
1226 | uint64_t PtrDisplacement = |
1227 | PointersBlockTargetAddress - StubsBlockTargetAddress; |
1228 | uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xfffff000; |
1229 | uint32_t Lo12 = PtrDisplacement - Hi20; |
1230 | Stub[4 * I + 0] = 0x1c00000c | (((Hi20 >> 12) & 0xfffff) |
1231 | << 5); // pcaddu12i $t0, %pc_hi20(Lptr) |
1232 | Stub[4 * I + 1] = |
1233 | 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr) |
1234 | Stub[4 * I + 2] = 0x4c000180; // jr $t0 |
1235 | Stub[4 * I + 3] = 0x0; // padding |
1236 | PointersBlockTargetAddress += PointerSize; |
1237 | StubsBlockTargetAddress += StubSize; |
1238 | } |
1239 | } |
1240 | |
1241 | } // End namespace orc. |
1242 | } // End namespace llvm. |
1243 | |