1 | //===------------- OrcABISupport.cpp - ABI specific support code ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" |
10 | #include "llvm/Support/FormatVariadic.h" |
11 | #include "llvm/Support/raw_ostream.h" |
12 | |
13 | #define DEBUG_TYPE "orc" |
14 | |
15 | using namespace llvm; |
16 | using namespace llvm::orc; |
17 | |
18 | template <typename ORCABI> |
19 | static bool stubAndPointerRangesOk(ExecutorAddr StubBlockAddr, |
20 | ExecutorAddr PointerBlockAddr, |
21 | unsigned NumStubs) { |
22 | constexpr unsigned MaxDisp = ORCABI::StubToPointerMaxDisplacement; |
23 | ExecutorAddr FirstStub = StubBlockAddr; |
24 | ExecutorAddr LastStub = FirstStub + ((NumStubs - 1) * ORCABI::StubSize); |
25 | ExecutorAddr FirstPointer = PointerBlockAddr; |
26 | ExecutorAddr LastPointer = FirstPointer + ((NumStubs - 1) * ORCABI::StubSize); |
27 | |
28 | if (FirstStub < FirstPointer) { |
29 | if (LastStub >= FirstPointer) |
30 | return false; // Ranges overlap. |
31 | return (FirstPointer - FirstStub <= MaxDisp) && |
32 | (LastPointer - LastStub <= MaxDisp); // out-of-range. |
33 | } |
34 | |
35 | if (LastPointer >= FirstStub) |
36 | return false; // Ranges overlap. |
37 | |
38 | return (FirstStub - FirstPointer <= MaxDisp) && |
39 | (LastStub - LastPointer <= MaxDisp); |
40 | } |
41 | |
42 | namespace llvm { |
43 | namespace orc { |
44 | |
45 | void OrcAArch64::writeResolverCode(char *ResolverWorkingMem, |
46 | ExecutorAddr ResolverTargetAddress, |
47 | ExecutorAddr ReentryFnAddr, |
48 | ExecutorAddr ReentryCtxAddr) { |
49 | |
50 | const uint32_t ResolverCode[] = { |
51 | // resolver_entry: |
52 | 0xa9bf47fd, // 0x000: stp x29, x17, [sp, #-16]! |
53 | 0x910003fd, // 0x004: mov x29, sp |
54 | 0xa9bf73fb, // 0x008: stp x27, x28, [sp, #-16]! |
55 | 0xa9bf6bf9, // 0x00c: stp x25, x26, [sp, #-16]! |
56 | 0xa9bf63f7, // 0x010: stp x23, x24, [sp, #-16]! |
57 | 0xa9bf5bf5, // 0x014: stp x21, x22, [sp, #-16]! |
58 | 0xa9bf53f3, // 0x018: stp x19, x20, [sp, #-16]! |
59 | 0xa9bf3fee, // 0x01c: stp x14, x15, [sp, #-16]! |
60 | 0xa9bf37ec, // 0x020: stp x12, x13, [sp, #-16]! |
61 | 0xa9bf2fea, // 0x024: stp x10, x11, [sp, #-16]! |
62 | 0xa9bf27e8, // 0x028: stp x8, x9, [sp, #-16]! |
63 | 0xa9bf1fe6, // 0x02c: stp x6, x7, [sp, #-16]! |
64 | 0xa9bf17e4, // 0x030: stp x4, x5, [sp, #-16]! |
65 | 0xa9bf0fe2, // 0x034: stp x2, x3, [sp, #-16]! |
66 | 0xa9bf07e0, // 0x038: stp x0, x1, [sp, #-16]! |
67 | 0xadbf7ffe, // 0x03c: stp q30, q31, [sp, #-32]! |
68 | 0xadbf77fc, // 0x040: stp q28, q29, [sp, #-32]! |
69 | 0xadbf6ffa, // 0x044: stp q26, q27, [sp, #-32]! |
70 | 0xadbf67f8, // 0x048: stp q24, q25, [sp, #-32]! |
71 | 0xadbf5ff6, // 0x04c: stp q22, q23, [sp, #-32]! |
72 | 0xadbf57f4, // 0x050: stp q20, q21, [sp, #-32]! |
73 | 0xadbf4ff2, // 0x054: stp q18, q19, [sp, #-32]! |
74 | 0xadbf47f0, // 0x058: stp q16, q17, [sp, #-32]! |
75 | 0xadbf3fee, // 0x05c: stp q14, q15, [sp, #-32]! |
76 | 0xadbf37ec, // 0x060: stp q12, q13, [sp, #-32]! |
77 | 0xadbf2fea, // 0x064: stp q10, q11, [sp, #-32]! |
78 | 0xadbf27e8, // 0x068: stp q8, q9, [sp, #-32]! |
79 | 0xadbf1fe6, // 0x06c: stp q6, q7, [sp, #-32]! |
80 | 0xadbf17e4, // 0x070: stp q4, q5, [sp, #-32]! |
81 | 0xadbf0fe2, // 0x074: stp q2, q3, [sp, #-32]! |
82 | 0xadbf07e0, // 0x078: stp q0, q1, [sp, #-32]! |
83 | 0x580004e0, // 0x07c: ldr x0, Lreentry_ctx_ptr |
84 | 0xaa1e03e1, // 0x080: mov x1, x30 |
85 | 0xd1003021, // 0x084: sub x1, x1, #12 |
86 | 0x58000442, // 0x088: ldr x2, Lreentry_fn_ptr |
87 | 0xd63f0040, // 0x08c: blr x2 |
88 | 0xaa0003f1, // 0x090: mov x17, x0 |
89 | 0xacc107e0, // 0x094: ldp q0, q1, [sp], #32 |
90 | 0xacc10fe2, // 0x098: ldp q2, q3, [sp], #32 |
91 | 0xacc117e4, // 0x09c: ldp q4, q5, [sp], #32 |
92 | 0xacc11fe6, // 0x0a0: ldp q6, q7, [sp], #32 |
93 | 0xacc127e8, // 0x0a4: ldp q8, q9, [sp], #32 |
94 | 0xacc12fea, // 0x0a8: ldp q10, q11, [sp], #32 |
95 | 0xacc137ec, // 0x0ac: ldp q12, q13, [sp], #32 |
96 | 0xacc13fee, // 0x0b0: ldp q14, q15, [sp], #32 |
97 | 0xacc147f0, // 0x0b4: ldp q16, q17, [sp], #32 |
98 | 0xacc14ff2, // 0x0b8: ldp q18, q19, [sp], #32 |
99 | 0xacc157f4, // 0x0bc: ldp q20, q21, [sp], #32 |
100 | 0xacc15ff6, // 0x0c0: ldp q22, q23, [sp], #32 |
101 | 0xacc167f8, // 0x0c4: ldp q24, q25, [sp], #32 |
102 | 0xacc16ffa, // 0x0c8: ldp q26, q27, [sp], #32 |
103 | 0xacc177fc, // 0x0cc: ldp q28, q29, [sp], #32 |
104 | 0xacc17ffe, // 0x0d0: ldp q30, q31, [sp], #32 |
105 | 0xa8c107e0, // 0x0d4: ldp x0, x1, [sp], #16 |
106 | 0xa8c10fe2, // 0x0d8: ldp x2, x3, [sp], #16 |
107 | 0xa8c117e4, // 0x0dc: ldp x4, x5, [sp], #16 |
108 | 0xa8c11fe6, // 0x0e0: ldp x6, x7, [sp], #16 |
109 | 0xa8c127e8, // 0x0e4: ldp x8, x9, [sp], #16 |
110 | 0xa8c12fea, // 0x0e8: ldp x10, x11, [sp], #16 |
111 | 0xa8c137ec, // 0x0ec: ldp x12, x13, [sp], #16 |
112 | 0xa8c13fee, // 0x0f0: ldp x14, x15, [sp], #16 |
113 | 0xa8c153f3, // 0x0f4: ldp x19, x20, [sp], #16 |
114 | 0xa8c15bf5, // 0x0f8: ldp x21, x22, [sp], #16 |
115 | 0xa8c163f7, // 0x0fc: ldp x23, x24, [sp], #16 |
116 | 0xa8c16bf9, // 0x100: ldp x25, x26, [sp], #16 |
117 | 0xa8c173fb, // 0x104: ldp x27, x28, [sp], #16 |
118 | 0xa8c17bfd, // 0x108: ldp x29, x30, [sp], #16 |
119 | 0xd65f0220, // 0x10c: ret x17 |
120 | 0x01234567, // 0x110: Lreentry_fn_ptr: |
121 | 0xdeadbeef, // 0x114: .quad 0 |
122 | 0x98765432, // 0x118: Lreentry_ctx_ptr: |
123 | 0xcafef00d // 0x11c: .quad 0 |
124 | }; |
125 | |
126 | const unsigned ReentryFnAddrOffset = 0x110; |
127 | const unsigned ReentryCtxAddrOffset = 0x118; |
128 | |
129 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
130 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
131 | n: sizeof(uint64_t)); |
132 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
133 | n: sizeof(uint64_t)); |
134 | } |
135 | |
136 | void OrcAArch64::writeTrampolines(char *TrampolineBlockWorkingMem, |
137 | ExecutorAddr TrampolineBlockTargetAddress, |
138 | ExecutorAddr ResolverAddr, |
139 | unsigned NumTrampolines) { |
140 | |
141 | unsigned OffsetToPtr = alignTo(Value: NumTrampolines * TrampolineSize, Align: 8); |
142 | |
143 | memcpy(dest: TrampolineBlockWorkingMem + OffsetToPtr, src: &ResolverAddr, |
144 | n: sizeof(uint64_t)); |
145 | |
146 | // OffsetToPtr is actually the offset from the PC for the 2nd instruction, so |
147 | // subtract 32-bits. |
148 | OffsetToPtr -= 4; |
149 | |
150 | uint32_t *Trampolines = |
151 | reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); |
152 | |
153 | for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { |
154 | Trampolines[3 * I + 0] = 0xaa1e03f1; // mov x17, x30 |
155 | Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // ldr x16, Lptr |
156 | Trampolines[3 * I + 2] = 0xd63f0200; // blr x16 |
157 | } |
158 | } |
159 | |
160 | void OrcAArch64::writeIndirectStubsBlock( |
161 | char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress, |
162 | ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) { |
163 | // Stub format is: |
164 | // |
165 | // .section __orc_stubs |
166 | // stub1: |
167 | // ldr x16, ptr1 ; PC-rel load of ptr1 |
168 | // br x16 ; Jump to resolver |
169 | // stub2: |
170 | // ldr x16, ptr2 ; PC-rel load of ptr2 |
171 | // br x16 ; Jump to resolver |
172 | // |
173 | // ... |
174 | // |
175 | // .section __orc_ptrs |
176 | // ptr1: |
177 | // .quad 0x0 |
178 | // ptr2: |
179 | // .quad 0x0 |
180 | // |
181 | // ... |
182 | |
183 | static_assert(StubSize == PointerSize, |
184 | "Pointer and stub size must match for algorithm below" ); |
185 | assert(stubAndPointerRangesOk<OrcAArch64>( |
186 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
187 | "PointersBlock is out of range" ); |
188 | uint64_t PtrDisplacement = |
189 | PointersBlockTargetAddress - StubsBlockTargetAddress; |
190 | assert((PtrDisplacement % 8 == 0) && |
191 | "Displacement to pointer is not a multiple of 8" ); |
192 | uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem); |
193 | uint64_t PtrOffsetField = ((PtrDisplacement >> 2) & 0x7ffff) << 5; |
194 | |
195 | for (unsigned I = 0; I < NumStubs; ++I) |
196 | Stub[I] = 0xd61f020058000010 | PtrOffsetField; |
197 | } |
198 | |
199 | void OrcX86_64_Base::writeTrampolines(char *TrampolineBlockWorkingMem, |
200 | ExecutorAddr TrampolineBlockTargetAddress, |
201 | ExecutorAddr ResolverAddr, |
202 | unsigned NumTrampolines) { |
203 | |
204 | unsigned OffsetToPtr = NumTrampolines * TrampolineSize; |
205 | |
206 | memcpy(dest: TrampolineBlockWorkingMem + OffsetToPtr, src: &ResolverAddr, |
207 | n: sizeof(uint64_t)); |
208 | |
209 | uint64_t *Trampolines = |
210 | reinterpret_cast<uint64_t *>(TrampolineBlockWorkingMem); |
211 | uint64_t CallIndirPCRel = 0xf1c40000000015ff; |
212 | |
213 | for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) |
214 | Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16); |
215 | } |
216 | |
217 | void OrcX86_64_Base::writeIndirectStubsBlock( |
218 | char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress, |
219 | ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) { |
220 | // Stub format is: |
221 | // |
222 | // .section __orc_stubs |
223 | // stub1: |
224 | // jmpq *ptr1(%rip) |
225 | // .byte 0xC4 ; <- Invalid opcode padding. |
226 | // .byte 0xF1 |
227 | // stub2: |
228 | // jmpq *ptr2(%rip) |
229 | // |
230 | // ... |
231 | // |
232 | // .section __orc_ptrs |
233 | // ptr1: |
234 | // .quad 0x0 |
235 | // ptr2: |
236 | // .quad 0x0 |
237 | // |
238 | // ... |
239 | |
240 | // Populate the stubs page stubs and mark it executable. |
241 | static_assert(StubSize == PointerSize, |
242 | "Pointer and stub size must match for algorithm below" ); |
243 | assert(stubAndPointerRangesOk<OrcX86_64_Base>( |
244 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
245 | "PointersBlock is out of range" ); |
246 | uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem); |
247 | uint64_t PtrOffsetField = |
248 | (PointersBlockTargetAddress - StubsBlockTargetAddress - 6) << 16; |
249 | for (unsigned I = 0; I < NumStubs; ++I) |
250 | Stub[I] = 0xF1C40000000025ff | PtrOffsetField; |
251 | } |
252 | |
253 | void OrcX86_64_SysV::writeResolverCode(char *ResolverWorkingMem, |
254 | ExecutorAddr ResolverTargetAddress, |
255 | ExecutorAddr ReentryFnAddr, |
256 | ExecutorAddr ReentryCtxAddr) { |
257 | |
258 | LLVM_DEBUG({ |
259 | dbgs() << "Writing resolver code to " |
260 | << formatv("{0:x16}" , ResolverTargetAddress) << "\n" ; |
261 | }); |
262 | |
263 | const uint8_t ResolverCode[] = { |
264 | // resolver_entry: |
265 | 0x55, // 0x00: pushq %rbp |
266 | 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp |
267 | 0x50, // 0x04: pushq %rax |
268 | 0x53, // 0x05: pushq %rbx |
269 | 0x51, // 0x06: pushq %rcx |
270 | 0x52, // 0x07: pushq %rdx |
271 | 0x56, // 0x08: pushq %rsi |
272 | 0x57, // 0x09: pushq %rdi |
273 | 0x41, 0x50, // 0x0a: pushq %r8 |
274 | 0x41, 0x51, // 0x0c: pushq %r9 |
275 | 0x41, 0x52, // 0x0e: pushq %r10 |
276 | 0x41, 0x53, // 0x10: pushq %r11 |
277 | 0x41, 0x54, // 0x12: pushq %r12 |
278 | 0x41, 0x55, // 0x14: pushq %r13 |
279 | 0x41, 0x56, // 0x16: pushq %r14 |
280 | 0x41, 0x57, // 0x18: pushq %r15 |
281 | 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp |
282 | 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) |
283 | 0x48, 0xbf, // 0x26: movabsq <CBMgr>, %rdi |
284 | |
285 | // 0x28: JIT re-entry ctx addr. |
286 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
287 | |
288 | 0x48, 0x8b, 0x75, 0x08, // 0x30: movq 8(%rbp), %rsi |
289 | 0x48, 0x83, 0xee, 0x06, // 0x34: subq $6, %rsi |
290 | 0x48, 0xb8, // 0x38: movabsq <REntry>, %rax |
291 | |
292 | // 0x3a: JIT re-entry fn addr: |
293 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
294 | |
295 | 0xff, 0xd0, // 0x42: callq *%rax |
296 | 0x48, 0x89, 0x45, 0x08, // 0x44: movq %rax, 8(%rbp) |
297 | 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x48: fxrstor64 (%rsp) |
298 | 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq 0x208, %rsp |
299 | 0x41, 0x5f, // 0x54: popq %r15 |
300 | 0x41, 0x5e, // 0x56: popq %r14 |
301 | 0x41, 0x5d, // 0x58: popq %r13 |
302 | 0x41, 0x5c, // 0x5a: popq %r12 |
303 | 0x41, 0x5b, // 0x5c: popq %r11 |
304 | 0x41, 0x5a, // 0x5e: popq %r10 |
305 | 0x41, 0x59, // 0x60: popq %r9 |
306 | 0x41, 0x58, // 0x62: popq %r8 |
307 | 0x5f, // 0x64: popq %rdi |
308 | 0x5e, // 0x65: popq %rsi |
309 | 0x5a, // 0x66: popq %rdx |
310 | 0x59, // 0x67: popq %rcx |
311 | 0x5b, // 0x68: popq %rbx |
312 | 0x58, // 0x69: popq %rax |
313 | 0x5d, // 0x6a: popq %rbp |
314 | 0xc3, // 0x6b: retq |
315 | }; |
316 | |
317 | const unsigned ReentryFnAddrOffset = 0x3a; |
318 | const unsigned ReentryCtxAddrOffset = 0x28; |
319 | |
320 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
321 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
322 | n: sizeof(uint64_t)); |
323 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
324 | n: sizeof(uint64_t)); |
325 | } |
326 | |
327 | void OrcX86_64_Win32::writeResolverCode(char *ResolverWorkingMem, |
328 | ExecutorAddr ResolverTargetAddress, |
329 | ExecutorAddr ReentryFnAddr, |
330 | ExecutorAddr ReentryCtxAddr) { |
331 | |
332 | // resolverCode is similar to OrcX86_64 with differences specific to windows |
333 | // x64 calling convention: arguments go into rcx, rdx and come in reverse |
334 | // order, shadow space allocation on stack |
335 | const uint8_t ResolverCode[] = { |
336 | // resolver_entry: |
337 | 0x55, // 0x00: pushq %rbp |
338 | 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp |
339 | 0x50, // 0x04: pushq %rax |
340 | 0x53, // 0x05: pushq %rbx |
341 | 0x51, // 0x06: pushq %rcx |
342 | 0x52, // 0x07: pushq %rdx |
343 | 0x56, // 0x08: pushq %rsi |
344 | 0x57, // 0x09: pushq %rdi |
345 | 0x41, 0x50, // 0x0a: pushq %r8 |
346 | 0x41, 0x51, // 0x0c: pushq %r9 |
347 | 0x41, 0x52, // 0x0e: pushq %r10 |
348 | 0x41, 0x53, // 0x10: pushq %r11 |
349 | 0x41, 0x54, // 0x12: pushq %r12 |
350 | 0x41, 0x55, // 0x14: pushq %r13 |
351 | 0x41, 0x56, // 0x16: pushq %r14 |
352 | 0x41, 0x57, // 0x18: pushq %r15 |
353 | 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp |
354 | 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) |
355 | |
356 | 0x48, 0xb9, // 0x26: movabsq <CBMgr>, %rcx |
357 | // 0x28: JIT re-entry ctx addr. |
358 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
359 | |
360 | 0x48, 0x8B, 0x55, 0x08, // 0x30: mov rdx, [rbp+0x8] |
361 | 0x48, 0x83, 0xea, 0x06, // 0x34: sub rdx, 0x6 |
362 | |
363 | 0x48, 0xb8, // 0x38: movabsq <REntry>, %rax |
364 | // 0x3a: JIT re-entry fn addr: |
365 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
366 | |
367 | // 0x42: sub rsp, 0x20 (Allocate shadow space) |
368 | 0x48, 0x83, 0xEC, 0x20, |
369 | 0xff, 0xd0, // 0x46: callq *%rax |
370 | |
371 | // 0x48: add rsp, 0x20 (Free shadow space) |
372 | 0x48, 0x83, 0xC4, 0x20, |
373 | |
374 | 0x48, 0x89, 0x45, 0x08, // 0x4C: movq %rax, 8(%rbp) |
375 | 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x50: fxrstor64 (%rsp) |
376 | 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x55: addq 0x208, %rsp |
377 | 0x41, 0x5f, // 0x5C: popq %r15 |
378 | 0x41, 0x5e, // 0x5E: popq %r14 |
379 | 0x41, 0x5d, // 0x60: popq %r13 |
380 | 0x41, 0x5c, // 0x62: popq %r12 |
381 | 0x41, 0x5b, // 0x64: popq %r11 |
382 | 0x41, 0x5a, // 0x66: popq %r10 |
383 | 0x41, 0x59, // 0x68: popq %r9 |
384 | 0x41, 0x58, // 0x6a: popq %r8 |
385 | 0x5f, // 0x6c: popq %rdi |
386 | 0x5e, // 0x6d: popq %rsi |
387 | 0x5a, // 0x6e: popq %rdx |
388 | 0x59, // 0x6f: popq %rcx |
389 | 0x5b, // 0x70: popq %rbx |
390 | 0x58, // 0x71: popq %rax |
391 | 0x5d, // 0x72: popq %rbp |
392 | 0xc3, // 0x73: retq |
393 | }; |
394 | |
395 | const unsigned ReentryFnAddrOffset = 0x3a; |
396 | const unsigned ReentryCtxAddrOffset = 0x28; |
397 | |
398 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
399 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
400 | n: sizeof(uint64_t)); |
401 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
402 | n: sizeof(uint64_t)); |
403 | } |
404 | |
405 | void OrcI386::writeResolverCode(char *ResolverWorkingMem, |
406 | ExecutorAddr ResolverTargetAddress, |
407 | ExecutorAddr ReentryFnAddr, |
408 | ExecutorAddr ReentryCtxAddr) { |
409 | |
410 | assert((ReentryFnAddr.getValue() >> 32) == 0 && "ReentryFnAddr out of range" ); |
411 | assert((ReentryCtxAddr.getValue() >> 32) == 0 && |
412 | "ReentryCtxAddr out of range" ); |
413 | |
414 | const uint8_t ResolverCode[] = { |
415 | // resolver_entry: |
416 | 0x55, // 0x00: pushl %ebp |
417 | 0x89, 0xe5, // 0x01: movl %esp, %ebp |
418 | 0x54, // 0x03: pushl %esp |
419 | 0x83, 0xe4, 0xf0, // 0x04: andl $-0x10, %esp |
420 | 0x50, // 0x07: pushl %eax |
421 | 0x53, // 0x08: pushl %ebx |
422 | 0x51, // 0x09: pushl %ecx |
423 | 0x52, // 0x0a: pushl %edx |
424 | 0x56, // 0x0b: pushl %esi |
425 | 0x57, // 0x0c: pushl %edi |
426 | 0x81, 0xec, 0x18, 0x02, 0x00, 0x00, // 0x0d: subl $0x218, %esp |
427 | 0x0f, 0xae, 0x44, 0x24, 0x10, // 0x13: fxsave 0x10(%esp) |
428 | 0x8b, 0x75, 0x04, // 0x18: movl 0x4(%ebp), %esi |
429 | 0x83, 0xee, 0x05, // 0x1b: subl $0x5, %esi |
430 | 0x89, 0x74, 0x24, 0x04, // 0x1e: movl %esi, 0x4(%esp) |
431 | 0xc7, 0x04, 0x24, 0x00, 0x00, 0x00, |
432 | 0x00, // 0x22: movl <cbmgr>, (%esp) |
433 | 0xb8, 0x00, 0x00, 0x00, 0x00, // 0x29: movl <reentry>, %eax |
434 | 0xff, 0xd0, // 0x2e: calll *%eax |
435 | 0x89, 0x45, 0x04, // 0x30: movl %eax, 0x4(%ebp) |
436 | 0x0f, 0xae, 0x4c, 0x24, 0x10, // 0x33: fxrstor 0x10(%esp) |
437 | 0x81, 0xc4, 0x18, 0x02, 0x00, 0x00, // 0x38: addl $0x218, %esp |
438 | 0x5f, // 0x3e: popl %edi |
439 | 0x5e, // 0x3f: popl %esi |
440 | 0x5a, // 0x40: popl %edx |
441 | 0x59, // 0x41: popl %ecx |
442 | 0x5b, // 0x42: popl %ebx |
443 | 0x58, // 0x43: popl %eax |
444 | 0x8b, 0x65, 0xfc, // 0x44: movl -0x4(%ebp), %esp |
445 | 0x5d, // 0x48: popl %ebp |
446 | 0xc3 // 0x49: retl |
447 | }; |
448 | |
449 | const unsigned ReentryFnAddrOffset = 0x2a; |
450 | const unsigned ReentryCtxAddrOffset = 0x25; |
451 | |
452 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
453 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
454 | n: sizeof(uint32_t)); |
455 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
456 | n: sizeof(uint32_t)); |
457 | } |
458 | |
459 | void OrcI386::writeTrampolines(char *TrampolineWorkingMem, |
460 | ExecutorAddr TrampolineBlockTargetAddress, |
461 | ExecutorAddr ResolverAddr, |
462 | unsigned NumTrampolines) { |
463 | assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range" ); |
464 | |
465 | uint64_t CallRelImm = 0xF1C4C400000000e8; |
466 | uint64_t ResolverRel = ResolverAddr - TrampolineBlockTargetAddress - 5; |
467 | |
468 | uint64_t *Trampolines = reinterpret_cast<uint64_t *>(TrampolineWorkingMem); |
469 | for (unsigned I = 0; I < NumTrampolines; ++I, ResolverRel -= TrampolineSize) |
470 | Trampolines[I] = CallRelImm | (ResolverRel << 8); |
471 | } |
472 | |
473 | void OrcI386::writeIndirectStubsBlock(char *StubsBlockWorkingMem, |
474 | ExecutorAddr StubsBlockTargetAddress, |
475 | ExecutorAddr PointersBlockTargetAddress, |
476 | unsigned NumStubs) { |
477 | assert((StubsBlockTargetAddress.getValue() >> 32) == 0 && |
478 | "StubsBlockTargetAddress is out of range" ); |
479 | assert((PointersBlockTargetAddress.getValue() >> 32) == 0 && |
480 | "PointersBlockTargetAddress is out of range" ); |
481 | |
482 | // Stub format is: |
483 | // |
484 | // .section __orc_stubs |
485 | // stub1: |
486 | // jmpq *ptr1 |
487 | // .byte 0xC4 ; <- Invalid opcode padding. |
488 | // .byte 0xF1 |
489 | // stub2: |
490 | // jmpq *ptr2 |
491 | // |
492 | // ... |
493 | // |
494 | // .section __orc_ptrs |
495 | // ptr1: |
496 | // .quad 0x0 |
497 | // ptr2: |
498 | // .quad 0x0 |
499 | // |
500 | // ... |
501 | |
502 | assert(stubAndPointerRangesOk<OrcI386>( |
503 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
504 | "PointersBlock is out of range" ); |
505 | |
506 | uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem); |
507 | uint64_t PtrAddr = PointersBlockTargetAddress.getValue(); |
508 | for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 4) |
509 | Stub[I] = 0xF1C40000000025ff | (PtrAddr << 16); |
510 | } |
511 | |
512 | void OrcMips32_Base::writeResolverCode(char *ResolverWorkingMem, |
513 | ExecutorAddr ResolverTargetAddress, |
514 | ExecutorAddr ReentryFnAddr, |
515 | ExecutorAddr ReentryCtxAddr, |
516 | bool isBigEndian) { |
517 | |
518 | const uint32_t ResolverCode[] = { |
519 | // resolver_entry: |
520 | 0x27bdff98, // 0x00: addiu $sp,$sp,-104 |
521 | 0xafa20000, // 0x04: sw $v0,0($sp) |
522 | 0xafa30004, // 0x08: sw $v1,4($sp) |
523 | 0xafa40008, // 0x0c: sw $a0,8($sp) |
524 | 0xafa5000c, // 0x10: sw $a1,12($sp) |
525 | 0xafa60010, // 0x14: sw $a2,16($sp) |
526 | 0xafa70014, // 0x18: sw $a3,20($sp) |
527 | 0xafb00018, // 0x1c: sw $s0,24($sp) |
528 | 0xafb1001c, // 0x20: sw $s1,28($sp) |
529 | 0xafb20020, // 0x24: sw $s2,32($sp) |
530 | 0xafb30024, // 0x28: sw $s3,36($sp) |
531 | 0xafb40028, // 0x2c: sw $s4,40($sp) |
532 | 0xafb5002c, // 0x30: sw $s5,44($sp) |
533 | 0xafb60030, // 0x34: sw $s6,48($sp) |
534 | 0xafb70034, // 0x38: sw $s7,52($sp) |
535 | 0xafa80038, // 0x3c: sw $t0,56($sp) |
536 | 0xafa9003c, // 0x40: sw $t1,60($sp) |
537 | 0xafaa0040, // 0x44: sw $t2,64($sp) |
538 | 0xafab0044, // 0x48: sw $t3,68($sp) |
539 | 0xafac0048, // 0x4c: sw $t4,72($sp) |
540 | 0xafad004c, // 0x50: sw $t5,76($sp) |
541 | 0xafae0050, // 0x54: sw $t6,80($sp) |
542 | 0xafaf0054, // 0x58: sw $t7,84($sp) |
543 | 0xafb80058, // 0x5c: sw $t8,88($sp) |
544 | 0xafb9005c, // 0x60: sw $t9,92($sp) |
545 | 0xafbe0060, // 0x64: sw $fp,96($sp) |
546 | 0xafbf0064, // 0x68: sw $ra,100($sp) |
547 | |
548 | // JIT re-entry ctx addr. |
549 | 0x00000000, // 0x6c: lui $a0,ctx |
550 | 0x00000000, // 0x70: addiu $a0,$a0,ctx |
551 | |
552 | 0x03e02825, // 0x74: move $a1, $ra |
553 | 0x24a5ffec, // 0x78: addiu $a1,$a1,-20 |
554 | |
555 | // JIT re-entry fn addr: |
556 | 0x00000000, // 0x7c: lui $t9,reentry |
557 | 0x00000000, // 0x80: addiu $t9,$t9,reentry |
558 | |
559 | 0x0320f809, // 0x84: jalr $t9 |
560 | 0x00000000, // 0x88: nop |
561 | 0x8fbf0064, // 0x8c: lw $ra,100($sp) |
562 | 0x8fbe0060, // 0x90: lw $fp,96($sp) |
563 | 0x8fb9005c, // 0x94: lw $t9,92($sp) |
564 | 0x8fb80058, // 0x98: lw $t8,88($sp) |
565 | 0x8faf0054, // 0x9c: lw $t7,84($sp) |
566 | 0x8fae0050, // 0xa0: lw $t6,80($sp) |
567 | 0x8fad004c, // 0xa4: lw $t5,76($sp) |
568 | 0x8fac0048, // 0xa8: lw $t4,72($sp) |
569 | 0x8fab0044, // 0xac: lw $t3,68($sp) |
570 | 0x8faa0040, // 0xb0: lw $t2,64($sp) |
571 | 0x8fa9003c, // 0xb4: lw $t1,60($sp) |
572 | 0x8fa80038, // 0xb8: lw $t0,56($sp) |
573 | 0x8fb70034, // 0xbc: lw $s7,52($sp) |
574 | 0x8fb60030, // 0xc0: lw $s6,48($sp) |
575 | 0x8fb5002c, // 0xc4: lw $s5,44($sp) |
576 | 0x8fb40028, // 0xc8: lw $s4,40($sp) |
577 | 0x8fb30024, // 0xcc: lw $s3,36($sp) |
578 | 0x8fb20020, // 0xd0: lw $s2,32($sp) |
579 | 0x8fb1001c, // 0xd4: lw $s1,28($sp) |
580 | 0x8fb00018, // 0xd8: lw $s0,24($sp) |
581 | 0x8fa70014, // 0xdc: lw $a3,20($sp) |
582 | 0x8fa60010, // 0xe0: lw $a2,16($sp) |
583 | 0x8fa5000c, // 0xe4: lw $a1,12($sp) |
584 | 0x8fa40008, // 0xe8: lw $a0,8($sp) |
585 | 0x27bd0068, // 0xec: addiu $sp,$sp,104 |
586 | 0x0300f825, // 0xf0: move $ra, $t8 |
587 | 0x03200008, // 0xf4: jr $t9 |
588 | 0x00000000, // 0xf8: move $t9, $v0/v1 |
589 | }; |
590 | |
591 | const unsigned ReentryFnAddrOffset = 0x7c; // JIT re-entry fn addr lui |
592 | const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry context addr lui |
593 | const unsigned Offsett = 0xf8; |
594 | |
595 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
596 | |
597 | // Depending on endian return value will be in v0 or v1. |
598 | uint32_t MoveVxT9 = isBigEndian ? 0x0060c825 : 0x0040c825; |
599 | memcpy(dest: ResolverWorkingMem + Offsett, src: &MoveVxT9, n: sizeof(MoveVxT9)); |
600 | |
601 | uint32_t ReentryCtxLUi = |
602 | 0x3c040000 | (((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF); |
603 | uint32_t ReentryCtxADDiu = 0x24840000 | (ReentryCtxAddr.getValue() & 0xFFFF); |
604 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxLUi, |
605 | n: sizeof(ReentryCtxLUi)); |
606 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset + 4, src: &ReentryCtxADDiu, |
607 | n: sizeof(ReentryCtxADDiu)); |
608 | |
609 | uint32_t ReentryFnLUi = |
610 | 0x3c190000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF); |
611 | uint32_t ReentryFnADDiu = 0x27390000 | (ReentryFnAddr.getValue() & 0xFFFF); |
612 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnLUi, |
613 | n: sizeof(ReentryFnLUi)); |
614 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset + 4, src: &ReentryFnADDiu, |
615 | n: sizeof(ReentryFnADDiu)); |
616 | } |
617 | |
618 | void OrcMips32_Base::writeTrampolines(char *TrampolineBlockWorkingMem, |
619 | ExecutorAddr TrampolineBlockTargetAddress, |
620 | ExecutorAddr ResolverAddr, |
621 | unsigned NumTrampolines) { |
622 | |
623 | assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range" ); |
624 | |
625 | uint32_t *Trampolines = |
626 | reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); |
627 | uint32_t RHiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16); |
628 | |
629 | for (unsigned I = 0; I < NumTrampolines; ++I) { |
630 | // move $t8,$ra |
631 | // lui $t9,ResolverAddr |
632 | // addiu $t9,$t9,ResolverAddr |
633 | // jalr $t9 |
634 | // nop |
635 | Trampolines[5 * I + 0] = 0x03e0c025; |
636 | Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF); |
637 | Trampolines[5 * I + 2] = 0x27390000 | (ResolverAddr.getValue() & 0xFFFF); |
638 | Trampolines[5 * I + 3] = 0x0320f809; |
639 | Trampolines[5 * I + 4] = 0x00000000; |
640 | } |
641 | } |
642 | |
643 | void OrcMips32_Base::writeIndirectStubsBlock( |
644 | char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress, |
645 | ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) { |
646 | assert((StubsBlockTargetAddress.getValue() >> 32) == 0 && |
647 | "InitialPtrVal is out of range" ); |
648 | |
649 | // Stub format is: |
650 | // |
651 | // .section __orc_stubs |
652 | // stub1: |
653 | // lui $t9, ptr1 |
654 | // lw $t9, %lo(ptr1)($t9) |
655 | // jr $t9 |
656 | // stub2: |
657 | // lui $t9, ptr2 |
658 | // lw $t9,%lo(ptr1)($t9) |
659 | // jr $t9 |
660 | // |
661 | // ... |
662 | // |
663 | // .section __orc_ptrs |
664 | // ptr1: |
665 | // .word 0x0 |
666 | // ptr2: |
667 | // .word 0x0 |
668 | // |
669 | // i.. |
670 | |
671 | assert(stubAndPointerRangesOk<OrcMips32_Base>( |
672 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
673 | "PointersBlock is out of range" ); |
674 | |
675 | // Populate the stubs page stubs and mark it executable. |
676 | uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); |
677 | uint64_t PtrAddr = PointersBlockTargetAddress.getValue(); |
678 | |
679 | for (unsigned I = 0; I < NumStubs; ++I) { |
680 | uint32_t HiAddr = ((PtrAddr + 0x8000) >> 16); |
681 | Stub[4 * I + 0] = 0x3c190000 | (HiAddr & 0xFFFF); // lui $t9,ptr1 |
682 | Stub[4 * I + 1] = 0x8f390000 | (PtrAddr & 0xFFFF); // lw $t9,%lo(ptr1)($t9) |
683 | Stub[4 * I + 2] = 0x03200008; // jr $t9 |
684 | Stub[4 * I + 3] = 0x00000000; // nop |
685 | PtrAddr += 4; |
686 | } |
687 | } |
688 | |
689 | void OrcMips64::writeResolverCode(char *ResolverWorkingMem, |
690 | ExecutorAddr ResolverTargetAddress, |
691 | ExecutorAddr ReentryFnAddr, |
692 | ExecutorAddr ReentryCtxAddr) { |
693 | |
694 | const uint32_t ResolverCode[] = { |
695 | //resolver_entry: |
696 | 0x67bdff30, // 0x00: daddiu $sp,$sp,-208 |
697 | 0xffa20000, // 0x04: sd v0,0(sp) |
698 | 0xffa30008, // 0x08: sd v1,8(sp) |
699 | 0xffa40010, // 0x0c: sd a0,16(sp) |
700 | 0xffa50018, // 0x10: sd a1,24(sp) |
701 | 0xffa60020, // 0x14: sd a2,32(sp) |
702 | 0xffa70028, // 0x18: sd a3,40(sp) |
703 | 0xffa80030, // 0x1c: sd a4,48(sp) |
704 | 0xffa90038, // 0x20: sd a5,56(sp) |
705 | 0xffaa0040, // 0x24: sd a6,64(sp) |
706 | 0xffab0048, // 0x28: sd a7,72(sp) |
707 | 0xffac0050, // 0x2c: sd t0,80(sp) |
708 | 0xffad0058, // 0x30: sd t1,88(sp) |
709 | 0xffae0060, // 0x34: sd t2,96(sp) |
710 | 0xffaf0068, // 0x38: sd t3,104(sp) |
711 | 0xffb00070, // 0x3c: sd s0,112(sp) |
712 | 0xffb10078, // 0x40: sd s1,120(sp) |
713 | 0xffb20080, // 0x44: sd s2,128(sp) |
714 | 0xffb30088, // 0x48: sd s3,136(sp) |
715 | 0xffb40090, // 0x4c: sd s4,144(sp) |
716 | 0xffb50098, // 0x50: sd s5,152(sp) |
717 | 0xffb600a0, // 0x54: sd s6,160(sp) |
718 | 0xffb700a8, // 0x58: sd s7,168(sp) |
719 | 0xffb800b0, // 0x5c: sd t8,176(sp) |
720 | 0xffb900b8, // 0x60: sd t9,184(sp) |
721 | 0xffbe00c0, // 0x64: sd fp,192(sp) |
722 | 0xffbf00c8, // 0x68: sd ra,200(sp) |
723 | |
724 | // JIT re-entry ctx addr. |
725 | 0x00000000, // 0x6c: lui $a0,heighest(ctx) |
726 | 0x00000000, // 0x70: daddiu $a0,$a0,heigher(ctx) |
727 | 0x00000000, // 0x74: dsll $a0,$a0,16 |
728 | 0x00000000, // 0x78: daddiu $a0,$a0,hi(ctx) |
729 | 0x00000000, // 0x7c: dsll $a0,$a0,16 |
730 | 0x00000000, // 0x80: daddiu $a0,$a0,lo(ctx) |
731 | |
732 | 0x03e02825, // 0x84: move $a1, $ra |
733 | 0x64a5ffdc, // 0x88: daddiu $a1,$a1,-36 |
734 | |
735 | // JIT re-entry fn addr: |
736 | 0x00000000, // 0x8c: lui $t9,reentry |
737 | 0x00000000, // 0x90: daddiu $t9,$t9,reentry |
738 | 0x00000000, // 0x94: dsll $t9,$t9, |
739 | 0x00000000, // 0x98: daddiu $t9,$t9, |
740 | 0x00000000, // 0x9c: dsll $t9,$t9, |
741 | 0x00000000, // 0xa0: daddiu $t9,$t9, |
742 | 0x0320f809, // 0xa4: jalr $t9 |
743 | 0x00000000, // 0xa8: nop |
744 | 0xdfbf00c8, // 0xac: ld ra, 200(sp) |
745 | 0xdfbe00c0, // 0xb0: ld fp, 192(sp) |
746 | 0xdfb900b8, // 0xb4: ld t9, 184(sp) |
747 | 0xdfb800b0, // 0xb8: ld t8, 176(sp) |
748 | 0xdfb700a8, // 0xbc: ld s7, 168(sp) |
749 | 0xdfb600a0, // 0xc0: ld s6, 160(sp) |
750 | 0xdfb50098, // 0xc4: ld s5, 152(sp) |
751 | 0xdfb40090, // 0xc8: ld s4, 144(sp) |
752 | 0xdfb30088, // 0xcc: ld s3, 136(sp) |
753 | 0xdfb20080, // 0xd0: ld s2, 128(sp) |
754 | 0xdfb10078, // 0xd4: ld s1, 120(sp) |
755 | 0xdfb00070, // 0xd8: ld s0, 112(sp) |
756 | 0xdfaf0068, // 0xdc: ld t3, 104(sp) |
757 | 0xdfae0060, // 0xe0: ld t2, 96(sp) |
758 | 0xdfad0058, // 0xe4: ld t1, 88(sp) |
759 | 0xdfac0050, // 0xe8: ld t0, 80(sp) |
760 | 0xdfab0048, // 0xec: ld a7, 72(sp) |
761 | 0xdfaa0040, // 0xf0: ld a6, 64(sp) |
762 | 0xdfa90038, // 0xf4: ld a5, 56(sp) |
763 | 0xdfa80030, // 0xf8: ld a4, 48(sp) |
764 | 0xdfa70028, // 0xfc: ld a3, 40(sp) |
765 | 0xdfa60020, // 0x100: ld a2, 32(sp) |
766 | 0xdfa50018, // 0x104: ld a1, 24(sp) |
767 | 0xdfa40010, // 0x108: ld a0, 16(sp) |
768 | 0xdfa30008, // 0x10c: ld v1, 8(sp) |
769 | 0x67bd00d0, // 0x110: daddiu $sp,$sp,208 |
770 | 0x0300f825, // 0x114: move $ra, $t8 |
771 | 0x03200008, // 0x118: jr $t9 |
772 | 0x0040c825, // 0x11c: move $t9, $v0 |
773 | }; |
774 | |
775 | const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lui |
776 | const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry ctx addr lui |
777 | |
778 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
779 | |
780 | uint32_t ReentryCtxLUi = |
781 | 0x3c040000 | |
782 | (((ReentryCtxAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF); |
783 | uint32_t ReentryCtxDADDiu = |
784 | 0x64840000 | (((ReentryCtxAddr.getValue() + 0x80008000) >> 32) & 0xFFFF); |
785 | uint32_t ReentryCtxDSLL = 0x00042438; |
786 | uint32_t ReentryCtxDADDiu2 = |
787 | 0x64840000 | ((((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF)); |
788 | uint32_t ReentryCtxDSLL2 = 0x00042438; |
789 | uint32_t ReentryCtxDADDiu3 = |
790 | 0x64840000 | (ReentryCtxAddr.getValue() & 0xFFFF); |
791 | |
792 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxLUi, |
793 | n: sizeof(ReentryCtxLUi)); |
794 | memcpy(dest: ResolverWorkingMem + (ReentryCtxAddrOffset + 4), src: &ReentryCtxDADDiu, |
795 | n: sizeof(ReentryCtxDADDiu)); |
796 | memcpy(dest: ResolverWorkingMem + (ReentryCtxAddrOffset + 8), src: &ReentryCtxDSLL, |
797 | n: sizeof(ReentryCtxDSLL)); |
798 | memcpy(dest: ResolverWorkingMem + (ReentryCtxAddrOffset + 12), src: &ReentryCtxDADDiu2, |
799 | n: sizeof(ReentryCtxDADDiu2)); |
800 | memcpy(dest: ResolverWorkingMem + (ReentryCtxAddrOffset + 16), src: &ReentryCtxDSLL2, |
801 | n: sizeof(ReentryCtxDSLL2)); |
802 | memcpy(dest: ResolverWorkingMem + (ReentryCtxAddrOffset + 20), src: &ReentryCtxDADDiu3, |
803 | n: sizeof(ReentryCtxDADDiu3)); |
804 | |
805 | uint32_t ReentryFnLUi = |
806 | 0x3c190000 | |
807 | (((ReentryFnAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF); |
808 | |
809 | uint32_t ReentryFnDADDiu = |
810 | 0x67390000 | (((ReentryFnAddr.getValue() + 0x80008000) >> 32) & 0xFFFF); |
811 | |
812 | uint32_t ReentryFnDSLL = 0x0019cc38; |
813 | |
814 | uint32_t ReentryFnDADDiu2 = |
815 | 0x67390000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF); |
816 | |
817 | uint32_t ReentryFnDSLL2 = 0x0019cc38; |
818 | |
819 | uint32_t ReentryFnDADDiu3 = 0x67390000 | (ReentryFnAddr.getValue() & 0xFFFF); |
820 | |
821 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnLUi, |
822 | n: sizeof(ReentryFnLUi)); |
823 | memcpy(dest: ResolverWorkingMem + (ReentryFnAddrOffset + 4), src: &ReentryFnDADDiu, |
824 | n: sizeof(ReentryFnDADDiu)); |
825 | memcpy(dest: ResolverWorkingMem + (ReentryFnAddrOffset + 8), src: &ReentryFnDSLL, |
826 | n: sizeof(ReentryFnDSLL)); |
827 | memcpy(dest: ResolverWorkingMem + (ReentryFnAddrOffset + 12), src: &ReentryFnDADDiu2, |
828 | n: sizeof(ReentryFnDADDiu2)); |
829 | memcpy(dest: ResolverWorkingMem + (ReentryFnAddrOffset + 16), src: &ReentryFnDSLL2, |
830 | n: sizeof(ReentryFnDSLL2)); |
831 | memcpy(dest: ResolverWorkingMem + (ReentryFnAddrOffset + 20), src: &ReentryFnDADDiu3, |
832 | n: sizeof(ReentryFnDADDiu3)); |
833 | } |
834 | |
835 | void OrcMips64::writeTrampolines(char *TrampolineBlockWorkingMem, |
836 | ExecutorAddr TrampolineBlockTargetAddress, |
837 | ExecutorAddr ResolverAddr, |
838 | unsigned NumTrampolines) { |
839 | |
840 | uint32_t *Trampolines = |
841 | reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); |
842 | |
843 | uint64_t HeighestAddr = ((ResolverAddr.getValue() + 0x800080008000) >> 48); |
844 | uint64_t HeigherAddr = ((ResolverAddr.getValue() + 0x80008000) >> 32); |
845 | uint64_t HiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16); |
846 | |
847 | for (unsigned I = 0; I < NumTrampolines; ++I) { |
848 | Trampolines[10 * I + 0] = 0x03e0c025; // move $t8,$ra |
849 | Trampolines[10 * I + 1] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,resolveAddr |
850 | Trampolines[10 * I + 2] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(resolveAddr) |
851 | Trampolines[10 * I + 3] = 0x0019cc38; // dsll $t9,$t9,16 |
852 | Trampolines[10 * I + 4] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr) |
853 | Trampolines[10 * I + 5] = 0x0019cc38; // dsll $t9,$t9,16 |
854 | Trampolines[10 * I + 6] = 0x67390000 | (ResolverAddr.getValue() & |
855 | 0xFFFF); // daddiu $t9,$t9,%lo(ptr) |
856 | Trampolines[10 * I + 7] = 0x0320f809; // jalr $t9 |
857 | Trampolines[10 * I + 8] = 0x00000000; // nop |
858 | Trampolines[10 * I + 9] = 0x00000000; // nop |
859 | } |
860 | } |
861 | |
862 | void OrcMips64::writeIndirectStubsBlock(char *StubsBlockWorkingMem, |
863 | ExecutorAddr StubsBlockTargetAddress, |
864 | ExecutorAddr PointersBlockTargetAddress, |
865 | unsigned NumStubs) { |
866 | // Stub format is: |
867 | // |
868 | // .section __orc_stubs |
869 | // stub1: |
870 | // lui $t9,ptr1 |
871 | // dsll $t9,$t9,16 |
872 | // daddiu $t9,$t9,%hi(ptr) |
873 | // dsll $t9,$t9,16 |
874 | // ld $t9,%lo(ptr) |
875 | // jr $t9 |
876 | // stub2: |
877 | // lui $t9,ptr1 |
878 | // dsll $t9,$t9,16 |
879 | // daddiu $t9,$t9,%hi(ptr) |
880 | // dsll $t9,$t9,16 |
881 | // ld $t9,%lo(ptr) |
882 | // jr $t9 |
883 | // |
884 | // ... |
885 | // |
886 | // .section __orc_ptrs |
887 | // ptr1: |
888 | // .dword 0x0 |
889 | // ptr2: |
890 | // .dword 0x0 |
891 | // |
892 | // ... |
893 | |
894 | assert(stubAndPointerRangesOk<OrcMips64>( |
895 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
896 | "PointersBlock is out of range" ); |
897 | |
898 | // Populate the stubs page stubs and mark it executable. |
899 | uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); |
900 | uint64_t PtrAddr = PointersBlockTargetAddress.getValue(); |
901 | |
902 | for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) { |
903 | uint64_t HeighestAddr = ((PtrAddr + 0x800080008000) >> 48); |
904 | uint64_t HeigherAddr = ((PtrAddr + 0x80008000) >> 32); |
905 | uint64_t HiAddr = ((PtrAddr + 0x8000) >> 16); |
906 | Stub[8 * I + 0] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,ptr1 |
907 | Stub[8 * I + 1] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(ptr) |
908 | Stub[8 * I + 2] = 0x0019cc38; // dsll $t9,$t9,16 |
909 | Stub[8 * I + 3] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr) |
910 | Stub[8 * I + 4] = 0x0019cc38; // dsll $t9,$t9,16 |
911 | Stub[8 * I + 5] = 0xdf390000 | (PtrAddr & 0xFFFF); // ld $t9,%lo(ptr) |
912 | Stub[8 * I + 6] = 0x03200008; // jr $t9 |
913 | Stub[8 * I + 7] = 0x00000000; // nop |
914 | } |
915 | } |
916 | |
917 | void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem, |
918 | ExecutorAddr ResolverTargetAddress, |
919 | ExecutorAddr ReentryFnAddr, |
920 | ExecutorAddr ReentryCtxAddr) { |
921 | |
922 | const uint32_t ResolverCode[] = { |
923 | 0xef810113, // 0x00: addi sp,sp,-264 |
924 | 0x00813023, // 0x04: sd s0,0(sp) |
925 | 0x00913423, // 0x08: sd s1,8(sp) |
926 | 0x01213823, // 0x0c: sd s2,16(sp) |
927 | 0x01313c23, // 0x10: sd s3,24(sp) |
928 | 0x03413023, // 0x14: sd s4,32(sp) |
929 | 0x03513423, // 0x18: sd s5,40(sp) |
930 | 0x03613823, // 0x1c: sd s6,48(sp) |
931 | 0x03713c23, // 0x20: sd s7,56(sp) |
932 | 0x05813023, // 0x24: sd s8,64(sp) |
933 | 0x05913423, // 0x28: sd s9,72(sp) |
934 | 0x05a13823, // 0x2c: sd s10,80(sp) |
935 | 0x05b13c23, // 0x30: sd s11,88(sp) |
936 | 0x06113023, // 0x34: sd ra,96(sp) |
937 | 0x06a13423, // 0x38: sd a0,104(sp) |
938 | 0x06b13823, // 0x3c: sd a1,112(sp) |
939 | 0x06c13c23, // 0x40: sd a2,120(sp) |
940 | 0x08d13023, // 0x44: sd a3,128(sp) |
941 | 0x08e13423, // 0x48: sd a4,136(sp) |
942 | 0x08f13823, // 0x4c: sd a5,144(sp) |
943 | 0x09013c23, // 0x50: sd a6,152(sp) |
944 | 0x0b113023, // 0x54: sd a7,160(sp) |
945 | 0x0a813427, // 0x58: fsd fs0,168(sp) |
946 | 0x0a913827, // 0x5c: fsd fs1,176(sp) |
947 | 0x0b213c27, // 0x60: fsd fs2,184(sp) |
948 | 0x0d313027, // 0x64: fsd fs3,192(sp) |
949 | 0x0d413427, // 0x68: fsd fs4,200(sp) |
950 | 0x0d513827, // 0x6c: fsd fs5,208(sp) |
951 | 0x0d613c27, // 0x70: fsd fs6,216(sp) |
952 | 0x0f713027, // 0x74: fsd fs7,224(sp) |
953 | 0x0f813427, // 0x78: fsd fs8,232(sp) |
954 | 0x0f913827, // 0x7c: fsd fs9,240(sp) |
955 | 0x0fa13c27, // 0x80: fsd fs10,248(sp) |
956 | 0x11b13027, // 0x84: fsd fs11,256(sp) |
957 | 0x00000517, // 0x88: auipc a0,0x0 |
958 | 0x0b053503, // 0x8c: ld a0,176(a0) # 0x138 |
959 | 0x00030593, // 0x90: mv a1,t1 |
960 | 0xff458593, // 0x94: addi a1,a1,-12 |
961 | 0x00000617, // 0x98: auipc a2,0x0 |
962 | 0x0a863603, // 0x9c: ld a2,168(a2) # 0x140 |
963 | 0x000600e7, // 0xa0: jalr a2 |
964 | 0x00050293, // 0xa4: mv t0,a0 |
965 | 0x00013403, // 0xa8: ld s0,0(sp) |
966 | 0x00813483, // 0xac: ld s1,8(sp) |
967 | 0x01013903, // 0xb0: ld s2,16(sp) |
968 | 0x01813983, // 0xb4: ld s3,24(sp) |
969 | 0x02013a03, // 0xb8: ld s4,32(sp) |
970 | 0x02813a83, // 0xbc: ld s5,40(sp) |
971 | 0x03013b03, // 0xc0: ld s6,48(sp) |
972 | 0x03813b83, // 0xc4: ld s7,56(sp) |
973 | 0x04013c03, // 0xc8: ld s8,64(sp) |
974 | 0x04813c83, // 0xcc: ld s9,72(sp) |
975 | 0x05013d03, // 0xd0: ld s10,80(sp) |
976 | 0x05813d83, // 0xd4: ld s11,88(sp) |
977 | 0x06013083, // 0xd8: ld ra,96(sp) |
978 | 0x06813503, // 0xdc: ld a0,104(sp) |
979 | 0x07013583, // 0xe0: ld a1,112(sp) |
980 | 0x07813603, // 0xe4: ld a2,120(sp) |
981 | 0x08013683, // 0xe8: ld a3,128(sp) |
982 | 0x08813703, // 0xec: ld a4,136(sp) |
983 | 0x09013783, // 0xf0: ld a5,144(sp) |
984 | 0x09813803, // 0xf4: ld a6,152(sp) |
985 | 0x0a013883, // 0xf8: ld a7,160(sp) |
986 | 0x0a813407, // 0xfc: fld fs0,168(sp) |
987 | 0x0b013487, // 0x100: fld fs1,176(sp) |
988 | 0x0b813907, // 0x104: fld fs2,184(sp) |
989 | 0x0c013987, // 0x108: fld fs3,192(sp) |
990 | 0x0c813a07, // 0x10c: fld fs4,200(sp) |
991 | 0x0d013a87, // 0x110: fld fs5,208(sp) |
992 | 0x0d813b07, // 0x114: fld fs6,216(sp) |
993 | 0x0e013b87, // 0x118: fld fs7,224(sp) |
994 | 0x0e813c07, // 0x11c: fld fs8,232(sp) |
995 | 0x0f013c87, // 0x120: fld fs9,240(sp) |
996 | 0x0f813d07, // 0x124: fld fs10,248(sp) |
997 | 0x10013d87, // 0x128: fld fs11,256(sp) |
998 | 0x10810113, // 0x12c: addi sp,sp,264 |
999 | 0x00028067, // 0x130: jr t0 |
1000 | 0x12345678, // 0x134: padding to align at 8 byte |
1001 | 0x12345678, // 0x138: Lreentry_ctx_ptr: |
1002 | 0xdeadbeef, // 0x13c: .quad 0 |
1003 | 0x98765432, // 0x140: Lreentry_fn_ptr: |
1004 | 0xcafef00d // 0x144: .quad 0 |
1005 | }; |
1006 | |
1007 | const unsigned ReentryCtxAddrOffset = 0x138; |
1008 | const unsigned ReentryFnAddrOffset = 0x140; |
1009 | |
1010 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
1011 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
1012 | n: sizeof(uint64_t)); |
1013 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
1014 | n: sizeof(uint64_t)); |
1015 | } |
1016 | |
1017 | void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem, |
1018 | ExecutorAddr TrampolineBlockTargetAddress, |
1019 | ExecutorAddr ResolverAddr, |
1020 | unsigned NumTrampolines) { |
1021 | |
1022 | unsigned OffsetToPtr = alignTo(Value: NumTrampolines * TrampolineSize, Align: 8); |
1023 | |
1024 | memcpy(dest: TrampolineBlockWorkingMem + OffsetToPtr, src: &ResolverAddr, |
1025 | n: sizeof(uint64_t)); |
1026 | |
1027 | uint32_t *Trampolines = |
1028 | reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); |
1029 | for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { |
1030 | uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xFFFFF000; |
1031 | uint32_t Lo12 = OffsetToPtr - Hi20; |
1032 | Trampolines[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr) |
1033 | Trampolines[4 * I + 1] = |
1034 | 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr) |
1035 | Trampolines[4 * I + 2] = 0x00028367; // jalr t1, t0 |
1036 | Trampolines[4 * I + 3] = 0xdeadface; // padding |
1037 | } |
1038 | } |
1039 | |
1040 | void OrcRiscv64::writeIndirectStubsBlock( |
1041 | char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress, |
1042 | ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) { |
1043 | // Stub format is: |
1044 | // |
1045 | // .section __orc_stubs |
1046 | // stub1: |
1047 | // auipc t0, %hi(ptr1) ; PC-rel load of ptr1 |
1048 | // ld t0, %lo(t0) |
1049 | // jr t0 ; Jump to resolver |
1050 | // .quad 0 ; Pad to 16 bytes |
1051 | // stub2: |
1052 | // auipc t0, %hi(ptr1) ; PC-rel load of ptr1 |
1053 | // ld t0, %lo(t0) |
1054 | // jr t0 ; Jump to resolver |
1055 | // .quad 0 |
1056 | // |
1057 | // ... |
1058 | // |
1059 | // .section __orc_ptrs |
1060 | // ptr1: |
1061 | // .quad 0x0 |
1062 | // ptr2: |
1063 | // .quad 0x0 |
1064 | // |
1065 | // ... |
1066 | |
1067 | assert(stubAndPointerRangesOk<OrcRiscv64>( |
1068 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
1069 | "PointersBlock is out of range" ); |
1070 | |
1071 | uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); |
1072 | |
1073 | for (unsigned I = 0; I < NumStubs; ++I) { |
1074 | uint64_t PtrDisplacement = |
1075 | PointersBlockTargetAddress - StubsBlockTargetAddress; |
1076 | uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xFFFFF000; |
1077 | uint32_t Lo12 = PtrDisplacement - Hi20; |
1078 | Stub[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr) |
1079 | Stub[4 * I + 1] = 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr) |
1080 | Stub[4 * I + 2] = 0x00028067; // jr t0 |
1081 | Stub[4 * I + 3] = 0xfeedbeef; // padding |
1082 | PointersBlockTargetAddress += PointerSize; |
1083 | StubsBlockTargetAddress += StubSize; |
1084 | } |
1085 | } |
1086 | |
1087 | void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem, |
1088 | ExecutorAddr ResolverTargetAddress, |
1089 | ExecutorAddr ReentryFnAddr, |
1090 | ExecutorAddr ReentryCtxAddr) { |
1091 | |
1092 | LLVM_DEBUG({ |
1093 | dbgs() << "Writing resolver code to " |
1094 | << formatv("{0:x16}" , ResolverTargetAddress) << "\n" ; |
1095 | }); |
1096 | |
1097 | const uint32_t ResolverCode[] = { |
1098 | 0x02fde063, // 0x0: addi.d $sp, $sp, -136(0xf78) |
1099 | 0x29c00061, // 0x4: st.d $ra, $sp, 0 |
1100 | 0x29c02064, // 0x8: st.d $a0, $sp, 8(0x8) |
1101 | 0x29c04065, // 0xc: st.d $a1, $sp, 16(0x10) |
1102 | 0x29c06066, // 0x10: st.d $a2, $sp, 24(0x18) |
1103 | 0x29c08067, // 0x14: st.d $a3, $sp, 32(0x20) |
1104 | 0x29c0a068, // 0x18: st.d $a4, $sp, 40(0x28) |
1105 | 0x29c0c069, // 0x1c: st.d $a5, $sp, 48(0x30) |
1106 | 0x29c0e06a, // 0x20: st.d $a6, $sp, 56(0x38) |
1107 | 0x29c1006b, // 0x24: st.d $a7, $sp, 64(0x40) |
1108 | 0x2bc12060, // 0x28: fst.d $fa0, $sp, 72(0x48) |
1109 | 0x2bc14061, // 0x2c: fst.d $fa1, $sp, 80(0x50) |
1110 | 0x2bc16062, // 0x30: fst.d $fa2, $sp, 88(0x58) |
1111 | 0x2bc18063, // 0x34: fst.d $fa3, $sp, 96(0x60) |
1112 | 0x2bc1a064, // 0x38: fst.d $fa4, $sp, 104(0x68) |
1113 | 0x2bc1c065, // 0x3c: fst.d $fa5, $sp, 112(0x70) |
1114 | 0x2bc1e066, // 0x40: fst.d $fa6, $sp, 120(0x78) |
1115 | 0x2bc20067, // 0x44: fst.d $fa7, $sp, 128(0x80) |
1116 | 0x1c000004, // 0x48: pcaddu12i $a0, 0 |
1117 | 0x28c1c084, // 0x4c: ld.d $a0, $a0, 112(0x70) |
1118 | 0x001501a5, // 0x50: move $a1, $t1 |
1119 | 0x02ffd0a5, // 0x54: addi.d $a1, $a1, -12(0xff4) |
1120 | 0x1c000006, // 0x58: pcaddu12i $a2, 0 |
1121 | 0x28c1a0c6, // 0x5c: ld.d $a2, $a2, 104(0x68) |
1122 | 0x4c0000c1, // 0x60: jirl $ra, $a2, 0 |
1123 | 0x0015008c, // 0x64: move $t0, $a0 |
1124 | 0x2b820067, // 0x68: fld.d $fa7, $sp, 128(0x80) |
1125 | 0x2b81e066, // 0x6c: fld.d $fa6, $sp, 120(0x78) |
1126 | 0x2b81c065, // 0x70: fld.d $fa5, $sp, 112(0x70) |
1127 | 0x2b81a064, // 0x74: fld.d $fa4, $sp, 104(0x68) |
1128 | 0x2b818063, // 0x78: fld.d $fa3, $sp, 96(0x60) |
1129 | 0x2b816062, // 0x7c: fld.d $fa2, $sp, 88(0x58) |
1130 | 0x2b814061, // 0x80: fld.d $fa1, $sp, 80(0x50) |
1131 | 0x2b812060, // 0x84: fld.d $fa0, $sp, 72(0x48) |
1132 | 0x28c1006b, // 0x88: ld.d $a7, $sp, 64(0x40) |
1133 | 0x28c0e06a, // 0x8c: ld.d $a6, $sp, 56(0x38) |
1134 | 0x28c0c069, // 0x90: ld.d $a5, $sp, 48(0x30) |
1135 | 0x28c0a068, // 0x94: ld.d $a4, $sp, 40(0x28) |
1136 | 0x28c08067, // 0x98: ld.d $a3, $sp, 32(0x20) |
1137 | 0x28c06066, // 0x9c: ld.d $a2, $sp, 24(0x18) |
1138 | 0x28c04065, // 0xa0: ld.d $a1, $sp, 16(0x10) |
1139 | 0x28c02064, // 0xa4: ld.d $a0, $sp, 8(0x8) |
1140 | 0x28c00061, // 0xa8: ld.d $ra, $sp, 0 |
1141 | 0x02c22063, // 0xac: addi.d $sp, $sp, 136(0x88) |
1142 | 0x4c000180, // 0xb0: jr $t0 |
1143 | 0x00000000, // 0xb4: padding to align at 8 bytes |
1144 | 0x01234567, // 0xb8: Lreentry_ctx_ptr: |
1145 | 0xdeedbeef, // 0xbc: .dword 0 |
1146 | 0x98765432, // 0xc0: Lreentry_fn_ptr: |
1147 | 0xcafef00d, // 0xc4: .dword 0 |
1148 | }; |
1149 | |
1150 | const unsigned ReentryCtxAddrOffset = 0xb8; |
1151 | const unsigned ReentryFnAddrOffset = 0xc0; |
1152 | |
1153 | memcpy(dest: ResolverWorkingMem, src: ResolverCode, n: sizeof(ResolverCode)); |
1154 | memcpy(dest: ResolverWorkingMem + ReentryFnAddrOffset, src: &ReentryFnAddr, |
1155 | n: sizeof(uint64_t)); |
1156 | memcpy(dest: ResolverWorkingMem + ReentryCtxAddrOffset, src: &ReentryCtxAddr, |
1157 | n: sizeof(uint64_t)); |
1158 | } |
1159 | |
1160 | void OrcLoongArch64::writeTrampolines(char *TrampolineBlockWorkingMem, |
1161 | ExecutorAddr TrampolineBlockTargetAddress, |
1162 | ExecutorAddr ResolverAddr, |
1163 | unsigned NumTrampolines) { |
1164 | |
1165 | LLVM_DEBUG({ |
1166 | dbgs() << "Writing trampoline code to " |
1167 | << formatv("{0:x16}" , TrampolineBlockTargetAddress) << "\n" ; |
1168 | }); |
1169 | |
1170 | unsigned OffsetToPtr = alignTo(Value: NumTrampolines * TrampolineSize, Align: 8); |
1171 | |
1172 | memcpy(dest: TrampolineBlockWorkingMem + OffsetToPtr, src: &ResolverAddr, |
1173 | n: sizeof(uint64_t)); |
1174 | |
1175 | uint32_t *Trampolines = |
1176 | reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); |
1177 | for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { |
1178 | uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xfffff000; |
1179 | uint32_t Lo12 = OffsetToPtr - Hi20; |
1180 | Trampolines[4 * I + 0] = |
1181 | 0x1c00000c | |
1182 | (((Hi20 >> 12) & 0xfffff) << 5); // pcaddu12i $t0, %pc_hi20(Lptr) |
1183 | Trampolines[4 * I + 1] = |
1184 | 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr) |
1185 | Trampolines[4 * I + 2] = 0x4c00018d; // jirl $t1, $t0, 0 |
1186 | Trampolines[4 * I + 3] = 0x0; // padding |
1187 | } |
1188 | } |
1189 | |
1190 | void OrcLoongArch64::writeIndirectStubsBlock( |
1191 | char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress, |
1192 | ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) { |
1193 | // Stub format is: |
1194 | // |
1195 | // .section __orc_stubs |
1196 | // stub1: |
1197 | // pcaddu12i $t0, %pc_hi20(ptr1) ; PC-rel load of ptr1 |
1198 | // ld.d $t0, $t0, %pc_lo12(ptr1) |
1199 | // jr $t0 ; Jump to resolver |
1200 | // .dword 0 ; Pad to 16 bytes |
1201 | // stub2: |
1202 | // pcaddu12i $t0, %pc_hi20(ptr2) ; PC-rel load of ptr2 |
1203 | // ld.d $t0, $t0, %pc_lo12(ptr2) |
1204 | // jr $t0 ; Jump to resolver |
1205 | // .dword 0 ; Pad to 16 bytes |
1206 | // ... |
1207 | // |
1208 | // .section __orc_ptrs |
1209 | // ptr1: |
1210 | // .dword 0x0 |
1211 | // ptr2: |
1212 | // .dword 0x0 |
1213 | // ... |
1214 | LLVM_DEBUG({ |
1215 | dbgs() << "Writing stubs code to " |
1216 | << formatv("{0:x16}" , StubsBlockTargetAddress) << "\n" ; |
1217 | }); |
1218 | assert(stubAndPointerRangesOk<OrcLoongArch64>( |
1219 | StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && |
1220 | "PointersBlock is out of range" ); |
1221 | |
1222 | uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); |
1223 | |
1224 | for (unsigned I = 0; I < NumStubs; ++I) { |
1225 | uint64_t PtrDisplacement = |
1226 | PointersBlockTargetAddress - StubsBlockTargetAddress; |
1227 | uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xfffff000; |
1228 | uint32_t Lo12 = PtrDisplacement - Hi20; |
1229 | Stub[4 * I + 0] = 0x1c00000c | (((Hi20 >> 12) & 0xfffff) |
1230 | << 5); // pcaddu12i $t0, %pc_hi20(Lptr) |
1231 | Stub[4 * I + 1] = |
1232 | 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr) |
1233 | Stub[4 * I + 2] = 0x4c000180; // jr $t0 |
1234 | Stub[4 * I + 3] = 0x0; // padding |
1235 | PointersBlockTargetAddress += PointerSize; |
1236 | StubsBlockTargetAddress += StubSize; |
1237 | } |
1238 | } |
1239 | |
1240 | } // End namespace orc. |
1241 | } // End namespace llvm. |
1242 | |