//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This implements the X86-specific assembly code for the trampolines.
//
//===----------------------------------------------------------------------===//

#include "../builtins/assembly.h"
#include "../sanitizer_common/sanitizer_asm.h"

// XRay trampolines that are not produced by intrinsics are not System V AMD64
// ABI compliant: they are called with a stack that is misaligned by 8 bytes
// with respect to 16-byte alignment, because they run immediately after the
// call into, or immediately before the return from, the function being
// instrumented. This saves space in the patch point at the cost of the 8-byte
// misalignment, which ALIGN_STACK_16B compensates for.
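
// For example, after patching (see the sled descriptions in xray_x86_64.cpp),
// an entry sled looks roughly like:
//   movl $<function id>, %r10d
//   callq __xray_FunctionEntry   // pushes a return address: 8 more bytes
// and an exit sled replaces the instrumented function's return with:
//   movl $<function id>, %r10d
//   jmp __xray_FunctionExit      // the trampoline's retq performs the return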

.macro ALIGN_STACK_16B
#if defined(__APPLE__)
  subq $$8, %rsp
#else
  subq $8, %rsp
#endif
  CFI_ADJUST_CFA_OFFSET(8)
.endm

.macro RESTORE_STACK_ALIGNMENT
#if defined(__APPLE__)
  addq $$8, %rsp
#else
  addq $8, %rsp
#endif
  CFI_ADJUST_CFA_OFFSET(-8)
.endm

// This macro must lower the stack pointer by an odd multiple of 8, so that
// the stack ends up 16-byte aligned either by the subtraction alone (for the
// ABI-compliant event trampolines) or after a further ALIGN_STACK_16B.
.macro SAVE_REGISTERS
  pushfq
  CFI_ADJUST_CFA_OFFSET(8)
  subq $240, %rsp
  CFI_ADJUST_CFA_OFFSET(240)
  movq %rbp, 232(%rsp)
  movupd %xmm0, 216(%rsp)
  movupd %xmm1, 200(%rsp)
  movupd %xmm2, 184(%rsp)
  movupd %xmm3, 168(%rsp)
  movupd %xmm4, 152(%rsp)
  movupd %xmm5, 136(%rsp)
  movupd %xmm6, 120(%rsp)
  movupd %xmm7, 104(%rsp)
  movq %rdi, 96(%rsp)
  movq %rax, 88(%rsp)
  movq %rdx, 80(%rsp)
  movq %rsi, 72(%rsp)
  movq %rcx, 64(%rsp)
  movq %r8, 56(%rsp)
  movq %r9, 48(%rsp)
  movq %r10, 40(%rsp)
  movq %r11, 32(%rsp)
  movq %r12, 24(%rsp)
  movq %r13, 16(%rsp)
  movq %r14, 8(%rsp)
  movq %r15, 0(%rsp)
.endm
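
// A worked example for __xray_FunctionEntry, which the entry sled reaches via
// callq, so %rsp % 16 == 0 on entry (8 bytes off the ABI-mandated state):
//   SAVE_REGISTERS:  %rsp -= 248  ->  %rsp % 16 == 8
//   ALIGN_STACK_16B: %rsp -=   8  ->  %rsp % 16 == 0
// The `callq *%rax` into the handler then pushes a return address, so the
// handler starts with the ABI-compliant %rsp % 16 == 8.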

.macro RESTORE_REGISTERS
  movq 232(%rsp), %rbp
  movupd 216(%rsp), %xmm0
  movupd 200(%rsp), %xmm1
  movupd 184(%rsp), %xmm2
  movupd 168(%rsp), %xmm3
  movupd 152(%rsp), %xmm4
  movupd 136(%rsp), %xmm5
  movupd 120(%rsp), %xmm6
  movupd 104(%rsp), %xmm7
  movq 96(%rsp), %rdi
  movq 88(%rsp), %rax
  movq 80(%rsp), %rdx
  movq 72(%rsp), %rsi
  movq 64(%rsp), %rcx
  movq 56(%rsp), %r8
  movq 48(%rsp), %r9
  movq 40(%rsp), %r10
  movq 32(%rsp), %r11
  movq 24(%rsp), %r12
  movq 16(%rsp), %r13
  movq 8(%rsp), %r14
  movq 0(%rsp), %r15
  addq $240, %rsp
  CFI_ADJUST_CFA_OFFSET(-240)
  popfq
  CFI_ADJUST_CFA_OFFSET(-8)
.endm

.text
#if !defined(__APPLE__)
.section .text
.file "xray_trampoline_x86.S"
#else
.section __TEXT,__text
#endif

.macro LOAD_HANDLER_ADDR handler
#if !defined(XRAY_PIC)
  movq ASM_SYMBOL(\handler)(%rip), %rax
#else
  movq ASM_SYMBOL(\handler)@GOTPCREL(%rip), %rax
  movq (%rax), %rax
#endif
.endm
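
// For reference, the handler pointers loaded above (e.g.
// __xray::XRayPatchedFunction) are installed through the API declared in
// xray/xray_interface.h, where the basic handler has roughly the signature:
//   void handler(int32_t FuncId, XRayEntryType Type);
// with XRayEntryType values ENTRY = 0, EXIT = 1, TAIL = 2, LOG_ARGS_ENTRY = 3,
// CUSTOM_EVENT = 4, TYPED_EVENT = 5. The trampolines below materialize FuncId
// in %edi and the entry type in %esi before the indirect call.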

//===----------------------------------------------------------------------===//

  .globl ASM_SYMBOL(__xray_FunctionEntry)
  ASM_HIDDEN(__xray_FunctionEntry)
  .align 16, 0x90
  ASM_TYPE_FUNCTION(__xray_FunctionEntry)
# LLVM-MCA-BEGIN __xray_FunctionEntry
ASM_SYMBOL(__xray_FunctionEntry):
  CFI_STARTPROC
  SAVE_REGISTERS
  ALIGN_STACK_16B

  // This load has to be atomic; it is concurrent with __xray_patch().
  // On x86/amd64, a simple (type-aligned) MOV instruction is enough.
  LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
  testq %rax, %rax
  je LOCAL_LABEL(tmp0)

  // The patched function prologue puts its xray_instr_map index into %r10d.
  movl %r10d, %edi
  xorl %esi, %esi
  callq *%rax

LOCAL_LABEL(tmp0):
  RESTORE_STACK_ALIGNMENT
  RESTORE_REGISTERS
  retq
# LLVM-MCA-END
  ASM_SIZE(__xray_FunctionEntry)
  CFI_ENDPROC
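
// A minimal usage sketch from the C++ side, using the public API declared in
// xray/xray_interface.h (MyHandler is a hypothetical user function):
//   void MyHandler(int32_t FuncId, XRayEntryType Type) { /* record event */ }
//   __xray_set_handler(MyHandler);  // becomes __xray::XRayPatchedFunction
//   __xray_patch();                 // rewrite the sleds to call into here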

//===----------------------------------------------------------------------===//

  .globl ASM_SYMBOL(__xray_FunctionExit)
  ASM_HIDDEN(__xray_FunctionExit)
  .align 16, 0x90
  ASM_TYPE_FUNCTION(__xray_FunctionExit)
# LLVM-MCA-BEGIN __xray_FunctionExit
ASM_SYMBOL(__xray_FunctionExit):
  CFI_STARTPROC
  ALIGN_STACK_16B

  // Save the important registers first. Since this trampoline is only ever
  // jumped into (it runs in place of the instrumented function's retq), we
  // only need to preserve the registers used for returning.
  subq $64, %rsp
  CFI_ADJUST_CFA_OFFSET(64)
  movq %rbp, 48(%rsp)
  movupd %xmm0, 32(%rsp)
  movupd %xmm1, 16(%rsp)
  movq %rax, 8(%rsp)
  movq %rdx, 0(%rsp)
  LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
  testq %rax, %rax
  je LOCAL_LABEL(tmp2)

  movl %r10d, %edi
  movl $1, %esi
  callq *%rax

LOCAL_LABEL(tmp2):
  // Restore the important registers.
  movq 48(%rsp), %rbp
  movupd 32(%rsp), %xmm0
  movupd 16(%rsp), %xmm1
  movq 8(%rsp), %rax
  movq 0(%rsp), %rdx
  addq $64, %rsp
  CFI_ADJUST_CFA_OFFSET(-64)

  RESTORE_STACK_ALIGNMENT
  retq
# LLVM-MCA-END
  ASM_SIZE(__xray_FunctionExit)
  CFI_ENDPROC

//===----------------------------------------------------------------------===//

  .globl ASM_SYMBOL(__xray_FunctionTailExit)
  ASM_HIDDEN(__xray_FunctionTailExit)
  .align 16, 0x90
  ASM_TYPE_FUNCTION(__xray_FunctionTailExit)
# LLVM-MCA-BEGIN __xray_FunctionTailExit
ASM_SYMBOL(__xray_FunctionTailExit):
  CFI_STARTPROC
  SAVE_REGISTERS
  ALIGN_STACK_16B

  LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
  testq %rax, %rax
  je LOCAL_LABEL(tmp4)

  movl %r10d, %edi
  movl $2, %esi
  callq *%rax

LOCAL_LABEL(tmp4):
  RESTORE_STACK_ALIGNMENT
  RESTORE_REGISTERS
  retq
# LLVM-MCA-END
  ASM_SIZE(__xray_FunctionTailExit)
  CFI_ENDPROC

//===----------------------------------------------------------------------===//

  .globl ASM_SYMBOL(__xray_ArgLoggerEntry)
  ASM_HIDDEN(__xray_ArgLoggerEntry)
  .align 16, 0x90
  ASM_TYPE_FUNCTION(__xray_ArgLoggerEntry)
# LLVM-MCA-BEGIN __xray_ArgLoggerEntry
ASM_SYMBOL(__xray_ArgLoggerEntry):
  CFI_STARTPROC
  SAVE_REGISTERS
  ALIGN_STACK_16B

  // Again, these function pointer loads must be atomic; MOV is fine.
  LOAD_HANDLER_ADDR _ZN6__xray13XRayArgLoggerE
  testq %rax, %rax
  jne LOCAL_LABEL(arg1entryLog)

  // If the arg1 logging handler is not set, defer to the no-arg handler.
  LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
  testq %rax, %rax
  je LOCAL_LABEL(arg1entryFail)

LOCAL_LABEL(arg1entryLog):
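  // The arg1 handler, installed via __xray_set_handler_arg1 from
  // xray/xray_interface.h, takes roughly:
  //   void handler(int32_t FuncId, XRayEntryType Type, uint64_t Arg1);
  // so the shuffle below moves the instrumented function's first argument
  // into the handler's third parameter.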

  // First argument will become the third
  movq %rdi, %rdx

  // XRayEntryType::LOG_ARGS_ENTRY into the second
  movl $3, %esi

  // 32-bit function ID becomes the first
  movl %r10d, %edi

  callq *%rax

LOCAL_LABEL(arg1entryFail):
  RESTORE_STACK_ALIGNMENT
  RESTORE_REGISTERS
  retq
# LLVM-MCA-END
  ASM_SIZE(__xray_ArgLoggerEntry)
  CFI_ENDPROC

//===----------------------------------------------------------------------===//

// The __xray_*Event trampolines have default visibility so that they can be
// referenced by user DSOs that do not link against the runtime.
  .global ASM_SYMBOL(__xray_CustomEvent)
  .align 16, 0x90
  ASM_TYPE_FUNCTION(__xray_CustomEvent)
# LLVM-MCA-BEGIN __xray_CustomEvent
ASM_SYMBOL(__xray_CustomEvent):
  CFI_STARTPROC
  SAVE_REGISTERS

  // This trampoline takes two arguments, which should already be in
  // %rdi and %rsi.
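  // Per xray/xray_interface.h, the handler installed via
  // __xray_set_customevent_handler has roughly the signature:
  //   void handler(void *Event, size_t EventSize);
  // which matches the (%rdi, %rsi) pair as-is.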
  LOAD_HANDLER_ADDR _ZN6__xray22XRayPatchedCustomEventE
  testq %rax, %rax
  je LOCAL_LABEL(customEventCleanup)

  callq *%rax

LOCAL_LABEL(customEventCleanup):
  RESTORE_REGISTERS
  retq
# LLVM-MCA-END
  ASM_SIZE(__xray_CustomEvent)
  CFI_ENDPROC

//===----------------------------------------------------------------------===//

  .global ASM_SYMBOL(__xray_TypedEvent)
  .align 16, 0x90
  ASM_TYPE_FUNCTION(__xray_TypedEvent)
# LLVM-MCA-BEGIN __xray_TypedEvent
ASM_SYMBOL(__xray_TypedEvent):
  CFI_STARTPROC
  SAVE_REGISTERS

  // This trampoline takes three arguments, which should already be in
  // %rdi, %rsi, and %rdx without our intervention.
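  // Per xray/xray_interface.h, the handler installed via
  // __xray_set_typedevent_handler takes roughly an event type, a payload
  // pointer, and a payload size:
  //   void handler(size_t EventType, const void *Data, size_t DataSize);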
  LOAD_HANDLER_ADDR _ZN6__xray21XRayPatchedTypedEventE
  testq %rax, %rax
  je LOCAL_LABEL(typedEventCleanup)

  callq *%rax

LOCAL_LABEL(typedEventCleanup):
  RESTORE_REGISTERS
  retq
# LLVM-MCA-END
  ASM_SIZE(__xray_TypedEvent)
  CFI_ENDPROC

//===----------------------------------------------------------------------===//

NO_EXEC_STACK_DIRECTIVE