| 1 | #include "cpuid.h" |
| 2 | #include "sanitizer_common/sanitizer_common.h" |
| 3 | #if !SANITIZER_FUCHSIA |
| 4 | #include "sanitizer_common/sanitizer_posix.h" |
| 5 | #endif |
| 6 | #include "xray_defs.h" |
| 7 | #include "xray_interface_internal.h" |
| 8 | |
| 9 | #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE |
| 10 | #include <sys/types.h> |
| 11 | #include <sys/sysctl.h> |
| 12 | #elif SANITIZER_FUCHSIA |
| 13 | #include <zircon/syscalls.h> |
| 14 | #endif |
| 15 | |
| 16 | #include <atomic> |
| 17 | #include <cstdint> |
| 18 | #include <errno.h> |
| 19 | #include <fcntl.h> |
| 20 | #include <iterator> |
| 21 | #include <limits> |
| 22 | #include <tuple> |
| 23 | #include <unistd.h> |
| 24 | |
| 25 | namespace __xray { |
| 26 | |
| 27 | #if SANITIZER_LINUX |
| 28 | static std::pair<ssize_t, bool> |
| 29 | retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { |
| 30 | auto BytesToRead = std::distance(first: Begin, last: End); |
| 31 | ssize_t BytesRead; |
| 32 | ssize_t TotalBytesRead = 0; |
| 33 | while (BytesToRead && (BytesRead = read(fd: Fd, buf: Begin, nbytes: BytesToRead))) { |
| 34 | if (BytesRead == -1) { |
| 35 | if (errno == EINTR) |
| 36 | continue; |
| 37 | Report(format: "Read error; errno = %d\n" , errno); |
| 38 | return std::make_pair(x&: TotalBytesRead, y: false); |
| 39 | } |
| 40 | |
| 41 | TotalBytesRead += BytesRead; |
| 42 | BytesToRead -= BytesRead; |
| 43 | Begin += BytesRead; |
| 44 | } |
| 45 | return std::make_pair(x&: TotalBytesRead, y: true); |
| 46 | } |
| 47 | |
| 48 | static bool readValueFromFile(const char *Filename, |
| 49 | long long *Value) XRAY_NEVER_INSTRUMENT { |
| 50 | int Fd = open(file: Filename, O_RDONLY | O_CLOEXEC); |
| 51 | if (Fd == -1) |
| 52 | return false; |
| 53 | static constexpr size_t BufSize = 256; |
| 54 | char Line[BufSize] = {}; |
| 55 | ssize_t BytesRead; |
| 56 | bool Success; |
| 57 | std::tie(args&: BytesRead, args&: Success) = retryingReadSome(Fd, Begin: Line, End: Line + BufSize); |
| 58 | close(fd: Fd); |
| 59 | if (!Success) |
| 60 | return false; |
| 61 | const char *End = nullptr; |
| 62 | long long Tmp = internal_simple_strtoll(nptr: Line, endptr: &End, base: 10); |
| 63 | bool Result = false; |
| 64 | if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { |
| 65 | *Value = Tmp; |
| 66 | Result = true; |
| 67 | } |
| 68 | return Result; |
| 69 | } |
| 70 | |
| 71 | uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { |
| 72 | long long TSCFrequency = -1; |
| 73 | if (readValueFromFile(Filename: "/sys/devices/system/cpu/cpu0/tsc_freq_khz" , |
| 74 | Value: &TSCFrequency)) { |
| 75 | TSCFrequency *= 1000; |
| 76 | } else if (readValueFromFile( |
| 77 | Filename: "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq" , |
| 78 | Value: &TSCFrequency)) { |
| 79 | TSCFrequency *= 1000; |
| 80 | } else { |
| 81 | Report(format: "Unable to determine CPU frequency for TSC accounting.\n" ); |
| 82 | } |
| 83 | return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency); |
| 84 | } |
| 85 | #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE |
| 86 | uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { |
| 87 | long long TSCFrequency = -1; |
| 88 | size_t tscfreqsz = sizeof(TSCFrequency); |
| 89 | #if SANITIZER_APPLE |
| 90 | if (internal_sysctlbyname("machdep.tsc.frequency" , &TSCFrequency, |
| 91 | &tscfreqsz, NULL, 0) != -1) { |
| 92 | |
| 93 | #else |
| 94 | if (internal_sysctlbyname("machdep.tsc_freq" , &TSCFrequency, &tscfreqsz, |
| 95 | NULL, 0) != -1) { |
| 96 | #endif |
| 97 | return static_cast<uint64_t>(TSCFrequency); |
| 98 | } else { |
| 99 | Report("Unable to determine CPU frequency for TSC accounting.\n" ); |
| 100 | } |
| 101 | |
| 102 | return 0; |
| 103 | } |
| 104 | #elif !SANITIZER_FUCHSIA |
| 105 | uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { |
| 106 | /* Not supported */ |
| 107 | return 0; |
| 108 | } |
| 109 | #endif |
| 110 | |
// Byte patterns written into XRay sleds. The 16-bit sequences are stored
// little-endian, so the low byte of each constant is the first byte in memory.

// Single-byte opcodes.
static constexpr uint8_t CallOpCode = 0xe8; // call rel32
static constexpr uint8_t JmpOpCode = 0xe9;  // jmp rel32
static constexpr uint8_t RetOpCode = 0xc3;  // ret

// Two-byte sequences placed at the very start of a sled.
static constexpr uint16_t MovR10Seq = 0xba41; // 41 ba: mov r10d, imm32
static constexpr uint16_t Jmp9Seq = 0x09eb;   // eb 09: jmp +9
static constexpr uint16_t Jmp15Seq = 0x0feb;  // eb 0f: jmp +15
static constexpr uint16_t Jmp20Seq = 0x14eb;  // eb 14: jmp +20
static constexpr uint16_t NopwSeq = 0x9066;   // 66 90: nopw

// Inclusive range of displacements representable in a signed 32-bit relative
// call/jmp operand.
static constexpr int64_t MinOffset = std::numeric_limits<int32_t>::min();
static constexpr int64_t MaxOffset = std::numeric_limits<int32_t>::max();
| 122 | |
| 123 | bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, |
| 124 | const XRaySledEntry &Sled, |
| 125 | const XRayTrampolines &Trampolines, |
| 126 | bool LogArgs) XRAY_NEVER_INSTRUMENT { |
| 127 | // Here we do the dance of replacing the following sled: |
| 128 | // |
| 129 | // xray_sled_n: |
| 130 | // jmp +9 |
| 131 | // <9 byte nop> |
| 132 | // |
| 133 | // With the following: |
| 134 | // |
| 135 | // mov r10d, <function id> |
| 136 | // call <relative 32bit offset to entry trampoline> |
| 137 | // |
| 138 | // We need to do this in the following order: |
| 139 | // |
| 140 | // 1. Put the function id first, 2 bytes from the start of the sled (just |
| 141 | // after the 2-byte jmp instruction). |
| 142 | // 2. Put the call opcode 6 bytes from the start of the sled. |
| 143 | // 3. Put the relative offset 7 bytes from the start of the sled. |
| 144 | // 4. Do an atomic write over the jmp instruction for the "mov r10d" |
| 145 | // opcode and first operand. |
| 146 | // |
| 147 | // Prerequisite is to compute the relative offset to the trampoline's address. |
| 148 | auto Trampoline = |
| 149 | LogArgs ? Trampolines.LogArgsTrampoline : Trampolines.EntryTrampoline; |
| 150 | const uint64_t Address = Sled.address(); |
| 151 | int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) - |
| 152 | (static_cast<int64_t>(Address) + 11); |
| 153 | if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { |
| 154 | Report(format: "XRay Entry trampoline (%p) too far from sled (%p)\n" , |
| 155 | reinterpret_cast<void *>(Trampoline), |
| 156 | reinterpret_cast<void *>(Address)); |
| 157 | return false; |
| 158 | } |
| 159 | if (Enable) { |
| 160 | *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; |
| 161 | *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode; |
| 162 | *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; |
| 163 | std::atomic_store_explicit( |
| 164 | a: reinterpret_cast<std::atomic<uint16_t> *>(Address), i: MovR10Seq, |
| 165 | m: std::memory_order_release); |
| 166 | } else { |
| 167 | std::atomic_store_explicit( |
| 168 | a: reinterpret_cast<std::atomic<uint16_t> *>(Address), i: Jmp9Seq, |
| 169 | m: std::memory_order_release); |
| 170 | // FIXME: Write out the nops still? |
| 171 | } |
| 172 | return true; |
| 173 | } |
| 174 | |
| 175 | bool patchFunctionExit( |
| 176 | const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled, |
| 177 | const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT { |
| 178 | // Here we do the dance of replacing the following sled: |
| 179 | // |
| 180 | // xray_sled_n: |
| 181 | // ret |
| 182 | // <10 byte nop> |
| 183 | // |
| 184 | // With the following: |
| 185 | // |
| 186 | // mov r10d, <function id> |
| 187 | // jmp <relative 32bit offset to exit trampoline> |
| 188 | // |
| 189 | // 1. Put the function id first, 2 bytes from the start of the sled (just |
| 190 | // after the 1-byte ret instruction). |
| 191 | // 2. Put the jmp opcode 6 bytes from the start of the sled. |
| 192 | // 3. Put the relative offset 7 bytes from the start of the sled. |
| 193 | // 4. Do an atomic write over the jmp instruction for the "mov r10d" |
| 194 | // opcode and first operand. |
| 195 | // |
| 196 | // Prerequisite is to compute the relative offset fo the |
| 197 | // __xray_FunctionExit function's address. |
| 198 | auto Trampoline = Trampolines.ExitTrampoline; |
| 199 | const uint64_t Address = Sled.address(); |
| 200 | int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) - |
| 201 | (static_cast<int64_t>(Address) + 11); |
| 202 | if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { |
| 203 | Report(format: "XRay Exit trampoline (%p) too far from sled (%p)\n" , |
| 204 | reinterpret_cast<void *>(Trampoline), |
| 205 | reinterpret_cast<void *>(Address)); |
| 206 | return false; |
| 207 | } |
| 208 | if (Enable) { |
| 209 | *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; |
| 210 | *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode; |
| 211 | *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; |
| 212 | std::atomic_store_explicit( |
| 213 | a: reinterpret_cast<std::atomic<uint16_t> *>(Address), i: MovR10Seq, |
| 214 | m: std::memory_order_release); |
| 215 | } else { |
| 216 | std::atomic_store_explicit( |
| 217 | a: reinterpret_cast<std::atomic<uint8_t> *>(Address), i: RetOpCode, |
| 218 | m: std::memory_order_release); |
| 219 | // FIXME: Write out the nops still? |
| 220 | } |
| 221 | return true; |
| 222 | } |
| 223 | |
| 224 | bool patchFunctionTailExit( |
| 225 | const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled, |
| 226 | const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT { |
| 227 | // Here we do the dance of replacing the tail call sled with a similar |
| 228 | // sequence as the entry sled, but calls the tail exit sled instead. |
| 229 | auto Trampoline = Trampolines.TailExitTrampoline; |
| 230 | const uint64_t Address = Sled.address(); |
| 231 | int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) - |
| 232 | (static_cast<int64_t>(Address) + 11); |
| 233 | if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { |
| 234 | Report(format: "XRay Tail Exit trampoline (%p) too far from sled (%p)\n" , |
| 235 | reinterpret_cast<void *>(Trampoline), |
| 236 | reinterpret_cast<void *>(Address)); |
| 237 | return false; |
| 238 | } |
| 239 | if (Enable) { |
| 240 | *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; |
| 241 | *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode; |
| 242 | *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; |
| 243 | std::atomic_store_explicit( |
| 244 | a: reinterpret_cast<std::atomic<uint16_t> *>(Address), i: MovR10Seq, |
| 245 | m: std::memory_order_release); |
| 246 | } else { |
| 247 | std::atomic_store_explicit( |
| 248 | a: reinterpret_cast<std::atomic<uint16_t> *>(Address), i: Jmp9Seq, |
| 249 | m: std::memory_order_release); |
| 250 | // FIXME: Write out the nops still? |
| 251 | } |
| 252 | return true; |
| 253 | } |
| 254 | |
| 255 | bool patchCustomEvent(const bool Enable, const uint32_t FuncId, |
| 256 | const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { |
| 257 | // Here we do the dance of replacing the following sled: |
| 258 | // |
| 259 | // xray_sled_n: |
| 260 | // jmp +15 // 2 bytes |
| 261 | // ... |
| 262 | // |
| 263 | // With the following: |
| 264 | // |
| 265 | // nopw // 2 bytes* |
| 266 | // ... |
| 267 | // |
| 268 | // |
| 269 | // The "unpatch" should just turn the 'nopw' back to a 'jmp +15'. |
| 270 | const uint64_t Address = Sled.address(); |
| 271 | if (Enable) { |
| 272 | std::atomic_store_explicit( |
| 273 | a: reinterpret_cast<std::atomic<uint16_t> *>(Address), i: NopwSeq, |
| 274 | m: std::memory_order_release); |
| 275 | } else { |
| 276 | std::atomic_store_explicit( |
| 277 | a: reinterpret_cast<std::atomic<uint16_t> *>(Address), i: Jmp15Seq, |
| 278 | m: std::memory_order_release); |
| 279 | } |
| 280 | return false; |
| 281 | } |
| 282 | |
| 283 | bool patchTypedEvent(const bool Enable, const uint32_t FuncId, |
| 284 | const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { |
| 285 | // Here we do the dance of replacing the following sled: |
| 286 | // |
| 287 | // xray_sled_n: |
| 288 | // jmp +20 // 2 byte instruction |
| 289 | // ... |
| 290 | // |
| 291 | // With the following: |
| 292 | // |
| 293 | // nopw // 2 bytes |
| 294 | // ... |
| 295 | // |
| 296 | // |
| 297 | // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. |
| 298 | // The 20 byte sled stashes three argument registers, calls the trampoline, |
| 299 | // unstashes the registers and returns. If the arguments are already in |
| 300 | // the correct registers, the stashing and unstashing become equivalently |
| 301 | // sized nops. |
| 302 | const uint64_t Address = Sled.address(); |
| 303 | if (Enable) { |
| 304 | std::atomic_store_explicit( |
| 305 | a: reinterpret_cast<std::atomic<uint16_t> *>(Address), i: NopwSeq, |
| 306 | m: std::memory_order_release); |
| 307 | } else { |
| 308 | std::atomic_store_explicit( |
| 309 | a: reinterpret_cast<std::atomic<uint16_t> *>(Address), i: Jmp20Seq, |
| 310 | m: std::memory_order_release); |
| 311 | } |
| 312 | return false; |
| 313 | } |
| 314 | |
| 315 | #if !SANITIZER_FUCHSIA |
| 316 | // We determine whether the CPU we're running on has the correct features we |
| 317 | // need. In x86_64 this will be rdtscp support. |
| 318 | bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { |
| 319 | unsigned int EAX, EBX, ECX, EDX; |
| 320 | |
| 321 | // We check whether rdtscp support is enabled. According to the x86_64 manual, |
| 322 | // level should be set at 0x80000001, and we should have a look at bit 27 in |
| 323 | // EDX. That's 0x8000000 (or 1u << 27). |
| 324 | __asm__ __volatile__("cpuid" : "=a" (EAX), "=b" (EBX), "=c" (ECX), "=d" (EDX) |
| 325 | : "0" (0x80000001)); |
| 326 | if (!(EDX & (1u << 27))) { |
| 327 | Report(format: "Missing rdtscp support.\n" ); |
| 328 | return false; |
| 329 | } |
| 330 | // Also check whether we can determine the CPU frequency, since if we cannot, |
| 331 | // we should use the emulated TSC instead. |
| 332 | if (!getTSCFrequency()) { |
| 333 | Report(format: "Unable to determine CPU frequency.\n" ); |
| 334 | return false; |
| 335 | } |
| 336 | return true; |
| 337 | } |
| 338 | #endif |
| 339 | |
| 340 | } // namespace __xray |
| 341 | |