#include "cpuid.h"
#include "sanitizer_common/sanitizer_common.h"
#if !SANITIZER_FUCHSIA
#include "sanitizer_common/sanitizer_posix.h"
#endif
#include "xray_defs.h"
#include "xray_interface_internal.h"

#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
#include <sys/types.h>
#include <sys/sysctl.h>
#elif SANITIZER_FUCHSIA
#include <zircon/syscalls.h>
#endif

#include <atomic>
#include <cstdint>
#include <errno.h>
#include <fcntl.h>
#include <iterator>
#include <limits>
#include <tuple>
#include <unistd.h>
namespace __xray {

#if SANITIZER_LINUX
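// Reads from Fd into the buffer [Begin, End), retrying on EINTR and continuing
// across short reads. Returns the number of bytes read and whether the read
// completed without error.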
static std::pair<ssize_t, bool>
retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
  auto BytesToRead = std::distance(Begin, End);
  ssize_t BytesRead;
  ssize_t TotalBytesRead = 0;
  while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
    if (BytesRead == -1) {
      if (errno == EINTR)
        continue;
      Report("Read error; errno = %d\n", errno);
      return std::make_pair(TotalBytesRead, false);
    }

    TotalBytesRead += BytesRead;
    BytesToRead -= BytesRead;
    Begin += BytesRead;
  }
  return std::make_pair(TotalBytesRead, true);
}

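// Parses a single decimal integer out of the named file (typically a sysfs
// node). Returns true and stores the parsed value in *Value on success.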
static bool readValueFromFile(const char *Filename,
                              long long *Value) XRAY_NEVER_INSTRUMENT {
  int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
  if (Fd == -1)
    return false;
  static constexpr size_t BufSize = 256;
  char Line[BufSize] = {};
  ssize_t BytesRead;
  bool Success;
  std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
  close(Fd);
  if (!Success)
    return false;
  const char *End = nullptr;
  long long Tmp = internal_simple_strtoll(Line, &End, 10);
  bool Result = false;
  if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
    *Value = Tmp;
    Result = true;
  }
  return Result;
}

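// Attempts to read the TSC frequency from sysfs. Both nodes consulted below
// report frequencies in kHz, hence the multiplication by 1000 to get Hz.
// Returns 0 if the frequency could not be determined.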
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  long long TSCFrequency = -1;
  if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
                        &TSCFrequency)) {
    TSCFrequency *= 1000;
  } else if (readValueFromFile(
                 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                 &TSCFrequency)) {
    TSCFrequency *= 1000;
  } else {
    Report("Unable to determine CPU frequency for TSC accounting.\n");
  }
  return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
}
#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  long long TSCFrequency = -1;
  size_t tscfreqsz = sizeof(TSCFrequency);
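  // These sysctl nodes report the TSC frequency directly in Hz, so the value
  // is returned without further scaling.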
#if SANITIZER_APPLE
  if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
                            &tscfreqsz, NULL, 0) != -1) {

#else
  if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
                            NULL, 0) != -1) {
#endif
    return static_cast<uint64_t>(TSCFrequency);
  } else {
    Report("Unable to determine CPU frequency for TSC accounting.\n");
  }

  return 0;
}
#elif !SANITIZER_FUCHSIA
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  /* Not supported */
  return 0;
}
#endif

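// The 16-bit sequences below are stored in host (little-endian) order, so they
// write the instruction bytes in decode order: 0xba41 emits 41 ba, the first
// two bytes of "mov r10d, imm32" (REX.B prefix + opcode); 0x09eb emits eb 09,
// a short "jmp +9"; and 0x9066 emits 66 90, the two-byte nopw (operand-size
// prefix + nop).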
static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint16_t MovR10Seq = 0xba41;
static constexpr uint16_t Jmp9Seq = 0x09eb;
static constexpr uint16_t Jmp20Seq = 0x14eb;
static constexpr uint16_t Jmp15Seq = 0x0feb;
static constexpr uint8_t JmpOpCode = 0xe9;
static constexpr uint8_t RetOpCode = 0xc3;
static constexpr uint16_t NopwSeq = 0x9066;

static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};

bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                        const XRaySledEntry &Sled,
                        const XRayTrampolines &Trampolines,
                        bool LogArgs) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +9
  //   <9 byte nop>
  //
  // With the following:
  //
  //   mov r10d, <function id>
  //   call <relative 32bit offset to entry trampoline>
  //
  // We need to do this in the following order:
  //
  // 1. Put the function id first, 2 bytes from the start of the sled (just
  //    after the 2-byte jmp instruction).
  // 2. Put the call opcode 6 bytes from the start of the sled.
  // 3. Put the relative offset 7 bytes from the start of the sled.
  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
  //    opcode and first operand.
  //
  // Prerequisite is to compute the relative offset to the trampoline's address.
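  //
  // The patched sequence is 11 bytes long: a 6-byte "mov r10d, imm32" followed
  // by a 5-byte "call rel32". The call displacement is relative to the next
  // instruction, hence the "Address + 11" below. The final 2-byte atomic store
  // over the jmp is what makes the new sequence visible to other threads.
  //
  // When argument logging is requested, the sled targets the LogArgs
  // trampoline instead of the plain entry trampoline.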
  auto Trampoline =
      LogArgs ? Trampolines.LogArgsTrampoline : Trampolines.EntryTrampoline;
  const uint64_t Address = Sled.address();
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                             (static_cast<int64_t>(Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
           reinterpret_cast<void *>(Trampoline),
           reinterpret_cast<void *>(Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

bool patchFunctionExit(
    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   ret
  //   <10 byte nop>
  //
  // With the following:
  //
  //   mov r10d, <function id>
  //   jmp <relative 32bit offset to exit trampoline>
  //
  // 1. Put the function id first, 2 bytes from the start of the sled (just
  //    after the 1-byte ret instruction).
  // 2. Put the jmp opcode 6 bytes from the start of the sled.
  // 3. Put the relative offset 7 bytes from the start of the sled.
  // 4. Do an atomic write over the ret instruction (and the first byte of the
  //    nop) for the "mov r10d" opcode and first operand.
  //
  // Prerequisite is to compute the relative offset to the exit trampoline's
  // address.
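  //
  // The patched sequence is again 11 bytes: a 6-byte "mov r10d, imm32"
  // followed by a 5-byte "jmp rel32", so the displacement is relative to
  // Address + 11.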
  auto Trampoline = Trampolines.ExitTrampoline;
  const uint64_t Address = Sled.address();
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                             (static_cast<int64_t>(Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
           reinterpret_cast<void *>(Trampoline),
           reinterpret_cast<void *>(Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

bool patchFunctionTailExit(
    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the tail call sled with a similar
  // sequence as the entry sled, but calling the tail exit trampoline instead.
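  //
  // As with the entry sled, the patched sequence is a 6-byte mov followed by a
  // 5-byte call, so the displacement is relative to Address + 11.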
  auto Trampoline = Trampolines.TailExitTrampoline;
  const uint64_t Address = Sled.address();
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                             (static_cast<int64_t>(Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
           reinterpret_cast<void *>(Trampoline),
           reinterpret_cast<void *>(Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +15          // 2 bytes
  //   ...
  //
  // With the following:
  //
  //   nopw             // 2 bytes*
  //   ...
  //
  // The "unpatch" should just turn the 'nopw' back to a 'jmp +15'.
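  //
  // Both directions are a single 2-byte atomic store, so no multi-step
  // ordering is needed here.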
  const uint64_t Address = Sled.address();
  if (Enable) {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
        std::memory_order_release);
  }
  return false;
}

bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +20          // 2 byte instruction
  //   ...
  //
  // With the following:
  //
  //   nopw             // 2 bytes
  //   ...
  //
  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
  // The 20 byte sled stashes three argument registers, calls the trampoline,
  // unstashes the registers and returns. If the arguments are already in
  // the correct registers, the stashing and unstashing become equivalently
  // sized nops.
  const uint64_t Address = Sled.address();
  if (Enable) {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
        std::memory_order_release);
  }
  return false;
}

#if !SANITIZER_FUCHSIA
// We determine whether the CPU we're running on has the correct features we
// need. In x86_64 this will be rdtscp support.
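// The rdtscp instruction reads the TSC and the current CPU id in one shot;
// when it is not available, callers are expected to fall back to the emulated
// TSC (see the frequency check below).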
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
  unsigned int EAX, EBX, ECX, EDX;

  // We check whether rdtscp support is enabled. According to the x86_64 manual,
  // level should be set at 0x80000001, and we should have a look at bit 27 in
  // EDX. That's 0x8000000 (or 1u << 27).
  __asm__ __volatile__("cpuid"
                       : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
                       : "0"(0x80000001));
  if (!(EDX & (1u << 27))) {
    Report("Missing rdtscp support.\n");
    return false;
  }
  // Also check whether we can determine the CPU frequency, since if we cannot,
  // we should use the emulated TSC instead.
  if (!getTSCFrequency()) {
    Report("Unable to determine CPU frequency.\n");
    return false;
  }
  return true;
}
#endif

} // namespace __xray