1//===-- interception_win.cpp ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of AddressSanitizer, an address sanity checker.
10//
11// Windows-specific interception methods.
12//
13// This file is implementing several hooking techniques to intercept calls
14// to functions. The hooks are dynamically installed by modifying the assembly
15// code.
16//
17// The hooking techniques are making assumptions on the way the code is
18// generated and are safe under these assumptions.
19//
20// On 64-bit architecture, there is no direct 64-bit jump instruction. To allow
21// arbitrary branching on the whole memory space, the notion of trampoline
// region is used. A trampoline region is a memory space within 2G boundary
23// where it is safe to add custom assembly code to build 64-bit jumps.
24//
25// Hooking techniques
26// ==================
27//
28// 1) Detour
29//
30// The Detour hooking technique is assuming the presence of a header with
31// padding and an overridable 2-bytes nop instruction (mov edi, edi). The
32// nop instruction can safely be replaced by a 2-bytes jump without any need
33// to save the instruction. A jump to the target is encoded in the function
34// header and the nop instruction is replaced by a short jump to the header.
35//
36// head: 5 x nop head: jmp <hook>
37// func: mov edi, edi --> func: jmp short <head>
38// [...] real: [...]
39//
40// This technique is only implemented on 32-bit architecture.
41// Most of the time, Windows API are hookable with the detour technique.
42//
43// 2) Redirect Jump
44//
45// The redirect jump is applicable when the first instruction is a direct
46// jump. The instruction is replaced by jump to the hook.
47//
48// func: jmp <label> --> func: jmp <hook>
49//
50// On a 64-bit architecture, a trampoline is inserted.
51//
52// func: jmp <label> --> func: jmp <tramp>
53// [...]
54//
55// [trampoline]
56// tramp: jmp QWORD [addr]
57// addr: .bytes <hook>
58//
59// Note: <real> is equivalent to <label>.
60//
61// 3) HotPatch
62//
63// The HotPatch hooking is assuming the presence of a header with padding
64// and a first instruction with at least 2-bytes.
65//
66// The reason to enforce the 2-bytes limitation is to provide the minimal
67// space to encode a short jump. HotPatch technique is only rewriting one
68// instruction to avoid breaking a sequence of instructions containing a
69// branching target.
70//
71// Assumptions are enforced by MSVC compiler by using the /HOTPATCH flag.
72// see: https://msdn.microsoft.com/en-us/library/ms173507.aspx
73// Default padding length is 5 bytes in 32-bits and 6 bytes in 64-bits.
74//
75// head: 5 x nop head: jmp <hook>
76// func: <instr> --> func: jmp short <head>
77// [...] body: [...]
78//
79// [trampoline]
80// real: <instr>
81// jmp <body>
82//
83// On a 64-bit architecture:
84//
85// head: 6 x nop head: jmp QWORD [addr1]
86// func: <instr> --> func: jmp short <head>
87// [...] body: [...]
88//
89// [trampoline]
90// addr1: .bytes <hook>
91// real: <instr>
92// jmp QWORD [addr2]
93// addr2: .bytes <body>
94//
95// 4) Trampoline
96//
97// The Trampoline hooking technique is the most aggressive one. It is
98// assuming that there is a sequence of instructions that can be safely
99// replaced by a jump (enough room and no incoming branches).
100//
101// Unfortunately, these assumptions can't be safely presumed and code may
102// be broken after hooking.
103//
104// func: <instr> --> func: jmp <hook>
105// <instr>
106// [...] body: [...]
107//
108// [trampoline]
109// real: <instr>
110// <instr>
111// jmp <body>
112//
113// On a 64-bit architecture:
114//
115// func: <instr> --> func: jmp QWORD [addr1]
116// <instr>
117// [...] body: [...]
118//
119// [trampoline]
120// addr1: .bytes <hook>
121// real: <instr>
122// <instr>
123// jmp QWORD [addr2]
124// addr2: .bytes <body>
125//===----------------------------------------------------------------------===//
126
127#include "interception.h"
128
129#if SANITIZER_WINDOWS
130#include "sanitizer_common/sanitizer_platform.h"
131#define WIN32_LEAN_AND_MEAN
132#include <windows.h>
133#include <psapi.h>
134
135namespace __interception {
136
// Tells callers whether symbols can be resolved through a dynamic loader.
// This implementation always answers yes.
bool DynamicLoaderAvailable() {
  return true;
}
138
139void* OpenLibrary(const char* name) {
140 if (!name)
141 return reinterpret_cast<void*>(GetModuleHandleA(nullptr));
142 return reinterpret_cast<void*>(LoadLibraryA(name));
143}
144
145void* LookupSymbol(void* handle, const char* symbol) {
146 if (!handle)
147 return nullptr;
148 return reinterpret_cast<void*>(reinterpret_cast<__sanitizer::uptr>(
149 GetProcAddress(reinterpret_cast<HMODULE>(handle), symbol)));
150}
151
152void* LookupSymbolDefault(const char* symbol) {
153 return LookupSymbol(reinterpret_cast<void*>(GetModuleHandleA(nullptr)),
154 symbol);
155}
156
// "Next" symbol lookup is not provided here: the argument is ignored and the
// result is always nullptr.
void* LookupSymbolNext(const char*) {
  return nullptr;
}
158
// Versioned "next" symbol lookup is not provided here: both arguments are
// ignored and the result is always nullptr.
void* LookupSymbolNextVersioned(const char*, const char*) {
  return nullptr;
}
160
// Byte sizes of the pieces used when encoding jumps.

// Size of an address: 4 or 8, as selected by FIRST_32_SECOND_64.
static const int kAddressLength = FIRST_32_SECOND_64(4, 8);
// Size of the jump emitted by WriteJumpInstruction: opcode E9 followed by a
// 32-bit offset.
static const int kJumpInstructionLength = 5;
// Size of the jump emitted by WriteShortJumpInstruction: opcode EB followed
// by an 8-bit offset.
static const int kShortJumpInstructionLength = 2;
// Size of the jump emitted by WriteIndirectJumpInstruction: FF 25 followed by
// a 32-bit offset.
UNUSED static const int kIndirectJumpInstructionLength = 6;
// Size of the jump used for a branch: the direct jump length or the indirect
// jump length, as selected by FIRST_32_SECOND_64.
static const int kBranchLength =
    FIRST_32_SECOND_64(kJumpInstructionLength, kIndirectJumpInstructionLength);
// Size of a branch followed by one address slot.
static const int kDirectBranchLength = kBranchLength + kAddressLength;
168
// INTERCEPTION_FORMAT(f, a) marks a declaration as taking a printf-style
// format string at argument index |f| with variadic arguments starting at
// index |a|. It expands to nothing when _MSC_VER is defined.
#  if defined(_MSC_VER)
#    define INTERCEPTION_FORMAT(f, a)
#  else
#    define INTERCEPTION_FORMAT(f, a) __attribute__((format(printf, f, a)))
#  endif
174
// Optional printf-style sink for diagnostics. It stays null until
// SetErrorReportCallback installs a callback; ReportError checks for null
// before calling it.
static void (*ErrorReportCallback)(const char *format, ...)
    INTERCEPTION_FORMAT(1, 2);
177
// Installs |callback| as the destination for messages emitted through
// ReportError. Passing a null pointer turns reporting off again, because
// ReportError skips the call when the stored pointer is null.
void SetErrorReportCallback(void (*callback)(const char *format, ...)) {
  ErrorReportCallback = callback;
}
181
// Forwards its printf-style arguments to ErrorReportCallback and does nothing
// when no callback is installed. The do/while(0) wrapper makes the expansion
// behave as a single statement.
#  define ReportError(...)                \
    do {                                  \
      if (ErrorReportCallback)            \
        ErrorReportCallback(__VA_ARGS__); \
    } while (0)
187
// Reports an unrecoverable interception failure through ReportError and then
// raises a breakpoint.
static void InterceptionFailed() {
  ReportError("interception_win: failed due to an unrecoverable error.\n");
  // This acts like an abort when no debugger is attached. According to an old
  // comment, calling abort() leads to an infinite recursion in CheckFailed.
  __debugbreak();
}
194
195static bool DistanceIsWithin2Gig(uptr from, uptr target) {
196#if SANITIZER_WINDOWS64
197 if (from < target)
198 return target - from <= (uptr)0x7FFFFFFFU;
199 else
200 return from - target <= (uptr)0x80000000U;
201#else
202 // In a 32-bit address space, the address calculation will wrap, so this check
203 // is unnecessary.
204 return true;
205#endif
206}
207
208static uptr GetMmapGranularity() {
209 SYSTEM_INFO si;
210 GetSystemInfo(&si);
211 return si.dwAllocationGranularity;
212}
213
214UNUSED static uptr RoundDownTo(uptr size, uptr boundary) {
215 return size & ~(boundary - 1);
216}
217
218UNUSED static uptr RoundUpTo(uptr size, uptr boundary) {
219 return RoundDownTo(size + boundary - 1, boundary);
220}
221
222// FIXME: internal_str* and internal_mem* functions should be moved from the
223// ASan sources into interception/.
224
// Counts the characters of |str| that precede its terminating NUL.
static size_t _strlen(const char *str) {
  size_t length = 0;
  while (str[length] != '\0')
    ++length;
  return length;
}
230
// Returns a pointer to the first occurrence of |c| in |str|, or nullptr when
// there is none. The terminating NUL is never matched, so searching for '\0'
// yields nullptr.
static char* _strchr(char* str, char c) {
  for (char *cursor = str; *cursor != '\0'; ++cursor) {
    if (*cursor == c)
      return cursor;
  }
  return nullptr;
}
239
// Compares two NUL-terminated strings character by character. Returns -1 or 1
// at the first position where they differ (according to which character is
// smaller once converted to unsigned), and 0 when both strings end together.
static int _strcmp(const char *s1, const char *s2) {
  for (;; ++s1, ++s2) {
    const unsigned lhs = *s1;
    const unsigned rhs = *s2;
    if (lhs != rhs)
      return lhs < rhs ? -1 : 1;
    if (lhs == 0)
      return 0;
  }
}
251
// Stores |value|, converted to char, into each of the |sz| bytes starting at
// |p|.
static void _memset(void *p, int value, size_t sz) {
  char *cursor = (char*)p;
  const char fill = (char)value;
  for (char *end = cursor + sz; cursor != end; ++cursor)
    *cursor = fill;
}
256
// Copies |sz| bytes from |src| to |dst|, one byte at a time in ascending
// address order.
static void _memcpy(void *dst, void *src, size_t sz) {
  char *out = (char*)dst;
  const char *in = (const char*)src;
  while (sz-- > 0)
    *out++ = *in++;
}
263
264static bool ChangeMemoryProtection(
265 uptr address, uptr size, DWORD *old_protection) {
266 return ::VirtualProtect((void*)address, size,
267 PAGE_EXECUTE_READWRITE,
268 old_protection) != FALSE;
269}
270
271static bool RestoreMemoryProtection(
272 uptr address, uptr size, DWORD old_protection) {
273 DWORD unused;
274 return ::VirtualProtect((void*)address, size,
275 old_protection,
276 &unused) != FALSE;
277}
278
279static bool IsMemoryPadding(uptr address, uptr size) {
280 u8* function = (u8*)address;
281 for (size_t i = 0; i < size; ++i)
282 if (function[i] != 0x90 && function[i] != 0xCC)
283 return false;
284 return true;
285}
286
// Encoding of an 8-byte multi-byte NOP. FunctionHasPadding accepts this
// sequence immediately before a function as valid padding.
static const u8 kHintNop8Bytes[] = {
    0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
};
290
291template<class T>
292static bool FunctionHasPrefix(uptr address, const T &pattern) {
293 u8* function = (u8*)address - sizeof(pattern);
294 for (size_t i = 0; i < sizeof(pattern); ++i)
295 if (function[i] != pattern[i])
296 return false;
297 return true;
298}
299
300static bool FunctionHasPadding(uptr address, uptr size) {
301 if (IsMemoryPadding(address - size, size))
302 return true;
303 if (size <= sizeof(kHintNop8Bytes) &&
304 FunctionHasPrefix(address, kHintNop8Bytes))
305 return true;
306 return false;
307}
308
// Fills the |size| bytes starting at |from| with 0xCC (the int 3 opcode).
static void WritePadding(uptr from, uptr size) {
  _memset((void*)from, 0xCC, (size_t)size);
}
312
313static void WriteJumpInstruction(uptr from, uptr target) {
314 if (!DistanceIsWithin2Gig(from + kJumpInstructionLength, target)) {
315 ReportError(
316 "interception_win: cannot write jmp further than 2GB away, from %p to "
317 "%p.\n",
318 (void *)from, (void *)target);
319 InterceptionFailed();
320 }
321 ptrdiff_t offset = target - from - kJumpInstructionLength;
322 *(u8*)from = 0xE9;
323 *(u32*)(from + 1) = offset;
324}
325
326static void WriteShortJumpInstruction(uptr from, uptr target) {
327 sptr offset = target - from - kShortJumpInstructionLength;
328 if (offset < -128 || offset > 127) {
329 ReportError("interception_win: cannot write short jmp from %p to %p\n",
330 (void *)from, (void *)target);
331 InterceptionFailed();
332 }
333 *(u8*)from = 0xEB;
334 *(u8*)(from + 1) = (u8)offset;
335}
336
#if SANITIZER_WINDOWS64
// Writes a 6-byte indirect jump at |from| whose destination is read from the
// memory at |indirect_target|.
static void WriteIndirectJumpInstruction(uptr from, uptr indirect_target) {
  // jmp [rip + <offset>] = FF 25 <offset> where <offset> is a relative
  // offset.
  // The offset is the distance from the end of the jump instruction to the
  // memory location containing the targeted address. The displacement is still
  // 32-bit in x64, so indirect_target must be located within +/- 2GB range.
  const uptr next_instruction = from + kIndirectJumpInstructionLength;
  if (!DistanceIsWithin2Gig(next_instruction, indirect_target)) {
    ReportError(
        "interception_win: cannot write indirect jmp with target further than "
        "2GB away, from %p to %p.\n",
        (void *)from, (void *)indirect_target);
    InterceptionFailed();
  }
  const int rel32 = indirect_target - from - kIndirectJumpInstructionLength;
  *(u16*)from = 0x25FF;
  *(u32*)(from + 2) = rel32;
}
#endif
357
// Emits a branch at |from| that transfers control to |target|.
//
// 64-bit: writes an indirect jump at |from| that reads its destination from
// the memory at |indirect_target|, then stores |target| there as a u64.
// 32-bit: writes a relative jump straight to |target|; |indirect_target| is
// ignored.
static void WriteBranch(
    uptr from, uptr indirect_target, uptr target) {
#if SANITIZER_WINDOWS64
  WriteIndirectJumpInstruction(from, indirect_target);
  *(u64*)indirect_target = target;
#else
  (void)indirect_target;
  WriteJumpInstruction(from, target);
#endif
}
368
// Emits a self-contained branch at |from| that transfers control to |target|.
//
// 64-bit: the address slot read by the indirect jump sits right after the
// jump itself, at |from| + kBranchLength.
// 32-bit: a plain relative jump to |target| is written.
static void WriteDirectBranch(uptr from, uptr target) {
#if SANITIZER_WINDOWS64
  // Emit an indirect jump through immediately following bytes:
  //   jmp [rip + kBranchLength]
  //   .quad <target>
  WriteBranch(from, from + kBranchLength, target);
#else
  WriteJumpInstruction(from, target);
#endif
}
379
// Bookkeeping for one block of memory that trampolines are carved out of.
struct TrampolineMemoryRegion {
  // Base address of the block; 0 marks a slot that holds no block.
  uptr content;
  // Number of bytes already handed out, counted from |content|.
  uptr allocated_size;
  // Total size of the block in bytes.
  uptr max_size;
};
385
// Span, in bytes, used on 64-bit builds to bound how far a trampoline may sit
// from the code that refers to it.
UNUSED static const uptr kTrampolineRangeLimit = 1ull << 31;  // 2 gig
// Number of slots in the TrampolineRegions table.
static const int kMaxTrampolineRegion = 1024;
// Table of trampoline regions. Slots are filled in order; a slot whose
// |content| is 0 has not been allocated.
static TrampolineMemoryRegion TrampolineRegions[kMaxTrampolineRegion];
389
// Reserves and commits one executable, writable block of |granularity| bytes
// for trampolines and returns its address.
//
// 64-bit: the block is searched for inside [min_addr, max_addr] (after
// clamping both bounds to the application address range reported by
// GetSystemInfo), probing outwards from |func_addr|. Returns nullptr, after
// reporting through ReportError, when VirtualQuery or VirtualAlloc fails or
// when no free block is found in the interval.
//
// Otherwise: the range arguments are not consulted and VirtualAlloc chooses
// the address; its result is returned as is.
static void *AllocateTrampolineRegion(uptr min_addr, uptr max_addr,
                                      uptr func_addr, size_t granularity) {
#  if SANITIZER_WINDOWS64
  // Clamp {min,max}_addr to the accessible address space.
  SYSTEM_INFO system_info;
  ::GetSystemInfo(&system_info);
  uptr min_virtual_addr =
      RoundUpTo((uptr)system_info.lpMinimumApplicationAddress, granularity);
  uptr max_virtual_addr =
      RoundDownTo((uptr)system_info.lpMaximumApplicationAddress, granularity);
  if (min_addr < min_virtual_addr)
    min_addr = min_virtual_addr;
  if (max_addr > max_virtual_addr)
    max_addr = max_virtual_addr;

  // This loop probes the virtual address space to find free memory in the
  // [min_addr, max_addr] interval. The search starts from func_addr and
  // proceeds "outwards" towards the interval bounds using two probes, lo_addr
  // and hi_addr, for addresses lower/higher than func_addr. At each step, it
  // considers the probe closest to func_addr. If that address is not free, the
  // probe is advanced (lower or higher depending on the probe) to the next
  // memory block and the search continues.
  uptr lo_addr = RoundDownTo(func_addr, granularity);
  uptr hi_addr = RoundUpTo(func_addr, granularity);
  while (lo_addr >= min_addr || hi_addr <= max_addr) {
    // Consider the in-range address closest to func_addr.
    uptr addr;
    if (lo_addr < min_addr)
      addr = hi_addr;
    else if (hi_addr > max_addr)
      addr = lo_addr;
    else
      addr = (hi_addr - func_addr < func_addr - lo_addr) ? hi_addr : lo_addr;

    MEMORY_BASIC_INFORMATION info;
    if (!::VirtualQuery((void *)addr, &info, sizeof(info))) {
      ReportError(
          "interception_win: VirtualQuery in AllocateTrampolineRegion failed "
          "for %p\n",
          (void *)addr);
      return nullptr;
    }

    // Check whether a region can be allocated at |addr|.
    if (info.State == MEM_FREE && info.RegionSize >= granularity) {
      void *page =
          ::VirtualAlloc((void *)addr, granularity, MEM_RESERVE | MEM_COMMIT,
                         PAGE_EXECUTE_READWRITE);
      // A failed allocation ends the search: the null result is returned to
      // the caller after being reported.
      if (page == nullptr)
        ReportError(
            "interception_win: VirtualAlloc in AllocateTrampolineRegion failed "
            "for %p\n",
            (void *)addr);
      return page;
    }

    // Both probes may be equal to |addr| (e.g. when func_addr is already
    // aligned), in which case both are advanced.
    // Move the low probe to the aligned address one granule below the
    // allocation that contains |addr|.
    if (addr == lo_addr)
      lo_addr =
          RoundDownTo((uptr)info.AllocationBase - granularity, granularity);
    // Move the high probe to the first aligned address at or past the end of
    // the queried region.
    if (addr == hi_addr)
      hi_addr =
          RoundUpTo((uptr)info.BaseAddress + info.RegionSize, granularity);
  }

  ReportError(
      "interception_win: AllocateTrampolineRegion failed to find free memory; "
      "min_addr: %p, max_addr: %p, func_addr: %p, granularity: %zu\n",
      (void *)min_addr, (void *)max_addr, (void *)func_addr, granularity);
  return nullptr;
#else
  return ::VirtualAlloc(nullptr,
                        granularity,
                        MEM_RESERVE | MEM_COMMIT,
                        PAGE_EXECUTE_READWRITE);
#endif
}
466
467// Used by unittests to release mapped memory space.
468void TestOnlyReleaseTrampolineRegions() {
469 for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
470 TrampolineMemoryRegion *current = &TrampolineRegions[bucket];
471 if (current->content == 0)
472 return;
473 ::VirtualFree((void*)current->content, 0, MEM_RELEASE);
474 current->content = 0;
475 }
476}
477
478static uptr AllocateMemoryForTrampoline(uptr func_address, size_t size) {
479# if SANITIZER_WINDOWS64
480 uptr min_addr = func_address - kTrampolineRangeLimit;
481 uptr max_addr = func_address + kTrampolineRangeLimit - size;
482
483 // Allocate memory within 2GB of the module (DLL or EXE file) so that any
484 // address within the module can be referenced with PC-relative operands.
485 // This allows us to not just jump to the trampoline with a PC-relative
486 // offset, but to relocate any instructions that we copy to the trampoline
487 // which have references to the original module. If we can't find the base
488 // address of the module (e.g. if func_address is in mmap'ed memory), just
489 // stay within 2GB of func_address.
490 HMODULE module;
491 if (::GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
492 GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
493 (LPCWSTR)func_address, &module)) {
494 MODULEINFO module_info;
495 if (::GetModuleInformation(::GetCurrentProcess(), module,
496 &module_info, sizeof(module_info))) {
497 min_addr = (uptr)module_info.lpBaseOfDll + module_info.SizeOfImage -
498 kTrampolineRangeLimit;
499 max_addr = (uptr)module_info.lpBaseOfDll + kTrampolineRangeLimit - size;
500 }
501 }
502
503 // Check for overflow.
504 if (min_addr > func_address)
505 min_addr = 0;
506 if (max_addr < func_address)
507 max_addr = ~(uptr)0;
508# else
509 uptr min_addr = 0;
510 uptr max_addr = ~min_addr;
511# endif
512
513 // Find a region within [min_addr,max_addr] with enough space to allocate
514 // |size| bytes.
515 TrampolineMemoryRegion *region = nullptr;
516 for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
517 TrampolineMemoryRegion* current = &TrampolineRegions[bucket];
518 if (current->content == 0) {
519 // No valid region found, allocate a new region.
520 size_t bucket_size = GetMmapGranularity();
521 void *content = AllocateTrampolineRegion(min_addr, max_addr, func_address,
522 bucket_size);
523 if (content == nullptr)
524 return 0U;
525
526 current->content = (uptr)content;
527 current->allocated_size = 0;
528 current->max_size = bucket_size;
529 region = current;
530 break;
531 } else if (current->max_size - current->allocated_size > size) {
532 uptr next_address = current->content + current->allocated_size;
533 if (next_address < min_addr || next_address > max_addr)
534 continue;
535 // The space can be allocated in the current region.
536 region = current;
537 break;
538 }
539 }
540
541 // Failed to find a region.
542 if (region == nullptr)
543 return 0U;
544
545 // Allocate the space in the current region.
546 uptr allocated_space = region->content + region->allocated_size;
547 region->allocated_size += size;
548 WritePadding(allocated_space, size);
549
550 return allocated_space;
551}
552
// The following prologues cannot be patched because of the short jump
// jumping to the patching region.
// Each one ends with a short backward jne whose destination is byte 3 of the
// sequence itself. GetInstructionSize returns 0 for a function that starts
// with either sequence.

// Short jump patterns below are only for x86_64.
#  if SANITIZER_WINDOWS_x64
// ntdll!wcslen in Win11
//   488bc1          mov     rax,rcx
//   0fb710          movzx   edx,word ptr [rax]
//   4883c002        add     rax,2
//   6685d2          test    dx,dx
//   75f4            jne     -12
static const u8 kPrologueWithShortJump1[] = {
    0x48, 0x8b, 0xc1, 0x0f, 0xb7, 0x10, 0x48, 0x83,
    0xc0, 0x02, 0x66, 0x85, 0xd2, 0x75, 0xf4,
};

// ntdll!strrchr in Win11
//   4c8bc1          mov     r8,rcx
//   8a01            mov     al,byte ptr [rcx]
//   48ffc1          inc     rcx
//   84c0            test    al,al
//   75f7            jne     -9
static const u8 kPrologueWithShortJump2[] = {
    0x4c, 0x8b, 0xc1, 0x8a, 0x01, 0x48, 0xff, 0xc1,
    0x84, 0xc0, 0x75, 0xf7,
};
#endif
580
581// Returns 0 on error.
582static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
583 if (rel_offset) {
584 *rel_offset = 0;
585 }
586
587#if SANITIZER_ARM64
588 // An ARM64 instruction is 4 bytes long.
589 return 4;
590#endif
591
592# if SANITIZER_WINDOWS_x64
593 if (memcmp((u8*)address, kPrologueWithShortJump1,
594 sizeof(kPrologueWithShortJump1)) == 0 ||
595 memcmp((u8*)address, kPrologueWithShortJump2,
596 sizeof(kPrologueWithShortJump2)) == 0) {
597 return 0;
598 }
599#endif
600
601 switch (*(u64*)address) {
602 case 0x90909090909006EB: // stub: jmp over 6 x nop.
603 return 8;
604 }
605
606 switch (*(u8*)address) {
607 case 0x90: // 90 : nop
608 case 0xC3: // C3 : ret (for small/empty function interception
609 case 0xCC: // CC : int 3 i.e. registering weak functions)
610 return 1;
611
612 case 0x50: // push eax / rax
613 case 0x51: // push ecx / rcx
614 case 0x52: // push edx / rdx
615 case 0x53: // push ebx / rbx
616 case 0x54: // push esp / rsp
617 case 0x55: // push ebp / rbp
618 case 0x56: // push esi / rsi
619 case 0x57: // push edi / rdi
620 case 0x5D: // pop ebp / rbp
621 return 1;
622
623 case 0x6A: // 6A XX = push XX
624 return 2;
625
626 // This instruction can be encoded with a 16-bit immediate but that is
627 // incredibly unlikely.
628 case 0x68: // 68 XX XX XX XX : push imm32
629 return 5;
630
631 case 0xb8: // b8 XX XX XX XX : mov eax, XX XX XX XX
632 case 0xB9: // b9 XX XX XX XX : mov ecx, XX XX XX XX
633 case 0xBA: // ba XX XX XX XX : mov edx, XX XX XX XX
634 return 5;
635
636 // Cannot overwrite control-instruction. Return 0 to indicate failure.
637 case 0xE9: // E9 XX XX XX XX : jmp <label>
638 case 0xE8: // E8 XX XX XX XX : call <func>
639 case 0xEB: // EB XX : jmp XX (short jump)
640 case 0x70: // 7Y YY : jy XX (short conditional jump)
641 case 0x71:
642 case 0x72:
643 case 0x73:
644 case 0x74:
645 case 0x75:
646 case 0x76:
647 case 0x77:
648 case 0x78:
649 case 0x79:
650 case 0x7A:
651 case 0x7B:
652 case 0x7C:
653 case 0x7D:
654 case 0x7E:
655 case 0x7F:
656 return 0;
657 }
658
659 switch (*(u16*)(address)) {
660 case 0x018A: // 8A 01 : mov al, byte ptr [ecx]
661 case 0xFF8B: // 8B FF : mov edi, edi
662 case 0xEC8B: // 8B EC : mov ebp, esp
663 case 0xc889: // 89 C8 : mov eax, ecx
664 case 0xD189: // 89 D1 : mov ecx, edx
665 case 0xE589: // 89 E5 : mov ebp, esp
666 case 0xC18B: // 8B C1 : mov eax, ecx
667 case 0xC031: // 31 C0 : xor eax, eax
668 case 0xC931: // 31 C9 : xor ecx, ecx
669 case 0xD231: // 31 D2 : xor edx, edx
670 case 0xC033: // 33 C0 : xor eax, eax
671 case 0xC933: // 33 C9 : xor ecx, ecx
672 case 0xD233: // 33 D2 : xor edx, edx
673 case 0xFF33: // 33 FF : xor edi, edi
674 case 0x9066: // 66 90 : xchg %ax,%ax (Two-byte NOP)
675 case 0xDB84: // 84 DB : test bl,bl
676 case 0xC084: // 84 C0 : test al,al
677 case 0xC984: // 84 C9 : test cl,cl
678 case 0xD284: // 84 D2 : test dl,dl
679 return 2;
680
681 case 0x3980: // 80 39 XX : cmp BYTE PTR [rcx], XX
682 case 0x3a80: // 80 3A XX : cmp BYTE PTR [rdx], XX
683 case 0x4D8B: // 8B 4D XX : mov XX(%ebp), ecx
684 case 0x558B: // 8B 55 XX : mov XX(%ebp), edx
685 case 0x758B: // 8B 75 XX : mov XX(%ebp), esp
686 case 0xE483: // 83 E4 XX : and esp, XX
687 case 0xEC83: // 83 EC XX : sub esp, XX
688 case 0xC1F6: // F6 C1 XX : test cl, XX
689 return 3;
690
691 case 0x89FF: // FF 89 XX XX XX XX : dec dword ptr [ecx + XX XX XX XX]
692 case 0xEC81: // 81 EC XX XX XX XX : sub esp, XX XX XX XX
693 return 6;
694
695 // Cannot overwrite control-instruction. Return 0 to indicate failure.
696 case 0x25FF: // FF 25 XX YY ZZ WW : jmp dword ptr ds:[WWZZYYXX]
697 return 0;
698 }
699
700 switch (0x00FFFFFF & *(u32 *)address) {
701 case 0x244C8D: // 8D 4C 24 XX : lea ecx, [esp + XX]
702 case 0x2474FF: // FF 74 24 XX : push qword ptr [rsp + XX]
703 return 4;
704 case 0x24A48D: // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX]
705 return 7;
706 }
707
708 switch (0x000000FF & *(u32 *)address) {
709 case 0xc2: // C2 XX XX : ret XX (needed for registering weak functions)
710 return 3;
711 }
712
713# if SANITIZER_WINDOWS_x64
714 switch (*(u8*)address) {
715 case 0xA1: // A1 XX XX XX XX XX XX XX XX :
716 // movabs eax, dword ptr ds:[XXXXXXXX]
717 return 9;
718 case 0xF2:
719 switch (*(u32 *)(address + 1)) {
720 case 0x2444110f: // f2 0f 11 44 24 XX movsd QWORD PTR
721 // [rsp + XX], xmm0
722 case 0x244c110f: // f2 0f 11 4c 24 XX movsd QWORD PTR
723 // [rsp + XX], xmm1
724 case 0x2454110f: // f2 0f 11 54 24 XX movsd QWORD PTR
725 // [rsp + XX], xmm2
726 case 0x245c110f: // f2 0f 11 5c 24 XX movsd QWORD PTR
727 // [rsp + XX], xmm3
728 case 0x2464110f: // f2 0f 11 64 24 XX movsd QWORD PTR
729 // [rsp + XX], xmm4
730 return 6;
731 }
732 break;
733
734 case 0x83:
735 const u8 next_byte = *(u8*)(address + 1);
736 const u8 mod = next_byte >> 6;
737 const u8 rm = next_byte & 7;
738 if (mod == 1 && rm == 4)
739 return 5; // 83 ModR/M SIB Disp8 Imm8
740 // add|or|adc|sbb|and|sub|xor|cmp [r+disp8], imm8
741 }
742
743 switch (*(u16*)address) {
744 case 0x5040: // push rax
745 case 0x5140: // push rcx
746 case 0x5240: // push rdx
747 case 0x5340: // push rbx
748 case 0x5440: // push rsp
749 case 0x5540: // push rbp
750 case 0x5640: // push rsi
751 case 0x5740: // push rdi
752 case 0x5441: // push r12
753 case 0x5541: // push r13
754 case 0x5641: // push r14
755 case 0x5741: // push r15
756 case 0xc084: // test al, al
757 case 0x018a: // mov al, byte ptr [rcx]
758 return 2;
759
760 case 0x7E80: // 80 7E YY XX cmp BYTE PTR [rsi+YY], XX
761 case 0x7D80: // 80 7D YY XX cmp BYTE PTR [rbp+YY], XX
762 case 0x7A80: // 80 7A YY XX cmp BYTE PTR [rdx+YY], XX
763 case 0x7880: // 80 78 YY XX cmp BYTE PTR [rax+YY], XX
764 case 0x7B80: // 80 7B YY XX cmp BYTE PTR [rbx+YY], XX
765 case 0x7980: // 80 79 YY XX cmp BYTE ptr [rcx+YY], XX
766 return 4;
767
768 case 0x058A: // 8A 05 XX XX XX XX : mov al, byte ptr [XX XX XX XX]
769 case 0x058B: // 8B 05 XX XX XX XX : mov eax, dword ptr [XX XX XX XX]
770 if (rel_offset)
771 *rel_offset = 2;
772 FALLTHROUGH;
773 case 0xB841: // 41 B8 XX XX XX XX : mov r8d, XX XX XX XX
774 return 6;
775
776 case 0x7E81: // 81 7E YY XX XX XX XX cmp DWORD PTR [rsi+YY], XX XX XX XX
777 case 0x7D81: // 81 7D YY XX XX XX XX cmp DWORD PTR [rbp+YY], XX XX XX XX
778 case 0x7A81: // 81 7A YY XX XX XX XX cmp DWORD PTR [rdx+YY], XX XX XX XX
779 case 0x7881: // 81 78 YY XX XX XX XX cmp DWORD PTR [rax+YY], XX XX XX XX
780 case 0x7B81: // 81 7B YY XX XX XX XX cmp DWORD PTR [rbx+YY], XX XX XX XX
781 case 0x7981: // 81 79 YY XX XX XX XX cmp dword ptr [rcx+YY], XX XX XX XX
782 return 7;
783
784 case 0xb848: // 48 b8 XX XX XX XX XX XX XX XX :
785 // movabsq XX XX XX XX XX XX XX XX, rax
786 case 0xba48: // 48 ba XX XX XX XX XX XX XX XX :
787 // movabsq XX XX XX XX XX XX XX XX, rdx
788 return 10;
789 }
790
791 switch (0x00FFFFFF & *(u32 *)address) {
792 case 0x10b70f: // 0f b7 10 : movzx edx, WORD PTR [rax]
793 case 0x02b70f: // 0f b7 02 : movzx eax, WORD PTR [rdx]
794 case 0xc00b4d: // 4d 0b c0 : or r8, r8
795 case 0xc03345: // 45 33 c0 : xor r8d, r8d
796 case 0xc08548: // 48 85 c0 : test rax, rax
797 case 0xc0854d: // 4d 85 c0 : test r8, r8
798 case 0xc08b41: // 41 8b c0 : mov eax, r8d
799 case 0xc0ff48: // 48 ff c0 : inc rax
800 case 0xc0ff49: // 49 ff c0 : inc r8
801 case 0xc18b41: // 41 8b c1 : mov eax, r9d
802 case 0xc18b48: // 48 8b c1 : mov rax, rcx
803 case 0xc18b4c: // 4c 8b c1 : mov r8, rcx
804 case 0xc1ff48: // 48 ff c1 : inc rcx
805 case 0xc1ff49: // 49 ff c1 : inc r9
806 case 0xc28b41: // 41 8b c2 : mov eax, r10d
807 case 0x01b60f: // 0f b6 01 : movzx eax, BYTE PTR [rcx]
808 case 0x09b60f: // 0f b6 09 : movzx ecx, BYTE PTR [rcx]
809 case 0x11b60f: // 0f b6 11 : movzx edx, BYTE PTR [rcx]
810 case 0xc2b60f: // 0f b6 c2 : movzx eax, dl
811 case 0xc2ff48: // 48 ff c2 : inc rdx
812 case 0xc2ff49: // 49 ff c2 : inc r10
813 case 0xc38b41: // 41 8b c3 : mov eax, r11d
814 case 0xc3ff48: // 48 ff c3 : inc rbx
815 case 0xc3ff49: // 49 ff c3 : inc r11
816 case 0xc48b41: // 41 8b c4 : mov eax, r12d
817 case 0xc48b48: // 48 8b c4 : mov rax, rsp
818 case 0xc4ff49: // 49 ff c4 : inc r12
819 case 0xc5ff49: // 49 ff c5 : inc r13
820 case 0xc6ff48: // 48 ff c6 : inc rsi
821 case 0xc6ff49: // 49 ff c6 : inc r14
822 case 0xc7ff48: // 48 ff c7 : inc rdi
823 case 0xc7ff49: // 49 ff c7 : inc r15
824 case 0xc93345: // 45 33 c9 : xor r9d, r9d
825 case 0xc98548: // 48 85 c9 : test rcx, rcx
826 case 0xc9854d: // 4d 85 c9 : test r9, r9
827 case 0xc98b4c: // 4c 8b c9 : mov r9, rcx
828 case 0xd12948: // 48 29 d1 : sub rcx, rdx
829 case 0xc22b4c: // 4c 2b c2 : sub r8, rdx
830 case 0xca2b48: // 48 2b ca : sub rcx, rdx
831 case 0xca3b48: // 48 3b ca : cmp rcx, rdx
832 case 0xd12b48: // 48 2b d1 : sub rdx, rcx
833 case 0xd18b48: // 48 8b d1 : mov rdx, rcx
834 case 0xd18b4c: // 4c 8b d1 : mov r10, rcx
835 case 0xd28548: // 48 85 d2 : test rdx, rdx
836 case 0xd2854d: // 4d 85 d2 : test r10, r10
837 case 0xd28b4c: // 4c 8b d2 : mov r10, rdx
838 case 0xd2b60f: // 0f b6 d2 : movzx edx, dl
839 case 0xd2be0f: // 0f be d2 : movsx edx, dl
840 case 0xd98b4c: // 4c 8b d9 : mov r11, rcx
841 case 0xd9f748: // 48 f7 d9 : neg rcx
842 case 0xc03145: // 45 31 c0 : xor r8d,r8d
843 case 0xc93145: // 45 31 c9 : xor r9d,r9d
844 case 0xd23345: // 45 33 d2 : xor r10d, r10d
845 case 0xdb3345: // 45 33 db : xor r11d, r11d
846 case 0xc08445: // 45 84 c0 : test r8b,r8b
847 case 0xd28445: // 45 84 d2 : test r10b,r10b
848 case 0xdb8548: // 48 85 db : test rbx, rbx
849 case 0xdb854d: // 4d 85 db : test r11, r11
850 case 0xdc8b4c: // 4c 8b dc : mov r11, rsp
851 case 0xe48548: // 48 85 e4 : test rsp, rsp
852 case 0xe4854d: // 4d 85 e4 : test r12, r12
853 case 0xc88948: // 48 89 c8 : mov rax,rcx
854 case 0xcb8948: // 48 89 cb : mov rbx,rcx
855 case 0xd08948: // 48 89 d0 : mov rax,rdx
856 case 0xd18948: // 48 89 d1 : mov rcx,rdx
857 case 0xd38948: // 48 89 d3 : mov rbx,rdx
858 case 0xe58948: // 48 89 e5 : mov rbp, rsp
859 case 0xed8548: // 48 85 ed : test rbp, rbp
860 case 0xc88949: // 49 89 c8 : mov r8, rcx
861 case 0xc98949: // 49 89 c9 : mov r9, rcx
862 case 0xca8949: // 49 89 ca : mov r10,rcx
863 case 0xd08949: // 49 89 d0 : mov r8, rdx
864 case 0xd18949: // 49 89 d1 : mov r9, rdx
865 case 0xd28949: // 49 89 d2 : mov r10, rdx
866 case 0xd38949: // 49 89 d3 : mov r11, rdx
867 case 0xed854d: // 4d 85 ed : test r13, r13
868 case 0xf6854d: // 4d 85 f6 : test r14, r14
869 case 0xff854d: // 4d 85 ff : test r15, r15
870 return 3;
871
872 case 0x245489: // 89 54 24 XX : mov DWORD PTR[rsp + XX], edx
873 case 0x428d44: // 44 8d 42 XX : lea r8d , [rdx + XX]
874 case 0x588948: // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
875 case 0xec8348: // 48 83 ec XX : sub rsp, XX
876 case 0xf88349: // 49 83 f8 XX : cmp r8, XX
877 case 0x488d49: // 49 8d 48 XX : lea rcx, [...]
878 case 0x048d4c: // 4c 8d 04 XX : lea r8, [...]
879 case 0x148d4e: // 4e 8d 14 XX : lea r10, [...]
880 case 0x398366: // 66 83 39 XX : cmp WORD PTR [rcx], XX
881 return 4;
882
883 case 0x441F0F: // 0F 1F 44 XX XX : nop DWORD PTR [...]
884 case 0x246483: // 83 64 24 XX YY : and DWORD PTR [rsp+XX], YY
885 return 5;
886
887 case 0x788166: // 66 81 78 XX YY YY cmp WORD PTR [rax+XX], YY YY
888 case 0x798166: // 66 81 79 XX YY YY cmp WORD PTR [rcx+XX], YY YY
889 case 0x7a8166: // 66 81 7a XX YY YY cmp WORD PTR [rdx+XX], YY YY
890 case 0x7b8166: // 66 81 7b XX YY YY cmp WORD PTR [rbx+XX], YY YY
891 case 0x7e8166: // 66 81 7e XX YY YY cmp WORD PTR [rsi+XX], YY YY
892 case 0x7f8166: // 66 81 7f XX YY YY cmp WORD PTR [rdi+XX], YY YY
893 return 6;
894
895 case 0xec8148: // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX
896 case 0xc0c748: // 48 C7 C0 XX XX XX XX : mov rax, XX XX XX XX
897 return 7;
898
899 // clang-format off
900 case 0x788141: // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r8+YY], XX XX XX XX
901 case 0x798141: // 41 81 79 XX YY YY YY YY : cmp DWORD PTR [r9+YY], XX XX XX XX
902 case 0x7a8141: // 41 81 7a XX YY YY YY YY : cmp DWORD PTR [r10+YY], XX XX XX XX
903 case 0x7b8141: // 41 81 7b XX YY YY YY YY : cmp DWORD PTR [r11+YY], XX XX XX XX
904 case 0x7d8141: // 41 81 7d XX YY YY YY YY : cmp DWORD PTR [r13+YY], XX XX XX XX
905 case 0x7e8141: // 41 81 7e XX YY YY YY YY : cmp DWORD PTR [r14+YY], XX XX XX XX
906 case 0x7f8141: // 41 81 7f YY XX XX XX XX : cmp DWORD PTR [r15+YY], XX XX XX XX
907 case 0x247c81: // 81 7c 24 YY XX XX XX XX : cmp DWORD PTR [rsp+YY], XX XX XX XX
908 return 8;
909 // clang-format on
910
911 case 0x058b48: // 48 8b 05 XX XX XX XX :
912 // mov rax, QWORD PTR [rip + XXXXXXXX]
913 case 0x058d48: // 48 8d 05 XX XX XX XX :
914 // lea rax, QWORD PTR [rip + XXXXXXXX]
915 case 0x0d8948: // 48 89 0d XX XX XX XX :
916 // mov QWORD PTR [rip + XXXXXXXX], rcx
917 case 0x158948: // 48 89 15 XX XX XX XX :
918 // mov QWORD PTR [rip + XXXXXXXX], rdx
919 case 0x25ff48: // 48 ff 25 XX XX XX XX :
920 // rex.W jmp QWORD PTR [rip + XXXXXXXX]
921 case 0x158D4C: // 4c 8d 15 XX XX XX XX : lea r10, [rip + XX]
922 // Instructions having offset relative to 'rip' need offset adjustment.
923 if (rel_offset)
924 *rel_offset = 3;
925 return 7;
926
927 case 0x2444c7: // C7 44 24 XX YY YY YY YY
928 // mov dword ptr [rsp + XX], YYYYYYYY
929 return 8;
930
931 case 0x7c8141: // 41 81 7c ZZ YY XX XX XX XX
932 // cmp DWORD PTR [reg+reg*n+YY], XX XX XX XX
933 return 9;
934 }
935
936 switch (*(u32*)(address)) {
937 case 0x01b60f44: // 44 0f b6 01 : movzx r8d, BYTE PTR [rcx]
938 case 0x09b60f44: // 44 0f b6 09 : movzx r9d, BYTE PTR [rcx]
939 case 0x0ab60f44: // 44 0f b6 0a : movzx r8d, BYTE PTR [rdx]
940 case 0x11b60f44: // 44 0f b6 11 : movzx r10d, BYTE PTR [rcx]
941 case 0x1ab60f44: // 44 0f b6 1a : movzx r11d, BYTE PTR [rdx]
942 return 4;
943 case 0x24448b48: // 48 8b 44 24 XX : mov rax, QWORD ptr [rsp + XX]
944 case 0x246c8948: // 48 89 6C 24 XX : mov QWORD ptr [rsp + XX], rbp
945 case 0x245c8948: // 48 89 5c 24 XX : mov QWORD PTR [rsp + XX], rbx
946 case 0x24748948: // 48 89 74 24 XX : mov QWORD PTR [rsp + XX], rsi
947 case 0x247c8948: // 48 89 7c 24 XX : mov QWORD PTR [rsp + XX], rdi
948 case 0x244C8948: // 48 89 4C 24 XX : mov QWORD PTR [rsp + XX], rcx
949 case 0x24548948: // 48 89 54 24 XX : mov QWORD PTR [rsp + XX], rdx
950 case 0x244c894c: // 4c 89 4c 24 XX : mov QWORD PTR [rsp + XX], r9
951 case 0x2444894c: // 4c 89 44 24 XX : mov QWORD PTR [rsp + XX], r8
952 case 0x244c8944: // 44 89 4c 24 XX mov DWORD PTR [rsp + XX], r9d
953 case 0x24448944: // 44 89 44 24 XX mov DWORD PTR [rsp + XX], r8d
954 case 0x246c8d48: // 48 8d 6c 24 XX : lea rbp, [rsp + XX]
955 return 5;
956 case 0x24648348: // 48 83 64 24 XX YY : and QWORD PTR [rsp + XX], YY
957 return 6;
958 case 0x24A48D48: // 48 8D A4 24 XX XX XX XX : lea rsp, [rsp + XX XX XX XX]
959 return 8;
960 }
961
962 switch (0xFFFFFFFFFFULL & *(u64 *)(address)) {
963 case 0xC07E0F4866: // 66 48 0F 7E C0 : movq rax, xmm0
964 return 5;
965 }
966
967#else
968
969 switch (*(u8*)address) {
970 case 0xA1: // A1 XX XX XX XX : mov eax, dword ptr ds:[XXXXXXXX]
971 return 5;
972 }
973 switch (*(u16*)address) {
974 case 0x458B: // 8B 45 XX : mov eax, dword ptr [ebp + XX]
975 case 0x5D8B: // 8B 5D XX : mov ebx, dword ptr [ebp + XX]
976 case 0x7D8B: // 8B 7D XX : mov edi, dword ptr [ebp + XX]
977 case 0x758B: // 8B 75 XX : mov esi, dword ptr [ebp + XX]
978 case 0x75FF: // FF 75 XX : push dword ptr [ebp + XX]
979 return 3;
980 case 0xC1F7: // F7 C1 XX YY ZZ WW : test ecx, WWZZYYXX
981 return 6;
982 case 0x3D83: // 83 3D XX YY ZZ WW TT : cmp TT, WWZZYYXX
983 return 7;
984 case 0x7D83: // 83 7D XX YY : cmp dword ptr [ebp + XX], YY
985 return 4;
986 }
987
988 switch (0x00FFFFFF & *(u32*)address) {
989 case 0x24448A: // 8A 44 24 XX : mov eal, dword ptr [esp + XX]
990 case 0x24448B: // 8B 44 24 XX : mov eax, dword ptr [esp + XX]
991 case 0x244C8B: // 8B 4C 24 XX : mov ecx, dword ptr [esp + XX]
992 case 0x24548B: // 8B 54 24 XX : mov edx, dword ptr [esp + XX]
993 case 0x245C8B: // 8B 5C 24 XX : mov ebx, dword ptr [esp + XX]
994 case 0x246C8B: // 8B 6C 24 XX : mov ebp, dword ptr [esp + XX]
995 case 0x24748B: // 8B 74 24 XX : mov esi, dword ptr [esp + XX]
996 case 0x247C8B: // 8B 7C 24 XX : mov edi, dword ptr [esp + XX]
997 return 4;
998 }
999
1000 switch (*(u32*)address) {
1001 case 0x2444B60F: // 0F B6 44 24 XX : movzx eax, byte ptr [esp + XX]
1002 return 5;
1003 }
1004#endif
1005
1006 // Unknown instruction! This might happen when we add a new interceptor, use
1007 // a new compiler version, or if Windows changed how some functions are
1008 // compiled. In either case, we print the address and 8 bytes of instructions
1009 // to notify the user about the error and to help identify the unknown
1010 // instruction. Don't treat this as a fatal error, though we can break the
1011 // debugger if one has been attached.
1012 u8 *bytes = (u8 *)address;
1013 ReportError(
1014 "interception_win: unhandled instruction at %p: %02x %02x %02x %02x %02x "
1015 "%02x %02x %02x\n",
1016 (void *)address, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4],
1017 bytes[5], bytes[6], bytes[7]);
1018 if (::IsDebuggerPresent())
1019 __debugbreak();
1020 return 0;
1021}
1022
1023size_t TestOnlyGetInstructionSize(uptr address, size_t *rel_offset) {
1024 return GetInstructionSize(address, rel_offset);
1025}
1026
1027// Returns 0 on error.
1028static size_t RoundUpToInstrBoundary(size_t size, uptr address) {
1029 size_t cursor = 0;
1030 while (cursor < size) {
1031 size_t instruction_size = GetInstructionSize(address + cursor);
1032 if (!instruction_size)
1033 return 0;
1034 cursor += instruction_size;
1035 }
1036 return cursor;
1037}
1038
1039static bool CopyInstructions(uptr to, uptr from, size_t size) {
1040 size_t cursor = 0;
1041 while (cursor != size) {
1042 size_t rel_offset = 0;
1043 size_t instruction_size = GetInstructionSize(from + cursor, &rel_offset);
1044 if (!instruction_size)
1045 return false;
1046 _memcpy((void *)(to + cursor), (void *)(from + cursor),
1047 (size_t)instruction_size);
1048 if (rel_offset) {
1049# if SANITIZER_WINDOWS64
1050 // we want to make sure that the new relative offset still fits in 32-bits
1051 // this will be untrue if relocated_offset \notin [-2**31, 2**31)
1052 s64 delta = to - from;
1053 s64 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta;
1054 if (-0x8000'0000ll > relocated_offset ||
1055 relocated_offset > 0x7FFF'FFFFll) {
1056 ReportError(
1057 "interception_win: CopyInstructions relocated_offset %lld outside "
1058 "32-bit range\n",
1059 (long long)relocated_offset);
1060 return false;
1061 }
1062# else
1063 // on 32-bit, the relative offset will always be correct
1064 s32 delta = to - from;
1065 s32 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta;
1066# endif
1067 *(s32 *)(to + cursor + rel_offset) = relocated_offset;
1068 }
1069 cursor += instruction_size;
1070 }
1071 return true;
1072}
1073
1074
1075#if !SANITIZER_WINDOWS64
1076bool OverrideFunctionWithDetour(
1077 uptr old_func, uptr new_func, uptr *orig_old_func) {
1078 const int kDetourHeaderLen = 5;
1079 const u16 kDetourInstruction = 0xFF8B;
1080
1081 uptr header = (uptr)old_func - kDetourHeaderLen;
1082 uptr patch_length = kDetourHeaderLen + kShortJumpInstructionLength;
1083
1084 // Validate that the function is hookable.
1085 if (*(u16*)old_func != kDetourInstruction ||
1086 !IsMemoryPadding(header, kDetourHeaderLen))
1087 return false;
1088
1089 // Change memory protection to writable.
1090 DWORD protection = 0;
1091 if (!ChangeMemoryProtection(header, patch_length, &protection))
1092 return false;
1093
1094 // Write a relative jump to the redirected function.
1095 WriteJumpInstruction(header, new_func);
1096
1097 // Write the short jump to the function prefix.
1098 WriteShortJumpInstruction(old_func, header);
1099
1100 // Restore previous memory protection.
1101 if (!RestoreMemoryProtection(header, patch_length, protection))
1102 return false;
1103
1104 if (orig_old_func)
1105 *orig_old_func = old_func + kShortJumpInstructionLength;
1106
1107 return true;
1108}
1109#endif
1110
1111bool OverrideFunctionWithRedirectJump(
1112 uptr old_func, uptr new_func, uptr *orig_old_func) {
1113 // Check whether the first instruction is a relative jump.
1114 if (*(u8*)old_func != 0xE9)
1115 return false;
1116
1117 if (orig_old_func) {
1118 sptr relative_offset = *(s32 *)(old_func + 1);
1119 uptr absolute_target = old_func + relative_offset + kJumpInstructionLength;
1120 *orig_old_func = absolute_target;
1121 }
1122
1123#if SANITIZER_WINDOWS64
1124 // If needed, get memory space for a trampoline jump.
1125 uptr trampoline = AllocateMemoryForTrampoline(old_func, kDirectBranchLength);
1126 if (!trampoline)
1127 return false;
1128 WriteDirectBranch(trampoline, new_func);
1129#endif
1130
1131 // Change memory protection to writable.
1132 DWORD protection = 0;
1133 if (!ChangeMemoryProtection(old_func, kJumpInstructionLength, &protection))
1134 return false;
1135
1136 // Write a relative jump to the redirected function.
1137 WriteJumpInstruction(old_func, FIRST_32_SECOND_64(new_func, trampoline));
1138
1139 // Restore previous memory protection.
1140 if (!RestoreMemoryProtection(old_func, kJumpInstructionLength, protection))
1141 return false;
1142
1143 return true;
1144}
1145
1146bool OverrideFunctionWithHotPatch(
1147 uptr old_func, uptr new_func, uptr *orig_old_func) {
1148 const int kHotPatchHeaderLen = kBranchLength;
1149
1150 uptr header = (uptr)old_func - kHotPatchHeaderLen;
1151 uptr patch_length = kHotPatchHeaderLen + kShortJumpInstructionLength;
1152
1153 // Validate that the function is hot patchable.
1154 size_t instruction_size = GetInstructionSize(old_func);
1155 if (instruction_size < kShortJumpInstructionLength ||
1156 !FunctionHasPadding(old_func, kHotPatchHeaderLen))
1157 return false;
1158
1159 if (orig_old_func) {
1160 // Put the needed instructions into the trampoline bytes.
1161 uptr trampoline_length = instruction_size + kDirectBranchLength;
1162 uptr trampoline = AllocateMemoryForTrampoline(old_func, trampoline_length);
1163 if (!trampoline)
1164 return false;
1165 if (!CopyInstructions(trampoline, old_func, instruction_size))
1166 return false;
1167 WriteDirectBranch(trampoline + instruction_size,
1168 old_func + instruction_size);
1169 *orig_old_func = trampoline;
1170 }
1171
1172 // If needed, get memory space for indirect address.
1173 uptr indirect_address = 0;
1174#if SANITIZER_WINDOWS64
1175 indirect_address = AllocateMemoryForTrampoline(old_func, kAddressLength);
1176 if (!indirect_address)
1177 return false;
1178#endif
1179
1180 // Change memory protection to writable.
1181 DWORD protection = 0;
1182 if (!ChangeMemoryProtection(header, patch_length, &protection))
1183 return false;
1184
1185 // Write jumps to the redirected function.
1186 WriteBranch(header, indirect_address, new_func);
1187 WriteShortJumpInstruction(old_func, header);
1188
1189 // Restore previous memory protection.
1190 if (!RestoreMemoryProtection(header, patch_length, protection))
1191 return false;
1192
1193 return true;
1194}
1195
1196bool OverrideFunctionWithTrampoline(
1197 uptr old_func, uptr new_func, uptr *orig_old_func) {
1198
1199 size_t instructions_length = kBranchLength;
1200 size_t padding_length = 0;
1201 uptr indirect_address = 0;
1202
1203 if (orig_old_func) {
1204 // Find out the number of bytes of the instructions we need to copy
1205 // to the trampoline.
1206 instructions_length = RoundUpToInstrBoundary(kBranchLength, old_func);
1207 if (!instructions_length)
1208 return false;
1209
1210 // Put the needed instructions into the trampoline bytes.
1211 uptr trampoline_length = instructions_length + kDirectBranchLength;
1212 uptr trampoline = AllocateMemoryForTrampoline(old_func, trampoline_length);
1213 if (!trampoline)
1214 return false;
1215 if (!CopyInstructions(trampoline, old_func, instructions_length))
1216 return false;
1217 WriteDirectBranch(trampoline + instructions_length,
1218 old_func + instructions_length);
1219 *orig_old_func = trampoline;
1220 }
1221
1222#if SANITIZER_WINDOWS64
1223 // Check if the targeted address can be encoded in the function padding.
1224 // Otherwise, allocate it in the trampoline region.
1225 if (IsMemoryPadding(old_func - kAddressLength, kAddressLength)) {
1226 indirect_address = old_func - kAddressLength;
1227 padding_length = kAddressLength;
1228 } else {
1229 indirect_address = AllocateMemoryForTrampoline(old_func, kAddressLength);
1230 if (!indirect_address)
1231 return false;
1232 }
1233#endif
1234
1235 // Change memory protection to writable.
1236 uptr patch_address = old_func - padding_length;
1237 uptr patch_length = instructions_length + padding_length;
1238 DWORD protection = 0;
1239 if (!ChangeMemoryProtection(patch_address, patch_length, &protection))
1240 return false;
1241
1242 // Patch the original function.
1243 WriteBranch(old_func, indirect_address, new_func);
1244
1245 // Restore previous memory protection.
1246 if (!RestoreMemoryProtection(patch_address, patch_length, protection))
1247 return false;
1248
1249 return true;
1250}
1251
1252bool OverrideFunction(
1253 uptr old_func, uptr new_func, uptr *orig_old_func) {
1254#if !SANITIZER_WINDOWS64
1255 if (OverrideFunctionWithDetour(old_func, new_func, orig_old_func))
1256 return true;
1257#endif
1258 if (OverrideFunctionWithRedirectJump(old_func, new_func, orig_old_func))
1259 return true;
1260 if (OverrideFunctionWithHotPatch(old_func, new_func, orig_old_func))
1261 return true;
1262 if (OverrideFunctionWithTrampoline(old_func, new_func, orig_old_func))
1263 return true;
1264 return false;
1265}
1266
1267static void **InterestingDLLsAvailable() {
1268 static const char *InterestingDLLs[] = {
1269 "kernel32.dll",
1270 "msvcr100d.dll", // VS2010
1271 "msvcr110d.dll", // VS2012
1272 "msvcr120d.dll", // VS2013
1273 "vcruntime140d.dll", // VS2015
1274 "ucrtbased.dll", // Universal CRT
1275 "msvcr100.dll", // VS2010
1276 "msvcr110.dll", // VS2012
1277 "msvcr120.dll", // VS2013
1278 "vcruntime140.dll", // VS2015
1279 "ucrtbase.dll", // Universal CRT
1280# if (defined(__MINGW32__) && defined(__i386__))
1281 "libc++.dll", // libc++
1282 "libunwind.dll", // libunwind
1283# endif
1284 // NTDLL must go last as it gets special treatment in OverrideFunction.
1285 "ntdll.dll",
1286 NULL
1287 };
1288 static void *result[ARRAY_SIZE(InterestingDLLs)] = { 0 };
1289 if (!result[0]) {
1290 for (size_t i = 0, j = 0; InterestingDLLs[i]; ++i) {
1291 if (HMODULE h = GetModuleHandleA(InterestingDLLs[i]))
1292 result[j++] = (void *)h;
1293 }
1294 }
1295 return &result[0];
1296}
1297
1298namespace {
1299// Utility for reading loaded PE images.
1300template <typename T> class RVAPtr {
1301 public:
1302 RVAPtr(void *module, uptr rva)
1303 : ptr_(reinterpret_cast<T *>(reinterpret_cast<char *>(module) + rva)) {}
1304 operator T *() { return ptr_; }
1305 T *operator->() { return ptr_; }
1306 T *operator++() { return ++ptr_; }
1307
1308 private:
1309 T *ptr_;
1310};
1311} // namespace
1312
1313// Internal implementation of GetProcAddress. At least since Windows 8,
1314// GetProcAddress appears to initialize DLLs before returning function pointers
1315// into them. This is problematic for the sanitizers, because they typically
1316// want to intercept malloc *before* MSVCRT initializes. Our internal
1317// implementation walks the export list manually without doing initialization.
1318uptr InternalGetProcAddress(void *module, const char *func_name) {
1319 // Check that the module header is full and present.
1320 RVAPtr<IMAGE_DOS_HEADER> dos_stub(module, 0);
1321 RVAPtr<IMAGE_NT_HEADERS> headers(module, dos_stub->e_lfanew);
1322 if (!module || dos_stub->e_magic != IMAGE_DOS_SIGNATURE || // "MZ"
1323 headers->Signature != IMAGE_NT_SIGNATURE || // "PE\0\0"
1324 headers->FileHeader.SizeOfOptionalHeader <
1325 sizeof(IMAGE_OPTIONAL_HEADER)) {
1326 return 0;
1327 }
1328
1329 IMAGE_DATA_DIRECTORY *export_directory =
1330 &headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
1331 if (export_directory->Size == 0)
1332 return 0;
1333 RVAPtr<IMAGE_EXPORT_DIRECTORY> exports(module,
1334 export_directory->VirtualAddress);
1335 RVAPtr<DWORD> functions(module, exports->AddressOfFunctions);
1336 RVAPtr<DWORD> names(module, exports->AddressOfNames);
1337 RVAPtr<WORD> ordinals(module, exports->AddressOfNameOrdinals);
1338
1339 for (DWORD i = 0; i < exports->NumberOfNames; i++) {
1340 RVAPtr<char> name(module, names[i]);
1341 if (!_strcmp(func_name, name)) {
1342 DWORD index = ordinals[i];
1343 RVAPtr<char> func(module, functions[index]);
1344
1345 // Handle forwarded functions.
1346 DWORD offset = functions[index];
1347 if (offset >= export_directory->VirtualAddress &&
1348 offset < export_directory->VirtualAddress + export_directory->Size) {
1349 // An entry for a forwarded function is a string with the following
1350 // format: "<module> . <function_name>" that is stored into the
1351 // exported directory.
1352 char function_name[256];
1353 size_t funtion_name_length = _strlen(func);
1354 if (funtion_name_length >= sizeof(function_name) - 1) {
1355 ReportError("interception_win: func too long: '%s'\n", (char *)func);
1356 InterceptionFailed();
1357 }
1358
1359 _memcpy(function_name, func, funtion_name_length);
1360 function_name[funtion_name_length] = '\0';
1361 char* separator = _strchr(function_name, '.');
1362 if (!separator) {
1363 ReportError("interception_win: no separator in '%s'\n",
1364 function_name);
1365 InterceptionFailed();
1366 }
1367 *separator = '\0';
1368
1369 void* redirected_module = GetModuleHandleA(function_name);
1370 if (!redirected_module) {
1371 ReportError("interception_win: GetModuleHandleA failed for '%s'\n",
1372 function_name);
1373 InterceptionFailed();
1374 }
1375 return InternalGetProcAddress(redirected_module, separator + 1);
1376 }
1377
1378 return (uptr)(char *)func;
1379 }
1380 }
1381
1382 return 0;
1383}
1384
1385bool OverrideFunction(
1386 const char *func_name, uptr new_func, uptr *orig_old_func) {
1387 static const char *kNtDllIgnore[] = {
1388 "memcmp", "memcpy", "memmove", "memset"
1389 };
1390
1391 bool hooked = false;
1392 void **DLLs = InterestingDLLsAvailable();
1393 for (size_t i = 0; DLLs[i]; ++i) {
1394 if (DLLs[i + 1] == nullptr) {
1395 // This is the last DLL, i.e. NTDLL. It exports some functions that
1396 // we only want to override in the CRT.
1397 for (const char *ignored : kNtDllIgnore) {
1398 if (_strcmp(func_name, ignored) == 0)
1399 return hooked;
1400 }
1401 }
1402
1403 uptr func_addr = InternalGetProcAddress(DLLs[i], func_name);
1404 if (func_addr &&
1405 OverrideFunction(func_addr, new_func, orig_old_func)) {
1406 hooked = true;
1407 }
1408 }
1409 return hooked;
1410}
1411
1412bool OverrideImportedFunction(const char *module_to_patch,
1413 const char *imported_module,
1414 const char *function_name, uptr new_function,
1415 uptr *orig_old_func) {
1416 HMODULE module = GetModuleHandleA(module_to_patch);
1417 if (!module)
1418 return false;
1419
1420 // Check that the module header is full and present.
1421 RVAPtr<IMAGE_DOS_HEADER> dos_stub(module, 0);
1422 RVAPtr<IMAGE_NT_HEADERS> headers(module, dos_stub->e_lfanew);
1423 if (!module || dos_stub->e_magic != IMAGE_DOS_SIGNATURE || // "MZ"
1424 headers->Signature != IMAGE_NT_SIGNATURE || // "PE\0\0"
1425 headers->FileHeader.SizeOfOptionalHeader <
1426 sizeof(IMAGE_OPTIONAL_HEADER)) {
1427 return false;
1428 }
1429
1430 IMAGE_DATA_DIRECTORY *import_directory =
1431 &headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
1432
1433 // Iterate the list of imported DLLs. FirstThunk will be null for the last
1434 // entry.
1435 RVAPtr<IMAGE_IMPORT_DESCRIPTOR> imports(module,
1436 import_directory->VirtualAddress);
1437 for (; imports->FirstThunk != 0; ++imports) {
1438 RVAPtr<const char> modname(module, imports->Name);
1439 if (_stricmp(&*modname, imported_module) == 0)
1440 break;
1441 }
1442 if (imports->FirstThunk == 0)
1443 return false;
1444
1445 // We have two parallel arrays: the import address table (IAT) and the table
1446 // of names. They start out containing the same data, but the loader rewrites
1447 // the IAT to hold imported addresses and leaves the name table in
1448 // OriginalFirstThunk alone.
1449 RVAPtr<IMAGE_THUNK_DATA> name_table(module, imports->OriginalFirstThunk);
1450 RVAPtr<IMAGE_THUNK_DATA> iat(module, imports->FirstThunk);
1451 for (; name_table->u1.Ordinal != 0; ++name_table, ++iat) {
1452 if (!IMAGE_SNAP_BY_ORDINAL(name_table->u1.Ordinal)) {
1453 RVAPtr<IMAGE_IMPORT_BY_NAME> import_by_name(
1454 module, name_table->u1.ForwarderString);
1455 const char *funcname = &import_by_name->Name[0];
1456 if (_strcmp(funcname, function_name) == 0)
1457 break;
1458 }
1459 }
1460 if (name_table->u1.Ordinal == 0)
1461 return false;
1462
1463 // Now we have the correct IAT entry. Do the swap. We have to make the page
1464 // read/write first.
1465 if (orig_old_func)
1466 *orig_old_func = iat->u1.AddressOfData;
1467 DWORD old_prot, unused_prot;
1468 if (!VirtualProtect(&iat->u1.AddressOfData, 4, PAGE_EXECUTE_READWRITE,
1469 &old_prot))
1470 return false;
1471 iat->u1.AddressOfData = new_function;
1472 if (!VirtualProtect(&iat->u1.AddressOfData, 4, old_prot, &unused_prot))
1473 return false; // Not clear if this failure bothers us.
1474 return true;
1475}
1476
1477} // namespace __interception
1478
1479#endif // SANITIZER_WINDOWS
1480