1//===-- interception_win.cpp ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of AddressSanitizer, an address sanity checker.
10//
11// Windows-specific interception methods.
12//
13// This file is implementing several hooking techniques to intercept calls
14// to functions. The hooks are dynamically installed by modifying the assembly
15// code.
16//
17// The hooking techniques are making assumptions on the way the code is
18// generated and are safe under these assumptions.
19//
20// On 64-bit architecture, there is no direct 64-bit jump instruction. To allow
21// arbitrary branching on the whole memory space, the notion of trampoline
// region is used. A trampoline region is a memory space within a 2G boundary
23// where it is safe to add custom assembly code to build 64-bit jumps.
24//
25// Hooking techniques
26// ==================
27//
28// 1) Detour
29//
30// The Detour hooking technique is assuming the presence of a header with
31// padding and an overridable 2-bytes nop instruction (mov edi, edi). The
32// nop instruction can safely be replaced by a 2-bytes jump without any need
33// to save the instruction. A jump to the target is encoded in the function
34// header and the nop instruction is replaced by a short jump to the header.
35//
36// head: 5 x nop head: jmp <hook>
37// func: mov edi, edi --> func: jmp short <head>
38// [...] real: [...]
39//
40// This technique is only implemented on 32-bit architecture.
41// Most of the time, Windows API are hookable with the detour technique.
42//
43// 2) Redirect Jump
44//
45// The redirect jump is applicable when the first instruction is a direct
46// jump. The instruction is replaced by jump to the hook.
47//
48// func: jmp <label> --> func: jmp <hook>
49//
50// On a 64-bit architecture, a trampoline is inserted.
51//
52// func: jmp <label> --> func: jmp <tramp>
53// [...]
54//
55// [trampoline]
56// tramp: jmp QWORD [addr]
57// addr: .bytes <hook>
58//
59// Note: <real> is equivalent to <label>.
60//
61// 3) HotPatch
62//
63// The HotPatch hooking is assuming the presence of a header with padding
64// and a first instruction with at least 2-bytes.
65//
66// The reason to enforce the 2-bytes limitation is to provide the minimal
67// space to encode a short jump. HotPatch technique is only rewriting one
68// instruction to avoid breaking a sequence of instructions containing a
69// branching target.
70//
71// Assumptions are enforced by MSVC compiler by using the /HOTPATCH flag.
72// see: https://msdn.microsoft.com/en-us/library/ms173507.aspx
73// Default padding length is 5 bytes in 32-bits and 6 bytes in 64-bits.
74//
75// head: 5 x nop head: jmp <hook>
76// func: <instr> --> func: jmp short <head>
77// [...] body: [...]
78//
79// [trampoline]
80// real: <instr>
81// jmp <body>
82//
83// On a 64-bit architecture:
84//
85// head: 6 x nop head: jmp QWORD [addr1]
86// func: <instr> --> func: jmp short <head>
87// [...] body: [...]
88//
89// [trampoline]
90// addr1: .bytes <hook>
91// real: <instr>
92// jmp QWORD [addr2]
93// addr2: .bytes <body>
94//
95// 4) Trampoline
96//
97// The Trampoline hooking technique is the most aggressive one. It is
98// assuming that there is a sequence of instructions that can be safely
99// replaced by a jump (enough room and no incoming branches).
100//
101// Unfortunately, these assumptions can't be safely presumed and code may
102// be broken after hooking.
103//
104// func: <instr> --> func: jmp <hook>
105// <instr>
106// [...] body: [...]
107//
108// [trampoline]
109// real: <instr>
110// <instr>
111// jmp <body>
112//
113// On a 64-bit architecture:
114//
115// func: <instr> --> func: jmp QWORD [addr1]
116// <instr>
117// [...] body: [...]
118//
119// [trampoline]
120// addr1: .bytes <hook>
121// real: <instr>
122// <instr>
123// jmp QWORD [addr2]
124// addr2: .bytes <body>
125//===----------------------------------------------------------------------===//
126
127#include "interception.h"
128
129#if SANITIZER_WINDOWS
130#include "sanitizer_common/sanitizer_platform.h"
131#define WIN32_LEAN_AND_MEAN
132#include <windows.h>
133#include <psapi.h>
134
135namespace __interception {
136
// Size in bytes of a stored address: FIRST_32_SECOND_64 picks 4 for 32-bit
// builds and 8 for 64-bit builds.
static const int kAddressLength = FIRST_32_SECOND_64(4, 8);
// Length of a near relative jump: opcode E9 followed by a 32-bit offset.
static const int kJumpInstructionLength = 5;
// Length of a short jump: opcode EB followed by an 8-bit offset.
static const int kShortJumpInstructionLength = 2;
// Length of an indirect jump: bytes FF 25 followed by a 32-bit offset.
UNUSED static const int kIndirectJumpInstructionLength = 6;
// Length of the jump emitted by WriteBranch: a relative jump on 32-bit, an
// indirect jump on 64-bit.
static const int kBranchLength =
    FIRST_32_SECOND_64(kJumpInstructionLength, kIndirectJumpInstructionLength);
// Length of a branch plus room for one address placed right after it.
static const int kDirectBranchLength = kBranchLength + kAddressLength;
144
// INTERCEPTION_FORMAT(f, a) marks a declaration as taking a printf-style
// format string at parameter index |f| with variadic arguments starting at
// index |a|. It expands to nothing when _MSC_VER is defined and to the
// __attribute__((format(printf, ...))) annotation otherwise.
#  if defined(_MSC_VER)
#    define INTERCEPTION_FORMAT(f, a)
#  else
#    define INTERCEPTION_FORMAT(f, a) __attribute__((format(printf, f, a)))
#  endif
150
// Optional printf-style sink for diagnostics. It is installed through
// SetErrorReportCallback; while it is null, ReportError does nothing.
static void (*ErrorReportCallback)(const char *format, ...)
    INTERCEPTION_FORMAT(1, 2);
153
// Installs |callback| as the function ReportError forwards its printf-style
// arguments to. Passing a null pointer turns error reporting off.
void SetErrorReportCallback(void (*callback)(const char *format, ...)) {
  ErrorReportCallback = callback;
}
157
// Forwards its printf-style arguments to ErrorReportCallback when one has been
// installed, and does nothing otherwise. Wrapped in do/while(0) so that the
// macro behaves as a single statement.
#  define ReportError(...)                \
    do {                                  \
      if (ErrorReportCallback)            \
        ErrorReportCallback(__VA_ARGS__); \
    } while (0)
163
// Reports an unrecoverable interception failure through ReportError and then
// raises a breakpoint trap.
static void InterceptionFailed() {
  ReportError("interception_win: failed due to an unrecoverable error.\n");
  // This acts like an abort when no debugger is attached. According to an old
  // comment, calling abort() leads to an infinite recursion in CheckFailed.
  __debugbreak();
}
170
171static bool DistanceIsWithin2Gig(uptr from, uptr target) {
172#if SANITIZER_WINDOWS64
173 if (from < target)
174 return target - from <= (uptr)0x7FFFFFFFU;
175 else
176 return from - target <= (uptr)0x80000000U;
177#else
178 // In a 32-bit address space, the address calculation will wrap, so this check
179 // is unnecessary.
180 return true;
181#endif
182}
183
184static uptr GetMmapGranularity() {
185 SYSTEM_INFO si;
186 GetSystemInfo(&si);
187 return si.dwAllocationGranularity;
188}
189
190UNUSED static uptr RoundDownTo(uptr size, uptr boundary) {
191 return size & ~(boundary - 1);
192}
193
194UNUSED static uptr RoundUpTo(uptr size, uptr boundary) {
195 return RoundDownTo(size + boundary - 1, boundary);
196}
197
198// FIXME: internal_str* and internal_mem* functions should be moved from the
199// ASan sources into interception/.
200
// Returns the number of characters in |str| before the terminating '\0'.
static size_t _strlen(const char *str) {
  size_t length = 0;
  while (str[length] != '\0')
    ++length;
  return length;
}
206
// Returns a pointer to the first occurrence of |c| in |str|, or nullptr when
// |c| does not occur. The terminating '\0' counts as part of the string, so
// searching for '\0' yields a pointer to the terminator (the same contract as
// the standard strchr).
static char* _strchr(char* str, char c) {
  for (;; ++str) {
    // Compare before testing for the terminator so that c == '\0' matches it.
    if (*str == c)
      return str;
    if (*str == '\0')
      return nullptr;
  }
}
215
// Compares two NUL-terminated strings character by character. Each character
// is converted to unsigned before comparing. Returns -1 or 1 at the first
// difference, and 0 when both strings end without one.
static int _strcmp(const char *s1, const char *s2) {
  for (;; ++s1, ++s2) {
    const unsigned lhs = *s1;
    const unsigned rhs = *s2;
    if (lhs != rhs)
      return lhs < rhs ? -1 : 1;
    if (lhs == 0)
      return 0;
  }
}
227
// Stores (char)value into each of the first |sz| bytes starting at |p|.
static void _memset(void *p, int value, size_t sz) {
  char *cursor = (char *)p;
  const char fill = (char)value;
  for (char *const end = cursor + sz; cursor != end; ++cursor)
    *cursor = fill;
}
232
// Copies |sz| bytes from |src| to |dst| one byte at a time, lowest address
// first.
static void _memcpy(void *dst, void *src, size_t sz) {
  char *out = (char *)dst;
  const char *in = (const char *)src;
  for (size_t remaining = sz; remaining != 0; --remaining)
    *out++ = *in++;
}
239
240static bool ChangeMemoryProtection(
241 uptr address, uptr size, DWORD *old_protection) {
242 return ::VirtualProtect((void*)address, size,
243 PAGE_EXECUTE_READWRITE,
244 old_protection) != FALSE;
245}
246
247static bool RestoreMemoryProtection(
248 uptr address, uptr size, DWORD old_protection) {
249 DWORD unused;
250 return ::VirtualProtect((void*)address, size,
251 old_protection,
252 &unused) != FALSE;
253}
254
255static bool IsMemoryPadding(uptr address, uptr size) {
256 u8* function = (u8*)address;
257 for (size_t i = 0; i < size; ++i)
258 if (function[i] != 0x90 && function[i] != 0xCC)
259 return false;
260 return true;
261}
262
// Byte pattern of an 8-byte nop. FunctionHasPadding also accepts this pattern
// directly in front of a function as usable padding.
static const u8 kHintNop8Bytes[] = {
    0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
};
266
267template<class T>
268static bool FunctionHasPrefix(uptr address, const T &pattern) {
269 u8* function = (u8*)address - sizeof(pattern);
270 for (size_t i = 0; i < sizeof(pattern); ++i)
271 if (function[i] != pattern[i])
272 return false;
273 return true;
274}
275
276static bool FunctionHasPadding(uptr address, uptr size) {
277 if (IsMemoryPadding(address - size, size))
278 return true;
279 if (size <= sizeof(kHintNop8Bytes) &&
280 FunctionHasPrefix(address, kHintNop8Bytes))
281 return true;
282 return false;
283}
284
// Fills the |size| bytes starting at |from| with 0xCC (int 3).
static void WritePadding(uptr from, uptr size) {
  _memset((void*)from, 0xCC, (size_t)size);
}
288
289static void WriteJumpInstruction(uptr from, uptr target) {
290 if (!DistanceIsWithin2Gig(from + kJumpInstructionLength, target)) {
291 ReportError(
292 "interception_win: cannot write jmp further than 2GB away, from %p to "
293 "%p.\n",
294 (void *)from, (void *)target);
295 InterceptionFailed();
296 }
297 ptrdiff_t offset = target - from - kJumpInstructionLength;
298 *(u8*)from = 0xE9;
299 *(u32*)(from + 1) = offset;
300}
301
302static void WriteShortJumpInstruction(uptr from, uptr target) {
303 sptr offset = target - from - kShortJumpInstructionLength;
304 if (offset < -128 || offset > 127) {
305 ReportError("interception_win: cannot write short jmp from %p to %p\n",
306 (void *)from, (void *)target);
307 InterceptionFailed();
308 }
309 *(u8*)from = 0xEB;
310 *(u8*)(from + 1) = (u8)offset;
311}
312
#if SANITIZER_WINDOWS64
// Writes a 6-byte indirect jump at |from| that jumps to the address stored in
// the memory slot at |indirect_target|.
static void WriteIndirectJumpInstruction(uptr from, uptr indirect_target) {
  // jmp [rip + <offset>] = FF 25 <offset> where <offset> is a relative
  // offset.
  // The offset is the distance from the end of the jump instruction to the
  // memory location containing the targeted address. The displacement is still
  // 32-bit in x64, so indirect_target must be located within +/- 2GB range.
  const uptr next_instruction = from + kIndirectJumpInstructionLength;
  int offset = indirect_target - from - kIndirectJumpInstructionLength;
  const bool reachable =
      DistanceIsWithin2Gig(next_instruction, indirect_target);
  if (!reachable) {
    ReportError(
        "interception_win: cannot write indirect jmp with target further than "
        "2GB away, from %p to %p.\n",
        (void *)from, (void *)indirect_target);
    InterceptionFailed();
  }
  // 0x25FF is stored as a 16-bit value, which yields the bytes FF 25.
  *(u16 *)from = 0x25FF;
  *(u32 *)(from + 2) = offset;
}
#endif
333
// Writes at |from| a branch that transfers control to |target|.
//
// 64-bit: emits an indirect jump through the slot at |indirect_target| and
// then stores |target| into that 8-byte slot.
// 32-bit: emits a relative jump straight to |target|; |indirect_target| is
// not used.
static void WriteBranch(
    uptr from, uptr indirect_target, uptr target) {
#if SANITIZER_WINDOWS64
  WriteIndirectJumpInstruction(from, indirect_target);
  *(u64*)indirect_target = target;
#else
  (void)indirect_target;
  WriteJumpInstruction(from, target);
#endif
}
344
// Writes at |from| a self-contained branch to |target|.
//
// 64-bit: the address slot is placed right behind the jump instruction, at
// from + kBranchLength.
// 32-bit: a plain relative jump is written.
static void WriteDirectBranch(uptr from, uptr target) {
#if SANITIZER_WINDOWS64
  // Emit an indirect jump through immediately following bytes:
  //   jmp [rip + 0]      ; the slot starts right after this instruction
  //   .quad <target>
  WriteBranch(from, from + kBranchLength, target);
#else
  WriteJumpInstruction(from, target);
#endif
}
355
// Bookkeeping for one block of memory that trampolines are carved out of.
struct TrampolineMemoryRegion {
  // Base address of the block; 0 marks an unused table entry.
  uptr content;
  // Number of bytes already handed out from the start of the block.
  uptr allocated_size;
  // Total size of the block in bytes.
  uptr max_size;
};
361
// Maximum distance used when computing the address window in which a
// trampoline for a function may be placed.
UNUSED static const uptr kTrampolineRangeLimit = 1ull << 31;  // 2 gig
// Capacity of the TrampolineRegions table.
static const int kMaxTrampolineRegion = 1024;
// Table of trampoline regions. Entries are filled from index 0 upwards, and
// the first entry whose |content| is 0 marks the end of the used part.
static TrampolineMemoryRegion TrampolineRegions[kMaxTrampolineRegion];
365
// Reserves and commits one PAGE_EXECUTE_READWRITE block of |granularity| bytes
// for trampolines.
//
// 64-bit: the block is looked for inside [min_addr, max_addr] (after clamping
// that interval to the application address range reported by GetSystemInfo),
// probing outwards from |func_addr|.
// 32-bit: the range arguments and |func_addr| are not used, and VirtualAlloc
// chooses the address.
//
// Returns the base address of the block, or nullptr when VirtualQuery or
// VirtualAlloc fails or the interval holds no suitable free block.
static void *AllocateTrampolineRegion(uptr min_addr, uptr max_addr,
                                      uptr func_addr, size_t granularity) {
#  if SANITIZER_WINDOWS64
  // Clamp {min,max}_addr to the accessible address space.
  SYSTEM_INFO system_info;
  ::GetSystemInfo(&system_info);
  uptr min_virtual_addr =
      RoundUpTo((uptr)system_info.lpMinimumApplicationAddress, granularity);
  uptr max_virtual_addr =
      RoundDownTo((uptr)system_info.lpMaximumApplicationAddress, granularity);
  if (min_addr < min_virtual_addr)
    min_addr = min_virtual_addr;
  if (max_addr > max_virtual_addr)
    max_addr = max_virtual_addr;

  // This loop probes the virtual address space to find free memory in the
  // [min_addr, max_addr] interval. The search starts from func_addr and
  // proceeds "outwards" towards the interval bounds using two probes, lo_addr
  // and hi_addr, for addresses lower/higher than func_addr. At each step, it
  // considers the probe closest to func_addr. If that address is not free, the
  // probe is advanced (lower or higher depending on the probe) to the next
  // memory block and the search continues.
  uptr lo_addr = RoundDownTo(func_addr, granularity);
  uptr hi_addr = RoundUpTo(func_addr, granularity);
  // Keep going while at least one probe is still inside the interval.
  while (lo_addr >= min_addr || hi_addr <= max_addr) {
    // Consider the in-range address closest to func_addr.
    uptr addr;
    if (lo_addr < min_addr)
      addr = hi_addr;
    else if (hi_addr > max_addr)
      addr = lo_addr;
    else
      // Both probes are in range: take the nearer one, lo_addr on a tie.
      addr = (hi_addr - func_addr < func_addr - lo_addr) ? hi_addr : lo_addr;

    MEMORY_BASIC_INFORMATION info;
    if (!::VirtualQuery((void *)addr, &info, sizeof(info))) {
      ReportError(
          "interception_win: VirtualQuery in AllocateTrampolineRegion failed "
          "for %p\n",
          (void *)addr);
      return nullptr;
    }

    // Check whether a region can be allocated at |addr|.
    if (info.State == MEM_FREE && info.RegionSize >= granularity) {
      void *page =
          ::VirtualAlloc((void *)addr, granularity, MEM_RESERVE | MEM_COMMIT,
                         PAGE_EXECUTE_READWRITE);
      if (page == nullptr)
        ReportError(
            "interception_win: VirtualAlloc in AllocateTrampolineRegion failed "
            "for %p\n",
            (void *)addr);
      // The result is returned whether or not the allocation succeeded, so a
      // failed VirtualAlloc ends the search.
      return page;
    }

    // Move the probe(s) that were just examined past the queried block. The
    // two checks are independent, so when lo_addr == hi_addr both probes
    // advance in the same iteration.
    if (addr == lo_addr)
      lo_addr =
          RoundDownTo((uptr)info.AllocationBase - granularity, granularity);
    if (addr == hi_addr)
      hi_addr =
          RoundUpTo((uptr)info.BaseAddress + info.RegionSize, granularity);
  }

  ReportError(
      "interception_win: AllocateTrampolineRegion failed to find free memory; "
      "min_addr: %p, max_addr: %p, func_addr: %p, granularity: %zu\n",
      (void *)min_addr, (void *)max_addr, (void *)func_addr, granularity);
  return nullptr;
#else
  return ::VirtualAlloc(nullptr,
                        granularity,
                        MEM_RESERVE | MEM_COMMIT,
                        PAGE_EXECUTE_READWRITE);
#endif
}
442
443// Used by unittests to release mapped memory space.
444void TestOnlyReleaseTrampolineRegions() {
445 for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
446 TrampolineMemoryRegion *current = &TrampolineRegions[bucket];
447 if (current->content == 0)
448 return;
449 ::VirtualFree((void*)current->content, 0, MEM_RELEASE);
450 current->content = 0;
451 }
452}
453
// Hands out |size| bytes of trampoline memory usable for hooking the function
// at |func_address|.
//
// The bytes come from the first entry of TrampolineRegions that has room and
// lies in the permitted address window. If the scan reaches an unused entry
// first, a new region is allocated there. The returned space is pre-filled
// with 0xCC by WritePadding.
//
// Returns the address of the space, or 0 when no region could be found or
// allocated.
static uptr AllocateMemoryForTrampoline(uptr func_address, size_t size) {
#  if SANITIZER_WINDOWS64
  // Default window: within kTrampolineRangeLimit of the function itself.
  uptr min_addr = func_address - kTrampolineRangeLimit;
  uptr max_addr = func_address + kTrampolineRangeLimit - size;

  // Allocate memory within 2GB of the module (DLL or EXE file) so that any
  // address within the module can be referenced with PC-relative operands.
  // This allows us to not just jump to the trampoline with a PC-relative
  // offset, but to relocate any instructions that we copy to the trampoline
  // which have references to the original module. If we can't find the base
  // address of the module (e.g. if func_address is in mmap'ed memory), just
  // stay within 2GB of func_address.
  HMODULE module;
  if (::GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
                               GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
                           (LPCWSTR)func_address, &module)) {
    MODULEINFO module_info;
    if (::GetModuleInformation(::GetCurrentProcess(), module,
                               &module_info, sizeof(module_info))) {
      // Lower bound is measured from the end of the image, upper bound from
      // its start, so the window is reachable from every byte of the module.
      min_addr = (uptr)module_info.lpBaseOfDll + module_info.SizeOfImage -
                 kTrampolineRangeLimit;
      max_addr = (uptr)module_info.lpBaseOfDll + kTrampolineRangeLimit - size;
    }
  }

  // Check for overflow.
  // A wrapped subtraction leaves min_addr above func_address and a wrapped
  // addition leaves max_addr below it; saturate the affected bound.
  if (min_addr > func_address)
    min_addr = 0;
  if (max_addr < func_address)
    max_addr = ~(uptr)0;
#  else
  // 32-bit: the whole address space is acceptable.
  uptr min_addr = 0;
  uptr max_addr = ~min_addr;
#  endif

  // Find a region within [min_addr,max_addr] with enough space to allocate
  // |size| bytes.
  TrampolineMemoryRegion *region = nullptr;
  for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
    TrampolineMemoryRegion* current = &TrampolineRegions[bucket];
    if (current->content == 0) {
      // No valid region found, allocate a new region.
      size_t bucket_size = GetMmapGranularity();
      void *content = AllocateTrampolineRegion(min_addr, max_addr, func_address,
                                               bucket_size);
      if (content == nullptr)
        return 0U;

      current->content = (uptr)content;
      current->allocated_size = 0;
      current->max_size = bucket_size;
      region = current;
      break;
    } else if (current->max_size - current->allocated_size > size) {
      // The region has strictly more than |size| bytes left; it is only
      // usable if its next free address falls inside the window.
      uptr next_address = current->content + current->allocated_size;
      if (next_address < min_addr || next_address > max_addr)
        continue;
      // The space can be allocated in the current region.
      region = current;
      break;
    }
  }

  // Failed to find a region.
  if (region == nullptr)
    return 0U;

  // Allocate the space in the current region.
  uptr allocated_space = region->content + region->allocated_size;
  region->allocated_size += size;
  WritePadding(allocated_space, size);

  return allocated_space;
}
528
// The following prologues cannot be patched because they contain a short jump
// that branches back into the bytes that would be overwritten by the patch.
// GetInstructionSize returns 0 (failure) for a function starting with one of
// these byte sequences.

// Short jump patterns below are only for x86_64.
#  if SANITIZER_WINDOWS_x64
// ntdll!wcslen in Win11
//   488bc1          mov     rax,rcx
//   0fb710          movzx   edx,word ptr [rax]
//   4883c002        add     rax,2
//   6685d2          test    dx,dx
//   75f4            jne     -12
static const u8 kPrologueWithShortJump1[] = {
    0x48, 0x8b, 0xc1, 0x0f, 0xb7, 0x10, 0x48, 0x83,
    0xc0, 0x02, 0x66, 0x85, 0xd2, 0x75, 0xf4,
};

// ntdll!strrchr in Win11
//   4c8bc1          mov     r8,rcx
//   8a01            mov     al,byte ptr [rcx]
//   48ffc1          inc     rcx
//   84c0            test    al,al
//   75f7            jne     -9
static const u8 kPrologueWithShortJump2[] = {
    0x4c, 0x8b, 0xc1, 0x8a, 0x01, 0x48, 0xff, 0xc1,
    0x84, 0xc0, 0x75, 0xf7,
};
#endif
556
557// Returns 0 on error.
558static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
559 if (rel_offset) {
560 *rel_offset = 0;
561 }
562
563#if SANITIZER_ARM64
564 // An ARM64 instruction is 4 bytes long.
565 return 4;
566#endif
567
568# if SANITIZER_WINDOWS_x64
569 if (memcmp((u8*)address, kPrologueWithShortJump1,
570 sizeof(kPrologueWithShortJump1)) == 0 ||
571 memcmp((u8*)address, kPrologueWithShortJump2,
572 sizeof(kPrologueWithShortJump2)) == 0) {
573 return 0;
574 }
575#endif
576
577 switch (*(u64*)address) {
578 case 0x90909090909006EB: // stub: jmp over 6 x nop.
579 return 8;
580 }
581
582 switch (*(u8*)address) {
583 case 0x90: // 90 : nop
584 case 0xC3: // C3 : ret (for small/empty function interception
585 case 0xCC: // CC : int 3 i.e. registering weak functions)
586 return 1;
587
588 case 0x50: // push eax / rax
589 case 0x51: // push ecx / rcx
590 case 0x52: // push edx / rdx
591 case 0x53: // push ebx / rbx
592 case 0x54: // push esp / rsp
593 case 0x55: // push ebp / rbp
594 case 0x56: // push esi / rsi
595 case 0x57: // push edi / rdi
596 case 0x5D: // pop ebp / rbp
597 return 1;
598
599 case 0x6A: // 6A XX = push XX
600 return 2;
601
602 // This instruction can be encoded with a 16-bit immediate but that is
603 // incredibly unlikely.
604 case 0x68: // 68 XX XX XX XX : push imm32
605 return 5;
606
607 case 0xb8: // b8 XX XX XX XX : mov eax, XX XX XX XX
608 case 0xB9: // b9 XX XX XX XX : mov ecx, XX XX XX XX
609 case 0xBA: // ba XX XX XX XX : mov edx, XX XX XX XX
610 return 5;
611
612 // Cannot overwrite control-instruction. Return 0 to indicate failure.
613 case 0xE9: // E9 XX XX XX XX : jmp <label>
614 case 0xE8: // E8 XX XX XX XX : call <func>
615 case 0xEB: // EB XX : jmp XX (short jump)
616 case 0x70: // 7Y YY : jy XX (short conditional jump)
617 case 0x71:
618 case 0x72:
619 case 0x73:
620 case 0x74:
621 case 0x75:
622 case 0x76:
623 case 0x77:
624 case 0x78:
625 case 0x79:
626 case 0x7A:
627 case 0x7B:
628 case 0x7C:
629 case 0x7D:
630 case 0x7E:
631 case 0x7F:
632 return 0;
633 }
634
635 switch (*(u16*)(address)) {
636 case 0x018A: // 8A 01 : mov al, byte ptr [ecx]
637 case 0xFF8B: // 8B FF : mov edi, edi
638 case 0xEC8B: // 8B EC : mov ebp, esp
639 case 0xc889: // 89 C8 : mov eax, ecx
640 case 0xD189: // 89 D1 : mov ecx, edx
641 case 0xE589: // 89 E5 : mov ebp, esp
642 case 0xC18B: // 8B C1 : mov eax, ecx
643 case 0xC031: // 31 C0 : xor eax, eax
644 case 0xC931: // 31 C9 : xor ecx, ecx
645 case 0xD231: // 31 D2 : xor edx, edx
646 case 0xC033: // 33 C0 : xor eax, eax
647 case 0xC933: // 33 C9 : xor ecx, ecx
648 case 0xD233: // 33 D2 : xor edx, edx
649 case 0x9066: // 66 90 : xchg %ax,%ax (Two-byte NOP)
650 case 0xDB84: // 84 DB : test bl,bl
651 case 0xC084: // 84 C0 : test al,al
652 case 0xC984: // 84 C9 : test cl,cl
653 case 0xD284: // 84 D2 : test dl,dl
654 return 2;
655
656 case 0x3980: // 80 39 XX : cmp BYTE PTR [rcx], XX
657 case 0x4D8B: // 8B 4D XX : mov XX(%ebp), ecx
658 case 0x558B: // 8B 55 XX : mov XX(%ebp), edx
659 case 0x758B: // 8B 75 XX : mov XX(%ebp), esp
660 case 0xE483: // 83 E4 XX : and esp, XX
661 case 0xEC83: // 83 EC XX : sub esp, XX
662 case 0xC1F6: // F6 C1 XX : test cl, XX
663 return 3;
664
665 case 0x89FF: // FF 89 XX XX XX XX : dec dword ptr [ecx + XX XX XX XX]
666 case 0xEC81: // 81 EC XX XX XX XX : sub esp, XX XX XX XX
667 return 6;
668
669 // Cannot overwrite control-instruction. Return 0 to indicate failure.
670 case 0x25FF: // FF 25 XX YY ZZ WW : jmp dword ptr ds:[WWZZYYXX]
671 return 0;
672 }
673
674 switch (0x00FFFFFF & *(u32 *)address) {
675 case 0x244C8D: // 8D 4C 24 XX : lea ecx, [esp + XX]
676 case 0x2474FF: // FF 74 24 XX : push qword ptr [rsp + XX]
677 return 4;
678 case 0x24A48D: // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX]
679 return 7;
680 }
681
682 switch (0x000000FF & *(u32 *)address) {
683 case 0xc2: // C2 XX XX : ret XX (needed for registering weak functions)
684 return 3;
685 }
686
687# if SANITIZER_WINDOWS_x64
688 switch (*(u8*)address) {
689 case 0xA1: // A1 XX XX XX XX XX XX XX XX :
690 // movabs eax, dword ptr ds:[XXXXXXXX]
691 return 9;
692 case 0xF2:
693 switch (*(u32 *)(address + 1)) {
694 case 0x2444110f: // f2 0f 11 44 24 XX movsd QWORD PTR
695 // [rsp + XX], xmm0
696 case 0x244c110f: // f2 0f 11 4c 24 XX movsd QWORD PTR
697 // [rsp + XX], xmm1
698 case 0x2454110f: // f2 0f 11 54 24 XX movsd QWORD PTR
699 // [rsp + XX], xmm2
700 case 0x245c110f: // f2 0f 11 5c 24 XX movsd QWORD PTR
701 // [rsp + XX], xmm3
702 case 0x2464110f: // f2 0f 11 64 24 XX movsd QWORD PTR
703 // [rsp + XX], xmm4
704 return 6;
705 }
706 break;
707
708 case 0x83:
709 const u8 next_byte = *(u8*)(address + 1);
710 const u8 mod = next_byte >> 6;
711 const u8 rm = next_byte & 7;
712 if (mod == 1 && rm == 4)
713 return 5; // 83 ModR/M SIB Disp8 Imm8
714 // add|or|adc|sbb|and|sub|xor|cmp [r+disp8], imm8
715 }
716
717 switch (*(u16*)address) {
718 case 0x5040: // push rax
719 case 0x5140: // push rcx
720 case 0x5240: // push rdx
721 case 0x5340: // push rbx
722 case 0x5440: // push rsp
723 case 0x5540: // push rbp
724 case 0x5640: // push rsi
725 case 0x5740: // push rdi
726 case 0x5441: // push r12
727 case 0x5541: // push r13
728 case 0x5641: // push r14
729 case 0x5741: // push r15
730 case 0xc084: // test al, al
731 case 0x018a: // mov al, byte ptr [rcx]
732 return 2;
733
734 case 0x7E80: // 80 7E YY XX cmp BYTE PTR [rsi+YY], XX
735 case 0x7D80: // 80 7D YY XX cmp BYTE PTR [rbp+YY], XX
736 case 0x7A80: // 80 7A YY XX cmp BYTE PTR [rdx+YY], XX
737 case 0x7880: // 80 78 YY XX cmp BYTE PTR [rax+YY], XX
738 case 0x7B80: // 80 7B YY XX cmp BYTE PTR [rbx+YY], XX
739 case 0x7980: // 80 79 YY XX cmp BYTE ptr [rcx+YY], XX
740 return 4;
741
742 case 0x058A: // 8A 05 XX XX XX XX : mov al, byte ptr [XX XX XX XX]
743 case 0x058B: // 8B 05 XX XX XX XX : mov eax, dword ptr [XX XX XX XX]
744 if (rel_offset)
745 *rel_offset = 2;
746 FALLTHROUGH;
747 case 0xB841: // 41 B8 XX XX XX XX : mov r8d, XX XX XX XX
748 return 6;
749
750 case 0x7E81: // 81 7E YY XX XX XX XX cmp DWORD PTR [rsi+YY], XX XX XX XX
751 case 0x7D81: // 81 7D YY XX XX XX XX cmp DWORD PTR [rbp+YY], XX XX XX XX
752 case 0x7A81: // 81 7A YY XX XX XX XX cmp DWORD PTR [rdx+YY], XX XX XX XX
753 case 0x7881: // 81 78 YY XX XX XX XX cmp DWORD PTR [rax+YY], XX XX XX XX
754 case 0x7B81: // 81 7B YY XX XX XX XX cmp DWORD PTR [rbx+YY], XX XX XX XX
755 case 0x7981: // 81 79 YY XX XX XX XX cmp dword ptr [rcx+YY], XX XX XX XX
756 return 7;
757
758 case 0xb848: // 48 b8 XX XX XX XX XX XX XX XX :
759 // movabsq XX XX XX XX XX XX XX XX, rax
760 case 0xba48: // 48 ba XX XX XX XX XX XX XX XX :
761 // movabsq XX XX XX XX XX XX XX XX, rdx
762 return 10;
763 }
764
765 switch (0x00FFFFFF & *(u32 *)address) {
766 case 0x10b70f: // 0f b7 10 : movzx edx, WORD PTR [rax]
767 case 0xc00b4d: // 4d 0b c0 : or r8, r8
768 case 0xc03345: // 45 33 c0 : xor r8d, r8d
769 case 0xc08548: // 48 85 c0 : test rax, rax
770 case 0xc0854d: // 4d 85 c0 : test r8, r8
771 case 0xc08b41: // 41 8b c0 : mov eax, r8d
772 case 0xc0ff48: // 48 ff c0 : inc rax
773 case 0xc0ff49: // 49 ff c0 : inc r8
774 case 0xc18b41: // 41 8b c1 : mov eax, r9d
775 case 0xc18b48: // 48 8b c1 : mov rax, rcx
776 case 0xc18b4c: // 4c 8b c1 : mov r8, rcx
777 case 0xc1ff48: // 48 ff c1 : inc rcx
778 case 0xc1ff49: // 49 ff c1 : inc r9
779 case 0xc28b41: // 41 8b c2 : mov eax, r10d
780 case 0x01b60f: // 0f b6 01 : movzx eax, BYTE PTR [rcx]
781 case 0x09b60f: // 0f b6 09 : movzx ecx, BYTE PTR [rcx]
782 case 0x11b60f: // 0f b6 11 : movzx edx, BYTE PTR [rcx]
783 case 0xc2b60f: // 0f b6 c2 : movzx eax, dl
784 case 0xc2ff48: // 48 ff c2 : inc rdx
785 case 0xc2ff49: // 49 ff c2 : inc r10
786 case 0xc38b41: // 41 8b c3 : mov eax, r11d
787 case 0xc3ff48: // 48 ff c3 : inc rbx
788 case 0xc3ff49: // 49 ff c3 : inc r11
789 case 0xc48b41: // 41 8b c4 : mov eax, r12d
790 case 0xc48b48: // 48 8b c4 : mov rax, rsp
791 case 0xc4ff49: // 49 ff c4 : inc r12
792 case 0xc5ff49: // 49 ff c5 : inc r13
793 case 0xc6ff48: // 48 ff c6 : inc rsi
794 case 0xc6ff49: // 49 ff c6 : inc r14
795 case 0xc7ff48: // 48 ff c7 : inc rdi
796 case 0xc7ff49: // 49 ff c7 : inc r15
797 case 0xc93345: // 45 33 c9 : xor r9d, r9d
798 case 0xc98548: // 48 85 c9 : test rcx, rcx
799 case 0xc9854d: // 4d 85 c9 : test r9, r9
800 case 0xc98b4c: // 4c 8b c9 : mov r9, rcx
801 case 0xd12948: // 48 29 d1 : sub rcx, rdx
802 case 0xca2b48: // 48 2b ca : sub rcx, rdx
803 case 0xca3b48: // 48 3b ca : cmp rcx, rdx
804 case 0xd12b48: // 48 2b d1 : sub rdx, rcx
805 case 0xd18b48: // 48 8b d1 : mov rdx, rcx
806 case 0xd18b4c: // 4c 8b d1 : mov r10, rcx
807 case 0xd28548: // 48 85 d2 : test rdx, rdx
808 case 0xd2854d: // 4d 85 d2 : test r10, r10
809 case 0xd28b4c: // 4c 8b d2 : mov r10, rdx
810 case 0xd2b60f: // 0f b6 d2 : movzx edx, dl
811 case 0xd2be0f: // 0f be d2 : movsx edx, dl
812 case 0xd98b4c: // 4c 8b d9 : mov r11, rcx
813 case 0xd9f748: // 48 f7 d9 : neg rcx
814 case 0xc03145: // 45 31 c0 : xor r8d,r8d
815 case 0xc93145: // 45 31 c9 : xor r9d,r9d
816 case 0xdb3345: // 45 33 db : xor r11d, r11d
817 case 0xc08445: // 45 84 c0 : test r8b,r8b
818 case 0xd28445: // 45 84 d2 : test r10b,r10b
819 case 0xdb8548: // 48 85 db : test rbx, rbx
820 case 0xdb854d: // 4d 85 db : test r11, r11
821 case 0xdc8b4c: // 4c 8b dc : mov r11, rsp
822 case 0xe48548: // 48 85 e4 : test rsp, rsp
823 case 0xe4854d: // 4d 85 e4 : test r12, r12
824 case 0xc88948: // 48 89 c8 : mov rax,rcx
825 case 0xcb8948: // 48 89 cb : mov rbx,rcx
826 case 0xd08948: // 48 89 d0 : mov rax,rdx
827 case 0xd18948: // 48 89 d1 : mov rcx,rdx
828 case 0xd38948: // 48 89 d3 : mov rbx,rdx
829 case 0xe58948: // 48 89 e5 : mov rbp, rsp
830 case 0xed8548: // 48 85 ed : test rbp, rbp
831 case 0xc88949: // 49 89 c8 : mov r8, rcx
832 case 0xc98949: // 49 89 c9 : mov r9, rcx
833 case 0xca8949: // 49 89 ca : mov r10,rcx
834 case 0xd08949: // 49 89 d0 : mov r8, rdx
835 case 0xd18949: // 49 89 d1 : mov r9, rdx
836 case 0xd28949: // 49 89 d2 : mov r10, rdx
837 case 0xd38949: // 49 89 d3 : mov r11, rdx
838 case 0xed854d: // 4d 85 ed : test r13, r13
839 case 0xf6854d: // 4d 85 f6 : test r14, r14
840 case 0xff854d: // 4d 85 ff : test r15, r15
841 return 3;
842
843 case 0x245489: // 89 54 24 XX : mov DWORD PTR[rsp + XX], edx
844 case 0x428d44: // 44 8d 42 XX : lea r8d , [rdx + XX]
845 case 0x588948: // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
846 case 0xec8348: // 48 83 ec XX : sub rsp, XX
847 case 0xf88349: // 49 83 f8 XX : cmp r8, XX
848 case 0x488d49: // 49 8d 48 XX : lea rcx, [...]
849 case 0x048d4c: // 4c 8d 04 XX : lea r8, [...]
850 case 0x148d4e: // 4e 8d 14 XX : lea r10, [...]
851 case 0x398366: // 66 83 39 XX : cmp WORD PTR [rcx], XX
852 return 4;
853
854 case 0x441F0F: // 0F 1F 44 XX XX : nop DWORD PTR [...]
855 case 0x246483: // 83 64 24 XX YY : and DWORD PTR [rsp+XX], YY
856 return 5;
857
858 case 0x788166: // 66 81 78 XX YY YY cmp WORD PTR [rax+XX], YY YY
859 case 0x798166: // 66 81 79 XX YY YY cmp WORD PTR [rcx+XX], YY YY
860 case 0x7a8166: // 66 81 7a XX YY YY cmp WORD PTR [rdx+XX], YY YY
861 case 0x7b8166: // 66 81 7b XX YY YY cmp WORD PTR [rbx+XX], YY YY
862 case 0x7e8166: // 66 81 7e XX YY YY cmp WORD PTR [rsi+XX], YY YY
863 case 0x7f8166: // 66 81 7f XX YY YY cmp WORD PTR [rdi+XX], YY YY
864 return 6;
865
866 case 0xec8148: // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX
867 case 0xc0c748: // 48 C7 C0 XX XX XX XX : mov rax, XX XX XX XX
868 return 7;
869
870 // clang-format off
871 case 0x788141: // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r8+YY], XX XX XX XX
872 case 0x798141: // 41 81 79 XX YY YY YY YY : cmp DWORD PTR [r9+YY], XX XX XX XX
873 case 0x7a8141: // 41 81 7a XX YY YY YY YY : cmp DWORD PTR [r10+YY], XX XX XX XX
874 case 0x7b8141: // 41 81 7b XX YY YY YY YY : cmp DWORD PTR [r11+YY], XX XX XX XX
875 case 0x7d8141: // 41 81 7d XX YY YY YY YY : cmp DWORD PTR [r13+YY], XX XX XX XX
876 case 0x7e8141: // 41 81 7e XX YY YY YY YY : cmp DWORD PTR [r14+YY], XX XX XX XX
877 case 0x7f8141: // 41 81 7f YY XX XX XX XX : cmp DWORD PTR [r15+YY], XX XX XX XX
878 case 0x247c81: // 81 7c 24 YY XX XX XX XX : cmp DWORD PTR [rsp+YY], XX XX XX XX
879 return 8;
880 // clang-format on
881
882 case 0x058b48: // 48 8b 05 XX XX XX XX :
883 // mov rax, QWORD PTR [rip + XXXXXXXX]
884 case 0x058d48: // 48 8d 05 XX XX XX XX :
885 // lea rax, QWORD PTR [rip + XXXXXXXX]
886 case 0x0d8948: // 48 89 0d XX XX XX XX :
887 // mov QWORD PTR [rip + XXXXXXXX], rcx
888 case 0x158948: // 48 89 15 XX XX XX XX :
889 // mov QWORD PTR [rip + XXXXXXXX], rdx
890 case 0x25ff48: // 48 ff 25 XX XX XX XX :
891 // rex.W jmp QWORD PTR [rip + XXXXXXXX]
892 case 0x158D4C: // 4c 8d 15 XX XX XX XX : lea r10, [rip + XX]
893 // Instructions having offset relative to 'rip' need offset adjustment.
894 if (rel_offset)
895 *rel_offset = 3;
896 return 7;
897
898 case 0x2444c7: // C7 44 24 XX YY YY YY YY
899 // mov dword ptr [rsp + XX], YYYYYYYY
900 return 8;
901
902 case 0x7c8141: // 41 81 7c ZZ YY XX XX XX XX
903 // cmp DWORD PTR [reg+reg*n+YY], XX XX XX XX
904 return 9;
905 }
906
907 switch (*(u32*)(address)) {
908 case 0x01b60f44: // 44 0f b6 01 : movzx r8d, BYTE PTR [rcx]
909 case 0x09b60f44: // 44 0f b6 09 : movzx r9d, BYTE PTR [rcx]
910 case 0x0ab60f44: // 44 0f b6 0a : movzx r8d, BYTE PTR [rdx]
911 case 0x11b60f44: // 44 0f b6 11 : movzx r10d, BYTE PTR [rcx]
912 case 0x1ab60f44: // 44 0f b6 1a : movzx r11d, BYTE PTR [rdx]
913 return 4;
914 case 0x24448b48: // 48 8b 44 24 XX : mov rax, QWORD ptr [rsp + XX]
915 case 0x246c8948: // 48 89 6C 24 XX : mov QWORD ptr [rsp + XX], rbp
916 case 0x245c8948: // 48 89 5c 24 XX : mov QWORD PTR [rsp + XX], rbx
917 case 0x24748948: // 48 89 74 24 XX : mov QWORD PTR [rsp + XX], rsi
918 case 0x247c8948: // 48 89 7c 24 XX : mov QWORD PTR [rsp + XX], rdi
919 case 0x244C8948: // 48 89 4C 24 XX : mov QWORD PTR [rsp + XX], rcx
920 case 0x24548948: // 48 89 54 24 XX : mov QWORD PTR [rsp + XX], rdx
921 case 0x244c894c: // 4c 89 4c 24 XX : mov QWORD PTR [rsp + XX], r9
922 case 0x2444894c: // 4c 89 44 24 XX : mov QWORD PTR [rsp + XX], r8
923 case 0x244c8944: // 44 89 4c 24 XX mov DWORD PTR [rsp + XX], r9d
924 case 0x24448944: // 44 89 44 24 XX mov DWORD PTR [rsp + XX], r8d
925 case 0x246c8d48: // 48 8d 6c 24 XX : lea rbp, [rsp + XX]
926 return 5;
927 case 0x24648348: // 48 83 64 24 XX YY : and QWORD PTR [rsp + XX], YY
928 return 6;
929 case 0x24A48D48: // 48 8D A4 24 XX XX XX XX : lea rsp, [rsp + XX XX XX XX]
930 return 8;
931 }
932
933 switch (0xFFFFFFFFFFULL & *(u64 *)(address)) {
934 case 0xC07E0F4866: // 66 48 0F 7E C0 : movq rax, xmm0
935 return 5;
936 }
937
938#else
939
940 switch (*(u8*)address) {
941 case 0xA1: // A1 XX XX XX XX : mov eax, dword ptr ds:[XXXXXXXX]
942 return 5;
943 }
944 switch (*(u16*)address) {
945 case 0x458B: // 8B 45 XX : mov eax, dword ptr [ebp + XX]
946 case 0x5D8B: // 8B 5D XX : mov ebx, dword ptr [ebp + XX]
947 case 0x7D8B: // 8B 7D XX : mov edi, dword ptr [ebp + XX]
948 case 0x758B: // 8B 75 XX : mov esi, dword ptr [ebp + XX]
949 case 0x75FF: // FF 75 XX : push dword ptr [ebp + XX]
950 return 3;
951 case 0xC1F7: // F7 C1 XX YY ZZ WW : test ecx, WWZZYYXX
952 return 6;
953 case 0x3D83: // 83 3D XX YY ZZ WW TT : cmp TT, WWZZYYXX
954 return 7;
955 case 0x7D83: // 83 7D XX YY : cmp dword ptr [ebp + XX], YY
956 return 4;
957 }
958
959 switch (0x00FFFFFF & *(u32*)address) {
960 case 0x24448A: // 8A 44 24 XX : mov eal, dword ptr [esp + XX]
961 case 0x24448B: // 8B 44 24 XX : mov eax, dword ptr [esp + XX]
962 case 0x244C8B: // 8B 4C 24 XX : mov ecx, dword ptr [esp + XX]
963 case 0x24548B: // 8B 54 24 XX : mov edx, dword ptr [esp + XX]
964 case 0x245C8B: // 8B 5C 24 XX : mov ebx, dword ptr [esp + XX]
965 case 0x246C8B: // 8B 6C 24 XX : mov ebp, dword ptr [esp + XX]
966 case 0x24748B: // 8B 74 24 XX : mov esi, dword ptr [esp + XX]
967 case 0x247C8B: // 8B 7C 24 XX : mov edi, dword ptr [esp + XX]
968 return 4;
969 }
970
971 switch (*(u32*)address) {
972 case 0x2444B60F: // 0F B6 44 24 XX : movzx eax, byte ptr [esp + XX]
973 return 5;
974 }
975#endif
976
977 // Unknown instruction! This might happen when we add a new interceptor, use
978 // a new compiler version, or if Windows changed how some functions are
979 // compiled. In either case, we print the address and 8 bytes of instructions
980 // to notify the user about the error and to help identify the unknown
981 // instruction. Don't treat this as a fatal error, though we can break the
982 // debugger if one has been attached.
983 u8 *bytes = (u8 *)address;
984 ReportError(
985 "interception_win: unhandled instruction at %p: %02x %02x %02x %02x %02x "
986 "%02x %02x %02x\n",
987 (void *)address, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4],
988 bytes[5], bytes[6], bytes[7]);
989 if (::IsDebuggerPresent())
990 __debugbreak();
991 return 0;
992}
993
994size_t TestOnlyGetInstructionSize(uptr address, size_t *rel_offset) {
995 return GetInstructionSize(address, rel_offset);
996}
997
998// Returns 0 on error.
999static size_t RoundUpToInstrBoundary(size_t size, uptr address) {
1000 size_t cursor = 0;
1001 while (cursor < size) {
1002 size_t instruction_size = GetInstructionSize(address + cursor);
1003 if (!instruction_size)
1004 return 0;
1005 cursor += instruction_size;
1006 }
1007 return cursor;
1008}
1009
1010static bool CopyInstructions(uptr to, uptr from, size_t size) {
1011 size_t cursor = 0;
1012 while (cursor != size) {
1013 size_t rel_offset = 0;
1014 size_t instruction_size = GetInstructionSize(from + cursor, &rel_offset);
1015 if (!instruction_size)
1016 return false;
1017 _memcpy((void *)(to + cursor), (void *)(from + cursor),
1018 (size_t)instruction_size);
1019 if (rel_offset) {
1020# if SANITIZER_WINDOWS64
1021 // we want to make sure that the new relative offset still fits in 32-bits
1022 // this will be untrue if relocated_offset \notin [-2**31, 2**31)
1023 s64 delta = to - from;
1024 s64 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta;
1025 if (-0x8000'0000ll > relocated_offset ||
1026 relocated_offset > 0x7FFF'FFFFll) {
1027 ReportError(
1028 "interception_win: CopyInstructions relocated_offset %lld outside "
1029 "32-bit range\n",
1030 (long long)relocated_offset);
1031 return false;
1032 }
1033# else
1034 // on 32-bit, the relative offset will always be correct
1035 s32 delta = to - from;
1036 s32 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta;
1037# endif
1038 *(s32 *)(to + cursor + rel_offset) = relocated_offset;
1039 }
1040 cursor += instruction_size;
1041 }
1042 return true;
1043}
1044
1045
1046#if !SANITIZER_WINDOWS64
1047bool OverrideFunctionWithDetour(
1048 uptr old_func, uptr new_func, uptr *orig_old_func) {
1049 const int kDetourHeaderLen = 5;
1050 const u16 kDetourInstruction = 0xFF8B;
1051
1052 uptr header = (uptr)old_func - kDetourHeaderLen;
1053 uptr patch_length = kDetourHeaderLen + kShortJumpInstructionLength;
1054
1055 // Validate that the function is hookable.
1056 if (*(u16*)old_func != kDetourInstruction ||
1057 !IsMemoryPadding(header, kDetourHeaderLen))
1058 return false;
1059
1060 // Change memory protection to writable.
1061 DWORD protection = 0;
1062 if (!ChangeMemoryProtection(header, patch_length, &protection))
1063 return false;
1064
1065 // Write a relative jump to the redirected function.
1066 WriteJumpInstruction(header, new_func);
1067
1068 // Write the short jump to the function prefix.
1069 WriteShortJumpInstruction(old_func, header);
1070
1071 // Restore previous memory protection.
1072 if (!RestoreMemoryProtection(header, patch_length, protection))
1073 return false;
1074
1075 if (orig_old_func)
1076 *orig_old_func = old_func + kShortJumpInstructionLength;
1077
1078 return true;
1079}
1080#endif
1081
1082bool OverrideFunctionWithRedirectJump(
1083 uptr old_func, uptr new_func, uptr *orig_old_func) {
1084 // Check whether the first instruction is a relative jump.
1085 if (*(u8*)old_func != 0xE9)
1086 return false;
1087
1088 if (orig_old_func) {
1089 sptr relative_offset = *(s32 *)(old_func + 1);
1090 uptr absolute_target = old_func + relative_offset + kJumpInstructionLength;
1091 *orig_old_func = absolute_target;
1092 }
1093
1094#if SANITIZER_WINDOWS64
1095 // If needed, get memory space for a trampoline jump.
1096 uptr trampoline = AllocateMemoryForTrampoline(old_func, kDirectBranchLength);
1097 if (!trampoline)
1098 return false;
1099 WriteDirectBranch(trampoline, new_func);
1100#endif
1101
1102 // Change memory protection to writable.
1103 DWORD protection = 0;
1104 if (!ChangeMemoryProtection(old_func, kJumpInstructionLength, &protection))
1105 return false;
1106
1107 // Write a relative jump to the redirected function.
1108 WriteJumpInstruction(old_func, FIRST_32_SECOND_64(new_func, trampoline));
1109
1110 // Restore previous memory protection.
1111 if (!RestoreMemoryProtection(old_func, kJumpInstructionLength, protection))
1112 return false;
1113
1114 return true;
1115}
1116
1117bool OverrideFunctionWithHotPatch(
1118 uptr old_func, uptr new_func, uptr *orig_old_func) {
1119 const int kHotPatchHeaderLen = kBranchLength;
1120
1121 uptr header = (uptr)old_func - kHotPatchHeaderLen;
1122 uptr patch_length = kHotPatchHeaderLen + kShortJumpInstructionLength;
1123
1124 // Validate that the function is hot patchable.
1125 size_t instruction_size = GetInstructionSize(old_func);
1126 if (instruction_size < kShortJumpInstructionLength ||
1127 !FunctionHasPadding(old_func, kHotPatchHeaderLen))
1128 return false;
1129
1130 if (orig_old_func) {
1131 // Put the needed instructions into the trampoline bytes.
1132 uptr trampoline_length = instruction_size + kDirectBranchLength;
1133 uptr trampoline = AllocateMemoryForTrampoline(old_func, trampoline_length);
1134 if (!trampoline)
1135 return false;
1136 if (!CopyInstructions(trampoline, old_func, instruction_size))
1137 return false;
1138 WriteDirectBranch(trampoline + instruction_size,
1139 old_func + instruction_size);
1140 *orig_old_func = trampoline;
1141 }
1142
1143 // If needed, get memory space for indirect address.
1144 uptr indirect_address = 0;
1145#if SANITIZER_WINDOWS64
1146 indirect_address = AllocateMemoryForTrampoline(old_func, kAddressLength);
1147 if (!indirect_address)
1148 return false;
1149#endif
1150
1151 // Change memory protection to writable.
1152 DWORD protection = 0;
1153 if (!ChangeMemoryProtection(header, patch_length, &protection))
1154 return false;
1155
1156 // Write jumps to the redirected function.
1157 WriteBranch(header, indirect_address, new_func);
1158 WriteShortJumpInstruction(old_func, header);
1159
1160 // Restore previous memory protection.
1161 if (!RestoreMemoryProtection(header, patch_length, protection))
1162 return false;
1163
1164 return true;
1165}
1166
1167bool OverrideFunctionWithTrampoline(
1168 uptr old_func, uptr new_func, uptr *orig_old_func) {
1169
1170 size_t instructions_length = kBranchLength;
1171 size_t padding_length = 0;
1172 uptr indirect_address = 0;
1173
1174 if (orig_old_func) {
1175 // Find out the number of bytes of the instructions we need to copy
1176 // to the trampoline.
1177 instructions_length = RoundUpToInstrBoundary(kBranchLength, old_func);
1178 if (!instructions_length)
1179 return false;
1180
1181 // Put the needed instructions into the trampoline bytes.
1182 uptr trampoline_length = instructions_length + kDirectBranchLength;
1183 uptr trampoline = AllocateMemoryForTrampoline(old_func, trampoline_length);
1184 if (!trampoline)
1185 return false;
1186 if (!CopyInstructions(trampoline, old_func, instructions_length))
1187 return false;
1188 WriteDirectBranch(trampoline + instructions_length,
1189 old_func + instructions_length);
1190 *orig_old_func = trampoline;
1191 }
1192
1193#if SANITIZER_WINDOWS64
1194 // Check if the targeted address can be encoded in the function padding.
1195 // Otherwise, allocate it in the trampoline region.
1196 if (IsMemoryPadding(old_func - kAddressLength, kAddressLength)) {
1197 indirect_address = old_func - kAddressLength;
1198 padding_length = kAddressLength;
1199 } else {
1200 indirect_address = AllocateMemoryForTrampoline(old_func, kAddressLength);
1201 if (!indirect_address)
1202 return false;
1203 }
1204#endif
1205
1206 // Change memory protection to writable.
1207 uptr patch_address = old_func - padding_length;
1208 uptr patch_length = instructions_length + padding_length;
1209 DWORD protection = 0;
1210 if (!ChangeMemoryProtection(patch_address, patch_length, &protection))
1211 return false;
1212
1213 // Patch the original function.
1214 WriteBranch(old_func, indirect_address, new_func);
1215
1216 // Restore previous memory protection.
1217 if (!RestoreMemoryProtection(patch_address, patch_length, protection))
1218 return false;
1219
1220 return true;
1221}
1222
1223bool OverrideFunction(
1224 uptr old_func, uptr new_func, uptr *orig_old_func) {
1225#if !SANITIZER_WINDOWS64
1226 if (OverrideFunctionWithDetour(old_func, new_func, orig_old_func))
1227 return true;
1228#endif
1229 if (OverrideFunctionWithRedirectJump(old_func, new_func, orig_old_func))
1230 return true;
1231 if (OverrideFunctionWithHotPatch(old_func, new_func, orig_old_func))
1232 return true;
1233 if (OverrideFunctionWithTrampoline(old_func, new_func, orig_old_func))
1234 return true;
1235 return false;
1236}
1237
1238static void **InterestingDLLsAvailable() {
1239 static const char *InterestingDLLs[] = {
1240 "kernel32.dll",
1241 "msvcr100d.dll", // VS2010
1242 "msvcr110d.dll", // VS2012
1243 "msvcr120d.dll", // VS2013
1244 "vcruntime140d.dll", // VS2015
1245 "ucrtbased.dll", // Universal CRT
1246 "msvcr100.dll", // VS2010
1247 "msvcr110.dll", // VS2012
1248 "msvcr120.dll", // VS2013
1249 "vcruntime140.dll", // VS2015
1250 "ucrtbase.dll", // Universal CRT
1251# if (defined(__MINGW32__) && defined(__i386__))
1252 "libc++.dll", // libc++
1253 "libunwind.dll", // libunwind
1254# endif
1255 // NTDLL must go last as it gets special treatment in OverrideFunction.
1256 "ntdll.dll",
1257 NULL
1258 };
1259 static void *result[ARRAY_SIZE(InterestingDLLs)] = { 0 };
1260 if (!result[0]) {
1261 for (size_t i = 0, j = 0; InterestingDLLs[i]; ++i) {
1262 if (HMODULE h = GetModuleHandleA(InterestingDLLs[i]))
1263 result[j++] = (void *)h;
1264 }
1265 }
1266 return &result[0];
1267}
1268
1269namespace {
1270// Utility for reading loaded PE images.
1271template <typename T> class RVAPtr {
1272 public:
1273 RVAPtr(void *module, uptr rva)
1274 : ptr_(reinterpret_cast<T *>(reinterpret_cast<char *>(module) + rva)) {}
1275 operator T *() { return ptr_; }
1276 T *operator->() { return ptr_; }
1277 T *operator++() { return ++ptr_; }
1278
1279 private:
1280 T *ptr_;
1281};
1282} // namespace
1283
1284// Internal implementation of GetProcAddress. At least since Windows 8,
1285// GetProcAddress appears to initialize DLLs before returning function pointers
1286// into them. This is problematic for the sanitizers, because they typically
1287// want to intercept malloc *before* MSVCRT initializes. Our internal
1288// implementation walks the export list manually without doing initialization.
1289uptr InternalGetProcAddress(void *module, const char *func_name) {
1290 // Check that the module header is full and present.
1291 RVAPtr<IMAGE_DOS_HEADER> dos_stub(module, 0);
1292 RVAPtr<IMAGE_NT_HEADERS> headers(module, dos_stub->e_lfanew);
1293 if (!module || dos_stub->e_magic != IMAGE_DOS_SIGNATURE || // "MZ"
1294 headers->Signature != IMAGE_NT_SIGNATURE || // "PE\0\0"
1295 headers->FileHeader.SizeOfOptionalHeader <
1296 sizeof(IMAGE_OPTIONAL_HEADER)) {
1297 return 0;
1298 }
1299
1300 IMAGE_DATA_DIRECTORY *export_directory =
1301 &headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
1302 if (export_directory->Size == 0)
1303 return 0;
1304 RVAPtr<IMAGE_EXPORT_DIRECTORY> exports(module,
1305 export_directory->VirtualAddress);
1306 RVAPtr<DWORD> functions(module, exports->AddressOfFunctions);
1307 RVAPtr<DWORD> names(module, exports->AddressOfNames);
1308 RVAPtr<WORD> ordinals(module, exports->AddressOfNameOrdinals);
1309
1310 for (DWORD i = 0; i < exports->NumberOfNames; i++) {
1311 RVAPtr<char> name(module, names[i]);
1312 if (!_strcmp(func_name, name)) {
1313 DWORD index = ordinals[i];
1314 RVAPtr<char> func(module, functions[index]);
1315
1316 // Handle forwarded functions.
1317 DWORD offset = functions[index];
1318 if (offset >= export_directory->VirtualAddress &&
1319 offset < export_directory->VirtualAddress + export_directory->Size) {
1320 // An entry for a forwarded function is a string with the following
1321 // format: "<module> . <function_name>" that is stored into the
1322 // exported directory.
1323 char function_name[256];
1324 size_t funtion_name_length = _strlen(func);
1325 if (funtion_name_length >= sizeof(function_name) - 1) {
1326 ReportError("interception_win: func too long: '%s'\n", (char *)func);
1327 InterceptionFailed();
1328 }
1329
1330 _memcpy(function_name, func, funtion_name_length);
1331 function_name[funtion_name_length] = '\0';
1332 char* separator = _strchr(function_name, '.');
1333 if (!separator) {
1334 ReportError("interception_win: no separator in '%s'\n",
1335 function_name);
1336 InterceptionFailed();
1337 }
1338 *separator = '\0';
1339
1340 void* redirected_module = GetModuleHandleA(function_name);
1341 if (!redirected_module) {
1342 ReportError("interception_win: GetModuleHandleA failed for '%s'\n",
1343 function_name);
1344 InterceptionFailed();
1345 }
1346 return InternalGetProcAddress(redirected_module, separator + 1);
1347 }
1348
1349 return (uptr)(char *)func;
1350 }
1351 }
1352
1353 return 0;
1354}
1355
1356bool OverrideFunction(
1357 const char *func_name, uptr new_func, uptr *orig_old_func) {
1358 static const char *kNtDllIgnore[] = {
1359 "memcmp", "memcpy", "memmove", "memset"
1360 };
1361
1362 bool hooked = false;
1363 void **DLLs = InterestingDLLsAvailable();
1364 for (size_t i = 0; DLLs[i]; ++i) {
1365 if (DLLs[i + 1] == nullptr) {
1366 // This is the last DLL, i.e. NTDLL. It exports some functions that
1367 // we only want to override in the CRT.
1368 for (const char *ignored : kNtDllIgnore) {
1369 if (_strcmp(func_name, ignored) == 0)
1370 return hooked;
1371 }
1372 }
1373
1374 uptr func_addr = InternalGetProcAddress(DLLs[i], func_name);
1375 if (func_addr &&
1376 OverrideFunction(func_addr, new_func, orig_old_func)) {
1377 hooked = true;
1378 }
1379 }
1380 return hooked;
1381}
1382
1383bool OverrideImportedFunction(const char *module_to_patch,
1384 const char *imported_module,
1385 const char *function_name, uptr new_function,
1386 uptr *orig_old_func) {
1387 HMODULE module = GetModuleHandleA(module_to_patch);
1388 if (!module)
1389 return false;
1390
1391 // Check that the module header is full and present.
1392 RVAPtr<IMAGE_DOS_HEADER> dos_stub(module, 0);
1393 RVAPtr<IMAGE_NT_HEADERS> headers(module, dos_stub->e_lfanew);
1394 if (!module || dos_stub->e_magic != IMAGE_DOS_SIGNATURE || // "MZ"
1395 headers->Signature != IMAGE_NT_SIGNATURE || // "PE\0\0"
1396 headers->FileHeader.SizeOfOptionalHeader <
1397 sizeof(IMAGE_OPTIONAL_HEADER)) {
1398 return false;
1399 }
1400
1401 IMAGE_DATA_DIRECTORY *import_directory =
1402 &headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
1403
1404 // Iterate the list of imported DLLs. FirstThunk will be null for the last
1405 // entry.
1406 RVAPtr<IMAGE_IMPORT_DESCRIPTOR> imports(module,
1407 import_directory->VirtualAddress);
1408 for (; imports->FirstThunk != 0; ++imports) {
1409 RVAPtr<const char> modname(module, imports->Name);
1410 if (_stricmp(&*modname, imported_module) == 0)
1411 break;
1412 }
1413 if (imports->FirstThunk == 0)
1414 return false;
1415
1416 // We have two parallel arrays: the import address table (IAT) and the table
1417 // of names. They start out containing the same data, but the loader rewrites
1418 // the IAT to hold imported addresses and leaves the name table in
1419 // OriginalFirstThunk alone.
1420 RVAPtr<IMAGE_THUNK_DATA> name_table(module, imports->OriginalFirstThunk);
1421 RVAPtr<IMAGE_THUNK_DATA> iat(module, imports->FirstThunk);
1422 for (; name_table->u1.Ordinal != 0; ++name_table, ++iat) {
1423 if (!IMAGE_SNAP_BY_ORDINAL(name_table->u1.Ordinal)) {
1424 RVAPtr<IMAGE_IMPORT_BY_NAME> import_by_name(
1425 module, name_table->u1.ForwarderString);
1426 const char *funcname = &import_by_name->Name[0];
1427 if (_strcmp(funcname, function_name) == 0)
1428 break;
1429 }
1430 }
1431 if (name_table->u1.Ordinal == 0)
1432 return false;
1433
1434 // Now we have the correct IAT entry. Do the swap. We have to make the page
1435 // read/write first.
1436 if (orig_old_func)
1437 *orig_old_func = iat->u1.AddressOfData;
1438 DWORD old_prot, unused_prot;
1439 if (!VirtualProtect(&iat->u1.AddressOfData, 4, PAGE_EXECUTE_READWRITE,
1440 &old_prot))
1441 return false;
1442 iat->u1.AddressOfData = new_function;
1443 if (!VirtualProtect(&iat->u1.AddressOfData, 4, old_prot, &unused_prot))
1444 return false; // Not clear if this failure bothers us.
1445 return true;
1446}
1447
1448} // namespace __interception
1449
1450#endif // SANITIZER_WINDOWS
1451