1 | //===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/Support/CrashRecoveryContext.h" |
10 | #include "llvm/Config/llvm-config.h" |
11 | #include "llvm/Support/ErrorHandling.h" |
12 | #include "llvm/Support/ExitCodes.h" |
13 | #include "llvm/Support/Signals.h" |
14 | #include "llvm/Support/thread.h" |
15 | #include <cassert> |
16 | #include <mutex> |
17 | #include <setjmp.h> |
18 | |
19 | using namespace llvm; |
20 | |
21 | namespace { |
22 | |
23 | struct CrashRecoveryContextImpl; |
24 | static LLVM_THREAD_LOCAL const CrashRecoveryContextImpl *CurrentContext; |
25 | |
26 | struct CrashRecoveryContextImpl { |
27 | // When threads are disabled, this links up all active |
28 | // CrashRecoveryContextImpls. When threads are enabled there's one thread |
29 | // per CrashRecoveryContext and CurrentContext is a thread-local, so only one |
30 | // CrashRecoveryContextImpl is active per thread and this is always null. |
31 | const CrashRecoveryContextImpl *Next; |
32 | |
33 | CrashRecoveryContext *CRC; |
34 | ::jmp_buf JumpBuffer; |
35 | volatile unsigned Failed : 1; |
36 | unsigned SwitchedThread : 1; |
37 | unsigned ValidJumpBuffer : 1; |
38 | |
39 | public: |
40 | CrashRecoveryContextImpl(CrashRecoveryContext *CRC) noexcept |
41 | : CRC(CRC), Failed(false), SwitchedThread(false), ValidJumpBuffer(false) { |
42 | Next = CurrentContext; |
43 | CurrentContext = this; |
44 | } |
45 | ~CrashRecoveryContextImpl() { |
46 | if (!SwitchedThread) |
47 | CurrentContext = Next; |
48 | } |
49 | |
50 | /// Called when the separate crash-recovery thread was finished, to |
51 | /// indicate that we don't need to clear the thread-local CurrentContext. |
52 | void setSwitchedThread() { |
53 | #if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0 |
54 | SwitchedThread = true; |
55 | #endif |
56 | } |
57 | |
58 | // If the function ran by the CrashRecoveryContext crashes or fails, then |
59 | // 'RetCode' represents the returned error code, as if it was returned by a |
60 | // process. 'Context' represents the signal type on Unix; on Windows, it is |
61 | // the ExceptionContext. |
62 | void HandleCrash(int RetCode, uintptr_t Context) { |
63 | // Eliminate the current context entry, to avoid re-entering in case the |
64 | // cleanup code crashes. |
65 | CurrentContext = Next; |
66 | |
67 | assert(!Failed && "Crash recovery context already failed!" ); |
68 | Failed = true; |
69 | |
70 | if (CRC->DumpStackAndCleanupOnFailure) |
71 | sys::CleanupOnSignal(Context); |
72 | |
73 | CRC->RetCode = RetCode; |
74 | |
75 | // Jump back to the RunSafely we were called under. |
76 | if (ValidJumpBuffer) |
77 | longjmp(env: JumpBuffer, val: 1); |
78 | |
79 | // Otherwise let the caller decide of the outcome of the crash. Currently |
80 | // this occurs when using SEH on Windows with MSVC or clang-cl. |
81 | } |
82 | }; |
83 | |
/// Returns the single mutex that serializes Enable() and Disable(). It is a
/// function-local static, so it is constructed on first use.
std::mutex &getCrashRecoveryContextMutex() {
  static std::mutex TheMutex;
  std::mutex &Ref = TheMutex;
  return Ref;
}
88 | |
89 | static bool gCrashRecoveryEnabled = false; |
90 | |
91 | static LLVM_THREAD_LOCAL const CrashRecoveryContext *IsRecoveringFromCrash; |
92 | |
93 | } // namespace |
94 | |
// Platform-specific hooks defined further down in this file. Enable() calls
// the first and Disable() calls the second.
static void installExceptionOrSignalHandlers();
static void uninstallExceptionOrSignalHandlers();

// Out-of-line defaulted destructor.
CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() = default;
99 | |
100 | CrashRecoveryContext::CrashRecoveryContext() { |
101 | // On Windows, if abort() was previously triggered (and caught by a previous |
102 | // CrashRecoveryContext) the Windows CRT removes our installed signal handler, |
103 | // so we need to install it again. |
104 | sys::DisableSystemDialogsOnCrash(); |
105 | } |
106 | |
107 | CrashRecoveryContext::~CrashRecoveryContext() { |
108 | // Reclaim registered resources. |
109 | CrashRecoveryContextCleanup *i = head; |
110 | const CrashRecoveryContext *PC = IsRecoveringFromCrash; |
111 | IsRecoveringFromCrash = this; |
112 | while (i) { |
113 | CrashRecoveryContextCleanup *tmp = i; |
114 | i = tmp->next; |
115 | tmp->cleanupFired = true; |
116 | tmp->recoverResources(); |
117 | delete tmp; |
118 | } |
119 | IsRecoveringFromCrash = PC; |
120 | |
121 | CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; |
122 | delete CRCI; |
123 | } |
124 | |
125 | bool CrashRecoveryContext::isRecoveringFromCrash() { |
126 | return IsRecoveringFromCrash != nullptr; |
127 | } |
128 | |
129 | CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { |
130 | if (!gCrashRecoveryEnabled) |
131 | return nullptr; |
132 | |
133 | const CrashRecoveryContextImpl *CRCI = CurrentContext; |
134 | if (!CRCI) |
135 | return nullptr; |
136 | |
137 | return CRCI->CRC; |
138 | } |
139 | |
140 | void CrashRecoveryContext::Enable() { |
141 | std::lock_guard<std::mutex> L(getCrashRecoveryContextMutex()); |
142 | // FIXME: Shouldn't this be a refcount or something? |
143 | if (gCrashRecoveryEnabled) |
144 | return; |
145 | gCrashRecoveryEnabled = true; |
146 | installExceptionOrSignalHandlers(); |
147 | } |
148 | |
149 | void CrashRecoveryContext::Disable() { |
150 | std::lock_guard<std::mutex> L(getCrashRecoveryContextMutex()); |
151 | if (!gCrashRecoveryEnabled) |
152 | return; |
153 | gCrashRecoveryEnabled = false; |
154 | uninstallExceptionOrSignalHandlers(); |
155 | } |
156 | |
157 | void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup) |
158 | { |
159 | if (!cleanup) |
160 | return; |
161 | if (head) |
162 | head->prev = cleanup; |
163 | cleanup->next = head; |
164 | head = cleanup; |
165 | } |
166 | |
167 | void |
168 | CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) { |
169 | if (!cleanup) |
170 | return; |
171 | if (cleanup == head) { |
172 | head = cleanup->next; |
173 | if (head) |
174 | head->prev = nullptr; |
175 | } |
176 | else { |
177 | cleanup->prev->next = cleanup->next; |
178 | if (cleanup->next) |
179 | cleanup->next->prev = cleanup->prev; |
180 | } |
181 | delete cleanup; |
182 | } |
183 | |
184 | #if defined(_MSC_VER) |
185 | |
186 | #include <windows.h> // for GetExceptionInformation |
187 | |
188 | // If _MSC_VER is defined, we must have SEH. Use it if it's available. It's way |
189 | // better than VEH. Vectored exception handling catches all exceptions happening |
190 | // on the thread with installed exception handlers, so it can interfere with |
191 | // internal exception handling of other libraries on that thread. SEH works |
192 | // exactly as you would expect normal exception handling to work: it only |
193 | // catches exceptions if they would bubble out from the stack frame with __try / |
194 | // __except. |
195 | |
// With SEH there is nothing to install or remove globally: RunSafely guards
// each call with its own __try / __except.
static void installExceptionOrSignalHandlers() {
}

static void uninstallExceptionOrSignalHandlers() {
}
198 | |
199 | // We need this function because the call to GetExceptionInformation() can only |
200 | // occur inside the __except evaluation block |
201 | static int ExceptionFilter(_EXCEPTION_POINTERS *Except) { |
202 | // Lookup the current thread local recovery object. |
203 | const CrashRecoveryContextImpl *CRCI = CurrentContext; |
204 | |
205 | if (!CRCI) { |
206 | // Something has gone horribly wrong, so let's just tell everyone |
207 | // to keep searching |
208 | CrashRecoveryContext::Disable(); |
209 | return EXCEPTION_CONTINUE_SEARCH; |
210 | } |
211 | |
212 | int RetCode = (int)Except->ExceptionRecord->ExceptionCode; |
213 | if ((RetCode & 0xF0000000) == 0xE0000000) |
214 | RetCode &= ~0xF0000000; // this crash was generated by sys::Process::Exit |
215 | |
216 | // Handle the crash |
217 | const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash( |
218 | RetCode, reinterpret_cast<uintptr_t>(Except)); |
219 | |
220 | return EXCEPTION_EXECUTE_HANDLER; |
221 | } |
222 | |
223 | #if defined(__clang__) && defined(_M_IX86) |
224 | // Work around PR44697. |
225 | __attribute__((optnone)) |
226 | #endif |
227 | bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) { |
228 | if (!gCrashRecoveryEnabled) { |
229 | Fn(); |
230 | return true; |
231 | } |
232 | assert(!Impl && "Crash recovery context already initialized!" ); |
233 | Impl = new CrashRecoveryContextImpl(this); |
234 | __try { |
235 | Fn(); |
236 | } __except (ExceptionFilter(GetExceptionInformation())) { |
237 | return false; |
238 | } |
239 | return true; |
240 | } |
241 | |
242 | #else // !_MSC_VER |
243 | |
244 | #if defined(_WIN32) |
245 | // This is a non-MSVC compiler, probably mingw gcc or clang without |
246 | // -fms-extensions. Use vectored exception handling (VEH). |
247 | // |
248 | // On Windows, we can make use of vectored exception handling to catch most |
249 | // crashing situations. Note that this does mean we will be alerted of |
250 | // exceptions *before* structured exception handling has the opportunity to |
251 | // catch it. Unfortunately, this causes problems in practice with other code |
252 | // running on threads with LLVM crash recovery contexts, so we would like to |
253 | // eventually move away from VEH. |
254 | // |
255 | // Vectored works on a per-thread basis, which is an advantage over |
256 | // SetUnhandledExceptionFilter. SetUnhandledExceptionFilter also doesn't have |
257 | // any native support for chaining exception handlers, but VEH allows more than |
258 | // one. |
259 | // |
260 | // The vectored exception handler functionality was added in Windows |
261 | // XP, so if support for older versions of Windows is required, |
262 | // it will have to be added. |
263 | |
264 | #include "llvm/Support/Windows/WindowsSupport.h" |
265 | |
266 | static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) |
267 | { |
268 | // DBG_PRINTEXCEPTION_WIDE_C is not properly defined on all supported |
269 | // compilers and platforms, so we define it manually. |
270 | constexpr ULONG DbgPrintExceptionWideC = 0x4001000AL; |
271 | switch (ExceptionInfo->ExceptionRecord->ExceptionCode) |
272 | { |
273 | case DBG_PRINTEXCEPTION_C: |
274 | case DbgPrintExceptionWideC: |
275 | case 0x406D1388: // set debugger thread name |
276 | return EXCEPTION_CONTINUE_EXECUTION; |
277 | } |
278 | |
279 | // Lookup the current thread local recovery object. |
280 | const CrashRecoveryContextImpl *CRCI = CurrentContext; |
281 | |
282 | if (!CRCI) { |
283 | // Something has gone horribly wrong, so let's just tell everyone |
284 | // to keep searching |
285 | CrashRecoveryContext::Disable(); |
286 | return EXCEPTION_CONTINUE_SEARCH; |
287 | } |
288 | |
289 | // TODO: We can capture the stack backtrace here and store it on the |
290 | // implementation if we so choose. |
291 | |
292 | int RetCode = (int)ExceptionInfo->ExceptionRecord->ExceptionCode; |
293 | if ((RetCode & 0xF0000000) == 0xE0000000) |
294 | RetCode &= ~0xF0000000; // this crash was generated by sys::Process::Exit |
295 | |
296 | // Handle the crash |
297 | const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash( |
298 | RetCode, reinterpret_cast<uintptr_t>(ExceptionInfo)); |
299 | |
300 | // Note that we don't actually get here because HandleCrash calls |
301 | // longjmp, which means the HandleCrash function never returns. |
302 | llvm_unreachable("Handled the crash, should have longjmp'ed out of here" ); |
303 | } |
304 | |
305 | // Because the Enable and Disable calls are static, it means that |
306 | // there may not actually be an Impl available, or even a current |
307 | // CrashRecoveryContext at all. So we make use of a thread-local |
308 | // exception table. The handles contained in here will either be |
309 | // non-NULL, valid VEH handles, or NULL. |
310 | static LLVM_THREAD_LOCAL const void* sCurrentExceptionHandle; |
311 | |
312 | static void installExceptionOrSignalHandlers() { |
313 | // We can set up vectored exception handling now. We will install our |
314 | // handler as the front of the list, though there's no assurances that |
315 | // it will remain at the front (another call could install itself before |
316 | // our handler). This 1) isn't likely, and 2) shouldn't cause problems. |
317 | PVOID handle = ::AddVectoredExceptionHandler(1, ExceptionHandler); |
318 | sCurrentExceptionHandle = handle; |
319 | } |
320 | |
321 | static void uninstallExceptionOrSignalHandlers() { |
322 | PVOID currentHandle = const_cast<PVOID>(sCurrentExceptionHandle); |
323 | if (currentHandle) { |
324 | // Now we can remove the vectored exception handler from the chain |
325 | ::RemoveVectoredExceptionHandler(currentHandle); |
326 | |
327 | // Reset the handle in our thread-local set. |
328 | sCurrentExceptionHandle = NULL; |
329 | } |
330 | } |
331 | |
332 | #else // !_WIN32 |
333 | |
334 | // Generic POSIX implementation. |
335 | // |
336 | // This implementation relies on synchronous signals being delivered to the |
337 | // current thread. We use a thread local object to keep track of the active |
338 | // crash recovery context, and install signal handlers to invoke HandleCrash on |
339 | // the active object. |
340 | // |
341 | // This implementation does not attempt to chain signal handlers in any |
342 | // reliable fashion -- if we get a signal outside of a crash recovery context we |
343 | // simply disable crash recovery and raise the signal again. |
344 | |
345 | #include <signal.h> |
346 | |
// The signals that are routed to CrashRecoverySignalHandler.
static constexpr int Signals[] = {SIGABRT, SIGBUS,  SIGFPE,
                                  SIGILL,  SIGSEGV, SIGTRAP};
static constexpr unsigned NumSignals = std::size(Signals);
// The dispositions that were in place before ours, index-aligned with Signals.
static struct sigaction PrevActions[NumSignals];
351 | |
352 | static void CrashRecoverySignalHandler(int Signal) { |
353 | // Lookup the current thread local recovery object. |
354 | const CrashRecoveryContextImpl *CRCI = CurrentContext; |
355 | |
356 | if (!CRCI) { |
357 | // We didn't find a crash recovery context -- this means either we got a |
358 | // signal on a thread we didn't expect it on, the application got a signal |
359 | // outside of a crash recovery context, or something else went horribly |
360 | // wrong. |
361 | // |
362 | // Disable crash recovery and raise the signal again. The assumption here is |
363 | // that the enclosing application will terminate soon, and we won't want to |
364 | // attempt crash recovery again. |
365 | // |
366 | // This call of Disable isn't thread safe, but it doesn't actually matter. |
367 | CrashRecoveryContext::Disable(); |
368 | raise(sig: Signal); |
369 | |
370 | // The signal will be thrown once the signal mask is restored. |
371 | return; |
372 | } |
373 | |
374 | // Unblock the signal we received. |
375 | sigset_t SigMask; |
376 | sigemptyset(set: &SigMask); |
377 | sigaddset(set: &SigMask, signo: Signal); |
378 | sigprocmask(SIG_UNBLOCK, set: &SigMask, oset: nullptr); |
379 | |
380 | // Return the same error code as if the program crashed, as mentioned in the |
381 | // section "Exit Status for Commands": |
382 | // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html |
383 | int RetCode = 128 + Signal; |
384 | |
385 | // Don't consider a broken pipe as a crash (see clang/lib/Driver/Driver.cpp) |
386 | if (Signal == SIGPIPE) |
387 | RetCode = EX_IOERR; |
388 | |
389 | if (CRCI) |
390 | const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(RetCode, Context: Signal); |
391 | } |
392 | |
393 | static void installExceptionOrSignalHandlers() { |
394 | // Setup the signal handler. |
395 | struct sigaction Handler; |
396 | Handler.sa_handler = CrashRecoverySignalHandler; |
397 | Handler.sa_flags = 0; |
398 | sigemptyset(set: &Handler.sa_mask); |
399 | |
400 | for (unsigned i = 0; i != NumSignals; ++i) { |
401 | sigaction(sig: Signals[i], act: &Handler, oact: &PrevActions[i]); |
402 | } |
403 | } |
404 | |
405 | static void uninstallExceptionOrSignalHandlers() { |
406 | // Restore the previous signal handlers. |
407 | for (unsigned i = 0; i != NumSignals; ++i) |
408 | sigaction(sig: Signals[i], act: &PrevActions[i], oact: nullptr); |
409 | } |
410 | |
411 | #endif // !_WIN32 |
412 | |
413 | bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) { |
414 | // If crash recovery is disabled, do nothing. |
415 | if (gCrashRecoveryEnabled) { |
416 | assert(!Impl && "Crash recovery context already initialized!" ); |
417 | CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this); |
418 | Impl = CRCI; |
419 | |
420 | CRCI->ValidJumpBuffer = true; |
421 | if (setjmp(CRCI->JumpBuffer) != 0) { |
422 | return false; |
423 | } |
424 | } |
425 | |
426 | Fn(); |
427 | return true; |
428 | } |
429 | |
430 | #endif // !_MSC_VER |
431 | |
432 | [[noreturn]] void CrashRecoveryContext::HandleExit(int RetCode) { |
433 | #if defined(_WIN32) |
434 | // Since the exception code is actually of NTSTATUS type, we use the |
435 | // Microsoft-recommended 0xE prefix, to signify that this is a user error. |
436 | // This value is a combination of the customer field (bit 29) and severity |
437 | // field (bits 30-31) in the NTSTATUS specification. |
438 | ::RaiseException(0xE0000000 | RetCode, 0, 0, NULL); |
439 | #else |
440 | // On Unix we don't need to raise an exception, we go directly to |
441 | // HandleCrash(), then longjmp will unwind the stack for us. |
442 | CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *)Impl; |
443 | assert(CRCI && "Crash recovery context never initialized!" ); |
444 | CRCI->HandleCrash(RetCode, Context: 0 /*no sig num*/); |
445 | #endif |
446 | llvm_unreachable("Most likely setjmp wasn't called!" ); |
447 | } |
448 | |
449 | bool CrashRecoveryContext::isCrash(int RetCode) { |
450 | #if defined(_WIN32) |
451 | // On Windows, the code is interpreted as NTSTATUS. The two high bits |
452 | // represent the severity. Values starting with 0x80000000 are reserved for |
453 | // "warnings"; values of 0xC0000000 and up are for "errors". In practice, both |
454 | // are interpreted as a non-continuable signal. |
455 | unsigned Code = ((unsigned)RetCode & 0xF0000000) >> 28; |
456 | if (Code != 0xC && Code != 8) |
457 | return false; |
458 | #else |
459 | // On Unix, signals are represented by return codes of 128 or higher. |
460 | // Exit code 128 is a reserved value and should not be raised as a signal. |
461 | if (RetCode <= 128) |
462 | return false; |
463 | #endif |
464 | return true; |
465 | } |
466 | |
467 | bool CrashRecoveryContext::throwIfCrash(int RetCode) { |
468 | if (!isCrash(RetCode)) |
469 | return false; |
470 | #if defined(_WIN32) |
471 | ::RaiseException(RetCode, 0, 0, NULL); |
472 | #else |
473 | llvm::sys::unregisterHandlers(); |
474 | raise(sig: RetCode - 128); |
475 | #endif |
476 | return true; |
477 | } |
478 | |
// FIXME: Portability.
/// Moves the calling thread to Darwin's background priority band. Does
/// nothing on other platforms.
static void setThreadBackgroundPriority() {
#if defined(__APPLE__)
  setpriority(PRIO_DARWIN_THREAD, 0, PRIO_DARWIN_BG);
#endif
}
485 | |
/// On Apple platforms, reports whether getpriority() for the calling thread
/// returns 1. Always false elsewhere.
static bool hasThreadBackgroundPriority() {
#if defined(__APPLE__)
  const int ThreadPriority = getpriority(PRIO_DARWIN_THREAD, 0);
  return ThreadPriority == 1;
#else
  return false;
#endif
}
493 | |
namespace {
/// Argument bundle handed to RunSafelyOnThread_Dispatch through a void*.
struct RunSafelyOnThreadInfo {
  /// The function to run under crash recovery.
  function_ref<void()> Fn;
  /// The context whose RunSafely() is invoked on the new thread.
  CrashRecoveryContext *CRC;
  /// Whether the new thread should switch to background priority first.
  bool UseBackgroundPriority;
  /// Out-parameter: the value RunSafely() returned.
  bool Result;
};
} // namespace
502 | |
503 | static void RunSafelyOnThread_Dispatch(void *UserData) { |
504 | RunSafelyOnThreadInfo *Info = |
505 | reinterpret_cast<RunSafelyOnThreadInfo*>(UserData); |
506 | |
507 | if (Info->UseBackgroundPriority) |
508 | setThreadBackgroundPriority(); |
509 | |
510 | Info->Result = Info->CRC->RunSafely(Fn: Info->Fn); |
511 | } |
512 | bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn, |
513 | unsigned RequestedStackSize) { |
514 | bool UseBackgroundPriority = hasThreadBackgroundPriority(); |
515 | RunSafelyOnThreadInfo Info = { .Fn: Fn, .CRC: this, .UseBackgroundPriority: UseBackgroundPriority, .Result: false }; |
516 | llvm::thread Thread(RequestedStackSize == 0 |
517 | ? std::nullopt |
518 | : std::optional<unsigned>(RequestedStackSize), |
519 | RunSafelyOnThread_Dispatch, &Info); |
520 | Thread.join(); |
521 | |
522 | if (CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *)Impl) |
523 | CRC->setSwitchedThread(); |
524 | return Info.Result; |
525 | } |
526 | |