1 | //=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file lowers exception-related instructions and setjmp/longjmp function |
11 | /// calls to use Emscripten's library functions. The pass uses JavaScript's try |
12 | /// and catch mechanism in case of Emscripten EH/SjLj and Wasm EH intrinsics in |
13 | /// case of Wasm SjLj. |
14 | /// |
15 | /// * Emscripten exception handling |
16 | /// This pass lowers invokes and landingpads into library functions in JS glue |
17 | /// code. Invokes are lowered into function wrappers called invoke wrappers that |
18 | /// exist in JS side, which wraps the original function call with JS try-catch. |
19 | /// If an exception occurred, cxa_throw() function in JS side sets some |
20 | /// variables (see below) so we can check whether an exception occurred from |
21 | /// wasm code and handle it appropriately. |
22 | /// |
23 | /// * Emscripten setjmp-longjmp handling |
24 | /// This pass lowers setjmp to a reasonably-performant approach for emscripten. |
25 | /// The idea is that each block with a setjmp is broken up into two parts: the |
26 | /// part containing setjmp and the part right after the setjmp. The latter part |
27 | /// is either reached from the setjmp, or later from a longjmp. To handle the |
28 | /// longjmp, all calls that might longjmp are also called using invoke wrappers |
29 | /// and thus JS / try-catch. JS longjmp() function also sets some variables so |
30 | /// we can check whether a longjmp occurred from wasm code. Each block with a |
31 | /// function call that might longjmp is also split up after the longjmp call. |
32 | /// After the longjmp call, we check whether a longjmp occurred, and if it did, |
33 | /// which setjmp it corresponds to, and jump to the right post-setjmp block. |
34 | /// We assume setjmp-longjmp handling always runs after EH handling, which means |
35 | /// we don't expect any exception-related instructions when SjLj runs. |
36 | /// FIXME Currently this scheme does not support indirect call of setjmp, |
37 | /// because of the limitation of the scheme itself. fastcomp does not support it |
38 | /// either. |
39 | /// |
40 | /// In detail, this pass does following things: |
41 | /// |
42 | /// 1) Assumes the existence of global variables: __THREW__, __threwValue |
43 | /// __THREW__ and __threwValue are defined in compiler-rt in Emscripten. |
44 | /// These variables are used for both exceptions and setjmp/longjmps. |
45 | /// __THREW__ indicates whether an exception or a longjmp occurred or not. 0 |
46 | /// means nothing occurred, 1 means an exception occurred, and other numbers |
47 | /// mean a longjmp occurred. In the case of longjmp, __THREW__ variable |
48 | /// indicates the corresponding setjmp buffer the longjmp corresponds to. |
49 | /// __threwValue is 0 for exceptions, and the argument to longjmp in case of |
50 | /// longjmp. |
51 | /// |
52 | /// * Emscripten exception handling |
53 | /// |
54 | /// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions |
55 | /// at link time. setThrew exists in Emscripten's compiler-rt: |
56 | /// |
57 | /// void setThrew(uintptr_t threw, int value) { |
58 | /// if (__THREW__ == 0) { |
59 | /// __THREW__ = threw; |
60 | /// __threwValue = value; |
61 | /// } |
62 | /// } |
63 | // |
64 | /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code. |
65 | /// In exception handling, getTempRet0 indicates the type of an exception |
66 | /// caught, and in setjmp/longjmp, it means the second argument to longjmp |
67 | /// function. |
68 | /// |
69 | /// 3) Lower |
70 | /// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad |
71 | /// into |
72 | /// __THREW__ = 0; |
73 | /// call @__invoke_SIG(func, arg1, arg2) |
74 | /// %__THREW__.val = __THREW__; |
75 | /// __THREW__ = 0; |
76 | /// if (%__THREW__.val == 1) |
77 | /// goto %lpad |
78 | /// else |
79 | /// goto %invoke.cont |
80 | /// SIG is a mangled string generated based on the LLVM IR-level function |
81 | /// signature. After LLVM IR types are lowered to the target wasm types, |
82 | /// the names for these wrappers will change based on wasm types as well, |
83 | /// as in invoke_vi (function takes an int and returns void). The bodies of |
84 | /// these wrappers will be generated in JS glue code, and inside those |
85 | /// wrappers we use JS try-catch to generate actual exception effects. It |
86 | /// also calls the original callee function. An example wrapper in JS code |
87 | /// would look like this: |
88 | /// function invoke_vi(index,a1) { |
89 | /// try { |
90 | /// Module["dynCall_vi"](index,a1); // This calls original callee |
91 | /// } catch(e) { |
92 | /// if (typeof e !== 'number' && e !== 'longjmp') throw e; |
93 | /// _setThrew(1, 0); // setThrew is called here |
94 | /// } |
95 | /// } |
96 | /// If an exception is thrown, __THREW__ will be set to true in a wrapper, |
97 | /// so we can jump to the right BB based on this value. |
98 | /// |
99 | /// 4) Lower |
100 | /// %val = landingpad catch c1 catch c2 catch c3 ... |
101 | /// ... use %val ... |
102 | /// into |
103 | /// %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...) |
104 | /// %val = {%fmc, getTempRet0()} |
105 | /// ... use %val ... |
106 | /// Here N is a number calculated based on the number of clauses. |
107 | /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code. |
108 | /// |
109 | /// 5) Lower |
110 | /// resume {%a, %b} |
111 | /// into |
112 | /// call @__resumeException(%a) |
113 | /// where __resumeException() is a function in JS glue code. |
114 | /// |
115 | /// 6) Lower |
116 | /// call @llvm.eh.typeid.for(type) (intrinsic) |
117 | /// into |
118 | /// call @llvm_eh_typeid_for(type) |
119 | /// llvm_eh_typeid_for function will be generated in JS glue code. |
120 | /// |
121 | /// * Emscripten setjmp / longjmp handling |
122 | /// |
123 | /// If there are calls to longjmp() |
124 | /// |
125 | /// 1) Lower |
126 | /// longjmp(env, val) |
127 | /// into |
128 | /// emscripten_longjmp(env, val) |
129 | /// |
130 | /// If there are calls to setjmp() |
131 | /// |
132 | /// 2) In the function entry that calls setjmp, initialize |
133 | /// functionInvocationId as follows: |
134 | /// |
135 | /// functionInvocationId = alloca(4) |
136 | /// |
137 | /// Note: the alloca size is not important as this pointer is |
138 | /// merely used for pointer comparisons. |
139 | /// |
140 | /// 3) Lower |
141 | /// setjmp(env) |
142 | /// into |
143 | /// __wasm_setjmp(env, label, functionInvocationId) |
144 | /// |
145 | /// __wasm_setjmp records the necessary info (the label and |
146 | /// functionInvocationId) to the "env". |
147 | /// A BB with setjmp is split into two after setjmp call in order to |
148 | /// make the post-setjmp BB the possible destination of longjmp BB. |
149 | /// |
150 | /// 4) Lower every call that might longjmp into |
151 | /// __THREW__ = 0; |
152 | /// call @__invoke_SIG(func, arg1, arg2) |
153 | /// %__THREW__.val = __THREW__; |
154 | /// __THREW__ = 0; |
155 | /// %__threwValue.val = __threwValue; |
156 | /// if (%__THREW__.val != 0 & %__threwValue.val != 0) { |
157 | /// %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId); |
158 | /// if (%label == 0) |
159 | /// emscripten_longjmp(%__THREW__.val, %__threwValue.val); |
160 | /// setTempRet0(%__threwValue.val); |
161 | /// } else { |
162 | /// %label = -1; |
163 | /// } |
164 | /// longjmp_result = getTempRet0(); |
165 | /// switch %label { |
166 | /// label 1: goto post-setjmp BB 1 |
167 | /// label 2: goto post-setjmp BB 2 |
168 | /// ... |
169 | /// default: goto splitted next BB |
170 | /// } |
171 | /// |
172 | /// __wasm_setjmp_test examines the jmp buf to see if it was for a matching |
173 | /// setjmp call. After calling an invoke wrapper, if a longjmp occurred, |
174 | /// __THREW__ will be the address of matching jmp_buf buffer and |
175 | /// __threwValue be the second argument to longjmp. |
176 | /// __wasm_setjmp_test returns a setjmp label, a unique ID to each setjmp |
177 | /// callsite. Label 0 means this longjmp buffer does not correspond to one |
178 | /// of the setjmp callsites in this function, so in this case we just chain |
179 | /// the longjmp to the caller. Label -1 means no longjmp occurred. |
180 | /// Otherwise we jump to the right post-setjmp BB based on the label. |
181 | /// |
182 | /// * Wasm setjmp / longjmp handling |
183 | /// This mode still uses some Emscripten library functions but not JavaScript's |
184 | /// try-catch mechanism. It instead uses Wasm exception handling intrinsics, |
185 | /// which will be lowered to exception handling instructions. |
186 | /// |
187 | /// If there are calls to longjmp() |
188 | /// |
189 | /// 1) Lower |
190 | /// longjmp(env, val) |
191 | /// into |
192 | /// __wasm_longjmp(env, val) |
193 | /// |
194 | /// If there are calls to setjmp() |
195 | /// |
196 | /// 2) and 3): The same as 2) and 3) in Emscripten SjLj. |
197 | /// (functionInvocationId initialization + setjmp callsite transformation) |
198 | /// |
199 | /// 4) Create a catchpad with a wasm.catch() intrinsic, which returns the value |
200 | /// thrown by __wasm_longjmp function. In the runtime library, we have an |
201 | /// equivalent of the following struct: |
202 | /// |
203 | /// struct __WasmLongjmpArgs { |
204 | /// void *env; |
205 | /// int val; |
206 | /// }; |
207 | /// |
208 | /// The thrown value here is a pointer to the struct. We use this struct to |
209 | /// transfer two values by throwing a single value. Wasm throw and catch |
210 | /// instructions are capable of throwing and catching multiple values, but |
211 | /// it also requires multivalue support that is currently not very reliable. |
212 | /// TODO Switch to throwing and catching two values without using the struct |
213 | /// |
214 | /// All longjmpable function calls will be converted to an invoke that will |
215 | /// unwind to this catchpad in case a longjmp occurs. Within the catchpad, we |
216 | /// test the thrown values using __wasm_setjmp_test function as we do for |
217 | /// Emscripten SjLj. The main difference is, in Emscripten SjLj, we need to |
218 | /// transform every longjmpable callsite into a sequence of code including |
219 | /// __wasm_setjmp_test() call; in Wasm SjLj we do the testing in only one |
220 | /// place, in this catchpad. |
221 | /// |
222 | /// After calling __wasm_setjmp_test(), if the longjmp does not |
223 | /// correspond to one of the setjmps within the current function, it rethrows |
224 | /// the longjmp by calling __wasm_longjmp(). If it corresponds to one of |
225 | /// setjmps in the function, we jump to the beginning of the function, which |
226 | /// contains a switch to each post-setjmp BB. Again, in Emscripten SjLj, this |
227 | /// switch is added for every longjmpable callsite; in Wasm SjLj we do this |
228 | /// only once at the top of the function. (after functionInvocationId |
229 | /// initialization) |
230 | /// |
231 | /// The below is the pseudocode for what we have described |
232 | /// |
233 | /// entry: |
234 | /// Initialize functionInvocationId |
235 | /// |
236 | /// setjmp.dispatch: |
237 | /// switch %label { |
238 | /// label 1: goto post-setjmp BB 1 |
239 | /// label 2: goto post-setjmp BB 2 |
240 | /// ... |
241 | /// default: goto splitted next BB |
242 | /// } |
243 | /// ... |
244 | /// |
245 | /// bb: |
246 | /// invoke void @foo() ;; foo is a longjmpable function |
247 | /// to label %next unwind label %catch.dispatch.longjmp |
248 | /// ... |
249 | /// |
250 | /// catch.dispatch.longjmp: |
251 | /// %0 = catchswitch within none [label %catch.longjmp] unwind to caller |
252 | /// |
253 | /// catch.longjmp: |
254 | /// %longjmp.args = wasm.catch() ;; struct __WasmLongjmpArgs |
255 | /// %env = load 'env' field from __WasmLongjmpArgs |
256 | /// %val = load 'val' field from __WasmLongjmpArgs |
257 | /// %label = __wasm_setjmp_test(%env, functionInvocationId); |
258 | /// if (%label == 0) |
259 | /// __wasm_longjmp(%env, %val) |
260 | /// catchret to %setjmp.dispatch |
261 | /// |
262 | ///===----------------------------------------------------------------------===// |
263 | |
264 | #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" |
265 | #include "WebAssembly.h" |
266 | #include "WebAssemblyTargetMachine.h" |
267 | #include "llvm/ADT/StringExtras.h" |
268 | #include "llvm/CodeGen/TargetPassConfig.h" |
269 | #include "llvm/CodeGen/WasmEHFuncInfo.h" |
270 | #include "llvm/IR/DebugInfoMetadata.h" |
271 | #include "llvm/IR/Dominators.h" |
272 | #include "llvm/IR/IRBuilder.h" |
273 | #include "llvm/IR/IntrinsicsWebAssembly.h" |
274 | #include "llvm/IR/Module.h" |
275 | #include "llvm/Support/CommandLine.h" |
276 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
277 | #include "llvm/Transforms/Utils/Local.h" |
278 | #include "llvm/Transforms/Utils/SSAUpdater.h" |
279 | #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" |
280 | #include <set> |
281 | |
282 | using namespace llvm; |
283 | |
284 | #define DEBUG_TYPE "wasm-lower-em-ehsjlj" |
285 | |
286 | static cl::list<std::string> |
287 | EHAllowlist("emscripten-cxx-exceptions-allowed" , |
288 | cl::desc("The list of function names in which Emscripten-style " |
289 | "exception handling is enabled (see emscripten " |
290 | "EMSCRIPTEN_CATCHING_ALLOWED options)" ), |
291 | cl::CommaSeparated); |
292 | |
293 | namespace { |
294 | class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { |
295 | bool EnableEmEH; // Enable Emscripten exception handling |
296 | bool EnableEmSjLj; // Enable Emscripten setjmp/longjmp handling |
297 | bool EnableWasmSjLj; // Enable Wasm setjmp/longjmp handling |
298 | bool DoSjLj; // Whether we actually perform setjmp/longjmp handling |
299 | |
300 | GlobalVariable *ThrewGV = nullptr; // __THREW__ (Emscripten) |
301 | GlobalVariable *ThrewValueGV = nullptr; // __threwValue (Emscripten) |
302 | Function *GetTempRet0F = nullptr; // getTempRet0() (Emscripten) |
303 | Function *SetTempRet0F = nullptr; // setTempRet0() (Emscripten) |
304 | Function *ResumeF = nullptr; // __resumeException() (Emscripten) |
305 | Function *EHTypeIDF = nullptr; // llvm.eh.typeid.for() (intrinsic) |
306 | Function *EmLongjmpF = nullptr; // emscripten_longjmp() (Emscripten) |
307 | Function *WasmSetjmpF = nullptr; // __wasm_setjmp() (Emscripten) |
308 | Function *WasmSetjmpTestF = nullptr; // __wasm_setjmp_test() (Emscripten) |
309 | Function *WasmLongjmpF = nullptr; // __wasm_longjmp() (Emscripten) |
310 | Function *CatchF = nullptr; // wasm.catch() (intrinsic) |
311 | |
312 | // type of 'struct __WasmLongjmpArgs' defined in emscripten |
313 | Type *LongjmpArgsTy = nullptr; |
314 | |
315 | // __cxa_find_matching_catch_N functions. |
316 | // Indexed by the number of clauses in an original landingpad instruction. |
317 | DenseMap<int, Function *> FindMatchingCatches; |
318 | // Map of <function signature string, invoke_ wrappers> |
319 | StringMap<Function *> InvokeWrappers; |
320 | // Set of allowed function names for exception handling |
321 | std::set<std::string, std::less<>> EHAllowlistSet; |
322 | // Functions that contains calls to setjmp |
323 | SmallPtrSet<Function *, 8> SetjmpUsers; |
324 | |
325 | StringRef getPassName() const override { |
326 | return "WebAssembly Lower Emscripten Exceptions" ; |
327 | } |
328 | |
329 | using InstVector = SmallVectorImpl<Instruction *>; |
330 | bool runEHOnFunction(Function &F); |
331 | bool runSjLjOnFunction(Function &F); |
332 | void handleLongjmpableCallsForEmscriptenSjLj( |
333 | Function &F, Instruction *FunctionInvocationId, |
334 | SmallVectorImpl<PHINode *> &SetjmpRetPHIs); |
335 | void |
336 | handleLongjmpableCallsForWasmSjLj(Function &F, |
337 | Instruction *FunctionInvocationId, |
338 | SmallVectorImpl<PHINode *> &SetjmpRetPHIs); |
339 | Function *getFindMatchingCatch(Module &M, unsigned NumClauses); |
340 | |
341 | Value *wrapInvoke(CallBase *CI); |
342 | void wrapTestSetjmp(BasicBlock *BB, DebugLoc DL, Value *Threw, |
343 | Value *FunctionInvocationId, Value *&Label, |
344 | Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB, |
345 | PHINode *&CallEmLongjmpBBThrewPHI, |
346 | PHINode *&CallEmLongjmpBBThrewValuePHI, |
347 | BasicBlock *&EndBB); |
348 | Function *getInvokeWrapper(CallBase *CI); |
349 | |
350 | bool areAllExceptionsAllowed() const { return EHAllowlistSet.empty(); } |
351 | bool supportsException(const Function *F) const { |
352 | return EnableEmEH && |
353 | (areAllExceptionsAllowed() || EHAllowlistSet.count(x: F->getName())); |
354 | } |
355 | void replaceLongjmpWith(Function *LongjmpF, Function *NewF); |
356 | |
357 | void rebuildSSA(Function &F); |
358 | |
359 | public: |
360 | static char ID; |
361 | |
362 | WebAssemblyLowerEmscriptenEHSjLj() |
363 | : ModulePass(ID), EnableEmEH(WebAssembly::WasmEnableEmEH), |
364 | EnableEmSjLj(WebAssembly::WasmEnableEmSjLj), |
365 | EnableWasmSjLj(WebAssembly::WasmEnableSjLj) { |
366 | assert(!(EnableEmSjLj && EnableWasmSjLj) && |
367 | "Two SjLj modes cannot be turned on at the same time" ); |
368 | assert(!(EnableEmEH && EnableWasmSjLj) && |
369 | "Wasm SjLj should be only used with Wasm EH" ); |
370 | EHAllowlistSet.insert(first: EHAllowlist.begin(), last: EHAllowlist.end()); |
371 | } |
372 | bool runOnModule(Module &M) override; |
373 | |
374 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
375 | AU.addRequired<DominatorTreeWrapperPass>(); |
376 | } |
377 | }; |
378 | } // End anonymous namespace |
379 | |
// Storage for the pass's static ID member, which the constructor hands to the
// ModulePass base.
char WebAssemblyLowerEmscriptenEHSjLj::ID = 0;
// Register the pass under the DEBUG_TYPE argument string with a human-readable
// name. NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS -- confirm against
// PassSupport.h.
INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE,
                "WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp",
                false, false)
384 | |
385 | ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj() { |
386 | return new WebAssemblyLowerEmscriptenEHSjLj(); |
387 | } |
388 | |
389 | static bool canThrow(const Value *V) { |
390 | if (const auto *F = dyn_cast<const Function>(Val: V)) { |
391 | // Intrinsics cannot throw |
392 | if (F->isIntrinsic()) |
393 | return false; |
394 | StringRef Name = F->getName(); |
395 | // leave setjmp and longjmp (mostly) alone, we process them properly later |
396 | if (Name == "setjmp" || Name == "longjmp" || Name == "emscripten_longjmp" ) |
397 | return false; |
398 | return !F->doesNotThrow(); |
399 | } |
400 | // not a function, so an indirect call - can throw, we can't tell |
401 | return true; |
402 | } |
403 | |
404 | // Get a thread-local global variable with the given name. If it doesn't exist |
405 | // declare it, which will generate an import and assume that it will exist at |
406 | // link time. |
407 | static GlobalVariable *getGlobalVariable(Module &M, Type *Ty, |
408 | WebAssemblyTargetMachine &TM, |
409 | const char *Name) { |
410 | auto *GV = dyn_cast<GlobalVariable>(Val: M.getOrInsertGlobal(Name, Ty)); |
411 | if (!GV) |
412 | report_fatal_error(reason: Twine("unable to create global: " ) + Name); |
413 | |
414 | // Variables created by this function are thread local. If the target does not |
415 | // support TLS, we depend on CoalesceFeaturesAndStripAtomics to downgrade it |
416 | // to non-thread-local ones, in which case we don't allow this object to be |
417 | // linked with other objects using shared memory. |
418 | GV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel); |
419 | return GV; |
420 | } |
421 | |
422 | // Simple function name mangler. |
423 | // This function simply takes LLVM's string representation of parameter types |
424 | // and concatenate them with '_'. There are non-alphanumeric characters but llc |
425 | // is ok with it, and we need to postprocess these names after the lowering |
426 | // phase anyway. |
427 | static std::string getSignature(FunctionType *FTy) { |
428 | std::string Sig; |
429 | raw_string_ostream OS(Sig); |
430 | OS << *FTy->getReturnType(); |
431 | for (Type *ParamTy : FTy->params()) |
432 | OS << "_" << *ParamTy; |
433 | if (FTy->isVarArg()) |
434 | OS << "_..." ; |
435 | Sig = OS.str(); |
436 | erase_if(C&: Sig, P: isSpace); |
437 | // When s2wasm parses .s file, a comma means the end of an argument. So a |
438 | // mangled function name can contain any character but a comma. |
439 | llvm::replace(Range&: Sig, OldValue: ',', NewValue: '.'); |
440 | return Sig; |
441 | } |
442 | |
443 | static Function *getFunction(FunctionType *Ty, const Twine &Name, Module *M) { |
444 | return Function::Create(Ty, Linkage: GlobalValue::ExternalLinkage, N: Name, M); |
445 | } |
446 | |
447 | static void markAsImported(Function *F) { |
448 | // Tell the linker that this function is expected to be imported from the |
449 | // 'env' module. This is necessary for functions that do not have fixed names |
450 | // (e.g. __import_xyz). These names cannot be provided by any kind of shared |
451 | // or static library as instead we mark them explictly as imported. |
452 | if (!F->hasFnAttribute(Kind: "wasm-import-module" )) { |
453 | llvm::AttrBuilder B(F->getParent()->getContext()); |
454 | B.addAttribute(A: "wasm-import-module" , V: "env" ); |
455 | F->addFnAttrs(Attrs: B); |
456 | } |
457 | if (!F->hasFnAttribute(Kind: "wasm-import-name" )) { |
458 | llvm::AttrBuilder B(F->getParent()->getContext()); |
459 | B.addAttribute(A: "wasm-import-name" , V: F->getName()); |
460 | F->addFnAttrs(Attrs: B); |
461 | } |
462 | } |
463 | |
464 | // Returns an integer type for the target architecture's address space. |
465 | // i32 for wasm32 and i64 for wasm64. |
466 | static Type *getAddrIntType(Module *M) { |
467 | IRBuilder<> IRB(M->getContext()); |
468 | return IRB.getIntNTy(N: M->getDataLayout().getPointerSizeInBits()); |
469 | } |
470 | |
471 | // Returns an integer pointer type for the target architecture's address space. |
472 | // i32* for wasm32 and i64* for wasm64. With opaque pointers this is just a ptr |
473 | // in address space zero. |
474 | static Type *getAddrPtrType(Module *M) { |
475 | return PointerType::getUnqual(C&: M->getContext()); |
476 | } |
477 | |
478 | // Returns an integer whose type is the integer type for the target's address |
479 | // space. Returns (i32 C) for wasm32 and (i64 C) for wasm64, when C is the |
480 | // integer. |
481 | static Value *getAddrSizeInt(Module *M, uint64_t C) { |
482 | IRBuilder<> IRB(M->getContext()); |
483 | return IRB.getIntN(N: M->getDataLayout().getPointerSizeInBits(), C); |
484 | } |
485 | |
486 | // Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2. |
487 | // This is because a landingpad instruction contains two more arguments, a |
488 | // personality function and a cleanup bit, and __cxa_find_matching_catch_N |
489 | // functions are named after the number of arguments in the original landingpad |
490 | // instruction. |
491 | Function * |
492 | WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, |
493 | unsigned NumClauses) { |
494 | auto [It, Inserted] = FindMatchingCatches.try_emplace(Key: NumClauses); |
495 | if (!Inserted) |
496 | return It->second; |
497 | PointerType *Int8PtrTy = PointerType::getUnqual(C&: M.getContext()); |
498 | SmallVector<Type *, 16> Args(NumClauses, Int8PtrTy); |
499 | FunctionType *FTy = FunctionType::get(Result: Int8PtrTy, Params: Args, isVarArg: false); |
500 | Function *F = getFunction( |
501 | Ty: FTy, Name: "__cxa_find_matching_catch_" + Twine(NumClauses + 2), M: &M); |
502 | markAsImported(F); |
503 | It->second = F; |
504 | return F; |
505 | } |
506 | |
507 | // Generate invoke wrapper seqence with preamble and postamble |
508 | // Preamble: |
509 | // __THREW__ = 0; |
510 | // Postamble: |
511 | // %__THREW__.val = __THREW__; __THREW__ = 0; |
512 | // Returns %__THREW__.val, which indicates whether an exception is thrown (or |
513 | // whether longjmp occurred), for future use. |
514 | Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) { |
515 | Module *M = CI->getModule(); |
516 | LLVMContext &C = M->getContext(); |
517 | |
518 | IRBuilder<> IRB(C); |
519 | IRB.SetInsertPoint(CI); |
520 | |
521 | // Pre-invoke |
522 | // __THREW__ = 0; |
523 | IRB.CreateStore(Val: getAddrSizeInt(M, C: 0), Ptr: ThrewGV); |
524 | |
525 | // Invoke function wrapper in JavaScript |
526 | SmallVector<Value *, 16> Args; |
527 | // Put the pointer to the callee as first argument, so it can be called |
528 | // within the invoke wrapper later |
529 | Args.push_back(Elt: CI->getCalledOperand()); |
530 | Args.append(in_start: CI->arg_begin(), in_end: CI->arg_end()); |
531 | CallInst *NewCall = IRB.CreateCall(Callee: getInvokeWrapper(CI), Args); |
532 | NewCall->takeName(V: CI); |
533 | NewCall->setCallingConv(CallingConv::WASM_EmscriptenInvoke); |
534 | NewCall->setDebugLoc(CI->getDebugLoc()); |
535 | |
536 | // Because we added the pointer to the callee as first argument, all |
537 | // argument attribute indices have to be incremented by one. |
538 | SmallVector<AttributeSet, 8> ArgAttributes; |
539 | const AttributeList &InvokeAL = CI->getAttributes(); |
540 | |
541 | // No attributes for the callee pointer. |
542 | ArgAttributes.push_back(Elt: AttributeSet()); |
543 | // Copy the argument attributes from the original |
544 | for (unsigned I = 0, E = CI->arg_size(); I < E; ++I) |
545 | ArgAttributes.push_back(Elt: InvokeAL.getParamAttrs(ArgNo: I)); |
546 | |
547 | AttrBuilder FnAttrs(CI->getContext(), InvokeAL.getFnAttrs()); |
548 | if (auto Args = FnAttrs.getAllocSizeArgs()) { |
549 | // The allocsize attribute (if any) referes to parameters by index and needs |
550 | // to be adjusted. |
551 | auto [SizeArg, NEltArg] = *Args; |
552 | SizeArg += 1; |
553 | if (NEltArg) |
554 | NEltArg = *NEltArg + 1; |
555 | FnAttrs.addAllocSizeAttr(ElemSizeArg: SizeArg, NumElemsArg: NEltArg); |
556 | } |
557 | // In case the callee has 'noreturn' attribute, We need to remove it, because |
558 | // we expect invoke wrappers to return. |
559 | FnAttrs.removeAttribute(Val: Attribute::NoReturn); |
560 | |
561 | // Reconstruct the AttributesList based on the vector we constructed. |
562 | AttributeList NewCallAL = AttributeList::get( |
563 | C, FnAttrs: AttributeSet::get(C, B: FnAttrs), RetAttrs: InvokeAL.getRetAttrs(), ArgAttrs: ArgAttributes); |
564 | NewCall->setAttributes(NewCallAL); |
565 | |
566 | CI->replaceAllUsesWith(V: NewCall); |
567 | |
568 | // Post-invoke |
569 | // %__THREW__.val = __THREW__; __THREW__ = 0; |
570 | Value *Threw = |
571 | IRB.CreateLoad(Ty: getAddrIntType(M), Ptr: ThrewGV, Name: ThrewGV->getName() + ".val" ); |
572 | IRB.CreateStore(Val: getAddrSizeInt(M, C: 0), Ptr: ThrewGV); |
573 | return Threw; |
574 | } |
575 | |
576 | // Get matching invoke wrapper based on callee signature |
577 | Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) { |
578 | Module *M = CI->getModule(); |
579 | SmallVector<Type *, 16> ArgTys; |
580 | FunctionType *CalleeFTy = CI->getFunctionType(); |
581 | |
582 | std::string Sig = getSignature(FTy: CalleeFTy); |
583 | auto It = InvokeWrappers.find(Key: Sig); |
584 | if (It != InvokeWrappers.end()) |
585 | return It->second; |
586 | |
587 | // Put the pointer to the callee as first argument |
588 | ArgTys.push_back(Elt: PointerType::getUnqual(C&: CI->getContext())); |
589 | // Add argument types |
590 | ArgTys.append(in_start: CalleeFTy->param_begin(), in_end: CalleeFTy->param_end()); |
591 | |
592 | FunctionType *FTy = FunctionType::get(Result: CalleeFTy->getReturnType(), Params: ArgTys, |
593 | isVarArg: CalleeFTy->isVarArg()); |
594 | Function *F = getFunction(Ty: FTy, Name: "__invoke_" + Sig, M); |
595 | markAsImported(F); |
596 | InvokeWrappers[Sig] = F; |
597 | return F; |
598 | } |
599 | |
600 | static bool canLongjmp(const Value *Callee) { |
601 | if (auto *CalleeF = dyn_cast<Function>(Val: Callee)) |
602 | if (CalleeF->isIntrinsic()) |
603 | return false; |
604 | |
605 | // Attempting to transform inline assembly will result in something like: |
606 | // call void @__invoke_void(void ()* asm ...) |
607 | // which is invalid because inline assembly blocks do not have addresses |
608 | // and can't be passed by pointer. The result is a crash with illegal IR. |
609 | if (isa<InlineAsm>(Val: Callee)) |
610 | return false; |
611 | StringRef CalleeName = Callee->getName(); |
612 | |
613 | // TODO Include more functions or consider checking with mangled prefixes |
614 | |
615 | // The reason we include malloc/free here is to exclude the malloc/free |
616 | // calls generated in setjmp prep / cleanup routines. |
617 | if (CalleeName == "setjmp" || CalleeName == "malloc" || CalleeName == "free" ) |
618 | return false; |
619 | |
620 | // There are functions in Emscripten's JS glue code or compiler-rt |
621 | if (CalleeName == "__resumeException" || CalleeName == "llvm_eh_typeid_for" || |
622 | CalleeName == "__wasm_setjmp" || CalleeName == "__wasm_setjmp_test" || |
623 | CalleeName == "getTempRet0" || CalleeName == "setTempRet0" ) |
624 | return false; |
625 | |
626 | // __cxa_find_matching_catch_N functions cannot longjmp |
627 | if (Callee->getName().starts_with(Prefix: "__cxa_find_matching_catch_" )) |
628 | return false; |
629 | |
630 | // Exception-catching related functions |
631 | // |
632 | // We intentionally treat __cxa_end_catch longjmpable in Wasm SjLj even though |
633 | // it surely cannot longjmp, in order to maintain the unwind relationship from |
634 | // all existing catchpads (and calls within them) to catch.dispatch.longjmp. |
635 | // |
636 | // In Wasm EH + Wasm SjLj, we |
637 | // 1. Make all catchswitch and cleanuppad that unwind to caller unwind to |
638 | // catch.dispatch.longjmp instead |
639 | // 2. Convert all longjmpable calls to invokes that unwind to |
640 | // catch.dispatch.longjmp |
641 | // But catchswitch BBs are removed in isel, so if an EH catchswitch (generated |
642 | // from an exception)'s catchpad does not contain any calls that are converted |
643 | // into invokes unwinding to catch.dispatch.longjmp, this unwind relationship |
644 | // (EH catchswitch BB -> catch.dispatch.longjmp BB) is lost and |
645 | // catch.dispatch.longjmp BB can be placed before the EH catchswitch BB in |
646 | // CFGSort. |
647 | // int ret = setjmp(buf); |
648 | // try { |
649 | // foo(); // longjmps |
650 | // } catch (...) { |
651 | // } |
652 | // Then in this code, if 'foo' longjmps, it first unwinds to 'catch (...)' |
653 | // catchswitch, and is not caught by that catchswitch because it is a longjmp, |
654 | // then it should next unwind to catch.dispatch.longjmp BB. But if this 'catch |
655 | // (...)' catchswitch -> catch.dispatch.longjmp unwind relationship is lost, |
656 | // it will not unwind to catch.dispatch.longjmp, producing an incorrect |
657 | // result. |
658 | // |
659 | // Every catchpad generated by Wasm C++ contains __cxa_end_catch, so we |
660 | // intentionally treat it as longjmpable to work around this problem. This is |
661 | // a hacky fix but an easy one. |
662 | if (CalleeName == "__cxa_end_catch" ) |
663 | return WebAssembly::WasmEnableSjLj; |
664 | if (CalleeName == "__cxa_begin_catch" || |
665 | CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" || |
666 | CalleeName == "__clang_call_terminate" ) |
667 | return false; |
668 | |
669 | // std::terminate, which is generated when another exception occurs while |
670 | // handling an exception, cannot longjmp. |
671 | if (CalleeName == "_ZSt9terminatev" ) |
672 | return false; |
673 | |
674 | // Otherwise we don't know |
675 | return true; |
676 | } |
677 | |
678 | static bool isEmAsmCall(const Value *Callee) { |
679 | StringRef CalleeName = Callee->getName(); |
680 | // This is an exhaustive list from Emscripten's <emscripten/em_asm.h>. |
681 | return CalleeName == "emscripten_asm_const_int" || |
682 | CalleeName == "emscripten_asm_const_double" || |
683 | CalleeName == "emscripten_asm_const_int_sync_on_main_thread" || |
684 | CalleeName == "emscripten_asm_const_double_sync_on_main_thread" || |
685 | CalleeName == "emscripten_asm_const_async_on_main_thread" ; |
686 | } |
687 | |
688 | // Generate __wasm_setjmp_test function call seqence with preamble and |
689 | // postamble. The code this generates is equivalent to the following |
690 | // JavaScript code: |
691 | // %__threwValue.val = __threwValue; |
692 | // if (%__THREW__.val != 0 & %__threwValue.val != 0) { |
693 | // %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId); |
694 | // if (%label == 0) |
695 | // emscripten_longjmp(%__THREW__.val, %__threwValue.val); |
696 | // setTempRet0(%__threwValue.val); |
697 | // } else { |
698 | // %label = -1; |
699 | // } |
700 | // %longjmp_result = getTempRet0(); |
701 | // |
702 | // As output parameters. returns %label, %longjmp_result, and the BB the last |
703 | // instruction (%longjmp_result = ...) is in. |
704 | void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp( |
705 | BasicBlock *BB, DebugLoc DL, Value *Threw, Value *FunctionInvocationId, |
706 | Value *&Label, Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB, |
707 | PHINode *&CallEmLongjmpBBThrewPHI, PHINode *&CallEmLongjmpBBThrewValuePHI, |
708 | BasicBlock *&EndBB) { |
709 | Function *F = BB->getParent(); |
710 | Module *M = F->getParent(); |
711 | LLVMContext &C = M->getContext(); |
712 | IRBuilder<> IRB(C); |
713 | IRB.SetCurrentDebugLocation(DL); |
714 | |
715 | // if (%__THREW__.val != 0 & %__threwValue.val != 0) |
716 | IRB.SetInsertPoint(BB); |
717 | BasicBlock *ThenBB1 = BasicBlock::Create(Context&: C, Name: "if.then1" , Parent: F); |
718 | BasicBlock *ElseBB1 = BasicBlock::Create(Context&: C, Name: "if.else1" , Parent: F); |
719 | BasicBlock *EndBB1 = BasicBlock::Create(Context&: C, Name: "if.end" , Parent: F); |
720 | Value *ThrewCmp = IRB.CreateICmpNE(LHS: Threw, RHS: getAddrSizeInt(M, C: 0)); |
721 | Value *ThrewValue = IRB.CreateLoad(Ty: IRB.getInt32Ty(), Ptr: ThrewValueGV, |
722 | Name: ThrewValueGV->getName() + ".val" ); |
723 | Value *ThrewValueCmp = IRB.CreateICmpNE(LHS: ThrewValue, RHS: IRB.getInt32(C: 0)); |
724 | Value *Cmp1 = IRB.CreateAnd(LHS: ThrewCmp, RHS: ThrewValueCmp, Name: "cmp1" ); |
725 | IRB.CreateCondBr(Cond: Cmp1, True: ThenBB1, False: ElseBB1); |
726 | |
727 | // Generate call.em.longjmp BB once and share it within the function |
728 | if (!CallEmLongjmpBB) { |
729 | // emscripten_longjmp(%__THREW__.val, %__threwValue.val); |
730 | CallEmLongjmpBB = BasicBlock::Create(Context&: C, Name: "call.em.longjmp" , Parent: F); |
731 | IRB.SetInsertPoint(CallEmLongjmpBB); |
732 | CallEmLongjmpBBThrewPHI = IRB.CreatePHI(Ty: getAddrIntType(M), NumReservedValues: 4, Name: "threw.phi" ); |
733 | CallEmLongjmpBBThrewValuePHI = |
734 | IRB.CreatePHI(Ty: IRB.getInt32Ty(), NumReservedValues: 4, Name: "threwvalue.phi" ); |
735 | CallEmLongjmpBBThrewPHI->addIncoming(V: Threw, BB: ThenBB1); |
736 | CallEmLongjmpBBThrewValuePHI->addIncoming(V: ThrewValue, BB: ThenBB1); |
737 | IRB.CreateCall(Callee: EmLongjmpF, |
738 | Args: {CallEmLongjmpBBThrewPHI, CallEmLongjmpBBThrewValuePHI}); |
739 | IRB.CreateUnreachable(); |
740 | } else { |
741 | CallEmLongjmpBBThrewPHI->addIncoming(V: Threw, BB: ThenBB1); |
742 | CallEmLongjmpBBThrewValuePHI->addIncoming(V: ThrewValue, BB: ThenBB1); |
743 | } |
744 | |
745 | // %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId); |
746 | // if (%label == 0) |
747 | IRB.SetInsertPoint(ThenBB1); |
748 | BasicBlock *EndBB2 = BasicBlock::Create(Context&: C, Name: "if.end2" , Parent: F); |
749 | Value *ThrewPtr = |
750 | IRB.CreateIntToPtr(V: Threw, DestTy: getAddrPtrType(M), Name: Threw->getName() + ".p" ); |
751 | Value *ThenLabel = IRB.CreateCall(Callee: WasmSetjmpTestF, |
752 | Args: {ThrewPtr, FunctionInvocationId}, Name: "label" ); |
753 | Value *Cmp2 = IRB.CreateICmpEQ(LHS: ThenLabel, RHS: IRB.getInt32(C: 0)); |
754 | IRB.CreateCondBr(Cond: Cmp2, True: CallEmLongjmpBB, False: EndBB2); |
755 | |
756 | // setTempRet0(%__threwValue.val); |
757 | IRB.SetInsertPoint(EndBB2); |
758 | IRB.CreateCall(Callee: SetTempRet0F, Args: ThrewValue); |
759 | IRB.CreateBr(Dest: EndBB1); |
760 | |
761 | IRB.SetInsertPoint(ElseBB1); |
762 | IRB.CreateBr(Dest: EndBB1); |
763 | |
764 | // longjmp_result = getTempRet0(); |
765 | IRB.SetInsertPoint(EndBB1); |
766 | PHINode *LabelPHI = IRB.CreatePHI(Ty: IRB.getInt32Ty(), NumReservedValues: 2, Name: "label" ); |
767 | LabelPHI->addIncoming(V: ThenLabel, BB: EndBB2); |
768 | |
769 | LabelPHI->addIncoming(V: IRB.getInt32(C: -1), BB: ElseBB1); |
770 | |
771 | // Output parameter assignment |
772 | Label = LabelPHI; |
773 | EndBB = EndBB1; |
774 | LongjmpResult = IRB.CreateCall(Callee: GetTempRet0F, Args: {}, Name: "longjmp_result" ); |
775 | } |
776 | |
777 | void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) { |
778 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); |
779 | DT.recalculate(Func&: F); // CFG has been changed |
780 | |
781 | SSAUpdaterBulk SSA; |
782 | for (BasicBlock &BB : F) { |
783 | for (Instruction &I : BB) { |
784 | if (I.getType()->isVoidTy()) |
785 | continue; |
786 | unsigned VarID = SSA.AddVariable(Name: I.getName(), Ty: I.getType()); |
787 | // If a value is defined by an invoke instruction, it is only available in |
788 | // its normal destination and not in its unwind destination. |
789 | if (auto *II = dyn_cast<InvokeInst>(Val: &I)) |
790 | SSA.AddAvailableValue(Var: VarID, BB: II->getNormalDest(), V: II); |
791 | else |
792 | SSA.AddAvailableValue(Var: VarID, BB: &BB, V: &I); |
793 | for (auto &U : I.uses()) { |
794 | auto *User = cast<Instruction>(Val: U.getUser()); |
795 | if (auto *UserPN = dyn_cast<PHINode>(Val: User)) |
796 | if (UserPN->getIncomingBlock(U) == &BB) |
797 | continue; |
798 | if (DT.dominates(Def: &I, User)) |
799 | continue; |
800 | SSA.AddUse(Var: VarID, U: &U); |
801 | } |
802 | } |
803 | } |
804 | SSA.RewriteAllUses(DT: &DT); |
805 | } |
806 | |
807 | // Replace uses of longjmp with a new longjmp function in Emscripten library. |
808 | // In Emscripten SjLj, the new function is |
809 | // void emscripten_longjmp(uintptr_t, i32) |
810 | // In Wasm SjLj, the new function is |
811 | // void __wasm_longjmp(i8*, i32) |
812 | // Because the original libc longjmp function takes (jmp_buf*, i32), we need a |
813 | // ptrtoint/bitcast instruction here to make the type match. jmp_buf* will |
814 | // eventually be lowered to i32/i64 in the wasm backend. |
815 | void WebAssemblyLowerEmscriptenEHSjLj::replaceLongjmpWith(Function *LongjmpF, |
816 | Function *NewF) { |
817 | assert(NewF == EmLongjmpF || NewF == WasmLongjmpF); |
818 | Module *M = LongjmpF->getParent(); |
819 | SmallVector<CallInst *, 8> ToErase; |
820 | LLVMContext &C = LongjmpF->getParent()->getContext(); |
821 | IRBuilder<> IRB(C); |
822 | |
823 | // For calls to longjmp, replace it with emscripten_longjmp/__wasm_longjmp and |
824 | // cast its first argument (jmp_buf*) appropriately |
825 | for (User *U : LongjmpF->users()) { |
826 | auto *CI = dyn_cast<CallInst>(Val: U); |
827 | if (CI && CI->getCalledFunction() == LongjmpF) { |
828 | IRB.SetInsertPoint(CI); |
829 | Value *Env = nullptr; |
830 | if (NewF == EmLongjmpF) |
831 | Env = |
832 | IRB.CreatePtrToInt(V: CI->getArgOperand(i: 0), DestTy: getAddrIntType(M), Name: "env" ); |
833 | else // WasmLongjmpF |
834 | Env = IRB.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: IRB.getPtrTy(), Name: "env" ); |
835 | IRB.CreateCall(Callee: NewF, Args: {Env, CI->getArgOperand(i: 1)}); |
836 | ToErase.push_back(Elt: CI); |
837 | } |
838 | } |
839 | for (auto *I : ToErase) |
840 | I->eraseFromParent(); |
841 | |
842 | // If we have any remaining uses of longjmp's function pointer, replace it |
843 | // with (void(*)(jmp_buf*, int))emscripten_longjmp / __wasm_longjmp. |
844 | if (!LongjmpF->uses().empty()) { |
845 | Value *NewLongjmp = |
846 | IRB.CreateBitCast(V: NewF, DestTy: LongjmpF->getType(), Name: "longjmp.cast" ); |
847 | LongjmpF->replaceAllUsesWith(V: NewLongjmp); |
848 | } |
849 | } |
850 | |
851 | static bool containsLongjmpableCalls(const Function *F) { |
852 | for (const auto &BB : *F) |
853 | for (const auto &I : BB) |
854 | if (const auto *CB = dyn_cast<CallBase>(Val: &I)) |
855 | if (canLongjmp(Callee: CB->getCalledOperand())) |
856 | return true; |
857 | return false; |
858 | } |
859 | |
860 | // When a function contains a setjmp call but not other calls that can longjmp, |
861 | // we don't do setjmp transformation for that setjmp. But we need to convert the |
862 | // setjmp calls into "i32 0" so they don't cause link time errors. setjmp always |
863 | // returns 0 when called directly. |
864 | static void nullifySetjmp(Function *F) { |
865 | Module &M = *F->getParent(); |
866 | IRBuilder<> IRB(M.getContext()); |
867 | Function *SetjmpF = M.getFunction(Name: "setjmp" ); |
868 | SmallVector<Instruction *, 1> ToErase; |
869 | |
870 | for (User *U : make_early_inc_range(Range: SetjmpF->users())) { |
871 | auto *CB = cast<CallBase>(Val: U); |
872 | BasicBlock *BB = CB->getParent(); |
873 | if (BB->getParent() != F) // in other function |
874 | continue; |
875 | CallInst *CI = nullptr; |
876 | // setjmp cannot throw. So if it is an invoke, lower it to a call |
877 | if (auto *II = dyn_cast<InvokeInst>(Val: CB)) |
878 | CI = llvm::changeToCall(II); |
879 | else |
880 | CI = cast<CallInst>(Val: CB); |
881 | ToErase.push_back(Elt: CI); |
882 | CI->replaceAllUsesWith(V: IRB.getInt32(C: 0)); |
883 | } |
884 | for (auto *I : ToErase) |
885 | I->eraseFromParent(); |
886 | } |
887 | |
888 | bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { |
889 | LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n" ); |
890 | |
891 | LLVMContext &C = M.getContext(); |
892 | IRBuilder<> IRB(C); |
893 | |
894 | Function *SetjmpF = M.getFunction(Name: "setjmp" ); |
895 | Function *LongjmpF = M.getFunction(Name: "longjmp" ); |
896 | |
897 | // In some platforms _setjmp and _longjmp are used instead. Change these to |
898 | // use setjmp/longjmp instead, because we later detect these functions by |
899 | // their names. |
900 | Function *SetjmpF2 = M.getFunction(Name: "_setjmp" ); |
901 | Function *LongjmpF2 = M.getFunction(Name: "_longjmp" ); |
902 | if (SetjmpF2) { |
903 | if (SetjmpF) { |
904 | if (SetjmpF->getFunctionType() != SetjmpF2->getFunctionType()) |
905 | report_fatal_error(reason: "setjmp and _setjmp have different function types" ); |
906 | } else { |
907 | SetjmpF = Function::Create(Ty: SetjmpF2->getFunctionType(), |
908 | Linkage: GlobalValue::ExternalLinkage, N: "setjmp" , M); |
909 | } |
910 | SetjmpF2->replaceAllUsesWith(V: SetjmpF); |
911 | } |
912 | if (LongjmpF2) { |
913 | if (LongjmpF) { |
914 | if (LongjmpF->getFunctionType() != LongjmpF2->getFunctionType()) |
915 | report_fatal_error( |
916 | reason: "longjmp and _longjmp have different function types" ); |
917 | } else { |
918 | LongjmpF = Function::Create(Ty: LongjmpF2->getFunctionType(), |
919 | Linkage: GlobalValue::ExternalLinkage, N: "setjmp" , M); |
920 | } |
921 | LongjmpF2->replaceAllUsesWith(V: LongjmpF); |
922 | } |
923 | |
924 | auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); |
925 | assert(TPC && "Expected a TargetPassConfig" ); |
926 | auto &TM = TPC->getTM<WebAssemblyTargetMachine>(); |
927 | |
928 | // Declare (or get) global variables __THREW__, __threwValue, and |
929 | // getTempRet0/setTempRet0 function which are used in common for both |
930 | // exception handling and setjmp/longjmp handling |
931 | ThrewGV = getGlobalVariable(M, Ty: getAddrIntType(M: &M), TM, Name: "__THREW__" ); |
932 | ThrewValueGV = getGlobalVariable(M, Ty: IRB.getInt32Ty(), TM, Name: "__threwValue" ); |
933 | GetTempRet0F = getFunction(Ty: FunctionType::get(Result: IRB.getInt32Ty(), isVarArg: false), |
934 | Name: "getTempRet0" , M: &M); |
935 | SetTempRet0F = |
936 | getFunction(Ty: FunctionType::get(Result: IRB.getVoidTy(), Params: IRB.getInt32Ty(), isVarArg: false), |
937 | Name: "setTempRet0" , M: &M); |
938 | GetTempRet0F->setDoesNotThrow(); |
939 | SetTempRet0F->setDoesNotThrow(); |
940 | |
941 | bool Changed = false; |
942 | |
943 | // Function registration for exception handling |
944 | if (EnableEmEH) { |
945 | // Register __resumeException function |
946 | FunctionType *ResumeFTy = |
947 | FunctionType::get(Result: IRB.getVoidTy(), Params: IRB.getPtrTy(), isVarArg: false); |
948 | ResumeF = getFunction(Ty: ResumeFTy, Name: "__resumeException" , M: &M); |
949 | ResumeF->addFnAttr(Kind: Attribute::NoReturn); |
950 | |
951 | // Register llvm_eh_typeid_for function |
952 | FunctionType *EHTypeIDTy = |
953 | FunctionType::get(Result: IRB.getInt32Ty(), Params: IRB.getPtrTy(), isVarArg: false); |
954 | EHTypeIDF = getFunction(Ty: EHTypeIDTy, Name: "llvm_eh_typeid_for" , M: &M); |
955 | } |
956 | |
957 | // Functions that contains calls to setjmp but don't have other longjmpable |
958 | // calls within them. |
959 | SmallPtrSet<Function *, 4> SetjmpUsersToNullify; |
960 | |
961 | if ((EnableEmSjLj || EnableWasmSjLj) && SetjmpF) { |
962 | // Precompute setjmp users |
963 | for (User *U : SetjmpF->users()) { |
964 | if (auto *CB = dyn_cast<CallBase>(Val: U)) { |
965 | auto *UserF = CB->getFunction(); |
966 | // If a function that calls setjmp does not contain any other calls that |
967 | // can longjmp, we don't need to do any transformation on that function, |
968 | // so can ignore it |
969 | if (containsLongjmpableCalls(F: UserF)) |
970 | SetjmpUsers.insert(Ptr: UserF); |
971 | else |
972 | SetjmpUsersToNullify.insert(Ptr: UserF); |
973 | } else { |
974 | std::string S; |
975 | raw_string_ostream SS(S); |
976 | SS << *U; |
977 | report_fatal_error(reason: Twine("Indirect use of setjmp is not supported: " ) + |
978 | SS.str()); |
979 | } |
980 | } |
981 | } |
982 | |
983 | bool SetjmpUsed = SetjmpF && !SetjmpUsers.empty(); |
984 | bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty(); |
985 | DoSjLj = (EnableEmSjLj | EnableWasmSjLj) && (SetjmpUsed || LongjmpUsed); |
986 | |
987 | // Function registration and data pre-gathering for setjmp/longjmp handling |
988 | if (DoSjLj) { |
989 | assert(EnableEmSjLj || EnableWasmSjLj); |
990 | if (EnableEmSjLj) { |
991 | // Register emscripten_longjmp function |
992 | FunctionType *FTy = FunctionType::get( |
993 | Result: IRB.getVoidTy(), Params: {getAddrIntType(M: &M), IRB.getInt32Ty()}, isVarArg: false); |
994 | EmLongjmpF = getFunction(Ty: FTy, Name: "emscripten_longjmp" , M: &M); |
995 | EmLongjmpF->addFnAttr(Kind: Attribute::NoReturn); |
996 | } else { // EnableWasmSjLj |
997 | Type *Int8PtrTy = IRB.getPtrTy(); |
998 | // Register __wasm_longjmp function, which calls __builtin_wasm_longjmp. |
999 | FunctionType *FTy = FunctionType::get( |
1000 | Result: IRB.getVoidTy(), Params: {Int8PtrTy, IRB.getInt32Ty()}, isVarArg: false); |
1001 | WasmLongjmpF = getFunction(Ty: FTy, Name: "__wasm_longjmp" , M: &M); |
1002 | WasmLongjmpF->addFnAttr(Kind: Attribute::NoReturn); |
1003 | } |
1004 | |
1005 | if (SetjmpF) { |
1006 | Type *Int8PtrTy = IRB.getPtrTy(); |
1007 | Type *Int32PtrTy = IRB.getPtrTy(); |
1008 | Type *Int32Ty = IRB.getInt32Ty(); |
1009 | |
1010 | // Register __wasm_setjmp function |
1011 | FunctionType *SetjmpFTy = SetjmpF->getFunctionType(); |
1012 | FunctionType *FTy = FunctionType::get( |
1013 | Result: IRB.getVoidTy(), Params: {SetjmpFTy->getParamType(i: 0), Int32Ty, Int32PtrTy}, |
1014 | isVarArg: false); |
1015 | WasmSetjmpF = getFunction(Ty: FTy, Name: "__wasm_setjmp" , M: &M); |
1016 | |
1017 | // Register __wasm_setjmp_test function |
1018 | FTy = FunctionType::get(Result: Int32Ty, Params: {Int32PtrTy, Int32PtrTy}, isVarArg: false); |
1019 | WasmSetjmpTestF = getFunction(Ty: FTy, Name: "__wasm_setjmp_test" , M: &M); |
1020 | |
1021 | // wasm.catch() will be lowered down to wasm 'catch' instruction in |
1022 | // instruction selection. |
1023 | CatchF = Intrinsic::getOrInsertDeclaration(M: &M, id: Intrinsic::wasm_catch); |
1024 | // Type for struct __WasmLongjmpArgs |
1025 | LongjmpArgsTy = StructType::get(elt1: Int8PtrTy, // env |
1026 | elts: Int32Ty // val |
1027 | ); |
1028 | } |
1029 | } |
1030 | |
1031 | // Exception handling transformation |
1032 | if (EnableEmEH) { |
1033 | for (Function &F : M) { |
1034 | if (F.isDeclaration()) |
1035 | continue; |
1036 | Changed |= runEHOnFunction(F); |
1037 | } |
1038 | } |
1039 | |
1040 | // Setjmp/longjmp handling transformation |
1041 | if (DoSjLj) { |
1042 | Changed = true; // We have setjmp or longjmp somewhere |
1043 | if (LongjmpF) |
1044 | replaceLongjmpWith(LongjmpF, NewF: EnableEmSjLj ? EmLongjmpF : WasmLongjmpF); |
1045 | // Only traverse functions that uses setjmp in order not to insert |
1046 | // unnecessary prep / cleanup code in every function |
1047 | if (SetjmpF) |
1048 | for (Function *F : SetjmpUsers) |
1049 | runSjLjOnFunction(F&: *F); |
1050 | } |
1051 | |
1052 | // Replace unnecessary setjmp calls with 0 |
1053 | if ((EnableEmSjLj || EnableWasmSjLj) && !SetjmpUsersToNullify.empty()) { |
1054 | Changed = true; |
1055 | assert(SetjmpF); |
1056 | for (Function *F : SetjmpUsersToNullify) |
1057 | nullifySetjmp(F); |
1058 | } |
1059 | |
1060 | // Delete unused global variables and functions |
1061 | for (auto *V : {ThrewGV, ThrewValueGV}) |
1062 | if (V && V->use_empty()) |
1063 | V->eraseFromParent(); |
1064 | for (auto *V : {GetTempRet0F, SetTempRet0F, ResumeF, EHTypeIDF, EmLongjmpF, |
1065 | WasmSetjmpF, WasmSetjmpTestF, WasmLongjmpF, CatchF}) |
1066 | if (V && V->use_empty()) |
1067 | V->eraseFromParent(); |
1068 | |
1069 | return Changed; |
1070 | } |
1071 | |
1072 | bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { |
1073 | Module &M = *F.getParent(); |
1074 | LLVMContext &C = F.getContext(); |
1075 | IRBuilder<> IRB(C); |
1076 | bool Changed = false; |
1077 | SmallVector<Instruction *, 64> ToErase; |
1078 | SmallPtrSet<LandingPadInst *, 32> LandingPads; |
1079 | |
1080 | // rethrow.longjmp BB that will be shared within the function. |
1081 | BasicBlock *RethrowLongjmpBB = nullptr; |
1082 | // PHI node for the loaded value of __THREW__ global variable in |
1083 | // rethrow.longjmp BB |
1084 | PHINode *RethrowLongjmpBBThrewPHI = nullptr; |
1085 | |
1086 | for (BasicBlock &BB : F) { |
1087 | auto *II = dyn_cast<InvokeInst>(Val: BB.getTerminator()); |
1088 | if (!II) |
1089 | continue; |
1090 | Changed = true; |
1091 | LandingPads.insert(Ptr: II->getLandingPadInst()); |
1092 | IRB.SetInsertPoint(II); |
1093 | |
1094 | const Value *Callee = II->getCalledOperand(); |
1095 | bool NeedInvoke = supportsException(F: &F) && canThrow(V: Callee); |
1096 | if (NeedInvoke) { |
1097 | // Wrap invoke with invoke wrapper and generate preamble/postamble |
1098 | Value *Threw = wrapInvoke(CI: II); |
1099 | ToErase.push_back(Elt: II); |
1100 | |
1101 | // If setjmp/longjmp handling is enabled, the thrown value can be not an |
1102 | // exception but a longjmp. If the current function contains calls to |
1103 | // setjmp, it will be appropriately handled in runSjLjOnFunction. But even |
1104 | // if the function does not contain setjmp calls, we shouldn't silently |
1105 | // ignore longjmps; we should rethrow them so they can be correctly |
1106 | // handled in somewhere up the call chain where setjmp is. __THREW__'s |
1107 | // value is 0 when nothing happened, 1 when an exception is thrown, and |
1108 | // other values when longjmp is thrown. |
1109 | // |
1110 | // if (%__THREW__.val == 0 || %__THREW__.val == 1) |
1111 | // goto %tail |
1112 | // else |
1113 | // goto %longjmp.rethrow |
1114 | // |
1115 | // rethrow.longjmp: ;; This is longjmp. Rethrow it |
1116 | // %__threwValue.val = __threwValue |
1117 | // emscripten_longjmp(%__THREW__.val, %__threwValue.val); |
1118 | // |
1119 | // tail: ;; Nothing happened or an exception is thrown |
1120 | // ... Continue exception handling ... |
1121 | if (DoSjLj && EnableEmSjLj && !SetjmpUsers.count(Ptr: &F) && |
1122 | canLongjmp(Callee)) { |
1123 | // Create longjmp.rethrow BB once and share it within the function |
1124 | if (!RethrowLongjmpBB) { |
1125 | RethrowLongjmpBB = BasicBlock::Create(Context&: C, Name: "rethrow.longjmp" , Parent: &F); |
1126 | IRB.SetInsertPoint(RethrowLongjmpBB); |
1127 | RethrowLongjmpBBThrewPHI = |
1128 | IRB.CreatePHI(Ty: getAddrIntType(M: &M), NumReservedValues: 4, Name: "threw.phi" ); |
1129 | RethrowLongjmpBBThrewPHI->addIncoming(V: Threw, BB: &BB); |
1130 | Value *ThrewValue = IRB.CreateLoad(Ty: IRB.getInt32Ty(), Ptr: ThrewValueGV, |
1131 | Name: ThrewValueGV->getName() + ".val" ); |
1132 | IRB.CreateCall(Callee: EmLongjmpF, Args: {RethrowLongjmpBBThrewPHI, ThrewValue}); |
1133 | IRB.CreateUnreachable(); |
1134 | } else { |
1135 | RethrowLongjmpBBThrewPHI->addIncoming(V: Threw, BB: &BB); |
1136 | } |
1137 | |
1138 | IRB.SetInsertPoint(II); // Restore the insert point back |
1139 | BasicBlock *Tail = BasicBlock::Create(Context&: C, Name: "tail" , Parent: &F); |
1140 | Value *CmpEqOne = |
1141 | IRB.CreateICmpEQ(LHS: Threw, RHS: getAddrSizeInt(M: &M, C: 1), Name: "cmp.eq.one" ); |
1142 | Value *CmpEqZero = |
1143 | IRB.CreateICmpEQ(LHS: Threw, RHS: getAddrSizeInt(M: &M, C: 0), Name: "cmp.eq.zero" ); |
1144 | Value *Or = IRB.CreateOr(LHS: CmpEqZero, RHS: CmpEqOne, Name: "or" ); |
1145 | IRB.CreateCondBr(Cond: Or, True: Tail, False: RethrowLongjmpBB); |
1146 | IRB.SetInsertPoint(Tail); |
1147 | BB.replaceSuccessorsPhiUsesWith(Old: &BB, New: Tail); |
1148 | } |
1149 | |
1150 | // Insert a branch based on __THREW__ variable |
1151 | Value *Cmp = IRB.CreateICmpEQ(LHS: Threw, RHS: getAddrSizeInt(M: &M, C: 1), Name: "cmp" ); |
1152 | IRB.CreateCondBr(Cond: Cmp, True: II->getUnwindDest(), False: II->getNormalDest()); |
1153 | |
1154 | } else { |
1155 | // This can't throw, and we don't need this invoke, just replace it with a |
1156 | // call+branch |
1157 | changeToCall(II); |
1158 | } |
1159 | } |
1160 | |
1161 | // Process resume instructions |
1162 | for (BasicBlock &BB : F) { |
1163 | // Scan the body of the basic block for resumes |
1164 | for (Instruction &I : BB) { |
1165 | auto *RI = dyn_cast<ResumeInst>(Val: &I); |
1166 | if (!RI) |
1167 | continue; |
1168 | Changed = true; |
1169 | |
1170 | // Split the input into legal values |
1171 | Value *Input = RI->getValue(); |
1172 | IRB.SetInsertPoint(RI); |
1173 | Value *Low = IRB.CreateExtractValue(Agg: Input, Idxs: 0, Name: "low" ); |
1174 | // Create a call to __resumeException function |
1175 | IRB.CreateCall(Callee: ResumeF, Args: {Low}); |
1176 | // Add a terminator to the block |
1177 | IRB.CreateUnreachable(); |
1178 | ToErase.push_back(Elt: RI); |
1179 | } |
1180 | } |
1181 | |
1182 | // Process llvm.eh.typeid.for intrinsics |
1183 | for (BasicBlock &BB : F) { |
1184 | for (Instruction &I : BB) { |
1185 | auto *CI = dyn_cast<CallInst>(Val: &I); |
1186 | if (!CI) |
1187 | continue; |
1188 | const Function *Callee = CI->getCalledFunction(); |
1189 | if (!Callee) |
1190 | continue; |
1191 | if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for) |
1192 | continue; |
1193 | Changed = true; |
1194 | |
1195 | IRB.SetInsertPoint(CI); |
1196 | CallInst *NewCI = |
1197 | IRB.CreateCall(Callee: EHTypeIDF, Args: CI->getArgOperand(i: 0), Name: "typeid" ); |
1198 | CI->replaceAllUsesWith(V: NewCI); |
1199 | ToErase.push_back(Elt: CI); |
1200 | } |
1201 | } |
1202 | |
1203 | // Look for orphan landingpads, can occur in blocks with no predecessors |
1204 | for (BasicBlock &BB : F) { |
1205 | BasicBlock::iterator I = BB.getFirstNonPHIIt(); |
1206 | if (auto *LPI = dyn_cast<LandingPadInst>(Val&: I)) |
1207 | LandingPads.insert(Ptr: LPI); |
1208 | } |
1209 | Changed |= !LandingPads.empty(); |
1210 | |
1211 | // Handle all the landingpad for this function together, as multiple invokes |
1212 | // may share a single lp |
1213 | for (LandingPadInst *LPI : LandingPads) { |
1214 | IRB.SetInsertPoint(LPI); |
1215 | SmallVector<Value *, 16> FMCArgs; |
1216 | for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) { |
1217 | Constant *Clause = LPI->getClause(Idx: I); |
1218 | // TODO Handle filters (= exception specifications). |
1219 | // https://github.com/llvm/llvm-project/issues/49740 |
1220 | if (LPI->isCatch(Idx: I)) |
1221 | FMCArgs.push_back(Elt: Clause); |
1222 | } |
1223 | |
1224 | // Create a call to __cxa_find_matching_catch_N function |
1225 | Function *FMCF = getFindMatchingCatch(M, NumClauses: FMCArgs.size()); |
1226 | CallInst *FMCI = IRB.CreateCall(Callee: FMCF, Args: FMCArgs, Name: "fmc" ); |
1227 | Value *Poison = PoisonValue::get(T: LPI->getType()); |
1228 | Value *Pair0 = IRB.CreateInsertValue(Agg: Poison, Val: FMCI, Idxs: 0, Name: "pair0" ); |
1229 | Value *TempRet0 = IRB.CreateCall(Callee: GetTempRet0F, Args: {}, Name: "tempret0" ); |
1230 | Value *Pair1 = IRB.CreateInsertValue(Agg: Pair0, Val: TempRet0, Idxs: 1, Name: "pair1" ); |
1231 | |
1232 | LPI->replaceAllUsesWith(V: Pair1); |
1233 | ToErase.push_back(Elt: LPI); |
1234 | } |
1235 | |
1236 | // Erase everything we no longer need in this function |
1237 | for (Instruction *I : ToErase) |
1238 | I->eraseFromParent(); |
1239 | |
1240 | return Changed; |
1241 | } |
1242 | |
1243 | // This tries to get debug info from the instruction before which a new |
1244 | // instruction will be inserted, and if there's no debug info in that |
1245 | // instruction, tries to get the info instead from the previous instruction (if |
1246 | // any). If none of these has debug info and a DISubprogram is provided, it |
1247 | // creates a dummy debug info with the first line of the function, because IR |
1248 | // verifier requires all inlinable callsites should have debug info when both a |
1249 | // caller and callee have DISubprogram. If none of these conditions are met, |
1250 | // returns empty info. |
1251 | static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore, |
1252 | DISubprogram *SP) { |
1253 | assert(InsertBefore); |
1254 | if (InsertBefore->getDebugLoc()) |
1255 | return InsertBefore->getDebugLoc(); |
1256 | const Instruction *Prev = InsertBefore->getPrevNode(); |
1257 | if (Prev && Prev->getDebugLoc()) |
1258 | return Prev->getDebugLoc(); |
1259 | if (SP) |
1260 | return DILocation::get(Context&: SP->getContext(), Line: SP->getLine(), Column: 1, Scope: SP); |
1261 | return DebugLoc(); |
1262 | } |
1263 | |
1264 | bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { |
1265 | assert(EnableEmSjLj || EnableWasmSjLj); |
1266 | Module &M = *F.getParent(); |
1267 | LLVMContext &C = F.getContext(); |
1268 | IRBuilder<> IRB(C); |
1269 | SmallVector<Instruction *, 64> ToErase; |
1270 | |
1271 | // Setjmp preparation |
1272 | |
1273 | BasicBlock *Entry = &F.getEntryBlock(); |
1274 | DebugLoc FirstDL = getOrCreateDebugLoc(InsertBefore: &*Entry->begin(), SP: F.getSubprogram()); |
1275 | SplitBlock(Old: Entry, SplitPt: &*Entry->getFirstInsertionPt()); |
1276 | |
1277 | IRB.SetInsertPoint(Entry->getTerminator()->getIterator()); |
1278 | // This alloca'ed pointer is used by the runtime to identify function |
1279 | // invocations. It's just for pointer comparisons. It will never be |
1280 | // dereferenced. |
1281 | Instruction *FunctionInvocationId = |
1282 | IRB.CreateAlloca(Ty: IRB.getInt32Ty(), ArraySize: nullptr, Name: "functionInvocationId" ); |
1283 | FunctionInvocationId->setDebugLoc(FirstDL); |
1284 | |
1285 | // Setjmp transformation |
1286 | SmallVector<PHINode *, 4> SetjmpRetPHIs; |
1287 | Function *SetjmpF = M.getFunction(Name: "setjmp" ); |
1288 | for (auto *U : make_early_inc_range(Range: SetjmpF->users())) { |
1289 | auto *CB = cast<CallBase>(Val: U); |
1290 | BasicBlock *BB = CB->getParent(); |
1291 | if (BB->getParent() != &F) // in other function |
1292 | continue; |
1293 | if (CB->getOperandBundle(ID: LLVMContext::OB_funclet)) { |
1294 | std::string S; |
1295 | raw_string_ostream SS(S); |
1296 | SS << "In function " + F.getName() + |
1297 | ": setjmp within a catch clause is not supported in Wasm EH:\n" ; |
1298 | SS << *CB; |
1299 | report_fatal_error(reason: StringRef(SS.str())); |
1300 | } |
1301 | |
1302 | CallInst *CI = nullptr; |
1303 | // setjmp cannot throw. So if it is an invoke, lower it to a call |
1304 | if (auto *II = dyn_cast<InvokeInst>(Val: CB)) |
1305 | CI = llvm::changeToCall(II); |
1306 | else |
1307 | CI = cast<CallInst>(Val: CB); |
1308 | |
1309 | // The tail is everything right after the call, and will be reached once |
1310 | // when setjmp is called, and later when longjmp returns to the setjmp |
1311 | BasicBlock *Tail = SplitBlock(Old: BB, SplitPt: CI->getNextNode()); |
1312 | // Add a phi to the tail, which will be the output of setjmp, which |
1313 | // indicates if this is the first call or a longjmp back. The phi directly |
1314 | // uses the right value based on where we arrive from |
1315 | IRB.SetInsertPoint(TheBB: Tail, IP: Tail->getFirstNonPHIIt()); |
1316 | PHINode *SetjmpRet = IRB.CreatePHI(Ty: IRB.getInt32Ty(), NumReservedValues: 2, Name: "setjmp.ret" ); |
1317 | |
1318 | // setjmp initial call returns 0 |
1319 | SetjmpRet->addIncoming(V: IRB.getInt32(C: 0), BB); |
1320 | // The proper output is now this, not the setjmp call itself |
1321 | CI->replaceAllUsesWith(V: SetjmpRet); |
1322 | // longjmp returns to the setjmp will add themselves to this phi |
1323 | SetjmpRetPHIs.push_back(Elt: SetjmpRet); |
1324 | |
1325 | // Fix call target |
1326 | // Our index in the function is our place in the array + 1 to avoid index |
1327 | // 0, because index 0 means the longjmp is not ours to handle. |
1328 | IRB.SetInsertPoint(CI); |
1329 | Value *Args[] = {CI->getArgOperand(i: 0), IRB.getInt32(C: SetjmpRetPHIs.size()), |
1330 | FunctionInvocationId}; |
1331 | IRB.CreateCall(Callee: WasmSetjmpF, Args); |
1332 | ToErase.push_back(Elt: CI); |
1333 | } |
1334 | |
1335 | // Handle longjmpable calls. |
1336 | if (EnableEmSjLj) |
1337 | handleLongjmpableCallsForEmscriptenSjLj(F, FunctionInvocationId, |
1338 | SetjmpRetPHIs); |
1339 | else // EnableWasmSjLj |
1340 | handleLongjmpableCallsForWasmSjLj(F, FunctionInvocationId, SetjmpRetPHIs); |
1341 | |
1342 | // Erase everything we no longer need in this function |
1343 | for (Instruction *I : ToErase) |
1344 | I->eraseFromParent(); |
1345 | |
1346 | // Finally, our modifications to the cfg can break dominance of SSA variables. |
1347 | // For example, in this code, |
1348 | // if (x()) { .. setjmp() .. } |
1349 | // if (y()) { .. longjmp() .. } |
1350 | // We must split the longjmp block, and it can jump into the block splitted |
1351 | // from setjmp one. But that means that when we split the setjmp block, it's |
1352 | // first part no longer dominates its second part - there is a theoretically |
1353 | // possible control flow path where x() is false, then y() is true and we |
1354 | // reach the second part of the setjmp block, without ever reaching the first |
1355 | // part. So, we rebuild SSA form here. |
1356 | rebuildSSA(F); |
1357 | return true; |
1358 | } |
1359 | |
1360 | // Update each call that can longjmp so it can return to the corresponding |
1361 | // setjmp. Refer to 4) of "Emscripten setjmp/longjmp handling" section in the |
1362 | // comments at top of the file for details. |
1363 | void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj( |
1364 | Function &F, Instruction *FunctionInvocationId, |
1365 | SmallVectorImpl<PHINode *> &SetjmpRetPHIs) { |
1366 | Module &M = *F.getParent(); |
1367 | LLVMContext &C = F.getContext(); |
1368 | IRBuilder<> IRB(C); |
1369 | SmallVector<Instruction *, 64> ToErase; |
1370 | |
1371 | // call.em.longjmp BB that will be shared within the function. |
1372 | BasicBlock *CallEmLongjmpBB = nullptr; |
1373 | // PHI node for the loaded value of __THREW__ global variable in |
1374 | // call.em.longjmp BB |
1375 | PHINode *CallEmLongjmpBBThrewPHI = nullptr; |
1376 | // PHI node for the loaded value of __threwValue global variable in |
1377 | // call.em.longjmp BB |
1378 | PHINode *CallEmLongjmpBBThrewValuePHI = nullptr; |
1379 | // rethrow.exn BB that will be shared within the function. |
1380 | BasicBlock *RethrowExnBB = nullptr; |
1381 | |
1382 | // Because we are creating new BBs while processing and don't want to make |
1383 | // all these newly created BBs candidates again for longjmp processing, we |
1384 | // first make the vector of candidate BBs. |
1385 | std::vector<BasicBlock *> BBs; |
1386 | for (BasicBlock &BB : F) |
1387 | BBs.push_back(x: &BB); |
1388 | |
1389 | // BBs.size() will change within the loop, so we query it every time |
1390 | for (unsigned I = 0; I < BBs.size(); I++) { |
1391 | BasicBlock *BB = BBs[I]; |
1392 | for (Instruction &I : *BB) { |
1393 | if (isa<InvokeInst>(Val: &I)) { |
1394 | std::string S; |
1395 | raw_string_ostream SS(S); |
1396 | SS << "In function " << F.getName() |
1397 | << ": When using Wasm EH with Emscripten SjLj, there is a " |
1398 | "restriction that `setjmp` function call and exception cannot be " |
1399 | "used within the same function:\n" ; |
1400 | SS << I; |
1401 | report_fatal_error(reason: StringRef(SS.str())); |
1402 | } |
1403 | auto *CI = dyn_cast<CallInst>(Val: &I); |
1404 | if (!CI) |
1405 | continue; |
1406 | |
1407 | const Value *Callee = CI->getCalledOperand(); |
1408 | if (!canLongjmp(Callee)) |
1409 | continue; |
1410 | if (isEmAsmCall(Callee)) |
1411 | report_fatal_error(reason: "Cannot use EM_ASM* alongside setjmp/longjmp in " + |
1412 | F.getName() + |
1413 | ". Please consider using EM_JS, or move the " |
1414 | "EM_ASM into another function." , |
1415 | gen_crash_diag: false); |
1416 | |
1417 | Value *Threw = nullptr; |
1418 | BasicBlock *Tail; |
1419 | if (Callee->getName().starts_with(Prefix: "__invoke_" )) { |
1420 | // If invoke wrapper has already been generated for this call in |
1421 | // previous EH phase, search for the load instruction |
1422 | // %__THREW__.val = __THREW__; |
1423 | // in postamble after the invoke wrapper call |
1424 | LoadInst *ThrewLI = nullptr; |
1425 | StoreInst *ThrewResetSI = nullptr; |
1426 | for (auto I = std::next(x: BasicBlock::iterator(CI)), IE = BB->end(); |
1427 | I != IE; ++I) { |
1428 | if (auto *LI = dyn_cast<LoadInst>(Val&: I)) |
1429 | if (auto *GV = dyn_cast<GlobalVariable>(Val: LI->getPointerOperand())) |
1430 | if (GV == ThrewGV) { |
1431 | Threw = ThrewLI = LI; |
1432 | break; |
1433 | } |
1434 | } |
1435 | // Search for the store instruction after the load above |
1436 | // __THREW__ = 0; |
1437 | for (auto I = std::next(x: BasicBlock::iterator(ThrewLI)), IE = BB->end(); |
1438 | I != IE; ++I) { |
1439 | if (auto *SI = dyn_cast<StoreInst>(Val&: I)) { |
1440 | if (auto *GV = dyn_cast<GlobalVariable>(Val: SI->getPointerOperand())) { |
1441 | if (GV == ThrewGV && |
1442 | SI->getValueOperand() == getAddrSizeInt(M: &M, C: 0)) { |
1443 | ThrewResetSI = SI; |
1444 | break; |
1445 | } |
1446 | } |
1447 | } |
1448 | } |
1449 | assert(Threw && ThrewLI && "Cannot find __THREW__ load after invoke" ); |
1450 | assert(ThrewResetSI && "Cannot find __THREW__ store after invoke" ); |
1451 | Tail = SplitBlock(Old: BB, SplitPt: ThrewResetSI->getNextNode()); |
1452 | |
1453 | } else { |
1454 | // Wrap call with invoke wrapper and generate preamble/postamble |
1455 | Threw = wrapInvoke(CI); |
1456 | ToErase.push_back(Elt: CI); |
1457 | Tail = SplitBlock(Old: BB, SplitPt: CI->getNextNode()); |
1458 | |
1459 | // If exception handling is enabled, the thrown value can be not a |
1460 | // longjmp but an exception, in which case we shouldn't silently ignore |
1461 | // exceptions; we should rethrow them. |
1462 | // __THREW__'s value is 0 when nothing happened, 1 when an exception is |
1463 | // thrown, other values when longjmp is thrown. |
1464 | // |
1465 | // if (%__THREW__.val == 1) |
1466 | // goto %eh.rethrow |
1467 | // else |
1468 | // goto %normal |
1469 | // |
1470 | // eh.rethrow: ;; Rethrow exception |
1471 | // %exn = call @__cxa_find_matching_catch_2() ;; Retrieve thrown ptr |
1472 | // __resumeException(%exn) |
1473 | // |
1474 | // normal: |
1475 | // <-- Insertion point. Will insert sjlj handling code from here |
1476 | // goto %tail |
1477 | // |
1478 | // tail: |
1479 | // ... |
1480 | if (supportsException(F: &F) && canThrow(V: Callee)) { |
1481 | // We will add a new conditional branch. So remove the branch created |
1482 | // when we split the BB |
1483 | ToErase.push_back(Elt: BB->getTerminator()); |
1484 | |
1485 | // Generate rethrow.exn BB once and share it within the function |
1486 | if (!RethrowExnBB) { |
1487 | RethrowExnBB = BasicBlock::Create(Context&: C, Name: "rethrow.exn" , Parent: &F); |
1488 | IRB.SetInsertPoint(RethrowExnBB); |
1489 | CallInst *Exn = |
1490 | IRB.CreateCall(Callee: getFindMatchingCatch(M, NumClauses: 0), Args: {}, Name: "exn" ); |
1491 | IRB.CreateCall(Callee: ResumeF, Args: {Exn}); |
1492 | IRB.CreateUnreachable(); |
1493 | } |
1494 | |
1495 | IRB.SetInsertPoint(CI); |
1496 | BasicBlock *NormalBB = BasicBlock::Create(Context&: C, Name: "normal" , Parent: &F); |
1497 | Value *CmpEqOne = |
1498 | IRB.CreateICmpEQ(LHS: Threw, RHS: getAddrSizeInt(M: &M, C: 1), Name: "cmp.eq.one" ); |
1499 | IRB.CreateCondBr(Cond: CmpEqOne, True: RethrowExnBB, False: NormalBB); |
1500 | |
1501 | IRB.SetInsertPoint(NormalBB); |
1502 | IRB.CreateBr(Dest: Tail); |
1503 | BB = NormalBB; // New insertion point to insert __wasm_setjmp_test() |
1504 | } |
1505 | } |
1506 | |
1507 | // We need to replace the terminator in Tail - SplitBlock makes BB go |
1508 | // straight to Tail, we need to check if a longjmp occurred, and go to the |
1509 | // right setjmp-tail if so |
1510 | ToErase.push_back(Elt: BB->getTerminator()); |
1511 | |
1512 | // Generate a function call to __wasm_setjmp_test function and |
1513 | // preamble/postamble code to figure out (1) whether longjmp |
1514 | // occurred (2) if longjmp occurred, which setjmp it corresponds to |
1515 | Value *Label = nullptr; |
1516 | Value *LongjmpResult = nullptr; |
1517 | BasicBlock *EndBB = nullptr; |
1518 | wrapTestSetjmp(BB, DL: CI->getDebugLoc(), Threw, FunctionInvocationId, Label, |
1519 | LongjmpResult, CallEmLongjmpBB, CallEmLongjmpBBThrewPHI, |
1520 | CallEmLongjmpBBThrewValuePHI, EndBB); |
1521 | assert(Label && LongjmpResult && EndBB); |
1522 | |
1523 | // Create switch instruction |
1524 | IRB.SetInsertPoint(EndBB); |
1525 | IRB.SetCurrentDebugLocation(EndBB->back().getDebugLoc()); |
1526 | SwitchInst *SI = IRB.CreateSwitch(V: Label, Dest: Tail, NumCases: SetjmpRetPHIs.size()); |
1527 | // -1 means no longjmp happened, continue normally (will hit the default |
1528 | // switch case). 0 means a longjmp that is not ours to handle, needs a |
1529 | // rethrow. Otherwise the index is the same as the index in P+1 (to avoid |
1530 | // 0). |
1531 | for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) { |
1532 | SI->addCase(OnVal: IRB.getInt32(C: I + 1), Dest: SetjmpRetPHIs[I]->getParent()); |
1533 | SetjmpRetPHIs[I]->addIncoming(V: LongjmpResult, BB: EndBB); |
1534 | } |
1535 | |
1536 | // We are splitting the block here, and must continue to find other calls |
1537 | // in the block - which is now split. so continue to traverse in the Tail |
1538 | BBs.push_back(x: Tail); |
1539 | } |
1540 | } |
1541 | |
1542 | for (Instruction *I : ToErase) |
1543 | I->eraseFromParent(); |
1544 | } |
1545 | |
1546 | static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CPI) { |
1547 | for (const User *U : CPI->users()) |
1548 | if (const auto *CRI = dyn_cast<CleanupReturnInst>(Val: U)) |
1549 | return CRI->getUnwindDest(); |
1550 | return nullptr; |
1551 | } |
1552 | |
1553 | // Create a catchpad in which we catch a longjmp's env and val arguments, test |
1554 | // if the longjmp corresponds to one of setjmps in the current function, and if |
1555 | // so, jump to the setjmp dispatch BB from which we go to one of post-setjmp |
1556 | // BBs. Refer to 4) of "Wasm setjmp/longjmp handling" section in the comments at |
1557 | // top of the file for details. |
1558 | void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj( |
1559 | Function &F, Instruction *FunctionInvocationId, |
1560 | SmallVectorImpl<PHINode *> &SetjmpRetPHIs) { |
1561 | Module &M = *F.getParent(); |
1562 | LLVMContext &C = F.getContext(); |
1563 | IRBuilder<> IRB(C); |
1564 | |
1565 | // A function with catchswitch/catchpad instruction should have a personality |
1566 | // function attached to it. Search for the wasm personality function, and if |
1567 | // it exists, use it, and if it doesn't, create a dummy personality function. |
1568 | // (SjLj is not going to call it anyway.) |
1569 | if (!F.hasPersonalityFn()) { |
1570 | StringRef PersName = getEHPersonalityName(Pers: EHPersonality::Wasm_CXX); |
1571 | FunctionType *PersType = |
1572 | FunctionType::get(Result: IRB.getInt32Ty(), /* isVarArg */ true); |
1573 | Value *PersF = M.getOrInsertFunction(Name: PersName, T: PersType).getCallee(); |
1574 | F.setPersonalityFn( |
1575 | cast<Constant>(Val: IRB.CreateBitCast(V: PersF, DestTy: IRB.getPtrTy()))); |
1576 | } |
1577 | |
1578 | // Use the entry BB's debugloc as a fallback |
1579 | BasicBlock *Entry = &F.getEntryBlock(); |
1580 | DebugLoc FirstDL = getOrCreateDebugLoc(InsertBefore: &*Entry->begin(), SP: F.getSubprogram()); |
1581 | IRB.SetCurrentDebugLocation(FirstDL); |
1582 | |
1583 | // Add setjmp.dispatch BB right after the entry block. Because we have |
1584 | // initialized functionInvocationId in the entry block and split the |
1585 | // rest into another BB, here 'OrigEntry' is the function's original entry |
1586 | // block before the transformation. |
1587 | // |
1588 | // entry: |
1589 | // functionInvocationId initialization |
1590 | // setjmp.dispatch: |
1591 | // switch will be inserted here later |
1592 | // entry.split: (OrigEntry) |
1593 | // the original function starts here |
1594 | BasicBlock *OrigEntry = Entry->getNextNode(); |
1595 | BasicBlock *SetjmpDispatchBB = |
1596 | BasicBlock::Create(Context&: C, Name: "setjmp.dispatch" , Parent: &F, InsertBefore: OrigEntry); |
1597 | cast<BranchInst>(Val: Entry->getTerminator())->setSuccessor(idx: 0, NewSucc: SetjmpDispatchBB); |
1598 | |
1599 | // Create catch.dispatch.longjmp BB and a catchswitch instruction |
1600 | BasicBlock *CatchDispatchLongjmpBB = |
1601 | BasicBlock::Create(Context&: C, Name: "catch.dispatch.longjmp" , Parent: &F); |
1602 | IRB.SetInsertPoint(CatchDispatchLongjmpBB); |
1603 | CatchSwitchInst *CatchSwitchLongjmp = |
1604 | IRB.CreateCatchSwitch(ParentPad: ConstantTokenNone::get(Context&: C), UnwindBB: nullptr, NumHandlers: 1); |
1605 | |
1606 | // Create catch.longjmp BB and a catchpad instruction |
1607 | BasicBlock *CatchLongjmpBB = BasicBlock::Create(Context&: C, Name: "catch.longjmp" , Parent: &F); |
1608 | CatchSwitchLongjmp->addHandler(Dest: CatchLongjmpBB); |
1609 | IRB.SetInsertPoint(CatchLongjmpBB); |
1610 | CatchPadInst *CatchPad = IRB.CreateCatchPad(ParentPad: CatchSwitchLongjmp, Args: {}); |
1611 | |
1612 | // Wasm throw and catch instructions can throw and catch multiple values, but |
1613 | // that requires multivalue support in the toolchain, which is currently not |
1614 | // very reliable. We instead throw and catch a pointer to a struct value of |
1615 | // type 'struct __WasmLongjmpArgs', which is defined in Emscripten. |
1616 | Instruction *LongjmpArgs = |
1617 | IRB.CreateCall(Callee: CatchF, Args: {IRB.getInt32(C: WebAssembly::C_LONGJMP)}, Name: "thrown" ); |
1618 | Value *EnvField = |
1619 | IRB.CreateConstGEP2_32(Ty: LongjmpArgsTy, Ptr: LongjmpArgs, Idx0: 0, Idx1: 0, Name: "env_gep" ); |
1620 | Value *ValField = |
1621 | IRB.CreateConstGEP2_32(Ty: LongjmpArgsTy, Ptr: LongjmpArgs, Idx0: 0, Idx1: 1, Name: "val_gep" ); |
1622 | // void *env = __wasm_longjmp_args.env; |
1623 | Instruction *Env = IRB.CreateLoad(Ty: IRB.getPtrTy(), Ptr: EnvField, Name: "env" ); |
1624 | // int val = __wasm_longjmp_args.val; |
1625 | Instruction *Val = IRB.CreateLoad(Ty: IRB.getInt32Ty(), Ptr: ValField, Name: "val" ); |
1626 | |
1627 | // %label = __wasm_setjmp_test(%env, functionInvocatinoId); |
1628 | // if (%label == 0) |
1629 | // __wasm_longjmp(%env, %val) |
1630 | // catchret to %setjmp.dispatch |
1631 | BasicBlock *ThenBB = BasicBlock::Create(Context&: C, Name: "if.then" , Parent: &F); |
1632 | BasicBlock *EndBB = BasicBlock::Create(Context&: C, Name: "if.end" , Parent: &F); |
1633 | Value *EnvP = IRB.CreateBitCast(V: Env, DestTy: getAddrPtrType(M: &M), Name: "env.p" ); |
1634 | Value *Label = IRB.CreateCall(Callee: WasmSetjmpTestF, Args: {EnvP, FunctionInvocationId}, |
1635 | OpBundles: OperandBundleDef("funclet" , CatchPad), Name: "label" ); |
1636 | Value *Cmp = IRB.CreateICmpEQ(LHS: Label, RHS: IRB.getInt32(C: 0)); |
1637 | IRB.CreateCondBr(Cond: Cmp, True: ThenBB, False: EndBB); |
1638 | |
1639 | IRB.SetInsertPoint(ThenBB); |
1640 | CallInst *WasmLongjmpCI = IRB.CreateCall( |
1641 | Callee: WasmLongjmpF, Args: {Env, Val}, OpBundles: OperandBundleDef("funclet" , CatchPad)); |
1642 | IRB.CreateUnreachable(); |
1643 | |
1644 | IRB.SetInsertPoint(EndBB); |
1645 | // Jump to setjmp.dispatch block |
1646 | IRB.CreateCatchRet(CatchPad, BB: SetjmpDispatchBB); |
1647 | |
1648 | // Go back to setjmp.dispatch BB |
1649 | // setjmp.dispatch: |
1650 | // switch %label { |
1651 | // label 1: goto post-setjmp BB 1 |
1652 | // label 2: goto post-setjmp BB 2 |
1653 | // ... |
1654 | // default: goto splitted next BB |
1655 | // } |
1656 | IRB.SetInsertPoint(SetjmpDispatchBB); |
1657 | PHINode *LabelPHI = IRB.CreatePHI(Ty: IRB.getInt32Ty(), NumReservedValues: 2, Name: "label.phi" ); |
1658 | LabelPHI->addIncoming(V: Label, BB: EndBB); |
1659 | LabelPHI->addIncoming(V: IRB.getInt32(C: -1), BB: Entry); |
1660 | SwitchInst *SI = IRB.CreateSwitch(V: LabelPHI, Dest: OrigEntry, NumCases: SetjmpRetPHIs.size()); |
1661 | // -1 means no longjmp happened, continue normally (will hit the default |
1662 | // switch case). 0 means a longjmp that is not ours to handle, needs a |
1663 | // rethrow. Otherwise the index is the same as the index in P+1 (to avoid |
1664 | // 0). |
1665 | for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) { |
1666 | SI->addCase(OnVal: IRB.getInt32(C: I + 1), Dest: SetjmpRetPHIs[I]->getParent()); |
1667 | SetjmpRetPHIs[I]->addIncoming(V: Val, BB: SetjmpDispatchBB); |
1668 | } |
1669 | |
1670 | // Convert all longjmpable call instructions to invokes that unwind to the |
1671 | // newly created catch.dispatch.longjmp BB. |
1672 | SmallVector<CallInst *, 64> LongjmpableCalls; |
1673 | for (auto *BB = &*F.begin(); BB; BB = BB->getNextNode()) { |
1674 | for (auto &I : *BB) { |
1675 | auto *CI = dyn_cast<CallInst>(Val: &I); |
1676 | if (!CI) |
1677 | continue; |
1678 | const Value *Callee = CI->getCalledOperand(); |
1679 | if (!canLongjmp(Callee)) |
1680 | continue; |
1681 | if (isEmAsmCall(Callee)) |
1682 | report_fatal_error(reason: "Cannot use EM_ASM* alongside setjmp/longjmp in " + |
1683 | F.getName() + |
1684 | ". Please consider using EM_JS, or move the " |
1685 | "EM_ASM into another function." , |
1686 | gen_crash_diag: false); |
1687 | // This is __wasm_longjmp() call we inserted in this function, which |
1688 | // rethrows the longjmp when the longjmp does not correspond to one of |
1689 | // setjmps in this function. We should not convert this call to an invoke. |
1690 | if (CI == WasmLongjmpCI) |
1691 | continue; |
1692 | LongjmpableCalls.push_back(Elt: CI); |
1693 | } |
1694 | } |
1695 | |
1696 | SmallDenseMap<BasicBlock *, SmallSetVector<BasicBlock *, 4>, 4> |
1697 | UnwindDestToNewPreds; |
1698 | for (auto *CI : LongjmpableCalls) { |
1699 | // Even if the callee function has attribute 'nounwind', which is true for |
1700 | // all C functions, it can longjmp, which means it can throw a Wasm |
1701 | // exception now. |
1702 | CI->removeFnAttr(Kind: Attribute::NoUnwind); |
1703 | if (Function *CalleeF = CI->getCalledFunction()) |
1704 | CalleeF->removeFnAttr(Kind: Attribute::NoUnwind); |
1705 | |
1706 | // Change it to an invoke and make it unwind to the catch.dispatch.longjmp |
1707 | // BB. If the call is enclosed in another catchpad/cleanuppad scope, unwind |
1708 | // to its parent pad's unwind destination instead to preserve the scope |
1709 | // structure. It will eventually unwind to the catch.dispatch.longjmp. |
1710 | BasicBlock *UnwindDest = nullptr; |
1711 | if (auto Bundle = CI->getOperandBundle(ID: LLVMContext::OB_funclet)) { |
1712 | Instruction *FromPad = cast<Instruction>(Val: Bundle->Inputs[0]); |
1713 | while (!UnwindDest) { |
1714 | if (auto *CPI = dyn_cast<CatchPadInst>(Val: FromPad)) { |
1715 | UnwindDest = CPI->getCatchSwitch()->getUnwindDest(); |
1716 | break; |
1717 | } |
1718 | if (auto *CPI = dyn_cast<CleanupPadInst>(Val: FromPad)) { |
1719 | // getCleanupRetUnwindDest() can return nullptr when |
1720 | // 1. This cleanuppad's matching cleanupret uwninds to caller |
1721 | // 2. There is no matching cleanupret because it ends with |
1722 | // unreachable. |
1723 | // In case of 2, we need to traverse the parent pad chain. |
1724 | UnwindDest = getCleanupRetUnwindDest(CPI); |
1725 | Value *ParentPad = CPI->getParentPad(); |
1726 | if (isa<ConstantTokenNone>(Val: ParentPad)) |
1727 | break; |
1728 | FromPad = cast<Instruction>(Val: ParentPad); |
1729 | } |
1730 | } |
1731 | } |
1732 | if (!UnwindDest) |
1733 | UnwindDest = CatchDispatchLongjmpBB; |
1734 | // Because we are changing a longjmpable call to an invoke, its unwind |
1735 | // destination can be an existing EH pad that already have phis, and the BB |
1736 | // with the newly created invoke will become a new predecessor of that EH |
1737 | // pad. In this case we need to add the new predecessor to those phis. |
1738 | UnwindDestToNewPreds[UnwindDest].insert(X: CI->getParent()); |
1739 | changeToInvokeAndSplitBasicBlock(CI, UnwindEdge: UnwindDest); |
1740 | } |
1741 | |
1742 | SmallVector<Instruction *, 16> ToErase; |
1743 | for (auto &BB : F) { |
1744 | if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: BB.getFirstNonPHIIt())) { |
1745 | if (CSI != CatchSwitchLongjmp && CSI->unwindsToCaller()) { |
1746 | IRB.SetInsertPoint(CSI); |
1747 | ToErase.push_back(Elt: CSI); |
1748 | auto *NewCSI = IRB.CreateCatchSwitch(ParentPad: CSI->getParentPad(), |
1749 | UnwindBB: CatchDispatchLongjmpBB, NumHandlers: 1); |
1750 | NewCSI->addHandler(Dest: *CSI->handler_begin()); |
1751 | NewCSI->takeName(V: CSI); |
1752 | CSI->replaceAllUsesWith(V: NewCSI); |
1753 | } |
1754 | } |
1755 | |
1756 | if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: BB.getTerminator())) { |
1757 | if (CRI->unwindsToCaller()) { |
1758 | IRB.SetInsertPoint(CRI); |
1759 | ToErase.push_back(Elt: CRI); |
1760 | IRB.CreateCleanupRet(CleanupPad: CRI->getCleanupPad(), UnwindBB: CatchDispatchLongjmpBB); |
1761 | } |
1762 | } |
1763 | } |
1764 | |
1765 | for (Instruction *I : ToErase) |
1766 | I->eraseFromParent(); |
1767 | |
1768 | // Add entries for new predecessors to phis in unwind destinations. We use |
1769 | // 'poison' as a placeholder value. We should make sure the phis have a valid |
1770 | // set of predecessors before running SSAUpdater, because SSAUpdater |
1771 | // internally can use existing phis to gather predecessor info rather than |
1772 | // scanning the actual CFG (See FindPredecessorBlocks in SSAUpdater.cpp for |
1773 | // details). |
1774 | for (auto &[UnwindDest, NewPreds] : UnwindDestToNewPreds) { |
1775 | for (PHINode &PN : UnwindDest->phis()) { |
1776 | for (auto *NewPred : NewPreds) { |
1777 | assert(PN.getBasicBlockIndex(NewPred) == -1); |
1778 | PN.addIncoming(V: PoisonValue::get(T: PN.getType()), BB: NewPred); |
1779 | } |
1780 | } |
1781 | } |
1782 | |
1783 | // For unwind destinations for newly added invokes to longjmpable functions, |
1784 | // calculate incoming values for the newly added predecessors using |
1785 | // SSAUpdater. We add existing values in the phis to SSAUpdater as available |
1786 | // values and let it calculate what the value should be at the end of new |
1787 | // incoming blocks. |
1788 | for (auto &[UnwindDest, NewPreds] : UnwindDestToNewPreds) { |
1789 | for (PHINode &PN : UnwindDest->phis()) { |
1790 | SSAUpdater SSA; |
1791 | SSA.Initialize(Ty: PN.getType(), Name: PN.getName()); |
1792 | for (unsigned Idx = 0, E = PN.getNumIncomingValues(); Idx != E; ++Idx) { |
1793 | if (NewPreds.contains(key: PN.getIncomingBlock(i: Idx))) |
1794 | continue; |
1795 | Value *V = PN.getIncomingValue(i: Idx); |
1796 | if (auto *II = dyn_cast<InvokeInst>(Val: V)) |
1797 | SSA.AddAvailableValue(BB: II->getNormalDest(), V: II); |
1798 | else if (auto *I = dyn_cast<Instruction>(Val: V)) |
1799 | SSA.AddAvailableValue(BB: I->getParent(), V: I); |
1800 | else |
1801 | SSA.AddAvailableValue(BB: PN.getIncomingBlock(i: Idx), V); |
1802 | } |
1803 | for (auto *NewPred : NewPreds) |
1804 | PN.setIncomingValueForBlock(BB: NewPred, V: SSA.GetValueAtEndOfBlock(BB: NewPred)); |
1805 | assert(PN.isComplete()); |
1806 | } |
1807 | } |
1808 | } |
1809 | |