| 1 | //=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This file lowers exception-related instructions and setjmp/longjmp function |
| 11 | /// calls to use Emscripten's library functions. The pass uses JavaScript's try |
| 12 | /// and catch mechanism in case of Emscripten EH/SjLj and Wasm EH intrinsics in |
| 13 | /// case of Wasm SjLj. |
| 14 | /// |
| 15 | /// * Emscripten exception handling |
| 16 | /// This pass lowers invokes and landingpads into library functions in JS glue |
| 17 | /// code. Invokes are lowered into function wrappers called invoke wrappers that |
| 18 | /// exist in JS side, which wraps the original function call with JS try-catch. |
| 19 | /// If an exception occurred, cxa_throw() function in JS side sets some |
| 20 | /// variables (see below) so we can check whether an exception occurred from |
| 21 | /// wasm code and handle it appropriately. |
| 22 | /// |
| 23 | /// * Emscripten setjmp-longjmp handling |
| 24 | /// This pass lowers setjmp to a reasonably-performant approach for emscripten. |
| 25 | /// The idea is that each block with a setjmp is broken up into two parts: the |
| 26 | /// part containing setjmp and the part right after the setjmp. The latter part |
| 27 | /// is either reached from the setjmp, or later from a longjmp. To handle the |
| 28 | /// longjmp, all calls that might longjmp are also called using invoke wrappers |
| 29 | /// and thus JS / try-catch. JS longjmp() function also sets some variables so |
| 30 | /// we can check whether a longjmp occurred from wasm code. Each block with a |
| 31 | /// function call that might longjmp is also split up after the longjmp call. |
| 32 | /// After the longjmp call, we check whether a longjmp occurred, and if it did, |
| 33 | /// which setjmp it corresponds to, and jump to the right post-setjmp block. |
| 34 | /// We assume setjmp-longjmp handling always runs after EH handling, which means |
| 35 | /// we don't expect any exception-related instructions when SjLj runs. |
| 36 | /// FIXME Currently this scheme does not support indirect call of setjmp, |
| 37 | /// because of the limitation of the scheme itself. fastcomp does not support it |
| 38 | /// either. |
| 39 | /// |
| 40 | /// In detail, this pass does the following things: |
| 41 | /// |
| 42 | /// 1) Assumes the existence of global variables: __THREW__, __threwValue |
| 43 | /// __THREW__ and __threwValue are defined in compiler-rt in Emscripten. |
| 44 | /// These variables are used for both exceptions and setjmp/longjmps. |
| 45 | /// __THREW__ indicates whether an exception or a longjmp occurred or not. 0 |
| 46 | /// means nothing occurred, 1 means an exception occurred, and other numbers |
| 47 | /// mean a longjmp occurred. In the case of longjmp, __THREW__ variable |
| 48 | /// indicates the corresponding setjmp buffer the longjmp corresponds to. |
| 49 | /// __threwValue is 0 for exceptions, and the argument to longjmp in case of |
| 50 | /// longjmp. |
| 51 | /// |
| 52 | /// * Emscripten exception handling |
| 53 | /// |
| 54 | /// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions |
| 55 | /// at link time. setThrew exists in Emscripten's compiler-rt: |
| 56 | /// |
| 57 | /// void setThrew(uintptr_t threw, int value) { |
| 58 | /// if (__THREW__ == 0) { |
| 59 | /// __THREW__ = threw; |
| 60 | /// __threwValue = value; |
| 61 | /// } |
| 62 | /// } |
| 63 | /// |
| 64 | /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code. |
| 65 | /// In exception handling, getTempRet0 indicates the type of an exception |
| 66 | /// caught, and in setjmp/longjmp, it means the second argument to longjmp |
| 67 | /// function. |
| 68 | /// |
| 69 | /// 3) Lower |
| 70 | /// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad |
| 71 | /// into |
| 72 | /// __THREW__ = 0; |
| 73 | /// call @__invoke_SIG(func, arg1, arg2) |
| 74 | /// %__THREW__.val = __THREW__; |
| 75 | /// __THREW__ = 0; |
| 76 | /// if (%__THREW__.val == 1) |
| 77 | /// goto %lpad |
| 78 | /// else |
| 79 | /// goto %invoke.cont |
| 80 | /// SIG is a mangled string generated based on the LLVM IR-level function |
| 81 | /// signature. After LLVM IR types are lowered to the target wasm types, |
| 82 | /// the names for these wrappers will change based on wasm types as well, |
| 83 | /// as in invoke_vi (function takes an int and returns void). The bodies of |
| 84 | /// these wrappers will be generated in JS glue code, and inside those |
| 85 | /// wrappers we use JS try-catch to generate actual exception effects. It |
| 86 | /// also calls the original callee function. An example wrapper in JS code |
| 87 | /// would look like this: |
| 88 | /// function invoke_vi(index,a1) { |
| 89 | /// try { |
| 90 | /// Module["dynCall_vi"](index,a1); // This calls original callee |
| 91 | /// } catch(e) { |
| 92 | /// if (typeof e !== 'number' && e !== 'longjmp') throw e; |
| 93 | /// _setThrew(1, 0); // setThrew is called here |
| 94 | /// } |
| 95 | /// } |
| 96 | /// If an exception is thrown, __THREW__ will be set to true in a wrapper, |
| 97 | /// so we can jump to the right BB based on this value. |
| 98 | /// |
| 99 | /// 4) Lower |
| 100 | /// %val = landingpad catch c1 catch c2 catch c3 ... |
| 101 | /// ... use %val ... |
| 102 | /// into |
| 103 | /// %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...) |
| 104 | /// %val = {%fmc, getTempRet0()} |
| 105 | /// ... use %val ... |
| 106 | /// Here N is a number calculated based on the number of clauses. |
| 107 | /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code. |
| 108 | /// |
| 109 | /// 5) Lower |
| 110 | /// resume {%a, %b} |
| 111 | /// into |
| 112 | /// call @__resumeException(%a) |
| 113 | /// where __resumeException() is a function in JS glue code. |
| 114 | /// |
| 115 | /// 6) Lower |
| 116 | /// call @llvm.eh.typeid.for(type) (intrinsic) |
| 117 | /// into |
| 118 | /// call @llvm_eh_typeid_for(type) |
| 119 | /// llvm_eh_typeid_for function will be generated in JS glue code. |
| 120 | /// |
| 121 | /// * Emscripten setjmp / longjmp handling |
| 122 | /// |
| 123 | /// If there are calls to longjmp() |
| 124 | /// |
| 125 | /// 1) Lower |
| 126 | /// longjmp(env, val) |
| 127 | /// into |
| 128 | /// emscripten_longjmp(env, val) |
| 129 | /// |
| 130 | /// If there are calls to setjmp() |
| 131 | /// |
| 132 | /// 2) In the function entry that calls setjmp, initialize |
| 133 | /// functionInvocationId as follows: |
| 134 | /// |
| 135 | /// functionInvocationId = alloca(4) |
| 136 | /// |
| 137 | /// Note: the alloca size is not important as this pointer is |
| 138 | /// merely used for pointer comparisons. |
| 139 | /// |
| 140 | /// 3) Lower |
| 141 | /// setjmp(env) |
| 142 | /// into |
| 143 | /// __wasm_setjmp(env, label, functionInvocationId) |
| 144 | /// |
| 145 | /// __wasm_setjmp records the necessary info (the label and |
| 146 | /// functionInvocationId) to the "env". |
| 147 | /// A BB with setjmp is split into two after setjmp call in order to |
| 148 | /// make the post-setjmp BB the possible destination of longjmp BB. |
| 149 | /// |
| 150 | /// 4) Lower every call that might longjmp into |
| 151 | /// __THREW__ = 0; |
| 152 | /// call @__invoke_SIG(func, arg1, arg2) |
| 153 | /// %__THREW__.val = __THREW__; |
| 154 | /// __THREW__ = 0; |
| 155 | /// %__threwValue.val = __threwValue; |
| 156 | /// if (%__THREW__.val != 0 & %__threwValue.val != 0) { |
| 157 | /// %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId); |
| 158 | /// if (%label == 0) |
| 159 | /// emscripten_longjmp(%__THREW__.val, %__threwValue.val); |
| 160 | /// setTempRet0(%__threwValue.val); |
| 161 | /// } else { |
| 162 | /// %label = -1; |
| 163 | /// } |
| 164 | /// longjmp_result = getTempRet0(); |
| 165 | /// switch %label { |
| 166 | /// label 1: goto post-setjmp BB 1 |
| 167 | /// label 2: goto post-setjmp BB 2 |
| 168 | /// ... |
| 169 | /// default: goto splitted next BB |
| 170 | /// } |
| 171 | /// |
| 172 | /// __wasm_setjmp_test examines the jmp buf to see if it was for a matching |
| 173 | /// setjmp call. After calling an invoke wrapper, if a longjmp occurred, |
| 174 | /// __THREW__ will be the address of matching jmp_buf buffer and |
| 175 | /// __threwValue be the second argument to longjmp. |
| 176 | /// __wasm_setjmp_test returns a setjmp label, a unique ID to each setjmp |
| 177 | /// callsite. Label 0 means this longjmp buffer does not correspond to one |
| 178 | /// of the setjmp callsites in this function, so in this case we just chain |
| 179 | /// the longjmp to the caller. Label -1 means no longjmp occurred. |
| 180 | /// Otherwise we jump to the right post-setjmp BB based on the label. |
| 181 | /// |
| 182 | /// * Wasm setjmp / longjmp handling |
| 183 | /// This mode still uses some Emscripten library functions but not JavaScript's |
| 184 | /// try-catch mechanism. It instead uses Wasm exception handling intrinsics, |
| 185 | /// which will be lowered to exception handling instructions. |
| 186 | /// |
| 187 | /// If there are calls to longjmp() |
| 188 | /// |
| 189 | /// 1) Lower |
| 190 | /// longjmp(env, val) |
| 191 | /// into |
| 192 | /// __wasm_longjmp(env, val) |
| 193 | /// |
| 194 | /// If there are calls to setjmp() |
| 195 | /// |
| 196 | /// 2) and 3): The same as 2) and 3) in Emscripten SjLj. |
| 197 | /// (functionInvocationId initialization + setjmp callsite transformation) |
| 198 | /// |
| 199 | /// 4) Create a catchpad with a wasm.catch() intrinsic, which returns the value |
| 200 | /// thrown by __wasm_longjmp function. In the runtime library, we have an |
| 201 | /// equivalent of the following struct: |
| 202 | /// |
| 203 | /// struct __WasmLongjmpArgs { |
| 204 | /// void *env; |
| 205 | /// int val; |
| 206 | /// }; |
| 207 | /// |
| 208 | /// The thrown value here is a pointer to the struct. We use this struct to |
| 209 | /// transfer two values by throwing a single value. Wasm throw and catch |
| 210 | /// instructions are capable of throwing and catching multiple values, but |
| 211 | /// it also requires multivalue support that is currently not very reliable. |
| 212 | /// TODO Switch to throwing and catching two values without using the struct |
| 213 | /// |
| 214 | /// All longjmpable function calls will be converted to an invoke that will |
| 215 | /// unwind to this catchpad in case a longjmp occurs. Within the catchpad, we |
| 216 | /// test the thrown values using __wasm_setjmp_test function as we do for |
| 217 | /// Emscripten SjLj. The main difference is, in Emscripten SjLj, we need to |
| 218 | /// transform every longjmpable callsite into a sequence of code including |
| 219 | /// __wasm_setjmp_test() call; in Wasm SjLj we do the testing in only one |
| 220 | /// place, in this catchpad. |
| 221 | /// |
| 222 | /// After calling __wasm_setjmp_test(), if the longjmp does not |
| 223 | /// correspond to one of the setjmps within the current function, it rethrows |
| 224 | /// the longjmp by calling __wasm_longjmp(). If it corresponds to one of |
| 225 | /// setjmps in the function, we jump to the beginning of the function, which |
| 226 | /// contains a switch to each post-setjmp BB. Again, in Emscripten SjLj, this |
| 227 | /// switch is added for every longjmpable callsite; in Wasm SjLj we do this |
| 228 | /// only once at the top of the function. (after functionInvocationId |
| 229 | /// initialization) |
| 230 | /// |
| 231 | /// The below is the pseudocode for what we have described |
| 232 | /// |
| 233 | /// entry: |
| 234 | /// Initialize functionInvocationId |
| 235 | /// |
| 236 | /// setjmp.dispatch: |
| 237 | /// switch %label { |
| 238 | /// label 1: goto post-setjmp BB 1 |
| 239 | /// label 2: goto post-setjmp BB 2 |
| 240 | /// ... |
| 241 | /// default: goto splitted next BB |
| 242 | /// } |
| 243 | /// ... |
| 244 | /// |
| 245 | /// bb: |
| 246 | /// invoke void @foo() ;; foo is a longjmpable function |
| 247 | /// to label %next unwind label %catch.dispatch.longjmp |
| 248 | /// ... |
| 249 | /// |
| 250 | /// catch.dispatch.longjmp: |
| 251 | /// %0 = catchswitch within none [label %catch.longjmp] unwind to caller |
| 252 | /// |
| 253 | /// catch.longjmp: |
| 254 | /// %longjmp.args = wasm.catch() ;; struct __WasmLongjmpArgs |
| 255 | /// %env = load 'env' field from __WasmLongjmpArgs |
| 256 | /// %val = load 'val' field from __WasmLongjmpArgs |
| 257 | /// %label = __wasm_setjmp_test(%env, functionInvocationId); |
| 258 | /// if (%label == 0) |
| 259 | /// __wasm_longjmp(%env, %val) |
| 260 | /// catchret to %setjmp.dispatch |
| 261 | /// |
| 262 | ///===----------------------------------------------------------------------===// |
| 263 | |
| 264 | #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" |
| 265 | #include "WebAssembly.h" |
| 266 | #include "WebAssemblyTargetMachine.h" |
| 267 | #include "llvm/ADT/StringExtras.h" |
| 268 | #include "llvm/CodeGen/TargetPassConfig.h" |
| 269 | #include "llvm/CodeGen/WasmEHFuncInfo.h" |
| 270 | #include "llvm/IR/DebugInfoMetadata.h" |
| 271 | #include "llvm/IR/Dominators.h" |
| 272 | #include "llvm/IR/IRBuilder.h" |
| 273 | #include "llvm/IR/IntrinsicsWebAssembly.h" |
| 274 | #include "llvm/IR/Module.h" |
| 275 | #include "llvm/Support/CommandLine.h" |
| 276 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 277 | #include "llvm/Transforms/Utils/Local.h" |
| 278 | #include "llvm/Transforms/Utils/SSAUpdater.h" |
| 279 | #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" |
| 280 | #include <set> |
| 281 | |
| 282 | using namespace llvm; |
| 283 | |
| 284 | #define DEBUG_TYPE "wasm-lower-em-ehsjlj" |
| 285 | |
| 286 | static cl::list<std::string> |
| 287 | EHAllowlist("emscripten-cxx-exceptions-allowed" , |
| 288 | cl::desc("The list of function names in which Emscripten-style " |
| 289 | "exception handling is enabled (see emscripten " |
| 290 | "EMSCRIPTEN_CATCHING_ALLOWED options)" ), |
| 291 | cl::CommaSeparated); |
| 292 | |
| 293 | namespace { |
| 294 | class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { |
| 295 | bool EnableEmEH; // Enable Emscripten exception handling |
| 296 | bool EnableEmSjLj; // Enable Emscripten setjmp/longjmp handling |
| 297 | bool EnableWasmSjLj; // Enable Wasm setjmp/longjmp handling |
| 298 | bool DoSjLj; // Whether we actually perform setjmp/longjmp handling |
| 299 | |
| 300 | GlobalVariable *ThrewGV = nullptr; // __THREW__ (Emscripten) |
| 301 | GlobalVariable *ThrewValueGV = nullptr; // __threwValue (Emscripten) |
| 302 | Function *GetTempRet0F = nullptr; // getTempRet0() (Emscripten) |
| 303 | Function *SetTempRet0F = nullptr; // setTempRet0() (Emscripten) |
| 304 | Function *ResumeF = nullptr; // __resumeException() (Emscripten) |
| 305 | Function *EHTypeIDF = nullptr; // llvm.eh.typeid.for() (intrinsic) |
| 306 | Function *EmLongjmpF = nullptr; // emscripten_longjmp() (Emscripten) |
| 307 | Function *WasmSetjmpF = nullptr; // __wasm_setjmp() (Emscripten) |
| 308 | Function *WasmSetjmpTestF = nullptr; // __wasm_setjmp_test() (Emscripten) |
| 309 | Function *WasmLongjmpF = nullptr; // __wasm_longjmp() (Emscripten) |
| 310 | Function *CatchF = nullptr; // wasm.catch() (intrinsic) |
| 311 | |
| 312 | // type of 'struct __WasmLongjmpArgs' defined in emscripten |
| 313 | Type *LongjmpArgsTy = nullptr; |
| 314 | |
| 315 | // __cxa_find_matching_catch_N functions. |
| 316 | // Indexed by the number of clauses in an original landingpad instruction. |
| 317 | DenseMap<int, Function *> FindMatchingCatches; |
| 318 | // Map of <function signature string, invoke_ wrappers> |
| 319 | StringMap<Function *> InvokeWrappers; |
| 320 | // Set of allowed function names for exception handling |
| 321 | std::set<std::string, std::less<>> EHAllowlistSet; |
| 322 | // Functions that contains calls to setjmp |
| 323 | SmallPtrSet<Function *, 8> SetjmpUsers; |
| 324 | |
| 325 | StringRef getPassName() const override { |
| 326 | return "WebAssembly Lower Emscripten Exceptions" ; |
| 327 | } |
| 328 | |
| 329 | using InstVector = SmallVectorImpl<Instruction *>; |
| 330 | bool runEHOnFunction(Function &F); |
| 331 | bool runSjLjOnFunction(Function &F); |
| 332 | void handleLongjmpableCallsForEmscriptenSjLj( |
| 333 | Function &F, Instruction *FunctionInvocationId, |
| 334 | SmallVectorImpl<PHINode *> &SetjmpRetPHIs); |
| 335 | void |
| 336 | handleLongjmpableCallsForWasmSjLj(Function &F, |
| 337 | Instruction *FunctionInvocationId, |
| 338 | SmallVectorImpl<PHINode *> &SetjmpRetPHIs); |
| 339 | Function *getFindMatchingCatch(Module &M, unsigned NumClauses); |
| 340 | |
| 341 | Value *wrapInvoke(CallBase *CI); |
| 342 | void wrapTestSetjmp(BasicBlock *BB, DebugLoc DL, Value *Threw, |
| 343 | Value *FunctionInvocationId, Value *&Label, |
| 344 | Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB, |
| 345 | PHINode *&CallEmLongjmpBBThrewPHI, |
| 346 | PHINode *&CallEmLongjmpBBThrewValuePHI, |
| 347 | BasicBlock *&EndBB); |
| 348 | Function *getInvokeWrapper(CallBase *CI); |
| 349 | |
| 350 | bool areAllExceptionsAllowed() const { return EHAllowlistSet.empty(); } |
| 351 | bool supportsException(const Function *F) const { |
| 352 | return EnableEmEH && |
| 353 | (areAllExceptionsAllowed() || EHAllowlistSet.count(x: F->getName())); |
| 354 | } |
| 355 | void replaceLongjmpWith(Function *LongjmpF, Function *NewF); |
| 356 | |
| 357 | void rebuildSSA(Function &F); |
| 358 | |
| 359 | public: |
| 360 | static char ID; |
| 361 | |
| 362 | WebAssemblyLowerEmscriptenEHSjLj() |
| 363 | : ModulePass(ID), EnableEmEH(WebAssembly::WasmEnableEmEH), |
| 364 | EnableEmSjLj(WebAssembly::WasmEnableEmSjLj), |
| 365 | EnableWasmSjLj(WebAssembly::WasmEnableSjLj) { |
| 366 | assert(!(EnableEmSjLj && EnableWasmSjLj) && |
| 367 | "Two SjLj modes cannot be turned on at the same time" ); |
| 368 | assert(!(EnableEmEH && EnableWasmSjLj) && |
| 369 | "Wasm SjLj should be only used with Wasm EH" ); |
| 370 | EHAllowlistSet.insert(first: EHAllowlist.begin(), last: EHAllowlist.end()); |
| 371 | } |
| 372 | bool runOnModule(Module &M) override; |
| 373 | |
| 374 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 375 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 376 | } |
| 377 | }; |
| 378 | } // End anonymous namespace |
| 379 | |
| 380 | char WebAssemblyLowerEmscriptenEHSjLj::ID = 0; |
| 381 | INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE, |
| 382 | "WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp" , |
| 383 | false, false) |
| 384 | |
| 385 | ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj() { |
| 386 | return new WebAssemblyLowerEmscriptenEHSjLj(); |
| 387 | } |
| 388 | |
| 389 | static bool canThrow(const Value *V) { |
| 390 | if (const auto *F = dyn_cast<const Function>(Val: V)) { |
| 391 | // Intrinsics cannot throw |
| 392 | if (F->isIntrinsic()) |
| 393 | return false; |
| 394 | StringRef Name = F->getName(); |
| 395 | // leave setjmp and longjmp (mostly) alone, we process them properly later |
| 396 | if (Name == "setjmp" || Name == "longjmp" || Name == "emscripten_longjmp" ) |
| 397 | return false; |
| 398 | return !F->doesNotThrow(); |
| 399 | } |
| 400 | // not a function, so an indirect call - can throw, we can't tell |
| 401 | return true; |
| 402 | } |
| 403 | |
| 404 | // Get a thread-local global variable with the given name. If it doesn't exist |
| 405 | // declare it, which will generate an import and assume that it will exist at |
| 406 | // link time. |
| 407 | static GlobalVariable *getGlobalVariable(Module &M, Type *Ty, |
| 408 | WebAssemblyTargetMachine &TM, |
| 409 | const char *Name) { |
| 410 | auto *GV = dyn_cast<GlobalVariable>(Val: M.getOrInsertGlobal(Name, Ty)); |
| 411 | if (!GV) |
| 412 | report_fatal_error(reason: Twine("unable to create global: " ) + Name); |
| 413 | |
| 414 | // Variables created by this function are thread local. If the target does not |
| 415 | // support TLS, we depend on CoalesceFeaturesAndStripAtomics to downgrade it |
| 416 | // to non-thread-local ones, in which case we don't allow this object to be |
| 417 | // linked with other objects using shared memory. |
| 418 | GV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel); |
| 419 | return GV; |
| 420 | } |
| 421 | |
| 422 | // Simple function name mangler. |
| 423 | // This function simply takes LLVM's string representation of parameter types |
| 424 | // and concatenate them with '_'. There are non-alphanumeric characters but llc |
| 425 | // is ok with it, and we need to postprocess these names after the lowering |
| 426 | // phase anyway. |
| 427 | static std::string getSignature(FunctionType *FTy) { |
| 428 | std::string Sig; |
| 429 | raw_string_ostream OS(Sig); |
| 430 | OS << *FTy->getReturnType(); |
| 431 | for (Type *ParamTy : FTy->params()) |
| 432 | OS << "_" << *ParamTy; |
| 433 | if (FTy->isVarArg()) |
| 434 | OS << "_..." ; |
| 435 | Sig = OS.str(); |
| 436 | erase_if(C&: Sig, P: isSpace); |
| 437 | // When s2wasm parses .s file, a comma means the end of an argument. So a |
| 438 | // mangled function name can contain any character but a comma. |
| 439 | llvm::replace(Range&: Sig, OldValue: ',', NewValue: '.'); |
| 440 | return Sig; |
| 441 | } |
| 442 | |
| 443 | static Function *getFunction(FunctionType *Ty, const Twine &Name, Module *M) { |
| 444 | return Function::Create(Ty, Linkage: GlobalValue::ExternalLinkage, N: Name, M); |
| 445 | } |
| 446 | |
| 447 | static void markAsImported(Function *F) { |
| 448 | // Tell the linker that this function is expected to be imported from the |
| 449 | // 'env' module. This is necessary for functions that do not have fixed names |
| 450 | // (e.g. __import_xyz). These names cannot be provided by any kind of shared |
| 451 | // or static library as instead we mark them explictly as imported. |
| 452 | if (!F->hasFnAttribute(Kind: "wasm-import-module" )) { |
| 453 | llvm::AttrBuilder B(F->getParent()->getContext()); |
| 454 | B.addAttribute(A: "wasm-import-module" , V: "env" ); |
| 455 | F->addFnAttrs(Attrs: B); |
| 456 | } |
| 457 | if (!F->hasFnAttribute(Kind: "wasm-import-name" )) { |
| 458 | llvm::AttrBuilder B(F->getParent()->getContext()); |
| 459 | B.addAttribute(A: "wasm-import-name" , V: F->getName()); |
| 460 | F->addFnAttrs(Attrs: B); |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | // Returns an integer type for the target architecture's address space. |
| 465 | // i32 for wasm32 and i64 for wasm64. |
| 466 | static Type *getAddrIntType(Module *M) { |
| 467 | IRBuilder<> IRB(M->getContext()); |
| 468 | return IRB.getIntNTy(N: M->getDataLayout().getPointerSizeInBits()); |
| 469 | } |
| 470 | |
| 471 | // Returns an integer pointer type for the target architecture's address space. |
| 472 | // i32* for wasm32 and i64* for wasm64. With opaque pointers this is just a ptr |
| 473 | // in address space zero. |
| 474 | static Type *getAddrPtrType(Module *M) { |
| 475 | return PointerType::getUnqual(C&: M->getContext()); |
| 476 | } |
| 477 | |
| 478 | // Returns an integer whose type is the integer type for the target's address |
| 479 | // space. Returns (i32 C) for wasm32 and (i64 C) for wasm64, when C is the |
| 480 | // integer. |
| 481 | static Value *getAddrSizeInt(Module *M, uint64_t C) { |
| 482 | IRBuilder<> IRB(M->getContext()); |
| 483 | return IRB.getIntN(N: M->getDataLayout().getPointerSizeInBits(), C); |
| 484 | } |
| 485 | |
| 486 | // Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2. |
| 487 | // This is because a landingpad instruction contains two more arguments, a |
| 488 | // personality function and a cleanup bit, and __cxa_find_matching_catch_N |
| 489 | // functions are named after the number of arguments in the original landingpad |
| 490 | // instruction. |
| 491 | Function * |
| 492 | WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, |
| 493 | unsigned NumClauses) { |
| 494 | auto [It, Inserted] = FindMatchingCatches.try_emplace(Key: NumClauses); |
| 495 | if (!Inserted) |
| 496 | return It->second; |
| 497 | PointerType *Int8PtrTy = PointerType::getUnqual(C&: M.getContext()); |
| 498 | SmallVector<Type *, 16> Args(NumClauses, Int8PtrTy); |
| 499 | FunctionType *FTy = FunctionType::get(Result: Int8PtrTy, Params: Args, isVarArg: false); |
| 500 | Function *F = getFunction( |
| 501 | Ty: FTy, Name: "__cxa_find_matching_catch_" + Twine(NumClauses + 2), M: &M); |
| 502 | markAsImported(F); |
| 503 | It->second = F; |
| 504 | return F; |
| 505 | } |
| 506 | |
| 507 | // Generate invoke wrapper seqence with preamble and postamble |
| 508 | // Preamble: |
| 509 | // __THREW__ = 0; |
| 510 | // Postamble: |
| 511 | // %__THREW__.val = __THREW__; __THREW__ = 0; |
| 512 | // Returns %__THREW__.val, which indicates whether an exception is thrown (or |
| 513 | // whether longjmp occurred), for future use. |
| 514 | Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) { |
| 515 | Module *M = CI->getModule(); |
| 516 | LLVMContext &C = M->getContext(); |
| 517 | |
| 518 | IRBuilder<> IRB(C); |
| 519 | IRB.SetInsertPoint(CI); |
| 520 | |
| 521 | // Pre-invoke |
| 522 | // __THREW__ = 0; |
| 523 | IRB.CreateStore(Val: getAddrSizeInt(M, C: 0), Ptr: ThrewGV); |
| 524 | |
| 525 | // Invoke function wrapper in JavaScript |
| 526 | SmallVector<Value *, 16> Args; |
| 527 | // Put the pointer to the callee as first argument, so it can be called |
| 528 | // within the invoke wrapper later |
| 529 | Args.push_back(Elt: CI->getCalledOperand()); |
| 530 | Args.append(in_start: CI->arg_begin(), in_end: CI->arg_end()); |
| 531 | CallInst *NewCall = IRB.CreateCall(Callee: getInvokeWrapper(CI), Args); |
| 532 | NewCall->takeName(V: CI); |
| 533 | NewCall->setCallingConv(CallingConv::WASM_EmscriptenInvoke); |
| 534 | NewCall->setDebugLoc(CI->getDebugLoc()); |
| 535 | |
| 536 | // Because we added the pointer to the callee as first argument, all |
| 537 | // argument attribute indices have to be incremented by one. |
| 538 | SmallVector<AttributeSet, 8> ArgAttributes; |
| 539 | const AttributeList &InvokeAL = CI->getAttributes(); |
| 540 | |
| 541 | // No attributes for the callee pointer. |
| 542 | ArgAttributes.push_back(Elt: AttributeSet()); |
| 543 | // Copy the argument attributes from the original |
| 544 | for (unsigned I = 0, E = CI->arg_size(); I < E; ++I) |
| 545 | ArgAttributes.push_back(Elt: InvokeAL.getParamAttrs(ArgNo: I)); |
| 546 | |
| 547 | AttrBuilder FnAttrs(CI->getContext(), InvokeAL.getFnAttrs()); |
| 548 | if (auto Args = FnAttrs.getAllocSizeArgs()) { |
| 549 | // The allocsize attribute (if any) referes to parameters by index and needs |
| 550 | // to be adjusted. |
| 551 | auto [SizeArg, NEltArg] = *Args; |
| 552 | SizeArg += 1; |
| 553 | if (NEltArg) |
| 554 | NEltArg = *NEltArg + 1; |
| 555 | FnAttrs.addAllocSizeAttr(ElemSizeArg: SizeArg, NumElemsArg: NEltArg); |
| 556 | } |
| 557 | // In case the callee has 'noreturn' attribute, We need to remove it, because |
| 558 | // we expect invoke wrappers to return. |
| 559 | FnAttrs.removeAttribute(Val: Attribute::NoReturn); |
| 560 | |
| 561 | // Reconstruct the AttributesList based on the vector we constructed. |
| 562 | AttributeList NewCallAL = AttributeList::get( |
| 563 | C, FnAttrs: AttributeSet::get(C, B: FnAttrs), RetAttrs: InvokeAL.getRetAttrs(), ArgAttrs: ArgAttributes); |
| 564 | NewCall->setAttributes(NewCallAL); |
| 565 | |
| 566 | CI->replaceAllUsesWith(V: NewCall); |
| 567 | |
| 568 | // Post-invoke |
| 569 | // %__THREW__.val = __THREW__; __THREW__ = 0; |
| 570 | Value *Threw = |
| 571 | IRB.CreateLoad(Ty: getAddrIntType(M), Ptr: ThrewGV, Name: ThrewGV->getName() + ".val" ); |
| 572 | IRB.CreateStore(Val: getAddrSizeInt(M, C: 0), Ptr: ThrewGV); |
| 573 | return Threw; |
| 574 | } |
| 575 | |
| 576 | // Get matching invoke wrapper based on callee signature |
| 577 | Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) { |
| 578 | Module *M = CI->getModule(); |
| 579 | SmallVector<Type *, 16> ArgTys; |
| 580 | FunctionType *CalleeFTy = CI->getFunctionType(); |
| 581 | |
| 582 | std::string Sig = getSignature(FTy: CalleeFTy); |
| 583 | auto It = InvokeWrappers.find(Key: Sig); |
| 584 | if (It != InvokeWrappers.end()) |
| 585 | return It->second; |
| 586 | |
| 587 | // Put the pointer to the callee as first argument |
| 588 | ArgTys.push_back(Elt: PointerType::getUnqual(C&: CI->getContext())); |
| 589 | // Add argument types |
| 590 | ArgTys.append(in_start: CalleeFTy->param_begin(), in_end: CalleeFTy->param_end()); |
| 591 | |
| 592 | FunctionType *FTy = FunctionType::get(Result: CalleeFTy->getReturnType(), Params: ArgTys, |
| 593 | isVarArg: CalleeFTy->isVarArg()); |
| 594 | Function *F = getFunction(Ty: FTy, Name: "__invoke_" + Sig, M); |
| 595 | markAsImported(F); |
| 596 | InvokeWrappers[Sig] = F; |
| 597 | return F; |
| 598 | } |
| 599 | |
| 600 | static bool canLongjmp(const Value *Callee) { |
| 601 | if (auto *CalleeF = dyn_cast<Function>(Val: Callee)) |
| 602 | if (CalleeF->isIntrinsic()) |
| 603 | return false; |
| 604 | |
| 605 | // Attempting to transform inline assembly will result in something like: |
| 606 | // call void @__invoke_void(void ()* asm ...) |
| 607 | // which is invalid because inline assembly blocks do not have addresses |
| 608 | // and can't be passed by pointer. The result is a crash with illegal IR. |
| 609 | if (isa<InlineAsm>(Val: Callee)) |
| 610 | return false; |
| 611 | StringRef CalleeName = Callee->getName(); |
| 612 | |
| 613 | // TODO Include more functions or consider checking with mangled prefixes |
| 614 | |
| 615 | // The reason we include malloc/free here is to exclude the malloc/free |
| 616 | // calls generated in setjmp prep / cleanup routines. |
| 617 | if (CalleeName == "setjmp" || CalleeName == "malloc" || CalleeName == "free" ) |
| 618 | return false; |
| 619 | |
| 620 | // There are functions in Emscripten's JS glue code or compiler-rt |
| 621 | if (CalleeName == "__resumeException" || CalleeName == "llvm_eh_typeid_for" || |
| 622 | CalleeName == "__wasm_setjmp" || CalleeName == "__wasm_setjmp_test" || |
| 623 | CalleeName == "getTempRet0" || CalleeName == "setTempRet0" ) |
| 624 | return false; |
| 625 | |
| 626 | // __cxa_find_matching_catch_N functions cannot longjmp |
| 627 | if (Callee->getName().starts_with(Prefix: "__cxa_find_matching_catch_" )) |
| 628 | return false; |
| 629 | |
| 630 | // Exception-catching related functions |
| 631 | // |
| 632 | // We intentionally treat __cxa_end_catch longjmpable in Wasm SjLj even though |
| 633 | // it surely cannot longjmp, in order to maintain the unwind relationship from |
| 634 | // all existing catchpads (and calls within them) to catch.dispatch.longjmp. |
| 635 | // |
| 636 | // In Wasm EH + Wasm SjLj, we |
| 637 | // 1. Make all catchswitch and cleanuppad that unwind to caller unwind to |
| 638 | // catch.dispatch.longjmp instead |
| 639 | // 2. Convert all longjmpable calls to invokes that unwind to |
| 640 | // catch.dispatch.longjmp |
| 641 | // But catchswitch BBs are removed in isel, so if an EH catchswitch (generated |
| 642 | // from an exception)'s catchpad does not contain any calls that are converted |
| 643 | // into invokes unwinding to catch.dispatch.longjmp, this unwind relationship |
| 644 | // (EH catchswitch BB -> catch.dispatch.longjmp BB) is lost and |
| 645 | // catch.dispatch.longjmp BB can be placed before the EH catchswitch BB in |
| 646 | // CFGSort. |
| 647 | // int ret = setjmp(buf); |
| 648 | // try { |
| 649 | // foo(); // longjmps |
| 650 | // } catch (...) { |
| 651 | // } |
| 652 | // Then in this code, if 'foo' longjmps, it first unwinds to 'catch (...)' |
| 653 | // catchswitch, and is not caught by that catchswitch because it is a longjmp, |
| 654 | // then it should next unwind to catch.dispatch.longjmp BB. But if this 'catch |
| 655 | // (...)' catchswitch -> catch.dispatch.longjmp unwind relationship is lost, |
| 656 | // it will not unwind to catch.dispatch.longjmp, producing an incorrect |
| 657 | // result. |
| 658 | // |
| 659 | // Every catchpad generated by Wasm C++ contains __cxa_end_catch, so we |
| 660 | // intentionally treat it as longjmpable to work around this problem. This is |
| 661 | // a hacky fix but an easy one. |
| 662 | if (CalleeName == "__cxa_end_catch" ) |
| 663 | return WebAssembly::WasmEnableSjLj; |
| 664 | if (CalleeName == "__cxa_begin_catch" || |
| 665 | CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" || |
| 666 | CalleeName == "__clang_call_terminate" ) |
| 667 | return false; |
| 668 | |
| 669 | // std::terminate, which is generated when another exception occurs while |
| 670 | // handling an exception, cannot longjmp. |
| 671 | if (CalleeName == "_ZSt9terminatev" ) |
| 672 | return false; |
| 673 | |
| 674 | // Otherwise we don't know |
| 675 | return true; |
| 676 | } |
| 677 | |
| 678 | static bool isEmAsmCall(const Value *Callee) { |
| 679 | StringRef CalleeName = Callee->getName(); |
| 680 | // This is an exhaustive list from Emscripten's <emscripten/em_asm.h>. |
| 681 | return CalleeName == "emscripten_asm_const_int" || |
| 682 | CalleeName == "emscripten_asm_const_double" || |
| 683 | CalleeName == "emscripten_asm_const_int_sync_on_main_thread" || |
| 684 | CalleeName == "emscripten_asm_const_double_sync_on_main_thread" || |
| 685 | CalleeName == "emscripten_asm_const_async_on_main_thread" ; |
| 686 | } |
| 687 | |
| 688 | // Generate __wasm_setjmp_test function call seqence with preamble and |
| 689 | // postamble. The code this generates is equivalent to the following |
| 690 | // JavaScript code: |
| 691 | // %__threwValue.val = __threwValue; |
| 692 | // if (%__THREW__.val != 0 & %__threwValue.val != 0) { |
| 693 | // %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId); |
| 694 | // if (%label == 0) |
| 695 | // emscripten_longjmp(%__THREW__.val, %__threwValue.val); |
| 696 | // setTempRet0(%__threwValue.val); |
| 697 | // } else { |
| 698 | // %label = -1; |
| 699 | // } |
| 700 | // %longjmp_result = getTempRet0(); |
| 701 | // |
| 702 | // As output parameters. returns %label, %longjmp_result, and the BB the last |
| 703 | // instruction (%longjmp_result = ...) is in. |
| 704 | void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp( |
| 705 | BasicBlock *BB, DebugLoc DL, Value *Threw, Value *FunctionInvocationId, |
| 706 | Value *&Label, Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB, |
| 707 | PHINode *&CallEmLongjmpBBThrewPHI, PHINode *&CallEmLongjmpBBThrewValuePHI, |
| 708 | BasicBlock *&EndBB) { |
| 709 | Function *F = BB->getParent(); |
| 710 | Module *M = F->getParent(); |
| 711 | LLVMContext &C = M->getContext(); |
| 712 | IRBuilder<> IRB(C); |
| 713 | IRB.SetCurrentDebugLocation(DL); |
| 714 | |
| 715 | // if (%__THREW__.val != 0 & %__threwValue.val != 0) |
| 716 | IRB.SetInsertPoint(BB); |
| 717 | BasicBlock *ThenBB1 = BasicBlock::Create(Context&: C, Name: "if.then1" , Parent: F); |
| 718 | BasicBlock *ElseBB1 = BasicBlock::Create(Context&: C, Name: "if.else1" , Parent: F); |
| 719 | BasicBlock *EndBB1 = BasicBlock::Create(Context&: C, Name: "if.end" , Parent: F); |
| 720 | Value *ThrewCmp = IRB.CreateICmpNE(LHS: Threw, RHS: getAddrSizeInt(M, C: 0)); |
| 721 | Value *ThrewValue = IRB.CreateLoad(Ty: IRB.getInt32Ty(), Ptr: ThrewValueGV, |
| 722 | Name: ThrewValueGV->getName() + ".val" ); |
| 723 | Value *ThrewValueCmp = IRB.CreateICmpNE(LHS: ThrewValue, RHS: IRB.getInt32(C: 0)); |
| 724 | Value *Cmp1 = IRB.CreateAnd(LHS: ThrewCmp, RHS: ThrewValueCmp, Name: "cmp1" ); |
| 725 | IRB.CreateCondBr(Cond: Cmp1, True: ThenBB1, False: ElseBB1); |
| 726 | |
| 727 | // Generate call.em.longjmp BB once and share it within the function |
| 728 | if (!CallEmLongjmpBB) { |
| 729 | // emscripten_longjmp(%__THREW__.val, %__threwValue.val); |
| 730 | CallEmLongjmpBB = BasicBlock::Create(Context&: C, Name: "call.em.longjmp" , Parent: F); |
| 731 | IRB.SetInsertPoint(CallEmLongjmpBB); |
| 732 | CallEmLongjmpBBThrewPHI = IRB.CreatePHI(Ty: getAddrIntType(M), NumReservedValues: 4, Name: "threw.phi" ); |
| 733 | CallEmLongjmpBBThrewValuePHI = |
| 734 | IRB.CreatePHI(Ty: IRB.getInt32Ty(), NumReservedValues: 4, Name: "threwvalue.phi" ); |
| 735 | CallEmLongjmpBBThrewPHI->addIncoming(V: Threw, BB: ThenBB1); |
| 736 | CallEmLongjmpBBThrewValuePHI->addIncoming(V: ThrewValue, BB: ThenBB1); |
| 737 | IRB.CreateCall(Callee: EmLongjmpF, |
| 738 | Args: {CallEmLongjmpBBThrewPHI, CallEmLongjmpBBThrewValuePHI}); |
| 739 | IRB.CreateUnreachable(); |
| 740 | } else { |
| 741 | CallEmLongjmpBBThrewPHI->addIncoming(V: Threw, BB: ThenBB1); |
| 742 | CallEmLongjmpBBThrewValuePHI->addIncoming(V: ThrewValue, BB: ThenBB1); |
| 743 | } |
| 744 | |
| 745 | // %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId); |
| 746 | // if (%label == 0) |
| 747 | IRB.SetInsertPoint(ThenBB1); |
| 748 | BasicBlock *EndBB2 = BasicBlock::Create(Context&: C, Name: "if.end2" , Parent: F); |
| 749 | Value *ThrewPtr = |
| 750 | IRB.CreateIntToPtr(V: Threw, DestTy: getAddrPtrType(M), Name: Threw->getName() + ".p" ); |
| 751 | Value *ThenLabel = IRB.CreateCall(Callee: WasmSetjmpTestF, |
| 752 | Args: {ThrewPtr, FunctionInvocationId}, Name: "label" ); |
| 753 | Value *Cmp2 = IRB.CreateICmpEQ(LHS: ThenLabel, RHS: IRB.getInt32(C: 0)); |
| 754 | IRB.CreateCondBr(Cond: Cmp2, True: CallEmLongjmpBB, False: EndBB2); |
| 755 | |
| 756 | // setTempRet0(%__threwValue.val); |
| 757 | IRB.SetInsertPoint(EndBB2); |
| 758 | IRB.CreateCall(Callee: SetTempRet0F, Args: ThrewValue); |
| 759 | IRB.CreateBr(Dest: EndBB1); |
| 760 | |
| 761 | IRB.SetInsertPoint(ElseBB1); |
| 762 | IRB.CreateBr(Dest: EndBB1); |
| 763 | |
| 764 | // longjmp_result = getTempRet0(); |
| 765 | IRB.SetInsertPoint(EndBB1); |
| 766 | PHINode *LabelPHI = IRB.CreatePHI(Ty: IRB.getInt32Ty(), NumReservedValues: 2, Name: "label" ); |
| 767 | LabelPHI->addIncoming(V: ThenLabel, BB: EndBB2); |
| 768 | |
| 769 | LabelPHI->addIncoming(V: IRB.getInt32(C: -1), BB: ElseBB1); |
| 770 | |
| 771 | // Output parameter assignment |
| 772 | Label = LabelPHI; |
| 773 | EndBB = EndBB1; |
| 774 | LongjmpResult = IRB.CreateCall(Callee: GetTempRet0F, Args: {}, Name: "longjmp_result" ); |
| 775 | } |
| 776 | |
| 777 | void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) { |
| 778 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); |
| 779 | DT.recalculate(Func&: F); // CFG has been changed |
| 780 | |
| 781 | SSAUpdaterBulk SSA; |
| 782 | for (BasicBlock &BB : F) { |
| 783 | for (Instruction &I : BB) { |
| 784 | if (I.getType()->isVoidTy()) |
| 785 | continue; |
| 786 | unsigned VarID = SSA.AddVariable(Name: I.getName(), Ty: I.getType()); |
| 787 | // If a value is defined by an invoke instruction, it is only available in |
| 788 | // its normal destination and not in its unwind destination. |
| 789 | if (auto *II = dyn_cast<InvokeInst>(Val: &I)) |
| 790 | SSA.AddAvailableValue(Var: VarID, BB: II->getNormalDest(), V: II); |
| 791 | else |
| 792 | SSA.AddAvailableValue(Var: VarID, BB: &BB, V: &I); |
| 793 | for (auto &U : I.uses()) { |
| 794 | auto *User = cast<Instruction>(Val: U.getUser()); |
| 795 | if (auto *UserPN = dyn_cast<PHINode>(Val: User)) |
| 796 | if (UserPN->getIncomingBlock(U) == &BB) |
| 797 | continue; |
| 798 | if (DT.dominates(Def: &I, User)) |
| 799 | continue; |
| 800 | SSA.AddUse(Var: VarID, U: &U); |
| 801 | } |
| 802 | } |
| 803 | } |
| 804 | SSA.RewriteAllUses(DT: &DT); |
| 805 | } |
| 806 | |
| 807 | // Replace uses of longjmp with a new longjmp function in Emscripten library. |
| 808 | // In Emscripten SjLj, the new function is |
| 809 | // void emscripten_longjmp(uintptr_t, i32) |
| 810 | // In Wasm SjLj, the new function is |
| 811 | // void __wasm_longjmp(i8*, i32) |
| 812 | // Because the original libc longjmp function takes (jmp_buf*, i32), we need a |
| 813 | // ptrtoint/bitcast instruction here to make the type match. jmp_buf* will |
| 814 | // eventually be lowered to i32/i64 in the wasm backend. |
| 815 | void WebAssemblyLowerEmscriptenEHSjLj::replaceLongjmpWith(Function *LongjmpF, |
| 816 | Function *NewF) { |
| 817 | assert(NewF == EmLongjmpF || NewF == WasmLongjmpF); |
| 818 | Module *M = LongjmpF->getParent(); |
| 819 | SmallVector<CallInst *, 8> ToErase; |
| 820 | LLVMContext &C = LongjmpF->getParent()->getContext(); |
| 821 | IRBuilder<> IRB(C); |
| 822 | |
| 823 | // For calls to longjmp, replace it with emscripten_longjmp/__wasm_longjmp and |
| 824 | // cast its first argument (jmp_buf*) appropriately |
| 825 | for (User *U : LongjmpF->users()) { |
| 826 | auto *CI = dyn_cast<CallInst>(Val: U); |
| 827 | if (CI && CI->getCalledFunction() == LongjmpF) { |
| 828 | IRB.SetInsertPoint(CI); |
| 829 | Value *Env = nullptr; |
| 830 | if (NewF == EmLongjmpF) |
| 831 | Env = |
| 832 | IRB.CreatePtrToInt(V: CI->getArgOperand(i: 0), DestTy: getAddrIntType(M), Name: "env" ); |
| 833 | else // WasmLongjmpF |
| 834 | Env = IRB.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: IRB.getPtrTy(), Name: "env" ); |
| 835 | IRB.CreateCall(Callee: NewF, Args: {Env, CI->getArgOperand(i: 1)}); |
| 836 | ToErase.push_back(Elt: CI); |
| 837 | } |
| 838 | } |
| 839 | for (auto *I : ToErase) |
| 840 | I->eraseFromParent(); |
| 841 | |
| 842 | // If we have any remaining uses of longjmp's function pointer, replace it |
| 843 | // with (void(*)(jmp_buf*, int))emscripten_longjmp / __wasm_longjmp. |
| 844 | if (!LongjmpF->uses().empty()) { |
| 845 | Value *NewLongjmp = |
| 846 | IRB.CreateBitCast(V: NewF, DestTy: LongjmpF->getType(), Name: "longjmp.cast" ); |
| 847 | LongjmpF->replaceAllUsesWith(V: NewLongjmp); |
| 848 | } |
| 849 | } |
| 850 | |
| 851 | static bool containsLongjmpableCalls(const Function *F) { |
| 852 | for (const auto &BB : *F) |
| 853 | for (const auto &I : BB) |
| 854 | if (const auto *CB = dyn_cast<CallBase>(Val: &I)) |
| 855 | if (canLongjmp(Callee: CB->getCalledOperand())) |
| 856 | return true; |
| 857 | return false; |
| 858 | } |
| 859 | |
| 860 | // When a function contains a setjmp call but not other calls that can longjmp, |
| 861 | // we don't do setjmp transformation for that setjmp. But we need to convert the |
| 862 | // setjmp calls into "i32 0" so they don't cause link time errors. setjmp always |
| 863 | // returns 0 when called directly. |
| 864 | static void nullifySetjmp(Function *F) { |
| 865 | Module &M = *F->getParent(); |
| 866 | IRBuilder<> IRB(M.getContext()); |
| 867 | Function *SetjmpF = M.getFunction(Name: "setjmp" ); |
| 868 | SmallVector<Instruction *, 1> ToErase; |
| 869 | |
| 870 | for (User *U : make_early_inc_range(Range: SetjmpF->users())) { |
| 871 | auto *CB = cast<CallBase>(Val: U); |
| 872 | BasicBlock *BB = CB->getParent(); |
| 873 | if (BB->getParent() != F) // in other function |
| 874 | continue; |
| 875 | CallInst *CI = nullptr; |
| 876 | // setjmp cannot throw. So if it is an invoke, lower it to a call |
| 877 | if (auto *II = dyn_cast<InvokeInst>(Val: CB)) |
| 878 | CI = llvm::changeToCall(II); |
| 879 | else |
| 880 | CI = cast<CallInst>(Val: CB); |
| 881 | ToErase.push_back(Elt: CI); |
| 882 | CI->replaceAllUsesWith(V: IRB.getInt32(C: 0)); |
| 883 | } |
| 884 | for (auto *I : ToErase) |
| 885 | I->eraseFromParent(); |
| 886 | } |
| 887 | |
| 888 | bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { |
| 889 | LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n" ); |
| 890 | |
| 891 | LLVMContext &C = M.getContext(); |
| 892 | IRBuilder<> IRB(C); |
| 893 | |
| 894 | Function *SetjmpF = M.getFunction(Name: "setjmp" ); |
| 895 | Function *LongjmpF = M.getFunction(Name: "longjmp" ); |
| 896 | |
| 897 | // In some platforms _setjmp and _longjmp are used instead. Change these to |
| 898 | // use setjmp/longjmp instead, because we later detect these functions by |
| 899 | // their names. |
| 900 | Function *SetjmpF2 = M.getFunction(Name: "_setjmp" ); |
| 901 | Function *LongjmpF2 = M.getFunction(Name: "_longjmp" ); |
| 902 | if (SetjmpF2) { |
| 903 | if (SetjmpF) { |
| 904 | if (SetjmpF->getFunctionType() != SetjmpF2->getFunctionType()) |
| 905 | report_fatal_error(reason: "setjmp and _setjmp have different function types" ); |
| 906 | } else { |
| 907 | SetjmpF = Function::Create(Ty: SetjmpF2->getFunctionType(), |
| 908 | Linkage: GlobalValue::ExternalLinkage, N: "setjmp" , M); |
| 909 | } |
| 910 | SetjmpF2->replaceAllUsesWith(V: SetjmpF); |
| 911 | } |
| 912 | if (LongjmpF2) { |
| 913 | if (LongjmpF) { |
| 914 | if (LongjmpF->getFunctionType() != LongjmpF2->getFunctionType()) |
| 915 | report_fatal_error( |
| 916 | reason: "longjmp and _longjmp have different function types" ); |
| 917 | } else { |
| 918 | LongjmpF = Function::Create(Ty: LongjmpF2->getFunctionType(), |
| 919 | Linkage: GlobalValue::ExternalLinkage, N: "setjmp" , M); |
| 920 | } |
| 921 | LongjmpF2->replaceAllUsesWith(V: LongjmpF); |
| 922 | } |
| 923 | |
| 924 | auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); |
| 925 | assert(TPC && "Expected a TargetPassConfig" ); |
| 926 | auto &TM = TPC->getTM<WebAssemblyTargetMachine>(); |
| 927 | |
| 928 | // Declare (or get) global variables __THREW__, __threwValue, and |
| 929 | // getTempRet0/setTempRet0 function which are used in common for both |
| 930 | // exception handling and setjmp/longjmp handling |
| 931 | ThrewGV = getGlobalVariable(M, Ty: getAddrIntType(M: &M), TM, Name: "__THREW__" ); |
| 932 | ThrewValueGV = getGlobalVariable(M, Ty: IRB.getInt32Ty(), TM, Name: "__threwValue" ); |
| 933 | GetTempRet0F = getFunction(Ty: FunctionType::get(Result: IRB.getInt32Ty(), isVarArg: false), |
| 934 | Name: "getTempRet0" , M: &M); |
| 935 | SetTempRet0F = |
| 936 | getFunction(Ty: FunctionType::get(Result: IRB.getVoidTy(), Params: IRB.getInt32Ty(), isVarArg: false), |
| 937 | Name: "setTempRet0" , M: &M); |
| 938 | GetTempRet0F->setDoesNotThrow(); |
| 939 | SetTempRet0F->setDoesNotThrow(); |
| 940 | |
| 941 | bool Changed = false; |
| 942 | |
| 943 | // Function registration for exception handling |
| 944 | if (EnableEmEH) { |
| 945 | // Register __resumeException function |
| 946 | FunctionType *ResumeFTy = |
| 947 | FunctionType::get(Result: IRB.getVoidTy(), Params: IRB.getPtrTy(), isVarArg: false); |
| 948 | ResumeF = getFunction(Ty: ResumeFTy, Name: "__resumeException" , M: &M); |
| 949 | ResumeF->addFnAttr(Kind: Attribute::NoReturn); |
| 950 | |
| 951 | // Register llvm_eh_typeid_for function |
| 952 | FunctionType *EHTypeIDTy = |
| 953 | FunctionType::get(Result: IRB.getInt32Ty(), Params: IRB.getPtrTy(), isVarArg: false); |
| 954 | EHTypeIDF = getFunction(Ty: EHTypeIDTy, Name: "llvm_eh_typeid_for" , M: &M); |
| 955 | } |
| 956 | |
| 957 | // Functions that contains calls to setjmp but don't have other longjmpable |
| 958 | // calls within them. |
| 959 | SmallPtrSet<Function *, 4> SetjmpUsersToNullify; |
| 960 | |
| 961 | if ((EnableEmSjLj || EnableWasmSjLj) && SetjmpF) { |
| 962 | // Precompute setjmp users |
| 963 | for (User *U : SetjmpF->users()) { |
| 964 | if (auto *CB = dyn_cast<CallBase>(Val: U)) { |
| 965 | auto *UserF = CB->getFunction(); |
| 966 | // If a function that calls setjmp does not contain any other calls that |
| 967 | // can longjmp, we don't need to do any transformation on that function, |
| 968 | // so can ignore it |
| 969 | if (containsLongjmpableCalls(F: UserF)) |
| 970 | SetjmpUsers.insert(Ptr: UserF); |
| 971 | else |
| 972 | SetjmpUsersToNullify.insert(Ptr: UserF); |
| 973 | } else { |
| 974 | std::string S; |
| 975 | raw_string_ostream SS(S); |
| 976 | SS << *U; |
| 977 | report_fatal_error(reason: Twine("Indirect use of setjmp is not supported: " ) + |
| 978 | SS.str()); |
| 979 | } |
| 980 | } |
| 981 | } |
| 982 | |
| 983 | bool SetjmpUsed = SetjmpF && !SetjmpUsers.empty(); |
| 984 | bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty(); |
| 985 | DoSjLj = (EnableEmSjLj | EnableWasmSjLj) && (SetjmpUsed || LongjmpUsed); |
| 986 | |
| 987 | // Function registration and data pre-gathering for setjmp/longjmp handling |
| 988 | if (DoSjLj) { |
| 989 | assert(EnableEmSjLj || EnableWasmSjLj); |
| 990 | if (EnableEmSjLj) { |
| 991 | // Register emscripten_longjmp function |
| 992 | FunctionType *FTy = FunctionType::get( |
| 993 | Result: IRB.getVoidTy(), Params: {getAddrIntType(M: &M), IRB.getInt32Ty()}, isVarArg: false); |
| 994 | EmLongjmpF = getFunction(Ty: FTy, Name: "emscripten_longjmp" , M: &M); |
| 995 | EmLongjmpF->addFnAttr(Kind: Attribute::NoReturn); |
| 996 | } else { // EnableWasmSjLj |
| 997 | Type *Int8PtrTy = IRB.getPtrTy(); |
| 998 | // Register __wasm_longjmp function, which calls __builtin_wasm_longjmp. |
| 999 | FunctionType *FTy = FunctionType::get( |
| 1000 | Result: IRB.getVoidTy(), Params: {Int8PtrTy, IRB.getInt32Ty()}, isVarArg: false); |
| 1001 | WasmLongjmpF = getFunction(Ty: FTy, Name: "__wasm_longjmp" , M: &M); |
| 1002 | WasmLongjmpF->addFnAttr(Kind: Attribute::NoReturn); |
| 1003 | } |
| 1004 | |
| 1005 | if (SetjmpF) { |
| 1006 | Type *Int8PtrTy = IRB.getPtrTy(); |
| 1007 | Type *Int32PtrTy = IRB.getPtrTy(); |
| 1008 | Type *Int32Ty = IRB.getInt32Ty(); |
| 1009 | |
| 1010 | // Register __wasm_setjmp function |
| 1011 | FunctionType *SetjmpFTy = SetjmpF->getFunctionType(); |
| 1012 | FunctionType *FTy = FunctionType::get( |
| 1013 | Result: IRB.getVoidTy(), Params: {SetjmpFTy->getParamType(i: 0), Int32Ty, Int32PtrTy}, |
| 1014 | isVarArg: false); |
| 1015 | WasmSetjmpF = getFunction(Ty: FTy, Name: "__wasm_setjmp" , M: &M); |
| 1016 | |
| 1017 | // Register __wasm_setjmp_test function |
| 1018 | FTy = FunctionType::get(Result: Int32Ty, Params: {Int32PtrTy, Int32PtrTy}, isVarArg: false); |
| 1019 | WasmSetjmpTestF = getFunction(Ty: FTy, Name: "__wasm_setjmp_test" , M: &M); |
| 1020 | |
| 1021 | // wasm.catch() will be lowered down to wasm 'catch' instruction in |
| 1022 | // instruction selection. |
| 1023 | CatchF = Intrinsic::getOrInsertDeclaration(M: &M, id: Intrinsic::wasm_catch); |
| 1024 | // Type for struct __WasmLongjmpArgs |
| 1025 | LongjmpArgsTy = StructType::get(elt1: Int8PtrTy, // env |
| 1026 | elts: Int32Ty // val |
| 1027 | ); |
| 1028 | } |
| 1029 | } |
| 1030 | |
| 1031 | // Exception handling transformation |
| 1032 | if (EnableEmEH) { |
| 1033 | for (Function &F : M) { |
| 1034 | if (F.isDeclaration()) |
| 1035 | continue; |
| 1036 | Changed |= runEHOnFunction(F); |
| 1037 | } |
| 1038 | } |
| 1039 | |
| 1040 | // Setjmp/longjmp handling transformation |
| 1041 | if (DoSjLj) { |
| 1042 | Changed = true; // We have setjmp or longjmp somewhere |
| 1043 | if (LongjmpF) |
| 1044 | replaceLongjmpWith(LongjmpF, NewF: EnableEmSjLj ? EmLongjmpF : WasmLongjmpF); |
| 1045 | // Only traverse functions that uses setjmp in order not to insert |
| 1046 | // unnecessary prep / cleanup code in every function |
| 1047 | if (SetjmpF) |
| 1048 | for (Function *F : SetjmpUsers) |
| 1049 | runSjLjOnFunction(F&: *F); |
| 1050 | } |
| 1051 | |
| 1052 | // Replace unnecessary setjmp calls with 0 |
| 1053 | if ((EnableEmSjLj || EnableWasmSjLj) && !SetjmpUsersToNullify.empty()) { |
| 1054 | Changed = true; |
| 1055 | assert(SetjmpF); |
| 1056 | for (Function *F : SetjmpUsersToNullify) |
| 1057 | nullifySetjmp(F); |
| 1058 | } |
| 1059 | |
| 1060 | // Delete unused global variables and functions |
| 1061 | for (auto *V : {ThrewGV, ThrewValueGV}) |
| 1062 | if (V && V->use_empty()) |
| 1063 | V->eraseFromParent(); |
| 1064 | for (auto *V : {GetTempRet0F, SetTempRet0F, ResumeF, EHTypeIDF, EmLongjmpF, |
| 1065 | WasmSetjmpF, WasmSetjmpTestF, WasmLongjmpF, CatchF}) |
| 1066 | if (V && V->use_empty()) |
| 1067 | V->eraseFromParent(); |
| 1068 | |
| 1069 | return Changed; |
| 1070 | } |
| 1071 | |
| 1072 | bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { |
| 1073 | Module &M = *F.getParent(); |
| 1074 | LLVMContext &C = F.getContext(); |
| 1075 | IRBuilder<> IRB(C); |
| 1076 | bool Changed = false; |
| 1077 | SmallVector<Instruction *, 64> ToErase; |
| 1078 | SmallPtrSet<LandingPadInst *, 32> LandingPads; |
| 1079 | |
| 1080 | // rethrow.longjmp BB that will be shared within the function. |
| 1081 | BasicBlock *RethrowLongjmpBB = nullptr; |
| 1082 | // PHI node for the loaded value of __THREW__ global variable in |
| 1083 | // rethrow.longjmp BB |
| 1084 | PHINode *RethrowLongjmpBBThrewPHI = nullptr; |
| 1085 | |
| 1086 | for (BasicBlock &BB : F) { |
| 1087 | auto *II = dyn_cast<InvokeInst>(Val: BB.getTerminator()); |
| 1088 | if (!II) |
| 1089 | continue; |
| 1090 | Changed = true; |
| 1091 | LandingPads.insert(Ptr: II->getLandingPadInst()); |
| 1092 | IRB.SetInsertPoint(II); |
| 1093 | |
| 1094 | const Value *Callee = II->getCalledOperand(); |
| 1095 | bool NeedInvoke = supportsException(F: &F) && canThrow(V: Callee); |
| 1096 | if (NeedInvoke) { |
| 1097 | // Wrap invoke with invoke wrapper and generate preamble/postamble |
| 1098 | Value *Threw = wrapInvoke(CI: II); |
| 1099 | ToErase.push_back(Elt: II); |
| 1100 | |
| 1101 | // If setjmp/longjmp handling is enabled, the thrown value can be not an |
| 1102 | // exception but a longjmp. If the current function contains calls to |
| 1103 | // setjmp, it will be appropriately handled in runSjLjOnFunction. But even |
| 1104 | // if the function does not contain setjmp calls, we shouldn't silently |
| 1105 | // ignore longjmps; we should rethrow them so they can be correctly |
| 1106 | // handled in somewhere up the call chain where setjmp is. __THREW__'s |
| 1107 | // value is 0 when nothing happened, 1 when an exception is thrown, and |
| 1108 | // other values when longjmp is thrown. |
| 1109 | // |
| 1110 | // if (%__THREW__.val == 0 || %__THREW__.val == 1) |
| 1111 | // goto %tail |
| 1112 | // else |
| 1113 | // goto %longjmp.rethrow |
| 1114 | // |
| 1115 | // rethrow.longjmp: ;; This is longjmp. Rethrow it |
| 1116 | // %__threwValue.val = __threwValue |
| 1117 | // emscripten_longjmp(%__THREW__.val, %__threwValue.val); |
| 1118 | // |
| 1119 | // tail: ;; Nothing happened or an exception is thrown |
| 1120 | // ... Continue exception handling ... |
| 1121 | if (DoSjLj && EnableEmSjLj && !SetjmpUsers.count(Ptr: &F) && |
| 1122 | canLongjmp(Callee)) { |
| 1123 | // Create longjmp.rethrow BB once and share it within the function |
| 1124 | if (!RethrowLongjmpBB) { |
| 1125 | RethrowLongjmpBB = BasicBlock::Create(Context&: C, Name: "rethrow.longjmp" , Parent: &F); |
| 1126 | IRB.SetInsertPoint(RethrowLongjmpBB); |
| 1127 | RethrowLongjmpBBThrewPHI = |
| 1128 | IRB.CreatePHI(Ty: getAddrIntType(M: &M), NumReservedValues: 4, Name: "threw.phi" ); |
| 1129 | RethrowLongjmpBBThrewPHI->addIncoming(V: Threw, BB: &BB); |
| 1130 | Value *ThrewValue = IRB.CreateLoad(Ty: IRB.getInt32Ty(), Ptr: ThrewValueGV, |
| 1131 | Name: ThrewValueGV->getName() + ".val" ); |
| 1132 | IRB.CreateCall(Callee: EmLongjmpF, Args: {RethrowLongjmpBBThrewPHI, ThrewValue}); |
| 1133 | IRB.CreateUnreachable(); |
| 1134 | } else { |
| 1135 | RethrowLongjmpBBThrewPHI->addIncoming(V: Threw, BB: &BB); |
| 1136 | } |
| 1137 | |
| 1138 | IRB.SetInsertPoint(II); // Restore the insert point back |
| 1139 | BasicBlock *Tail = BasicBlock::Create(Context&: C, Name: "tail" , Parent: &F); |
| 1140 | Value *CmpEqOne = |
| 1141 | IRB.CreateICmpEQ(LHS: Threw, RHS: getAddrSizeInt(M: &M, C: 1), Name: "cmp.eq.one" ); |
| 1142 | Value *CmpEqZero = |
| 1143 | IRB.CreateICmpEQ(LHS: Threw, RHS: getAddrSizeInt(M: &M, C: 0), Name: "cmp.eq.zero" ); |
| 1144 | Value *Or = IRB.CreateOr(LHS: CmpEqZero, RHS: CmpEqOne, Name: "or" ); |
| 1145 | IRB.CreateCondBr(Cond: Or, True: Tail, False: RethrowLongjmpBB); |
| 1146 | IRB.SetInsertPoint(Tail); |
| 1147 | BB.replaceSuccessorsPhiUsesWith(Old: &BB, New: Tail); |
| 1148 | } |
| 1149 | |
| 1150 | // Insert a branch based on __THREW__ variable |
| 1151 | Value *Cmp = IRB.CreateICmpEQ(LHS: Threw, RHS: getAddrSizeInt(M: &M, C: 1), Name: "cmp" ); |
| 1152 | IRB.CreateCondBr(Cond: Cmp, True: II->getUnwindDest(), False: II->getNormalDest()); |
| 1153 | |
| 1154 | } else { |
| 1155 | // This can't throw, and we don't need this invoke, just replace it with a |
| 1156 | // call+branch |
| 1157 | changeToCall(II); |
| 1158 | } |
| 1159 | } |
| 1160 | |
| 1161 | // Process resume instructions |
| 1162 | for (BasicBlock &BB : F) { |
| 1163 | // Scan the body of the basic block for resumes |
| 1164 | for (Instruction &I : BB) { |
| 1165 | auto *RI = dyn_cast<ResumeInst>(Val: &I); |
| 1166 | if (!RI) |
| 1167 | continue; |
| 1168 | Changed = true; |
| 1169 | |
| 1170 | // Split the input into legal values |
| 1171 | Value *Input = RI->getValue(); |
| 1172 | IRB.SetInsertPoint(RI); |
| 1173 | Value *Low = IRB.CreateExtractValue(Agg: Input, Idxs: 0, Name: "low" ); |
| 1174 | // Create a call to __resumeException function |
| 1175 | IRB.CreateCall(Callee: ResumeF, Args: {Low}); |
| 1176 | // Add a terminator to the block |
| 1177 | IRB.CreateUnreachable(); |
| 1178 | ToErase.push_back(Elt: RI); |
| 1179 | } |
| 1180 | } |
| 1181 | |
| 1182 | // Process llvm.eh.typeid.for intrinsics |
| 1183 | for (BasicBlock &BB : F) { |
| 1184 | for (Instruction &I : BB) { |
| 1185 | auto *CI = dyn_cast<CallInst>(Val: &I); |
| 1186 | if (!CI) |
| 1187 | continue; |
| 1188 | const Function *Callee = CI->getCalledFunction(); |
| 1189 | if (!Callee) |
| 1190 | continue; |
| 1191 | if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for) |
| 1192 | continue; |
| 1193 | Changed = true; |
| 1194 | |
| 1195 | IRB.SetInsertPoint(CI); |
| 1196 | CallInst *NewCI = |
| 1197 | IRB.CreateCall(Callee: EHTypeIDF, Args: CI->getArgOperand(i: 0), Name: "typeid" ); |
| 1198 | CI->replaceAllUsesWith(V: NewCI); |
| 1199 | ToErase.push_back(Elt: CI); |
| 1200 | } |
| 1201 | } |
| 1202 | |
| 1203 | // Look for orphan landingpads, can occur in blocks with no predecessors |
| 1204 | for (BasicBlock &BB : F) { |
| 1205 | BasicBlock::iterator I = BB.getFirstNonPHIIt(); |
| 1206 | if (auto *LPI = dyn_cast<LandingPadInst>(Val&: I)) |
| 1207 | LandingPads.insert(Ptr: LPI); |
| 1208 | } |
| 1209 | Changed |= !LandingPads.empty(); |
| 1210 | |
| 1211 | // Handle all the landingpad for this function together, as multiple invokes |
| 1212 | // may share a single lp |
| 1213 | for (LandingPadInst *LPI : LandingPads) { |
| 1214 | IRB.SetInsertPoint(LPI); |
| 1215 | SmallVector<Value *, 16> FMCArgs; |
| 1216 | for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) { |
| 1217 | Constant *Clause = LPI->getClause(Idx: I); |
| 1218 | // TODO Handle filters (= exception specifications). |
| 1219 | // https://github.com/llvm/llvm-project/issues/49740 |
| 1220 | if (LPI->isCatch(Idx: I)) |
| 1221 | FMCArgs.push_back(Elt: Clause); |
| 1222 | } |
| 1223 | |
| 1224 | // Create a call to __cxa_find_matching_catch_N function |
| 1225 | Function *FMCF = getFindMatchingCatch(M, NumClauses: FMCArgs.size()); |
| 1226 | CallInst *FMCI = IRB.CreateCall(Callee: FMCF, Args: FMCArgs, Name: "fmc" ); |
| 1227 | Value *Poison = PoisonValue::get(T: LPI->getType()); |
| 1228 | Value *Pair0 = IRB.CreateInsertValue(Agg: Poison, Val: FMCI, Idxs: 0, Name: "pair0" ); |
| 1229 | Value *TempRet0 = IRB.CreateCall(Callee: GetTempRet0F, Args: {}, Name: "tempret0" ); |
| 1230 | Value *Pair1 = IRB.CreateInsertValue(Agg: Pair0, Val: TempRet0, Idxs: 1, Name: "pair1" ); |
| 1231 | |
| 1232 | LPI->replaceAllUsesWith(V: Pair1); |
| 1233 | ToErase.push_back(Elt: LPI); |
| 1234 | } |
| 1235 | |
| 1236 | // Erase everything we no longer need in this function |
| 1237 | for (Instruction *I : ToErase) |
| 1238 | I->eraseFromParent(); |
| 1239 | |
| 1240 | return Changed; |
| 1241 | } |
| 1242 | |
| 1243 | // This tries to get debug info from the instruction before which a new |
| 1244 | // instruction will be inserted, and if there's no debug info in that |
| 1245 | // instruction, tries to get the info instead from the previous instruction (if |
| 1246 | // any). If none of these has debug info and a DISubprogram is provided, it |
| 1247 | // creates a dummy debug info with the first line of the function, because IR |
| 1248 | // verifier requires all inlinable callsites should have debug info when both a |
| 1249 | // caller and callee have DISubprogram. If none of these conditions are met, |
| 1250 | // returns empty info. |
| 1251 | static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore, |
| 1252 | DISubprogram *SP) { |
| 1253 | assert(InsertBefore); |
| 1254 | if (InsertBefore->getDebugLoc()) |
| 1255 | return InsertBefore->getDebugLoc(); |
| 1256 | const Instruction *Prev = InsertBefore->getPrevNode(); |
| 1257 | if (Prev && Prev->getDebugLoc()) |
| 1258 | return Prev->getDebugLoc(); |
| 1259 | if (SP) |
| 1260 | return DILocation::get(Context&: SP->getContext(), Line: SP->getLine(), Column: 1, Scope: SP); |
| 1261 | return DebugLoc(); |
| 1262 | } |
| 1263 | |
| 1264 | bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { |
| 1265 | assert(EnableEmSjLj || EnableWasmSjLj); |
| 1266 | Module &M = *F.getParent(); |
| 1267 | LLVMContext &C = F.getContext(); |
| 1268 | IRBuilder<> IRB(C); |
| 1269 | SmallVector<Instruction *, 64> ToErase; |
| 1270 | |
| 1271 | // Setjmp preparation |
| 1272 | |
| 1273 | BasicBlock *Entry = &F.getEntryBlock(); |
| 1274 | DebugLoc FirstDL = getOrCreateDebugLoc(InsertBefore: &*Entry->begin(), SP: F.getSubprogram()); |
| 1275 | SplitBlock(Old: Entry, SplitPt: &*Entry->getFirstInsertionPt()); |
| 1276 | |
| 1277 | IRB.SetInsertPoint(Entry->getTerminator()->getIterator()); |
| 1278 | // This alloca'ed pointer is used by the runtime to identify function |
| 1279 | // invocations. It's just for pointer comparisons. It will never be |
| 1280 | // dereferenced. |
| 1281 | Instruction *FunctionInvocationId = |
| 1282 | IRB.CreateAlloca(Ty: IRB.getInt32Ty(), ArraySize: nullptr, Name: "functionInvocationId" ); |
| 1283 | FunctionInvocationId->setDebugLoc(FirstDL); |
| 1284 | |
| 1285 | // Setjmp transformation |
| 1286 | SmallVector<PHINode *, 4> SetjmpRetPHIs; |
| 1287 | Function *SetjmpF = M.getFunction(Name: "setjmp" ); |
| 1288 | for (auto *U : make_early_inc_range(Range: SetjmpF->users())) { |
| 1289 | auto *CB = cast<CallBase>(Val: U); |
| 1290 | BasicBlock *BB = CB->getParent(); |
| 1291 | if (BB->getParent() != &F) // in other function |
| 1292 | continue; |
| 1293 | if (CB->getOperandBundle(ID: LLVMContext::OB_funclet)) { |
| 1294 | std::string S; |
| 1295 | raw_string_ostream SS(S); |
| 1296 | SS << "In function " + F.getName() + |
| 1297 | ": setjmp within a catch clause is not supported in Wasm EH:\n" ; |
| 1298 | SS << *CB; |
| 1299 | report_fatal_error(reason: StringRef(SS.str())); |
| 1300 | } |
| 1301 | |
| 1302 | CallInst *CI = nullptr; |
| 1303 | // setjmp cannot throw. So if it is an invoke, lower it to a call |
| 1304 | if (auto *II = dyn_cast<InvokeInst>(Val: CB)) |
| 1305 | CI = llvm::changeToCall(II); |
| 1306 | else |
| 1307 | CI = cast<CallInst>(Val: CB); |
| 1308 | |
| 1309 | // The tail is everything right after the call, and will be reached once |
| 1310 | // when setjmp is called, and later when longjmp returns to the setjmp |
| 1311 | BasicBlock *Tail = SplitBlock(Old: BB, SplitPt: CI->getNextNode()); |
| 1312 | // Add a phi to the tail, which will be the output of setjmp, which |
| 1313 | // indicates if this is the first call or a longjmp back. The phi directly |
| 1314 | // uses the right value based on where we arrive from |
| 1315 | IRB.SetInsertPoint(TheBB: Tail, IP: Tail->getFirstNonPHIIt()); |
| 1316 | PHINode *SetjmpRet = IRB.CreatePHI(Ty: IRB.getInt32Ty(), NumReservedValues: 2, Name: "setjmp.ret" ); |
| 1317 | |
| 1318 | // setjmp initial call returns 0 |
| 1319 | SetjmpRet->addIncoming(V: IRB.getInt32(C: 0), BB); |
| 1320 | // The proper output is now this, not the setjmp call itself |
| 1321 | CI->replaceAllUsesWith(V: SetjmpRet); |
| 1322 | // longjmp returns to the setjmp will add themselves to this phi |
| 1323 | SetjmpRetPHIs.push_back(Elt: SetjmpRet); |
| 1324 | |
| 1325 | // Fix call target |
| 1326 | // Our index in the function is our place in the array + 1 to avoid index |
| 1327 | // 0, because index 0 means the longjmp is not ours to handle. |
| 1328 | IRB.SetInsertPoint(CI); |
| 1329 | Value *Args[] = {CI->getArgOperand(i: 0), IRB.getInt32(C: SetjmpRetPHIs.size()), |
| 1330 | FunctionInvocationId}; |
| 1331 | IRB.CreateCall(Callee: WasmSetjmpF, Args); |
| 1332 | ToErase.push_back(Elt: CI); |
| 1333 | } |
| 1334 | |
| 1335 | // Handle longjmpable calls. |
| 1336 | if (EnableEmSjLj) |
| 1337 | handleLongjmpableCallsForEmscriptenSjLj(F, FunctionInvocationId, |
| 1338 | SetjmpRetPHIs); |
| 1339 | else // EnableWasmSjLj |
| 1340 | handleLongjmpableCallsForWasmSjLj(F, FunctionInvocationId, SetjmpRetPHIs); |
| 1341 | |
| 1342 | // Erase everything we no longer need in this function |
| 1343 | for (Instruction *I : ToErase) |
| 1344 | I->eraseFromParent(); |
| 1345 | |
| 1346 | // Finally, our modifications to the cfg can break dominance of SSA variables. |
| 1347 | // For example, in this code, |
| 1348 | // if (x()) { .. setjmp() .. } |
| 1349 | // if (y()) { .. longjmp() .. } |
| 1350 | // We must split the longjmp block, and it can jump into the block split |
| 1351 | // from the setjmp one. But that means that when we split the setjmp block, its |
| 1352 | // first part no longer dominates its second part - there is a theoretically |
| 1353 | // possible control flow path where x() is false, then y() is true and we |
| 1354 | // reach the second part of the setjmp block, without ever reaching the first |
| 1355 | // part. So, we rebuild SSA form here. |
| 1356 | rebuildSSA(F); |
| 1357 | return true; |
| 1358 | } |
| 1359 | |
| 1360 | // Update each call that can longjmp so it can return to the corresponding |
| 1361 | // setjmp. Refer to 4) of "Emscripten setjmp/longjmp handling" section in the |
| 1362 | // comments at top of the file for details. |
| 1363 | void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj( |
| 1364 | Function &F, Instruction *FunctionInvocationId, |
| 1365 | SmallVectorImpl<PHINode *> &SetjmpRetPHIs) { |
| 1366 | Module &M = *F.getParent(); |
| 1367 | LLVMContext &C = F.getContext(); |
| 1368 | IRBuilder<> IRB(C); |
| 1369 | SmallVector<Instruction *, 64> ToErase; |
| 1370 | |
| 1371 | // call.em.longjmp BB that will be shared within the function. |
| 1372 | BasicBlock *CallEmLongjmpBB = nullptr; |
| 1373 | // PHI node for the loaded value of __THREW__ global variable in |
| 1374 | // call.em.longjmp BB |
| 1375 | PHINode *CallEmLongjmpBBThrewPHI = nullptr; |
| 1376 | // PHI node for the loaded value of __threwValue global variable in |
| 1377 | // call.em.longjmp BB |
| 1378 | PHINode *CallEmLongjmpBBThrewValuePHI = nullptr; |
| 1379 | // rethrow.exn BB that will be shared within the function. |
| 1380 | BasicBlock *RethrowExnBB = nullptr; |
| 1381 | |
| 1382 | // Because we are creating new BBs while processing and don't want to make |
| 1383 | // all these newly created BBs candidates again for longjmp processing, we |
| 1384 | // first make the vector of candidate BBs. |
| 1385 | std::vector<BasicBlock *> BBs; |
| 1386 | for (BasicBlock &BB : F) |
| 1387 | BBs.push_back(x: &BB); |
| 1388 | |
| 1389 | // BBs.size() will change within the loop, so we query it every time |
| 1390 | for (unsigned I = 0; I < BBs.size(); I++) { |
| 1391 | BasicBlock *BB = BBs[I]; |
| 1392 | for (Instruction &I : *BB) { |
| 1393 | if (isa<InvokeInst>(Val: &I)) { |
| 1394 | std::string S; |
| 1395 | raw_string_ostream SS(S); |
| 1396 | SS << "In function " << F.getName() |
| 1397 | << ": When using Wasm EH with Emscripten SjLj, there is a " |
| 1398 | "restriction that `setjmp` function call and exception cannot be " |
| 1399 | "used within the same function:\n" ; |
| 1400 | SS << I; |
| 1401 | report_fatal_error(reason: StringRef(SS.str())); |
| 1402 | } |
| 1403 | auto *CI = dyn_cast<CallInst>(Val: &I); |
| 1404 | if (!CI) |
| 1405 | continue; |
| 1406 | |
| 1407 | const Value *Callee = CI->getCalledOperand(); |
| 1408 | if (!canLongjmp(Callee)) |
| 1409 | continue; |
| 1410 | if (isEmAsmCall(Callee)) |
| 1411 | report_fatal_error(reason: "Cannot use EM_ASM* alongside setjmp/longjmp in " + |
| 1412 | F.getName() + |
| 1413 | ". Please consider using EM_JS, or move the " |
| 1414 | "EM_ASM into another function." , |
| 1415 | gen_crash_diag: false); |
| 1416 | |
| 1417 | Value *Threw = nullptr; |
| 1418 | BasicBlock *Tail; |
| 1419 | if (Callee->getName().starts_with(Prefix: "__invoke_" )) { |
| 1420 | // If invoke wrapper has already been generated for this call in |
| 1421 | // previous EH phase, search for the load instruction |
| 1422 | // %__THREW__.val = __THREW__; |
| 1423 | // in postamble after the invoke wrapper call |
| 1424 | LoadInst *ThrewLI = nullptr; |
| 1425 | StoreInst *ThrewResetSI = nullptr; |
| 1426 | for (auto I = std::next(x: BasicBlock::iterator(CI)), IE = BB->end(); |
| 1427 | I != IE; ++I) { |
| 1428 | if (auto *LI = dyn_cast<LoadInst>(Val&: I)) |
| 1429 | if (auto *GV = dyn_cast<GlobalVariable>(Val: LI->getPointerOperand())) |
| 1430 | if (GV == ThrewGV) { |
| 1431 | Threw = ThrewLI = LI; |
| 1432 | break; |
| 1433 | } |
| 1434 | } |
| 1435 | // Search for the store instruction after the load above |
| 1436 | // __THREW__ = 0; |
| 1437 | for (auto I = std::next(x: BasicBlock::iterator(ThrewLI)), IE = BB->end(); |
| 1438 | I != IE; ++I) { |
| 1439 | if (auto *SI = dyn_cast<StoreInst>(Val&: I)) { |
| 1440 | if (auto *GV = dyn_cast<GlobalVariable>(Val: SI->getPointerOperand())) { |
| 1441 | if (GV == ThrewGV && |
| 1442 | SI->getValueOperand() == getAddrSizeInt(M: &M, C: 0)) { |
| 1443 | ThrewResetSI = SI; |
| 1444 | break; |
| 1445 | } |
| 1446 | } |
| 1447 | } |
| 1448 | } |
| 1449 | assert(Threw && ThrewLI && "Cannot find __THREW__ load after invoke" ); |
| 1450 | assert(ThrewResetSI && "Cannot find __THREW__ store after invoke" ); |
| 1451 | Tail = SplitBlock(Old: BB, SplitPt: ThrewResetSI->getNextNode()); |
| 1452 | |
| 1453 | } else { |
| 1454 | // Wrap call with invoke wrapper and generate preamble/postamble |
| 1455 | Threw = wrapInvoke(CI); |
| 1456 | ToErase.push_back(Elt: CI); |
| 1457 | Tail = SplitBlock(Old: BB, SplitPt: CI->getNextNode()); |
| 1458 | |
| 1459 | // If exception handling is enabled, the thrown value can be not a |
| 1460 | // longjmp but an exception, in which case we shouldn't silently ignore |
| 1461 | // exceptions; we should rethrow them. |
| 1462 | // __THREW__'s value is 0 when nothing happened, 1 when an exception is |
| 1463 | // thrown, other values when longjmp is thrown. |
| 1464 | // |
| 1465 | // if (%__THREW__.val == 1) |
| 1466 | // goto %eh.rethrow |
| 1467 | // else |
| 1468 | // goto %normal |
| 1469 | // |
| 1470 | // eh.rethrow: ;; Rethrow exception |
| 1471 | // %exn = call @__cxa_find_matching_catch_2() ;; Retrieve thrown ptr |
| 1472 | // __resumeException(%exn) |
| 1473 | // |
| 1474 | // normal: |
| 1475 | // <-- Insertion point. Will insert sjlj handling code from here |
| 1476 | // goto %tail |
| 1477 | // |
| 1478 | // tail: |
| 1479 | // ... |
| 1480 | if (supportsException(F: &F) && canThrow(V: Callee)) { |
| 1481 | // We will add a new conditional branch. So remove the branch created |
| 1482 | // when we split the BB |
| 1483 | ToErase.push_back(Elt: BB->getTerminator()); |
| 1484 | |
| 1485 | // Generate rethrow.exn BB once and share it within the function |
| 1486 | if (!RethrowExnBB) { |
| 1487 | RethrowExnBB = BasicBlock::Create(Context&: C, Name: "rethrow.exn" , Parent: &F); |
| 1488 | IRB.SetInsertPoint(RethrowExnBB); |
| 1489 | CallInst *Exn = |
| 1490 | IRB.CreateCall(Callee: getFindMatchingCatch(M, NumClauses: 0), Args: {}, Name: "exn" ); |
| 1491 | IRB.CreateCall(Callee: ResumeF, Args: {Exn}); |
| 1492 | IRB.CreateUnreachable(); |
| 1493 | } |
| 1494 | |
| 1495 | IRB.SetInsertPoint(CI); |
| 1496 | BasicBlock *NormalBB = BasicBlock::Create(Context&: C, Name: "normal" , Parent: &F); |
| 1497 | Value *CmpEqOne = |
| 1498 | IRB.CreateICmpEQ(LHS: Threw, RHS: getAddrSizeInt(M: &M, C: 1), Name: "cmp.eq.one" ); |
| 1499 | IRB.CreateCondBr(Cond: CmpEqOne, True: RethrowExnBB, False: NormalBB); |
| 1500 | |
| 1501 | IRB.SetInsertPoint(NormalBB); |
| 1502 | IRB.CreateBr(Dest: Tail); |
| 1503 | BB = NormalBB; // New insertion point to insert __wasm_setjmp_test() |
| 1504 | } |
| 1505 | } |
| 1506 | |
| 1507 | // We need to replace the terminator in Tail - SplitBlock makes BB go |
| 1508 | // straight to Tail, we need to check if a longjmp occurred, and go to the |
| 1509 | // right setjmp-tail if so |
| 1510 | ToErase.push_back(Elt: BB->getTerminator()); |
| 1511 | |
| 1512 | // Generate a function call to __wasm_setjmp_test function and |
| 1513 | // preamble/postamble code to figure out (1) whether longjmp |
| 1514 | // occurred (2) if longjmp occurred, which setjmp it corresponds to |
| 1515 | Value *Label = nullptr; |
| 1516 | Value *LongjmpResult = nullptr; |
| 1517 | BasicBlock *EndBB = nullptr; |
| 1518 | wrapTestSetjmp(BB, DL: CI->getDebugLoc(), Threw, FunctionInvocationId, Label, |
| 1519 | LongjmpResult, CallEmLongjmpBB, CallEmLongjmpBBThrewPHI, |
| 1520 | CallEmLongjmpBBThrewValuePHI, EndBB); |
| 1521 | assert(Label && LongjmpResult && EndBB); |
| 1522 | |
| 1523 | // Create switch instruction |
| 1524 | IRB.SetInsertPoint(EndBB); |
| 1525 | IRB.SetCurrentDebugLocation(EndBB->back().getDebugLoc()); |
| 1526 | SwitchInst *SI = IRB.CreateSwitch(V: Label, Dest: Tail, NumCases: SetjmpRetPHIs.size()); |
| 1527 | // -1 means no longjmp happened, continue normally (will hit the default |
| 1528 | // switch case). 0 means a longjmp that is not ours to handle, needs a |
| 1529 | // rethrow. Otherwise the index is the same as the index in P+1 (to avoid |
| 1530 | // 0). |
| 1531 | for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) { |
| 1532 | SI->addCase(OnVal: IRB.getInt32(C: I + 1), Dest: SetjmpRetPHIs[I]->getParent()); |
| 1533 | SetjmpRetPHIs[I]->addIncoming(V: LongjmpResult, BB: EndBB); |
| 1534 | } |
| 1535 | |
| 1536 | // We are splitting the block here, and must continue to find other calls |
| 1537 | // in the block - which is now split. so continue to traverse in the Tail |
| 1538 | BBs.push_back(x: Tail); |
| 1539 | } |
| 1540 | } |
| 1541 | |
| 1542 | for (Instruction *I : ToErase) |
| 1543 | I->eraseFromParent(); |
| 1544 | } |
| 1545 | |
| 1546 | static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CPI) { |
| 1547 | for (const User *U : CPI->users()) |
| 1548 | if (const auto *CRI = dyn_cast<CleanupReturnInst>(Val: U)) |
| 1549 | return CRI->getUnwindDest(); |
| 1550 | return nullptr; |
| 1551 | } |
| 1552 | |
| 1553 | // Create a catchpad in which we catch a longjmp's env and val arguments, test |
| 1554 | // if the longjmp corresponds to one of setjmps in the current function, and if |
| 1555 | // so, jump to the setjmp dispatch BB from which we go to one of post-setjmp |
| 1556 | // BBs. Refer to 4) of "Wasm setjmp/longjmp handling" section in the comments at |
| 1557 | // top of the file for details. |
| 1558 | void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj( |
| 1559 | Function &F, Instruction *FunctionInvocationId, |
| 1560 | SmallVectorImpl<PHINode *> &SetjmpRetPHIs) { |
| 1561 | Module &M = *F.getParent(); |
| 1562 | LLVMContext &C = F.getContext(); |
| 1563 | IRBuilder<> IRB(C); |
| 1564 | |
| 1565 | // A function with catchswitch/catchpad instruction should have a personality |
| 1566 | // function attached to it. Search for the wasm personality function, and if |
| 1567 | // it exists, use it, and if it doesn't, create a dummy personality function. |
| 1568 | // (SjLj is not going to call it anyway.) |
| 1569 | if (!F.hasPersonalityFn()) { |
| 1570 | StringRef PersName = getEHPersonalityName(Pers: EHPersonality::Wasm_CXX); |
| 1571 | FunctionType *PersType = |
| 1572 | FunctionType::get(Result: IRB.getInt32Ty(), /* isVarArg */ true); |
| 1573 | Value *PersF = M.getOrInsertFunction(Name: PersName, T: PersType).getCallee(); |
| 1574 | F.setPersonalityFn( |
| 1575 | cast<Constant>(Val: IRB.CreateBitCast(V: PersF, DestTy: IRB.getPtrTy()))); |
| 1576 | } |
| 1577 | |
| 1578 | // Use the entry BB's debugloc as a fallback |
| 1579 | BasicBlock *Entry = &F.getEntryBlock(); |
| 1580 | DebugLoc FirstDL = getOrCreateDebugLoc(InsertBefore: &*Entry->begin(), SP: F.getSubprogram()); |
| 1581 | IRB.SetCurrentDebugLocation(FirstDL); |
| 1582 | |
| 1583 | // Add setjmp.dispatch BB right after the entry block. Because we have |
| 1584 | // initialized functionInvocationId in the entry block and split the |
| 1585 | // rest into another BB, here 'OrigEntry' is the function's original entry |
| 1586 | // block before the transformation. |
| 1587 | // |
| 1588 | // entry: |
| 1589 | // functionInvocationId initialization |
| 1590 | // setjmp.dispatch: |
| 1591 | // switch will be inserted here later |
| 1592 | // entry.split: (OrigEntry) |
| 1593 | // the original function starts here |
| 1594 | BasicBlock *OrigEntry = Entry->getNextNode(); |
| 1595 | BasicBlock *SetjmpDispatchBB = |
| 1596 | BasicBlock::Create(Context&: C, Name: "setjmp.dispatch" , Parent: &F, InsertBefore: OrigEntry); |
| 1597 | cast<BranchInst>(Val: Entry->getTerminator())->setSuccessor(idx: 0, NewSucc: SetjmpDispatchBB); |
| 1598 | |
| 1599 | // Create catch.dispatch.longjmp BB and a catchswitch instruction |
| 1600 | BasicBlock *CatchDispatchLongjmpBB = |
| 1601 | BasicBlock::Create(Context&: C, Name: "catch.dispatch.longjmp" , Parent: &F); |
| 1602 | IRB.SetInsertPoint(CatchDispatchLongjmpBB); |
| 1603 | CatchSwitchInst *CatchSwitchLongjmp = |
| 1604 | IRB.CreateCatchSwitch(ParentPad: ConstantTokenNone::get(Context&: C), UnwindBB: nullptr, NumHandlers: 1); |
| 1605 | |
| 1606 | // Create catch.longjmp BB and a catchpad instruction |
| 1607 | BasicBlock *CatchLongjmpBB = BasicBlock::Create(Context&: C, Name: "catch.longjmp" , Parent: &F); |
| 1608 | CatchSwitchLongjmp->addHandler(Dest: CatchLongjmpBB); |
| 1609 | IRB.SetInsertPoint(CatchLongjmpBB); |
| 1610 | CatchPadInst *CatchPad = IRB.CreateCatchPad(ParentPad: CatchSwitchLongjmp, Args: {}); |
| 1611 | |
| 1612 | // Wasm throw and catch instructions can throw and catch multiple values, but |
| 1613 | // that requires multivalue support in the toolchain, which is currently not |
| 1614 | // very reliable. We instead throw and catch a pointer to a struct value of |
| 1615 | // type 'struct __WasmLongjmpArgs', which is defined in Emscripten. |
| 1616 | Instruction *LongjmpArgs = |
| 1617 | IRB.CreateCall(Callee: CatchF, Args: {IRB.getInt32(C: WebAssembly::C_LONGJMP)}, Name: "thrown" ); |
| 1618 | Value *EnvField = |
| 1619 | IRB.CreateConstGEP2_32(Ty: LongjmpArgsTy, Ptr: LongjmpArgs, Idx0: 0, Idx1: 0, Name: "env_gep" ); |
| 1620 | Value *ValField = |
| 1621 | IRB.CreateConstGEP2_32(Ty: LongjmpArgsTy, Ptr: LongjmpArgs, Idx0: 0, Idx1: 1, Name: "val_gep" ); |
| 1622 | // void *env = __wasm_longjmp_args.env; |
| 1623 | Instruction *Env = IRB.CreateLoad(Ty: IRB.getPtrTy(), Ptr: EnvField, Name: "env" ); |
| 1624 | // int val = __wasm_longjmp_args.val; |
| 1625 | Instruction *Val = IRB.CreateLoad(Ty: IRB.getInt32Ty(), Ptr: ValField, Name: "val" ); |
| 1626 | |
| 1627 | // %label = __wasm_setjmp_test(%env, functionInvocatinoId); |
| 1628 | // if (%label == 0) |
| 1629 | // __wasm_longjmp(%env, %val) |
| 1630 | // catchret to %setjmp.dispatch |
| 1631 | BasicBlock *ThenBB = BasicBlock::Create(Context&: C, Name: "if.then" , Parent: &F); |
| 1632 | BasicBlock *EndBB = BasicBlock::Create(Context&: C, Name: "if.end" , Parent: &F); |
| 1633 | Value *EnvP = IRB.CreateBitCast(V: Env, DestTy: getAddrPtrType(M: &M), Name: "env.p" ); |
| 1634 | Value *Label = IRB.CreateCall(Callee: WasmSetjmpTestF, Args: {EnvP, FunctionInvocationId}, |
| 1635 | OpBundles: OperandBundleDef("funclet" , CatchPad), Name: "label" ); |
| 1636 | Value *Cmp = IRB.CreateICmpEQ(LHS: Label, RHS: IRB.getInt32(C: 0)); |
| 1637 | IRB.CreateCondBr(Cond: Cmp, True: ThenBB, False: EndBB); |
| 1638 | |
| 1639 | IRB.SetInsertPoint(ThenBB); |
| 1640 | CallInst *WasmLongjmpCI = IRB.CreateCall( |
| 1641 | Callee: WasmLongjmpF, Args: {Env, Val}, OpBundles: OperandBundleDef("funclet" , CatchPad)); |
| 1642 | IRB.CreateUnreachable(); |
| 1643 | |
| 1644 | IRB.SetInsertPoint(EndBB); |
| 1645 | // Jump to setjmp.dispatch block |
| 1646 | IRB.CreateCatchRet(CatchPad, BB: SetjmpDispatchBB); |
| 1647 | |
| 1648 | // Go back to setjmp.dispatch BB |
| 1649 | // setjmp.dispatch: |
| 1650 | // switch %label { |
| 1651 | // label 1: goto post-setjmp BB 1 |
| 1652 | // label 2: goto post-setjmp BB 2 |
| 1653 | // ... |
| 1654 | // default: goto splitted next BB |
| 1655 | // } |
| 1656 | IRB.SetInsertPoint(SetjmpDispatchBB); |
| 1657 | PHINode *LabelPHI = IRB.CreatePHI(Ty: IRB.getInt32Ty(), NumReservedValues: 2, Name: "label.phi" ); |
| 1658 | LabelPHI->addIncoming(V: Label, BB: EndBB); |
| 1659 | LabelPHI->addIncoming(V: IRB.getInt32(C: -1), BB: Entry); |
| 1660 | SwitchInst *SI = IRB.CreateSwitch(V: LabelPHI, Dest: OrigEntry, NumCases: SetjmpRetPHIs.size()); |
| 1661 | // -1 means no longjmp happened, continue normally (will hit the default |
| 1662 | // switch case). 0 means a longjmp that is not ours to handle, needs a |
| 1663 | // rethrow. Otherwise the index is the same as the index in P+1 (to avoid |
| 1664 | // 0). |
| 1665 | for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) { |
| 1666 | SI->addCase(OnVal: IRB.getInt32(C: I + 1), Dest: SetjmpRetPHIs[I]->getParent()); |
| 1667 | SetjmpRetPHIs[I]->addIncoming(V: Val, BB: SetjmpDispatchBB); |
| 1668 | } |
| 1669 | |
| 1670 | // Convert all longjmpable call instructions to invokes that unwind to the |
| 1671 | // newly created catch.dispatch.longjmp BB. |
| 1672 | SmallVector<CallInst *, 64> LongjmpableCalls; |
| 1673 | for (auto *BB = &*F.begin(); BB; BB = BB->getNextNode()) { |
| 1674 | for (auto &I : *BB) { |
| 1675 | auto *CI = dyn_cast<CallInst>(Val: &I); |
| 1676 | if (!CI) |
| 1677 | continue; |
| 1678 | const Value *Callee = CI->getCalledOperand(); |
| 1679 | if (!canLongjmp(Callee)) |
| 1680 | continue; |
| 1681 | if (isEmAsmCall(Callee)) |
| 1682 | report_fatal_error(reason: "Cannot use EM_ASM* alongside setjmp/longjmp in " + |
| 1683 | F.getName() + |
| 1684 | ". Please consider using EM_JS, or move the " |
| 1685 | "EM_ASM into another function." , |
| 1686 | gen_crash_diag: false); |
| 1687 | // This is __wasm_longjmp() call we inserted in this function, which |
| 1688 | // rethrows the longjmp when the longjmp does not correspond to one of |
| 1689 | // setjmps in this function. We should not convert this call to an invoke. |
| 1690 | if (CI == WasmLongjmpCI) |
| 1691 | continue; |
| 1692 | LongjmpableCalls.push_back(Elt: CI); |
| 1693 | } |
| 1694 | } |
| 1695 | |
| 1696 | SmallDenseMap<BasicBlock *, SmallSetVector<BasicBlock *, 4>, 4> |
| 1697 | UnwindDestToNewPreds; |
| 1698 | for (auto *CI : LongjmpableCalls) { |
| 1699 | // Even if the callee function has attribute 'nounwind', which is true for |
| 1700 | // all C functions, it can longjmp, which means it can throw a Wasm |
| 1701 | // exception now. |
| 1702 | CI->removeFnAttr(Kind: Attribute::NoUnwind); |
| 1703 | if (Function *CalleeF = CI->getCalledFunction()) |
| 1704 | CalleeF->removeFnAttr(Kind: Attribute::NoUnwind); |
| 1705 | |
| 1706 | // Change it to an invoke and make it unwind to the catch.dispatch.longjmp |
| 1707 | // BB. If the call is enclosed in another catchpad/cleanuppad scope, unwind |
| 1708 | // to its parent pad's unwind destination instead to preserve the scope |
| 1709 | // structure. It will eventually unwind to the catch.dispatch.longjmp. |
| 1710 | BasicBlock *UnwindDest = nullptr; |
| 1711 | if (auto Bundle = CI->getOperandBundle(ID: LLVMContext::OB_funclet)) { |
| 1712 | Instruction *FromPad = cast<Instruction>(Val: Bundle->Inputs[0]); |
| 1713 | while (!UnwindDest) { |
| 1714 | if (auto *CPI = dyn_cast<CatchPadInst>(Val: FromPad)) { |
| 1715 | UnwindDest = CPI->getCatchSwitch()->getUnwindDest(); |
| 1716 | break; |
| 1717 | } |
| 1718 | if (auto *CPI = dyn_cast<CleanupPadInst>(Val: FromPad)) { |
| 1719 | // getCleanupRetUnwindDest() can return nullptr when |
| 1720 | // 1. This cleanuppad's matching cleanupret uwninds to caller |
| 1721 | // 2. There is no matching cleanupret because it ends with |
| 1722 | // unreachable. |
| 1723 | // In case of 2, we need to traverse the parent pad chain. |
| 1724 | UnwindDest = getCleanupRetUnwindDest(CPI); |
| 1725 | Value *ParentPad = CPI->getParentPad(); |
| 1726 | if (isa<ConstantTokenNone>(Val: ParentPad)) |
| 1727 | break; |
| 1728 | FromPad = cast<Instruction>(Val: ParentPad); |
| 1729 | } |
| 1730 | } |
| 1731 | } |
| 1732 | if (!UnwindDest) |
| 1733 | UnwindDest = CatchDispatchLongjmpBB; |
| 1734 | // Because we are changing a longjmpable call to an invoke, its unwind |
| 1735 | // destination can be an existing EH pad that already have phis, and the BB |
| 1736 | // with the newly created invoke will become a new predecessor of that EH |
| 1737 | // pad. In this case we need to add the new predecessor to those phis. |
| 1738 | UnwindDestToNewPreds[UnwindDest].insert(X: CI->getParent()); |
| 1739 | changeToInvokeAndSplitBasicBlock(CI, UnwindEdge: UnwindDest); |
| 1740 | } |
| 1741 | |
| 1742 | SmallVector<Instruction *, 16> ToErase; |
| 1743 | for (auto &BB : F) { |
| 1744 | if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: BB.getFirstNonPHIIt())) { |
| 1745 | if (CSI != CatchSwitchLongjmp && CSI->unwindsToCaller()) { |
| 1746 | IRB.SetInsertPoint(CSI); |
| 1747 | ToErase.push_back(Elt: CSI); |
| 1748 | auto *NewCSI = IRB.CreateCatchSwitch(ParentPad: CSI->getParentPad(), |
| 1749 | UnwindBB: CatchDispatchLongjmpBB, NumHandlers: 1); |
| 1750 | NewCSI->addHandler(Dest: *CSI->handler_begin()); |
| 1751 | NewCSI->takeName(V: CSI); |
| 1752 | CSI->replaceAllUsesWith(V: NewCSI); |
| 1753 | } |
| 1754 | } |
| 1755 | |
| 1756 | if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: BB.getTerminator())) { |
| 1757 | if (CRI->unwindsToCaller()) { |
| 1758 | IRB.SetInsertPoint(CRI); |
| 1759 | ToErase.push_back(Elt: CRI); |
| 1760 | IRB.CreateCleanupRet(CleanupPad: CRI->getCleanupPad(), UnwindBB: CatchDispatchLongjmpBB); |
| 1761 | } |
| 1762 | } |
| 1763 | } |
| 1764 | |
| 1765 | for (Instruction *I : ToErase) |
| 1766 | I->eraseFromParent(); |
| 1767 | |
| 1768 | // Add entries for new predecessors to phis in unwind destinations. We use |
| 1769 | // 'poison' as a placeholder value. We should make sure the phis have a valid |
| 1770 | // set of predecessors before running SSAUpdater, because SSAUpdater |
| 1771 | // internally can use existing phis to gather predecessor info rather than |
| 1772 | // scanning the actual CFG (See FindPredecessorBlocks in SSAUpdater.cpp for |
| 1773 | // details). |
| 1774 | for (auto &[UnwindDest, NewPreds] : UnwindDestToNewPreds) { |
| 1775 | for (PHINode &PN : UnwindDest->phis()) { |
| 1776 | for (auto *NewPred : NewPreds) { |
| 1777 | assert(PN.getBasicBlockIndex(NewPred) == -1); |
| 1778 | PN.addIncoming(V: PoisonValue::get(T: PN.getType()), BB: NewPred); |
| 1779 | } |
| 1780 | } |
| 1781 | } |
| 1782 | |
| 1783 | // For unwind destinations for newly added invokes to longjmpable functions, |
| 1784 | // calculate incoming values for the newly added predecessors using |
| 1785 | // SSAUpdater. We add existing values in the phis to SSAUpdater as available |
| 1786 | // values and let it calculate what the value should be at the end of new |
| 1787 | // incoming blocks. |
| 1788 | for (auto &[UnwindDest, NewPreds] : UnwindDestToNewPreds) { |
| 1789 | for (PHINode &PN : UnwindDest->phis()) { |
| 1790 | SSAUpdater SSA; |
| 1791 | SSA.Initialize(Ty: PN.getType(), Name: PN.getName()); |
| 1792 | for (unsigned Idx = 0, E = PN.getNumIncomingValues(); Idx != E; ++Idx) { |
| 1793 | if (NewPreds.contains(key: PN.getIncomingBlock(i: Idx))) |
| 1794 | continue; |
| 1795 | Value *V = PN.getIncomingValue(i: Idx); |
| 1796 | if (auto *II = dyn_cast<InvokeInst>(Val: V)) |
| 1797 | SSA.AddAvailableValue(BB: II->getNormalDest(), V: II); |
| 1798 | else if (auto *I = dyn_cast<Instruction>(Val: V)) |
| 1799 | SSA.AddAvailableValue(BB: I->getParent(), V: I); |
| 1800 | else |
| 1801 | SSA.AddAvailableValue(BB: PN.getIncomingBlock(i: Idx), V); |
| 1802 | } |
| 1803 | for (auto *NewPred : NewPreds) |
| 1804 | PN.setIncomingValueForBlock(BB: NewPred, V: SSA.GetValueAtEndOfBlock(BB: NewPred)); |
| 1805 | assert(PN.isComplete()); |
| 1806 | } |
| 1807 | } |
| 1808 | } |
| 1809 | |