1 | //===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // This pass builds the coroutine frame and outlines resume and destroy parts |
9 | // of the coroutine into separate functions. |
10 | // |
11 | // We present a coroutine to an LLVM as an ordinary function with suspension |
12 | // points marked up with intrinsics. We let the optimizer party on the coroutine |
13 | // as a single function for as long as possible. Shortly before the coroutine is |
14 | // eligible to be inlined into its callers, we split up the coroutine into parts |
15 | // corresponding to an initial, resume and destroy invocations of the coroutine, |
16 | // add them to the current SCC and restart the IPO pipeline to optimize the |
17 | // coroutine subfunctions we extracted before proceeding to the caller of the |
18 | // coroutine. |
19 | //===----------------------------------------------------------------------===// |
20 | |
21 | #include "llvm/Transforms/Coroutines/CoroSplit.h" |
22 | #include "CoroInstr.h" |
23 | #include "CoroInternal.h" |
24 | #include "llvm/ADT/DenseMap.h" |
25 | #include "llvm/ADT/PriorityWorklist.h" |
26 | #include "llvm/ADT/SmallPtrSet.h" |
27 | #include "llvm/ADT/SmallVector.h" |
28 | #include "llvm/ADT/StringRef.h" |
29 | #include "llvm/ADT/Twine.h" |
30 | #include "llvm/Analysis/CFG.h" |
31 | #include "llvm/Analysis/CallGraph.h" |
32 | #include "llvm/Analysis/ConstantFolding.h" |
33 | #include "llvm/Analysis/LazyCallGraph.h" |
34 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
35 | #include "llvm/Analysis/TargetTransformInfo.h" |
36 | #include "llvm/BinaryFormat/Dwarf.h" |
37 | #include "llvm/IR/Argument.h" |
38 | #include "llvm/IR/Attributes.h" |
39 | #include "llvm/IR/BasicBlock.h" |
40 | #include "llvm/IR/CFG.h" |
41 | #include "llvm/IR/CallingConv.h" |
42 | #include "llvm/IR/Constants.h" |
43 | #include "llvm/IR/DataLayout.h" |
44 | #include "llvm/IR/DerivedTypes.h" |
45 | #include "llvm/IR/Dominators.h" |
46 | #include "llvm/IR/Function.h" |
47 | #include "llvm/IR/GlobalValue.h" |
48 | #include "llvm/IR/GlobalVariable.h" |
49 | #include "llvm/IR/IRBuilder.h" |
50 | #include "llvm/IR/InstIterator.h" |
51 | #include "llvm/IR/InstrTypes.h" |
52 | #include "llvm/IR/Instruction.h" |
53 | #include "llvm/IR/Instructions.h" |
54 | #include "llvm/IR/IntrinsicInst.h" |
55 | #include "llvm/IR/LLVMContext.h" |
56 | #include "llvm/IR/Module.h" |
57 | #include "llvm/IR/Type.h" |
58 | #include "llvm/IR/Value.h" |
59 | #include "llvm/IR/Verifier.h" |
60 | #include "llvm/Support/Casting.h" |
61 | #include "llvm/Support/Debug.h" |
62 | #include "llvm/Support/PrettyStackTrace.h" |
63 | #include "llvm/Support/raw_ostream.h" |
64 | #include "llvm/Transforms/Scalar.h" |
65 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
66 | #include "llvm/Transforms/Utils/CallGraphUpdater.h" |
67 | #include "llvm/Transforms/Utils/Cloning.h" |
68 | #include "llvm/Transforms/Utils/Local.h" |
69 | #include "llvm/Transforms/Utils/ValueMapper.h" |
70 | #include <cassert> |
71 | #include <cstddef> |
72 | #include <cstdint> |
73 | #include <initializer_list> |
74 | #include <iterator> |
75 | |
76 | using namespace llvm; |
77 | |
78 | #define DEBUG_TYPE "coro-split" |
79 | |
80 | namespace { |
81 | |
82 | /// A little helper class for building |
83 | class CoroCloner { |
84 | public: |
85 | enum class Kind { |
86 | /// The shared resume function for a switch lowering. |
87 | SwitchResume, |
88 | |
89 | /// The shared unwind function for a switch lowering. |
90 | SwitchUnwind, |
91 | |
92 | /// The shared cleanup function for a switch lowering. |
93 | SwitchCleanup, |
94 | |
95 | /// An individual continuation function. |
96 | Continuation, |
97 | |
98 | /// An async resume function. |
99 | Async, |
100 | }; |
101 | |
102 | private: |
103 | Function &OrigF; |
104 | Function *NewF; |
105 | const Twine &Suffix; |
106 | coro::Shape &Shape; |
107 | Kind FKind; |
108 | ValueToValueMapTy VMap; |
109 | IRBuilder<> Builder; |
110 | Value *NewFramePtr = nullptr; |
111 | |
112 | /// The active suspend instruction; meaningful only for continuation and async |
113 | /// ABIs. |
114 | AnyCoroSuspendInst *ActiveSuspend = nullptr; |
115 | |
116 | TargetTransformInfo &TTI; |
117 | |
118 | public: |
119 | /// Create a cloner for a switch lowering. |
120 | CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, |
121 | Kind FKind, TargetTransformInfo &TTI) |
122 | : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), FKind(FKind), |
123 | Builder(OrigF.getContext()), TTI(TTI) { |
124 | assert(Shape.ABI == coro::ABI::Switch); |
125 | } |
126 | |
127 | /// Create a cloner for a continuation lowering. |
128 | CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, |
129 | Function *NewF, AnyCoroSuspendInst *ActiveSuspend, |
130 | TargetTransformInfo &TTI) |
131 | : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape), |
132 | FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation), |
133 | Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend), TTI(TTI) { |
134 | assert(Shape.ABI == coro::ABI::Retcon || |
135 | Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async); |
136 | assert(NewF && "need existing function for continuation" ); |
137 | assert(ActiveSuspend && "need active suspend point for continuation" ); |
138 | } |
139 | |
140 | Function *getFunction() const { |
141 | assert(NewF != nullptr && "declaration not yet set" ); |
142 | return NewF; |
143 | } |
144 | |
145 | void create(); |
146 | |
147 | private: |
148 | bool isSwitchDestroyFunction() { |
149 | switch (FKind) { |
150 | case Kind::Async: |
151 | case Kind::Continuation: |
152 | case Kind::SwitchResume: |
153 | return false; |
154 | case Kind::SwitchUnwind: |
155 | case Kind::SwitchCleanup: |
156 | return true; |
157 | } |
158 | llvm_unreachable("Unknown CoroCloner::Kind enum" ); |
159 | } |
160 | |
161 | void replaceEntryBlock(); |
162 | Value *deriveNewFramePointer(); |
163 | void replaceRetconOrAsyncSuspendUses(); |
164 | void replaceCoroSuspends(); |
165 | void replaceCoroEnds(); |
166 | void replaceSwiftErrorOps(); |
167 | void salvageDebugInfo(); |
168 | void handleFinalSuspend(); |
169 | }; |
170 | |
171 | } // end anonymous namespace |
172 | |
173 | // FIXME: |
174 | // Lower the intrinisc in CoroEarly phase if coroutine frame doesn't escape |
175 | // and it is known that other transformations, for example, sanitizers |
176 | // won't lead to incorrect code. |
177 | static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB, |
178 | coro::Shape &Shape) { |
179 | auto Wrapper = CB->getWrapperFunction(); |
180 | auto Awaiter = CB->getAwaiter(); |
181 | auto FramePtr = CB->getFrame(); |
182 | |
183 | Builder.SetInsertPoint(CB); |
184 | |
185 | CallBase *NewCall = nullptr; |
186 | // await_suspend has only 2 parameters, awaiter and handle. |
187 | // Copy parameter attributes from the intrinsic call, but remove the last, |
188 | // because the last parameter now becomes the function that is being called. |
189 | AttributeList NewAttributes = |
190 | CB->getAttributes().removeParamAttributes(C&: CB->getContext(), ArgNo: 2); |
191 | |
192 | if (auto Invoke = dyn_cast<InvokeInst>(Val: CB)) { |
193 | auto WrapperInvoke = |
194 | Builder.CreateInvoke(Callee: Wrapper, NormalDest: Invoke->getNormalDest(), |
195 | UnwindDest: Invoke->getUnwindDest(), Args: {Awaiter, FramePtr}); |
196 | |
197 | WrapperInvoke->setCallingConv(Invoke->getCallingConv()); |
198 | std::copy(Invoke->bundle_op_info_begin(), Invoke->bundle_op_info_end(), |
199 | WrapperInvoke->bundle_op_info_begin()); |
200 | WrapperInvoke->setAttributes(NewAttributes); |
201 | WrapperInvoke->setDebugLoc(Invoke->getDebugLoc()); |
202 | NewCall = WrapperInvoke; |
203 | } else if (auto Call = dyn_cast<CallInst>(Val: CB)) { |
204 | auto WrapperCall = Builder.CreateCall(Callee: Wrapper, Args: {Awaiter, FramePtr}); |
205 | |
206 | WrapperCall->setAttributes(NewAttributes); |
207 | WrapperCall->setDebugLoc(Call->getDebugLoc()); |
208 | NewCall = WrapperCall; |
209 | } else { |
210 | llvm_unreachable("Unexpected coro_await_suspend invocation method" ); |
211 | } |
212 | |
213 | if (CB->getCalledFunction()->getIntrinsicID() == |
214 | Intrinsic::coro_await_suspend_handle) { |
215 | // Follow the lowered await_suspend call above with a lowered resume call |
216 | // to the returned coroutine. |
217 | if (auto *Invoke = dyn_cast<InvokeInst>(Val: CB)) { |
218 | // If the await_suspend call is an invoke, we continue in the next block. |
219 | Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt()); |
220 | } |
221 | |
222 | coro::LowererBase LB(*Wrapper->getParent()); |
223 | auto *ResumeAddr = LB.makeSubFnCall(Arg: NewCall, Index: CoroSubFnInst::ResumeIndex, |
224 | InsertPt: &*Builder.GetInsertPoint()); |
225 | |
226 | LLVMContext &Ctx = Builder.getContext(); |
227 | FunctionType *ResumeTy = FunctionType::get( |
228 | Result: Type::getVoidTy(C&: Ctx), Params: PointerType::getUnqual(C&: Ctx), isVarArg: false); |
229 | auto *ResumeCall = Builder.CreateCall(FTy: ResumeTy, Callee: ResumeAddr, Args: {NewCall}); |
230 | ResumeCall->setCallingConv(CallingConv::Fast); |
231 | |
232 | // We can't insert the 'ret' instruction and adjust the cc until the |
233 | // function has been split, so remember this for later. |
234 | Shape.SymmetricTransfers.push_back(Elt: ResumeCall); |
235 | |
236 | NewCall = ResumeCall; |
237 | } |
238 | |
239 | CB->replaceAllUsesWith(V: NewCall); |
240 | CB->eraseFromParent(); |
241 | } |
242 | |
243 | static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) { |
244 | IRBuilder<> Builder(F.getContext()); |
245 | for (auto *AWS : Shape.CoroAwaitSuspends) |
246 | lowerAwaitSuspend(Builder, CB: AWS, Shape); |
247 | } |
248 | |
249 | static void maybeFreeRetconStorage(IRBuilder<> &Builder, |
250 | const coro::Shape &Shape, Value *FramePtr, |
251 | CallGraph *CG) { |
252 | assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); |
253 | if (Shape.RetconLowering.IsFrameInlineInStorage) |
254 | return; |
255 | |
256 | Shape.emitDealloc(Builder, Ptr: FramePtr, CG); |
257 | } |
258 | |
259 | /// Replace an llvm.coro.end.async. |
260 | /// Will inline the must tail call function call if there is one. |
261 | /// \returns true if cleanup of the coro.end block is needed, false otherwise. |
262 | static bool replaceCoroEndAsync(AnyCoroEndInst *End) { |
263 | IRBuilder<> Builder(End); |
264 | |
265 | auto *EndAsync = dyn_cast<CoroAsyncEndInst>(Val: End); |
266 | if (!EndAsync) { |
267 | Builder.CreateRetVoid(); |
268 | return true /*needs cleanup of coro.end block*/; |
269 | } |
270 | |
271 | auto *MustTailCallFunc = EndAsync->getMustTailCallFunction(); |
272 | if (!MustTailCallFunc) { |
273 | Builder.CreateRetVoid(); |
274 | return true /*needs cleanup of coro.end block*/; |
275 | } |
276 | |
277 | // Move the must tail call from the predecessor block into the end block. |
278 | auto *CoroEndBlock = End->getParent(); |
279 | auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor(); |
280 | assert(MustTailCallFuncBlock && "Must have a single predecessor block" ); |
281 | auto It = MustTailCallFuncBlock->getTerminator()->getIterator(); |
282 | auto *MustTailCall = cast<CallInst>(Val: &*std::prev(x: It)); |
283 | CoroEndBlock->splice(ToIt: End->getIterator(), FromBB: MustTailCallFuncBlock, |
284 | FromIt: MustTailCall->getIterator()); |
285 | |
286 | // Insert the return instruction. |
287 | Builder.SetInsertPoint(End); |
288 | Builder.CreateRetVoid(); |
289 | InlineFunctionInfo FnInfo; |
290 | |
291 | // Remove the rest of the block, by splitting it into an unreachable block. |
292 | auto *BB = End->getParent(); |
293 | BB->splitBasicBlock(I: End); |
294 | BB->getTerminator()->eraseFromParent(); |
295 | |
296 | auto InlineRes = InlineFunction(CB&: *MustTailCall, IFI&: FnInfo); |
297 | assert(InlineRes.isSuccess() && "Expected inlining to succeed" ); |
298 | (void)InlineRes; |
299 | |
300 | // We have cleaned up the coro.end block above. |
301 | return false; |
302 | } |
303 | |
304 | /// Replace a non-unwind call to llvm.coro.end. |
305 | static void replaceFallthroughCoroEnd(AnyCoroEndInst *End, |
306 | const coro::Shape &Shape, Value *FramePtr, |
307 | bool InResume, CallGraph *CG) { |
308 | // Start inserting right before the coro.end. |
309 | IRBuilder<> Builder(End); |
310 | |
311 | // Create the return instruction. |
312 | switch (Shape.ABI) { |
313 | // The cloned functions in switch-lowering always return void. |
314 | case coro::ABI::Switch: |
315 | assert(!cast<CoroEndInst>(End)->hasResults() && |
316 | "switch coroutine should not return any values" ); |
317 | // coro.end doesn't immediately end the coroutine in the main function |
318 | // in this lowering, because we need to deallocate the coroutine. |
319 | if (!InResume) |
320 | return; |
321 | Builder.CreateRetVoid(); |
322 | break; |
323 | |
324 | // In async lowering this returns. |
325 | case coro::ABI::Async: { |
326 | bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End); |
327 | if (!CoroEndBlockNeedsCleanup) |
328 | return; |
329 | break; |
330 | } |
331 | |
332 | // In unique continuation lowering, the continuations always return void. |
333 | // But we may have implicitly allocated storage. |
334 | case coro::ABI::RetconOnce: { |
335 | maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); |
336 | auto *CoroEnd = cast<CoroEndInst>(Val: End); |
337 | auto *RetTy = Shape.getResumeFunctionType()->getReturnType(); |
338 | |
339 | if (!CoroEnd->hasResults()) { |
340 | assert(RetTy->isVoidTy()); |
341 | Builder.CreateRetVoid(); |
342 | break; |
343 | } |
344 | |
345 | auto *CoroResults = CoroEnd->getResults(); |
346 | unsigned NumReturns = CoroResults->numReturns(); |
347 | |
348 | if (auto *RetStructTy = dyn_cast<StructType>(Val: RetTy)) { |
349 | assert(RetStructTy->getNumElements() == NumReturns && |
350 | "numbers of returns should match resume function singature" ); |
351 | Value *ReturnValue = UndefValue::get(T: RetStructTy); |
352 | unsigned Idx = 0; |
353 | for (Value *RetValEl : CoroResults->return_values()) |
354 | ReturnValue = Builder.CreateInsertValue(Agg: ReturnValue, Val: RetValEl, Idxs: Idx++); |
355 | Builder.CreateRet(V: ReturnValue); |
356 | } else if (NumReturns == 0) { |
357 | assert(RetTy->isVoidTy()); |
358 | Builder.CreateRetVoid(); |
359 | } else { |
360 | assert(NumReturns == 1); |
361 | Builder.CreateRet(V: *CoroResults->retval_begin()); |
362 | } |
363 | CoroResults->replaceAllUsesWith( |
364 | V: ConstantTokenNone::get(Context&: CoroResults->getContext())); |
365 | CoroResults->eraseFromParent(); |
366 | break; |
367 | } |
368 | |
369 | // In non-unique continuation lowering, we signal completion by returning |
370 | // a null continuation. |
371 | case coro::ABI::Retcon: { |
372 | assert(!cast<CoroEndInst>(End)->hasResults() && |
373 | "retcon coroutine should not return any values" ); |
374 | maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); |
375 | auto RetTy = Shape.getResumeFunctionType()->getReturnType(); |
376 | auto RetStructTy = dyn_cast<StructType>(Val: RetTy); |
377 | PointerType *ContinuationTy = |
378 | cast<PointerType>(Val: RetStructTy ? RetStructTy->getElementType(N: 0) : RetTy); |
379 | |
380 | Value *ReturnValue = ConstantPointerNull::get(T: ContinuationTy); |
381 | if (RetStructTy) { |
382 | ReturnValue = Builder.CreateInsertValue(Agg: UndefValue::get(T: RetStructTy), |
383 | Val: ReturnValue, Idxs: 0); |
384 | } |
385 | Builder.CreateRet(V: ReturnValue); |
386 | break; |
387 | } |
388 | } |
389 | |
390 | // Remove the rest of the block, by splitting it into an unreachable block. |
391 | auto *BB = End->getParent(); |
392 | BB->splitBasicBlock(I: End); |
393 | BB->getTerminator()->eraseFromParent(); |
394 | } |
395 | |
396 | // Mark a coroutine as done, which implies that the coroutine is finished and |
397 | // never get resumed. |
398 | // |
399 | // In resume-switched ABI, the done state is represented by storing zero in |
400 | // ResumeFnAddr. |
401 | // |
402 | // NOTE: We couldn't omit the argument `FramePtr`. It is necessary because the |
403 | // pointer to the frame in splitted function is not stored in `Shape`. |
404 | static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape, |
405 | Value *FramePtr) { |
406 | assert( |
407 | Shape.ABI == coro::ABI::Switch && |
408 | "markCoroutineAsDone is only supported for Switch-Resumed ABI for now." ); |
409 | auto *GepIndex = Builder.CreateStructGEP( |
410 | Ty: Shape.FrameTy, Ptr: FramePtr, Idx: coro::Shape::SwitchFieldIndex::Resume, |
411 | Name: "ResumeFn.addr" ); |
412 | auto *NullPtr = ConstantPointerNull::get(T: cast<PointerType>( |
413 | Val: Shape.FrameTy->getTypeAtIndex(N: coro::Shape::SwitchFieldIndex::Resume))); |
414 | Builder.CreateStore(Val: NullPtr, Ptr: GepIndex); |
415 | |
416 | // If the coroutine don't have unwind coro end, we could omit the store to |
417 | // the final suspend point since we could infer the coroutine is suspended |
418 | // at the final suspend point by the nullness of ResumeFnAddr. |
419 | // However, we can't skip it if the coroutine have unwind coro end. Since |
420 | // the coroutine reaches unwind coro end is considered suspended at the |
421 | // final suspend point (the ResumeFnAddr is null) but in fact the coroutine |
422 | // didn't complete yet. We need the IndexVal for the final suspend point |
423 | // to make the states clear. |
424 | if (Shape.SwitchLowering.HasUnwindCoroEnd && |
425 | Shape.SwitchLowering.HasFinalSuspend) { |
426 | assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() && |
427 | "The final suspend should only live in the last position of " |
428 | "CoroSuspends." ); |
429 | ConstantInt *IndexVal = Shape.getIndex(Value: Shape.CoroSuspends.size() - 1); |
430 | auto *FinalIndex = Builder.CreateStructGEP( |
431 | Ty: Shape.FrameTy, Ptr: FramePtr, Idx: Shape.getSwitchIndexField(), Name: "index.addr" ); |
432 | |
433 | Builder.CreateStore(Val: IndexVal, Ptr: FinalIndex); |
434 | } |
435 | } |
436 | |
437 | /// Replace an unwind call to llvm.coro.end. |
438 | static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, |
439 | Value *FramePtr, bool InResume, |
440 | CallGraph *CG) { |
441 | IRBuilder<> Builder(End); |
442 | |
443 | switch (Shape.ABI) { |
444 | // In switch-lowering, this does nothing in the main function. |
445 | case coro::ABI::Switch: { |
446 | // In C++'s specification, the coroutine should be marked as done |
447 | // if promise.unhandled_exception() throws. The frontend will |
448 | // call coro.end(true) along this path. |
449 | // |
450 | // FIXME: We should refactor this once there is other language |
451 | // which uses Switch-Resumed style other than C++. |
452 | markCoroutineAsDone(Builder, Shape, FramePtr); |
453 | if (!InResume) |
454 | return; |
455 | break; |
456 | } |
457 | // In async lowering this does nothing. |
458 | case coro::ABI::Async: |
459 | break; |
460 | // In continuation-lowering, this frees the continuation storage. |
461 | case coro::ABI::Retcon: |
462 | case coro::ABI::RetconOnce: |
463 | maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); |
464 | break; |
465 | } |
466 | |
467 | // If coro.end has an associated bundle, add cleanupret instruction. |
468 | if (auto Bundle = End->getOperandBundle(ID: LLVMContext::OB_funclet)) { |
469 | auto *FromPad = cast<CleanupPadInst>(Val: Bundle->Inputs[0]); |
470 | auto *CleanupRet = Builder.CreateCleanupRet(CleanupPad: FromPad, UnwindBB: nullptr); |
471 | End->getParent()->splitBasicBlock(I: End); |
472 | CleanupRet->getParent()->getTerminator()->eraseFromParent(); |
473 | } |
474 | } |
475 | |
476 | static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, |
477 | Value *FramePtr, bool InResume, CallGraph *CG) { |
478 | if (End->isUnwind()) |
479 | replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG); |
480 | else |
481 | replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG); |
482 | |
483 | auto &Context = End->getContext(); |
484 | End->replaceAllUsesWith(V: InResume ? ConstantInt::getTrue(Context) |
485 | : ConstantInt::getFalse(Context)); |
486 | End->eraseFromParent(); |
487 | } |
488 | |
489 | // In the resume function, we remove the last case (when coro::Shape is built, |
490 | // the final suspend point (if present) is always the last element of |
491 | // CoroSuspends array) since it is an undefined behavior to resume a coroutine |
492 | // suspended at the final suspend point. |
493 | // In the destroy function, if it isn't possible that the ResumeFnAddr is NULL |
494 | // and the coroutine doesn't suspend at the final suspend point actually (this |
495 | // is possible since the coroutine is considered suspended at the final suspend |
496 | // point if promise.unhandled_exception() exits via an exception), we can |
497 | // remove the last case. |
498 | void CoroCloner::handleFinalSuspend() { |
499 | assert(Shape.ABI == coro::ABI::Switch && |
500 | Shape.SwitchLowering.HasFinalSuspend); |
501 | |
502 | if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd) |
503 | return; |
504 | |
505 | auto *Switch = cast<SwitchInst>(Val&: VMap[Shape.SwitchLowering.ResumeSwitch]); |
506 | auto FinalCaseIt = std::prev(x: Switch->case_end()); |
507 | BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor(); |
508 | Switch->removeCase(I: FinalCaseIt); |
509 | if (isSwitchDestroyFunction()) { |
510 | BasicBlock *OldSwitchBB = Switch->getParent(); |
511 | auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(I: Switch, BBName: "Switch" ); |
512 | Builder.SetInsertPoint(OldSwitchBB->getTerminator()); |
513 | |
514 | if (NewF->isCoroOnlyDestroyWhenComplete()) { |
515 | // When the coroutine can only be destroyed when complete, we don't need |
516 | // to generate code for other cases. |
517 | Builder.CreateBr(Dest: ResumeBB); |
518 | } else { |
519 | auto *GepIndex = Builder.CreateStructGEP( |
520 | Ty: Shape.FrameTy, Ptr: NewFramePtr, Idx: coro::Shape::SwitchFieldIndex::Resume, |
521 | Name: "ResumeFn.addr" ); |
522 | auto *Load = |
523 | Builder.CreateLoad(Ty: Shape.getSwitchResumePointerType(), Ptr: GepIndex); |
524 | auto *Cond = Builder.CreateIsNull(Arg: Load); |
525 | Builder.CreateCondBr(Cond, True: ResumeBB, False: NewSwitchBB); |
526 | } |
527 | OldSwitchBB->getTerminator()->eraseFromParent(); |
528 | } |
529 | } |
530 | |
531 | static FunctionType * |
532 | getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) { |
533 | auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Val: Suspend); |
534 | auto *StructTy = cast<StructType>(Val: AsyncSuspend->getType()); |
535 | auto &Context = Suspend->getParent()->getParent()->getContext(); |
536 | auto *VoidTy = Type::getVoidTy(C&: Context); |
537 | return FunctionType::get(Result: VoidTy, Params: StructTy->elements(), isVarArg: false); |
538 | } |
539 | |
540 | static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape, |
541 | const Twine &Suffix, |
542 | Module::iterator InsertBefore, |
543 | AnyCoroSuspendInst *ActiveSuspend) { |
544 | Module *M = OrigF.getParent(); |
545 | auto *FnTy = (Shape.ABI != coro::ABI::Async) |
546 | ? Shape.getResumeFunctionType() |
547 | : getFunctionTypeFromAsyncSuspend(Suspend: ActiveSuspend); |
548 | |
549 | Function *NewF = |
550 | Function::Create(Ty: FnTy, Linkage: GlobalValue::LinkageTypes::InternalLinkage, |
551 | N: OrigF.getName() + Suffix); |
552 | |
553 | M->getFunctionList().insert(where: InsertBefore, New: NewF); |
554 | |
555 | return NewF; |
556 | } |
557 | |
558 | /// Replace uses of the active llvm.coro.suspend.retcon/async call with the |
559 | /// arguments to the continuation function. |
560 | /// |
561 | /// This assumes that the builder has a meaningful insertion point. |
562 | void CoroCloner::replaceRetconOrAsyncSuspendUses() { |
563 | assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || |
564 | Shape.ABI == coro::ABI::Async); |
565 | |
566 | auto NewS = VMap[ActiveSuspend]; |
567 | if (NewS->use_empty()) |
568 | return; |
569 | |
570 | // Copy out all the continuation arguments after the buffer pointer into |
571 | // an easily-indexed data structure for convenience. |
572 | SmallVector<Value *, 8> Args; |
573 | // The async ABI includes all arguments -- including the first argument. |
574 | bool IsAsyncABI = Shape.ABI == coro::ABI::Async; |
575 | for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(x: NewF->arg_begin()), |
576 | E = NewF->arg_end(); |
577 | I != E; ++I) |
578 | Args.push_back(Elt: &*I); |
579 | |
580 | // If the suspend returns a single scalar value, we can just do a simple |
581 | // replacement. |
582 | if (!isa<StructType>(Val: NewS->getType())) { |
583 | assert(Args.size() == 1); |
584 | NewS->replaceAllUsesWith(V: Args.front()); |
585 | return; |
586 | } |
587 | |
588 | // Try to peephole extracts of an aggregate return. |
589 | for (Use &U : llvm::make_early_inc_range(Range: NewS->uses())) { |
590 | auto *EVI = dyn_cast<ExtractValueInst>(Val: U.getUser()); |
591 | if (!EVI || EVI->getNumIndices() != 1) |
592 | continue; |
593 | |
594 | EVI->replaceAllUsesWith(V: Args[EVI->getIndices().front()]); |
595 | EVI->eraseFromParent(); |
596 | } |
597 | |
598 | // If we have no remaining uses, we're done. |
599 | if (NewS->use_empty()) |
600 | return; |
601 | |
602 | // Otherwise, we need to create an aggregate. |
603 | Value *Agg = PoisonValue::get(T: NewS->getType()); |
604 | for (size_t I = 0, E = Args.size(); I != E; ++I) |
605 | Agg = Builder.CreateInsertValue(Agg, Val: Args[I], Idxs: I); |
606 | |
607 | NewS->replaceAllUsesWith(V: Agg); |
608 | } |
609 | |
610 | void CoroCloner::replaceCoroSuspends() { |
611 | Value *SuspendResult; |
612 | |
613 | switch (Shape.ABI) { |
614 | // In switch lowering, replace coro.suspend with the appropriate value |
615 | // for the type of function we're extracting. |
616 | // Replacing coro.suspend with (0) will result in control flow proceeding to |
617 | // a resume label associated with a suspend point, replacing it with (1) will |
618 | // result in control flow proceeding to a cleanup label associated with this |
619 | // suspend point. |
620 | case coro::ABI::Switch: |
621 | SuspendResult = Builder.getInt8(C: isSwitchDestroyFunction() ? 1 : 0); |
622 | break; |
623 | |
624 | // In async lowering there are no uses of the result. |
625 | case coro::ABI::Async: |
626 | return; |
627 | |
628 | // In returned-continuation lowering, the arguments from earlier |
629 | // continuations are theoretically arbitrary, and they should have been |
630 | // spilled. |
631 | case coro::ABI::RetconOnce: |
632 | case coro::ABI::Retcon: |
633 | return; |
634 | } |
635 | |
636 | for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) { |
637 | // The active suspend was handled earlier. |
638 | if (CS == ActiveSuspend) |
639 | continue; |
640 | |
641 | auto *MappedCS = cast<AnyCoroSuspendInst>(Val&: VMap[CS]); |
642 | MappedCS->replaceAllUsesWith(V: SuspendResult); |
643 | MappedCS->eraseFromParent(); |
644 | } |
645 | } |
646 | |
647 | void CoroCloner::replaceCoroEnds() { |
648 | for (AnyCoroEndInst *CE : Shape.CoroEnds) { |
649 | // We use a null call graph because there's no call graph node for |
650 | // the cloned function yet. We'll just be rebuilding that later. |
651 | auto *NewCE = cast<AnyCoroEndInst>(Val&: VMap[CE]); |
652 | replaceCoroEnd(End: NewCE, Shape, FramePtr: NewFramePtr, /*in resume*/ InResume: true, CG: nullptr); |
653 | } |
654 | } |
655 | |
656 | static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, |
657 | ValueToValueMapTy *VMap) { |
658 | if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty()) |
659 | return; |
660 | Value *CachedSlot = nullptr; |
661 | auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * { |
662 | if (CachedSlot) |
663 | return CachedSlot; |
664 | |
665 | // Check if the function has a swifterror argument. |
666 | for (auto &Arg : F.args()) { |
667 | if (Arg.isSwiftError()) { |
668 | CachedSlot = &Arg; |
669 | return &Arg; |
670 | } |
671 | } |
672 | |
673 | // Create a swifterror alloca. |
674 | IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); |
675 | auto Alloca = Builder.CreateAlloca(Ty: ValueTy); |
676 | Alloca->setSwiftError(true); |
677 | |
678 | CachedSlot = Alloca; |
679 | return Alloca; |
680 | }; |
681 | |
682 | for (CallInst *Op : Shape.SwiftErrorOps) { |
683 | auto MappedOp = VMap ? cast<CallInst>(Val&: (*VMap)[Op]) : Op; |
684 | IRBuilder<> Builder(MappedOp); |
685 | |
686 | // If there are no arguments, this is a 'get' operation. |
687 | Value *MappedResult; |
688 | if (Op->arg_empty()) { |
689 | auto ValueTy = Op->getType(); |
690 | auto Slot = getSwiftErrorSlot(ValueTy); |
691 | MappedResult = Builder.CreateLoad(Ty: ValueTy, Ptr: Slot); |
692 | } else { |
693 | assert(Op->arg_size() == 1); |
694 | auto Value = MappedOp->getArgOperand(i: 0); |
695 | auto ValueTy = Value->getType(); |
696 | auto Slot = getSwiftErrorSlot(ValueTy); |
697 | Builder.CreateStore(Val: Value, Ptr: Slot); |
698 | MappedResult = Slot; |
699 | } |
700 | |
701 | MappedOp->replaceAllUsesWith(V: MappedResult); |
702 | MappedOp->eraseFromParent(); |
703 | } |
704 | |
705 | // If we're updating the original function, we've invalidated SwiftErrorOps. |
706 | if (VMap == nullptr) { |
707 | Shape.SwiftErrorOps.clear(); |
708 | } |
709 | } |
710 | |
711 | /// Returns all DbgVariableIntrinsic in F. |
712 | static std::pair<SmallVector<DbgVariableIntrinsic *, 8>, |
713 | SmallVector<DbgVariableRecord *>> |
714 | collectDbgVariableIntrinsics(Function &F) { |
715 | SmallVector<DbgVariableIntrinsic *, 8> Intrinsics; |
716 | SmallVector<DbgVariableRecord *> DbgVariableRecords; |
717 | for (auto &I : instructions(F)) { |
718 | for (DbgVariableRecord &DVR : filterDbgVars(R: I.getDbgRecordRange())) |
719 | DbgVariableRecords.push_back(Elt: &DVR); |
720 | if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(Val: &I)) |
721 | Intrinsics.push_back(Elt: DVI); |
722 | } |
723 | return {Intrinsics, DbgVariableRecords}; |
724 | } |
725 | |
726 | void CoroCloner::replaceSwiftErrorOps() { |
727 | ::replaceSwiftErrorOps(F&: *NewF, Shape, VMap: &VMap); |
728 | } |
729 | |
730 | void CoroCloner::salvageDebugInfo() { |
731 | auto [Worklist, DbgVariableRecords] = collectDbgVariableIntrinsics(F&: *NewF); |
732 | SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap; |
733 | |
734 | // Only 64-bit ABIs have a register we can refer to with the entry value. |
735 | bool UseEntryValue = |
736 | llvm::Triple(OrigF.getParent()->getTargetTriple()).isArch64Bit(); |
737 | for (DbgVariableIntrinsic *DVI : Worklist) |
738 | coro::salvageDebugInfo(ArgToAllocaMap, DVI&: *DVI, OptimizeFrame: Shape.OptimizeFrame, |
739 | IsEntryPoint: UseEntryValue); |
740 | for (DbgVariableRecord *DVR : DbgVariableRecords) |
741 | coro::salvageDebugInfo(ArgToAllocaMap, DVR&: *DVR, OptimizeFrame: Shape.OptimizeFrame, |
742 | UseEntryValue); |
743 | |
744 | // Remove all salvaged dbg.declare intrinsics that became |
745 | // either unreachable or stale due to the CoroSplit transformation. |
746 | DominatorTree DomTree(*NewF); |
747 | auto IsUnreachableBlock = [&](BasicBlock *BB) { |
748 | return !isPotentiallyReachable(From: &NewF->getEntryBlock(), To: BB, ExclusionSet: nullptr, |
749 | DT: &DomTree); |
750 | }; |
751 | auto RemoveOne = [&](auto *DVI) { |
752 | if (IsUnreachableBlock(DVI->getParent())) |
753 | DVI->eraseFromParent(); |
754 | else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) { |
755 | // Count all non-debuginfo uses in reachable blocks. |
756 | unsigned Uses = 0; |
757 | for (auto *User : DVI->getVariableLocationOp(0)->users()) |
758 | if (auto *I = dyn_cast<Instruction>(User)) |
759 | if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent())) |
760 | ++Uses; |
761 | if (!Uses) |
762 | DVI->eraseFromParent(); |
763 | } |
764 | }; |
765 | for_each(Range&: Worklist, F: RemoveOne); |
766 | for_each(Range&: DbgVariableRecords, F: RemoveOne); |
767 | } |
768 | |
769 | void CoroCloner::replaceEntryBlock() { |
770 | // In the original function, the AllocaSpillBlock is a block immediately |
771 | // following the allocation of the frame object which defines GEPs for |
772 | // all the allocas that have been moved into the frame, and it ends by |
773 | // branching to the original beginning of the coroutine. Make this |
774 | // the entry block of the cloned function. |
775 | auto *Entry = cast<BasicBlock>(Val&: VMap[Shape.AllocaSpillBlock]); |
776 | auto *OldEntry = &NewF->getEntryBlock(); |
777 | Entry->setName("entry" + Suffix); |
778 | Entry->moveBefore(MovePos: OldEntry); |
779 | Entry->getTerminator()->eraseFromParent(); |
780 | |
781 | // Clear all predecessors of the new entry block. There should be |
782 | // exactly one predecessor, which we created when splitting out |
783 | // AllocaSpillBlock to begin with. |
784 | assert(Entry->hasOneUse()); |
785 | auto BranchToEntry = cast<BranchInst>(Val: Entry->user_back()); |
786 | assert(BranchToEntry->isUnconditional()); |
787 | Builder.SetInsertPoint(BranchToEntry); |
788 | Builder.CreateUnreachable(); |
789 | BranchToEntry->eraseFromParent(); |
790 | |
791 | // Branch from the entry to the appropriate place. |
792 | Builder.SetInsertPoint(Entry); |
793 | switch (Shape.ABI) { |
794 | case coro::ABI::Switch: { |
795 | // In switch-lowering, we built a resume-entry block in the original |
796 | // function. Make the entry block branch to this. |
797 | auto *SwitchBB = |
798 | cast<BasicBlock>(Val&: VMap[Shape.SwitchLowering.ResumeEntryBlock]); |
799 | Builder.CreateBr(Dest: SwitchBB); |
800 | break; |
801 | } |
802 | case coro::ABI::Async: |
803 | case coro::ABI::Retcon: |
804 | case coro::ABI::RetconOnce: { |
805 | // In continuation ABIs, we want to branch to immediately after the |
806 | // active suspend point. Earlier phases will have put the suspend in its |
807 | // own basic block, so just thread our jump directly to its successor. |
808 | assert((Shape.ABI == coro::ABI::Async && |
809 | isa<CoroSuspendAsyncInst>(ActiveSuspend)) || |
810 | ((Shape.ABI == coro::ABI::Retcon || |
811 | Shape.ABI == coro::ABI::RetconOnce) && |
812 | isa<CoroSuspendRetconInst>(ActiveSuspend))); |
813 | auto *MappedCS = cast<AnyCoroSuspendInst>(Val&: VMap[ActiveSuspend]); |
814 | auto Branch = cast<BranchInst>(Val: MappedCS->getNextNode()); |
815 | assert(Branch->isUnconditional()); |
816 | Builder.CreateBr(Dest: Branch->getSuccessor(i: 0)); |
817 | break; |
818 | } |
819 | } |
820 | |
821 | // Any static alloca that's still being used but not reachable from the new |
822 | // entry needs to be moved to the new entry. |
823 | Function *F = OldEntry->getParent(); |
824 | DominatorTree DT{*F}; |
825 | for (Instruction &I : llvm::make_early_inc_range(Range: instructions(F))) { |
826 | auto *Alloca = dyn_cast<AllocaInst>(Val: &I); |
827 | if (!Alloca || I.use_empty()) |
828 | continue; |
829 | if (DT.isReachableFromEntry(A: I.getParent()) || |
830 | !isa<ConstantInt>(Val: Alloca->getArraySize())) |
831 | continue; |
832 | I.moveBefore(BB&: *Entry, I: Entry->getFirstInsertionPt()); |
833 | } |
834 | } |
835 | |
836 | /// Derive the value of the new frame pointer. |
837 | Value *CoroCloner::deriveNewFramePointer() { |
838 | // Builder should be inserting to the front of the new entry block. |
839 | |
840 | switch (Shape.ABI) { |
841 | // In switch-lowering, the argument is the frame pointer. |
842 | case coro::ABI::Switch: |
843 | return &*NewF->arg_begin(); |
844 | // In async-lowering, one of the arguments is an async context as determined |
845 | // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context of |
846 | // the resume function from the async context projection function associated |
847 | // with the active suspend. The frame is located as a tail to the async |
848 | // context header. |
849 | case coro::ABI::Async: { |
850 | auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(Val: ActiveSuspend); |
851 | auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff; |
852 | auto *CalleeContext = NewF->getArg(i: ContextIdx); |
853 | auto *ProjectionFunc = |
854 | ActiveAsyncSuspend->getAsyncContextProjectionFunction(); |
855 | auto DbgLoc = |
856 | cast<CoroSuspendAsyncInst>(Val&: VMap[ActiveSuspend])->getDebugLoc(); |
857 | // Calling i8* (i8*) |
858 | auto *CallerContext = Builder.CreateCall(FTy: ProjectionFunc->getFunctionType(), |
859 | Callee: ProjectionFunc, Args: CalleeContext); |
860 | CallerContext->setCallingConv(ProjectionFunc->getCallingConv()); |
861 | CallerContext->setDebugLoc(DbgLoc); |
862 | // The frame is located after the async_context header. |
863 | auto &Context = Builder.getContext(); |
864 | auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32( |
865 | Ty: Type::getInt8Ty(C&: Context), Ptr: CallerContext, |
866 | Idx0: Shape.AsyncLowering.FrameOffset, Name: "async.ctx.frameptr" ); |
867 | // Inline the projection function. |
868 | InlineFunctionInfo InlineInfo; |
869 | auto InlineRes = InlineFunction(CB&: *CallerContext, IFI&: InlineInfo); |
870 | assert(InlineRes.isSuccess()); |
871 | (void)InlineRes; |
872 | return FramePtrAddr; |
873 | } |
874 | // In continuation-lowering, the argument is the opaque storage. |
875 | case coro::ABI::Retcon: |
876 | case coro::ABI::RetconOnce: { |
877 | Argument *NewStorage = &*NewF->arg_begin(); |
878 | auto FramePtrTy = PointerType::getUnqual(C&: Shape.FrameTy->getContext()); |
879 | |
880 | // If the storage is inline, just bitcast to the storage to the frame type. |
881 | if (Shape.RetconLowering.IsFrameInlineInStorage) |
882 | return NewStorage; |
883 | |
884 | // Otherwise, load the real frame from the opaque storage. |
885 | return Builder.CreateLoad(Ty: FramePtrTy, Ptr: NewStorage); |
886 | } |
887 | } |
888 | llvm_unreachable("bad ABI" ); |
889 | } |
890 | |
891 | static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context, |
892 | unsigned ParamIndex, uint64_t Size, |
893 | Align Alignment, bool NoAlias) { |
894 | AttrBuilder ParamAttrs(Context); |
895 | ParamAttrs.addAttribute(Val: Attribute::NonNull); |
896 | ParamAttrs.addAttribute(Val: Attribute::NoUndef); |
897 | |
898 | if (NoAlias) |
899 | ParamAttrs.addAttribute(Val: Attribute::NoAlias); |
900 | |
901 | ParamAttrs.addAlignmentAttr(Align: Alignment); |
902 | ParamAttrs.addDereferenceableAttr(Bytes: Size); |
903 | Attrs = Attrs.addParamAttributes(C&: Context, ArgNo: ParamIndex, B: ParamAttrs); |
904 | } |
905 | |
906 | static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context, |
907 | unsigned ParamIndex) { |
908 | AttrBuilder ParamAttrs(Context); |
909 | ParamAttrs.addAttribute(Val: Attribute::SwiftAsync); |
910 | Attrs = Attrs.addParamAttributes(C&: Context, ArgNo: ParamIndex, B: ParamAttrs); |
911 | } |
912 | |
913 | static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context, |
914 | unsigned ParamIndex) { |
915 | AttrBuilder ParamAttrs(Context); |
916 | ParamAttrs.addAttribute(Val: Attribute::SwiftSelf); |
917 | Attrs = Attrs.addParamAttributes(C&: Context, ArgNo: ParamIndex, B: ParamAttrs); |
918 | } |
919 | |
920 | /// Clone the body of the original function into a resume function of |
921 | /// some sort. |
922 | void CoroCloner::create() { |
923 | // Create the new function if we don't already have one. |
924 | if (!NewF) { |
925 | NewF = createCloneDeclaration(OrigF, Shape, Suffix, |
926 | InsertBefore: OrigF.getParent()->end(), ActiveSuspend); |
927 | } |
928 | |
929 | // Replace all args with dummy instructions. If an argument is the old frame |
930 | // pointer, the dummy will be replaced by the new frame pointer once it is |
931 | // computed below. Uses of all other arguments should have already been |
932 | // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine |
933 | // frame. |
934 | SmallVector<Instruction *> DummyArgs; |
935 | for (Argument &A : OrigF.args()) { |
936 | DummyArgs.push_back(Elt: new FreezeInst(PoisonValue::get(T: A.getType()))); |
937 | VMap[&A] = DummyArgs.back(); |
938 | } |
939 | |
940 | SmallVector<ReturnInst *, 4> Returns; |
941 | |
942 | // Ignore attempts to change certain attributes of the function. |
943 | // TODO: maybe there should be a way to suppress this during cloning? |
944 | auto savedVisibility = NewF->getVisibility(); |
945 | auto savedUnnamedAddr = NewF->getUnnamedAddr(); |
946 | auto savedDLLStorageClass = NewF->getDLLStorageClass(); |
947 | |
948 | // NewF's linkage (which CloneFunctionInto does *not* change) might not |
949 | // be compatible with the visibility of OrigF (which it *does* change), |
950 | // so protect against that. |
951 | auto savedLinkage = NewF->getLinkage(); |
952 | NewF->setLinkage(llvm::GlobalValue::ExternalLinkage); |
953 | |
954 | CloneFunctionInto(NewFunc: NewF, OldFunc: &OrigF, VMap, |
955 | Changes: CloneFunctionChangeType::LocalChangesOnly, Returns); |
956 | |
957 | auto &Context = NewF->getContext(); |
958 | |
959 | // For async functions / continuations, adjust the scope line of the |
960 | // clone to the line number of the suspend point. However, only |
961 | // adjust the scope line when the files are the same. This ensures |
962 | // line number and file name belong together. The scope line is |
963 | // associated with all pre-prologue instructions. This avoids a jump |
964 | // in the linetable from the function declaration to the suspend point. |
965 | if (DISubprogram *SP = NewF->getSubprogram()) { |
966 | assert(SP != OrigF.getSubprogram() && SP->isDistinct()); |
967 | if (ActiveSuspend) |
968 | if (auto DL = ActiveSuspend->getDebugLoc()) |
969 | if (SP->getFile() == DL->getFile()) |
970 | SP->setScopeLine(DL->getLine()); |
971 | // Update the linkage name to reflect the modified symbol name. It |
972 | // is necessary to update the linkage name in Swift, since the |
973 | // mangling changes for resume functions. It might also be the |
974 | // right thing to do in C++, but due to a limitation in LLVM's |
975 | // AsmPrinter we can only do this if the function doesn't have an |
976 | // abstract specification, since the DWARF backend expects the |
977 | // abstract specification to contain the linkage name and asserts |
978 | // that they are identical. |
979 | if (SP->getUnit() && |
980 | SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift) { |
981 | SP->replaceLinkageName(LN: MDString::get(Context, Str: NewF->getName())); |
982 | if (auto *Decl = SP->getDeclaration()) { |
983 | auto *NewDecl = DISubprogram::get( |
984 | Context&: Decl->getContext(), Scope: Decl->getScope(), Name: Decl->getName(), |
985 | LinkageName: NewF->getName(), File: Decl->getFile(), Line: Decl->getLine(), Type: Decl->getType(), |
986 | ScopeLine: Decl->getScopeLine(), ContainingType: Decl->getContainingType(), |
987 | VirtualIndex: Decl->getVirtualIndex(), ThisAdjustment: Decl->getThisAdjustment(), |
988 | Flags: Decl->getFlags(), SPFlags: Decl->getSPFlags(), Unit: Decl->getUnit(), |
989 | TemplateParams: Decl->getTemplateParams(), Declaration: nullptr, RetainedNodes: Decl->getRetainedNodes(), |
990 | ThrownTypes: Decl->getThrownTypes(), Annotations: Decl->getAnnotations(), |
991 | TargetFuncName: Decl->getTargetFuncName()); |
992 | SP->replaceDeclaration(Decl: NewDecl); |
993 | } |
994 | } |
995 | } |
996 | |
997 | NewF->setLinkage(savedLinkage); |
998 | NewF->setVisibility(savedVisibility); |
999 | NewF->setUnnamedAddr(savedUnnamedAddr); |
1000 | NewF->setDLLStorageClass(savedDLLStorageClass); |
1001 | // The function sanitizer metadata needs to match the signature of the |
1002 | // function it is being attached to. However this does not hold for split |
1003 | // functions here. Thus remove the metadata for split functions. |
1004 | if (Shape.ABI == coro::ABI::Switch && |
1005 | NewF->hasMetadata(KindID: LLVMContext::MD_func_sanitize)) |
1006 | NewF->eraseMetadata(KindID: LLVMContext::MD_func_sanitize); |
1007 | |
1008 | // Replace the attributes of the new function: |
1009 | auto OrigAttrs = NewF->getAttributes(); |
1010 | auto NewAttrs = AttributeList(); |
1011 | |
1012 | switch (Shape.ABI) { |
1013 | case coro::ABI::Switch: |
1014 | // Bootstrap attributes by copying function attributes from the |
1015 | // original function. This should include optimization settings and so on. |
1016 | NewAttrs = NewAttrs.addFnAttributes( |
1017 | C&: Context, B: AttrBuilder(Context, OrigAttrs.getFnAttrs())); |
1018 | |
1019 | addFramePointerAttrs(Attrs&: NewAttrs, Context, ParamIndex: 0, Size: Shape.FrameSize, |
1020 | Alignment: Shape.FrameAlign, /*NoAlias=*/false); |
1021 | break; |
1022 | case coro::ABI::Async: { |
1023 | auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(Val: ActiveSuspend); |
1024 | if (OrigF.hasParamAttribute(ArgNo: Shape.AsyncLowering.ContextArgNo, |
1025 | Kind: Attribute::SwiftAsync)) { |
1026 | uint32_t ArgAttributeIndices = |
1027 | ActiveAsyncSuspend->getStorageArgumentIndex(); |
1028 | auto ContextArgIndex = ArgAttributeIndices & 0xff; |
1029 | addAsyncContextAttrs(Attrs&: NewAttrs, Context, ParamIndex: ContextArgIndex); |
1030 | |
1031 | // `swiftasync` must preceed `swiftself` so 0 is not a valid index for |
1032 | // `swiftself`. |
1033 | auto SwiftSelfIndex = ArgAttributeIndices >> 8; |
1034 | if (SwiftSelfIndex) |
1035 | addSwiftSelfAttrs(Attrs&: NewAttrs, Context, ParamIndex: SwiftSelfIndex); |
1036 | } |
1037 | |
1038 | // Transfer the original function's attributes. |
1039 | auto FnAttrs = OrigF.getAttributes().getFnAttrs(); |
1040 | NewAttrs = NewAttrs.addFnAttributes(C&: Context, B: AttrBuilder(Context, FnAttrs)); |
1041 | break; |
1042 | } |
1043 | case coro::ABI::Retcon: |
1044 | case coro::ABI::RetconOnce: |
1045 | // If we have a continuation prototype, just use its attributes, |
1046 | // full-stop. |
1047 | NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); |
1048 | |
1049 | /// FIXME: Is it really good to add the NoAlias attribute? |
1050 | addFramePointerAttrs(Attrs&: NewAttrs, Context, ParamIndex: 0, |
1051 | Size: Shape.getRetconCoroId()->getStorageSize(), |
1052 | Alignment: Shape.getRetconCoroId()->getStorageAlignment(), |
1053 | /*NoAlias=*/true); |
1054 | |
1055 | break; |
1056 | } |
1057 | |
1058 | switch (Shape.ABI) { |
1059 | // In these ABIs, the cloned functions always return 'void', and the |
1060 | // existing return sites are meaningless. Note that for unique |
1061 | // continuations, this includes the returns associated with suspends; |
1062 | // this is fine because we can't suspend twice. |
1063 | case coro::ABI::Switch: |
1064 | case coro::ABI::RetconOnce: |
1065 | // Remove old returns. |
1066 | for (ReturnInst *Return : Returns) |
1067 | changeToUnreachable(I: Return); |
1068 | break; |
1069 | |
1070 | // With multi-suspend continuations, we'll already have eliminated the |
1071 | // original returns and inserted returns before all the suspend points, |
1072 | // so we want to leave any returns in place. |
1073 | case coro::ABI::Retcon: |
1074 | break; |
1075 | // Async lowering will insert musttail call functions at all suspend points |
1076 | // followed by a return. |
1077 | // Don't change returns to unreachable because that will trip up the verifier. |
1078 | // These returns should be unreachable from the clone. |
1079 | case coro::ABI::Async: |
1080 | break; |
1081 | } |
1082 | |
1083 | NewF->setAttributes(NewAttrs); |
1084 | NewF->setCallingConv(Shape.getResumeFunctionCC()); |
1085 | |
1086 | // Set up the new entry block. |
1087 | replaceEntryBlock(); |
1088 | |
1089 | // Turn symmetric transfers into musttail calls. |
1090 | for (CallInst *ResumeCall : Shape.SymmetricTransfers) { |
1091 | ResumeCall = cast<CallInst>(Val&: VMap[ResumeCall]); |
1092 | if (TTI.supportsTailCallFor(CB: ResumeCall)) { |
1093 | // FIXME: Could we support symmetric transfer effectively without |
1094 | // musttail? |
1095 | ResumeCall->setTailCallKind(CallInst::TCK_MustTail); |
1096 | } |
1097 | |
1098 | // Put a 'ret void' after the call, and split any remaining instructions to |
1099 | // an unreachable block. |
1100 | BasicBlock *BB = ResumeCall->getParent(); |
1101 | BB->splitBasicBlock(I: ResumeCall->getNextNode()); |
1102 | Builder.SetInsertPoint(BB->getTerminator()); |
1103 | Builder.CreateRetVoid(); |
1104 | BB->getTerminator()->eraseFromParent(); |
1105 | } |
1106 | |
1107 | Builder.SetInsertPoint(&NewF->getEntryBlock().front()); |
1108 | NewFramePtr = deriveNewFramePointer(); |
1109 | |
1110 | // Remap frame pointer. |
1111 | Value *OldFramePtr = VMap[Shape.FramePtr]; |
1112 | NewFramePtr->takeName(V: OldFramePtr); |
1113 | OldFramePtr->replaceAllUsesWith(V: NewFramePtr); |
1114 | |
1115 | // Remap vFrame pointer. |
1116 | auto *NewVFrame = Builder.CreateBitCast( |
1117 | V: NewFramePtr, DestTy: PointerType::getUnqual(C&: Builder.getContext()), Name: "vFrame" ); |
1118 | Value *OldVFrame = cast<Value>(Val&: VMap[Shape.CoroBegin]); |
1119 | if (OldVFrame != NewVFrame) |
1120 | OldVFrame->replaceAllUsesWith(V: NewVFrame); |
1121 | |
1122 | // All uses of the arguments should have been resolved by this point, |
1123 | // so we can safely remove the dummy values. |
1124 | for (Instruction *DummyArg : DummyArgs) { |
1125 | DummyArg->replaceAllUsesWith(V: PoisonValue::get(T: DummyArg->getType())); |
1126 | DummyArg->deleteValue(); |
1127 | } |
1128 | |
1129 | switch (Shape.ABI) { |
1130 | case coro::ABI::Switch: |
1131 | // Rewrite final suspend handling as it is not done via switch (allows to |
1132 | // remove final case from the switch, since it is undefined behavior to |
1133 | // resume the coroutine suspended at the final suspend point. |
1134 | if (Shape.SwitchLowering.HasFinalSuspend) |
1135 | handleFinalSuspend(); |
1136 | break; |
1137 | case coro::ABI::Async: |
1138 | case coro::ABI::Retcon: |
1139 | case coro::ABI::RetconOnce: |
1140 | // Replace uses of the active suspend with the corresponding |
1141 | // continuation-function arguments. |
1142 | assert(ActiveSuspend != nullptr && |
1143 | "no active suspend when lowering a continuation-style coroutine" ); |
1144 | replaceRetconOrAsyncSuspendUses(); |
1145 | break; |
1146 | } |
1147 | |
1148 | // Handle suspends. |
1149 | replaceCoroSuspends(); |
1150 | |
1151 | // Handle swifterror. |
1152 | replaceSwiftErrorOps(); |
1153 | |
1154 | // Remove coro.end intrinsics. |
1155 | replaceCoroEnds(); |
1156 | |
1157 | // Salvage debug info that points into the coroutine frame. |
1158 | salvageDebugInfo(); |
1159 | |
1160 | // Eliminate coro.free from the clones, replacing it with 'null' in cleanup, |
1161 | // to suppress deallocation code. |
1162 | if (Shape.ABI == coro::ABI::Switch) |
1163 | coro::replaceCoroFree(CoroId: cast<CoroIdInst>(Val&: VMap[Shape.CoroBegin->getId()]), |
1164 | /*Elide=*/FKind == CoroCloner::Kind::SwitchCleanup); |
1165 | } |
1166 | |
1167 | static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) { |
1168 | assert(Shape.ABI == coro::ABI::Async); |
1169 | |
1170 | auto *FuncPtrStruct = cast<ConstantStruct>( |
1171 | Val: Shape.AsyncLowering.AsyncFuncPointer->getInitializer()); |
1172 | auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(i_nocapture: 0); |
1173 | auto *OrigContextSize = FuncPtrStruct->getOperand(i_nocapture: 1); |
1174 | auto *NewContextSize = ConstantInt::get(Ty: OrigContextSize->getType(), |
1175 | V: Shape.AsyncLowering.ContextSize); |
1176 | auto *NewFuncPtrStruct = ConstantStruct::get( |
1177 | T: FuncPtrStruct->getType(), Vs: OrigRelativeFunOffset, Vs: NewContextSize); |
1178 | |
1179 | Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); |
1180 | } |
1181 | |
1182 | static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { |
1183 | if (Shape.ABI == coro::ABI::Async) |
1184 | updateAsyncFuncPointerContextSize(Shape); |
1185 | |
1186 | for (CoroAlignInst *CA : Shape.CoroAligns) { |
1187 | CA->replaceAllUsesWith( |
1188 | V: ConstantInt::get(Ty: CA->getType(), V: Shape.FrameAlign.value())); |
1189 | CA->eraseFromParent(); |
1190 | } |
1191 | |
1192 | if (Shape.CoroSizes.empty()) |
1193 | return; |
1194 | |
1195 | // In the same function all coro.sizes should have the same result type. |
1196 | auto *SizeIntrin = Shape.CoroSizes.back(); |
1197 | Module *M = SizeIntrin->getModule(); |
1198 | const DataLayout &DL = M->getDataLayout(); |
1199 | auto Size = DL.getTypeAllocSize(Ty: Shape.FrameTy); |
1200 | auto *SizeConstant = ConstantInt::get(Ty: SizeIntrin->getType(), V: Size); |
1201 | |
1202 | for (CoroSizeInst *CS : Shape.CoroSizes) { |
1203 | CS->replaceAllUsesWith(V: SizeConstant); |
1204 | CS->eraseFromParent(); |
1205 | } |
1206 | } |
1207 | |
1208 | static void postSplitCleanup(Function &F) { |
1209 | removeUnreachableBlocks(F); |
1210 | |
1211 | #ifndef NDEBUG |
1212 | // For now, we do a mandatory verification step because we don't |
1213 | // entirely trust this pass. Note that we don't want to add a verifier |
1214 | // pass to FPM below because it will also verify all the global data. |
1215 | if (verifyFunction(F, &errs())) |
1216 | report_fatal_error("Broken function" ); |
1217 | #endif |
1218 | } |
1219 | |
1220 | // Coroutine has no suspend points. Remove heap allocation for the coroutine |
1221 | // frame if possible. |
1222 | static void handleNoSuspendCoroutine(coro::Shape &Shape) { |
1223 | auto *CoroBegin = Shape.CoroBegin; |
1224 | auto *CoroId = CoroBegin->getId(); |
1225 | auto *AllocInst = CoroId->getCoroAlloc(); |
1226 | switch (Shape.ABI) { |
1227 | case coro::ABI::Switch: { |
1228 | auto SwitchId = cast<CoroIdInst>(Val: CoroId); |
1229 | coro::replaceCoroFree(CoroId: SwitchId, /*Elide=*/AllocInst != nullptr); |
1230 | if (AllocInst) { |
1231 | IRBuilder<> Builder(AllocInst); |
1232 | auto *Frame = Builder.CreateAlloca(Ty: Shape.FrameTy); |
1233 | Frame->setAlignment(Shape.FrameAlign); |
1234 | AllocInst->replaceAllUsesWith(V: Builder.getFalse()); |
1235 | AllocInst->eraseFromParent(); |
1236 | CoroBegin->replaceAllUsesWith(V: Frame); |
1237 | } else { |
1238 | CoroBegin->replaceAllUsesWith(V: CoroBegin->getMem()); |
1239 | } |
1240 | |
1241 | break; |
1242 | } |
1243 | case coro::ABI::Async: |
1244 | case coro::ABI::Retcon: |
1245 | case coro::ABI::RetconOnce: |
1246 | CoroBegin->replaceAllUsesWith(V: UndefValue::get(T: CoroBegin->getType())); |
1247 | break; |
1248 | } |
1249 | |
1250 | CoroBegin->eraseFromParent(); |
1251 | Shape.CoroBegin = nullptr; |
1252 | } |
1253 | |
1254 | // SimplifySuspendPoint needs to check that there is no calls between |
1255 | // coro_save and coro_suspend, since any of the calls may potentially resume |
1256 | // the coroutine and if that is the case we cannot eliminate the suspend point. |
1257 | static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) { |
1258 | for (Instruction *I = From; I != To; I = I->getNextNode()) { |
1259 | // Assume that no intrinsic can resume the coroutine. |
1260 | if (isa<IntrinsicInst>(Val: I)) |
1261 | continue; |
1262 | |
1263 | if (isa<CallBase>(Val: I)) |
1264 | return true; |
1265 | } |
1266 | return false; |
1267 | } |
1268 | |
1269 | static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) { |
1270 | SmallPtrSet<BasicBlock *, 8> Set; |
1271 | SmallVector<BasicBlock *, 8> Worklist; |
1272 | |
1273 | Set.insert(Ptr: SaveBB); |
1274 | Worklist.push_back(Elt: ResDesBB); |
1275 | |
1276 | // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr |
1277 | // returns a token consumed by suspend instruction, all blocks in between |
1278 | // will have to eventually hit SaveBB when going backwards from ResDesBB. |
1279 | while (!Worklist.empty()) { |
1280 | auto *BB = Worklist.pop_back_val(); |
1281 | Set.insert(Ptr: BB); |
1282 | for (auto *Pred : predecessors(BB)) |
1283 | if (!Set.contains(Ptr: Pred)) |
1284 | Worklist.push_back(Elt: Pred); |
1285 | } |
1286 | |
1287 | // SaveBB and ResDesBB are checked separately in hasCallsBetween. |
1288 | Set.erase(Ptr: SaveBB); |
1289 | Set.erase(Ptr: ResDesBB); |
1290 | |
1291 | for (auto *BB : Set) |
1292 | if (hasCallsInBlockBetween(From: BB->getFirstNonPHI(), To: nullptr)) |
1293 | return true; |
1294 | |
1295 | return false; |
1296 | } |
1297 | |
1298 | static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) { |
1299 | auto *SaveBB = Save->getParent(); |
1300 | auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent(); |
1301 | |
1302 | if (SaveBB == ResumeOrDestroyBB) |
1303 | return hasCallsInBlockBetween(From: Save->getNextNode(), To: ResumeOrDestroy); |
1304 | |
1305 | // Any calls from Save to the end of the block? |
1306 | if (hasCallsInBlockBetween(From: Save->getNextNode(), To: nullptr)) |
1307 | return true; |
1308 | |
1309 | // Any calls from begging of the block up to ResumeOrDestroy? |
1310 | if (hasCallsInBlockBetween(From: ResumeOrDestroyBB->getFirstNonPHI(), |
1311 | To: ResumeOrDestroy)) |
1312 | return true; |
1313 | |
1314 | // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB? |
1315 | if (hasCallsInBlocksBetween(SaveBB, ResDesBB: ResumeOrDestroyBB)) |
1316 | return true; |
1317 | |
1318 | return false; |
1319 | } |
1320 | |
1321 | // If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the |
1322 | // suspend point and replace it with nornal control flow. |
1323 | static bool simplifySuspendPoint(CoroSuspendInst *Suspend, |
1324 | CoroBeginInst *CoroBegin) { |
1325 | Instruction *Prev = Suspend->getPrevNode(); |
1326 | if (!Prev) { |
1327 | auto *Pred = Suspend->getParent()->getSinglePredecessor(); |
1328 | if (!Pred) |
1329 | return false; |
1330 | Prev = Pred->getTerminator(); |
1331 | } |
1332 | |
1333 | CallBase *CB = dyn_cast<CallBase>(Val: Prev); |
1334 | if (!CB) |
1335 | return false; |
1336 | |
1337 | auto *Callee = CB->getCalledOperand()->stripPointerCasts(); |
1338 | |
1339 | // See if the callsite is for resumption or destruction of the coroutine. |
1340 | auto *SubFn = dyn_cast<CoroSubFnInst>(Val: Callee); |
1341 | if (!SubFn) |
1342 | return false; |
1343 | |
1344 | // Does not refer to the current coroutine, we cannot do anything with it. |
1345 | if (SubFn->getFrame() != CoroBegin) |
1346 | return false; |
1347 | |
1348 | // See if the transformation is safe. Specifically, see if there are any |
1349 | // calls in between Save and CallInstr. They can potenitally resume the |
1350 | // coroutine rendering this optimization unsafe. |
1351 | auto *Save = Suspend->getCoroSave(); |
1352 | if (hasCallsBetween(Save, ResumeOrDestroy: CB)) |
1353 | return false; |
1354 | |
1355 | // Replace llvm.coro.suspend with the value that results in resumption over |
1356 | // the resume or cleanup path. |
1357 | Suspend->replaceAllUsesWith(V: SubFn->getRawIndex()); |
1358 | Suspend->eraseFromParent(); |
1359 | Save->eraseFromParent(); |
1360 | |
1361 | // No longer need a call to coro.resume or coro.destroy. |
1362 | if (auto *Invoke = dyn_cast<InvokeInst>(Val: CB)) { |
1363 | BranchInst::Create(IfTrue: Invoke->getNormalDest(), InsertBefore: Invoke->getIterator()); |
1364 | } |
1365 | |
1366 | // Grab the CalledValue from CB before erasing the CallInstr. |
1367 | auto *CalledValue = CB->getCalledOperand(); |
1368 | CB->eraseFromParent(); |
1369 | |
1370 | // If no more users remove it. Usually it is a bitcast of SubFn. |
1371 | if (CalledValue != SubFn && CalledValue->user_empty()) |
1372 | if (auto *I = dyn_cast<Instruction>(Val: CalledValue)) |
1373 | I->eraseFromParent(); |
1374 | |
1375 | // Now we are good to remove SubFn. |
1376 | if (SubFn->user_empty()) |
1377 | SubFn->eraseFromParent(); |
1378 | |
1379 | return true; |
1380 | } |
1381 | |
1382 | // Remove suspend points that are simplified. |
1383 | static void simplifySuspendPoints(coro::Shape &Shape) { |
1384 | // Currently, the only simplification we do is switch-lowering-specific. |
1385 | if (Shape.ABI != coro::ABI::Switch) |
1386 | return; |
1387 | |
1388 | auto &S = Shape.CoroSuspends; |
1389 | size_t I = 0, N = S.size(); |
1390 | if (N == 0) |
1391 | return; |
1392 | |
1393 | size_t ChangedFinalIndex = std::numeric_limits<size_t>::max(); |
1394 | while (true) { |
1395 | auto SI = cast<CoroSuspendInst>(Val: S[I]); |
1396 | // Leave final.suspend to handleFinalSuspend since it is undefined behavior |
1397 | // to resume a coroutine suspended at the final suspend point. |
1398 | if (!SI->isFinal() && simplifySuspendPoint(Suspend: SI, CoroBegin: Shape.CoroBegin)) { |
1399 | if (--N == I) |
1400 | break; |
1401 | |
1402 | std::swap(a&: S[I], b&: S[N]); |
1403 | |
1404 | if (cast<CoroSuspendInst>(Val: S[I])->isFinal()) { |
1405 | assert(Shape.SwitchLowering.HasFinalSuspend); |
1406 | ChangedFinalIndex = I; |
1407 | } |
1408 | |
1409 | continue; |
1410 | } |
1411 | if (++I == N) |
1412 | break; |
1413 | } |
1414 | S.resize(N); |
1415 | |
1416 | // Maintain final.suspend in case final suspend was swapped. |
1417 | // Due to we requrie the final suspend to be the last element of CoroSuspends. |
1418 | if (ChangedFinalIndex < N) { |
1419 | assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal()); |
1420 | std::swap(a&: S[ChangedFinalIndex], b&: S.back()); |
1421 | } |
1422 | } |
1423 | |
1424 | namespace { |
1425 | |
1426 | struct SwitchCoroutineSplitter { |
1427 | static void split(Function &F, coro::Shape &Shape, |
1428 | SmallVectorImpl<Function *> &Clones, |
1429 | TargetTransformInfo &TTI) { |
1430 | assert(Shape.ABI == coro::ABI::Switch); |
1431 | |
1432 | createResumeEntryBlock(F, Shape); |
1433 | auto *ResumeClone = |
1434 | createClone(F, Suffix: ".resume" , Shape, FKind: CoroCloner::Kind::SwitchResume, TTI); |
1435 | auto *DestroyClone = |
1436 | createClone(F, Suffix: ".destroy" , Shape, FKind: CoroCloner::Kind::SwitchUnwind, TTI); |
1437 | auto *CleanupClone = |
1438 | createClone(F, Suffix: ".cleanup" , Shape, FKind: CoroCloner::Kind::SwitchCleanup, TTI); |
1439 | |
1440 | postSplitCleanup(F&: *ResumeClone); |
1441 | postSplitCleanup(F&: *DestroyClone); |
1442 | postSplitCleanup(F&: *CleanupClone); |
1443 | |
1444 | // Store addresses resume/destroy/cleanup functions in the coroutine frame. |
1445 | updateCoroFrame(Shape, ResumeFn: ResumeClone, DestroyFn: DestroyClone, CleanupFn: CleanupClone); |
1446 | |
1447 | assert(Clones.empty()); |
1448 | Clones.push_back(Elt: ResumeClone); |
1449 | Clones.push_back(Elt: DestroyClone); |
1450 | Clones.push_back(Elt: CleanupClone); |
1451 | |
1452 | // Create a constant array referring to resume/destroy/clone functions |
1453 | // pointed by the last argument of @llvm.coro.info, so that CoroElide pass |
1454 | // can determined correct function to call. |
1455 | setCoroInfo(F, Shape, Fns: Clones); |
1456 | } |
1457 | |
1458 | private: |
1459 | // Create a resume clone by cloning the body of the original function, setting |
1460 | // new entry block and replacing coro.suspend an appropriate value to force |
1461 | // resume or cleanup pass for every suspend point. |
1462 | static Function *createClone(Function &F, const Twine &Suffix, |
1463 | coro::Shape &Shape, CoroCloner::Kind FKind, |
1464 | TargetTransformInfo &TTI) { |
1465 | CoroCloner Cloner(F, Suffix, Shape, FKind, TTI); |
1466 | Cloner.create(); |
1467 | return Cloner.getFunction(); |
1468 | } |
1469 | |
1470 | // Create an entry block for a resume function with a switch that will jump to |
1471 | // suspend points. |
1472 | static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { |
1473 | LLVMContext &C = F.getContext(); |
1474 | |
1475 | // resume.entry: |
1476 | // %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 |
1477 | // 0, i32 2 % index = load i32, i32* %index.addr switch i32 %index, label |
1478 | // %unreachable [ |
1479 | // i32 0, label %resume.0 |
1480 | // i32 1, label %resume.1 |
1481 | // ... |
1482 | // ] |
1483 | |
1484 | auto *NewEntry = BasicBlock::Create(Context&: C, Name: "resume.entry" , Parent: &F); |
1485 | auto *UnreachBB = BasicBlock::Create(Context&: C, Name: "unreachable" , Parent: &F); |
1486 | |
1487 | IRBuilder<> Builder(NewEntry); |
1488 | auto *FramePtr = Shape.FramePtr; |
1489 | auto *FrameTy = Shape.FrameTy; |
1490 | auto *GepIndex = Builder.CreateStructGEP( |
1491 | Ty: FrameTy, Ptr: FramePtr, Idx: Shape.getSwitchIndexField(), Name: "index.addr" ); |
1492 | auto *Index = Builder.CreateLoad(Ty: Shape.getIndexType(), Ptr: GepIndex, Name: "index" ); |
1493 | auto *Switch = |
1494 | Builder.CreateSwitch(V: Index, Dest: UnreachBB, NumCases: Shape.CoroSuspends.size()); |
1495 | Shape.SwitchLowering.ResumeSwitch = Switch; |
1496 | |
1497 | size_t SuspendIndex = 0; |
1498 | for (auto *AnyS : Shape.CoroSuspends) { |
1499 | auto *S = cast<CoroSuspendInst>(Val: AnyS); |
1500 | ConstantInt *IndexVal = Shape.getIndex(Value: SuspendIndex); |
1501 | |
1502 | // Replace CoroSave with a store to Index: |
1503 | // %index.addr = getelementptr %f.frame... (index field number) |
1504 | // store i32 %IndexVal, i32* %index.addr1 |
1505 | auto *Save = S->getCoroSave(); |
1506 | Builder.SetInsertPoint(Save); |
1507 | if (S->isFinal()) { |
1508 | // The coroutine should be marked done if it reaches the final suspend |
1509 | // point. |
1510 | markCoroutineAsDone(Builder, Shape, FramePtr); |
1511 | } else { |
1512 | auto *GepIndex = Builder.CreateStructGEP( |
1513 | Ty: FrameTy, Ptr: FramePtr, Idx: Shape.getSwitchIndexField(), Name: "index.addr" ); |
1514 | Builder.CreateStore(Val: IndexVal, Ptr: GepIndex); |
1515 | } |
1516 | |
1517 | Save->replaceAllUsesWith(V: ConstantTokenNone::get(Context&: C)); |
1518 | Save->eraseFromParent(); |
1519 | |
1520 | // Split block before and after coro.suspend and add a jump from an entry |
1521 | // switch: |
1522 | // |
1523 | // whateverBB: |
1524 | // whatever |
1525 | // %0 = call i8 @llvm.coro.suspend(token none, i1 false) |
1526 | // switch i8 %0, label %suspend[i8 0, label %resume |
1527 | // i8 1, label %cleanup] |
1528 | // becomes: |
1529 | // |
1530 | // whateverBB: |
1531 | // whatever |
1532 | // br label %resume.0.landing |
1533 | // |
1534 | // resume.0: ; <--- jump from the switch in the resume.entry |
1535 | // %0 = tail call i8 @llvm.coro.suspend(token none, i1 false) |
1536 | // br label %resume.0.landing |
1537 | // |
1538 | // resume.0.landing: |
1539 | // %1 = phi i8[-1, %whateverBB], [%0, %resume.0] |
1540 | // switch i8 % 1, label %suspend [i8 0, label %resume |
1541 | // i8 1, label %cleanup] |
1542 | |
1543 | auto *SuspendBB = S->getParent(); |
1544 | auto *ResumeBB = |
1545 | SuspendBB->splitBasicBlock(I: S, BBName: "resume." + Twine(SuspendIndex)); |
1546 | auto *LandingBB = ResumeBB->splitBasicBlock( |
1547 | I: S->getNextNode(), BBName: ResumeBB->getName() + Twine(".landing" )); |
1548 | Switch->addCase(OnVal: IndexVal, Dest: ResumeBB); |
1549 | |
1550 | cast<BranchInst>(Val: SuspendBB->getTerminator())->setSuccessor(idx: 0, NewSucc: LandingBB); |
1551 | auto *PN = PHINode::Create(Ty: Builder.getInt8Ty(), NumReservedValues: 2, NameStr: "" ); |
1552 | PN->insertBefore(InsertPos: LandingBB->begin()); |
1553 | S->replaceAllUsesWith(V: PN); |
1554 | PN->addIncoming(V: Builder.getInt8(C: -1), BB: SuspendBB); |
1555 | PN->addIncoming(V: S, BB: ResumeBB); |
1556 | |
1557 | ++SuspendIndex; |
1558 | } |
1559 | |
1560 | Builder.SetInsertPoint(UnreachBB); |
1561 | Builder.CreateUnreachable(); |
1562 | |
1563 | Shape.SwitchLowering.ResumeEntryBlock = NewEntry; |
1564 | } |
1565 | |
1566 | // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame. |
1567 | static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn, |
1568 | Function *DestroyFn, Function *CleanupFn) { |
1569 | IRBuilder<> Builder(&*Shape.getInsertPtAfterFramePtr()); |
1570 | |
1571 | auto *ResumeAddr = Builder.CreateStructGEP( |
1572 | Ty: Shape.FrameTy, Ptr: Shape.FramePtr, Idx: coro::Shape::SwitchFieldIndex::Resume, |
1573 | Name: "resume.addr" ); |
1574 | Builder.CreateStore(Val: ResumeFn, Ptr: ResumeAddr); |
1575 | |
1576 | Value *DestroyOrCleanupFn = DestroyFn; |
1577 | |
1578 | CoroIdInst *CoroId = Shape.getSwitchCoroId(); |
1579 | if (CoroAllocInst *CA = CoroId->getCoroAlloc()) { |
1580 | // If there is a CoroAlloc and it returns false (meaning we elide the |
1581 | // allocation, use CleanupFn instead of DestroyFn). |
1582 | DestroyOrCleanupFn = Builder.CreateSelect(C: CA, True: DestroyFn, False: CleanupFn); |
1583 | } |
1584 | |
1585 | auto *DestroyAddr = Builder.CreateStructGEP( |
1586 | Ty: Shape.FrameTy, Ptr: Shape.FramePtr, Idx: coro::Shape::SwitchFieldIndex::Destroy, |
1587 | Name: "destroy.addr" ); |
1588 | Builder.CreateStore(Val: DestroyOrCleanupFn, Ptr: DestroyAddr); |
1589 | } |
1590 | |
1591 | // Create a global constant array containing pointers to functions provided |
1592 | // and set Info parameter of CoroBegin to point at this constant. Example: |
1593 | // |
1594 | // @f.resumers = internal constant [2 x void(%f.frame*)*] |
1595 | // [void(%f.frame*)* @f.resume, void(%f.frame*)* |
1596 | // @f.destroy] |
1597 | // define void @f() { |
1598 | // ... |
1599 | // call i8* @llvm.coro.begin(i8* null, i32 0, i8* null, |
1600 | // i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to |
1601 | // i8*)) |
1602 | // |
1603 | // Assumes that all the functions have the same signature. |
1604 | static void setCoroInfo(Function &F, coro::Shape &Shape, |
1605 | ArrayRef<Function *> Fns) { |
1606 | // This only works under the switch-lowering ABI because coro elision |
1607 | // only works on the switch-lowering ABI. |
1608 | SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end()); |
1609 | assert(!Args.empty()); |
1610 | Function *Part = *Fns.begin(); |
1611 | Module *M = Part->getParent(); |
1612 | auto *ArrTy = ArrayType::get(ElementType: Part->getType(), NumElements: Args.size()); |
1613 | |
1614 | auto *ConstVal = ConstantArray::get(T: ArrTy, V: Args); |
1615 | auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true, |
1616 | GlobalVariable::PrivateLinkage, ConstVal, |
1617 | F.getName() + Twine(".resumers" )); |
1618 | |
1619 | // Update coro.begin instruction to refer to this constant. |
1620 | LLVMContext &C = F.getContext(); |
1621 | auto *BC = ConstantExpr::getPointerCast(C: GV, Ty: PointerType::getUnqual(C)); |
1622 | Shape.getSwitchCoroId()->setInfo(BC); |
1623 | } |
1624 | }; |
1625 | |
1626 | } // namespace |
1627 | |
1628 | static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend, |
1629 | Value *Continuation) { |
1630 | auto *ResumeIntrinsic = Suspend->getResumeFunction(); |
1631 | auto &Context = Suspend->getParent()->getParent()->getContext(); |
1632 | auto *Int8PtrTy = PointerType::getUnqual(C&: Context); |
1633 | |
1634 | IRBuilder<> Builder(ResumeIntrinsic); |
1635 | auto *Val = Builder.CreateBitOrPointerCast(V: Continuation, DestTy: Int8PtrTy); |
1636 | ResumeIntrinsic->replaceAllUsesWith(V: Val); |
1637 | ResumeIntrinsic->eraseFromParent(); |
1638 | Suspend->setOperand(i_nocapture: CoroSuspendAsyncInst::ResumeFunctionArg, |
1639 | Val_nocapture: UndefValue::get(T: Int8PtrTy)); |
1640 | } |
1641 | |
1642 | /// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs. |
1643 | static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy, |
1644 | ArrayRef<Value *> FnArgs, |
1645 | SmallVectorImpl<Value *> &CallArgs) { |
1646 | size_t ArgIdx = 0; |
1647 | for (auto *paramTy : FnTy->params()) { |
1648 | assert(ArgIdx < FnArgs.size()); |
1649 | if (paramTy != FnArgs[ArgIdx]->getType()) |
1650 | CallArgs.push_back( |
1651 | Elt: Builder.CreateBitOrPointerCast(V: FnArgs[ArgIdx], DestTy: paramTy)); |
1652 | else |
1653 | CallArgs.push_back(Elt: FnArgs[ArgIdx]); |
1654 | ++ArgIdx; |
1655 | } |
1656 | } |
1657 | |
1658 | CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn, |
1659 | TargetTransformInfo &TTI, |
1660 | ArrayRef<Value *> Arguments, |
1661 | IRBuilder<> &Builder) { |
1662 | auto *FnTy = MustTailCallFn->getFunctionType(); |
1663 | // Coerce the arguments, llvm optimizations seem to ignore the types in |
1664 | // vaarg functions and throws away casts in optimized mode. |
1665 | SmallVector<Value *, 8> CallArgs; |
1666 | coerceArguments(Builder, FnTy, FnArgs: Arguments, CallArgs); |
1667 | |
1668 | auto *TailCall = Builder.CreateCall(FTy: FnTy, Callee: MustTailCallFn, Args: CallArgs); |
1669 | // Skip targets which don't support tail call. |
1670 | if (TTI.supportsTailCallFor(CB: TailCall)) { |
1671 | TailCall->setTailCallKind(CallInst::TCK_MustTail); |
1672 | } |
1673 | TailCall->setDebugLoc(Loc); |
1674 | TailCall->setCallingConv(MustTailCallFn->getCallingConv()); |
1675 | return TailCall; |
1676 | } |
1677 | |
1678 | static void splitAsyncCoroutine(Function &F, coro::Shape &Shape, |
1679 | SmallVectorImpl<Function *> &Clones, |
1680 | TargetTransformInfo &TTI) { |
1681 | assert(Shape.ABI == coro::ABI::Async); |
1682 | assert(Clones.empty()); |
1683 | // Reset various things that the optimizer might have decided it |
1684 | // "knows" about the coroutine function due to not seeing a return. |
1685 | F.removeFnAttr(Kind: Attribute::NoReturn); |
1686 | F.removeRetAttr(Kind: Attribute::NoAlias); |
1687 | F.removeRetAttr(Kind: Attribute::NonNull); |
1688 | |
1689 | auto &Context = F.getContext(); |
1690 | auto *Int8PtrTy = PointerType::getUnqual(C&: Context); |
1691 | |
1692 | auto *Id = cast<CoroIdAsyncInst>(Val: Shape.CoroBegin->getId()); |
1693 | IRBuilder<> Builder(Id); |
1694 | |
1695 | auto *FramePtr = Id->getStorage(); |
1696 | FramePtr = Builder.CreateBitOrPointerCast(V: FramePtr, DestTy: Int8PtrTy); |
1697 | FramePtr = Builder.CreateConstInBoundsGEP1_32( |
1698 | Ty: Type::getInt8Ty(C&: Context), Ptr: FramePtr, Idx0: Shape.AsyncLowering.FrameOffset, |
1699 | Name: "async.ctx.frameptr" ); |
1700 | |
1701 | // Map all uses of llvm.coro.begin to the allocated frame pointer. |
1702 | { |
1703 | // Make sure we don't invalidate Shape.FramePtr. |
1704 | TrackingVH<Value> Handle(Shape.FramePtr); |
1705 | Shape.CoroBegin->replaceAllUsesWith(V: FramePtr); |
1706 | Shape.FramePtr = Handle.getValPtr(); |
1707 | } |
1708 | |
1709 | // Create all the functions in order after the main function. |
1710 | auto NextF = std::next(x: F.getIterator()); |
1711 | |
1712 | // Create a continuation function for each of the suspend points. |
1713 | Clones.reserve(N: Shape.CoroSuspends.size()); |
1714 | for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) { |
1715 | auto *Suspend = cast<CoroSuspendAsyncInst>(Val: Shape.CoroSuspends[Idx]); |
1716 | |
1717 | // Create the clone declaration. |
1718 | auto ResumeNameSuffix = ".resume." ; |
1719 | auto ProjectionFunctionName = |
1720 | Suspend->getAsyncContextProjectionFunction()->getName(); |
1721 | bool UseSwiftMangling = false; |
1722 | if (ProjectionFunctionName == "__swift_async_resume_project_context" ) { |
1723 | ResumeNameSuffix = "TQ" ; |
1724 | UseSwiftMangling = true; |
1725 | } else if (ProjectionFunctionName == "__swift_async_resume_get_context" ) { |
1726 | ResumeNameSuffix = "TY" ; |
1727 | UseSwiftMangling = true; |
1728 | } |
1729 | auto *Continuation = createCloneDeclaration( |
1730 | OrigF&: F, Shape, |
1731 | Suffix: UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_" |
1732 | : ResumeNameSuffix + Twine(Idx), |
1733 | InsertBefore: NextF, ActiveSuspend: Suspend); |
1734 | Clones.push_back(Elt: Continuation); |
1735 | |
1736 | // Insert a branch to a new return block immediately before the suspend |
1737 | // point. |
1738 | auto *SuspendBB = Suspend->getParent(); |
1739 | auto *NewSuspendBB = SuspendBB->splitBasicBlock(I: Suspend); |
1740 | auto *Branch = cast<BranchInst>(Val: SuspendBB->getTerminator()); |
1741 | |
1742 | // Place it before the first suspend. |
1743 | auto *ReturnBB = |
1744 | BasicBlock::Create(Context&: F.getContext(), Name: "coro.return" , Parent: &F, InsertBefore: NewSuspendBB); |
1745 | Branch->setSuccessor(idx: 0, NewSucc: ReturnBB); |
1746 | |
1747 | IRBuilder<> Builder(ReturnBB); |
1748 | |
1749 | // Insert the call to the tail call function and inline it. |
1750 | auto *Fn = Suspend->getMustTailCallFunction(); |
1751 | SmallVector<Value *, 8> Args(Suspend->args()); |
1752 | auto FnArgs = ArrayRef<Value *>(Args).drop_front( |
1753 | N: CoroSuspendAsyncInst::MustTailCallFuncArg + 1); |
1754 | auto *TailCall = coro::createMustTailCall(Loc: Suspend->getDebugLoc(), MustTailCallFn: Fn, TTI, |
1755 | Arguments: FnArgs, Builder); |
1756 | Builder.CreateRetVoid(); |
1757 | InlineFunctionInfo FnInfo; |
1758 | (void)InlineFunction(CB&: *TailCall, IFI&: FnInfo); |
1759 | |
1760 | // Replace the lvm.coro.async.resume intrisic call. |
1761 | replaceAsyncResumeFunction(Suspend, Continuation); |
1762 | } |
1763 | |
1764 | assert(Clones.size() == Shape.CoroSuspends.size()); |
1765 | for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) { |
1766 | auto *Suspend = Shape.CoroSuspends[Idx]; |
1767 | auto *Clone = Clones[Idx]; |
1768 | |
1769 | CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend, TTI).create(); |
1770 | } |
1771 | } |
1772 | |
1773 | static void splitRetconCoroutine(Function &F, coro::Shape &Shape, |
1774 | SmallVectorImpl<Function *> &Clones, |
1775 | TargetTransformInfo &TTI) { |
1776 | assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); |
1777 | assert(Clones.empty()); |
1778 | |
1779 | // Reset various things that the optimizer might have decided it |
1780 | // "knows" about the coroutine function due to not seeing a return. |
1781 | F.removeFnAttr(Kind: Attribute::NoReturn); |
1782 | F.removeRetAttr(Kind: Attribute::NoAlias); |
1783 | F.removeRetAttr(Kind: Attribute::NonNull); |
1784 | |
1785 | // Allocate the frame. |
1786 | auto *Id = cast<AnyCoroIdRetconInst>(Val: Shape.CoroBegin->getId()); |
1787 | Value *RawFramePtr; |
1788 | if (Shape.RetconLowering.IsFrameInlineInStorage) { |
1789 | RawFramePtr = Id->getStorage(); |
1790 | } else { |
1791 | IRBuilder<> Builder(Id); |
1792 | |
1793 | // Determine the size of the frame. |
1794 | const DataLayout &DL = F.getDataLayout(); |
1795 | auto Size = DL.getTypeAllocSize(Ty: Shape.FrameTy); |
1796 | |
1797 | // Allocate. We don't need to update the call graph node because we're |
1798 | // going to recompute it from scratch after splitting. |
1799 | // FIXME: pass the required alignment |
1800 | RawFramePtr = Shape.emitAlloc(Builder, Size: Builder.getInt64(C: Size), CG: nullptr); |
1801 | RawFramePtr = |
1802 | Builder.CreateBitCast(V: RawFramePtr, DestTy: Shape.CoroBegin->getType()); |
1803 | |
1804 | // Stash the allocated frame pointer in the continuation storage. |
1805 | Builder.CreateStore(Val: RawFramePtr, Ptr: Id->getStorage()); |
1806 | } |
1807 | |
1808 | // Map all uses of llvm.coro.begin to the allocated frame pointer. |
1809 | { |
1810 | // Make sure we don't invalidate Shape.FramePtr. |
1811 | TrackingVH<Value> Handle(Shape.FramePtr); |
1812 | Shape.CoroBegin->replaceAllUsesWith(V: RawFramePtr); |
1813 | Shape.FramePtr = Handle.getValPtr(); |
1814 | } |
1815 | |
1816 | // Create a unique return block. |
1817 | BasicBlock *ReturnBB = nullptr; |
1818 | SmallVector<PHINode *, 4> ReturnPHIs; |
1819 | |
1820 | // Create all the functions in order after the main function. |
1821 | auto NextF = std::next(x: F.getIterator()); |
1822 | |
1823 | // Create a continuation function for each of the suspend points. |
1824 | Clones.reserve(N: Shape.CoroSuspends.size()); |
1825 | for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) { |
1826 | auto Suspend = cast<CoroSuspendRetconInst>(Val: Shape.CoroSuspends[i]); |
1827 | |
1828 | // Create the clone declaration. |
1829 | auto Continuation = |
1830 | createCloneDeclaration(OrigF&: F, Shape, Suffix: ".resume." + Twine(i), InsertBefore: NextF, ActiveSuspend: nullptr); |
1831 | Clones.push_back(Elt: Continuation); |
1832 | |
1833 | // Insert a branch to the unified return block immediately before |
1834 | // the suspend point. |
1835 | auto SuspendBB = Suspend->getParent(); |
1836 | auto NewSuspendBB = SuspendBB->splitBasicBlock(I: Suspend); |
1837 | auto Branch = cast<BranchInst>(Val: SuspendBB->getTerminator()); |
1838 | |
1839 | // Create the unified return block. |
1840 | if (!ReturnBB) { |
1841 | // Place it before the first suspend. |
1842 | ReturnBB = |
1843 | BasicBlock::Create(Context&: F.getContext(), Name: "coro.return" , Parent: &F, InsertBefore: NewSuspendBB); |
1844 | Shape.RetconLowering.ReturnBlock = ReturnBB; |
1845 | |
1846 | IRBuilder<> Builder(ReturnBB); |
1847 | |
1848 | // Create PHIs for all the return values. |
1849 | assert(ReturnPHIs.empty()); |
1850 | |
1851 | // First, the continuation. |
1852 | ReturnPHIs.push_back(Elt: Builder.CreatePHI(Ty: Continuation->getType(), |
1853 | NumReservedValues: Shape.CoroSuspends.size())); |
1854 | |
1855 | // Next, all the directly-yielded values. |
1856 | for (auto *ResultTy : Shape.getRetconResultTypes()) |
1857 | ReturnPHIs.push_back( |
1858 | Elt: Builder.CreatePHI(Ty: ResultTy, NumReservedValues: Shape.CoroSuspends.size())); |
1859 | |
1860 | // Build the return value. |
1861 | auto RetTy = F.getReturnType(); |
1862 | |
1863 | // Cast the continuation value if necessary. |
1864 | // We can't rely on the types matching up because that type would |
1865 | // have to be infinite. |
1866 | auto CastedContinuationTy = |
1867 | (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(N: 0)); |
1868 | auto *CastedContinuation = |
1869 | Builder.CreateBitCast(V: ReturnPHIs[0], DestTy: CastedContinuationTy); |
1870 | |
1871 | Value *RetV; |
1872 | if (ReturnPHIs.size() == 1) { |
1873 | RetV = CastedContinuation; |
1874 | } else { |
1875 | RetV = PoisonValue::get(T: RetTy); |
1876 | RetV = Builder.CreateInsertValue(Agg: RetV, Val: CastedContinuation, Idxs: 0); |
1877 | for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I) |
1878 | RetV = Builder.CreateInsertValue(Agg: RetV, Val: ReturnPHIs[I], Idxs: I); |
1879 | } |
1880 | |
1881 | Builder.CreateRet(V: RetV); |
1882 | } |
1883 | |
1884 | // Branch to the return block. |
1885 | Branch->setSuccessor(idx: 0, NewSucc: ReturnBB); |
1886 | ReturnPHIs[0]->addIncoming(V: Continuation, BB: SuspendBB); |
1887 | size_t NextPHIIndex = 1; |
1888 | for (auto &VUse : Suspend->value_operands()) |
1889 | ReturnPHIs[NextPHIIndex++]->addIncoming(V: &*VUse, BB: SuspendBB); |
1890 | assert(NextPHIIndex == ReturnPHIs.size()); |
1891 | } |
1892 | |
1893 | assert(Clones.size() == Shape.CoroSuspends.size()); |
1894 | for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) { |
1895 | auto Suspend = Shape.CoroSuspends[i]; |
1896 | auto Clone = Clones[i]; |
1897 | |
1898 | CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend, TTI).create(); |
1899 | } |
1900 | } |
1901 | |
1902 | namespace { |
1903 | class PrettyStackTraceFunction : public PrettyStackTraceEntry { |
1904 | Function &F; |
1905 | |
1906 | public: |
1907 | PrettyStackTraceFunction(Function &F) : F(F) {} |
1908 | void print(raw_ostream &OS) const override { |
1909 | OS << "While splitting coroutine " ; |
1910 | F.printAsOperand(O&: OS, /*print type*/ PrintType: false, M: F.getParent()); |
1911 | OS << "\n" ; |
1912 | } |
1913 | }; |
1914 | } // namespace |
1915 | |
1916 | static coro::Shape |
1917 | splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones, |
1918 | TargetTransformInfo &TTI, bool OptimizeFrame, |
1919 | std::function<bool(Instruction &)> MaterializableCallback) { |
1920 | PrettyStackTraceFunction prettyStackTrace(F); |
1921 | |
1922 | // The suspend-crossing algorithm in buildCoroutineFrame get tripped |
1923 | // up by uses in unreachable blocks, so remove them as a first pass. |
1924 | removeUnreachableBlocks(F); |
1925 | |
1926 | coro::Shape Shape(F, OptimizeFrame); |
1927 | if (!Shape.CoroBegin) |
1928 | return Shape; |
1929 | |
1930 | lowerAwaitSuspends(F, Shape); |
1931 | |
1932 | simplifySuspendPoints(Shape); |
1933 | buildCoroutineFrame(F, Shape, TTI, MaterializableCallback); |
1934 | replaceFrameSizeAndAlignment(Shape); |
1935 | |
1936 | // If there are no suspend points, no split required, just remove |
1937 | // the allocation and deallocation blocks, they are not needed. |
1938 | if (Shape.CoroSuspends.empty()) { |
1939 | handleNoSuspendCoroutine(Shape); |
1940 | } else { |
1941 | switch (Shape.ABI) { |
1942 | case coro::ABI::Switch: |
1943 | SwitchCoroutineSplitter::split(F, Shape, Clones, TTI); |
1944 | break; |
1945 | case coro::ABI::Async: |
1946 | splitAsyncCoroutine(F, Shape, Clones, TTI); |
1947 | break; |
1948 | case coro::ABI::Retcon: |
1949 | case coro::ABI::RetconOnce: |
1950 | splitRetconCoroutine(F, Shape, Clones, TTI); |
1951 | break; |
1952 | } |
1953 | } |
1954 | |
1955 | // Replace all the swifterror operations in the original function. |
1956 | // This invalidates SwiftErrorOps in the Shape. |
1957 | replaceSwiftErrorOps(F, Shape, VMap: nullptr); |
1958 | |
1959 | // Salvage debug intrinsics that point into the coroutine frame in the |
1960 | // original function. The Cloner has already salvaged debug info in the new |
1961 | // coroutine funclets. |
1962 | SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap; |
1963 | auto [DbgInsts, DbgVariableRecords] = collectDbgVariableIntrinsics(F); |
1964 | for (auto *DDI : DbgInsts) |
1965 | coro::salvageDebugInfo(ArgToAllocaMap, DVI&: *DDI, OptimizeFrame: Shape.OptimizeFrame, |
1966 | IsEntryPoint: false /*UseEntryValue*/); |
1967 | for (DbgVariableRecord *DVR : DbgVariableRecords) |
1968 | coro::salvageDebugInfo(ArgToAllocaMap, DVR&: *DVR, OptimizeFrame: Shape.OptimizeFrame, |
1969 | UseEntryValue: false /*UseEntryValue*/); |
1970 | return Shape; |
1971 | } |
1972 | |
1973 | /// Remove calls to llvm.coro.end in the original function. |
1974 | static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) { |
1975 | if (Shape.ABI != coro::ABI::Switch) { |
1976 | for (auto *End : Shape.CoroEnds) { |
1977 | replaceCoroEnd(End, Shape, FramePtr: Shape.FramePtr, /*in resume*/ InResume: false, CG: nullptr); |
1978 | } |
1979 | } else { |
1980 | for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { |
1981 | auto &Context = End->getContext(); |
1982 | End->replaceAllUsesWith(V: ConstantInt::getFalse(Context)); |
1983 | End->eraseFromParent(); |
1984 | } |
1985 | } |
1986 | } |
1987 | |
1988 | static void updateCallGraphAfterCoroutineSplit( |
1989 | LazyCallGraph::Node &N, const coro::Shape &Shape, |
1990 | const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C, |
1991 | LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, |
1992 | FunctionAnalysisManager &FAM) { |
1993 | |
1994 | if (!Clones.empty()) { |
1995 | switch (Shape.ABI) { |
1996 | case coro::ABI::Switch: |
1997 | // Each clone in the Switch lowering is independent of the other clones. |
1998 | // Let the LazyCallGraph know about each one separately. |
1999 | for (Function *Clone : Clones) |
2000 | CG.addSplitFunction(OriginalFunction&: N.getFunction(), NewFunction&: *Clone); |
2001 | break; |
2002 | case coro::ABI::Async: |
2003 | case coro::ABI::Retcon: |
2004 | case coro::ABI::RetconOnce: |
2005 | // Each clone in the Async/Retcon lowering references of the other clones. |
2006 | // Let the LazyCallGraph know about all of them at once. |
2007 | if (!Clones.empty()) |
2008 | CG.addSplitRefRecursiveFunctions(OriginalFunction&: N.getFunction(), NewFunctions: Clones); |
2009 | break; |
2010 | } |
2011 | |
2012 | // Let the CGSCC infra handle the changes to the original function. |
2013 | updateCGAndAnalysisManagerForCGSCCPass(G&: CG, C, N, AM, UR, FAM); |
2014 | } |
2015 | |
2016 | // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges |
2017 | // to the split functions. |
2018 | postSplitCleanup(F&: N.getFunction()); |
2019 | updateCGAndAnalysisManagerForFunctionPass(G&: CG, C, N, AM, UR, FAM); |
2020 | } |
2021 | |
2022 | /// Replace a call to llvm.coro.prepare.retcon. |
2023 | static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG, |
2024 | LazyCallGraph::SCC &C) { |
2025 | auto CastFn = Prepare->getArgOperand(i: 0); // as an i8* |
2026 | auto Fn = CastFn->stripPointerCasts(); // as its original type |
2027 | |
2028 | // Attempt to peephole this pattern: |
2029 | // %0 = bitcast [[TYPE]] @some_function to i8* |
2030 | // %1 = call @llvm.coro.prepare.retcon(i8* %0) |
2031 | // %2 = bitcast %1 to [[TYPE]] |
2032 | // ==> |
2033 | // %2 = @some_function |
2034 | for (Use &U : llvm::make_early_inc_range(Range: Prepare->uses())) { |
2035 | // Look for bitcasts back to the original function type. |
2036 | auto *Cast = dyn_cast<BitCastInst>(Val: U.getUser()); |
2037 | if (!Cast || Cast->getType() != Fn->getType()) |
2038 | continue; |
2039 | |
2040 | // Replace and remove the cast. |
2041 | Cast->replaceAllUsesWith(V: Fn); |
2042 | Cast->eraseFromParent(); |
2043 | } |
2044 | |
2045 | // Replace any remaining uses with the function as an i8*. |
2046 | // This can never directly be a callee, so we don't need to update CG. |
2047 | Prepare->replaceAllUsesWith(V: CastFn); |
2048 | Prepare->eraseFromParent(); |
2049 | |
2050 | // Kill dead bitcasts. |
2051 | while (auto *Cast = dyn_cast<BitCastInst>(Val: CastFn)) { |
2052 | if (!Cast->use_empty()) |
2053 | break; |
2054 | CastFn = Cast->getOperand(i_nocapture: 0); |
2055 | Cast->eraseFromParent(); |
2056 | } |
2057 | } |
2058 | |
2059 | static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG, |
2060 | LazyCallGraph::SCC &C) { |
2061 | bool Changed = false; |
2062 | for (Use &P : llvm::make_early_inc_range(Range: PrepareFn->uses())) { |
2063 | // Intrinsics can only be used in calls. |
2064 | auto *Prepare = cast<CallInst>(Val: P.getUser()); |
2065 | replacePrepare(Prepare, CG, C); |
2066 | Changed = true; |
2067 | } |
2068 | |
2069 | return Changed; |
2070 | } |
2071 | |
2072 | static void addPrepareFunction(const Module &M, |
2073 | SmallVectorImpl<Function *> &Fns, |
2074 | StringRef Name) { |
2075 | auto *PrepareFn = M.getFunction(Name); |
2076 | if (PrepareFn && !PrepareFn->use_empty()) |
2077 | Fns.push_back(Elt: PrepareFn); |
2078 | } |
2079 | |
2080 | CoroSplitPass::CoroSplitPass(bool OptimizeFrame) |
2081 | : MaterializableCallback(coro::defaultMaterializable), |
2082 | OptimizeFrame(OptimizeFrame) {} |
2083 | |
2084 | PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, |
2085 | CGSCCAnalysisManager &AM, |
2086 | LazyCallGraph &CG, CGSCCUpdateResult &UR) { |
2087 | // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a |
2088 | // non-zero number of nodes, so we assume that here and grab the first |
2089 | // node's function's module. |
2090 | Module &M = *C.begin()->getFunction().getParent(); |
2091 | auto &FAM = |
2092 | AM.getResult<FunctionAnalysisManagerCGSCCProxy>(IR&: C, ExtraArgs&: CG).getManager(); |
2093 | |
2094 | // Check for uses of llvm.coro.prepare.retcon/async. |
2095 | SmallVector<Function *, 2> PrepareFns; |
2096 | addPrepareFunction(M, Fns&: PrepareFns, Name: "llvm.coro.prepare.retcon" ); |
2097 | addPrepareFunction(M, Fns&: PrepareFns, Name: "llvm.coro.prepare.async" ); |
2098 | |
2099 | // Find coroutines for processing. |
2100 | SmallVector<LazyCallGraph::Node *> Coroutines; |
2101 | for (LazyCallGraph::Node &N : C) |
2102 | if (N.getFunction().isPresplitCoroutine()) |
2103 | Coroutines.push_back(Elt: &N); |
2104 | |
2105 | if (Coroutines.empty() && PrepareFns.empty()) |
2106 | return PreservedAnalyses::all(); |
2107 | |
2108 | // Split all the coroutines. |
2109 | for (LazyCallGraph::Node *N : Coroutines) { |
2110 | Function &F = N->getFunction(); |
2111 | LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName() |
2112 | << "\n" ); |
2113 | F.setSplittedCoroutine(); |
2114 | |
2115 | SmallVector<Function *, 4> Clones; |
2116 | auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F); |
2117 | const coro::Shape Shape = |
2118 | splitCoroutine(F, Clones, TTI&: FAM.getResult<TargetIRAnalysis>(IR&: F), |
2119 | OptimizeFrame, MaterializableCallback); |
2120 | removeCoroEndsFromRampFunction(Shape); |
2121 | updateCallGraphAfterCoroutineSplit(N&: *N, Shape, Clones, C, CG, AM, UR, FAM); |
2122 | |
2123 | ORE.emit(RemarkBuilder: [&]() { |
2124 | return OptimizationRemark(DEBUG_TYPE, "CoroSplit" , &F) |
2125 | << "Split '" << ore::NV("function" , F.getName()) |
2126 | << "' (frame_size=" << ore::NV("frame_size" , Shape.FrameSize) |
2127 | << ", align=" << ore::NV("align" , Shape.FrameAlign.value()) << ")" ; |
2128 | }); |
2129 | |
2130 | if (!Shape.CoroSuspends.empty()) { |
2131 | // Run the CGSCC pipeline on the original and newly split functions. |
2132 | UR.CWorklist.insert(X: &C); |
2133 | for (Function *Clone : Clones) |
2134 | UR.CWorklist.insert(X: CG.lookupSCC(N&: CG.get(F&: *Clone))); |
2135 | } |
2136 | } |
2137 | |
2138 | for (auto *PrepareFn : PrepareFns) { |
2139 | replaceAllPrepares(PrepareFn, CG, C); |
2140 | } |
2141 | |
2142 | return PreservedAnalyses::none(); |
2143 | } |
2144 | |