1//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass builds the coroutine frame and outlines resume and destroy parts
9// of the coroutine into separate functions.
10//
11// We present a coroutine to an LLVM as an ordinary function with suspension
12// points marked up with intrinsics. We let the optimizer party on the coroutine
13// as a single function for as long as possible. Shortly before the coroutine is
14// eligible to be inlined into its callers, we split up the coroutine into parts
15// corresponding to an initial, resume and destroy invocations of the coroutine,
16// add them to the current SCC and restart the IPO pipeline to optimize the
17// coroutine subfunctions we extracted before proceeding to the caller of the
18// coroutine.
19//===----------------------------------------------------------------------===//
20
21#include "llvm/Transforms/Coroutines/CoroSplit.h"
22#include "CoroCloner.h"
23#include "CoroInternal.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/PriorityWorklist.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/SmallPtrSet.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringRef.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/CFG.h"
33#include "llvm/Analysis/CallGraph.h"
34#include "llvm/Analysis/ConstantFolding.h"
35#include "llvm/Analysis/LazyCallGraph.h"
36#include "llvm/Analysis/OptimizationRemarkEmitter.h"
37#include "llvm/Analysis/TargetTransformInfo.h"
38#include "llvm/BinaryFormat/Dwarf.h"
39#include "llvm/IR/Argument.h"
40#include "llvm/IR/Attributes.h"
41#include "llvm/IR/BasicBlock.h"
42#include "llvm/IR/CFG.h"
43#include "llvm/IR/CallingConv.h"
44#include "llvm/IR/Constants.h"
45#include "llvm/IR/DIBuilder.h"
46#include "llvm/IR/DataLayout.h"
47#include "llvm/IR/DebugInfo.h"
48#include "llvm/IR/DerivedTypes.h"
49#include "llvm/IR/Dominators.h"
50#include "llvm/IR/GlobalValue.h"
51#include "llvm/IR/GlobalVariable.h"
52#include "llvm/IR/InstIterator.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
55#include "llvm/IR/Instructions.h"
56#include "llvm/IR/IntrinsicInst.h"
57#include "llvm/IR/LLVMContext.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Type.h"
60#include "llvm/IR/Value.h"
61#include "llvm/IR/Verifier.h"
62#include "llvm/Support/Casting.h"
63#include "llvm/Support/Debug.h"
64#include "llvm/Support/PrettyStackTrace.h"
65#include "llvm/Support/raw_ostream.h"
66#include "llvm/Transforms/Coroutines/MaterializationUtils.h"
67#include "llvm/Transforms/Scalar.h"
68#include "llvm/Transforms/Utils/BasicBlockUtils.h"
69#include "llvm/Transforms/Utils/CallGraphUpdater.h"
70#include "llvm/Transforms/Utils/Cloning.h"
71#include "llvm/Transforms/Utils/Local.h"
72#include <cassert>
73#include <cstddef>
74#include <cstdint>
75#include <initializer_list>
76#include <iterator>
77
78using namespace llvm;
79
80#define DEBUG_TYPE "coro-split"
81
// FIXME:
// Lower the intrinsic in CoroEarly phase if coroutine frame doesn't escape
// and it is known that other transformations, for example, sanitizers
// won't lead to incorrect code.
//
// Lower one llvm.coro.await.suspend.* intrinsic (\p CB) into a direct
// call/invoke of its wrapper function with the awaiter and frame pointer as
// arguments. For the coro_await_suspend_handle variant, additionally emit a
// symmetric-transfer resume call on the returned handle and record it in
// \p Shape.SymmetricTransfers so it can be turned into a musttail call after
// the split.
static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB,
                              coro::Shape &Shape) {
  auto Wrapper = CB->getWrapperFunction();
  auto Awaiter = CB->getAwaiter();
  auto FramePtr = CB->getFrame();

  Builder.SetInsertPoint(CB);

  CallBase *NewCall = nullptr;
  // await_suspend has only 2 parameters, awaiter and handle.
  // Copy parameter attributes from the intrinsic call, but remove the last,
  // because the last parameter now becomes the function that is being called.
  AttributeList NewAttributes =
      CB->getAttributes().removeParamAttributes(C&: CB->getContext(), ArgNo: 2);

  // Mirror the call form of the original intrinsic (call vs. invoke) so that
  // exception-handling edges are preserved.
  if (auto Invoke = dyn_cast<InvokeInst>(Val: CB)) {
    auto WrapperInvoke =
        Builder.CreateInvoke(Callee: Wrapper, NormalDest: Invoke->getNormalDest(),
                             UnwindDest: Invoke->getUnwindDest(), Args: {Awaiter, FramePtr});

    WrapperInvoke->setCallingConv(Invoke->getCallingConv());
    std::copy(first: Invoke->bundle_op_info_begin(), last: Invoke->bundle_op_info_end(),
              result: WrapperInvoke->bundle_op_info_begin());
    WrapperInvoke->setAttributes(NewAttributes);
    WrapperInvoke->setDebugLoc(Invoke->getDebugLoc());
    NewCall = WrapperInvoke;
  } else if (auto Call = dyn_cast<CallInst>(Val: CB)) {
    auto WrapperCall = Builder.CreateCall(Callee: Wrapper, Args: {Awaiter, FramePtr});

    WrapperCall->setAttributes(NewAttributes);
    WrapperCall->setDebugLoc(Call->getDebugLoc());
    NewCall = WrapperCall;
  } else {
    llvm_unreachable("Unexpected coro_await_suspend invocation method");
  }

  if (CB->getCalledFunction()->getIntrinsicID() ==
      Intrinsic::coro_await_suspend_handle) {
    // Follow the lowered await_suspend call above with a lowered resume call
    // to the returned coroutine.
    if (auto *Invoke = dyn_cast<InvokeInst>(Val: CB)) {
      // If the await_suspend call is an invoke, we continue in the next block.
      Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt());
    }

    // Extract the resume function pointer (subfn index 0) from the handle.
    coro::LowererBase LB(*Wrapper->getParent());
    auto *ResumeAddr = LB.makeSubFnCall(Arg: NewCall, Index: CoroSubFnInst::ResumeIndex,
                                        InsertPt: &*Builder.GetInsertPoint());

    LLVMContext &Ctx = Builder.getContext();
    FunctionType *ResumeTy = FunctionType::get(
        Result: Type::getVoidTy(C&: Ctx), Params: PointerType::getUnqual(C&: Ctx), isVarArg: false);
    auto *ResumeCall = Builder.CreateCall(FTy: ResumeTy, Callee: ResumeAddr, Args: {NewCall});
    ResumeCall->setCallingConv(CallingConv::Fast);

    // We can't insert the 'ret' instruction and adjust the cc until the
    // function has been split, so remember this for later.
    Shape.SymmetricTransfers.push_back(Elt: ResumeCall);

    NewCall = ResumeCall;
  }

  CB->replaceAllUsesWith(V: NewCall);
  CB->eraseFromParent();
}
151
152static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) {
153 IRBuilder<> Builder(F.getContext());
154 for (auto *AWS : Shape.CoroAwaitSuspends)
155 lowerAwaitSuspend(Builder, CB: AWS, Shape);
156}
157
158static void maybeFreeRetconStorage(IRBuilder<> &Builder,
159 const coro::Shape &Shape, Value *FramePtr,
160 CallGraph *CG) {
161 assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
162 if (Shape.RetconLowering.IsFrameInlineInStorage)
163 return;
164
165 Shape.emitDealloc(Builder, Ptr: FramePtr, CG);
166}
167
/// Replace an llvm.coro.end.async.
/// Will inline the must tail call function call if there is one.
/// \returns true if cleanup of the coro.end block is needed, false otherwise.
static bool replaceCoroEndAsync(AnyCoroEndInst *End) {
  IRBuilder<> Builder(End);

  // A plain (non-async) coro.end just becomes a void return; the caller must
  // still split away the remainder of the block.
  auto *EndAsync = dyn_cast<CoroAsyncEndInst>(Val: End);
  if (!EndAsync) {
    Builder.CreateRetVoid();
    return true /*needs cleanup of coro.end block*/;
  }

  // An async coro.end with no must-tail call behaves the same way.
  auto *MustTailCallFunc = EndAsync->getMustTailCallFunction();
  if (!MustTailCallFunc) {
    Builder.CreateRetVoid();
    return true /*needs cleanup of coro.end block*/;
  }

  // Move the must tail call from the predecessor block into the end block.
  auto *CoroEndBlock = End->getParent();
  auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor();
  assert(MustTailCallFuncBlock && "Must have a single predecessor block");
  auto It = MustTailCallFuncBlock->getTerminator()->getIterator();
  // The must-tail call is the instruction immediately before that block's
  // terminator.
  auto *MustTailCall = cast<CallInst>(Val: &*std::prev(x: It));
  CoroEndBlock->splice(ToIt: End->getIterator(), FromBB: MustTailCallFuncBlock,
                       FromIt: MustTailCall->getIterator());

  // Insert the return instruction.
  Builder.SetInsertPoint(End);
  Builder.CreateRetVoid();
  InlineFunctionInfo FnInfo;

  // Remove the rest of the block, by splitting it into an unreachable block.
  auto *BB = End->getParent();
  BB->splitBasicBlock(I: End);
  BB->getTerminator()->eraseFromParent();

  // Inline the must-tail call target now that it sits right before the ret.
  auto InlineRes = InlineFunction(CB&: *MustTailCall, IFI&: FnInfo);
  assert(InlineRes.isSuccess() && "Expected inlining to succeed");
  (void)InlineRes;

  // We have cleaned up the coro.end block above.
  return false;
}
212
213/// Replace a non-unwind call to llvm.coro.end.
214static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
215 const coro::Shape &Shape, Value *FramePtr,
216 bool InRamp, CallGraph *CG) {
217 // Start inserting right before the coro.end.
218 IRBuilder<> Builder(End);
219
220 // Create the return instruction.
221 switch (Shape.ABI) {
222 // The cloned functions in switch-lowering always return void.
223 case coro::ABI::Switch:
224 assert(!cast<CoroEndInst>(End)->hasResults() &&
225 "switch coroutine should not return any values");
226 // coro.end doesn't immediately end the coroutine in the main function
227 // in this lowering, because we need to deallocate the coroutine.
228 if (InRamp)
229 return;
230 Builder.CreateRetVoid();
231 break;
232
233 // In async lowering this returns.
234 case coro::ABI::Async: {
235 bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End);
236 if (!CoroEndBlockNeedsCleanup)
237 return;
238 break;
239 }
240
241 // In unique continuation lowering, the continuations always return void.
242 // But we may have implicitly allocated storage.
243 case coro::ABI::RetconOnce: {
244 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
245 auto *CoroEnd = cast<CoroEndInst>(Val: End);
246 auto *RetTy = Shape.getResumeFunctionType()->getReturnType();
247
248 if (!CoroEnd->hasResults()) {
249 assert(RetTy->isVoidTy());
250 Builder.CreateRetVoid();
251 break;
252 }
253
254 auto *CoroResults = CoroEnd->getResults();
255 unsigned NumReturns = CoroResults->numReturns();
256
257 if (auto *RetStructTy = dyn_cast<StructType>(Val: RetTy)) {
258 assert(RetStructTy->getNumElements() == NumReturns &&
259 "numbers of returns should match resume function singature");
260 Value *ReturnValue = PoisonValue::get(T: RetStructTy);
261 unsigned Idx = 0;
262 for (Value *RetValEl : CoroResults->return_values())
263 ReturnValue = Builder.CreateInsertValue(Agg: ReturnValue, Val: RetValEl, Idxs: Idx++);
264 Builder.CreateRet(V: ReturnValue);
265 } else if (NumReturns == 0) {
266 assert(RetTy->isVoidTy());
267 Builder.CreateRetVoid();
268 } else {
269 assert(NumReturns == 1);
270 Builder.CreateRet(V: *CoroResults->retval_begin());
271 }
272 CoroResults->replaceAllUsesWith(
273 V: ConstantTokenNone::get(Context&: CoroResults->getContext()));
274 CoroResults->eraseFromParent();
275 break;
276 }
277
278 // In non-unique continuation lowering, we signal completion by returning
279 // a null continuation.
280 case coro::ABI::Retcon: {
281 assert(!cast<CoroEndInst>(End)->hasResults() &&
282 "retcon coroutine should not return any values");
283 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
284 auto RetTy = Shape.getResumeFunctionType()->getReturnType();
285 auto RetStructTy = dyn_cast<StructType>(Val: RetTy);
286 PointerType *ContinuationTy =
287 cast<PointerType>(Val: RetStructTy ? RetStructTy->getElementType(N: 0) : RetTy);
288
289 Value *ReturnValue = ConstantPointerNull::get(T: ContinuationTy);
290 if (RetStructTy) {
291 ReturnValue = Builder.CreateInsertValue(Agg: PoisonValue::get(T: RetStructTy),
292 Val: ReturnValue, Idxs: 0);
293 }
294 Builder.CreateRet(V: ReturnValue);
295 break;
296 }
297 }
298
299 // Remove the rest of the block, by splitting it into an unreachable block.
300 auto *BB = End->getParent();
301 BB->splitBasicBlock(I: End);
302 BB->getTerminator()->eraseFromParent();
303}
304
305/// Create a pointer to the switch index field in the coroutine frame.
306static Value *createSwitchIndexPtr(const coro::Shape &Shape,
307 IRBuilder<> &Builder, Value *FramePtr) {
308 auto *Offset = ConstantInt::get(Ty: Type::getInt64Ty(C&: FramePtr->getContext()),
309 V: Shape.SwitchLowering.IndexOffset);
310 return Builder.CreateInBoundsPtrAdd(Ptr: FramePtr, Offset, Name: "index.addr");
311}
312
// Mark a coroutine as done, which implies that the coroutine is finished and
// never gets resumed.
//
// In resume-switched ABI, the done state is represented by storing zero in
// ResumeFnAddr.
//
// NOTE: We couldn't omit the argument `FramePtr`. It is necessary because the
// pointer to the frame in the split function is not stored in `Shape`.
static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
                                Value *FramePtr) {
  assert(
      Shape.ABI == coro::ABI::Switch &&
      "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
  // Resume function pointer is always first
  auto *NullPtr = ConstantPointerNull::get(T: Shape.getSwitchResumePointerType());
  Builder.CreateStore(Val: NullPtr, Ptr: FramePtr);

  // If the coroutine doesn't have an unwind coro.end, we could omit the store
  // to the final suspend point since we could infer the coroutine is suspended
  // at the final suspend point by the nullness of ResumeFnAddr.
  // However, we can't skip it if the coroutine has an unwind coro.end. Since
  // a coroutine that reaches the unwind coro.end is considered suspended at
  // the final suspend point (the ResumeFnAddr is null) but in fact the
  // coroutine didn't complete yet. We need the IndexVal for the final suspend
  // point to make the states clear.
  if (Shape.SwitchLowering.HasUnwindCoroEnd &&
      Shape.SwitchLowering.HasFinalSuspend) {
    assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() &&
           "The final suspend should only live in the last position of "
           "CoroSuspends.");
    // Store the final-suspend index so the state is unambiguous.
    ConstantInt *IndexVal = Shape.getIndex(Value: Shape.CoroSuspends.size() - 1);
    Value *FinalIndex = createSwitchIndexPtr(Shape, Builder, FramePtr);
    Builder.CreateStore(Val: IndexVal, Ptr: FinalIndex);
  }
}
348
/// Replace an unwind call to llvm.coro.end.
///
/// Unlike the fallthrough case this never emits a return: per ABI it marks
/// the coroutine done and/or frees retcon storage, and finishes with a
/// cleanupret when the coro.end carries a funclet bundle.
static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
                                 Value *FramePtr, bool InRamp, CallGraph *CG) {
  IRBuilder<> Builder(End);

  switch (Shape.ABI) {
  // In switch-lowering, this does nothing in the main function.
  case coro::ABI::Switch: {
    // In C++'s specification, the coroutine should be marked as done
    // if promise.unhandled_exception() throws. The frontend will
    // call coro.end(true) along this path.
    //
    // FIXME: We should refactor this once there is other language
    // which uses Switch-Resumed style other than C++.
    markCoroutineAsDone(Builder, Shape, FramePtr);
    if (InRamp)
      return;
    break;
  }
  // In async lowering this does nothing.
  case coro::ABI::Async:
    break;
  // In continuation-lowering, this frees the continuation storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    break;
  }

  // If coro.end has an associated bundle, add cleanupret instruction.
  if (auto Bundle = End->getOperandBundle(ID: LLVMContext::OB_funclet)) {
    auto *FromPad = cast<CleanupPadInst>(Val: Bundle->Inputs[0]);
    // Unwind to the caller (null unwind target); split away and drop
    // everything after the coro.end in this block.
    auto *CleanupRet = Builder.CreateCleanupRet(CleanupPad: FromPad, UnwindBB: nullptr);
    End->getParent()->splitBasicBlock(I: End);
    CleanupRet->getParent()->getTerminator()->eraseFromParent();
  }
}
386
387static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
388 Value *FramePtr, bool InRamp, CallGraph *CG) {
389 if (End->isUnwind())
390 replaceUnwindCoroEnd(End, Shape, FramePtr, InRamp, CG);
391 else
392 replaceFallthroughCoroEnd(End, Shape, FramePtr, InRamp, CG);
393 End->eraseFromParent();
394}
395
// In the resume function, we remove the last case (when coro::Shape is built,
// the final suspend point (if present) is always the last element of
// CoroSuspends array) since it is an undefined behavior to resume a coroutine
// suspended at the final suspend point.
// In the destroy function, if it isn't possible that the ResumeFnAddr is NULL
// and the coroutine doesn't suspend at the final suspend point actually (this
// is possible since the coroutine is considered suspended at the final suspend
// point if promise.unhandled_exception() exits via an exception), we can
// remove the last case.
void coro::BaseCloner::handleFinalSuspend() {
  assert(Shape.ABI == coro::ABI::Switch &&
         Shape.SwitchLowering.HasFinalSuspend);

  // With an unwind coro.end the destroy clone may legitimately be entered
  // while "suspended" at the final point, so its switch keeps the case.
  if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd)
    return;

  auto *Switch = cast<SwitchInst>(Val&: VMap[Shape.SwitchLowering.ResumeSwitch]);
  // The final suspend is always the last case in the cloned resume switch.
  auto FinalCaseIt = std::prev(x: Switch->case_end());
  BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
  Switch->removeCase(I: FinalCaseIt);
  if (isSwitchDestroyFunction()) {
    BasicBlock *OldSwitchBB = Switch->getParent();
    auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(I: Switch, BBName: "Switch");
    Builder.SetInsertPoint(OldSwitchBB->getTerminator());

    if (NewF->isCoroOnlyDestroyWhenComplete()) {
      // When the coroutine can only be destroyed when complete, we don't need
      // to generate code for other cases.
      Builder.CreateBr(Dest: ResumeBB);
    } else {
      // Resume function pointer is always first
      auto *Load =
          Builder.CreateLoad(Ty: Shape.getSwitchResumePointerType(), Ptr: NewFramePtr);
      // A null resume pointer means "done": take the final-suspend cleanup
      // path; otherwise fall through to the remaining switch cases.
      auto *Cond = Builder.CreateIsNull(Arg: Load);
      Builder.CreateCondBr(Cond, True: ResumeBB, False: NewSwitchBB);
    }
    OldSwitchBB->getTerminator()->eraseFromParent();
  }
}
435
436static FunctionType *
437getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
438 auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Val: Suspend);
439 auto *StructTy = cast<StructType>(Val: AsyncSuspend->getType());
440 auto &Context = Suspend->getParent()->getParent()->getContext();
441 auto *VoidTy = Type::getVoidTy(C&: Context);
442 return FunctionType::get(Result: VoidTy, Params: StructTy->elements(), isVarArg: false);
443}
444
445static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
446 const Twine &Suffix,
447 Module::iterator InsertBefore,
448 AnyCoroSuspendInst *ActiveSuspend) {
449 Module *M = OrigF.getParent();
450 auto *FnTy = (Shape.ABI != coro::ABI::Async)
451 ? Shape.getResumeFunctionType()
452 : getFunctionTypeFromAsyncSuspend(Suspend: ActiveSuspend);
453
454 Function *NewF =
455 Function::Create(Ty: FnTy, Linkage: GlobalValue::LinkageTypes::InternalLinkage,
456 AddrSpace: OrigF.getAddressSpace(), N: OrigF.getName() + Suffix);
457
458 M->getFunctionList().insert(where: InsertBefore, New: NewF);
459
460 return NewF;
461}
462
/// Replace uses of the active llvm.coro.suspend.retcon/async call with the
/// arguments to the continuation function.
///
/// This assumes that the builder has a meaningful insertion point.
void coro::BaseCloner::replaceRetconOrAsyncSuspendUses() {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
         Shape.ABI == coro::ABI::Async);

  auto NewS = VMap[ActiveSuspend];
  if (NewS->use_empty())
    return;

  // Copy out all the continuation arguments after the buffer pointer into
  // an easily-indexed data structure for convenience.
  SmallVector<Value *, 8> Args;
  // The async ABI includes all arguments -- including the first argument.
  bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
  for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(x: NewF->arg_begin()),
            E = NewF->arg_end();
       I != E; ++I)
    Args.push_back(Elt: &*I);

  // If the suspend returns a single scalar value, we can just do a simple
  // replacement.
  if (!isa<StructType>(Val: NewS->getType())) {
    assert(Args.size() == 1);
    NewS->replaceAllUsesWith(V: Args.front());
    return;
  }

  // Try to peephole extracts of an aggregate return.
  for (Use &U : llvm::make_early_inc_range(Range: NewS->uses())) {
    auto *EVI = dyn_cast<ExtractValueInst>(Val: U.getUser());
    if (!EVI || EVI->getNumIndices() != 1)
      continue;

    // A single-index extractvalue maps directly onto one continuation
    // argument.
    EVI->replaceAllUsesWith(V: Args[EVI->getIndices().front()]);
    EVI->eraseFromParent();
  }

  // If we have no remaining uses, we're done.
  if (NewS->use_empty())
    return;

  // Otherwise, we need to create an aggregate.
  Value *Aggr = PoisonValue::get(T: NewS->getType());
  for (auto [Idx, Arg] : llvm::enumerate(First&: Args))
    Aggr = Builder.CreateInsertValue(Agg: Aggr, Val: Arg, Idxs: Idx);

  NewS->replaceAllUsesWith(V: Aggr);
}
514
515void coro::BaseCloner::replaceCoroSuspends() {
516 Value *SuspendResult;
517
518 switch (Shape.ABI) {
519 // In switch lowering, replace coro.suspend with the appropriate value
520 // for the type of function we're extracting.
521 // Replacing coro.suspend with (0) will result in control flow proceeding to
522 // a resume label associated with a suspend point, replacing it with (1) will
523 // result in control flow proceeding to a cleanup label associated with this
524 // suspend point.
525 case coro::ABI::Switch:
526 SuspendResult = Builder.getInt8(C: isSwitchDestroyFunction() ? 1 : 0);
527 break;
528
529 // In async lowering there are no uses of the result.
530 case coro::ABI::Async:
531 return;
532
533 // In returned-continuation lowering, the arguments from earlier
534 // continuations are theoretically arbitrary, and they should have been
535 // spilled.
536 case coro::ABI::RetconOnce:
537 case coro::ABI::Retcon:
538 return;
539 }
540
541 for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
542 // The active suspend was handled earlier.
543 if (CS == ActiveSuspend)
544 continue;
545
546 auto *MappedCS = cast<AnyCoroSuspendInst>(Val&: VMap[CS]);
547 MappedCS->replaceAllUsesWith(V: SuspendResult);
548 MappedCS->eraseFromParent();
549 }
550}
551
552void coro::BaseCloner::replaceCoroEnds() {
553 for (AnyCoroEndInst *CE : Shape.CoroEnds) {
554 // We use a null call graph because there's no call graph node for
555 // the cloned function yet. We'll just be rebuilding that later.
556 auto *NewCE = cast<AnyCoroEndInst>(Val&: VMap[CE]);
557 replaceCoroEnd(End: NewCE, Shape, FramePtr: NewFramePtr, /*in ramp*/ InRamp: false, CG: nullptr);
558 }
559}
560
561void coro::BaseCloner::replaceCoroIsInRamp() {
562 auto &Ctx = OrigF.getContext();
563 for (auto *II : Shape.CoroIsInRampInsts) {
564 auto *NewII = cast<CoroIsInRampInst>(Val&: VMap[II]);
565 NewII->replaceAllUsesWith(V: ConstantInt::getFalse(Context&: Ctx));
566 NewII->eraseFromParent();
567 }
568}
569
// Rewrite the swifterror intrinsic calls recorded in Shape.SwiftErrorOps into
// loads/stores of a swifterror slot: either the function's swifterror
// argument or an on-demand swifterror alloca in the entry block.
// When \p VMap is non-null, each recorded call is first mapped to its clone;
// when it is null the original function is updated in place (which
// invalidates Shape.SwiftErrorOps).
static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
                                 ValueToValueMapTy *VMap) {
  // An async coroutine with no suspend points is left untouched.
  if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty())
    return;
  // Slot shared by all rewritten operations; created lazily below.
  Value *CachedSlot = nullptr;
  auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
    if (CachedSlot)
      return CachedSlot;

    // Check if the function has a swifterror argument.
    for (auto &Arg : F.args()) {
      if (Arg.isSwiftError()) {
        CachedSlot = &Arg;
        return &Arg;
      }
    }

    // Create a swifterror alloca.
    IRBuilder<> Builder(&F.getEntryBlock(),
                        F.getEntryBlock().getFirstNonPHIOrDbg());
    auto Alloca = Builder.CreateAlloca(Ty: ValueTy);
    Alloca->setSwiftError(true);

    CachedSlot = Alloca;
    return Alloca;
  };

  for (CallInst *Op : Shape.SwiftErrorOps) {
    auto MappedOp = VMap ? cast<CallInst>(Val&: (*VMap)[Op]) : Op;
    IRBuilder<> Builder(MappedOp);

    // If there are no arguments, this is a 'get' operation.
    Value *MappedResult;
    if (Op->arg_empty()) {
      auto ValueTy = Op->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      MappedResult = Builder.CreateLoad(Ty: ValueTy, Ptr: Slot);
    } else {
      // Otherwise it is a 'set': store the value and forward the slot itself.
      assert(Op->arg_size() == 1);
      auto Value = MappedOp->getArgOperand(i: 0);
      auto ValueTy = Value->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      Builder.CreateStore(Val: Value, Ptr: Slot);
      MappedResult = Slot;
    }

    MappedOp->replaceAllUsesWith(V: MappedResult);
    MappedOp->eraseFromParent();
  }

  // If we're updating the original function, we've invalidated SwiftErrorOps.
  if (VMap == nullptr) {
    Shape.SwiftErrorOps.clear();
  }
}
625
626/// Returns all debug records in F.
627static SmallVector<DbgVariableRecord *>
628collectDbgVariableRecords(Function &F) {
629 SmallVector<DbgVariableRecord *> DbgVariableRecords;
630 for (auto &I : instructions(F)) {
631 for (DbgVariableRecord &DVR : filterDbgVars(R: I.getDbgRecordRange()))
632 DbgVariableRecords.push_back(Elt: &DVR);
633 }
634 return DbgVariableRecords;
635}
636
// Rewrite swifterror intrinsics in the cloned function, mapping each recorded
// call through VMap to its clone.
void coro::BaseCloner::replaceSwiftErrorOps() {
  ::replaceSwiftErrorOps(F&: *NewF, Shape, VMap: &VMap);
}
640
// Salvage debug-variable records in the cloned function, then remove records
// that became unreachable or stale after the split.
void coro::BaseCloner::salvageDebugInfo() {
  auto DbgVariableRecords = collectDbgVariableRecords(F&: *NewF);
  // Cache shared across all salvage calls, mapping arguments to allocas.
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;

  // Only 64-bit ABIs have a register we can refer to with the entry value.
  bool UseEntryValue = OrigF.getParent()->getTargetTriple().isArch64Bit();
  for (DbgVariableRecord *DVR : DbgVariableRecords)
    coro::salvageDebugInfo(ArgToAllocaMap, DVR&: *DVR, UseEntryValue);

  // Remove all salvaged dbg.declare intrinsics that became
  // either unreachable or stale due to the CoroSplit transformation.
  DominatorTree DomTree(*NewF);
  auto IsUnreachableBlock = [&](BasicBlock *BB) {
    return !isPotentiallyReachable(From: &NewF->getEntryBlock(), To: BB, ExclusionSet: nullptr,
                                   DT: &DomTree);
  };
  auto RemoveOne = [&](DbgVariableRecord *DVI) {
    if (IsUnreachableBlock(DVI->getParent()))
      DVI->eraseFromParent();
    else if (isa_and_nonnull<AllocaInst>(Val: DVI->getVariableLocationOp(OpIdx: 0))) {
      // Count all non-debuginfo uses in reachable blocks.
      unsigned Uses = 0;
      for (auto *User : DVI->getVariableLocationOp(OpIdx: 0)->users())
        if (auto *I = dyn_cast<Instruction>(Val: User))
          if (!isa<AllocaInst>(Val: I) && !IsUnreachableBlock(I->getParent()))
            ++Uses;
      // An alloca with no remaining real uses makes the record stale.
      if (!Uses)
        DVI->eraseFromParent();
    }
  };
  for_each(Range&: DbgVariableRecords, F: RemoveOne);
}
673
// Make the cloned AllocaSpillBlock the entry block of the clone: detach its
// old predecessor edge, branch from it to the ABI-appropriate resume point,
// and rescue any still-used static allocas stranded in unreachable blocks.
void coro::BaseCloner::replaceEntryBlock() {
  // In the original function, the AllocaSpillBlock is a block immediately
  // following the allocation of the frame object which defines GEPs for
  // all the allocas that have been moved into the frame, and it ends by
  // branching to the original beginning of the coroutine. Make this
  // the entry block of the cloned function.
  auto *Entry = cast<BasicBlock>(Val&: VMap[Shape.AllocaSpillBlock]);
  auto *OldEntry = &NewF->getEntryBlock();
  Entry->setName("entry" + Suffix);
  Entry->moveBefore(MovePos: OldEntry);
  Entry->getTerminator()->eraseFromParent();

  // Clear all predecessors of the new entry block. There should be
  // exactly one predecessor, which we created when splitting out
  // AllocaSpillBlock to begin with.
  assert(Entry->hasOneUse());
  auto BranchToEntry = cast<UncondBrInst>(Val: Entry->user_back());
  Builder.SetInsertPoint(BranchToEntry);
  Builder.CreateUnreachable();
  BranchToEntry->eraseFromParent();

  // Branch from the entry to the appropriate place.
  Builder.SetInsertPoint(Entry);
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    // In switch-lowering, we built a resume-entry block in the original
    // function. Make the entry block branch to this.
    auto *SwitchBB =
        cast<BasicBlock>(Val&: VMap[Shape.SwitchLowering.ResumeEntryBlock]);
    Builder.CreateBr(Dest: SwitchBB);
    SwitchBB->moveAfter(MovePos: Entry);
    break;
  }
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    // In continuation ABIs, we want to branch to immediately after the
    // active suspend point. Earlier phases will have put the suspend in its
    // own basic block, so just thread our jump directly to its successor.
    assert((Shape.ABI == coro::ABI::Async &&
            isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
           ((Shape.ABI == coro::ABI::Retcon ||
             Shape.ABI == coro::ABI::RetconOnce) &&
            isa<CoroSuspendRetconInst>(ActiveSuspend)));
    auto *MappedCS = cast<AnyCoroSuspendInst>(Val&: VMap[ActiveSuspend]);
    auto Branch = cast<UncondBrInst>(Val: MappedCS->getNextNode());
    Builder.CreateBr(Dest: Branch->getSuccessor(i: 0));
    break;
  }
  }

  // Any static alloca that's still being used but not reachable from the new
  // entry needs to be moved to the new entry.
  Function *F = OldEntry->getParent();
  DominatorTree DT{*F};
  for (Instruction &I : llvm::make_early_inc_range(Range: instructions(F))) {
    auto *Alloca = dyn_cast<AllocaInst>(Val: &I);
    if (!Alloca || I.use_empty())
      continue;
    // Reachable allocas and dynamic (non-constant-size) allocas stay put.
    if (DT.isReachableFromEntry(A: I.getParent()) ||
        !isa<ConstantInt>(Val: Alloca->getArraySize()))
      continue;
    I.moveBefore(BB&: *Entry, I: Entry->getFirstInsertionPt());
  }
}
739
/// Derive the value of the new frame pointer.
///
/// \returns a Value usable as the frame pointer inside the cloned function;
/// its derivation depends on the coroutine ABI.
Value *coro::BaseCloner::deriveNewFramePointer() {
  // Builder should be inserting to the front of the new entry block.

  switch (Shape.ABI) {
  // In switch-lowering, the argument is the frame pointer.
  case coro::ABI::Switch:
    return &*NewF->arg_begin();
  // In async-lowering, one of the arguments is an async context as determined
  // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context of
  // the resume function from the async context projection function associated
  // with the active suspend. The frame is located as a tail to the async
  // context header.
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(Val: ActiveSuspend);
    // The low byte of the storage argument index encodes which clone argument
    // carries the callee's async context.
    auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
    auto *CalleeContext = NewF->getArg(i: ContextIdx);
    auto *ProjectionFunc =
        ActiveAsyncSuspend->getAsyncContextProjectionFunction();
    auto DbgLoc =
        cast<CoroSuspendAsyncInst>(Val&: VMap[ActiveSuspend])->getDebugLoc();
    // Calling i8* (i8*)
    auto *CallerContext = Builder.CreateCall(FTy: ProjectionFunc->getFunctionType(),
                                             Callee: ProjectionFunc, Args: CalleeContext);
    CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
    CallerContext->setDebugLoc(DbgLoc);
    // The frame is located after the async_context header.
    auto &Context = Builder.getContext();
    auto *FramePtrAddr = Builder.CreateInBoundsPtrAdd(
        Ptr: CallerContext,
        Offset: ConstantInt::get(Ty: Type::getInt64Ty(C&: Context),
                         V: Shape.AsyncLowering.FrameOffset),
        Name: "async.ctx.frameptr");
    // Inline the projection function.
    InlineFunctionInfo InlineInfo;
    auto InlineRes = InlineFunction(CB&: *CallerContext, IFI&: InlineInfo);
    assert(InlineRes.isSuccess());
    (void)InlineRes;
    return FramePtrAddr;
  }
  // In continuation-lowering, the argument is the opaque storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    Argument *NewStorage = &*NewF->arg_begin();
    auto FramePtrTy = PointerType::getUnqual(C&: Shape.FramePtr->getContext());

    // If the storage is inline, just bitcast to the storage to the frame type.
    if (Shape.RetconLowering.IsFrameInlineInStorage)
      return NewStorage;

    // Otherwise, load the real frame from the opaque storage.
    return Builder.CreateLoad(Ty: FramePtrTy, Ptr: NewStorage);
  }
  }
  llvm_unreachable("bad ABI");
}
796
/// Adjust the scope line of the funclet to the first line number after the
/// suspend point. This avoids a jump in the line table from the function
/// declaration (where prologue instructions are attributed to) to the suspend
/// point.
/// Only adjust the scope line when the files are the same.
/// If no candidate line number is found, fallback to the line of ActiveSuspend.
static void updateScopeLine(Instruction *ActiveSuspend,
                            DISubprogram &SPToUpdate) {
  if (!ActiveSuspend)
    return;

  // No subsequent instruction -> fallback to the location of ActiveSuspend.
  if (!ActiveSuspend->getNextNode()) {
    if (auto DL = ActiveSuspend->getDebugLoc())
      if (SPToUpdate.getFile() == DL->getFile())
        SPToUpdate.setScopeLine(DL->getLine());
    return;
  }

  BasicBlock::iterator Successor = ActiveSuspend->getNextNode()->getIterator();
  // Corosplit splits the BB around ActiveSuspend, so the meaningful
  // instructions are not in the same BB.
  // FIXME: remove this hardcoded number of tries.
  for (unsigned Repeat = 0; Repeat < 2; Repeat++) {
    // Hop across unconditional branches introduced by the split.
    auto *Branch = dyn_cast_or_null<UncondBrInst>(Val&: Successor);
    if (!Branch)
      break;
    Successor = Branch->getSuccessor()->getFirstNonPHIOrDbg();
  }

  // Find the first successor of ActiveSuspend with a non-zero line location.
  // If that matches the file of ActiveSuspend, use it.
  BasicBlock *PBB = Successor->getParent();
  for (; Successor != PBB->end(); Successor = std::next(x: Successor)) {
    Successor = skipDebugIntrinsics(It: Successor);
    auto DL = Successor->getDebugLoc();
    // Line 0 is the "unknown location" marker; keep scanning past it.
    if (!DL || DL.getLine() == 0)
      continue;

    if (SPToUpdate.getFile() == DL->getFile()) {
      SPToUpdate.setScopeLine(DL.getLine());
      return;
    }

    // First real location is in a different file: give up on the scan.
    break;
  }

  // If the search above failed, fallback to the location of ActiveSuspend.
  if (auto DL = ActiveSuspend->getDebugLoc())
    if (SPToUpdate.getFile() == DL->getFile())
      SPToUpdate.setScopeLine(DL->getLine());
}
849
850static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
851 unsigned ParamIndex, uint64_t Size,
852 Align Alignment, bool NoAlias) {
853 AttrBuilder ParamAttrs(Context);
854 ParamAttrs.addAttribute(Val: Attribute::NonNull);
855 ParamAttrs.addAttribute(Val: Attribute::NoUndef);
856
857 if (NoAlias)
858 ParamAttrs.addAttribute(Val: Attribute::NoAlias);
859
860 ParamAttrs.addAlignmentAttr(Align: Alignment);
861 ParamAttrs.addDereferenceableAttr(Bytes: Size);
862 Attrs = Attrs.addParamAttributes(C&: Context, ArgNo: ParamIndex, B: ParamAttrs);
863}
864
865static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context,
866 unsigned ParamIndex) {
867 AttrBuilder ParamAttrs(Context);
868 ParamAttrs.addAttribute(Val: Attribute::SwiftAsync);
869 Attrs = Attrs.addParamAttributes(C&: Context, ArgNo: ParamIndex, B: ParamAttrs);
870}
871
872static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
873 unsigned ParamIndex) {
874 AttrBuilder ParamAttrs(Context);
875 ParamAttrs.addAttribute(Val: Attribute::SwiftSelf);
876 Attrs = Attrs.addParamAttributes(C&: Context, ArgNo: ParamIndex, B: ParamAttrs);
877}
878
/// Clone the body of the original function into a resume function of
/// some sort.
void coro::BaseCloner::create() {
  assert(NewF);

  // Replace all args with dummy instructions. If an argument is the old frame
  // pointer, the dummy will be replaced by the new frame pointer once it is
  // computed below. Uses of all other arguments should have already been
  // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine
  // frame.
  SmallVector<Instruction *> DummyArgs;
  for (Argument &A : OrigF.args()) {
    DummyArgs.push_back(Elt: new FreezeInst(PoisonValue::get(T: A.getType())));
    VMap[&A] = DummyArgs.back();
  }

  SmallVector<ReturnInst *, 4> Returns;

  // Ignore attempts to change certain attributes of the function.
  // TODO: maybe there should be a way to suppress this during cloning?
  auto savedVisibility = NewF->getVisibility();
  auto savedUnnamedAddr = NewF->getUnnamedAddr();
  auto savedDLLStorageClass = NewF->getDLLStorageClass();

  // NewF's linkage (which CloneFunctionInto does *not* change) might not
  // be compatible with the visibility of OrigF (which it *does* change),
  // so protect against that.
  auto savedLinkage = NewF->getLinkage();
  NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);

  CloneFunctionInto(NewFunc: NewF, OldFunc: &OrigF, VMap,
                    Changes: CloneFunctionChangeType::LocalChangesOnly, Returns);

  auto &Context = NewF->getContext();

  // The clone received a distinct DISubprogram; retarget its (and its
  // declaration's) linkage name at the clone and fix up the scope line.
  if (DISubprogram *SP = NewF->getSubprogram()) {
    assert(SP != OrigF.getSubprogram() && SP->isDistinct());
    updateScopeLine(ActiveSuspend, SPToUpdate&: *SP);

    // Update the linkage name and the function name to reflect the modified
    // name.
    MDString *NewLinkageName = MDString::get(Context, Str: NewF->getName());
    SP->replaceLinkageName(LN: NewLinkageName);
    if (DISubprogram *Decl = SP->getDeclaration()) {
      TempDISubprogram NewDecl = Decl->clone();
      NewDecl->replaceLinkageName(LN: NewLinkageName);
      SP->replaceDeclaration(Decl: MDNode::replaceWithUniqued(N: std::move(NewDecl)));
    }
  }

  // Restore the attributes CloneFunctionInto may have clobbered.
  NewF->setLinkage(savedLinkage);
  NewF->setVisibility(savedVisibility);
  NewF->setUnnamedAddr(savedUnnamedAddr);
  NewF->setDLLStorageClass(savedDLLStorageClass);
  // The function sanitizer metadata needs to match the signature of the
  // function it is being attached to. However this does not hold for split
  // functions here. Thus remove the metadata for split functions.
  if (Shape.ABI == coro::ABI::Switch &&
      NewF->hasMetadata(KindID: LLVMContext::MD_func_sanitize))
    NewF->eraseMetadata(KindID: LLVMContext::MD_func_sanitize);

  // Replace the attributes of the new function:
  auto OrigAttrs = NewF->getAttributes();
  auto NewAttrs = AttributeList();

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Bootstrap attributes by copying function attributes from the
    // original function. This should include optimization settings and so on.
    NewAttrs = NewAttrs.addFnAttributes(
        C&: Context, B: AttrBuilder(Context, OrigAttrs.getFnAttrs()));

    addFramePointerAttrs(Attrs&: NewAttrs, Context, ParamIndex: 0, Size: Shape.FrameSize,
                         Alignment: Shape.FrameAlign, /*NoAlias=*/false);
    break;
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(Val: ActiveSuspend);
    if (OrigF.hasParamAttribute(ArgNo: Shape.AsyncLowering.ContextArgNo,
                                Kind: Attribute::SwiftAsync)) {
      // The suspend packs two parameter indices into one u32: low byte is the
      // async context argument, high bits the optional swiftself argument.
      uint32_t ArgAttributeIndices =
          ActiveAsyncSuspend->getStorageArgumentIndex();
      auto ContextArgIndex = ArgAttributeIndices & 0xff;
      addAsyncContextAttrs(Attrs&: NewAttrs, Context, ParamIndex: ContextArgIndex);

      // `swiftasync` must precede `swiftself` so 0 is not a valid index for
      // `swiftself`.
      auto SwiftSelfIndex = ArgAttributeIndices >> 8;
      if (SwiftSelfIndex)
        addSwiftSelfAttrs(Attrs&: NewAttrs, Context, ParamIndex: SwiftSelfIndex);
    }

    // Transfer the original function's attributes.
    auto FnAttrs = OrigF.getAttributes().getFnAttrs();
    NewAttrs = NewAttrs.addFnAttributes(C&: Context, B: AttrBuilder(Context, FnAttrs));
    break;
  }
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // If we have a continuation prototype, just use its attributes,
    // full-stop.
    NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();

    /// FIXME: Is it really good to add the NoAlias attribute?
    addFramePointerAttrs(Attrs&: NewAttrs, Context, ParamIndex: 0,
                         Size: Shape.getRetconCoroId()->getStorageSize(),
                         Alignment: Shape.getRetconCoroId()->getStorageAlignment(),
                         /*NoAlias=*/true);

    break;
  }

  switch (Shape.ABI) {
  // In these ABIs, the cloned functions always return 'void', and the
  // existing return sites are meaningless. Note that for unique
  // continuations, this includes the returns associated with suspends;
  // this is fine because we can't suspend twice.
  case coro::ABI::Switch:
  case coro::ABI::RetconOnce:
    // Remove old returns.
    for (ReturnInst *Return : Returns)
      changeToUnreachable(I: Return);
    break;

  // With multi-suspend continuations, we'll already have eliminated the
  // original returns and inserted returns before all the suspend points,
  // so we want to leave any returns in place.
  case coro::ABI::Retcon:
    break;
  // Async lowering will insert musttail call functions at all suspend points
  // followed by a return.
  // Don't change returns to unreachable because that will trip up the verifier.
  // These returns should be unreachable from the clone.
  case coro::ABI::Async:
    break;
  }

  NewF->setAttributes(NewAttrs);
  NewF->setCallingConv(Shape.getResumeFunctionCC());

  // Set up the new entry block.
  replaceEntryBlock();

  // Turn symmetric transfers into musttail calls.
  for (CallInst *ResumeCall : Shape.SymmetricTransfers) {
    ResumeCall = cast<CallInst>(Val&: VMap[ResumeCall]);
    if (TTI.supportsTailCallFor(CB: ResumeCall)) {
      // FIXME: Could we support symmetric transfer effectively without
      // musttail?
      ResumeCall->setTailCallKind(CallInst::TCK_MustTail);
    }

    // Put a 'ret void' after the call, and split any remaining instructions to
    // an unreachable block.
    BasicBlock *BB = ResumeCall->getParent();
    BB->splitBasicBlock(I: ResumeCall->getNextNode());
    Builder.SetInsertPoint(BB->getTerminator());
    Builder.CreateRetVoid();
    BB->getTerminator()->eraseFromParent();
  }

  Builder.SetInsertPoint(&NewF->getEntryBlock().front());
  NewFramePtr = deriveNewFramePointer();

  // Remap frame pointer.
  Value *OldFramePtr = VMap[Shape.FramePtr];
  NewFramePtr->takeName(V: OldFramePtr);
  OldFramePtr->replaceAllUsesWith(V: NewFramePtr);

  // Remap vFrame pointer.
  auto *NewVFrame = Builder.CreateBitCast(
      V: NewFramePtr, DestTy: PointerType::getUnqual(C&: Builder.getContext()), Name: "vFrame");
  Value *OldVFrame = cast<Value>(Val&: VMap[Shape.CoroBegin]);
  if (OldVFrame != NewVFrame)
    OldVFrame->replaceAllUsesWith(V: NewVFrame);

  // All uses of the arguments should have been resolved by this point,
  // so we can safely remove the dummy values.
  for (Instruction *DummyArg : DummyArgs) {
    DummyArg->replaceAllUsesWith(V: PoisonValue::get(T: DummyArg->getType()));
    DummyArg->deleteValue();
  }

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Rewrite final suspend handling as it is not done via switch (allows to
    // remove final case from the switch, since it is undefined behavior to
    // resume the coroutine suspended at the final suspend point.
    if (Shape.SwitchLowering.HasFinalSuspend)
      handleFinalSuspend();
    break;
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // Replace uses of the active suspend with the corresponding
    // continuation-function arguments.
    assert(ActiveSuspend != nullptr &&
           "no active suspend when lowering a continuation-style coroutine");
    replaceRetconOrAsyncSuspendUses();
    break;
  }

  // Handle suspends.
  replaceCoroSuspends();

  // Handle swifterror.
  replaceSwiftErrorOps();

  // Remove coro.end intrinsics.
  replaceCoroEnds();

  replaceCoroIsInRamp();

  // Salvage debug info that points into the coroutine frame.
  salvageDebugInfo();
}
1094
/// Create one of the switch-ABI clones (resume/destroy/cleanup): declare the
/// clone, run the shared cloning machinery, then specialize for cleanup.
void coro::SwitchCloner::create() {
  // Create a new function matching the original type
  NewF = createCloneDeclaration(OrigF, Shape, Suffix, InsertBefore: OrigF.getParent()->end(),
                                ActiveSuspend);

  // Clone the function
  coro::BaseCloner::create();

  // Replacing coro.free with 'null' in cleanup to suppress deallocation code.
  if (FKind == coro::CloneKind::SwitchCleanup)
    elideCoroFree(FramePtr: NewFramePtr);
}
1107
1108static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
1109 assert(Shape.ABI == coro::ABI::Async);
1110
1111 auto *FuncPtrStruct = cast<ConstantStruct>(
1112 Val: Shape.AsyncLowering.AsyncFuncPointer->getInitializer());
1113 auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(i_nocapture: 0);
1114 auto *OrigContextSize = FuncPtrStruct->getOperand(i_nocapture: 1);
1115 auto *NewContextSize = ConstantInt::get(Ty: OrigContextSize->getType(),
1116 V: Shape.AsyncLowering.ContextSize);
1117 auto *NewFuncPtrStruct = ConstantStruct::get(
1118 T: FuncPtrStruct->getType(), Vs: OrigRelativeFunOffset, Vs: NewContextSize);
1119
1120 Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
1121}
1122
1123static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
1124 if (Shape.ABI == coro::ABI::Async)
1125 updateAsyncFuncPointerContextSize(Shape);
1126
1127 for (CoroAlignInst *CA : Shape.CoroAligns) {
1128 CA->replaceAllUsesWith(
1129 V: ConstantInt::get(Ty: CA->getType(), V: Shape.FrameAlign.value()));
1130 CA->eraseFromParent();
1131 }
1132
1133 if (Shape.CoroSizes.empty())
1134 return;
1135
1136 // In the same function all coro.sizes should have the same result type.
1137 auto *SizeIntrin = Shape.CoroSizes.back();
1138 auto *SizeConstant = ConstantInt::get(Ty: SizeIntrin->getType(),
1139 V: TypeSize::getFixed(ExactSize: Shape.FrameSize));
1140
1141 for (CoroSizeInst *CS : Shape.CoroSizes) {
1142 CS->replaceAllUsesWith(V: SizeConstant);
1143 CS->eraseFromParent();
1144 }
1145}
1146
1147static void postSplitCleanup(Function &F) {
1148 removeUnreachableBlocks(F);
1149
1150#ifndef NDEBUG
1151 // For now, we do a mandatory verification step because we don't
1152 // entirely trust this pass. Note that we don't want to add a verifier
1153 // pass to FPM below because it will also verify all the global data.
1154 if (verifyFunction(F, &errs()))
1155 report_fatal_error("Broken function");
1156#endif
1157}
1158
// Coroutine has no suspend points. Remove heap allocation for the coroutine
// frame if possible.
static void handleNoSuspendCoroutine(coro::Shape &Shape) {
  auto *CoroBegin = Shape.CoroBegin;
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    if (auto *AllocInst = Shape.getSwitchCoroId()->getCoroAlloc()) {
      coro::elideCoroFree(FramePtr: CoroBegin);

      IRBuilder<> Builder(AllocInst);
      // Create an alloca for a byte array of the frame size
      auto *FrameTy = ArrayType::get(ElementType: Type::getInt8Ty(C&: Builder.getContext()),
                                     NumElements: Shape.FrameSize);
      auto *Frame = Builder.CreateAlloca(
          Ty: FrameTy, ArraySize: nullptr, Name: AllocInst->getFunction()->getName() + ".Frame");
      Frame->setAlignment(Shape.FrameAlign);
      // coro.alloc asked "should we heap-allocate?"; with the stack frame in
      // place the answer is now 'false'.
      AllocInst->replaceAllUsesWith(V: Builder.getFalse());
      AllocInst->eraseFromParent();
      CoroBegin->replaceAllUsesWith(V: Frame);
    } else {
      // No coro.alloc: the caller-provided memory is the frame.
      CoroBegin->replaceAllUsesWith(V: CoroBegin->getMem());
    }

    break;
  }
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    CoroBegin->replaceAllUsesWith(V: PoisonValue::get(T: CoroBegin->getType()));
    break;
  }

  CoroBegin->eraseFromParent();
  Shape.CoroBegin = nullptr;
}
1194
1195// SimplifySuspendPoint needs to check that there is no calls between
1196// coro_save and coro_suspend, since any of the calls may potentially resume
1197// the coroutine and if that is the case we cannot eliminate the suspend point.
1198static bool hasCallsInBlockBetween(iterator_range<BasicBlock::iterator> R) {
1199 for (Instruction &I : R) {
1200 // Assume that no intrinsic can resume the coroutine.
1201 if (isa<IntrinsicInst>(Val: I))
1202 continue;
1203
1204 if (isa<CallBase>(Val: I))
1205 return true;
1206 }
1207 return false;
1208}
1209
1210static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
1211 SmallPtrSet<BasicBlock *, 8> Set;
1212 SmallVector<BasicBlock *, 8> Worklist;
1213
1214 Set.insert(Ptr: SaveBB);
1215 Worklist.push_back(Elt: ResDesBB);
1216
1217 // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
1218 // returns a token consumed by suspend instruction, all blocks in between
1219 // will have to eventually hit SaveBB when going backwards from ResDesBB.
1220 while (!Worklist.empty()) {
1221 auto *BB = Worklist.pop_back_val();
1222 Set.insert(Ptr: BB);
1223 for (auto *Pred : predecessors(BB))
1224 if (!Set.contains(Ptr: Pred))
1225 Worklist.push_back(Elt: Pred);
1226 }
1227
1228 // SaveBB and ResDesBB are checked separately in hasCallsBetween.
1229 Set.erase(Ptr: SaveBB);
1230 Set.erase(Ptr: ResDesBB);
1231
1232 for (auto *BB : Set)
1233 if (hasCallsInBlockBetween(R: {BB->getFirstNonPHIIt(), BB->end()}))
1234 return true;
1235
1236 return false;
1237}
1238
1239static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
1240 auto *SaveBB = Save->getParent();
1241 auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
1242 BasicBlock::iterator SaveIt = Save->getIterator();
1243 BasicBlock::iterator ResumeOrDestroyIt = ResumeOrDestroy->getIterator();
1244
1245 if (SaveBB == ResumeOrDestroyBB)
1246 return hasCallsInBlockBetween(R: {std::next(x: SaveIt), ResumeOrDestroyIt});
1247
1248 // Any calls from Save to the end of the block?
1249 if (hasCallsInBlockBetween(R: {std::next(x: SaveIt), SaveBB->end()}))
1250 return true;
1251
1252 // Any calls from begging of the block up to ResumeOrDestroy?
1253 if (hasCallsInBlockBetween(
1254 R: {ResumeOrDestroyBB->getFirstNonPHIIt(), ResumeOrDestroyIt}))
1255 return true;
1256
1257 // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
1258 if (hasCallsInBlocksBetween(SaveBB, ResDesBB: ResumeOrDestroyBB))
1259 return true;
1260
1261 return false;
1262}
1263
// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
// suspend point and replace it with normal control flow.
static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
                                 CoroBeginInst *CoroBegin) {
  // The candidate call must be the instruction immediately before the
  // suspend, possibly as the terminator of its single predecessor block.
  Instruction *Prev = Suspend->getPrevNode();
  if (!Prev) {
    auto *Pred = Suspend->getParent()->getSinglePredecessor();
    if (!Pred)
      return false;
    Prev = Pred->getTerminator();
  }

  CallBase *CB = dyn_cast<CallBase>(Val: Prev);
  if (!CB)
    return false;

  auto *Callee = CB->getCalledOperand()->stripPointerCasts();

  // See if the callsite is for resumption or destruction of the coroutine.
  auto *SubFn = dyn_cast<CoroSubFnInst>(Val: Callee);
  if (!SubFn)
    return false;

  // Does not refer to the current coroutine, we cannot do anything with it.
  if (SubFn->getFrame() != CoroBegin)
    return false;

  // See if the transformation is safe. Specifically, see if there are any
  // calls in between Save and CallInstr. They can potentially resume the
  // coroutine rendering this optimization unsafe.
  auto *Save = Suspend->getCoroSave();
  if (hasCallsBetween(Save, ResumeOrDestroy: CB))
    return false;

  // Replace llvm.coro.suspend with the value that results in resumption over
  // the resume or cleanup path.
  Suspend->replaceAllUsesWith(V: SubFn->getRawIndex());
  Suspend->eraseFromParent();
  Save->eraseFromParent();

  // No longer need a call to coro.resume or coro.destroy.
  // An invoke is replaced by a plain branch to its normal destination.
  if (auto *Invoke = dyn_cast<InvokeInst>(Val: CB)) {
    UncondBrInst::Create(Target: Invoke->getNormalDest(), InsertBefore: Invoke->getIterator());
  }

  // Grab the CalledValue from CB before erasing the CallInstr.
  auto *CalledValue = CB->getCalledOperand();
  CB->eraseFromParent();

  // If no more users remove it. Usually it is a bitcast of SubFn.
  if (CalledValue != SubFn && CalledValue->user_empty())
    if (auto *I = dyn_cast<Instruction>(Val: CalledValue))
      I->eraseFromParent();

  // Now we are good to remove SubFn.
  if (SubFn->user_empty())
    SubFn->eraseFromParent();

  return true;
}
1324
// Remove suspend points that are simplified.
static void simplifySuspendPoints(coro::Shape &Shape) {
  // Currently, the only simplification we do is switch-lowering-specific.
  if (Shape.ABI != coro::ABI::Switch)
    return;

  auto &S = Shape.CoroSuspends;
  size_t I = 0, N = S.size();
  if (N == 0)
    return;

  // Compact S in place: a simplified suspend at index I is replaced by the
  // last live element (swap with S[N-1]) rather than shifting the tail.
  size_t ChangedFinalIndex = std::numeric_limits<size_t>::max();
  while (true) {
    auto SI = cast<CoroSuspendInst>(Val: S[I]);
    // Leave final.suspend to handleFinalSuspend since it is undefined behavior
    // to resume a coroutine suspended at the final suspend point.
    if (!SI->isFinal() && simplifySuspendPoint(Suspend: SI, CoroBegin: Shape.CoroBegin)) {
      if (--N == I)
        break;

      std::swap(a&: S[I], b&: S[N]);

      // The swap may have moved the final suspend away from the back;
      // remember where it landed so we can restore the invariant below.
      if (cast<CoroSuspendInst>(Val: S[I])->isFinal()) {
        assert(Shape.SwitchLowering.HasFinalSuspend);
        ChangedFinalIndex = I;
      }

      continue;
    }
    if (++I == N)
      break;
  }
  S.resize(N);

  // Move final.suspend back in case it was swapped out of position,
  // because we require the final suspend to be the last element of
  // CoroSuspends.
  if (ChangedFinalIndex < N) {
    assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal());
    std::swap(a&: S[ChangedFinalIndex], b&: S.back());
  }
}
1366
1367namespace {
1368
1369struct SwitchCoroutineSplitter {
1370 static void split(Function &F, coro::Shape &Shape,
1371 SmallVectorImpl<Function *> &Clones,
1372 TargetTransformInfo &TTI) {
1373 assert(Shape.ABI == coro::ABI::Switch);
1374
1375 // Create a resume clone by cloning the body of the original function,
1376 // setting new entry block and replacing coro.suspend an appropriate value
1377 // to force resume or cleanup pass for every suspend point.
1378 createResumeEntryBlock(F, Shape);
1379 auto *ResumeClone = coro::SwitchCloner::createClone(
1380 OrigF&: F, Suffix: ".resume", Shape, FKind: coro::CloneKind::SwitchResume, TTI);
1381 auto *DestroyClone = coro::SwitchCloner::createClone(
1382 OrigF&: F, Suffix: ".destroy", Shape, FKind: coro::CloneKind::SwitchUnwind, TTI);
1383 auto *CleanupClone = coro::SwitchCloner::createClone(
1384 OrigF&: F, Suffix: ".cleanup", Shape, FKind: coro::CloneKind::SwitchCleanup, TTI);
1385
1386 postSplitCleanup(F&: *ResumeClone);
1387 postSplitCleanup(F&: *DestroyClone);
1388 postSplitCleanup(F&: *CleanupClone);
1389
1390 // Store addresses resume/destroy/cleanup functions in the coroutine frame.
1391 updateCoroFrame(Shape, ResumeFn: ResumeClone, DestroyFn: DestroyClone, CleanupFn: CleanupClone);
1392
1393 assert(Clones.empty());
1394 Clones.push_back(Elt: ResumeClone);
1395 Clones.push_back(Elt: DestroyClone);
1396 Clones.push_back(Elt: CleanupClone);
1397
1398 // Create a constant array referring to resume/destroy/clone functions
1399 // pointed by the last argument of @llvm.coro.info, so that CoroElide pass
1400 // can determined correct function to call.
1401 setCoroInfo(F, Shape, Fns: Clones);
1402 }
1403
1404 // Create a variant of ramp function that does not perform heap allocation
1405 // for a switch ABI coroutine.
1406 //
1407 // The newly split `.noalloc` ramp function has the following differences:
1408 // - Has one additional frame pointer parameter in lieu of dynamic
1409 // allocation.
1410 // - Suppressed allocations by replacing coro.alloc and coro.free.
1411 static Function *createNoAllocVariant(Function &F, coro::Shape &Shape,
1412 SmallVectorImpl<Function *> &Clones) {
1413 assert(Shape.ABI == coro::ABI::Switch);
1414 auto *OrigFnTy = F.getFunctionType();
1415 auto OldParams = OrigFnTy->params();
1416
1417 SmallVector<Type *> NewParams;
1418 NewParams.reserve(N: OldParams.size() + 1);
1419 NewParams.append(in_start: OldParams.begin(), in_end: OldParams.end());
1420 NewParams.push_back(Elt: PointerType::getUnqual(C&: Shape.FramePtr->getContext()));
1421
1422 auto *NewFnTy = FunctionType::get(Result: OrigFnTy->getReturnType(), Params: NewParams,
1423 isVarArg: OrigFnTy->isVarArg());
1424 Function *NoAllocF = Function::Create(
1425 Ty: NewFnTy, Linkage: F.getLinkage(), AddrSpace: F.getAddressSpace(), N: F.getName() + ".noalloc");
1426
1427 ValueToValueMapTy VMap;
1428 unsigned int Idx = 0;
1429 for (const auto &I : F.args()) {
1430 VMap[&I] = NoAllocF->getArg(i: Idx++);
1431 }
1432 // We just appended the frame pointer as the last argument of the new
1433 // function.
1434 auto FrameIdx = NoAllocF->arg_size() - 1;
1435 SmallVector<ReturnInst *, 4> Returns;
1436 CloneFunctionInto(NewFunc: NoAllocF, OldFunc: &F, VMap,
1437 Changes: CloneFunctionChangeType::LocalChangesOnly, Returns);
1438
1439 if (Shape.CoroBegin) {
1440 auto *NewCoroBegin =
1441 cast_if_present<CoroBeginInst>(Val&: VMap[Shape.CoroBegin]);
1442 coro::elideCoroFree(FramePtr: NewCoroBegin);
1443 coro::suppressCoroAllocs(CoroId: cast<CoroIdInst>(Val: NewCoroBegin->getId()));
1444 NewCoroBegin->replaceAllUsesWith(V: NoAllocF->getArg(i: FrameIdx));
1445 NewCoroBegin->eraseFromParent();
1446 }
1447
1448 Module *M = F.getParent();
1449 M->getFunctionList().insert(where: M->end(), New: NoAllocF);
1450
1451 removeUnreachableBlocks(F&: *NoAllocF);
1452 auto NewAttrs = NoAllocF->getAttributes();
1453 // When we elide allocation, we read these attributes to determine the
1454 // frame size and alignment.
1455 addFramePointerAttrs(Attrs&: NewAttrs, Context&: NoAllocF->getContext(), ParamIndex: FrameIdx,
1456 Size: Shape.FrameSize, Alignment: Shape.FrameAlign,
1457 /*NoAlias=*/false);
1458
1459 NoAllocF->setAttributes(NewAttrs);
1460
1461 Clones.push_back(Elt: NoAllocF);
1462 // Reset the original function's coro info, make the new noalloc variant
1463 // connected to the original ramp function.
1464 setCoroInfo(F, Shape, Fns: Clones);
1465 // After copying, set the linkage to internal linkage. Original function
1466 // may have different linkage, but optimization dependent on this function
1467 // generally relies on LTO.
1468 NoAllocF->setLinkage(llvm::GlobalValue::InternalLinkage);
1469 return NoAllocF;
1470 }
1471
1472private:
1473 // Create an entry block for a resume function with a switch that will jump to
1474 // suspend points.
1475 static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
1476 LLVMContext &C = F.getContext();
1477
1478 DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
1479 DISubprogram *DIS = F.getSubprogram();
1480 // If there is no DISubprogram for F, it implies the function is compiled
1481 // without debug info. So we also don't generate debug info for the
1482 // suspension points.
1483 bool AddDebugLabels = DIS && DIS->getUnit() &&
1484 (DIS->getUnit()->getEmissionKind() ==
1485 DICompileUnit::DebugEmissionKind::FullDebug);
1486
1487 // resume.entry:
1488 // %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32
1489 // 0, i32 2 % index = load i32, i32* %index.addr switch i32 %index, label
1490 // %unreachable [
1491 // i32 0, label %resume.0
1492 // i32 1, label %resume.1
1493 // ...
1494 // ]
1495
1496 auto *NewEntry = BasicBlock::Create(Context&: C, Name: "resume.entry", Parent: &F);
1497 auto *UnreachBB = BasicBlock::Create(Context&: C, Name: "unreachable", Parent: &F);
1498
1499 IRBuilder<> Builder(NewEntry);
1500 auto *FramePtr = Shape.FramePtr;
1501 Value *GepIndex = createSwitchIndexPtr(Shape, Builder, FramePtr);
1502 auto *Index = Builder.CreateLoad(Ty: Shape.getIndexType(), Ptr: GepIndex, Name: "index");
1503 auto *Switch =
1504 Builder.CreateSwitch(V: Index, Dest: UnreachBB, NumCases: Shape.CoroSuspends.size());
1505 Shape.SwitchLowering.ResumeSwitch = Switch;
1506
1507 // Split all coro.suspend calls
1508 size_t SuspendIndex = 0;
1509 for (auto *AnyS : Shape.CoroSuspends) {
1510 auto *S = cast<CoroSuspendInst>(Val: AnyS);
1511 ConstantInt *IndexVal = Shape.getIndex(Value: SuspendIndex);
1512
1513 // Replace CoroSave with a store to Index:
1514 // %index.addr = getelementptr %f.frame... (index field number)
1515 // store i32 %IndexVal, i32* %index.addr1
1516 auto *Save = S->getCoroSave();
1517 Builder.SetInsertPoint(Save);
1518 if (S->isFinal()) {
1519 // The coroutine should be marked done if it reaches the final suspend
1520 // point.
1521 markCoroutineAsDone(Builder, Shape, FramePtr);
1522 } else {
1523 Value *GepIndex = createSwitchIndexPtr(Shape, Builder, FramePtr);
1524 Builder.CreateStore(Val: IndexVal, Ptr: GepIndex);
1525 }
1526
1527 Save->replaceAllUsesWith(V: ConstantTokenNone::get(Context&: C));
1528 Save->eraseFromParent();
1529
1530 // Split block before and after coro.suspend and add a jump from an entry
1531 // switch:
1532 //
1533 // whateverBB:
1534 // whatever
1535 // %0 = call i8 @llvm.coro.suspend(token none, i1 false)
1536 // switch i8 %0, label %suspend[i8 0, label %resume
1537 // i8 1, label %cleanup]
1538 // becomes:
1539 //
1540 // whateverBB:
1541 // whatever
1542 // br label %resume.0.landing
1543 //
1544 // resume.0: ; <--- jump from the switch in the resume.entry
1545 // #dbg_label(...) ; <--- artificial label for debuggers
1546 // %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
1547 // br label %resume.0.landing
1548 //
1549 // resume.0.landing:
1550 // %1 = phi i8[-1, %whateverBB], [%0, %resume.0]
1551 // switch i8 % 1, label %suspend [i8 0, label %resume
1552 // i8 1, label %cleanup]
1553
1554 auto *SuspendBB = S->getParent();
1555 auto *ResumeBB =
1556 SuspendBB->splitBasicBlock(I: S, BBName: "resume." + Twine(SuspendIndex));
1557 auto *LandingBB = ResumeBB->splitBasicBlock(
1558 I: S->getNextNode(), BBName: ResumeBB->getName() + Twine(".landing"));
1559 Switch->addCase(OnVal: IndexVal, Dest: ResumeBB);
1560
1561 cast<UncondBrInst>(Val: SuspendBB->getTerminator())->setSuccessor(LandingBB);
1562 auto *PN = PHINode::Create(Ty: Builder.getInt8Ty(), NumReservedValues: 2, NameStr: "");
1563 PN->insertBefore(InsertPos: LandingBB->begin());
1564 S->replaceAllUsesWith(V: PN);
1565 PN->addIncoming(V: Builder.getInt8(C: -1), BB: SuspendBB);
1566 PN->addIncoming(V: S, BB: ResumeBB);
1567
1568 if (AddDebugLabels) {
1569 if (DebugLoc SuspendLoc = S->getDebugLoc()) {
1570 std::string LabelName =
1571 ("__coro_resume_" + Twine(SuspendIndex)).str();
1572 // Take the "inlined at" location recursively, if present. This is
1573 // mandatory as the DILabel insertion checks that the scopes of label
1574 // and the attached location match. This is not the case when the
1575 // suspend location has been inlined due to pointing to the original
1576 // scope.
1577 DILocation *DILoc = SuspendLoc;
1578 while (DILocation *InlinedAt = DILoc->getInlinedAt())
1579 DILoc = InlinedAt;
1580
1581 DILabel *ResumeLabel =
1582 DBuilder.createLabel(Scope: DIS, Name: LabelName, File: DILoc->getFile(),
1583 LineNo: SuspendLoc.getLine(), Column: SuspendLoc.getCol(),
1584 /*IsArtificial=*/true,
1585 /*CoroSuspendIdx=*/SuspendIndex,
1586 /*AlwaysPreserve=*/false);
1587 DBuilder.insertLabel(LabelInfo: ResumeLabel, DL: DILoc, InsertPt: ResumeBB->begin());
1588 }
1589 }
1590
1591 ++SuspendIndex;
1592 }
1593
1594 Builder.SetInsertPoint(UnreachBB);
1595 Builder.CreateUnreachable();
1596 DBuilder.finalize();
1597
1598 Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
1599 }
1600
// Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
//
// Under the switch-lowering ABI the frame begins with the function pointers
// the runtime dispatches through: the resume pointer at offset 0 (the frame
// pointer itself) and the destroy pointer at SwitchLowering.DestroyOffset.
static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
                            Function *DestroyFn, Function *CleanupFn) {
  // Emit the stores right after the frame pointer is established.
  IRBuilder<> Builder(&*Shape.getInsertPtAfterFramePtr());
  LLVMContext &C = ResumeFn->getContext();

  // Resume function pointer: stored at the very start of the frame.
  Value *ResumeAddr = Shape.FramePtr;
  Builder.CreateStore(Val: ResumeFn, Ptr: ResumeAddr);

  Value *DestroyOrCleanupFn = DestroyFn;

  CoroIdInst *CoroId = Shape.getSwitchCoroId();
  if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
    // If there is a CoroAlloc and it returns false (meaning we elide the
    // allocation), use CleanupFn instead of DestroyFn: select the destroy
    // slot's value at runtime based on the coro.alloc result.
    DestroyOrCleanupFn = Builder.CreateSelect(C: CA, True: DestroyFn, False: CleanupFn);
  }

  // Destroy function pointer: stored at DestroyOffset within the frame.
  Value *DestroyAddr = Builder.CreateInBoundsPtrAdd(
      Ptr: Shape.FramePtr,
      Offset: ConstantInt::get(Ty: Type::getInt64Ty(C),
                            V: Shape.SwitchLowering.DestroyOffset),
      Name: "destroy.addr");
  Builder.CreateStore(Val: DestroyOrCleanupFn, Ptr: DestroyAddr);
}
1628
// Create a global constant array containing pointers to functions provided
// and set Info parameter of CoroBegin to point at this constant. Example:
//
//   @f.resumers = internal constant [2 x void(%f.frame*)*]
//                 [void(%f.frame*)* @f.resume, void(%f.frame*)*
//                 @f.destroy]
//   define void @f() {
//     ...
//     call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
//                    i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to
//                    i8*))
//
// Assumes that all the functions have the same signature.
static void setCoroInfo(Function &F, coro::Shape &Shape,
                        ArrayRef<Function *> Fns) {
  // This only works under the switch-lowering ABI because coro elision
  // only works on the switch-lowering ABI.
  SmallVector<Constant *, 4> Args(Fns);
  assert(!Args.empty());
  Function *Part = *Fns.begin();
  Module *M = Part->getParent();
  // All parts share a signature, so the first part's type works for the
  // array element type.
  auto *ArrTy = ArrayType::get(ElementType: Part->getType(), NumElements: Args.size());

  auto *ConstVal = ConstantArray::get(T: ArrTy, V: Args);
  // Private linkage: the resumers table is only referenced through the
  // coro.begin info operand of this function.
  auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
                                GlobalVariable::PrivateLinkage, ConstVal,
                                F.getName() + Twine(".resumers"));

  // Update coro.begin instruction to refer to this constant.
  LLVMContext &C = F.getContext();
  auto *BC = ConstantExpr::getPointerCast(C: GV, Ty: PointerType::getUnqual(C));
  Shape.getSwitchCoroId()->setInfo(BC);
}
1662};
1663
1664} // namespace
1665
/// Rewire a suspend's llvm.coro.async.resume to the continuation function.
///
/// All uses of the resume intrinsic's result are replaced with \p Continuation
/// (cast to an opaque pointer), the intrinsic is erased, and the suspend's
/// resume-function operand is set to poison since the intrinsic it referred to
/// no longer exists.
static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
                                       Value *Continuation) {
  auto *ResumeIntrinsic = Suspend->getResumeFunction();
  auto &Context = Suspend->getParent()->getParent()->getContext();
  auto *Int8PtrTy = PointerType::getUnqual(C&: Context);

  // Insert the cast right at the old intrinsic so all existing users can see
  // the new value.
  IRBuilder<> Builder(ResumeIntrinsic);
  auto *Val = Builder.CreateBitOrPointerCast(V: Continuation, DestTy: Int8PtrTy);
  ResumeIntrinsic->replaceAllUsesWith(V: Val);
  ResumeIntrinsic->eraseFromParent();
  Suspend->setOperand(i_nocapture: CoroSuspendAsyncInst::ResumeFunctionArg,
                      Val_nocapture: PoisonValue::get(T: Int8PtrTy));
}
1679
1680/// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs.
1681static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
1682 ArrayRef<Value *> FnArgs,
1683 SmallVectorImpl<Value *> &CallArgs) {
1684 size_t ArgIdx = 0;
1685 for (auto *paramTy : FnTy->params()) {
1686 assert(ArgIdx < FnArgs.size());
1687 if (paramTy != FnArgs[ArgIdx]->getType())
1688 CallArgs.push_back(
1689 Elt: Builder.CreateBitOrPointerCast(V: FnArgs[ArgIdx], DestTy: paramTy));
1690 else
1691 CallArgs.push_back(Elt: FnArgs[ArgIdx]);
1692 ++ArgIdx;
1693 }
1694}
1695
1696CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
1697 TargetTransformInfo &TTI,
1698 ArrayRef<Value *> Arguments,
1699 IRBuilder<> &Builder) {
1700 auto *FnTy = MustTailCallFn->getFunctionType();
1701 // Coerce the arguments, llvm optimizations seem to ignore the types in
1702 // vaarg functions and throws away casts in optimized mode.
1703 SmallVector<Value *, 8> CallArgs;
1704 coerceArguments(Builder, FnTy, FnArgs: Arguments, CallArgs);
1705
1706 auto *TailCall = Builder.CreateCall(FTy: FnTy, Callee: MustTailCallFn, Args: CallArgs);
1707 // Skip targets which don't support tail call.
1708 if (TTI.supportsTailCallFor(CB: TailCall)) {
1709 TailCall->setTailCallKind(CallInst::TCK_MustTail);
1710 }
1711 TailCall->setDebugLoc(Loc);
1712 TailCall->setCallingConv(MustTailCallFn->getCallingConv());
1713 return TailCall;
1714}
1715
/// Split an async-ABI coroutine.
///
/// Computes the frame pointer inside the caller-provided async context,
/// creates one continuation declaration per suspend point, routes each
/// suspend through a fresh return block that performs (and inlines) the
/// suspend's must-tail call, and finally clones the funclet bodies into the
/// continuation functions collected in \p Clones.
void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
                                    SmallVectorImpl<Function *> &Clones,
                                    TargetTransformInfo &TTI) {
  assert(Shape.ABI == coro::ABI::Async);
  assert(Clones.empty());
  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Kind: Attribute::NoReturn);
  F.removeRetAttr(Kind: Attribute::NoAlias);
  F.removeRetAttr(Kind: Attribute::NonNull);

  auto &Context = F.getContext();
  auto *Int8PtrTy = PointerType::getUnqual(C&: Context);

  auto *Id = Shape.getAsyncCoroId();
  IRBuilder<> Builder(Id);

  // The frame lives at a fixed offset inside the async context storage
  // supplied through the coro.id.async intrinsic.
  auto *FramePtr = Id->getStorage();
  FramePtr = Builder.CreateBitOrPointerCast(V: FramePtr, DestTy: Int8PtrTy);
  FramePtr = Builder.CreateInBoundsPtrAdd(
      Ptr: FramePtr,
      Offset: ConstantInt::get(Ty: Type::getInt64Ty(C&: Context),
                            V: Shape.AsyncLowering.FrameOffset),
      Name: "async.ctx.frameptr");

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Value> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(V: FramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create all the functions in order after the main function.
  auto NextF = std::next(x: F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(N: Shape.CoroSuspends.size());
  for (auto [Idx, CS] : llvm::enumerate(First&: Shape.CoroSuspends)) {
    auto *Suspend = cast<CoroSuspendAsyncInst>(Val: CS);

    // Create the clone declaration. Swift-specific projection functions get
    // Swift mangling suffixes ("TQ"/"TY") so the runtime/debugger can
    // recognize await resume points.
    auto ResumeNameSuffix = ".resume.";
    auto ProjectionFunctionName =
        Suspend->getAsyncContextProjectionFunction()->getName();
    bool UseSwiftMangling = false;
    if (ProjectionFunctionName == "__swift_async_resume_project_context") {
      ResumeNameSuffix = "TQ";
      UseSwiftMangling = true;
    } else if (ProjectionFunctionName == "__swift_async_resume_get_context") {
      ResumeNameSuffix = "TY";
      UseSwiftMangling = true;
    }
    auto *Continuation = createCloneDeclaration(
        OrigF&: F, Shape,
        Suffix: UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
                               : ResumeNameSuffix + Twine(Idx),
        InsertBefore: NextF, ActiveSuspend: Suspend);
    Clones.push_back(Elt: Continuation);

    // Insert a branch to a new return block immediately before the suspend
    // point.
    auto *SuspendBB = Suspend->getParent();
    auto *NewSuspendBB = SuspendBB->splitBasicBlock(I: Suspend);
    auto *Branch = cast<UncondBrInst>(Val: SuspendBB->getTerminator());

    // Place it before the first suspend.
    auto *ReturnBB =
        BasicBlock::Create(Context&: F.getContext(), Name: "coro.return", Parent: &F, InsertBefore: NewSuspendBB);
    Branch->setSuccessor(idx: 0, NewSucc: ReturnBB);

    IRBuilder<> Builder(ReturnBB);

    // Insert the call to the tail call function and inline it.
    auto *Fn = Suspend->getMustTailCallFunction();
    SmallVector<Value *, 8> Args(Suspend->args());
    // Skip the leading operands of the suspend up to and including the
    // must-tail-call function itself; the rest are the call's arguments.
    auto FnArgs = ArrayRef<Value *>(Args).drop_front(
        N: CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
    auto *TailCall = coro::createMustTailCall(Loc: Suspend->getDebugLoc(), MustTailCallFn: Fn, TTI,
                                              Arguments: FnArgs, Builder);
    Builder.CreateRetVoid();
    InlineFunctionInfo FnInfo;
    (void)InlineFunction(CB&: *TailCall, IFI&: FnInfo);

    // Replace the llvm.coro.async.resume intrinsic call.
    replaceAsyncResumeFunction(Suspend, Continuation);
  }

  assert(Clones.size() == Shape.CoroSuspends.size());

  // Now that every continuation declaration exists, clone the funclet bodies.
  for (auto [Idx, CS] : llvm::enumerate(First&: Shape.CoroSuspends)) {
    auto *Suspend = CS;
    auto *Clone = Clones[Idx];

    coro::BaseCloner::createClone(OrigF&: F, Suffix: "resume." + Twine(Idx), Shape, NewF: Clone,
                                  ActiveSuspend: Suspend, TTI);
  }
}
1814
/// Split a retcon / retcon.once coroutine.
///
/// Allocates frame storage (unless the frame fits inline in the provided
/// buffer), creates one continuation declaration per suspend point, funnels
/// every suspend into a single shared return block that returns the next
/// continuation pointer plus the directly-yielded values, and finally clones
/// the funclet bodies into the continuations collected in \p Clones.
void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
                                        SmallVectorImpl<Function *> &Clones,
                                        TargetTransformInfo &TTI) {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
  assert(Clones.empty());

  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Kind: Attribute::NoReturn);
  F.removeRetAttr(Kind: Attribute::NoAlias);
  F.removeRetAttr(Kind: Attribute::NonNull);

  // Allocate the frame.
  auto *Id = Shape.getRetconCoroId();
  Value *RawFramePtr;
  if (Shape.RetconLowering.IsFrameInlineInStorage) {
    RawFramePtr = Id->getStorage();
  } else {
    IRBuilder<> Builder(Id);

    auto FrameSize = Builder.getInt64(C: Shape.FrameSize);

    // Allocate. We don't need to update the call graph node because we're
    // going to recompute it from scratch after splitting.
    // FIXME: pass the required alignment
    RawFramePtr = Shape.emitAlloc(Builder, Size: FrameSize, CG: nullptr);
    RawFramePtr =
        Builder.CreateBitCast(V: RawFramePtr, DestTy: Shape.CoroBegin->getType());

    // Stash the allocated frame pointer in the continuation storage.
    Builder.CreateStore(Val: RawFramePtr, Ptr: Id->getStorage());
  }

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Value> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(V: RawFramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create a unique return block; built lazily on the first suspend below.
  BasicBlock *ReturnBB = nullptr;
  PHINode *ContinuationPhi = nullptr;
  SmallVector<PHINode *, 4> ReturnPHIs;

  // Create all the functions in order after the main function.
  auto NextF = std::next(x: F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(N: Shape.CoroSuspends.size());
  for (auto [Idx, CS] : llvm::enumerate(First&: Shape.CoroSuspends)) {
    auto Suspend = cast<CoroSuspendRetconInst>(Val: CS);

    // Create the clone declaration.
    auto Continuation = createCloneDeclaration(
        OrigF&: F, Shape, Suffix: ".resume." + Twine(Idx), InsertBefore: NextF, ActiveSuspend: nullptr);
    Clones.push_back(Elt: Continuation);

    // Insert a branch to the unified return block immediately before
    // the suspend point.
    auto SuspendBB = Suspend->getParent();
    auto NewSuspendBB = SuspendBB->splitBasicBlock(I: Suspend);
    auto Branch = cast<UncondBrInst>(Val: SuspendBB->getTerminator());

    // Create the unified return block.
    if (!ReturnBB) {
      // Place it before the first suspend.
      ReturnBB =
          BasicBlock::Create(Context&: F.getContext(), Name: "coro.return", Parent: &F, InsertBefore: NewSuspendBB);
      Shape.RetconLowering.ReturnBlock = ReturnBB;

      IRBuilder<> Builder(ReturnBB);

      // First, the continuation.
      ContinuationPhi =
          Builder.CreatePHI(Ty: Continuation->getType(), NumReservedValues: Shape.CoroSuspends.size());

      // Create PHIs for all other return values.
      assert(ReturnPHIs.empty());

      // Next, all the directly-yielded values.
      for (auto *ResultTy : Shape.getRetconResultTypes())
        ReturnPHIs.push_back(
            Elt: Builder.CreatePHI(Ty: ResultTy, NumReservedValues: Shape.CoroSuspends.size()));

      // Build the return value.
      auto RetTy = F.getReturnType();

      // Cast the continuation value if necessary.
      // We can't rely on the types matching up because that type would
      // have to be infinite.
      auto CastedContinuationTy =
          (ReturnPHIs.empty() ? RetTy : RetTy->getStructElementType(N: 0));
      auto *CastedContinuation =
          Builder.CreateBitCast(V: ContinuationPhi, DestTy: CastedContinuationTy);

      // With yielded values, the return is a struct: the continuation first,
      // then each yielded value in order.
      Value *RetV = CastedContinuation;
      if (!ReturnPHIs.empty()) {
        auto ValueIdx = 0;
        RetV = PoisonValue::get(T: RetTy);
        RetV = Builder.CreateInsertValue(Agg: RetV, Val: CastedContinuation, Idxs: ValueIdx++);

        for (auto Phi : ReturnPHIs)
          RetV = Builder.CreateInsertValue(Agg: RetV, Val: Phi, Idxs: ValueIdx++);
      }

      Builder.CreateRet(V: RetV);
    }

    // Branch to the return block.
    Branch->setSuccessor(idx: 0, NewSucc: ReturnBB);
    assert(ContinuationPhi);
    ContinuationPhi->addIncoming(V: Continuation, BB: SuspendBB);
    for (auto [Phi, VUse] :
         llvm::zip_equal(t&: ReturnPHIs, u: Suspend->value_operands()))
      Phi->addIncoming(V: VUse, BB: SuspendBB);
  }

  assert(Clones.size() == Shape.CoroSuspends.size());

  // Now that every continuation declaration exists, clone the funclet bodies.
  for (auto [Idx, CS] : llvm::enumerate(First&: Shape.CoroSuspends)) {
    auto Suspend = CS;
    auto Clone = Clones[Idx];

    coro::BaseCloner::createClone(OrigF&: F, Suffix: "resume." + Twine(Idx), Shape, NewF: Clone,
                                  ActiveSuspend: Suspend, TTI);
  }
}
1944
1945namespace {
1946class PrettyStackTraceFunction : public PrettyStackTraceEntry {
1947 Function &F;
1948
1949public:
1950 PrettyStackTraceFunction(Function &F) : F(F) {}
1951 void print(raw_ostream &OS) const override {
1952 OS << "While splitting coroutine ";
1953 F.printAsOperand(O&: OS, /*print type*/ PrintType: false, M: F.getParent());
1954 OS << "\n";
1955 }
1956};
1957} // namespace
1958
1959/// Remove calls to llvm.coro.end in the original function.
1960static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) {
1961 if (Shape.ABI != coro::ABI::Switch) {
1962 for (auto *End : Shape.CoroEnds) {
1963 replaceCoroEnd(End, Shape, FramePtr: Shape.FramePtr, /*in ramp*/ InRamp: true, CG: nullptr);
1964 }
1965 } else {
1966 for (llvm::AnyCoroEndInst *End : Shape.CoroEnds)
1967 End->eraseFromParent();
1968 }
1969}
1970
1971static void removeCoroIsInRampFromRampFunction(const coro::Shape &Shape) {
1972 for (auto *II : Shape.CoroIsInRampInsts) {
1973 auto &Ctx = II->getContext();
1974 II->replaceAllUsesWith(V: ConstantInt::getTrue(Context&: Ctx));
1975 II->eraseFromParent();
1976 }
1977}
1978
1979static bool hasSafeElideCaller(Function &F) {
1980 for (auto *U : F.users()) {
1981 if (auto *CB = dyn_cast<CallBase>(Val: U)) {
1982 auto *Caller = CB->getFunction();
1983 if (Caller && Caller->isPresplitCoroutine() &&
1984 CB->hasFnAttr(Kind: llvm::Attribute::CoroElideSafe))
1985 return true;
1986 }
1987 }
1988 return false;
1989}
1990
// Split a switch-lowered coroutine; the actual work is delegated to
// SwitchCoroutineSplitter, which populates \p Clones with the resume,
// destroy, and cleanup functions.
void coro::SwitchABI::splitCoroutine(Function &F, coro::Shape &Shape,
                                     SmallVectorImpl<Function *> &Clones,
                                     TargetTransformInfo &TTI) {
  SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
}
1996
/// Drive the complete split of coroutine \p F: suspend-point simplification,
/// frame construction, ABI-specific funclet cloning, and post-split cleanup
/// of the ramp function. The new funclets are collected in \p Clones.
static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
                             coro::BaseABI &ABI, TargetTransformInfo &TTI,
                             bool OptimizeFrame) {
  PrettyStackTraceFunction prettyStackTrace(F);

  auto &Shape = ABI.Shape;
  assert(Shape.CoroBegin);

  lowerAwaitSuspends(F, Shape);

  simplifySuspendPoints(Shape);

  normalizeCoroutine(F, Shape, TTI);
  ABI.buildCoroutineFrame(OptimizeFrame);
  replaceFrameSizeAndAlignment(Shape);

  bool isNoSuspendCoroutine = Shape.CoroSuspends.empty();

  // Decide this before splitting; only the switch ABI supports the
  // no-allocation variant, and it is gated on a coro_elide_safe caller.
  bool shouldCreateNoAllocVariant =
      !isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch &&
      hasSafeElideCaller(F) && !F.hasFnAttribute(Kind: llvm::Attribute::NoInline);

  // If there are no suspend points, no split required, just remove
  // the allocation and deallocation blocks, they are not needed.
  if (isNoSuspendCoroutine) {
    handleNoSuspendCoroutine(Shape);
  } else {
    ABI.splitCoroutine(F, Shape, Clones, TTI);
  }

  // Replace all the swifterror operations in the original function.
  // This invalidates SwiftErrorOps in the Shape.
  replaceSwiftErrorOps(F, Shape, VMap: nullptr);

  // Salvage debug intrinsics that point into the coroutine frame in the
  // original function. The Cloner has already salvaged debug info in the new
  // coroutine funclets.
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
  auto DbgVariableRecords = collectDbgVariableRecords(F);
  for (DbgVariableRecord *DVR : DbgVariableRecords)
    coro::salvageDebugInfo(ArgToAllocaMap, DVR&: *DVR, UseEntryValue: false /*UseEntryValue*/);

  removeCoroEndsFromRampFunction(Shape);
  removeCoroIsInRampFromRampFunction(Shape);

  if (shouldCreateNoAllocVariant)
    SwitchCoroutineSplitter::createNoAllocVariant(F, Shape, Clones);
}
2045
2046static LazyCallGraph::SCC &updateCallGraphAfterCoroutineSplit(
2047 LazyCallGraph::Node &N, const coro::Shape &Shape,
2048 const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
2049 LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
2050 FunctionAnalysisManager &FAM) {
2051
2052 auto *CurrentSCC = &C;
2053 if (!Clones.empty()) {
2054 switch (Shape.ABI) {
2055 case coro::ABI::Switch:
2056 // Each clone in the Switch lowering is independent of the other clones.
2057 // Let the LazyCallGraph know about each one separately.
2058 for (Function *Clone : Clones)
2059 CG.addSplitFunction(OriginalFunction&: N.getFunction(), NewFunction&: *Clone);
2060 break;
2061 case coro::ABI::Async:
2062 case coro::ABI::Retcon:
2063 case coro::ABI::RetconOnce:
2064 // Each clone in the Async/Retcon lowering references of the other clones.
2065 // Let the LazyCallGraph know about all of them at once.
2066 if (!Clones.empty())
2067 CG.addSplitRefRecursiveFunctions(OriginalFunction&: N.getFunction(), NewFunctions: Clones);
2068 break;
2069 }
2070
2071 // Let the CGSCC infra handle the changes to the original function.
2072 CurrentSCC = &updateCGAndAnalysisManagerForCGSCCPass(G&: CG, C&: *CurrentSCC, N, AM,
2073 UR, FAM);
2074 }
2075
2076 // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
2077 // to the split functions.
2078 postSplitCleanup(F&: N.getFunction());
2079 CurrentSCC = &updateCGAndAnalysisManagerForFunctionPass(G&: CG, C&: *CurrentSCC, N,
2080 AM, UR, FAM);
2081 return *CurrentSCC;
2082}
2083
/// Replace a call to llvm.coro.prepare.retcon (or .async).
///
/// The prepare intrinsic is an identity barrier around a coroutine function
/// pointer; once splitting has happened it can be dropped, folding any
/// bitcast round-trips back to a direct function reference.
static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
                           LazyCallGraph::SCC &C) {
  auto CastFn = Prepare->getArgOperand(i: 0); // as an i8*
  auto Fn = CastFn->stripPointerCasts();    // as its original type

  // Attempt to peephole this pattern:
  //    %0 = bitcast [[TYPE]] @some_function to i8*
  //    %1 = call @llvm.coro.prepare.retcon(i8* %0)
  //    %2 = bitcast %1 to [[TYPE]]
  // ==>
  //    %2 = @some_function
  for (Use &U : llvm::make_early_inc_range(Range: Prepare->uses())) {
    // Look for bitcasts back to the original function type.
    auto *Cast = dyn_cast<BitCastInst>(Val: U.getUser());
    if (!Cast || Cast->getType() != Fn->getType())
      continue;

    // Replace and remove the cast.
    Cast->replaceAllUsesWith(V: Fn);
    Cast->eraseFromParent();
  }

  // Replace any remaining uses with the function as an i8*.
  // This can never directly be a callee, so we don't need to update CG.
  Prepare->replaceAllUsesWith(V: CastFn);
  Prepare->eraseFromParent();

  // Kill dead bitcasts: walk the operand chain of now-unused casts, erasing
  // each one whose only consumer was the erased prepare call.
  while (auto *Cast = dyn_cast<BitCastInst>(Val: CastFn)) {
    if (!Cast->use_empty())
      break;
    CastFn = Cast->getOperand(i_nocapture: 0);
    Cast->eraseFromParent();
  }
}
2120
2121static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
2122 LazyCallGraph::SCC &C) {
2123 bool Changed = false;
2124 for (Use &P : llvm::make_early_inc_range(Range: PrepareFn->uses())) {
2125 // Intrinsics can only be used in calls.
2126 auto *Prepare = cast<CallInst>(Val: P.getUser());
2127 replacePrepare(Prepare, CG, C);
2128 Changed = true;
2129 }
2130
2131 return Changed;
2132}
2133
2134static void addPrepareFunction(const Module &M,
2135 SmallVectorImpl<Function *> &Fns,
2136 StringRef Name) {
2137 auto *PrepareFn = M.getFunction(Name);
2138 if (PrepareFn && !PrepareFn->use_empty())
2139 Fns.push_back(Elt: PrepareFn);
2140}
2141
2142static std::unique_ptr<coro::BaseABI>
2143CreateNewABI(Function &F, coro::Shape &S,
2144 std::function<bool(Instruction &)> IsMatCallback,
2145 const SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs) {
2146 if (S.CoroBegin->hasCustomABI()) {
2147 unsigned CustomABI = S.CoroBegin->getCustomABI();
2148 if (CustomABI >= GenCustomABIs.size())
2149 llvm_unreachable("Custom ABI not found amoung those specified");
2150 return GenCustomABIs[CustomABI](F, S);
2151 }
2152
2153 switch (S.ABI) {
2154 case coro::ABI::Switch:
2155 return std::make_unique<coro::SwitchABI>(args&: F, args&: S, args&: IsMatCallback);
2156 case coro::ABI::Async:
2157 return std::make_unique<coro::AsyncABI>(args&: F, args&: S, args&: IsMatCallback);
2158 case coro::ABI::Retcon:
2159 return std::make_unique<coro::AnyRetconABI>(args&: F, args&: S, args&: IsMatCallback);
2160 case coro::ABI::RetconOnce:
2161 return std::make_unique<coro::AnyRetconABI>(args&: F, args&: S, args&: IsMatCallback);
2162 }
2163 llvm_unreachable("Unknown ABI");
2164}
2165
// Default construction: use the stock trivially-materializable callback and
// no custom ABI generators.
CoroSplitPass::CoroSplitPass(bool OptimizeFrame)
    : CreateAndInitABI([](Function &F, coro::Shape &S) {
        // The ABI object is created and initialized per coroutine as the
        // pass encounters it.
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, IsMatCallback: coro::isTriviallyMaterializable, GenCustomABIs: {});
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}
2174
// Construction with custom ABI generators: coroutines whose coro.begin
// carries a custom ABI index are handled by the matching generator; the
// generators are captured by value into the factory lambda.
CoroSplitPass::CoroSplitPass(
    SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs, bool OptimizeFrame)
    : CreateAndInitABI([=](Function &F, coro::Shape &S) {
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, IsMatCallback: coro::isTriviallyMaterializable, GenCustomABIs);
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}
2184
// For back compatibility, constructor takes a materializable callback and
// creates a generator for an ABI with a modified materializable callback.
// The callback is captured by value into the factory lambda.
CoroSplitPass::CoroSplitPass(std::function<bool(Instruction &)> IsMatCallback,
                             bool OptimizeFrame)
    : CreateAndInitABI([=](Function &F, coro::Shape &S) {
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, IsMatCallback, GenCustomABIs: {});
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}
2196
// For back compatibility, constructor takes a materializable callback and
// creates a generator for an ABI with a modified materializable callback.
// Also accepts custom ABI generators; both are captured by value into the
// factory lambda.
CoroSplitPass::CoroSplitPass(
    std::function<bool(Instruction &)> IsMatCallback,
    SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs, bool OptimizeFrame)
    : CreateAndInitABI([=](Function &F, coro::Shape &S) {
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, IsMatCallback, GenCustomABIs);
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}
2209
2210PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
2211 CGSCCAnalysisManager &AM,
2212 LazyCallGraph &CG, CGSCCUpdateResult &UR) {
2213 // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
2214 // non-zero number of nodes, so we assume that here and grab the first
2215 // node's function's module.
2216 Module &M = *C.begin()->getFunction().getParent();
2217 auto &FAM =
2218 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(IR&: C, ExtraArgs&: CG).getManager();
2219
2220 // Check for uses of llvm.coro.prepare.retcon/async.
2221 SmallVector<Function *, 2> PrepareFns;
2222 addPrepareFunction(M, Fns&: PrepareFns, Name: "llvm.coro.prepare.retcon");
2223 addPrepareFunction(M, Fns&: PrepareFns, Name: "llvm.coro.prepare.async");
2224
2225 // Find coroutines for processing.
2226 SmallVector<LazyCallGraph::Node *> Coroutines;
2227 for (LazyCallGraph::Node &N : C)
2228 if (N.getFunction().isPresplitCoroutine())
2229 Coroutines.push_back(Elt: &N);
2230
2231 if (Coroutines.empty() && PrepareFns.empty())
2232 return PreservedAnalyses::all();
2233
2234 auto *CurrentSCC = &C;
2235 // Split all the coroutines.
2236 for (LazyCallGraph::Node *N : Coroutines) {
2237 Function &F = N->getFunction();
2238 LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
2239 << "\n");
2240
2241 // The suspend-crossing algorithm in buildCoroutineFrame gets tripped up
2242 // by unreachable blocks, so remove them as a first pass. Remove the
2243 // unreachable blocks before collecting intrinsics into Shape.
2244 removeUnreachableBlocks(F);
2245
2246 coro::Shape Shape(F);
2247 if (!Shape.CoroBegin)
2248 continue;
2249
2250 F.setSplittedCoroutine();
2251
2252 std::unique_ptr<coro::BaseABI> ABI = CreateAndInitABI(F, Shape);
2253
2254 SmallVector<Function *, 4> Clones;
2255 auto &TTI = FAM.getResult<TargetIRAnalysis>(IR&: F);
2256 doSplitCoroutine(F, Clones, ABI&: *ABI, TTI, OptimizeFrame);
2257 CurrentSCC = &updateCallGraphAfterCoroutineSplit(
2258 N&: *N, Shape, Clones, C&: *CurrentSCC, CG, AM, UR, FAM);
2259
2260 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
2261 ORE.emit(RemarkBuilder: [&]() {
2262 return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F)
2263 << "Split '" << ore::NV("function", F.getName())
2264 << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize)
2265 << ", align=" << ore::NV("align", Shape.FrameAlign.value()) << ")";
2266 });
2267
2268 if (!Shape.CoroSuspends.empty()) {
2269 // Run the CGSCC pipeline on the original and newly split functions.
2270 UR.CWorklist.insert(X: CurrentSCC);
2271 for (Function *Clone : Clones)
2272 UR.CWorklist.insert(X: CG.lookupSCC(N&: CG.get(F&: *Clone)));
2273 } else if (Shape.ABI == coro::ABI::Async) {
2274 // Reprocess the function to inline the tail called return function of
2275 // coro.async.end.
2276 UR.CWorklist.insert(X: &C);
2277 }
2278 }
2279
2280 for (auto *PrepareFn : PrepareFns) {
2281 replaceAllPrepares(PrepareFn, CG, C&: *CurrentSCC);
2282 }
2283
2284 return PreservedAnalyses::none();
2285}
2286