//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass builds the coroutine frame and outlines resume and destroy parts
// of the coroutine into separate functions.
//
// We present a coroutine to LLVM as an ordinary function with suspension
// points marked up with intrinsics. We let the optimizer party on the coroutine
// as a single function for as long as possible. Shortly before the coroutine is
// eligible to be inlined into its callers, we split up the coroutine into parts
// corresponding to the initial, resume and destroy invocations of the coroutine,
// add them to the current SCC and restart the IPO pipeline to optimize the
// coroutine subfunctions we extracted before proceeding to the caller of the
// coroutine.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "CoroCloner.h"
#include "CoroInternal.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Coroutines/MaterializationUtils.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "coro-split"

// FIXME:
// Lower the intrinsic in the CoroEarly phase if the coroutine frame doesn't
// escape and it is known that other transformations (for example, sanitizers)
// won't lead to incorrect code.
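
// A rough, illustrative sketch of the lowering below (names and exact operand
// order are abbreviations, not taken verbatim from the intrinsic definition):
//   %hdl = call ptr @llvm.coro.await_suspend.handle(ptr %awaiter, ptr %frame,
//                                                   ptr @wrapper)
// becomes a direct call of the wrapper,
//   %hdl = call ptr @wrapper(ptr %awaiter, ptr %frame)
// and, for the .handle variant, is followed by a resume of the returned
// handle obtained through llvm.coro.subfn.addr. That resume call is recorded
// in Shape.SymmetricTransfers so it can become a musttail call after the
// coroutine has been split.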
static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB,
                              coro::Shape &Shape) {
  auto Wrapper = CB->getWrapperFunction();
  auto Awaiter = CB->getAwaiter();
  auto FramePtr = CB->getFrame();

  Builder.SetInsertPoint(CB);

  CallBase *NewCall = nullptr;
  // await_suspend has only 2 parameters, awaiter and handle.
  // Copy parameter attributes from the intrinsic call, but remove the last,
  // because the last parameter now becomes the function that is being called.
  AttributeList NewAttributes =
      CB->getAttributes().removeParamAttributes(CB->getContext(), 2);

  if (auto Invoke = dyn_cast<InvokeInst>(CB)) {
    auto WrapperInvoke =
        Builder.CreateInvoke(Wrapper, Invoke->getNormalDest(),
                             Invoke->getUnwindDest(), {Awaiter, FramePtr});

    WrapperInvoke->setCallingConv(Invoke->getCallingConv());
    std::copy(Invoke->bundle_op_info_begin(), Invoke->bundle_op_info_end(),
              WrapperInvoke->bundle_op_info_begin());
    WrapperInvoke->setAttributes(NewAttributes);
    WrapperInvoke->setDebugLoc(Invoke->getDebugLoc());
    NewCall = WrapperInvoke;
  } else if (auto Call = dyn_cast<CallInst>(CB)) {
    auto WrapperCall = Builder.CreateCall(Wrapper, {Awaiter, FramePtr});

    WrapperCall->setAttributes(NewAttributes);
    WrapperCall->setDebugLoc(Call->getDebugLoc());
    NewCall = WrapperCall;
  } else {
    llvm_unreachable("Unexpected coro_await_suspend invocation method");
  }

  if (CB->getCalledFunction()->getIntrinsicID() ==
      Intrinsic::coro_await_suspend_handle) {
    // Follow the lowered await_suspend call above with a lowered resume call
    // to the returned coroutine.
    if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
      // If the await_suspend call is an invoke, we continue in the next block.
      Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt());
    }

    coro::LowererBase LB(*Wrapper->getParent());
    auto *ResumeAddr = LB.makeSubFnCall(NewCall, CoroSubFnInst::ResumeIndex,
                                        &*Builder.GetInsertPoint());

    LLVMContext &Ctx = Builder.getContext();
    FunctionType *ResumeTy = FunctionType::get(
        Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx), false);
    auto *ResumeCall = Builder.CreateCall(ResumeTy, ResumeAddr, {NewCall});
    ResumeCall->setCallingConv(CallingConv::Fast);

    // We can't insert the 'ret' instruction and adjust the cc until the
    // function has been split, so remember this for later.
    Shape.SymmetricTransfers.push_back(ResumeCall);

    NewCall = ResumeCall;
  }

  CB->replaceAllUsesWith(NewCall);
  CB->eraseFromParent();
}

static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) {
  IRBuilder<> Builder(F.getContext());
  for (auto *AWS : Shape.CoroAwaitSuspends)
    lowerAwaitSuspend(Builder, AWS, Shape);
}

static void maybeFreeRetconStorage(IRBuilder<> &Builder,
                                   const coro::Shape &Shape, Value *FramePtr,
                                   CallGraph *CG) {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
  if (Shape.RetconLowering.IsFrameInlineInStorage)
    return;

  Shape.emitDealloc(Builder, FramePtr, CG);
}

/// Replace an llvm.coro.end.async.
/// Will inline the musttail call function, if there is one.
/// \returns true if cleanup of the coro.end block is needed, false otherwise.
static bool replaceCoroEndAsync(AnyCoroEndInst *End) {
  IRBuilder<> Builder(End);

  auto *EndAsync = dyn_cast<CoroAsyncEndInst>(End);
  if (!EndAsync) {
    Builder.CreateRetVoid();
    return true /*needs cleanup of coro.end block*/;
  }

  auto *MustTailCallFunc = EndAsync->getMustTailCallFunction();
  if (!MustTailCallFunc) {
    Builder.CreateRetVoid();
    return true /*needs cleanup of coro.end block*/;
  }

  // Move the must tail call from the predecessor block into the end block.
  auto *CoroEndBlock = End->getParent();
  auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor();
  assert(MustTailCallFuncBlock && "Must have a single predecessor block");
  auto It = MustTailCallFuncBlock->getTerminator()->getIterator();
  auto *MustTailCall = cast<CallInst>(&*std::prev(It));
  CoroEndBlock->splice(End->getIterator(), MustTailCallFuncBlock,
                       MustTailCall->getIterator());

  // Insert the return instruction.
  Builder.SetInsertPoint(End);
  Builder.CreateRetVoid();
  InlineFunctionInfo FnInfo;

  // Remove the rest of the block, by splitting it into an unreachable block.
  auto *BB = End->getParent();
  BB->splitBasicBlock(End);
  BB->getTerminator()->eraseFromParent();

  auto InlineRes = InlineFunction(*MustTailCall, FnInfo);
  assert(InlineRes.isSuccess() && "Expected inlining to succeed");
  (void)InlineRes;

  // We have cleaned up the coro.end block above.
  return false;
}

/// Replace a non-unwind call to llvm.coro.end.
static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
                                      const coro::Shape &Shape, Value *FramePtr,
                                      bool InResume, CallGraph *CG) {
  // Start inserting right before the coro.end.
  IRBuilder<> Builder(End);

  // Create the return instruction.
  switch (Shape.ABI) {
  // The cloned functions in switch-lowering always return void.
  case coro::ABI::Switch:
    assert(!cast<CoroEndInst>(End)->hasResults() &&
           "switch coroutine should not return any values");
    // coro.end doesn't immediately end the coroutine in the main function
    // in this lowering, because we need to deallocate the coroutine.
    if (!InResume)
      return;
    Builder.CreateRetVoid();
    break;

  // In async lowering this returns.
  case coro::ABI::Async: {
    bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End);
    if (!CoroEndBlockNeedsCleanup)
      return;
    break;
  }

  // In unique continuation lowering, the continuations always return void.
  // But we may have implicitly allocated storage.
  case coro::ABI::RetconOnce: {
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    auto *CoroEnd = cast<CoroEndInst>(End);
    auto *RetTy = Shape.getResumeFunctionType()->getReturnType();

    if (!CoroEnd->hasResults()) {
      assert(RetTy->isVoidTy());
      Builder.CreateRetVoid();
      break;
    }

    auto *CoroResults = CoroEnd->getResults();
    unsigned NumReturns = CoroResults->numReturns();

    if (auto *RetStructTy = dyn_cast<StructType>(RetTy)) {
      assert(RetStructTy->getNumElements() == NumReturns &&
             "numbers of returns should match resume function signature");
      Value *ReturnValue = PoisonValue::get(RetStructTy);
      unsigned Idx = 0;
      for (Value *RetValEl : CoroResults->return_values())
        ReturnValue = Builder.CreateInsertValue(ReturnValue, RetValEl, Idx++);
      Builder.CreateRet(ReturnValue);
    } else if (NumReturns == 0) {
      assert(RetTy->isVoidTy());
      Builder.CreateRetVoid();
    } else {
      assert(NumReturns == 1);
      Builder.CreateRet(*CoroResults->retval_begin());
    }
    CoroResults->replaceAllUsesWith(
        ConstantTokenNone::get(CoroResults->getContext()));
    CoroResults->eraseFromParent();
    break;
  }

  // In non-unique continuation lowering, we signal completion by returning
  // a null continuation.
  case coro::ABI::Retcon: {
    assert(!cast<CoroEndInst>(End)->hasResults() &&
           "retcon coroutine should not return any values");
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    auto RetTy = Shape.getResumeFunctionType()->getReturnType();
    auto RetStructTy = dyn_cast<StructType>(RetTy);
    PointerType *ContinuationTy =
        cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);

    Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
    if (RetStructTy) {
      ReturnValue = Builder.CreateInsertValue(PoisonValue::get(RetStructTy),
                                              ReturnValue, 0);
    }
    Builder.CreateRet(ReturnValue);
    break;
  }
  }

  // Remove the rest of the block, by splitting it into an unreachable block.
  auto *BB = End->getParent();
  BB->splitBasicBlock(End);
  BB->getTerminator()->eraseFromParent();
}

// Mark a coroutine as done, which implies that the coroutine is finished and
// never gets resumed.
//
// In the resume-switched ABI, the done state is represented by storing zero in
// ResumeFnAddr.
//
// NOTE: We cannot omit the `FramePtr` argument: the pointer to the frame in
// the split function is not stored in `Shape`.
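//
// As an illustration (indices are shown symbolically; the real values come
// from coro::Shape::SwitchFieldIndex), the store emitted below is roughly:
//   %ResumeFn.addr = getelementptr inbounds %f.Frame, ptr %FramePtr,
//                                           i32 0, i32 <Resume>
//   store ptr null, ptr %ResumeFn.addr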
static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
                                Value *FramePtr) {
  assert(
      Shape.ABI == coro::ABI::Switch &&
      "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
  auto *GepIndex = Builder.CreateStructGEP(
      Shape.FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Resume,
      "ResumeFn.addr");
  auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
      Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
  Builder.CreateStore(NullPtr, GepIndex);

  // If the coroutine doesn't have an unwind coro.end, we can omit the store to
  // the final suspend point's index, since we can infer that the coroutine is
  // suspended at the final suspend point from the nullness of ResumeFnAddr.
  // However, we can't skip it if the coroutine has an unwind coro.end: a
  // coroutine that reaches an unwind coro.end is considered suspended at the
  // final suspend point (its ResumeFnAddr is null) even though it didn't
  // actually complete. We need the IndexVal for the final suspend point to
  // keep the states distinguishable.
  if (Shape.SwitchLowering.HasUnwindCoroEnd &&
      Shape.SwitchLowering.HasFinalSuspend) {
    assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() &&
           "The final suspend should only live in the last position of "
           "CoroSuspends.");
    ConstantInt *IndexVal = Shape.getIndex(Shape.CoroSuspends.size() - 1);
    auto *FinalIndex = Builder.CreateStructGEP(
        Shape.FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");

    Builder.CreateStore(IndexVal, FinalIndex);
  }
}

/// Replace an unwind call to llvm.coro.end.
static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
                                 Value *FramePtr, bool InResume,
                                 CallGraph *CG) {
  IRBuilder<> Builder(End);

  switch (Shape.ABI) {
  // In switch-lowering, this does nothing in the main function.
  case coro::ABI::Switch: {
    // In C++'s specification, the coroutine should be marked as done
    // if promise.unhandled_exception() throws. The frontend will
    // call coro.end(true) along this path.
    //
    // FIXME: We should refactor this once there is another language
    // that uses the Switch-Resumed style besides C++.
    markCoroutineAsDone(Builder, Shape, FramePtr);
    if (!InResume)
      return;
    break;
  }
  // In async lowering this does nothing.
  case coro::ABI::Async:
    break;
  // In continuation-lowering, this frees the continuation storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    break;
  }

  // If coro.end has an associated bundle, add cleanupret instruction.
  if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
    auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
    auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
    End->getParent()->splitBasicBlock(End);
    CleanupRet->getParent()->getTerminator()->eraseFromParent();
  }
}

static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
                           Value *FramePtr, bool InResume, CallGraph *CG) {
  if (End->isUnwind())
    replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
  else
    replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);

  auto &Context = End->getContext();
  End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context)
                                   : ConstantInt::getFalse(Context));
  End->eraseFromParent();
}

// In the resume function, we remove the last case (when coro::Shape is built,
// the final suspend point, if present, is always the last element of the
// CoroSuspends array), since it is undefined behavior to resume a coroutine
// suspended at the final suspend point.
// In the destroy function, we can also remove the last case, provided it is
// not possible for ResumeFnAddr to be null while the coroutine is not actually
// suspended at the final suspend point (which can happen, since a coroutine is
// considered suspended at the final suspend point if
// promise.unhandled_exception() exits via an exception).
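//
// When the final case has to stay in the destroy function, the code below
// replaces its switch dispatch with an explicit null check of the resume slot,
// roughly (an illustrative sketch; the actual block names come from
// splitBasicBlock):
//   %ResumeFn = load ptr, ptr %ResumeFn.addr
//   %is.done = icmp eq ptr %ResumeFn, null
//   br i1 %is.done, label %final.resume, label %Switch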
void coro::BaseCloner::handleFinalSuspend() {
  assert(Shape.ABI == coro::ABI::Switch &&
         Shape.SwitchLowering.HasFinalSuspend);

  if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd)
    return;

  auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
  auto FinalCaseIt = std::prev(Switch->case_end());
  BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
  Switch->removeCase(FinalCaseIt);
  if (isSwitchDestroyFunction()) {
    BasicBlock *OldSwitchBB = Switch->getParent();
    auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
    Builder.SetInsertPoint(OldSwitchBB->getTerminator());

    if (NewF->isCoroOnlyDestroyWhenComplete()) {
      // When the coroutine can only be destroyed when complete, we don't need
      // to generate code for other cases.
      Builder.CreateBr(ResumeBB);
    } else {
      auto *GepIndex = Builder.CreateStructGEP(
          Shape.FrameTy, NewFramePtr, coro::Shape::SwitchFieldIndex::Resume,
          "ResumeFn.addr");
      auto *Load =
          Builder.CreateLoad(Shape.getSwitchResumePointerType(), GepIndex);
      auto *Cond = Builder.CreateIsNull(Load);
      Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
    }
    OldSwitchBB->getTerminator()->eraseFromParent();
  }
}

static FunctionType *
getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
  auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend);
  auto *StructTy = cast<StructType>(AsyncSuspend->getType());
  auto &Context = Suspend->getParent()->getParent()->getContext();
  auto *VoidTy = Type::getVoidTy(Context);
  return FunctionType::get(VoidTy, StructTy->elements(), false);
}

static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
                                        const Twine &Suffix,
                                        Module::iterator InsertBefore,
                                        AnyCoroSuspendInst *ActiveSuspend) {
  Module *M = OrigF.getParent();
  auto *FnTy = (Shape.ABI != coro::ABI::Async)
                   ? Shape.getResumeFunctionType()
                   : getFunctionTypeFromAsyncSuspend(ActiveSuspend);

  Function *NewF =
      Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
                       OrigF.getName() + Suffix);

  M->getFunctionList().insert(InsertBefore, NewF);

  return NewF;
}

/// Replace uses of the active llvm.coro.suspend.retcon/async call with the
/// arguments to the continuation function.
///
/// This assumes that the builder has a meaningful insertion point.
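///
/// For example (an illustrative sketch, not taken from a test): if the
/// continuation's signature is void(ptr %buf, i32 %a, i64 %b) and the cloned
/// suspend has type { i32, i64 }, extractvalue users of the suspend are
/// rewritten to use %a and %b directly, and any remaining aggregate uses are
/// rebuilt from those arguments with insertvalue.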
void coro::BaseCloner::replaceRetconOrAsyncSuspendUses() {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
         Shape.ABI == coro::ABI::Async);

  auto NewS = VMap[ActiveSuspend];
  if (NewS->use_empty())
    return;

  // Copy out all the continuation arguments after the buffer pointer into
  // an easily-indexed data structure for convenience.
  SmallVector<Value *, 8> Args;
  // The async ABI includes all arguments -- including the first argument.
  bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
  for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()),
            E = NewF->arg_end();
       I != E; ++I)
    Args.push_back(&*I);

  // If the suspend returns a single scalar value, we can just do a simple
  // replacement.
  if (!isa<StructType>(NewS->getType())) {
    assert(Args.size() == 1);
    NewS->replaceAllUsesWith(Args.front());
    return;
  }

  // Try to peephole extracts of an aggregate return.
  for (Use &U : llvm::make_early_inc_range(NewS->uses())) {
    auto *EVI = dyn_cast<ExtractValueInst>(U.getUser());
    if (!EVI || EVI->getNumIndices() != 1)
      continue;

    EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
    EVI->eraseFromParent();
  }

  // If we have no remaining uses, we're done.
  if (NewS->use_empty())
    return;

  // Otherwise, we need to create an aggregate.
  Value *Aggr = PoisonValue::get(NewS->getType());
  for (auto [Idx, Arg] : llvm::enumerate(Args))
    Aggr = Builder.CreateInsertValue(Aggr, Arg, Idx);

  NewS->replaceAllUsesWith(Aggr);
}

void coro::BaseCloner::replaceCoroSuspends() {
  Value *SuspendResult;

  switch (Shape.ABI) {
  // In switch lowering, replace coro.suspend with the appropriate value
  // for the type of function we're extracting.
  // Replacing coro.suspend with (0) will result in control flow proceeding to
  // a resume label associated with the suspend point; replacing it with (1)
  // will result in control flow proceeding to a cleanup label associated with
  // this suspend point.
  case coro::ABI::Switch:
    SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
    break;

  // In async lowering there are no uses of the result.
  case coro::ABI::Async:
    return;

  // In returned-continuation lowering, the arguments from earlier
  // continuations are theoretically arbitrary, and they should have been
  // spilled.
  case coro::ABI::RetconOnce:
  case coro::ABI::Retcon:
    return;
  }

  for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
    // The active suspend was handled earlier.
    if (CS == ActiveSuspend)
      continue;

    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
    MappedCS->replaceAllUsesWith(SuspendResult);
    MappedCS->eraseFromParent();
  }
}

void coro::BaseCloner::replaceCoroEnds() {
  for (AnyCoroEndInst *CE : Shape.CoroEnds) {
    // We use a null call graph because there's no call graph node for
    // the cloned function yet. We'll just be rebuilding that later.
    auto *NewCE = cast<AnyCoroEndInst>(VMap[CE]);
    replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
  }
}

static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
                                 ValueToValueMapTy *VMap) {
  if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty())
    return;
  Value *CachedSlot = nullptr;
  auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
    if (CachedSlot)
      return CachedSlot;

    // Check if the function has a swifterror argument.
    for (auto &Arg : F.args()) {
      if (Arg.isSwiftError()) {
        CachedSlot = &Arg;
        return &Arg;
      }
    }

    // Create a swifterror alloca.
    IRBuilder<> Builder(&F.getEntryBlock(),
                        F.getEntryBlock().getFirstNonPHIOrDbg());
    auto Alloca = Builder.CreateAlloca(ValueTy);
    Alloca->setSwiftError(true);

    CachedSlot = Alloca;
    return Alloca;
  };

  for (CallInst *Op : Shape.SwiftErrorOps) {
    auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
    IRBuilder<> Builder(MappedOp);

    // If there are no arguments, this is a 'get' operation.
    Value *MappedResult;
    if (Op->arg_empty()) {
      auto ValueTy = Op->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      MappedResult = Builder.CreateLoad(ValueTy, Slot);
    } else {
      assert(Op->arg_size() == 1);
      auto Value = MappedOp->getArgOperand(0);
      auto ValueTy = Value->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      Builder.CreateStore(Value, Slot);
      MappedResult = Slot;
    }

    MappedOp->replaceAllUsesWith(MappedResult);
    MappedOp->eraseFromParent();
  }

  // If we're updating the original function, we've invalidated SwiftErrorOps.
  if (VMap == nullptr) {
    Shape.SwiftErrorOps.clear();
  }
}

/// Returns all DbgVariableIntrinsics and DbgVariableRecords in F.
static std::pair<SmallVector<DbgVariableIntrinsic *, 8>,
                 SmallVector<DbgVariableRecord *>>
collectDbgVariableIntrinsics(Function &F) {
  SmallVector<DbgVariableIntrinsic *, 8> Intrinsics;
  SmallVector<DbgVariableRecord *> DbgVariableRecords;
  for (auto &I : instructions(F)) {
    for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
      DbgVariableRecords.push_back(&DVR);
    if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
      Intrinsics.push_back(DVI);
  }
  return {Intrinsics, DbgVariableRecords};
}

void coro::BaseCloner::replaceSwiftErrorOps() {
  ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
}

void coro::BaseCloner::salvageDebugInfo() {
  auto [Worklist, DbgVariableRecords] = collectDbgVariableIntrinsics(*NewF);
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;

  // Only 64-bit ABIs have a register we can refer to with the entry value.
  bool UseEntryValue = OrigF.getParent()->getTargetTriple().isArch64Bit();
  for (DbgVariableIntrinsic *DVI : Worklist)
    coro::salvageDebugInfo(ArgToAllocaMap, *DVI, UseEntryValue);
  for (DbgVariableRecord *DVR : DbgVariableRecords)
    coro::salvageDebugInfo(ArgToAllocaMap, *DVR, UseEntryValue);

  // Remove all salvaged dbg.declare intrinsics that became
  // either unreachable or stale due to the CoroSplit transformation.
  DominatorTree DomTree(*NewF);
  auto IsUnreachableBlock = [&](BasicBlock *BB) {
    return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr,
                                   &DomTree);
  };
  auto RemoveOne = [&](auto *DVI) {
    if (IsUnreachableBlock(DVI->getParent()))
      DVI->eraseFromParent();
    else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
      // Count all non-debuginfo uses in reachable blocks.
      unsigned Uses = 0;
      for (auto *User : DVI->getVariableLocationOp(0)->users())
        if (auto *I = dyn_cast<Instruction>(User))
          if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent()))
            ++Uses;
      if (!Uses)
        DVI->eraseFromParent();
    }
  };
  for_each(Worklist, RemoveOne);
  for_each(DbgVariableRecords, RemoveOne);
}

void coro::BaseCloner::replaceEntryBlock() {
  // In the original function, the AllocaSpillBlock is a block immediately
  // following the allocation of the frame object which defines GEPs for
  // all the allocas that have been moved into the frame, and it ends by
  // branching to the original beginning of the coroutine. Make this
  // the entry block of the cloned function.
  auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
  auto *OldEntry = &NewF->getEntryBlock();
  Entry->setName("entry" + Suffix);
  Entry->moveBefore(OldEntry);
  Entry->getTerminator()->eraseFromParent();

  // Clear all predecessors of the new entry block. There should be
  // exactly one predecessor, which we created when splitting out
  // AllocaSpillBlock to begin with.
  assert(Entry->hasOneUse());
  auto BranchToEntry = cast<BranchInst>(Entry->user_back());
  assert(BranchToEntry->isUnconditional());
  Builder.SetInsertPoint(BranchToEntry);
  Builder.CreateUnreachable();
  BranchToEntry->eraseFromParent();

  // Branch from the entry to the appropriate place.
  Builder.SetInsertPoint(Entry);
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    // In switch-lowering, we built a resume-entry block in the original
    // function. Make the entry block branch to this.
    auto *SwitchBB =
        cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
    Builder.CreateBr(SwitchBB);
    SwitchBB->moveAfter(Entry);
    break;
  }
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    // In continuation ABIs, we want to branch to immediately after the
    // active suspend point. Earlier phases will have put the suspend in its
    // own basic block, so just thread our jump directly to its successor.
    assert((Shape.ABI == coro::ABI::Async &&
            isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
           ((Shape.ABI == coro::ABI::Retcon ||
             Shape.ABI == coro::ABI::RetconOnce) &&
            isa<CoroSuspendRetconInst>(ActiveSuspend)));
    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]);
    auto Branch = cast<BranchInst>(MappedCS->getNextNode());
    assert(Branch->isUnconditional());
    Builder.CreateBr(Branch->getSuccessor(0));
    break;
  }
  }

  // Any static alloca that's still being used but not reachable from the new
  // entry needs to be moved to the new entry.
  Function *F = OldEntry->getParent();
  DominatorTree DT{*F};
  for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
    auto *Alloca = dyn_cast<AllocaInst>(&I);
    if (!Alloca || I.use_empty())
      continue;
    if (DT.isReachableFromEntry(I.getParent()) ||
        !isa<ConstantInt>(Alloca->getArraySize()))
      continue;
    I.moveBefore(*Entry, Entry->getFirstInsertionPt());
  }
}

/// Derive the value of the new frame pointer.
Value *coro::BaseCloner::deriveNewFramePointer() {
  // Builder should be inserting to the front of the new entry block.

  switch (Shape.ABI) {
  // In switch-lowering, the argument is the frame pointer.
  case coro::ABI::Switch:
    return &*NewF->arg_begin();
  // In async-lowering, one of the arguments is an async context, as determined
  // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context
  // of the resume function from the async context projection function
  // associated with the active suspend. The frame is located immediately
  // after the async context header.
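  //
  // Sketch of the code emitted here (names are illustrative):
  //   %caller.ctx = call ptr %proj.fn(ptr %callee.ctx)
  //   %frameptr   = getelementptr inbounds i8, ptr %caller.ctx,
  //                                        i32 <AsyncLowering.FrameOffset>
  // after which the projection-function call is inlined away.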
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
    auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
    auto *CalleeContext = NewF->getArg(ContextIdx);
    auto *ProjectionFunc =
        ActiveAsyncSuspend->getAsyncContextProjectionFunction();
    auto DbgLoc =
        cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
    // Calling i8* (i8*)
    auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(),
                                             ProjectionFunc, CalleeContext);
    CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
    CallerContext->setDebugLoc(DbgLoc);
    // The frame is located after the async_context header.
    auto &Context = Builder.getContext();
    auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32(
        Type::getInt8Ty(Context), CallerContext,
        Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr");
    // Inline the projection function.
    InlineFunctionInfo InlineInfo;
    auto InlineRes = InlineFunction(*CallerContext, InlineInfo);
    assert(InlineRes.isSuccess());
    (void)InlineRes;
    return FramePtrAddr;
  }
  // In continuation-lowering, the argument is the opaque storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    Argument *NewStorage = &*NewF->arg_begin();
    auto FramePtrTy = PointerType::getUnqual(Shape.FrameTy->getContext());

    // If the storage is inline, just bitcast the storage to the frame type.
    if (Shape.RetconLowering.IsFrameInlineInStorage)
      return NewStorage;

    // Otherwise, load the real frame from the opaque storage.
    return Builder.CreateLoad(FramePtrTy, NewStorage);
  }
  }
  llvm_unreachable("bad ABI");
}

/// Adjust the scope line of the funclet to the first line number after the
/// suspend point. This avoids a jump in the line table from the function
/// declaration (where prologue instructions are attributed to) to the suspend
/// point.
/// Only adjust the scope line when the files are the same.
/// If no candidate line number is found, fall back to the line of
/// ActiveSuspend.
static void updateScopeLine(Instruction *ActiveSuspend,
                            DISubprogram &SPToUpdate) {
  if (!ActiveSuspend)
    return;

  // No subsequent instruction -> fall back to the location of ActiveSuspend.
  if (!ActiveSuspend->getNextNonDebugInstruction()) {
    if (auto DL = ActiveSuspend->getDebugLoc())
      if (SPToUpdate.getFile() == DL->getFile())
        SPToUpdate.setScopeLine(DL->getLine());
    return;
  }

  BasicBlock::iterator Successor =
      ActiveSuspend->getNextNonDebugInstruction()->getIterator();
  // CoroSplit splits the BB around ActiveSuspend, so the meaningful
  // instructions are not in the same BB.
  if (auto *Branch = dyn_cast_or_null<BranchInst>(Successor);
      Branch && Branch->isUnconditional())
    Successor = Branch->getSuccessor(0)->getFirstNonPHIOrDbg();

  // Find the first successor of ActiveSuspend with a non-zero line location.
  // If that matches the file of ActiveSuspend, use it.
  BasicBlock *PBB = Successor->getParent();
  for (; Successor != PBB->end(); Successor = std::next(Successor)) {
    Successor = skipDebugIntrinsics(Successor);
    auto DL = Successor->getDebugLoc();
    if (!DL || DL.getLine() == 0)
      continue;

    if (SPToUpdate.getFile() == DL->getFile()) {
      SPToUpdate.setScopeLine(DL.getLine());
      return;
    }

    break;
  }

  // If the search above failed, fall back to the location of ActiveSuspend.
  if (auto DL = ActiveSuspend->getDebugLoc())
    if (SPToUpdate.getFile() == DL->getFile())
      SPToUpdate.setScopeLine(DL->getLine());
}

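// Mark the frame/storage pointer parameter as nonnull, noundef, aligned and
// dereferenceable up to the frame size, and optionally noalias.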
static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
                                 unsigned ParamIndex, uint64_t Size,
                                 Align Alignment, bool NoAlias) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::NonNull);
  ParamAttrs.addAttribute(Attribute::NoUndef);

  if (NoAlias)
    ParamAttrs.addAttribute(Attribute::NoAlias);

  ParamAttrs.addAlignmentAttr(Alignment);
  ParamAttrs.addDereferenceableAttr(Size);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context,
                                 unsigned ParamIndex) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::SwiftAsync);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
                              unsigned ParamIndex) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::SwiftSelf);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

/// Clone the body of the original function into a resume function of
/// some sort.
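///
/// At a high level this clones OrigF's body into NewF with CloneFunctionInto,
/// fixes up linkage, visibility and ABI-specific attributes, installs the new
/// entry block, derives and remaps the frame pointer, rewrites suspends,
/// swifterror operations and coro.ends, and finally salvages debug info that
/// still points into the coroutine frame.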
void coro::BaseCloner::create() {
  assert(NewF);

  // Replace all args with dummy instructions. If an argument is the old frame
  // pointer, the dummy will be replaced by the new frame pointer once it is
  // computed below. Uses of all other arguments should have already been
  // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine
  // frame.
  SmallVector<Instruction *> DummyArgs;
  for (Argument &A : OrigF.args()) {
    DummyArgs.push_back(new FreezeInst(PoisonValue::get(A.getType())));
    VMap[&A] = DummyArgs.back();
  }

  SmallVector<ReturnInst *, 4> Returns;

  // Ignore attempts to change certain attributes of the function.
  // TODO: maybe there should be a way to suppress this during cloning?
  auto savedVisibility = NewF->getVisibility();
  auto savedUnnamedAddr = NewF->getUnnamedAddr();
  auto savedDLLStorageClass = NewF->getDLLStorageClass();

  // NewF's linkage (which CloneFunctionInto does *not* change) might not
  // be compatible with the visibility of OrigF (which it *does* change),
  // so protect against that.
  auto savedLinkage = NewF->getLinkage();
  NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);

  CloneFunctionInto(NewF, &OrigF, VMap,
                    CloneFunctionChangeType::LocalChangesOnly, Returns);

  auto &Context = NewF->getContext();

  if (DISubprogram *SP = NewF->getSubprogram()) {
    assert(SP != OrigF.getSubprogram() && SP->isDistinct());
    updateScopeLine(ActiveSuspend, *SP);

    // Update the linkage name and the function name to reflect the modified
    // name.
    MDString *NewLinkageName = MDString::get(Context, NewF->getName());
    SP->replaceLinkageName(NewLinkageName);
    if (DISubprogram *Decl = SP->getDeclaration()) {
      TempDISubprogram NewDecl = Decl->clone();
      NewDecl->replaceLinkageName(NewLinkageName);
      SP->replaceDeclaration(MDNode::replaceWithUniqued(std::move(NewDecl)));
    }
  }

  NewF->setLinkage(savedLinkage);
  NewF->setVisibility(savedVisibility);
  NewF->setUnnamedAddr(savedUnnamedAddr);
  NewF->setDLLStorageClass(savedDLLStorageClass);
  // The function sanitizer metadata needs to match the signature of the
  // function it is being attached to. However this does not hold for split
  // functions here. Thus remove the metadata for split functions.
  if (Shape.ABI == coro::ABI::Switch &&
      NewF->hasMetadata(LLVMContext::MD_func_sanitize))
    NewF->eraseMetadata(LLVMContext::MD_func_sanitize);

  // Replace the attributes of the new function:
  auto OrigAttrs = NewF->getAttributes();
  auto NewAttrs = AttributeList();

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Bootstrap attributes by copying function attributes from the
    // original function. This should include optimization settings and so on.
    NewAttrs = NewAttrs.addFnAttributes(
        Context, AttrBuilder(Context, OrigAttrs.getFnAttrs()));

    addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize,
                         Shape.FrameAlign, /*NoAlias=*/false);
    break;
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
    if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo,
                                Attribute::SwiftAsync)) {
      uint32_t ArgAttributeIndices =
          ActiveAsyncSuspend->getStorageArgumentIndex();
      auto ContextArgIndex = ArgAttributeIndices & 0xff;
      addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex);

      // `swiftasync` must precede `swiftself`, so 0 is not a valid index for
      // `swiftself`.
      auto SwiftSelfIndex = ArgAttributeIndices >> 8;
      if (SwiftSelfIndex)
        addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex);
    }

    // Transfer the original function's attributes.
    auto FnAttrs = OrigF.getAttributes().getFnAttrs();
    NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, FnAttrs));
    break;
  }
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // If we have a continuation prototype, just use its attributes,
    // full-stop.
    NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();

    /// FIXME: Is it really good to add the NoAlias attribute?
    addFramePointerAttrs(NewAttrs, Context, 0,
                         Shape.getRetconCoroId()->getStorageSize(),
                         Shape.getRetconCoroId()->getStorageAlignment(),
                         /*NoAlias=*/true);

    break;
  }

  switch (Shape.ABI) {
  // In these ABIs, the cloned functions always return 'void', and the
  // existing return sites are meaningless. Note that for unique
  // continuations, this includes the returns associated with suspends;
  // this is fine because we can't suspend twice.
  case coro::ABI::Switch:
  case coro::ABI::RetconOnce:
    // Remove old returns.
    for (ReturnInst *Return : Returns)
      changeToUnreachable(Return);
    break;

  // With multi-suspend continuations, we'll already have eliminated the
  // original returns and inserted returns before all the suspend points,
  // so we want to leave any returns in place.
  case coro::ABI::Retcon:
    break;
  // Async lowering will insert musttail calls at all suspend points, followed
  // by a return.
  // Don't change returns to unreachable because that will trip up the
  // verifier. These returns should be unreachable from the clone.
  case coro::ABI::Async:
    break;
  }

  NewF->setAttributes(NewAttrs);
  NewF->setCallingConv(Shape.getResumeFunctionCC());

  // Set up the new entry block.
  replaceEntryBlock();

  // Turn symmetric transfers into musttail calls.
  for (CallInst *ResumeCall : Shape.SymmetricTransfers) {
    ResumeCall = cast<CallInst>(VMap[ResumeCall]);
    if (TTI.supportsTailCallFor(ResumeCall)) {
      // FIXME: Could we support symmetric transfer effectively without
      // musttail?
      ResumeCall->setTailCallKind(CallInst::TCK_MustTail);
    }

    // Put a 'ret void' after the call, and split any remaining instructions to
    // an unreachable block.
    BasicBlock *BB = ResumeCall->getParent();
    BB->splitBasicBlock(ResumeCall->getNextNode());
    Builder.SetInsertPoint(BB->getTerminator());
    Builder.CreateRetVoid();
    BB->getTerminator()->eraseFromParent();
  }

  Builder.SetInsertPoint(&NewF->getEntryBlock().front());
  NewFramePtr = deriveNewFramePointer();

  // Remap frame pointer.
  Value *OldFramePtr = VMap[Shape.FramePtr];
  NewFramePtr->takeName(OldFramePtr);
  OldFramePtr->replaceAllUsesWith(NewFramePtr);

  // Remap vFrame pointer.
  auto *NewVFrame = Builder.CreateBitCast(
      NewFramePtr, PointerType::getUnqual(Builder.getContext()), "vFrame");
  Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
  if (OldVFrame != NewVFrame)
    OldVFrame->replaceAllUsesWith(NewVFrame);

  // All uses of the arguments should have been resolved by this point,
  // so we can safely remove the dummy values.
  for (Instruction *DummyArg : DummyArgs) {
    DummyArg->replaceAllUsesWith(PoisonValue::get(DummyArg->getType()));
    DummyArg->deleteValue();
  }

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Rewrite final suspend handling, as it is not done via the switch (this
    // allows us to remove the final case from the switch, since it is
    // undefined behavior to resume a coroutine suspended at the final suspend
    // point).
    if (Shape.SwitchLowering.HasFinalSuspend)
      handleFinalSuspend();
    break;
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // Replace uses of the active suspend with the corresponding
    // continuation-function arguments.
    assert(ActiveSuspend != nullptr &&
           "no active suspend when lowering a continuation-style coroutine");
    replaceRetconOrAsyncSuspendUses();
    break;
  }

  // Handle suspends.
  replaceCoroSuspends();

  // Handle swifterror.
  replaceSwiftErrorOps();

  // Remove coro.end intrinsics.
  replaceCoroEnds();

  // Salvage debug info that points into the coroutine frame.
  salvageDebugInfo();
}

void coro::SwitchCloner::create() {
  // Create a new function matching the original type.
  NewF = createCloneDeclaration(OrigF, Shape, Suffix, OrigF.getParent()->end(),
                                ActiveSuspend);

  // Clone the function.
  coro::BaseCloner::create();

  // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
  // to suppress deallocation code.
  coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
                        /*Elide=*/FKind == coro::CloneKind::SwitchCleanup);
}

static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
  assert(Shape.ABI == coro::ABI::Async);

  auto *FuncPtrStruct = cast<ConstantStruct>(
      Shape.AsyncLowering.AsyncFuncPointer->getInitializer());
  auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0);
  auto *OrigContextSize = FuncPtrStruct->getOperand(1);
  auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(),
                                          Shape.AsyncLowering.ContextSize);
  auto *NewFuncPtrStruct = ConstantStruct::get(
      FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize);

  Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
}

static TypeSize getFrameSizeForShape(coro::Shape &Shape) {
  // In the same function all coro.sizes should have the same result type.
  auto *SizeIntrin = Shape.CoroSizes.back();
  Module *M = SizeIntrin->getModule();
  const DataLayout &DL = M->getDataLayout();
  return DL.getTypeAllocSize(Shape.FrameTy);
}

static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
  if (Shape.ABI == coro::ABI::Async)
    updateAsyncFuncPointerContextSize(Shape);

  for (CoroAlignInst *CA : Shape.CoroAligns) {
    CA->replaceAllUsesWith(
        ConstantInt::get(CA->getType(), Shape.FrameAlign.value()));
    CA->eraseFromParent();
  }

  if (Shape.CoroSizes.empty())
    return;

  // In the same function all coro.sizes should have the same result type.
  auto *SizeIntrin = Shape.CoroSizes.back();
  auto *SizeConstant =
      ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape));

  for (CoroSizeInst *CS : Shape.CoroSizes) {
    CS->replaceAllUsesWith(SizeConstant);
    CS->eraseFromParent();
  }
}

static void postSplitCleanup(Function &F) {
  removeUnreachableBlocks(F);

#ifndef NDEBUG
  // For now, we do a mandatory verification step because we don't
  // entirely trust this pass. Note that we don't want to add a verifier
  // pass to FPM below because it will also verify all the global data.
  if (verifyFunction(F, &errs()))
    report_fatal_error("Broken function");
#endif
}

// Coroutine has no suspend points. Remove heap allocation for the coroutine
// frame if possible.
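//
// For the switch ABI this roughly means: if there is a coro.alloc, it is
// replaced with 'false', a stack alloca of the frame type is created, and all
// uses of coro.begin are redirected to that alloca; otherwise coro.begin's
// uses are redirected to its memory operand. coro.free is rewritten
// accordingly. (A sketch only; see the switch case below for the exact
// conditions.)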
static void handleNoSuspendCoroutine(coro::Shape &Shape) {
  auto *CoroBegin = Shape.CoroBegin;
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    auto SwitchId = Shape.getSwitchCoroId();
    auto *AllocInst = SwitchId->getCoroAlloc();
    coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
    if (AllocInst) {
      IRBuilder<> Builder(AllocInst);
      auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
      Frame->setAlignment(Shape.FrameAlign);
      AllocInst->replaceAllUsesWith(Builder.getFalse());
      AllocInst->eraseFromParent();
      CoroBegin->replaceAllUsesWith(Frame);
    } else {
      CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
    }

    break;
  }
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    CoroBegin->replaceAllUsesWith(PoisonValue::get(CoroBegin->getType()));
    break;
  }

  CoroBegin->eraseFromParent();
  Shape.CoroBegin = nullptr;
}

// SimplifySuspendPoint needs to check that there are no calls between
// coro.save and coro.suspend, since any of those calls may potentially resume
// the coroutine, and if that is the case we cannot eliminate the suspend
// point.
static bool hasCallsInBlockBetween(iterator_range<BasicBlock::iterator> R) {
  for (Instruction &I : R) {
    // Assume that no intrinsic can resume the coroutine.
    if (isa<IntrinsicInst>(I))
      continue;

    if (isa<CallBase>(I))
      return true;
  }
  return false;
}

static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
  SmallPtrSet<BasicBlock *, 8> Set;
  SmallVector<BasicBlock *, 8> Worklist;

  Set.insert(SaveBB);
  Worklist.push_back(ResDesBB);

  // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
  // returns a token consumed by the suspend instruction, all blocks in between
  // will have to eventually hit SaveBB when going backwards from ResDesBB.
  while (!Worklist.empty()) {
    auto *BB = Worklist.pop_back_val();
    Set.insert(BB);
    for (auto *Pred : predecessors(BB))
      if (!Set.contains(Pred))
        Worklist.push_back(Pred);
  }

  // SaveBB and ResDesBB are checked separately in hasCallsBetween.
  Set.erase(SaveBB);
  Set.erase(ResDesBB);

  for (auto *BB : Set)
    if (hasCallsInBlockBetween({BB->getFirstNonPHIIt(), BB->end()}))
      return true;

  return false;
}

static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
  auto *SaveBB = Save->getParent();
  auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
  BasicBlock::iterator SaveIt = Save->getIterator();
  BasicBlock::iterator ResumeOrDestroyIt = ResumeOrDestroy->getIterator();

  if (SaveBB == ResumeOrDestroyBB)
    return hasCallsInBlockBetween({std::next(SaveIt), ResumeOrDestroyIt});

  // Any calls from Save to the end of the block?
  if (hasCallsInBlockBetween({std::next(SaveIt), SaveBB->end()}))
    return true;

  // Any calls from the beginning of the block up to ResumeOrDestroy?
  if (hasCallsInBlockBetween(
          {ResumeOrDestroyBB->getFirstNonPHIIt(), ResumeOrDestroyIt}))
    return true;

  // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
  if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
    return true;

  return false;
}

// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
// suspend point and replace it with normal control flow.
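//
// The pattern matched below is, roughly (an illustrative sketch; the real
// matching is structural):
//   %addr = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 <index>)
//   call void %addr(ptr %hdl)                ; resume or destroy of %hdl
//   %s = call i8 @llvm.coro.suspend(token %save, i1 false)
// When %hdl is this coroutine's own frame and no other call can run in
// between, the suspend collapses to the constant <index> and the indirect
// call and the coro.save are deleted.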
static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
                                 CoroBeginInst *CoroBegin) {
  Instruction *Prev = Suspend->getPrevNode();
  if (!Prev) {
    auto *Pred = Suspend->getParent()->getSinglePredecessor();
    if (!Pred)
      return false;
    Prev = Pred->getTerminator();
  }

  CallBase *CB = dyn_cast<CallBase>(Prev);
  if (!CB)
    return false;

  auto *Callee = CB->getCalledOperand()->stripPointerCasts();

  // See if the callsite is for resumption or destruction of the coroutine.
  auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
  if (!SubFn)
    return false;

  // If it does not refer to the current coroutine, we cannot do anything with
  // it.
  if (SubFn->getFrame() != CoroBegin)
    return false;

  // See if the transformation is safe. Specifically, see if there are any
  // calls in between Save and CallInstr. They can potentially resume the
  // coroutine, rendering this optimization unsafe.
  auto *Save = Suspend->getCoroSave();
  if (hasCallsBetween(Save, CB))
    return false;

  // Replace llvm.coro.suspend with the value that results in resumption over
  // the resume or cleanup path.
  Suspend->replaceAllUsesWith(SubFn->getRawIndex());
  Suspend->eraseFromParent();
  Save->eraseFromParent();

  // No longer need a call to coro.resume or coro.destroy.
  if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
    BranchInst::Create(Invoke->getNormalDest(), Invoke->getIterator());
  }

  // Grab the CalledValue from CB before erasing the CallInstr.
  auto *CalledValue = CB->getCalledOperand();
  CB->eraseFromParent();

  // If there are no more users, remove it. Usually it is a bitcast of SubFn.
  if (CalledValue != SubFn && CalledValue->user_empty())
    if (auto *I = dyn_cast<Instruction>(CalledValue))
      I->eraseFromParent();

  // Now we are good to remove SubFn.
  if (SubFn->user_empty())
    SubFn->eraseFromParent();

  return true;
}

// Remove suspend points that are simplified.
static void simplifySuspendPoints(coro::Shape &Shape) {
  // Currently, the only simplification we do is switch-lowering-specific.
  if (Shape.ABI != coro::ABI::Switch)
    return;

  auto &S = Shape.CoroSuspends;
  size_t I = 0, N = S.size();
  if (N == 0)
    return;

  size_t ChangedFinalIndex = std::numeric_limits<size_t>::max();
  while (true) {
    auto SI = cast<CoroSuspendInst>(S[I]);
    // Leave final.suspend to handleFinalSuspend since it is undefined behavior
    // to resume a coroutine suspended at the final suspend point.
    if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
      if (--N == I)
        break;

      std::swap(S[I], S[N]);

      if (cast<CoroSuspendInst>(S[I])->isFinal()) {
        assert(Shape.SwitchLowering.HasFinalSuspend);
        ChangedFinalIndex = I;
      }

      continue;
    }
    if (++I == N)
      break;
  }
  S.resize(N);

  // Restore the final suspend point's position in case it was swapped, since
  // we require the final suspend to be the last element of CoroSuspends.
  if (ChangedFinalIndex < N) {
    assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal());
    std::swap(S[ChangedFinalIndex], S.back());
  }
}

namespace {

struct SwitchCoroutineSplitter {
  static void split(Function &F, coro::Shape &Shape,
                    SmallVectorImpl<Function *> &Clones,
                    TargetTransformInfo &TTI) {
    assert(Shape.ABI == coro::ABI::Switch);

    // Create a resume clone by cloning the body of the original function,
    // setting a new entry block and replacing coro.suspend with an appropriate
    // value to force the resume or cleanup path at every suspend point.
    createResumeEntryBlock(F, Shape);
    auto *ResumeClone = coro::SwitchCloner::createClone(
        F, ".resume", Shape, coro::CloneKind::SwitchResume, TTI);
    auto *DestroyClone = coro::SwitchCloner::createClone(
        F, ".destroy", Shape, coro::CloneKind::SwitchUnwind, TTI);
    auto *CleanupClone = coro::SwitchCloner::createClone(
        F, ".cleanup", Shape, coro::CloneKind::SwitchCleanup, TTI);

    postSplitCleanup(*ResumeClone);
    postSplitCleanup(*DestroyClone);
    postSplitCleanup(*CleanupClone);

    // Store the addresses of the resume/destroy/cleanup functions in the
    // coroutine frame.
    updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);

    assert(Clones.empty());
    Clones.push_back(ResumeClone);
    Clones.push_back(DestroyClone);
    Clones.push_back(CleanupClone);

    // Create a constant array referring to the resume/destroy/cleanup
    // functions, pointed to by the last argument of @llvm.coro.info, so that
    // the CoroElide pass can determine the correct function to call.
    setCoroInfo(F, Shape, Clones);
  }

  // Create a variant of the ramp function that does not perform heap
  // allocation for a switch ABI coroutine.
  //
  // The newly split `.noalloc` ramp function has the following differences:
  // - It has one additional frame pointer parameter in lieu of dynamic
  //   allocation.
  // - Allocations are suppressed by replacing coro.alloc and coro.free.
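  //
  // Illustrative sketch (parameter names are made up): for
  //   declare ptr @f(i32 %x)
  // the variant created below has the signature
  //   declare ptr @f.noalloc(i32 %x, ptr %frame)
  // with coro.alloc replaced by 'false', coro.free replaced by null, and all
  // uses of coro.begin redirected to the trailing %frame argument.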
  static Function *createNoAllocVariant(Function &F, coro::Shape &Shape,
                                        SmallVectorImpl<Function *> &Clones) {
    assert(Shape.ABI == coro::ABI::Switch);
    auto *OrigFnTy = F.getFunctionType();
    auto OldParams = OrigFnTy->params();

    SmallVector<Type *> NewParams;
    NewParams.reserve(OldParams.size() + 1);
    NewParams.append(OldParams.begin(), OldParams.end());
    NewParams.push_back(PointerType::getUnqual(Shape.FrameTy->getContext()));

    auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams,
                                      OrigFnTy->isVarArg());
    Function *NoAllocF =
        Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc");

    ValueToValueMapTy VMap;
    unsigned int Idx = 0;
    for (const auto &I : F.args()) {
      VMap[&I] = NoAllocF->getArg(Idx++);
    }
    // We just appended the frame pointer as the last argument of the new
    // function.
    auto FrameIdx = NoAllocF->arg_size() - 1;
    SmallVector<ReturnInst *, 4> Returns;
    CloneFunctionInto(NoAllocF, &F, VMap,
                      CloneFunctionChangeType::LocalChangesOnly, Returns);

    if (Shape.CoroBegin) {
      auto *NewCoroBegin =
          cast_if_present<CoroBeginInst>(VMap[Shape.CoroBegin]);
      auto *NewCoroId = cast<CoroIdInst>(NewCoroBegin->getId());
      coro::replaceCoroFree(NewCoroId, /*Elide=*/true);
      coro::suppressCoroAllocs(NewCoroId);
      NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(FrameIdx));
      NewCoroBegin->eraseFromParent();
    }

    Module *M = F.getParent();
    M->getFunctionList().insert(M->end(), NoAllocF);

    removeUnreachableBlocks(*NoAllocF);
    auto NewAttrs = NoAllocF->getAttributes();
    // When we elide allocation, we read these attributes to determine the
    // frame size and alignment.
    addFramePointerAttrs(NewAttrs, NoAllocF->getContext(), FrameIdx,
                         Shape.FrameSize, Shape.FrameAlign,
                         /*NoAlias=*/false);
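    // For example, the frame parameter ends up annotated roughly as
    //   ptr dereferenceable(<FrameSize>) align <FrameAlign> %frame
    // (the exact attribute set is whatever addFramePointerAttrs adds).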

    NoAllocF->setAttributes(NewAttrs);

    Clones.push_back(NoAllocF);
    // Reset the original function's coro info so that the new noalloc variant
    // is connected to the original ramp function.
    setCoroInfo(F, Shape, Clones);
    // After copying, set the linkage to internal linkage. The original
    // function may have different linkage, but optimizations that depend on
    // this function generally rely on LTO.
    NoAllocF->setLinkage(llvm::GlobalValue::InternalLinkage);
    return NoAllocF;
  }

private:
  // Create an entry block for a resume function with a switch that will jump
  // to suspend points.
  static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
    LLVMContext &C = F.getContext();

    DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
    DISubprogram *DIS = F.getSubprogram();
    // If there is no DISubprogram for F, it implies the function is compiled
    // without debug info. So we also don't generate debug info for the
    // suspension points.
    bool AddDebugLabels =
        (DIS && DIS->getUnit() &&
         (DIS->getUnit()->getEmissionKind() ==
              DICompileUnit::DebugEmissionKind::FullDebug ||
          DIS->getUnit()->getEmissionKind() ==
              DICompileUnit::DebugEmissionKind::LineTablesOnly));

    // resume.entry:
    //   %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr,
    //                                        i32 0, i32 2
    //   %index = load i32, i32* %index.addr
    //   switch i32 %index, label %unreachable [
    //     i32 0, label %resume.0
    //     i32 1, label %resume.1
    //     ...
    //   ]

    auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
    auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);

    IRBuilder<> Builder(NewEntry);
    auto *FramePtr = Shape.FramePtr;
    auto *FrameTy = Shape.FrameTy;
    auto *GepIndex = Builder.CreateStructGEP(
        FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
    auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
    auto *Switch =
        Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
    Shape.SwitchLowering.ResumeSwitch = Switch;

    // Split all coro.suspend calls.
    size_t SuspendIndex = 0;
    for (auto *AnyS : Shape.CoroSuspends) {
      auto *S = cast<CoroSuspendInst>(AnyS);
      ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);

      // Replace CoroSave with a store to Index:
      //   %index.addr = getelementptr %f.frame... (index field number)
      //   store i32 %IndexVal, i32* %index.addr1
      auto *Save = S->getCoroSave();
      Builder.SetInsertPoint(Save);
      if (S->isFinal()) {
        // The coroutine should be marked done if it reaches the final suspend
        // point.
        markCoroutineAsDone(Builder, Shape, FramePtr);
      } else {
        auto *GepIndex = Builder.CreateStructGEP(
            FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
        Builder.CreateStore(IndexVal, GepIndex);
      }

      Save->replaceAllUsesWith(ConstantTokenNone::get(C));
      Save->eraseFromParent();

      // Split the block before and after coro.suspend and add a jump from the
      // entry switch:
      //
      //   whateverBB:
      //     whatever
      //     %0 = call i8 @llvm.coro.suspend(token none, i1 false)
      //     switch i8 %0, label %suspend [i8 0, label %resume
      //                                   i8 1, label %cleanup]
      // becomes:
      //
      //   whateverBB:
      //     whatever
      //     br label %resume.0.landing
      //
      //   resume.0: ; <--- jump from the switch in the resume.entry
      //     #dbg_label(...) ; <--- artificial label for debuggers
      //     %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
      //     br label %resume.0.landing
      //
      //   resume.0.landing:
      //     %1 = phi i8 [-1, %whateverBB], [%0, %resume.0]
      //     switch i8 %1, label %suspend [i8 0, label %resume
      //                                   i8 1, label %cleanup]

      auto *SuspendBB = S->getParent();
      auto *ResumeBB =
          SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex));
      auto *LandingBB = ResumeBB->splitBasicBlock(
          S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
      Switch->addCase(IndexVal, ResumeBB);

      cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
      auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "");
      PN->insertBefore(LandingBB->begin());
      S->replaceAllUsesWith(PN);
      PN->addIncoming(Builder.getInt8(-1), SuspendBB);
      PN->addIncoming(S, ResumeBB);

      if (AddDebugLabels) {
        if (DebugLoc SuspendLoc = S->getDebugLoc()) {
          std::string LabelName =
              ("__coro_resume_" + Twine(SuspendIndex)).str();
          DILocation &DILoc = *SuspendLoc.get();
          DILabel *ResumeLabel =
              DBuilder.createLabel(DIS, LabelName, DILoc.getFile(),
                                   SuspendLoc.getLine(), SuspendLoc.getCol(),
                                   /*IsArtificial=*/true,
                                   /*CoroSuspendIdx=*/SuspendIndex,
                                   /*AlwaysPreserve=*/false);
          DBuilder.insertLabel(ResumeLabel, &DILoc, ResumeBB->begin());
        }
      }

      ++SuspendIndex;
    }

    Builder.SetInsertPoint(UnreachBB);
    Builder.CreateUnreachable();
    DBuilder.finalize();

    Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
  }

  // Store the addresses of the Resume/Destroy/Cleanup functions in the
  // coroutine frame.
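  //
  // Sketch of the stores emitted right after the frame pointer becomes
  // available (field numbers assume the usual switch-ABI frame layout, where
  // the resume and destroy slots are the first two fields):
  //
  //   %resume.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 0
  //   store ptr @f.resume, ptr %resume.addr
  //   %destroy.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 1
  //   store ptr @f.destroy, ptr %destroy.addr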
  static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
                              Function *DestroyFn, Function *CleanupFn) {
    IRBuilder<> Builder(&*Shape.getInsertPtAfterFramePtr());

    auto *ResumeAddr = Builder.CreateStructGEP(
        Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
        "resume.addr");
    Builder.CreateStore(ResumeFn, ResumeAddr);

    Value *DestroyOrCleanupFn = DestroyFn;

    CoroIdInst *CoroId = Shape.getSwitchCoroId();
    if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
      // If there is a CoroAlloc and it returns false (meaning we elided the
      // allocation), use CleanupFn instead of DestroyFn.
      DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
    }

    auto *DestroyAddr = Builder.CreateStructGEP(
        Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
        "destroy.addr");
    Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
  }

  // Create a global constant array containing pointers to the functions
  // provided and set the Info parameter of CoroBegin to point at this
  // constant. Example:
  //
  //   @f.resumers = internal constant [2 x void(%f.frame*)*]
  //                   [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy]
  //   define void @f() {
  //     ...
  //     call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
  //                i8* bitcast([2 x void(%f.frame*)*]* @f.resumers to i8*))
  //
  // Assumes that all the functions have the same signature.
  static void setCoroInfo(Function &F, coro::Shape &Shape,
                          ArrayRef<Function *> Fns) {
    // This only works under the switch-lowering ABI because coro elision
    // only works on the switch-lowering ABI.
    SmallVector<Constant *, 4> Args(Fns);
    assert(!Args.empty());
    Function *Part = *Fns.begin();
    Module *M = Part->getParent();
    auto *ArrTy = ArrayType::get(Part->getType(), Args.size());

    auto *ConstVal = ConstantArray::get(ArrTy, Args);
    auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
                                  GlobalVariable::PrivateLinkage, ConstVal,
                                  F.getName() + Twine(".resumers"));

    // Update the coro.begin instruction to refer to this constant.
    LLVMContext &C = F.getContext();
    auto *BC = ConstantExpr::getPointerCast(GV, PointerType::getUnqual(C));
    Shape.getSwitchCoroId()->setInfo(BC);
  }
};

} // namespace

static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
                                       Value *Continuation) {
  auto *ResumeIntrinsic = Suspend->getResumeFunction();
  auto &Context = Suspend->getParent()->getParent()->getContext();
  auto *Int8PtrTy = PointerType::getUnqual(Context);

  IRBuilder<> Builder(ResumeIntrinsic);
  auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
  ResumeIntrinsic->replaceAllUsesWith(Val);
  ResumeIntrinsic->eraseFromParent();
  Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg,
                      PoisonValue::get(Int8PtrTy));
}

/// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs.
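/// For example, if the callee expects a ptr parameter but the forwarded value
/// is an integer (or vice versa), CreateBitOrPointerCast below emits the
/// needed inttoptr/ptrtoint; otherwise the value is passed through unchanged.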
static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
                            ArrayRef<Value *> FnArgs,
                            SmallVectorImpl<Value *> &CallArgs) {
  size_t ArgIdx = 0;
  for (auto *paramTy : FnTy->params()) {
    assert(ArgIdx < FnArgs.size());
    if (paramTy != FnArgs[ArgIdx]->getType())
      CallArgs.push_back(
          Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], paramTy));
    else
      CallArgs.push_back(FnArgs[ArgIdx]);
    ++ArgIdx;
  }
}

CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
                                   TargetTransformInfo &TTI,
                                   ArrayRef<Value *> Arguments,
                                   IRBuilder<> &Builder) {
  auto *FnTy = MustTailCallFn->getFunctionType();
  // Coerce the arguments; LLVM optimizations seem to ignore the types in
  // vararg functions and throw away casts in optimized mode.
  SmallVector<Value *, 8> CallArgs;
  coerceArguments(Builder, FnTy, Arguments, CallArgs);

  auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs);
  // Skip targets which don't support tail calls.
  if (TTI.supportsTailCallFor(TailCall)) {
    TailCall->setTailCallKind(CallInst::TCK_MustTail);
  }
  TailCall->setDebugLoc(Loc);
  TailCall->setCallingConv(MustTailCallFn->getCallingConv());
  return TailCall;
}
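
// Note: for the async lowering, the guaranteed (musttail) tail call is what
// keeps a chain of resumptions from growing the stack. On targets that report
// no tail-call support, the call above is still emitted, just as an ordinary
// call.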

void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
                                    SmallVectorImpl<Function *> &Clones,
                                    TargetTransformInfo &TTI) {
  assert(Shape.ABI == coro::ABI::Async);
  assert(Clones.empty());
  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Attribute::NoReturn);
  F.removeRetAttr(Attribute::NoAlias);
  F.removeRetAttr(Attribute::NonNull);

  auto &Context = F.getContext();
  auto *Int8PtrTy = PointerType::getUnqual(Context);

  auto *Id = Shape.getAsyncCoroId();
  IRBuilder<> Builder(Id);

  auto *FramePtr = Id->getStorage();
  FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy);
  FramePtr = Builder.CreateConstInBoundsGEP1_32(
      Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset,
      "async.ctx.frameptr");

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Value> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(FramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
    auto *Suspend = cast<CoroSuspendAsyncInst>(CS);

    // Create the clone declaration.
    auto ResumeNameSuffix = ".resume.";
    auto ProjectionFunctionName =
        Suspend->getAsyncContextProjectionFunction()->getName();
    bool UseSwiftMangling = false;
    if (ProjectionFunctionName == "__swift_async_resume_project_context") {
      ResumeNameSuffix = "TQ";
      UseSwiftMangling = true;
    } else if (ProjectionFunctionName == "__swift_async_resume_get_context") {
      ResumeNameSuffix = "TY";
      UseSwiftMangling = true;
    }
    auto *Continuation = createCloneDeclaration(
        F, Shape,
        UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
                         : ResumeNameSuffix + Twine(Idx),
        NextF, Suspend);
    Clones.push_back(Continuation);

    // Insert a branch to a new return block immediately before the suspend
    // point.
    auto *SuspendBB = Suspend->getParent();
    auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto *Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Place it before the first suspend.
    auto *ReturnBB =
        BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
    Branch->setSuccessor(0, ReturnBB);

    IRBuilder<> Builder(ReturnBB);

    // Insert the call to the tail call function and inline it.
    auto *Fn = Suspend->getMustTailCallFunction();
    SmallVector<Value *, 8> Args(Suspend->args());
    auto FnArgs = ArrayRef<Value *>(Args).drop_front(
        CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
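    // Everything after the must-tail callee operand of the suspend is
    // forwarded verbatim as the tail call's arguments.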
    auto *TailCall = coro::createMustTailCall(Suspend->getDebugLoc(), Fn, TTI,
                                              FnArgs, Builder);
    Builder.CreateRetVoid();
    InlineFunctionInfo FnInfo;
    (void)InlineFunction(*TailCall, FnInfo);

    // Replace the llvm.coro.async.resume intrinsic call.
    replaceAsyncResumeFunction(Suspend, Continuation);
  }

  assert(Clones.size() == Shape.CoroSuspends.size());

  for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
    auto *Suspend = CS;
    auto *Clone = Clones[Idx];

    coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone,
                                  Suspend, TTI);
  }
}

void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
                                        SmallVectorImpl<Function *> &Clones,
                                        TargetTransformInfo &TTI) {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
  assert(Clones.empty());

  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Attribute::NoReturn);
  F.removeRetAttr(Attribute::NoAlias);
  F.removeRetAttr(Attribute::NonNull);

  // Allocate the frame.
  auto *Id = Shape.getRetconCoroId();
  Value *RawFramePtr;
  if (Shape.RetconLowering.IsFrameInlineInStorage) {
    RawFramePtr = Id->getStorage();
  } else {
    IRBuilder<> Builder(Id);

    // Determine the size of the frame.
    const DataLayout &DL = F.getDataLayout();
    auto Size = DL.getTypeAllocSize(Shape.FrameTy);

    // Allocate. We don't need to update the call graph node because we're
    // going to recompute it from scratch after splitting.
    // FIXME: pass the required alignment
    RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
    RawFramePtr =
        Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());

    // Stash the allocated frame pointer in the continuation storage.
    Builder.CreateStore(RawFramePtr, Id->getStorage());
  }

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Value> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create a unique return block.
  BasicBlock *ReturnBB = nullptr;
  PHINode *ContinuationPhi = nullptr;
  SmallVector<PHINode *, 4> ReturnPHIs;
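
  // Sketch of the unified return block built below, for a retcon coroutine
  // that also yields one i32 (names are illustrative):
  //
  //   coro.return:
  //     %cont = phi ptr [ %cont.0, %bb.0 ], [ %cont.1, %bb.1 ]
  //     %val  = phi i32 [ %val.0, %bb.0 ], [ %val.1, %bb.1 ]
  //     %agg0 = insertvalue { ptr, i32 } poison, ptr %cont, 0
  //     %agg1 = insertvalue { ptr, i32 } %agg0, i32 %val, 1
  //     ret { ptr, i32 } %agg1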

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
    auto Suspend = cast<CoroSuspendRetconInst>(CS);

    // Create the clone declaration.
    auto Continuation = createCloneDeclaration(
        F, Shape, ".resume." + Twine(Idx), NextF, nullptr);
    Clones.push_back(Continuation);

    // Insert a branch to the unified return block immediately before
    // the suspend point.
    auto SuspendBB = Suspend->getParent();
    auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Create the unified return block.
    if (!ReturnBB) {
      // Place it before the first suspend.
      ReturnBB =
          BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
      Shape.RetconLowering.ReturnBlock = ReturnBB;

      IRBuilder<> Builder(ReturnBB);

      // First, the continuation.
      ContinuationPhi =
          Builder.CreatePHI(Continuation->getType(), Shape.CoroSuspends.size());

      // Create PHIs for all other return values.
      assert(ReturnPHIs.empty());

      // Next, all the directly-yielded values.
      for (auto *ResultTy : Shape.getRetconResultTypes())
        ReturnPHIs.push_back(
            Builder.CreatePHI(ResultTy, Shape.CoroSuspends.size()));

      // Build the return value.
      auto RetTy = F.getReturnType();

      // Cast the continuation value if necessary.
      // We can't rely on the types matching up because that type would
      // have to be infinite.
      auto CastedContinuationTy =
          (ReturnPHIs.empty() ? RetTy : RetTy->getStructElementType(0));
      auto *CastedContinuation =
          Builder.CreateBitCast(ContinuationPhi, CastedContinuationTy);

      Value *RetV = CastedContinuation;
      if (!ReturnPHIs.empty()) {
        auto ValueIdx = 0;
        RetV = PoisonValue::get(RetTy);
        RetV = Builder.CreateInsertValue(RetV, CastedContinuation, ValueIdx++);

        for (auto Phi : ReturnPHIs)
          RetV = Builder.CreateInsertValue(RetV, Phi, ValueIdx++);
      }

      Builder.CreateRet(RetV);
    }

    // Branch to the return block.
    Branch->setSuccessor(0, ReturnBB);
    assert(ContinuationPhi);
    ContinuationPhi->addIncoming(Continuation, SuspendBB);
    for (auto [Phi, VUse] :
         llvm::zip_equal(ReturnPHIs, Suspend->value_operands()))
      Phi->addIncoming(VUse, SuspendBB);
  }

  assert(Clones.size() == Shape.CoroSuspends.size());

  for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
    auto Suspend = CS;
    auto Clone = Clones[Idx];

    coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone,
                                  Suspend, TTI);
  }
}

namespace {
class PrettyStackTraceFunction : public PrettyStackTraceEntry {
  Function &F;

public:
  PrettyStackTraceFunction(Function &F) : F(F) {}
  void print(raw_ostream &OS) const override {
    OS << "While splitting coroutine ";
    F.printAsOperand(OS, /*print type*/ false, F.getParent());
    OS << "\n";
  }
};
} // namespace

/// Remove calls to llvm.coro.end in the original function.
static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) {
  if (Shape.ABI != coro::ABI::Switch) {
    for (auto *End : Shape.CoroEnds) {
      replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
    }
  } else {
    for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
      auto &Context = End->getContext();
      End->replaceAllUsesWith(ConstantInt::getFalse(Context));
      End->eraseFromParent();
    }
  }
}

static bool hasSafeElideCaller(Function &F) {
  for (auto *U : F.users()) {
    if (auto *CB = dyn_cast<CallBase>(U)) {
      auto *Caller = CB->getFunction();
      if (Caller && Caller->isPresplitCoroutine() &&
          CB->hasFnAttr(llvm::Attribute::CoroElideSafe))
        return true;
    }
  }
  return false;
}

void coro::SwitchABI::splitCoroutine(Function &F, coro::Shape &Shape,
                                     SmallVectorImpl<Function *> &Clones,
                                     TargetTransformInfo &TTI) {
  SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
}

static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
                             coro::BaseABI &ABI, TargetTransformInfo &TTI,
                             bool OptimizeFrame) {
  PrettyStackTraceFunction prettyStackTrace(F);

  auto &Shape = ABI.Shape;
  assert(Shape.CoroBegin);

  lowerAwaitSuspends(F, Shape);

  simplifySuspendPoints(Shape);

  normalizeCoroutine(F, Shape, TTI);
  ABI.buildCoroutineFrame(OptimizeFrame);
  replaceFrameSizeAndAlignment(Shape);

  bool isNoSuspendCoroutine = Shape.CoroSuspends.empty();

  bool shouldCreateNoAllocVariant =
      !isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch &&
      hasSafeElideCaller(F) && !F.hasFnAttribute(llvm::Attribute::NoInline);

  // If there are no suspend points, no split is required; just remove
  // the allocation and deallocation blocks since they are not needed.
  if (isNoSuspendCoroutine) {
    handleNoSuspendCoroutine(Shape);
  } else {
    ABI.splitCoroutine(F, Shape, Clones, TTI);
  }

  // Replace all the swifterror operations in the original function.
  // This invalidates SwiftErrorOps in the Shape.
  replaceSwiftErrorOps(F, Shape, nullptr);

  // Salvage debug intrinsics that point into the coroutine frame in the
  // original function. The Cloner has already salvaged debug info in the new
  // coroutine funclets.
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
  auto [DbgInsts, DbgVariableRecords] = collectDbgVariableIntrinsics(F);
  for (auto *DDI : DbgInsts)
    coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/);
  for (DbgVariableRecord *DVR : DbgVariableRecords)
    coro::salvageDebugInfo(ArgToAllocaMap, *DVR, false /*UseEntryValue*/);

  removeCoroEndsFromRampFunction(Shape);

  if (shouldCreateNoAllocVariant)
    SwitchCoroutineSplitter::createNoAllocVariant(F, Shape, Clones);
}

static LazyCallGraph::SCC &updateCallGraphAfterCoroutineSplit(
    LazyCallGraph::Node &N, const coro::Shape &Shape,
    const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
    LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
    FunctionAnalysisManager &FAM) {

  auto *CurrentSCC = &C;
  if (!Clones.empty()) {
    switch (Shape.ABI) {
    case coro::ABI::Switch:
      // Each clone in the Switch lowering is independent of the other clones.
      // Let the LazyCallGraph know about each one separately.
      for (Function *Clone : Clones)
        CG.addSplitFunction(N.getFunction(), *Clone);
      break;
    case coro::ABI::Async:
    case coro::ABI::Retcon:
    case coro::ABI::RetconOnce:
      // Each clone in the Async/Retcon lowering references the other clones.
      // Let the LazyCallGraph know about all of them at once.
      if (!Clones.empty())
        CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
      break;
    }

    // Let the CGSCC infra handle the changes to the original function.
    CurrentSCC = &updateCGAndAnalysisManagerForCGSCCPass(CG, *CurrentSCC, N, AM,
                                                         UR, FAM);
  }

  // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
  // to the split functions.
  postSplitCleanup(N.getFunction());
  CurrentSCC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentSCC, N,
                                                          AM, UR, FAM);
  return *CurrentSCC;
}

/// Replace a call to llvm.coro.prepare.retcon.
static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
                           LazyCallGraph::SCC &C) {
  auto CastFn = Prepare->getArgOperand(0); // as an i8*
  auto Fn = CastFn->stripPointerCasts();   // as its original type

  // Attempt to peephole this pattern:
  //    %0 = bitcast [[TYPE]] @some_function to i8*
  //    %1 = call @llvm.coro.prepare.retcon(i8* %0)
  //    %2 = bitcast %1 to [[TYPE]]
  // ==>
  //    %2 = @some_function
  for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
    // Look for bitcasts back to the original function type.
    auto *Cast = dyn_cast<BitCastInst>(U.getUser());
    if (!Cast || Cast->getType() != Fn->getType())
      continue;

    // Replace and remove the cast.
    Cast->replaceAllUsesWith(Fn);
    Cast->eraseFromParent();
  }

  // Replace any remaining uses with the function as an i8*.
  // This can never directly be a callee, so we don't need to update CG.
  Prepare->replaceAllUsesWith(CastFn);
  Prepare->eraseFromParent();

  // Kill dead bitcasts.
  while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
    if (!Cast->use_empty())
      break;
    CastFn = Cast->getOperand(0);
    Cast->eraseFromParent();
  }
}

static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
                               LazyCallGraph::SCC &C) {
  bool Changed = false;
  for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
    // Intrinsics can only be used in calls.
    auto *Prepare = cast<CallInst>(P.getUser());
    replacePrepare(Prepare, CG, C);
    Changed = true;
  }

  return Changed;
}

static void addPrepareFunction(const Module &M,
                               SmallVectorImpl<Function *> &Fns,
                               StringRef Name) {
  auto *PrepareFn = M.getFunction(Name);
  if (PrepareFn && !PrepareFn->use_empty())
    Fns.push_back(PrepareFn);
}

static std::unique_ptr<coro::BaseABI>
CreateNewABI(Function &F, coro::Shape &S,
             std::function<bool(Instruction &)> IsMatCallback,
             const SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs) {
  if (S.CoroBegin->hasCustomABI()) {
    unsigned CustomABI = S.CoroBegin->getCustomABI();
    if (CustomABI >= GenCustomABIs.size())
      llvm_unreachable("Custom ABI not found among those specified");
    return GenCustomABIs[CustomABI](F, S);
  }

  switch (S.ABI) {
  case coro::ABI::Switch:
    return std::make_unique<coro::SwitchABI>(F, S, IsMatCallback);
  case coro::ABI::Async:
    return std::make_unique<coro::AsyncABI>(F, S, IsMatCallback);
  case coro::ABI::Retcon:
    return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback);
  case coro::ABI::RetconOnce:
    return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback);
  }
  llvm_unreachable("Unknown ABI");
}
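
// For example (a sketch, assuming the frontend emitted a coro.begin variant
// that carries a custom-ABI index): a coroutine whose coro.begin reports
// getCustomABI() == 1 is handed to GenCustomABIs[1](F, S) above instead of one
// of the built-in Switch/Async/Retcon ABIs.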

CoroSplitPass::CoroSplitPass(bool OptimizeFrame)
    : CreateAndInitABI([](Function &F, coro::Shape &S) {
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, coro::isTriviallyMaterializable, {});
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}

CoroSplitPass::CoroSplitPass(
    SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs, bool OptimizeFrame)
    : CreateAndInitABI([=](Function &F, coro::Shape &S) {
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, coro::isTriviallyMaterializable, GenCustomABIs);
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}

// For backward compatibility, this constructor takes a materializable callback
// and creates a generator for an ABI with a modified materializable callback.
CoroSplitPass::CoroSplitPass(std::function<bool(Instruction &)> IsMatCallback,
                             bool OptimizeFrame)
    : CreateAndInitABI([=](Function &F, coro::Shape &S) {
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, IsMatCallback, {});
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}

// For backward compatibility, this constructor takes a materializable callback
// and creates a generator for an ABI with a modified materializable callback.
CoroSplitPass::CoroSplitPass(
    std::function<bool(Instruction &)> IsMatCallback,
    SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs, bool OptimizeFrame)
    : CreateAndInitABI([=](Function &F, coro::Shape &S) {
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, IsMatCallback, GenCustomABIs);
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}

PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
                                     CGSCCAnalysisManager &AM,
                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
  // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
  // non-zero number of nodes, so we assume that here and grab the first
  // node's function's module.
  Module &M = *C.begin()->getFunction().getParent();
  auto &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // Check for uses of llvm.coro.prepare.retcon/async.
  SmallVector<Function *, 2> PrepareFns;
  addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
  addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");

  // Find coroutines for processing.
  SmallVector<LazyCallGraph::Node *> Coroutines;
  for (LazyCallGraph::Node &N : C)
    if (N.getFunction().isPresplitCoroutine())
      Coroutines.push_back(&N);

  if (Coroutines.empty() && PrepareFns.empty())
    return PreservedAnalyses::all();

  auto *CurrentSCC = &C;
  // Split all the coroutines.
  for (LazyCallGraph::Node *N : Coroutines) {
    Function &F = N->getFunction();
    LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
                      << "'\n");

    // The suspend-crossing algorithm in buildCoroutineFrame gets tripped up
    // by unreachable blocks, so remove them as a first pass. Remove the
    // unreachable blocks before collecting intrinsics into Shape.
    removeUnreachableBlocks(F);

    coro::Shape Shape(F);
    if (!Shape.CoroBegin)
      continue;

    F.setSplittedCoroutine();

    std::unique_ptr<coro::BaseABI> ABI = CreateAndInitABI(F, Shape);

    SmallVector<Function *, 4> Clones;
    auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
    doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame);
    CurrentSCC = &updateCallGraphAfterCoroutineSplit(
        *N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM);

    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
    ORE.emit([&]() {
      return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F)
             << "Split '" << ore::NV("function", F.getName())
             << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize)
             << ", align=" << ore::NV("align", Shape.FrameAlign.value()) << ")";
    });

    if (!Shape.CoroSuspends.empty()) {
      // Run the CGSCC pipeline on the original and newly split functions.
      UR.CWorklist.insert(CurrentSCC);
      for (Function *Clone : Clones)
        UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
    }
  }

  for (auto *PrepareFn : PrepareFns) {
    replaceAllPrepares(PrepareFn, CG, *CurrentSCC);
  }

  return PreservedAnalyses::none();
}