1//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
// This file contains classes used to discover whether, for a particular value,
// there is a path from its definition to a use that crosses a suspend point.
10//
11// Using the information discovered we form a Coroutine Frame structure to
12// contain those values. All uses of those values are replaced with appropriate
13// GEP + load from the coroutine frame. At the point of the definition we spill
14// the value into the coroutine frame.
15//===----------------------------------------------------------------------===//
16
17#include "CoroInternal.h"
18#include "llvm/ADT/BitVector.h"
19#include "llvm/ADT/PostOrderIterator.h"
20#include "llvm/ADT/ScopeExit.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/Analysis/CFG.h"
23#include "llvm/Analysis/PtrUseVisitor.h"
24#include "llvm/Analysis/StackLifetime.h"
25#include "llvm/Config/llvm-config.h"
26#include "llvm/IR/CFG.h"
27#include "llvm/IR/DIBuilder.h"
28#include "llvm/IR/DebugInfo.h"
29#include "llvm/IR/Dominators.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InstIterator.h"
32#include "llvm/IR/IntrinsicInst.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/MathExtras.h"
35#include "llvm/Support/OptimizedStructLayout.h"
36#include "llvm/Support/circular_raw_ostream.h"
37#include "llvm/Support/raw_ostream.h"
38#include "llvm/Transforms/Utils/BasicBlockUtils.h"
39#include "llvm/Transforms/Utils/Local.h"
40#include "llvm/Transforms/Utils/PromoteMemToReg.h"
41#include <algorithm>
42#include <deque>
43#include <optional>
44
45using namespace llvm;
46
47extern cl::opt<bool> UseNewDbgInfoFormat;
48
49// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
50// "coro-frame", which results in leaner debug spew.
51#define DEBUG_TYPE "coro-suspend-crossing"
52
53enum { SmallVectorThreshold = 32 };
54
// Provides a two-way mapping between blocks and their indices.
56namespace {
57class BlockToIndexMapping {
58 SmallVector<BasicBlock *, SmallVectorThreshold> V;
59
60public:
61 size_t size() const { return V.size(); }
62
63 BlockToIndexMapping(Function &F) {
64 for (BasicBlock &BB : F)
65 V.push_back(Elt: &BB);
66 llvm::sort(C&: V);
67 }
68
69 size_t blockToIndex(BasicBlock const *BB) const {
70 auto *I = llvm::lower_bound(Range: V, Value&: BB);
    assert(I != V.end() && *I == BB && "BasicBlockNumbering: Unknown block");
72 return I - V.begin();
73 }
74
75 BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
76};
77} // end anonymous namespace
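// Illustrative usage sketch (not part of the pass): block pointers and their
// dense indices can be converted back and forth, e.g.
//
//   BlockToIndexMapping Mapping(F);
//   size_t Idx = Mapping.blockToIndex(&F.getEntryBlock());
//   assert(Mapping.indexToBlock(Idx) == &F.getEntryBlock());
//
// These dense indices are what the bit vectors in SuspendCrossingInfo below
// are keyed on.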
78
// The SuspendCrossingInfo maintains data that allows answering whether, given
// two BasicBlocks A and B, there is a path from A to B that passes through a
// suspend point.
82//
83// For every basic block 'i' it maintains a BlockData that consists of:
84// Consumes: a bit vector which contains a set of indices of blocks that can
85// reach block 'i'. A block can trivially reach itself.
//   Kills: a bit vector which contains a set of indices of blocks that can
//          reach block 'i' via a path that crosses a suspend point and does
//          not otherwise repeat 'i' (a path to 'i' without cycles through 'i').
89// Suspend: a boolean indicating whether block 'i' contains a suspend point.
90// End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
91// KillLoop: There is a path from 'i' to 'i' not otherwise repeating 'i' that
92// crosses a suspend point.
93//
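// A small worked example (illustrative only): consider the CFG
//
//   entry -> susp -> use        (where 'susp' contains a coro.suspend)
//
// Once the analysis below converges:
//   Consumes[use] = {entry, susp, use}   (all three blocks can reach 'use')
//   Kills[use]    = {entry, susp}        (paths from these blocks to 'use'
//                                         cross the suspend in 'susp')
// so hasPathCrossingSuspendPoint(entry, use) is true, and any value defined in
// 'entry' and used in 'use' must be spilled to the coroutine frame.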
94namespace {
95class SuspendCrossingInfo {
96 BlockToIndexMapping Mapping;
97
98 struct BlockData {
99 BitVector Consumes;
100 BitVector Kills;
101 bool Suspend = false;
102 bool End = false;
103 bool KillLoop = false;
104 bool Changed = false;
105 };
106 SmallVector<BlockData, SmallVectorThreshold> Block;
107
108 iterator_range<pred_iterator> predecessors(BlockData const &BD) const {
109 BasicBlock *BB = Mapping.indexToBlock(Index: &BD - &Block[0]);
110 return llvm::predecessors(BB);
111 }
112
113 BlockData &getBlockData(BasicBlock *BB) {
114 return Block[Mapping.blockToIndex(BB)];
115 }
116
117 /// Compute the BlockData for the current function in one iteration.
  /// Initialize - Whether this is the first iteration; if so, we can optimize
  /// the initial case a little bit by switching the loop manually.
  /// Returns whether the BlockData changed in this iteration.
121 template <bool Initialize = false>
122 bool computeBlockData(const ReversePostOrderTraversal<Function *> &RPOT);
123
124public:
125#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
126 void dump() const;
127 void dump(StringRef Label, BitVector const &BV) const;
128#endif
129
130 SuspendCrossingInfo(Function &F, coro::Shape &Shape);
131
132 /// Returns true if there is a path from \p From to \p To crossing a suspend
133 /// point without crossing \p From a 2nd time.
134 bool hasPathCrossingSuspendPoint(BasicBlock *From, BasicBlock *To) const {
135 size_t const FromIndex = Mapping.blockToIndex(BB: From);
136 size_t const ToIndex = Mapping.blockToIndex(BB: To);
137 bool const Result = Block[ToIndex].Kills[FromIndex];
138 LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
139 << " answer is " << Result << "\n");
140 return Result;
141 }
142
143 /// Returns true if there is a path from \p From to \p To crossing a suspend
144 /// point without crossing \p From a 2nd time. If \p From is the same as \p To
145 /// this will also check if there is a looping path crossing a suspend point.
146 bool hasPathOrLoopCrossingSuspendPoint(BasicBlock *From,
147 BasicBlock *To) const {
148 size_t const FromIndex = Mapping.blockToIndex(BB: From);
149 size_t const ToIndex = Mapping.blockToIndex(BB: To);
150 bool Result = Block[ToIndex].Kills[FromIndex] ||
151 (From == To && Block[ToIndex].KillLoop);
152 LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
153 << " answer is " << Result << " (path or loop)\n");
154 return Result;
155 }
156
157 bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
158 auto *I = cast<Instruction>(Val: U);
159
160 // We rewrote PHINodes, so that only the ones with exactly one incoming
161 // value need to be analyzed.
162 if (auto *PN = dyn_cast<PHINode>(Val: I))
163 if (PN->getNumIncomingValues() > 1)
164 return false;
165
166 BasicBlock *UseBB = I->getParent();
167
168 // As a special case, treat uses by an llvm.coro.suspend.retcon or an
169 // llvm.coro.suspend.async as if they were uses in the suspend's single
170 // predecessor: the uses conceptually occur before the suspend.
171 if (isa<CoroSuspendRetconInst>(Val: I) || isa<CoroSuspendAsyncInst>(Val: I)) {
172 UseBB = UseBB->getSinglePredecessor();
173 assert(UseBB && "should have split coro.suspend into its own block");
174 }
175
176 return hasPathCrossingSuspendPoint(From: DefBB, To: UseBB);
177 }
178
179 bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
180 return isDefinitionAcrossSuspend(DefBB: &A.getParent()->getEntryBlock(), U);
181 }
182
183 bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
184 auto *DefBB = I.getParent();
185
186 // As a special case, treat values produced by an llvm.coro.suspend.*
187 // as if they were defined in the single successor: the uses
188 // conceptually occur after the suspend.
189 if (isa<AnyCoroSuspendInst>(Val: I)) {
190 DefBB = DefBB->getSingleSuccessor();
191 assert(DefBB && "should have split coro.suspend into its own block");
192 }
193
194 return isDefinitionAcrossSuspend(DefBB, U);
195 }
196
197 bool isDefinitionAcrossSuspend(Value &V, User *U) const {
198 if (auto *Arg = dyn_cast<Argument>(Val: &V))
199 return isDefinitionAcrossSuspend(A&: *Arg, U);
200 if (auto *Inst = dyn_cast<Instruction>(Val: &V))
201 return isDefinitionAcrossSuspend(I&: *Inst, U);
202
203 llvm_unreachable(
204 "Coroutine could only collect Argument and Instruction now.");
205 }
206};
207} // end anonymous namespace
208
209#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
210LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
211 BitVector const &BV) const {
212 dbgs() << Label << ":";
213 for (size_t I = 0, N = BV.size(); I < N; ++I)
214 if (BV[I])
215 dbgs() << " " << Mapping.indexToBlock(I)->getName();
216 dbgs() << "\n";
217}
218
219LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
220 for (size_t I = 0, N = Block.size(); I < N; ++I) {
221 BasicBlock *const B = Mapping.indexToBlock(I);
222 dbgs() << B->getName() << ":\n";
223 dump(" Consumes", Block[I].Consumes);
224 dump(" Kills", Block[I].Kills);
225 }
226 dbgs() << "\n";
227}
228#endif
229
230template <bool Initialize>
231bool SuspendCrossingInfo::computeBlockData(
232 const ReversePostOrderTraversal<Function *> &RPOT) {
233 bool Changed = false;
234
235 for (const BasicBlock *BB : RPOT) {
236 auto BBNo = Mapping.blockToIndex(BB);
237 auto &B = Block[BBNo];
238
    // We don't need to check the predecessors during initialization.
240 if constexpr (!Initialize)
      // If none of the predecessors of the current block changed,
      // the BlockData for the current block cannot change either.
243 if (all_of(predecessors(BD: B), [this](BasicBlock *BB) {
244 return !Block[Mapping.blockToIndex(BB)].Changed;
245 })) {
246 B.Changed = false;
247 continue;
248 }
249
    // Save the Consumes and Kills bitsets so that it is easy to see
    // whether anything changed after propagation.
252 auto SavedConsumes = B.Consumes;
253 auto SavedKills = B.Kills;
254
255 for (BasicBlock *PI : predecessors(BD: B)) {
256 auto PrevNo = Mapping.blockToIndex(BB: PI);
257 auto &P = Block[PrevNo];
258
259 // Propagate Kills and Consumes from predecessors into B.
260 B.Consumes |= P.Consumes;
261 B.Kills |= P.Kills;
262
263 // If block P is a suspend block, it should propagate kills into block
264 // B for every block P consumes.
265 if (P.Suspend)
266 B.Kills |= P.Consumes;
267 }
268
269 if (B.Suspend) {
270 // If block B is a suspend block, it should kill all of the blocks it
271 // consumes.
272 B.Kills |= B.Consumes;
273 } else if (B.End) {
274 // If block B is an end block, it should not propagate kills as the
275 // blocks following coro.end() are reached during initial invocation
276 // of the coroutine while all the data are still available on the
277 // stack or in the registers.
278 B.Kills.reset();
279 } else {
      // This is reached when block B is neither a suspend block nor a
      // coro.end block; make sure that B is not in its own kill set.
282 B.KillLoop |= B.Kills[BBNo];
283 B.Kills.reset(Idx: BBNo);
284 }
285
286 if constexpr (!Initialize) {
287 B.Changed = (B.Kills != SavedKills) || (B.Consumes != SavedConsumes);
288 Changed |= B.Changed;
289 }
290 }
291
292 return Changed;
293}
294
295SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
296 : Mapping(F) {
297 const size_t N = Mapping.size();
298 Block.resize(N);
299
300 // Initialize every block so that it consumes itself
301 for (size_t I = 0; I < N; ++I) {
302 auto &B = Block[I];
303 B.Consumes.resize(N);
304 B.Kills.resize(N);
305 B.Consumes.set(I);
306 B.Changed = true;
307 }
308
309 // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
310 // the code beyond coro.end is reachable during initial invocation of the
311 // coroutine.
312 for (auto *CE : Shape.CoroEnds)
313 getBlockData(BB: CE->getParent()).End = true;
314
315 // Mark all suspend blocks and indicate that they kill everything they
316 // consume. Note, that crossing coro.save also requires a spill, as any code
317 // between coro.save and coro.suspend may resume the coroutine and all of the
318 // state needs to be saved by that time.
319 auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
320 BasicBlock *SuspendBlock = BarrierInst->getParent();
321 auto &B = getBlockData(BB: SuspendBlock);
322 B.Suspend = true;
323 B.Kills |= B.Consumes;
324 };
325 for (auto *CSI : Shape.CoroSuspends) {
326 markSuspendBlock(CSI);
327 if (auto *Save = CSI->getCoroSave())
328 markSuspendBlock(Save);
329 }
330
  // A reverse post-order traversal is generally faster for a forward-edge
  // dataflow analysis like this one.
333 ReversePostOrderTraversal<Function *> RPOT(&F);
334 computeBlockData</*Initialize=*/true>(RPOT);
335 while (computeBlockData</*Initialize*/ false>(RPOT))
336 ;
337
338 LLVM_DEBUG(dump());
339}
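// Typical use of the analysis (illustrative sketch, not the exact code used
// later in this file): after construction, the spill-collection logic asks,
// for each definition/use pair, whether the use is reached across a suspend:
//
//   SuspendCrossingInfo Checker(F, Shape);
//   for (Instruction &I : instructions(F))
//     for (User *U : I.users())
//       if (Checker.isDefinitionAcrossSuspend(I, U))
//         ; // I must be spilled to the coroutine frame for this use.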
340
341namespace {
342
// RematGraph is used to construct a DAG of rematerializable instructions.
// When the constructor is invoked with a candidate instruction (which is
// materializable) it builds a DAG of materializable instructions reachable
// from that point.
347// Typically, for each instruction identified as re-materializable across a
348// suspend point, a RematGraph will be created.
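// For example (illustrative only), if %t2 is used across a suspend point and
// is computed as
//
//   %t0 = add i32 %a, 1
//   %t1 = mul i32 %t0, %t0
//   %t2 = add i32 %t1, %a
//
// then a RematGraph rooted at %t2 contains nodes for %t2, %t1 and %t0
// (provided each is reported materializable and crosses the suspend), with
// edges from each node to the instructions producing its operands.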
349struct RematGraph {
  // Each RematNode in the graph contains edges to the instructions providing
  // operands to the current node.
352 struct RematNode {
353 Instruction *Node;
354 SmallVector<RematNode *> Operands;
355 RematNode() = default;
356 RematNode(Instruction *V) : Node(V) {}
357 };
358
359 RematNode *EntryNode;
360 using RematNodeMap =
361 SmallMapVector<Instruction *, std::unique_ptr<RematNode>, 8>;
362 RematNodeMap Remats;
363 const std::function<bool(Instruction &)> &MaterializableCallback;
364 SuspendCrossingInfo &Checker;
365
366 RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback,
367 Instruction *I, SuspendCrossingInfo &Checker)
368 : MaterializableCallback(MaterializableCallback), Checker(Checker) {
369 std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(args&: I);
370 EntryNode = FirstNode.get();
371 std::deque<std::unique_ptr<RematNode>> WorkList;
372 addNode(NUPtr: std::move(FirstNode), WorkList, FirstUse: cast<User>(Val: I));
373 while (WorkList.size()) {
374 std::unique_ptr<RematNode> N = std::move(WorkList.front());
375 WorkList.pop_front();
376 addNode(NUPtr: std::move(N), WorkList, FirstUse: cast<User>(Val: I));
377 }
378 }
379
380 void addNode(std::unique_ptr<RematNode> NUPtr,
381 std::deque<std::unique_ptr<RematNode>> &WorkList,
382 User *FirstUse) {
383 RematNode *N = NUPtr.get();
384 if (Remats.count(Key: N->Node))
385 return;
386
    // We haven't seen this node yet - add it to the list.
388 Remats[N->Node] = std::move(NUPtr);
389 for (auto &Def : N->Node->operands()) {
390 Instruction *D = dyn_cast<Instruction>(Val: Def.get());
391 if (!D || !MaterializableCallback(*D) ||
392 !Checker.isDefinitionAcrossSuspend(I&: *D, U: FirstUse))
393 continue;
394
395 if (Remats.count(Key: D)) {
396 // Already have this in the graph
397 N->Operands.push_back(Elt: Remats[D].get());
398 continue;
399 }
400
401 bool NoMatch = true;
402 for (auto &I : WorkList) {
403 if (I->Node == D) {
404 NoMatch = false;
405 N->Operands.push_back(Elt: I.get());
406 break;
407 }
408 }
409 if (NoMatch) {
410 // Create a new node
411 std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(args&: D);
412 N->Operands.push_back(Elt: ChildNode.get());
413 WorkList.push_back(x: std::move(ChildNode));
414 }
415 }
416 }
417
418#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
419 void dump() const {
420 dbgs() << "Entry (";
421 if (EntryNode->Node->getParent()->hasName())
422 dbgs() << EntryNode->Node->getParent()->getName();
423 else
424 EntryNode->Node->getParent()->printAsOperand(dbgs(), false);
425 dbgs() << ") : " << *EntryNode->Node << "\n";
426 for (auto &E : Remats) {
427 dbgs() << *(E.first) << "\n";
428 for (RematNode *U : E.second->Operands)
429 dbgs() << " " << *U->Node << "\n";
430 }
431 }
432#endif
433};
434} // end anonymous namespace
435
436namespace llvm {
437
438template <> struct GraphTraits<RematGraph *> {
439 using NodeRef = RematGraph::RematNode *;
440 using ChildIteratorType = RematGraph::RematNode **;
441
442 static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; }
443 static ChildIteratorType child_begin(NodeRef N) {
444 return N->Operands.begin();
445 }
446 static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
447};
448
449} // end namespace llvm
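// With this GraphTraits specialization the generic graph algorithms in LLVM
// can walk a RematGraph directly. A sketch (assuming RG is a
// std::unique_ptr<RematGraph>) of visiting operands before their users:
//
//   for (RematGraph::RematNode *N : post_order(RG.get()))
//     ; // clone N->Node here; its operands have already been visited.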
450
451#undef DEBUG_TYPE // "coro-suspend-crossing"
452#define DEBUG_TYPE "coro-frame"
453
454namespace {
455class FrameTypeBuilder;
456// Mapping from the to-be-spilled value to all the users that need reload.
457using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>;
458struct AllocaInfo {
459 AllocaInst *Alloca;
460 DenseMap<Instruction *, std::optional<APInt>> Aliases;
461 bool MayWriteBeforeCoroBegin;
462 AllocaInfo(AllocaInst *Alloca,
463 DenseMap<Instruction *, std::optional<APInt>> Aliases,
464 bool MayWriteBeforeCoroBegin)
465 : Alloca(Alloca), Aliases(std::move(Aliases)),
466 MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {}
467};
468struct FrameDataInfo {
  // All the values (that are not allocas) that need to be spilled to the
  // frame.
471 SpillInfo Spills;
472 // Allocas contains all values defined as allocas that need to live in the
473 // frame.
474 SmallVector<AllocaInfo, 8> Allocas;
475
476 SmallVector<Value *, 8> getAllDefs() const {
477 SmallVector<Value *, 8> Defs;
478 for (const auto &P : Spills)
479 Defs.push_back(Elt: P.first);
480 for (const auto &A : Allocas)
481 Defs.push_back(Elt: A.Alloca);
482 return Defs;
483 }
484
485 uint32_t getFieldIndex(Value *V) const {
486 auto Itr = FieldIndexMap.find(Val: V);
487 assert(Itr != FieldIndexMap.end() &&
488 "Value does not have a frame field index");
489 return Itr->second;
490 }
491
492 void setFieldIndex(Value *V, uint32_t Index) {
493 assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
494 "Cannot set the index for the same field twice.");
495 FieldIndexMap[V] = Index;
496 }
497
498 Align getAlign(Value *V) const {
499 auto Iter = FieldAlignMap.find(Val: V);
500 assert(Iter != FieldAlignMap.end());
501 return Iter->second;
502 }
503
504 void setAlign(Value *V, Align AL) {
505 assert(FieldAlignMap.count(V) == 0);
506 FieldAlignMap.insert(KV: {V, AL});
507 }
508
509 uint64_t getDynamicAlign(Value *V) const {
510 auto Iter = FieldDynamicAlignMap.find(Val: V);
511 assert(Iter != FieldDynamicAlignMap.end());
512 return Iter->second;
513 }
514
515 void setDynamicAlign(Value *V, uint64_t Align) {
516 assert(FieldDynamicAlignMap.count(V) == 0);
517 FieldDynamicAlignMap.insert(KV: {V, Align});
518 }
519
520 uint64_t getOffset(Value *V) const {
521 auto Iter = FieldOffsetMap.find(Val: V);
522 assert(Iter != FieldOffsetMap.end());
523 return Iter->second;
524 }
525
526 void setOffset(Value *V, uint64_t Offset) {
527 assert(FieldOffsetMap.count(V) == 0);
528 FieldOffsetMap.insert(KV: {V, Offset});
529 }
530
531 // Remap the index of every field in the frame, using the final layout index.
532 void updateLayoutIndex(FrameTypeBuilder &B);
533
534private:
535 // LayoutIndexUpdateStarted is used to avoid updating the index of any field
536 // twice by mistake.
537 bool LayoutIndexUpdateStarted = false;
538 // Map from values to their slot indexes on the frame. They will be first set
539 // with their original insertion field index. After the frame is built, their
540 // indexes will be updated into the final layout index.
541 DenseMap<Value *, uint32_t> FieldIndexMap;
542 // Map from values to their alignment on the frame. They would be set after
543 // the frame is built.
544 DenseMap<Value *, Align> FieldAlignMap;
545 DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
546 // Map from values to their offset on the frame. They would be set after
547 // the frame is built.
548 DenseMap<Value *, uint64_t> FieldOffsetMap;
549};
550} // namespace
551
552#ifndef NDEBUG
553static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
554 dbgs() << "------------- " << Title << "--------------\n";
555 for (const auto &E : Spills) {
556 E.first->dump();
557 dbgs() << " user: ";
558 for (auto *I : E.second)
559 I->dump();
560 }
561}
562static void dumpRemats(
563 StringRef Title,
564 const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) {
565 dbgs() << "------------- " << Title << "--------------\n";
566 for (const auto &E : RM) {
567 E.second->dump();
568 dbgs() << "--\n";
569 }
570}
571
572static void dumpAllocas(const SmallVectorImpl<AllocaInfo> &Allocas) {
573 dbgs() << "------------- Allocas --------------\n";
574 for (const auto &A : Allocas) {
575 A.Alloca->dump();
576 }
577}
578#endif
579
580namespace {
581using FieldIDType = size_t;
// We cannot rely solely on the natural alignment of a type when building a
// coroutine frame: if the alignment specified on the alloca instruction
// differs from the natural alignment of the alloca type, we will need to
// insert padding.
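// For example (illustrative only):
//
//   %x = alloca i32, align 128
//
// has a natural (ABI) alignment of 4 but a requested alignment of 128. The
// frame layout must either place the field at a 128-byte-aligned offset or,
// if that exceeds the maximum frame alignment, reserve a dynamic-alignment
// buffer so the pointer can be re-aligned at runtime (see addField below).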
586class FrameTypeBuilder {
587private:
588 struct Field {
589 uint64_t Size;
590 uint64_t Offset;
591 Type *Ty;
592 FieldIDType LayoutFieldIndex;
593 Align Alignment;
594 Align TyAlignment;
595 uint64_t DynamicAlignBuffer;
596 };
597
598 const DataLayout &DL;
599 LLVMContext &Context;
600 uint64_t StructSize = 0;
601 Align StructAlign;
602 bool IsFinished = false;
603
604 std::optional<Align> MaxFrameAlignment;
605
606 SmallVector<Field, 8> Fields;
607 DenseMap<Value*, unsigned> FieldIndexByKey;
608
609public:
610 FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
611 std::optional<Align> MaxFrameAlignment)
612 : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
613
614 /// Add a field to this structure for the storage of an `alloca`
615 /// instruction.
616 [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
617 bool IsHeader = false) {
618 Type *Ty = AI->getAllocatedType();
619
620 // Make an array type if this is a static array allocation.
621 if (AI->isArrayAllocation()) {
622 if (auto *CI = dyn_cast<ConstantInt>(Val: AI->getArraySize()))
623 Ty = ArrayType::get(ElementType: Ty, NumElements: CI->getValue().getZExtValue());
624 else
625 report_fatal_error(reason: "Coroutines cannot handle non static allocas yet");
626 }
627
628 return addField(Ty, MaybeFieldAlignment: AI->getAlign(), IsHeader);
629 }
630
  /// We want to put allocas whose lifetime ranges do not overlap
  /// into one slot of the coroutine frame.
  /// Consider the example at: https://bugs.llvm.org/show_bug.cgi?id=45566
634 ///
635 /// cppcoro::task<void> alternative_paths(bool cond) {
636 /// if (cond) {
637 /// big_structure a;
638 /// process(a);
639 /// co_await something();
640 /// } else {
641 /// big_structure b;
642 /// process2(b);
643 /// co_await something();
644 /// }
645 /// }
646 ///
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of the coroutine frame.
  ///
  /// This function uses the StackLifetime algorithm to partition the allocas
  /// into non-overlapping sets so that allocas in the same set can share a
  /// slot in the coroutine frame. It then adds one field for each
  /// non-overlapping set, using the largest alloca's type as the field type.
  ///
  /// Side effect: because we sort the allocas, the order of allocas in the
  /// frame may differ from their order in the source code.
658 void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
659 coro::Shape &Shape);
660
661 /// Add a field to this structure.
662 [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
663 bool IsHeader = false,
664 bool IsSpillOfValue = false) {
665 assert(!IsFinished && "adding fields to a finished builder");
666 assert(Ty && "must provide a type for a field");
667
668 // The field size is always the alloc size of the type.
669 uint64_t FieldSize = DL.getTypeAllocSize(Ty);
670
    // For an alloca with size 0, we don't need to add a field; it can just
    // point to any index in the frame. Use index 0.
673 if (FieldSize == 0) {
674 return 0;
675 }
676
677 // The field alignment might not be the type alignment, but we need
678 // to remember the type alignment anyway to build the type.
679 // If we are spilling values we don't need to worry about ABI alignment
680 // concerns.
681 Align ABIAlign = DL.getABITypeAlign(Ty);
682 Align TyAlignment = ABIAlign;
683 if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
684 TyAlignment = *MaxFrameAlignment;
685 Align FieldAlignment = MaybeFieldAlignment.value_or(u&: TyAlignment);
686
    // The field alignment could be bigger than the max frame alignment, in
    // which case we request additional storage to be able to dynamically
    // align the pointer.
690 uint64_t DynamicAlignBuffer = 0;
691 if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
692 DynamicAlignBuffer =
693 offsetToAlignment(Value: MaxFrameAlignment->value(), Alignment: FieldAlignment);
694 FieldAlignment = *MaxFrameAlignment;
695 FieldSize = FieldSize + DynamicAlignBuffer;
696 }
697
698 // Lay out header fields immediately.
699 uint64_t Offset;
700 if (IsHeader) {
701 Offset = alignTo(Size: StructSize, A: FieldAlignment);
702 StructSize = Offset + FieldSize;
703
704 // Everything else has a flexible offset.
705 } else {
706 Offset = OptimizedStructLayoutField::FlexibleOffset;
707 }
708
709 Fields.push_back(Elt: {.Size: FieldSize, .Offset: Offset, .Ty: Ty, .LayoutFieldIndex: 0, .Alignment: FieldAlignment, .TyAlignment: TyAlignment,
710 .DynamicAlignBuffer: DynamicAlignBuffer});
711 return Fields.size() - 1;
712 }
713
714 /// Finish the layout and set the body on the given type.
715 void finish(StructType *Ty);
716
717 uint64_t getStructSize() const {
718 assert(IsFinished && "not yet finished!");
719 return StructSize;
720 }
721
722 Align getStructAlign() const {
723 assert(IsFinished && "not yet finished!");
724 return StructAlign;
725 }
726
727 FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
728 assert(IsFinished && "not yet finished!");
729 return Fields[Id].LayoutFieldIndex;
730 }
731
732 Field getLayoutField(FieldIDType Id) const {
733 assert(IsFinished && "not yet finished!");
734 return Fields[Id];
735 }
736};
737} // namespace
738
739void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
740 auto Updater = [&](Value *I) {
741 auto Field = B.getLayoutField(Id: getFieldIndex(V: I));
742 setFieldIndex(V: I, Index: Field.LayoutFieldIndex);
743 setAlign(V: I, AL: Field.Alignment);
744 uint64_t dynamicAlign =
745 Field.DynamicAlignBuffer
746 ? Field.DynamicAlignBuffer + Field.Alignment.value()
747 : 0;
748 setDynamicAlign(V: I, Align: dynamicAlign);
749 setOffset(V: I, Offset: Field.Offset);
750 };
751 LayoutIndexUpdateStarted = true;
752 for (auto &S : Spills)
753 Updater(S.first);
754 for (const auto &A : Allocas)
755 Updater(A.Alloca);
756 LayoutIndexUpdateStarted = false;
757}
758
759void FrameTypeBuilder::addFieldForAllocas(const Function &F,
760 FrameDataInfo &FrameData,
761 coro::Shape &Shape) {
762 using AllocaSetType = SmallVector<AllocaInst *, 4>;
763 SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
764
765 // We need to add field for allocas at the end of this function.
766 auto AddFieldForAllocasAtExit = make_scope_exit(F: [&]() {
767 for (auto AllocaList : NonOverlapedAllocas) {
768 auto *LargestAI = *AllocaList.begin();
769 FieldIDType Id = addFieldForAlloca(AI: LargestAI);
770 for (auto *Alloca : AllocaList)
771 FrameData.setFieldIndex(V: Alloca, Index: Id);
772 }
773 });
774
775 if (!Shape.OptimizeFrame) {
776 for (const auto &A : FrameData.Allocas) {
777 AllocaInst *Alloca = A.Alloca;
778 NonOverlapedAllocas.emplace_back(Args: AllocaSetType(1, Alloca));
779 }
780 return;
781 }
782
  // Because there are paths from the lifetime.start to coro.end
  // for each alloca, the live ranges of all allocas overlap
  // in the blocks containing coro.end and their successor blocks.
  // So we choose to skip those blocks when we calculate the live range
  // for each alloca. This should be reasonable since there shouldn't be uses
  // in these blocks and the coroutine frame shouldn't be used outside the
  // coroutine body.
  //
  // Note that the user of coro.suspend may not be a SwitchInst. However, this
  // case seems too complex to handle. And it is harmless to skip these
  // patterns since it just prevents putting the allocas into the same
  // slot.
795 DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
796 for (auto *CoroSuspendInst : Shape.CoroSuspends) {
797 for (auto *U : CoroSuspendInst->users()) {
798 if (auto *ConstSWI = dyn_cast<SwitchInst>(Val: U)) {
799 auto *SWI = const_cast<SwitchInst *>(ConstSWI);
800 DefaultSuspendDest[SWI] = SWI->getDefaultDest();
801 SWI->setDefaultDest(SWI->getSuccessor(idx: 1));
802 }
803 }
804 }
805
806 auto ExtractAllocas = [&]() {
807 AllocaSetType Allocas;
808 Allocas.reserve(N: FrameData.Allocas.size());
809 for (const auto &A : FrameData.Allocas)
810 Allocas.push_back(Elt: A.Alloca);
811 return Allocas;
812 };
813 StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
814 StackLifetime::LivenessType::May);
815 StackLifetimeAnalyzer.run();
816 auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
817 return StackLifetimeAnalyzer.getLiveRange(AI: AI1).overlaps(
818 Other: StackLifetimeAnalyzer.getLiveRange(AI: AI2));
819 };
820 auto GetAllocaSize = [&](const AllocaInfo &A) {
821 std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
822 assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
823 assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
824 return RetSize->getFixedValue();
825 };
  // Put larger allocas in front, so that larger allocas have higher priority
  // to be merged, which can potentially save more space. Also, each AllocaSet
  // is then kept ordered, so we can easily get the largest alloca in an
  // AllocaSet.
830 sort(C&: FrameData.Allocas, Comp: [&](const auto &Iter1, const auto &Iter2) {
831 return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
832 });
833 for (const auto &A : FrameData.Allocas) {
834 AllocaInst *Alloca = A.Alloca;
835 bool Merged = false;
    // Try to find an existing NonOverlappedAllocaSet whose live ranges do not
    // interfere with this alloca. If such a set is found, insert the alloca
    // into it.
839 for (auto &AllocaSet : NonOverlapedAllocas) {
840 assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
841 bool NoInference = none_of(Range&: AllocaSet, P: [&](auto Iter) {
842 return IsAllocaInferenre(Alloca, Iter);
843 });
      // If the alignment of A is a multiple of the alignment of B, the
      // address of A satisfies the alignment requirement of B.
      //
      // There may be finer-grained strategies for handling alignment
      // information during the merging process, but they seem hard to
      // implement and would provide little benefit.
850 bool Alignable = [&]() -> bool {
851 auto *LargestAlloca = *AllocaSet.begin();
852 return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
853 0;
854 }();
855 bool CouldMerge = NoInference && Alignable;
856 if (!CouldMerge)
857 continue;
858 AllocaSet.push_back(Elt: Alloca);
859 Merged = true;
860 break;
861 }
862 if (!Merged) {
863 NonOverlapedAllocas.emplace_back(Args: AllocaSetType(1, Alloca));
864 }
865 }
  // Restore the default destination of each switch statement that we
  // modified above.
868 for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
869 SwitchInst *SWI = SwitchAndDefaultDest.first;
870 BasicBlock *DestBB = SwitchAndDefaultDest.second;
871 SWI->setDefaultDest(DestBB);
872 }
873 // This Debug Info could tell us which allocas are merged into one slot.
874 LLVM_DEBUG(for (auto &AllocaSet
875 : NonOverlapedAllocas) {
876 if (AllocaSet.size() > 1) {
877 dbgs() << "In Function:" << F.getName() << "\n";
878 dbgs() << "Find Union Set "
879 << "\n";
880 dbgs() << "\tAllocas are \n";
881 for (auto Alloca : AllocaSet)
882 dbgs() << "\t\t" << *Alloca << "\n";
883 }
884 });
885}
886
887void FrameTypeBuilder::finish(StructType *Ty) {
888 assert(!IsFinished && "already finished!");
889
890 // Prepare the optimal-layout field array.
891 // The Id in the layout field is a pointer to our Field for it.
892 SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
893 LayoutFields.reserve(N: Fields.size());
894 for (auto &Field : Fields) {
895 LayoutFields.emplace_back(Args: &Field, Args&: Field.Size, Args&: Field.Alignment,
896 Args&: Field.Offset);
897 }
898
899 // Perform layout.
900 auto SizeAndAlign = performOptimizedStructLayout(Fields: LayoutFields);
901 StructSize = SizeAndAlign.first;
902 StructAlign = SizeAndAlign.second;
903
904 auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
905 return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
906 };
907
908 // We need to produce a packed struct type if there's a field whose
909 // assigned offset isn't a multiple of its natural type alignment.
910 bool Packed = [&] {
911 for (auto &LayoutField : LayoutFields) {
912 auto &F = getField(LayoutField);
913 if (!isAligned(Lhs: F.TyAlignment, SizeInBytes: LayoutField.Offset))
914 return true;
915 }
916 return false;
917 }();
918
919 // Build the struct body.
920 SmallVector<Type*, 16> FieldTypes;
921 FieldTypes.reserve(N: LayoutFields.size() * 3 / 2);
922 uint64_t LastOffset = 0;
923 for (auto &LayoutField : LayoutFields) {
924 auto &F = getField(LayoutField);
925
926 auto Offset = LayoutField.Offset;
927
928 // Add a padding field if there's a padding gap and we're either
929 // building a packed struct or the padding gap is more than we'd
930 // get from aligning to the field type's natural alignment.
931 assert(Offset >= LastOffset);
932 if (Offset != LastOffset) {
933 if (Packed || alignTo(Size: LastOffset, A: F.TyAlignment) != Offset)
934 FieldTypes.push_back(Elt: ArrayType::get(ElementType: Type::getInt8Ty(C&: Context),
935 NumElements: Offset - LastOffset));
936 }
937
938 F.Offset = Offset;
939 F.LayoutFieldIndex = FieldTypes.size();
940
941 FieldTypes.push_back(Elt: F.Ty);
942 if (F.DynamicAlignBuffer) {
943 FieldTypes.push_back(
944 Elt: ArrayType::get(ElementType: Type::getInt8Ty(C&: Context), NumElements: F.DynamicAlignBuffer));
945 }
946 LastOffset = Offset + F.Size;
947 }
948
949 Ty->setBody(Elements: FieldTypes, isPacked: Packed);
950
951#ifndef NDEBUG
952 // Check that the IR layout matches the offsets we expect.
953 auto Layout = DL.getStructLayout(Ty);
954 for (auto &F : Fields) {
955 assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
956 assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
957 }
958#endif
959
960 IsFinished = true;
961}
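// A small worked example (illustrative only): two non-header fields of types
// i64 (size 8, align 8) and i8 (size 1, align 1) may be laid out by
// performOptimizedStructLayout as i64 at offset 0 and i8 at offset 8, giving
// StructSize = 9 and StructAlign = 8. No padding array is emitted because
// every field offset is already aligned to its type alignment, so the
// resulting body is { i64, i8 } and the struct is not packed.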
962
963static void cacheDIVar(FrameDataInfo &FrameData,
964 DenseMap<Value *, DILocalVariable *> &DIVarCache) {
965 for (auto *V : FrameData.getAllDefs()) {
966 if (DIVarCache.contains(Val: V))
967 continue;
968
969 auto CacheIt = [&DIVarCache, V](const auto &Container) {
970 auto *I = llvm::find_if(Container, [](auto *DDI) {
971 return DDI->getExpression()->getNumElements() == 0;
972 });
973 if (I != Container.end())
974 DIVarCache.insert({V, (*I)->getVariable()});
975 };
976 CacheIt(findDbgDeclares(V));
977 CacheIt(findDVRDeclares(V));
978 }
979}
980
/// Create a name for the given Type. It uses an MDString to store the newly
/// created string, to avoid a memory leak.
983static StringRef solveTypeName(Type *Ty) {
984 if (Ty->isIntegerTy()) {
    // The longest common name may be '__int_128', which has 9 characters.
986 SmallString<16> Buffer;
987 raw_svector_ostream OS(Buffer);
988 OS << "__int_" << cast<IntegerType>(Val: Ty)->getBitWidth();
989 auto *MDName = MDString::get(Context&: Ty->getContext(), Str: OS.str());
990 return MDName->getString();
991 }
992
993 if (Ty->isFloatingPointTy()) {
994 if (Ty->isFloatTy())
995 return "__float_";
996 if (Ty->isDoubleTy())
997 return "__double_";
998 return "__floating_type_";
999 }
1000
1001 if (Ty->isPointerTy())
1002 return "PointerType";
1003
1004 if (Ty->isStructTy()) {
1005 if (!cast<StructType>(Val: Ty)->hasName())
1006 return "__LiteralStructType_";
1007
1008 auto Name = Ty->getStructName();
1009
1010 SmallString<16> Buffer(Name);
1011 for (auto &Iter : Buffer)
1012 if (Iter == '.' || Iter == ':')
1013 Iter = '_';
1014 auto *MDName = MDString::get(Context&: Ty->getContext(), Str: Buffer.str());
1015 return MDName->getString();
1016 }
1017
1018 return "UnknownType";
1019}
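// Examples of the names produced above (illustrative only):
//   i64                    -> "__int_64"
//   double                 -> "__double_"
//   ptr                    -> "PointerType"
//   %"class.std::string"   -> "class_std__string"  (dots/colons replaced)
//   [4 x i32]              -> "UnknownType"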
1020
1021static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
1022 const DataLayout &Layout, DIScope *Scope,
1023 unsigned LineNum,
1024 DenseMap<Type *, DIType *> &DITypeCache) {
1025 if (DIType *DT = DITypeCache.lookup(Val: Ty))
1026 return DT;
1027
1028 StringRef Name = solveTypeName(Ty);
1029
1030 DIType *RetType = nullptr;
1031
1032 if (Ty->isIntegerTy()) {
1033 auto BitWidth = cast<IntegerType>(Val: Ty)->getBitWidth();
1034 RetType = Builder.createBasicType(Name, SizeInBits: BitWidth, Encoding: dwarf::DW_ATE_signed,
1035 Flags: llvm::DINode::FlagArtificial);
1036 } else if (Ty->isFloatingPointTy()) {
1037 RetType = Builder.createBasicType(Name, SizeInBits: Layout.getTypeSizeInBits(Ty),
1038 Encoding: dwarf::DW_ATE_float,
1039 Flags: llvm::DINode::FlagArtificial);
1040 } else if (Ty->isPointerTy()) {
    // Construct a PointerType that points to null (i.e. void *) instead of
    // exploring the pointee type, to avoid an infinite-recursion problem.
    // For example, we would be in trouble if we traversed recursively:
1044 //
1045 // struct Node {
1046 // Node* ptr;
1047 // };
1048 RetType =
1049 Builder.createPointerType(PointeeTy: nullptr, SizeInBits: Layout.getTypeSizeInBits(Ty),
1050 AlignInBits: Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
1051 /*DWARFAddressSpace=*/std::nullopt, Name);
1052 } else if (Ty->isStructTy()) {
1053 auto *DIStruct = Builder.createStructType(
1054 Scope, Name, File: Scope->getFile(), LineNumber: LineNum, SizeInBits: Layout.getTypeSizeInBits(Ty),
1055 AlignInBits: Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
1056 Flags: llvm::DINode::FlagArtificial, DerivedFrom: nullptr, Elements: llvm::DINodeArray());
1057
1058 auto *StructTy = cast<StructType>(Val: Ty);
1059 SmallVector<Metadata *, 16> Elements;
1060 for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
1061 DIType *DITy = solveDIType(Builder, Ty: StructTy->getElementType(N: I), Layout,
1062 Scope, LineNum, DITypeCache);
1063 assert(DITy);
1064 Elements.push_back(Elt: Builder.createMemberType(
1065 Scope, Name: DITy->getName(), File: Scope->getFile(), LineNo: LineNum,
1066 SizeInBits: DITy->getSizeInBits(), AlignInBits: DITy->getAlignInBits(),
1067 OffsetInBits: Layout.getStructLayout(Ty: StructTy)->getElementOffsetInBits(Idx: I),
1068 Flags: llvm::DINode::FlagArtificial, Ty: DITy));
1069 }
1070
1071 Builder.replaceArrays(T&: DIStruct, Elements: Builder.getOrCreateArray(Elements));
1072
1073 RetType = DIStruct;
1074 } else {
1075 LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
1076 TypeSize Size = Layout.getTypeSizeInBits(Ty);
1077 auto *CharSizeType = Builder.createBasicType(
1078 Name, SizeInBits: 8, Encoding: dwarf::DW_ATE_unsigned_char, Flags: llvm::DINode::FlagArtificial);
1079
1080 if (Size <= 8)
1081 RetType = CharSizeType;
1082 else {
1083 if (Size % 8 != 0)
1084 Size = TypeSize::getFixed(ExactSize: Size + 8 - (Size % 8));
1085
1086 RetType = Builder.createArrayType(
1087 Size, AlignInBits: Layout.getPrefTypeAlign(Ty).value(), Ty: CharSizeType,
1088 Subscripts: Builder.getOrCreateArray(Elements: Builder.getOrCreateSubrange(Lo: 0, Count: Size / 8)));
1089 }
1090 }
1091
1092 DITypeCache.insert(KV: {Ty, RetType});
1093 return RetType;
1094}
1095
1096/// Build artificial debug info for C++ coroutine frames to allow users to
1097/// inspect the contents of the frame directly
1098///
/// Create debug information for the coroutine frame with the debug name
/// "__coro_frame". The debug information for the fields of the coroutine
/// frame is constructed in the following way:
/// 1. For every value in the frame, we search its dbg.declare uses to find
///    the corresponding debug variable for the value. If we can find the
///    debug variable, we get full and accurate debug information.
/// 2. If we can't get debug information in step 1, we fall back to building
///    a DIType from the value's Type in solveDIType. We only handle integer,
///    floating-point, pointer and struct types for now.
1108static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
1109 FrameDataInfo &FrameData) {
1110 DISubprogram *DIS = F.getSubprogram();
  // If there is no DISubprogram for F, the function was not compiled with
  // debug info, so we don't need to generate debug info for the frame
  // either.
1114 if (!DIS || !DIS->getUnit() ||
1115 !dwarf::isCPlusPlus(
1116 S: (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()))
1117 return;
1118
1119 assert(Shape.ABI == coro::ABI::Switch &&
1120 "We could only build debug infomation for C++ coroutine now.\n");
1121
1122 DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
1123
1124 AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
1125 assert(PromiseAlloca &&
1126 "Coroutine with switch ABI should own Promise alloca");
1127
1128 TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(V: PromiseAlloca);
1129 TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(V: PromiseAlloca);
1130
1131 DILocalVariable *PromiseDIVariable = nullptr;
1132 DILocation *DILoc = nullptr;
1133 if (!DIs.empty()) {
1134 DbgDeclareInst *PromiseDDI = DIs.front();
1135 PromiseDIVariable = PromiseDDI->getVariable();
1136 DILoc = PromiseDDI->getDebugLoc().get();
1137 } else if (!DVRs.empty()) {
1138 DbgVariableRecord *PromiseDVR = DVRs.front();
1139 PromiseDIVariable = PromiseDVR->getVariable();
1140 DILoc = PromiseDVR->getDebugLoc().get();
1141 } else {
1142 return;
1143 }
1144
1145 DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();
1146 DIFile *DFile = PromiseDIScope->getFile();
1147 unsigned LineNum = PromiseDIVariable->getLine();
1148
1149 DICompositeType *FrameDITy = DBuilder.createStructType(
1150 Scope: DIS->getUnit(), Name: Twine(F.getName() + ".coro_frame_ty").str(),
1151 File: DFile, LineNumber: LineNum, SizeInBits: Shape.FrameSize * 8,
1152 AlignInBits: Shape.FrameAlign.value() * 8, Flags: llvm::DINode::FlagArtificial, DerivedFrom: nullptr,
1153 Elements: llvm::DINodeArray());
1154 StructType *FrameTy = Shape.FrameTy;
1155 SmallVector<Metadata *, 16> Elements;
1156 DataLayout Layout = F.getDataLayout();
1157
1158 DenseMap<Value *, DILocalVariable *> DIVarCache;
1159 cacheDIVar(FrameData, DIVarCache);
1160
1161 unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
1162 unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
1163 unsigned IndexIndex = Shape.SwitchLowering.IndexField;
1164
1165 DenseMap<unsigned, StringRef> NameCache;
1166 NameCache.insert(KV: {ResumeIndex, "__resume_fn"});
1167 NameCache.insert(KV: {DestroyIndex, "__destroy_fn"});
1168 NameCache.insert(KV: {IndexIndex, "__coro_index"});
1169
1170 Type *ResumeFnTy = FrameTy->getElementType(N: ResumeIndex),
1171 *DestroyFnTy = FrameTy->getElementType(N: DestroyIndex),
1172 *IndexTy = FrameTy->getElementType(N: IndexIndex);
1173
1174 DenseMap<unsigned, DIType *> TyCache;
1175 TyCache.insert(
1176 KV: {ResumeIndex, DBuilder.createPointerType(
1177 PointeeTy: nullptr, SizeInBits: Layout.getTypeSizeInBits(Ty: ResumeFnTy))});
1178 TyCache.insert(
1179 KV: {DestroyIndex, DBuilder.createPointerType(
1180 PointeeTy: nullptr, SizeInBits: Layout.getTypeSizeInBits(Ty: DestroyFnTy))});
1181
1182 /// FIXME: If we fill the field `SizeInBits` with the actual size of
1183 /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
1184 TyCache.insert(KV: {IndexIndex, DBuilder.createBasicType(
1185 Name: "__coro_index",
1186 SizeInBits: (Layout.getTypeSizeInBits(Ty: IndexTy) < 8)
1187 ? 8
1188 : Layout.getTypeSizeInBits(Ty: IndexTy),
1189 Encoding: dwarf::DW_ATE_unsigned_char)});
1190
1191 for (auto *V : FrameData.getAllDefs()) {
1192 if (!DIVarCache.contains(Val: V))
1193 continue;
1194
1195 auto Index = FrameData.getFieldIndex(V);
1196
1197 NameCache.insert(KV: {Index, DIVarCache[V]->getName()});
1198 TyCache.insert(KV: {Index, DIVarCache[V]->getType()});
1199 }
1200
1201 // Cache from index to (Align, Offset Pair)
1202 DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
1203 // The Align and Offset of Resume function and Destroy function are fixed.
1204 OffsetCache.insert(KV: {ResumeIndex, {8, 0}});
1205 OffsetCache.insert(KV: {DestroyIndex, {8, 8}});
1206 OffsetCache.insert(
1207 KV: {IndexIndex,
1208 {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
1209
1210 for (auto *V : FrameData.getAllDefs()) {
1211 auto Index = FrameData.getFieldIndex(V);
1212
1213 OffsetCache.insert(
1214 KV: {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
1215 }
1216
1217 DenseMap<Type *, DIType *> DITypeCache;
  // This counter is used to avoid duplicate type names, e.g. there may be
  // many i32 and i64 types in one coroutine, so we use i32_0 and i32_1 to
  // tell them apart. It makes no sense for the names of the fields to
  // conflict with each other.
1222 unsigned UnknownTypeNum = 0;
1223 for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
1224 if (!OffsetCache.contains(Val: Index))
1225 continue;
1226
1227 std::string Name;
1228 uint64_t SizeInBits;
1229 uint32_t AlignInBits;
1230 uint64_t OffsetInBits;
1231 DIType *DITy = nullptr;
1232
1233 Type *Ty = FrameTy->getElementType(N: Index);
1234 assert(Ty->isSized() && "We can't handle type which is not sized.\n");
1235 SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedValue();
1236 AlignInBits = OffsetCache[Index].first * 8;
1237 OffsetInBits = OffsetCache[Index].second * 8;
1238
1239 if (NameCache.contains(Val: Index)) {
1240 Name = NameCache[Index].str();
1241 DITy = TyCache[Index];
1242 } else {
1243 DITy = solveDIType(Builder&: DBuilder, Ty, Layout, Scope: FrameDITy, LineNum, DITypeCache);
1244 assert(DITy && "SolveDIType shouldn't return nullptr.\n");
1245 Name = DITy->getName().str();
1246 Name += "_" + std::to_string(val: UnknownTypeNum);
1247 UnknownTypeNum++;
1248 }
1249
1250 Elements.push_back(Elt: DBuilder.createMemberType(
1251 Scope: FrameDITy, Name, File: DFile, LineNo: LineNum, SizeInBits, AlignInBits, OffsetInBits,
1252 Flags: llvm::DINode::FlagArtificial, Ty: DITy));
1253 }
1254
1255 DBuilder.replaceArrays(T&: FrameDITy, Elements: DBuilder.getOrCreateArray(Elements));
1256
1257 auto *FrameDIVar = DBuilder.createAutoVariable(Scope: PromiseDIScope, Name: "__coro_frame",
1258 File: DFile, LineNo: LineNum, Ty: FrameDITy,
1259 AlwaysPreserve: true, Flags: DINode::FlagArtificial);
1260 assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));
1261
  // The subprogram has a RetainedNodes field which records the debug
  // variables it contains, so we need to add __coro_frame to it.
  //
  // If we don't add __coro_frame to the RetainedNodes, the user may get
  // `no symbol __coro_frame in context` rather than the more precise
  // `__coro_frame is optimized out`.
1269 if (auto *SubProgram = dyn_cast<DISubprogram>(Val: PromiseDIScope)) {
1270 auto RetainedNodes = SubProgram->getRetainedNodes();
1271 SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
1272 RetainedNodes.end());
1273 RetainedNodesVec.push_back(Elt: FrameDIVar);
1274 SubProgram->replaceOperandWith(
1275 I: 7, New: (MDTuple::get(Context&: F.getContext(), MDs: RetainedNodesVec)));
1276 }
1277
1278 if (UseNewDbgInfoFormat) {
1279 DbgVariableRecord *NewDVR =
1280 new DbgVariableRecord(ValueAsMetadata::get(V: Shape.FramePtr), FrameDIVar,
1281 DBuilder.createExpression(), DILoc,
1282 DbgVariableRecord::LocationType::Declare);
1283 BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
1284 It->getParent()->insertDbgRecordBefore(DR: NewDVR, Here: It);
1285 } else {
1286 DBuilder.insertDeclare(Storage: Shape.FramePtr, VarInfo: FrameDIVar,
1287 Expr: DBuilder.createExpression(), DL: DILoc,
1288 InsertBefore: &*Shape.getInsertPtAfterFramePtr());
1289 }
1290}
1291
1292// Build a struct that will keep state for an active coroutine.
1293// struct f.frame {
1294// ResumeFnTy ResumeFnAddr;
1295// ResumeFnTy DestroyFnAddr;
1296// ... promise (if present) ...
1297// int ResumeIndex;
1298// ... spills ...
1299// };
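// For example (illustrative only), a switch-ABI coroutine with three suspend
// points gets an index field of type i2 (Log2_64_Ceil(3) == 2), while a
// coroutine with a single suspend point still gets an i1 index because the
// width is clamped to at least one bit below.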
1300static StructType *buildFrameType(Function &F, coro::Shape &Shape,
1301 FrameDataInfo &FrameData) {
1302 LLVMContext &C = F.getContext();
1303 const DataLayout &DL = F.getDataLayout();
1304 StructType *FrameTy = [&] {
1305 SmallString<32> Name(F.getName());
1306 Name.append(RHS: ".Frame");
1307 return StructType::create(Context&: C, Name);
1308 }();
1309
1310 // We will use this value to cap the alignment of spilled values.
1311 std::optional<Align> MaxFrameAlignment;
1312 if (Shape.ABI == coro::ABI::Async)
1313 MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
1314 FrameTypeBuilder B(C, DL, MaxFrameAlignment);
1315
1316 AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
1317 std::optional<FieldIDType> SwitchIndexFieldId;
1318
1319 if (Shape.ABI == coro::ABI::Switch) {
1320 auto *FnPtrTy = PointerType::getUnqual(C);
1321
1322 // Add header fields for the resume and destroy functions.
1323 // We can rely on these being perfectly packed.
1324 (void)B.addField(Ty: FnPtrTy, MaybeFieldAlignment: std::nullopt, /*header*/ IsHeader: true);
1325 (void)B.addField(Ty: FnPtrTy, MaybeFieldAlignment: std::nullopt, /*header*/ IsHeader: true);
1326
1327 // PromiseAlloca field needs to be explicitly added here because it's
1328 // a header field with a fixed offset based on its alignment. Hence it
1329 // needs special handling and cannot be added to FrameData.Allocas.
1330 if (PromiseAlloca)
1331 FrameData.setFieldIndex(
1332 V: PromiseAlloca, Index: B.addFieldForAlloca(AI: PromiseAlloca, /*header*/ IsHeader: true));
1333
1334 // Add a field to store the suspend index. This doesn't need to
1335 // be in the header.
1336 unsigned IndexBits = std::max(a: 1U, b: Log2_64_Ceil(Value: Shape.CoroSuspends.size()));
1337 Type *IndexType = Type::getIntNTy(C, N: IndexBits);
1338
1339 SwitchIndexFieldId = B.addField(Ty: IndexType, MaybeFieldAlignment: std::nullopt);
1340 } else {
1341 assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
1342 }
1343
  // Because multiple allocas may share the same field slot,
  // we add the fields for allocas here.
1346 B.addFieldForAllocas(F, FrameData, Shape);
1347 // Add PromiseAlloca to Allocas list so that
1348 // 1. updateLayoutIndex could update its index after
1349 // `performOptimizedStructLayout`
1350 // 2. it is processed in insertSpills.
1351 if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
    // We assume that the promise alloca won't be modified before
    // CoroBegin and that no alias will be created before CoroBegin.
1354 FrameData.Allocas.emplace_back(
1355 Args&: PromiseAlloca, Args: DenseMap<Instruction *, std::optional<APInt>>{}, Args: false);
1356 // Create an entry for every spilled value.
1357 for (auto &S : FrameData.Spills) {
1358 Type *FieldType = S.first->getType();
1359 // For byval arguments, we need to store the pointed value in the frame,
1360 // instead of the pointer itself.
1361 if (const Argument *A = dyn_cast<Argument>(Val: S.first))
1362 if (A->hasByValAttr())
1363 FieldType = A->getParamByValType();
1364 FieldIDType Id = B.addField(Ty: FieldType, MaybeFieldAlignment: std::nullopt, IsHeader: false /*header*/,
1365 IsSpillOfValue: true /*IsSpillOfValue*/);
1366 FrameData.setFieldIndex(V: S.first, Index: Id);
1367 }
1368
1369 B.finish(Ty: FrameTy);
1370 FrameData.updateLayoutIndex(B);
1371 Shape.FrameAlign = B.getStructAlign();
1372 Shape.FrameSize = B.getStructSize();
1373
1374 switch (Shape.ABI) {
1375 case coro::ABI::Switch: {
1376 // In the switch ABI, remember the switch-index field.
1377 auto IndexField = B.getLayoutField(Id: *SwitchIndexFieldId);
1378 Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
1379 Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
1380 Shape.SwitchLowering.IndexOffset = IndexField.Offset;
1381
1382 // Also round the frame size up to a multiple of its alignment, as is
1383 // generally expected in C/C++.
1384 Shape.FrameSize = alignTo(Size: Shape.FrameSize, A: Shape.FrameAlign);
1385 break;
1386 }
1387
1388 // In the retcon ABI, remember whether the frame is inline in the storage.
1389 case coro::ABI::Retcon:
1390 case coro::ABI::RetconOnce: {
1391 auto Id = Shape.getRetconCoroId();
1392 Shape.RetconLowering.IsFrameInlineInStorage
1393 = (B.getStructSize() <= Id->getStorageSize() &&
1394 B.getStructAlign() <= Id->getStorageAlignment());
1395 break;
1396 }
1397 case coro::ABI::Async: {
1398 Shape.AsyncLowering.FrameOffset =
1399 alignTo(Size: Shape.AsyncLowering.ContextHeaderSize, A: Shape.FrameAlign);
1400 // Also make the final context size a multiple of the context alignment to
1401 // make allocation easier for allocators.
1402 Shape.AsyncLowering.ContextSize =
1403 alignTo(Size: Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
1404 A: Shape.AsyncLowering.getContextAlignment());
1405 if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
1406 report_fatal_error(
        "The alignment requirement of frame variables cannot be higher than "
1408 "the alignment of the async function context");
1409 }
1410 break;
1411 }
1412 }
1413
1414 return FrameTy;
1415}
1416
// We use a pointer use visitor to track how an alloca is being used.
// The goal is to be able to answer the following three questions:
// 1. Should this alloca be allocated on the frame instead?
// 2. Could the content of the alloca be modified prior to CoroBegin, which
//    would require copying the data from the alloca to the frame after
//    CoroBegin?
// 3. Are there any aliases created for this alloca prior to CoroBegin but
//    used after CoroBegin? In that case, we will need to recreate the aliases
//    after CoroBegin based off the frame.
//
// To answer question 1, we track two things:
//   a. The list of all basic blocks that use this alloca or any of its
//      aliases. In the end, we check whether any two of these basic blocks
//      cross a suspension point. If so, this alloca must be put on the frame.
//   b. Whether the alloca or any alias of the alloca escapes at some point,
//      either by having its address stored somewhere or by having the address
//      passed to a function call that might capture it. If it ever escapes,
//      this alloca must be put on the frame conservatively.
// To answer question 2, we track the variable MayWriteBeforeCoroBegin.
// Whenever a potential write happens, either through a store instruction, a
// function call or any of the memory intrinsics, we check whether the
// instruction is prior to CoroBegin.
// To answer question 3, we track the offsets of all aliases created for the
// alloca prior to CoroBegin but used after CoroBegin. std::optional is used
// to be able to represent the case when the offset is unknown (e.g. when a
// PHINode takes in different offset values). We cannot handle unknown offsets
// and will assert. This is a known remaining issue; an ideal solution would
// likely require a significant redesign.
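// For example (illustrative only):
//
//   %a = alloca i64
//   store i64 1, ptr %a
//   %s = call i8 @llvm.coro.suspend(token none, i1 false)
//   %v = load i64, ptr %a
//
// uses %a on both sides of a suspend point, so question 1 is answered "yes"
// and %a must live on the coroutine frame; if the store happened before
// llvm.coro.begin, question 2 would also be "yes" and the alloca's contents
// would have to be copied into the frame after CoroBegin.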
1442namespace {
1443struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
1444 using Base = PtrUseVisitor<AllocaUseVisitor>;
1445 AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
1446 const coro::Shape &CoroShape,
1447 const SuspendCrossingInfo &Checker,
1448 bool ShouldUseLifetimeStartInfo)
1449 : PtrUseVisitor(DL), DT(DT), CoroShape(CoroShape), Checker(Checker),
1450 ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {
1451 for (AnyCoroSuspendInst *SuspendInst : CoroShape.CoroSuspends)
1452 CoroSuspendBBs.insert(Ptr: SuspendInst->getParent());
1453 }
1454
1455 void visit(Instruction &I) {
1456 Users.insert(Ptr: &I);
1457 Base::visit(I);
1458 // If the pointer is escaped prior to CoroBegin, we have to assume it would
1459 // be written into before CoroBegin as well.
1460 if (PI.isEscaped() &&
1461 !DT.dominates(Def: CoroShape.CoroBegin, User: PI.getEscapingInst())) {
1462 MayWriteBeforeCoroBegin = true;
1463 }
1464 }
1465 // We need to provide this overload as PtrUseVisitor uses a pointer based
1466 // visiting function.
1467 void visit(Instruction *I) { return visit(I&: *I); }
1468
1469 void visitPHINode(PHINode &I) {
1470 enqueueUsers(I);
1471 handleAlias(I);
1472 }
1473
1474 void visitSelectInst(SelectInst &I) {
1475 enqueueUsers(I);
1476 handleAlias(I);
1477 }
1478
1479 void visitStoreInst(StoreInst &SI) {
    // Regardless of whether the alias of the alloca is the value operand or
    // the pointer operand, we need to assume the alloca has been written to.
1482 handleMayWrite(I: SI);
1483
1484 if (SI.getValueOperand() != U->get())
1485 return;
1486
1487 // We are storing the pointer into a memory location, potentially escaping.
1488 // As an optimization, we try to detect simple cases where it doesn't
1489 // actually escape, for example:
1490 // %ptr = alloca ..
1491 // %addr = alloca ..
1492 // store %ptr, %addr
1493 // %x = load %addr
1494 // ..
1495 // If %addr is only used by loading from it, we could simply treat %x as
1496 // another alias of %ptr, and not consider %ptr to be escaped.
1497 auto IsSimpleStoreThenLoad = [&]() {
1498 auto *AI = dyn_cast<AllocaInst>(Val: SI.getPointerOperand());
1499 // If the memory location we are storing to is not an alloca, it
1500 // could be an alias of some other memory location, which is difficult
1501 // to analyze.
1502 if (!AI)
1503 return false;
1504 // StoreAliases contains aliases of the memory location stored into.
1505 SmallVector<Instruction *, 4> StoreAliases = {AI};
1506 while (!StoreAliases.empty()) {
1507 Instruction *I = StoreAliases.pop_back_val();
1508 for (User *U : I->users()) {
1509 // If we are loading from the memory location, we are creating an
1510 // alias of the original pointer.
1511 if (auto *LI = dyn_cast<LoadInst>(Val: U)) {
1512 enqueueUsers(I&: *LI);
1513 handleAlias(I&: *LI);
1514 continue;
1515 }
1516 // If we are overwriting the memory location, the pointer certainly
1517 // won't escape.
1518 if (auto *S = dyn_cast<StoreInst>(Val: U))
1519 if (S->getPointerOperand() == I)
1520 continue;
1521 if (auto *II = dyn_cast<IntrinsicInst>(Val: U))
1522 if (II->isLifetimeStartOrEnd())
1523 continue;
1524 // A BitCastInst creates aliases of the memory location being stored
1525 // into.
1526 if (auto *BI = dyn_cast<BitCastInst>(Val: U)) {
1527 StoreAliases.push_back(Elt: BI);
1528 continue;
1529 }
1530 return false;
1531 }
1532 }
1533
1534 return true;
1535 };
1536
1537 if (!IsSimpleStoreThenLoad())
1538 PI.setEscaped(&SI);
1539 }
1540
1541 // All mem intrinsics modify the data.
1542 void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(I: MI); }
1543
1544 void visitBitCastInst(BitCastInst &BC) {
1545 Base::visitBitCastInst(BC);
1546 handleAlias(I&: BC);
1547 }
1548
1549 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1550 Base::visitAddrSpaceCastInst(ASC);
1551 handleAlias(I&: ASC);
1552 }
1553
1554 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1555 // The base visitor will adjust Offset accordingly.
1556 Base::visitGetElementPtrInst(GEPI);
1557 handleAlias(I&: GEPI);
1558 }
1559
1560 void visitIntrinsicInst(IntrinsicInst &II) {
1561 // When we find that the lifetime markers refer to a
1562 // subrange of the original alloca, ignore the lifetime
1563 // markers to avoid misleading the analysis.
1564 if (!IsOffsetKnown || !Offset.isZero())
1565 return Base::visitIntrinsicInst(II);
1566 switch (II.getIntrinsicID()) {
1567 default:
1568 return Base::visitIntrinsicInst(II);
1569 case Intrinsic::lifetime_start:
1570 LifetimeStarts.insert(Ptr: &II);
1571 LifetimeStartBBs.push_back(Elt: II.getParent());
1572 break;
1573 case Intrinsic::lifetime_end:
1574 LifetimeEndBBs.insert(Ptr: II.getParent());
1575 break;
1576 }
1577 }
1578
1579 void visitCallBase(CallBase &CB) {
1580 for (unsigned Op = 0, OpCount = CB.arg_size(); Op < OpCount; ++Op)
1581 if (U->get() == CB.getArgOperand(i: Op) && !CB.doesNotCapture(OpNo: Op))
1582 PI.setEscaped(&CB);
1583 handleMayWrite(I: CB);
1584 }
1585
1586 bool getShouldLiveOnFrame() const {
1587 if (!ShouldLiveOnFrame)
1588 ShouldLiveOnFrame = computeShouldLiveOnFrame();
1589 return *ShouldLiveOnFrame;
1590 }
1591
1592 bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; }
1593
1594 DenseMap<Instruction *, std::optional<APInt>> getAliasesCopy() const {
1595 assert(getShouldLiveOnFrame() && "This method should only be called if the "
1596 "alloca needs to live on the frame.");
1597 for (const auto &P : AliasOffetMap)
1598 if (!P.second)
1599 report_fatal_error(reason: "Unable to handle an alias with unknown offset "
1600 "created before CoroBegin.");
1601 return AliasOffetMap;
1602 }
1603
1604private:
1605 const DominatorTree &DT;
1606 const coro::Shape &CoroShape;
1607 const SuspendCrossingInfo &Checker;
1608 // All aliases of the original AllocaInst, created before CoroBegin and used
1609 // after it. Each entry contains the aliasing instruction and its offset in
1610 // the original alloca. They need to be recreated after CoroBegin off the frame.
1611 DenseMap<Instruction *, std::optional<APInt>> AliasOffetMap{};
1612 SmallPtrSet<Instruction *, 4> Users{};
1613 SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
1614 SmallVector<BasicBlock *> LifetimeStartBBs{};
1615 SmallPtrSet<BasicBlock *, 2> LifetimeEndBBs{};
1616 SmallPtrSet<const BasicBlock *, 2> CoroSuspendBBs{};
1617 bool MayWriteBeforeCoroBegin{false};
1618 bool ShouldUseLifetimeStartInfo{true};
1619
1620 mutable std::optional<bool> ShouldLiveOnFrame{};
1621
1622 bool computeShouldLiveOnFrame() const {
1623 // If lifetime information is available, we check it first since it's
1624 // more precise. We look at every pair of lifetime.start intrinsic and
1625 // every basic block that uses the pointer to see if they cross suspension
1626 // points. The uses cover both direct uses as well as indirect uses.
1627 if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) {
1628 // If there is no explicit lifetime.end, then assume the address can
1629 // cross suspension points.
1630 if (LifetimeEndBBs.empty())
1631 return true;
1632
1633 // If there is a path from a lifetime.start to a suspend without a
1634 // corresponding lifetime.end, then the alloca's lifetime persists
1635 // beyond that suspension point and the alloca must go on the frame.
1636 llvm::SmallVector<BasicBlock *> Worklist(LifetimeStartBBs);
1637 if (isManyPotentiallyReachableFromMany(Worklist, StopSet: CoroSuspendBBs,
1638 ExclusionSet: &LifetimeEndBBs, DT: &DT))
1639 return true;
1640
1641 // Addresses are guaranteed to be identical after every lifetime.start so
1642 // we cannot use the local stack if the address escaped and there is a
1643 // suspend point between lifetime markers. This should also cover the
1644 // case of a single lifetime.start intrinsic in a loop with a suspend point.
1645 if (PI.isEscaped()) {
1646 for (auto *A : LifetimeStarts) {
1647 for (auto *B : LifetimeStarts) {
1648 if (Checker.hasPathOrLoopCrossingSuspendPoint(From: A->getParent(),
1649 To: B->getParent()))
1650 return true;
1651 }
1652 }
1653 }
1654 return false;
1655 }
1656 // FIXME: Ideally the isEscaped check should come at the beginning.
1657 // However there are a few loose ends that need to be fixed first before
1658 // we can do that. We need to make sure we are not over-conservative, so
1659 // that the data accessed in-between await_suspend and symmetric transfer
1660 // is always put on the stack, and also data accessed after coro.end is
1661 // always put on the stack (esp the return object). To fix that, we need
1662 // to:
1663 // 1) Potentially treat sret as nocapture in calls
1664 // 2) Special handle the return object and put it on the stack
1665 // 3) Utilize lifetime.end intrinsic
1666 if (PI.isEscaped())
1667 return true;
1668
1669 for (auto *U1 : Users)
1670 for (auto *U2 : Users)
1671 if (Checker.isDefinitionAcrossSuspend(I&: *U1, U: U2))
1672 return true;
1673
1674 return false;
1675 }
1676
1677 void handleMayWrite(const Instruction &I) {
1678 if (!DT.dominates(Def: CoroShape.CoroBegin, User: &I))
1679 MayWriteBeforeCoroBegin = true;
1680 }
1681
1682 bool usedAfterCoroBegin(Instruction &I) {
1683 for (auto &U : I.uses())
1684 if (DT.dominates(Def: CoroShape.CoroBegin, U))
1685 return true;
1686 return false;
1687 }
1688
1689 void handleAlias(Instruction &I) {
1690 // We track all aliases created prior to CoroBegin but used after.
1691 // These aliases may need to be recreated after CoroBegin if the alloca
1692 // need to live on the frame.
1693 if (DT.dominates(Def: CoroShape.CoroBegin, User: &I) || !usedAfterCoroBegin(I))
1694 return;
1695
1696 if (!IsOffsetKnown) {
1697 AliasOffetMap[&I].reset();
1698 } else {
1699 auto Itr = AliasOffetMap.find(Val: &I);
1700 if (Itr == AliasOffetMap.end()) {
1701 AliasOffetMap[&I] = Offset;
1702 } else if (Itr->second && *Itr->second != Offset) {
1703 // If we have seen two different possible values for this alias, we set
1704 // it to empty.
1705 AliasOffetMap[&I].reset();
1706 }
1707 }
1708 }
1709};
1710} // namespace
1711
1712 // We need to make room to insert a spill after the initial PHIs, but before
1713 // the catchswitch instruction. Placing it earlier would violate the rule that
1714 // a catchswitch, like all other EH pads, must be the first non-PHI in a block.
1715 //
1716 // Split the catchswitch away into a separate block and insert in its place:
1717 //
1718 // cleanuppad <InsertPt> cleanupret.
1719 //
1720 // The cleanupret instruction will act as the insert point for the spill.
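//
// Illustrative sketch (block and value names are made up). Before:
//
//   bb:
//     %p  = phi ...
//     %cs = catchswitch within none [...] unwind ...
//
// After (the spill store is then inserted just before the cleanupret):
//
//   bb:
//     %p   = phi ...
//     %pad = cleanuppad within none []   ; parent pad taken from the catchswitch
//     cleanupret from %pad unwind label %bb.split
//   bb.split:
//     %cs = catchswitch within none [...] unwind ...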
1721static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
1722 BasicBlock *CurrentBlock = CatchSwitch->getParent();
1723 BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(I: CatchSwitch);
1724 CurrentBlock->getTerminator()->eraseFromParent();
1725
1726 auto *CleanupPad =
1727 CleanupPadInst::Create(ParentPad: CatchSwitch->getParentPad(), Args: {}, NameStr: "", InsertBefore: CurrentBlock);
1728 auto *CleanupRet =
1729 CleanupReturnInst::Create(CleanupPad, UnwindBB: NewBlock, InsertBefore: CurrentBlock);
1730 return CleanupRet;
1731}
1732
1733// Replace all alloca and SSA values that are accessed across suspend points
1734// with GetElementPointer from coroutine frame + loads and stores. Create an
1735// AllocaSpillBB that will become the new entry block for the resume parts of
1736// the coroutine:
1737//
1738// %hdl = coro.begin(...)
1739// whatever
1740//
1741// becomes:
1742//
1743// %hdl = coro.begin(...)
1744// br label %AllocaSpillBB
1745//
1746// AllocaSpillBB:
1747// ; geps corresponding to allocas that were moved to coroutine frame
1748// br label PostSpill
1749//
1750// PostSpill:
1751// whatever
1752//
1753//
1754static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
1755 auto *CB = Shape.CoroBegin;
1756 LLVMContext &C = CB->getContext();
1757 Function *F = CB->getFunction();
1758 IRBuilder<> Builder(C);
1759 StructType *FrameTy = Shape.FrameTy;
1760 Value *FramePtr = Shape.FramePtr;
1761 DominatorTree DT(*F);
1762 SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
1763
1764 // Create a GEP with the given index into the coroutine frame for the original
1765 // value Orig. Appends an extra 0 index for array-allocas, preserving the
1766 // original type.
1767 auto GetFramePointer = [&](Value *Orig) -> Value * {
1768 FieldIDType Index = FrameData.getFieldIndex(V: Orig);
1769 SmallVector<Value *, 3> Indices = {
1770 ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0),
1771 ConstantInt::get(Ty: Type::getInt32Ty(C), V: Index),
1772 };
1773
1774 if (auto *AI = dyn_cast<AllocaInst>(Val: Orig)) {
1775 if (auto *CI = dyn_cast<ConstantInt>(Val: AI->getArraySize())) {
1776 auto Count = CI->getValue().getZExtValue();
1777 if (Count > 1) {
1778 Indices.push_back(Elt: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0));
1779 }
1780 } else {
1781 report_fatal_error(reason: "Coroutines cannot handle non static allocas yet");
1782 }
1783 }
1784
1785 auto GEP = cast<GetElementPtrInst>(
1786 Val: Builder.CreateInBoundsGEP(Ty: FrameTy, Ptr: FramePtr, IdxList: Indices));
1787 if (auto *AI = dyn_cast<AllocaInst>(Val: Orig)) {
1788 if (FrameData.getDynamicAlign(V: Orig) != 0) {
1789 assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value());
1790 auto *M = AI->getModule();
1791 auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
1792 auto *PtrValue = Builder.CreatePtrToInt(V: GEP, DestTy: IntPtrTy);
1793 auto *AlignMask =
1794 ConstantInt::get(Ty: IntPtrTy, V: AI->getAlign().value() - 1);
1795 PtrValue = Builder.CreateAdd(LHS: PtrValue, RHS: AlignMask);
1796 PtrValue = Builder.CreateAnd(LHS: PtrValue, RHS: Builder.CreateNot(V: AlignMask));
1797 return Builder.CreateIntToPtr(V: PtrValue, DestTy: AI->getType());
1798 }
1799 // If the type of the GEP is not equal to the type of the AllocaInst, it
1800 // implies that the AllocaInst may be reused in the frame slot of another
1801 // AllocaInst. So we cast the GEP to the AllocaInst type here to reuse
1802 // the frame storage.
1803 //
1804 // Note: if we change the strategy for dealing with alignment, we need to
1805 // refine this casting.
1806 if (GEP->getType() != Orig->getType())
1807 return Builder.CreateAddrSpaceCast(V: GEP, DestTy: Orig->getType(),
1808 Name: Orig->getName() + Twine(".cast"));
1809 }
1810 return GEP;
1811 };
1812
1813 for (auto const &E : FrameData.Spills) {
1814 Value *Def = E.first;
1815 auto SpillAlignment = Align(FrameData.getAlign(V: Def));
1816 // Create a store instruction storing the value into the
1817 // coroutine frame.
1818 BasicBlock::iterator InsertPt;
1819 Type *ByValTy = nullptr;
1820 if (auto *Arg = dyn_cast<Argument>(Val: Def)) {
1821 // For arguments, we will place the store instruction right after
1822 // the coroutine frame pointer instruction, i.e. coro.begin.
1823 InsertPt = Shape.getInsertPtAfterFramePtr();
1824
1825 // If we're spilling an Argument, make sure we clear 'nocapture'
1826 // from the coroutine function.
1827 Arg->getParent()->removeParamAttr(ArgNo: Arg->getArgNo(), Kind: Attribute::NoCapture);
1828
1829 if (Arg->hasByValAttr())
1830 ByValTy = Arg->getParamByValType();
1831 } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Val: Def)) {
1832 // Don't spill immediately after a suspend; splitting assumes
1833 // that the suspend will be followed by a branch.
1834 InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHIIt();
1835 } else {
1836 auto *I = cast<Instruction>(Val: Def);
1837 if (!DT.dominates(Def: CB, User: I)) {
1838 // If it is not dominated by CoroBegin, then the spill should be
1839 // inserted immediately after the coroutine frame pointer is computed.
1840 InsertPt = Shape.getInsertPtAfterFramePtr();
1841 } else if (auto *II = dyn_cast<InvokeInst>(Val: I)) {
1842 // If we are spilling the result of the invoke instruction, split
1843 // the normal edge and insert the spill in the new block.
1844 auto *NewBB = SplitEdge(From: II->getParent(), To: II->getNormalDest());
1845 InsertPt = NewBB->getTerminator()->getIterator();
1846 } else if (isa<PHINode>(Val: I)) {
1847 // Skip PHINodes and EH pad instructions.
1848 BasicBlock *DefBlock = I->getParent();
1849 if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: DefBlock->getTerminator()))
1850 InsertPt = splitBeforeCatchSwitch(CatchSwitch: CSI)->getIterator();
1851 else
1852 InsertPt = DefBlock->getFirstInsertionPt();
1853 } else {
1854 assert(!I->isTerminator() && "unexpected terminator");
1855 // For all other values, the spill is placed immediately after
1856 // the definition.
1857 InsertPt = I->getNextNode()->getIterator();
1858 }
1859 }
1860
1861 auto Index = FrameData.getFieldIndex(V: Def);
1862 Builder.SetInsertPoint(TheBB: InsertPt->getParent(), IP: InsertPt);
1863 auto *G = Builder.CreateConstInBoundsGEP2_32(
1864 Ty: FrameTy, Ptr: FramePtr, Idx0: 0, Idx1: Index, Name: Def->getName() + Twine(".spill.addr"));
1865 if (ByValTy) {
1866 // For byval arguments, we need to store the pointed value in the frame,
1867 // instead of the pointer itself.
1868 auto *Value = Builder.CreateLoad(Ty: ByValTy, Ptr: Def);
1869 Builder.CreateAlignedStore(Val: Value, Ptr: G, Align: SpillAlignment);
1870 } else {
1871 Builder.CreateAlignedStore(Val: Def, Ptr: G, Align: SpillAlignment);
1872 }
1873
1874 BasicBlock *CurrentBlock = nullptr;
1875 Value *CurrentReload = nullptr;
1876 for (auto *U : E.second) {
1877 // If we have not yet seen this use's block, create a load instruction to
1878 // reload the spilled value from the coroutine frame. For byval arguments,
1879 // the frame GEP itself is used as the reloaded value.
1880 if (CurrentBlock != U->getParent()) {
1881 CurrentBlock = U->getParent();
1882 Builder.SetInsertPoint(TheBB: CurrentBlock,
1883 IP: CurrentBlock->getFirstInsertionPt());
1884
1885 auto *GEP = GetFramePointer(E.first);
1886 GEP->setName(E.first->getName() + Twine(".reload.addr"));
1887 if (ByValTy)
1888 CurrentReload = GEP;
1889 else
1890 CurrentReload = Builder.CreateAlignedLoad(
1891 Ty: FrameTy->getElementType(N: FrameData.getFieldIndex(V: E.first)), Ptr: GEP,
1892 Align: SpillAlignment, Name: E.first->getName() + Twine(".reload"));
1893
1894 TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(V: Def);
1895 TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(V: Def);
1896 // Try our best to find a dbg.declare. If the spill is a temporary, there
1897 // may not be a direct dbg.declare; walk up the load chain to find one from
1898 // an alias.
1899 if (F->getSubprogram()) {
1900 auto *CurDef = Def;
1901 while (DIs.empty() && DVRs.empty() && isa<LoadInst>(Val: CurDef)) {
1902 auto *LdInst = cast<LoadInst>(Val: CurDef);
1903 // Only consider ptr-to-ptr loads where the operand and result types match.
1904 if (LdInst->getPointerOperandType() != LdInst->getType())
1905 break;
1906 CurDef = LdInst->getPointerOperand();
1907 if (!isa<AllocaInst, LoadInst>(Val: CurDef))
1908 break;
1909 DIs = findDbgDeclares(V: CurDef);
1910 DVRs = findDVRDeclares(V: CurDef);
1911 }
1912 }
1913
1914 auto SalvageOne = [&](auto *DDI) {
1915 bool AllowUnresolved = false;
1916 // This dbg.declare is preserved for all coro-split function
1917 // fragments. It will be unreachable in the main function, and
1918 // processed by coro::salvageDebugInfo() by CoroCloner.
1919 if (UseNewDbgInfoFormat) {
1920 DbgVariableRecord *NewDVR = new DbgVariableRecord(
1921 ValueAsMetadata::get(V: CurrentReload), DDI->getVariable(),
1922 DDI->getExpression(), DDI->getDebugLoc(),
1923 DbgVariableRecord::LocationType::Declare);
1924 Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
1925 DR: NewDVR, Here: Builder.GetInsertPoint());
1926 } else {
1927 DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
1928 .insertDeclare(CurrentReload, DDI->getVariable(),
1929 DDI->getExpression(), DDI->getDebugLoc(),
1930 &*Builder.GetInsertPoint());
1931 }
1932 // This dbg.declare is for the main function entry point. It
1933 // will be deleted in all coro-split functions.
1934 coro::salvageDebugInfo(ArgToAllocaMap, *DDI, Shape.OptimizeFrame,
1935 false /*UseEntryValue*/);
1936 };
1937 for_each(Range&: DIs, F: SalvageOne);
1938 for_each(Range&: DVRs, F: SalvageOne);
1939 }
1940
1941 // If we have a single edge PHINode, remove it and replace it with a
1942 // reload from the coroutine frame. (We already took care of multi edge
1943 // PHINodes by rewriting them in the rewritePHIs function).
1944 if (auto *PN = dyn_cast<PHINode>(Val: U)) {
1945 assert(PN->getNumIncomingValues() == 1 &&
1946 "unexpected number of incoming "
1947 "values in the PHINode");
1948 PN->replaceAllUsesWith(V: CurrentReload);
1949 PN->eraseFromParent();
1950 continue;
1951 }
1952
1953 // Replace all uses of CurrentValue in the current instruction with
1954 // reload.
1955 U->replaceUsesOfWith(From: Def, To: CurrentReload);
1956 // Instructions are added to Def's user list if the attached
1957 // debug records use Def. Update those now.
1958 for (DbgVariableRecord &DVR : filterDbgVars(R: U->getDbgRecordRange()))
1959 DVR.replaceVariableLocationOp(OldValue: Def, NewValue: CurrentReload, AllowEmpty: true);
1960 }
1961 }
1962
1963 BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();
1964
1965 auto SpillBlock = FramePtrBB->splitBasicBlock(
1966 I: Shape.getInsertPtAfterFramePtr(), BBName: "AllocaSpillBB");
1967 SpillBlock->splitBasicBlock(I: &SpillBlock->front(), BBName: "PostSpill");
1968 Shape.AllocaSpillBlock = SpillBlock;
1969
1970 // retcon and retcon.once lowering assumes all uses have been sunk.
1971 if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
1972 Shape.ABI == coro::ABI::Async) {
1973 // If we found any allocas, replace all of their remaining uses with Geps.
1974 Builder.SetInsertPoint(TheBB: SpillBlock, IP: SpillBlock->begin());
1975 for (const auto &P : FrameData.Allocas) {
1976 AllocaInst *Alloca = P.Alloca;
1977 auto *G = GetFramePointer(Alloca);
1978
1979 // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
1980 // here, as we are changing the location of the instruction.
1981 G->takeName(V: Alloca);
1982 Alloca->replaceAllUsesWith(V: G);
1983 Alloca->eraseFromParent();
1984 }
1985 return;
1986 }
1987
1988 // If we found any allocas, replace all of their remaining uses with GEP
1989 // instructions. To retain debuggability, we replace the uses of allocas in
1990 // dbg.declares and dbg.values with the reload from the frame.
1991 // Note: We cannot replace the allocas with GEP instructions indiscriminately,
1992 // as some of the uses may not be dominated by CoroBegin.
1993 Builder.SetInsertPoint(TheBB: Shape.AllocaSpillBlock,
1994 IP: Shape.AllocaSpillBlock->begin());
1995 SmallVector<Instruction *, 4> UsersToUpdate;
1996 for (const auto &A : FrameData.Allocas) {
1997 AllocaInst *Alloca = A.Alloca;
1998 UsersToUpdate.clear();
1999 for (User *U : Alloca->users()) {
2000 auto *I = cast<Instruction>(Val: U);
2001 if (DT.dominates(Def: CB, User: I))
2002 UsersToUpdate.push_back(Elt: I);
2003 }
2004 if (UsersToUpdate.empty())
2005 continue;
2006 auto *G = GetFramePointer(Alloca);
2007 G->setName(Alloca->getName() + Twine(".reload.addr"));
2008
2009 SmallVector<DbgVariableIntrinsic *, 4> DIs;
2010 SmallVector<DbgVariableRecord *> DbgVariableRecords;
2011 findDbgUsers(DbgInsts&: DIs, V: Alloca, DbgVariableRecords: &DbgVariableRecords);
2012 for (auto *DVI : DIs)
2013 DVI->replaceUsesOfWith(From: Alloca, To: G);
2014 for (auto *DVR : DbgVariableRecords)
2015 DVR->replaceVariableLocationOp(OldValue: Alloca, NewValue: G);
2016
2017 for (Instruction *I : UsersToUpdate) {
2018 // It is meaningless to retain lifetime intrinsics that refer to a member
2019 // of the coroutine frame, and such leftover lifetime intrinsics may
2020 // block further optimizations.
2021 if (I->isLifetimeStartOrEnd()) {
2022 I->eraseFromParent();
2023 continue;
2024 }
2025
2026 I->replaceUsesOfWith(From: Alloca, To: G);
2027 }
2028 }
2029 Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
2030 for (const auto &A : FrameData.Allocas) {
2031 AllocaInst *Alloca = A.Alloca;
2032 if (A.MayWriteBeforeCoroBegin) {
2033 // MayWriteBeforeCoroBegin really means potentially modified before CoroBegin.
2034 if (Alloca->isArrayAllocation())
2035 report_fatal_error(
2036 reason: "Coroutines cannot handle copying of array allocas yet");
2037
2038 auto *G = GetFramePointer(Alloca);
2039 auto *Value = Builder.CreateLoad(Ty: Alloca->getAllocatedType(), Ptr: Alloca);
2040 Builder.CreateStore(Val: Value, Ptr: G);
2041 }
2042 // For each alias of Alloca created before CoroBegin but used after
2043 // CoroBegin, we recreate it after CoroBegin by applying the offset
2044 // to the pointer in the frame.
2045 for (const auto &Alias : A.Aliases) {
2046 auto *FramePtr = GetFramePointer(Alloca);
2047 auto &Value = *Alias.second;
2048 auto ITy = IntegerType::get(C, NumBits: Value.getBitWidth());
2049 auto *AliasPtr =
2050 Builder.CreatePtrAdd(Ptr: FramePtr, Offset: ConstantInt::get(Ty: ITy, V: Value));
2051 Alias.first->replaceUsesWithIf(
2052 New: AliasPtr, ShouldReplace: [&](Use &U) { return DT.dominates(Def: CB, U); });
2053 }
2054 }
2055
2056 // PromiseAlloca is not collected in FrameData.Allocas, so the code above
2057 // does not handle the case where the PromiseAlloca is written before
2058 // CoroBegin, which can be problematic in edge cases. See
2059 // https://github.com/llvm/llvm-project/issues/57861 for an example.
2060 if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) {
2061 AllocaInst *PA = Shape.SwitchLowering.PromiseAlloca;
2062 // Check whether there is a memory access to the promise alloca before CoroBegin.
2063 bool HasAccessingPromiseBeforeCB = llvm::any_of(Range: PA->uses(), P: [&](Use &U) {
2064 auto *Inst = dyn_cast<Instruction>(Val: U.getUser());
2065 if (!Inst || DT.dominates(Def: CB, User: Inst))
2066 return false;
2067
2068 if (auto *CI = dyn_cast<CallInst>(Val: Inst)) {
2069 // It is fine if the call doesn't write to the promise.
2070 // This is possible for @llvm.coro.id intrinsics, which
2071 // take the promise as their second argument purely as a
2072 // marker.
2073 if (CI->onlyReadsMemory() ||
2074 CI->onlyReadsMemory(OpNo: CI->getArgOperandNo(U: &U)))
2075 return false;
2076 return true;
2077 }
2078
2079 return isa<StoreInst>(Val: Inst) ||
2080 // It may take too much time to track all the uses.
2081 // Be conservative in case the use may escape.
2082 isa<GetElementPtrInst>(Val: Inst) ||
2083 // Before opaque pointers, there would always be a bitcast
2084 // of the promise alloca. Now that opaque pointers are
2085 // enabled by default, keeping this check is conservative
2086 // but should be fine.
2087 isa<BitCastInst>(Val: Inst);
2088 });
2089 if (HasAccessingPromiseBeforeCB) {
2090 Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
2091 auto *G = GetFramePointer(PA);
2092 auto *Value = Builder.CreateLoad(Ty: PA->getAllocatedType(), Ptr: PA);
2093 Builder.CreateStore(Val: Value, Ptr: G);
2094 }
2095 }
2096}
2097
2098 // Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
2099// PHI in InsertedBB.
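//
// Illustrative example (names are made up): given
//
//   succ:
//     %v = phi i32 [ %x, %inserted ], [ %y, %other ]
//
// the incoming value for %inserted is rerouted through a single-entry PHI:
//
//   inserted:                            ; single predecessor %pred
//     %x.succ = phi i32 [ %x, %pred ]
//     br label %succ
//   succ:
//     %v = phi i32 [ %x.succ, %inserted ], [ %y, %other ]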
2100static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
2101 BasicBlock *InsertedBB,
2102 BasicBlock *PredBB,
2103 PHINode *UntilPHI = nullptr) {
2104 auto *PN = cast<PHINode>(Val: &SuccBB->front());
2105 do {
2106 int Index = PN->getBasicBlockIndex(BB: InsertedBB);
2107 Value *V = PN->getIncomingValue(i: Index);
2108 PHINode *InputV = PHINode::Create(
2109 Ty: V->getType(), NumReservedValues: 1, NameStr: V->getName() + Twine(".") + SuccBB->getName());
2110 InputV->insertBefore(InsertPos: InsertedBB->begin());
2111 InputV->addIncoming(V, BB: PredBB);
2112 PN->setIncomingValue(i: Index, V: InputV);
2113 PN = dyn_cast<PHINode>(Val: PN->getNextNode());
2114 } while (PN != UntilPHI);
2115}
2116
2117// Rewrites the PHI Nodes in a cleanuppad.
2118static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
2119 CleanupPadInst *CleanupPad) {
2120 // For every incoming edge to a CleanupPad we will create a new block holding
2121 // all incoming values in single-value PHI nodes. We will then create another
2122 // block to act as a dispatcher (as all unwind edges for related EH blocks
2123 // must be the same).
2124 //
2125 // cleanuppad:
2126 // %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
2127 // %3 = cleanuppad within none []
2128 //
2129 // It will create:
2130 //
2131 // cleanuppad.corodispatch
2132 // %2 = phi i8[0, %catchswitch], [1, %catch.1]
2133 // %3 = cleanuppad within none []
2134 // switch i8 %2, label %unreachable
2135 // [i8 0, label %cleanuppad.from.catchswitch
2136 // i8 1, label %cleanuppad.from.catch.1]
2137 // cleanuppad.from.catchswitch:
2138 // %4 = phi i32 [%0, %catchswitch]
2139 // br %label cleanuppad
2140 // cleanuppad.from.catch.1:
2141 // %6 = phi i32 [%1, %catch.1]
2142 // br %label cleanuppad
2143 // cleanuppad:
2144 // %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
2145 // [%6, %cleanuppad.from.catch.1]
2146
2147 // Unreachable BB, in case switching on an invalid value in the dispatcher.
2148 auto *UnreachBB = BasicBlock::Create(
2149 Context&: CleanupPadBB->getContext(), Name: "unreachable", Parent: CleanupPadBB->getParent());
2150 IRBuilder<> Builder(UnreachBB);
2151 Builder.CreateUnreachable();
2152
2153 // Create a new cleanuppad which will be the dispatcher.
2154 auto *NewCleanupPadBB =
2155 BasicBlock::Create(Context&: CleanupPadBB->getContext(),
2156 Name: CleanupPadBB->getName() + Twine(".corodispatch"),
2157 Parent: CleanupPadBB->getParent(), InsertBefore: CleanupPadBB);
2158 Builder.SetInsertPoint(NewCleanupPadBB);
2159 auto *SwitchType = Builder.getInt8Ty();
2160 auto *SetDispatchValuePN =
2161 Builder.CreatePHI(Ty: SwitchType, NumReservedValues: pred_size(BB: CleanupPadBB));
2162 CleanupPad->removeFromParent();
2163 CleanupPad->insertAfter(InsertPos: SetDispatchValuePN);
2164 auto *SwitchOnDispatch = Builder.CreateSwitch(V: SetDispatchValuePN, Dest: UnreachBB,
2165 NumCases: pred_size(BB: CleanupPadBB));
2166
2167 int SwitchIndex = 0;
2168 SmallVector<BasicBlock *, 8> Preds(predecessors(BB: CleanupPadBB));
2169 for (BasicBlock *Pred : Preds) {
2170 // Create a new cleanuppad and move the PHI values to there.
2171 auto *CaseBB = BasicBlock::Create(Context&: CleanupPadBB->getContext(),
2172 Name: CleanupPadBB->getName() +
2173 Twine(".from.") + Pred->getName(),
2174 Parent: CleanupPadBB->getParent(), InsertBefore: CleanupPadBB);
2175 updatePhiNodes(DestBB: CleanupPadBB, OldPred: Pred, NewPred: CaseBB);
2176 CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
2177 Pred->getName());
2178 Builder.SetInsertPoint(CaseBB);
2179 Builder.CreateBr(Dest: CleanupPadBB);
2180 movePHIValuesToInsertedBlock(SuccBB: CleanupPadBB, InsertedBB: CaseBB, PredBB: NewCleanupPadBB);
2181
2182 // Update this Pred to the new unwind point.
2183 setUnwindEdgeTo(TI: Pred->getTerminator(), Succ: NewCleanupPadBB);
2184
2185 // Setup the switch in the dispatcher.
2186 auto *SwitchConstant = ConstantInt::get(Ty: SwitchType, V: SwitchIndex);
2187 SetDispatchValuePN->addIncoming(V: SwitchConstant, BB: Pred);
2188 SwitchOnDispatch->addCase(OnVal: SwitchConstant, Dest: CaseBB);
2189 SwitchIndex++;
2190 }
2191}
2192
2193static void cleanupSinglePredPHIs(Function &F) {
2194 SmallVector<PHINode *, 32> Worklist;
2195 for (auto &BB : F) {
2196 for (auto &Phi : BB.phis()) {
2197 if (Phi.getNumIncomingValues() == 1) {
2198 Worklist.push_back(Elt: &Phi);
2199 } else
2200 break;
2201 }
2202 }
2203 while (!Worklist.empty()) {
2204 auto *Phi = Worklist.pop_back_val();
2205 auto *OriginalValue = Phi->getIncomingValue(i: 0);
2206 Phi->replaceAllUsesWith(V: OriginalValue);
2207 }
2208}
2209
2210static void rewritePHIs(BasicBlock &BB) {
2211 // For every incoming edge we will create a block holding all
2212 // incoming values in single-value PHI nodes.
2213 //
2214 // loop:
2215 // %n.val = phi i32[%n, %entry], [%inc, %loop]
2216 //
2217 // It will create:
2218 //
2219 // loop.from.entry:
2220 // %n.loop.pre = phi i32 [%n, %entry]
2221 // br %label loop
2222 // loop.from.loop:
2223 // %inc.loop.pre = phi i32 [%inc, %loop]
2224 // br %label loop
2225 //
2226 // After this rewrite, further analysis will ignore any phi nodes with more
2227 // than one incoming edge.
2228
2229 // TODO: Simplify PHINodes in the basic block to remove duplicate
2230 // predecessors.
2231
2232 // Special case for CleanupPad: all EH blocks must have the same unwind edge
2233 // so we need to create an additional "dispatcher" block.
2234 if (auto *CleanupPad =
2235 dyn_cast_or_null<CleanupPadInst>(Val: BB.getFirstNonPHI())) {
2236 SmallVector<BasicBlock *, 8> Preds(predecessors(BB: &BB));
2237 for (BasicBlock *Pred : Preds) {
2238 if (CatchSwitchInst *CS =
2239 dyn_cast<CatchSwitchInst>(Val: Pred->getTerminator())) {
2240 // CleanupPad with a CatchSwitch predecessor: therefore this is an
2241 // unwind destination that needs to be handled specially.
2242 assert(CS->getUnwindDest() == &BB);
2243 (void)CS;
2244 rewritePHIsForCleanupPad(CleanupPadBB: &BB, CleanupPad);
2245 return;
2246 }
2247 }
2248 }
2249
2250 LandingPadInst *LandingPad = nullptr;
2251 PHINode *ReplPHI = nullptr;
2252 if ((LandingPad = dyn_cast_or_null<LandingPadInst>(Val: BB.getFirstNonPHI()))) {
2253 // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
2254 // We replace the original landing pad with a PHINode that will collect the
2255 // results from all of them.
2256 ReplPHI = PHINode::Create(Ty: LandingPad->getType(), NumReservedValues: 1, NameStr: "");
2257 ReplPHI->insertBefore(InsertPos: LandingPad->getIterator());
2258 ReplPHI->takeName(V: LandingPad);
2259 LandingPad->replaceAllUsesWith(V: ReplPHI);
2260 // We will erase the original landing pad at the end of this function after
2261 // ehAwareSplitEdge cloned it in the transition blocks.
2262 }
2263
2264 SmallVector<BasicBlock *, 8> Preds(predecessors(BB: &BB));
2265 for (BasicBlock *Pred : Preds) {
2266 auto *IncomingBB = ehAwareSplitEdge(BB: Pred, Succ: &BB, OriginalPad: LandingPad, LandingPadReplacement: ReplPHI);
2267 IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
2268
2269 // Stop the moving of values at ReplPHI, as this is either null or the PHI
2270 // that replaced the landing pad.
2271 movePHIValuesToInsertedBlock(SuccBB: &BB, InsertedBB: IncomingBB, PredBB: Pred, UntilPHI: ReplPHI);
2272 }
2273
2274 if (LandingPad) {
2275 // Calls to the ehAwareSplitEdge function cloned the original landing pad,
2276 // so it is no longer needed.
2277 LandingPad->eraseFromParent();
2278 }
2279}
2280
2281static void rewritePHIs(Function &F) {
2282 SmallVector<BasicBlock *, 8> WorkList;
2283
2284 for (BasicBlock &BB : F)
2285 if (auto *PN = dyn_cast<PHINode>(Val: &BB.front()))
2286 if (PN->getNumIncomingValues() > 1)
2287 WorkList.push_back(Elt: &BB);
2288
2289 for (BasicBlock *BB : WorkList)
2290 rewritePHIs(BB&: *BB);
2291}
2292
2293/// Default materializable callback
2294 // Check for instructions that we can recreate on resume as opposed to
2295 // spilling the result into the coroutine frame.
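//
// For example (illustrative IR), a GEP that is live across a suspend can be
// recomputed in the resume path from its (spilled) operands instead of
// occupying a frame slot itself:
//
//   %p = getelementptr inbounds i32, ptr %base, i64 4
//   %s = call i8 @llvm.coro.suspend(token %save, i1 false)
//   ...
//   %v = load i32, ptr %p      ; %p can be rematerialized here from %base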
2296bool coro::defaultMaterializable(Instruction &V) {
2297 return (isa<CastInst>(Val: &V) || isa<GetElementPtrInst>(Val: &V) ||
2298 isa<BinaryOperator>(Val: &V) || isa<CmpInst>(Val: &V) || isa<SelectInst>(Val: &V));
2299}
2300
2301// Check for structural coroutine intrinsics that should not be spilled into
2302// the coroutine frame.
2303static bool isCoroutineStructureIntrinsic(Instruction &I) {
2304 return isa<CoroIdInst>(Val: &I) || isa<CoroSaveInst>(Val: &I) ||
2305 isa<CoroSuspendInst>(Val: &I);
2306}
2307
2308// For each instruction identified as materializable across the suspend point,
2309// and its associated DAG of other rematerializable instructions,
2310// recreate the DAG of instructions after the suspend point.
2311static void rewriteMaterializableInstructions(
2312 const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8>
2313 &AllRemats) {
2314 // This has to be done in 2 phases:
2315 // 1. Do the remats and record the required defs to be replaced in the
2316 //    original use instructions.
2317 // 2. Once all the remats are complete, replace the uses in the final
2318 //    instructions with the new defs.
2319 typedef struct {
2320 Instruction *Use;
2321 Instruction *Def;
2322 Instruction *Remat;
2323 } ProcessNode;
2324
2325 SmallVector<ProcessNode> FinalInstructionsToProcess;
2326
2327 for (const auto &E : AllRemats) {
2328 Instruction *Use = E.first;
2329 Instruction *CurrentMaterialization = nullptr;
2330 RematGraph *RG = E.second.get();
2331 ReversePostOrderTraversal<RematGraph *> RPOT(RG);
2332 SmallVector<Instruction *> InstructionsToProcess;
2333
2334 // If the target use is actually a suspend instruction then we have to
2335 // insert the remats into the end of the predecessor (there should only be
2336 // one). This is so that suspend blocks always have the suspend instruction
2337 // as the first instruction.
2338 auto InsertPoint = &*Use->getParent()->getFirstInsertionPt();
2339 if (isa<AnyCoroSuspendInst>(Val: Use)) {
2340 BasicBlock *SuspendPredecessorBlock =
2341 Use->getParent()->getSinglePredecessor();
2342 assert(SuspendPredecessorBlock && "malformed coro suspend instruction");
2343 InsertPoint = SuspendPredecessorBlock->getTerminator();
2344 }
2345
2346 // Note: skip the first instruction as this is the actual use that we're
2347 // rematerializing everything for.
2348 auto I = RPOT.begin();
2349 ++I;
2350 for (; I != RPOT.end(); ++I) {
2351 Instruction *D = (*I)->Node;
2352 CurrentMaterialization = D->clone();
2353 CurrentMaterialization->setName(D->getName());
2354 CurrentMaterialization->insertBefore(InsertPos: InsertPoint);
2355 InsertPoint = CurrentMaterialization;
2356
2357 // Replace all uses of Def in the instructions being added as part of this
2358 // rematerialization group
2359 for (auto &I : InstructionsToProcess)
2360 I->replaceUsesOfWith(From: D, To: CurrentMaterialization);
2361
2362 // Don't replace the final use at this point as this can cause problems
2363 // for other materializations. Instead, for any final use that uses a
2364 // def that's being rematerialized, record the replacement values.
2365 for (unsigned i = 0, E = Use->getNumOperands(); i != E; ++i)
2366 if (Use->getOperand(i) == D) // Is this operand pointing to oldval?
2367 FinalInstructionsToProcess.push_back(
2368 Elt: {.Use: Use, .Def: D, .Remat: CurrentMaterialization});
2369
2370 InstructionsToProcess.push_back(Elt: CurrentMaterialization);
2371 }
2372 }
2373
2374 // Finally, replace the uses with the defines that we've just rematerialized
2375 for (auto &R : FinalInstructionsToProcess) {
2376 if (auto *PN = dyn_cast<PHINode>(Val: R.Use)) {
2377 assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
2378 "values in the PHINode");
2379 PN->replaceAllUsesWith(V: R.Remat);
2380 PN->eraseFromParent();
2381 continue;
2382 }
2383 R.Use->replaceUsesOfWith(From: R.Def, To: R.Remat);
2384 }
2385}
2386
2387// Splits the block at a particular instruction unless it is the first
2388// instruction in the block with a single predecessor.
2389static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
2390 auto *BB = I->getParent();
2391 if (&BB->front() == I) {
2392 if (BB->getSinglePredecessor()) {
2393 BB->setName(Name);
2394 return BB;
2395 }
2396 }
2397 return BB->splitBasicBlock(I, BBName: Name);
2398}
2399
2400// Split above and below a particular instruction so that it
2401 // ends up alone in its own block.
2402static void splitAround(Instruction *I, const Twine &Name) {
2403 splitBlockIfNotFirst(I, Name);
2404 splitBlockIfNotFirst(I: I->getNextNode(), Name: "After" + Name);
2405}
2406
2407static bool isSuspendBlock(BasicBlock *BB) {
2408 return isa<AnyCoroSuspendInst>(Val: BB->front());
2409}
2410
2411typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
2412
2413/// Does control flow starting at the given block ever reach a suspend
2414/// instruction before reaching a block in VisitedOrFreeBBs?
2415static bool isSuspendReachableFrom(BasicBlock *From,
2416 VisitedBlocksSet &VisitedOrFreeBBs) {
2417 // Eagerly try to add this block to the visited set. If it's already
2418 // there, stop recursing; this path doesn't reach a suspend before
2419 // either looping or reaching a freeing block.
2420 if (!VisitedOrFreeBBs.insert(Ptr: From).second)
2421 return false;
2422
2423 // We assume that we'll already have split suspends into their own blocks.
2424 if (isSuspendBlock(BB: From))
2425 return true;
2426
2427 // Recurse on the successors.
2428 for (auto *Succ : successors(BB: From)) {
2429 if (isSuspendReachableFrom(From: Succ, VisitedOrFreeBBs))
2430 return true;
2431 }
2432
2433 return false;
2434}
2435
2436/// Is the given alloca "local", i.e. bounded in lifetime to not cross a
2437/// suspend point?
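///
/// A minimal sketch (made-up IR, assumed intrinsic signatures): a "local"
/// coro.alloca whose free is reached before any suspend, so it can later be
/// lowered by lowerLocalAllocas to a plain alloca instead of a frame slot:
///
///   %a = call token @llvm.coro.alloca.alloc(i64 64, i32 8)
///   %p = call ptr @llvm.coro.alloca.get(token %a)
///   ...use %p, no suspend in between...
///   call void @llvm.coro.alloca.free(token %a)
///   %s = call i8 @llvm.coro.suspend(token %save, i1 false)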
2438static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
2439 // Seed the visited set with all the basic blocks containing a free
2440 // so that we won't pass them up.
2441 VisitedBlocksSet VisitedOrFreeBBs;
2442 for (auto *User : AI->users()) {
2443 if (auto FI = dyn_cast<CoroAllocaFreeInst>(Val: User))
2444 VisitedOrFreeBBs.insert(Ptr: FI->getParent());
2445 }
2446
2447 return !isSuspendReachableFrom(From: AI->getParent(), VisitedOrFreeBBs);
2448}
2449
2450/// After we split the coroutine, will the given basic block be along
2451/// an obvious exit path for the resumption function?
2452static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
2453 unsigned depth = 3) {
2454 // If we've bottomed out our depth count, stop searching and assume
2455 // that the path might loop back.
2456 if (depth == 0) return false;
2457
2458 // If this is a suspend block, we're about to exit the resumption function.
2459 if (isSuspendBlock(BB)) return true;
2460
2461 // Recurse into the successors.
2462 for (auto *Succ : successors(BB)) {
2463 if (!willLeaveFunctionImmediatelyAfter(BB: Succ, depth: depth - 1))
2464 return false;
2465 }
2466
2467 // If none of the successors leads back in a loop, we're on an exit/abort.
2468 return true;
2469}
2470
2471static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
2472 // Look for a free that isn't sufficiently obviously followed by
2473 // either a suspend or a termination, i.e. something that will leave
2474 // the coro resumption frame.
2475 for (auto *U : AI->users()) {
2476 auto FI = dyn_cast<CoroAllocaFreeInst>(Val: U);
2477 if (!FI) continue;
2478
2479 if (!willLeaveFunctionImmediatelyAfter(BB: FI->getParent()))
2480 return true;
2481 }
2482
2483 // If we never found one, we don't need a stack save.
2484 return false;
2485}
2486
2487/// Turn each of the given local allocas into a normal (dynamic) alloca
2488/// instruction.
2489static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
2490 SmallVectorImpl<Instruction*> &DeadInsts) {
2491 for (auto *AI : LocalAllocas) {
2492 IRBuilder<> Builder(AI);
2493
2494 // Save the stack depth. Try to avoid doing this if the stackrestore
2495 // is going to immediately precede a return or something.
2496 Value *StackSave = nullptr;
2497 if (localAllocaNeedsStackSave(AI))
2498 StackSave = Builder.CreateStackSave();
2499
2500 // Allocate memory.
2501 auto Alloca = Builder.CreateAlloca(Ty: Builder.getInt8Ty(), ArraySize: AI->getSize());
2502 Alloca->setAlignment(AI->getAlignment());
2503
2504 for (auto *U : AI->users()) {
2505 // Replace gets with the allocation.
2506 if (isa<CoroAllocaGetInst>(Val: U)) {
2507 U->replaceAllUsesWith(V: Alloca);
2508
2509 // Replace frees with stackrestores. This is safe because
2510 // alloca.alloc is required to obey a stack discipline, although we
2511 // don't enforce that structurally.
2512 } else {
2513 auto FI = cast<CoroAllocaFreeInst>(Val: U);
2514 if (StackSave) {
2515 Builder.SetInsertPoint(FI);
2516 Builder.CreateStackRestore(Ptr: StackSave);
2517 }
2518 }
2519 DeadInsts.push_back(Elt: cast<Instruction>(Val: U));
2520 }
2521
2522 DeadInsts.push_back(Elt: AI);
2523 }
2524}
2525
2526/// Turn the given coro.alloca.alloc call into a dynamic allocation.
2527/// This happens during the all-instructions iteration, so it must not
2528/// delete the call.
2529static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
2530 coro::Shape &Shape,
2531 SmallVectorImpl<Instruction*> &DeadInsts) {
2532 IRBuilder<> Builder(AI);
2533 auto Alloc = Shape.emitAlloc(Builder, Size: AI->getSize(), CG: nullptr);
2534
2535 for (User *U : AI->users()) {
2536 if (isa<CoroAllocaGetInst>(Val: U)) {
2537 U->replaceAllUsesWith(V: Alloc);
2538 } else {
2539 auto FI = cast<CoroAllocaFreeInst>(Val: U);
2540 Builder.SetInsertPoint(FI);
2541 Shape.emitDealloc(Builder, Ptr: Alloc, CG: nullptr);
2542 }
2543 DeadInsts.push_back(Elt: cast<Instruction>(Val: U));
2544 }
2545
2546 // Push this on last so that it gets deleted after all the others.
2547 DeadInsts.push_back(Elt: AI);
2548
2549 // Return the new allocation value so that we can check for needed spills.
2550 return cast<Instruction>(Val: Alloc);
2551}
2552
2553/// Get the current swifterror value.
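///
/// A minimal sketch of the emitted marker (the swifterror value type is
/// assumed to be ptr here): a call through a null function pointer such as
///
///   %err = call ptr null()
///
/// which is recorded in Shape.SwiftErrorOps and is expected to be replaced
/// with the real swifterror access when the coroutine is split.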
2554static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
2555 coro::Shape &Shape) {
2556 // Make a fake function pointer as a sort of intrinsic.
2557 auto FnTy = FunctionType::get(Result: ValueTy, Params: {}, isVarArg: false);
2558 auto Fn = ConstantPointerNull::get(T: Builder.getPtrTy());
2559
2560 auto Call = Builder.CreateCall(FTy: FnTy, Callee: Fn, Args: {});
2561 Shape.SwiftErrorOps.push_back(Elt: Call);
2562
2563 return Call;
2564}
2565
2566/// Set the given value as the current swifterror value.
2567///
2568/// Returns a slot that can be used as a swifterror slot.
2569static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
2570 coro::Shape &Shape) {
2571 // Make a fake function pointer as a sort of intrinsic.
2572 auto FnTy = FunctionType::get(Result: Builder.getPtrTy(),
2573 Params: {V->getType()}, isVarArg: false);
2574 auto Fn = ConstantPointerNull::get(T: Builder.getPtrTy());
2575
2576 auto Call = Builder.CreateCall(FTy: FnTy, Callee: Fn, Args: { V });
2577 Shape.SwiftErrorOps.push_back(Elt: Call);
2578
2579 return Call;
2580}
2581
2582/// Set the swifterror value from the given alloca before a call,
2583 /// then put it back in the alloca afterwards.
2584///
2585/// Returns an address that will stand in for the swifterror slot
2586/// until splitting.
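///
/// A rough sketch of the result (names are made up; the swifterror value type
/// is assumed to be ptr; the caller rewrites the call's swifterror operand to
/// use the returned %slot):
///
///   %v0   = load ptr, ptr %err.alloca
///   %slot = call ptr null(ptr %v0)          ; "set" marker, returns the slot
///   call void @foo(ptr swifterror %slot)    ; original call, now using %slot
///   %v1   = call ptr null()                 ; "get" marker
///   store ptr %v1, ptr %err.alloca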
2587static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
2588 AllocaInst *Alloca,
2589 coro::Shape &Shape) {
2590 auto ValueTy = Alloca->getAllocatedType();
2591 IRBuilder<> Builder(Call);
2592
2593 // Load the current value from the alloca and set it as the
2594 // swifterror value.
2595 auto ValueBeforeCall = Builder.CreateLoad(Ty: ValueTy, Ptr: Alloca);
2596 auto Addr = emitSetSwiftErrorValue(Builder, V: ValueBeforeCall, Shape);
2597
2598 // Move to after the call. Since swifterror only has a guaranteed
2599 // value on normal exits, we can ignore implicit and explicit unwind
2600 // edges.
2601 if (isa<CallInst>(Val: Call)) {
2602 Builder.SetInsertPoint(Call->getNextNode());
2603 } else {
2604 auto Invoke = cast<InvokeInst>(Val: Call);
2605 Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
2606 }
2607
2608 // Get the current swifterror value and store it to the alloca.
2609 auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
2610 Builder.CreateStore(Val: ValueAfterCall, Ptr: Alloca);
2611
2612 return Addr;
2613}
2614
2615/// Eliminate a formerly-swifterror alloca by inserting the get/set
2616/// intrinsics and attempting to MemToReg the alloca away.
2617static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
2618 coro::Shape &Shape) {
2619 for (Use &Use : llvm::make_early_inc_range(Range: Alloca->uses())) {
2620 // swifterror values can only be used in very specific ways.
2621 // We take advantage of that here.
2622 auto User = Use.getUser();
2623 if (isa<LoadInst>(Val: User) || isa<StoreInst>(Val: User))
2624 continue;
2625
2626 assert(isa<CallInst>(User) || isa<InvokeInst>(User));
2627 auto Call = cast<Instruction>(Val: User);
2628
2629 auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
2630
2631 // Use the returned slot address as the call argument.
2632 Use.set(Addr);
2633 }
2634
2635 // All the uses should be loads and stores now.
2636 assert(isAllocaPromotable(Alloca));
2637}
2638
2639/// "Eliminate" a swifterror argument by reducing it to the alloca case
2640/// and then loading and storing in the prologue and epilog.
2641///
2642/// The argument keeps the swifterror flag.
2643static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
2644 coro::Shape &Shape,
2645 SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
2646 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
2647
2648 auto ArgTy = cast<PointerType>(Val: Arg.getType());
2649 auto ValueTy = PointerType::getUnqual(C&: F.getContext());
2650
2651 // Reduce to the alloca case:
2652
2653 // Create an alloca and replace all uses of the arg with it.
2654 auto Alloca = Builder.CreateAlloca(Ty: ValueTy, AddrSpace: ArgTy->getAddressSpace());
2655 Arg.replaceAllUsesWith(V: Alloca);
2656
2657 // Set an initial value in the alloca. swifterror is always null on entry.
2658 auto InitialValue = Constant::getNullValue(Ty: ValueTy);
2659 Builder.CreateStore(Val: InitialValue, Ptr: Alloca);
2660
2661 // Find all the suspends in the function and save and restore around them.
2662 for (auto *Suspend : Shape.CoroSuspends) {
2663 (void) emitSetAndGetSwiftErrorValueAround(Call: Suspend, Alloca, Shape);
2664 }
2665
2666 // Find all the coro.ends in the function and restore the error value.
2667 for (auto *End : Shape.CoroEnds) {
2668 Builder.SetInsertPoint(End);
2669 auto FinalValue = Builder.CreateLoad(Ty: ValueTy, Ptr: Alloca);
2670 (void) emitSetSwiftErrorValue(Builder, V: FinalValue, Shape);
2671 }
2672
2673 // Now we can use the alloca logic.
2674 AllocasToPromote.push_back(Elt: Alloca);
2675 eliminateSwiftErrorAlloca(F, Alloca, Shape);
2676}
2677
2678/// Eliminate all problematic uses of swifterror arguments and allocas
2679/// from the function. We'll fix them up later when splitting the function.
2680static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
2681 SmallVector<AllocaInst*, 4> AllocasToPromote;
2682
2683 // Look for a swifterror argument.
2684 for (auto &Arg : F.args()) {
2685 if (!Arg.hasSwiftErrorAttr()) continue;
2686
2687 eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
2688 break;
2689 }
2690
2691 // Look for swifterror allocas.
2692 for (auto &Inst : F.getEntryBlock()) {
2693 auto Alloca = dyn_cast<AllocaInst>(Val: &Inst);
2694 if (!Alloca || !Alloca->isSwiftError()) continue;
2695
2696 // Clear the swifterror flag.
2697 Alloca->setSwiftError(false);
2698
2699 AllocasToPromote.push_back(Elt: Alloca);
2700 eliminateSwiftErrorAlloca(F, Alloca, Shape);
2701 }
2702
2703 // If we have any allocas to promote, compute a dominator tree and
2704 // promote them en masse.
2705 if (!AllocasToPromote.empty()) {
2706 DominatorTree DT(F);
2707 PromoteMemToReg(Allocas: AllocasToPromote, DT);
2708 }
2709}
2710
2711/// retcon and retcon.once conventions assume that all spill uses can be sunk
2712/// after the coro.begin intrinsic.
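///
/// Illustrative example (names are made up): a user of a spilled value that
/// sits before coro.begin in the same block is moved below it, along with its
/// own transitive users that also precede coro.begin:
///
///   %id  = call token @llvm.coro.id.retcon(...)
///   %v   = call i32 @produce()         ; spilled definition
///   %use = add i32 %v, 1               ; will be moved after %hdl
///   %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)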
2713static void sinkSpillUsesAfterCoroBegin(Function &F,
2714 const FrameDataInfo &FrameData,
2715 CoroBeginInst *CoroBegin) {
2716 DominatorTree Dom(F);
2717
2718 SmallSetVector<Instruction *, 32> ToMove;
2719 SmallVector<Instruction *, 32> Worklist;
2720
2721 // Collect all users that precede coro.begin.
2722 for (auto *Def : FrameData.getAllDefs()) {
2723 for (User *U : Def->users()) {
2724 auto Inst = cast<Instruction>(Val: U);
2725 if (Inst->getParent() != CoroBegin->getParent() ||
2726 Dom.dominates(Def: CoroBegin, User: Inst))
2727 continue;
2728 if (ToMove.insert(X: Inst))
2729 Worklist.push_back(Elt: Inst);
2730 }
2731 }
2732 // Recursively collect users before coro.begin.
2733 while (!Worklist.empty()) {
2734 auto *Def = Worklist.pop_back_val();
2735 for (User *U : Def->users()) {
2736 auto Inst = cast<Instruction>(Val: U);
2737 if (Dom.dominates(Def: CoroBegin, User: Inst))
2738 continue;
2739 if (ToMove.insert(X: Inst))
2740 Worklist.push_back(Elt: Inst);
2741 }
2742 }
2743
2744 // Sort by dominance.
2745 SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
2746 llvm::sort(C&: InsertionList, Comp: [&Dom](Instruction *A, Instruction *B) -> bool {
2747 // If A dominates B, it should precede (<) B.
2748 return Dom.dominates(Def: A, User: B);
2749 });
2750
2751 Instruction *InsertPt = CoroBegin->getNextNode();
2752 for (Instruction *Inst : InsertionList)
2753 Inst->moveBefore(MovePos: InsertPt);
2754}
2755
2756 /// For each local variable whose users are all inside one of the suspended
2757 /// regions, we sink its lifetime.start markers to the block just after the
2758 /// suspend block. Doing so minimizes the lifetime of each variable, and
2759 /// hence minimizes the amount of data we end up putting on the frame.
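///
/// Illustrative example (names are made up): if every real use of %x is
/// dominated by the block that follows the suspend, the lifetime.start in the
/// entry block is dropped and a clone is inserted in that dominating block
/// (just before its terminator):
///
///   entry:
///     %x = alloca i32
///     call void @llvm.lifetime.start.p0(i64 4, ptr %x)  ; removed
///     ...
///   await.suspend:
///     %s = call i8 @llvm.coro.suspend(token %save, i1 false)
///   await.ready:                                        ; single successor
///     call void @llvm.lifetime.start.p0(i64 4, ptr %x)  ; sunk here
///     br label %use.block
///   use.block:
///     store i32 0, ptr %x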
2760static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
2761 SuspendCrossingInfo &Checker,
2762 const DominatorTree &DT) {
2763 if (F.hasOptNone())
2764 return;
2765
2766 // Collect all possible basic blocks which may dominate all uses of allocas.
2767 SmallPtrSet<BasicBlock *, 4> DomSet;
2768 DomSet.insert(Ptr: &F.getEntryBlock());
2769 for (auto *CSI : Shape.CoroSuspends) {
2770 BasicBlock *SuspendBlock = CSI->getParent();
2771 assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
2772 "should have split coro.suspend into its own block");
2773 DomSet.insert(Ptr: SuspendBlock->getSingleSuccessor());
2774 }
2775
2776 for (Instruction &I : instructions(F)) {
2777 AllocaInst* AI = dyn_cast<AllocaInst>(Val: &I);
2778 if (!AI)
2779 continue;
2780
2781 for (BasicBlock *DomBB : DomSet) {
2782 bool Valid = true;
2783 SmallVector<Instruction *, 1> Lifetimes;
2784
2785 auto isLifetimeStart = [](Instruction* I) {
2786 if (auto* II = dyn_cast<IntrinsicInst>(Val: I))
2787 return II->getIntrinsicID() == Intrinsic::lifetime_start;
2788 return false;
2789 };
2790
2791 auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
2792 if (isLifetimeStart(U)) {
2793 Lifetimes.push_back(Elt: U);
2794 return true;
2795 }
2796 if (!U->hasOneUse() || U->stripPointerCasts() != AI)
2797 return false;
2798 if (isLifetimeStart(U->user_back())) {
2799 Lifetimes.push_back(Elt: U->user_back());
2800 return true;
2801 }
2802 return false;
2803 };
2804
2805 for (User *U : AI->users()) {
2806 Instruction *UI = cast<Instruction>(Val: U);
2807 // For all users except lifetime.start markers, if they are all
2808 // dominated by one of the candidate basic blocks and also do not
2809 // cross suspend points, then there is no need to spill the
2810 // alloca.
2811 if (!DT.dominates(A: DomBB, B: UI->getParent()) ||
2812 Checker.isDefinitionAcrossSuspend(DefBB: DomBB, U: UI)) {
2813 // Skip lifetime.start, GEP and bitcast used by lifetime.start
2814 // markers.
2815 if (collectLifetimeStart(UI, AI))
2816 continue;
2817 Valid = false;
2818 break;
2819 }
2820 }
2821 // Sink the lifetime.start markers to the dominating block when the
2822 // markers are the only uses found outside that region.
2823 if (Valid && Lifetimes.size() != 0) {
2824 auto *NewLifetime = Lifetimes[0]->clone();
2825 NewLifetime->replaceUsesOfWith(From: NewLifetime->getOperand(i: 1), To: AI);
2826 NewLifetime->insertBefore(InsertPos: DomBB->getTerminator());
2827
2828 // The original lifetime.start markers outside the region are no longer needed.
2829 for (Instruction *S : Lifetimes)
2830 S->eraseFromParent();
2831
2832 break;
2833 }
2834 }
2835 }
2836}
2837
2838static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
2839 const SuspendCrossingInfo &Checker,
2840 SmallVectorImpl<AllocaInfo> &Allocas,
2841 const DominatorTree &DT) {
2842 if (Shape.CoroSuspends.empty())
2843 return;
2844
2845 // The PromiseAlloca will be specially handled since it needs to be in a
2846 // fixed position in the frame.
2847 if (AI == Shape.SwitchLowering.PromiseAlloca)
2848 return;
2849
2850 // The __coro_gro alloca should outlive the promise, so make sure we
2851 // keep it outside the frame.
2852 if (AI->hasMetadata(KindID: LLVMContext::MD_coro_outside_frame))
2853 return;
2854
2855 // The code that uses the lifetime.start intrinsic does not work for
2856 // functions with loops that have no exit. Disable it on ABIs that we know
2857 // generate such code.
2858 bool ShouldUseLifetimeStartInfo =
2859 (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
2860 Shape.ABI != coro::ABI::RetconOnce);
2861 AllocaUseVisitor Visitor{AI->getDataLayout(), DT, Shape, Checker,
2862 ShouldUseLifetimeStartInfo};
2863 Visitor.visitPtr(I&: *AI);
2864 if (!Visitor.getShouldLiveOnFrame())
2865 return;
2866 Allocas.emplace_back(Args&: AI, Args: Visitor.getAliasesCopy(),
2867 Args: Visitor.getMayWriteBeforeCoroBegin());
2868}
2869
2870static std::optional<std::pair<Value &, DIExpression &>>
2871salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
2872 bool OptimizeFrame, bool UseEntryValue, Function *F,
2873 Value *Storage, DIExpression *Expr,
2874 bool SkipOutermostLoad) {
2875 IRBuilder<> Builder(F->getContext());
2876 auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
2877 while (isa<IntrinsicInst>(Val: InsertPt))
2878 ++InsertPt;
2879 Builder.SetInsertPoint(TheBB: &F->getEntryBlock(), IP: InsertPt);
2880
2881 while (auto *Inst = dyn_cast_or_null<Instruction>(Val: Storage)) {
2882 if (auto *LdInst = dyn_cast<LoadInst>(Val: Inst)) {
2883 Storage = LdInst->getPointerOperand();
2884 // FIXME: This is a heuristic that works around the fact that
2885 // LLVM IR debug intrinsics cannot yet distinguish between
2886 // memory and value locations: Because a dbg.declare(alloca) is
2887 // implicitly a memory location no DW_OP_deref operation for the
2888 // last direct load from an alloca is necessary. This condition
2889 // effectively drops the *last* DW_OP_deref in the expression.
      if (!SkipOutermostLoad)
        Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
    } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
      Storage = StInst->getValueOperand();
    } else {
      SmallVector<uint64_t, 16> Ops;
      SmallVector<Value *, 0> AdditionalValues;
      Value *Op = llvm::salvageDebugInfoImpl(
          *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
          AdditionalValues);
      if (!Op || !AdditionalValues.empty()) {
        // If salvaging failed or salvaging produced more than one location
        // operand, give up.
        break;
      }
      Storage = Op;
      Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
    }
    SkipOutermostLoad = false;
  }
  if (!Storage)
    return std::nullopt;

  auto *StorageAsArg = dyn_cast<Argument>(Storage);
  const bool IsSwiftAsyncArg =
      StorageAsArg && StorageAsArg->hasAttribute(Attribute::SwiftAsync);

  // Swift async arguments are described by an entry value of the ABI-defined
  // register containing the coroutine context.
  // Entry values in variadic expressions are not supported.
  if (IsSwiftAsyncArg && UseEntryValue && !Expr->isEntryValue() &&
      Expr->isSingleLocationExpression())
    Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);
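  // Roughly speaking (an illustrative sketch, not from the original
  // comments), the argument ends up described by something like
  //   !DIExpression(DW_OP_LLVM_entry_value, 1, ...)
  // so the debugger reads the value the context register had on function
  // entry rather than whatever it holds at the current PC.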

  // If the coroutine frame is an Argument, store it in an alloca to improve
  // its availability (e.g. registers may be clobbered).
  // Avoid this if optimizations are enabled (they would remove the alloca) or
  // if the value is guaranteed to be available through other means (e.g. swift
  // ABI guarantees).
  if (StorageAsArg && !OptimizeFrame && !IsSwiftAsyncArg) {
    auto &Cached = ArgToAllocaMap[StorageAsArg];
    if (!Cached) {
      Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
                                    Storage->getName() + ".debug");
      Builder.CreateStore(Storage, Cached);
    }
    Storage = Cached;
    // FIXME: LLVM lacks nuanced semantics to differentiate between
    // memory and direct locations at the IR level. The backend will
    // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
    // location. Thus, if there are deref and offset operations in the
    // expression, we need to add a DW_OP_deref at the *start* of the
    // expression to first load the contents of the alloca before
    // adjusting it with the expression.
    Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
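    // For example (illustrative only): an expression such as
    //   !DIExpression(DW_OP_plus_uconst, 16)
    // on the argument becomes roughly
    //   !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 16)
    // on the new ".debug" alloca, so the debugger first reloads the stashed
    // pointer before applying the offset.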
  }

  return {{*Storage, *Expr}};
}

void coro::salvageDebugInfo(
    SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
    DbgVariableIntrinsic &DVI, bool OptimizeFrame, bool UseEntryValue) {

  Function *F = DVI.getFunction();
  // Follow the pointer arithmetic all the way to the incoming
  // function argument and convert into a DIExpression.
  bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
  Value *OriginalStorage = DVI.getVariableLocationOp(0);

  auto SalvagedInfo = ::salvageDebugInfoImpl(
      ArgToAllocaMap, OptimizeFrame, UseEntryValue, F, OriginalStorage,
      DVI.getExpression(), SkipOutermostLoad);
  if (!SalvagedInfo)
    return;

  Value *Storage = &SalvagedInfo->first;
  DIExpression *Expr = &SalvagedInfo->second;

  DVI.replaceVariableLocationOp(OriginalStorage, Storage);
  DVI.setExpression(Expr);
  // We only hoist dbg.declare today, since it doesn't make sense to hoist a
  // dbg.value: it does not have the function-wide guarantees that dbg.declare
  // does.
  if (isa<DbgDeclareInst>(DVI)) {
    std::optional<BasicBlock::iterator> InsertPt;
    if (auto *I = dyn_cast<Instruction>(Storage)) {
      InsertPt = I->getInsertionPointAfterDef();
      // Update the DILocation only if the variable was not inlined.
      DebugLoc ILoc = I->getDebugLoc();
      DebugLoc DVILoc = DVI.getDebugLoc();
      if (ILoc && DVILoc &&
          DVILoc->getScope()->getSubprogram() ==
              ILoc->getScope()->getSubprogram())
        DVI.setDebugLoc(I->getDebugLoc());
    } else if (isa<Argument>(Storage))
      InsertPt = F->getEntryBlock().begin();
    if (InsertPt)
      DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
  }
}

void coro::salvageDebugInfo(
    SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
    DbgVariableRecord &DVR, bool OptimizeFrame, bool UseEntryValue) {

  Function *F = DVR.getFunction();
  // Follow the pointer arithmetic all the way to the incoming
  // function argument and convert into a DIExpression.
  bool SkipOutermostLoad = DVR.isDbgDeclare();
  Value *OriginalStorage = DVR.getVariableLocationOp(0);

  auto SalvagedInfo = ::salvageDebugInfoImpl(
      ArgToAllocaMap, OptimizeFrame, UseEntryValue, F, OriginalStorage,
      DVR.getExpression(), SkipOutermostLoad);
  if (!SalvagedInfo)
    return;

  Value *Storage = &SalvagedInfo->first;
  DIExpression *Expr = &SalvagedInfo->second;

  DVR.replaceVariableLocationOp(OriginalStorage, Storage);
  DVR.setExpression(Expr);
  // We only hoist dbg.declare today, since it doesn't make sense to hoist a
  // dbg.value: it does not have the function-wide guarantees that dbg.declare
  // does.
  if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
    std::optional<BasicBlock::iterator> InsertPt;
    if (auto *I = dyn_cast<Instruction>(Storage)) {
      InsertPt = I->getInsertionPointAfterDef();
      // Update the DILocation only if the variable was not inlined.
      DebugLoc ILoc = I->getDebugLoc();
      DebugLoc DVRLoc = DVR.getDebugLoc();
      if (ILoc && DVRLoc &&
          DVRLoc->getScope()->getSubprogram() ==
              ILoc->getScope()->getSubprogram())
        DVR.setDebugLoc(ILoc);
    } else if (isa<Argument>(Storage))
      InsertPt = F->getEntryBlock().begin();
    if (InsertPt) {
      DVR.removeFromParent();
      (*InsertPt)->getParent()->insertDbgRecordBefore(&DVR, *InsertPt);
    }
  }
}

static void doRematerializations(
    Function &F, SuspendCrossingInfo &Checker,
    const std::function<bool(Instruction &)> &MaterializableCallback) {
  if (F.hasOptNone())
    return;

  SpillInfo Spills;

  // See if there are materializable instructions across suspend points. We
  // record these as the starting point to also identify materializable defs
  // of uses in these operations.
  for (Instruction &I : instructions(F)) {
    if (!MaterializableCallback(I))
      continue;
    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U))
        Spills[&I].push_back(cast<Instruction>(U));
  }

  // Process each of the identified rematerializable instructions
  // and add predecessor instructions that can also be rematerialized.
  // This is actually a graph of instructions since we could potentially
  // have multiple uses of a def in the set of predecessor instructions.
  // The approach here is to maintain a graph of instructions for each
  // bottom-level instruction, where we have a unique set of instructions
  // (nodes) and edges between them. We then walk the graph in reverse
  // post-order to insert them past the suspend point, while ensuring that
  // the ordering is correct. We also rely on CSE removing duplicate defs for
  // remats of different instructions with a def in common (rather than
  // maintaining more complex graphs for each suspend point).

  // We can do this by adding new nodes to the list for each suspend
  // point. Then we use standard GraphTraits to give a reverse post-order
  // traversal when we insert the nodes after the suspend.
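  //
  // For instance (an illustrative sketch, not taken from any test):
  //   %a = add i32 %x, 1
  //   %b = mul i32 %a, 2
  //   ... suspend ...
  //   use of %b
  // Both %b and its materializable operand %a become nodes of one RematGraph,
  // and clones of both are emitted after the suspend in reverse post-order
  // (%a first, then %b) instead of spilling %b to the coroutine frame.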
  SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> AllRemats;
  for (auto &E : Spills) {
    for (Instruction *U : E.second) {
      // Don't process a user twice (this can happen if the instruction uses
      // more than one rematerializable def)
      if (AllRemats.count(U))
        continue;

      // Constructor creates the whole RematGraph for the given Use
      auto RematUPtr =
          std::make_unique<RematGraph>(MaterializableCallback, U, Checker);

      LLVM_DEBUG(dbgs() << "***** Next remat group *****\n";
                 ReversePostOrderTraversal<RematGraph *> RPOT(RematUPtr.get());
                 for (auto I = RPOT.begin(); I != RPOT.end();
                      ++I) { (*I)->Node->dump(); } dbgs()
                 << "\n";);

      AllRemats[U] = std::move(RematUPtr);
    }
  }

  // Rewrite materializable instructions to be materialized at the use
  // point.
  LLVM_DEBUG(dumpRemats("Materializations", AllRemats));
  rewriteMaterializableInstructions(AllRemats);
}

void coro::buildCoroutineFrame(
    Function &F, Shape &Shape, TargetTransformInfo &TTI,
    const std::function<bool(Instruction &)> &MaterializableCallback) {
  // Don't eliminate swifterror in async functions that won't be split.
  if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
    eliminateSwiftError(F, Shape);

  if (Shape.ABI == coro::ABI::Switch &&
      Shape.SwitchLowering.PromiseAlloca) {
    Shape.getSwitchCoroId()->clearPromise();
  }

  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
  // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossing data.
  for (auto *CSI : Shape.CoroSuspends) {
    if (auto *Save = CSI->getCoroSave())
      splitAround(Save, "CoroSave");
    splitAround(CSI, "CoroSuspend");
  }

  // Put CoroEnds into their own blocks.
  for (AnyCoroEndInst *CE : Shape.CoroEnds) {
    splitAround(CE, "CoroEnd");

    // Emit the musttail call function in a new block before the CoroEnd.
    // We do this here so that the right suspend crossing info is computed for
    // the uses of the musttail call. (Arguments to the coro.end instructions
    // would be ignored.)
    if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
      auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
      if (!MustTailCallFn)
        continue;
      IRBuilder<> Builder(AsyncEnd);
      SmallVector<Value *, 8> Args(AsyncEnd->args());
      auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
      auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn,
                                      TTI, Arguments, Builder);
      splitAround(Call, "MustTailCall.Before.CoroEnd");
    }
  }

  // Later code makes structural assumptions about single-predecessor PHIs,
  // e.g. that they are not live across a suspend point.
  cleanupSinglePredPHIs(F);

  // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI
  // will never have its definition separated from the PHI by a suspend point.
  rewritePHIs(F);
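  // As a rough illustration (names invented, not from the original source):
  // a block containing
  //   %v = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
  // gets each incoming edge split into its own single-predecessor block, so
  // that if %a or %b ends up on the frame, its reload can be placed on the
  // corresponding edge rather than feeding the multi-edge PHI directly.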

  // Build suspend crossing info.
  SuspendCrossingInfo Checker(F, Shape);

  doRematerializations(F, Checker, MaterializableCallback);

  const DominatorTree DT(F);
  FrameDataInfo FrameData;
  SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
  SmallVector<Instruction*, 4> DeadInstructions;
  if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
      Shape.ABI != coro::ABI::RetconOnce)
    sinkLifetimeStartMarkers(F, Shape, Checker, DT);

  // Collect the spills for arguments and other non-materializable values.
  for (Argument &A : F.args())
    for (User *U : A.users())
      if (Checker.isDefinitionAcrossSuspend(A, U))
        FrameData.Spills[&A].push_back(cast<Instruction>(U));

  for (Instruction &I : instructions(F)) {
    // Values returned from coroutine structure intrinsics should not be part
    // of the Coroutine Frame.
    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
      continue;

    // Handle alloca.alloc specially here.
    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
      // Check whether the alloca's lifetime is bounded by suspend points.
      if (isLocalAlloca(AI)) {
        LocalAllocas.push_back(AI);
        continue;
      }

      // If not, do a quick rewrite of the alloca and then add spills of
      // the rewritten value. The rewrite doesn't invalidate anything in
      // Spills because the other alloca intrinsics have no other operands
      // besides AI, and it doesn't invalidate the iteration because we delay
      // erasing AI.
      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);

      for (User *U : Alloc->users()) {
        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
          FrameData.Spills[Alloc].push_back(cast<Instruction>(U));
      }
      continue;
    }

    // Ignore alloca.get; we process this as part of coro.alloca.alloc.
    if (isa<CoroAllocaGetInst>(I))
      continue;

    if (auto *AI = dyn_cast<AllocaInst>(&I)) {
      collectFrameAlloca(AI, Shape, Checker, FrameData.Allocas, DT);
      continue;
    }

    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U)) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
        FrameData.Spills[&I].push_back(cast<Instruction>(U));
      }
  }

  LLVM_DEBUG(dumpAllocas(FrameData.Allocas));

  // We don't want the layout of the coroutine frame to be affected by debug
  // information, so we only salvage DbgValueInsts whose values are already in
  // the frame. The dbg.values for allocas are handled specially.
  for (auto &Iter : FrameData.Spills) {
    auto *V = Iter.first;
    SmallVector<DbgValueInst *, 16> DVIs;
    SmallVector<DbgVariableRecord *, 16> DVRs;
    findDbgValues(DVIs, V, &DVRs);
    for (DbgValueInst *DVI : DVIs)
      if (Checker.isDefinitionAcrossSuspend(*V, DVI))
        FrameData.Spills[V].push_back(DVI);
    // Add the instructions which carry debug info that is in the frame.
    for (DbgVariableRecord *DVR : DVRs)
      if (Checker.isDefinitionAcrossSuspend(*V, DVR->Marker->MarkedInstr))
        FrameData.Spills[V].push_back(DVR->Marker->MarkedInstr);
  }

  LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
      Shape.ABI == coro::ABI::Async)
    sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
  Shape.FrameTy = buildFrameType(F, Shape, FrameData);
  Shape.FramePtr = Shape.CoroBegin;
  // For now, this works for C++ programs only.
  buildFrameDebugInfo(F, Shape, FrameData);
  insertSpills(FrameData, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInstructions);

  for (auto *I : DeadInstructions)
    I->eraseFromParent();
}