CoroElide.cpp source code [llvm_projects/llvm/lib/Transforms/Coroutines/CoroElide.cpp]

1	//===- CoroElide.cpp - Coroutine Frame Allocation Elision Pass ------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "llvm/Transforms/Coroutines/CoroElide.h"
10	#include "CoroInternal.h"
11	#include "llvm/ADT/DenseMap.h"
12	#include "llvm/ADT/Statistic.h"
13	#include "llvm/Analysis/AliasAnalysis.h"
14	#include "llvm/Analysis/InstructionSimplify.h"
15	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
16	#include "llvm/IR/Dominators.h"
17	#include "llvm/IR/InstIterator.h"
18	#include "llvm/Support/ErrorHandling.h"
19	#include "llvm/Support/FileSystem.h"
20	#include <optional>
21
22	using namespace llvm;
23
24	#define DEBUG_TYPE "coro-elide"
25
26	STATISTIC(NumOfCoroElided, "The # of coroutine get elided.");
27
28	#ifndef NDEBUG
// Hidden command-line option (this declaration sits inside an
// `#ifndef NDEBUG` region) naming the file to which attemptElide() appends one
// "Elide <callee> in <caller>" line per elided coroutine. The log is only
// written when the option is non-empty.
29	static cl::opt<std::string> CoroElideInfoOutputFilename(
30	"coro-elide-info-output-file", cl::value_desc("filename"),
31	cl::desc("File to record the coroutines got elided"), cl::Hidden);
32	#endif
33
34	namespace {
35	// Created on demand if the coro-elide pass has work to do.
// Per-function bookkeeping shared by every CoroIdElider working on that
// function: the post-split coro.id instructions that are elision candidates
// and the switch instructions fed by a coro.suspend.
36	class FunctionElideInfo {
37	public:
// Remembers F and immediately scans it (see collectPostSplitCoroIds).
38	FunctionElideInfo(Function *F) : ContainingFunction(F) {
39	this->collectPostSplitCoroIds();
40	}
41
// True if the scan found at least one candidate coro.id.
42	bool hasCoroIds() const { return !CoroIds.empty(); }
43
// Read-only access to the candidate coro.ids, in the order they were found.
44	const SmallVectorImpl<CoroIdInst > &getCoroIds() const* { return CoroIds; }
45
46	private:
// The function that was scanned; frame allocas are later created in it.
47	Function *ContainingFunction;
// Post-split coro.ids whose coroutine is not the containing function itself.
48	SmallVector<CoroIdInst *, `4`> CoroIds;
49	// Used in canCoroBeginEscape to distinguish coro.suspend switches.
50	SmallPtrSet<const SwitchInst *, `4`> CoroSuspendSwitches;
51
// Populates CoroIds and CoroSuspendSwitches with one pass over the function.
52	void collectPostSplitCoroIds();
// CoroIdElider reads ContainingFunction and CoroSuspendSwitches directly.
53	friend class CoroIdElider;
54	};
55
// Performs the elision work for a single coro.id: gathers the coro.begin,
// coro.alloc and coro.subfn.addr instructions tied to it, decides whether the
// frame's lifetime permits elision, and rewrites the IR accordingly.
56	class CoroIdElider {
57	public:
// Collects the instructions associated with CoroId; the analyses are only
// stored by reference here and used by the other members.
58	CoroIdElider(CoroIdInst *CoroId, FunctionElideInfo &FEI, AAResults &AA,
59	DominatorTree &DT, OptimizationRemarkEmitter &ORE);
// Replaces the heap-allocated frame with an alloca of FrameSize bytes aligned
// to FrameAlign in the containing function's entry block.
60	void elideHeapAllocations(uint64_t FrameSize, Align FrameAlign);
// Returns true if every coro.begin is destroyed before the function exits
// normally (see the definition for the exact checks).
61	bool lifetimeEligibleForElide() const;
// Entry point: devirtualizes resume/destroy calls and elides the allocation
// when possible. Always returns true.
62	bool attemptElide();
// Conservative reachability check: true if the given coro.begin may reach one
// of the given exit blocks without passing a block holding its coro.destroy.
63	bool canCoroBeginEscape(const CoroBeginInst *,
64	const SmallPtrSetImpl<BasicBlock > &) const*;
65
66	private:
// The coro.id being processed and the shared per-function information.
67	CoroIdInst *CoroId;
68	FunctionElideInfo &FEI;
// Analyses supplied by the pass: alias analysis (tail-call fix-up), dominator
// tree (lifetime check) and the remark emitter (diagnostics).
69	AAResults &AA;
70	DominatorTree &DT;
71	OptimizationRemarkEmitter &ORE;
72
// coro.begin / coro.alloc users of CoroId.
73	SmallVector<CoroBeginInst *, `1`> CoroBegins;
74	SmallVector<CoroAllocInst *, `1`> CoroAllocs;
// coro.subfn.addr users of the coro.begins requesting the resume function.
75	SmallVector<CoroSubFnInst *, `4`> ResumeAddr;
// coro.subfn.addr users requesting the destroy function, grouped by the
// coro.begin they refer to.
76	DenseMap<CoroBeginInst , SmallVector<CoroSubFnInst , `4`>> DestroyAddr;
77	};
78	} // end anonymous namespace
79
80	// Go through the list of coro.subfn.addr intrinsics and replace them with the
81	// provided constant.
82	static void replaceWithConstant(Constant *Value,
83	SmallVectorImpl<CoroSubFnInst *> &Users) {
84	for (CoroSubFnInst *I : Users)
85	replaceAndRecursivelySimplify(I, SimpleV: Value);
86	}
87
88	// See if any operand of the call instruction references the coroutine frame.
89	static bool operandReferences(CallInst CI, AllocaInst Frame, AAResults &AA) {
90	for (Value *Op : CI->operand_values())
91	if (Op->getType()->isPointerTy() && !AA.isNoAlias(V1: Op, V2: Frame))
92	return true;
93	return false;
94	}
95
96	// Look for any tail calls referencing the coroutine frame and remove tail
97	// attribute from them, since now coroutine frame resides on the stack and tail
98	// call implies that the function does not references anything on the stack.
99	// However if it's a musttail call, we cannot remove the tailcall attribute.
100	// It's safe to keep it there as the musttail call is for symmetric transfer,
101	// and by that point the frame should have been destroyed and hence not
102	// interfering with operands.
103	static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
104	Function &F = *Frame->getFunction();
105	for (Instruction &I : instructions(F))
106	if (auto *Call = dyn_cast<CallInst>(Val: &I))
107	if (Call->isTailCall() && operandReferences(CI: Call, Frame, AA) &&
108	!Call->isMustTailCall())
109	Call->setTailCall(false);
110	}
111
112	// Given a resume function @f.resume(%f.frame %frame), returns the size*
113	// and expected alignment of %f.frame type.
114	static std::optional<std::pair<uint64_t, Align>>
115	getFrameLayout(Function *Resume) {
116	// Pull information from the function attributes.
117	auto Size = Resume->getParamDereferenceableBytes(ArgNo: `0`);
118	if (!Size)
119	return std::nullopt;
120	return std::make_pair(x&: Size, y: Resume->getParamAlign(ArgNo: `0`).valueOrOne());
121	}
122
123	// Finds first non alloca instruction in the entry block of a function.
124	static Instruction getFirstNonAllocaInTheEntryBlock(Function F) {
125	for (Instruction &I : F->getEntryBlock())
126	if (!isa<AllocaInst>(Val: &I))
127	return &I;
128	llvm_unreachable("no terminator in the entry block");
129	}
130
131	#ifndef NDEBUG
132	static std::unique_ptr<raw_fd_ostream> getOrCreateLogFile() {
133	assert(!CoroElideInfoOutputFilename.empty() &&
134	"coro-elide-info-output-file shouldn't be empty");
135	std::error_code EC;
136	auto Result = std::make_unique<raw_fd_ostream>(CoroElideInfoOutputFilename,
137	EC, sys::fs::OF_Append);
138	if (!EC)
139	return Result;
140	llvm::errs() << "Error opening coro-elide-info-output-file '"
141	<< CoroElideInfoOutputFilename << " for appending!\n";
142	return std::make_unique<raw_fd_ostream>(`2`, false); // stderr.
143	}
144	#endif
145
146	void FunctionElideInfo::collectPostSplitCoroIds() {
147	for (auto &I : instructions(F: this->ContainingFunction)) {
148	if (auto *CII = dyn_cast<CoroIdInst>(Val: &I))
149	if (CII->getInfo().isPostSplit())
150	// If it is the coroutine itself, don't touch it.
151	if (CII->getCoroutine() != CII->getFunction())
152	CoroIds.push_back(Elt: CII);
153
154	// Consider case like:
155	// %0 = call i8 @llvm.coro.suspend(...)
156	// switch i8 %0, label %suspend [i8 0, label %resume
157	// i8 1, label %cleanup]
158	// and collect the SwitchInsts which are used by escape analysis later.
159	if (auto *CSI = dyn_cast<CoroSuspendInst>(Val: &I))
160	if (CSI->hasOneUse() && isa<SwitchInst>(Val: CSI->use_begin()->getUser())) {
161	SwitchInst *SWI = cast<SwitchInst>(Val: CSI->use_begin()->getUser());
162	if (SWI->getNumCases() == `2`)
163	CoroSuspendSwitches.insert(Ptr: SWI);
164	}
165	}
166	}
167
168	CoroIdElider::CoroIdElider(CoroIdInst *CoroId, FunctionElideInfo &FEI,
169	AAResults &AA, DominatorTree &DT,
170	OptimizationRemarkEmitter &ORE)
171	: CoroId(CoroId), FEI(FEI), AA(AA), DT(DT), ORE(ORE) {
172	// Collect all coro.begin and coro.allocs associated with this coro.id.
173	for (User *U : CoroId->users()) {
174	if (auto *CB = dyn_cast<CoroBeginInst>(Val: U))
175	CoroBegins.push_back(Elt: CB);
176	else if (auto *CA = dyn_cast<CoroAllocInst>(Val: U))
177	CoroAllocs.push_back(Elt: CA);
178	}
179
180	// Collect all coro.subfn.addrs associated with coro.begin.
181	// Note, we only devirtualize the calls if their coro.subfn.addr refers to
182	// coro.begin directly. If we run into cases where this check is too
183	// conservative, we can consider relaxing the check.
184	for (CoroBeginInst *CB : CoroBegins) {
185	for (User *U : CB->users())
186	if (auto *II = dyn_cast<CoroSubFnInst>(Val: U))
187	switch (II->getIndex()) {
188	case CoroSubFnInst::ResumeIndex:
189	ResumeAddr.push_back(Elt: II);
190	break;
191	case CoroSubFnInst::DestroyIndex:
192	DestroyAddr [CB].push_back(Elt: II);
193	break;
194	default:
195	llvm_unreachable("unexpected coro.subfn.addr constant");
196	}
197	}
198	}
199
200	// To elide heap allocations we need to suppress code blocks guarded by
201	// llvm.coro.alloc and llvm.coro.free instructions.
//
// This function handles the llvm.coro.alloc side and the frame itself: it
// replaces every coro.alloc with `false`, creates an i8 array alloca of
// FrameSize bytes aligned to FrameAlign in the containing function's entry
// block, and redirects every coro.begin to it. The replaced coro.alloc and
// coro.begin instructions are erased. llvm.coro.free is not touched here.
202	void CoroIdElider::elideHeapAllocations(uint64_t FrameSize, Align FrameAlign) {
203	LLVMContext &C = FEI.ContainingFunction->getContext();
// New instructions are inserted before the first non-alloca instruction of
// the entry block, i.e. right after the existing allocas.
204	BasicBlock::iterator InsertPt =
205	getFirstNonAllocaInTheEntryBlock(F: FEI.ContainingFunction)->getIterator();
206
207	// Replacing llvm.coro.alloc with false will suppress dynamic
208	// allocation as it is expected for the frontend to generate the code that
209	// looks like:
210	// id = coro.id(...)
211	// mem = coro.alloc(id) ? malloc(coro.size()) : 0;
212	// coro.begin(id, mem)
213	auto *False = ConstantInt::getFalse(Context&: C);
214	for (auto *CA : CoroAllocs) {
215	CA->replaceAllUsesWith(V: False);
216	CA->eraseFromParent();
217	}
218
219	// FIXME: Design how to transmit alignment information for every alloca that
220	// is spilled into the coroutine frame and recreate the alignment information
221	// here. Possibly we will need to do a mini SROA here and break the coroutine
222	// frame into individual AllocaInst recreating the original alignment.
223	const DataLayout &DL = FEI.ContainingFunction->getDataLayout();
// The frame is modelled as an opaque byte array: [FrameSize x i8].
224	auto FrameTy = ArrayType::get(ElementType: Type::getInt8Ty(C), NumElements: FrameSize);
225	auto Frame = new* AllocaInst (FrameTy, DL.getAllocaAddrSpace(), "", InsertPt);
226	Frame->setAlignment(FrameAlign);
// Cast the alloca to an unqualified pointer so it can stand in for the
// value previously produced by coro.begin.
227	auto *FrameVoidPtr =
228	new BitCastInst (Frame, PointerType::getUnqual(C), "vFrame", InsertPt);
229
// Every coro.begin of this coro.id now yields the stack frame.
230	for (auto *CB : CoroBegins) {
231	CB->replaceAllUsesWith(V: FrameVoidPtr);
232	CB->eraseFromParent();
233	}
234
235	// Since now coroutine frame lives on the stack we need to make sure that
236	// any tail call referencing it, must be made non-tail call.
237	removeTailCallAttribute(Frame, AA);
238	}
239
// Conservatively decides whether the frame produced by CB may still be alive
// when the function exits through one of the blocks in TIs.
//
// Starting at CB's block, the CFG is explored with a worklist. Blocks that
// contain a coro.destroy address request for CB are pre-marked as visited, so
// paths through them are not followed. Returns true ("may escape") when an
// exit block in TIs is reached that ends in a return, or is reached after any
// block containing a non-intrinsic use of CB has been seen, or when the block
// budget is exhausted. Returns false only when the exploration finishes
// without hitting any of those cases.
//
// Precondition: DestroyAddr has an entry for CB (asserted).
240	bool CoroIdElider::canCoroBeginEscape(
241	const CoroBeginInst CB, const* SmallPtrSetImpl<BasicBlock > &TIs) const* {
242	const auto &It = DestroyAddr.find(Val: CB);
243	assert(It != DestroyAddr.end());
244
245	// Limit the number of blocks we visit.
// The budget grows with the number of destroy requests recorded for CB.
246	unsigned Limit = `32` * (`1` + It ->second.size());
247
248	SmallVector<const BasicBlock *, `32`> Worklist;
249	Worklist.push_back(Elt: CB->getParent());
250
251	SmallPtrSet<const BasicBlock *, `32`> Visited;
252	// Consider basicblock of coro.destroy as visited one, so that we
253	// skip the path pass through coro.destroy.
254	for (auto *DA : It ->second)
255	Visited.insert(Ptr: DA->getParent());
256
// Blocks holding a use of CB that might let the frame pointer escape.
257	SmallPtrSet<const BasicBlock *, `32`> EscapingBBs;
258	for (auto *U : CB->users()) {
259	// The use from coroutine intrinsics are not a problem.
260	if (isa<CoroFreeInst, CoroSubFnInst, CoroSaveInst>(Val: U))
261	continue;
262
263	// Think all other usages may be an escaping candidate conservatively.
264	//
265	// Note that the major user of switch ABI coroutine (the C++) will store
266	// resume.fn, destroy.fn and the index to the coroutine frame immediately.
267	// So the parent of the coro.begin in C++ will be always escaping.
268	// Then we can't get any performance benefits for C++ by improving the
269	// precision of the method.
270	//
271	// The reason why we still judge it is we want to make LLVM Coroutine in
272	// switch ABIs to be self contained as much as possible instead of a
273	// by-product of C++20 Coroutines.
274	EscapingBBs.insert(Ptr: cast<Instruction>(Val: U)->getParent());
275	}
276
// Sticky flag: once any escaping block has been popped it stays set for the
// rest of the traversal, regardless of which path is being followed.
277	bool PotentiallyEscaped = false;
278
279	do {
// Blocks are taken from the back of the worklist (depth-first order).
280	const auto *BB = Worklist.pop_back_val();
281	if (!Visited.insert(Ptr: BB).second)
282	continue;
283
284	// A Path insensitive marker to test whether the coro.begin escapes.
285	// It is intentional to make it path insensitive while it may not be
286	// precise since we don't want the process to be too slow.
287	PotentiallyEscaped \|= EscapingBBs.count(Ptr: BB);
288
289	if (TIs.count(Ptr: BB)) {
290	if (isa<ReturnInst>(Val: BB->getTerminator()) \|\| PotentiallyEscaped)
291	return true;
292
293	// If the function ends with the exceptional terminator, the memory used
294	// by the coroutine frame can be released by stack unwinding
295	// automatically. So we can think the coro.begin doesn't escape if it
296	// exits the function by exceptional terminator.
297
298	continue;
299	}
300
301	// Conservatively say that there is potentially a path.
302	if (!--Limit)
303	return true;
304
305	auto TI = BB->getTerminator();
306	// Although the default dest of coro.suspend switches is suspend point
307	// which means an escape path to normal terminator, it is reasonable to skip
308	// it since coroutine frame doesn't change outside the coroutine body.
// Successor 0 (the switch default) is therefore not enqueued; only
// successors 1 and 2 are followed.
309	if (isa<SwitchInst>(Val: TI) &&
310	FEI.CoroSuspendSwitches.count(Ptr: cast<SwitchInst>(Val: TI))) {
311	Worklist.push_back(Elt: cast<SwitchInst>(Val: TI)->getSuccessor(idx: `1`));
312	Worklist.push_back(Elt: cast<SwitchInst>(Val: TI)->getSuccessor(idx: `2`));
313	} else
314	Worklist.append(in_start: succ_begin(BB), in_end: succ_end(BB));
315
316	} while (!Worklist.empty());
317
318	// We have exhausted all possible paths and are certain that coro.begin can
319	// not reach to any of terminators.
320	return false;
321	}
322
// Returns true if the coroutine frame's lifetime is provably contained in the
// containing function, so that its heap allocation may be replaced by a stack
// allocation. Requires at least one coro.alloc and, for every coro.begin, at
// least one recorded coro.destroy address request that guards all normal
// function exits (checked first via dominance, then via the slower
// canCoroBeginEscape traversal).
323	bool CoroIdElider::lifetimeEligibleForElide() const {
324	// If no CoroAllocs, we cannot suppress allocation, so elision is not
325	// possible.
326	if (CoroAllocs.empty())
327	return false;
328
329	// Check that for every coro.begin there is at least one coro.destroy directly
330	// referencing the SSA value of that coro.begin along each
331	// non-exceptional path.
332	//
333	// If the value escaped, then coro.destroy would have been referencing a
334	// memory location storing that value and not the virtual register.
335
336	SmallPtrSet<BasicBlock *, `8`> Terminators;
337	// First gather all of the terminators for the function: every block whose
338	// terminator has no successors, except blocks ending in unreachable.
339	// NOTE(review): no special handling of a final coro.suspend is visible here.
340	for (BasicBlock &B : *FEI.ContainingFunction) {
341	auto *TI = B.getTerminator();
342
343	if (TI->getNumSuccessors() != `0` \|\| isa<UnreachableInst>(Val: TI))
344	continue;
345
346	Terminators.insert(Ptr: &B);
347	}
348
349	// For each coro.begin, verify that its coro.destroys guard the exits.
350	for (const auto *CB : CoroBegins) {
351	auto It = DestroyAddr.find(Val: CB);
352
353	// FIXME: If we have not found any destroys for this coro.begin, we
354	// disqualify this elide.
355	if (It == DestroyAddr.end())
356	return false;
357
358	const auto &CorrespondingDestroyAddrs = It ->second;
359
360	// If every terminators is dominated by coro.destroy, we could know the
361	// corresponding coro.begin wouldn't escape.
// The lambda receives an exit *block* and tests whether any destroy request
// dominates that block's terminator instruction.
362	auto DominatesTerminator = [&](auto *TI) {
363	return llvm::any_of(CorrespondingDestroyAddrs, [&](auto *Destroy) {
364	return DT.dominates(Destroy, TI->getTerminator());
365	});
366	};
367
368	if (llvm::all_of(Range&: Terminators, P: DominatesTerminator))
369	continue;
370
371	// Otherwise canCoroBeginEscape would decide whether there is any paths from
372	// coro.begin to Terminators which not pass through any of the
373	// coro.destroys. This is a slower analysis.
374	//
375	// canCoroBeginEscape is relatively slow, so we avoid to run it as much as
376	// possible.
377	if (canCoroBeginEscape(CB, TIs: Terminators))
378	return false;
379	}
380
381	// We have checked all CoroBegins and their paths to the terminators without
382	// finding disqualifying code patterns, so the heap allocation can be elided.
383	return true;
384	}
385
386	bool CoroIdElider::attemptElide() {
387	// PostSplit coro.id refers to an array of subfunctions in its Info
388	// argument.
389	ConstantArray *Resumers = CoroId->getInfo().Resumers;
390	assert(Resumers && "PostSplit coro.id Info argument must refer to an array"
391	"of coroutine subfunctions");
392	auto *ResumeAddrConstant =
393	Resumers->getAggregateElement(Elt: CoroSubFnInst::ResumeIndex);
394
395	replaceWithConstant(Value: ResumeAddrConstant, Users&: ResumeAddr);
396
397	bool EligibleForElide = lifetimeEligibleForElide();
398
399	auto *DestroyAddrConstant = Resumers->getAggregateElement(
400	Elt: EligibleForElide ? CoroSubFnInst::CleanupIndex
401	: CoroSubFnInst::DestroyIndex);
402
403	for (auto &It : DestroyAddr)
404	replaceWithConstant(Value: DestroyAddrConstant, Users&: It.second);
405
406	auto FrameSizeAndAlign = getFrameLayout(Resume: cast<Function>(Val: ResumeAddrConstant));
407
408	auto CallerFunctionName = FEI.ContainingFunction->getName();
409	auto CalleeCoroutineName = CoroId->getCoroutine()->getName();
410
411	if (EligibleForElide && FrameSizeAndAlign) {
412	elideHeapAllocations(FrameSize: FrameSizeAndAlign ->first, FrameAlign: FrameSizeAndAlign ->second);
413	coro::replaceCoroFree(CoroId, /Elide=/true);
414	NumOfCoroElided ++;
415
416	#ifndef NDEBUG
417	if (!CoroElideInfoOutputFilename.empty())
418	*getOrCreateLogFile() << "Elide " << CalleeCoroutineName << " in "
419	<< FEI.ContainingFunction->getName() << "\n";
420	#endif
421
422	ORE.emit(RemarkBuilder: [&]() {
423	return OptimizationRemark (DEBUG_TYPE, "CoroElide", CoroId)
424	<< "'" << ore::NV ("callee", CalleeCoroutineName)
425	<< "' elided in '" << ore::NV ("caller", CallerFunctionName)
426	<< "' (frame_size="
427	<< ore::NV ("frame_size", FrameSizeAndAlign ->first) << ", align="
428	<< ore::NV ("align", FrameSizeAndAlign ->second.value()) << ")";
429	});
430	} else {
431	ORE.emit(RemarkBuilder: [&]() {
432	auto Remark = OptimizationRemarkMissed (DEBUG_TYPE, "CoroElide", CoroId)
433	<< "'" << ore::NV ("callee", CalleeCoroutineName)
434	<< "' not elided in '"
435	<< ore::NV ("caller", CallerFunctionName);
436
437	if (FrameSizeAndAlign)
438	return Remark << "' (frame_size="
439	<< ore::NV ("frame_size", FrameSizeAndAlign ->first)
440	<< ", align="
441	<< ore::NV ("align", FrameSizeAndAlign ->second.value())
442	<< ")";
443	else
444	return Remark << "' (frame_size=unknown, align=unknown)";
445	});
446	}
447
448	return true;
449	}
450
451	PreservedAnalyses CoroElidePass::run(Function &F, FunctionAnalysisManager &AM) {
452	auto &M = *F.getParent();
453	if (!coro::declaresIntrinsics(M, List: Intrinsic::coro_id))
454	return PreservedAnalyses::all();
455
456	FunctionElideInfo FEI{&F};
457	// Elide is not necessary if there's no coro.id within the function.
458	if (!FEI.hasCoroIds())
459	return PreservedAnalyses::all();
460
461	AAResults &AA = AM.getResult<AAManager>(IR&: F);
462	DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
463	auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
464
465	bool Changed = false;
466	for (auto *CII : FEI.getCoroIds()) {
467	CoroIdElider CIE(CII, FEI, AA, DT, ORE);
468	Changed \|= CIE.attemptElide();
469	}
470
471	return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
472	}
473

Browse the source code of llvm_projects/llvm/lib/Transforms/Coroutines/CoroElide.cpp