WebAssemblyFixIrreducibleControlFlow.cpp source code [llvm_projects/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp]

1	//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file implements a pass that removes irreducible control flow.
11	/// Irreducible control flow means multiple-entry loops, which this pass
12	/// transforms to have a single entry.
13	///
14	/// Note that LLVM has a generic pass that lowers irreducible control flow, but
15	/// it linearizes control flow, turning diamonds into two triangles, which is
16	/// both unnecessary and undesirable for WebAssembly.
17	///
18	/// The big picture: We recursively process each "region", defined as a group
19	/// of blocks with a single entry and no branches back to that entry. A region
20	/// may be the entire function body, or the inner part of a loop, i.e., the
21	/// loop's body without branches back to the loop entry. In each region we fix
22	/// up multi-entry loops by adding a new block that can dispatch to each of the
23	/// loop entries, based on the value of a label "helper" variable, and we
24	/// replace direct branches to the entries with assignments to the label
25	/// variable and a branch to the dispatch block. Then the dispatch block is the
26	/// single entry in the loop containing the previous multiple entries. After
27	/// ensuring all the loops in a region are reducible, we recurse into them. The
28	/// total time complexity of this pass is:
29	///
30	///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
31	///     NumLoops * NumLoops)
32	///
33	/// This pass is similar to what the Relooper [1] does. Both identify looping
34	/// code that requires multiple entries, and resolve it in a similar way (in
35	/// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
36	/// also that like the Relooper, we implement a "minimal" intervention: we only
37	/// use the "label" helper for the blocks we absolutely must and no others. We
38	/// also prioritize code size and do not duplicate code in order to resolve
39	/// irreducibility. The graph algorithms for finding loops and entries and so
40	/// forth are also similar to the Relooper. The main differences between this
41	/// pass and the Relooper are:
42	///
43	///  * We just care about irreducibility, so we just look at loops.
44	///  * The Relooper emits structured control flow (with ifs etc.), while we
45	///    emit a CFG.
46	///
47	/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
48	/// Proceedings of the ACM international conference companion on Object oriented
49	/// programming systems languages and applications companion (SPLASH '11). ACM,
50	/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
51	/// http://doi.acm.org/10.1145/2048147.2048224
52	///
53	//===----------------------------------------------------------------------===//
54
55	#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
56	#include "WebAssembly.h"
57	#include "WebAssemblySubtarget.h"
58	#include "llvm/CodeGen/MachineFunctionPass.h"
59	#include "llvm/CodeGen/MachineInstrBuilder.h"
60	#include "llvm/Support/Debug.h"
61	using namespace llvm;
62
63	#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
64
65	namespace {
66
67	using BlockVector = SmallVector<MachineBasicBlock *, `4`>;
68	using BlockSet = SmallPtrSet<MachineBasicBlock *, `4`>;
69
70	static BlockVector getSortedEntries(const BlockSet &Entries) {
71	BlockVector SortedEntries(Entries.begin(), Entries.end());
72	llvm::sort(C&: SortedEntries,
73	Comp: [](const MachineBasicBlock A, const* MachineBasicBlock *B) {
74	auto ANum = A->getNumber();
75	auto BNum = B->getNumber();
76	return ANum < BNum;
77	});
78	return SortedEntries;
79	}
80
81	// Calculates reachability in a region. Ignores branches to blocks outside of
82	// the region, and ignores branches to the region entry (for the case where
83	// the region is the inner part of a loop).
84	class ReachabilityGraph {
85	public:
86	ReachabilityGraph(MachineBasicBlock Entry, const* BlockSet &Blocks)
87	: Entry(Entry), Blocks(Blocks) {
88	#ifndef NDEBUG
89	// The region must have a single entry.
90	for (auto *MBB : Blocks) {
91	if (MBB != Entry) {
92	for (auto *Pred : MBB->predecessors()) {
93	assert(inRegion(Pred));
94	}
95	}
96	}
97	#endif
98	calculate();
99	}
100
101	bool canReach(MachineBasicBlock From, MachineBasicBlock To) const {
102	assert(inRegion(From) && inRegion(To));
103	auto I = Reachable.find(Val: From);
104	if (I == Reachable.end())
105	return false;
106	return I ->second.count(Ptr: To);
107	}
108
109	// "Loopers" are blocks that are in a loop. We detect these by finding blocks
110	// that can reach themselves.
111	const BlockSet &getLoopers() const { return Loopers; }
112
113	// Get all blocks that are loop entries.
114	const BlockSet &getLoopEntries() const { return LoopEntries; }
115
116	// Get all blocks that enter a particular loop from outside.
117	const BlockSet &getLoopEnterers(MachineBasicBlock LoopEntry) const* {
118	assert(inRegion(LoopEntry));
119	auto I = LoopEnterers.find(Val: LoopEntry);
120	assert(I != LoopEnterers.end());
121	return I ->second;
122	}
123
124	private:
125	MachineBasicBlock *Entry;
126	const BlockSet &Blocks;
127
128	BlockSet Loopers, LoopEntries;
129	DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
130
131	bool inRegion(MachineBasicBlock MBB) const* { return Blocks.count(Ptr: MBB); }
132
133	// Maps a block to all the other blocks it can reach.
134	DenseMap<MachineBasicBlock *, BlockSet> Reachable;
135
136	void calculate() {
137	// Reachability computation work list. Contains pairs of recent additions
138	// (A, B) where we just added a link A => B.
139	using BlockPair = std::pair<MachineBasicBlock , MachineBasicBlock >;
140	SmallVector<BlockPair, `4`> WorkList;
141
142	// Add all relevant direct branches.
143	for (auto *MBB : Blocks) {
144	for (auto *Succ : MBB->successors()) {
145	if (Succ != Entry && inRegion(MBB: Succ)) {
146	Reachable [MBB].insert(Ptr: Succ);
147	WorkList.emplace_back(Args&: MBB, Args&: Succ);
148	}
149	}
150	}
151
152	while (!WorkList.empty()) {
153	MachineBasicBlock MBB, Succ;
154	std::tie(args&: MBB, args&: Succ) = WorkList.pop_back_val();
155	assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
156	if (MBB != Entry) {
157	// We recently added MBB => Succ, and that means we may have enabled
158	// Pred => MBB => Succ.
159	for (auto *Pred : MBB->predecessors()) {
160	if (Reachable [Pred].insert(Ptr: Succ).second) {
161	WorkList.emplace_back(Args&: Pred, Args&: Succ);
162	}
163	}
164	}
165	}
166
167	// Blocks that can return to themselves are in a loop.
168	for (auto *MBB : Blocks) {
169	if (canReach(From: MBB, To: MBB)) {
170	Loopers.insert(Ptr: MBB);
171	}
172	}
173	assert(!Loopers.count(Entry));
174
175	// Find the loop entries - loopers reachable from blocks not in that loop -
176	// and those outside blocks that reach them, the "loop enterers".
177	for (auto *Looper : Loopers) {
178	for (auto *Pred : Looper->predecessors()) {
179	// Pred can reach Looper. If Looper can reach Pred, it is in the loop;
180	// otherwise, it is a block that enters into the loop.
181	if (!canReach(From: Looper, To: Pred)) {
182	LoopEntries.insert(Ptr: Looper);
183	LoopEnterers [Looper].insert(Ptr: Pred);
184	}
185	}
186	}
187	}
188	};
189
190	// Finds the blocks in a single-entry loop, given the loop entry and the
191	// list of blocks that enter the loop.
192	class LoopBlocks {
193	public:
194	LoopBlocks(MachineBasicBlock Entry, const* BlockSet &Enterers)
195	: Entry(Entry), Enterers(Enterers) {
196	calculate();
197	}
198
199	BlockSet &getBlocks() { return Blocks; }
200
201	private:
202	MachineBasicBlock *Entry;
203	const BlockSet &Enterers;
204
205	BlockSet Blocks;
206
207	void calculate() {
208	// Going backwards from the loop entry, if we ignore the blocks entering
209	// from outside, we will traverse all the blocks in the loop.
210	BlockVector WorkList;
211	BlockSet AddedToWorkList;
212	Blocks.insert(Ptr: Entry);
213	for (auto *Pred : Entry->predecessors()) {
214	if (!Enterers.count(Ptr: Pred)) {
215	WorkList.push_back(Elt: Pred);
216	AddedToWorkList.insert(Ptr: Pred);
217	}
218	}
219
220	while (!WorkList.empty()) {
221	auto *MBB = WorkList.pop_back_val();
222	assert(!Enterers.count(MBB));
223	if (Blocks.insert(Ptr: MBB).second) {
224	for (auto *Pred : MBB->predecessors()) {
225	if (AddedToWorkList.insert(Ptr: Pred).second)
226	WorkList.push_back(Elt: Pred);
227	}
228	}
229	}
230	}
231	};
232
233	class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
234	StringRef getPassName() const override {
235	return "WebAssembly Fix Irreducible Control Flow";
236	}
237
238	bool runOnMachineFunction(MachineFunction &MF) override;
239
240	bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
241	MachineFunction &MF);
242
243	void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
244	MachineFunction &MF, const ReachabilityGraph &Graph);
245
246	public:
247	static char ID; // Pass identification, replacement for typeid
248	WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass (ID) {}
249	};
250
251	bool WebAssemblyFixIrreducibleControlFlow::processRegion(
252	MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
253	bool Changed = false;
254	// Remove irreducibility before processing child loops, which may take
255	// multiple iterations.
256	while (true) {
257	ReachabilityGraph Graph(Entry, Blocks);
258
259	bool FoundIrreducibility = false;
260
261	for (auto *LoopEntry : getSortedEntries(Entries: Graph.getLoopEntries())) {
262	// Find mutual entries - all entries which can reach this one, and
263	// are reached by it (that always includes LoopEntry itself). All mutual
264	// entries must be in the same loop, so if we have more than one, then we
265	// have irreducible control flow.
266	//
267	// (Note that we need to sort the entries here, as otherwise the order can
268	// matter: being mutual is a symmetric relationship, and each set of
269	// mutuals will be handled properly no matter which we see first. However,
270	// there can be multiple disjoint sets of mutuals, and which we process
271	// first changes the output.)
272	//
273	// Note that irreducibility may involve inner loops, e.g. imagine A
274	// starts one loop, and it has B inside it which starts an inner loop.
275	// If we add a branch from all the way on the outside to B, then in a
276	// sense B is no longer an "inner" loop, semantically speaking. We will
277	// fix that irreducibility by adding a block that dispatches to either
278	// either A or B, so B will no longer be an inner loop in our output.
279	// (A fancier approach might try to keep it as such.)
280	//
281	// Note that we still need to recurse into inner loops later, to handle
282	// the case where the irreducibility is entirely nested - we would not
283	// be able to identify that at this point, since the enclosing loop is
284	// a group of blocks all of whom can reach each other. (We'll see the
285	// irreducibility after removing branches to the top of that enclosing
286	// loop.)
287	BlockSet MutualLoopEntries;
288	MutualLoopEntries.insert(Ptr: LoopEntry);
289	for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
290	if (OtherLoopEntry != LoopEntry &&
291	Graph.canReach(From: LoopEntry, To: OtherLoopEntry) &&
292	Graph.canReach(From: OtherLoopEntry, To: LoopEntry)) {
293	MutualLoopEntries.insert(Ptr: OtherLoopEntry);
294	}
295	}
296
297	if (MutualLoopEntries.size() > `1`) {
298	makeSingleEntryLoop(Entries&: MutualLoopEntries, Blocks, MF, Graph);
299	FoundIrreducibility = true;
300	Changed = true;
301	break;
302	}
303	}
304	// Only go on to actually process the inner loops when we are done
305	// removing irreducible control flow and changing the graph. Modifying
306	// the graph as we go is possible, and that might let us avoid looking at
307	// the already-fixed loops again if we are careful, but all that is
308	// complex and bug-prone. Since irreducible loops are rare, just starting
309	// another iteration is best.
310	if (FoundIrreducibility) {
311	continue;
312	}
313
314	for (auto *LoopEntry : Graph.getLoopEntries()) {
315	LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
316	// Each of these calls to processRegion may change the graph, but are
317	// guaranteed not to interfere with each other. The only changes we make
318	// to the graph are to add blocks on the way to a loop entry. As the
319	// loops are disjoint, that means we may only alter branches that exit
320	// another loop, which are ignored when recursing into that other loop
321	// anyhow.
322	if (processRegion(Entry: LoopEntry, Blocks&: InnerBlocks.getBlocks(), MF)) {
323	Changed = true;
324	}
325	}
326
327	return Changed;
328	}
329	}
330
331	// Given a set of entries to a single loop, create a single entry for that
332	// loop by creating a dispatch block for them, routing control flow using
333	// a helper variable. Also updates Blocks with any new blocks created, so
334	// that we properly track all the blocks in the region. But this does not update
335	// ReachabilityGraph; this will be updated in the caller of this function as
336	// needed.
337	void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
338	BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
339	const ReachabilityGraph &Graph) {
340	assert(Entries.size() >= `2`);
341
342	// Sort the entries to ensure a deterministic build.
343	BlockVector SortedEntries = getSortedEntries(Entries);
344
345	#ifndef NDEBUG
346	for (auto *Block : SortedEntries)
347	assert(Block->getNumber() != -`1`);
348	if (SortedEntries.size() > `1`) {
349	for (auto I = SortedEntries.begin(), E = SortedEntries.end() - `1`; I != E;
350	++I) {
351	auto ANum = (*I)->getNumber();
352	auto BNum = (*(std::next(I)))->getNumber();
353	assert(ANum != BNum);
354	}
355	}
356	#endif
357
358	// Create a dispatch block which will contain a jump table to the entries.
359	MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
360	MF.insert(MBBI: MF.end(), MBB: Dispatch);
361	Blocks.insert(Ptr: Dispatch);
362
363	// Add the jump table.
364	const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
365	MachineInstrBuilder MIB =
366	BuildMI(BB: Dispatch, MIMD: DebugLoc (), MCID: TII.get(Opcode: WebAssembly::BR_TABLE_I32));
367
368	// Add the register which will be used to tell the jump table which block to
369	// jump to.
370	MachineRegisterInfo &MRI = MF.getRegInfo();
371	Register Reg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
372	MIB.addReg(RegNo: Reg);
373
374	// Compute the indices in the superheader, one for each bad block, and
375	// add them as successors.
376	DenseMap<MachineBasicBlock , unsigned*> Indices;
377	for (auto *Entry : SortedEntries) {
378	auto Pair = Indices.try_emplace(Key: Entry);
379	assert(Pair.second);
380
381	unsigned Index = MIB.getInstr()->getNumExplicitOperands() - `1`;
382	Pair.first ->second = Index;
383
384	MIB.addMBB(MBB: Entry);
385	Dispatch->addSuccessor(Succ: Entry);
386	}
387
388	// Rewrite the problematic successors for every block that wants to reach
389	// the bad blocks. For simplicity, we just introduce a new block for every
390	// edge we need to rewrite. (Fancier things are possible.)
391
392	BlockVector AllPreds;
393	for (auto *Entry : SortedEntries) {
394	for (auto *Pred : Entry->predecessors()) {
395	if (Pred != Dispatch) {
396	AllPreds.push_back(Elt: Pred);
397	}
398	}
399	}
400
401	// This set stores predecessors within this loop.
402	DenseSet<MachineBasicBlock *> InLoop;
403	for (auto *Pred : AllPreds) {
404	for (auto *Entry : Pred->successors()) {
405	if (!Entries.count(Ptr: Entry))
406	continue;
407	if (Graph.canReach(From: Entry, To: Pred)) {
408	InLoop.insert(V: Pred);
409	break;
410	}
411	}
412	}
413
414	// Record if each entry has a layout predecessor. This map stores
415	// <<loop entry, Predecessor is within the loop?>, layout predecessor>
416	DenseMap<PointerIntPair<MachineBasicBlock , `1`, bool>, MachineBasicBlock >
417	EntryToLayoutPred;
418	for (auto *Pred : AllPreds) {
419	bool PredInLoop = InLoop.count(V: Pred);
420	for (auto *Entry : Pred->successors())
421	if (Entries.count(Ptr: Entry) && Pred->isLayoutSuccessor(MBB: Entry))
422	EntryToLayoutPred [{Entry, PredInLoop}] = Pred;
423	}
424
425	// We need to create at most two routing blocks per entry: one for
426	// predecessors outside the loop and one for predecessors inside the loop.
427	// This map stores
428	// <<loop entry, Predecessor is within the loop?>, routing block>
429	DenseMap<PointerIntPair<MachineBasicBlock , `1`, bool>, MachineBasicBlock >
430	Map;
431	for (auto *Pred : AllPreds) {
432	bool PredInLoop = InLoop.count(V: Pred);
433	for (auto *Entry : Pred->successors()) {
434	if (!Entries.count(Ptr: Entry) \|\| Map.count(Val: {Entry, PredInLoop}))
435	continue;
436	// If there exists a layout predecessor of this entry and this predecessor
437	// is not that, we rather create a routing block after that layout
438	// predecessor to save a branch.
439	if (auto *OtherPred = EntryToLayoutPred.lookup(Val: {Entry, PredInLoop}))
440	if (OtherPred != Pred)
441	continue;
442
443	// This is a successor we need to rewrite.
444	MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
445	MF.insert(MBBI: Pred->isLayoutSuccessor(MBB: Entry)
446	? MachineFunction::iterator (Entry)
447	: MF.end(),
448	MBB: Routing);
449	Blocks.insert(Ptr: Routing);
450
451	// Set the jump table's register of the index of the block we wish to
452	// jump to, and jump to the jump table.
453	BuildMI(BB: Routing, MIMD: DebugLoc (), MCID: TII.get(Opcode: WebAssembly::CONST_I32), DestReg: Reg)
454	.addImm(Val: Indices [Entry]);
455	BuildMI(BB: Routing, MIMD: DebugLoc (), MCID: TII.get(Opcode: WebAssembly::BR)).addMBB(MBB: Dispatch);
456	Routing->addSuccessor(Succ: Dispatch);
457	Map [{Entry, PredInLoop}] = Routing;
458	}
459	}
460
461	for (auto *Pred : AllPreds) {
462	bool PredInLoop = InLoop.count(V: Pred);
463	// Remap the terminator operands and the successor list.
464	for (MachineInstr &Term : Pred->terminators())
465	for (auto &Op : Term.explicit_uses())
466	if (Op.isMBB() && Indices.count(Val: Op.getMBB()))
467	Op.setMBB(Map [{Op.getMBB(), PredInLoop}]);
468
469	for (auto *Succ : Pred->successors()) {
470	if (!Entries.count(Ptr: Succ))
471	continue;
472	auto *Routing = Map [{Succ, PredInLoop}];
473	Pred->replaceSuccessor(Old: Succ, New: Routing);
474	}
475	}
476
477	// Create a fake default label, because br_table requires one.
478	MIB.addMBB(MBB: MIB.getInstr()
479	->getOperand(i: MIB.getInstr()->getNumExplicitOperands() - `1`)
480	.getMBB());
481	}
482
483	} // end anonymous namespace
484
485	char WebAssemblyFixIrreducibleControlFlow::ID = `0`;
486	INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
487	"Removes irreducible control flow", false, false)
488
489	FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
490	return new WebAssemblyFixIrreducibleControlFlow ();
491	}
492
493	// Test whether the given register has an ARGUMENT def.
494	static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
495	for (const auto &Def : MRI.def_instructions(Reg))
496	if (WebAssembly::isArgument(Opc: Def.getOpcode()))
497	return true;
498	return false;
499	}
500
501	// Add a register definition with IMPLICIT_DEFs for every register to cover for
502	// register uses that don't have defs in every possible path.
503	// TODO: This is fairly heavy-handed; find a better approach.
504	static void addImplicitDefs(MachineFunction &MF) {
505	const MachineRegisterInfo &MRI = MF.getRegInfo();
506	const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
507	MachineBasicBlock &Entry = *MF.begin();
508	for (unsigned I = `0`, E = MRI.getNumVirtRegs(); I < E; ++I) {
509	Register Reg = Register::index2VirtReg(Index: I);
510
511	// Skip unused registers.
512	if (MRI.use_nodbg_empty(RegNo: Reg))
513	continue;
514
515	// Skip registers that have an ARGUMENT definition.
516	if (hasArgumentDef(Reg, MRI))
517	continue;
518
519	BuildMI(BB&: Entry, I: Entry.begin(), MIMD: DebugLoc (),
520	MCID: TII.get(Opcode: WebAssembly::IMPLICIT_DEF), DestReg: Reg);
521	}
522
523	// Move ARGUMENT_ instructions to the top of the entry block, so that their*
524	// liveness reflects the fact that these really are live-in values.
525	for (MachineInstr &MI : llvm::make_early_inc_range(Range&: Entry)) {
526	if (WebAssembly::isArgument(Opc: MI.getOpcode())) {
527	MI.removeFromParent();
528	Entry.insert(I: Entry.begin(), MI: &MI);
529	}
530	}
531	}
532
533	bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
534	MachineFunction &MF) {
535	LLVM_DEBUG(dbgs() << "******** Fixing Irreducible Control Flow ********\n"
536	"********** Function: "
537	<< MF.getName() << `'\n'`);
538
539	// Start the recursive process on the entire function body.
540	BlockSet AllBlocks;
541	for (auto &MBB : MF) {
542	AllBlocks.insert(Ptr: &MBB);
543	}
544
545	if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
546	// We rewrote part of the function; recompute relevant things.
547	MF.RenumberBlocks();
548	// Now we've inserted dispatch blocks, some register uses can have incoming
549	// paths without a def. For example, before this pass register %a was
550	// defined in BB1 and used in BB2, and there was only one path from BB1 and
551	// BB2. But if this pass inserts a dispatch block having multiple
552	// predecessors between the two BBs, now there are paths to BB2 without
553	// visiting BB1, and %a's use in BB2 is not dominated by its def. Adding
554	// IMPLICIT_DEFs to all regs is one simple way to fix it.
555	addImplicitDefs(MF);
556	return true;
557	}
558
559	return false;
560	}
561

Browse the source code of llvm_projects/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp