1//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a pass that removes irreducible control flow.
11/// Irreducible control flow means multiple-entry loops, which this pass
12/// transforms to have a single entry.
13///
14/// Note that LLVM has a generic pass that lowers irreducible control flow, but
15/// it linearizes control flow, turning diamonds into two triangles, which is
16/// both unnecessary and undesirable for WebAssembly.
17///
18/// The big picture: We recursively process each "region", defined as a group
19/// of blocks with a single entry and no branches back to that entry. A region
20/// may be the entire function body, or the inner part of a loop, i.e., the
21/// loop's body without branches back to the loop entry. In each region we fix
22/// up multi-entry loops by adding a new block that can dispatch to each of the
23/// loop entries, based on the value of a label "helper" variable, and we
24/// replace direct branches to the entries with assignments to the label
25/// variable and a branch to the dispatch block. Then the dispatch block is the
26/// single entry in the loop containing the previous multiple entries. After
27/// ensuring all the loops in a region are reducible, we recurse into them. The
28/// total time complexity of this pass is:
29///
30/// O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
31/// NumLoops * NumLoops)
32///
33/// This pass is similar to what the Relooper [1] does. Both identify looping
34/// code that requires multiple entries, and resolve it in a similar way (in
35/// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
36/// also that like the Relooper, we implement a "minimal" intervention: we only
37/// use the "label" helper for the blocks we absolutely must and no others. We
38/// also prioritize code size and do not duplicate code in order to resolve
39/// irreducibility. The graph algorithms for finding loops and entries and so
40/// forth are also similar to the Relooper. The main differences between this
41/// pass and the Relooper are:
42///
43/// * We just care about irreducibility, so we just look at loops.
44/// * The Relooper emits structured control flow (with ifs etc.), while we
45/// emit a CFG.
46///
47/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
48/// Proceedings of the ACM international conference companion on Object oriented
49/// programming systems languages and applications companion (SPLASH '11). ACM,
50/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
51/// http://doi.acm.org/10.1145/2048147.2048224
52///
53//===----------------------------------------------------------------------===//
54
55#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
56#include "WebAssembly.h"
57#include "WebAssemblySubtarget.h"
58#include "llvm/CodeGen/MachineFunctionPass.h"
59#include "llvm/CodeGen/MachineInstrBuilder.h"
60#include "llvm/Support/Debug.h"
61using namespace llvm;
62
63#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
64
65namespace {
66
67using BlockVector = SmallVector<MachineBasicBlock *, 4>;
68using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
69
70static BlockVector getSortedEntries(const BlockSet &Entries) {
71 BlockVector SortedEntries(Entries.begin(), Entries.end());
72 llvm::sort(C&: SortedEntries,
73 Comp: [](const MachineBasicBlock *A, const MachineBasicBlock *B) {
74 auto ANum = A->getNumber();
75 auto BNum = B->getNumber();
76 return ANum < BNum;
77 });
78 return SortedEntries;
79}
80
81// Calculates reachability in a region. Ignores branches to blocks outside of
82// the region, and ignores branches to the region entry (for the case where
83// the region is the inner part of a loop).
84class ReachabilityGraph {
85public:
86 ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
87 : Entry(Entry), Blocks(Blocks) {
88#ifndef NDEBUG
89 // The region must have a single entry.
90 for (auto *MBB : Blocks) {
91 if (MBB != Entry) {
92 for (auto *Pred : MBB->predecessors()) {
93 assert(inRegion(Pred));
94 }
95 }
96 }
97#endif
98 calculate();
99 }
100
101 bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
102 assert(inRegion(From) && inRegion(To));
103 auto I = Reachable.find(Val: From);
104 if (I == Reachable.end())
105 return false;
106 return I->second.count(Ptr: To);
107 }
108
109 // "Loopers" are blocks that are in a loop. We detect these by finding blocks
110 // that can reach themselves.
111 const BlockSet &getLoopers() const { return Loopers; }
112
113 // Get all blocks that are loop entries.
114 const BlockSet &getLoopEntries() const { return LoopEntries; }
115
116 // Get all blocks that enter a particular loop from outside.
117 const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
118 assert(inRegion(LoopEntry));
119 auto I = LoopEnterers.find(Val: LoopEntry);
120 assert(I != LoopEnterers.end());
121 return I->second;
122 }
123
124private:
125 MachineBasicBlock *Entry;
126 const BlockSet &Blocks;
127
128 BlockSet Loopers, LoopEntries;
129 DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
130
131 bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(Ptr: MBB); }
132
133 // Maps a block to all the other blocks it can reach.
134 DenseMap<MachineBasicBlock *, BlockSet> Reachable;
135
136 void calculate() {
137 // Reachability computation work list. Contains pairs of recent additions
138 // (A, B) where we just added a link A => B.
139 using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
140 SmallVector<BlockPair, 4> WorkList;
141
142 // Add all relevant direct branches.
143 for (auto *MBB : Blocks) {
144 for (auto *Succ : MBB->successors()) {
145 if (Succ != Entry && inRegion(MBB: Succ)) {
146 Reachable[MBB].insert(Ptr: Succ);
147 WorkList.emplace_back(Args&: MBB, Args&: Succ);
148 }
149 }
150 }
151
152 while (!WorkList.empty()) {
153 MachineBasicBlock *MBB, *Succ;
154 std::tie(args&: MBB, args&: Succ) = WorkList.pop_back_val();
155 assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
156 if (MBB != Entry) {
157 // We recently added MBB => Succ, and that means we may have enabled
158 // Pred => MBB => Succ.
159 for (auto *Pred : MBB->predecessors()) {
160 if (Reachable[Pred].insert(Ptr: Succ).second) {
161 WorkList.emplace_back(Args&: Pred, Args&: Succ);
162 }
163 }
164 }
165 }
166
167 // Blocks that can return to themselves are in a loop.
168 for (auto *MBB : Blocks) {
169 if (canReach(From: MBB, To: MBB)) {
170 Loopers.insert(Ptr: MBB);
171 }
172 }
173 assert(!Loopers.count(Entry));
174
175 // Find the loop entries - loopers reachable from blocks not in that loop -
176 // and those outside blocks that reach them, the "loop enterers".
177 for (auto *Looper : Loopers) {
178 for (auto *Pred : Looper->predecessors()) {
179 // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
180 // otherwise, it is a block that enters into the loop.
181 if (!canReach(From: Looper, To: Pred)) {
182 LoopEntries.insert(Ptr: Looper);
183 LoopEnterers[Looper].insert(Ptr: Pred);
184 }
185 }
186 }
187 }
188};
189
190// Finds the blocks in a single-entry loop, given the loop entry and the
191// list of blocks that enter the loop.
192class LoopBlocks {
193public:
194 LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
195 : Entry(Entry), Enterers(Enterers) {
196 calculate();
197 }
198
199 BlockSet &getBlocks() { return Blocks; }
200
201private:
202 MachineBasicBlock *Entry;
203 const BlockSet &Enterers;
204
205 BlockSet Blocks;
206
207 void calculate() {
208 // Going backwards from the loop entry, if we ignore the blocks entering
209 // from outside, we will traverse all the blocks in the loop.
210 BlockVector WorkList;
211 BlockSet AddedToWorkList;
212 Blocks.insert(Ptr: Entry);
213 for (auto *Pred : Entry->predecessors()) {
214 if (!Enterers.count(Ptr: Pred)) {
215 WorkList.push_back(Elt: Pred);
216 AddedToWorkList.insert(Ptr: Pred);
217 }
218 }
219
220 while (!WorkList.empty()) {
221 auto *MBB = WorkList.pop_back_val();
222 assert(!Enterers.count(MBB));
223 if (Blocks.insert(Ptr: MBB).second) {
224 for (auto *Pred : MBB->predecessors()) {
225 if (AddedToWorkList.insert(Ptr: Pred).second)
226 WorkList.push_back(Elt: Pred);
227 }
228 }
229 }
230 }
231};
232
233class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
234 StringRef getPassName() const override {
235 return "WebAssembly Fix Irreducible Control Flow";
236 }
237
238 bool runOnMachineFunction(MachineFunction &MF) override;
239
240 bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
241 MachineFunction &MF);
242
243 void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
244 MachineFunction &MF, const ReachabilityGraph &Graph);
245
246public:
247 static char ID; // Pass identification, replacement for typeid
248 WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
249};
250
251bool WebAssemblyFixIrreducibleControlFlow::processRegion(
252 MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
253 bool Changed = false;
254 // Remove irreducibility before processing child loops, which may take
255 // multiple iterations.
256 while (true) {
257 ReachabilityGraph Graph(Entry, Blocks);
258
259 bool FoundIrreducibility = false;
260
261 for (auto *LoopEntry : getSortedEntries(Entries: Graph.getLoopEntries())) {
262 // Find mutual entries - all entries which can reach this one, and
263 // are reached by it (that always includes LoopEntry itself). All mutual
264 // entries must be in the same loop, so if we have more than one, then we
265 // have irreducible control flow.
266 //
267 // (Note that we need to sort the entries here, as otherwise the order can
268 // matter: being mutual is a symmetric relationship, and each set of
269 // mutuals will be handled properly no matter which we see first. However,
270 // there can be multiple disjoint sets of mutuals, and which we process
271 // first changes the output.)
272 //
273 // Note that irreducibility may involve inner loops, e.g. imagine A
274 // starts one loop, and it has B inside it which starts an inner loop.
275 // If we add a branch from all the way on the outside to B, then in a
276 // sense B is no longer an "inner" loop, semantically speaking. We will
277 // fix that irreducibility by adding a block that dispatches to either
278 // either A or B, so B will no longer be an inner loop in our output.
279 // (A fancier approach might try to keep it as such.)
280 //
281 // Note that we still need to recurse into inner loops later, to handle
282 // the case where the irreducibility is entirely nested - we would not
283 // be able to identify that at this point, since the enclosing loop is
284 // a group of blocks all of whom can reach each other. (We'll see the
285 // irreducibility after removing branches to the top of that enclosing
286 // loop.)
287 BlockSet MutualLoopEntries;
288 MutualLoopEntries.insert(Ptr: LoopEntry);
289 for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
290 if (OtherLoopEntry != LoopEntry &&
291 Graph.canReach(From: LoopEntry, To: OtherLoopEntry) &&
292 Graph.canReach(From: OtherLoopEntry, To: LoopEntry)) {
293 MutualLoopEntries.insert(Ptr: OtherLoopEntry);
294 }
295 }
296
297 if (MutualLoopEntries.size() > 1) {
298 makeSingleEntryLoop(Entries&: MutualLoopEntries, Blocks, MF, Graph);
299 FoundIrreducibility = true;
300 Changed = true;
301 break;
302 }
303 }
304 // Only go on to actually process the inner loops when we are done
305 // removing irreducible control flow and changing the graph. Modifying
306 // the graph as we go is possible, and that might let us avoid looking at
307 // the already-fixed loops again if we are careful, but all that is
308 // complex and bug-prone. Since irreducible loops are rare, just starting
309 // another iteration is best.
310 if (FoundIrreducibility) {
311 continue;
312 }
313
314 for (auto *LoopEntry : Graph.getLoopEntries()) {
315 LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
316 // Each of these calls to processRegion may change the graph, but are
317 // guaranteed not to interfere with each other. The only changes we make
318 // to the graph are to add blocks on the way to a loop entry. As the
319 // loops are disjoint, that means we may only alter branches that exit
320 // another loop, which are ignored when recursing into that other loop
321 // anyhow.
322 if (processRegion(Entry: LoopEntry, Blocks&: InnerBlocks.getBlocks(), MF)) {
323 Changed = true;
324 }
325 }
326
327 return Changed;
328 }
329}
330
331// Given a set of entries to a single loop, create a single entry for that
332// loop by creating a dispatch block for them, routing control flow using
333// a helper variable. Also updates Blocks with any new blocks created, so
334// that we properly track all the blocks in the region. But this does not update
335// ReachabilityGraph; this will be updated in the caller of this function as
336// needed.
337void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
338 BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
339 const ReachabilityGraph &Graph) {
340 assert(Entries.size() >= 2);
341
342 // Sort the entries to ensure a deterministic build.
343 BlockVector SortedEntries = getSortedEntries(Entries);
344
345#ifndef NDEBUG
346 for (auto *Block : SortedEntries)
347 assert(Block->getNumber() != -1);
348 if (SortedEntries.size() > 1) {
349 for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
350 ++I) {
351 auto ANum = (*I)->getNumber();
352 auto BNum = (*(std::next(I)))->getNumber();
353 assert(ANum != BNum);
354 }
355 }
356#endif
357
358 // Create a dispatch block which will contain a jump table to the entries.
359 MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
360 MF.insert(MBBI: MF.end(), MBB: Dispatch);
361 Blocks.insert(Ptr: Dispatch);
362
363 // Add the jump table.
364 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
365 MachineInstrBuilder MIB =
366 BuildMI(BB: Dispatch, MIMD: DebugLoc(), MCID: TII.get(Opcode: WebAssembly::BR_TABLE_I32));
367
368 // Add the register which will be used to tell the jump table which block to
369 // jump to.
370 MachineRegisterInfo &MRI = MF.getRegInfo();
371 Register Reg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
372 MIB.addReg(RegNo: Reg);
373
374 // Compute the indices in the superheader, one for each bad block, and
375 // add them as successors.
376 DenseMap<MachineBasicBlock *, unsigned> Indices;
377 for (auto *Entry : SortedEntries) {
378 auto Pair = Indices.insert(KV: std::make_pair(x&: Entry, y: 0));
379 assert(Pair.second);
380
381 unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
382 Pair.first->second = Index;
383
384 MIB.addMBB(MBB: Entry);
385 Dispatch->addSuccessor(Succ: Entry);
386 }
387
388 // Rewrite the problematic successors for every block that wants to reach
389 // the bad blocks. For simplicity, we just introduce a new block for every
390 // edge we need to rewrite. (Fancier things are possible.)
391
392 BlockVector AllPreds;
393 for (auto *Entry : SortedEntries) {
394 for (auto *Pred : Entry->predecessors()) {
395 if (Pred != Dispatch) {
396 AllPreds.push_back(Elt: Pred);
397 }
398 }
399 }
400
401 // This set stores predecessors within this loop.
402 DenseSet<MachineBasicBlock *> InLoop;
403 for (auto *Pred : AllPreds) {
404 for (auto *Entry : Pred->successors()) {
405 if (!Entries.count(Ptr: Entry))
406 continue;
407 if (Graph.canReach(From: Entry, To: Pred)) {
408 InLoop.insert(V: Pred);
409 break;
410 }
411 }
412 }
413
414 // Record if each entry has a layout predecessor. This map stores
415 // <<loop entry, Predecessor is within the loop?>, layout predecessor>
416 DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
417 EntryToLayoutPred;
418 for (auto *Pred : AllPreds) {
419 bool PredInLoop = InLoop.count(V: Pred);
420 for (auto *Entry : Pred->successors())
421 if (Entries.count(Ptr: Entry) && Pred->isLayoutSuccessor(MBB: Entry))
422 EntryToLayoutPred[{Entry, PredInLoop}] = Pred;
423 }
424
425 // We need to create at most two routing blocks per entry: one for
426 // predecessors outside the loop and one for predecessors inside the loop.
427 // This map stores
428 // <<loop entry, Predecessor is within the loop?>, routing block>
429 DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
430 Map;
431 for (auto *Pred : AllPreds) {
432 bool PredInLoop = InLoop.count(V: Pred);
433 for (auto *Entry : Pred->successors()) {
434 if (!Entries.count(Ptr: Entry) || Map.count(Val: {Entry, PredInLoop}))
435 continue;
436 // If there exists a layout predecessor of this entry and this predecessor
437 // is not that, we rather create a routing block after that layout
438 // predecessor to save a branch.
439 if (auto *OtherPred = EntryToLayoutPred.lookup(Val: {Entry, PredInLoop}))
440 if (OtherPred != Pred)
441 continue;
442
443 // This is a successor we need to rewrite.
444 MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
445 MF.insert(MBBI: Pred->isLayoutSuccessor(MBB: Entry)
446 ? MachineFunction::iterator(Entry)
447 : MF.end(),
448 MBB: Routing);
449 Blocks.insert(Ptr: Routing);
450
451 // Set the jump table's register of the index of the block we wish to
452 // jump to, and jump to the jump table.
453 BuildMI(BB: Routing, MIMD: DebugLoc(), MCID: TII.get(Opcode: WebAssembly::CONST_I32), DestReg: Reg)
454 .addImm(Val: Indices[Entry]);
455 BuildMI(BB: Routing, MIMD: DebugLoc(), MCID: TII.get(Opcode: WebAssembly::BR)).addMBB(MBB: Dispatch);
456 Routing->addSuccessor(Succ: Dispatch);
457 Map[{Entry, PredInLoop}] = Routing;
458 }
459 }
460
461 for (auto *Pred : AllPreds) {
462 bool PredInLoop = InLoop.count(V: Pred);
463 // Remap the terminator operands and the successor list.
464 for (MachineInstr &Term : Pred->terminators())
465 for (auto &Op : Term.explicit_uses())
466 if (Op.isMBB() && Indices.count(Val: Op.getMBB()))
467 Op.setMBB(Map[{Op.getMBB(), PredInLoop}]);
468
469 for (auto *Succ : Pred->successors()) {
470 if (!Entries.count(Ptr: Succ))
471 continue;
472 auto *Routing = Map[{Succ, PredInLoop}];
473 Pred->replaceSuccessor(Old: Succ, New: Routing);
474 }
475 }
476
477 // Create a fake default label, because br_table requires one.
478 MIB.addMBB(MBB: MIB.getInstr()
479 ->getOperand(i: MIB.getInstr()->getNumExplicitOperands() - 1)
480 .getMBB());
481}
482
483} // end anonymous namespace
484
485char WebAssemblyFixIrreducibleControlFlow::ID = 0;
486INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
487 "Removes irreducible control flow", false, false)
488
489FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
490 return new WebAssemblyFixIrreducibleControlFlow();
491}
492
493// Test whether the given register has an ARGUMENT def.
494static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
495 for (const auto &Def : MRI.def_instructions(Reg))
496 if (WebAssembly::isArgument(Opc: Def.getOpcode()))
497 return true;
498 return false;
499}
500
501// Add a register definition with IMPLICIT_DEFs for every register to cover for
502// register uses that don't have defs in every possible path.
503// TODO: This is fairly heavy-handed; find a better approach.
504static void addImplicitDefs(MachineFunction &MF) {
505 const MachineRegisterInfo &MRI = MF.getRegInfo();
506 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
507 MachineBasicBlock &Entry = *MF.begin();
508 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
509 Register Reg = Register::index2VirtReg(Index: I);
510
511 // Skip unused registers.
512 if (MRI.use_nodbg_empty(RegNo: Reg))
513 continue;
514
515 // Skip registers that have an ARGUMENT definition.
516 if (hasArgumentDef(Reg, MRI))
517 continue;
518
519 BuildMI(BB&: Entry, I: Entry.begin(), MIMD: DebugLoc(),
520 MCID: TII.get(Opcode: WebAssembly::IMPLICIT_DEF), DestReg: Reg);
521 }
522
523 // Move ARGUMENT_* instructions to the top of the entry block, so that their
524 // liveness reflects the fact that these really are live-in values.
525 for (MachineInstr &MI : llvm::make_early_inc_range(Range&: Entry)) {
526 if (WebAssembly::isArgument(Opc: MI.getOpcode())) {
527 MI.removeFromParent();
528 Entry.insert(I: Entry.begin(), MI: &MI);
529 }
530 }
531}
532
533bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
534 MachineFunction &MF) {
535 LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
536 "********** Function: "
537 << MF.getName() << '\n');
538
539 // Start the recursive process on the entire function body.
540 BlockSet AllBlocks;
541 for (auto &MBB : MF) {
542 AllBlocks.insert(Ptr: &MBB);
543 }
544
545 if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
546 // We rewrote part of the function; recompute relevant things.
547 MF.RenumberBlocks();
548 // Now we've inserted dispatch blocks, some register uses can have incoming
549 // paths without a def. For example, before this pass register %a was
550 // defined in BB1 and used in BB2, and there was only one path from BB1 and
551 // BB2. But if this pass inserts a dispatch block having multiple
552 // predecessors between the two BBs, now there are paths to BB2 without
553 // visiting BB1, and %a's use in BB2 is not dominated by its def. Adding
554 // IMPLICIT_DEFs to all regs is one simple way to fix it.
555 addImplicitDefs(MF);
556 return true;
557 }
558
559 return false;
560}
561