1//===- BranchFolding.cpp - Fold machine code branch instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass forwards branches to unconditional branches to make them branch
10// directly to the target block. This pass often results in dead MBB's, which
11// it then removes.
12//
13// Note that this pass must be run after register allocation, it cannot handle
14// SSA form. It also must handle virtual registers for targets that emit virtual
15// ISA (e.g. NVPTX).
16//
17//===----------------------------------------------------------------------===//
18
19#include "BranchFolding.h"
20#include "llvm/ADT/BitVector.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/Analysis/ProfileSummaryInfo.h"
26#include "llvm/CodeGen/Analysis.h"
27#include "llvm/CodeGen/BranchFoldingPass.h"
28#include "llvm/CodeGen/MBFIWrapper.h"
29#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
30#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
31#include "llvm/CodeGen/MachineDominators.h"
32#include "llvm/CodeGen/MachineFunction.h"
33#include "llvm/CodeGen/MachineFunctionPass.h"
34#include "llvm/CodeGen/MachineInstr.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineJumpTableInfo.h"
37#include "llvm/CodeGen/MachineLoopInfo.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachinePostDominators.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/MachineSizeOpts.h"
42#include "llvm/CodeGen/TargetInstrInfo.h"
43#include "llvm/CodeGen/TargetOpcodes.h"
44#include "llvm/CodeGen/TargetPassConfig.h"
45#include "llvm/CodeGen/TargetRegisterInfo.h"
46#include "llvm/CodeGen/TargetSubtargetInfo.h"
47#include "llvm/Config/llvm-config.h"
48#include "llvm/IR/DebugInfoMetadata.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/IR/Function.h"
51#include "llvm/InitializePasses.h"
52#include "llvm/MC/LaneBitmask.h"
53#include "llvm/MC/MCRegisterInfo.h"
54#include "llvm/Pass.h"
55#include "llvm/Support/BlockFrequency.h"
56#include "llvm/Support/BranchProbability.h"
57#include "llvm/Support/CommandLine.h"
58#include "llvm/Support/Debug.h"
59#include "llvm/Support/ErrorHandling.h"
60#include "llvm/Support/raw_ostream.h"
61#include "llvm/Target/TargetMachine.h"
62#include <cassert>
63#include <cstddef>
64#include <iterator>
65#include <numeric>
66
67using namespace llvm;
68
69#define DEBUG_TYPE "branch-folder"
70
71STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
72STATISTIC(NumBranchOpts, "Number of branches optimized");
73STATISTIC(NumTailMerge , "Number of block tails merged");
74STATISTIC(NumHoist , "Number of times common instructions are hoisted");
75STATISTIC(NumTailCalls, "Number of tail calls optimized");
76
77static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
78 cl::init(Val: cl::BOU_UNSET), cl::Hidden);
79
80// Throttle for huge numbers of predecessors (compile speed problems)
81static cl::opt<unsigned>
82TailMergeThreshold("tail-merge-threshold",
83 cl::desc("Max number of predecessors to consider tail merging"),
84 cl::init(Val: 150), cl::Hidden);
85
86// Heuristic for tail merging (and, inversely, tail duplication).
87static cl::opt<unsigned>
88TailMergeSize("tail-merge-size",
89 cl::desc("Min number of instructions to consider tail merging"),
90 cl::init(Val: 3), cl::Hidden);
91
92namespace {
93
94 /// BranchFolderPass - Wrap branch folder in a machine function pass.
/// BranchFolderPass - Wrap branch folder in a machine function pass.
class BranchFolderLegacy : public MachineFunctionPass {
public:
  static char ID;

  explicit BranchFolderLegacy() : MachineFunctionPass(ID) {}

  /// Run the branch folder over \p MF; returns true if the function changed.
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Block frequency / branch probability / profile info feed the
    // tail-merge profitability heuristics; TargetPassConfig supplies the
    // target's tail-merge enable default.
    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Runs after PHI elimination: the pass cannot handle SSA PHI nodes.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setNoPHIs();
  }
};
115
116} // end anonymous namespace
117
118char BranchFolderLegacy::ID = 0;
119
120char &llvm::BranchFolderPassID = BranchFolderLegacy::ID;
121
122INITIALIZE_PASS(BranchFolderLegacy, DEBUG_TYPE, "Control Flow Optimizer", false,
123 false)
124
125PreservedAnalyses BranchFolderPass::run(MachineFunction &MF,
126 MachineFunctionAnalysisManager &MFAM) {
127 MFPropsModifier _(*this, MF);
128 bool EnableTailMerge =
129 !MF.getTarget().requiresStructuredCFG() && this->EnableTailMerge;
130
131 auto &MBPI = MFAM.getResult<MachineBranchProbabilityAnalysis>(IR&: MF);
132 auto *PSI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(IR&: MF)
133 .getCachedResult<ProfileSummaryAnalysis>(
134 IR&: *MF.getFunction().getParent());
135 if (!PSI)
136 report_fatal_error(
137 reason: "ProfileSummaryAnalysis is required for BranchFoldingPass", gen_crash_diag: false);
138
139 auto &MBFI = MFAM.getResult<MachineBlockFrequencyAnalysis>(IR&: MF);
140 MBFIWrapper MBBFreqInfo(MBFI);
141 BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, MBPI,
142 PSI);
143 if (Folder.OptimizeFunction(MF, tii: MF.getSubtarget().getInstrInfo(),
144 tri: MF.getSubtarget().getRegisterInfo()))
145 return getMachineFunctionPassPreservedAnalyses();
146
147 return PreservedAnalyses::all();
148}
149
150bool BranchFolderLegacy::runOnMachineFunction(MachineFunction &MF) {
151 if (skipFunction(F: MF.getFunction()))
152 return false;
153
154 TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
155 // TailMerge can create jump into if branches that make CFG irreducible for
156 // HW that requires structurized CFG.
157 bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
158 PassConfig->getEnableTailMerge();
159 MBFIWrapper MBBFreqInfo(
160 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
161 BranchFolder Folder(
162 EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
163 getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(),
164 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
165 return Folder.OptimizeFunction(MF, tii: MF.getSubtarget().getInstrInfo(),
166 tri: MF.getSubtarget().getRegisterInfo());
167}
168
169BranchFolder::BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist,
170 MBFIWrapper &FreqInfo,
171 const MachineBranchProbabilityInfo &ProbInfo,
172 ProfileSummaryInfo *PSI, unsigned MinTailLength)
173 : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
174 MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) {
175 switch (FlagEnableTailMerge) {
176 case cl::BOU_UNSET:
177 EnableTailMerge = DefaultEnableTailMerge;
178 break;
179 case cl::BOU_TRUE: EnableTailMerge = true; break;
180 case cl::BOU_FALSE: EnableTailMerge = false; break;
181 }
182}
183
184void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
185 assert(MBB->pred_empty() && "MBB must be dead!");
186 LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
187
188 MachineFunction *MF = MBB->getParent();
189 // drop all successors.
190 while (!MBB->succ_empty())
191 MBB->removeSuccessor(I: MBB->succ_end()-1);
192
193 // Avoid matching if this pointer gets reused.
194 TriedMerging.erase(Ptr: MBB);
195
196 // Update call info.
197 for (const MachineInstr &MI : *MBB)
198 if (MI.shouldUpdateAdditionalCallInfo())
199 MF->eraseAdditionalCallInfo(MI: &MI);
200
201 // Remove the block.
202 if (MLI)
203 MLI->removeBlock(BB: MBB);
204 MF->erase(MBBI: MBB);
205 EHScopeMembership.erase(Val: MBB);
206}
207
/// Top-level driver: repeatedly tail-merge, optimize branches, and hoist
/// common code until a fixed point, then garbage-collect dead jump tables.
/// Returns true if the function was modified.
bool BranchFolder::OptimizeFunction(MachineFunction &MF,
                                    const TargetInstrInfo *tii,
                                    const TargetRegisterInfo *tri,
                                    MachineLoopInfo *mli, bool AfterPlacement) {
  // Nothing can be done without target instruction info.
  if (!tii) return false;

  TriedMerging.clear();

  MachineRegisterInfo &MRI = MF.getRegInfo();
  AfterBlockPlacement = AfterPlacement;
  TII = tii;
  TRI = tri;
  MLI = mli;
  this->MRI = &MRI;

  // Pick the minimum common tail length: an explicit -tail-merge-size flag
  // wins; otherwise ask the target.
  if (MinCommonTailLength == 0) {
    MinCommonTailLength = TailMergeSize.getNumOccurrences() > 0
                              ? TailMergeSize
                              : TII->getTailMergeSize(MF);
  }

  // Only maintain live-in lists when liveness is tracked after regalloc;
  // otherwise invalidate liveness entirely so it cannot go stale.
  UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF);
  if (!UpdateLiveIns)
    MRI.invalidateLiveness();

  bool MadeChange = false;

  // Recalculate EH scope membership.
  EHScopeMembership = getEHScopeMembership(MF);

  // Iterate the three transformations to a fixed point; each one can expose
  // new opportunities for the others.
  bool MadeChangeThisIteration = true;
  while (MadeChangeThisIteration) {
    MadeChangeThisIteration = TailMergeBlocks(MF);
    // No need to clean up if tail merging does not change anything after the
    // block placement.
    if (!AfterBlockPlacement || MadeChangeThisIteration)
      MadeChangeThisIteration |= OptimizeBranches(MF);
    if (EnableHoistCommonCode)
      MadeChangeThisIteration |= HoistCommonCode(MF);
    MadeChange |= MadeChangeThisIteration;
  }

  // See if any jump tables have become dead as the code generator
  // did its thing.
  MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
  if (!JTI)
    return MadeChange;

  // Walk the function to find jump tables that are live.
  BitVector JTIsLive(JTI->getJumpTables().size());
  for (const MachineBasicBlock &BB : MF) {
    for (const MachineInstr &I : BB)
      for (const MachineOperand &Op : I.operands()) {
        if (!Op.isJTI()) continue;

        // Remember that this JT is live.
        JTIsLive.set(Op.getIndex());
      }
  }

  // Finally, remove dead jump tables. This happens when the
  // indirect jump was unreachable (and thus deleted).
  for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
    if (!JTIsLive.test(i)) {
      JTI->RemoveJumpTable(i);
      MadeChange = true;
    }

  return MadeChange;
}
278
279//===----------------------------------------------------------------------===//
280// Tail Merging of Blocks
281//===----------------------------------------------------------------------===//
282
283/// HashMachineInstr - Compute a hash value for MI and its operands.
284static unsigned HashMachineInstr(const MachineInstr &MI) {
285 unsigned Hash = MI.getOpcode();
286 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
287 const MachineOperand &Op = MI.getOperand(i);
288
289 // Merge in bits from the operand if easy. We can't use MachineOperand's
290 // hash_code here because it's not deterministic and we sort by hash value
291 // later.
292 unsigned OperandHash = 0;
293 switch (Op.getType()) {
294 case MachineOperand::MO_Register:
295 OperandHash = Op.getReg().id();
296 break;
297 case MachineOperand::MO_Immediate:
298 OperandHash = Op.getImm();
299 break;
300 case MachineOperand::MO_MachineBasicBlock:
301 OperandHash = Op.getMBB()->getNumber();
302 break;
303 case MachineOperand::MO_FrameIndex:
304 case MachineOperand::MO_ConstantPoolIndex:
305 case MachineOperand::MO_JumpTableIndex:
306 OperandHash = Op.getIndex();
307 break;
308 case MachineOperand::MO_GlobalAddress:
309 case MachineOperand::MO_ExternalSymbol:
310 // Global address / external symbol are too hard, don't bother, but do
311 // pull in the offset.
312 OperandHash = Op.getOffset();
313 break;
314 default:
315 break;
316 }
317
318 Hash += ((OperandHash << 3) | Op.getType()) << (i & 31);
319 }
320 return Hash;
321}
322
323/// HashEndOfMBB - Hash the last instruction in the MBB.
324static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
325 MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(SkipPseudoOp: false);
326 if (I == MBB.end())
327 return 0;
328
329 return HashMachineInstr(MI: *I);
330}
331
332/// Whether MI should be counted as an instruction when calculating common tail.
333static bool countsAsInstruction(const MachineInstr &MI) {
334 return !(MI.isDebugInstr() || MI.isCFIInstruction());
335}
336
337/// Iterate backwards from the given iterator \p I, towards the beginning of the
338/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator
339/// pointing to that MI. If no such MI is found, return the end iterator.
340static MachineBasicBlock::iterator
341skipBackwardPastNonInstructions(MachineBasicBlock::iterator I,
342 MachineBasicBlock *MBB) {
343 while (I != MBB->begin()) {
344 --I;
345 if (countsAsInstruction(MI: *I))
346 return I;
347 }
348 return MBB->end();
349}
350
/// Given two machine basic blocks, return the number of instructions they
/// actually have in common together at their end. If a common tail is found
/// (at least by one instruction), then iterators for the first shared
/// instruction in each block are returned as well.
///
/// Non-instructions according to countsAsInstruction are ignored.
static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
                                        MachineBasicBlock *MBB2,
                                        MachineBasicBlock::iterator &I1,
                                        MachineBasicBlock::iterator &I2) {
  MachineBasicBlock::iterator MBBI1 = MBB1->end();
  MachineBasicBlock::iterator MBBI2 = MBB2->end();

  unsigned TailLen = 0;
  while (true) {
    // Walk both blocks backwards in lockstep, skipping debug/CFI
    // instructions.
    MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1);
    MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2);
    if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end())
      break;
    if (!MBBI1->isIdenticalTo(*MBBI2) ||
        // FIXME: This check is dubious. It's used to get around a problem where
        // people incorrectly expect inline asm directives to remain in the same
        // relative order. This is untenable because normal compiler
        // optimizations (like this one) may reorder and/or merge these
        // directives.
        MBBI1->isInlineAsm()) {
      break;
    }
    // Respect an explicit request not to merge either instruction.
    if (MBBI1->getFlag(MachineInstr::NoMerge) ||
        MBBI2->getFlag(MachineInstr::NoMerge))
      break;
    // One more shared instruction; record the current start of the common
    // tail in each block for the caller.
    ++TailLen;
    I1 = MBBI1;
    I2 = MBBI2;
  }

  return TailLen;
}
389
/// Remove the tail of the block starting at \p OldInst and replace it with an
/// unconditional branch to \p NewDest, maintaining live-in information when
/// it is being tracked.
void BranchFolder::replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
                                           MachineBasicBlock &NewDest) {
  if (UpdateLiveIns) {
    // OldInst should always point to an instruction.
    MachineBasicBlock &OldMBB = *OldInst->getParent();
    LiveRegs.clear();
    LiveRegs.addLiveOuts(OldMBB);
    // Step the live set backward from the block end to the place where we
    // will insert the jump (OldInst itself is included in the walk).
    MachineBasicBlock::iterator I = OldMBB.end();
    do {
      --I;
      LiveRegs.stepBackward(*I);
    } while (I != OldInst);

    // Merging the tails may have switched some undef operand to non-undef ones.
    // Add IMPLICIT_DEFS into OldMBB as necessary to have a definition of the
    // register.
    for (MachineBasicBlock::RegisterMaskPair P : NewDest.liveins()) {
      // We computed the liveins with computeLiveIn earlier and should only see
      // full registers:
      assert(P.LaneMask == LaneBitmask::getAll() &&
             "Can only handle full register.");
      MCRegister Reg = P.PhysReg;
      // Only registers that are live-in to NewDest but currently dead in
      // OldMBB need a synthetic definition.
      if (!LiveRegs.available(*MRI, Reg))
        continue;
      DebugLoc DL;
      BuildMI(OldMBB, OldInst, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
    }
  }

  TII->ReplaceTailWithBranchTo(OldInst, &NewDest);
  ++NumTailMerge;
}
423
/// Split \p CurMBB before \p BBI1, moving the trailing instructions into a
/// newly created fall-through block. Returns the new block, or nullptr if the
/// target does not allow splitting at that point. \p BB is the IR basic block
/// the new machine block should be associated with.
MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
                                            MachineBasicBlock::iterator BBI1,
                                            const BasicBlock *BB) {
  if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
    return nullptr;

  MachineFunction &MF = *CurMBB.getParent();

  // Create the fall-through block.
  MachineFunction::iterator MBBI = CurMBB.getIterator();
  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(BB);
  CurMBB.getParent()->insert(++MBBI, NewMBB);

  // Move all the successors of this block to the specified block.
  NewMBB->transferSuccessors(&CurMBB);

  // Add an edge from CurMBB to NewMBB for the fall-through.
  CurMBB.addSuccessor(NewMBB);

  // Splice the code over.
  NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());

  // NewMBB belongs to the same loop as CurMBB.
  if (MLI)
    if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
      ML->addBasicBlockToLoop(NewMBB, *MLI);

  // NewMBB inherits CurMBB's block frequency.
  MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));

  // Recompute live-ins for the new block from the live registers at the
  // split point.
  if (UpdateLiveIns)
    computeAndAddLiveIns(LiveRegs, *NewMBB);

  // Add the new block to the EH scope of the original block.
  const auto &EHScopeI = EHScopeMembership.find(&CurMBB);
  if (EHScopeI != EHScopeMembership.end()) {
    auto n = EHScopeI->second;
    EHScopeMembership[NewMBB] = n;
  }

  return NewMBB;
}
466
467/// EstimateRuntime - Make a rough estimate for how long it will take to run
468/// the specified code.
469static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
470 MachineBasicBlock::iterator E) {
471 unsigned Time = 0;
472 for (; I != E; ++I) {
473 if (!countsAsInstruction(MI: *I))
474 continue;
475 if (I->isCall())
476 Time += 10;
477 else if (I->mayLoadOrStore())
478 Time += 2;
479 else
480 ++Time;
481 }
482 return Time;
483}
484
485// CurMBB needs to add an unconditional branch to SuccMBB (we removed these
486// branches temporarily for tail merging). In the case where CurMBB ends
487// with a conditional branch to the next block, optimize by reversing the
488// test and conditionally branching to SuccMBB instead.
489static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
490 const TargetInstrInfo *TII, const DebugLoc &BranchDL) {
491 MachineFunction *MF = CurMBB->getParent();
492 MachineFunction::iterator I = std::next(x: MachineFunction::iterator(CurMBB));
493 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
494 SmallVector<MachineOperand, 4> Cond;
495 DebugLoc dl = CurMBB->findBranchDebugLoc();
496 if (!dl)
497 dl = BranchDL;
498 if (I != MF->end() && !TII->analyzeBranch(MBB&: *CurMBB, TBB, FBB, Cond, AllowModify: true)) {
499 MachineBasicBlock *NextBB = &*I;
500 if (TBB == NextBB && !Cond.empty() && !FBB) {
501 if (!TII->reverseBranchCondition(Cond)) {
502 TII->removeBranch(MBB&: *CurMBB);
503 TII->insertBranch(MBB&: *CurMBB, TBB: SuccBB, FBB: nullptr, Cond, DL: dl);
504 return;
505 }
506 }
507 }
508 TII->insertBranch(MBB&: *CurMBB, TBB: SuccBB, FBB: nullptr,
509 Cond: SmallVector<MachineOperand, 0>(), DL: dl);
510}
511
512bool
513BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
514 if (getHash() < o.getHash())
515 return true;
516 if (getHash() > o.getHash())
517 return false;
518 if (getBlock()->getNumber() < o.getBlock()->getNumber())
519 return true;
520 if (getBlock()->getNumber() > o.getBlock()->getNumber())
521 return false;
522 return false;
523}
524
525/// CountTerminators - Count the number of terminators in the given
526/// block and set I to the position of the first non-terminator, if there
527/// is one, or MBB->end() otherwise.
528static unsigned CountTerminators(MachineBasicBlock *MBB,
529 MachineBasicBlock::iterator &I) {
530 I = MBB->end();
531 unsigned NumTerms = 0;
532 while (true) {
533 if (I == MBB->begin()) {
534 I = MBB->end();
535 break;
536 }
537 --I;
538 if (!I->isTerminator()) break;
539 ++NumTerms;
540 }
541 return NumTerms;
542}
543
544/// A no successor, non-return block probably ends in unreachable and is cold.
545/// Also consider a block that ends in an indirect branch to be a return block,
546/// since many targets use plain indirect branches to return.
547static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) {
548 if (!MBB->succ_empty())
549 return false;
550 if (MBB->empty())
551 return true;
552 return !(MBB->back().isReturn() || MBB->back().isIndirectBranch());
553}
554
/// ProfitableToMerge - Check if two machine basic blocks have a common tail
/// and decide if it would be profitable to merge those tails. Return the
/// length of the common tail and iterators to the first common instruction
/// in each block.
/// MBB1, MBB2 The blocks to check
/// MinCommonTailLength Minimum size of tail block to be merged.
/// CommonTailLen Out parameter to record the size of the shared tail between
/// MBB1 and MBB2
/// I1, I2 Iterator references that will be changed to point to the first
/// instruction in the common tail shared by MBB1,MBB2
/// SuccBB A common successor of MBB1, MBB2 which are in a canonical form
/// relative to SuccBB
/// PredBB The layout predecessor of SuccBB, if any.
/// EHScopeMembership map from block to EH scope #.
/// AfterPlacement True if we are merging blocks after layout. Stricter
/// thresholds apply to prevent undoing tail-duplication.
static bool
ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
                  unsigned MinCommonTailLength, unsigned &CommonTailLen,
                  MachineBasicBlock::iterator &I1,
                  MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
                  MachineBasicBlock *PredBB,
                  DenseMap<const MachineBasicBlock *, int> &EHScopeMembership,
                  bool AfterPlacement,
                  MBFIWrapper &MBBFreqInfo,
                  ProfileSummaryInfo *PSI) {
  // It is never profitable to tail-merge blocks from two different EH scopes.
  if (!EHScopeMembership.empty()) {
    auto EHScope1 = EHScopeMembership.find(MBB1);
    assert(EHScope1 != EHScopeMembership.end());
    auto EHScope2 = EHScopeMembership.find(MBB2);
    assert(EHScope2 != EHScopeMembership.end());
    if (EHScope1->second != EHScope2->second)
      return false;
  }

  CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
  if (CommonTailLen == 0)
    return false;
  LLVM_DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1)
                    << " and " << printMBBReference(*MBB2) << " is "
                    << CommonTailLen << '\n');

  // Move the iterators to the beginning of the MBB if we only got debug
  // instructions before the tail. This is to avoid splitting a block when we
  // only got debug instructions before the tail (to be invariant on -g).
  if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end(), false) == I1)
    I1 = MBB1->begin();
  if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end(), false) == I2)
    I2 = MBB2->begin();

  // A "full block tail" means the entire block is shared.
  bool FullBlockTail1 = I1 == MBB1->begin();
  bool FullBlockTail2 = I2 == MBB2->begin();

  // It's almost always profitable to merge any number of non-terminator
  // instructions with the block that falls through into the common successor.
  // This is true only for a single successor. For multiple successors, we are
  // trading a conditional branch for an unconditional one.
  // TODO: Re-visit successor size for non-layout tail merging.
  if ((MBB1 == PredBB || MBB2 == PredBB) &&
      (!AfterPlacement || MBB1->succ_size() == 1)) {
    MachineBasicBlock::iterator I;
    unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
    if (CommonTailLen > NumTerms)
      return true;
  }

  // If these are identical non-return blocks with no successors, merge them.
  // Such blocks are typically cold calls to noreturn functions like abort, and
  // are unlikely to become a fallthrough target after machine block placement.
  // Tail merging these blocks is unlikely to create additional unconditional
  // branches, and will reduce the size of this cold code.
  if (FullBlockTail1 && FullBlockTail2 &&
      blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
    return true;

  // If one of the blocks can be completely merged and happens to be in
  // a position where the other could fall through into it, merge any number
  // of instructions, because it can be done without a branch.
  // TODO: If the blocks are not adjacent, move one of them so that they are?
  if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2)
    return true;
  if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1)
    return true;

  // If both blocks are identical and end in a branch, merge them unless they
  // both have a fallthrough predecessor and successor.
  // We can only do this after block placement because it depends on whether
  // there are fallthroughs, and we don't know until after layout.
  if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {
    auto BothFallThrough = [](MachineBasicBlock *MBB) {
      if (!MBB->succ_empty() && !MBB->canFallThrough())
        return false;
      MachineFunction::iterator I(MBB);
      MachineFunction *MF = MBB->getParent();
      return (MBB != &*MF->begin()) && std::prev(I)->canFallThrough();
    };
    if (!BothFallThrough(MBB1) || !BothFallThrough(MBB2))
      return true;
  }

  // If both blocks have an unconditional branch temporarily stripped out,
  // count that as an additional common instruction for the following
  // heuristics. This heuristic is only accurate for single-succ blocks, so to
  // make sure that during layout merging and duplicating don't crash, we check
  // for that when merging during layout.
  unsigned EffectiveTailLen = CommonTailLen;
  if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
      (MBB1->succ_size() == 1 || !AfterPlacement) &&
      !MBB1->back().isBarrier() &&
      !MBB2->back().isBarrier())
    ++EffectiveTailLen;

  // Check if the common tail is long enough to be worthwhile.
  if (EffectiveTailLen >= MinCommonTailLength)
    return true;

  // If we are optimizing for code size, 2 instructions in common is enough if
  // we don't have to split a block. At worst we will be introducing 1 new
  // branch instruction, which is likely to be smaller than the 2
  // instructions that would be deleted in the merge.
  bool OptForSize = llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) &&
                    llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo);
  return EffectiveTailLen >= 2 && OptForSize &&
         (FullBlockTail1 || FullBlockTail2);
}
681
/// Scan all MergePotentials entries whose hash equals \p CurHash (they sit at
/// the end of the hash-sorted vector) and populate SameTails with the set of
/// blocks sharing the longest profitable common tail. Returns that maximum
/// common tail length (0 if no profitable pair was found).
unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
                                        unsigned MinCommonTailLength,
                                        MachineBasicBlock *SuccBB,
                                        MachineBasicBlock *PredBB) {
  unsigned maxCommonTailLength = 0U;
  SameTails.clear();
  MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
  // MergePotentials is sorted by hash, so all candidates with CurHash are a
  // contiguous run at the end; walk that run backwards, comparing each pair.
  MPIterator HighestMPIter = std::prev(MergePotentials.end());
  for (MPIterator CurMPIter = std::prev(MergePotentials.end()),
                  B = MergePotentials.begin();
       CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) {
    for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) {
      unsigned CommonTailLen;
      if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
                            MinCommonTailLength,
                            CommonTailLen, TrialBBI1, TrialBBI2,
                            SuccBB, PredBB,
                            EHScopeMembership,
                            AfterBlockPlacement, MBBFreqInfo, PSI)) {
        // A strictly longer tail resets the candidate set.
        if (CommonTailLen > maxCommonTailLength) {
          SameTails.clear();
          maxCommonTailLength = CommonTailLen;
          HighestMPIter = CurMPIter;
          SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
        }
        // Additional blocks matching the current best tail join the set.
        if (HighestMPIter == CurMPIter &&
            CommonTailLen == maxCommonTailLength)
          SameTails.push_back(SameTailElt(I, TrialBBI2));
      }
      if (I == B)
        break;
    }
  }
  return maxCommonTailLength;
}
717
/// Remove all blocks with hash \p CurHash from MergePotentials, restoring the
/// unconditional branches that were stripped for tail merging as needed.
void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
                                        MachineBasicBlock *SuccBB,
                                        MachineBasicBlock *PredBB,
                                        const DebugLoc &BranchDL) {
  MPIterator CurMPIter, B;
  // The entries with CurHash are a contiguous run at the end of the
  // hash-sorted vector; walk it backwards.
  for (CurMPIter = std::prev(MergePotentials.end()),
      B = MergePotentials.begin();
       CurMPIter->getHash() == CurHash; --CurMPIter) {
    // Put the unconditional branch back, if we need one.
    MachineBasicBlock *CurMBB = CurMPIter->getBlock();
    if (SuccBB && CurMBB != PredBB)
      FixTail(CurMBB, SuccBB, TII, BranchDL);
    if (CurMPIter == B)
      break;
  }
  // If we stopped because the hash no longer matched (rather than hitting the
  // front), step forward so the erase below only removes matching entries.
  if (CurMPIter->getHash() != CurHash)
    CurMPIter++;
  MergePotentials.erase(CurMPIter, MergePotentials.end());
}
737
/// Pick one SameTails entry to donate its tail and split it so that the tail
/// becomes a standalone block. On success, SameTails[commonTailIndex] is
/// updated to refer to the new tail-only block (and PredBB is updated if it
/// was the block that got split). Returns false if splitting was not legal.
bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
                                             MachineBasicBlock *SuccBB,
                                             unsigned maxCommonTailLength,
                                             unsigned &commonTailIndex) {
  commonTailIndex = 0;
  unsigned TimeEstimate = ~0U;
  for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
    // Use PredBB if possible; that doesn't require a new branch.
    if (SameTails[i].getBlock() == PredBB) {
      commonTailIndex = i;
      break;
    }
    // Otherwise, make a (fairly bogus) choice based on estimate of
    // how long it will take the various blocks to execute.
    // Note: '<=' (not '<') means later candidates win ties.
    unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
                                 SameTails[i].getTailStartPos());
    if (t <= TimeEstimate) {
      TimeEstimate = t;
      commonTailIndex = i;
    }
  }

  MachineBasicBlock::iterator BBI =
    SameTails[commonTailIndex].getTailStartPos();
  MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();

  LLVM_DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size "
                    << maxCommonTailLength);

  // If the split block unconditionally falls-thru to SuccBB, it will be
  // merged. In control flow terms it should then take SuccBB's name. e.g. If
  // SuccBB is an inner loop, the common tail is still part of the inner loop.
  const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ?
    SuccBB->getBasicBlock() : MBB->getBasicBlock();
  MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB);
  if (!newMBB) {
    LLVM_DEBUG(dbgs() << "... failed!");
    return false;
  }

  // Point the chosen entry at the new tail-only block.
  SameTails[commonTailIndex].setBlock(newMBB);
  SameTails[commonTailIndex].setTailStartPos(newMBB->begin());

  // If we split PredBB, newMBB is the new predecessor.
  if (PredBB == MBB)
    PredBB = newMBB;

  return true;
}
787
/// Merge per-instruction metadata (memory operands and undef flags) from the
/// tail of MBBIStartPos's block into the corresponding instructions of the
/// common tail block \p MBBCommon, walking both tails backwards in lockstep.
static void
mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
                MachineBasicBlock &MBBCommon) {
  MachineBasicBlock *MBB = MBBIStartPos->getParent();
  // Note CommonTailLen does not necessarily matches the size of
  // the common BB nor all its instructions because of debug
  // instructions differences.
  unsigned CommonTailLen = 0;
  for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos)
    ++CommonTailLen;

  MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin();
  MachineBasicBlock::reverse_iterator MBBIE = MBB->rend();
  MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin();
  MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend();

  while (CommonTailLen--) {
    assert(MBBI != MBBIE && "Reached BB end within common tail length!");
    (void)MBBIE;

    // Debug/CFI instructions in the source tail are skipped without
    // consuming an instruction from the common block.
    if (!countsAsInstruction(*MBBI)) {
      ++MBBI;
      continue;
    }

    // Likewise skip non-counted instructions in the common block so both
    // iterators stay on matching "real" instructions.
    while ((MBBICommon != MBBIECommon) && !countsAsInstruction(*MBBICommon))
      ++MBBICommon;

    assert(MBBICommon != MBBIECommon &&
           "Reached BB end within common tail length!");
    assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!");

    // Merge MMOs from memory operations in the common block.
    if (MBBICommon->mayLoadOrStore())
      MBBICommon->cloneMergedMemRefs(*MBB->getParent(), {&*MBBICommon, &*MBBI});
    // Drop undef flags if they aren't present in all merged instructions.
    for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
      MachineOperand &MO = MBBICommon->getOperand(I);
      if (MO.isReg() && MO.isUndef()) {
        const MachineOperand &OtherMO = MBBI->getOperand(I);
        if (!OtherMO.isUndef())
          MO.setIsUndef(false);
      }
    }

    ++MBBI;
    ++MBBICommon;
  }
}
837
/// Fold metadata from every block in SameTails into the canonical common-tail
/// block (the one at \p commonTailIndex): memory operands, <undef> flags and
/// merged debug locations. If live-in tracking is enabled, also recompute the
/// tail block's live-ins and patch predecessors with IMPLICIT_DEFs where
/// dropped <undef> flags now require a definition.
void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
  MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();

  // For each other block, remember where its common tail starts and merge its
  // per-instruction metadata (MMOs, undef flags) into MBB.
  std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());
  for (unsigned int i = 0 ; i != SameTails.size() ; ++i) {
    if (i != commonTailIndex) {
      NextCommonInsts[i] = SameTails[i].getTailStartPos();
      mergeOperations(MBBIStartPos: SameTails[i].getTailStartPos(), MBBCommon&: *MBB);
    } else {
      assert(SameTails[i].getTailStartPos() == MBB->begin() &&
          "MBB is not a common tail only block");
    }
  }

  // Merge debug locations across all tails: walk MBB's instructions while
  // advancing a cursor into each other tail in lockstep, skipping debug
  // instructions that only exist in some of the tails.
  for (auto &MI : *MBB) {
    if (!countsAsInstruction(MI))
      continue;
    DebugLoc DL = MI.getDebugLoc();
    for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
      if (i == commonTailIndex)
        continue;

      auto &Pos = NextCommonInsts[i];
      assert(Pos != SameTails[i].getBlock()->end() &&
          "Reached BB end within common tail");
      while (!countsAsInstruction(MI: *Pos)) {
        ++Pos;
        assert(Pos != SameTails[i].getBlock()->end() &&
            "Reached BB end within common tail");
      }
      assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");
      DL = DebugLoc::getMergedLocation(LocA: DL, LocB: Pos->getDebugLoc());
      NextCommonInsts[i] = ++Pos;
    }
    MI.setDebugLoc(DL);
  }

  if (UpdateLiveIns) {
    LivePhysRegs NewLiveIns(*TRI);
    computeLiveIns(LiveRegs&: NewLiveIns, MBB: *MBB);
    LiveRegs.init(TRI: *TRI);

    // The flag merging may lead to some register uses no longer using the
    // <undef> flag, add IMPLICIT_DEFs in the predecessors as necessary.
    for (MachineBasicBlock *Pred : MBB->predecessors()) {
      LiveRegs.clear();
      LiveRegs.addLiveOuts(MBB: *Pred);
      MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator();
      for (Register Reg : NewLiveIns) {
        // Only registers that are not already live out of Pred need an
        // IMPLICIT_DEF.
        if (!LiveRegs.available(MRI: *MRI, Reg))
          continue;

        // Skip the register if we are about to add one of its super registers.
        // TODO: Common this up with the same logic in addLiveIns().
        if (any_of(Range: TRI->superregs(Reg), P: [&](MCPhysReg SReg) {
              return NewLiveIns.contains(Reg: SReg) && !MRI->isReserved(PhysReg: SReg);
            }))
          continue;

        DebugLoc DL;
        BuildMI(BB&: *Pred, I: InsertBefore, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF),
                DestReg: Reg);
      }
    }

    MBB->clearLiveIns();
    addLiveIns(MBB&: *MBB, LiveRegs: NewLiveIns);
  }
}
907
908// See if any of the blocks in MergePotentials (which all have SuccBB as a
909// successor, or all have no successor if it is null) can be tail-merged.
910// If there is a successor, any blocks in MergePotentials that are not
911// tail-merged and are not immediately before Succ must have an unconditional
912// branch to Succ added (but the predecessor/successor lists need no
913// adjustment). The lone predecessor of Succ that falls through into Succ,
914// if any, is given in PredBB.
915// MinCommonTailLength - Except for the special cases below, tail-merge if
916// there are at least this many instructions in common.
bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
                                      MachineBasicBlock *PredBB,
                                      unsigned MinCommonTailLength) {
  bool MadeChange = false;

  LLVM_DEBUG({
    dbgs() << "\nTryTailMergeBlocks: ";
    for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
      dbgs() << printMBBReference(*MergePotentials[i].getBlock())
             << (i == e - 1 ? "" : ", ");
    dbgs() << "\n";
    if (SuccBB) {
      dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n';
      if (PredBB)
        dbgs() << " which has fall-through from " << printMBBReference(*PredBB)
               << "\n";
    }
    dbgs() << "Looking for common tails of at least " << MinCommonTailLength
           << " instruction" << (MinCommonTailLength == 1 ? "" : "s") << '\n';
  });

  // Sort by hash value so that blocks with identical end sequences sort
  // together.
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
  // If origin-tracking is enabled then MergePotentialElt is no longer a POD
  // type, so we need std::sort instead.
  std::sort(MergePotentials.begin(), MergePotentials.end());
#else
  array_pod_sort(Start: MergePotentials.begin(), End: MergePotentials.end());
#endif

  // Walk through equivalence sets looking for actual exact matches.
  // Each iteration either merges one set of tails or removes all blocks with
  // the current hash from the worklist, so the loop terminates.
  while (MergePotentials.size() > 1) {
    unsigned CurHash = MergePotentials.back().getHash();
    const DebugLoc &BranchDL = MergePotentials.back().getBranchDebugLoc();

    // Build SameTails, identifying the set of blocks with this hash code
    // and with the maximum number of instructions in common.
    unsigned maxCommonTailLength = ComputeSameTails(CurHash,
                                                    MinCommonTailLength,
                                                    SuccBB, PredBB);

    // If we didn't find any pair that has at least MinCommonTailLength
    // instructions in common, remove all blocks with this hash code and retry.
    if (SameTails.empty()) {
      RemoveBlocksWithHash(CurHash, SuccBB, PredBB, BranchDL);
      continue;
    }

    // If one of the blocks is the entire common tail (and is not the entry
    // block/an EH pad, which we can't jump to), we can treat all blocks with
    // this same tail at once.  Use PredBB if that is one of the possibilities,
    // as that will not introduce any extra branches.
    MachineBasicBlock *EntryBB =
        &MergePotentials.front().getBlock()->getParent()->front();
    unsigned commonTailIndex = SameTails.size();
    // If there are two blocks, check to see if one can be made to fall through
    // into the other.
    if (SameTails.size() == 2 &&
        SameTails[0].getBlock()->isLayoutSuccessor(MBB: SameTails[1].getBlock()) &&
        SameTails[1].tailIsWholeBlock() && !SameTails[1].getBlock()->isEHPad())
      commonTailIndex = 1;
    else if (SameTails.size() == 2 &&
             SameTails[1].getBlock()->isLayoutSuccessor(
                 MBB: SameTails[0].getBlock()) &&
             SameTails[0].tailIsWholeBlock() &&
             !SameTails[0].getBlock()->isEHPad())
      commonTailIndex = 0;
    else {
      // Otherwise just pick one, favoring the fall-through predecessor if
      // there is one.
      for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
        MachineBasicBlock *MBB = SameTails[i].getBlock();
        // The entry block and EH pads cannot become pure jump targets, so
        // they may only serve as the common tail if it must be split anyway.
        if ((MBB == EntryBB || MBB->isEHPad()) &&
            SameTails[i].tailIsWholeBlock())
          continue;
        if (MBB == PredBB) {
          commonTailIndex = i;
          break;
        }
        if (SameTails[i].tailIsWholeBlock())
          commonTailIndex = i;
      }
    }

    if (commonTailIndex == SameTails.size() ||
        (SameTails[commonTailIndex].getBlock() == PredBB &&
         !SameTails[commonTailIndex].tailIsWholeBlock())) {
      // None of the blocks consist entirely of the common tail.
      // Split a block so that one does.
      if (!CreateCommonTailOnlyBlock(PredBB, SuccBB,
                                     maxCommonTailLength, commonTailIndex)) {
        RemoveBlocksWithHash(CurHash, SuccBB, PredBB, BranchDL);
        continue;
      }
    }

    MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();

    // Recompute common tail MBB's edge weights and block frequency.
    setCommonTailEdgeWeights(*MBB);

    // Merge debug locations, MMOs and undef flags across identical instructions
    // for common tail.
    mergeCommonTails(commonTailIndex);

    // MBB is common tail.  Adjust all other BB's to jump to this one.
    // Traversal must be forwards so erases work.
    LLVM_DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB)
                      << " for ");
    for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
      if (commonTailIndex == i)
        continue;
      LLVM_DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock())
                        << (i == e - 1 ? "" : ", "));
      // Hack the end off BB i, making it jump to BB commonTailIndex instead.
      replaceTailWithBranchTo(OldInst: SameTails[i].getTailStartPos(), NewDest&: *MBB);
      // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
      MergePotentials.erase(position: SameTails[i].getMPIter());
    }
    LLVM_DEBUG(dbgs() << "\n");
    // We leave commonTailIndex in the worklist in case there are other blocks
    // that match it with a smaller number of instructions.
    MadeChange = true;
  }
  return MadeChange;
}
1044
/// Top-level driver for tail merging: first merges tails among blocks with no
/// successors, then, for every block with multiple predecessors, canonicalizes
/// each predecessor's terminators and tries to merge the predecessors' tails.
/// Returns true if the function was changed.
bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
  bool MadeChange = false;
  if (!EnableTailMerge)
    return MadeChange;

  // First find blocks with no successors.
  // Block placement may create new tail merging opportunities for these blocks.
  MergePotentials.clear();
  for (MachineBasicBlock &MBB : MF) {
    // Cap the worklist size to keep the quadratic matching tractable.
    if (MergePotentials.size() == TailMergeThreshold)
      break;
    if (!TriedMerging.count(Ptr: &MBB) && MBB.succ_empty())
      MergePotentials.push_back(x: MergePotentialsElt(HashEndOfMBB(MBB), &MBB,
                                                   MBB.findBranchDebugLoc()));
  }

  // If this is a large problem, avoid visiting the same basic blocks
  // multiple times.
  if (MergePotentials.size() == TailMergeThreshold)
    for (const MergePotentialsElt &Elt : MergePotentials)
      TriedMerging.insert(Ptr: Elt.getBlock());

  // See if we can do any tail merging on those.
  if (MergePotentials.size() >= 2)
    MadeChange |= TryTailMergeBlocks(SuccBB: nullptr, PredBB: nullptr, MinCommonTailLength);

  // Look at blocks (IBB) with multiple predecessors (PBB).
  // We change each predecessor to a canonical form, by
  // (1) temporarily removing any unconditional branch from the predecessor
  // to IBB, and
  // (2) alter conditional branches so they branch to the other block
  // not IBB; this may require adding back an unconditional branch to IBB
  // later, where there wasn't one coming in.  E.g.
  //   Bcc IBB
  //   fallthrough to QBB
  // here becomes
  //   Bncc QBB
  // with a conceptual B to IBB after that, which never actually exists.
  // With those changes, we see whether the predecessors' tails match,
  // and merge them if so.  We change things out of canonical form and
  // back to the way they were later in the process.  (OptimizeBranches
  // would undo some of this, but we can't use it, because we'd get into
  // a compile-time infinite loop repeatedly doing and undoing the same
  // transformations.)

  for (MachineFunction::iterator I = std::next(x: MF.begin()), E = MF.end();
       I != E; ++I) {
    if (I->pred_size() < 2) continue;
    SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
    MachineBasicBlock *IBB = &*I;
    MachineBasicBlock *PredBB = &*std::prev(x: I);
    MergePotentials.clear();
    MachineLoop *ML;

    // Bail if merging after placement and IBB is the loop header because
    // -- If merging predecessors that belong to the same loop as IBB, the
    // common tail of merged predecessors may become the loop top if block
    // placement is called again and the predecessors may branch to this common
    // tail and require more branches. This can be relaxed if
    // MachineBlockPlacement::findBestLoopTop is more flexible.
    // --If merging predecessors that do not belong to the same loop as IBB, the
    // loop info of IBB's loop and the other loops may be affected. Calling the
    // block placement again may make big change to the layout and eliminate the
    // reason to do tail merging here.
    if (AfterBlockPlacement && MLI) {
      ML = MLI->getLoopFor(BB: IBB);
      if (ML && IBB == ML->getHeader())
        continue;
    }

    for (MachineBasicBlock *PBB : I->predecessors()) {
      // Cap the worklist size, as above.
      if (MergePotentials.size() == TailMergeThreshold)
        break;

      if (TriedMerging.count(Ptr: PBB))
        continue;

      // Skip blocks that loop to themselves, can't tail merge these.
      if (PBB == IBB)
        continue;

      // Visit each predecessor only once.
      if (!UniquePreds.insert(Ptr: PBB).second)
        continue;

      // Skip blocks which may jump to a landing pad or jump from an asm blob.
      // Can't tail merge these.
      if (PBB->hasEHPadSuccessor() || PBB->mayHaveInlineAsmBr())
        continue;

      // After block placement, only consider predecessors that belong to the
      // same loop as IBB. The reason is the same as above when skipping loop
      // header.
      if (AfterBlockPlacement && MLI)
        if (ML != MLI->getLoopFor(BB: PBB))
          continue;

      MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
      SmallVector<MachineOperand, 4> Cond;
      if (!TII->analyzeBranch(MBB&: *PBB, TBB, FBB, Cond, AllowModify: true)) {
        // Failing case: IBB is the target of a cbr, and we cannot reverse the
        // branch.
        SmallVector<MachineOperand, 4> NewCond(Cond);
        if (!Cond.empty() && TBB == IBB) {
          if (TII->reverseBranchCondition(Cond&: NewCond))
            continue;
          // This is the QBB case described above
          if (!FBB) {
            auto Next = ++PBB->getIterator();
            if (Next != MF.end())
              FBB = &*Next;
          }
        }

        // Remove the unconditional branch at the end, if any.
        DebugLoc dl = PBB->findBranchDebugLoc();
        if (TBB && (Cond.empty() || FBB)) {
          TII->removeBranch(MBB&: *PBB);
          if (!Cond.empty())
            // reinsert conditional branch only, for now
            TII->insertBranch(MBB&: *PBB, TBB: (TBB == IBB) ? FBB : TBB, FBB: nullptr,
                              Cond: NewCond, DL: dl);
        }

        MergePotentials.push_back(
            x: MergePotentialsElt(HashEndOfMBB(MBB: *PBB), PBB, dl));
      }
    }

    // If this is a large problem, avoid visiting the same basic blocks multiple
    // times.
    if (MergePotentials.size() == TailMergeThreshold)
      for (MergePotentialsElt &Elt : MergePotentials)
        TriedMerging.insert(Ptr: Elt.getBlock());

    if (MergePotentials.size() >= 2)
      MadeChange |= TryTailMergeBlocks(SuccBB: IBB, PredBB, MinCommonTailLength);

    // Reinsert an unconditional branch if needed. The 1 below can occur as a
    // result of removing blocks in TryTailMergeBlocks.
    PredBB = &*std::prev(x: I); // this may have been changed in TryTailMergeBlocks
    if (MergePotentials.size() == 1 &&
        MergePotentials.begin()->getBlock() != PredBB)
      FixTail(CurMBB: MergePotentials.begin()->getBlock(), SuccBB: IBB, TII,
              BranchDL: MergePotentials.begin()->getBranchDebugLoc());
  }

  return MadeChange;
}
1194
1195void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) {
1196 SmallVector<BlockFrequency, 2> EdgeFreqLs(TailMBB.succ_size());
1197 BlockFrequency AccumulatedMBBFreq;
1198
1199 // Aggregate edge frequency of successor edge j:
1200 // edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)),
1201 // where bb is a basic block that is in SameTails.
1202 for (const auto &Src : SameTails) {
1203 const MachineBasicBlock *SrcMBB = Src.getBlock();
1204 BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(MBB: SrcMBB);
1205 AccumulatedMBBFreq += BlockFreq;
1206
1207 // It is not necessary to recompute edge weights if TailBB has less than two
1208 // successors.
1209 if (TailMBB.succ_size() <= 1)
1210 continue;
1211
1212 auto EdgeFreq = EdgeFreqLs.begin();
1213
1214 for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
1215 SuccI != SuccE; ++SuccI, ++EdgeFreq)
1216 *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(Src: SrcMBB, Dst: *SuccI);
1217 }
1218
1219 MBBFreqInfo.setBlockFreq(MBB: &TailMBB, F: AccumulatedMBBFreq);
1220
1221 if (TailMBB.succ_size() <= 1)
1222 return;
1223
1224 auto SumEdgeFreq =
1225 std::accumulate(first: EdgeFreqLs.begin(), last: EdgeFreqLs.end(), init: BlockFrequency(0))
1226 .getFrequency();
1227 auto EdgeFreq = EdgeFreqLs.begin();
1228
1229 if (SumEdgeFreq > 0) {
1230 for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
1231 SuccI != SuccE; ++SuccI, ++EdgeFreq) {
1232 auto Prob = BranchProbability::getBranchProbability(
1233 Numerator: EdgeFreq->getFrequency(), Denominator: SumEdgeFreq);
1234 TailMBB.setSuccProbability(I: SuccI, Prob);
1235 }
1236 }
1237}
1238
1239//===----------------------------------------------------------------------===//
1240// Branch Optimization
1241//===----------------------------------------------------------------------===//
1242
1243bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
1244 bool MadeChange = false;
1245
1246 // Make sure blocks are numbered in order
1247 MF.RenumberBlocks();
1248 // Renumbering blocks alters EH scope membership, recalculate it.
1249 EHScopeMembership = getEHScopeMembership(MF);
1250
1251 for (MachineBasicBlock &MBB :
1252 llvm::make_early_inc_range(Range: llvm::drop_begin(RangeOrContainer&: MF))) {
1253 MadeChange |= OptimizeBlock(MBB: &MBB);
1254
1255 // If it is dead, remove it.
1256 if (MBB.pred_empty() && !MBB.isMachineBlockAddressTaken() &&
1257 !MBB.isEHPad()) {
1258 RemoveDeadBlock(MBB: &MBB);
1259 MadeChange = true;
1260 ++NumDeadBlocks;
1261 }
1262 }
1263
1264 return MadeChange;
1265}
1266
1267// Blocks should be considered empty if they contain only debug info;
1268// else the debug info would affect codegen.
1269static bool IsEmptyBlock(MachineBasicBlock *MBB) {
1270 return MBB->getFirstNonDebugInstr(SkipPseudoOp: true) == MBB->end();
1271}
1272
1273// Blocks with only debug info and branches should be considered the same
1274// as blocks with only branches.
1275static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
1276 MachineBasicBlock::iterator I = MBB->getFirstNonDebugInstr();
1277 assert(I != MBB->end() && "empty block!");
1278 return I->isBranch();
1279}
1280
1281/// IsBetterFallthrough - Return true if it would be clearly better to
1282/// fall-through to MBB1 than to fall through into MBB2. This has to return
1283/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
1284/// result in infinite loops.
1285static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
1286 MachineBasicBlock *MBB2) {
1287 assert(MBB1 && MBB2 && "Unknown MachineBasicBlock");
1288
1289 // Right now, we use a simple heuristic. If MBB2 ends with a call, and
1290 // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
1291 // optimize branches that branch to either a return block or an assert block
1292 // into a fallthrough to the return.
1293 MachineBasicBlock::iterator MBB1I = MBB1->getLastNonDebugInstr();
1294 MachineBasicBlock::iterator MBB2I = MBB2->getLastNonDebugInstr();
1295 if (MBB1I == MBB1->end() || MBB2I == MBB2->end())
1296 return false;
1297
1298 // If there is a clear successor ordering we make sure that one block
1299 // will fall through to the next
1300 if (MBB1->isSuccessor(MBB: MBB2)) return true;
1301 if (MBB2->isSuccessor(MBB: MBB1)) return false;
1302
1303 return MBB2I->isCall() && !MBB1I->isCall();
1304}
1305
1306static void copyDebugInfoToPredecessor(const TargetInstrInfo *TII,
1307 MachineBasicBlock &MBB,
1308 MachineBasicBlock &PredMBB) {
1309 auto InsertBefore = PredMBB.getFirstTerminator();
1310 for (MachineInstr &MI : MBB.instrs())
1311 if (MI.isDebugInstr()) {
1312 TII->duplicate(MBB&: PredMBB, InsertBefore, Orig: MI);
1313 LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to pred: "
1314 << MI);
1315 }
1316}
1317
1318static void copyDebugInfoToSuccessor(const TargetInstrInfo *TII,
1319 MachineBasicBlock &MBB,
1320 MachineBasicBlock &SuccMBB) {
1321 auto InsertBefore = SuccMBB.SkipPHIsAndLabels(I: SuccMBB.begin());
1322 for (MachineInstr &MI : MBB.instrs())
1323 if (MI.isDebugInstr()) {
1324 TII->duplicate(MBB&: SuccMBB, InsertBefore, Orig: MI);
1325 LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to succ: "
1326 << MI);
1327 }
1328}
1329
1330// Try to salvage DBG_VALUE instructions from an otherwise empty block. If such
1331// a basic block is removed we would lose the debug information unless we have
1332// copied the information to a predecessor/successor.
1333//
1334// TODO: This function only handles some simple cases. An alternative would be
1335// to run a heavier analysis, such as the LiveDebugValues pass, before we do
1336// branch folding.
1337static void salvageDebugInfoFromEmptyBlock(const TargetInstrInfo *TII,
1338 MachineBasicBlock &MBB) {
1339 assert(IsEmptyBlock(&MBB) && "Expected an empty block (except debug info).");
1340 // If this MBB is the only predecessor of a successor it is legal to copy
1341 // DBG_VALUE instructions to the beginning of the successor.
1342 for (MachineBasicBlock *SuccBB : MBB.successors())
1343 if (SuccBB->pred_size() == 1)
1344 copyDebugInfoToSuccessor(TII, MBB, SuccMBB&: *SuccBB);
1345 // If this MBB is the only successor of a predecessor it is legal to copy the
1346 // DBG_VALUE instructions to the end of the predecessor (just before the
1347 // terminators, assuming that the terminator isn't affecting the DBG_VALUE).
1348 for (MachineBasicBlock *PredBB : MBB.predecessors())
1349 if (PredBB->succ_size() == 1)
1350 copyDebugInfoToPredecessor(TII, MBB, PredMBB&: *PredBB);
1351}
1352
1353bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
1354 bool MadeChange = false;
1355 MachineFunction &MF = *MBB->getParent();
1356ReoptimizeBlock:
1357
1358 MachineFunction::iterator FallThrough = MBB->getIterator();
1359 ++FallThrough;
1360
1361 // Make sure MBB and FallThrough belong to the same EH scope.
1362 bool SameEHScope = true;
1363 if (!EHScopeMembership.empty() && FallThrough != MF.end()) {
1364 auto MBBEHScope = EHScopeMembership.find(Val: MBB);
1365 assert(MBBEHScope != EHScopeMembership.end());
1366 auto FallThroughEHScope = EHScopeMembership.find(Val: &*FallThrough);
1367 assert(FallThroughEHScope != EHScopeMembership.end());
1368 SameEHScope = MBBEHScope->second == FallThroughEHScope->second;
1369 }
1370
1371 // Analyze the branch in the current block. As a side-effect, this may cause
1372 // the block to become empty.
1373 MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
1374 SmallVector<MachineOperand, 4> CurCond;
1375 bool CurUnAnalyzable =
1376 TII->analyzeBranch(MBB&: *MBB, TBB&: CurTBB, FBB&: CurFBB, Cond&: CurCond, AllowModify: true);
1377
1378 // If this block is empty, make everyone use its fall-through, not the block
1379 // explicitly. Landing pads should not do this since the landing-pad table
1380 // points to this block. Blocks with their addresses taken shouldn't be
1381 // optimized away.
1382 if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() &&
1383 SameEHScope) {
1384 salvageDebugInfoFromEmptyBlock(TII, MBB&: *MBB);
1385 // Dead block? Leave for cleanup later.
1386 if (MBB->pred_empty()) return MadeChange;
1387
1388 if (FallThrough == MF.end()) {
1389 // TODO: Simplify preds to not branch here if possible!
1390 } else if (FallThrough->isEHPad()) {
1391 // Don't rewrite to a landing pad fallthough. That could lead to the case
1392 // where a BB jumps to more than one landing pad.
1393 // TODO: Is it ever worth rewriting predecessors which don't already
1394 // jump to a landing pad, and so can safely jump to the fallthrough?
1395 } else if (MBB->isSuccessor(MBB: &*FallThrough)) {
1396 // Rewrite all predecessors of the old block to go to the fallthrough
1397 // instead.
1398 while (!MBB->pred_empty()) {
1399 MachineBasicBlock *Pred = *(MBB->pred_end()-1);
1400 Pred->ReplaceUsesOfBlockWith(Old: MBB, New: &*FallThrough);
1401 }
1402 // Add rest successors of MBB to successors of FallThrough. Those
1403 // successors are not directly reachable via MBB, so it should be
1404 // landing-pad.
1405 for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
1406 if (*SI != &*FallThrough && !FallThrough->isSuccessor(MBB: *SI)) {
1407 assert((*SI)->isEHPad() && "Bad CFG");
1408 FallThrough->copySuccessor(Orig: MBB, I: SI);
1409 }
1410 // If MBB was the target of a jump table, update jump tables to go to the
1411 // fallthrough instead.
1412 if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
1413 MJTI->ReplaceMBBInJumpTables(Old: MBB, New: &*FallThrough);
1414 MadeChange = true;
1415 }
1416 return MadeChange;
1417 }
1418
1419 // Check to see if we can simplify the terminator of the block before this
1420 // one.
1421 MachineBasicBlock &PrevBB = *std::prev(x: MachineFunction::iterator(MBB));
1422
1423 MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
1424 SmallVector<MachineOperand, 4> PriorCond;
1425 bool PriorUnAnalyzable =
1426 TII->analyzeBranch(MBB&: PrevBB, TBB&: PriorTBB, FBB&: PriorFBB, Cond&: PriorCond, AllowModify: true);
1427 if (!PriorUnAnalyzable) {
1428 // If the previous branch is conditional and both conditions go to the same
1429 // destination, remove the branch, replacing it with an unconditional one or
1430 // a fall-through.
1431 if (PriorTBB && PriorTBB == PriorFBB) {
1432 DebugLoc Dl = PrevBB.findBranchDebugLoc();
1433 TII->removeBranch(MBB&: PrevBB);
1434 PriorCond.clear();
1435 if (PriorTBB != MBB)
1436 TII->insertBranch(MBB&: PrevBB, TBB: PriorTBB, FBB: nullptr, Cond: PriorCond, DL: Dl);
1437 MadeChange = true;
1438 ++NumBranchOpts;
1439 goto ReoptimizeBlock;
1440 }
1441
1442 // If the previous block unconditionally falls through to this block and
1443 // this block has no other predecessors, move the contents of this block
1444 // into the prior block. This doesn't usually happen when SimplifyCFG
1445 // has been used, but it can happen if tail merging splits a fall-through
1446 // predecessor of a block.
1447 // This has to check PrevBB->succ_size() because EH edges are ignored by
1448 // analyzeBranch.
1449 if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
1450 PrevBB.succ_size() == 1 && PrevBB.isSuccessor(MBB) &&
1451 !MBB->hasAddressTaken() && !MBB->isEHPad()) {
1452 LLVM_DEBUG(dbgs() << "\nMerging into block: " << PrevBB
1453 << "From MBB: " << *MBB);
1454 // Remove redundant DBG_VALUEs first.
1455 if (!PrevBB.empty()) {
1456 MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
1457 --PrevBBIter;
1458 MachineBasicBlock::iterator MBBIter = MBB->begin();
1459 // Check if DBG_VALUE at the end of PrevBB is identical to the
1460 // DBG_VALUE at the beginning of MBB.
1461 while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
1462 && PrevBBIter->isDebugInstr() && MBBIter->isDebugInstr()) {
1463 if (!MBBIter->isIdenticalTo(Other: *PrevBBIter))
1464 break;
1465 MachineInstr &DuplicateDbg = *MBBIter;
1466 ++MBBIter; -- PrevBBIter;
1467 DuplicateDbg.eraseFromParent();
1468 }
1469 }
1470 PrevBB.splice(Where: PrevBB.end(), Other: MBB, From: MBB->begin(), To: MBB->end());
1471 PrevBB.removeSuccessor(I: PrevBB.succ_begin());
1472 assert(PrevBB.succ_empty());
1473 PrevBB.transferSuccessors(FromMBB: MBB);
1474 MadeChange = true;
1475 return MadeChange;
1476 }
1477
1478 // If the previous branch *only* branches to *this* block (conditional or
1479 // not) remove the branch.
1480 if (PriorTBB == MBB && !PriorFBB) {
1481 TII->removeBranch(MBB&: PrevBB);
1482 MadeChange = true;
1483 ++NumBranchOpts;
1484 goto ReoptimizeBlock;
1485 }
1486
1487 // If the prior block branches somewhere else on the condition and here if
1488 // the condition is false, remove the uncond second branch.
1489 if (PriorFBB == MBB) {
1490 DebugLoc Dl = PrevBB.findBranchDebugLoc();
1491 TII->removeBranch(MBB&: PrevBB);
1492 TII->insertBranch(MBB&: PrevBB, TBB: PriorTBB, FBB: nullptr, Cond: PriorCond, DL: Dl);
1493 MadeChange = true;
1494 ++NumBranchOpts;
1495 goto ReoptimizeBlock;
1496 }
1497
1498 // If the prior block branches here on true and somewhere else on false, and
1499 // if the branch condition is reversible, reverse the branch to create a
1500 // fall-through.
1501 if (PriorTBB == MBB) {
1502 SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
1503 if (!TII->reverseBranchCondition(Cond&: NewPriorCond)) {
1504 DebugLoc Dl = PrevBB.findBranchDebugLoc();
1505 TII->removeBranch(MBB&: PrevBB);
1506 TII->insertBranch(MBB&: PrevBB, TBB: PriorFBB, FBB: nullptr, Cond: NewPriorCond, DL: Dl);
1507 MadeChange = true;
1508 ++NumBranchOpts;
1509 goto ReoptimizeBlock;
1510 }
1511 }
1512
1513 // If this block has no successors (e.g. it is a return block or ends with
1514 // a call to a no-return function like abort or __cxa_throw) and if the pred
1515 // falls through into this block, and if it would otherwise fall through
1516 // into the block after this, move this block to the end of the function.
1517 //
1518 // We consider it more likely that execution will stay in the function (e.g.
1519 // due to loops) than it is to exit it. This asserts in loops etc, moving
1520 // the assert condition out of the loop body.
1521 if (MBB->succ_empty() && !PriorCond.empty() && !PriorFBB &&
1522 MachineFunction::iterator(PriorTBB) == FallThrough &&
1523 !MBB->canFallThrough()) {
1524 bool DoTransform = true;
1525
1526 // We have to be careful that the succs of PredBB aren't both no-successor
1527 // blocks. If neither have successors and if PredBB is the second from
1528 // last block in the function, we'd just keep swapping the two blocks for
1529 // last. Only do the swap if one is clearly better to fall through than
1530 // the other.
1531 if (FallThrough == --MF.end() &&
1532 !IsBetterFallthrough(MBB1: PriorTBB, MBB2: MBB))
1533 DoTransform = false;
1534
1535 if (DoTransform) {
1536 // Reverse the branch so we will fall through on the previous true cond.
1537 SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
1538 if (!TII->reverseBranchCondition(Cond&: NewPriorCond)) {
1539 LLVM_DEBUG(dbgs() << "\nMoving MBB: " << *MBB
1540 << "To make fallthrough to: " << *PriorTBB << "\n");
1541
1542 DebugLoc Dl = PrevBB.findBranchDebugLoc();
1543 TII->removeBranch(MBB&: PrevBB);
1544 TII->insertBranch(MBB&: PrevBB, TBB: MBB, FBB: nullptr, Cond: NewPriorCond, DL: Dl);
1545
1546 // Move this block to the end of the function.
1547 MBB->moveAfter(NewBefore: &MF.back());
1548 MadeChange = true;
1549 ++NumBranchOpts;
1550 return MadeChange;
1551 }
1552 }
1553 }
1554 }
1555
1556 if (!IsEmptyBlock(MBB)) {
1557 MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
1558 if (TII->isUnconditionalTailCall(MI: TailCall)) {
1559 SmallVector<MachineBasicBlock *> PredsChanged;
1560 for (auto &Pred : MBB->predecessors()) {
1561 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
1562 SmallVector<MachineOperand, 4> PredCond;
1563 bool PredAnalyzable =
1564 !TII->analyzeBranch(MBB&: *Pred, TBB&: PredTBB, FBB&: PredFBB, Cond&: PredCond, AllowModify: true);
1565
1566 // Only eliminate if MBB == TBB (Taken Basic Block)
1567 if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&
1568 PredTBB != PredFBB) {
1569 // The predecessor has a conditional branch to this block which
1570 // consists of only a tail call. Try to fold the tail call into the
1571 // conditional branch.
1572 if (TII->canMakeTailCallConditional(Cond&: PredCond, TailCall)) {
1573 // TODO: It would be nice if analyzeBranch() could provide a pointer
1574 // to the branch instruction so replaceBranchWithTailCall() doesn't
1575 // have to search for it.
1576 TII->replaceBranchWithTailCall(MBB&: *Pred, Cond&: PredCond, TailCall);
1577 PredsChanged.push_back(Elt: Pred);
1578 }
1579 }
1580 // If the predecessor is falling through to this block, we could reverse
1581 // the branch condition and fold the tail call into that. However, after
1582 // that we might have to re-arrange the CFG to fall through to the other
1583 // block and there is a high risk of regressing code size rather than
1584 // improving it.
1585 }
1586 if (!PredsChanged.empty()) {
1587 NumTailCalls += PredsChanged.size();
1588 for (auto &Pred : PredsChanged)
1589 Pred->removeSuccessor(Succ: MBB);
1590
1591 return true;
1592 }
1593 }
1594 }
1595
1596 if (!CurUnAnalyzable) {
1597 // If this is a two-way branch, and the FBB branches to this block, reverse
1598 // the condition so the single-basic-block loop is faster. Instead of:
1599 // Loop: xxx; jcc Out; jmp Loop
1600 // we want:
1601 // Loop: xxx; jncc Loop; jmp Out
1602 if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
1603 SmallVector<MachineOperand, 4> NewCond(CurCond);
1604 if (!TII->reverseBranchCondition(Cond&: NewCond)) {
1605 DebugLoc Dl = MBB->findBranchDebugLoc();
1606 TII->removeBranch(MBB&: *MBB);
1607 TII->insertBranch(MBB&: *MBB, TBB: CurFBB, FBB: CurTBB, Cond: NewCond, DL: Dl);
1608 MadeChange = true;
1609 ++NumBranchOpts;
1610 goto ReoptimizeBlock;
1611 }
1612 }
1613
1614 // If this branch is the only thing in its block, see if we can forward
1615 // other blocks across it.
1616 if (CurTBB && CurCond.empty() && !CurFBB &&
1617 IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
1618 !MBB->hasAddressTaken() && !MBB->isEHPad()) {
1619 DebugLoc Dl = MBB->findBranchDebugLoc();
1620 // This block may contain just an unconditional branch. Because there can
1621 // be 'non-branch terminators' in the block, try removing the branch and
1622 // then seeing if the block is empty.
1623 TII->removeBranch(MBB&: *MBB);
1624 // If the only things remaining in the block are debug info, remove these
1625 // as well, so this will behave the same as an empty block in non-debug
1626 // mode.
1627 if (IsEmptyBlock(MBB)) {
1628 // Make the block empty, losing the debug info (we could probably
1629 // improve this in some cases.)
1630 MBB->erase(I: MBB->begin(), E: MBB->end());
1631 }
1632 // If this block is just an unconditional branch to CurTBB, we can
1633 // usually completely eliminate the block. The only case we cannot
1634 // completely eliminate the block is when the block before this one
1635 // falls through into MBB and we can't understand the prior block's branch
1636 // condition.
1637 if (MBB->empty()) {
1638 bool PredHasNoFallThrough = !PrevBB.canFallThrough();
1639 if (PredHasNoFallThrough || !PriorUnAnalyzable ||
1640 !PrevBB.isSuccessor(MBB)) {
1641 // If the prior block falls through into us, turn it into an
1642 // explicit branch to us to make updates simpler.
1643 if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
1644 PriorTBB != MBB && PriorFBB != MBB) {
1645 if (!PriorTBB) {
1646 assert(PriorCond.empty() && !PriorFBB &&
1647 "Bad branch analysis");
1648 PriorTBB = MBB;
1649 } else {
1650 assert(!PriorFBB && "Machine CFG out of date!");
1651 PriorFBB = MBB;
1652 }
1653 DebugLoc PrevDl = PrevBB.findBranchDebugLoc();
1654 TII->removeBranch(MBB&: PrevBB);
1655 TII->insertBranch(MBB&: PrevBB, TBB: PriorTBB, FBB: PriorFBB, Cond: PriorCond, DL: PrevDl);
1656 }
1657
1658 // Iterate through all the predecessors, revectoring each in-turn.
1659 size_t PI = 0;
1660 bool DidChange = false;
1661 bool HasBranchToSelf = false;
1662 while(PI != MBB->pred_size()) {
1663 MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
1664 if (PMBB == MBB) {
1665 // If this block has an uncond branch to itself, leave it.
1666 ++PI;
1667 HasBranchToSelf = true;
1668 } else {
1669 DidChange = true;
1670 PMBB->ReplaceUsesOfBlockWith(Old: MBB, New: CurTBB);
1671 // Add rest successors of MBB to successors of CurTBB. Those
1672 // successors are not directly reachable via MBB, so it should be
1673 // landing-pad.
1674 for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE;
1675 ++SI)
1676 if (*SI != CurTBB && !CurTBB->isSuccessor(MBB: *SI)) {
1677 assert((*SI)->isEHPad() && "Bad CFG");
1678 CurTBB->copySuccessor(Orig: MBB, I: SI);
1679 }
1680 // If this change resulted in PMBB ending in a conditional
1681 // branch where both conditions go to the same destination,
1682 // change this to an unconditional branch.
1683 MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr;
1684 SmallVector<MachineOperand, 4> NewCurCond;
1685 bool NewCurUnAnalyzable = TII->analyzeBranch(
1686 MBB&: *PMBB, TBB&: NewCurTBB, FBB&: NewCurFBB, Cond&: NewCurCond, AllowModify: true);
1687 if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
1688 DebugLoc PrevDl = PMBB->findBranchDebugLoc();
1689 TII->removeBranch(MBB&: *PMBB);
1690 NewCurCond.clear();
1691 TII->insertBranch(MBB&: *PMBB, TBB: NewCurTBB, FBB: nullptr, Cond: NewCurCond,
1692 DL: PrevDl);
1693 MadeChange = true;
1694 ++NumBranchOpts;
1695 }
1696 }
1697 }
1698
1699 // Change any jumptables to go to the new MBB.
1700 if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
1701 MJTI->ReplaceMBBInJumpTables(Old: MBB, New: CurTBB);
1702 if (DidChange) {
1703 ++NumBranchOpts;
1704 MadeChange = true;
1705 if (!HasBranchToSelf) return MadeChange;
1706 }
1707 }
1708 }
1709
1710 // Add the branch back if the block is more than just an uncond branch.
1711 TII->insertBranch(MBB&: *MBB, TBB: CurTBB, FBB: nullptr, Cond: CurCond, DL: Dl);
1712 }
1713 }
1714
1715 // If the prior block doesn't fall through into this block, and if this
1716 // block doesn't fall through into some other block, see if we can find a
1717 // place to move this block where a fall-through will happen.
1718 if (!PrevBB.canFallThrough()) {
1719 // Now we know that there was no fall-through into this block, check to
1720 // see if it has a fall-through into its successor.
1721 bool CurFallsThru = MBB->canFallThrough();
1722
1723 if (!MBB->isEHPad()) {
1724 // Check all the predecessors of this block. If one of them has no fall
1725 // throughs, and analyzeBranch thinks it _could_ fallthrough to this
1726 // block, move this block right after it.
1727 for (MachineBasicBlock *PredBB : MBB->predecessors()) {
1728 // Analyze the branch at the end of the pred.
1729 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
1730 SmallVector<MachineOperand, 4> PredCond;
1731 if (PredBB != MBB && !PredBB->canFallThrough() &&
1732 !TII->analyzeBranch(MBB&: *PredBB, TBB&: PredTBB, FBB&: PredFBB, Cond&: PredCond, AllowModify: true) &&
1733 (PredTBB == MBB || PredFBB == MBB) &&
1734 (!CurFallsThru || !CurTBB || !CurFBB) &&
1735 (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
1736 // If the current block doesn't fall through, just move it.
1737 // If the current block can fall through and does not end with a
1738 // conditional branch, we need to append an unconditional jump to
1739 // the (current) next block. To avoid a possible compile-time
1740 // infinite loop, move blocks only backward in this case.
1741 // Also, if there are already 2 branches here, we cannot add a third;
1742 // this means we have the case
1743 // Bcc next
1744 // B elsewhere
1745 // next:
1746 if (CurFallsThru) {
1747 MachineBasicBlock *NextBB = &*std::next(x: MBB->getIterator());
1748 CurCond.clear();
1749 TII->insertBranch(MBB&: *MBB, TBB: NextBB, FBB: nullptr, Cond: CurCond, DL: DebugLoc());
1750 }
1751 MBB->moveAfter(NewBefore: PredBB);
1752 MadeChange = true;
1753 goto ReoptimizeBlock;
1754 }
1755 }
1756 }
1757
1758 if (!CurFallsThru) {
1759 // Check analyzable branch-successors to see if we can move this block
1760 // before one.
1761 if (!CurUnAnalyzable) {
1762 for (MachineBasicBlock *SuccBB : {CurFBB, CurTBB}) {
1763 if (!SuccBB)
1764 continue;
1765 // Analyze the branch at the end of the block before the succ.
1766 MachineFunction::iterator SuccPrev = --SuccBB->getIterator();
1767
1768 // If this block doesn't already fall-through to that successor, and
1769 // if the succ doesn't already have a block that can fall through into
1770 // it, we can arrange for the fallthrough to happen.
1771 if (SuccBB != MBB && &*SuccPrev != MBB &&
1772 !SuccPrev->canFallThrough()) {
1773 MBB->moveBefore(NewAfter: SuccBB);
1774 MadeChange = true;
1775 goto ReoptimizeBlock;
1776 }
1777 }
1778 }
1779
1780 // Okay, there is no really great place to put this block. If, however,
1781 // the block before this one would be a fall-through if this block were
1782 // removed, move this block to the end of the function. There is no real
1783 // advantage in "falling through" to an EH block, so we don't want to
1784 // perform this transformation for that case.
1785 //
1786 // Also, Windows EH introduced the possibility of an arbitrary number of
1787 // successors to a given block. The analyzeBranch call does not consider
1788 // exception handling and so we can get in a state where a block
1789 // containing a call is followed by multiple EH blocks that would be
1790 // rotated infinitely at the end of the function if the transformation
1791 // below were performed for EH "FallThrough" blocks. Therefore, even if
1792 // that appears not to be happening anymore, we should assume that it is
1793 // possible and not remove the "!FallThrough()->isEHPad" condition below.
1794 //
1795 // Similarly, the analyzeBranch call does not consider callbr, which also
1796 // introduces the possibility of infinite rotation, as there may be
1797 // multiple successors of PrevBB. Thus we check such case by
1798 // FallThrough->isInlineAsmBrIndirectTarget().
1799 // NOTE: Checking if PrevBB contains callbr is more precise, but much
1800 // more expensive.
1801 MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
1802 SmallVector<MachineOperand, 4> PrevCond;
1803
1804 if (FallThrough != MF.end() && !FallThrough->isEHPad() &&
1805 !FallThrough->isInlineAsmBrIndirectTarget() &&
1806 !TII->analyzeBranch(MBB&: PrevBB, TBB&: PrevTBB, FBB&: PrevFBB, Cond&: PrevCond, AllowModify: true) &&
1807 PrevBB.isSuccessor(MBB: &*FallThrough)) {
1808 MBB->moveAfter(NewBefore: &MF.back());
1809 MadeChange = true;
1810 return MadeChange;
1811 }
1812 }
1813 }
1814
1815 return MadeChange;
1816}
1817
1818//===----------------------------------------------------------------------===//
1819// Hoist Common Code
1820//===----------------------------------------------------------------------===//
1821
1822bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
1823 bool MadeChange = false;
1824 for (MachineBasicBlock &MBB : llvm::make_early_inc_range(Range&: MF))
1825 MadeChange |= HoistCommonCodeInSuccs(MBB: &MBB);
1826
1827 return MadeChange;
1828}
1829
1830/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
1831/// its 'true' successor.
1832static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
1833 MachineBasicBlock *TrueBB) {
1834 for (MachineBasicBlock *SuccBB : BB->successors())
1835 if (SuccBB != TrueBB)
1836 return SuccBB;
1837 return nullptr;
1838}
1839
1840template <class Container>
1841static void addRegAndItsAliases(Register Reg, const TargetRegisterInfo *TRI,
1842 Container &Set) {
1843 if (Reg.isPhysical()) {
1844 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
1845 Set.insert(*AI);
1846 } else {
1847 Set.insert(Reg);
1848 }
1849}
1850
1851/// findHoistingInsertPosAndDeps - Find the location to move common instructions
1852/// in successors to. The location is usually just before the terminator,
1853/// however if the terminator is a conditional branch and its previous
1854/// instruction is the flag setting instruction, the previous instruction is
1855/// the preferred location. This function also gathers uses and defs of the
1856/// instructions from the insertion point to the end of the block. The data is
1857/// used by HoistCommonCodeInSuccs to ensure safety.
///
/// Returns MBB->end() as a sentinel meaning "no safe insertion point exists;
/// do not hoist into this block". On a non-sentinel return, \p Uses and
/// \p Defs hold the registers read/written between the returned position and
/// the end of the block.
1858static
1859MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
1860                                                  const TargetInstrInfo *TII,
1861                                                  const TargetRegisterInfo *TRI,
1862                                                  SmallSet<Register, 4> &Uses,
1863                                                  SmallSet<Register, 4> &Defs) {
1864  MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
  // A predicated terminator is too hard to reason about; give up.
1865  if (!TII->isUnpredicatedTerminator(MI: *Loc))
1866    return MBB->end();
1867
  // Collect the registers the terminator itself reads and writes.
1868  for (const MachineOperand &MO : Loc->operands()) {
1869    if (!MO.isReg())
1870      continue;
1871    Register Reg = MO.getReg();
1872    if (!Reg)
1873      continue;
1874    if (MO.isUse()) {
1875      addRegAndItsAliases(Reg, TRI, Set&: Uses);
1876    } else {
1877      if (!MO.isDead())
1878        // Don't try to hoist code in the rare case the terminator defines a
1879        // register that is later used.
1880        return MBB->end();
1881
1882      // If the terminator defines a register, make sure we don't hoist
1883      // the instruction whose def might be clobbered by the terminator.
1884      addRegAndItsAliases(Reg, TRI, Set&: Defs);
1885    }
1886  }
1887
1888  if (Uses.empty())
1889    return Loc;
1890  // If the terminator is the only instruction in the block and Uses is not
1891  // empty (or we would have returned above), we can still safely hoist
1892  // instructions just before the terminator as long as the Defs/Uses are not
1893  // violated (which is checked in HoistCommonCodeInSuccs).
1894  if (Loc == MBB->begin())
1895    return Loc;
1896
1897  // The terminator is probably a conditional branch, try not to separate the
1898  // branch from condition setting instruction.
1899  MachineBasicBlock::iterator PI = prev_nodbg(It: Loc, Begin: MBB->begin());
1900
  // IsDef becomes true when PI (the instruction just before the terminator)
  // defines a register the terminator reads, i.e. PI is likely the
  // flag/condition-setting instruction for the branch.
1901  bool IsDef = false;
1902  for (const MachineOperand &MO : PI->operands()) {
1903    // If PI has a regmask operand, it is probably a call. Separate away.
1904    if (MO.isRegMask())
1905      return Loc;
1906    if (!MO.isReg() || MO.isUse())
1907      continue;
1908    Register Reg = MO.getReg();
1909    if (!Reg)
1910      continue;
1911    if (Uses.count(V: Reg)) {
1912      IsDef = true;
1913      break;
1914    }
1915  }
1916  if (!IsDef)
1917    // The condition setting instruction is not just before the conditional
1918    // branch.
1919    return Loc;
1920
1921  // Be conservative, don't insert instruction above something that may have
1922  // side-effects. And since it's potentially bad to separate flag setting
1923  // instruction from the conditional branch, just abort the optimization
1924  // completely.
1925  // Also avoid moving code above predicated instruction since it's hard to
1926  // reason about register liveness with predicated instruction.
1927  bool DontMoveAcrossStore = true;
1928  if (!PI->isSafeToMove(SawStore&: DontMoveAcrossStore) || TII->isPredicated(MI: *PI))
1929    return MBB->end();
1930
1931  // Find out what registers are live. Note this routine is ignoring other live
1932  // registers which are only used by instructions in successor blocks.
1933  for (const MachineOperand &MO : PI->operands()) {
1934    if (!MO.isReg())
1935      continue;
1936    Register Reg = MO.getReg();
1937    if (!Reg)
1938      continue;
1939    if (MO.isUse()) {
1940      addRegAndItsAliases(Reg, TRI, Set&: Uses);
1941    } else {
1942      if (Uses.erase(V: Reg)) {
        // A def of Reg also satisfies any earlier use of its sub-registers,
        // so stop tracking them as live uses.
1943        if (Reg.isPhysical()) {
1944          for (MCPhysReg SubReg : TRI->subregs(Reg))
1945            Uses.erase(V: SubReg); // Use sub-registers to be conservative
1946        }
1947      }
1948      addRegAndItsAliases(Reg, TRI, Set&: Defs);
1949    }
1950  }
1951
  // Hoist above PI so the flag-setting instruction stays adjacent to the
  // branch that consumes it.
1952  return PI;
1953}
1954
/// Hoist the longest identical instruction prefix of the 'true' and 'false'
/// successors of \p MBB's terminating conditional branch up into MBB, at the
/// position computed by findHoistingInsertPosAndDeps. Only applies when MBB
/// is the sole predecessor of both successors. \returns true if anything
/// was hoisted.
1955bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
1956  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1957  SmallVector<MachineOperand, 4> Cond;
1958  if (TII->analyzeBranch(MBB&: *MBB, TBB, FBB, Cond, AllowModify: true) || !TBB || Cond.empty())
1959    return false;
1960
1961  if (!FBB) FBB = findFalseBlock(BB: MBB, TrueBB: TBB);
1962  if (!FBB)
1963    // Malformed bcc? True and false blocks are the same?
1964    return false;
1965
1966  // Restrict the optimization to cases where MBB is the only predecessor,
1967  // it is an obvious win.
1968  if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
1969    return false;
1970
1971  // Find a suitable position to hoist the common instructions to. Also figure
1972  // out which registers are used or defined by instructions from the insertion
1973  // point to the end of the block.
1974  SmallSet<Register, 4> Uses, Defs;
1975  MachineBasicBlock::iterator Loc =
1976      findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
1977  if (Loc == MBB->end())
1978    return false;
1979
1980  bool HasDups = false;
  // ActiveDefsSet: registers defined by already-accepted instructions whose
  // values are still live at the current scan point (kills remove them).
  // AllDefsSet: every register any accepted instruction has defined so far.
1981  SmallSet<Register, 4> ActiveDefsSet, AllDefsSet;
1982  MachineBasicBlock::iterator TIB = TBB->begin();
1983  MachineBasicBlock::iterator FIB = FBB->begin();
1984  MachineBasicBlock::iterator TIE = TBB->end();
1985  MachineBasicBlock::iterator FIE = FBB->end();
1986  MachineFunction &MF = *TBB->getParent();
  // Walk TBB and FBB in lockstep, accepting instructions while they remain
  // identical and safe to hoist.
1987  while (TIB != TIE && FIB != FIE) {
1988    // Skip dbg_value instructions. These do not count.
1989    TIB = skipDebugInstructionsForward(It: TIB, End: TIE, SkipPseudoOp: false);
1990    FIB = skipDebugInstructionsForward(It: FIB, End: FIE, SkipPseudoOp: false);
1991    if (TIB == TIE || FIB == FIE)
1992      break;
1993
1994    if (!TIB->isIdenticalTo(Other: *FIB, Check: MachineInstr::CheckKillDead))
1995      break;
1996
1997    if (TII->isPredicated(MI: *TIB))
1998      // Hard to reason about register liveness with predicated instruction.
1999      break;
2000
2001    if (!TII->isSafeToMove(MI: *TIB, MBB: TBB, MF))
2002      // Don't hoist the instruction if it isn't safe to move.
2003      break;
2004
2005    bool IsSafe = true;
2006    for (MachineOperand &MO : TIB->operands()) {
2007      // Don't attempt to hoist instructions with register masks.
2008      if (MO.isRegMask()) {
2009        IsSafe = false;
2010        break;
2011      }
2012      if (!MO.isReg())
2013        continue;
2014      Register Reg = MO.getReg();
2015      if (!Reg)
2016        continue;
2017      if (MO.isDef()) {
2018        if (Uses.count(V: Reg)) {
2019          // Avoid clobbering a register that's used by the instruction at
2020          // the point of insertion.
2021          IsSafe = false;
2022          break;
2023        }
2024
2025        if (Defs.count(V: Reg) && !MO.isDead()) {
2026          // Don't hoist the instruction if the def would be clobbered by the
2027          // instruction at the point of insertion. FIXME: This is overly
2028          // conservative. It should be possible to hoist the instructions
2029          // in BB2 in the following example:
2030          // BB1:
2031          // r1, eflag = op1 r2, r3
2032          // brcc eflag
2033          //
2034          // BB2:
2035          // r1 = op2, ...
2036          // = op3, killed r1
2037          IsSafe = false;
2038          break;
2039        }
2040      } else if (!ActiveDefsSet.count(V: Reg)) {
2041        if (Defs.count(V: Reg)) {
2042          // Use is defined by the instruction at the point of insertion.
2043          IsSafe = false;
2044          break;
2045        }
2046
2047        if (MO.isKill() && Uses.count(V: Reg))
2048          // Kills a register that's read by the instruction at the point of
2049          // insertion. Remove the kill marker.
2050          MO.setIsKill(false);
2051      }
2052    }
2053    if (!IsSafe)
2054      break;
2055
2056    bool DontMoveAcrossStore = true;
2057    if (!TIB->isSafeToMove(SawStore&: DontMoveAcrossStore))
2058      break;
2059
2060    // Remove kills from ActiveDefsSet, these registers had short live ranges.
2061    for (const MachineOperand &MO : TIB->all_uses()) {
2062      if (!MO.isKill())
2063        continue;
2064      Register Reg = MO.getReg();
2065      if (!Reg)
2066        continue;
2067      if (!AllDefsSet.count(V: Reg)) {
2068        continue;
2069      }
2070      if (Reg.isPhysical()) {
2071        for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
2072          ActiveDefsSet.erase(V: *AI);
2073      } else {
2074        ActiveDefsSet.erase(V: Reg);
2075      }
2076    }
2077
2078    // Track local defs so we can update liveins.
2079    for (const MachineOperand &MO : TIB->all_defs()) {
2080      if (MO.isDead())
2081        continue;
2082      Register Reg = MO.getReg();
2083      if (!Reg || Reg.isVirtual())
2084        continue;
2085      addRegAndItsAliases(Reg, TRI, Set&: ActiveDefsSet);
2086      addRegAndItsAliases(Reg, TRI, Set&: AllDefsSet);
2087    }
2088
2089    HasDups = true;
2090    ++TIB;
2091    ++FIB;
2092  }
2093
2094  if (!HasDups)
2095    return false;
2096
2097  // Hoist the instructions from [T.begin, TIB) and then delete [F.begin, FIB).
2098  // If we're hoisting from a single block then just splice. Else step through
2099  // and merge the debug locations.
2100  if (TBB == FBB) {
2101    MBB->splice(Where: Loc, Other: TBB, From: TBB->begin(), To: TIB);
2102  } else {
2103    // Merge the debug locations, and hoist and kill the debug instructions from
2104    // both branches. FIXME: We could probably try harder to preserve some debug
2105    // instructions (but at least this isn't producing wrong locations).
2106    MachineInstrBuilder MIRBuilder(*MBB->getParent(), Loc);
2107    auto HoistAndKillDbgInstr = [MBB, Loc](MachineBasicBlock::iterator DI) {
2108      assert(DI->isDebugInstr() && "Expected a debug instruction");
      // A DBG_INSTR_REF's referenced instruction is going away: rewrite it as
      // an undef DBG_VALUE at the hoist point so the variable reads as
      // "optimized out" rather than referencing a deleted instruction.
2109      if (DI->isDebugRef()) {
2110        const TargetInstrInfo *TII =
2111            MBB->getParent()->getSubtarget().getInstrInfo();
2112        const MCInstrDesc &DBGV = TII->get(Opcode: TargetOpcode::DBG_VALUE);
2113        DI = BuildMI(MF&: *MBB->getParent(), DL: DI->getDebugLoc(), MCID: DBGV, IsIndirect: false, Reg: 0,
2114                     Variable: DI->getDebugVariable(), Expr: DI->getDebugExpression());
2115        MBB->insert(I: Loc, MI: &*DI);
2116        return;
2117      }
2118      // Deleting a DBG_PHI results in an undef at the referenced DBG_INSTR_REF.
2119      if (DI->isDebugPHI()) {
2120        DI->eraseFromParent();
2121        return;
2122      }
2123      // Move DBG_LABELs without modifying them. Set DBG_VALUEs undef.
2124      if (!DI->isDebugLabel())
2125        DI->setDebugValueUndef();
2126      DI->moveBefore(MovePos: &*Loc);
2127    };
2128
2129    // TIB and FIB point to the end of the regions to hoist/merge in TBB and
2130    // FBB.
2131    MachineBasicBlock::iterator FE = FIB;
2132    MachineBasicBlock::iterator FI = FBB->begin();
2133    for (MachineBasicBlock::iterator TI :
2134         make_early_inc_range(Range: make_range(x: TBB->begin(), y: TIB))) {
2135      // Hoist and kill debug instructions from FBB. After this loop FI points
2136      // to the next non-debug instruction to hoist (checked in assert after the
2137      // TBB debug instruction handling code).
2138      while (FI != FE && FI->isDebugInstr())
2139        HoistAndKillDbgInstr(FI++);
2140
2141      // Kill debug instructions before moving.
2142      if (TI->isDebugInstr()) {
2143        HoistAndKillDbgInstr(TI);
2144        continue;
2145      }
2146
2147      // FI and TI now point to identical non-debug instructions.
2148      assert(FI != FE && "Unexpected end of FBB range");
2149      // Pseudo probes are excluded from the range when identifying foldable
2150      // instructions, so we don't expect to see one now.
2151      assert(!TI->isPseudoProbe() && "Unexpected pseudo probe in range");
2152      // NOTE: The loop above checks CheckKillDead but we can't do that here as
2153      // it modifies some kill markers after the check.
2154      assert(TI->isIdenticalTo(*FI, MachineInstr::CheckDefs) &&
2155             "Expected non-debug lockstep");
2156
2157      // Merge debug locs on hoisted instructions.
2158      TI->setDebugLoc(
2159          DILocation::getMergedLocation(LocA: TI->getDebugLoc(), LocB: FI->getDebugLoc()));
2160      TI->moveBefore(MovePos: &*Loc);
2161      ++FI;
2162    }
2163  }
2164
  // The FBB copy of the hoisted range is now redundant; delete it.
2165  FBB->erase(I: FBB->begin(), E: FIB);
2166
2167  if (UpdateLiveIns)
2168    fullyRecomputeLiveIns(MBBs: {TBB, FBB});
2169
2170  ++NumHoist;
2171  return true;
2172}
2173