//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass moves instructions into successor blocks when possible, so that
// they aren't executed on paths where their results aren't needed.
//
// This pass is not intended to be a replacement or a complete alternative
// for an LLVM-IR-level sinking pass. It is only designed to sink simple
// constructs that are not exposed before lowering and instruction selection.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-sink"

static cl::opt<bool>
    SplitEdges("machine-sink-split",
               cl::desc("Split critical edges during machine sinking"),
               cl::init(true), cl::Hidden);

static cl::opt<bool>
    UseBlockFreqInfo("machine-sink-bfi",
                     cl::desc("Use block frequency info to find successors to sink"),
                     cl::init(true), cl::Hidden);

static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
    "machine-sink-split-probability-threshold",
    cl::desc(
        "Percentage threshold for splitting single-instruction critical edge. "
        "If the branch probability is higher than this threshold, we allow "
        "speculative execution of up to 1 instruction to avoid branching to "
        "the split critical edge"),
    cl::init(40), cl::Hidden);

static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold(
    "machine-sink-load-instrs-threshold",
    cl::desc("Do not try to find an aliasing store for a load if there is an "
             "in-path block whose instruction count is higher than this "
             "threshold."),
    cl::init(2000), cl::Hidden);

static cl::opt<unsigned> SinkLoadBlocksThreshold(
    "machine-sink-load-blocks-threshold",
    cl::desc("Do not try to find an aliasing store for a load if the number "
             "of blocks on the straight-line path is higher than this "
             "threshold."),
    cl::init(20), cl::Hidden);

static cl::opt<bool>
    SinkInstsIntoCycle("sink-insts-to-avoid-spills",
                       cl::desc("Sink instructions into cycles to avoid "
                                "register spills"),
                       cl::init(false), cl::Hidden);

static cl::opt<unsigned> SinkIntoCycleLimit(
    "machine-sink-cycle-limit",
    cl::desc(
        "The maximum number of instructions considered for cycle sinking."),
    cl::init(50), cl::Hidden);

STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");

namespace {

  class MachineSinking : public MachineFunctionPass {
    const TargetSubtargetInfo *STI = nullptr;
    const TargetInstrInfo *TII = nullptr;
    const TargetRegisterInfo *TRI = nullptr;
    MachineRegisterInfo *MRI = nullptr;      // Machine register information
    MachineDominatorTree *DT = nullptr;      // Machine dominator tree
    MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
    MachineCycleInfo *CI = nullptr;
    MachineBlockFrequencyInfo *MBFI = nullptr;
    const MachineBranchProbabilityInfo *MBPI = nullptr;
    AliasAnalysis *AA = nullptr;
    RegisterClassInfo RegClassInfo;

    // Remember which edges have been considered for breaking.
    SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
        CEBCandidates;
    // Memorize the register that also wanted to sink into the same block along
    // a different critical edge.
    // {register to sink, sink-to block} -> the first sink-from block.
    // We're recording the first sink-from block because that (critical) edge
    // was deferred until we see another register that's going to sink into the
    // same block.
    DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
        CEMergeCandidates;
    // Remember which edges we are about to split. This is different from
    // CEBCandidates, which only records edges that have been considered:
    // edges recorded here will actually be split.
    SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;

    DenseSet<Register> RegsToClearKillFlags;

    using AllSuccsCache =
        SmallDenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;

    /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
    /// post-dominated by another DBG_VALUE of the same variable location.
    /// This is necessary to detect sequences such as:
    ///     %0 = someinst
    ///     DBG_VALUE %0, !123, !DIExpression()
    ///     %1 = anotherinst
    ///     DBG_VALUE %1, !123, !DIExpression()
    /// Where if %0 were to sink, the DBG_VALUE should not sink with it, as
    /// that would re-order assignments.
    using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;

    /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
    /// debug instructions to sink.
    SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;

    /// Record of debug variables that have had their locations set in the
    /// current block.
    DenseSet<DebugVariable> SeenDbgVars;

    DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
        HasStoreCache;

    DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
             SmallVector<MachineInstr *>>
        StoreInstrCache;

    /// Cached BB's register pressure.
    DenseMap<const MachineBasicBlock *, std::vector<unsigned>>
        CachedRegisterPressure;

    bool EnableSinkAndFold;

  public:
    static char ID; // Pass identification

    MachineSinking() : MachineFunctionPass(ID) {
      initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      MachineFunctionPass::getAnalysisUsage(AU);
      AU.addRequired<AAResultsWrapperPass>();
      AU.addRequired<MachineDominatorTreeWrapperPass>();
      AU.addRequired<MachinePostDominatorTreeWrapperPass>();
      AU.addRequired<MachineCycleInfoWrapperPass>();
      AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
      AU.addPreserved<MachineCycleInfoWrapperPass>();
      AU.addPreserved<MachineLoopInfoWrapperPass>();
      if (UseBlockFreqInfo)
        AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
      AU.addRequired<TargetPassConfig>();
    }

    void releaseMemory() override {
      CEBCandidates.clear();
      CEMergeCandidates.clear();
    }

  private:
    bool ProcessBlock(MachineBasicBlock &MBB);
    void ProcessDbgInst(MachineInstr &MI);
    bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                    MachineBasicBlock *To, bool BreakPHIEdge);
    bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                     MachineBasicBlock *To,
                                     MachineBasicBlock *&DeferredFromBlock);

    bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
                         MachineInstr &MI);

    /// Postpone the splitting of the given critical
    /// edge (\p From, \p To).
    ///
    /// We do not split the edges on the fly. Indeed, this invalidates
    /// the dominance information and thus triggers a lot of updates
    /// of that information underneath.
    /// Instead, we postpone all the splits after each iteration of
    /// the main loop. That way, the information is at least valid
    /// for the lifetime of an iteration.
    ///
    /// \return True if the edge is marked as toSplit, false otherwise.
    /// False can be returned if, for instance, this is not profitable.
    bool PostponeSplitCriticalEdge(MachineInstr &MI,
                                   MachineBasicBlock *From,
                                   MachineBasicBlock *To,
                                   bool BreakPHIEdge);
    bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                         AllSuccsCache &AllSuccessors);
    /// If we sink a COPY inst, some debug users of its destination may no
    /// longer be dominated by the COPY, and will eventually be dropped.
    /// This is easily rectified by forwarding the non-dominated debug uses
    /// to the copy source.
    void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
                                       MachineBasicBlock *TargetBlock);
    bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
                                 MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
                                 bool &LocalUse) const;
    MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI,
                                        MachineBasicBlock *MBB,
                                        bool &BreakPHIEdge,
                                        AllSuccsCache &AllSuccessors);

    void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
                                 SmallVectorImpl<MachineInstr *> &Candidates);
    bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);

    bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock *SuccToSinkTo,
                              AllSuccsCache &AllSuccessors);

    bool PerformTrivialForwardCoalescing(MachineInstr &MI,
                                         MachineBasicBlock *MBB);

    bool PerformSinkAndFold(MachineInstr &MI, MachineBasicBlock *MBB);

    SmallVector<MachineBasicBlock *, 4> &
    GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                           AllSuccsCache &AllSuccessors) const;

    std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB);

    bool registerPressureSetExceedsLimit(unsigned NRegs,
                                         const TargetRegisterClass *RC,
                                         const MachineBasicBlock &MBB);
  };

} // end anonymous namespace

char MachineSinking::ID = 0;

char &llvm::MachineSinkingID = MachineSinking::ID;

INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
                      "Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
                    "Machine code sinking", false, false)

/// Return true if a target defined block prologue instruction interferes
/// with a sink candidate.
static bool blockPrologueInterferes(const MachineBasicBlock *BB,
                                    MachineBasicBlock::const_iterator End,
                                    const MachineInstr &MI,
                                    const TargetRegisterInfo *TRI,
                                    const TargetInstrInfo *TII,
                                    const MachineRegisterInfo *MRI) {
  for (MachineBasicBlock::const_iterator PI = BB->getFirstNonPHI(); PI != End;
       ++PI) {
    // Only check target defined prologue instructions
    if (!TII->isBasicBlockPrologue(*PI))
      continue;
    for (auto &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      if (MO.isUse()) {
        if (Reg.isPhysical() &&
            (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
          continue;
        if (PI->modifiesRegister(Reg, TRI))
          return true;
      } else {
        if (PI->readsRegister(Reg, TRI))
          return true;
        // Check for interference with non-dead defs
        auto *DefOp = PI->findRegisterDefOperand(Reg, TRI, false, true);
        if (DefOp && !DefOp->isDead())
          return true;
      }
    }
  }

  return false;
}

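// For illustration, trivial forward coalescing turns a pattern like the
// following (simplified MIR; the opcodes are generic placeholders, not from
// any real target) into direct uses of the copy source:
//
//   %1:gpr = SOME_OP %0
//   %2:gpr = COPY %1      ; the only non-debug use of %1
//   ...     = USE %2
// =>
//   %1:gpr = SOME_OP %0
//   ...     = USE %1
//
// provided %1 and %2 are virtual registers of the same register class.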
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
                                                     MachineBasicBlock *MBB) {
  if (!MI.isCopy())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();
  if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
      !MRI->hasOneNonDBGUse(SrcReg))
    return false;

  const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
  const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
  if (SRC != DRC)
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
  if (DefMI->isCopyLike())
    return false;
  LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
  LLVM_DEBUG(dbgs() << "*** to: " << MI);
  MRI->replaceRegWith(DstReg, SrcReg);
  MI.eraseFromParent();

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  MRI->clearKillFlags(SrcReg);

  ++NumCoalesces;
  return true;
}

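// For illustration, sink-and-fold can rewrite a pattern like the following
// (simplified MIR; the opcodes are generic placeholders, not from any real
// target):
//
//   bb.0:
//     %addr = ADD_IMM %base, 8   ; as cheap as a move, single vreg def
//   bb.1:
//     %v = LOAD %addr            ; %addr only feeds the address computation
// =>
//   bb.1:
//     %v = LOAD %base, 8         ; %addr folded into the addressing mode
//
// eliminating %addr's cross-block live range instead of merely moving the
// defining instruction.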
bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
                                        MachineBasicBlock *MBB) {
  if (MI.isCopy() || MI.mayLoadOrStore() ||
      MI.getOpcode() == TargetOpcode::REG_SEQUENCE)
    return false;

  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  bool SawStore = true;
  if (!MI.isSafeToMove(AA, SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't sink defs/uses of hard registers or if the instruction defines more
  // than one register.
  // Don't sink more than two register uses - it'll cover most of the cases and
  // greatly simplifies the register pressure checks.
  Register DefReg;
  Register UsedRegA, UsedRegB;
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isImm() || MO.isRegMask() || MO.isRegLiveOut() || MO.isMetadata() ||
        MO.isMCSymbol() || MO.isDbgInstrRef() || MO.isCFIIndex() ||
        MO.isIntrinsicID() || MO.isPredicate() || MO.isShuffleMask())
      continue;
    if (!MO.isReg())
      return false;

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isVirtual()) {
      if (MO.isDef()) {
        if (DefReg)
          return false;
        DefReg = Reg;
        continue;
      }

      if (UsedRegA == 0)
        UsedRegA = Reg;
      else if (UsedRegB == 0)
        UsedRegB = Reg;
      else
        return false;
      continue;
    }

    if (Reg.isPhysical() && MO.isUse() &&
        (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
      continue;

    return false;
  }

  // Scan uses of the destination register. Every use, except the last, must be
  // a copy, with a chain of copies terminating with either a copy into a hard
  // register, or a load/store instruction where the use is part of the
  // address (*not* the stored value).
  using SinkInfo = std::pair<MachineInstr *, ExtAddrMode>;
  SmallVector<SinkInfo> SinkInto;
  SmallVector<Register> Worklist;

  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
  const TargetRegisterClass *RCA =
      UsedRegA == 0 ? nullptr : MRI->getRegClass(UsedRegA);
  const TargetRegisterClass *RCB =
      UsedRegB == 0 ? nullptr : MRI->getRegClass(UsedRegB);

  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();

    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
      ExtAddrMode MaybeAM;
      MachineInstr &UseInst = *MO.getParent();
      if (UseInst.isCopy()) {
        Register DstReg;
        if (const MachineOperand &O = UseInst.getOperand(0); O.isReg())
          DstReg = O.getReg();
        if (DstReg == 0)
          return false;
        if (DstReg.isVirtual()) {
          Worklist.push_back(DstReg);
          continue;
        }
        // If we are going to replace a copy, the original instruction must be
        // as cheap as a copy.
        if (!TII->isAsCheapAsAMove(MI))
          return false;
        // The hard register must be in the register class of the original
        // instruction's destination register.
        if (!RC->contains(DstReg))
          return false;
      } else if (UseInst.mayLoadOrStore()) {
        ExtAddrMode AM;
        if (!TII->canFoldIntoAddrMode(UseInst, Reg, MI, AM))
          return false;
        MaybeAM = AM;
      } else {
        return false;
      }

      if (UseInst.getParent() != MI.getParent()) {
        // If the register class of the register we are replacing is a
        // superset of any of the register classes of the operands of the
        // materialized instruction, don't consider that live range extended.
        const TargetRegisterClass *RCS = MRI->getRegClass(Reg);
        if (RCA && RCA->hasSuperClassEq(RCS))
          RCA = nullptr;
        else if (RCB && RCB->hasSuperClassEq(RCS))
          RCB = nullptr;
        if (RCA || RCB) {
          if (RCA == nullptr) {
            RCA = RCB;
            RCB = nullptr;
          }

          unsigned NRegs = !!RCA + !!RCB;
          if (RCA == RCB)
            RCB = nullptr;

          // Check we don't exceed register pressure at the destination.
          const MachineBasicBlock &MBB = *UseInst.getParent();
          if (RCB == nullptr) {
            if (registerPressureSetExceedsLimit(NRegs, RCA, MBB))
              return false;
          } else if (registerPressureSetExceedsLimit(1, RCA, MBB) ||
                     registerPressureSetExceedsLimit(1, RCB, MBB)) {
            return false;
          }
        }
      }

      SinkInto.emplace_back(&UseInst, MaybeAM);
    }
  }

  if (SinkInto.empty())
    return false;

  // Now we know we can fold the instruction in all its users.
  for (auto &[SinkDst, MaybeAM] : SinkInto) {
    MachineInstr *New = nullptr;
    LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
               SinkDst->dump());
    if (SinkDst->isCopy()) {
      // TODO: After performing the sink-and-fold, the original instruction is
      // deleted. Its value is still available (in a hard register), so if
      // there are debug instructions which refer to the (now deleted) virtual
      // register they could be updated to refer to the hard register, in
      // principle. However, it's not clear how to do that, moreover in some
      // cases the debug instructions may need to be replicated proportionally
      // to the number of the COPY instructions replaced and in some extreme
      // cases we can end up with quadratic increase in the number of debug
      // instructions.

      // Sink a copy of the instruction, replacing a COPY instruction.
      MachineBasicBlock::iterator InsertPt = SinkDst->getIterator();
      Register DstReg = SinkDst->getOperand(0).getReg();
      TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI, *TRI);
      New = &*std::prev(InsertPt);
      if (!New->getDebugLoc())
        New->setDebugLoc(SinkDst->getDebugLoc());

      // The operand registers of the "sunk" instruction have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (UsedRegA)
        MRI->clearKillFlags(UsedRegA);
      if (UsedRegB)
        MRI->clearKillFlags(UsedRegB);
    } else {
      // Fold instruction into the addressing mode of a memory instruction.
      New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);

      // The registers of the addressing mode may have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
      if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
    }
    LLVM_DEBUG(dbgs() << "yielding"; New->dump());
    // Clear the StoreInstrCache, since we may invalidate it by erasing.
    if (SinkDst->mayStore() && !SinkDst->hasOrderedMemoryRef())
      StoreInstrCache.clear();
    SinkDst->eraseFromParent();
  }

  // Collect operands that need to be cleaned up because the registers no
  // longer exist (in COPYs and debug instructions). We cannot delete
  // instructions or clear operands while traversing register uses.
  SmallVector<MachineOperand *> Cleanup;
  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();
    for (MachineOperand &MO : MRI->use_operands(Reg)) {
      MachineInstr *U = MO.getParent();
      assert((U->isCopy() || U->isDebugInstr()) &&
             "Only debug uses and copies must remain");
      if (U->isCopy())
        Worklist.push_back(U->getOperand(0).getReg());
      Cleanup.push_back(&MO);
    }
  }

  // Delete the dead COPYs and clear operands in debug instructions
  for (MachineOperand *MO : Cleanup) {
    MachineInstr *I = MO->getParent();
    if (I->isCopy()) {
      I->eraseFromParent();
    } else {
      MO->setReg(0);
      MO->setSubReg(0);
    }
  }

  MI.eraseFromParent();
  return true;
}

/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move def
/// after uses.
bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
                                             MachineBasicBlock *MBB,
                                             MachineBasicBlock *DefMBB,
                                             bool &BreakPHIEdge,
                                             bool &LocalUse) const {
  assert(Reg.isVirtual() && "Only makes sense for vregs");

  // Ignore debug uses because debug info doesn't affect the code.
  if (MRI->use_nodbg_empty(Reg))
    return true;

609
610 // BreakPHIEdge is true if all the uses are in the successor MBB being sunken
611 // into and they are all PHI nodes. In this case, machine-sink must break
612 // the critical edge first. e.g.
613 //
614 // %bb.1:
615 // Predecessors according to CFG: %bb.0
616 // ...
617 // %def = DEC64_32r %x, implicit-def dead %eflags
618 // ...
619 // JE_4 <%bb.37>, implicit %eflags
620 // Successors according to CFG: %bb.37 %bb.2
621 //
622 // %bb.2:
623 // %p = PHI %y, %bb.0, %def, %bb.1
624 if (all_of(Range: MRI->use_nodbg_operands(Reg), P: [&](MachineOperand &MO) {
625 MachineInstr *UseInst = MO.getParent();
626 unsigned OpNo = MO.getOperandNo();
627 MachineBasicBlock *UseBlock = UseInst->getParent();
628 return UseBlock == MBB && UseInst->isPHI() &&
629 UseInst->getOperand(i: OpNo + 1).getMBB() == DefMBB;
630 })) {
631 BreakPHIEdge = true;
632 return true;
633 }

  for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
    // Determine the block of the use.
    MachineInstr *UseInst = MO.getParent();
    unsigned OpNo = &MO - &UseInst->getOperand(0);
    MachineBasicBlock *UseBlock = UseInst->getParent();
    if (UseInst->isPHI()) {
      // PHI nodes use the operand in the predecessor block, not the block with
      // the PHI.
      UseBlock = UseInst->getOperand(OpNo + 1).getMBB();
    } else if (UseBlock == DefMBB) {
      LocalUse = true;
      return false;
    }

    // Check that it dominates.
    if (!DT->dominates(MBB, UseBlock))
      return false;
  }

  return true;
}

/// Return true if this machine instruction loads from global offset table or
/// constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert(MI.mayLoad() && "Expected MI that loads!");

  // If we lost memory operands, conservatively assume that the instruction
  // reads from everything.
  if (MI.memoperands_empty())
    return true;

  for (MachineMemOperand *MemOp : MI.memoperands())
    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
      if (PSV->isGOT() || PSV->isConstantPool())
        return true;

  return false;
}

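/// Collect instructions in \p BB (expected to be the cycle preheader) that
/// are candidates for sinking into \p Cycle. A candidate must be sinkable
/// according to the target, cycle-invariant, safe to move across stores,
/// not convergent, and must define a register with a single definition;
/// loads qualify only if they read from the GOT or a constant pool, i.e.
/// memory that cannot be clobbered inside the cycle.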
void MachineSinking::FindCycleSinkCandidates(
    MachineCycle *Cycle, MachineBasicBlock *BB,
    SmallVectorImpl<MachineInstr *> &Candidates) {
  for (auto &MI : *BB) {
    LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
    if (!TII->shouldSink(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
                           "target\n");
      continue;
    }
    if (!isCycleInvariant(Cycle, MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
      continue;
    }
    bool DontMoveAcrossStore = true;
    if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
      continue;
    }
    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Load not from GOT or constant pool, "
                           "not sinking.\n");
      continue;
    }
    if (MI.isConvergent())
      continue;

    const MachineOperand &MO = MI.getOperand(0);
    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
      continue;
    if (!MRI->hasOneDef(MO.getReg()))
      continue;

    LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
    Candidates.push_back(&MI);
  }
}

bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");

  STI = &MF.getSubtarget();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &MF.getRegInfo();
  DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
  CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
  MBFI = UseBlockFreqInfo
             ? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
             : nullptr;
  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  RegClassInfo.runOnMachineFunction(MF);
  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
  EnableSinkAndFold = PassConfig->getEnableSinkAndFold();

  bool EverMadeChange = false;

  while (true) {
    bool MadeChange = false;

    // Process all basic blocks.
    CEBCandidates.clear();
    CEMergeCandidates.clear();
    ToSplit.clear();
    for (auto &MBB : MF)
      MadeChange |= ProcessBlock(MBB);

    // If we have anything we marked as toSplit, split it now.
    for (const auto &Pair : ToSplit) {
      auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
      if (NewSucc != nullptr) {
        LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
                          << printMBBReference(*Pair.first) << " -- "
                          << printMBBReference(*NewSucc) << " -- "
                          << printMBBReference(*Pair.second) << '\n');
        if (MBFI)
          MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);

        MadeChange = true;
        ++NumSplit;
        CI->splitCriticalEdge(Pair.first, Pair.second, NewSucc);
      } else
        LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
    }
    // If this iteration over the code changed anything, keep iterating.
    if (!MadeChange) break;
    EverMadeChange = true;
  }

  if (SinkInstsIntoCycle) {
    SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_begin(),
                                          CI->toplevel_end());
    for (auto *Cycle : Cycles) {
      MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
      if (!Preheader) {
        LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
        continue;
      }
      SmallVector<MachineInstr *, 8> Candidates;
      FindCycleSinkCandidates(Cycle, Preheader, Candidates);

      // Walk the candidates in reverse order so that we start with the use
      // of a def-use chain, if there is any.
      // TODO: Sort the candidates using a cost-model.
      unsigned i = 0;
      for (MachineInstr *I : llvm::reverse(Candidates)) {
        if (i++ == SinkIntoCycleLimit) {
          LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to "
                               "be analysed.");
          break;
        }

        if (!SinkIntoCycle(Cycle, *I))
          break;
        EverMadeChange = true;
        ++NumCycleSunk;
      }
    }
  }

  HasStoreCache.clear();
  StoreInstrCache.clear();

  // Now clear any kill flags for recorded registers.
  for (auto I : RegsToClearKillFlags)
    MRI->clearKillFlags(I);
  RegsToClearKillFlags.clear();

  return EverMadeChange;
}

bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
  if ((!EnableSinkAndFold && MBB.succ_size() <= 1) || MBB.empty())
    return false;

  // Don't bother sinking code out of unreachable blocks. In addition to being
  // unprofitable, it can also lead to infinite looping, because in an
  // unreachable cycle there may be nowhere to stop.
  if (!DT->isReachableFromEntry(&MBB)) return false;

  bool MadeChange = false;

  // Cache all successors, sorted by frequency info and cycle depth.
  AllSuccsCache AllSuccessors;

  // Walk the basic block bottom-up. Remember if we saw a store.
  MachineBasicBlock::iterator I = MBB.end();
  --I;
  bool ProcessedBegin, SawStore = false;
  do {
    MachineInstr &MI = *I; // The instruction to sink.

    // Predecrement I (if it's not begin) so that it isn't invalidated by
    // sinking.
    ProcessedBegin = I == MBB.begin();
    if (!ProcessedBegin)
      --I;

    if (MI.isDebugOrPseudoInstr()) {
      if (MI.isDebugValue())
        ProcessDbgInst(MI);
      continue;
    }

    if (EnableSinkAndFold && PerformSinkAndFold(MI, &MBB)) {
      MadeChange = true;
      continue;
    }

    // Can't sink anything out of a block that has less than two successors.
    if (MBB.succ_size() <= 1)
      continue;

    if (PerformTrivialForwardCoalescing(MI, &MBB)) {
      MadeChange = true;
      continue;
    }

    if (SinkInstruction(MI, SawStore, AllSuccessors)) {
      ++NumSunk;
      MadeChange = true;
    }

    // If we just processed the first instruction in the block, we're done.
  } while (!ProcessedBegin);

  SeenDbgUsers.clear();
  SeenDbgVars.clear();
  // Drop the cached register pressure: sinking may have changed the pressure
  // of blocks we sank into, and it will be recomputed on the next query.
  CachedRegisterPressure.clear();
  return MadeChange;
}

void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
  // When we see DBG_VALUEs for registers, record any vreg it reads, so that
  // we know what to sink if the vreg def sinks.
  assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");

  DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
                    MI.getDebugLoc()->getInlinedAt());
  bool SeenBefore = SeenDbgVars.contains(Var);

  for (MachineOperand &MO : MI.debug_operands()) {
    if (MO.isReg() && MO.getReg().isVirtual())
      SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
  }

  // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
  SeenDbgVars.insert(Var);
}

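// For illustration, the CEMergeCandidates bookkeeping below handles CFG
// shapes like this (simplified sketch):
//
//   bb.0 --critical--> bb.2 <--critical-- bb.1
//
// If a cheap def of some value wants to sink from bb.0 into bb.2, splitting
// is deferred and {register, bb.2} -> bb.0 is recorded. If the same register
// later wants to sink into bb.2 along the other critical edge, both edges
// are considered worth splitting, and the deferred source block (bb.0) is
// reported back through DeferredFromBlock.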
bool MachineSinking::isWorthBreakingCriticalEdge(
    MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
    MachineBasicBlock *&DeferredFromBlock) {
  // FIXME: Need much better heuristics.

  // If the pass has already considered breaking this edge (during this pass
  // through the function), then let's go ahead and break it. This means
  // sinking multiple "cheap" instructions into the same block.
  if (!CEBCandidates.insert(std::make_pair(From, To)).second)
    return true;

  if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
    return true;

  // Check and record the register and the destination block we want to sink
  // into. Note that we want to do this before the next check on branch
  // probability, because we want to record the initial candidate even if it
  // is on a hot edge, so that other candidates that are not on hot edges can
  // be sunk as well.
  for (const auto &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
    auto Key = std::make_pair(SrcReg, To);
    auto Res = CEMergeCandidates.try_emplace(Key, From);
    // We wanted to sink the same register into the same block, consider it to
    // be profitable.
    if (!Res.second) {
      // Return the source block that was previously held off.
      DeferredFromBlock = Res.first->second;
      return true;
    }
  }

  if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
      BranchProbability(SplitEdgeProbabilityThreshold, 100))
    return true;

  // MI is cheap, we probably don't want to break the critical edge for it.
  // However, if this would allow some definitions of its source operands
  // to be sunk then it's probably worth it.
  for (const MachineOperand &MO : MI.all_uses()) {
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    // We don't move live definitions of physical registers,
    // so sinking their uses won't enable any opportunities.
    if (Reg.isPhysical())
      continue;

    // If this instruction is the only user of a virtual register,
    // check if breaking the edge will enable sinking
    // both this instruction and the defining instruction.
    if (MRI->hasOneNonDBGUse(Reg)) {
      // If the definition resides in same MBB,
      // claim it's likely we can sink these together.
      // If definition resides elsewhere, we aren't
      // blocking it from being sunk so don't break the edge.
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (DefMI->getParent() == MI.getParent())
        return true;
    }
  }

  return false;
}

bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
                                                MachineBasicBlock *FromBB,
                                                MachineBasicBlock *ToBB,
                                                bool BreakPHIEdge) {
  // Avoid breaking back edge. From == To means backedge for single BB cycle.
  if (!SplitEdges || FromBB == ToBB || !FromBB->isSuccessor(ToBB))
    return false;

  MachineCycle *FromCycle = CI->getCycle(FromBB);
  MachineCycle *ToCycle = CI->getCycle(ToBB);

  // Check for backedges of more "complex" cycles.
  if (FromCycle == ToCycle && FromCycle &&
      (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
    return false;

  // It's not always legal to break critical edges and sink the computation
  // to the edge.
  //
  // %bb.1:
  //   v1024
  //   Beq %bb.3
  //   <fallthrough>
  // %bb.2:
  //   ... no uses of v1024
  //   <fallthrough>
  // %bb.3:
  //   ...
  //   = v1024
  //
  // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
  //
  // %bb.1:
  //   ...
  //   Bne %bb.2
  // %bb.4:
  //   v1024 =
  //   B %bb.3
  // %bb.2:
  //   ... no uses of v1024
  //   <fallthrough>
  // %bb.3:
  //   ...
  //   = v1024
  //
  // This is incorrect since v1024 is not computed along the
  // %bb.1->%bb.2->%bb.3 flow. We need to ensure the new basic block where the
  // computation is sunk to dominates all the uses.
  // It's only legal to break critical edge and sink the computation to the
  // new block if all the predecessors of "To", except for "From", are
  // not dominated by "From". Given SSA property, this means these
  // predecessors are dominated by "To".
  //
  // There is no need to do this check if all the uses are PHI nodes. PHI
  // sources are only defined on the specific predecessor edges.
  if (!BreakPHIEdge) {
    for (MachineBasicBlock *Pred : ToBB->predecessors())
      if (Pred != FromBB && !DT->dominates(ToBB, Pred))
        return false;
  }

  return true;
}

bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
                                               MachineBasicBlock *FromBB,
                                               MachineBasicBlock *ToBB,
                                               bool BreakPHIEdge) {
  bool Status = false;
  MachineBasicBlock *DeferredFromBB = nullptr;
  if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) {
    // If there is a DeferredFromBB, we consider FromBB only if _both_
    // of them are legal to split.
    if ((!DeferredFromBB ||
         ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
         isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
        isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
      ToSplit.insert(std::make_pair(FromBB, ToBB));
      if (DeferredFromBB)
        ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
      Status = true;
    }
  }

  return Status;
}

std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
  // To save compile time, a block's register pressure is cached in
  // CachedRegisterPressure and not updated within one ProcessBlock iteration,
  // even though sinking instructions into the block changes its actual
  // pressure.
  // FIXME: need an accurate and cheap register pressure estimation model here.
  auto RP = CachedRegisterPressure.find(&MBB);
  if (RP != CachedRegisterPressure.end())
    return RP->second;

  RegionPressure Pressure;
  RegPressureTracker RPTracker(Pressure);

  // Initialize the register pressure tracker.
  RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
                 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

  for (MachineBasicBlock::const_iterator MII = MBB.instr_end(),
                                         MIE = MBB.instr_begin();
       MII != MIE; --MII) {
    const MachineInstr &MI = *std::prev(MII);
    if (MI.isDebugInstr() || MI.isPseudoProbe())
      continue;
    RegisterOperands RegOpers;
    RegOpers.collect(MI, *TRI, *MRI, false, false);
    RPTracker.recedeSkipDebugValues();
    assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
    RPTracker.recede(RegOpers);
  }

  RPTracker.closeRegion();
  auto It = CachedRegisterPressure.insert(
      std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
  return It.first->second;
}

bool MachineSinking::registerPressureSetExceedsLimit(
    unsigned NRegs, const TargetRegisterClass *RC,
    const MachineBasicBlock &MBB) {
  unsigned Weight = NRegs * TRI->getRegClassWeight(RC).RegWeight;
  const int *PS = TRI->getRegClassPressureSets(RC);
  std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB);
  for (; *PS != -1; PS++)
    if (Weight + BBRegisterPressure[*PS] >=
        TRI->getRegPressureSetLimit(*MBB.getParent(), *PS))
      return true;
  return false;
}

/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          MachineBasicBlock *SuccToSinkTo,
                                          AllSuccsCache &AllSuccessors) {
  assert(SuccToSinkTo && "Invalid SinkTo Candidate BB");

  if (MBB == SuccToSinkTo)
    return false;

  // It is profitable if SuccToSinkTo does not post dominate current block.
  if (!PDT->dominates(SuccToSinkTo, MBB))
    return true;

  // It is profitable to sink an instruction from a deeper cycle to a shallower
  // cycle, even if the latter post-dominates the former (PR21115).
  if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
    return true;

  // Check if the only uses in the post-dominated block are PHI instructions.
  bool NonPHIUse = false;
  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
    MachineBasicBlock *UseBlock = UseInst.getParent();
    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
      NonPHIUse = true;
  }
  if (!NonPHIUse)
    return true;

  // If SuccToSinkTo post-dominates MBB, sinking may still be profitable if MI
  // can be sunk further into another block in the next round.
  bool BreakPHIEdge = false;
  // FIXME - If finding successor is compile time expensive then cache results.
  if (MachineBasicBlock *MBB2 =
          FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
    return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);

  MachineCycle *MCycle = CI->getCycle(MBB);

  // If the instruction is not inside a cycle, it is not profitable to sink MI
  // to a post-dominating block SuccToSinkTo.
  if (!MCycle)
    return false;

  // If this instruction is inside a cycle and sinking it can shorten the live
  // ranges of more registers, it is still profitable.
  for (const MachineOperand &MO : MI.operands()) {
    // Ignore non-register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      // Don't handle non-constant and non-ignorable physical register uses.
      if (MO.isUse() && !MRI->isConstantPhysReg(Reg) &&
          !TII->isIgnorableUse(MO))
        return false;
      continue;
    }

    // Users of the defs must all be dominated by SuccToSinkTo.
    if (MO.isDef()) {
      // This def register's live range is shortened after sinking.
      bool LocalUse = false;
      if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
                                   LocalUse))
        return false;
    } else {
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (!DefMI)
        continue;
      MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
      // DefMI is defined outside of the cycle. There should be no live range
      // impact for this operand. "Defined outside of the cycle" means either:
      // 1: the definition is outside of the cycle, or
      // 2: the definition is in this cycle, but it is a PHI in the cycle
      //    header.
      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
                              Cycle->getHeader() == DefMI->getParent()))
        continue;
      // The DefMI is defined inside the cycle.
      // If sinking this operand makes some register pressure set exceed limit,
      // it is not profitable.
      if (registerPressureSetExceedsLimit(1, MRI->getRegClass(Reg),
                                          *SuccToSinkTo)) {
        LLVM_DEBUG(dbgs() << "register pressure exceeds limit, not "
                             "profitable.");
        return false;
      }
    }
  }

  // If MI is in the cycle and all its operands are alive across the whole
  // cycle, or if no operand sinking makes a register pressure set exceed its
  // limit, it is profitable to sink MI.
  return true;
}

/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
/// computing it if it was not already cached.
SmallVector<MachineBasicBlock *, 4> &
MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                                       AllSuccsCache &AllSuccessors) const {
  // Do we have the sorted successors in cache?
  auto Succs = AllSuccessors.find(MBB);
  if (Succs != AllSuccessors.end())
    return Succs->second;

  SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());

  // Handle cases where sinking can happen but where the sink point isn't a
  // successor. For example:
  //
  //   x = computation
  //   if () {} else {}
  //   use x
  //
  for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
    // DomTree children of MBB that have MBB as immediate dominator are added.
    if (DTChild->getIDom()->getBlock() == MI.getParent() &&
        // Skip MBBs already added to the AllSuccs vector above.
        !MBB->isSuccessor(DTChild->getBlock()))
      AllSuccs.push_back(DTChild->getBlock());
  }

  // Sort Successors according to their cycle depth or block frequency info.
  llvm::stable_sort(
      AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
        uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
        uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
        bool HasBlockFreq = LHSFreq != 0 || RHSFreq != 0;
        return HasBlockFreq ? LHSFreq < RHSFreq
                            : CI->getCycleDepth(L) < CI->getCycleDepth(R);
      });

  auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));

  return it.first->second;
}

/// FindSuccToSinkTo - Find a successor to sink this instruction to.
MachineBasicBlock *
MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                 bool &BreakPHIEdge,
                                 AllSuccsCache &AllSuccessors) {
  assert(MBB && "Invalid MachineBasicBlock!");

  // Loop over all the operands of the specified instruction. If there is
  // anything we can't handle, bail out.

  // SuccToSinkTo - This is the successor to sink this instruction to, once we
  // decide.
  MachineBasicBlock *SuccToSinkTo = nullptr;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg()) continue; // Ignore non-register operands.

    Register Reg = MO.getReg();
    if (Reg == 0) continue;

    if (Reg.isPhysical()) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
          return nullptr;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return nullptr;
      }
    } else {
      // Virtual register uses are always safe to sink.
      if (MO.isUse()) continue;

      // If it's not safe to move defs of the register class, then abort.
      if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
        return nullptr;

      // Virtual register defs can only be sunk if all their uses are in blocks
      // dominated by one of the successors.
      if (SuccToSinkTo) {
        // If a previous operand picked a block to sink to, then this operand
        // must be sinkable to the same block.
        bool LocalUse = false;
        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
                                     BreakPHIEdge, LocalUse))
          return nullptr;

        continue;
      }

      // Otherwise, we should look at all the successors and decide which one
      // we should sink to. If we have reliable block frequency information
      // (frequency != 0) available, give successors with smaller frequencies
      // higher priority, otherwise prioritize smaller cycle depths.
      for (MachineBasicBlock *SuccBlock :
           GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
        bool LocalUse = false;
        if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
                                    BreakPHIEdge, LocalUse)) {
          SuccToSinkTo = SuccBlock;
          break;
        }
        if (LocalUse)
          // Def is used locally, it's never safe to move this def.
          return nullptr;
      }

      // If we couldn't find a block to sink to, ignore this instruction.
      if (!SuccToSinkTo)
        return nullptr;
      if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
        return nullptr;
    }
  }

  // It is not possible to sink an instruction into its own block. This can
  // happen with cycles.
  if (MBB == SuccToSinkTo)
    return nullptr;

  // It's not safe to sink instructions to EH landing pad. Control flow into
  // landing pad is implicitly defined.
  if (SuccToSinkTo && SuccToSinkTo->isEHPad())
    return nullptr;

  // It ought to be okay to sink instructions into an INLINEASM_BR target, but
  // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction
  // in the source block (which this code does not yet do). So for now, forbid
  // doing so.
  if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
    return nullptr;

  if (SuccToSinkTo && !TII->isSafeToSink(MI, SuccToSinkTo, CI))
    return nullptr;

  return SuccToSinkTo;
}

/// Return true if MI is likely to be usable as a memory operation by the
/// implicit null check optimization.
///
/// This is a "best effort" heuristic, and should not be relied upon for
/// correctness. This returning true does not guarantee that the implicit null
/// check optimization is legal over MI, and this returning false does not
/// guarantee MI cannot possibly be used to do a null check.
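///
/// For illustration, the guarded-load shape this heuristic looks for is
/// (simplified sketch):
///
///   bb.0:
///     CMP %base, 0              ; branch carries !make.implicit metadata
///     Beq %null_handler
///   bb.1:                       ; single predecessor: bb.0
///     %v = LOAD %base, <offset> ; candidate for an implicit null check
///
/// Sinking the load out of bb.1 would separate it from the guarding branch,
/// so callers avoid sinking in that case.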
static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
                                             const TargetInstrInfo *TII,
                                             const TargetRegisterInfo *TRI) {
  using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;

  auto *MBB = MI.getParent();
  if (MBB->pred_size() != 1)
    return false;

  auto *PredMBB = *MBB->pred_begin();
  auto *PredBB = PredMBB->getBasicBlock();

  // Frontends that don't use implicit null checks have no reason to emit
  // branches with make.implicit metadata, and this function should always
  // return false for them.
  if (!PredBB ||
      !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
    return false;

  const MachineOperand *BaseOp;
  int64_t Offset;
  bool OffsetIsScalable;
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
    return false;

  if (!BaseOp->isReg())
    return false;

  if (!(MI.mayLoad() && !MI.isPredicable()))
    return false;

  MachineBranchPredicate MBP;
  if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
    return false;

  return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
         (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
          MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
         MBP.LHS.getReg() == BaseOp->getReg();
}

/// If the sunk instruction is a copy, try to forward the copy instead of
/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
/// there's any subregister weirdness involved. Returns true if copy
/// propagation occurred.
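///
/// For illustration (simplified MIR): when sinking
///   %1 = COPY %0
/// away from
///   DBG_VALUE %1, !123, !DIExpression()
/// the DBG_VALUE can be rewritten to refer to %0 instead of being set to
/// undef, as long as the subregisters line up.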
static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI,
                                 Register Reg) {
  const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
  const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();

  // Copy DBG_VALUE operand and set the original to undef. We then check to
  // see whether this is something that can be copy-forwarded. If it isn't,
  // continue around the loop.

  const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
  auto CopyOperands = TII.isCopyInstr(SinkInst);
  if (!CopyOperands)
    return false;
  SrcMO = CopyOperands->Source;
  DstMO = CopyOperands->Destination;

  // Check validity of forwarding this copy.
  bool PostRA = MRI.getNumVirtRegs() == 0;

  // Trying to forward between physical and virtual registers is too hard.
  if (Reg.isVirtual() != SrcMO->getReg().isVirtual())
    return false;

  // Only try virtual register copy-forwarding before regalloc, and physical
  // register copy-forwarding after regalloc.
  bool arePhysRegs = !Reg.isVirtual();
  if (arePhysRegs != PostRA)
    return false;

  // Pre-regalloc, only forward if all subregisters agree (or there are no
  // subregs at all). More analysis might recover some forwardable copies.
  if (!PostRA)
    for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg))
      if (DbgMO.getSubReg() != SrcMO->getSubReg() ||
          DbgMO.getSubReg() != DstMO->getSubReg())
        return false;

  // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
  // of this copy. Only forward the copy if the DBG_VALUE operand exactly
  // matches the copy destination.
  if (PostRA && Reg != DstMO->getReg())
    return false;

  for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) {
    DbgMO.setReg(SrcMO->getReg());
    DbgMO.setSubReg(SrcMO->getSubReg());
  }
  return true;
}

using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>;

/// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
                        MachineBasicBlock::iterator InsertPos,
                        ArrayRef<MIRegs> DbgValuesToSink) {
  // If we cannot find a location to use (merge with), then we erase the debug
  // location to prevent debug-info driven tools from potentially reporting
  // wrong location information.
  if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
    MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
                                                 InsertPos->getDebugLoc()));
  else
    MI.setDebugLoc(DebugLoc());

  // Move the instruction.
  MachineBasicBlock *ParentBlock = MI.getParent();
  SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
                      ++MachineBasicBlock::iterator(MI));

  // Sink a copy of debug users to the insert position. Mark the original
  // DBG_VALUE location as 'undef', indicating that any earlier variable
  // location should be terminated as we've optimised away the value at this
  // point.
  for (const auto &DbgValueToSink : DbgValuesToSink) {
    MachineInstr *DbgMI = DbgValueToSink.first;
    MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
    SuccToSinkTo.insert(InsertPos, NewDbgMI);

    bool PropagatedAllSunkOps = true;
    for (unsigned Reg : DbgValueToSink.second) {
      if (DbgMI->hasDebugOperandForReg(Reg)) {
        if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) {
          PropagatedAllSunkOps = false;
          break;
        }
      }
    }
    if (!PropagatedAllSunkOps)
      DbgMI->setDebugValueUndef();
  }
}

/// hasStoreBetween - check if there is a store between straight-line blocks
/// From and To.
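///
/// For illustration, with a straight-line path (simplified CFG sketch):
///
///   From -> B1 -> B2 -> To
///
/// a load can be sunk from From to To only if no call, ordered memory
/// operation, or aliasing store occurs in B1 or B2; per-block-pair answers
/// are cached to avoid rescanning on later queries.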
bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
                                     MachineBasicBlock *To, MachineInstr &MI) {
  // Make sure From and To are in straight line which means From dominates To
  // and To post dominates From.
  if (!DT->dominates(From, To) || !PDT->dominates(To, From))
    return true;

1486 auto BlockPair = std::make_pair(x&: From, y&: To);
1487
1488 // Does these two blocks pair be queried before and have a definite cached
1489 // result?
1490 if (auto It = HasStoreCache.find(Val: BlockPair); It != HasStoreCache.end())
1491 return It->second;
1492
1493 if (auto It = StoreInstrCache.find(Val: BlockPair); It != StoreInstrCache.end())
1494 return llvm::any_of(Range&: It->second, P: [&](MachineInstr *I) {
1495 return I->mayAlias(AA, Other: MI, UseTBAA: false);
1496 });
1497
1498 bool SawStore = false;
1499 bool HasAliasedStore = false;
1500 DenseSet<MachineBasicBlock *> HandledBlocks;
1501 DenseSet<MachineBasicBlock *> HandledDomBlocks;
1502 // Go through all reachable blocks from From.
1503 for (MachineBasicBlock *BB : depth_first(G: From)) {
1504 // We insert the instruction at the start of block To, so no need to worry
1505 // about stores inside To.
1506 // Store in block From should be already considered when just enter function
1507 // SinkInstruction.
1508 if (BB == To || BB == From)
1509 continue;
1510
1511 // We already handle this BB in previous iteration.
1512 if (HandledBlocks.count(V: BB))
1513 continue;
1514
1515 HandledBlocks.insert(V: BB);
1516 // To post dominates BB, it must be a path from block From.
1517 if (PDT->dominates(A: To, B: BB)) {
1518 if (!HandledDomBlocks.count(V: BB))
1519 HandledDomBlocks.insert(V: BB);
1520
1521 // If this BB is too big or there are too many blocks on the straight line
1522 // between From and To, stop searching to save compile time.
1523 if (BB->sizeWithoutDebugLargerThan(Limit: SinkLoadInstsPerBlockThreshold) ||
1524 HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
1525 for (auto *DomBB : HandledDomBlocks) {
1526 if (DomBB != BB && DT->dominates(A: DomBB, B: BB))
1527 HasStoreCache[std::make_pair(x&: DomBB, y&: To)] = true;
1528 else if (DomBB != BB && DT->dominates(A: BB, B: DomBB))
1529 HasStoreCache[std::make_pair(x&: From, y&: DomBB)] = true;
1530 }
1531 HasStoreCache[BlockPair] = true;
1532 return true;
1533 }
1534
1535 for (MachineInstr &I : *BB) {
1536 // Conservatively treat a call or an ordered memory operation as an
1537 // aliasing store.
1538 if (I.isCall() || I.hasOrderedMemoryRef()) {
1539 for (auto *DomBB : HandledDomBlocks) {
1540 if (DomBB != BB && DT->dominates(A: DomBB, B: BB))
1541 HasStoreCache[std::make_pair(x&: DomBB, y&: To)] = true;
1542 else if (DomBB != BB && DT->dominates(A: BB, B: DomBB))
1543 HasStoreCache[std::make_pair(x&: From, y&: DomBB)] = true;
1544 }
1545 HasStoreCache[BlockPair] = true;
1546 return true;
1547 }
1548
1549 if (I.mayStore()) {
1550 SawStore = true;
1551 // We still have a chance to sink MI if none of the stores in between
1552 // alias MI.
1553 // Cache all store instructions so that we don't need to walk all blocks
1554 // reachable from From again for the next load instruction.
1555 if (I.mayAlias(AA, Other: MI, UseTBAA: false))
1556 HasAliasedStore = true;
1557 StoreInstrCache[BlockPair].push_back(Elt: &I);
1558 }
1559 }
1560 }
1561 }
1562 // If there is no store at all, cache the result.
1563 if (!SawStore)
1564 HasStoreCache[BlockPair] = false;
1565 return HasAliasedStore;
1566}
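
// Editorial illustration (not part of the original source): hasStoreBetween
// only answers for "straight line" block pairs. In the hypothetical CFG
// below, From dominates To and To post-dominates From, so every path from
// From reaches To; BB1 and BB2 are then scanned for stores that may alias
// the load being sunk, and any call or ordered memory operation in them
// conservatively blocks the sink:
//
//        From
//        /  \
//      BB1  BB2
//        \  /
//         To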
1567
1568/// Sink instructions into cycles if profitable. This especially tries to
1569/// prevent register spills caused by register pressure when there is little
1570/// to no overhead in moving instructions into cycles.
1571bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
1572 LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
1573 MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
1574 assert(Preheader && "Cycle sink needs a preheader block");
1575 MachineBasicBlock *SinkBlock = nullptr;
1576 bool CanSink = true;
1577 const MachineOperand &MO = I.getOperand(i: 0);
1578
1579 for (MachineInstr &MI : MRI->use_instructions(Reg: MO.getReg())) {
1580 LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
1581 if (!Cycle->contains(Block: MI.getParent())) {
1582 LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
1583 CanSink = false;
1584 break;
1585 }
1586
1587 // FIXME: Come up with a proper cost model that estimates whether sinking
1588 // the instruction (and thus possibly executing it on every cycle
1589 // iteration) is more expensive than keeping it in a register.
1590 // For now, assume that copies are cheap and thus almost always worth it.
1591 if (!MI.isCopy()) {
1592 LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
1593 CanSink = false;
1594 break;
1595 }
1596 if (!SinkBlock) {
1597 SinkBlock = MI.getParent();
1598 LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
1599 << printMBBReference(*SinkBlock) << "\n");
1600 continue;
1601 }
1602 SinkBlock = DT->findNearestCommonDominator(A: SinkBlock, B: MI.getParent());
1603 if (!SinkBlock) {
1604 LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
1605 CanSink = false;
1606 break;
1607 }
1608 LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: "
1609 << printMBBReference(*SinkBlock) << "\n");
1610 }
1611
1612 if (!CanSink) {
1613 LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
1614 return false;
1615 }
1616 if (!SinkBlock) {
1617 LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
1618 return false;
1619 }
1620 if (SinkBlock == Preheader) {
1621 LLVM_DEBUG(
1622 dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
1623 return false;
1624 }
1625 if (SinkBlock->sizeWithoutDebugLargerThan(Limit: SinkLoadInstsPerBlockThreshold)) {
1626 LLVM_DEBUG(
1627 dbgs() << "CycleSink: Not sinking, block too large to analyse.\n");
1628 return false;
1629 }
1630
1631 LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
1632 SinkBlock->splice(Where: SinkBlock->SkipPHIsAndLabels(I: SinkBlock->begin()), Other: Preheader,
1633 From: I);
1634
1635 // Conservatively clear any kill flags on uses of the sunk instruction.
1636 for (MachineOperand &MO : I.operands()) {
1637 if (MO.isReg() && MO.readsReg())
1638 RegsToClearKillFlags.insert(V: MO.getReg());
1639 }
1640
1641 // The instruction is moved from its basic block, so do not retain the
1642 // debug information.
1643 assert(!I.isDebugInstr() && "Should not sink debug inst");
1644 I.setDebugLoc(DebugLoc());
1645 return true;
1646}
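
// Editorial illustration (not part of the original source; LDRWui and the
// register numbers are hypothetical AArch64-style MIR): an instruction in
// the preheader whose only in-cycle users are COPYs is sunk to the nearest
// common dominator of those users, so it no longer occupies a register
// across the whole cycle:
//   preheader:                       preheader:
//     %0 = LDRWui ...                  ...
//   header:                    =>   header:
//     ...                              %0 = LDRWui ...
//     %1 = COPY %0                     %1 = COPY %0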
1647
1648/// SinkInstruction - Determine whether it is safe to sink the specified machine
1649/// instruction out of its current block into a successor.
1650bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
1651 AllSuccsCache &AllSuccessors) {
1652 // Don't sink instructions that the target prefers not to sink.
1653 if (!TII->shouldSink(MI))
1654 return false;
1655
1656 // Check if it's safe to move the instruction.
1657 if (!MI.isSafeToMove(AA, SawStore))
1658 return false;
1659
1660 // Convergent operations may not be made control-dependent on additional
1661 // values.
1662 if (MI.isConvergent())
1663 return false;
1664
1665 // Don't break implicit null checks. This is a performance heuristic, and not
1666 // required for correctness.
1667 if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
1668 return false;
1669
1670 // FIXME: This should include support for sinking instructions within the
1671 // block they are currently in to shorten the live ranges. We often get
1672 // instructions sunk into the top of a large block, but it would be better to
1673 // also sink them down before their first use in the block. This xform has to
1674 // be careful not to *increase* register pressure though, e.g. sinking
1675 // "x = y + z" down if it kills y and z would increase the live ranges of y
1676 // and z and only shrink the live range of x.
1677
1678 bool BreakPHIEdge = false;
1679 MachineBasicBlock *ParentBlock = MI.getParent();
1680 MachineBasicBlock *SuccToSinkTo =
1681 FindSuccToSinkTo(MI, MBB: ParentBlock, BreakPHIEdge, AllSuccessors);
1682
1683 // If no suitable successor was found, the instruction cannot be sunk.
1684 if (!SuccToSinkTo)
1685 return false;
1686
1687 // If the instruction to move defines a dead physical register which is live
1688 // when leaving the basic block, don't move it because it could turn into a
1689 // "zombie" define of that preg. E.g., EFLAGS.
1690 for (const MachineOperand &MO : MI.all_defs()) {
1691 Register Reg = MO.getReg();
1692 if (Reg == 0 || !Reg.isPhysical())
1693 continue;
1694 if (SuccToSinkTo->isLiveIn(Reg))
1695 return false;
1696 }
1697
1698 LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);
1699
1700 // If the block has multiple predecessors, this is a critical edge.
1701 // Decide if we can sink along it or need to break the edge.
1702 if (SuccToSinkTo->pred_size() > 1) {
1703 // We cannot sink a load across a critical edge - there may be stores in
1704 // other code paths.
1705 bool TryBreak = false;
1706 bool Store =
1707 MI.mayLoad() ? hasStoreBetween(From: ParentBlock, To: SuccToSinkTo, MI) : true;
1708 if (!MI.isSafeToMove(AA, SawStore&: Store)) {
1709 LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
1710 TryBreak = true;
1711 }
1712
1713 // We don't want to sink across a critical edge if we don't dominate the
1714 // successor. We could be introducing calculations to new code paths.
1715 if (!TryBreak && !DT->dominates(A: ParentBlock, B: SuccToSinkTo)) {
1716 LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
1717 TryBreak = true;
1718 }
1719
1720 // Don't sink instructions into a cycle.
1721 if (!TryBreak && CI->getCycle(Block: SuccToSinkTo) &&
1722 (!CI->getCycle(Block: SuccToSinkTo)->isReducible() ||
1723 CI->getCycle(Block: SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
1724 LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
1725 TryBreak = true;
1726 }
1727
1728 // Otherwise we are OK with sinking along a critical edge.
1729 if (!TryBreak)
1730 LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
1731 else {
1732 // Mark this edge as to be split.
1733 // If the edge can actually be split, the next iteration of the main loop
1734 // will sink MI in the newly created block.
1735 bool Status =
1736 PostponeSplitCriticalEdge(MI, FromBB: ParentBlock, ToBB: SuccToSinkTo, BreakPHIEdge);
1737 if (!Status)
1738 LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
1739 "break critical edge\n");
1740 // The instruction will not be sunk this time.
1741 return false;
1742 }
1743 }
1744
1745 if (BreakPHIEdge) {
1746 // BreakPHIEdge is true if all the uses are in the successor MBB being
1747 // sunk into and they are all PHI nodes. In this case, machine-sink must
1748 // break the critical edge first.
1749 bool Status = PostponeSplitCriticalEdge(MI, FromBB: ParentBlock,
1750 ToBB: SuccToSinkTo, BreakPHIEdge);
1751 if (!Status)
1752 LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
1753 "break critical edge\n");
1754 // The instruction will not be sunk this time.
1755 return false;
1756 }
1757
1758 // Determine where to insert, skipping PHI nodes and labels.
1759 MachineBasicBlock::iterator InsertPos =
1760 SuccToSinkTo->SkipPHIsAndLabels(I: SuccToSinkTo->begin());
1761 if (blockPrologueInterferes(BB: SuccToSinkTo, End: InsertPos, MI, TRI, TII, MRI)) {
1762 LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
1763 return false;
1764 }
1765
1766 // Collect debug users of any vreg that this inst defines.
1767 SmallVector<MIRegs, 4> DbgUsersToSink;
1768 for (auto &MO : MI.all_defs()) {
1769 if (!MO.getReg().isVirtual())
1770 continue;
1771 if (!SeenDbgUsers.count(Val: MO.getReg()))
1772 continue;
1773
1774 // Sink any users that don't pass any other DBG_VALUEs for this variable.
1775 auto &Users = SeenDbgUsers[MO.getReg()];
1776 for (auto &User : Users) {
1777 MachineInstr *DbgMI = User.getPointer();
1778 if (User.getInt()) {
1779 // This DBG_VALUE would re-order assignments. If we can't copy-propagate
1780 // it, it can't be recovered. Set it undef.
1781 if (!attemptDebugCopyProp(SinkInst&: MI, DbgMI&: *DbgMI, Reg: MO.getReg()))
1782 DbgMI->setDebugValueUndef();
1783 } else {
1784 DbgUsersToSink.push_back(
1785 Elt: {DbgMI, SmallVector<unsigned, 2>(1, MO.getReg())});
1786 }
1787 }
1788 }
1789
1790 // After sinking, some debug users may not be dominated any more. If possible,
1791 // copy-propagate their operands. As it's expensive, don't do this if there's
1792 // no debuginfo in the program.
1793 if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
1794 SalvageUnsunkDebugUsersOfCopy(MI, TargetBlock: SuccToSinkTo);
1795
1796 performSink(MI, SuccToSinkTo&: *SuccToSinkTo, InsertPos, DbgValuesToSink: DbgUsersToSink);
1797
1798 // Conservatively, clear any kill flags, since it's possible that they are no
1799 // longer correct.
1800 // Note that we have to clear the kill flags for any register this instruction
1801 // uses as we may sink over another instruction which currently kills the
1802 // used registers.
1803 for (MachineOperand &MO : MI.all_uses())
1804 RegsToClearKillFlags.insert(V: MO.getReg()); // Remember to clear kill flags.
1805
1806 return true;
1807}
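
// Editorial illustration (not part of the original source, hypothetical
// blocks): a critical edge that SinkInstruction may ask to split. The edge
// %bb.0 -> %bb.2 below is critical (%bb.0 has two successors and %bb.2 has
// two predecessors), so an instruction in %bb.0 used only in %bb.2 cannot be
// sunk along it directly; PostponeSplitCriticalEdge marks the edge, and the
// next iteration of the main loop sinks into the newly created block:
//   %bb.0: successors: %bb.1, %bb.2
//   %bb.1: successors: %bb.2
//   %bb.2: predecessors: %bb.0, %bb.1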
1808
1809void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
1810 MachineInstr &MI, MachineBasicBlock *TargetBlock) {
1811 assert(MI.isCopy());
1812 assert(MI.getOperand(1).isReg());
1813
1814 // Enumerate all users of vreg operands that are def'd. Skip those that will
1815 // be sunk. For the rest, if they are not dominated by the block we will sink
1816 // MI into, propagate the copy source to them.
1817 SmallVector<MachineInstr *, 4> DbgDefUsers;
1818 SmallVector<Register, 4> DbgUseRegs;
1819 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1820 for (auto &MO : MI.all_defs()) {
1821 if (!MO.getReg().isVirtual())
1822 continue;
1823 DbgUseRegs.push_back(Elt: MO.getReg());
1824 for (auto &User : MRI.use_instructions(Reg: MO.getReg())) {
1825 if (!User.isDebugValue() || DT->dominates(A: TargetBlock, B: User.getParent()))
1826 continue;
1827
1828 // If in the same block, the user will either sink too or be use-before-def.
1829 if (User.getParent() == MI.getParent())
1830 continue;
1831
1832 assert(User.hasDebugOperandForReg(MO.getReg()) &&
1833 "DBG_VALUE user of vreg, but has no operand for it?");
1834 DbgDefUsers.push_back(Elt: &User);
1835 }
1836 }
1837
1838 // Point the users of this copy that are no longer dominated at the source
1839 // of the copy.
1840 for (auto *User : DbgDefUsers) {
1841 for (auto &Reg : DbgUseRegs) {
1842 for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) {
1843 DbgOp.setReg(MI.getOperand(i: 1).getReg());
1844 DbgOp.setSubReg(MI.getOperand(i: 1).getSubReg());
1845 }
1846 }
1847 }
1848}
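
// Editorial illustration (not part of the original source, hypothetical
// vregs): when "%1 = COPY %0" sinks into a block that no longer dominates a
// DBG_VALUE of %1, that DBG_VALUE is repointed at the copy source, which is
// still dominated by its own def:
//   DBG_VALUE %1, ...   ==>   DBG_VALUE %0, ...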
1849
1850//===----------------------------------------------------------------------===//
1851// This pass is not intended to be a replacement or a complete alternative
1852// for the pre-ra machine sink pass. It is only designed to sink COPY
1853// instructions which should be handled after RA.
1854//
1855// This pass sinks COPY instructions into a successor block, if the COPY is not
1856// used in the current block and the COPY is live-in to a single successor
1857// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
1858// copy on paths where its result isn't needed. This also exposes
1859// additional opportunities for dead copy elimination and shrink wrapping.
1860//
1861// These copies were either not handled by, or were inserted after, the
1862// MachineSink pass. As an example of the former case, the MachineSink pass
1863// cannot sink COPY instructions with allocatable source registers; on AArch64
1864// this type of copy instruction is frequently used to move function parameters
1865// (physical registers) into virtual registers in the entry block.
1866//
1867// For the machine IR below, this pass will sink the copy into %w19 in the
1868// entry block into its successor (%bb.1), because %w19 is live-in only in %bb.1.
1869// %bb.0:
1870// %wzr = SUBSWri %w1, 1
1871// %w19 = COPY %w0
1872// Bcc 11, %bb.2
1873// %bb.1:
1874// Live Ins: %w19
1875// BL @fun
1876// %w0 = ADDWrr %w0, %w19
1877// RET %w0
1878// %bb.2:
1879// %w0 = COPY %wzr
1880// RET %w0
1881// As we sink %w19 (a callee-saved register on AArch64) into %bb.1, the
1882// shrink-wrapping pass will be able to see %bb.0 as a candidate.
1883//===----------------------------------------------------------------------===//
1884namespace {
1885
1886class PostRAMachineSinking : public MachineFunctionPass {
1887public:
1888 bool runOnMachineFunction(MachineFunction &MF) override;
1889
1890 static char ID;
1891 PostRAMachineSinking() : MachineFunctionPass(ID) {}
1892 StringRef getPassName() const override { return "PostRA Machine Sink"; }
1893
1894 void getAnalysisUsage(AnalysisUsage &AU) const override {
1895 AU.setPreservesCFG();
1896 MachineFunctionPass::getAnalysisUsage(AU);
1897 }
1898
1899 MachineFunctionProperties getRequiredProperties() const override {
1900 return MachineFunctionProperties().set(
1901 MachineFunctionProperties::Property::NoVRegs);
1902 }
1903
1904private:
1905 /// Track which register units have been modified and used.
1906 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
1907
1908 /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
1909 /// entry in this map for each unit it touches. The DBG_VALUE's entry
1910 /// consists of a pointer to the instruction itself, and a vector of registers
1911 /// referred to by the instruction that overlap the key register unit.
1912 DenseMap<unsigned, SmallVector<MIRegs, 2>> SeenDbgInstrs;
1913
1914 /// Sink COPY instructions that are unused in their own block close to
1915 /// their uses in successors.
1916 bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
1917 const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
1918};
1919} // namespace
1920
1921char PostRAMachineSinking::ID = 0;
1922char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;
1923
1924INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
1925 "PostRA Machine Sink", false, false)
1926
1927static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
1928 const TargetRegisterInfo *TRI) {
1929 LiveRegUnits LiveInRegUnits(*TRI);
1930 LiveInRegUnits.addLiveIns(MBB);
1931 return !LiveInRegUnits.available(Reg);
1932}
1933
1934static MachineBasicBlock *
1935getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
1936 const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
1937 unsigned Reg, const TargetRegisterInfo *TRI) {
1938 // Try to find a single sinkable successor in which Reg is live-in.
1939 MachineBasicBlock *BB = nullptr;
1940 for (auto *SI : SinkableBBs) {
1941 if (aliasWithRegsInLiveIn(MBB&: *SI, Reg, TRI)) {
1942 // If BB is set here, Reg is live-in to at least two sinkable successors,
1943 // so quit.
1944 if (BB)
1945 return nullptr;
1946 BB = SI;
1947 }
1948 }
1949 // Reg is not live-in to any sinkable successors.
1950 if (!BB)
1951 return nullptr;
1952
1953 // Check if any register aliased with Reg is live-in to other successors.
1954 for (auto *SI : CurBB.successors()) {
1955 if (!SinkableBBs.count(Ptr: SI) && aliasWithRegsInLiveIn(MBB&: *SI, Reg, TRI))
1956 return nullptr;
1957 }
1958 return BB;
1959}
1960
1961static MachineBasicBlock *
1962getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
1963 const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
1964 ArrayRef<unsigned> DefedRegsInCopy,
1965 const TargetRegisterInfo *TRI) {
1966 MachineBasicBlock *SingleBB = nullptr;
1967 for (auto DefReg : DefedRegsInCopy) {
1968 MachineBasicBlock *BB =
1969 getSingleLiveInSuccBB(CurBB, SinkableBBs, Reg: DefReg, TRI);
1970 if (!BB || (SingleBB && SingleBB != BB))
1971 return nullptr;
1972 SingleBB = BB;
1973 }
1974 return SingleBB;
1975}
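
// Editorial illustration (not part of the original source, hypothetical
// registers and blocks): for a COPY defining $w19 where the sinkable
// successors are %bb.1 and %bb.2, %bb.1 is returned only if $w19 (or an
// aliasing register) is live-in to %bb.1 alone; if $w19 is also live-in to
// %bb.2 or to any non-sinkable successor, the COPY would have to be
// duplicated, so nullptr is returned instead.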
1976
1977static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
1978 SmallVectorImpl<unsigned> &UsedOpsInCopy,
1979 LiveRegUnits &UsedRegUnits,
1980 const TargetRegisterInfo *TRI) {
1981 for (auto U : UsedOpsInCopy) {
1982 MachineOperand &MO = MI->getOperand(i: U);
1983 Register SrcReg = MO.getReg();
1984 if (!UsedRegUnits.available(Reg: SrcReg)) {
1985 MachineBasicBlock::iterator NI = std::next(x: MI->getIterator());
1986 for (MachineInstr &UI : make_range(x: NI, y: CurBB.end())) {
1987 if (UI.killsRegister(Reg: SrcReg, TRI)) {
1988 UI.clearRegisterKills(Reg: SrcReg, RegInfo: TRI);
1989 MO.setIsKill(true);
1990 break;
1991 }
1992 }
1993 }
1994 }
1995}
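
// Editorial illustration (not part of the original source, hypothetical
// MIR): the COPY below is about to sink into a successor. $w0 was killed by
// the store, but after sinking the COPY becomes the last user of $w0 on this
// path, so the kill flag is transferred onto the COPY's operand:
//   $w19 = COPY $w0                  STRWui $w0, ...
//   STRWui killed $w0, ...    =>     ...
//   ...                              (in successor) $w19 = COPY killed $w0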
1996
1997static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
1998 SmallVectorImpl<unsigned> &UsedOpsInCopy,
1999 SmallVectorImpl<unsigned> &DefedRegsInCopy) {
2000 MachineFunction &MF = *SuccBB->getParent();
2001 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2002 for (unsigned DefReg : DefedRegsInCopy)
2003 for (MCPhysReg S : TRI->subregs_inclusive(Reg: DefReg))
2004 SuccBB->removeLiveIn(Reg: S);
2005 for (auto U : UsedOpsInCopy)
2006 SuccBB->addLiveIn(PhysReg: MI->getOperand(i: U).getReg());
2007 SuccBB->sortUniqueLiveIns();
2008}
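
// Editorial illustration (not part of the original source, hypothetical
// registers): after sinking "$w19 = COPY $w0" into its successor, $w19 and
// its subregisters are removed from the successor's live-ins and the source
// $w0 is added, since $w0 is now the value crossing the block boundary:
//   Live Ins: $w19   ==>   Live Ins: $w0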
2009
2010static bool hasRegisterDependency(MachineInstr *MI,
2011 SmallVectorImpl<unsigned> &UsedOpsInCopy,
2012 SmallVectorImpl<unsigned> &DefedRegsInCopy,
2013 LiveRegUnits &ModifiedRegUnits,
2014 LiveRegUnits &UsedRegUnits) {
2015 bool HasRegDependency = false;
2016 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
2017 MachineOperand &MO = MI->getOperand(i);
2018 if (!MO.isReg())
2019 continue;
2020 Register Reg = MO.getReg();
2021 if (!Reg)
2022 continue;
2023 if (MO.isDef()) {
2024 if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
2025 HasRegDependency = true;
2026 break;
2027 }
2028 DefedRegsInCopy.push_back(Elt: Reg);
2029
2030 // FIXME: instead of isUse(), readsReg() would be a better fix here.
2031 // For example, we could ignore modifications of a register with undef.
2032 // However, it's not perfectly clear whether skipping the internal read
2033 // is safe in all other targets.
2034 } else if (MO.isUse()) {
2035 if (!ModifiedRegUnits.available(Reg)) {
2036 HasRegDependency = true;
2037 break;
2038 }
2039 UsedOpsInCopy.push_back(Elt: i);
2040 }
2041 }
2042 return HasRegDependency;
2043}
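
// Editorial illustration (not part of the original source, hypothetical
// registers): the block is walked bottom-up, so instructions below the COPY
// have already been accumulated into ModifiedRegUnits/UsedRegUnits. In the
// fragment below the COPY is rejected, because its source $w0 is modified
// before the end of the block:
//   $w19 = COPY $w0         // use of $w0 -> dependency, cannot sink
//   $w0 = ADDWrr $w1, $w2   // already scanned: marks $w0 as modified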
2044
2045bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
2046 MachineFunction &MF,
2047 const TargetRegisterInfo *TRI,
2048 const TargetInstrInfo *TII) {
2049 SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
2050 // FIXME: For now, we sink only to a successor which has a single predecessor
2051 // so that we can directly sink COPY instructions to the successor without
2052 // adding any new block or branch instruction.
2053 for (MachineBasicBlock *SI : CurBB.successors())
2054 if (!SI->livein_empty() && SI->pred_size() == 1)
2055 SinkableBBs.insert(Ptr: SI);
2056
2057 if (SinkableBBs.empty())
2058 return false;
2059
2060 bool Changed = false;
2061
2062 // Track which registers have been modified and used between the end of the
2063 // block and the current instruction.
2064 ModifiedRegUnits.clear();
2065 UsedRegUnits.clear();
2066 SeenDbgInstrs.clear();
2067
2068 for (MachineInstr &MI : llvm::make_early_inc_range(Range: llvm::reverse(C&: CurBB))) {
2069 // Track the operand indexes of the COPY's uses.
2070 SmallVector<unsigned, 2> UsedOpsInCopy;
2071 // Track the registers defined by the COPY.
2072 SmallVector<unsigned, 2> DefedRegsInCopy;
2073
2074 // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
2075 // for DBG_VALUEs later, record them when they're encountered.
2076 if (MI.isDebugValue() && !MI.isDebugRef()) {
2077 SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
2078 bool IsValid = true;
2079 for (MachineOperand &MO : MI.debug_operands()) {
2080 if (MO.isReg() && MO.getReg().isPhysical()) {
2081 // Bail if we can already tell the sink would be rejected, rather
2082 // than needlessly accumulating lots of DBG_VALUEs.
2083 if (hasRegisterDependency(MI: &MI, UsedOpsInCopy, DefedRegsInCopy,
2084 ModifiedRegUnits, UsedRegUnits)) {
2085 IsValid = false;
2086 break;
2087 }
2088
2089 // Record debug use of each reg unit.
2090 for (MCRegUnit Unit : TRI->regunits(Reg: MO.getReg()))
2091 MIUnits[Unit].push_back(Elt: MO.getReg());
2092 }
2093 }
2094 if (IsValid) {
2095 for (auto &RegOps : MIUnits)
2096 SeenDbgInstrs[RegOps.first].emplace_back(Args: &MI,
2097 Args: std::move(RegOps.second));
2098 }
2099 continue;
2100 }
2101
2102 if (MI.isDebugOrPseudoInstr())
2103 continue;
2104
2105 // Do not move any instruction across a function call.
2106 if (MI.isCall())
2107 return false;
2108
2109 if (!MI.isCopy() || !MI.getOperand(i: 0).isRenamable()) {
2110 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2111 TRI);
2112 continue;
2113 }
2114
2115 // Don't sink the COPY if it would violate a register dependency.
2116 if (hasRegisterDependency(MI: &MI, UsedOpsInCopy, DefedRegsInCopy,
2117 ModifiedRegUnits, UsedRegUnits)) {
2118 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2119 TRI);
2120 continue;
2121 }
2122 assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
2123 "Unexpected SrcReg or DefReg");
2124 MachineBasicBlock *SuccBB =
2125 getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
2126 // Don't sink if we cannot find a single sinkable successor in which Reg
2127 // is live-in.
2128 if (!SuccBB) {
2129 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2130 TRI);
2131 continue;
2132 }
2133 assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
2134 "Unexpected predecessor");
2135
2136 // Collect DBG_VALUEs that must sink with this copy. We've previously
2137 // recorded which reg units DBG_VALUEs read; if this instruction
2138 // writes any of those units, then the corresponding DBG_VALUEs must sink.
2139 MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
2140 for (auto &MO : MI.all_defs()) {
2141 for (MCRegUnit Unit : TRI->regunits(Reg: MO.getReg())) {
2142 for (const auto &MIRegs : SeenDbgInstrs.lookup(Val: Unit)) {
2143 auto &Regs = DbgValsToSinkMap[MIRegs.first];
2144 for (unsigned Reg : MIRegs.second)
2145 Regs.push_back(Elt: Reg);
2146 }
2147 }
2148 }
2149 auto DbgValsToSink = DbgValsToSinkMap.takeVector();
2150
2151 LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);
2152
2153 MachineBasicBlock::iterator InsertPos =
2154 SuccBB->SkipPHIsAndLabels(I: SuccBB->begin());
2155 if (blockPrologueInterferes(BB: SuccBB, End: InsertPos, MI, TRI, TII, MRI: nullptr)) {
2156 LLVM_DEBUG(
2157 dbgs() << " *** Not sinking: prologue interference\n");
2158 continue;
2159 }
2160
2161 // Clear the kill flag if SrcReg is killed between MI and the end of the
2162 // block.
2163 clearKillFlags(MI: &MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
2164 performSink(MI, SuccToSinkTo&: *SuccBB, InsertPos, DbgValuesToSink: DbgValsToSink);
2165 updateLiveIn(MI: &MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
2166
2167 Changed = true;
2168 ++NumPostRACopySink;
2169 }
2170 return Changed;
2171}
2172
2173bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
2174 if (skipFunction(F: MF.getFunction()))
2175 return false;
2176
2177 bool Changed = false;
2178 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2179 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
2180
2181 ModifiedRegUnits.init(TRI: *TRI);
2182 UsedRegUnits.init(TRI: *TRI);
2183 for (auto &BB : MF)
2184 Changed |= tryToSinkCopy(CurBB&: BB, MF, TRI, TII);
2185
2186 return Changed;
2187}
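
// Editorial usage note (an assumption, not from the original source): since
// the pass is registered above as "postra-machine-sink", it can be exercised
// in isolation on MIR with llc, e.g.:
//   llc -run-pass=postra-machine-sink -o - input.mir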
2188