//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass moves instructions into successor blocks when possible, so that
// they aren't executed on paths where their results aren't needed.
//
// This pass is not intended to be a replacement or a complete alternative
// for an LLVM-IR-level sinking pass. It is only designed to sink simple
// constructs that are not exposed before lowering and instruction selection.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineSink.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineDomTreeUpdater.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-sink"

static cl::opt<bool>
    SplitEdges("machine-sink-split",
               cl::desc("Split critical edges during machine sinking"),
               cl::init(true), cl::Hidden);

static cl::opt<bool> UseBlockFreqInfo(
    "machine-sink-bfi",
    cl::desc("Use block frequency info to find successors to sink"),
    cl::init(true), cl::Hidden);

static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
    "machine-sink-split-probability-threshold",
    cl::desc(
        "Percentage threshold for splitting single-instruction critical edge. "
        "If the branch probability is higher than this threshold, we allow "
        "speculative execution of up to 1 instruction to avoid branching to "
        "the split critical edge"),
    cl::init(40), cl::Hidden);
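// With the default of 40, an edge taken with probability at most 40% may
// still be split; hotter edges instead speculate a single cheap instruction
// (see isWorthBreakingCriticalEdge below).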

static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold(
    "machine-sink-load-instrs-threshold",
    cl::desc("Do not try to find alias store for a load if there is an "
             "in-path block whose instruction count is higher than this "
             "threshold."),
    cl::init(2000), cl::Hidden);

static cl::opt<unsigned> SinkLoadBlocksThreshold(
    "machine-sink-load-blocks-threshold",
    cl::desc("Do not try to find alias store for a load if the number of "
             "blocks on the straight-line path is higher than this "
             "threshold."),
    cl::init(20), cl::Hidden);

static cl::opt<bool>
    SinkInstsIntoCycle("sink-insts-to-avoid-spills",
                       cl::desc("Sink instructions into cycles to avoid "
                                "register spills"),
                       cl::init(false), cl::Hidden);

static cl::opt<unsigned> SinkIntoCycleLimit(
    "machine-sink-cycle-limit",
    cl::desc(
        "The maximum number of instructions considered for cycle sinking."),
    cl::init(50), cl::Hidden);

STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");

using RegSubRegPair = TargetInstrInfo::RegSubRegPair;

namespace {

class MachineSinking {
  const TargetSubtargetInfo *STI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineRegisterInfo *MRI = nullptr;      // Machine register information
  MachineDominatorTree *DT = nullptr;      // Machine dominator tree
  MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
  MachineCycleInfo *CI = nullptr;
  ProfileSummaryInfo *PSI = nullptr;
  MachineBlockFrequencyInfo *MBFI = nullptr;
  const MachineBranchProbabilityInfo *MBPI = nullptr;
  AliasAnalysis *AA = nullptr;
  RegisterClassInfo RegClassInfo;
  TargetSchedModel SchedModel;
  // Required for split critical edge
  LiveIntervals *LIS;
  SlotIndexes *SI;
  LiveVariables *LV;
  MachineLoopInfo *MLI;

  // Remember which edges have been considered for breaking.
  SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
      CEBCandidates;
  // Remember a register that wanted to sink into the same block along
  // a different critical edge.
  // {register to sink, sink-to block} -> the first sink-from block.
  // We record the first sink-from block because splitting that (critical)
  // edge was deferred until we see another register that wants to sink into
  // the same block.
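  // For example, if %0 wants to sink from %bb.0 into %bb.2, the split is
  // deferred; once %1 also wants to sink into %bb.2 along a different
  // critical edge, both edges become worth splitting.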
  DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
      CEMergeCandidates;
  // Remember which edges we are about to split.
  // This is different from CEBCandidates, which only records edges that have
  // been considered; the edges in ToSplit will actually be split.
  SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;

  DenseSet<Register> RegsToClearKillFlags;

  using AllSuccsCache =
      SmallDenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;

  /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
  /// post-dominated by another DBG_VALUE of the same variable location.
  /// This is necessary to detect sequences such as:
  ///     %0 = someinst
  ///     DBG_VALUE %0, !123, !DIExpression()
  ///     %1 = anotherinst
  ///     DBG_VALUE %1, !123, !DIExpression()
  /// Where if %0 were to sink, the DBG_VALUE should not sink with it, as that
  /// would re-order assignments.
  using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;

  using SinkItem = std::pair<MachineInstr *, MachineBasicBlock *>;

  /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
  /// debug instructions to sink.
  SmallDenseMap<Register, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;

  /// Record of debug variables that have had their locations set in the
  /// current block.
  DenseSet<DebugVariable> SeenDbgVars;

  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
      HasStoreCache;

  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
           SmallVector<MachineInstr *>>
      StoreInstrCache;

  /// Cached BB's register pressure.
  DenseMap<const MachineBasicBlock *, std::vector<unsigned>>
      CachedRegisterPressure;

  bool EnableSinkAndFold;
public:
  MachineSinking(bool EnableSinkAndFold, MachineDominatorTree *DT,
                 MachinePostDominatorTree *PDT, LiveVariables *LV,
                 MachineLoopInfo *MLI, SlotIndexes *SI, LiveIntervals *LIS,
                 MachineCycleInfo *CI, ProfileSummaryInfo *PSI,
                 MachineBlockFrequencyInfo *MBFI,
                 const MachineBranchProbabilityInfo *MBPI, AliasAnalysis *AA)
      : DT(DT), PDT(PDT), CI(CI), PSI(PSI), MBFI(MBFI), MBPI(MBPI), AA(AA),
        LIS(LIS), SI(SI), LV(LV), MLI(MLI),
        EnableSinkAndFold(EnableSinkAndFold) {}

  bool run(MachineFunction &MF);

  void releaseMemory() {
    CEBCandidates.clear();
    CEMergeCandidates.clear();
  }

private:
  bool ProcessBlock(MachineBasicBlock &MBB);
  void ProcessDbgInst(MachineInstr &MI);
  bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                  MachineBasicBlock *To, bool BreakPHIEdge);
  bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                   MachineBasicBlock *To,
                                   MachineBasicBlock *&DeferredFromBlock);

  bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
                       MachineInstr &MI);

  /// Postpone the splitting of the given critical
  /// edge (\p From, \p To).
  ///
  /// We do not split the edges on the fly. Indeed, this invalidates
  /// the dominance information and thus triggers a lot of updates
  /// of that information underneath.
  /// Instead, we postpone all the splits after each iteration of
  /// the main loop. That way, the information is at least valid
  /// for the lifetime of an iteration.
  ///
  /// \return True if the edge is marked as toSplit, false otherwise.
  /// False can be returned if, for instance, this is not profitable.
  bool PostponeSplitCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                 MachineBasicBlock *To, bool BreakPHIEdge);
  bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                       AllSuccsCache &AllSuccessors);

  /// If we sink a COPY inst, some debug users of its destination may no
  /// longer be dominated by the COPY, and will eventually be dropped.
  /// This is easily rectified by forwarding the non-dominated debug uses
  /// to the copy source.
  void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
                                     MachineBasicBlock *TargetBlock);
  bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
                               MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
                               bool &LocalUse) const;
  MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                      bool &BreakPHIEdge,
                                      AllSuccsCache &AllSuccessors);

  void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
                               SmallVectorImpl<MachineInstr *> &Candidates);

  bool
  aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I,
                            DenseMap<SinkItem, MachineInstr *> &SunkInstrs);

  bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                            MachineBasicBlock *MBB,
                            MachineBasicBlock *SuccToSinkTo,
                            AllSuccsCache &AllSuccessors);

  bool PerformTrivialForwardCoalescing(MachineInstr &MI,
                                       MachineBasicBlock *MBB);

  bool PerformSinkAndFold(MachineInstr &MI, MachineBasicBlock *MBB);

  SmallVector<MachineBasicBlock *, 4> &
  GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                         AllSuccsCache &AllSuccessors) const;

  std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB,
                                               bool UseCache = true);

  bool registerPressureSetExceedsLimit(unsigned NRegs,
                                       const TargetRegisterClass *RC,
                                       const MachineBasicBlock &MBB);

  bool registerPressureExceedsLimit(const MachineBasicBlock &MBB);
};

class MachineSinkingLegacy : public MachineFunctionPass {
public:
  static char ID;

  MachineSinkingLegacy() : MachineFunctionPass(ID) {
    initializeMachineSinkingLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachinePostDominatorTreeWrapperPass>();
    AU.addRequired<MachineCycleInfoWrapperPass>();
    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
    AU.addPreserved<MachineCycleInfoWrapperPass>();
    AU.addPreserved<MachineLoopInfoWrapperPass>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    if (UseBlockFreqInfo)
      AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }
};

} // end anonymous namespace

char MachineSinkingLegacy::ID = 0;

char &llvm::MachineSinkingLegacyID = MachineSinkingLegacy::ID;

INITIALIZE_PASS_BEGIN(MachineSinkingLegacy, DEBUG_TYPE, "Machine code sinking",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinkingLegacy, DEBUG_TYPE, "Machine code sinking",
                    false, false)

/// Return true if a target defined block prologue instruction interferes
/// with a sink candidate.
static bool blockPrologueInterferes(const MachineBasicBlock *BB,
                                    MachineBasicBlock::const_iterator End,
                                    const MachineInstr &MI,
                                    const TargetRegisterInfo *TRI,
                                    const TargetInstrInfo *TII,
                                    const MachineRegisterInfo *MRI) {
  for (MachineBasicBlock::const_iterator PI = BB->getFirstNonPHI(); PI != End;
       ++PI) {
    // Only check target defined prologue instructions
    if (!TII->isBasicBlockPrologue(*PI))
      continue;
    for (auto &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      if (MO.isUse()) {
        if (Reg.isPhysical() &&
            (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
          continue;
        if (PI->modifiesRegister(Reg, TRI))
          return true;
      } else {
        if (PI->readsRegister(Reg, TRI))
          return true;
        // Check for interference with non-dead defs
        auto *DefOp = PI->findRegisterDefOperand(Reg, TRI, /*isDead=*/false,
                                                 /*Overlap=*/true);
        if (DefOp && !DefOp->isDead())
          return true;
      }
    }
  }

  return false;
}

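// A sketch of the transformation below (virtual register names are
// illustrative):
//   %0 = <def>                  %0 = <def>
//   %1 = COPY %0         =>     ...
//   ... use of %1               ... use of %0
// This applies only when the COPY is the sole non-debug user of %0 and both
// registers have the same register class.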
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
                                                     MachineBasicBlock *MBB) {
  if (!MI.isCopy())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();
  if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
      !MRI->hasOneNonDBGUse(SrcReg))
    return false;

  const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
  const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
  if (SRC != DRC)
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
  if (DefMI->isCopyLike())
    return false;
  LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
  LLVM_DEBUG(dbgs() << "*** to: " << MI);
  MRI->replaceRegWith(DstReg, SrcReg);
  MI.eraseFromParent();

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  MRI->clearKillFlags(SrcReg);

  ++NumCoalesces;
  return true;
}

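// Sink-and-fold: instead of moving an instruction, fold a copy of it into
// each of its users. Every user must be either a COPY into a hard register
// or a load/store that can absorb the computation into its addressing mode.
// A sketch with illustrative pseudo-opcodes (not from any particular target):
//   %0 = ADDri %base, 16
//   %1 = LDRri %0, 0            =>    %1 = LDRri %base, 16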
bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
                                        MachineBasicBlock *MBB) {
  if (MI.isCopy() || MI.mayLoadOrStore() ||
      MI.getOpcode() == TargetOpcode::REG_SEQUENCE)
    return false;

  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  bool SawStore = true;
  if (!MI.isSafeToMove(SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't sink defs/uses of hard registers or if the instruction defines more
  // than one register.
  // Don't sink more than two register uses - it'll cover most of the cases and
  // greatly simplifies the register pressure checks.
  Register DefReg;
  Register UsedRegA, UsedRegB;
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isImm() || MO.isRegMask() || MO.isRegLiveOut() || MO.isMetadata() ||
        MO.isMCSymbol() || MO.isDbgInstrRef() || MO.isCFIIndex() ||
        MO.isIntrinsicID() || MO.isPredicate() || MO.isShuffleMask())
      continue;
    if (!MO.isReg())
      return false;

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isVirtual()) {
      if (MO.isDef()) {
        if (DefReg)
          return false;
        DefReg = Reg;
        continue;
      }

      if (UsedRegA == 0)
        UsedRegA = Reg;
      else if (UsedRegB == 0)
        UsedRegB = Reg;
      else
        return false;
      continue;
    }

    if (Reg.isPhysical() && MO.isUse() &&
        (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
      continue;

    return false;
  }

  // Scan uses of the destination register. Every use, except the last, must be
  // a copy, with a chain of copies terminating with either a copy into a hard
  // register, or a load/store instruction where the use is part of the
  // address (*not* the stored value).
  using SinkInfo = std::pair<MachineInstr *, ExtAddrMode>;
  SmallVector<SinkInfo> SinkInto;
  SmallVector<Register> Worklist;

  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
  const TargetRegisterClass *RCA =
      UsedRegA == 0 ? nullptr : MRI->getRegClass(UsedRegA);
  const TargetRegisterClass *RCB =
      UsedRegB == 0 ? nullptr : MRI->getRegClass(UsedRegB);

  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();

    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
      ExtAddrMode MaybeAM;
      MachineInstr &UseInst = *MO.getParent();
      if (UseInst.isCopy()) {
        Register DstReg;
        if (const MachineOperand &O = UseInst.getOperand(0); O.isReg())
          DstReg = O.getReg();
        if (DstReg == 0)
          return false;
        if (DstReg.isVirtual()) {
          Worklist.push_back(DstReg);
          continue;
        }
        // If we are going to replace a copy, the original instruction must be
        // as cheap as a copy.
        if (!TII->isAsCheapAsAMove(MI))
          return false;
        // The hard register must be in the register class of the original
        // instruction's destination register.
        if (!RC->contains(DstReg))
          return false;
      } else if (UseInst.mayLoadOrStore()) {
        ExtAddrMode AM;
        if (!TII->canFoldIntoAddrMode(UseInst, Reg, MI, AM))
          return false;
        MaybeAM = AM;
      } else {
        return false;
      }

      if (UseInst.getParent() != MI.getParent()) {
        // If the register class of the register we are replacing is a superset
        // of any of the register classes of the operands of the materialized
        // instruction don't consider that live range extended.
        const TargetRegisterClass *RCS = MRI->getRegClass(Reg);
        if (RCA && RCA->hasSuperClassEq(RCS))
          RCA = nullptr;
        else if (RCB && RCB->hasSuperClassEq(RCS))
          RCB = nullptr;
        if (RCA || RCB) {
          if (RCA == nullptr) {
            RCA = RCB;
            RCB = nullptr;
          }

          unsigned NRegs = !!RCA + !!RCB;
          if (RCA == RCB)
            RCB = nullptr;

          // Check we don't exceed register pressure at the destination.
          const MachineBasicBlock &MBB = *UseInst.getParent();
          if (RCB == nullptr) {
            if (registerPressureSetExceedsLimit(NRegs, RCA, MBB))
              return false;
          } else if (registerPressureSetExceedsLimit(1, RCA, MBB) ||
                     registerPressureSetExceedsLimit(1, RCB, MBB)) {
            return false;
          }
        }
      }

      SinkInto.emplace_back(&UseInst, MaybeAM);
    }
  }

  if (SinkInto.empty())
    return false;

  // Now we know we can fold the instruction into all its users.
  for (auto &[SinkDst, MaybeAM] : SinkInto) {
    MachineInstr *New = nullptr;
    LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
               SinkDst->dump());
    if (SinkDst->isCopy()) {
      // TODO: After performing the sink-and-fold, the original instruction is
      // deleted. Its value is still available (in a hard register), so if
      // there are debug instructions which refer to the (now deleted) virtual
      // register they could be updated to refer to the hard register, in
      // principle. However, it's not clear how to do that, moreover in some
      // cases the debug instructions may need to be replicated proportionally
      // to the number of the COPY instructions replaced and in some extreme
      // cases we can end up with quadratic increase in the number of debug
      // instructions.

      // Sink a copy of the instruction, replacing a COPY instruction.
      MachineBasicBlock::iterator InsertPt = SinkDst->getIterator();
      Register DstReg = SinkDst->getOperand(0).getReg();
      TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI, *TRI);
      New = &*std::prev(InsertPt);
      if (!New->getDebugLoc())
        New->setDebugLoc(SinkDst->getDebugLoc());

      // The operand registers of the "sunk" instruction have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (UsedRegA)
        MRI->clearKillFlags(UsedRegA);
      if (UsedRegB)
        MRI->clearKillFlags(UsedRegB);
    } else {
      // Fold instruction into the addressing mode of a memory instruction.
      New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);

      // The registers of the addressing mode may have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
      if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
    }
    LLVM_DEBUG(dbgs() << "yielding"; New->dump());
    // Clear the StoreInstrCache, since we may invalidate it by erasing.
    if (SinkDst->mayStore() && !SinkDst->hasOrderedMemoryRef())
      StoreInstrCache.clear();
    SinkDst->eraseFromParent();
  }

  // Collect operands that need to be cleaned up because the registers no
  // longer exist (in COPYs and debug instructions). We cannot delete
  // instructions or clear operands while traversing register uses.
  SmallVector<MachineOperand *> Cleanup;
  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();
    for (MachineOperand &MO : MRI->use_operands(Reg)) {
      MachineInstr *U = MO.getParent();
      assert((U->isCopy() || U->isDebugInstr()) &&
             "Only debug uses and copies must remain");
      if (U->isCopy())
        Worklist.push_back(U->getOperand(0).getReg());
      Cleanup.push_back(&MO);
    }
  }

  // Delete the dead COPYs and clear operands in debug instructions.
  for (MachineOperand *MO : Cleanup) {
    MachineInstr *I = MO->getParent();
    if (I->isCopy()) {
      I->eraseFromParent();
    } else {
      MO->setReg(0);
      MO->setSubReg(0);
    }
  }

  MI.eraseFromParent();
  return true;
}

/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move a def
/// after its uses.
bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
                                             MachineBasicBlock *MBB,
                                             MachineBasicBlock *DefMBB,
                                             bool &BreakPHIEdge,
                                             bool &LocalUse) const {
  assert(Reg.isVirtual() && "Only makes sense for vregs");

  // Ignore debug uses because debug info doesn't affect the code.
  if (MRI->use_nodbg_empty(Reg))
    return true;

  // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
  // into and they are all PHI nodes. In this case, machine-sink must break
  // the critical edge first. e.g.
  //
  // %bb.1:
  //   Predecessors according to CFG: %bb.0
  //     ...
  //     %def = DEC64_32r %x, implicit-def dead %eflags
  //     ...
  //     JE_4 <%bb.37>, implicit %eflags
  //   Successors according to CFG: %bb.37 %bb.2
  //
  // %bb.2:
  //     %p = PHI %y, %bb.0, %def, %bb.1
  if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
        MachineInstr *UseInst = MO.getParent();
        unsigned OpNo = MO.getOperandNo();
        MachineBasicBlock *UseBlock = UseInst->getParent();
        return UseBlock == MBB && UseInst->isPHI() &&
               UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
      })) {
    BreakPHIEdge = true;
    return true;
  }

  for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
    // Determine the block of the use.
    MachineInstr *UseInst = MO.getParent();
    unsigned OpNo = &MO - &UseInst->getOperand(0);
    MachineBasicBlock *UseBlock = UseInst->getParent();
    if (UseInst->isPHI()) {
      // PHI nodes use the operand in the predecessor block, not the block with
      // the PHI.
      UseBlock = UseInst->getOperand(OpNo + 1).getMBB();
    } else if (UseBlock == DefMBB) {
      LocalUse = true;
      return false;
    }

    // Check that it dominates.
    if (!DT->dominates(MBB, UseBlock))
      return false;
  }

  return true;
}

/// Return true if this machine instruction loads from global offset table or
/// constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert(MI.mayLoad() && "Expected MI that loads!");

  // If we lost memory operands, conservatively assume that the instruction
  // reads from everything.
  if (MI.memoperands_empty())
    return true;

  for (MachineMemOperand *MemOp : MI.memoperands())
    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
      if (PSV->isGOT() || PSV->isConstantPool())
        return true;

  return false;
}

void MachineSinking::FindCycleSinkCandidates(
    MachineCycle *Cycle, MachineBasicBlock *BB,
    SmallVectorImpl<MachineInstr *> &Candidates) {
  for (auto &MI : *BB) {
    LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
    if (MI.isMetaInstruction()) {
      LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n");
      continue;
    }
    if (!TII->shouldSink(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
                           "target\n");
      continue;
    }
    if (!isCycleInvariant(Cycle, MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
      continue;
    }
    bool DontMoveAcrossStore = true;
    if (!MI.isSafeToMove(DontMoveAcrossStore)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
      continue;
    }
    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Not sinking load that isn't from the "
                           "GOT or constant pool\n");
      continue;
    }
    if (MI.isConvergent())
      continue;

    const MachineOperand &MO = MI.getOperand(0);
    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
      continue;
    if (!MRI->hasOneDef(MO.getReg()))
      continue;

    LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
    Candidates.push_back(&MI);
  }
}

PreservedAnalyses
MachineSinkingPass::run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM) {
  auto *DT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  auto *PDT = &MFAM.getResult<MachinePostDominatorTreeAnalysis>(MF);
  auto *CI = &MFAM.getResult<MachineCycleAnalysis>(MF);
  auto *PSI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
                  .getCachedResult<ProfileSummaryAnalysis>(
                      *MF.getFunction().getParent());
  auto *MBFI = UseBlockFreqInfo
                   ? &MFAM.getResult<MachineBlockFrequencyAnalysis>(MF)
                   : nullptr;
  auto *MBPI = &MFAM.getResult<MachineBranchProbabilityAnalysis>(MF);
  auto *AA = &MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF)
                  .getManager()
                  .getResult<AAManager>(MF.getFunction());
  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
  auto *SI = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
  auto *LV = MFAM.getCachedResult<LiveVariablesAnalysis>(MF);
  auto *MLI = MFAM.getCachedResult<MachineLoopAnalysis>(MF);
  MachineSinking Impl(EnableSinkAndFold, DT, PDT, LV, MLI, SI, LIS, CI, PSI,
                      MBFI, MBPI, AA);
  bool Changed = Impl.run(MF);
  if (!Changed)
    return PreservedAnalyses::all();
  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserve<MachineCycleAnalysis>();
  PA.preserve<MachineLoopAnalysis>();
  return PA;
}

void MachineSinkingPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  OS << MapClassName2PassName(name()); // ideally machine-sink
  if (EnableSinkAndFold)
    OS << "<enable-sink-fold>";
}

bool MachineSinkingLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
  bool EnableSinkAndFold = PassConfig->getEnableSinkAndFold();

  auto *DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  auto *PDT =
      &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
  auto *CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI =
      UseBlockFreqInfo
          ? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
          : nullptr;
  auto *MBPI =
      &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  // Get analyses for split critical edge.
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  auto *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
  auto *SI = SIWrapper ? &SIWrapper->getSI() : nullptr;
  auto *LVWrapper = getAnalysisIfAvailable<LiveVariablesWrapperPass>();
  auto *LV = LVWrapper ? &LVWrapper->getLV() : nullptr;
  auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
  auto *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;

  MachineSinking Impl(EnableSinkAndFold, DT, PDT, LV, MLI, SI, LIS, CI, PSI,
                      MBFI, MBPI, AA);
  return Impl.run(MF);
}

bool MachineSinking::run(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");

  STI = &MF.getSubtarget();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &MF.getRegInfo();

  RegClassInfo.runOnMachineFunction(MF);

  bool EverMadeChange = false;

  while (true) {
    bool MadeChange = false;

    // Process all basic blocks.
    CEBCandidates.clear();
    CEMergeCandidates.clear();
    ToSplit.clear();
    for (auto &MBB : MF)
      MadeChange |= ProcessBlock(MBB);

    // If we have anything we marked as toSplit, split it now.
    MachineDomTreeUpdater MDTU(DT, PDT,
                               MachineDomTreeUpdater::UpdateStrategy::Lazy);
    for (const auto &Pair : ToSplit) {
      auto NewSucc = Pair.first->SplitCriticalEdge(
          Pair.second, {LIS, SI, LV, MLI}, nullptr, &MDTU);
      if (NewSucc != nullptr) {
        LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
                          << printMBBReference(*Pair.first) << " -- "
                          << printMBBReference(*NewSucc) << " -- "
                          << printMBBReference(*Pair.second) << '\n');
        if (MBFI)
          MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);

        MadeChange = true;
        ++NumSplit;
        CI->splitCriticalEdge(Pair.first, Pair.second, NewSucc);
      } else
        LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
    }
    // If this iteration over the code changed anything, keep iterating.
    if (!MadeChange)
      break;
    EverMadeChange = true;
  }

  if (SinkInstsIntoCycle) {
    SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
    SchedModel.init(STI);
    bool HasHighPressure;

    DenseMap<SinkItem, MachineInstr *> SunkInstrs;

    enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
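    // The stages progressively widen the candidate set: COPY sinks only a
    // limited number of COPYs, LOW_LATENCY additionally sinks instructions
    // the target reports as having low def latency, and AGGRESSIVE sinks any
    // remaining candidate. A later stage only runs while some preheader
    // still exceeds its register pressure limit.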
    for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END;
         ++Stage, SunkInstrs.clear()) {
      HasHighPressure = false;

      for (auto *Cycle : Cycles) {
        MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
        if (!Preheader) {
          LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
          continue;
        }
        SmallVector<MachineInstr *, 8> Candidates;
        FindCycleSinkCandidates(Cycle, Preheader, Candidates);

        unsigned i = 0;

        // Walk the candidates in reverse order so that we start with the use
        // of a def-use chain, if there is any.
        // TODO: Sort the candidates using a cost-model.
        for (MachineInstr *I : llvm::reverse(Candidates)) {
          // CycleSinkStage::COPY: Sink a limited number of copies
          if (Stage == CycleSinkStage::COPY) {
            if (i++ == SinkIntoCycleLimit) {
              LLVM_DEBUG(dbgs() << "CycleSink: Reached limit of instructions "
                                   "to be analyzed.\n");
              break;
            }

            if (!I->isCopy())
              continue;
          }

          // CycleSinkStage::LOW_LATENCY: sink an unlimited number of
          // instructions which the target specifies as low-latency
          if (Stage == CycleSinkStage::LOW_LATENCY &&
              !TII->hasLowDefLatency(SchedModel, *I, 0))
            continue;

          if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs))
            continue;
          EverMadeChange = true;
          ++NumCycleSunk;
        }

        // Recalculate the pressure after sinking
        if (!HasHighPressure)
          HasHighPressure = registerPressureExceedsLimit(*Preheader);
      }
      if (!HasHighPressure)
        break;
    }
  }

  HasStoreCache.clear();
  StoreInstrCache.clear();

  // Now clear any kill flags for recorded registers.
  for (auto I : RegsToClearKillFlags)
    MRI->clearKillFlags(I);
  RegsToClearKillFlags.clear();

  releaseMemory();
  return EverMadeChange;
}

bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
  if ((!EnableSinkAndFold && MBB.succ_size() <= 1) || MBB.empty())
    return false;

  // Don't bother sinking code out of unreachable blocks. In addition to being
  // unprofitable, it can also lead to infinite looping, because in an
  // unreachable cycle there may be nowhere to stop.
  if (!DT->isReachableFromEntry(&MBB))
    return false;

  bool MadeChange = false;

  // Cache all successors, sorted by frequency info and cycle depth.
  AllSuccsCache AllSuccessors;

  // Walk the basic block bottom-up. Remember if we saw a store.
  MachineBasicBlock::iterator I = MBB.end();
  --I;
  bool ProcessedBegin, SawStore = false;
  do {
    MachineInstr &MI = *I; // The instruction to sink.

    // Predecrement I (if it's not begin) so that it isn't invalidated by
    // sinking.
    ProcessedBegin = I == MBB.begin();
    if (!ProcessedBegin)
      --I;

    if (MI.isDebugOrPseudoInstr() || MI.isFakeUse()) {
      if (MI.isDebugValue())
        ProcessDbgInst(MI);
      continue;
    }

    if (EnableSinkAndFold && PerformSinkAndFold(MI, &MBB)) {
      MadeChange = true;
      continue;
    }

    // Can't sink anything out of a block that has fewer than two successors.
    if (MBB.succ_size() <= 1)
      continue;

    if (PerformTrivialForwardCoalescing(MI, &MBB)) {
      MadeChange = true;
      continue;
    }

    if (SinkInstruction(MI, SawStore, AllSuccessors)) {
      ++NumSunk;
      MadeChange = true;
    }

    // If we just processed the first instruction in the block, we're done.
  } while (!ProcessedBegin);

  SeenDbgUsers.clear();
  SeenDbgVars.clear();
  // Recalculate the BB register pressure after sinking into this BB.
  CachedRegisterPressure.clear();
  return MadeChange;
}

void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
  // When we see DBG_VALUEs for registers, record any vreg it reads, so that
  // we know what to sink if the vreg def sinks.
  assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");

  DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
                    MI.getDebugLoc()->getInlinedAt());
  bool SeenBefore = SeenDbgVars.contains(Var);

  for (MachineOperand &MO : MI.debug_operands()) {
    if (MO.isReg() && MO.getReg().isVirtual())
      SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
  }

  // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
  SeenDbgVars.insert(Var);
}

bool MachineSinking::isWorthBreakingCriticalEdge(
    MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
    MachineBasicBlock *&DeferredFromBlock) {
  // FIXME: Need much better heuristics.

  // If the pass has already considered breaking this edge (during this pass
  // through the function), then let's go ahead and break it. This means
  // sinking multiple "cheap" instructions into the same block.
  if (!CEBCandidates.insert(std::make_pair(From, To)).second)
    return true;

  if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
    return true;

  // Check and record the register and the destination block we want to sink
  // into. Note that we do this before the branch probability check below,
  // because we want to record the initial candidate even if it is on a hot
  // edge, so that other candidates that are not on hot edges can be sunk as
  // well.
  for (const auto &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
    auto Key = std::make_pair(SrcReg, To);
    auto Res = CEMergeCandidates.try_emplace(Key, From);
    // We have already wanted to sink the same register into the same block;
    // consider it profitable.
    if (!Res.second) {
      // Return the source block that was previously held off.
      DeferredFromBlock = Res.first->second;
      return true;
    }
  }

  if (From->isSuccessor(To) &&
      MBPI->getEdgeProbability(From, To) <=
          BranchProbability(SplitEdgeProbabilityThreshold, 100))
    return true;

  // MI is cheap, we probably don't want to break the critical edge for it.
  // However, if this would allow some definitions of its source operands
  // to be sunk then it's probably worth it.
  for (const MachineOperand &MO : MI.all_uses()) {
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    // We don't move live definitions of physical registers,
    // so sinking their uses won't enable any opportunities.
    if (Reg.isPhysical())
      continue;

    // If this instruction is the only user of a virtual register,
    // check if breaking the edge will enable sinking
    // both this instruction and the defining instruction.
    if (MRI->hasOneNonDBGUse(Reg)) {
      // If the definition resides in the same MBB,
      // claim it's likely we can sink these together.
      // If the definition resides elsewhere, we aren't
      // blocking it from being sunk so don't break the edge.
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (DefMI->getParent() == MI.getParent())
        return true;
    }
  }

  // Let the target decide if it's worth breaking this
  // critical edge for a "cheap" instruction.
  return TII->shouldBreakCriticalEdgeToSink(MI);
}

bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
                                                MachineBasicBlock *FromBB,
                                                MachineBasicBlock *ToBB,
                                                bool BreakPHIEdge) {
  // Avoid breaking a back edge: From == To means a backedge of a single-BB
  // cycle.
  if (!SplitEdges || FromBB == ToBB || !FromBB->isSuccessor(ToBB))
    return false;

  MachineCycle *FromCycle = CI->getCycle(FromBB);
  MachineCycle *ToCycle = CI->getCycle(ToBB);

  // Check for backedges of more "complex" cycles.
  if (FromCycle == ToCycle && FromCycle &&
      (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
    return false;

  // It's not always legal to break critical edges and sink the computation
  // to the edge.
  //
  // %bb.1:
  //   v1024
  //   Beq %bb.3
  //   <fallthrough>
  // %bb.2:
  //   ... no uses of v1024
  //   <fallthrough>
  // %bb.3:
  //   ...
  //   = v1024
  //
  // If the %bb.1 -> %bb.3 edge is broken and the computation of v1024 is
  // inserted:
  //
  // %bb.1:
  //   ...
  //   Bne %bb.2
  // %bb.4:
  //   v1024 =
  //   B %bb.3
  // %bb.2:
  //   ... no uses of v1024
  //   <fallthrough>
  // %bb.3:
  //   ...
  //   = v1024
  //
  // This is incorrect since v1024 is not computed along the
  // %bb.1->%bb.2->%bb.3 flow. We need to ensure that the new basic block
  // where the computation is sunk dominates all the uses.
  // It's only legal to break the critical edge and sink the computation to
  // the new block if all the predecessors of "To", except for "From", are
  // not dominated by "From". Given the SSA property, this means these
  // predecessors are dominated by "To".
  //
  // There is no need to do this check if all the uses are PHI nodes. PHI
  // sources are only defined on the specific predecessor edges.
  if (!BreakPHIEdge) {
    for (MachineBasicBlock *Pred : ToBB->predecessors())
      if (Pred != FromBB && !DT->dominates(ToBB, Pred))
        return false;
  }

  return true;
}

bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
                                               MachineBasicBlock *FromBB,
                                               MachineBasicBlock *ToBB,
                                               bool BreakPHIEdge) {
  bool Status = false;
  MachineBasicBlock *DeferredFromBB = nullptr;
  if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) {
    // If there is a DeferredFromBB, we consider FromBB only if _both_
    // of them are legal to split.
    if ((!DeferredFromBB ||
         ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
         isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
        isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
      ToSplit.insert(std::make_pair(FromBB, ToBB));
      if (DeferredFromBB)
        ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
      Status = true;
    }
  }

  return Status;
}

std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB,
                                      bool UseCache) {
  // To save compile time, an MBB's register pressure is cached and not
  // updated during a single ProcessBlock iteration, even though the real
  // pressure changes once instructions are sunk into the block.
  // FIXME: we need an accurate and cheap register pressure estimation model
  // here.

  auto RP = CachedRegisterPressure.find(&MBB);
  if (UseCache && RP != CachedRegisterPressure.end())
    return RP->second;

  RegionPressure Pressure;
  RegPressureTracker RPTracker(Pressure);

  // Initialize the register pressure tracker.
  RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
                 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

  for (MachineBasicBlock::const_iterator MII = MBB.instr_end(),
                                         MIE = MBB.instr_begin();
       MII != MIE; --MII) {
    const MachineInstr &MI = *std::prev(MII);
    if (MI.isDebugInstr() || MI.isPseudoProbe())
      continue;
    RegisterOperands RegOpers;
    RegOpers.collect(MI, *TRI, *MRI, false, false);
    RPTracker.recedeSkipDebugValues();
    assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
    RPTracker.recede(RegOpers);
  }

  RPTracker.closeRegion();

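  // If a stale cache entry exists (the UseCache == false path), update it in
  // place so that previously returned references stay valid; otherwise insert
  // a fresh entry.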
  if (RP != CachedRegisterPressure.end()) {
    CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure;
    return CachedRegisterPressure[&MBB];
  }

  auto It = CachedRegisterPressure.insert(
      std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
  return It.first->second;
}

bool MachineSinking::registerPressureSetExceedsLimit(
    unsigned NRegs, const TargetRegisterClass *RC,
    const MachineBasicBlock &MBB) {
  unsigned Weight = NRegs * TRI->getRegClassWeight(RC).RegWeight;
  const int *PS = TRI->getRegClassPressureSets(RC);
  std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB);
  for (; *PS != -1; PS++)
    if (Weight + BBRegisterPressure[*PS] >=
        RegClassInfo.getRegPressureSetLimit(*PS))
      return true;
  return false;
}

// Recalculate RP and check if any pressure set exceeds the set limit.
bool MachineSinking::registerPressureExceedsLimit(
    const MachineBasicBlock &MBB) {
  std::vector<unsigned> BBRegisterPressure =
      getBBRegisterPressure(MBB, /*UseCache=*/false);

  for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) {
    if (BBRegisterPressure[PS] >=
        TRI->getRegPressureSetLimit(*MBB.getParent(), PS)) {
      return true;
    }
  }

  return false;
}

/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          MachineBasicBlock *SuccToSinkTo,
                                          AllSuccsCache &AllSuccessors) {
  assert(SuccToSinkTo && "Invalid SinkTo Candidate BB");

  if (MBB == SuccToSinkTo)
    return false;

  // It is profitable if SuccToSinkTo does not post-dominate the current block.
  if (!PDT->dominates(SuccToSinkTo, MBB))
    return true;

  // It is profitable to sink an instruction from a deeper cycle to a shallower
  // cycle, even if the latter post-dominates the former (PR21115).
  if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
    return true;

  // Check if the only use in the post-dominated block is a PHI instruction.
  bool NonPHIUse = false;
  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
    MachineBasicBlock *UseBlock = UseInst.getParent();
    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
      NonPHIUse = true;
  }
  if (!NonPHIUse)
    return true;

  // Even if SuccToSinkTo post-dominates MBB, sinking may still be profitable
  // if MI can be sunk further into another block in the next round.
  bool BreakPHIEdge = false;
  // FIXME - If finding successor is compile time expensive then cache results.
  if (MachineBasicBlock *MBB2 =
          FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
    return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);

  MachineCycle *MCycle = CI->getCycle(MBB);

  // If the instruction is not inside a cycle, it is not profitable to sink MI
  // to a post-dominating block SuccToSinkTo.
  if (!MCycle)
    return false;

  // If this instruction is inside a cycle and sinking it can shorten the live
  // ranges of more registers, it is still profitable.
  for (const MachineOperand &MO : MI.operands()) {
    // Ignore non-register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      // Don't handle non-constant and non-ignorable physical register uses.
      if (MO.isUse() && !MRI->isConstantPhysReg(Reg) &&
          !TII->isIgnorableUse(MO))
        return false;
      continue;
    }

    // Users of the defs are all dominated by SuccToSinkTo.
    if (MO.isDef()) {
      // This def register's live range is shortened after sinking.
      bool LocalUse = false;
      if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
                                   LocalUse))
        return false;
    } else {
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (!DefMI)
        continue;
      MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
      // If DefMI is defined outside of the cycle, there is no live range
      // impact for this operand. "Defined outside of the cycle" means either:
      // 1. the definition is outside of the cycle, or
      // 2. the definition is in the cycle, but it is a PHI in the cycle
      //    header.
      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
                              Cycle->getHeader() == DefMI->getParent()))
        continue;
      // The DefMI is defined inside the cycle.
      // If sinking this operand makes some register pressure set exceed its
      // limit, it is not profitable.
      if (registerPressureSetExceedsLimit(1, MRI->getRegClass(Reg),
                                          *SuccToSinkTo)) {
        LLVM_DEBUG(
            dbgs() << "register pressure exceeds limit, not profitable.");
        return false;
      }
    }
  }

  // If MI is in a cycle and all its operands are live across the whole cycle,
  // or if no sunk operand makes a register pressure set exceed its limit, it
  // is profitable to sink MI.
  return true;
}

/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
/// computing it if it was not already cached.
SmallVector<MachineBasicBlock *, 4> &
MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                                       AllSuccsCache &AllSuccessors) const {
  // Do we have the sorted successors in cache?
  auto Succs = AllSuccessors.find(MBB);
  if (Succs != AllSuccessors.end())
    return Succs->second;

  SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());

  // Handle cases where sinking can happen but where the sink point isn't a
  // successor. For example:
  //
  //   x = computation
  //   if () {} else {}
  //   use x
  //
  for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
    // DomTree children of MBB that have MBB as immediate dominator are added.
    if (DTChild->getIDom()->getBlock() == MI.getParent() &&
        // Skip MBBs already added to the AllSuccs vector above.
        !MBB->isSuccessor(DTChild->getBlock()))
      AllSuccs.push_back(DTChild->getBlock());
  }

  // Sort successors according to their cycle depth or block frequency info.
  llvm::stable_sort(
      AllSuccs, [&](const MachineBasicBlock *L, const MachineBasicBlock *R) {
        uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
        uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
        if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI) ||
            (!LHSFreq && !RHSFreq))
          return CI->getCycleDepth(L) < CI->getCycleDepth(R);
        return LHSFreq < RHSFreq;
      });

  auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));

  return it.first->second;
}

/// FindSuccToSinkTo - Find a successor to sink this instruction to.
MachineBasicBlock *
MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                 bool &BreakPHIEdge,
                                 AllSuccsCache &AllSuccessors) {
  assert(MBB && "Invalid MachineBasicBlock!");

  // Loop over all the operands of the specified instruction. If there is
  // anything we can't handle, bail out.

  // SuccToSinkTo - This is the successor to sink this instruction to, once we
  // decide.
  MachineBasicBlock *SuccToSinkTo = nullptr;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue; // Ignore non-register operands.

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
          return nullptr;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return nullptr;
      }
    } else {
      // Virtual register uses are always safe to sink.
      if (MO.isUse())
        continue;

      // If it's not safe to move defs of the register class, then abort.
      if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
        return nullptr;

      // Virtual register defs can only be sunk if all their uses are in blocks
      // dominated by one of the successors.
      if (SuccToSinkTo) {
        // If a previous operand picked a block to sink to, then this operand
        // must be sinkable to the same block.
        bool LocalUse = false;
        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
                                     LocalUse))
          return nullptr;

        continue;
      }

      // Otherwise, we should look at all the successors and decide which one
      // we should sink to. If we have reliable block frequency information
      // (frequency != 0) available, give successors with smaller frequencies
      // higher priority, otherwise prioritize smaller cycle depths.
      for (MachineBasicBlock *SuccBlock :
           GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
        bool LocalUse = false;
        if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, BreakPHIEdge,
                                    LocalUse)) {
          SuccToSinkTo = SuccBlock;
          break;
        }
        if (LocalUse)
          // Def is used locally, it's never safe to move this def.
          return nullptr;
      }

      // If we couldn't find a block to sink to, ignore this instruction.
      if (!SuccToSinkTo)
        return nullptr;
      if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
        return nullptr;
    }
  }

  // It is not possible to sink an instruction into its own block. This can
  // happen with cycles.
  if (MBB == SuccToSinkTo)
    return nullptr;

  // It's not safe to sink instructions to an EH landing pad. Control flow
  // into a landing pad is implicitly defined.
  if (SuccToSinkTo && SuccToSinkTo->isEHPad())
    return nullptr;

  // It ought to be okay to sink instructions into an INLINEASM_BR target, but
  // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction
  // in the source block (which this code does not yet do). So for now, forbid
  // doing so.
  if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
    return nullptr;

  if (SuccToSinkTo && !TII->isSafeToSink(MI, SuccToSinkTo, CI))
    return nullptr;

  return SuccToSinkTo;
}

/// Return true if MI is likely to be usable as a memory operation by the
/// implicit null check optimization.
///
/// This is a "best effort" heuristic, and should not be relied upon for
/// correctness. This returning true does not guarantee that the implicit null
/// check optimization is legal over MI, and this returning false does not
/// guarantee MI cannot possibly be used to do a null check.
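///
/// An illustrative (pseudo-MIR) shape of a candidate: the load's base register
/// is compared against zero by the lone predecessor's terminator, which was
/// created from a branch carrying !make.implicit metadata:
///   %bb.0:  Beq %base, 0, %null_bb   ; !make.implicit
///   %bb.1:  %v = <load> %base, 0     ; sinking this defeats the null check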
static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
                                             const TargetInstrInfo *TII,
                                             const TargetRegisterInfo *TRI) {
  using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;

  auto *MBB = MI.getParent();
  if (MBB->pred_size() != 1)
    return false;

  auto *PredMBB = *MBB->pred_begin();
  auto *PredBB = PredMBB->getBasicBlock();

  // Frontends that don't use implicit null checks have no reason to emit
  // branches with make.implicit metadata, and this function should always
  // return false for them.
  if (!PredBB ||
      !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
    return false;

  const MachineOperand *BaseOp;
  int64_t Offset;
  bool OffsetIsScalable;
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
    return false;

  if (!BaseOp->isReg())
    return false;

  if (!(MI.mayLoad() && !MI.isPredicable()))
    return false;

  MachineBranchPredicate MBP;
  if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
    return false;

  return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
         (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
          MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
         MBP.LHS.getReg() == BaseOp->getReg();
}
1550
1551/// If the sunk instruction is a copy, try to forward the copy instead of
1552/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
1553/// there's any subregister weirdness involved. Returns true if copy
1554/// propagation occurred.
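///
/// A minimal illustration with hypothetical vregs: when the copy
///   %1 = COPY %0
/// is sunk past
///   DBG_VALUE %1, $noreg, !"var", !DIExpression()
/// the DBG_VALUE can be rewritten to read the copy source,
///   DBG_VALUE %0, $noreg, !"var", !DIExpression()
/// so the variable location survives instead of being set undef.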
1555static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI,
1556 Register Reg) {
1557 const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
1558 const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();
1559
1560  // Check whether the value copied by SinkInst can be forwarded into the
1561  // DBG_VALUE's operand(s). If it can't, the caller is responsible for
1562  // marking the original DBG_VALUE as undef.
1563
1564 const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
1565 auto CopyOperands = TII.isCopyInstr(MI: SinkInst);
1566 if (!CopyOperands)
1567 return false;
1568 SrcMO = CopyOperands->Source;
1569 DstMO = CopyOperands->Destination;
1570
1571 // Check validity of forwarding this copy.
1572 bool PostRA = MRI.getNumVirtRegs() == 0;
1573
1574 // Trying to forward between physical and virtual registers is too hard.
1575 if (Reg.isVirtual() != SrcMO->getReg().isVirtual())
1576 return false;
1577
1578 // Only try virtual register copy-forwarding before regalloc, and physical
1579 // register copy-forwarding after regalloc.
1580 bool arePhysRegs = !Reg.isVirtual();
1581 if (arePhysRegs != PostRA)
1582 return false;
1583
1584 // Pre-regalloc, only forward if all subregisters agree (or there are no
1585 // subregs at all). More analysis might recover some forwardable copies.
1586 if (!PostRA)
1587 for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg))
1588 if (DbgMO.getSubReg() != SrcMO->getSubReg() ||
1589 DbgMO.getSubReg() != DstMO->getSubReg())
1590 return false;
1591
1592 // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
1593 // of this copy. Only forward the copy if the DBG_VALUE operand exactly
1594 // matches the copy destination.
1595 if (PostRA && Reg != DstMO->getReg())
1596 return false;
1597
1598 for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) {
1599 DbgMO.setReg(SrcMO->getReg());
1600 DbgMO.setSubReg(SrcMO->getSubReg());
1601 }
1602 return true;
1603}
1604
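/// A debug instruction and the registers it refers to that are affected by a
/// candidate sink.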
1605using MIRegs = std::pair<MachineInstr *, SmallVector<Register, 2>>;
1606/// Sink an instruction and its associated debug instructions.
1607static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
1608 MachineBasicBlock::iterator InsertPos,
1609 ArrayRef<MIRegs> DbgValuesToSink) {
1610 // If we cannot find a location to use (merge with), then we erase the debug
1611 // location to prevent debug-info driven tools from potentially reporting
1612 // wrong location information.
1613 if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
1614 MI.setDebugLoc(DebugLoc::getMergedLocation(LocA: MI.getDebugLoc(),
1615 LocB: InsertPos->getDebugLoc()));
1616 else
1617 MI.setDebugLoc(DebugLoc());
1618
1619 // Move the instruction.
1620 MachineBasicBlock *ParentBlock = MI.getParent();
1621 SuccToSinkTo.splice(Where: InsertPos, Other: ParentBlock, From: MI,
1622 To: ++MachineBasicBlock::iterator(MI));
1623
1624 // Sink a copy of debug users to the insert position. Mark the original
1625 // DBG_VALUE location as 'undef', indicating that any earlier variable
1626 // location should be terminated as we've optimised away the value at this
1627 // point.
1628 for (const auto &DbgValueToSink : DbgValuesToSink) {
1629 MachineInstr *DbgMI = DbgValueToSink.first;
1630 MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(Orig: DbgMI);
1631 SuccToSinkTo.insert(I: InsertPos, MI: NewDbgMI);
1632
1633 bool PropagatedAllSunkOps = true;
1634 for (Register Reg : DbgValueToSink.second) {
1635 if (DbgMI->hasDebugOperandForReg(Reg)) {
1636 if (!attemptDebugCopyProp(SinkInst&: MI, DbgMI&: *DbgMI, Reg)) {
1637 PropagatedAllSunkOps = false;
1638 break;
1639 }
1640 }
1641 }
1642 if (!PropagatedAllSunkOps)
1643 DbgMI->setDebugValueUndef();
1644 }
1645}
1646
1647/// hasStoreBetween - Check if there is a store between the straight-line
1648/// blocks From and To.
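///
/// "Straight line" means From dominates To and To post-dominates From, e.g.
/// (a sketch; block names are illustrative):
///
///        From
///        /  \
///       A    B
///        \  /
///         To
///
/// Every path from From leads to To, so a store in A or B may alias and thus
/// block sinking a load from From into To.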
1649bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
1650 MachineBasicBlock *To, MachineInstr &MI) {
1651  // Make sure From and To form a straight line, i.e. From dominates To and
1652  // To post-dominates From.
1653 if (!DT->dominates(A: From, B: To) || !PDT->dominates(A: To, B: From))
1654 return true;
1655
1656 auto BlockPair = std::make_pair(x&: From, y&: To);
1657
1658  // Has this pair of blocks been queried before, with a definite cached
1659  // result?
1660 if (auto It = HasStoreCache.find(Val: BlockPair); It != HasStoreCache.end())
1661 return It->second;
1662
1663 if (auto It = StoreInstrCache.find(Val: BlockPair); It != StoreInstrCache.end())
1664 return llvm::any_of(Range&: It->second, P: [&](MachineInstr *I) {
1665 return I->mayAlias(AA, Other: MI, UseTBAA: false);
1666 });
1667
1668 bool SawStore = false;
1669 bool HasAliasedStore = false;
1670 DenseSet<MachineBasicBlock *> HandledBlocks;
1671 DenseSet<MachineBasicBlock *> HandledDomBlocks;
1672 // Go through all reachable blocks from From.
1673 for (MachineBasicBlock *BB : depth_first(G: From)) {
1674    // We insert the instruction at the start of block To, so there is no
1675    // need to worry about stores inside To.
1676    // Stores in block From have already been considered on entry to
1677    // SinkInstruction.
1678 if (BB == To || BB == From)
1679 continue;
1680
1681    // We already handled this BB in a previous iteration.
1682 if (HandledBlocks.count(V: BB))
1683 continue;
1684
1685 HandledBlocks.insert(V: BB);
1686    // If To post-dominates BB, then BB lies on a path from From to To.
1687 if (PDT->dominates(A: To, B: BB)) {
1688 if (!HandledDomBlocks.count(V: BB))
1689 HandledDomBlocks.insert(V: BB);
1690
1691      // If this BB is too big, or there are too many blocks on the straight
1692      // line between From and To, stop searching to save compile time.
1693 if (BB->sizeWithoutDebugLargerThan(Limit: SinkLoadInstsPerBlockThreshold) ||
1694 HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
1695 for (auto *DomBB : HandledDomBlocks) {
1696 if (DomBB != BB && DT->dominates(A: DomBB, B: BB))
1697 HasStoreCache[std::make_pair(x&: DomBB, y&: To)] = true;
1698 else if (DomBB != BB && DT->dominates(A: BB, B: DomBB))
1699 HasStoreCache[std::make_pair(x&: From, y&: DomBB)] = true;
1700 }
1701 HasStoreCache[BlockPair] = true;
1702 return true;
1703 }
1704
1705 for (MachineInstr &I : *BB) {
1706        // Conservatively treat a call or an ordered memory operation as
1707        // aliasing.
1708 if (I.isCall() || I.hasOrderedMemoryRef()) {
1709 for (auto *DomBB : HandledDomBlocks) {
1710 if (DomBB != BB && DT->dominates(A: DomBB, B: BB))
1711 HasStoreCache[std::make_pair(x&: DomBB, y&: To)] = true;
1712 else if (DomBB != BB && DT->dominates(A: BB, B: DomBB))
1713 HasStoreCache[std::make_pair(x&: From, y&: DomBB)] = true;
1714 }
1715 HasStoreCache[BlockPair] = true;
1716 return true;
1717 }
1718
1719 if (I.mayStore()) {
1720 SawStore = true;
1721          // We still have a chance to sink MI if none of the stores in
1722          // between alias it.
1723          // Cache all store instructions, so that we don't need to walk all
1724          // blocks reachable from From again for the next load instruction.
1725 if (I.mayAlias(AA, Other: MI, UseTBAA: false))
1726 HasAliasedStore = true;
1727 StoreInstrCache[BlockPair].push_back(Elt: &I);
1728 }
1729 }
1730 }
1731 }
1732 // If there is no store at all, cache the result.
1733 if (!SawStore)
1734 HasStoreCache[BlockPair] = false;
1735 return HasAliasedStore;
1736}
1737
1738/// Aggressively sink instructions into cycles. This will aggressively try to
1739/// sink all instructions in the top-most preheaders in an attempt to reduce
1740/// register pressure. In particular, it will sink into multiple successor
1741/// blocks without limits on the amount of sinking or on the type of ops being
1742/// sunk (so long as they are safe to sink).
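///
/// For instance (a sketch with hypothetical vregs and opcodes), a preheader
/// def with uses in two distinct cycle blocks:
///   preheader: %0 = ADDv ...
///   body1:     ... use %0
///   body2:     ... use %0
/// is duplicated into each using block with a fresh vreg:
///   body1:     %1 = ADDv ... ; use rewritten to %1
///   body2:     %2 = ADDv ... ; use rewritten to %2
/// after which the original def, now dead, is erased.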
1743bool MachineSinking::aggressivelySinkIntoCycle(
1744 MachineCycle *Cycle, MachineInstr &I,
1745 DenseMap<SinkItem, MachineInstr *> &SunkInstrs) {
1746 // TODO: support instructions with multiple defs
1747 if (I.getNumDefs() > 1)
1748 return false;
1749
1750 LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
1751 assert(Cycle->getCyclePreheader() && "Cycle sink needs a preheader block");
1752 SmallVector<std::pair<RegSubRegPair, MachineInstr *>> Uses;
1753
1754 MachineOperand &DefMO = I.getOperand(i: 0);
1755 for (MachineInstr &MI : MRI->use_instructions(Reg: DefMO.getReg())) {
1756 Uses.push_back(Elt: {{DefMO.getReg(), DefMO.getSubReg()}, &MI});
1757 }
1758
1759 for (std::pair<RegSubRegPair, MachineInstr *> Entry : Uses) {
1760 MachineInstr *MI = Entry.second;
1761 LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI);
1762 if (MI->isPHI()) {
1763 LLVM_DEBUG(
1764 dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n");
1765 continue;
1766 }
1767    // We cannot sink the instruction to a point before the block prologue.
1768 if (MI->isPosition() || TII->isBasicBlockPrologue(MI: *MI)) {
1769 LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, "
1770 "can't sink.\n");
1771 continue;
1772 }
1773 if (!Cycle->contains(Block: MI->getParent())) {
1774 LLVM_DEBUG(
1775 dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n");
1776 continue;
1777 }
1778
1779 MachineBasicBlock *SinkBlock = MI->getParent();
1780 MachineInstr *NewMI = nullptr;
1781 SinkItem MapEntry(&I, SinkBlock);
1782
1783 auto SI = SunkInstrs.find(Val: MapEntry);
1784
1785 // Check for the case in which we have already sunk a copy of this
1786 // instruction into the user block.
1787 if (SI != SunkInstrs.end()) {
1788 LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: "
1789 << printMBBReference(*SinkBlock) << "\n");
1790 NewMI = SI->second;
1791 }
1792
1793 // Create a copy of the instruction in the use block.
1794 if (!NewMI) {
1795 LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: "
1796 << printMBBReference(*SinkBlock) << "\n");
1797
1798 NewMI = I.getMF()->CloneMachineInstr(Orig: &I);
1799 if (DefMO.getReg().isVirtual()) {
1800 const TargetRegisterClass *TRC = MRI->getRegClass(Reg: DefMO.getReg());
1801 Register DestReg = MRI->createVirtualRegister(RegClass: TRC);
1802 NewMI->substituteRegister(FromReg: DefMO.getReg(), ToReg: DestReg, SubIdx: DefMO.getSubReg(),
1803 RegInfo: *TRI);
1804 }
1805 SinkBlock->insert(I: SinkBlock->SkipPHIsAndLabels(I: SinkBlock->begin()),
1806 MI: NewMI);
1807 SunkInstrs.insert(KV: {MapEntry, NewMI});
1808 }
1809
1810    // Conservatively clear any kill flags on uses of the sunk instruction.
1811 for (MachineOperand &MO : NewMI->all_uses()) {
1812 assert(MO.isReg() && MO.isUse());
1813 RegsToClearKillFlags.insert(V: MO.getReg());
1814 }
1815
1816    // The instruction has been duplicated into a different basic block, so
1817    // do not retain the original debug information.
1818 assert(!NewMI->isDebugInstr() && "Should not sink debug inst");
1819 NewMI->setDebugLoc(DebugLoc());
1820
1821 // Replace the use with the newly created virtual register.
1822 RegSubRegPair &UseReg = Entry.first;
1823 MI->substituteRegister(FromReg: UseReg.Reg, ToReg: NewMI->getOperand(i: 0).getReg(),
1824 SubIdx: UseReg.SubReg, RegInfo: *TRI);
1825 }
1826  // If we have replaced all uses, then delete the dead instruction.
1827 if (I.isDead(MRI: *MRI))
1828 I.eraseFromParent();
1829 return true;
1830}
1831
1832/// SinkInstruction - Determine whether it is safe to sink the specified machine
1833/// instruction out of its current block into a successor.
1834bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
1835 AllSuccsCache &AllSuccessors) {
1836 // Don't sink instructions that the target prefers not to sink.
1837 if (!TII->shouldSink(MI))
1838 return false;
1839
1840 // Check if it's safe to move the instruction.
1841 if (!MI.isSafeToMove(SawStore))
1842 return false;
1843
1844 // Convergent operations may not be made control-dependent on additional
1845 // values.
1846 if (MI.isConvergent())
1847 return false;
1848
1849 // Don't break implicit null checks. This is a performance heuristic, and not
1850 // required for correctness.
1851 if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
1852 return false;
1853
1854 // FIXME: This should include support for sinking instructions within the
1855 // block they are currently in to shorten the live ranges. We often get
1856 // instructions sunk into the top of a large block, but it would be better to
1857 // also sink them down before their first use in the block. This xform has to
1858 // be careful not to *increase* register pressure though, e.g. sinking
1859 // "x = y + z" down if it kills y and z would increase the live ranges of y
1860 // and z and only shrink the live range of x.
1861
1862 bool BreakPHIEdge = false;
1863 MachineBasicBlock *ParentBlock = MI.getParent();
1864 MachineBasicBlock *SuccToSinkTo =
1865 FindSuccToSinkTo(MI, MBB: ParentBlock, BreakPHIEdge, AllSuccessors);
1866
1867 // If there are no outputs, it must have side-effects.
1868 if (!SuccToSinkTo)
1869 return false;
1870
1871 // If the instruction to move defines a dead physical register which is live
1872 // when leaving the basic block, don't move it because it could turn into a
1873 // "zombie" define of that preg. E.g., EFLAGS.
1874 for (const MachineOperand &MO : MI.all_defs()) {
1875 Register Reg = MO.getReg();
1876 if (Reg == 0 || !Reg.isPhysical())
1877 continue;
1878 if (SuccToSinkTo->isLiveIn(Reg))
1879 return false;
1880 }
1881
1882 LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);
1883
1884 // If the block has multiple predecessors, this is a critical edge.
1885 // Decide if we can sink along it or need to break the edge.
1886 if (SuccToSinkTo->pred_size() > 1) {
1887 // We cannot sink a load across a critical edge - there may be stores in
1888 // other code paths.
1889 bool TryBreak = false;
1890 bool Store =
1891 MI.mayLoad() ? hasStoreBetween(From: ParentBlock, To: SuccToSinkTo, MI) : true;
1892 if (!MI.isSafeToMove(SawStore&: Store)) {
1893 LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
1894 TryBreak = true;
1895 }
1896
1897 // We don't want to sink across a critical edge if we don't dominate the
1898 // successor. We could be introducing calculations to new code paths.
1899 if (!TryBreak && !DT->dominates(A: ParentBlock, B: SuccToSinkTo)) {
1900 LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
1901 TryBreak = true;
1902 }
1903
1904 // Don't sink instructions into a cycle.
1905 if (!TryBreak && CI->getCycle(Block: SuccToSinkTo) &&
1906 (!CI->getCycle(Block: SuccToSinkTo)->isReducible() ||
1907 CI->getCycle(Block: SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
1908 LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
1909 TryBreak = true;
1910 }
1911
1912 // Otherwise we are OK with sinking along a critical edge.
1913 if (!TryBreak)
1914 LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
1915 else {
1916 // Mark this edge as to be split.
1917 // If the edge can actually be split, the next iteration of the main loop
1918 // will sink MI in the newly created block.
1919 bool Status = PostponeSplitCriticalEdge(MI, FromBB: ParentBlock, ToBB: SuccToSinkTo,
1920 BreakPHIEdge);
1921 if (!Status)
1922 LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
1923 "break critical edge\n");
1924 // The instruction will not be sunk this time.
1925 return false;
1926 }
1927 }
1928
1929 if (BreakPHIEdge) {
1930    // BreakPHIEdge is true if all the uses are in the successor MBB being
1931    // sunk into and they are all PHI nodes. In this case, machine-sink must
1932    // break the critical edge first.
1933 bool Status =
1934 PostponeSplitCriticalEdge(MI, FromBB: ParentBlock, ToBB: SuccToSinkTo, BreakPHIEdge);
1935 if (!Status)
1936 LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
1937 "break critical edge\n");
1938 // The instruction will not be sunk this time.
1939 return false;
1940 }
1941
1942  // Determine where to insert the sunk instruction. Skip PHI nodes and labels.
1943 MachineBasicBlock::iterator InsertPos =
1944 SuccToSinkTo->SkipPHIsAndLabels(I: SuccToSinkTo->begin());
1945 if (blockPrologueInterferes(BB: SuccToSinkTo, End: InsertPos, MI, TRI, TII, MRI)) {
1946 LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
1947 return false;
1948 }
1949
1950 // Collect debug users of any vreg that this inst defines.
1951 SmallVector<MIRegs, 4> DbgUsersToSink;
1952 for (auto &MO : MI.all_defs()) {
1953 if (!MO.getReg().isVirtual())
1954 continue;
1955 auto It = SeenDbgUsers.find(Val: MO.getReg());
1956 if (It == SeenDbgUsers.end())
1957 continue;
1958
1959 // Sink any users that don't pass any other DBG_VALUEs for this variable.
1960 auto &Users = It->second;
1961 for (auto &User : Users) {
1962 MachineInstr *DbgMI = User.getPointer();
1963 if (User.getInt()) {
1964 // This DBG_VALUE would re-order assignments. If we can't copy-propagate
1965 // it, it can't be recovered. Set it undef.
1966 if (!attemptDebugCopyProp(SinkInst&: MI, DbgMI&: *DbgMI, Reg: MO.getReg()))
1967 DbgMI->setDebugValueUndef();
1968 } else {
1969 DbgUsersToSink.push_back(
1970 Elt: {DbgMI, SmallVector<Register, 2>(1, MO.getReg())});
1971 }
1972 }
1973 }
1974
1975 // After sinking, some debug users may not be dominated any more. If possible,
1976 // copy-propagate their operands. As it's expensive, don't do this if there's
1977 // no debuginfo in the program.
1978 if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
1979 SalvageUnsunkDebugUsersOfCopy(MI, TargetBlock: SuccToSinkTo);
1980
1981 performSink(MI, SuccToSinkTo&: *SuccToSinkTo, InsertPos, DbgValuesToSink: DbgUsersToSink);
1982
1983 // Conservatively, clear any kill flags, since it's possible that they are no
1984 // longer correct.
1985 // Note that we have to clear the kill flags for any register this instruction
1986 // uses as we may sink over another instruction which currently kills the
1987 // used registers.
1988 for (MachineOperand &MO : MI.all_uses())
1989 RegsToClearKillFlags.insert(V: MO.getReg()); // Remember to clear kill flags.
1990
1991 return true;
1992}
1993
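/// Point DBG_VALUE users of this copy's def at the copy source when sinking
/// would leave them undominated. For example (hypothetical vregs), if
/// "%1 = COPY %0" is sunk out of this block, a "DBG_VALUE %1" left behind in
/// a block not dominated by the sink target is rewritten to "DBG_VALUE %0".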
1994void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
1995 MachineInstr &MI, MachineBasicBlock *TargetBlock) {
1996 assert(MI.isCopy());
1997 assert(MI.getOperand(1).isReg());
1998
1999 // Enumerate all users of vreg operands that are def'd. Skip those that will
2000 // be sunk. For the rest, if they are not dominated by the block we will sink
2001 // MI into, propagate the copy source to them.
2002 SmallVector<MachineInstr *, 4> DbgDefUsers;
2003 SmallVector<Register, 4> DbgUseRegs;
2004 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
2005 for (auto &MO : MI.all_defs()) {
2006 if (!MO.getReg().isVirtual())
2007 continue;
2008 DbgUseRegs.push_back(Elt: MO.getReg());
2009 for (auto &User : MRI.use_instructions(Reg: MO.getReg())) {
2010 if (!User.isDebugValue() || DT->dominates(A: TargetBlock, B: User.getParent()))
2011 continue;
2012
2013      // If in the same block, the user will either sink or be use-before-def.
2014 if (User.getParent() == MI.getParent())
2015 continue;
2016
2017 assert(User.hasDebugOperandForReg(MO.getReg()) &&
2018 "DBG_VALUE user of vreg, but has no operand for it?");
2019 DbgDefUsers.push_back(Elt: &User);
2020 }
2021 }
2022
2023 // Point the users of this copy that are no longer dominated, at the source
2024 // of the copy.
2025 for (auto *User : DbgDefUsers) {
2026 for (auto &Reg : DbgUseRegs) {
2027 for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) {
2028 DbgOp.setReg(MI.getOperand(i: 1).getReg());
2029 DbgOp.setSubReg(MI.getOperand(i: 1).getSubReg());
2030 }
2031 }
2032 }
2033}
2034
2035//===----------------------------------------------------------------------===//
2036// This pass is not intended to be a replacement or a complete alternative
2037// for the pre-RA machine sink pass. It is only designed to sink COPY
2038// instructions which should be handled after RA.
2039//
2040// This pass sinks COPY instructions into a successor block, if the COPY is not
2041// used in the current block and the COPY is live-in to a single successor
2042// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
2043// copy on paths where its result isn't needed. This also exposes
2044// additional opportunities for dead copy elimination and shrink wrapping.
2045//
2046// These copies were either not handled by or are inserted after the MachineSink
2047// pass. As an example of the former case, the MachineSink pass cannot sink
2048// COPY instructions with allocatable source registers; for AArch64 this type
2049// of copy instruction is frequently used to move function parameters (PhysReg)
2050// into virtual registers in the entry block.
2051//
2052// For the machine IR below, this pass will sink %w19 in the entry block into
2053// its successor (%bb.1) because %w19 is only live-in in %bb.1.
2054// %bb.0:
2055// %wzr = SUBSWri %w1, 1
2056// %w19 = COPY %w0
2057// Bcc 11, %bb.2
2058// %bb.1:
2059// Live Ins: %w19
2060// BL @fun
2061// %w0 = ADDWrr %w0, %w19
2062// RET %w0
2063// %bb.2:
2064// %w0 = COPY %wzr
2065// RET %w0
2066// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
2067// able to see %bb.0 as a candidate.
2068//===----------------------------------------------------------------------===//
2069namespace {
2070
2071class PostRAMachineSinking : public MachineFunctionPass {
2072public:
2073 bool runOnMachineFunction(MachineFunction &MF) override;
2074
2075 static char ID;
2076 PostRAMachineSinking() : MachineFunctionPass(ID) {}
2077 StringRef getPassName() const override { return "PostRA Machine Sink"; }
2078
2079 void getAnalysisUsage(AnalysisUsage &AU) const override {
2080 AU.setPreservesCFG();
2081 MachineFunctionPass::getAnalysisUsage(AU);
2082 }
2083
2084 MachineFunctionProperties getRequiredProperties() const override {
2085 return MachineFunctionProperties().setNoVRegs();
2086 }
2087
2088private:
2089 /// Track which register units have been modified and used.
2090 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
2091
2092 /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
2093 /// entry in this map for each unit it touches. The DBG_VALUE's entry
2094 /// consists of a pointer to the instruction itself, and a vector of registers
2095 /// referred to by the instruction that overlap the key register unit.
2096 DenseMap<MCRegUnit, SmallVector<MIRegs, 2>> SeenDbgInstrs;
2097
2098  /// Sink COPY instructions that are unused in their own block closer to
2099  /// their uses in successor blocks.
2100 bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
2101 const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
2102};
2103} // namespace
2104
2105char PostRAMachineSinking::ID = 0;
2106char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;
2107
2108INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
2109 "PostRA Machine Sink", false, false)
2110
2111static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, Register Reg,
2112 const TargetRegisterInfo *TRI) {
2113 LiveRegUnits LiveInRegUnits(*TRI);
2114 LiveInRegUnits.addLiveIns(MBB);
2115 return !LiveInRegUnits.available(Reg);
2116}
2117
2118static MachineBasicBlock *
2119getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
2120 const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
2121 Register Reg, const TargetRegisterInfo *TRI) {
2122 // Try to find a single sinkable successor in which Reg is live-in.
2123 MachineBasicBlock *BB = nullptr;
2124 for (auto *SI : SinkableBBs) {
2125 if (aliasWithRegsInLiveIn(MBB&: *SI, Reg, TRI)) {
2126 // If BB is set here, Reg is live-in to at least two sinkable successors,
2127 // so quit.
2128 if (BB)
2129 return nullptr;
2130 BB = SI;
2131 }
2132 }
2133 // Reg is not live-in to any sinkable successors.
2134 if (!BB)
2135 return nullptr;
2136
2137 // Check if any register aliased with Reg is live-in in other successors.
2138 for (auto *SI : CurBB.successors()) {
2139 if (!SinkableBBs.count(Ptr: SI) && aliasWithRegsInLiveIn(MBB&: *SI, Reg, TRI))
2140 return nullptr;
2141 }
2142 return BB;
2143}
2144
2145static MachineBasicBlock *
2146getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
2147 const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
2148 ArrayRef<Register> DefedRegsInCopy,
2149 const TargetRegisterInfo *TRI) {
2150 MachineBasicBlock *SingleBB = nullptr;
2151 for (auto DefReg : DefedRegsInCopy) {
2152 MachineBasicBlock *BB =
2153 getSingleLiveInSuccBB(CurBB, SinkableBBs, Reg: DefReg, TRI);
2154 if (!BB || (SingleBB && SingleBB != BB))
2155 return nullptr;
2156 SingleBB = BB;
2157 }
2158 return SingleBB;
2159}
2160
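/// Transfer the kill flag for a copied source register onto the copy itself:
/// if an instruction between MI and the end of the block currently kills
/// SrcReg, clear that kill, since after sinking the copy's own use of SrcReg
/// becomes the last one.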
2161static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
2162 const SmallVectorImpl<unsigned> &UsedOpsInCopy,
2163 const LiveRegUnits &UsedRegUnits,
2164 const TargetRegisterInfo *TRI) {
2165 for (auto U : UsedOpsInCopy) {
2166 MachineOperand &MO = MI->getOperand(i: U);
2167 Register SrcReg = MO.getReg();
2168 if (!UsedRegUnits.available(Reg: SrcReg)) {
2169 MachineBasicBlock::iterator NI = std::next(x: MI->getIterator());
2170 for (MachineInstr &UI : make_range(x: NI, y: CurBB.end())) {
2171 if (UI.killsRegister(Reg: SrcReg, TRI)) {
2172 UI.clearRegisterKills(Reg: SrcReg, RegInfo: TRI);
2173 MO.setIsKill(true);
2174 break;
2175 }
2176 }
2177 }
2178 }
2179}
2180
2181static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
2182 const SmallVectorImpl<unsigned> &UsedOpsInCopy,
2183 const SmallVectorImpl<Register> &DefedRegsInCopy) {
2184 MachineFunction &MF = *SuccBB->getParent();
2185 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2186 for (Register DefReg : DefedRegsInCopy)
2187 for (MCPhysReg S : TRI->subregs_inclusive(Reg: DefReg))
2188 SuccBB->removeLiveIn(Reg: S);
2189 for (auto U : UsedOpsInCopy)
2190 SuccBB->addLiveIn(PhysReg: MI->getOperand(i: U).getReg());
2191 SuccBB->sortUniqueLiveIns();
2192}
2193
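/// Return true if MI has a register dependency that prevents sinking: a def
/// of a register that is modified or used between MI and the end of the
/// block, or a use of a register that is modified in that range. As a side
/// effect, record the indexes of MI's use operands in UsedOpsInCopy and the
/// registers it defines in DefedRegsInCopy.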
2194static bool hasRegisterDependency(MachineInstr *MI,
2195 SmallVectorImpl<unsigned> &UsedOpsInCopy,
2196 SmallVectorImpl<Register> &DefedRegsInCopy,
2197 LiveRegUnits &ModifiedRegUnits,
2198 LiveRegUnits &UsedRegUnits) {
2199 bool HasRegDependency = false;
2200 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
2201 MachineOperand &MO = MI->getOperand(i);
2202 if (!MO.isReg())
2203 continue;
2204 Register Reg = MO.getReg();
2205 if (!Reg)
2206 continue;
2207 if (MO.isDef()) {
2208 if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
2209 HasRegDependency = true;
2210 break;
2211 }
2212 DefedRegsInCopy.push_back(Elt: Reg);
2213
2214      // FIXME: Instead of isUse(), readsReg() would be a better fix here.
2215      // For example, we can ignore modifications in a reg with undef.
2216      // However, it's not perfectly clear whether skipping the internal read
2217      // is safe on all targets.
2218 } else if (MO.isUse()) {
2219 if (!ModifiedRegUnits.available(Reg)) {
2220 HasRegDependency = true;
2221 break;
2222 }
2223 UsedOpsInCopy.push_back(Elt: i);
2224 }
2225 }
2226 return HasRegDependency;
2227}
2228
2229bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
2230 MachineFunction &MF,
2231 const TargetRegisterInfo *TRI,
2232 const TargetInstrInfo *TII) {
2233 SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
2234 // FIXME: For now, we sink only to a successor which has a single predecessor
2235 // so that we can directly sink COPY instructions to the successor without
2236 // adding any new block or branch instruction.
2237 for (MachineBasicBlock *SI : CurBB.successors())
2238 if (!SI->livein_empty() && SI->pred_size() == 1)
2239 SinkableBBs.insert(Ptr: SI);
2240
2241 if (SinkableBBs.empty())
2242 return false;
2243
2244 bool Changed = false;
2245
2246 // Track which registers have been modified and used between the end of the
2247 // block and the current instruction.
2248 ModifiedRegUnits.clear();
2249 UsedRegUnits.clear();
2250 SeenDbgInstrs.clear();
2251
2252 for (MachineInstr &MI : llvm::make_early_inc_range(Range: llvm::reverse(C&: CurBB))) {
2253    // Track the operand indexes of the registers used by the COPY.
2254    SmallVector<unsigned, 2> UsedOpsInCopy;
2255    // Track the registers defined by the COPY.
2256 SmallVector<Register, 2> DefedRegsInCopy;
2257
2258 // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
2259 // for DBG_VALUEs later, record them when they're encountered.
2260 if (MI.isDebugValue() && !MI.isDebugRef()) {
2261 SmallDenseMap<MCRegUnit, SmallVector<Register, 2>, 4> MIUnits;
2262 bool IsValid = true;
2263 for (MachineOperand &MO : MI.debug_operands()) {
2264 if (MO.isReg() && MO.getReg().isPhysical()) {
2265 // Bail if we can already tell the sink would be rejected, rather
2266 // than needlessly accumulating lots of DBG_VALUEs.
2267 if (hasRegisterDependency(MI: &MI, UsedOpsInCopy, DefedRegsInCopy,
2268 ModifiedRegUnits, UsedRegUnits)) {
2269 IsValid = false;
2270 break;
2271 }
2272
2273 // Record debug use of each reg unit.
2274 for (MCRegUnit Unit : TRI->regunits(Reg: MO.getReg()))
2275 MIUnits[Unit].push_back(Elt: MO.getReg());
2276 }
2277 }
2278 if (IsValid) {
2279 for (auto &RegOps : MIUnits)
2280 SeenDbgInstrs[RegOps.first].emplace_back(Args: &MI,
2281 Args: std::move(RegOps.second));
2282 }
2283 continue;
2284 }
2285
2286 if (MI.isDebugOrPseudoInstr())
2287 continue;
2288
2289    // Do not move any instruction across a function call.
2290 if (MI.isCall())
2291 return false;
2292
2293 if (!MI.isCopy() || !MI.getOperand(i: 0).isRenamable()) {
2294 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2295 TRI);
2296 continue;
2297 }
2298
2299 // Don't sink the COPY if it would violate a register dependency.
2300 if (hasRegisterDependency(MI: &MI, UsedOpsInCopy, DefedRegsInCopy,
2301 ModifiedRegUnits, UsedRegUnits)) {
2302 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2303 TRI);
2304 continue;
2305 }
2306 assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
2307           "Unexpected SrcReg or DefReg");
2308 MachineBasicBlock *SuccBB =
2309 getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
2310 // Don't sink if we cannot find a single sinkable successor in which Reg
2311 // is live-in.
2312 if (!SuccBB) {
2313 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2314 TRI);
2315 continue;
2316 }
2317 assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
2318 "Unexpected predecessor");
2319
2320    // Collect DBG_VALUEs that must sink with this copy. We've previously
2321    // recorded which reg units DBG_VALUEs read; if this instruction writes
2322    // any of those units, then the corresponding DBG_VALUEs must sink.
2323 MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
2324 for (auto &MO : MI.all_defs()) {
2325 for (MCRegUnit Unit : TRI->regunits(Reg: MO.getReg())) {
2326 for (const auto &MIRegs : SeenDbgInstrs.lookup(Val: Unit)) {
2327 auto &Regs = DbgValsToSinkMap[MIRegs.first];
2328 llvm::append_range(C&: Regs, R: MIRegs.second);
2329 }
2330 }
2331 }
2332 auto DbgValsToSink = DbgValsToSinkMap.takeVector();
2333
2334 LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);
2335
2336 MachineBasicBlock::iterator InsertPos =
2337 SuccBB->SkipPHIsAndLabels(I: SuccBB->begin());
2338 if (blockPrologueInterferes(BB: SuccBB, End: InsertPos, MI, TRI, TII, MRI: nullptr)) {
2339 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2340 TRI);
2341 LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
2342 continue;
2343 }
2344
2345 // Clear the kill flag if SrcReg is killed between MI and the end of the
2346 // block.
2347 clearKillFlags(MI: &MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
2348 performSink(MI, SuccToSinkTo&: *SuccBB, InsertPos, DbgValuesToSink: DbgValsToSink);
2349 updateLiveIn(MI: &MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
2350
2351 Changed = true;
2352 ++NumPostRACopySink;
2353 }
2354 return Changed;
2355}
2356
2357bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
2358 if (skipFunction(F: MF.getFunction()))
2359 return false;
2360
2361 bool Changed = false;
2362 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2363 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
2364
2365 ModifiedRegUnits.init(TRI: *TRI);
2366 UsedRegUnits.init(TRI: *TRI);
2367 for (auto &BB : MF)
2368 Changed |= tryToSinkCopy(CurBB&: BB, MF, TRI, TII);
2369
2370 return Changed;
2371}
2372