1//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass moves instructions into successor blocks when possible, so that
10// they aren't executed on paths where their results aren't needed.
11//
12// This pass is not intended to be a replacement or a complete alternative
13// for an LLVM-IR-level sinking pass. It is only designed to sink simple
14// constructs that are not exposed before lowering and instruction selection.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/CodeGen/MachineSink.h"
19#include "llvm/ADT/DenseSet.h"
20#include "llvm/ADT/DepthFirstIterator.h"
21#include "llvm/ADT/MapVector.h"
22#include "llvm/ADT/PointerIntPair.h"
23#include "llvm/ADT/SetVector.h"
24#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/AliasAnalysis.h"
28#include "llvm/Analysis/CFG.h"
29#include "llvm/Analysis/ProfileSummaryInfo.h"
30#include "llvm/CodeGen/LiveIntervals.h"
31#include "llvm/CodeGen/LiveVariables.h"
32#include "llvm/CodeGen/MachineBasicBlock.h"
33#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
34#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
35#include "llvm/CodeGen/MachineCycleAnalysis.h"
36#include "llvm/CodeGen/MachineDomTreeUpdater.h"
37#include "llvm/CodeGen/MachineDominators.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineFunctionPass.h"
40#include "llvm/CodeGen/MachineInstr.h"
41#include "llvm/CodeGen/MachineLoopInfo.h"
42#include "llvm/CodeGen/MachineOperand.h"
43#include "llvm/CodeGen/MachinePostDominators.h"
44#include "llvm/CodeGen/MachineRegisterInfo.h"
45#include "llvm/CodeGen/MachineSizeOpts.h"
46#include "llvm/CodeGen/PostRAMachineSink.h"
47#include "llvm/CodeGen/RegisterClassInfo.h"
48#include "llvm/CodeGen/RegisterPressure.h"
49#include "llvm/CodeGen/SlotIndexes.h"
50#include "llvm/CodeGen/TargetInstrInfo.h"
51#include "llvm/CodeGen/TargetPassConfig.h"
52#include "llvm/CodeGen/TargetRegisterInfo.h"
53#include "llvm/CodeGen/TargetSchedule.h"
54#include "llvm/CodeGen/TargetSubtargetInfo.h"
55#include "llvm/IR/BasicBlock.h"
56#include "llvm/IR/DebugInfoMetadata.h"
57#include "llvm/IR/LLVMContext.h"
58#include "llvm/InitializePasses.h"
59#include "llvm/Pass.h"
60#include "llvm/Support/BranchProbability.h"
61#include "llvm/Support/CommandLine.h"
62#include "llvm/Support/Debug.h"
63#include "llvm/Support/raw_ostream.h"
64#include <cassert>
65#include <cstdint>
66#include <utility>
67#include <vector>
68
69using namespace llvm;
70
71#define DEBUG_TYPE "machine-sink"
72
// Command-line options tuning the machine-sinking heuristics.
static cl::opt<bool>
    SplitEdges("machine-sink-split",
               cl::desc("Split critical edges during machine sinking"),
               cl::init(Val: true), cl::Hidden);

static cl::opt<bool> UseBlockFreqInfo(
    "machine-sink-bfi",
    cl::desc("Use block frequency info to find successors to sink"),
    cl::init(Val: true), cl::Hidden);

static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
    "machine-sink-split-probability-threshold",
    cl::desc(
        "Percentage threshold for splitting single-instruction critical edge. "
        "If the branch threshold is higher than this threshold, we allow "
        "speculative execution of up to 1 instruction to avoid branching to "
        "splitted critical edge"),
    cl::init(Val: 40), cl::Hidden);

// Caps on the store-alias search performed by hasStoreBetween() so that load
// sinking does not become quadratic on large functions.
static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold(
    "machine-sink-load-instrs-threshold",
    cl::desc("Do not try to find alias store for a load if there is a in-path "
             "block whose instruction number is higher than this threshold."),
    cl::init(Val: 2000), cl::Hidden);

static cl::opt<unsigned> SinkLoadBlocksThreshold(
    "machine-sink-load-blocks-threshold",
    cl::desc("Do not try to find alias store for a load if the block number in "
             "the straight line is higher than this threshold."),
    cl::init(Val: 20), cl::Hidden);

static cl::opt<bool>
    SinkInstsIntoCycle("sink-insts-to-avoid-spills",
                       cl::desc("Sink instructions into cycles to avoid "
                                "register spills"),
                       cl::init(Val: false), cl::Hidden);

static cl::opt<unsigned> SinkIntoCycleLimit(
    "machine-sink-cycle-limit",
    cl::desc(
        "The maximum number of instructions considered for cycle sinking."),
    cl::init(Val: 50), cl::Hidden);

// Pass statistics, reported with -stats.
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");

using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
123
namespace {

/// Shared implementation of the pre-RA machine sinking transformation, driven
/// by both the legacy pass (MachineSinkingLegacy) and the new-PM pass
/// (MachineSinkingPass) via run().
class MachineSinking {
  const TargetSubtargetInfo *STI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineRegisterInfo *MRI = nullptr; // Machine register information
  MachineDominatorTree *DT = nullptr; // Machine dominator tree
  MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
  MachineCycleInfo *CI = nullptr;
  ProfileSummaryInfo *PSI = nullptr;
  MachineBlockFrequencyInfo *MBFI = nullptr; // May be null (see UseBlockFreqInfo)
  const MachineBranchProbabilityInfo *MBPI = nullptr;
  AliasAnalysis *AA = nullptr;
  RegisterClassInfo RegClassInfo;
  TargetSchedModel SchedModel;
  // Required for split critical edge; any of these may be null if the
  // corresponding analysis was not available/cached in the caller.
  LiveIntervals *LIS;
  SlotIndexes *SI;
  LiveVariables *LV;
  MachineLoopInfo *MLI;

  // Remember which edges have been considered for breaking.
  SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
      CEBCandidates;
  // Memorize the register that also wanted to sink into the same block along
  // a different critical edge.
  // {register to sink, sink-to block} -> the first sink-from block.
  // We're recording the first sink-from block because that (critical) edge
  // was deferred until we see another register that's going to sink into the
  // same block.
  DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
      CEMergeCandidates;
  // Remember which edges we are about to split.
  // This is different from CEBCandidates since those edges
  // will be split.
  SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;

  // Registers whose kill flags were conservatively invalidated by sinking.
  DenseSet<Register> RegsToClearKillFlags;

  using AllSuccsCache =
      SmallDenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;

  /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
  /// post-dominated by another DBG_VALUE of the same variable location.
  /// This is necessary to detect sequences such as:
  ///     %0 = someinst
  ///     DBG_VALUE %0, !123, !DIExpression()
  ///     %1 = anotherinst
  ///     DBG_VALUE %1, !123, !DIExpression()
  /// Where if %0 were to sink, the DBG_VALUE should not sink with it, as that
  /// would re-order assignments.
  using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;

  /// (Instruction, sink destination block) pair, used when sinking into
  /// cycles.
  using SinkItem = std::pair<MachineInstr *, MachineBasicBlock *>;

  /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
  /// debug instructions to sink.
  SmallDenseMap<Register, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;

  /// Record of debug variables that have had their locations set in the
  /// current block.
  DenseSet<DebugVariable> SeenDbgVars;

  // Caches for the "is there a store between these blocks" query used when
  // deciding whether a load may be sunk.
  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
      HasStoreCache;

  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
           SmallVector<MachineInstr *>>
      StoreInstrCache;

  /// Cached BB's register pressure.
  DenseMap<const MachineBasicBlock *, std::vector<unsigned>>
      CachedRegisterPressure;

  // Whether the target enabled the sink-and-fold transformation; taken from
  // TargetPassConfig::getEnableSinkAndFold() by the pass entry points.
  bool EnableSinkAndFold;

public:
  MachineSinking(bool EnableSinkAndFold, MachineDominatorTree *DT,
                 MachinePostDominatorTree *PDT, LiveVariables *LV,
                 MachineLoopInfo *MLI, SlotIndexes *SI, LiveIntervals *LIS,
                 MachineCycleInfo *CI, ProfileSummaryInfo *PSI,
                 MachineBlockFrequencyInfo *MBFI,
                 const MachineBranchProbabilityInfo *MBPI, AliasAnalysis *AA)
      : DT(DT), PDT(PDT), CI(CI), PSI(PSI), MBFI(MBFI), MBPI(MBPI), AA(AA),
        LIS(LIS), SI(SI), LV(LV), MLI(MLI),
        EnableSinkAndFold(EnableSinkAndFold) {}

  // Run the transformation on \p MF; returns true if the function changed.
  bool run(MachineFunction &MF);

  // Drop per-iteration edge-splitting state.
  void releaseMemory() {
    CEBCandidates.clear();
    CEMergeCandidates.clear();
  }

private:
  bool ProcessBlock(MachineBasicBlock &MBB);
  void ProcessDbgInst(MachineInstr &MI);
  bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                  MachineBasicBlock *To, bool BreakPHIEdge);
  bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                   MachineBasicBlock *To,
                                   MachineBasicBlock *&DeferredFromBlock);

  // Whether any instruction between \p From and \p To may store to memory
  // that \p MI loads from (uses HasStoreCache/StoreInstrCache).
  bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
                       MachineInstr &MI);

  /// Postpone the splitting of the given critical
  /// edge (\p From, \p To).
  ///
  /// We do not split the edges on the fly. Indeed, this invalidates
  /// the dominance information and thus triggers a lot of updates
  /// of that information underneath.
  /// Instead, we postpone all the splits after each iteration of
  /// the main loop. That way, the information is at least valid
  /// for the lifetime of an iteration.
  ///
  /// \return True if the edge is marked as toSplit, false otherwise.
  /// False can be returned if, for instance, this is not profitable.
  bool PostponeSplitCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                 MachineBasicBlock *To, bool BreakPHIEdge);
  bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                       AllSuccsCache &AllSuccessors);

  /// If we sink a COPY inst, some debug users of its destination may no
  /// longer be dominated by the COPY, and will eventually be dropped.
  /// This is easily rectified by forwarding the non-dominated debug uses
  /// to the copy source.
  void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
                                     MachineBasicBlock *TargetBlock);
  bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
                               MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
                               bool &LocalUse) const;
  MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                      bool &BreakPHIEdge,
                                      AllSuccsCache &AllSuccessors);

  // Collect instructions in \p BB that may be sunk into \p Cycle.
  void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
                               SmallVectorImpl<MachineInstr *> &Candidates);

  bool
  aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I,
                            DenseMap<SinkItem, MachineInstr *> &SunkInstrs);

  bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                            MachineBasicBlock *MBB,
                            MachineBasicBlock *SuccToSinkTo,
                            AllSuccsCache &AllSuccessors);

  // Replace a COPY whose source has a single non-debug use with the source
  // register itself (see the definition below).
  bool PerformTrivialForwardCoalescing(MachineInstr &MI,
                                       MachineBasicBlock *MBB);

  // Sink an instruction by folding it into all of its users (COPYs to
  // physical registers and memory-instruction addressing modes).
  bool PerformSinkAndFold(MachineInstr &MI, MachineBasicBlock *MBB);

  SmallVector<MachineBasicBlock *, 4> &
  GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                         AllSuccsCache &AllSuccessors) const;

  std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB,
                                               bool UseCache = true);

  bool registerPressureSetExceedsLimit(unsigned NRegs,
                                       const TargetRegisterClass *RC,
                                       const MachineBasicBlock &MBB);

  bool registerPressureExceedsLimit(const MachineBasicBlock &MBB);
};

/// Legacy pass-manager wrapper around MachineSinking.
class MachineSinkingLegacy : public MachineFunctionPass {
public:
  static char ID;

  MachineSinkingLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachinePostDominatorTreeWrapperPass>();
    AU.addRequired<MachineCycleInfoWrapperPass>();
    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
    AU.addPreserved<MachineCycleInfoWrapperPass>();
    AU.addPreserved<MachineLoopInfoWrapperPass>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    // Block frequency info is only required when the heuristic is enabled.
    if (UseBlockFreqInfo) {
      AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
      AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
    }
    AU.addRequired<TargetPassConfig>();
  }
};

} // end anonymous namespace
319
char MachineSinkingLegacy::ID = 0;

char &llvm::MachineSinkingLegacyID = MachineSinkingLegacy::ID;

// Register the legacy pass and declare the analyses it depends on.
INITIALIZE_PASS_BEGIN(MachineSinkingLegacy, DEBUG_TYPE, "Machine code sinking",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinkingLegacy, DEBUG_TYPE, "Machine code sinking",
                    false, false)
333
334/// Return true if a target defined block prologue instruction interferes
335/// with a sink candidate.
336static bool blockPrologueInterferes(const MachineBasicBlock *BB,
337 MachineBasicBlock::const_iterator End,
338 const MachineInstr &MI,
339 const TargetRegisterInfo *TRI,
340 const TargetInstrInfo *TII,
341 const MachineRegisterInfo *MRI) {
342 for (MachineBasicBlock::const_iterator PI = BB->getFirstNonPHI(); PI != End;
343 ++PI) {
344 // Only check target defined prologue instructions
345 if (!TII->isBasicBlockPrologue(MI: *PI))
346 continue;
347 for (auto &MO : MI.operands()) {
348 if (!MO.isReg())
349 continue;
350 Register Reg = MO.getReg();
351 if (!Reg)
352 continue;
353 if (MO.isUse()) {
354 if (Reg.isPhysical() &&
355 (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(PhysReg: Reg))))
356 continue;
357 if (PI->modifiesRegister(Reg, TRI))
358 return true;
359 } else {
360 if (PI->readsRegister(Reg, TRI))
361 return true;
362 // Check for interference with non-dead defs
363 auto *DefOp = PI->findRegisterDefOperand(Reg, TRI, isDead: false, Overlap: true);
364 if (DefOp && !DefOp->isDead())
365 return true;
366 }
367 }
368 }
369
370 return false;
371}
372
373bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
374 MachineBasicBlock *MBB) {
375 if (!MI.isCopy())
376 return false;
377
378 Register SrcReg = MI.getOperand(i: 1).getReg();
379 Register DstReg = MI.getOperand(i: 0).getReg();
380 if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
381 !MRI->hasOneNonDBGUse(RegNo: SrcReg))
382 return false;
383
384 const TargetRegisterClass *SRC = MRI->getRegClass(Reg: SrcReg);
385 const TargetRegisterClass *DRC = MRI->getRegClass(Reg: DstReg);
386 if (SRC != DRC)
387 return false;
388
389 MachineInstr *DefMI = MRI->getVRegDef(Reg: SrcReg);
390 if (DefMI->isCopyLike())
391 return false;
392 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
393 LLVM_DEBUG(dbgs() << "*** to: " << MI);
394 MRI->replaceRegWith(FromReg: DstReg, ToReg: SrcReg);
395 MI.eraseFromParent();
396
397 // Conservatively, clear any kill flags, since it's possible that they are no
398 // longer correct.
399 MRI->clearKillFlags(Reg: SrcReg);
400
401 ++NumCoalesces;
402 return true;
403}
404
/// Try to sink \p MI by folding a copy of it into every one of its users:
/// either re-materializing it in place of a COPY into a physical register, or
/// folding it into the addressing mode of a load/store. On success the
/// original instruction and the intermediate COPYs are deleted.
/// Returns true if the transformation was performed.
bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
                                        MachineBasicBlock *MBB) {
  // COPYs, memory instructions and REG_SEQUENCE are never candidates.
  if (MI.isCopy() || MI.mayLoadOrStore() ||
      MI.getOpcode() == TargetOpcode::REG_SEQUENCE)
    return false;

  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  bool SawStore = true;
  if (!MI.isSafeToMove(SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't sink defs/uses of hard registers or if the instruction defines more
  // than one register.
  // Don't sink more than two register uses - it'll cover most of the cases and
  // greatly simplifies the register pressure checks.
  Register DefReg;
  Register UsedRegA, UsedRegB;
  for (const MachineOperand &MO : MI.operands()) {
    // Operand kinds that cannot affect the analysis are skipped outright.
    if (MO.isImm() || MO.isRegMask() || MO.isRegLiveOut() || MO.isMetadata() ||
        MO.isMCSymbol() || MO.isDbgInstrRef() || MO.isCFIIndex() ||
        MO.isIntrinsicID() || MO.isPredicate() || MO.isShuffleMask())
      continue;
    if (!MO.isReg())
      return false;

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isVirtual()) {
      if (MO.isDef()) {
        // At most one virtual def is allowed.
        if (DefReg)
          return false;
        DefReg = Reg;
        continue;
      }

      // At most two virtual uses are allowed.
      if (UsedRegA == 0)
        UsedRegA = Reg;
      else if (UsedRegB == 0)
        UsedRegB = Reg;
      else
        return false;
      continue;
    }

    // Physical registers are tolerated only as constant/ignorable uses.
    if (Reg.isPhysical() && MO.isUse() &&
        (MRI->isConstantPhysReg(PhysReg: Reg) || TII->isIgnorableUse(MO)))
      continue;

    return false;
  }

  // Scan uses of the destination register. Every use, except the last, must be
  // a copy, with a chain of copies terminating with either a copy into a hard
  // register, or a load/store instruction where the use is part of the
  // address (*not* the stored value).
  using SinkInfo = std::pair<MachineInstr *, ExtAddrMode>;
  SmallVector<SinkInfo> SinkInto;
  SmallVector<Register> Worklist;

  const TargetRegisterClass *RC = MRI->getRegClass(Reg: DefReg);
  const TargetRegisterClass *RCA =
      UsedRegA == 0 ? nullptr : MRI->getRegClass(Reg: UsedRegA);
  const TargetRegisterClass *RCB =
      UsedRegB == 0 ? nullptr : MRI->getRegClass(Reg: UsedRegB);

  // Walk the transitive copy-uses of DefReg.
  Worklist.push_back(Elt: DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();

    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
      ExtAddrMode MaybeAM;
      MachineInstr &UseInst = *MO.getParent();
      if (UseInst.isCopy()) {
        Register DstReg;
        if (const MachineOperand &O = UseInst.getOperand(i: 0); O.isReg())
          DstReg = O.getReg();
        if (DstReg == 0)
          return false;
        if (DstReg.isVirtual()) {
          // Virtual-to-virtual copy: keep following the chain.
          Worklist.push_back(Elt: DstReg);
          continue;
        }
        // If we are going to replace a copy, the original instruction must be
        // as cheap as a copy.
        if (!TII->isAsCheapAsAMove(MI))
          return false;
        // The hard register must be in the register class of the original
        // instruction's destination register.
        if (!RC->contains(Reg: DstReg))
          return false;
      } else if (UseInst.mayLoadOrStore()) {
        // If the destination instruction contains more than one use of the
        // register, we won't be able to remove the original instruction, so
        // don't sink.
        if (llvm::count_if(Range: UseInst.operands(), P: [Reg](const MachineOperand &MO) {
              return MO.isReg() && MO.getReg() == Reg;
            }) > 1)
          return false;
        ExtAddrMode AM;
        if (!TII->canFoldIntoAddrMode(MemI: UseInst, Reg, AddrI: MI, AM))
          return false;
        MaybeAM = AM;
      } else {
        // Any other kind of user makes the transformation illegal.
        return false;
      }

      if (UseInst.getParent() != MI.getParent()) {
        // If the register class of the register we are replacing is a superset
        // of any of the register classes of the operands of the materialized
        // instruction don't consider that live range extended.
        const TargetRegisterClass *RCS = MRI->getRegClass(Reg);
        if (RCA && RCA->hasSuperClassEq(RC: RCS))
          RCA = nullptr;
        else if (RCB && RCB->hasSuperClassEq(RC: RCS))
          RCB = nullptr;
        if (RCA || RCB) {
          // Canonicalize: a single remaining class lives in RCA.
          if (RCA == nullptr) {
            RCA = RCB;
            RCB = nullptr;
          }

          // Count both uses *before* collapsing identical classes, so two
          // uses of the same class are checked as pressure of two registers.
          unsigned NRegs = !!RCA + !!RCB;
          if (RCA == RCB)
            RCB = nullptr;

          // Check we don't exceed register pressure at the destination.
          const MachineBasicBlock &MBB = *UseInst.getParent();
          if (RCB == nullptr) {
            if (registerPressureSetExceedsLimit(NRegs, RC: RCA, MBB))
              return false;
          } else if (registerPressureSetExceedsLimit(NRegs: 1, RC: RCA, MBB) ||
                     registerPressureSetExceedsLimit(NRegs: 1, RC: RCB, MBB)) {
            return false;
          }
        }
      }

      SinkInto.emplace_back(Args: &UseInst, Args&: MaybeAM);
    }
  }

  if (SinkInto.empty())
    return false;

  // Now we know we can fold the instruction in all its users.
  for (auto &[SinkDst, MaybeAM] : SinkInto) {
    MachineInstr *New = nullptr;
    LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
               SinkDst->dump());
    if (SinkDst->isCopy()) {
      // TODO: After performing the sink-and-fold, the original instruction is
      // deleted. Its value is still available (in a hard register), so if there
      // are debug instructions which refer to the (now deleted) virtual
      // register they could be updated to refer to the hard register, in
      // principle. However, it's not clear how to do that, moreover in some
      // cases the debug instructions may need to be replicated proportionally
      // to the number of the COPY instructions replaced and in some extreme
      // cases we can end up with quadratic increase in the number of debug
      // instructions.

      // Sink a copy of the instruction, replacing a COPY instruction.
      MachineBasicBlock::iterator InsertPt = SinkDst->getIterator();
      Register DstReg = SinkDst->getOperand(i: 0).getReg();
      TII->reMaterialize(MBB&: *SinkDst->getParent(), MI: InsertPt, DestReg: DstReg, SubIdx: 0, Orig: MI);
      New = &*std::prev(x: InsertPt);
      if (!New->getDebugLoc())
        New->setDebugLoc(SinkDst->getDebugLoc());

      // The operand registers of the "sunk" instruction have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (UsedRegA)
        MRI->clearKillFlags(Reg: UsedRegA);
      if (UsedRegB)
        MRI->clearKillFlags(Reg: UsedRegB);
    } else {
      // Fold instruction into the addressing mode of a memory instruction.
      New = TII->emitLdStWithAddr(MemI&: *SinkDst, AM: MaybeAM);

      // The registers of the addressing mode may have their live range extended
      // and their kill flags may no longer be correct. Conservatively clear the
      // kill flags.
      if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(Reg: R);
      if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(Reg: R);
    }
    LLVM_DEBUG(dbgs() << "yielding"; New->dump());
    // Clear the StoreInstrCache, since we may invalidate it by erasing.
    if (SinkDst->mayStore() && !SinkDst->hasOrderedMemoryRef())
      StoreInstrCache.clear();
    SinkDst->eraseFromParent();
  }

  // Collect operands that need to be cleaned up because the registers no longer
  // exist (in COPYs and debug instructions). We cannot delete instructions or
  // clear operands while traversing register uses.
  SmallVector<MachineOperand *> Cleanup;
  Worklist.push_back(Elt: DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();
    for (MachineOperand &MO : MRI->use_operands(Reg)) {
      MachineInstr *U = MO.getParent();
      assert((U->isCopy() || U->isDebugInstr()) &&
             "Only debug uses and copies must remain");
      if (U->isCopy())
        Worklist.push_back(Elt: U->getOperand(i: 0).getReg());
      Cleanup.push_back(Elt: &MO);
    }
  }

  // Delete the dead COPYs and clear operands in debug instructions
  for (MachineOperand *MO : Cleanup) {
    MachineInstr *I = MO->getParent();
    if (I->isCopy()) {
      I->eraseFromParent();
    } else {
      MO->setReg(0);
      MO->setSubReg(0);
    }
  }

  MI.eraseFromParent();
  return true;
}
641
642/// AllUsesDominatedByBlock - Return true if all uses of the specified register
643/// occur in blocks dominated by the specified block. If any use is in the
644/// definition block, then return false since it is never legal to move def
645/// after uses.
646bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
647 MachineBasicBlock *MBB,
648 MachineBasicBlock *DefMBB,
649 bool &BreakPHIEdge,
650 bool &LocalUse) const {
651 assert(Reg.isVirtual() && "Only makes sense for vregs");
652
653 // Ignore debug uses because debug info doesn't affect the code.
654 if (MRI->use_nodbg_empty(RegNo: Reg))
655 return true;
656
657 // BreakPHIEdge is true if all the uses are in the successor MBB being sunken
658 // into and they are all PHI nodes. In this case, machine-sink must break
659 // the critical edge first. e.g.
660 //
661 // %bb.1:
662 // Predecessors according to CFG: %bb.0
663 // ...
664 // %def = DEC64_32r %x, implicit-def dead %eflags
665 // ...
666 // JE_4 <%bb.37>, implicit %eflags
667 // Successors according to CFG: %bb.37 %bb.2
668 //
669 // %bb.2:
670 // %p = PHI %y, %bb.0, %def, %bb.1
671 if (all_of(Range: MRI->use_nodbg_operands(Reg), P: [&](MachineOperand &MO) {
672 MachineInstr *UseInst = MO.getParent();
673 unsigned OpNo = MO.getOperandNo();
674 MachineBasicBlock *UseBlock = UseInst->getParent();
675 return UseBlock == MBB && UseInst->isPHI() &&
676 UseInst->getOperand(i: OpNo + 1).getMBB() == DefMBB;
677 })) {
678 BreakPHIEdge = true;
679 return true;
680 }
681
682 for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
683 // Determine the block of the use.
684 MachineInstr *UseInst = MO.getParent();
685 unsigned OpNo = &MO - &UseInst->getOperand(i: 0);
686 MachineBasicBlock *UseBlock = UseInst->getParent();
687 if (UseInst->isPHI()) {
688 // PHI nodes use the operand in the predecessor block, not the block with
689 // the PHI.
690 UseBlock = UseInst->getOperand(i: OpNo + 1).getMBB();
691 } else if (UseBlock == DefMBB) {
692 LocalUse = true;
693 return false;
694 }
695
696 // Check that it dominates.
697 if (!DT->dominates(A: MBB, B: UseBlock))
698 return false;
699 }
700
701 return true;
702}
703
704/// Return true if this machine instruction loads from global offset table or
705/// constant pool.
706static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
707 assert(MI.mayLoad() && "Expected MI that loads!");
708
709 // If we lost memory operands, conservatively assume that the instruction
710 // reads from everything..
711 if (MI.memoperands_empty())
712 return true;
713
714 for (MachineMemOperand *MemOp : MI.memoperands())
715 if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
716 if (PSV->isGOT() || PSV->isConstantPool())
717 return true;
718
719 return false;
720}
721
/// Scan \p BB (the preheader of \p Cycle, per the caller in run()) and append
/// to \p Candidates the instructions that may be sunk into the cycle: target-
/// approved, cycle-invariant, safe to move, with a single-def register result.
void MachineSinking::FindCycleSinkCandidates(
    MachineCycle *Cycle, MachineBasicBlock *BB,
    SmallVectorImpl<MachineInstr *> &Candidates) {
  for (auto &MI : *BB) {
    LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
    if (MI.isMetaInstruction()) {
      LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n");
      continue;
    }
    if (!TII->shouldSink(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
                           "target\n");
      continue;
    }
    if (!isCycleInvariant(Cycle, I&: MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
      continue;
    }
    // Refuse to move across any store (DontMoveAcrossStore starts true).
    bool DontMoveAcrossStore = true;
    if (!MI.isSafeToMove(SawStore&: DontMoveAcrossStore)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
      continue;
    }
    // Note: this guard keeps only loads from the GOT / constant pool; any
    // other load is rejected here.
    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n");
      continue;
    }
    // Convergent operations may not be made control-dependent on more values.
    if (MI.isConvergent())
      continue;

    // The candidate must define a register in operand 0, and that register
    // must have exactly one definition.
    const MachineOperand &MO = MI.getOperand(i: 0);
    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
      continue;
    if (!MRI->hasOneDef(RegNo: MO.getReg()))
      continue;

    LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
    Candidates.push_back(Elt: &MI);
  }
}
762
/// New pass-manager entry point: gather the required analyses (and the
/// optional, cached-only ones used when splitting critical edges), run the
/// shared MachineSinking implementation, and report preserved analyses.
PreservedAnalyses
MachineSinkingPass::run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM) {
  auto *DT = &MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
  auto *PDT = &MFAM.getResult<MachinePostDominatorTreeAnalysis>(IR&: MF);
  auto *CI = &MFAM.getResult<MachineCycleAnalysis>(IR&: MF);
  // Profile summary lives at module scope; fetch it through the module proxy
  // (may be null if it was never computed).
  auto *PSI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(IR&: MF)
                  .getCachedResult<ProfileSummaryAnalysis>(
                      IR&: *MF.getFunction().getParent());
  auto *MBFI = UseBlockFreqInfo
                   ? &MFAM.getResult<MachineBlockFrequencyAnalysis>(IR&: MF)
                   : nullptr;
  auto *MBPI = &MFAM.getResult<MachineBranchProbabilityAnalysis>(IR&: MF);
  auto *AA = &MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(IR&: MF)
                  .getManager()
                  .getResult<AAManager>(IR&: MF.getFunction());
  // These are only consulted when critical edges are split, so cached
  // results are sufficient; each may be null.
  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(IR&: MF);
  auto *SI = MFAM.getCachedResult<SlotIndexesAnalysis>(IR&: MF);
  auto *LV = MFAM.getCachedResult<LiveVariablesAnalysis>(IR&: MF);
  auto *MLI = MFAM.getCachedResult<MachineLoopAnalysis>(IR&: MF);
  MachineSinking Impl(EnableSinkAndFold, DT, PDT, LV, MLI, SI, LIS, CI, PSI,
                      MBFI, MBPI, AA);
  bool Changed = Impl.run(MF);
  if (!Changed)
    return PreservedAnalyses::all();
  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserve<MachineCycleAnalysis>();
  PA.preserve<MachineLoopAnalysis>();
  if (UseBlockFreqInfo)
    PA.preserve<MachineBlockFrequencyAnalysis>();
  return PA;
}
795
/// Print this pass's textual pipeline name, appending the sink-and-fold
/// option suffix when that mode is enabled.
void MachineSinkingPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  OS << MapClassName2PassName(name()); // ideally machine-sink
  if (EnableSinkAndFold)
    OS << "<enable-sink-fold>";
}
802
803bool MachineSinkingLegacy::runOnMachineFunction(MachineFunction &MF) {
804 if (skipFunction(F: MF.getFunction()))
805 return false;
806
807 TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
808 bool EnableSinkAndFold = PassConfig->getEnableSinkAndFold();
809
810 auto *DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
811 auto *PDT =
812 &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
813 auto *CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
814 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
815 auto *MBFI =
816 UseBlockFreqInfo
817 ? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
818 : nullptr;
819 auto *MBPI =
820 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
821 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
822 // Get analyses for split critical edge.
823 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
824 auto *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
825 auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
826 auto *SI = SIWrapper ? &SIWrapper->getSI() : nullptr;
827 auto *LVWrapper = getAnalysisIfAvailable<LiveVariablesWrapperPass>();
828 auto *LV = LVWrapper ? &LVWrapper->getLV() : nullptr;
829 auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
830 auto *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;
831
832 MachineSinking Impl(EnableSinkAndFold, DT, PDT, LV, MLI, SI, LIS, CI, PSI,
833 MBFI, MBPI, AA);
834 return Impl.run(MF);
835}
836
/// Run the machine-sinking transformation on \p MF. Iterates to a fixed
/// point, then optionally performs staged sinking out of cycle preheaders.
/// Returns true if the function was modified.
bool MachineSinking::run(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");

  // Cache subtarget handles used throughout the pass.
  STI = &MF.getSubtarget();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &MF.getRegInfo();

  RegClassInfo.runOnMachineFunction(MF);

  bool EverMadeChange = false;

  // Iterate to a fixed point: sinking into a block (or splitting an edge)
  // can expose further sinking opportunities on the next pass.
  while (true) {
    bool MadeChange = false;

    // Process all basic blocks.
    CEBCandidates.clear();
    CEMergeCandidates.clear();
    ToSplit.clear();
    for (auto &MBB : MF)
      MadeChange |= ProcessBlock(MBB);

    // If we have anything we marked as toSplit, split it now.
    // Dominator-tree updates are batched lazily through MDTU.
    MachineDomTreeUpdater MDTU(DT, PDT,
                               MachineDomTreeUpdater::UpdateStrategy::Lazy);
    for (const auto &Pair : ToSplit) {
      auto NewSucc = Pair.first->SplitCriticalEdge(
          Succ: Pair.second, Analyses: {.LIS: LIS, .SI: SI, .LV: LV, .MLI: MLI}, LiveInSets: nullptr, MDTU: &MDTU);
      if (NewSucc != nullptr) {
        LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
                          << printMBBReference(*Pair.first) << " -- "
                          << printMBBReference(*NewSucc) << " -- "
                          << printMBBReference(*Pair.second) << '\n');
        // Keep block-frequency info consistent with the new CFG shape.
        if (MBFI)
          MBFI->onEdgeSplit(NewPredecessor: *Pair.first, NewSuccessor: *NewSucc, MBPI: *MBPI);

        MadeChange = true;
        ++NumSplit;
        // Cycle info must also learn about the new block on the edge.
        CI->splitCriticalEdge(Pred: Pair.first, Succ: Pair.second, New: NewSucc);
      } else
        LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
    }
    // If this iteration over the code changed anything, keep iterating.
    if (!MadeChange)
      break;
    EverMadeChange = true;
  }

  // Optionally sink instructions from cycle preheaders into the cycle body,
  // in progressively more aggressive stages, while register pressure in a
  // preheader stays above the limit.
  if (SinkInstsIntoCycle) {
    SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
    SchedModel.init(TSInfo: STI);
    bool HasHighPressure;

    DenseMap<SinkItem, MachineInstr *> SunkInstrs;

    // Stage COPY sinks a limited number of copies; LOW_LATENCY sinks any
    // instruction the target reports as low-latency; AGGRESSIVE sinks the
    // remaining candidates.
    enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
    for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END;
         ++Stage, SunkInstrs.clear()) {
      HasHighPressure = false;

      for (auto *Cycle : Cycles) {
        MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
        if (!Preheader) {
          LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
          continue;
        }
        SmallVector<MachineInstr *, 8> Candidates;
        FindCycleSinkCandidates(Cycle, BB: Preheader, Candidates);

        unsigned i = 0;

        // Walk the candidates in reverse order so that we start with the use
        // of a def-use chain, if there is any.
        // TODO: Sort the candidates using a cost-model.
        for (MachineInstr *I : llvm::reverse(C&: Candidates)) {
          // CycleSinkStage::COPY: Sink a limited number of copies
          if (Stage == CycleSinkStage::COPY) {
            if (i++ == SinkIntoCycleLimit) {
              LLVM_DEBUG(dbgs()
                         << "CycleSink: Limit reached of instructions to "
                            "be analyzed.");
              break;
            }

            if (!I->isCopy())
              continue;
          }

          // CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions
          // which the target specifies as low-latency
          if (Stage == CycleSinkStage::LOW_LATENCY &&
              !TII->hasLowDefLatency(SchedModel, DefMI: *I, DefIdx: 0))
            continue;

          if (!aggressivelySinkIntoCycle(Cycle, I&: *I, SunkInstrs))
            continue;
          EverMadeChange = true;
          ++NumCycleSunk;
        }

        // Recalculate the pressure after sinking
        if (!HasHighPressure)
          HasHighPressure = registerPressureExceedsLimit(MBB: *Preheader);
      }
      // Stop escalating stages once no preheader exceeds the pressure limit.
      if (!HasHighPressure)
        break;
    }
  }

  // Drop per-run caches so stale entries cannot leak into a later run.
  HasStoreCache.clear();
  StoreInstrCache.clear();

  // Now clear any kill flags for recorded registers.
  for (auto I : RegsToClearKillFlags)
    MRI->clearKillFlags(Reg: I);
  RegsToClearKillFlags.clear();

  releaseMemory();
  return EverMadeChange;
}
957
/// Try to sink instructions out of \p MBB. Returns true if anything changed.
bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
  // Nothing to do for an empty block; without sink-and-fold, a block with at
  // most one successor has nowhere to sink to either.
  if ((!EnableSinkAndFold && MBB.succ_size() <= 1) || MBB.empty())
    return false;

  // Don't bother sinking code out of unreachable blocks. In addition to being
  // unprofitable, it can also lead to infinite looping, because in an
  // unreachable cycle there may be nowhere to stop.
  if (!DT->isReachableFromEntry(A: &MBB))
    return false;

  bool MadeChange = false;

  // Cache all successors, sorted by frequency info and cycle depth.
  AllSuccsCache AllSuccessors;

  // Walk the basic block bottom-up. Remember if we saw a store.
  MachineBasicBlock::iterator I = MBB.end();
  --I;
  bool ProcessedBegin, SawStore = false;
  do {
    MachineInstr &MI = *I; // The instruction to sink.

    // Predecrement I (if it's not begin) so that it isn't invalidated by
    // sinking.
    ProcessedBegin = I == MBB.begin();
    if (!ProcessedBegin)
      --I;

    // Debug/pseudo instructions are never sunk themselves; DBG_VALUEs are
    // recorded so they can follow a def that gets sunk later.
    if (MI.isDebugOrPseudoInstr() || MI.isFakeUse()) {
      if (MI.isDebugValue())
        ProcessDbgInst(MI);
      continue;
    }

    // Try to fold MI into a user while sinking, if that mode is enabled.
    if (EnableSinkAndFold && PerformSinkAndFold(MI, MBB: &MBB)) {
      MadeChange = true;
      continue;
    }

    // Can't sink anything out of a block that has less than two successors.
    if (MBB.succ_size() <= 1)
      continue;

    if (PerformTrivialForwardCoalescing(MI, MBB: &MBB)) {
      MadeChange = true;
      continue;
    }

    if (SinkInstruction(MI, SawStore, AllSuccessors)) {
      ++NumSunk;
      MadeChange = true;
    }

    // If we just processed the first instruction in the block, we're done.
  } while (!ProcessedBegin);

  // Per-block debug-tracking state must not leak into the next block.
  SeenDbgUsers.clear();
  SeenDbgVars.clear();
  // recalculate the bb register pressure after sinking one BB.
  CachedRegisterPressure.clear();
  return MadeChange;
}
1020
1021void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
1022 // When we see DBG_VALUEs for registers, record any vreg it reads, so that
1023 // we know what to sink if the vreg def sinks.
1024 assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");
1025
1026 DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
1027 MI.getDebugLoc()->getInlinedAt());
1028 bool SeenBefore = SeenDbgVars.contains(V: Var);
1029
1030 for (MachineOperand &MO : MI.debug_operands()) {
1031 if (MO.isReg() && MO.getReg().isVirtual())
1032 SeenDbgUsers[MO.getReg()].push_back(NewVal: SeenDbgUser(&MI, SeenBefore));
1033 }
1034
1035 // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
1036 SeenDbgVars.insert(V: Var);
1037}
1038
/// Heuristically decide whether splitting the critical edge From->To so MI
/// can be sunk is likely profitable. On a repeated candidate for the same
/// value/destination, \p DeferredFromBlock is set to the earlier candidate's
/// source block so both edges can be queued by the caller.
bool MachineSinking::isWorthBreakingCriticalEdge(
    MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
    MachineBasicBlock *&DeferredFromBlock) {
  // FIXME: Need much better heuristics.

  // If the pass has already considered breaking this edge (during this pass
  // through the function), then let's go ahead and break it. This means
  // sinking multiple "cheap" instructions into the same block.
  if (!CEBCandidates.insert(V: std::make_pair(x&: From, y&: To)).second)
    return true;

  // Non-cheap instructions are always worth a split.
  if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
    return true;

  // Check and record the register and the destination block we want to sink
  // into. Note that we want to do the following before the next check on branch
  // probability. Because we want to record the initial candidate even if it's
  // on hot edge, so that other candidates that might not on hot edges can be
  // sunk as well.
  for (const auto &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    // Look through copy-like instructions so equivalent values share a key.
    Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(SrcReg: Reg, MRI) : Reg;
    auto Key = std::make_pair(x&: SrcReg, y&: To);
    auto Res = CEMergeCandidates.try_emplace(Key, Args&: From);
    // We wanted to sink the same register into the same block, consider it to
    // be profitable.
    if (!Res.second) {
      // Return the source block that was previously held off.
      DeferredFromBlock = Res.first->second;
      return true;
    }
  }

  // A sufficiently cold edge is worth splitting even for a cheap instruction.
  if (From->isSuccessor(MBB: To) &&
      MBPI->getEdgeProbability(Src: From, Dst: To) <=
          BranchProbability(SplitEdgeProbabilityThreshold, 100))
    return true;

  // MI is cheap, we probably don't want to break the critical edge for it.
  // However, if this would allow some definitions of its source operands
  // to be sunk then it's probably worth it.
  for (const MachineOperand &MO : MI.all_uses()) {
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    // We don't move live definitions of physical registers,
    // so sinking their uses won't enable any opportunities.
    if (Reg.isPhysical())
      continue;

    // If this instruction is the only user of a virtual register,
    // check if breaking the edge will enable sinking
    // both this instruction and the defining instruction.
    if (MRI->hasOneNonDBGUse(RegNo: Reg)) {
      // If the definition resides in same MBB,
      // claim it's likely we can sink these together.
      // If definition resides elsewhere, we aren't
      // blocking it from being sunk so don't break the edge.
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (DefMI->getParent() == MI.getParent())
        return true;
    }
  }

  // Let the target decide if it's worth breaking this
  // critical edge for a "cheap" instruction.
  return TII->shouldBreakCriticalEdgeToSink(MI);
}
1110
/// Check whether it is legal to split the critical edge FromBB->ToBB and
/// sink a computation into the new block: backedges must not be split, and
/// unless all uses are PHIs, the new block must end up dominating every use.
bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
                                                MachineBasicBlock *FromBB,
                                                MachineBasicBlock *ToBB,
                                                bool BreakPHIEdge) {
  // Avoid breaking back edge. From == To means backedge for single BB cycle.
  if (!SplitEdges || FromBB == ToBB || !FromBB->isSuccessor(MBB: ToBB))
    return false;

  MachineCycle *FromCycle = CI->getCycle(Block: FromBB);
  MachineCycle *ToCycle = CI->getCycle(Block: ToBB);

  // Check for backedges of more "complex" cycles.
  if (FromCycle == ToCycle && FromCycle &&
      (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
    return false;

  // It's not always legal to break critical edges and sink the computation
  // to the edge.
  //
  // %bb.1:
  // v1024
  // Beq %bb.3
  // <fallthrough>
  // %bb.2:
  // ... no uses of v1024
  // <fallthrough>
  // %bb.3:
  // ...
  //        = v1024
  //
  // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
  //
  // %bb.1:
  // ...
  // Bne %bb.2
  // %bb.4:
  // v1024 =
  // B %bb.3
  // %bb.2:
  // ... no uses of v1024
  // <fallthrough>
  // %bb.3:
  // ...
  //        = v1024
  //
  // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
  // flow. We need to ensure the new basic block where the computation is
  // sunk to dominates all the uses.
  // It's only legal to break critical edge and sink the computation to the
  // new block if all the predecessors of "To", except for "From", are
  // not dominated by "From". Given SSA property, this means these
  // predecessors are dominated by "To".
  //
  // There is no need to do this check if all the uses are PHI nodes. PHI
  // sources are only defined on the specific predecessor edges.
  if (!BreakPHIEdge) {
    for (MachineBasicBlock *Pred : ToBB->predecessors())
      if (Pred != FromBB && !DT->dominates(A: ToBB, B: Pred))
        return false;
  }

  return true;
}
1174
1175bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
1176 MachineBasicBlock *FromBB,
1177 MachineBasicBlock *ToBB,
1178 bool BreakPHIEdge) {
1179 bool Status = false;
1180 MachineBasicBlock *DeferredFromBB = nullptr;
1181 if (isWorthBreakingCriticalEdge(MI, From: FromBB, To: ToBB, DeferredFromBlock&: DeferredFromBB)) {
1182 // If there is a DeferredFromBB, we consider FromBB only if _both_
1183 // of them are legal to split.
1184 if ((!DeferredFromBB ||
1185 ToSplit.count(key: std::make_pair(x&: DeferredFromBB, y&: ToBB)) ||
1186 isLegalToBreakCriticalEdge(MI, FromBB: DeferredFromBB, ToBB, BreakPHIEdge)) &&
1187 isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
1188 ToSplit.insert(X: std::make_pair(x&: FromBB, y&: ToBB));
1189 if (DeferredFromBB)
1190 ToSplit.insert(X: std::make_pair(x&: DeferredFromBB, y&: ToBB));
1191 Status = true;
1192 }
1193 }
1194
1195 return Status;
1196}
1197
/// Compute (or fetch from cache) the max per-pressure-set register pressure
/// of \p MBB. Returns a reference into CachedRegisterPressure; the entry is
/// refreshed when \p UseCache is false.
std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB,
                                      bool UseCache) {
  // Currently to save compiling time, MBB's register pressure will not change
  // in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's
  // register pressure is changed after sinking any instructions into it.
  // FIXME: need an accurate and cheap register pressure estimate model here.

  auto RP = CachedRegisterPressure.find(Val: &MBB);
  if (UseCache && RP != CachedRegisterPressure.end())
    return RP->second;

  RegionPressure Pressure;
  RegPressureTracker RPTracker(Pressure);

  // Initialize the register pressure tracker.
  RPTracker.init(mf: MBB.getParent(), rci: &RegClassInfo, lis: nullptr, mbb: &MBB, pos: MBB.end(),
                 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

  // Walk the block bottom-up, receding the tracker over each real
  // instruction; debug/pseudo instructions do not affect pressure.
  for (MachineBasicBlock::const_iterator MII = MBB.instr_end(),
                                         MIE = MBB.instr_begin();
       MII != MIE; --MII) {
    const MachineInstr &MI = *std::prev(x: MII);
    if (MI.isDebugOrPseudoInstr())
      continue;
    RegisterOperands RegOpers;
    RegOpers.collect(MI, TRI: *TRI, MRI: *MRI, TrackLaneMasks: false, IgnoreDead: false);
    RPTracker.recedeSkipDebugValues();
    assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
    RPTracker.recede(RegOpers);
  }

  RPTracker.closeRegion();

  // If an entry already exists (UseCache was false), overwrite it in place so
  // outstanding references to the cached vector stay valid.
  if (RP != CachedRegisterPressure.end()) {
    CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure;
    return CachedRegisterPressure[&MBB];
  }

  auto It = CachedRegisterPressure.insert(
      KV: std::make_pair(x: &MBB, y&: RPTracker.getPressure().MaxSetPressure));
  return It.first->second;
}
1241
1242bool MachineSinking::registerPressureSetExceedsLimit(
1243 unsigned NRegs, const TargetRegisterClass *RC,
1244 const MachineBasicBlock &MBB) {
1245 unsigned Weight = NRegs * TRI->getRegClassWeight(RC).RegWeight;
1246 const int *PS = TRI->getRegClassPressureSets(RC);
1247 std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB);
1248 for (; *PS != -1; PS++)
1249 if (Weight + BBRegisterPressure[*PS] >=
1250 RegClassInfo.getRegPressureSetLimit(Idx: *PS))
1251 return true;
1252 return false;
1253}
1254
1255// Recalculate RP and check if any pressure set exceeds the set limit.
1256bool MachineSinking::registerPressureExceedsLimit(
1257 const MachineBasicBlock &MBB) {
1258 std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB, UseCache: false);
1259
1260 for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) {
1261 if (BBRegisterPressure[PS] >=
1262 TRI->getRegPressureSetLimit(MF: *MBB.getParent(), Idx: PS)) {
1263 return true;
1264 }
1265 }
1266
1267 return false;
1268}
1269
/// isProfitableToSinkTo - Return true if it is profitable to sink MI (which
/// defines \p Reg in \p MBB) into \p SuccToSinkTo.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          MachineBasicBlock *SuccToSinkTo,
                                          AllSuccsCache &AllSuccessors) {
  assert(SuccToSinkTo && "Invalid SinkTo Candidate BB");

  // Sinking into the same block is a no-op.
  if (MBB == SuccToSinkTo)
    return false;

  // It is profitable if SuccToSinkTo does not post dominate current block.
  if (!PDT->dominates(A: SuccToSinkTo, B: MBB))
    return true;

  // It is profitable to sink an instruction from a deeper cycle to a shallower
  // cycle, even if the latter post-dominates the former (PR21115).
  if (CI->getCycleDepth(Block: MBB) > CI->getCycleDepth(Block: SuccToSinkTo))
    return true;

  // Check if only use in post dominated block is PHI instruction.
  bool NonPHIUse = false;
  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
    MachineBasicBlock *UseBlock = UseInst.getParent();
    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
      NonPHIUse = true;
  }
  if (!NonPHIUse)
    return true;

  // If SuccToSinkTo post dominates then also it may be profitable if MI
  // can further profitably sinked into another block in next round.
  bool BreakPHIEdge = false;
  // FIXME - If finding successor is compile time expensive then cache results.
  if (MachineBasicBlock *MBB2 =
          FindSuccToSinkTo(MI, MBB: SuccToSinkTo, BreakPHIEdge, AllSuccessors))
    return isProfitableToSinkTo(Reg, MI, MBB: SuccToSinkTo, SuccToSinkTo: MBB2, AllSuccessors);

  MachineCycle *MCycle = CI->getCycle(Block: MBB);

  // If the instruction is not inside a cycle, it is not profitable to sink MI
  // to a post dominate block SuccToSinkTo.
  if (!MCycle)
    return false;

  // If this instruction is inside a Cycle and sinking this instruction can make
  // more registers live range shorten, it is still profitable.
  for (const MachineOperand &MO : MI.operands()) {
    // Ignore non-register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      // Don't handle non-constant and non-ignorable physical register uses.
      if (MO.isUse() && !MRI->isConstantPhysReg(PhysReg: Reg) &&
          !TII->isIgnorableUse(MO))
        return false;
      continue;
    }

    // Users for the defs are all dominated by SuccToSinkTo.
    if (MO.isDef()) {
      // This def register's live range is shortened after sinking.
      bool LocalUse = false;
      if (!AllUsesDominatedByBlock(Reg, MBB: SuccToSinkTo, DefMBB: MBB, BreakPHIEdge,
                                   LocalUse))
        return false;
    } else {
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (!DefMI)
        continue;
      MachineCycle *Cycle = CI->getCycle(Block: DefMI->getParent());
      // DefMI is defined outside of cycle. There should be no live range
      // impact for this operand. Definition outside of cycle means:
      // 1: definition is outside of cycle.
      // 2: definition is in this cycle, but it is a PHI in the cycle header.
      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
                              Cycle->getHeader() == DefMI->getParent()))
        continue;
      // The DefMI is defined inside the cycle.
      // If sinking this operand makes some register pressure set exceed limit,
      // it is not profitable.
      if (registerPressureSetExceedsLimit(NRegs: 1, RC: MRI->getRegClass(Reg),
                                          MBB: *SuccToSinkTo)) {
        LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable.");
        return false;
      }
    }
  }

  // If MI is in cycle and all its operands are alive across the whole cycle or
  // if no operand sinking make register pressure set exceed limit, it is
  // profitable to sink MI.
  return true;
}
1367
/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
/// computing it if it was not already cached. The returned reference is into
/// the per-ProcessBlock cache \p AllSuccessors.
SmallVector<MachineBasicBlock *, 4> &
MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                                       AllSuccsCache &AllSuccessors) const {
  // Do we have the sorted successors in cache ?
  auto Succs = AllSuccessors.find(Val: MBB);
  if (Succs != AllSuccessors.end())
    return Succs->second;

  SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());

  // Handle cases where sinking can happen but where the sink point isn't a
  // successor. For example:
  //
  //   x = computation
  //   if () {} else {}
  //   use x
  //
  for (MachineDomTreeNode *DTChild : DT->getNode(BB: MBB)->children()) {
    // DomTree children of MBB that have MBB as immediate dominator are added.
    if (DTChild->getIDom()->getBlock() == MI.getParent() &&
        // Skip MBBs already added to the AllSuccs vector above.
        !MBB->isSuccessor(MBB: DTChild->getBlock()))
      AllSuccs.push_back(Elt: DTChild->getBlock());
  }

  // Sort Successors according to their cycle depth or block frequency info.
  // Lower frequency / shallower cycle depth sorts first, so the most
  // profitable sink destination is tried first by callers.
  llvm::stable_sort(
      Range&: AllSuccs, C: [&](const MachineBasicBlock *L, const MachineBasicBlock *R) {
        uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(MBB: L).getFrequency() : 0;
        uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(MBB: R).getFrequency() : 0;
        if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI) ||
            (!LHSFreq && !RHSFreq))
          return CI->getCycleDepth(Block: L) < CI->getCycleDepth(Block: R);
        return LHSFreq < RHSFreq;
      });

  auto it = AllSuccessors.insert(KV: std::make_pair(x&: MBB, y&: AllSuccs));

  return it.first->second;
}
1410
/// FindSuccToSinkTo - Find a successor to sink this instruction to, or
/// nullptr if no single legal and profitable destination exists. Sets
/// \p BreakPHIEdge (via AllUsesDominatedByBlock) when all uses are PHIs.
MachineBasicBlock *
MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                 bool &BreakPHIEdge,
                                 AllSuccsCache &AllSuccessors) {
  assert(MBB && "Invalid MachineBasicBlock!");

  // loop over all the operands of the specified instruction. If there is
  // anything we can't handle, bail out.

  // SuccToSinkTo - This is the successor to sink this instruction to, once we
  // decide.
  MachineBasicBlock *SuccToSinkTo = nullptr;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue; // Ignore non-register operands.

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(PhysReg: Reg) && !TII->isIgnorableUse(MO))
          return nullptr;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return nullptr;
      }
    } else {
      // Virtual register uses are always safe to sink.
      if (MO.isUse())
        continue;

      // If it's not safe to move defs of the register class, then abort.
      if (!TII->isSafeToMoveRegClassDefs(RC: MRI->getRegClass(Reg)))
        return nullptr;

      // Virtual register defs can only be sunk if all their uses are in blocks
      // dominated by one of the successors.
      if (SuccToSinkTo) {
        // If a previous operand picked a block to sink to, then this operand
        // must be sinkable to the same block.
        bool LocalUse = false;
        if (!AllUsesDominatedByBlock(Reg, MBB: SuccToSinkTo, DefMBB: MBB, BreakPHIEdge,
                                     LocalUse))
          return nullptr;

        continue;
      }

      // Otherwise, we should look at all the successors and decide which one
      // we should sink to. If we have reliable block frequency information
      // (frequency != 0) available, give successors with smaller frequencies
      // higher priority, otherwise prioritize smaller cycle depths.
      for (MachineBasicBlock *SuccBlock :
           GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
        bool LocalUse = false;
        if (AllUsesDominatedByBlock(Reg, MBB: SuccBlock, DefMBB: MBB, BreakPHIEdge,
                                    LocalUse)) {
          SuccToSinkTo = SuccBlock;
          break;
        }
        if (LocalUse)
          // Def is used locally, it's never safe to move this def.
          return nullptr;
      }

      // If we couldn't find a block to sink to, ignore this instruction.
      if (!SuccToSinkTo)
        return nullptr;
      if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
        return nullptr;
    }
  }

  // It is not possible to sink an instruction into its own block. This can
  // happen with cycles.
  if (MBB == SuccToSinkTo)
    return nullptr;

  // It's not safe to sink instructions to EH landing pad. Control flow into
  // landing pad is implicitly defined.
  if (SuccToSinkTo && SuccToSinkTo->isEHPad())
    return nullptr;

  // It ought to be okay to sink instructions into an INLINEASM_BR target, but
  // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in
  // the source block (which this code does not yet do). So for now, forbid
  // doing so.
  if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
    return nullptr;

  // Give the target a final veto over the chosen destination.
  if (SuccToSinkTo && !TII->isSafeToSink(MI, SuccToSinkTo, CI))
    return nullptr;

  return SuccToSinkTo;
}
1512
1513/// Return true if MI is likely to be usable as a memory operation by the
1514/// implicit null check optimization.
1515///
1516/// This is a "best effort" heuristic, and should not be relied upon for
1517/// correctness. This returning true does not guarantee that the implicit null
1518/// check optimization is legal over MI, and this returning false does not
1519/// guarantee MI cannot possibly be used to do a null check.
1520static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
1521 const TargetInstrInfo *TII,
1522 const TargetRegisterInfo *TRI) {
1523 using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
1524
1525 auto *MBB = MI.getParent();
1526 if (MBB->pred_size() != 1)
1527 return false;
1528
1529 auto *PredMBB = *MBB->pred_begin();
1530 auto *PredBB = PredMBB->getBasicBlock();
1531
1532 // Frontends that don't use implicit null checks have no reason to emit
1533 // branches with make.implicit metadata, and this function should always
1534 // return false for them.
1535 if (!PredBB ||
1536 !PredBB->getTerminator()->getMetadata(KindID: LLVMContext::MD_make_implicit))
1537 return false;
1538
1539 const MachineOperand *BaseOp;
1540 int64_t Offset;
1541 bool OffsetIsScalable;
1542 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
1543 return false;
1544
1545 if (!BaseOp->isReg())
1546 return false;
1547
1548 if (!(MI.mayLoad() && !MI.isPredicable()))
1549 return false;
1550
1551 MachineBranchPredicate MBP;
1552 if (TII->analyzeBranchPredicate(MBB&: *PredMBB, MBP, AllowModify: false))
1553 return false;
1554
1555 return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
1556 (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
1557 MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
1558 MBP.LHS.getReg() == BaseOp->getReg();
1559}
1560
1561/// If the sunk instruction is a copy, try to forward the copy instead of
1562/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
1563/// there's any subregister weirdness involved. Returns true if copy
1564/// propagation occurred.
1565static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI,
1566 Register Reg) {
1567 const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
1568 const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();
1569
1570 // Copy DBG_VALUE operand and set the original to undef. We then check to
1571 // see whether this is something that can be copy-forwarded. If it isn't,
1572 // continue around the loop.
1573
1574 const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
1575 auto CopyOperands = TII.isCopyInstr(MI: SinkInst);
1576 if (!CopyOperands)
1577 return false;
1578 SrcMO = CopyOperands->Source;
1579 DstMO = CopyOperands->Destination;
1580
1581 // Check validity of forwarding this copy.
1582 bool PostRA = MRI.getNumVirtRegs() == 0;
1583
1584 // Trying to forward between physical and virtual registers is too hard.
1585 if (Reg.isVirtual() != SrcMO->getReg().isVirtual())
1586 return false;
1587
1588 // Only try virtual register copy-forwarding before regalloc, and physical
1589 // register copy-forwarding after regalloc.
1590 bool arePhysRegs = !Reg.isVirtual();
1591 if (arePhysRegs != PostRA)
1592 return false;
1593
1594 // Pre-regalloc, only forward if all subregisters agree (or there are no
1595 // subregs at all). More analysis might recover some forwardable copies.
1596 if (!PostRA)
1597 for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg))
1598 if (DbgMO.getSubReg() != SrcMO->getSubReg() ||
1599 DbgMO.getSubReg() != DstMO->getSubReg())
1600 return false;
1601
1602 // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
1603 // of this copy. Only forward the copy if the DBG_VALUE operand exactly
1604 // matches the copy destination.
1605 if (PostRA && Reg != DstMO->getReg())
1606 return false;
1607
1608 for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) {
1609 DbgMO.setReg(SrcMO->getReg());
1610 DbgMO.setSubReg(SrcMO->getSubReg());
1611 }
1612 return true;
1613}
1614
// A DBG_VALUE paired with the registers (of the sunk def) it refers to.
using MIRegs = std::pair<MachineInstr *, SmallVector<Register, 2>>;

/// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
                        MachineBasicBlock::iterator InsertPos,
                        ArrayRef<MIRegs> DbgValuesToSink) {
  // If we cannot find a location to use (merge with), then we erase the debug
  // location to prevent debug-info driven tools from potentially reporting
  // wrong location information.
  if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
    MI.setDebugLoc(DebugLoc::getMergedLocation(LocA: MI.getDebugLoc(),
                                               LocB: InsertPos->getDebugLoc()));
  else
    MI.setDebugLoc(DebugLoc());

  // Move the instruction.
  MachineBasicBlock *ParentBlock = MI.getParent();
  SuccToSinkTo.splice(Where: InsertPos, Other: ParentBlock, From: MI,
                      To: ++MachineBasicBlock::iterator(MI));

  // Sink a copy of debug users to the insert position. Mark the original
  // DBG_VALUE location as 'undef', indicating that any earlier variable
  // location should be terminated as we've optimised away the value at this
  // point.
  for (const auto &DbgValueToSink : DbgValuesToSink) {
    MachineInstr *DbgMI = DbgValueToSink.first;
    MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(Orig: DbgMI);
    SuccToSinkTo.insert(I: InsertPos, MI: NewDbgMI);

    // If every sunk register's operand can be rewritten to the copy source,
    // the original DBG_VALUE stays valid; otherwise it becomes undef.
    bool PropagatedAllSunkOps = true;
    for (Register Reg : DbgValueToSink.second) {
      if (DbgMI->hasDebugOperandForReg(Reg)) {
        if (!attemptDebugCopyProp(SinkInst&: MI, DbgMI&: *DbgMI, Reg)) {
          PropagatedAllSunkOps = false;
          break;
        }
      }
    }
    if (!PropagatedAllSunkOps)
      DbgMI->setDebugValueUndef();
  }
}
1656
/// hasStoreBetween - check if there is a store between straight line blocks
/// From and To that may alias with MI. Results are cached in HasStoreCache /
/// StoreInstrCache. Conservatively returns true when unsure.
bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
                                     MachineBasicBlock *To, MachineInstr &MI) {
  // Make sure From and To are in straight line which means From dominates To
  // and To post dominates From.
  if (!DT->dominates(A: From, B: To) || !PDT->dominates(A: To, B: From))
    return true;

  auto BlockPair = std::make_pair(x&: From, y&: To);

  // Has this block pair been queried before with a definite cached result?
  if (auto It = HasStoreCache.find(Val: BlockPair); It != HasStoreCache.end())
    return It->second;

  // If the stores between the pair are already cached, only the alias query
  // against MI needs to be redone.
  if (auto It = StoreInstrCache.find(Val: BlockPair); It != StoreInstrCache.end())
    return llvm::any_of(Range&: It->second, P: [&](MachineInstr *I) {
      return I->mayAlias(AA, Other: MI, UseTBAA: false);
    });

  bool SawStore = false;
  bool HasAliasedStore = false;
  DenseSet<MachineBasicBlock *> HandledBlocks;
  DenseSet<MachineBasicBlock *> HandledDomBlocks;
  // Go through all reachable blocks from From.
  for (MachineBasicBlock *BB : depth_first(G: From)) {
    // We insert the instruction at the start of block To, so no need to worry
    // about stores inside To.
    // Store in block From should be already considered when just enter function
    // SinkInstruction.
    if (BB == To || BB == From)
      continue;

    // We already handle this BB in previous iteration.
    if (HandledBlocks.count(V: BB))
      continue;

    HandledBlocks.insert(V: BB);
    // To post dominates BB, it must be a path from block From.
    if (PDT->dominates(A: To, B: BB)) {
      if (!HandledDomBlocks.count(V: BB))
        HandledDomBlocks.insert(V: BB);

      // If this BB is too big or the block number in straight line between From
      // and To is too big, stop searching to save compiling time.
      if (BB->sizeWithoutDebugLargerThan(Limit: SinkLoadInstsPerBlockThreshold) ||
          HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
        // Conservatively record "has store" for every sub-path seen so far.
        for (auto *DomBB : HandledDomBlocks) {
          if (DomBB != BB && DT->dominates(A: DomBB, B: BB))
            HasStoreCache[std::make_pair(x&: DomBB, y&: To)] = true;
          else if (DomBB != BB && DT->dominates(A: BB, B: DomBB))
            HasStoreCache[std::make_pair(x&: From, y&: DomBB)] = true;
        }
        HasStoreCache[BlockPair] = true;
        return true;
      }

      for (MachineInstr &I : *BB) {
        // Treat as alias conservatively for a call or an ordered memory
        // operation.
        if (I.isCall() || I.hasOrderedMemoryRef()) {
          for (auto *DomBB : HandledDomBlocks) {
            if (DomBB != BB && DT->dominates(A: DomBB, B: BB))
              HasStoreCache[std::make_pair(x&: DomBB, y&: To)] = true;
            else if (DomBB != BB && DT->dominates(A: BB, B: DomBB))
              HasStoreCache[std::make_pair(x&: From, y&: DomBB)] = true;
          }
          HasStoreCache[BlockPair] = true;
          return true;
        }

        if (I.mayStore()) {
          SawStore = true;
          // We still have chance to sink MI if all stores between are not
          // aliased to MI.
          // Cache all store instructions, so that we don't need to go through
          // all From reachable blocks for next load instruction.
          if (I.mayAlias(AA, Other: MI, UseTBAA: false))
            HasAliasedStore = true;
          StoreInstrCache[BlockPair].push_back(Elt: &I);
        }
      }
    }
  }
  // If there is no store at all, cache the result.
  if (!SawStore)
    HasStoreCache[BlockPair] = false;
  return HasAliasedStore;
}
1747
1748/// Aggressively sink instructions into cycles. This will aggressively try to
1749/// sink all instructions in the top-most preheaders in an attempt to reduce RP.
1750/// In particular, it will sink into multiple successor blocks without limits
1751/// based on the amount of sinking, or the type of ops being sunk (so long as
1752/// they are safe to sink).
1753bool MachineSinking::aggressivelySinkIntoCycle(
1754 MachineCycle *Cycle, MachineInstr &I,
1755 DenseMap<SinkItem, MachineInstr *> &SunkInstrs) {
1756 // TODO: support instructions with multiple defs
1757 if (I.getNumDefs() > 1)
1758 return false;
1759
1760 LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
1761 assert(Cycle->getCyclePreheader() && "Cycle sink needs a preheader block");
1762 SmallVector<std::pair<RegSubRegPair, MachineInstr *>> Uses;
1763
1764 MachineOperand &DefMO = I.getOperand(i: 0);
1765 for (MachineInstr &MI : MRI->use_instructions(Reg: DefMO.getReg())) {
1766 Uses.push_back(Elt: {{DefMO.getReg(), DefMO.getSubReg()}, &MI});
1767 }
1768
1769 for (std::pair<RegSubRegPair, MachineInstr *> Entry : Uses) {
1770 MachineInstr *MI = Entry.second;
1771 LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI);
1772 if (MI->isPHI()) {
1773 LLVM_DEBUG(
1774 dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n");
1775 continue;
1776 }
1777 // We cannot sink before the prologue
1778 if (MI->isPosition() || TII->isBasicBlockPrologue(MI: *MI)) {
1779 LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, "
1780 "can't sink.\n");
1781 continue;
1782 }
1783 if (!Cycle->contains(Block: MI->getParent())) {
1784 LLVM_DEBUG(
1785 dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n");
1786 continue;
1787 }
1788
1789 MachineBasicBlock *SinkBlock = MI->getParent();
1790 MachineInstr *NewMI = nullptr;
1791 SinkItem MapEntry(&I, SinkBlock);
1792
1793 auto SI = SunkInstrs.find(Val: MapEntry);
1794
1795 // Check for the case in which we have already sunk a copy of this
1796 // instruction into the user block.
1797 if (SI != SunkInstrs.end()) {
1798 LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: "
1799 << printMBBReference(*SinkBlock) << "\n");
1800 NewMI = SI->second;
1801 }
1802
1803 // Create a copy of the instruction in the use block.
1804 if (!NewMI) {
1805 LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: "
1806 << printMBBReference(*SinkBlock) << "\n");
1807
1808 NewMI = I.getMF()->CloneMachineInstr(Orig: &I);
1809 if (DefMO.getReg().isVirtual()) {
1810 const TargetRegisterClass *TRC = MRI->getRegClass(Reg: DefMO.getReg());
1811 Register DestReg = MRI->createVirtualRegister(RegClass: TRC);
1812 NewMI->substituteRegister(FromReg: DefMO.getReg(), ToReg: DestReg, SubIdx: DefMO.getSubReg(),
1813 RegInfo: *TRI);
1814 }
1815 SinkBlock->insert(I: SinkBlock->SkipPHIsAndLabels(I: SinkBlock->begin()),
1816 MI: NewMI);
1817 SunkInstrs.insert(KV: {MapEntry, NewMI});
1818 }
1819
1820 // Conservatively clear any kill flags on uses of sunk instruction
1821 for (MachineOperand &MO : NewMI->all_uses()) {
1822 assert(MO.isReg() && MO.isUse());
1823 RegsToClearKillFlags.insert(V: MO.getReg());
1824 }
1825
1826 // The instruction is moved from its basic block, so do not retain the
1827 // debug information.
1828 assert(!NewMI->isDebugInstr() && "Should not sink debug inst");
1829 NewMI->setDebugLoc(DebugLoc());
1830
1831 // Replace the use with the newly created virtual register.
1832 RegSubRegPair &UseReg = Entry.first;
1833 MI->substituteRegister(FromReg: UseReg.Reg, ToReg: NewMI->getOperand(i: 0).getReg(),
1834 SubIdx: UseReg.SubReg, RegInfo: *TRI);
1835 }
1836 // If we have replaced all uses, then delete the dead instruction
1837 if (I.isDead(MRI: *MRI))
1838 I.eraseFromParent();
1839 return true;
1840}
1841
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
                                     AllSuccsCache &AllSuccessors) {
  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  if (!MI.isSafeToMove(SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't break implicit null checks. This is a performance heuristic, and not
  // required for correctness.
  if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges. We often get
  // instructions sunk into the top of a large block, but it would be better to
  // also sink them down before their first use in the block. This xform has to
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.

  bool BreakPHIEdge = false;
  MachineBasicBlock *ParentBlock = MI.getParent();
  MachineBasicBlock *SuccToSinkTo =
      FindSuccToSinkTo(MI, MBB: ParentBlock, BreakPHIEdge, AllSuccessors);

  // If there are no outputs, it must have side-effects.
  if (!SuccToSinkTo)
    return false;

  // If the instruction to move defines a dead physical register which is live
  // when leaving the basic block, don't move it because it could turn into a
  // "zombie" define of that preg. E.g., EFLAGS.
  for (const MachineOperand &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (Reg == 0 || !Reg.isPhysical())
      continue;
    if (SuccToSinkTo->isLiveIn(Reg))
      return false;
  }

  LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this is a critical edge.
  // Decide if we can sink along it or need to break the edge.
  if (SuccToSinkTo->pred_size() > 1) {
    // We cannot sink a load across a critical edge - there may be stores in
    // other code paths.
    bool TryBreak = false;
    // Only loads need the (potentially expensive) store-between scan; for any
    // other instruction pass Store=true so isSafeToMove stays conservative.
    bool Store =
        MI.mayLoad() ? hasStoreBetween(From: ParentBlock, To: SuccToSinkTo, MI) : true;
    if (!MI.isSafeToMove(SawStore&: Store)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
      TryBreak = true;
    }

    // We don't want to sink across a critical edge if we don't dominate the
    // successor. We could be introducing calculations to new code paths.
    if (!TryBreak && !DT->dominates(A: ParentBlock, B: SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
      TryBreak = true;
    }

    // Don't sink instructions into a cycle.
    if (!TryBreak && CI->getCycle(Block: SuccToSinkTo) &&
        (!CI->getCycle(Block: SuccToSinkTo)->isReducible() ||
         CI->getCycle(Block: SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
      TryBreak = true;
    }

    // Otherwise we are OK with sinking along a critical edge.
    if (!TryBreak)
      LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
    else {
      // Mark this edge as to be split.
      // If the edge can actually be split, the next iteration of the main loop
      // will sink MI in the newly created block.
      bool Status = PostponeSplitCriticalEdge(MI, FromBB: ParentBlock, ToBB: SuccToSinkTo,
                                              BreakPHIEdge);
      if (!Status)
        LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                             "break critical edge\n");
      // The instruction will not be sunk this time.
      return false;
    }
  }

  if (BreakPHIEdge) {
    // BreakPHIEdge is true if all the uses are in the successor MBB being
    // sunken into and they are all PHI nodes. In this case, machine-sink must
    // break the critical edge first.
    bool Status =
        PostponeSplitCriticalEdge(MI, FromBB: ParentBlock, ToBB: SuccToSinkTo, BreakPHIEdge);
    if (!Status)
      LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                           "break critical edge\n");
    // The instruction will not be sunk this time.
    return false;
  }

  // Determine where to insert into. Skip phi nodes.
  MachineBasicBlock::iterator InsertPos =
      SuccToSinkTo->SkipPHIsAndLabels(I: SuccToSinkTo->begin());
  if (blockPrologueInterferes(BB: SuccToSinkTo, End: InsertPos, MI, TRI, TII, MRI)) {
    LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
    return false;
  }

  // Collect debug users of any vreg that this inst defines.
  SmallVector<MIRegs, 4> DbgUsersToSink;
  for (auto &MO : MI.all_defs()) {
    if (!MO.getReg().isVirtual())
      continue;
    auto It = SeenDbgUsers.find(Val: MO.getReg());
    if (It == SeenDbgUsers.end())
      continue;

    // Sink any users that don't pass any other DBG_VALUEs for this variable.
    auto &Users = It->second;
    for (auto &User : Users) {
      MachineInstr *DbgMI = User.getPointer();
      if (User.getInt()) {
        // This DBG_VALUE would re-order assignments. If we can't copy-propagate
        // it, it can't be recovered. Set it undef.
        if (!attemptDebugCopyProp(SinkInst&: MI, DbgMI&: *DbgMI, Reg: MO.getReg()))
          DbgMI->setDebugValueUndef();
      } else {
        DbgUsersToSink.push_back(
            Elt: {DbgMI, SmallVector<Register, 2>(1, MO.getReg())});
      }
    }
  }

  // After sinking, some debug users may not be dominated any more. If possible,
  // copy-propagate their operands. As it's expensive, don't do this if there's
  // no debuginfo in the program.
  if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
    SalvageUnsunkDebugUsersOfCopy(MI, TargetBlock: SuccToSinkTo);

  performSink(MI, SuccToSinkTo&: *SuccToSinkTo, InsertPos, DbgValuesToSink: DbgUsersToSink);

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  // Note that we have to clear the kill flags for any register this instruction
  // uses as we may sink over another instruction which currently kills the
  // used registers.
  for (MachineOperand &MO : MI.all_uses())
    RegsToClearKillFlags.insert(V: MO.getReg()); // Remember to clear kill flags.

  return true;
}
2003
/// MI is a COPY being sunk into \p TargetBlock. Rewrite any DBG_VALUE users of
/// the copy's result that the sunk copy will no longer dominate so they refer
/// to the copy's source register/subregister instead.
void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
    MachineInstr &MI, MachineBasicBlock *TargetBlock) {
  assert(MI.isCopy());
  assert(MI.getOperand(1).isReg());

  // Enumerate all users of vreg operands that are def'd. Skip those that will
  // be sunk. For the rest, if they are not dominated by the block we will sink
  // MI into, propagate the copy source to them.
  SmallVector<MachineInstr *, 4> DbgDefUsers;
  SmallVector<Register, 4> DbgUseRegs;
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  for (auto &MO : MI.all_defs()) {
    if (!MO.getReg().isVirtual())
      continue;
    DbgUseRegs.push_back(Elt: MO.getReg());
    for (auto &User : MRI.use_instructions(Reg: MO.getReg())) {
      // Only DBG_VALUEs outside the dominated region need salvaging.
      if (!User.isDebugValue() || DT->dominates(A: TargetBlock, B: User.getParent()))
        continue;

      // If is in same block, will either sink or be use-before-def.
      if (User.getParent() == MI.getParent())
        continue;

      assert(User.hasDebugOperandForReg(MO.getReg()) &&
             "DBG_VALUE user of vreg, but has no operand for it?");
      DbgDefUsers.push_back(Elt: &User);
    }
  }

  // Point the users of this copy that are no longer dominated, at the source
  // of the copy.
  for (auto *User : DbgDefUsers) {
    for (auto &Reg : DbgUseRegs) {
      for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) {
        DbgOp.setReg(MI.getOperand(i: 1).getReg());
        DbgOp.setSubReg(MI.getOperand(i: 1).getSubReg());
      }
    }
  }
}
2044
2045//===----------------------------------------------------------------------===//
2046// This pass is not intended to be a replacement or a complete alternative
2047// for the pre-ra machine sink pass. It is only designed to sink COPY
2048// instructions which should be handled after RA.
2049//
2050// This pass sinks COPY instructions into a successor block, if the COPY is not
2051// used in the current block and the COPY is live-in to a single successor
2052// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
2053// copy on paths where their results aren't needed. This also exposes
// additional opportunities for dead copy elimination and shrink wrapping.
2055//
2056// These copies were either not handled by or are inserted after the MachineSink
2057// pass. As an example of the former case, the MachineSink pass cannot sink
// COPY instructions with allocatable source registers; for AArch64 these types
// of copy instructions are frequently used to move function parameters (PhyReg)
2060// into virtual registers in the entry block.
2061//
2062// For the machine IR below, this pass will sink %w19 in the entry into its
2063// successor (%bb.1) because %w19 is only live-in in %bb.1.
2064// %bb.0:
2065// %wzr = SUBSWri %w1, 1
2066// %w19 = COPY %w0
2067// Bcc 11, %bb.2
2068// %bb.1:
2069// Live Ins: %w19
2070// BL @fun
2071// %w0 = ADDWrr %w0, %w19
2072// RET %w0
2073// %bb.2:
2074// %w0 = COPY %wzr
2075// RET %w0
2076// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
2077// able to see %bb.0 as a candidate.
2078//===----------------------------------------------------------------------===//
namespace {

/// Implementation of post-RA COPY sinking; shared by the legacy and new pass
/// manager wrappers below.
class PostRAMachineSinkingImpl {
  /// Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;

  /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
  /// entry in this map for each unit it touches. The DBG_VALUE's entry
  /// consists of a pointer to the instruction itself, and a vector of registers
  /// referred to by the instruction that overlap the key register unit.
  DenseMap<MCRegUnit, SmallVector<MIRegs, 2>> SeenDbgInstrs;

  /// Sink Copy instructions unused in the same block close to their uses in
  /// successors.
  bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
                     const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);

public:
  /// Run COPY sinking over every block of \p MF; returns true on change.
  bool run(MachineFunction &MF);
};

/// Legacy pass manager wrapper around PostRAMachineSinkingImpl.
class PostRAMachineSinkingLegacy : public MachineFunctionPass {
public:
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
  PostRAMachineSinkingLegacy() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return "PostRA Machine Sink"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Instructions only move within/between existing blocks; the CFG is kept.
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    // Post-RA: no virtual registers may remain in the function.
    return MachineFunctionProperties().setNoVRegs();
  }
};

} // namespace
2119
// Legacy pass identification: the address of ID identifies the pass;
// llvm::PostRAMachineSinkingID aliases it for external reference.
char PostRAMachineSinkingLegacy::ID = 0;
char &llvm::PostRAMachineSinkingID = PostRAMachineSinkingLegacy::ID;

INITIALIZE_PASS(PostRAMachineSinkingLegacy, "postra-machine-sink",
                "PostRA Machine Sink", false, false)
2125
2126static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, Register Reg,
2127 const TargetRegisterInfo *TRI) {
2128 LiveRegUnits LiveInRegUnits(*TRI);
2129 LiveInRegUnits.addLiveIns(MBB);
2130 return !LiveInRegUnits.available(Reg);
2131}
2132
2133static MachineBasicBlock *
2134getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
2135 const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
2136 Register Reg, const TargetRegisterInfo *TRI) {
2137 // Try to find a single sinkable successor in which Reg is live-in.
2138 MachineBasicBlock *BB = nullptr;
2139 for (auto *SI : SinkableBBs) {
2140 if (aliasWithRegsInLiveIn(MBB&: *SI, Reg, TRI)) {
2141 // If BB is set here, Reg is live-in to at least two sinkable successors,
2142 // so quit.
2143 if (BB)
2144 return nullptr;
2145 BB = SI;
2146 }
2147 }
2148 // Reg is not live-in to any sinkable successors.
2149 if (!BB)
2150 return nullptr;
2151
2152 // Check if any register aliased with Reg is live-in in other successors.
2153 for (auto *SI : CurBB.successors()) {
2154 if (!SinkableBBs.count(Ptr: SI) && aliasWithRegsInLiveIn(MBB&: *SI, Reg, TRI))
2155 return nullptr;
2156 }
2157 return BB;
2158}
2159
2160static MachineBasicBlock *
2161getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
2162 const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
2163 ArrayRef<Register> DefedRegsInCopy,
2164 const TargetRegisterInfo *TRI) {
2165 MachineBasicBlock *SingleBB = nullptr;
2166 for (auto DefReg : DefedRegsInCopy) {
2167 MachineBasicBlock *BB =
2168 getSingleLiveInSuccBB(CurBB, SinkableBBs, Reg: DefReg, TRI);
2169 if (!BB || (SingleBB && SingleBB != BB))
2170 return nullptr;
2171 SingleBB = BB;
2172 }
2173 return SingleBB;
2174}
2175
2176static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
2177 const SmallVectorImpl<unsigned> &UsedOpsInCopy,
2178 const LiveRegUnits &UsedRegUnits,
2179 const TargetRegisterInfo *TRI) {
2180 for (auto U : UsedOpsInCopy) {
2181 MachineOperand &MO = MI->getOperand(i: U);
2182 Register SrcReg = MO.getReg();
2183 if (!UsedRegUnits.available(Reg: SrcReg)) {
2184 MachineBasicBlock::iterator NI = std::next(x: MI->getIterator());
2185 for (MachineInstr &UI : make_range(x: NI, y: CurBB.end())) {
2186 if (UI.killsRegister(Reg: SrcReg, TRI)) {
2187 UI.clearRegisterKills(Reg: SrcReg, RegInfo: TRI);
2188 MO.setIsKill(true);
2189 break;
2190 }
2191 }
2192 }
2193 }
2194}
2195
2196static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
2197 const SmallVectorImpl<unsigned> &UsedOpsInCopy,
2198 const SmallVectorImpl<Register> &DefedRegsInCopy) {
2199 for (Register DefReg : DefedRegsInCopy)
2200 SuccBB->removeLiveInOverlappedWith(Reg: DefReg);
2201
2202 for (auto U : UsedOpsInCopy)
2203 SuccBB->addLiveIn(PhysReg: MI->getOperand(i: U).getReg());
2204 SuccBB->sortUniqueLiveIns();
2205}
2206
2207static bool hasRegisterDependency(MachineInstr *MI,
2208 SmallVectorImpl<unsigned> &UsedOpsInCopy,
2209 SmallVectorImpl<Register> &DefedRegsInCopy,
2210 LiveRegUnits &ModifiedRegUnits,
2211 LiveRegUnits &UsedRegUnits) {
2212 bool HasRegDependency = false;
2213 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
2214 MachineOperand &MO = MI->getOperand(i);
2215 if (!MO.isReg())
2216 continue;
2217 Register Reg = MO.getReg();
2218 if (!Reg)
2219 continue;
2220 if (MO.isDef()) {
2221 if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
2222 HasRegDependency = true;
2223 break;
2224 }
2225 DefedRegsInCopy.push_back(Elt: Reg);
2226
2227 // FIXME: instead of isUse(), readsReg() would be a better fix here,
2228 // For example, we can ignore modifications in reg with undef. However,
2229 // it's not perfectly clear if skipping the internal read is safe in all
2230 // other targets.
2231 } else if (MO.isUse()) {
2232 if (!ModifiedRegUnits.available(Reg)) {
2233 HasRegDependency = true;
2234 break;
2235 }
2236 UsedOpsInCopy.push_back(Elt: i);
2237 }
2238 }
2239 return HasRegDependency;
2240}
2241
bool PostRAMachineSinkingImpl::tryToSinkCopy(MachineBasicBlock &CurBB,
                                             MachineFunction &MF,
                                             const TargetRegisterInfo *TRI,
                                             const TargetInstrInfo *TII) {
  SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
  // FIXME: For now, we sink only to a successor which has a single predecessor
  // so that we can directly sink COPY instructions to the successor without
  // adding any new block or branch instruction.
  for (MachineBasicBlock *SI : CurBB.successors())
    if (!SI->livein_empty() && SI->pred_size() == 1)
      SinkableBBs.insert(Ptr: SI);

  if (SinkableBBs.empty())
    return false;

  bool Changed = false;

  // Track which registers have been modified and used between the end of the
  // block and the current instruction.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  SeenDbgInstrs.clear();

  // Walk bottom-up so the tracked def/use sets describe the code below each
  // candidate COPY.
  for (MachineInstr &MI : llvm::make_early_inc_range(Range: llvm::reverse(C&: CurBB))) {
    // Track the operand index for use in Copy.
    SmallVector<unsigned, 2> UsedOpsInCopy;
    // Track the register number defed in Copy.
    SmallVector<Register, 2> DefedRegsInCopy;

    // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
    // for DBG_VALUEs later, record them when they're encountered.
    if (MI.isDebugValue() && !MI.isDebugRef()) {
      SmallDenseMap<MCRegUnit, SmallVector<Register, 2>, 4> MIUnits;
      bool IsValid = true;
      for (MachineOperand &MO : MI.debug_operands()) {
        if (MO.isReg() && MO.getReg().isPhysical()) {
          // Bail if we can already tell the sink would be rejected, rather
          // than needlessly accumulating lots of DBG_VALUEs.
          if (hasRegisterDependency(MI: &MI, UsedOpsInCopy, DefedRegsInCopy,
                                    ModifiedRegUnits, UsedRegUnits)) {
            IsValid = false;
            break;
          }

          // Record debug use of each reg unit.
          for (MCRegUnit Unit : TRI->regunits(Reg: MO.getReg()))
            MIUnits[Unit].push_back(Elt: MO.getReg());
        }
      }
      if (IsValid) {
        for (auto &RegOps : MIUnits)
          SeenDbgInstrs[RegOps.first].emplace_back(Args: &MI,
                                                   Args: std::move(RegOps.second));
      }
      continue;
    }

    // Don't postRASink instructions that the target prefers not to sink.
    if (!TII->shouldPostRASink(MI))
      continue;

    if (MI.isDebugOrPseudoInstr())
      continue;

    // Do not move any instruction across function call.
    if (MI.isCall())
      return false;

    // Only renamable COPYs are sinking candidates; any other instruction just
    // contributes to the running def/use sets.
    if (!MI.isCopy() || !MI.getOperand(i: 0).isRenamable()) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }

    // Don't sink the COPY if it would violate a register dependency.
    if (hasRegisterDependency(MI: &MI, UsedOpsInCopy, DefedRegsInCopy,
                              ModifiedRegUnits, UsedRegUnits)) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
           "Unexpect SrcReg or DefReg");
    MachineBasicBlock *SuccBB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
    // Don't sink if we cannot find a single sinkable successor in which Reg
    // is live-in.
    if (!SuccBB) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
           "Unexpected predecessor");

    // Collect DBG_VALUEs that must sink with this copy. We've previously
    // recorded which reg units that DBG_VALUEs read, if this instruction
    // writes any of those units then the corresponding DBG_VALUEs must sink.
    MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
    for (auto &MO : MI.all_defs()) {
      for (MCRegUnit Unit : TRI->regunits(Reg: MO.getReg())) {
        for (const auto &MIRegs : SeenDbgInstrs.lookup(Val: Unit)) {
          auto &Regs = DbgValsToSinkMap[MIRegs.first];
          llvm::append_range(C&: Regs, R: MIRegs.second);
        }
      }
    }
    auto DbgValsToSink = DbgValsToSinkMap.takeVector();

    LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);

    MachineBasicBlock::iterator InsertPos =
        SuccBB->SkipPHIsAndLabels(I: SuccBB->begin());
    if (blockPrologueInterferes(BB: SuccBB, End: InsertPos, MI, TRI, TII, MRI: nullptr)) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
      continue;
    }

    // Clear the kill flag if SrcReg is killed between MI and the end of the
    // block.
    clearKillFlags(MI: &MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    performSink(MI, SuccToSinkTo&: *SuccBB, InsertPos, DbgValuesToSink: DbgValsToSink);
    updateLiveIn(MI: &MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);

    Changed = true;
    ++NumPostRACopySink;
  }
  return Changed;
}
2373
2374bool PostRAMachineSinkingImpl::run(MachineFunction &MF) {
2375 bool Changed = false;
2376 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2377 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
2378
2379 ModifiedRegUnits.init(TRI: *TRI);
2380 UsedRegUnits.init(TRI: *TRI);
2381 for (auto &BB : MF)
2382 Changed |= tryToSinkCopy(CurBB&: BB, MF, TRI, TII);
2383
2384 return Changed;
2385}
2386
2387bool PostRAMachineSinkingLegacy::runOnMachineFunction(MachineFunction &MF) {
2388 if (skipFunction(F: MF.getFunction()))
2389 return false;
2390
2391 return PostRAMachineSinkingImpl().run(MF);
2392}
2393
2394PreservedAnalyses
2395PostRAMachineSinkingPass::run(MachineFunction &MF,
2396 MachineFunctionAnalysisManager &MFAM) {
2397 MFPropsModifier _(*this, MF);
2398
2399 if (!PostRAMachineSinkingImpl().run(MF))
2400 return PreservedAnalyses::all();
2401
2402 PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
2403 PA.preserveSet<CFGAnalyses>();
2404 return PA;
2405}
2406