//===- MachineSink.cpp - Sinking for machine instructions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass moves instructions into successor blocks when possible, so that
// they aren't executed on paths where their results aren't needed.
//
// This pass is not intended to be a replacement or a complete alternative
// for an LLVM-IR-level sinking pass. It is only designed to sink simple
// constructs that are not exposed before lowering and instruction selection.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineSink.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineDomTreeUpdater.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/PostRAMachineSink.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-sink"
static cl::opt<bool>
    SplitEdges("machine-sink-split",
               cl::desc("Split critical edges during machine sinking"),
               cl::init(true), cl::Hidden);

static cl::opt<bool> UseBlockFreqInfo(
    "machine-sink-bfi",
    cl::desc("Use block frequency info to find successors to sink"),
    cl::init(true), cl::Hidden);

static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
    "machine-sink-split-probability-threshold",
    cl::desc(
        "Percentage threshold for splitting a single-instruction critical "
        "edge. If the branch probability is higher than this threshold, we "
        "allow speculative execution of up to 1 instruction to avoid "
        "branching to the split critical edge"),
    cl::init(40), cl::Hidden);

static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold(
    "machine-sink-load-instrs-threshold",
    cl::desc("Do not try to find an aliasing store for a load if there is an "
             "in-path block whose instruction count is higher than this "
             "threshold."),
    cl::init(2000), cl::Hidden);

static cl::opt<unsigned> SinkLoadBlocksThreshold(
    "machine-sink-load-blocks-threshold",
    cl::desc("Do not try to find an aliasing store for a load if the number "
             "of blocks on the straight-line path is higher than this "
             "threshold."),
    cl::init(20), cl::Hidden);

static cl::opt<bool>
    SinkInstsIntoCycle("sink-insts-to-avoid-spills",
                       cl::desc("Sink instructions into cycles to avoid "
                                "register spills"),
                       cl::init(false), cl::Hidden);

static cl::opt<unsigned> SinkIntoCycleLimit(
    "machine-sink-cycle-limit",
    cl::desc(
        "The maximum number of instructions considered for cycle sinking."),
    cl::init(50), cl::Hidden);
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");

using RegSubRegPair = TargetInstrInfo::RegSubRegPair;

namespace {

class MachineSinking {
  const TargetSubtargetInfo *STI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineRegisterInfo *MRI = nullptr;      // Machine register information
  MachineDominatorTree *DT = nullptr;      // Machine dominator tree
  MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
  MachineCycleInfo *CI = nullptr;
  ProfileSummaryInfo *PSI = nullptr;
  MachineBlockFrequencyInfo *MBFI = nullptr;
  const MachineBranchProbabilityInfo *MBPI = nullptr;
  AliasAnalysis *AA = nullptr;
  RegisterClassInfo RegClassInfo;
  TargetSchedModel SchedModel;
  // Required for splitting critical edges.
  LiveIntervals *LIS;
  SlotIndexes *SI;
  LiveVariables *LV;
  MachineLoopInfo *MLI;

  // Remember which edges have been considered for breaking.
  SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
      CEBCandidates;
  // Record a register that also wanted to sink into the same block along a
  // different critical edge.
  // {register to sink, sink-to block} -> the first sink-from block.
  // We record the first sink-from block because that (critical) edge was
  // deferred until we saw another register that is going to sink into the
  // same block.
  DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
      CEMergeCandidates;
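  // Illustrative sketch: the first time a register %a asks to sink into
  // %bb.3 across the critical edge %bb.1 -> %bb.3, the split is deferred and
  // {%a, %bb.3} -> %bb.1 is recorded. If %a later asks to sink into %bb.3
  // from a different block %bb.2, both edges are queued for splitting.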
  // Remember which edges we are about to split. Unlike CEBCandidates, which
  // only records edges that were considered, these edges will actually be
  // split.
  SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;

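  // Registers whose kill flags may have been invalidated by sinking; they
  // are conservatively cleared at the end of run().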
  DenseSet<Register> RegsToClearKillFlags;

  using AllSuccsCache =
      SmallDenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;

  /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
  /// post-dominated by another DBG_VALUE of the same variable location.
  /// This is necessary to detect sequences such as:
  ///     %0 = someinst
  ///     DBG_VALUE %0, !123, !DIExpression()
  ///     %1 = anotherinst
  ///     DBG_VALUE %1, !123, !DIExpression()
  /// where, if %0 were to sink, the DBG_VALUE should not sink with it, as
  /// that would re-order assignments.
  using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;

  using SinkItem = std::pair<MachineInstr *, MachineBasicBlock *>;

  /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
  /// debug instructions to sink.
  SmallDenseMap<Register, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;

  /// Record of debug variables that have had their locations set in the
  /// current block.
  DenseSet<DebugVariable> SeenDbgVars;

  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
      HasStoreCache;

  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
           SmallVector<MachineInstr *>>
      StoreInstrCache;

  /// Cached BB's register pressure.
  DenseMap<const MachineBasicBlock *, std::vector<unsigned>>
      CachedRegisterPressure;

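  // When true (per TargetPassConfig::getEnableSinkAndFold), additionally try
  // to sink-and-fold cheap definitions into their uses; see
  // PerformSinkAndFold.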
  bool EnableSinkAndFold;

public:
  MachineSinking(bool EnableSinkAndFold, MachineDominatorTree *DT,
                 MachinePostDominatorTree *PDT, LiveVariables *LV,
                 MachineLoopInfo *MLI, SlotIndexes *SI, LiveIntervals *LIS,
                 MachineCycleInfo *CI, ProfileSummaryInfo *PSI,
                 MachineBlockFrequencyInfo *MBFI,
                 const MachineBranchProbabilityInfo *MBPI, AliasAnalysis *AA)
      : DT(DT), PDT(PDT), CI(CI), PSI(PSI), MBFI(MBFI), MBPI(MBPI), AA(AA),
        LIS(LIS), SI(SI), LV(LV), MLI(MLI),
        EnableSinkAndFold(EnableSinkAndFold) {}

  bool run(MachineFunction &MF);

  void releaseMemory() {
    CEBCandidates.clear();
    CEMergeCandidates.clear();
  }

private:
  bool ProcessBlock(MachineBasicBlock &MBB);
  void ProcessDbgInst(MachineInstr &MI);
  bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                  MachineBasicBlock *To, bool BreakPHIEdge);
  bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                   MachineBasicBlock *To,
                                   MachineBasicBlock *&DeferredFromBlock);

  bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
                       MachineInstr &MI);

  /// Postpone the splitting of the given critical
  /// edge (\p From, \p To).
  ///
  /// We do not split the edges on the fly. Indeed, this invalidates
  /// the dominance information and thus triggers a lot of updates
  /// of that information underneath.
  /// Instead, we postpone all the splits after each iteration of
  /// the main loop. That way, the information is at least valid
  /// for the lifetime of an iteration.
  ///
  /// \return True if the edge is marked for splitting, false otherwise.
  /// False can be returned if, for instance, this is not profitable.
  bool PostponeSplitCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                 MachineBasicBlock *To, bool BreakPHIEdge);
  bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                       AllSuccsCache &AllSuccessors);

  /// If we sink a COPY inst, some debug users of its destination may no
  /// longer be dominated by the COPY, and will eventually be dropped.
  /// This is easily rectified by forwarding the non-dominated debug uses
  /// to the copy source.
  void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
                                     MachineBasicBlock *TargetBlock);
  bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
                               MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
                               bool &LocalUse) const;
  MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                      bool &BreakPHIEdge,
                                      AllSuccsCache &AllSuccessors);

  void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
                               SmallVectorImpl<MachineInstr *> &Candidates);

  bool
  aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I,
                            DenseMap<SinkItem, MachineInstr *> &SunkInstrs);

  bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                            MachineBasicBlock *MBB,
                            MachineBasicBlock *SuccToSinkTo,
                            AllSuccsCache &AllSuccessors);

  bool PerformTrivialForwardCoalescing(MachineInstr &MI,
                                       MachineBasicBlock *MBB);

  bool PerformSinkAndFold(MachineInstr &MI, MachineBasicBlock *MBB);

  SmallVector<MachineBasicBlock *, 4> &
  GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                         AllSuccsCache &AllSuccessors) const;

  std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB,
                                               bool UseCache = true);

  bool registerPressureSetExceedsLimit(unsigned NRegs,
                                       const TargetRegisterClass *RC,
                                       const MachineBasicBlock &MBB);

  bool registerPressureExceedsLimit(const MachineBasicBlock &MBB);
};

class MachineSinkingLegacy : public MachineFunctionPass {
public:
  static char ID;

  MachineSinkingLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachinePostDominatorTreeWrapperPass>();
    AU.addRequired<MachineCycleInfoWrapperPass>();
    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
    AU.addPreserved<MachineCycleInfoWrapperPass>();
    AU.addPreserved<MachineLoopInfoWrapperPass>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    if (UseBlockFreqInfo) {
      AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
      AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
    }
    AU.addRequired<TargetPassConfig>();
  }
};

} // end anonymous namespace

char MachineSinkingLegacy::ID = 0;

char &llvm::MachineSinkingLegacyID = MachineSinkingLegacy::ID;

INITIALIZE_PASS_BEGIN(MachineSinkingLegacy, DEBUG_TYPE, "Machine code sinking",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinkingLegacy, DEBUG_TYPE, "Machine code sinking",
                    false, false)

/// Return true if a target defined block prologue instruction interferes
/// with a sink candidate.
static bool blockPrologueInterferes(const MachineBasicBlock *BB,
                                    MachineBasicBlock::const_iterator End,
                                    const MachineInstr &MI,
                                    const TargetRegisterInfo *TRI,
                                    const TargetInstrInfo *TII,
                                    const MachineRegisterInfo *MRI) {
  for (MachineBasicBlock::const_iterator PI = BB->getFirstNonPHI(); PI != End;
       ++PI) {
    // Only check target defined prologue instructions
    if (!TII->isBasicBlockPrologue(*PI))
      continue;
    for (auto &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      if (MO.isUse()) {
        if (Reg.isPhysical() &&
            (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
          continue;
        if (PI->modifiesRegister(Reg, TRI))
          return true;
      } else {
        if (PI->readsRegister(Reg, TRI))
          return true;
        // Check for interference with non-dead defs
        auto *DefOp = PI->findRegisterDefOperand(Reg, TRI, false, true);
        if (DefOp && !DefOp->isDead())
          return true;
      }
    }
  }

  return false;
}

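// Illustrative MIR sketch of the transformation below: given
//   %1 = ADD32rr %x, %y
//   %2 = COPY %1          ; the only non-debug use of %1
//   ... = use %2
// the COPY is erased and every use of %2 is rewritten to use %1 directly.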
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
                                                     MachineBasicBlock *MBB) {
  if (!MI.isCopy())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();
  if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
      !MRI->hasOneNonDBGUse(SrcReg))
    return false;

  const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
  const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
  if (SRC != DRC)
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
  if (DefMI->isCopyLike())
    return false;
  LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
  LLVM_DEBUG(dbgs() << "*** to: " << MI);
  MRI->replaceRegWith(DstReg, SrcReg);
  MI.eraseFromParent();

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  MRI->clearKillFlags(SrcReg);

  ++NumCoalesces;
  return true;
}

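// Illustrative sketch of sink-and-fold (AArch64-flavored MIR, assuming the
// target implements canFoldIntoAddrMode/emitLdStWithAddr): for
//   %v = ADDXri %base, 16, 0
//   ...
//   STRXui %val, %v, 0
// the ADD is folded into the store's addressing mode, yielding roughly
//   STRXui %val, %base, 2   ; scaled offset, per the target's rules
// after which the dead %v definition and any intervening COPYs are erased.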
bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
                                        MachineBasicBlock *MBB) {
  if (MI.isCopy() || MI.mayLoadOrStore() ||
      MI.getOpcode() == TargetOpcode::REG_SEQUENCE)
    return false;

  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  bool SawStore = true;
  if (!MI.isSafeToMove(SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't sink defs/uses of hard registers or if the instruction defines more
  // than one register.
  // Don't sink more than two register uses - it'll cover most of the cases and
  // greatly simplifies the register pressure checks.
  Register DefReg;
  Register UsedRegA, UsedRegB;
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isImm() || MO.isRegMask() || MO.isRegLiveOut() || MO.isMetadata() ||
        MO.isMCSymbol() || MO.isDbgInstrRef() || MO.isCFIIndex() ||
        MO.isIntrinsicID() || MO.isPredicate() || MO.isShuffleMask())
      continue;
    if (!MO.isReg())
      return false;

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isVirtual()) {
      if (MO.isDef()) {
        if (DefReg)
          return false;
        DefReg = Reg;
        continue;
      }

      if (UsedRegA == 0)
        UsedRegA = Reg;
      else if (UsedRegB == 0)
        UsedRegB = Reg;
      else
        return false;
      continue;
    }

    if (Reg.isPhysical() && MO.isUse() &&
        (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
      continue;

    return false;
  }

  // Scan uses of the destination register. Every use, except the last, must be
  // a copy, with a chain of copies terminating with either a copy into a hard
  // register, or a load/store instruction where the use is part of the
  // address (*not* the stored value).
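  // Accepted use chains, schematically:
  //   %def -> COPY -> ... -> COPY into a hard register
  //   %def -> COPY -> ... -> address operand of a load/store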
  using SinkInfo = std::pair<MachineInstr *, ExtAddrMode>;
  SmallVector<SinkInfo> SinkInto;
  SmallVector<Register> Worklist;

  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
  const TargetRegisterClass *RCA =
      UsedRegA == 0 ? nullptr : MRI->getRegClass(UsedRegA);
  const TargetRegisterClass *RCB =
      UsedRegB == 0 ? nullptr : MRI->getRegClass(UsedRegB);

  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();

    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
      ExtAddrMode MaybeAM;
      MachineInstr &UseInst = *MO.getParent();
      if (UseInst.isCopy()) {
        Register DstReg;
        if (const MachineOperand &O = UseInst.getOperand(0); O.isReg())
          DstReg = O.getReg();
        if (DstReg == 0)
          return false;
        if (DstReg.isVirtual()) {
          Worklist.push_back(DstReg);
          continue;
        }
        // If we are going to replace a copy, the original instruction must be
        // as cheap as a copy.
        if (!TII->isAsCheapAsAMove(MI))
          return false;
        // The hard register must be in the register class of the original
        // instruction's destination register.
        if (!RC->contains(DstReg))
          return false;
      } else if (UseInst.mayLoadOrStore()) {
        ExtAddrMode AM;
        if (!TII->canFoldIntoAddrMode(UseInst, Reg, MI, AM))
          return false;
        MaybeAM = AM;
      } else {
        return false;
      }

      if (UseInst.getParent() != MI.getParent()) {
        // If the register class of the register we are replacing is a
        // superset of any of the register classes of the operands of the
        // materialized instruction, don't consider that live range extended.
        const TargetRegisterClass *RCS = MRI->getRegClass(Reg);
        if (RCA && RCA->hasSuperClassEq(RCS))
          RCA = nullptr;
        else if (RCB && RCB->hasSuperClassEq(RCS))
          RCB = nullptr;
        if (RCA || RCB) {
          if (RCA == nullptr) {
            RCA = RCB;
            RCB = nullptr;
          }

          unsigned NRegs = !!RCA + !!RCB;
          if (RCA == RCB)
            RCB = nullptr;

          // Check we don't exceed register pressure at the destination.
          const MachineBasicBlock &MBB = *UseInst.getParent();
          if (RCB == nullptr) {
            if (registerPressureSetExceedsLimit(NRegs, RCA, MBB))
              return false;
          } else if (registerPressureSetExceedsLimit(1, RCA, MBB) ||
                     registerPressureSetExceedsLimit(1, RCB, MBB)) {
            return false;
          }
        }
      }

      SinkInto.emplace_back(&UseInst, MaybeAM);
    }
  }

  if (SinkInto.empty())
    return false;

  // Now we know we can fold the instruction in all its users.
  for (auto &[SinkDst, MaybeAM] : SinkInto) {
    MachineInstr *New = nullptr;
    LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
               SinkDst->dump());
    if (SinkDst->isCopy()) {
      // TODO: After performing the sink-and-fold, the original instruction is
      // deleted. Its value is still available (in a hard register), so if
      // there are debug instructions which refer to the (now deleted) virtual
      // register they could be updated to refer to the hard register, in
      // principle. However, it's not clear how to do that, moreover in some
      // cases the debug instructions may need to be replicated proportionally
      // to the number of the COPY instructions replaced and in some extreme
      // cases we can end up with a quadratic increase in the number of debug
      // instructions.

      // Sink a copy of the instruction, replacing a COPY instruction.
      MachineBasicBlock::iterator InsertPt = SinkDst->getIterator();
      Register DstReg = SinkDst->getOperand(0).getReg();
      TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI);
      New = &*std::prev(InsertPt);
      if (!New->getDebugLoc())
        New->setDebugLoc(SinkDst->getDebugLoc());

      // The operand registers of the "sunk" instruction have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (UsedRegA)
        MRI->clearKillFlags(UsedRegA);
      if (UsedRegB)
        MRI->clearKillFlags(UsedRegB);
    } else {
      // Fold instruction into the addressing mode of a memory instruction.
      New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);

      // The registers of the addressing mode may have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
      if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
    }
    LLVM_DEBUG(dbgs() << "yielding"; New->dump());
    // Clear the StoreInstrCache, since we may invalidate it by erasing.
    if (SinkDst->mayStore() && !SinkDst->hasOrderedMemoryRef())
      StoreInstrCache.clear();
    SinkDst->eraseFromParent();
  }

  // Collect operands that need to be cleaned up because the registers no
  // longer exist (in COPYs and debug instructions). We cannot delete
  // instructions or clear operands while traversing register uses.
  SmallVector<MachineOperand *> Cleanup;
  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();
    for (MachineOperand &MO : MRI->use_operands(Reg)) {
      MachineInstr *U = MO.getParent();
      assert((U->isCopy() || U->isDebugInstr()) &&
             "Only debug uses and copies must remain");
      if (U->isCopy())
        Worklist.push_back(U->getOperand(0).getReg());
      Cleanup.push_back(&MO);
    }
  }

  // Delete the dead COPYs and clear operands in debug instructions.
  for (MachineOperand *MO : Cleanup) {
    MachineInstr *I = MO->getParent();
    if (I->isCopy()) {
      I->eraseFromParent();
    } else {
      MO->setReg(0);
      MO->setSubReg(0);
    }
  }

  MI.eraseFromParent();
  return true;
}

/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move a def
/// after uses.
bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
                                             MachineBasicBlock *MBB,
                                             MachineBasicBlock *DefMBB,
                                             bool &BreakPHIEdge,
                                             bool &LocalUse) const {
  assert(Reg.isVirtual() && "Only makes sense for vregs");

  // Ignore debug uses because debug info doesn't affect the code.
  if (MRI->use_nodbg_empty(Reg))
    return true;

  // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
  // into and they are all PHI nodes. In this case, machine-sink must break
  // the critical edge first. e.g.
  //
  // %bb.1:
  //   Predecessors according to CFG: %bb.0
  //     ...
  //     %def = DEC64_32r %x, implicit-def dead %eflags
  //     ...
  //     JE_4 <%bb.37>, implicit %eflags
  //   Successors according to CFG: %bb.37 %bb.2
  //
  // %bb.2:
  //     %p = PHI %y, %bb.0, %def, %bb.1
  if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
        MachineInstr *UseInst = MO.getParent();
        unsigned OpNo = MO.getOperandNo();
        MachineBasicBlock *UseBlock = UseInst->getParent();
        return UseBlock == MBB && UseInst->isPHI() &&
               UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
      })) {
    BreakPHIEdge = true;
    return true;
  }

  for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
    // Determine the block of the use.
    MachineInstr *UseInst = MO.getParent();
    unsigned OpNo = &MO - &UseInst->getOperand(0);
    MachineBasicBlock *UseBlock = UseInst->getParent();
    if (UseInst->isPHI()) {
      // PHI nodes use the operand in the predecessor block, not the block with
      // the PHI.
      UseBlock = UseInst->getOperand(OpNo + 1).getMBB();
    } else if (UseBlock == DefMBB) {
      LocalUse = true;
      return false;
    }

    // Check that it dominates.
    if (!DT->dominates(MBB, UseBlock))
      return false;
  }

  return true;
}

/// Return true if this machine instruction loads from the global offset table
/// or the constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert(MI.mayLoad() && "Expected MI that loads!");

  // If we lost memory operands, conservatively assume that the instruction
  // reads from everything.
  if (MI.memoperands_empty())
    return true;

  for (MachineMemOperand *MemOp : MI.memoperands())
    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
      if (PSV->isGOT() || PSV->isConstantPool())
        return true;

  return false;
}

void MachineSinking::FindCycleSinkCandidates(
    MachineCycle *Cycle, MachineBasicBlock *BB,
    SmallVectorImpl<MachineInstr *> &Candidates) {
  for (auto &MI : *BB) {
    LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
    if (MI.isMetaInstruction()) {
      LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n");
      continue;
    }
    if (!TII->shouldSink(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
                           "target\n");
      continue;
    }
    if (!isCycleInvariant(Cycle, MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
      continue;
    }
    bool DontMoveAcrossStore = true;
    if (!MI.isSafeToMove(DontMoveAcrossStore)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
      continue;
    }
    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Not sinking load that isn't from the "
                           "GOT or constant pool\n");
      continue;
    }
    if (MI.isConvergent())
      continue;

    const MachineOperand &MO = MI.getOperand(0);
    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
      continue;
    if (!MRI->hasOneDef(MO.getReg()))
      continue;

    LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
    Candidates.push_back(&MI);
  }
}

PreservedAnalyses
MachineSinkingPass::run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM) {
  auto *DT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  auto *PDT = &MFAM.getResult<MachinePostDominatorTreeAnalysis>(MF);
  auto *CI = &MFAM.getResult<MachineCycleAnalysis>(MF);
  auto *PSI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
                  .getCachedResult<ProfileSummaryAnalysis>(
                      *MF.getFunction().getParent());
  auto *MBFI = UseBlockFreqInfo
                   ? &MFAM.getResult<MachineBlockFrequencyAnalysis>(MF)
                   : nullptr;
  auto *MBPI = &MFAM.getResult<MachineBranchProbabilityAnalysis>(MF);
  auto *AA = &MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF)
                  .getManager()
                  .getResult<AAManager>(MF.getFunction());
  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
  auto *SI = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
  auto *LV = MFAM.getCachedResult<LiveVariablesAnalysis>(MF);
  auto *MLI = MFAM.getCachedResult<MachineLoopAnalysis>(MF);
  MachineSinking Impl(EnableSinkAndFold, DT, PDT, LV, MLI, SI, LIS, CI, PSI,
                      MBFI, MBPI, AA);
  bool Changed = Impl.run(MF);
  if (!Changed)
    return PreservedAnalyses::all();
  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserve<MachineCycleAnalysis>();
  PA.preserve<MachineLoopAnalysis>();
  if (UseBlockFreqInfo)
    PA.preserve<MachineBlockFrequencyAnalysis>();
  return PA;
}

void MachineSinkingPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  OS << MapClassName2PassName(name()); // ideally "machine-sink"
  if (EnableSinkAndFold)
    OS << "<enable-sink-fold>";
}

bool MachineSinkingLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
  bool EnableSinkAndFold = PassConfig->getEnableSinkAndFold();

  auto *DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  auto *PDT =
      &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
  auto *CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI =
      UseBlockFreqInfo
          ? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
          : nullptr;
  auto *MBPI =
      &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  // Get analyses for splitting critical edges.
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  auto *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
  auto *SI = SIWrapper ? &SIWrapper->getSI() : nullptr;
  auto *LVWrapper = getAnalysisIfAvailable<LiveVariablesWrapperPass>();
  auto *LV = LVWrapper ? &LVWrapper->getLV() : nullptr;
  auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
  auto *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;

  MachineSinking Impl(EnableSinkAndFold, DT, PDT, LV, MLI, SI, LIS, CI, PSI,
                      MBFI, MBPI, AA);
  return Impl.run(MF);
}

bool MachineSinking::run(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");

  STI = &MF.getSubtarget();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &MF.getRegInfo();

  RegClassInfo.runOnMachineFunction(MF);

  bool EverMadeChange = false;

  while (true) {
    bool MadeChange = false;

    // Process all basic blocks.
    CEBCandidates.clear();
    CEMergeCandidates.clear();
    ToSplit.clear();
    for (auto &MBB : MF)
      MadeChange |= ProcessBlock(MBB);

    // If we have anything we marked for splitting, split it now.
    MachineDomTreeUpdater MDTU(DT, PDT,
                               MachineDomTreeUpdater::UpdateStrategy::Lazy);
    for (const auto &Pair : ToSplit) {
      auto NewSucc = Pair.first->SplitCriticalEdge(
          Pair.second, {LIS, SI, LV, MLI}, nullptr, &MDTU);
      if (NewSucc != nullptr) {
        LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
                          << printMBBReference(*Pair.first) << " -- "
                          << printMBBReference(*NewSucc) << " -- "
                          << printMBBReference(*Pair.second) << '\n');
        if (MBFI)
          MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);

        MadeChange = true;
        ++NumSplit;
        CI->splitCriticalEdge(Pair.first, Pair.second, NewSucc);
      } else
        LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
    }
    // If this iteration over the code changed anything, keep iterating.
    if (!MadeChange)
      break;
    EverMadeChange = true;
  }

  if (SinkInstsIntoCycle) {
    SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
    SchedModel.init(STI);
    bool HasHighPressure;

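    // Map each (instruction, sink-to block) pair to its sunk copy, so later
    // stages reuse an already-sunk instruction instead of duplicating it.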
    DenseMap<SinkItem, MachineInstr *> SunkInstrs;

    enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
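    // Stage progression, as a sketch: COPY sinks a bounded number of COPYs,
    // LOW_LATENCY also sinks defs the target reports as low latency, and
    // AGGRESSIVE sinks any remaining candidate. A later stage only runs if
    // the preheader's register pressure still exceeds the limit.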
    for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END;
         ++Stage, SunkInstrs.clear()) {
      HasHighPressure = false;

      for (auto *Cycle : Cycles) {
        MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
        if (!Preheader) {
          LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
          continue;
        }
        SmallVector<MachineInstr *, 8> Candidates;
        FindCycleSinkCandidates(Cycle, Preheader, Candidates);

        unsigned i = 0;

        // Walk the candidates in reverse order so that we start with the use
        // of a def-use chain, if there is any.
        // TODO: Sort the candidates using a cost-model.
        for (MachineInstr *I : llvm::reverse(Candidates)) {
          // CycleSinkStage::COPY: Sink a limited number of copies
          if (Stage == CycleSinkStage::COPY) {
            if (i++ == SinkIntoCycleLimit) {
              LLVM_DEBUG(dbgs() << "CycleSink: Reached the limit of "
                                   "instructions to be analyzed.\n");
              break;
            }

            if (!I->isCopy())
              continue;
          }

          // CycleSinkStage::LOW_LATENCY: sink an unlimited number of
          // instructions which the target specifies as low-latency
          if (Stage == CycleSinkStage::LOW_LATENCY &&
              !TII->hasLowDefLatency(SchedModel, *I, 0))
            continue;

          if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs))
            continue;
          EverMadeChange = true;
          ++NumCycleSunk;
        }

        // Recalculate the pressure after sinking
        if (!HasHighPressure)
          HasHighPressure = registerPressureExceedsLimit(*Preheader);
      }
      if (!HasHighPressure)
        break;
    }
  }

  HasStoreCache.clear();
  StoreInstrCache.clear();

  // Now clear any kill flags for recorded registers.
  for (auto I : RegsToClearKillFlags)
    MRI->clearKillFlags(I);
  RegsToClearKillFlags.clear();

  releaseMemory();
  return EverMadeChange;
}

bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
  if ((!EnableSinkAndFold && MBB.succ_size() <= 1) || MBB.empty())
    return false;

  // Don't bother sinking code out of unreachable blocks. In addition to being
  // unprofitable, it can also lead to infinite looping, because in an
  // unreachable cycle there may be nowhere to stop.
  if (!DT->isReachableFromEntry(&MBB))
    return false;

  bool MadeChange = false;

  // Cache all successors, sorted by frequency info and cycle depth.
  AllSuccsCache AllSuccessors;

  // Walk the basic block bottom-up. Remember if we saw a store.
  MachineBasicBlock::iterator I = MBB.end();
  --I;
  bool ProcessedBegin, SawStore = false;
  do {
    MachineInstr &MI = *I; // The instruction to sink.

    // Predecrement I (if it's not begin) so that it isn't invalidated by
    // sinking.
    ProcessedBegin = I == MBB.begin();
    if (!ProcessedBegin)
      --I;

    if (MI.isDebugOrPseudoInstr() || MI.isFakeUse()) {
      if (MI.isDebugValue())
        ProcessDbgInst(MI);
      continue;
    }

    if (EnableSinkAndFold && PerformSinkAndFold(MI, &MBB)) {
      MadeChange = true;
      continue;
    }

    // Can't sink anything out of a block that has fewer than two successors.
    if (MBB.succ_size() <= 1)
      continue;

    if (PerformTrivialForwardCoalescing(MI, &MBB)) {
      MadeChange = true;
      continue;
    }

    if (SinkInstruction(MI, SawStore, AllSuccessors)) {
      ++NumSunk;
      MadeChange = true;
    }

    // If we just processed the first instruction in the block, we're done.
  } while (!ProcessedBegin);

  SeenDbgUsers.clear();
  SeenDbgVars.clear();
  // Recalculate this block's register pressure after sinking.
  CachedRegisterPressure.clear();
  return MadeChange;
}

void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
  // When we see DBG_VALUEs for registers, record any vreg it reads, so that
  // we know what to sink if the vreg def sinks.
  assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");

  DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
                    MI.getDebugLoc()->getInlinedAt());
  bool SeenBefore = SeenDbgVars.contains(Var);

  for (MachineOperand &MO : MI.debug_operands()) {
    if (MO.isReg() && MO.getReg().isVirtual())
      SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
  }

  // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
  SeenDbgVars.insert(Var);
}

bool MachineSinking::isWorthBreakingCriticalEdge(
    MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
    MachineBasicBlock *&DeferredFromBlock) {
  // FIXME: Need much better heuristics.

  // If the pass has already considered breaking this edge (during this pass
  // through the function), then let's go ahead and break it. This means
  // sinking multiple "cheap" instructions into the same block.
  if (!CEBCandidates.insert(std::make_pair(From, To)).second)
    return true;

  if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
    return true;

  // Check and record the register and the destination block we want to sink
  // into. Note that we do this before the branch probability check below,
  // because we want to record the initial candidate even if it is on a hot
  // edge, so that other candidates that might not be on hot edges can be
  // sunk as well.
  for (const auto &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
    auto Key = std::make_pair(SrcReg, To);
    auto Res = CEMergeCandidates.try_emplace(Key, From);
    // We wanted to sink the same register into the same block, consider it to
    // be profitable.
    if (!Res.second) {
      // Return the source block that was previously held off.
      DeferredFromBlock = Res.first->second;
      return true;
    }
  }

  if (From->isSuccessor(To) &&
      MBPI->getEdgeProbability(From, To) <=
          BranchProbability(SplitEdgeProbabilityThreshold, 100))
    return true;

  // MI is cheap, we probably don't want to break the critical edge for it.
  // However, if this would allow some definitions of its source operands
  // to be sunk then it's probably worth it.
  for (const MachineOperand &MO : MI.all_uses()) {
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    // We don't move live definitions of physical registers,
    // so sinking their uses won't enable any opportunities.
    if (Reg.isPhysical())
      continue;

    // If this instruction is the only user of a virtual register,
    // check if breaking the edge will enable sinking
    // both this instruction and the defining instruction.
    if (MRI->hasOneNonDBGUse(Reg)) {
      // If the definition resides in the same MBB,
      // claim it's likely we can sink these together.
      // If the definition resides elsewhere, we aren't
      // blocking it from being sunk so don't break the edge.
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (DefMI->getParent() == MI.getParent())
        return true;
    }
  }

  // Let the target decide if it's worth breaking this
  // critical edge for a "cheap" instruction.
  return TII->shouldBreakCriticalEdgeToSink(MI);
}

bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
                                                MachineBasicBlock *FromBB,
                                                MachineBasicBlock *ToBB,
                                                bool BreakPHIEdge) {
  // Avoid breaking back edges. From == To means a backedge of a single-BB
  // cycle.
  if (!SplitEdges || FromBB == ToBB || !FromBB->isSuccessor(ToBB))
    return false;

  MachineCycle *FromCycle = CI->getCycle(FromBB);
  MachineCycle *ToCycle = CI->getCycle(ToBB);

  // Check for backedges of more "complex" cycles.
  if (FromCycle == ToCycle && FromCycle &&
      (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
    return false;

  // It's not always legal to break critical edges and sink the computation
  // to the edge.
  //
  // %bb.1:
  // v1024
  // Beq %bb.3
  // <fallthrough>
  // %bb.2:
  // ... no uses of v1024
  // <fallthrough>
  // %bb.3:
  // ...
  // = v1024
  //
  // If the %bb.1 -> %bb.3 edge is broken and the computation of v1024 is
  // inserted:
  //
  // %bb.1:
  // ...
  // Bne %bb.2
  // %bb.4:
  // v1024 =
  // B %bb.3
  // %bb.2:
  // ... no uses of v1024
  // <fallthrough>
  // %bb.3:
  // ...
  // = v1024
  //
  // This is incorrect since v1024 is not computed along the
  // %bb.1->%bb.2->%bb.3 flow. We need to ensure the new basic block where the
  // computation is sunk to dominates all the uses.
  // It's only legal to break a critical edge and sink the computation to the
  // new block if all the predecessors of "To", except for "From", are
  // not dominated by "From". Given the SSA property, this means these
  // predecessors are dominated by "To".
  //
  // There is no need to do this check if all the uses are PHI nodes. PHI
  // sources are only defined on the specific predecessor edges.
  if (!BreakPHIEdge) {
    for (MachineBasicBlock *Pred : ToBB->predecessors())
      if (Pred != FromBB && !DT->dominates(ToBB, Pred))
        return false;
  }

  return true;
}

bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
                                               MachineBasicBlock *FromBB,
                                               MachineBasicBlock *ToBB,
                                               bool BreakPHIEdge) {
  bool Status = false;
  MachineBasicBlock *DeferredFromBB = nullptr;
  if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) {
    // If there is a DeferredFromBB, we consider FromBB only if _both_
    // of them are legal to split.
    if ((!DeferredFromBB ||
         ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
         isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
        isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
      ToSplit.insert(std::make_pair(FromBB, ToBB));
      if (DeferredFromBB)
        ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
      Status = true;
    }
  }

  return Status;
}

std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB,
                                      bool UseCache) {
  // Currently, to save compile time, MBB's register pressure will not change
  // within one ProcessBlock iteration because of CachedRegisterPressure, but
  // MBB's register pressure does change after sinking any instructions into
  // it.
  // FIXME: need an accurate and cheap register pressure estimation model here.

  auto RP = CachedRegisterPressure.find(&MBB);
  if (UseCache && RP != CachedRegisterPressure.end())
    return RP->second;

  RegionPressure Pressure;
  RegPressureTracker RPTracker(Pressure);

  // Initialize the register pressure tracker.
  RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
                 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

  for (MachineBasicBlock::const_iterator MII = MBB.instr_end(),
                                         MIE = MBB.instr_begin();
       MII != MIE; --MII) {
    const MachineInstr &MI = *std::prev(MII);
    if (MI.isDebugOrPseudoInstr())
      continue;
    RegisterOperands RegOpers;
    RegOpers.collect(MI, *TRI, *MRI, false, false);
    RPTracker.recedeSkipDebugValues();
    assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
    RPTracker.recede(RegOpers);
  }

  RPTracker.closeRegion();

  if (RP != CachedRegisterPressure.end()) {
    CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure;
    return CachedRegisterPressure[&MBB];
  }

  auto It = CachedRegisterPressure.insert(
      std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
  return It.first->second;
}

bool MachineSinking::registerPressureSetExceedsLimit(
    unsigned NRegs, const TargetRegisterClass *RC,
    const MachineBasicBlock &MBB) {
  unsigned Weight = NRegs * TRI->getRegClassWeight(RC).RegWeight;
  const int *PS = TRI->getRegClassPressureSets(RC);
  std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB);
  for (; *PS != -1; PS++)
    if (Weight + BBRegisterPressure[*PS] >=
        RegClassInfo.getRegPressureSetLimit(*PS))
      return true;
  return false;
}

// Recalculate RP and check if any pressure set exceeds the set limit.
bool MachineSinking::registerPressureExceedsLimit(
    const MachineBasicBlock &MBB) {
  std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB, false);

  for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) {
    if (BBRegisterPressure[PS] >=
        TRI->getRegPressureSetLimit(*MBB.getParent(), PS)) {
      return true;
    }
  }

  return false;
}

/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          MachineBasicBlock *SuccToSinkTo,
                                          AllSuccsCache &AllSuccessors) {
  assert(SuccToSinkTo && "Invalid SinkTo Candidate BB");

  if (MBB == SuccToSinkTo)
    return false;

  // It is profitable if SuccToSinkTo does not post dominate current block.
  if (!PDT->dominates(SuccToSinkTo, MBB))
    return true;

  // It is profitable to sink an instruction from a deeper cycle to a shallower
  // cycle, even if the latter post-dominates the former (PR21115).
  if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
    return true;

  // Check if the only use in the post-dominated block is a PHI instruction.
  bool NonPHIUse = false;
  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
    MachineBasicBlock *UseBlock = UseInst.getParent();
    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
      NonPHIUse = true;
  }
  if (!NonPHIUse)
    return true;

  // If SuccToSinkTo post dominates MBB, it may still be profitable if MI
  // can be further profitably sunk into another block in the next round.
  bool BreakPHIEdge = false;
  // FIXME - If finding a successor is compile-time expensive, then cache the
  // results.
  if (MachineBasicBlock *MBB2 =
          FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
    return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);

  MachineCycle *MCycle = CI->getCycle(MBB);

  // If the instruction is not inside a cycle, it is not profitable to sink MI
  // to a post-dominating block SuccToSinkTo.
  if (!MCycle)
    return false;

  // If this instruction is inside a cycle and sinking it can shorten the live
  // ranges of more registers, it is still profitable.
  for (const MachineOperand &MO : MI.operands()) {
    // Ignore non-register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      // Don't handle non-constant and non-ignorable physical register uses.
      if (MO.isUse() && !MRI->isConstantPhysReg(Reg) &&
          !TII->isIgnorableUse(MO))
        return false;
      continue;
    }

    // Users for the defs are all dominated by SuccToSinkTo.
    if (MO.isDef()) {
      // This def register's live range is shortened after sinking.
      bool LocalUse = false;
      if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
                                   LocalUse))
        return false;
    } else {
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (!DefMI)
        continue;
      MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
      // DefMI is defined outside of the cycle. There should be no live range
      // impact for this operand. "Defined outside of the cycle" means either:
      // 1: the definition is outside of the cycle, or
      // 2: the definition is in this cycle, but it is a PHI in the cycle
      //    header.
      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
                              Cycle->getHeader() == DefMI->getParent()))
        continue;
      // The DefMI is defined inside the cycle.
      // If sinking this operand makes some register pressure set exceed its
      // limit, it is not profitable.
      if (registerPressureSetExceedsLimit(1, MRI->getRegClass(Reg),
                                          *SuccToSinkTo)) {
        LLVM_DEBUG(dbgs() << "register pressure exceeds limit, not "
                             "profitable.");
        return false;
      }
    }
  }

  // If MI is in the cycle and all its operands are alive across the whole
  // cycle, or if no operand sinking makes a register pressure set exceed its
  // limit, it is profitable to sink MI.
  return true;
}

/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
/// computing it if it was not already cached.
SmallVector<MachineBasicBlock *, 4> &
MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                                       AllSuccsCache &AllSuccessors) const {
  // Do we have the sorted successors in cache?
  auto Succs = AllSuccessors.find(MBB);
  if (Succs != AllSuccessors.end())
    return Succs->second;

  SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());

  // Handle cases where sinking can happen but where the sink point isn't a
  // successor. For example:
  //
  //   x = computation
  //   if () {} else {}
  //   use x
  //
  for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
    // DomTree children of MBB that have MBB as immediate dominator are added.
    if (DTChild->getIDom()->getBlock() == MI.getParent() &&
        // Skip MBBs already added to the AllSuccs vector above.
        !MBB->isSuccessor(DTChild->getBlock()))
      AllSuccs.push_back(DTChild->getBlock());
  }

  // Sort Successors according to their cycle depth or block frequency info.
  llvm::stable_sort(
      AllSuccs, [&](const MachineBasicBlock *L, const MachineBasicBlock *R) {
        uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
        uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
        if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI) ||
            (!LHSFreq && !RHSFreq))
          return CI->getCycleDepth(L) < CI->getCycleDepth(R);
        return LHSFreq < RHSFreq;
      });

  auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));

  return it.first->second;
}

/// FindSuccToSinkTo - Find a successor to sink this instruction to.
MachineBasicBlock *
MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                 bool &BreakPHIEdge,
                                 AllSuccsCache &AllSuccessors) {
  assert(MBB && "Invalid MachineBasicBlock!");

  // Loop over all the operands of the specified instruction. If there is
  // anything we can't handle, bail out.

  // SuccToSinkTo - This is the successor to sink this instruction to, once we
  // decide.
  MachineBasicBlock *SuccToSinkTo = nullptr;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue; // Ignore non-register operands.

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
          return nullptr;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return nullptr;
      }
    } else {
      // Virtual register uses are always safe to sink.
      if (MO.isUse())
        continue;

      // If it's not safe to move defs of the register class, then abort.
      if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
        return nullptr;

      // Virtual register defs can only be sunk if all their uses are in blocks
      // dominated by one of the successors.
      if (SuccToSinkTo) {
        // If a previous operand picked a block to sink to, then this operand
        // must be sinkable to the same block.
        bool LocalUse = false;
        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
                                     LocalUse))
          return nullptr;

        continue;
      }

      // Otherwise, we should look at all the successors and decide which one
      // we should sink to. If we have reliable block frequency information
      // (frequency != 0) available, give successors with smaller frequencies
      // higher priority, otherwise prioritize smaller cycle depths.
      for (MachineBasicBlock *SuccBlock :
           GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
        bool LocalUse = false;
        if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, BreakPHIEdge,
                                    LocalUse)) {
          SuccToSinkTo = SuccBlock;
          break;
        }
        if (LocalUse)
          // Def is used locally, it's never safe to move this def.
          return nullptr;
      }

      // If we couldn't find a block to sink to, ignore this instruction.
      if (!SuccToSinkTo)
        return nullptr;
      if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
        return nullptr;
    }
  }

  // It is not possible to sink an instruction into its own block. This can
  // happen with cycles.
  if (MBB == SuccToSinkTo)
    return nullptr;

  // It's not safe to sink instructions to an EH landing pad. Control flow
  // into a landing pad is implicitly defined.
  if (SuccToSinkTo && SuccToSinkTo->isEHPad())
    return nullptr;

  // It ought to be okay to sink instructions into an INLINEASM_BR target, but
  // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction
  // in the source block (which this code does not yet do). So for now, forbid
  // doing so.
  if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
    return nullptr;

  if (SuccToSinkTo && !TII->isSafeToSink(MI, SuccToSinkTo, CI))
    return nullptr;

  return SuccToSinkTo;
}

/// Return true if MI is likely to be usable as a memory operation by the
/// implicit null check optimization.
///
/// This is a "best effort" heuristic, and should not be relied upon for
/// correctness. This returning true does not guarantee that the implicit null
/// check optimization is legal over MI, and this returning false does not
/// guarantee MI cannot possibly be used to do a null check.
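///
/// Illustrative shape of the pattern this looks for: a sole predecessor that
/// branches on "%reg == 0" (or "!= 0") with !make.implicit metadata, and MI
/// being a non-predicable load whose address base is that same %reg; sinking
/// such a load away would defeat folding the explicit null check into an
/// implicit (fault-based) one.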
1513static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
1514 const TargetInstrInfo *TII,
1515 const TargetRegisterInfo *TRI) {
1516 using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
1517
1518 auto *MBB = MI.getParent();
1519 if (MBB->pred_size() != 1)
1520 return false;
1521
1522 auto *PredMBB = *MBB->pred_begin();
1523 auto *PredBB = PredMBB->getBasicBlock();
1524
1525 // Frontends that don't use implicit null checks have no reason to emit
1526 // branches with make.implicit metadata, and this function should always
1527 // return false for them.
1528 if (!PredBB ||
1529 !PredBB->getTerminator()->getMetadata(KindID: LLVMContext::MD_make_implicit))
1530 return false;
1531
1532 const MachineOperand *BaseOp;
1533 int64_t Offset;
1534 bool OffsetIsScalable;
1535 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
1536 return false;
1537
1538 if (!BaseOp->isReg())
1539 return false;
1540
1541 if (!(MI.mayLoad() && !MI.isPredicable()))
1542 return false;
1543
1544 MachineBranchPredicate MBP;
1545 if (TII->analyzeBranchPredicate(MBB&: *PredMBB, MBP, AllowModify: false))
1546 return false;
1547
1548 return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
1549 (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
1550 MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
1551 MBP.LHS.getReg() == BaseOp->getReg();
1552}
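
// The shape this heuristic looks for is roughly the following (illustrative
// pseudo-MIR; exact opcodes are target-specific):
//
//   bb.0:                        ; IR terminator carries !make.implicit
//     CBZX $x0, %bb.null_handler ; branch if the base register is zero
//   bb.1:                        ; single predecessor: bb.0
//     $w9 = LDRWui $x0, 0        ; load based on the tested register
//
// Sinking the load out of bb.1 would prevent the ImplicitNullChecks pass from
// folding the explicit null test into the load's faulting behavior, so we
// decline to sink when this pattern appears.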

/// If the sunk instruction is a copy, try to forward the copy instead of
/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
/// there's any subregister weirdness involved. Returns true if copy
/// propagation occurred.
static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI,
                                 Register Reg) {
  const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
  const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();

  // Copy the DBG_VALUE operand and set the original to undef. We then check
  // to see whether this is something that can be copy-forwarded. If it isn't,
  // continue around the loop.

  const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
  auto CopyOperands = TII.isCopyInstr(SinkInst);
  if (!CopyOperands)
    return false;
  SrcMO = CopyOperands->Source;
  DstMO = CopyOperands->Destination;

  // Check validity of forwarding this copy.
  bool PostRA = MRI.getNumVirtRegs() == 0;

  // Trying to forward between physical and virtual registers is too hard.
  if (Reg.isVirtual() != SrcMO->getReg().isVirtual())
    return false;

  // Only try virtual register copy-forwarding before regalloc, and physical
  // register copy-forwarding after regalloc.
  bool arePhysRegs = !Reg.isVirtual();
  if (arePhysRegs != PostRA)
    return false;

  // Pre-regalloc, only forward if all subregisters agree (or there are no
  // subregs at all). More analysis might recover some forwardable copies.
  if (!PostRA)
    for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg))
      if (DbgMO.getSubReg() != SrcMO->getSubReg() ||
          DbgMO.getSubReg() != DstMO->getSubReg())
        return false;

  // Post-regalloc, we may be sinking a DBG_VALUE of a sub- or super-register
  // of this copy. Only forward the copy if the DBG_VALUE operand exactly
  // matches the copy destination.
  if (PostRA && Reg != DstMO->getReg())
    return false;

  for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) {
    DbgMO.setReg(SrcMO->getReg());
    DbgMO.setSubReg(SrcMO->getSubReg());
  }
  return true;
}
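
// For example (with made-up vregs): when the instruction being sunk is
//   %1 = COPY %0
// and the block also contains
//   DBG_VALUE %1, ...
// forwarding rewrites the DBG_VALUE to refer to %0, so the variable keeps a
// valid location at the original point instead of going undef.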

using MIRegs = std::pair<MachineInstr *, SmallVector<Register, 2>>;

/// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
                        MachineBasicBlock::iterator InsertPos,
                        ArrayRef<MIRegs> DbgValuesToSink) {
  // If we cannot find a location to use (merge with), then we erase the debug
  // location to prevent debug-info driven tools from potentially reporting
  // wrong location information.
  if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
    MI.setDebugLoc(DebugLoc::getMergedLocation(MI.getDebugLoc(),
                                               InsertPos->getDebugLoc()));
  else
    MI.setDebugLoc(DebugLoc());

  // Move the instruction.
  MachineBasicBlock *ParentBlock = MI.getParent();
  SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
                      ++MachineBasicBlock::iterator(MI));

  // Sink a copy of debug users to the insert position. Mark the original
  // DBG_VALUE location as 'undef', indicating that any earlier variable
  // location should be terminated as we've optimised away the value at this
  // point.
  for (const auto &DbgValueToSink : DbgValuesToSink) {
    MachineInstr *DbgMI = DbgValueToSink.first;
    MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
    SuccToSinkTo.insert(InsertPos, NewDbgMI);

    bool PropagatedAllSunkOps = true;
    for (Register Reg : DbgValueToSink.second) {
      if (DbgMI->hasDebugOperandForReg(Reg)) {
        if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) {
          PropagatedAllSunkOps = false;
          break;
        }
      }
    }
    if (!PropagatedAllSunkOps)
      DbgMI->setDebugValueUndef();
  }
}
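
// A note on the DebugLoc handling in performSink above: getMergedLocation
// keeps a concrete source location only when the two locations agree;
// otherwise it produces a location with no specific line in the nearest
// common scope. Either way, a sunk instruction is not attributed to a line
// from a block it no longer belongs to.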

/// hasStoreBetween - Check whether there is a store between the straight-line
/// blocks From and To.
bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
                                     MachineBasicBlock *To, MachineInstr &MI) {
  // Make sure From and To are on a straight line, i.e. From dominates To and
  // To post-dominates From.
  if (!DT->dominates(From, To) || !PDT->dominates(To, From))
    return true;

  auto BlockPair = std::make_pair(From, To);

  // Has this block pair been queried before, with a definite cached result?
  if (auto It = HasStoreCache.find(BlockPair); It != HasStoreCache.end())
    return It->second;

  if (auto It = StoreInstrCache.find(BlockPair); It != StoreInstrCache.end())
    return llvm::any_of(It->second, [&](MachineInstr *I) {
      return I->mayAlias(AA, MI, /*UseTBAA=*/false);
    });

  bool SawStore = false;
  bool HasAliasedStore = false;
  DenseSet<MachineBasicBlock *> HandledBlocks;
  DenseSet<MachineBasicBlock *> HandledDomBlocks;
  // Go through all blocks reachable from From.
  for (MachineBasicBlock *BB : depth_first(From)) {
    // We insert the instruction at the start of block To, so no need to worry
    // about stores inside To itself. Stores in block From were already
    // considered on entry to SinkInstruction.
    if (BB == To || BB == From)
      continue;

    // We already handled this BB in a previous iteration.
    if (HandledBlocks.count(BB))
      continue;

    HandledBlocks.insert(BB);
    // If To post-dominates BB, then BB lies on a path from From to To.
    if (PDT->dominates(To, BB)) {
      if (!HandledDomBlocks.count(BB))
        HandledDomBlocks.insert(BB);

      // If this BB is too big, or the number of blocks on the straight line
      // between From and To is too big, stop searching to save compile time.
      if (BB->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold) ||
          HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
        for (auto *DomBB : HandledDomBlocks) {
          if (DomBB != BB && DT->dominates(DomBB, BB))
            HasStoreCache[std::make_pair(DomBB, To)] = true;
          else if (DomBB != BB && DT->dominates(BB, DomBB))
            HasStoreCache[std::make_pair(From, DomBB)] = true;
        }
        HasStoreCache[BlockPair] = true;
        return true;
      }

      for (MachineInstr &I : *BB) {
        // Conservatively treat a call or an ordered memory operation as an
        // aliasing store.
        if (I.isCall() || I.hasOrderedMemoryRef()) {
          for (auto *DomBB : HandledDomBlocks) {
            if (DomBB != BB && DT->dominates(DomBB, BB))
              HasStoreCache[std::make_pair(DomBB, To)] = true;
            else if (DomBB != BB && DT->dominates(BB, DomBB))
              HasStoreCache[std::make_pair(From, DomBB)] = true;
          }
          HasStoreCache[BlockPair] = true;
          return true;
        }

        if (I.mayStore()) {
          SawStore = true;
          // We still have a chance to sink MI if none of the stores in
          // between alias MI.
          // Cache all store instructions, so that we don't need to walk all
          // blocks reachable from From again for the next load instruction.
          if (I.mayAlias(AA, MI, /*UseTBAA=*/false))
            HasAliasedStore = true;
          StoreInstrCache[BlockPair].push_back(&I);
        }
      }
    }
  }
  // If there is no store at all, cache the result.
  if (!SawStore)
    HasStoreCache[BlockPair] = false;
  return HasAliasedStore;
}
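
// For illustration, consider a diamond (made-up blocks) where From dominates
// To and To post-dominates From:
//
//   From:  %v = LOAD %p
//   A:     STORE %q, ...          ; on one path between From and To
//   B:     ...
//   To:    ... = USE %v
//
// The load can only sink from From into To if the store in A (and any other
// store on the From->To paths) is known not to alias %p; a call or an ordered
// memory operation anywhere in between conservatively blocks the sink.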

/// Aggressively sink instructions into cycles. This will aggressively try to
/// sink all instructions in the top-most preheaders in an attempt to reduce
/// register pressure. In particular, it will sink into multiple successor
/// blocks without limits based on the amount of sinking, or the type of ops
/// being sunk (so long as they are safe to sink).
bool MachineSinking::aggressivelySinkIntoCycle(
    MachineCycle *Cycle, MachineInstr &I,
    DenseMap<SinkItem, MachineInstr *> &SunkInstrs) {
  // TODO: support instructions with multiple defs
  if (I.getNumDefs() > 1)
    return false;

  LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
  assert(Cycle->getCyclePreheader() && "Cycle sink needs a preheader block");
  SmallVector<std::pair<RegSubRegPair, MachineInstr *>> Uses;

  MachineOperand &DefMO = I.getOperand(0);
  for (MachineInstr &MI : MRI->use_instructions(DefMO.getReg())) {
    Uses.push_back({{DefMO.getReg(), DefMO.getSubReg()}, &MI});
  }

  for (std::pair<RegSubRegPair, MachineInstr *> Entry : Uses) {
    MachineInstr *MI = Entry.second;
    LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI);
    if (MI->isPHI()) {
      LLVM_DEBUG(
          dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n");
      continue;
    }
    // We cannot sink before the prologue.
    if (MI->isPosition() || TII->isBasicBlockPrologue(*MI)) {
      LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, "
                           "can't sink.\n");
      continue;
    }
    if (!Cycle->contains(MI->getParent())) {
      LLVM_DEBUG(
          dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n");
      continue;
    }

    MachineBasicBlock *SinkBlock = MI->getParent();
    MachineInstr *NewMI = nullptr;
    SinkItem MapEntry(&I, SinkBlock);

    auto SI = SunkInstrs.find(MapEntry);

    // Check for the case in which we have already sunk a copy of this
    // instruction into the user block.
    if (SI != SunkInstrs.end()) {
      LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: "
                        << printMBBReference(*SinkBlock) << "\n");
      NewMI = SI->second;
    }

    // Create a copy of the instruction in the use block.
    if (!NewMI) {
      LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: "
                        << printMBBReference(*SinkBlock) << "\n");

      NewMI = I.getMF()->CloneMachineInstr(&I);
      if (DefMO.getReg().isVirtual()) {
        const TargetRegisterClass *TRC = MRI->getRegClass(DefMO.getReg());
        Register DestReg = MRI->createVirtualRegister(TRC);
        NewMI->substituteRegister(DefMO.getReg(), DestReg, DefMO.getSubReg(),
                                  *TRI);
      }
      SinkBlock->insert(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()),
                        NewMI);
      SunkInstrs.insert({MapEntry, NewMI});
    }

    // Conservatively clear any kill flags on uses of the sunk instruction.
    for (MachineOperand &MO : NewMI->all_uses()) {
      assert(MO.isReg() && MO.isUse());
      RegsToClearKillFlags.insert(MO.getReg());
    }

    // The instruction is moved from its basic block, so do not retain the
    // debug information.
    assert(!NewMI->isDebugInstr() && "Should not sink debug inst");
    NewMI->setDebugLoc(DebugLoc());

    // Replace the use with the newly created virtual register.
    RegSubRegPair &UseReg = Entry.first;
    MI->substituteRegister(UseReg.Reg, NewMI->getOperand(0).getReg(),
                           UseReg.SubReg, *TRI);
  }
  // If we have replaced all uses, delete the now-dead instruction.
  if (I.isDead(*MRI))
    I.eraseFromParent();
  return true;
}
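
// For illustration (made-up vregs): if the preheader defines
//   %0 = MOVi32imm 42
// and %0 is used in two different blocks of the cycle, the aggressive variant
// clones the definition into each use block with a fresh vreg (%1 and %2),
// rewrites each use to its local clone, and erases the original definition
// once it is dead. This trades code size for shorter live ranges across the
// cycle.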

/// SinkInstruction - Determine whether it is safe to sink the specified
/// machine instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
                                     AllSuccsCache &AllSuccessors) {
  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  if (!MI.isSafeToMove(SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't break implicit null checks. This is a performance heuristic, and is
  // not required for correctness.
  if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges. We often get
  // instructions sunk into the top of a large block, but it would be better to
  // also sink them down before their first use in the block. This xform has to
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.

  bool BreakPHIEdge = false;
  MachineBasicBlock *ParentBlock = MI.getParent();
  MachineBasicBlock *SuccToSinkTo =
      FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);

  // If there is no legal and profitable block to sink into, give up.
  if (!SuccToSinkTo)
    return false;

  // If the instruction to move defines a dead physical register which is live
  // when leaving the basic block, don't move it because it could turn into a
  // "zombie" define of that preg. E.g., EFLAGS.
  for (const MachineOperand &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (Reg == 0 || !Reg.isPhysical())
      continue;
    if (SuccToSinkTo->isLiveIn(Reg))
      return false;
  }

  LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this is a critical edge.
  // Decide if we can sink along it or need to break the edge.
  if (SuccToSinkTo->pred_size() > 1) {
    // We cannot sink a load across a critical edge - there may be stores in
    // other code paths.
    bool TryBreak = false;
    bool Store =
        MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true;
    if (!MI.isSafeToMove(Store)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
      TryBreak = true;
    }

    // We don't want to sink across a critical edge if we don't dominate the
    // successor. We could be introducing calculations to new code paths.
    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
      TryBreak = true;
    }

    // Don't sink instructions into a cycle.
    if (!TryBreak && CI->getCycle(SuccToSinkTo) &&
        (!CI->getCycle(SuccToSinkTo)->isReducible() ||
         CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
      TryBreak = true;
    }

    // Otherwise we are OK with sinking along a critical edge.
    if (!TryBreak)
      LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
    else {
      // Mark this edge as to be split.
      // If the edge can actually be split, the next iteration of the main loop
      // will sink MI in the newly created block.
      bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo,
                                              BreakPHIEdge);
      if (!Status)
        LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                             "break critical edge\n");
      // The instruction will not be sunk this time.
      return false;
    }
  }

  if (BreakPHIEdge) {
    // BreakPHIEdge is true if all the uses are in the successor MBB being
    // sunk into, and they are all PHI nodes. In this case, machine-sink must
    // break the critical edge first.
    bool Status =
        PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
    if (!Status)
      LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                           "break critical edge\n");
    // The instruction will not be sunk this time.
    return false;
  }

  // Determine where to insert into. Skip phi nodes.
  MachineBasicBlock::iterator InsertPos =
      SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
  if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
    LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
    return false;
  }

  // Collect debug users of any vreg that this instruction defines.
  SmallVector<MIRegs, 4> DbgUsersToSink;
  for (auto &MO : MI.all_defs()) {
    if (!MO.getReg().isVirtual())
      continue;
    auto It = SeenDbgUsers.find(MO.getReg());
    if (It == SeenDbgUsers.end())
      continue;

    // Sink any users that don't pass any other DBG_VALUEs for this variable.
    auto &Users = It->second;
    for (auto &User : Users) {
      MachineInstr *DbgMI = User.getPointer();
      if (User.getInt()) {
        // This DBG_VALUE would re-order assignments. If we can't
        // copy-propagate it, it can't be recovered. Set it undef.
        if (!attemptDebugCopyProp(MI, *DbgMI, MO.getReg()))
          DbgMI->setDebugValueUndef();
      } else {
        DbgUsersToSink.push_back(
            {DbgMI, SmallVector<Register, 2>(1, MO.getReg())});
      }
    }
  }

  // After sinking, some debug users may no longer be dominated. If possible,
  // copy-propagate their operands. As this is expensive, don't do it if
  // there's no debug info in the program.
  if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
    SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);

  performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  // Note that we have to clear the kill flags for any register this
  // instruction uses, as we may sink over another instruction which currently
  // kills the used registers.
  for (MachineOperand &MO : MI.all_uses())
    RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.

  return true;
}
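
// Critical-edge illustration: when SuccToSinkTo has a second predecessor P,
// the edge ParentBlock -> SuccToSinkTo is critical:
//
//   ParentBlock ----> SuccToSinkTo <---- P
//
// MI is sunk along it directly only when ParentBlock dominates SuccToSinkTo
// and no store or cycle hazard was found; otherwise the edge is queued for
// splitting and, if the split succeeds, MI is sunk into the new block on a
// later iteration of the main loop.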

void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
    MachineInstr &MI, MachineBasicBlock *TargetBlock) {
  assert(MI.isCopy());
  assert(MI.getOperand(1).isReg());

  // Enumerate all users of vreg operands that are def'd. Skip those that will
  // be sunk. For the rest, if they are not dominated by the block we will sink
  // MI into, propagate the copy source to them.
  SmallVector<MachineInstr *, 4> DbgDefUsers;
  SmallVector<Register, 4> DbgUseRegs;
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  for (auto &MO : MI.all_defs()) {
    if (!MO.getReg().isVirtual())
      continue;
    DbgUseRegs.push_back(MO.getReg());
    for (auto &User : MRI.use_instructions(MO.getReg())) {
      if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
        continue;

      // If the user is in the same block, it will either be sunk too or
      // become a use-before-def.
      if (User.getParent() == MI.getParent())
        continue;

      assert(User.hasDebugOperandForReg(MO.getReg()) &&
             "DBG_VALUE user of vreg, but has no operand for it?");
      DbgDefUsers.push_back(&User);
    }
  }

  // Point the users of this copy that are no longer dominated, at the source
  // of the copy.
  for (auto *User : DbgDefUsers) {
    for (auto &Reg : DbgUseRegs) {
      for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) {
        DbgOp.setReg(MI.getOperand(1).getReg());
        DbgOp.setSubReg(MI.getOperand(1).getSubReg());
      }
    }
  }
}
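
// For example (made-up vregs): suppose %1 = COPY %0 sinks from %bb.0 into
// %bb.2, but a DBG_VALUE of %1 sits in a sibling %bb.1 that %bb.2 does not
// dominate. That DBG_VALUE cannot be sunk with the copy, so it is redirected
// to the copy source %0, preserving the variable location instead of dropping
// it.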

//===----------------------------------------------------------------------===//
// This pass is not intended to be a replacement or a complete alternative
// for the pre-RA machine sink pass. It is only designed to sink COPY
// instructions which should be handled after RA.
//
// This pass sinks COPY instructions into a successor block if the COPY is not
// used in the current block and the COPY is live-in to a single successor
// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
// copy on paths where its result isn't needed. It also exposes additional
// opportunities for dead copy elimination and shrink wrapping.
//
// These copies were either not handled by, or are inserted after, the
// MachineSink pass. As an example of the former case, the MachineSink pass
// cannot sink COPY instructions with allocatable source registers; for AArch64
// this type of copy instruction is frequently used to move function parameters
// (physregs) into virtual registers in the entry block.
//
// For the machine IR below, this pass will sink %w19 in the entry block into
// its successor (%bb.1) because %w19 is only live-in in %bb.1.
// %bb.0:
//    %wzr = SUBSWri %w1, 1
//    %w19 = COPY %w0
//    Bcc 11, %bb.2
// %bb.1:
//    Live Ins: %w19
//    BL @fun
//    %w0 = ADDWrr %w0, %w19
//    RET %w0
// %bb.2:
//    %w0 = COPY %wzr
//    RET %w0
// As we sink %w19 (a callee-saved register on AArch64) into %bb.1, the
// shrink-wrapping pass will be able to see %bb.0 as a candidate.
//===----------------------------------------------------------------------===//
namespace {

class PostRAMachineSinkingImpl {
  /// Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;

  /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
  /// entry in this map for each unit it touches. The DBG_VALUE's entry
  /// consists of a pointer to the instruction itself, and a vector of
  /// registers referred to by the instruction that overlap the key register
  /// unit.
  DenseMap<MCRegUnit, SmallVector<MIRegs, 2>> SeenDbgInstrs;

  /// Sink COPY instructions unused in the same block close to their uses in
  /// successors.
  bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
                     const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);

public:
  bool run(MachineFunction &MF);
};

class PostRAMachineSinkingLegacy : public MachineFunctionPass {
public:
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
  PostRAMachineSinkingLegacy() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return "PostRA Machine Sink"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setNoVRegs();
  }
};

} // namespace

char PostRAMachineSinkingLegacy::ID = 0;
char &llvm::PostRAMachineSinkingID = PostRAMachineSinkingLegacy::ID;

INITIALIZE_PASS(PostRAMachineSinkingLegacy, "postra-machine-sink",
                "PostRA Machine Sink", false, false)

static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, Register Reg,
                                  const TargetRegisterInfo *TRI) {
  LiveRegUnits LiveInRegUnits(*TRI);
  LiveInRegUnits.addLiveIns(MBB);
  return !LiveInRegUnits.available(Reg);
}

static MachineBasicBlock *
getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
                      const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
                      Register Reg, const TargetRegisterInfo *TRI) {
  // Try to find a single sinkable successor in which Reg is live-in.
  MachineBasicBlock *BB = nullptr;
  for (auto *SI : SinkableBBs) {
    if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
      // If BB is set here, Reg is live-in to at least two sinkable successors,
      // so quit.
      if (BB)
        return nullptr;
      BB = SI;
    }
  }
  // Reg is not live-in to any sinkable successors.
  if (!BB)
    return nullptr;

  // Check if any register aliased with Reg is live-in in other successors.
  for (auto *SI : CurBB.successors()) {
    if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
      return nullptr;
  }
  return BB;
}

static MachineBasicBlock *
getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
                      const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
                      ArrayRef<Register> DefedRegsInCopy,
                      const TargetRegisterInfo *TRI) {
  MachineBasicBlock *SingleBB = nullptr;
  for (auto DefReg : DefedRegsInCopy) {
    MachineBasicBlock *BB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
    if (!BB || (SingleBB && SingleBB != BB))
      return nullptr;
    SingleBB = BB;
  }
  return SingleBB;
}
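
// For illustration (made-up physregs): if the COPY defines $w19 and, among
// CurBB's sinkable successors, only %bb.1 has $w19 (or an aliasing unit)
// live-in, %bb.1 is the unique target. If $w19 is also live-in to any other
// successor, sinkable or not, there is no single safe destination and nullptr
// is returned.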

static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
                           const SmallVectorImpl<unsigned> &UsedOpsInCopy,
                           const LiveRegUnits &UsedRegUnits,
                           const TargetRegisterInfo *TRI) {
  for (auto U : UsedOpsInCopy) {
    MachineOperand &MO = MI->getOperand(U);
    Register SrcReg = MO.getReg();
    if (!UsedRegUnits.available(SrcReg)) {
      MachineBasicBlock::iterator NI = std::next(MI->getIterator());
      for (MachineInstr &UI : make_range(NI, CurBB.end())) {
        if (UI.killsRegister(SrcReg, TRI)) {
          UI.clearRegisterKills(SrcReg, TRI);
          MO.setIsKill(true);
          break;
        }
      }
    }
  }
}
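
// For example (made-up physregs): when sinking "$x1 = COPY $x0" below a later
// "USE killed $x0" in the same block, the kill flag is cleared on the USE and
// moved onto the COPY's source operand, since after the sink the COPY becomes
// the last reader of $x0 on this path.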

static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
                         const SmallVectorImpl<unsigned> &UsedOpsInCopy,
                         const SmallVectorImpl<Register> &DefedRegsInCopy) {
  for (Register DefReg : DefedRegsInCopy)
    SuccBB->removeLiveInOverlappedWith(DefReg);

  for (auto U : UsedOpsInCopy)
    SuccBB->addLiveIn(MI->getOperand(U).getReg());
  SuccBB->sortUniqueLiveIns();
}

static bool hasRegisterDependency(MachineInstr *MI,
                                  SmallVectorImpl<unsigned> &UsedOpsInCopy,
                                  SmallVectorImpl<Register> &DefedRegsInCopy,
                                  LiveRegUnits &ModifiedRegUnits,
                                  LiveRegUnits &UsedRegUnits) {
  bool HasRegDependency = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (MO.isDef()) {
      if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
        HasRegDependency = true;
        break;
      }
      DefedRegsInCopy.push_back(Reg);

      // FIXME: instead of isUse(), readsReg() would be a better fix here. For
      // example, we can ignore modifications in registers with undef. However,
      // it's not perfectly clear whether skipping the internal read is safe on
      // all other targets.
    } else if (MO.isUse()) {
      if (!ModifiedRegUnits.available(Reg)) {
        HasRegDependency = true;
        break;
      }
      UsedOpsInCopy.push_back(i);
    }
  }
  return HasRegDependency;
}
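
// For illustration (made-up physregs): walking the block bottom-up, a COPY
// "$x0 = COPY $x1" has a dependency (and cannot sink) if $x0 is read or
// written anywhere below it, or if $x1 is redefined below it; those facts
// are exactly what ModifiedRegUnits and UsedRegUnits accumulate.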

bool PostRAMachineSinkingImpl::tryToSinkCopy(MachineBasicBlock &CurBB,
                                             MachineFunction &MF,
                                             const TargetRegisterInfo *TRI,
                                             const TargetInstrInfo *TII) {
  SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
  // FIXME: For now, we sink only to a successor which has a single predecessor
  // so that we can directly sink COPY instructions to the successor without
  // adding any new block or branch instruction.
  for (MachineBasicBlock *SI : CurBB.successors())
    if (!SI->livein_empty() && SI->pred_size() == 1)
      SinkableBBs.insert(SI);

  if (SinkableBBs.empty())
    return false;

  bool Changed = false;

  // Track which registers have been modified and used between the end of the
  // block and the current instruction.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  SeenDbgInstrs.clear();

  for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) {
    // Track the operand indices of the uses in the COPY.
    SmallVector<unsigned, 2> UsedOpsInCopy;
    // Track the registers defined by the COPY.
    SmallVector<Register, 2> DefedRegsInCopy;

    // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
    // for DBG_VALUEs later, record them when they're encountered.
    if (MI.isDebugValue() && !MI.isDebugRef()) {
      SmallDenseMap<MCRegUnit, SmallVector<Register, 2>, 4> MIUnits;
      bool IsValid = true;
      for (MachineOperand &MO : MI.debug_operands()) {
        if (MO.isReg() && MO.getReg().isPhysical()) {
          // Bail if we can already tell the sink would be rejected, rather
          // than needlessly accumulating lots of DBG_VALUEs.
          if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                                    ModifiedRegUnits, UsedRegUnits)) {
            IsValid = false;
            break;
          }

          // Record the debug use of each reg unit.
          for (MCRegUnit Unit : TRI->regunits(MO.getReg()))
            MIUnits[Unit].push_back(MO.getReg());
        }
      }
      if (IsValid) {
        for (auto &RegOps : MIUnits)
          SeenDbgInstrs[RegOps.first].emplace_back(&MI,
                                                   std::move(RegOps.second));
      }
      continue;
    }

    // Don't sink instructions that the target prefers not to sink post-RA.
    if (!TII->shouldPostRASink(MI))
      continue;

    if (MI.isDebugOrPseudoInstr())
      continue;

    // Do not move any instruction across a function call.
    if (MI.isCall())
      return false;

    if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }

    // Don't sink the COPY if it would violate a register dependency.
    if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                              ModifiedRegUnits, UsedRegUnits)) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
           "Unexpected SrcReg or DefReg");
    MachineBasicBlock *SuccBB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
    // Don't sink if we cannot find a single sinkable successor in which Reg
    // is live-in.
    if (!SuccBB) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
           "Unexpected predecessor");

    // Collect DBG_VALUEs that must sink with this copy. We've previously
    // recorded which reg units DBG_VALUEs read; if this instruction writes
    // any of those units, the corresponding DBG_VALUEs must sink as well.
    MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
    for (auto &MO : MI.all_defs()) {
      for (MCRegUnit Unit : TRI->regunits(MO.getReg())) {
        for (const auto &MIRegs : SeenDbgInstrs.lookup(Unit)) {
          auto &Regs = DbgValsToSinkMap[MIRegs.first];
          llvm::append_range(Regs, MIRegs.second);
        }
      }
    }
    auto DbgValsToSink = DbgValsToSinkMap.takeVector();

    LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);

    MachineBasicBlock::iterator InsertPos =
        SuccBB->SkipPHIsAndLabels(SuccBB->begin());
    if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
      continue;
    }

    // Clear the kill flag if SrcReg is killed between MI and the end of the
    // block.
    clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
    updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);

    Changed = true;
    ++NumPostRACopySink;
  }
  return Changed;
}

bool PostRAMachineSinkingImpl::run(MachineFunction &MF) {
  bool Changed = false;
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  for (auto &BB : MF)
    Changed |= tryToSinkCopy(BB, MF, TRI, TII);

  return Changed;
}

bool PostRAMachineSinkingLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  return PostRAMachineSinkingImpl().run(MF);
}

PreservedAnalyses
PostRAMachineSinkingPass::run(MachineFunction &MF,
                              MachineFunctionAnalysisManager &MFAM) {
  MFPropsModifier _(*this, MF);

  if (!PostRAMachineSinkingImpl().run(MF))
    return PreservedAnalyses::all();

  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}