| 1 | //===- MachineSink.cpp - Sinking for machine instructions -----------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This pass moves instructions into successor blocks when possible, so that |
| 10 | // they aren't executed on paths where their results aren't needed. |
| 11 | // |
| 12 | // This pass is not intended to be a replacement or a complete alternative |
| 13 | // for an LLVM-IR-level sinking pass. It is only designed to sink simple |
| 14 | // constructs that are not exposed before lowering and instruction selection. |
| 15 | // |
| 16 | //===----------------------------------------------------------------------===// |
| 17 | |
| 18 | #include "llvm/CodeGen/MachineSink.h" |
| 19 | #include "llvm/ADT/DenseSet.h" |
| 20 | #include "llvm/ADT/DepthFirstIterator.h" |
| 21 | #include "llvm/ADT/MapVector.h" |
| 22 | #include "llvm/ADT/PointerIntPair.h" |
| 23 | #include "llvm/ADT/SetVector.h" |
| 24 | #include "llvm/ADT/SmallSet.h" |
| 25 | #include "llvm/ADT/SmallVector.h" |
| 26 | #include "llvm/ADT/Statistic.h" |
| 27 | #include "llvm/Analysis/AliasAnalysis.h" |
| 28 | #include "llvm/Analysis/CFG.h" |
| 29 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
| 30 | #include "llvm/CodeGen/LiveIntervals.h" |
| 31 | #include "llvm/CodeGen/LiveVariables.h" |
| 32 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 33 | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
| 34 | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
| 35 | #include "llvm/CodeGen/MachineCycleAnalysis.h" |
| 36 | #include "llvm/CodeGen/MachineDomTreeUpdater.h" |
| 37 | #include "llvm/CodeGen/MachineDominators.h" |
| 38 | #include "llvm/CodeGen/MachineFunction.h" |
| 39 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 40 | #include "llvm/CodeGen/MachineInstr.h" |
| 41 | #include "llvm/CodeGen/MachineLoopInfo.h" |
| 42 | #include "llvm/CodeGen/MachineOperand.h" |
| 43 | #include "llvm/CodeGen/MachinePostDominators.h" |
| 44 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 45 | #include "llvm/CodeGen/MachineSizeOpts.h" |
| 46 | #include "llvm/CodeGen/PostRAMachineSink.h" |
| 47 | #include "llvm/CodeGen/RegisterClassInfo.h" |
| 48 | #include "llvm/CodeGen/RegisterPressure.h" |
| 49 | #include "llvm/CodeGen/SlotIndexes.h" |
| 50 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 51 | #include "llvm/CodeGen/TargetPassConfig.h" |
| 52 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 53 | #include "llvm/CodeGen/TargetSchedule.h" |
| 54 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| 55 | #include "llvm/IR/BasicBlock.h" |
| 56 | #include "llvm/IR/DebugInfoMetadata.h" |
| 57 | #include "llvm/IR/LLVMContext.h" |
| 58 | #include "llvm/InitializePasses.h" |
| 59 | #include "llvm/Pass.h" |
| 60 | #include "llvm/Support/BranchProbability.h" |
| 61 | #include "llvm/Support/CommandLine.h" |
| 62 | #include "llvm/Support/Debug.h" |
| 63 | #include "llvm/Support/raw_ostream.h" |
| 64 | #include <cassert> |
| 65 | #include <cstdint> |
| 66 | #include <utility> |
| 67 | #include <vector> |
| 68 | |
| 69 | using namespace llvm; |
| 70 | |
| 71 | #define DEBUG_TYPE "machine-sink" |
| 72 | |
| 73 | static cl::opt<bool> |
| 74 | SplitEdges("machine-sink-split" , |
| 75 | cl::desc("Split critical edges during machine sinking" ), |
| 76 | cl::init(Val: true), cl::Hidden); |
| 77 | |
| 78 | static cl::opt<bool> UseBlockFreqInfo( |
| 79 | "machine-sink-bfi" , |
| 80 | cl::desc("Use block frequency info to find successors to sink" ), |
| 81 | cl::init(Val: true), cl::Hidden); |
| 82 | |
| 83 | static cl::opt<unsigned> SplitEdgeProbabilityThreshold( |
| 84 | "machine-sink-split-probability-threshold" , |
| 85 | cl::desc( |
| 86 | "Percentage threshold for splitting single-instruction critical edge. " |
| 87 | "If the branch threshold is higher than this threshold, we allow " |
| 88 | "speculative execution of up to 1 instruction to avoid branching to " |
| 89 | "splitted critical edge" ), |
| 90 | cl::init(Val: 40), cl::Hidden); |
| 91 | |
| 92 | static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold( |
| 93 | "machine-sink-load-instrs-threshold" , |
| 94 | cl::desc("Do not try to find alias store for a load if there is a in-path " |
| 95 | "block whose instruction number is higher than this threshold." ), |
| 96 | cl::init(Val: 2000), cl::Hidden); |
| 97 | |
| 98 | static cl::opt<unsigned> SinkLoadBlocksThreshold( |
| 99 | "machine-sink-load-blocks-threshold" , |
| 100 | cl::desc("Do not try to find alias store for a load if the block number in " |
| 101 | "the straight line is higher than this threshold." ), |
| 102 | cl::init(Val: 20), cl::Hidden); |
| 103 | |
| 104 | static cl::opt<bool> |
| 105 | SinkInstsIntoCycle("sink-insts-to-avoid-spills" , |
| 106 | cl::desc("Sink instructions into cycles to avoid " |
| 107 | "register spills" ), |
| 108 | cl::init(Val: false), cl::Hidden); |
| 109 | |
| 110 | static cl::opt<unsigned> SinkIntoCycleLimit( |
| 111 | "machine-sink-cycle-limit" , |
| 112 | cl::desc( |
| 113 | "The maximum number of instructions considered for cycle sinking." ), |
| 114 | cl::init(Val: 50), cl::Hidden); |
| 115 | |
| 116 | STATISTIC(NumSunk, "Number of machine instructions sunk" ); |
| 117 | STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle" ); |
| 118 | STATISTIC(NumSplit, "Number of critical edges split" ); |
| 119 | STATISTIC(NumCoalesces, "Number of copies coalesced" ); |
| 120 | STATISTIC(NumPostRACopySink, "Number of copies sunk after RA" ); |
| 121 | |
| 122 | using RegSubRegPair = TargetInstrInfo::RegSubRegPair; |
| 123 | |
| 124 | namespace { |
| 125 | |
/// Machine-instruction sinking: moves instructions into successor blocks so
/// they are not executed on paths where their results are not needed. This
/// class holds the per-function state; run() is the entry point, and it is
/// driven by either the legacy pass or the new-pass-manager wrapper.
class MachineSinking {
  const TargetSubtargetInfo *STI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineRegisterInfo *MRI = nullptr;      // Machine register information
  MachineDominatorTree *DT = nullptr;      // Machine dominator tree
  MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
  MachineCycleInfo *CI = nullptr;
  ProfileSummaryInfo *PSI = nullptr;
  MachineBlockFrequencyInfo *MBFI = nullptr;
  const MachineBranchProbabilityInfo *MBPI = nullptr;
  AliasAnalysis *AA = nullptr;
  RegisterClassInfo RegClassInfo;
  TargetSchedModel SchedModel;
  // Required for split critical edge
  LiveIntervals *LIS;
  SlotIndexes *SI;
  LiveVariables *LV;
  MachineLoopInfo *MLI;

  // Remember which edges have been considered for breaking.
  SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
      CEBCandidates;
  // Memorize the register that also wanted to sink into the same block along
  // a different critical edge.
  // {register to sink, sink-to block} -> the first sink-from block.
  // We're recording the first sink-from block because that (critical) edge
  // was deferred until we see another register that's going to sink into the
  // same block.
  DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
      CEMergeCandidates;
  // Remember which edges we are about to split.
  // This is different from CEBCandidates since those edges
  // will be split.
  SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;

  // Registers whose kill flags may have become stale due to sinking; they are
  // conservatively cleared (see also MRI->clearKillFlags uses in this file).
  DenseSet<Register> RegsToClearKillFlags;

  // Per-block cache of successors (see GetAllSortedSuccessors).
  using AllSuccsCache =
      SmallDenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;

  /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
  /// post-dominated by another DBG_VALUE of the same variable location.
  /// This is necessary to detect sequences such as:
  ///     %0 = someinst
  ///     DBG_VALUE %0, !123, !DIExpression()
  ///     %1 = anotherinst
  ///     DBG_VALUE %1, !123, !DIExpression()
  /// Where if %0 were to sink, the DBG_VAUE should not sink with it, as that
  /// would re-order assignments.
  using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;

  /// An (instruction, sink-destination block) pair.
  using SinkItem = std::pair<MachineInstr *, MachineBasicBlock *>;

  /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
  /// debug instructions to sink.
  SmallDenseMap<Register, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;

  /// Record of debug variables that have had their locations set in the
  /// current block.
  DenseSet<DebugVariable> SeenDbgVars;

  // Cached answers to "is there a store between these two blocks" queries
  // (see hasStoreBetween).
  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
      HasStoreCache;

  // Stores found between a pair of blocks, cached for reuse. Invalidated when
  // a cached store may be erased (see PerformSinkAndFold).
  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
           SmallVector<MachineInstr *>>
      StoreInstrCache;

  /// Cached BB's register pressure.
  DenseMap<const MachineBasicBlock *, std::vector<unsigned>>
      CachedRegisterPressure;

  // Whether the sink-and-fold transformation (PerformSinkAndFold) is enabled.
  bool EnableSinkAndFold;

public:
  // Note: STI/TII/TRI/MRI and the schedule model are not taken here; they are
  // presumably derived from the MachineFunction in run() — confirm there.
  MachineSinking(bool EnableSinkAndFold, MachineDominatorTree *DT,
                 MachinePostDominatorTree *PDT, LiveVariables *LV,
                 MachineLoopInfo *MLI, SlotIndexes *SI, LiveIntervals *LIS,
                 MachineCycleInfo *CI, ProfileSummaryInfo *PSI,
                 MachineBlockFrequencyInfo *MBFI,
                 const MachineBranchProbabilityInfo *MBPI, AliasAnalysis *AA)
      : DT(DT), PDT(PDT), CI(CI), PSI(PSI), MBFI(MBFI), MBPI(MBPI), AA(AA),
        LIS(LIS), SI(SI), LV(LV), MLI(MLI),
        EnableSinkAndFold(EnableSinkAndFold) {}

  /// Run machine sinking over \p MF.
  bool run(MachineFunction &MF);

  /// Drop state remembered across iterations/functions.
  void releaseMemory() {
    CEBCandidates.clear();
    CEMergeCandidates.clear();
  }

private:
  bool ProcessBlock(MachineBasicBlock &MBB);
  void ProcessDbgInst(MachineInstr &MI);
  bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                  MachineBasicBlock *To, bool BreakPHIEdge);
  bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                   MachineBasicBlock *To,
                                   MachineBasicBlock *&DeferredFromBlock);

  /// Whether a store between \p From and \p To blocks sinking \p MI.
  bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
                       MachineInstr &MI);

  /// Postpone the splitting of the given critical
  /// edge (\p From, \p To).
  ///
  /// We do not split the edges on the fly. Indeed, this invalidates
  /// the dominance information and thus triggers a lot of updates
  /// of that information underneath.
  /// Instead, we postpone all the splits after each iteration of
  /// the main loop. That way, the information is at least valid
  /// for the lifetime of an iteration.
  ///
  /// \return True if the edge is marked as toSplit, false otherwise.
  /// False can be returned if, for instance, this is not profitable.
  bool PostponeSplitCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                 MachineBasicBlock *To, bool BreakPHIEdge);
  bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                       AllSuccsCache &AllSuccessors);

  /// If we sink a COPY inst, some debug users of it's destination may no
  /// longer be dominated by the COPY, and will eventually be dropped.
  /// This is easily rectified by forwarding the non-dominated debug uses
  /// to the copy source.
  void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
                                     MachineBasicBlock *TargetBlock);
  bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
                               MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
                               bool &LocalUse) const;
  MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                      bool &BreakPHIEdge,
                                      AllSuccsCache &AllSuccessors);

  /// Collect instructions in \p BB that are candidates for sinking into
  /// \p Cycle.
  void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
                               SmallVectorImpl<MachineInstr *> &Candidates);

  bool
  aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I,
                            DenseMap<SinkItem, MachineInstr *> &SunkInstrs);

  bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                            MachineBasicBlock *MBB,
                            MachineBasicBlock *SuccToSinkTo,
                            AllSuccsCache &AllSuccessors);

  bool PerformTrivialForwardCoalescing(MachineInstr &MI,
                                       MachineBasicBlock *MBB);

  bool PerformSinkAndFold(MachineInstr &MI, MachineBasicBlock *MBB);

  SmallVector<MachineBasicBlock *, 4> &
  GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                         AllSuccsCache &AllSuccessors) const;

  std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB,
                                               bool UseCache = true);

  /// Whether adding \p NRegs registers of class \p RC would push \p MBB over
  /// a register pressure set limit.
  bool registerPressureSetExceedsLimit(unsigned NRegs,
                                       const TargetRegisterClass *RC,
                                       const MachineBasicBlock &MBB);

  bool registerPressureExceedsLimit(const MachineBasicBlock &MBB);
};
| 291 | |
/// Legacy pass manager wrapper around the MachineSinking implementation.
class MachineSinkingLegacy : public MachineFunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid.

  MachineSinkingLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  /// Declare the analyses this pass consumes and preserves.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachinePostDominatorTreeWrapperPass>();
    AU.addRequired<MachineCycleInfoWrapperPass>();
    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
    AU.addPreserved<MachineCycleInfoWrapperPass>();
    AU.addPreserved<MachineLoopInfoWrapperPass>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    // Block frequency info is only needed when the corresponding heuristic
    // is enabled on the command line.
    if (UseBlockFreqInfo) {
      AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
      AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
    }
    AU.addRequired<TargetPassConfig>();
  }
};
| 317 | |
| 318 | } // end anonymous namespace |
| 319 | |
char MachineSinkingLegacy::ID = 0;

// Exported handle so other code can refer to this pass by ID.
char &llvm::MachineSinkingLegacyID = MachineSinkingLegacy::ID;

// Register the pass and its analysis dependencies with the legacy PM.
INITIALIZE_PASS_BEGIN(MachineSinkingLegacy, DEBUG_TYPE, "Machine code sinking",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinkingLegacy, DEBUG_TYPE, "Machine code sinking",
                    false, false)
| 333 | |
| 334 | /// Return true if a target defined block prologue instruction interferes |
| 335 | /// with a sink candidate. |
| 336 | static bool blockPrologueInterferes(const MachineBasicBlock *BB, |
| 337 | MachineBasicBlock::const_iterator End, |
| 338 | const MachineInstr &MI, |
| 339 | const TargetRegisterInfo *TRI, |
| 340 | const TargetInstrInfo *TII, |
| 341 | const MachineRegisterInfo *MRI) { |
| 342 | for (MachineBasicBlock::const_iterator PI = BB->getFirstNonPHI(); PI != End; |
| 343 | ++PI) { |
| 344 | // Only check target defined prologue instructions |
| 345 | if (!TII->isBasicBlockPrologue(MI: *PI)) |
| 346 | continue; |
| 347 | for (auto &MO : MI.operands()) { |
| 348 | if (!MO.isReg()) |
| 349 | continue; |
| 350 | Register Reg = MO.getReg(); |
| 351 | if (!Reg) |
| 352 | continue; |
| 353 | if (MO.isUse()) { |
| 354 | if (Reg.isPhysical() && |
| 355 | (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(PhysReg: Reg)))) |
| 356 | continue; |
| 357 | if (PI->modifiesRegister(Reg, TRI)) |
| 358 | return true; |
| 359 | } else { |
| 360 | if (PI->readsRegister(Reg, TRI)) |
| 361 | return true; |
| 362 | // Check for interference with non-dead defs |
| 363 | auto *DefOp = PI->findRegisterDefOperand(Reg, TRI, isDead: false, Overlap: true); |
| 364 | if (DefOp && !DefOp->isDead()) |
| 365 | return true; |
| 366 | } |
| 367 | } |
| 368 | } |
| 369 | |
| 370 | return false; |
| 371 | } |
| 372 | |
| 373 | bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, |
| 374 | MachineBasicBlock *MBB) { |
| 375 | if (!MI.isCopy()) |
| 376 | return false; |
| 377 | |
| 378 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
| 379 | Register DstReg = MI.getOperand(i: 0).getReg(); |
| 380 | if (!SrcReg.isVirtual() || !DstReg.isVirtual() || |
| 381 | !MRI->hasOneNonDBGUse(RegNo: SrcReg)) |
| 382 | return false; |
| 383 | |
| 384 | const TargetRegisterClass *SRC = MRI->getRegClass(Reg: SrcReg); |
| 385 | const TargetRegisterClass *DRC = MRI->getRegClass(Reg: DstReg); |
| 386 | if (SRC != DRC) |
| 387 | return false; |
| 388 | |
| 389 | MachineInstr *DefMI = MRI->getVRegDef(Reg: SrcReg); |
| 390 | if (DefMI->isCopyLike()) |
| 391 | return false; |
| 392 | LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); |
| 393 | LLVM_DEBUG(dbgs() << "*** to: " << MI); |
| 394 | MRI->replaceRegWith(FromReg: DstReg, ToReg: SrcReg); |
| 395 | MI.eraseFromParent(); |
| 396 | |
| 397 | // Conservatively, clear any kill flags, since it's possible that they are no |
| 398 | // longer correct. |
| 399 | MRI->clearKillFlags(Reg: SrcReg); |
| 400 | |
| 401 | ++NumCoalesces; |
| 402 | return true; |
| 403 | } |
| 404 | |
/// Try to "sink and fold" \p MI: rather than moving the instruction, emit a
/// copy of it at each final user — either rematerializing it in place of a
/// COPY to a physical register, or folding it into the addressing mode of a
/// load/store. On success, the original instruction and all intermediate
/// vreg-to-vreg COPYs of its result are erased.
///
/// \return true if \p MI was folded into every user and erased.
bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
                                        MachineBasicBlock *MBB) {
  // COPYs, memory operations and REG_SEQUENCE are never handled here.
  if (MI.isCopy() || MI.mayLoadOrStore() ||
      MI.getOpcode() == TargetOpcode::REG_SEQUENCE)
    return false;

  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  bool SawStore = true;
  if (!MI.isSafeToMove(SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't sink defs/uses of hard registers or if the instruction defines more
  // than one register.
  // Don't sink more than two register uses - it'll cover most of the cases and
  // greatly simplifies the register pressure checks.
  Register DefReg;
  Register UsedRegA, UsedRegB;
  for (const MachineOperand &MO : MI.operands()) {
    // Non-register operands that cannot interfere are simply skipped.
    if (MO.isImm() || MO.isRegMask() || MO.isRegLiveOut() || MO.isMetadata() ||
        MO.isMCSymbol() || MO.isDbgInstrRef() || MO.isCFIIndex() ||
        MO.isIntrinsicID() || MO.isPredicate() || MO.isShuffleMask())
      continue;
    if (!MO.isReg())
      return false;

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isVirtual()) {
      if (MO.isDef()) {
        if (DefReg)
          return false;
        DefReg = Reg;
        continue;
      }

      if (UsedRegA == 0)
        UsedRegA = Reg;
      else if (UsedRegB == 0)
        UsedRegB = Reg;
      else
        return false;
      continue;
    }

    // Constant or ignorable physical register uses are tolerated.
    if (Reg.isPhysical() && MO.isUse() &&
        (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
      continue;

    return false;
  }

  // Scan uses of the destination register. Every use, except the last, must be
  // a copy, with a chain of copies terminating with either a copy into a hard
  // register, or a load/store instruction where the use is part of the
  // address (*not* the stored value).
  using SinkInfo = std::pair<MachineInstr *, ExtAddrMode>;
  SmallVector<SinkInfo> SinkInto;
  SmallVector<Register> Worklist;

  // NOTE(review): assumes MI defines a virtual register (DefReg valid here);
  // confirm every instruction reaching this point has a def.
  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
  const TargetRegisterClass *RCA =
      UsedRegA == 0 ? nullptr : MRI->getRegClass(UsedRegA);
  const TargetRegisterClass *RCB =
      UsedRegB == 0 ? nullptr : MRI->getRegClass(UsedRegB);

  // Walk the transitive COPY users of DefReg, validating each final use.
  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();

    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
      ExtAddrMode MaybeAM;
      MachineInstr &UseInst = *MO.getParent();
      if (UseInst.isCopy()) {
        Register DstReg;
        if (const MachineOperand &O = UseInst.getOperand(0); O.isReg())
          DstReg = O.getReg();
        if (DstReg == 0)
          return false;
        if (DstReg.isVirtual()) {
          // vreg-to-vreg copy: follow the chain further.
          Worklist.push_back(DstReg);
          continue;
        }
        // If we are going to replace a copy, the original instruction must be
        // as cheap as a copy.
        if (!TII->isAsCheapAsAMove(MI))
          return false;
        // The hard register must be in the register class of the original
        // instruction's destination register.
        if (!RC->contains(DstReg))
          return false;
      } else if (UseInst.mayLoadOrStore()) {
        // If the destination instruction contains more than one use of the
        // register, we won't be able to remove the original instruction, so
        // don't sink.
        if (llvm::count_if(UseInst.operands(), [Reg](const MachineOperand &MO) {
              return MO.isReg() && MO.getReg() == Reg;
            }) > 1)
          return false;
        ExtAddrMode AM;
        if (!TII->canFoldIntoAddrMode(UseInst, Reg, MI, AM))
          return false;
        MaybeAM = AM;
      } else {
        // Any other kind of user blocks the transformation.
        return false;
      }

      if (UseInst.getParent() != MI.getParent()) {
        // If the register class of the register we are replacing is a superset
        // of any of the register classes of the operands of the materialized
        // instruction don't consider that live range extended.
        const TargetRegisterClass *RCS = MRI->getRegClass(Reg);
        if (RCA && RCA->hasSuperClassEq(RCS))
          RCA = nullptr;
        else if (RCB && RCB->hasSuperClassEq(RCS))
          RCB = nullptr;
        if (RCA || RCB) {
          // Normalize so RCA is the non-null one when only one remains.
          if (RCA == nullptr) {
            RCA = RCB;
            RCB = nullptr;
          }

          unsigned NRegs = !!RCA + !!RCB;
          if (RCA == RCB)
            RCB = nullptr;

          // Check we don't exceed register pressure at the destination.
          const MachineBasicBlock &MBB = *UseInst.getParent();
          if (RCB == nullptr) {
            if (registerPressureSetExceedsLimit(NRegs, RCA, MBB))
              return false;
          } else if (registerPressureSetExceedsLimit(1, RCA, MBB) ||
                     registerPressureSetExceedsLimit(1, RCB, MBB)) {
            return false;
          }
        }
      }

      SinkInto.emplace_back(&UseInst, MaybeAM);
    }
  }

  if (SinkInto.empty())
    return false;

  // Now we know we can fold the instruction in all its users.
  for (auto &[SinkDst, MaybeAM] : SinkInto) {
    MachineInstr *New = nullptr;
    LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
               SinkDst->dump());
    if (SinkDst->isCopy()) {
      // TODO: After performing the sink-and-fold, the original instruction is
      // deleted. Its value is still available (in a hard register), so if there
      // are debug instructions which refer to the (now deleted) virtual
      // register they could be updated to refer to the hard register, in
      // principle. However, it's not clear how to do that, moreover in some
      // cases the debug instructions may need to be replicated proportionally
      // to the number of the COPY instructions replaced and in some extreme
      // cases we can end up with quadratic increase in the number of debug
      // instructions.

      // Sink a copy of the instruction, replacing a COPY instruction.
      MachineBasicBlock::iterator InsertPt = SinkDst->getIterator();
      Register DstReg = SinkDst->getOperand(0).getReg();
      TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI);
      New = &*std::prev(InsertPt);
      if (!New->getDebugLoc())
        New->setDebugLoc(SinkDst->getDebugLoc());

      // The operand registers of the "sunk" instruction have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (UsedRegA)
        MRI->clearKillFlags(UsedRegA);
      if (UsedRegB)
        MRI->clearKillFlags(UsedRegB);
    } else {
      // Fold instruction into the addressing mode of a memory instruction.
      New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);

      // The registers of the addressing mode may have their live range extended
      // and their kill flags may no longer be correct. Conservatively clear the
      // kill flags.
      if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
      if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
    }
    LLVM_DEBUG(dbgs() << "yielding"; New->dump());
    // Clear the StoreInstrCache, since we may invalidate it by erasing.
    if (SinkDst->mayStore() && !SinkDst->hasOrderedMemoryRef())
      StoreInstrCache.clear();
    SinkDst->eraseFromParent();
  }

  // Collect operands that need to be cleaned up because the registers no longer
  // exist (in COPYs and debug instructions). We cannot delete instructions or
  // clear operands while traversing register uses.
  SmallVector<MachineOperand *> Cleanup;
  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();
    for (MachineOperand &MO : MRI->use_operands(Reg)) {
      MachineInstr *U = MO.getParent();
      assert((U->isCopy() || U->isDebugInstr()) &&
             "Only debug uses and copies must remain");
      if (U->isCopy())
        Worklist.push_back(U->getOperand(0).getReg());
      Cleanup.push_back(&MO);
    }
  }

  // Delete the dead COPYs and clear operands in debug instructions
  for (MachineOperand *MO : Cleanup) {
    MachineInstr *I = MO->getParent();
    if (I->isCopy()) {
      I->eraseFromParent();
    } else {
      MO->setReg(0);
      MO->setSubReg(0);
    }
  }

  MI.eraseFromParent();
  return true;
}
| 641 | |
| 642 | /// AllUsesDominatedByBlock - Return true if all uses of the specified register |
| 643 | /// occur in blocks dominated by the specified block. If any use is in the |
| 644 | /// definition block, then return false since it is never legal to move def |
| 645 | /// after uses. |
| 646 | bool MachineSinking::AllUsesDominatedByBlock(Register Reg, |
| 647 | MachineBasicBlock *MBB, |
| 648 | MachineBasicBlock *DefMBB, |
| 649 | bool &BreakPHIEdge, |
| 650 | bool &LocalUse) const { |
| 651 | assert(Reg.isVirtual() && "Only makes sense for vregs" ); |
| 652 | |
| 653 | // Ignore debug uses because debug info doesn't affect the code. |
| 654 | if (MRI->use_nodbg_empty(RegNo: Reg)) |
| 655 | return true; |
| 656 | |
| 657 | // BreakPHIEdge is true if all the uses are in the successor MBB being sunken |
| 658 | // into and they are all PHI nodes. In this case, machine-sink must break |
| 659 | // the critical edge first. e.g. |
| 660 | // |
| 661 | // %bb.1: |
| 662 | // Predecessors according to CFG: %bb.0 |
| 663 | // ... |
| 664 | // %def = DEC64_32r %x, implicit-def dead %eflags |
| 665 | // ... |
| 666 | // JE_4 <%bb.37>, implicit %eflags |
| 667 | // Successors according to CFG: %bb.37 %bb.2 |
| 668 | // |
| 669 | // %bb.2: |
| 670 | // %p = PHI %y, %bb.0, %def, %bb.1 |
| 671 | if (all_of(Range: MRI->use_nodbg_operands(Reg), P: [&](MachineOperand &MO) { |
| 672 | MachineInstr *UseInst = MO.getParent(); |
| 673 | unsigned OpNo = MO.getOperandNo(); |
| 674 | MachineBasicBlock *UseBlock = UseInst->getParent(); |
| 675 | return UseBlock == MBB && UseInst->isPHI() && |
| 676 | UseInst->getOperand(i: OpNo + 1).getMBB() == DefMBB; |
| 677 | })) { |
| 678 | BreakPHIEdge = true; |
| 679 | return true; |
| 680 | } |
| 681 | |
| 682 | for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { |
| 683 | // Determine the block of the use. |
| 684 | MachineInstr *UseInst = MO.getParent(); |
| 685 | unsigned OpNo = &MO - &UseInst->getOperand(i: 0); |
| 686 | MachineBasicBlock *UseBlock = UseInst->getParent(); |
| 687 | if (UseInst->isPHI()) { |
| 688 | // PHI nodes use the operand in the predecessor block, not the block with |
| 689 | // the PHI. |
| 690 | UseBlock = UseInst->getOperand(i: OpNo + 1).getMBB(); |
| 691 | } else if (UseBlock == DefMBB) { |
| 692 | LocalUse = true; |
| 693 | return false; |
| 694 | } |
| 695 | |
| 696 | // Check that it dominates. |
| 697 | if (!DT->dominates(A: MBB, B: UseBlock)) |
| 698 | return false; |
| 699 | } |
| 700 | |
| 701 | return true; |
| 702 | } |
| 703 | |
| 704 | /// Return true if this machine instruction loads from global offset table or |
| 705 | /// constant pool. |
| 706 | static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { |
| 707 | assert(MI.mayLoad() && "Expected MI that loads!" ); |
| 708 | |
| 709 | // If we lost memory operands, conservatively assume that the instruction |
| 710 | // reads from everything.. |
| 711 | if (MI.memoperands_empty()) |
| 712 | return true; |
| 713 | |
| 714 | for (MachineMemOperand *MemOp : MI.memoperands()) |
| 715 | if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) |
| 716 | if (PSV->isGOT() || PSV->isConstantPool()) |
| 717 | return true; |
| 718 | |
| 719 | return false; |
| 720 | } |
| 721 | |
/// Scan \p BB (a cycle preheader) and append to \p Candidates the
/// instructions that may be sunk into \p Cycle. A candidate must be
/// target-approved, cycle invariant, safe to move, not a load of
/// possibly-mutable memory, not convergent, and its first operand must be the
/// sole def of a register.
void MachineSinking::FindCycleSinkCandidates(
    MachineCycle *Cycle, MachineBasicBlock *BB,
    SmallVectorImpl<MachineInstr *> &Candidates) {
  for (auto &MI : *BB) {
    LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
    if (MI.isMetaInstruction()) {
      LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n" );
      continue;
    }
    if (!TII->shouldSink(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
                           "target\n" );
      continue;
    }
    if (!isCycleInvariant(Cycle, I&: MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n" );
      continue;
    }
    bool DontMoveAcrossStore = true;
    if (!MI.isSafeToMove(SawStore&: DontMoveAcrossStore)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n" );
      continue;
    }
    // Loads are only sinkable when they read immutable memory (GOT or
    // constant pool). NOTE(review): the debug message below is worded
    // backwards — it fires for loads that are NOT from the GOT/constant pool.
    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n" );
      continue;
    }
    // Never sink convergent instructions.
    if (MI.isConvergent())
      continue;

    // Only consider instructions whose first operand is a register def with
    // exactly one def, so relocating the def is well-defined.
    const MachineOperand &MO = MI.getOperand(i: 0);
    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
      continue;
    if (!MRI->hasOneDef(RegNo: MO.getReg()))
      continue;

    LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n" );
    Candidates.push_back(Elt: &MI);
  }
}
| 762 | |
/// New pass-manager entry point: gathers the analyses MachineSinking needs
/// (required: dominators, post-dominators, cycles, branch probabilities, AA;
/// cached-only: PSI, LiveIntervals, SlotIndexes, LiveVariables, LoopInfo),
/// runs the shared implementation, and reports which analyses are preserved.
PreservedAnalyses
MachineSinkingPass::run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM) {
  auto *DT = &MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
  auto *PDT = &MFAM.getResult<MachinePostDominatorTreeAnalysis>(IR&: MF);
  auto *CI = &MFAM.getResult<MachineCycleAnalysis>(IR&: MF);
  // ProfileSummaryInfo lives at module scope; only a cached result can be
  // obtained here, so PSI may be null.
  auto *PSI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(IR&: MF)
                  .getCachedResult<ProfileSummaryAnalysis>(
                      IR&: *MF.getFunction().getParent());
  // Block frequency info is only computed when UseBlockFreqInfo is set.
  auto *MBFI = UseBlockFreqInfo
                   ? &MFAM.getResult<MachineBlockFrequencyAnalysis>(IR&: MF)
                   : nullptr;
  auto *MBPI = &MFAM.getResult<MachineBranchProbabilityAnalysis>(IR&: MF);
  auto *AA = &MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(IR&: MF)
                 .getManager()
                 .getResult<AAManager>(IR&: MF.getFunction());
  // These four are only consulted if some earlier pass already computed them
  // (used when splitting critical edges); never force their computation.
  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(IR&: MF);
  auto *SI = MFAM.getCachedResult<SlotIndexesAnalysis>(IR&: MF);
  auto *LV = MFAM.getCachedResult<LiveVariablesAnalysis>(IR&: MF);
  auto *MLI = MFAM.getCachedResult<MachineLoopAnalysis>(IR&: MF);
  MachineSinking Impl(EnableSinkAndFold, DT, PDT, LV, MLI, SI, LIS, CI, PSI,
                      MBFI, MBPI, AA);
  bool Changed = Impl.run(MF);
  if (!Changed)
    return PreservedAnalyses::all();
  // Sinking moves instructions but leaves the cycle/loop structure intact.
  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserve<MachineCycleAnalysis>();
  PA.preserve<MachineLoopAnalysis>();
  if (UseBlockFreqInfo)
    PA.preserve<MachineBlockFrequencyAnalysis>();
  return PA;
}
| 795 | |
/// Print this pass's textual pipeline representation, appending the
/// non-default <enable-sink-fold> option when it is enabled.
void MachineSinkingPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  OS << MapClassName2PassName(name()); // ideally machine-sink
  if (EnableSinkAndFold)
    OS << "<enable-sink-fold>" ;
}
| 802 | |
/// Legacy pass-manager entry point: collects the same analyses as the new-PM
/// path (the critical-edge-splitting helpers only if already computed) and
/// runs the shared MachineSinking implementation.
bool MachineSinkingLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(F: MF.getFunction()))
    return false;

  // Sink-and-fold is enabled per-target via the pass configuration.
  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
  bool EnableSinkAndFold = PassConfig->getEnableSinkAndFold();

  auto *DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  auto *PDT =
      &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
  auto *CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  // Block frequency info is only computed when UseBlockFreqInfo is set.
  auto *MBFI =
      UseBlockFreqInfo
          ? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
          : nullptr;
  auto *MBPI =
      &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  // Get analyses for split critical edge. Each may be absent (null) if no
  // earlier pass computed it.
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  auto *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
  auto *SI = SIWrapper ? &SIWrapper->getSI() : nullptr;
  auto *LVWrapper = getAnalysisIfAvailable<LiveVariablesWrapperPass>();
  auto *LV = LVWrapper ? &LVWrapper->getLV() : nullptr;
  auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
  auto *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;

  MachineSinking Impl(EnableSinkAndFold, DT, PDT, LV, MLI, SI, LIS, CI, PSI,
                      MBFI, MBPI, AA);
  return Impl.run(MF);
}
| 836 | |
| 837 | bool MachineSinking::run(MachineFunction &MF) { |
| 838 | LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n" ); |
| 839 | |
| 840 | STI = &MF.getSubtarget(); |
| 841 | TII = STI->getInstrInfo(); |
| 842 | TRI = STI->getRegisterInfo(); |
| 843 | MRI = &MF.getRegInfo(); |
| 844 | |
| 845 | RegClassInfo.runOnMachineFunction(MF); |
| 846 | |
| 847 | bool EverMadeChange = false; |
| 848 | |
| 849 | while (true) { |
| 850 | bool MadeChange = false; |
| 851 | |
| 852 | // Process all basic blocks. |
| 853 | CEBCandidates.clear(); |
| 854 | CEMergeCandidates.clear(); |
| 855 | ToSplit.clear(); |
| 856 | for (auto &MBB : MF) |
| 857 | MadeChange |= ProcessBlock(MBB); |
| 858 | |
| 859 | // If we have anything we marked as toSplit, split it now. |
| 860 | MachineDomTreeUpdater MDTU(DT, PDT, |
| 861 | MachineDomTreeUpdater::UpdateStrategy::Lazy); |
| 862 | for (const auto &Pair : ToSplit) { |
| 863 | auto NewSucc = Pair.first->SplitCriticalEdge( |
| 864 | Succ: Pair.second, Analyses: {.LIS: LIS, .SI: SI, .LV: LV, .MLI: MLI}, LiveInSets: nullptr, MDTU: &MDTU); |
| 865 | if (NewSucc != nullptr) { |
| 866 | LLVM_DEBUG(dbgs() << " *** Splitting critical edge: " |
| 867 | << printMBBReference(*Pair.first) << " -- " |
| 868 | << printMBBReference(*NewSucc) << " -- " |
| 869 | << printMBBReference(*Pair.second) << '\n'); |
| 870 | if (MBFI) |
| 871 | MBFI->onEdgeSplit(NewPredecessor: *Pair.first, NewSuccessor: *NewSucc, MBPI: *MBPI); |
| 872 | |
| 873 | MadeChange = true; |
| 874 | ++NumSplit; |
| 875 | CI->splitCriticalEdge(Pred: Pair.first, Succ: Pair.second, New: NewSucc); |
| 876 | } else |
| 877 | LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n" ); |
| 878 | } |
| 879 | // If this iteration over the code changed anything, keep iterating. |
| 880 | if (!MadeChange) |
| 881 | break; |
| 882 | EverMadeChange = true; |
| 883 | } |
| 884 | |
| 885 | if (SinkInstsIntoCycle) { |
| 886 | SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles()); |
| 887 | SchedModel.init(TSInfo: STI); |
| 888 | bool HasHighPressure; |
| 889 | |
| 890 | DenseMap<SinkItem, MachineInstr *> SunkInstrs; |
| 891 | |
| 892 | enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END }; |
| 893 | for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END; |
| 894 | ++Stage, SunkInstrs.clear()) { |
| 895 | HasHighPressure = false; |
| 896 | |
| 897 | for (auto *Cycle : Cycles) { |
| 898 | MachineBasicBlock * = Cycle->getCyclePreheader(); |
| 899 | if (!Preheader) { |
| 900 | LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n" ); |
| 901 | continue; |
| 902 | } |
| 903 | SmallVector<MachineInstr *, 8> Candidates; |
| 904 | FindCycleSinkCandidates(Cycle, BB: Preheader, Candidates); |
| 905 | |
| 906 | unsigned i = 0; |
| 907 | |
| 908 | // Walk the candidates in reverse order so that we start with the use |
| 909 | // of a def-use chain, if there is any. |
| 910 | // TODO: Sort the candidates using a cost-model. |
| 911 | for (MachineInstr *I : llvm::reverse(C&: Candidates)) { |
| 912 | // CycleSinkStage::COPY: Sink a limited number of copies |
| 913 | if (Stage == CycleSinkStage::COPY) { |
| 914 | if (i++ == SinkIntoCycleLimit) { |
| 915 | LLVM_DEBUG(dbgs() |
| 916 | << "CycleSink: Limit reached of instructions to " |
| 917 | "be analyzed." ); |
| 918 | break; |
| 919 | } |
| 920 | |
| 921 | if (!I->isCopy()) |
| 922 | continue; |
| 923 | } |
| 924 | |
| 925 | // CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions |
| 926 | // which the target specifies as low-latency |
| 927 | if (Stage == CycleSinkStage::LOW_LATENCY && |
| 928 | !TII->hasLowDefLatency(SchedModel, DefMI: *I, DefIdx: 0)) |
| 929 | continue; |
| 930 | |
| 931 | if (!aggressivelySinkIntoCycle(Cycle, I&: *I, SunkInstrs)) |
| 932 | continue; |
| 933 | EverMadeChange = true; |
| 934 | ++NumCycleSunk; |
| 935 | } |
| 936 | |
| 937 | // Recalculate the pressure after sinking |
| 938 | if (!HasHighPressure) |
| 939 | HasHighPressure = registerPressureExceedsLimit(MBB: *Preheader); |
| 940 | } |
| 941 | if (!HasHighPressure) |
| 942 | break; |
| 943 | } |
| 944 | } |
| 945 | |
| 946 | HasStoreCache.clear(); |
| 947 | StoreInstrCache.clear(); |
| 948 | |
| 949 | // Now clear any kill flags for recorded registers. |
| 950 | for (auto I : RegsToClearKillFlags) |
| 951 | MRI->clearKillFlags(Reg: I); |
| 952 | RegsToClearKillFlags.clear(); |
| 953 | |
| 954 | releaseMemory(); |
| 955 | return EverMadeChange; |
| 956 | } |
| 957 | |
/// Walk \p MBB bottom-up, attempting to sink-and-fold, coalesce, or sink each
/// instruction. Returns true if the block was modified.
bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
  // Without sink-and-fold there is nothing to do unless the block has at
  // least two successors; empty blocks have nothing to process.
  if ((!EnableSinkAndFold && MBB.succ_size() <= 1) || MBB.empty())
    return false;

  // Don't bother sinking code out of unreachable blocks. In addition to being
  // unprofitable, it can also lead to infinite looping, because in an
  // unreachable cycle there may be nowhere to stop.
  if (!DT->isReachableFromEntry(A: &MBB))
    return false;

  bool MadeChange = false;

  // Cache all successors, sorted by frequency info and cycle depth.
  AllSuccsCache AllSuccessors;

  // Walk the basic block bottom-up. Remember if we saw a store.
  MachineBasicBlock::iterator I = MBB.end();
  --I; // Valid: MBB is known non-empty (checked above).
  bool ProcessedBegin, SawStore = false;
  do {
    MachineInstr &MI = *I; // The instruction to sink.

    // Predecrement I (if it's not begin) so that it isn't invalidated by
    // sinking.
    ProcessedBegin = I == MBB.begin();
    if (!ProcessedBegin)
      --I;

    // Debug values are tracked (for later salvaging) but never sunk
    // themselves; other meta/pseudo/fake-use instructions are skipped.
    if (MI.isDebugOrPseudoInstr() || MI.isFakeUse()) {
      if (MI.isDebugValue())
        ProcessDbgInst(MI);
      continue;
    }

    if (EnableSinkAndFold && PerformSinkAndFold(MI, MBB: &MBB)) {
      MadeChange = true;
      continue;
    }

    // Can't sink anything out of a block that has less than two successors.
    if (MBB.succ_size() <= 1)
      continue;

    if (PerformTrivialForwardCoalescing(MI, MBB: &MBB)) {
      MadeChange = true;
      continue;
    }

    // SawStore accumulates across the walk: once a store is seen below the
    // current position, load sinking becomes more conservative.
    if (SinkInstruction(MI, SawStore, AllSuccessors)) {
      ++NumSunk;
      MadeChange = true;
    }

    // If we just processed the first instruction in the block, we're done.
  } while (!ProcessedBegin);

  SeenDbgUsers.clear();
  SeenDbgVars.clear();
  // recalculate the bb register pressure after sinking one BB.
  CachedRegisterPressure.clear();
  return MadeChange;
}
| 1020 | |
| 1021 | void MachineSinking::ProcessDbgInst(MachineInstr &MI) { |
| 1022 | // When we see DBG_VALUEs for registers, record any vreg it reads, so that |
| 1023 | // we know what to sink if the vreg def sinks. |
| 1024 | assert(MI.isDebugValue() && "Expected DBG_VALUE for processing" ); |
| 1025 | |
| 1026 | DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), |
| 1027 | MI.getDebugLoc()->getInlinedAt()); |
| 1028 | bool SeenBefore = SeenDbgVars.contains(V: Var); |
| 1029 | |
| 1030 | for (MachineOperand &MO : MI.debug_operands()) { |
| 1031 | if (MO.isReg() && MO.getReg().isVirtual()) |
| 1032 | SeenDbgUsers[MO.getReg()].push_back(NewVal: SeenDbgUser(&MI, SeenBefore)); |
| 1033 | } |
| 1034 | |
| 1035 | // Record the variable for any DBG_VALUE, to avoid re-ordering any of them. |
| 1036 | SeenDbgVars.insert(V: Var); |
| 1037 | } |
| 1038 | |
/// Heuristically decide whether splitting the critical edge From->To to sink
/// \p MI is worthwhile. If a different source block previously wanted to sink
/// the same value into \p To but was held off, it is returned via
/// \p DeferredFromBlock so the caller can split that edge too.
bool MachineSinking::isWorthBreakingCriticalEdge(
    MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
    MachineBasicBlock *&DeferredFromBlock) {
  // FIXME: Need much better heuristics.

  // If the pass has already considered breaking this edge (during this pass
  // through the function), then let's go ahead and break it. This means
  // sinking multiple "cheap" instructions into the same block.
  if (!CEBCandidates.insert(V: std::make_pair(x&: From, y&: To)).second)
    return true;

  // Instructions that are not as cheap as a move are always worth the split.
  if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
    return true;

  // Check and record the register and the destination block we want to sink
  // into. Note that we want to do the following before the next check on branch
  // probability. Because we want to record the initial candidate even if it's
  // on hot edge, so that other candidates that might not on hot edges can be
  // sinked as well.
  for (const auto &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    // Look through COPY-like chains so distinct copies of the same value map
    // to a single merge-candidate key.
    Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(SrcReg: Reg, MRI) : Reg;
    auto Key = std::make_pair(x&: SrcReg, y&: To);
    auto Res = CEMergeCandidates.try_emplace(Key, Args&: From);
    // We wanted to sink the same register into the same block, consider it to
    // be profitable.
    if (!Res.second) {
      // Return the source block that was previously held off.
      DeferredFromBlock = Res.first->second;
      return true;
    }
  }

  // Splitting a sufficiently cold edge is considered worthwhile.
  if (From->isSuccessor(MBB: To) &&
      MBPI->getEdgeProbability(Src: From, Dst: To) <=
          BranchProbability(SplitEdgeProbabilityThreshold, 100))
    return true;

  // MI is cheap, we probably don't want to break the critical edge for it.
  // However, if this would allow some definitions of its source operands
  // to be sunk then it's probably worth it.
  for (const MachineOperand &MO : MI.all_uses()) {
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    // We don't move live definitions of physical registers,
    // so sinking their uses won't enable any opportunities.
    if (Reg.isPhysical())
      continue;

    // If this instruction is the only user of a virtual register,
    // check if breaking the edge will enable sinking
    // both this instruction and the defining instruction.
    if (MRI->hasOneNonDBGUse(RegNo: Reg)) {
      // If the definition resides in same MBB,
      // claim it's likely we can sink these together.
      // If definition resides elsewhere, we aren't
      // blocking it from being sunk so don't break the edge.
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (DefMI->getParent() == MI.getParent())
        return true;
    }
  }

  // Let the target decide if it's worth breaking this
  // critical edge for a "cheap" instruction.
  return TII->shouldBreakCriticalEdgeToSink(MI);
}
| 1110 | |
/// Return true if it is legal to split the critical edge FromBB->ToBB in
/// order to sink a computation of \p MI into the new block. \p BreakPHIEdge
/// indicates all uses of the value are PHIs, which relaxes the dominance
/// requirement on ToBB's other predecessors (see comment below).
bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
                                                MachineBasicBlock *FromBB,
                                                MachineBasicBlock *ToBB,
                                                bool BreakPHIEdge) {
  // Avoid breaking back edge. From == To means backedge for single BB cycle.
  if (!SplitEdges || FromBB == ToBB || !FromBB->isSuccessor(MBB: ToBB))
    return false;

  MachineCycle *FromCycle = CI->getCycle(Block: FromBB);
  MachineCycle *ToCycle = CI->getCycle(Block: ToBB);

  // Check for backedges of more "complex" cycles.
  if (FromCycle == ToCycle && FromCycle &&
      (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
    return false;

  // It's not always legal to break critical edges and sink the computation
  // to the edge.
  //
  // %bb.1:
  // v1024
  // Beq %bb.3
  // <fallthrough>
  // %bb.2:
  // ... no uses of v1024
  // <fallthrough>
  // %bb.3:
  // ...
  //        = v1024
  //
  // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
  //
  // %bb.1:
  // ...
  // Bne %bb.2
  // %bb.4:
  // v1024 =
  // B %bb.3
  // %bb.2:
  // ... no uses of v1024
  // <fallthrough>
  // %bb.3:
  // ...
  //        = v1024
  //
  // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
  // flow. We need to ensure the new basic block where the computation is
  // sunk to dominates all the uses.
  // It's only legal to break critical edge and sink the computation to the
  // new block if all the predecessors of "To", except for "From", are
  // not dominated by "From". Given SSA property, this means these
  // predecessors are dominated by "To".
  //
  // There is no need to do this check if all the uses are PHI nodes. PHI
  // sources are only defined on the specific predecessor edges.
  if (!BreakPHIEdge) {
    for (MachineBasicBlock *Pred : ToBB->predecessors())
      if (Pred != FromBB && !DT->dominates(A: ToBB, B: Pred))
        return false;
  }

  return true;
}
| 1174 | |
/// Queue the critical edge FromBB->ToBB in ToSplit (splitting is deferred to
/// the end of the current iteration) if the split is both worthwhile and
/// legal. Returns true if the edge — and possibly a previously deferred
/// companion edge into the same block — was queued.
bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
                                               MachineBasicBlock *FromBB,
                                               MachineBasicBlock *ToBB,
                                               bool BreakPHIEdge) {
  bool Status = false;
  MachineBasicBlock *DeferredFromBB = nullptr;
  if (isWorthBreakingCriticalEdge(MI, From: FromBB, To: ToBB, DeferredFromBlock&: DeferredFromBB)) {
    // If there is a DeferredFromBB, we consider FromBB only if _both_
    // of them are legal to split.
    if ((!DeferredFromBB ||
         ToSplit.count(key: std::make_pair(x&: DeferredFromBB, y&: ToBB)) ||
         isLegalToBreakCriticalEdge(MI, FromBB: DeferredFromBB, ToBB, BreakPHIEdge)) &&
        isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
      ToSplit.insert(X: std::make_pair(x&: FromBB, y&: ToBB));
      if (DeferredFromBB)
        ToSplit.insert(X: std::make_pair(x&: DeferredFromBB, y&: ToBB));
      Status = true;
    }
  }

  return Status;
}
| 1197 | |
/// Compute (or fetch from CachedRegisterPressure) the maximum register
/// pressure, per pressure set, of \p MBB. The returned reference points into
/// the cache and remains valid until the cache is cleared or updated for this
/// block.
std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB,
                                      bool UseCache) {
  // Currently to save compiling time, MBB's register pressure will not change
  // in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's
  // register pressure is changed after sinking any instructions into it.
  // FIXME: need a accurate and cheap register pressure estiminate model here.

  auto RP = CachedRegisterPressure.find(Val: &MBB);
  if (UseCache && RP != CachedRegisterPressure.end())
    return RP->second;

  RegionPressure Pressure;
  RegPressureTracker RPTracker(Pressure);

  // Initialize the register pressure tracker.
  RPTracker.init(mf: MBB.getParent(), rci: &RegClassInfo, lis: nullptr, mbb: &MBB, pos: MBB.end(),
                 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

  // Walk the block bottom-up, receding the tracker over each non-debug
  // instruction to accumulate the max pressure per set.
  for (MachineBasicBlock::const_iterator MII = MBB.instr_end(),
                                         MIE = MBB.instr_begin();
       MII != MIE; --MII) {
    const MachineInstr &MI = *std::prev(x: MII);
    if (MI.isDebugOrPseudoInstr())
      continue;
    RegisterOperands RegOpers;
    RegOpers.collect(MI, TRI: *TRI, MRI: *MRI, TrackLaneMasks: false, IgnoreDead: false);
    RPTracker.recedeSkipDebugValues();
    assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!" );
    RPTracker.recede(RegOpers);
  }

  RPTracker.closeRegion();

  // A stale entry already exists (we were asked to bypass the cache above):
  // overwrite it in place and return a reference to the updated value.
  if (RP != CachedRegisterPressure.end()) {
    CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure;
    return CachedRegisterPressure[&MBB];
  }

  auto It = CachedRegisterPressure.insert(
      KV: std::make_pair(x: &MBB, y&: RPTracker.getPressure().MaxSetPressure));
  return It.first->second;
}
| 1241 | |
| 1242 | bool MachineSinking::registerPressureSetExceedsLimit( |
| 1243 | unsigned NRegs, const TargetRegisterClass *RC, |
| 1244 | const MachineBasicBlock &MBB) { |
| 1245 | unsigned Weight = NRegs * TRI->getRegClassWeight(RC).RegWeight; |
| 1246 | const int *PS = TRI->getRegClassPressureSets(RC); |
| 1247 | std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB); |
| 1248 | for (; *PS != -1; PS++) |
| 1249 | if (Weight + BBRegisterPressure[*PS] >= |
| 1250 | RegClassInfo.getRegPressureSetLimit(Idx: *PS)) |
| 1251 | return true; |
| 1252 | return false; |
| 1253 | } |
| 1254 | |
| 1255 | // Recalculate RP and check if any pressure set exceeds the set limit. |
| 1256 | bool MachineSinking::registerPressureExceedsLimit( |
| 1257 | const MachineBasicBlock &MBB) { |
| 1258 | std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB, UseCache: false); |
| 1259 | |
| 1260 | for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) { |
| 1261 | if (BBRegisterPressure[PS] >= |
| 1262 | TRI->getRegPressureSetLimit(MF: *MBB.getParent(), Idx: PS)) { |
| 1263 | return true; |
| 1264 | } |
| 1265 | } |
| 1266 | |
| 1267 | return false; |
| 1268 | } |
| 1269 | |
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
///
/// \p Reg is a register defined by \p MI, \p MBB is MI's current block, and
/// \p SuccToSinkTo is the candidate destination. When SuccToSinkTo
/// post-dominates MBB, profitability depends on cycle depth, PHI-only uses,
/// the chance of sinking further in a later round, and register pressure.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          MachineBasicBlock *SuccToSinkTo,
                                          AllSuccsCache &AllSuccessors) {
  assert(SuccToSinkTo && "Invalid SinkTo Candidate BB" );

  if (MBB == SuccToSinkTo)
    return false;

  // It is profitable if SuccToSinkTo does not post dominate current block.
  if (!PDT->dominates(A: SuccToSinkTo, B: MBB))
    return true;

  // It is profitable to sink an instruction from a deeper cycle to a shallower
  // cycle, even if the latter post-dominates the former (PR21115).
  if (CI->getCycleDepth(Block: MBB) > CI->getCycleDepth(Block: SuccToSinkTo))
    return true;

  // Check if only use in post dominated block is PHI instruction.
  bool NonPHIUse = false;
  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
    MachineBasicBlock *UseBlock = UseInst.getParent();
    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
      NonPHIUse = true;
  }
  if (!NonPHIUse)
    return true;

  // If SuccToSinkTo post dominates then also it may be profitable if MI
  // can further profitably sinked into another block in next round.
  bool BreakPHIEdge = false;
  // FIXME - If finding successor is compile time expensive then cache results.
  if (MachineBasicBlock *MBB2 =
          FindSuccToSinkTo(MI, MBB: SuccToSinkTo, BreakPHIEdge, AllSuccessors))
    return isProfitableToSinkTo(Reg, MI, MBB: SuccToSinkTo, SuccToSinkTo: MBB2, AllSuccessors);

  MachineCycle *MCycle = CI->getCycle(Block: MBB);

  // If the instruction is not inside a cycle, it is not profitable to sink MI
  // to a post dominate block SuccToSinkTo.
  if (!MCycle)
    return false;

  // If this instruction is inside a Cycle and sinking this instruction can make
  // more registers live range shorten, it is still prifitable.
  for (const MachineOperand &MO : MI.operands()) {
    // Ignore non-register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      // Don't handle non-constant and non-ignorable physical register uses.
      if (MO.isUse() && !MRI->isConstantPhysReg(PhysReg: Reg) &&
          !TII->isIgnorableUse(MO))
        return false;
      continue;
    }

    // Users for the defs are all dominated by SuccToSinkTo.
    if (MO.isDef()) {
      // This def register's live range is shortened after sinking.
      bool LocalUse = false;
      if (!AllUsesDominatedByBlock(Reg, MBB: SuccToSinkTo, DefMBB: MBB, BreakPHIEdge,
                                   LocalUse))
        return false;
    } else {
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (!DefMI)
        continue;
      MachineCycle *Cycle = CI->getCycle(Block: DefMI->getParent());
      // DefMI is defined outside of cycle. There should be no live range
      // impact for this operand. Defination outside of cycle means:
      // 1: defination is outside of cycle.
      // 2: defination is in this cycle, but it is a PHI in the cycle header.
      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
                              Cycle->getHeader() == DefMI->getParent()))
        continue;
      // The DefMI is defined inside the cycle.
      // If sinking this operand makes some register pressure set exceed limit,
      // it is not profitable.
      if (registerPressureSetExceedsLimit(NRegs: 1, RC: MRI->getRegClass(Reg),
                                          MBB: *SuccToSinkTo)) {
        LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable." );
        return false;
      }
    }
  }

  // If MI is in cycle and all its operands are alive across the whole cycle or
  // if no operand sinking make register pressure set exceed limit, it is
  // profitable to sink MI.
  return true;
}
| 1367 | |
/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
/// computing it if it was not already cached.
///
/// Besides MBB's CFG successors, dominator-tree children that MBB immediately
/// dominates are included as potential non-successor sink points.
SmallVector<MachineBasicBlock *, 4> &
MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                                       AllSuccsCache &AllSuccessors) const {
  // Do we have the sorted successors in cache ?
  auto Succs = AllSuccessors.find(Val: MBB);
  if (Succs != AllSuccessors.end())
    return Succs->second;

  SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());

  // Handle cases where sinking can happen but where the sink point isn't a
  // successor. For example:
  //
  //   x = computation
  //   if () {} else {}
  //   use x
  //
  for (MachineDomTreeNode *DTChild : DT->getNode(BB: MBB)->children()) {
    // DomTree children of MBB that have MBB as immediate dominator are added.
    // NOTE(review): the comparison is against MI.getParent(), not MBB; the two
    // differ when this is reached via the recursive isProfitableToSinkTo ->
    // FindSuccToSinkTo path — confirm that is intended.
    if (DTChild->getIDom()->getBlock() == MI.getParent() &&
        // Skip MBBs already added to the AllSuccs vector above.
        !MBB->isSuccessor(MBB: DTChild->getBlock()))
      AllSuccs.push_back(Elt: DTChild->getBlock());
  }

  // Sort Successors according to their cycle depth or block frequency info.
  llvm::stable_sort(
      Range&: AllSuccs, C: [&](const MachineBasicBlock *L, const MachineBasicBlock *R) {
        uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(MBB: L).getFrequency() : 0;
        uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(MBB: R).getFrequency() : 0;
        // Without usable frequency data (or when optimizing for size), fall
        // back to comparing cycle depth; otherwise colder blocks sort first.
        if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI) ||
            (!LHSFreq && !RHSFreq))
          return CI->getCycleDepth(Block: L) < CI->getCycleDepth(Block: R);
        return LHSFreq < RHSFreq;
      });

  auto it = AllSuccessors.insert(KV: std::make_pair(x&: MBB, y&: AllSuccs));

  return it.first->second;
}
| 1410 | |
/// FindSuccToSinkTo - Find a successor to sink this instruction to.
///
/// Returns the single block into which every virtual register defined by
/// \p MI can legally and profitably be sunk, or null if none qualifies.
/// \p BreakPHIEdge is threaded through AllUsesDominatedByBlock; it appears to
/// signal that all uses are PHIs requiring an edge split — confirm against
/// that function's definition.
MachineBasicBlock *
MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                 bool &BreakPHIEdge,
                                 AllSuccsCache &AllSuccessors) {
  assert(MBB && "Invalid MachineBasicBlock!" );

  // loop over all the operands of the specified instruction. If there is
  // anything we can't handle, bail out.

  // SuccToSinkTo - This is the successor to sink this instruction to, once we
  // decide.
  MachineBasicBlock *SuccToSinkTo = nullptr;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue; // Ignore non-register operands.

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(PhysReg: Reg) && !TII->isIgnorableUse(MO))
          return nullptr;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return nullptr;
      }
    } else {
      // Virtual register uses are always safe to sink.
      if (MO.isUse())
        continue;

      // If it's not safe to move defs of the register class, then abort.
      if (!TII->isSafeToMoveRegClassDefs(RC: MRI->getRegClass(Reg)))
        return nullptr;

      // Virtual register defs can only be sunk if all their uses are in blocks
      // dominated by one of the successors.
      if (SuccToSinkTo) {
        // If a previous operand picked a block to sink to, then this operand
        // must be sinkable to the same block.
        bool LocalUse = false;
        if (!AllUsesDominatedByBlock(Reg, MBB: SuccToSinkTo, DefMBB: MBB, BreakPHIEdge,
                                     LocalUse))
          return nullptr;

        continue;
      }

      // Otherwise, we should look at all the successors and decide which one
      // we should sink to. If we have reliable block frequency information
      // (frequency != 0) available, give successors with smaller frequencies
      // higher priority, otherwise prioritize smaller cycle depths.
      for (MachineBasicBlock *SuccBlock :
           GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
        bool LocalUse = false;
        if (AllUsesDominatedByBlock(Reg, MBB: SuccBlock, DefMBB: MBB, BreakPHIEdge,
                                    LocalUse)) {
          SuccToSinkTo = SuccBlock;
          break;
        }
        if (LocalUse)
          // Def is used locally, it's never safe to move this def.
          return nullptr;
      }

      // If we couldn't find a block to sink to, ignore this instruction.
      if (!SuccToSinkTo)
        return nullptr;
      if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
        return nullptr;
    }
  }

  // It is not possible to sink an instruction into its own block. This can
  // happen with cycles.
  if (MBB == SuccToSinkTo)
    return nullptr;

  // It's not safe to sink instructions to EH landing pad. Control flow into
  // landing pad is implicitly defined.
  if (SuccToSinkTo && SuccToSinkTo->isEHPad())
    return nullptr;

  // It ought to be okay to sink instructions into an INLINEASM_BR target, but
  // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in
  // the source block (which this code does not yet do). So for now, forbid
  // doing so.
  if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
    return nullptr;

  // Finally, give the target a veto over the chosen destination.
  if (SuccToSinkTo && !TII->isSafeToSink(MI, SuccToSinkTo, CI))
    return nullptr;

  return SuccToSinkTo;
}
| 1512 | |
| 1513 | /// Return true if MI is likely to be usable as a memory operation by the |
| 1514 | /// implicit null check optimization. |
| 1515 | /// |
| 1516 | /// This is a "best effort" heuristic, and should not be relied upon for |
| 1517 | /// correctness. This returning true does not guarantee that the implicit null |
| 1518 | /// check optimization is legal over MI, and this returning false does not |
| 1519 | /// guarantee MI cannot possibly be used to do a null check. |
| 1520 | static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, |
| 1521 | const TargetInstrInfo *TII, |
| 1522 | const TargetRegisterInfo *TRI) { |
| 1523 | using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate; |
| 1524 | |
| 1525 | auto *MBB = MI.getParent(); |
| 1526 | if (MBB->pred_size() != 1) |
| 1527 | return false; |
| 1528 | |
| 1529 | auto *PredMBB = *MBB->pred_begin(); |
| 1530 | auto *PredBB = PredMBB->getBasicBlock(); |
| 1531 | |
| 1532 | // Frontends that don't use implicit null checks have no reason to emit |
| 1533 | // branches with make.implicit metadata, and this function should always |
| 1534 | // return false for them. |
| 1535 | if (!PredBB || |
| 1536 | !PredBB->getTerminator()->getMetadata(KindID: LLVMContext::MD_make_implicit)) |
| 1537 | return false; |
| 1538 | |
| 1539 | const MachineOperand *BaseOp; |
| 1540 | int64_t Offset; |
| 1541 | bool OffsetIsScalable; |
| 1542 | if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) |
| 1543 | return false; |
| 1544 | |
| 1545 | if (!BaseOp->isReg()) |
| 1546 | return false; |
| 1547 | |
| 1548 | if (!(MI.mayLoad() && !MI.isPredicable())) |
| 1549 | return false; |
| 1550 | |
| 1551 | MachineBranchPredicate MBP; |
| 1552 | if (TII->analyzeBranchPredicate(MBB&: *PredMBB, MBP, AllowModify: false)) |
| 1553 | return false; |
| 1554 | |
| 1555 | return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 && |
| 1556 | (MBP.Predicate == MachineBranchPredicate::PRED_NE || |
| 1557 | MBP.Predicate == MachineBranchPredicate::PRED_EQ) && |
| 1558 | MBP.LHS.getReg() == BaseOp->getReg(); |
| 1559 | } |
| 1560 | |
| 1561 | /// If the sunk instruction is a copy, try to forward the copy instead of |
| 1562 | /// leaving an 'undef' DBG_VALUE in the original location. Don't do this if |
| 1563 | /// there's any subregister weirdness involved. Returns true if copy |
| 1564 | /// propagation occurred. |
| 1565 | static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI, |
| 1566 | Register Reg) { |
| 1567 | const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo(); |
| 1568 | const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo(); |
| 1569 | |
| 1570 | // Copy DBG_VALUE operand and set the original to undef. We then check to |
| 1571 | // see whether this is something that can be copy-forwarded. If it isn't, |
| 1572 | // continue around the loop. |
| 1573 | |
| 1574 | const MachineOperand *SrcMO = nullptr, *DstMO = nullptr; |
| 1575 | auto CopyOperands = TII.isCopyInstr(MI: SinkInst); |
| 1576 | if (!CopyOperands) |
| 1577 | return false; |
| 1578 | SrcMO = CopyOperands->Source; |
| 1579 | DstMO = CopyOperands->Destination; |
| 1580 | |
| 1581 | // Check validity of forwarding this copy. |
| 1582 | bool PostRA = MRI.getNumVirtRegs() == 0; |
| 1583 | |
| 1584 | // Trying to forward between physical and virtual registers is too hard. |
| 1585 | if (Reg.isVirtual() != SrcMO->getReg().isVirtual()) |
| 1586 | return false; |
| 1587 | |
| 1588 | // Only try virtual register copy-forwarding before regalloc, and physical |
| 1589 | // register copy-forwarding after regalloc. |
| 1590 | bool arePhysRegs = !Reg.isVirtual(); |
| 1591 | if (arePhysRegs != PostRA) |
| 1592 | return false; |
| 1593 | |
| 1594 | // Pre-regalloc, only forward if all subregisters agree (or there are no |
| 1595 | // subregs at all). More analysis might recover some forwardable copies. |
| 1596 | if (!PostRA) |
| 1597 | for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) |
| 1598 | if (DbgMO.getSubReg() != SrcMO->getSubReg() || |
| 1599 | DbgMO.getSubReg() != DstMO->getSubReg()) |
| 1600 | return false; |
| 1601 | |
| 1602 | // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register |
| 1603 | // of this copy. Only forward the copy if the DBG_VALUE operand exactly |
| 1604 | // matches the copy destination. |
| 1605 | if (PostRA && Reg != DstMO->getReg()) |
| 1606 | return false; |
| 1607 | |
| 1608 | for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) { |
| 1609 | DbgMO.setReg(SrcMO->getReg()); |
| 1610 | DbgMO.setSubReg(SrcMO->getSubReg()); |
| 1611 | } |
| 1612 | return true; |
| 1613 | } |
| 1614 | |
| 1615 | using MIRegs = std::pair<MachineInstr *, SmallVector<Register, 2>>; |
| 1616 | /// Sink an instruction and its associated debug instructions. |
| 1617 | static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, |
| 1618 | MachineBasicBlock::iterator InsertPos, |
| 1619 | ArrayRef<MIRegs> DbgValuesToSink) { |
| 1620 | // If we cannot find a location to use (merge with), then we erase the debug |
| 1621 | // location to prevent debug-info driven tools from potentially reporting |
| 1622 | // wrong location information. |
| 1623 | if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end()) |
| 1624 | MI.setDebugLoc(DebugLoc::getMergedLocation(LocA: MI.getDebugLoc(), |
| 1625 | LocB: InsertPos->getDebugLoc())); |
| 1626 | else |
| 1627 | MI.setDebugLoc(DebugLoc()); |
| 1628 | |
| 1629 | // Move the instruction. |
| 1630 | MachineBasicBlock *ParentBlock = MI.getParent(); |
| 1631 | SuccToSinkTo.splice(Where: InsertPos, Other: ParentBlock, From: MI, |
| 1632 | To: ++MachineBasicBlock::iterator(MI)); |
| 1633 | |
| 1634 | // Sink a copy of debug users to the insert position. Mark the original |
| 1635 | // DBG_VALUE location as 'undef', indicating that any earlier variable |
| 1636 | // location should be terminated as we've optimised away the value at this |
| 1637 | // point. |
| 1638 | for (const auto &DbgValueToSink : DbgValuesToSink) { |
| 1639 | MachineInstr *DbgMI = DbgValueToSink.first; |
| 1640 | MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(Orig: DbgMI); |
| 1641 | SuccToSinkTo.insert(I: InsertPos, MI: NewDbgMI); |
| 1642 | |
| 1643 | bool PropagatedAllSunkOps = true; |
| 1644 | for (Register Reg : DbgValueToSink.second) { |
| 1645 | if (DbgMI->hasDebugOperandForReg(Reg)) { |
| 1646 | if (!attemptDebugCopyProp(SinkInst&: MI, DbgMI&: *DbgMI, Reg)) { |
| 1647 | PropagatedAllSunkOps = false; |
| 1648 | break; |
| 1649 | } |
| 1650 | } |
| 1651 | } |
| 1652 | if (!PropagatedAllSunkOps) |
| 1653 | DbgMI->setDebugValueUndef(); |
| 1654 | } |
| 1655 | } |
| 1656 | |
| 1657 | /// hasStoreBetween - check if there is store betweeen straight line blocks From |
| 1658 | /// and To. |
| 1659 | bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, |
| 1660 | MachineBasicBlock *To, MachineInstr &MI) { |
| 1661 | // Make sure From and To are in straight line which means From dominates To |
| 1662 | // and To post dominates From. |
| 1663 | if (!DT->dominates(A: From, B: To) || !PDT->dominates(A: To, B: From)) |
| 1664 | return true; |
| 1665 | |
| 1666 | auto BlockPair = std::make_pair(x&: From, y&: To); |
| 1667 | |
| 1668 | // Does these two blocks pair be queried before and have a definite cached |
| 1669 | // result? |
| 1670 | if (auto It = HasStoreCache.find(Val: BlockPair); It != HasStoreCache.end()) |
| 1671 | return It->second; |
| 1672 | |
| 1673 | if (auto It = StoreInstrCache.find(Val: BlockPair); It != StoreInstrCache.end()) |
| 1674 | return llvm::any_of(Range&: It->second, P: [&](MachineInstr *I) { |
| 1675 | return I->mayAlias(AA, Other: MI, UseTBAA: false); |
| 1676 | }); |
| 1677 | |
| 1678 | bool SawStore = false; |
| 1679 | bool HasAliasedStore = false; |
| 1680 | DenseSet<MachineBasicBlock *> HandledBlocks; |
| 1681 | DenseSet<MachineBasicBlock *> HandledDomBlocks; |
| 1682 | // Go through all reachable blocks from From. |
| 1683 | for (MachineBasicBlock *BB : depth_first(G: From)) { |
| 1684 | // We insert the instruction at the start of block To, so no need to worry |
| 1685 | // about stores inside To. |
| 1686 | // Store in block From should be already considered when just enter function |
| 1687 | // SinkInstruction. |
| 1688 | if (BB == To || BB == From) |
| 1689 | continue; |
| 1690 | |
| 1691 | // We already handle this BB in previous iteration. |
| 1692 | if (HandledBlocks.count(V: BB)) |
| 1693 | continue; |
| 1694 | |
| 1695 | HandledBlocks.insert(V: BB); |
| 1696 | // To post dominates BB, it must be a path from block From. |
| 1697 | if (PDT->dominates(A: To, B: BB)) { |
| 1698 | if (!HandledDomBlocks.count(V: BB)) |
| 1699 | HandledDomBlocks.insert(V: BB); |
| 1700 | |
| 1701 | // If this BB is too big or the block number in straight line between From |
| 1702 | // and To is too big, stop searching to save compiling time. |
| 1703 | if (BB->sizeWithoutDebugLargerThan(Limit: SinkLoadInstsPerBlockThreshold) || |
| 1704 | HandledDomBlocks.size() > SinkLoadBlocksThreshold) { |
| 1705 | for (auto *DomBB : HandledDomBlocks) { |
| 1706 | if (DomBB != BB && DT->dominates(A: DomBB, B: BB)) |
| 1707 | HasStoreCache[std::make_pair(x&: DomBB, y&: To)] = true; |
| 1708 | else if (DomBB != BB && DT->dominates(A: BB, B: DomBB)) |
| 1709 | HasStoreCache[std::make_pair(x&: From, y&: DomBB)] = true; |
| 1710 | } |
| 1711 | HasStoreCache[BlockPair] = true; |
| 1712 | return true; |
| 1713 | } |
| 1714 | |
| 1715 | for (MachineInstr &I : *BB) { |
| 1716 | // Treat as alias conservatively for a call or an ordered memory |
| 1717 | // operation. |
| 1718 | if (I.isCall() || I.hasOrderedMemoryRef()) { |
| 1719 | for (auto *DomBB : HandledDomBlocks) { |
| 1720 | if (DomBB != BB && DT->dominates(A: DomBB, B: BB)) |
| 1721 | HasStoreCache[std::make_pair(x&: DomBB, y&: To)] = true; |
| 1722 | else if (DomBB != BB && DT->dominates(A: BB, B: DomBB)) |
| 1723 | HasStoreCache[std::make_pair(x&: From, y&: DomBB)] = true; |
| 1724 | } |
| 1725 | HasStoreCache[BlockPair] = true; |
| 1726 | return true; |
| 1727 | } |
| 1728 | |
| 1729 | if (I.mayStore()) { |
| 1730 | SawStore = true; |
| 1731 | // We still have chance to sink MI if all stores between are not |
| 1732 | // aliased to MI. |
| 1733 | // Cache all store instructions, so that we don't need to go through |
| 1734 | // all From reachable blocks for next load instruction. |
| 1735 | if (I.mayAlias(AA, Other: MI, UseTBAA: false)) |
| 1736 | HasAliasedStore = true; |
| 1737 | StoreInstrCache[BlockPair].push_back(Elt: &I); |
| 1738 | } |
| 1739 | } |
| 1740 | } |
| 1741 | } |
| 1742 | // If there is no store at all, cache the result. |
| 1743 | if (!SawStore) |
| 1744 | HasStoreCache[BlockPair] = false; |
| 1745 | return HasAliasedStore; |
| 1746 | } |
| 1747 | |
/// Aggressively sink instructions into cycles. This will aggressively try to
/// sink all instructions in the top-most preheaders in an attempt to reduce RP.
/// In particular, it will sink into multiple successor blocks without limits
/// based on the amount of sinking, or the type of ops being sunk (so long as
/// they are safe to sink).
///
/// Rather than moving \p I once, a fresh clone of \p I is created in each
/// in-cycle user's block (defining a fresh vreg) and the user is rewritten to
/// read the clone. \p SunkInstrs memoizes (instruction, block) -> clone so at
/// most one copy is made per user block. Returns true unless \p I has multiple
/// defs (unsupported); note it returns true even if no use was rewritten.
bool MachineSinking::aggressivelySinkIntoCycle(
    MachineCycle *Cycle, MachineInstr &I,
    DenseMap<SinkItem, MachineInstr *> &SunkInstrs) {
  // TODO: support instructions with multiple defs
  if (I.getNumDefs() > 1)
    return false;

  LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
  assert(Cycle->getCyclePreheader() && "Cycle sink needs a preheader block");
  // Snapshot the users first: rewriting them below mutates the use list, so we
  // must not iterate use_instructions() while substituting registers.
  SmallVector<std::pair<RegSubRegPair, MachineInstr *>> Uses;

  MachineOperand &DefMO = I.getOperand(i: 0);
  for (MachineInstr &MI : MRI->use_instructions(Reg: DefMO.getReg())) {
    Uses.push_back(Elt: {{DefMO.getReg(), DefMO.getSubReg()}, &MI});
  }

  for (std::pair<RegSubRegPair, MachineInstr *> Entry : Uses) {
    MachineInstr *MI = Entry.second;
    LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI);
    if (MI->isPHI()) {
      LLVM_DEBUG(
          dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n");
      continue;
    }
    // We cannot sink before the prologue
    if (MI->isPosition() || TII->isBasicBlockPrologue(MI: *MI)) {
      LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, "
                           "can't sink.\n");
      continue;
    }
    if (!Cycle->contains(Block: MI->getParent())) {
      LLVM_DEBUG(
          dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n");
      continue;
    }

    MachineBasicBlock *SinkBlock = MI->getParent();
    MachineInstr *NewMI = nullptr;
    SinkItem MapEntry(&I, SinkBlock);

    auto SI = SunkInstrs.find(Val: MapEntry);

    // Check for the case in which we have already sunk a copy of this
    // instruction into the user block.
    if (SI != SunkInstrs.end()) {
      LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: "
                        << printMBBReference(*SinkBlock) << "\n");
      NewMI = SI->second;
    }

    // Create a copy of the instruction in the use block.
    if (!NewMI) {
      LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: "
                        << printMBBReference(*SinkBlock) << "\n");

      NewMI = I.getMF()->CloneMachineInstr(Orig: &I);
      // Give the clone its own destination vreg so each user block has an
      // independent def.
      if (DefMO.getReg().isVirtual()) {
        const TargetRegisterClass *TRC = MRI->getRegClass(Reg: DefMO.getReg());
        Register DestReg = MRI->createVirtualRegister(RegClass: TRC);
        NewMI->substituteRegister(FromReg: DefMO.getReg(), ToReg: DestReg, SubIdx: DefMO.getSubReg(),
                                  RegInfo: *TRI);
      }
      SinkBlock->insert(I: SinkBlock->SkipPHIsAndLabels(I: SinkBlock->begin()),
                        MI: NewMI);
      SunkInstrs.insert(KV: {MapEntry, NewMI});
    }

    // Conservatively clear any kill flags on uses of sunk instruction
    for (MachineOperand &MO : NewMI->all_uses()) {
      assert(MO.isReg() && MO.isUse());
      RegsToClearKillFlags.insert(V: MO.getReg());
    }

    // The instruction is moved from its basic block, so do not retain the
    // debug information.
    assert(!NewMI->isDebugInstr() && "Should not sink debug inst");
    NewMI->setDebugLoc(DebugLoc());

    // Replace the use with the newly created virtual register.
    RegSubRegPair &UseReg = Entry.first;
    MI->substituteRegister(FromReg: UseReg.Reg, ToReg: NewMI->getOperand(i: 0).getReg(),
                           SubIdx: UseReg.SubReg, RegInfo: *TRI);
  }
  // If we have replaced all uses, then delete the dead instruction
  if (I.isDead(MRI: *MRI))
    I.eraseFromParent();
  return true;
}
| 1841 | |
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
///
/// \param SawStore in/out flag threaded through MI.isSafeToMove: set when a
///        store has been seen while walking the block.
/// \param AllSuccessors cache of each block's sorted successor list, shared
///        across calls for this function.
/// \returns true if MI was actually moved. A return of false may still have
///          scheduled a critical edge for splitting, so the next pass
///          iteration can sink MI into the newly created block.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
                                    AllSuccsCache &AllSuccessors) {
  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  if (!MI.isSafeToMove(SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't break implicit null checks. This is a performance heuristic, and not
  // required for correctness.
  if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges. We often get
  // instructions sunk into the top of a large block, but it would be better to
  // also sink them down before their first use in the block. This xform has to
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.

  bool BreakPHIEdge = false;
  MachineBasicBlock *ParentBlock = MI.getParent();
  MachineBasicBlock *SuccToSinkTo =
      FindSuccToSinkTo(MI, MBB: ParentBlock, BreakPHIEdge, AllSuccessors);

  // If there are no outputs, it must have side-effects.
  if (!SuccToSinkTo)
    return false;

  // If the instruction to move defines a dead physical register which is live
  // when leaving the basic block, don't move it because it could turn into a
  // "zombie" define of that preg. E.g., EFLAGS.
  for (const MachineOperand &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (Reg == 0 || !Reg.isPhysical())
      continue;
    if (SuccToSinkTo->isLiveIn(Reg))
      return false;
  }

  LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this is a critical edge.
  // Decide if we can sink along it or need to break the edge.
  if (SuccToSinkTo->pred_size() > 1) {
    // We cannot sink a load across a critical edge - there may be stores in
    // other code paths.
    bool TryBreak = false;
    // For non-loads, force Store=true so isSafeToMove only passes for
    // instructions with no memory dependence concerns.
    bool Store =
        MI.mayLoad() ? hasStoreBetween(From: ParentBlock, To: SuccToSinkTo, MI) : true;
    if (!MI.isSafeToMove(SawStore&: Store)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
      TryBreak = true;
    }

    // We don't want to sink across a critical edge if we don't dominate the
    // successor. We could be introducing calculations to new code paths.
    if (!TryBreak && !DT->dominates(A: ParentBlock, B: SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
      TryBreak = true;
    }

    // Don't sink instructions into a cycle.
    if (!TryBreak && CI->getCycle(Block: SuccToSinkTo) &&
        (!CI->getCycle(Block: SuccToSinkTo)->isReducible() ||
         CI->getCycle(Block: SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
      TryBreak = true;
    }

    // Otherwise we are OK with sinking along a critical edge.
    if (!TryBreak)
      LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
    else {
      // Mark this edge as to be split.
      // If the edge can actually be split, the next iteration of the main loop
      // will sink MI in the newly created block.
      bool Status = PostponeSplitCriticalEdge(MI, FromBB: ParentBlock, ToBB: SuccToSinkTo,
                                              BreakPHIEdge);
      if (!Status)
        LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                             "break critical edge\n");
      // The instruction will not be sunk this time.
      return false;
    }
  }

  if (BreakPHIEdge) {
    // BreakPHIEdge is true if all the uses are in the successor MBB being
    // sunken into and they are all PHI nodes. In this case, machine-sink must
    // break the critical edge first.
    bool Status =
        PostponeSplitCriticalEdge(MI, FromBB: ParentBlock, ToBB: SuccToSinkTo, BreakPHIEdge);
    if (!Status)
      LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                           "break critical edge\n");
    // The instruction will not be sunk this time.
    return false;
  }

  // Determine where to insert into. Skip phi nodes.
  MachineBasicBlock::iterator InsertPos =
      SuccToSinkTo->SkipPHIsAndLabels(I: SuccToSinkTo->begin());
  if (blockPrologueInterferes(BB: SuccToSinkTo, End: InsertPos, MI, TRI, TII, MRI)) {
    LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
    return false;
  }

  // Collect debug users of any vreg that this inst defines.
  SmallVector<MIRegs, 4> DbgUsersToSink;
  for (auto &MO : MI.all_defs()) {
    if (!MO.getReg().isVirtual())
      continue;
    auto It = SeenDbgUsers.find(Val: MO.getReg());
    if (It == SeenDbgUsers.end())
      continue;

    // Sink any users that don't pass any other DBG_VALUEs for this variable.
    // The pointer-int pair's int bit marks users that would be re-ordered past
    // another DBG_VALUE of the same variable if sunk.
    auto &Users = It->second;
    for (auto &User : Users) {
      MachineInstr *DbgMI = User.getPointer();
      if (User.getInt()) {
        // This DBG_VALUE would re-order assignments. If we can't copy-propagate
        // it, it can't be recovered. Set it undef.
        if (!attemptDebugCopyProp(SinkInst&: MI, DbgMI&: *DbgMI, Reg: MO.getReg()))
          DbgMI->setDebugValueUndef();
      } else {
        DbgUsersToSink.push_back(
            Elt: {DbgMI, SmallVector<Register, 2>(1, MO.getReg())});
      }
    }
  }

  // After sinking, some debug users may not be dominated any more. If possible,
  // copy-propagate their operands. As it's expensive, don't do this if there's
  // no debuginfo in the program.
  if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
    SalvageUnsunkDebugUsersOfCopy(MI, TargetBlock: SuccToSinkTo);

  performSink(MI, SuccToSinkTo&: *SuccToSinkTo, InsertPos, DbgValuesToSink: DbgUsersToSink);

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  // Note that we have to clear the kill flags for any register this instruction
  // uses as we may sink over another instruction which currently kills the
  // used registers.
  for (MachineOperand &MO : MI.all_uses())
    RegsToClearKillFlags.insert(V: MO.getReg()); // Remember to clear kill flags.

  return true;
}
| 2003 | |
| 2004 | void MachineSinking::SalvageUnsunkDebugUsersOfCopy( |
| 2005 | MachineInstr &MI, MachineBasicBlock *TargetBlock) { |
| 2006 | assert(MI.isCopy()); |
| 2007 | assert(MI.getOperand(1).isReg()); |
| 2008 | |
| 2009 | // Enumerate all users of vreg operands that are def'd. Skip those that will |
| 2010 | // be sunk. For the rest, if they are not dominated by the block we will sink |
| 2011 | // MI into, propagate the copy source to them. |
| 2012 | SmallVector<MachineInstr *, 4> DbgDefUsers; |
| 2013 | SmallVector<Register, 4> DbgUseRegs; |
| 2014 | const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); |
| 2015 | for (auto &MO : MI.all_defs()) { |
| 2016 | if (!MO.getReg().isVirtual()) |
| 2017 | continue; |
| 2018 | DbgUseRegs.push_back(Elt: MO.getReg()); |
| 2019 | for (auto &User : MRI.use_instructions(Reg: MO.getReg())) { |
| 2020 | if (!User.isDebugValue() || DT->dominates(A: TargetBlock, B: User.getParent())) |
| 2021 | continue; |
| 2022 | |
| 2023 | // If is in same block, will either sink or be use-before-def. |
| 2024 | if (User.getParent() == MI.getParent()) |
| 2025 | continue; |
| 2026 | |
| 2027 | assert(User.hasDebugOperandForReg(MO.getReg()) && |
| 2028 | "DBG_VALUE user of vreg, but has no operand for it?" ); |
| 2029 | DbgDefUsers.push_back(Elt: &User); |
| 2030 | } |
| 2031 | } |
| 2032 | |
| 2033 | // Point the users of this copy that are no longer dominated, at the source |
| 2034 | // of the copy. |
| 2035 | for (auto *User : DbgDefUsers) { |
| 2036 | for (auto &Reg : DbgUseRegs) { |
| 2037 | for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) { |
| 2038 | DbgOp.setReg(MI.getOperand(i: 1).getReg()); |
| 2039 | DbgOp.setSubReg(MI.getOperand(i: 1).getSubReg()); |
| 2040 | } |
| 2041 | } |
| 2042 | } |
| 2043 | } |
| 2044 | |
| 2045 | //===----------------------------------------------------------------------===// |
| 2046 | // This pass is not intended to be a replacement or a complete alternative |
| 2047 | // for the pre-ra machine sink pass. It is only designed to sink COPY |
| 2048 | // instructions which should be handled after RA. |
| 2049 | // |
| 2050 | // This pass sinks COPY instructions into a successor block, if the COPY is not |
| 2051 | // used in the current block and the COPY is live-in to a single successor |
| 2052 | // (i.e., doesn't require the COPY to be duplicated). This avoids executing the |
// copy on paths where their results aren't needed. This also exposes
// additional opportunities for dead copy elimination and shrink wrapping.
| 2055 | // |
| 2056 | // These copies were either not handled by or are inserted after the MachineSink |
// pass. As an example of the former case, the MachineSink pass cannot sink
// COPY instructions with allocatable source registers; for AArch64 these
// types of copy instructions are frequently used to move function parameters
// (PhysReg) into virtual registers in the entry block.
| 2061 | // |
| 2062 | // For the machine IR below, this pass will sink %w19 in the entry into its |
| 2063 | // successor (%bb.1) because %w19 is only live-in in %bb.1. |
| 2064 | // %bb.0: |
| 2065 | // %wzr = SUBSWri %w1, 1 |
| 2066 | // %w19 = COPY %w0 |
| 2067 | // Bcc 11, %bb.2 |
| 2068 | // %bb.1: |
| 2069 | // Live Ins: %w19 |
| 2070 | // BL @fun |
| 2071 | // %w0 = ADDWrr %w0, %w19 |
| 2072 | // RET %w0 |
| 2073 | // %bb.2: |
| 2074 | // %w0 = COPY %wzr |
| 2075 | // RET %w0 |
| 2076 | // As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be |
| 2077 | // able to see %bb.0 as a candidate. |
| 2078 | //===----------------------------------------------------------------------===// |
namespace {

/// Implementation of the post-RA COPY-sinking transformation. Shared by the
/// legacy and new pass-manager wrappers.
class PostRAMachineSinkingImpl {
  /// Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;

  /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
  /// entry in this map for each unit it touches. The DBG_VALUE's entry
  /// consists of a pointer to the instruction itself, and a vector of registers
  /// referred to by the instruction that overlap the key register unit.
  DenseMap<MCRegUnit, SmallVector<MIRegs, 2>> SeenDbgInstrs;

  /// Sink Copy instructions unused in the same block close to their uses in
  /// successors.
  bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
                     const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);

public:
  bool run(MachineFunction &MF);
};

/// Legacy pass-manager wrapper around PostRAMachineSinkingImpl.
class PostRAMachineSinkingLegacy : public MachineFunctionPass {
public:
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
  PostRAMachineSinkingLegacy() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return "PostRA Machine Sink"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    // This pass runs after register allocation, so no virtual registers.
    return MachineFunctionProperties().setNoVRegs();
  }
};

} // namespace
| 2119 | |
// Legacy pass registration boilerplate for the post-RA machine sinking pass.
char PostRAMachineSinkingLegacy::ID = 0;
char &llvm::PostRAMachineSinkingID = PostRAMachineSinkingLegacy::ID;

INITIALIZE_PASS(PostRAMachineSinkingLegacy, "postra-machine-sink",
                "PostRA Machine Sink", false, false)
| 2125 | |
| 2126 | static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, Register Reg, |
| 2127 | const TargetRegisterInfo *TRI) { |
| 2128 | LiveRegUnits LiveInRegUnits(*TRI); |
| 2129 | LiveInRegUnits.addLiveIns(MBB); |
| 2130 | return !LiveInRegUnits.available(Reg); |
| 2131 | } |
| 2132 | |
| 2133 | static MachineBasicBlock * |
| 2134 | getSingleLiveInSuccBB(MachineBasicBlock &CurBB, |
| 2135 | const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs, |
| 2136 | Register Reg, const TargetRegisterInfo *TRI) { |
| 2137 | // Try to find a single sinkable successor in which Reg is live-in. |
| 2138 | MachineBasicBlock *BB = nullptr; |
| 2139 | for (auto *SI : SinkableBBs) { |
| 2140 | if (aliasWithRegsInLiveIn(MBB&: *SI, Reg, TRI)) { |
| 2141 | // If BB is set here, Reg is live-in to at least two sinkable successors, |
| 2142 | // so quit. |
| 2143 | if (BB) |
| 2144 | return nullptr; |
| 2145 | BB = SI; |
| 2146 | } |
| 2147 | } |
| 2148 | // Reg is not live-in to any sinkable successors. |
| 2149 | if (!BB) |
| 2150 | return nullptr; |
| 2151 | |
| 2152 | // Check if any register aliased with Reg is live-in in other successors. |
| 2153 | for (auto *SI : CurBB.successors()) { |
| 2154 | if (!SinkableBBs.count(Ptr: SI) && aliasWithRegsInLiveIn(MBB&: *SI, Reg, TRI)) |
| 2155 | return nullptr; |
| 2156 | } |
| 2157 | return BB; |
| 2158 | } |
| 2159 | |
| 2160 | static MachineBasicBlock * |
| 2161 | getSingleLiveInSuccBB(MachineBasicBlock &CurBB, |
| 2162 | const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs, |
| 2163 | ArrayRef<Register> DefedRegsInCopy, |
| 2164 | const TargetRegisterInfo *TRI) { |
| 2165 | MachineBasicBlock *SingleBB = nullptr; |
| 2166 | for (auto DefReg : DefedRegsInCopy) { |
| 2167 | MachineBasicBlock *BB = |
| 2168 | getSingleLiveInSuccBB(CurBB, SinkableBBs, Reg: DefReg, TRI); |
| 2169 | if (!BB || (SingleBB && SingleBB != BB)) |
| 2170 | return nullptr; |
| 2171 | SingleBB = BB; |
| 2172 | } |
| 2173 | return SingleBB; |
| 2174 | } |
| 2175 | |
| 2176 | static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, |
| 2177 | const SmallVectorImpl<unsigned> &UsedOpsInCopy, |
| 2178 | const LiveRegUnits &UsedRegUnits, |
| 2179 | const TargetRegisterInfo *TRI) { |
| 2180 | for (auto U : UsedOpsInCopy) { |
| 2181 | MachineOperand &MO = MI->getOperand(i: U); |
| 2182 | Register SrcReg = MO.getReg(); |
| 2183 | if (!UsedRegUnits.available(Reg: SrcReg)) { |
| 2184 | MachineBasicBlock::iterator NI = std::next(x: MI->getIterator()); |
| 2185 | for (MachineInstr &UI : make_range(x: NI, y: CurBB.end())) { |
| 2186 | if (UI.killsRegister(Reg: SrcReg, TRI)) { |
| 2187 | UI.clearRegisterKills(Reg: SrcReg, RegInfo: TRI); |
| 2188 | MO.setIsKill(true); |
| 2189 | break; |
| 2190 | } |
| 2191 | } |
| 2192 | } |
| 2193 | } |
| 2194 | } |
| 2195 | |
| 2196 | static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, |
| 2197 | const SmallVectorImpl<unsigned> &UsedOpsInCopy, |
| 2198 | const SmallVectorImpl<Register> &DefedRegsInCopy) { |
| 2199 | for (Register DefReg : DefedRegsInCopy) |
| 2200 | SuccBB->removeLiveInOverlappedWith(Reg: DefReg); |
| 2201 | |
| 2202 | for (auto U : UsedOpsInCopy) |
| 2203 | SuccBB->addLiveIn(PhysReg: MI->getOperand(i: U).getReg()); |
| 2204 | SuccBB->sortUniqueLiveIns(); |
| 2205 | } |
| 2206 | |
| 2207 | static bool hasRegisterDependency(MachineInstr *MI, |
| 2208 | SmallVectorImpl<unsigned> &UsedOpsInCopy, |
| 2209 | SmallVectorImpl<Register> &DefedRegsInCopy, |
| 2210 | LiveRegUnits &ModifiedRegUnits, |
| 2211 | LiveRegUnits &UsedRegUnits) { |
| 2212 | bool HasRegDependency = false; |
| 2213 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { |
| 2214 | MachineOperand &MO = MI->getOperand(i); |
| 2215 | if (!MO.isReg()) |
| 2216 | continue; |
| 2217 | Register Reg = MO.getReg(); |
| 2218 | if (!Reg) |
| 2219 | continue; |
| 2220 | if (MO.isDef()) { |
| 2221 | if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) { |
| 2222 | HasRegDependency = true; |
| 2223 | break; |
| 2224 | } |
| 2225 | DefedRegsInCopy.push_back(Elt: Reg); |
| 2226 | |
| 2227 | // FIXME: instead of isUse(), readsReg() would be a better fix here, |
| 2228 | // For example, we can ignore modifications in reg with undef. However, |
| 2229 | // it's not perfectly clear if skipping the internal read is safe in all |
| 2230 | // other targets. |
| 2231 | } else if (MO.isUse()) { |
| 2232 | if (!ModifiedRegUnits.available(Reg)) { |
| 2233 | HasRegDependency = true; |
| 2234 | break; |
| 2235 | } |
| 2236 | UsedOpsInCopy.push_back(Elt: i); |
| 2237 | } |
| 2238 | } |
| 2239 | return HasRegDependency; |
| 2240 | } |
| 2241 | |
| 2242 | bool PostRAMachineSinkingImpl::tryToSinkCopy(MachineBasicBlock &CurBB, |
| 2243 | MachineFunction &MF, |
| 2244 | const TargetRegisterInfo *TRI, |
| 2245 | const TargetInstrInfo *TII) { |
| 2246 | SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs; |
| 2247 | // FIXME: For now, we sink only to a successor which has a single predecessor |
| 2248 | // so that we can directly sink COPY instructions to the successor without |
| 2249 | // adding any new block or branch instruction. |
| 2250 | for (MachineBasicBlock *SI : CurBB.successors()) |
| 2251 | if (!SI->livein_empty() && SI->pred_size() == 1) |
| 2252 | SinkableBBs.insert(Ptr: SI); |
| 2253 | |
| 2254 | if (SinkableBBs.empty()) |
| 2255 | return false; |
| 2256 | |
| 2257 | bool Changed = false; |
| 2258 | |
| 2259 | // Track which registers have been modified and used between the end of the |
| 2260 | // block and the current instruction. |
| 2261 | ModifiedRegUnits.clear(); |
| 2262 | UsedRegUnits.clear(); |
| 2263 | SeenDbgInstrs.clear(); |
| 2264 | |
| 2265 | for (MachineInstr &MI : llvm::make_early_inc_range(Range: llvm::reverse(C&: CurBB))) { |
| 2266 | // Track the operand index for use in Copy. |
| 2267 | SmallVector<unsigned, 2> UsedOpsInCopy; |
| 2268 | // Track the register number defed in Copy. |
| 2269 | SmallVector<Register, 2> DefedRegsInCopy; |
| 2270 | |
| 2271 | // We must sink this DBG_VALUE if its operand is sunk. To avoid searching |
| 2272 | // for DBG_VALUEs later, record them when they're encountered. |
| 2273 | if (MI.isDebugValue() && !MI.isDebugRef()) { |
| 2274 | SmallDenseMap<MCRegUnit, SmallVector<Register, 2>, 4> MIUnits; |
| 2275 | bool IsValid = true; |
| 2276 | for (MachineOperand &MO : MI.debug_operands()) { |
| 2277 | if (MO.isReg() && MO.getReg().isPhysical()) { |
| 2278 | // Bail if we can already tell the sink would be rejected, rather |
| 2279 | // than needlessly accumulating lots of DBG_VALUEs. |
| 2280 | if (hasRegisterDependency(MI: &MI, UsedOpsInCopy, DefedRegsInCopy, |
| 2281 | ModifiedRegUnits, UsedRegUnits)) { |
| 2282 | IsValid = false; |
| 2283 | break; |
| 2284 | } |
| 2285 | |
| 2286 | // Record debug use of each reg unit. |
| 2287 | for (MCRegUnit Unit : TRI->regunits(Reg: MO.getReg())) |
| 2288 | MIUnits[Unit].push_back(Elt: MO.getReg()); |
| 2289 | } |
| 2290 | } |
| 2291 | if (IsValid) { |
| 2292 | for (auto &RegOps : MIUnits) |
| 2293 | SeenDbgInstrs[RegOps.first].emplace_back(Args: &MI, |
| 2294 | Args: std::move(RegOps.second)); |
| 2295 | } |
| 2296 | continue; |
| 2297 | } |
| 2298 | |
| 2299 | // Don't postRASink instructions that the target prefers not to sink. |
| 2300 | if (!TII->shouldPostRASink(MI)) |
| 2301 | continue; |
| 2302 | |
| 2303 | if (MI.isDebugOrPseudoInstr()) |
| 2304 | continue; |
| 2305 | |
| 2306 | // Do not move any instruction across function call. |
| 2307 | if (MI.isCall()) |
| 2308 | return false; |
| 2309 | |
| 2310 | if (!MI.isCopy() || !MI.getOperand(i: 0).isRenamable()) { |
| 2311 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
| 2312 | TRI); |
| 2313 | continue; |
| 2314 | } |
| 2315 | |
| 2316 | // Don't sink the COPY if it would violate a register dependency. |
| 2317 | if (hasRegisterDependency(MI: &MI, UsedOpsInCopy, DefedRegsInCopy, |
| 2318 | ModifiedRegUnits, UsedRegUnits)) { |
| 2319 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
| 2320 | TRI); |
| 2321 | continue; |
| 2322 | } |
| 2323 | assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) && |
| 2324 | "Unexpect SrcReg or DefReg" ); |
| 2325 | MachineBasicBlock *SuccBB = |
| 2326 | getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI); |
| 2327 | // Don't sink if we cannot find a single sinkable successor in which Reg |
| 2328 | // is live-in. |
| 2329 | if (!SuccBB) { |
| 2330 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
| 2331 | TRI); |
| 2332 | continue; |
| 2333 | } |
| 2334 | assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) && |
| 2335 | "Unexpected predecessor" ); |
| 2336 | |
| 2337 | // Collect DBG_VALUEs that must sink with this copy. We've previously |
| 2338 | // recorded which reg units that DBG_VALUEs read, if this instruction |
| 2339 | // writes any of those units then the corresponding DBG_VALUEs must sink. |
| 2340 | MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap; |
| 2341 | for (auto &MO : MI.all_defs()) { |
| 2342 | for (MCRegUnit Unit : TRI->regunits(Reg: MO.getReg())) { |
| 2343 | for (const auto &MIRegs : SeenDbgInstrs.lookup(Val: Unit)) { |
| 2344 | auto &Regs = DbgValsToSinkMap[MIRegs.first]; |
| 2345 | llvm::append_range(C&: Regs, R: MIRegs.second); |
| 2346 | } |
| 2347 | } |
| 2348 | } |
| 2349 | auto DbgValsToSink = DbgValsToSinkMap.takeVector(); |
| 2350 | |
| 2351 | LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB); |
| 2352 | |
| 2353 | MachineBasicBlock::iterator InsertPos = |
| 2354 | SuccBB->SkipPHIsAndLabels(I: SuccBB->begin()); |
| 2355 | if (blockPrologueInterferes(BB: SuccBB, End: InsertPos, MI, TRI, TII, MRI: nullptr)) { |
| 2356 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
| 2357 | TRI); |
| 2358 | LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n" ); |
| 2359 | continue; |
| 2360 | } |
| 2361 | |
| 2362 | // Clear the kill flag if SrcReg is killed between MI and the end of the |
| 2363 | // block. |
| 2364 | clearKillFlags(MI: &MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); |
| 2365 | performSink(MI, SuccToSinkTo&: *SuccBB, InsertPos, DbgValuesToSink: DbgValsToSink); |
| 2366 | updateLiveIn(MI: &MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); |
| 2367 | |
| 2368 | Changed = true; |
| 2369 | ++NumPostRACopySink; |
| 2370 | } |
| 2371 | return Changed; |
| 2372 | } |
| 2373 | |
| 2374 | bool PostRAMachineSinkingImpl::run(MachineFunction &MF) { |
| 2375 | bool Changed = false; |
| 2376 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
| 2377 | const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
| 2378 | |
| 2379 | ModifiedRegUnits.init(TRI: *TRI); |
| 2380 | UsedRegUnits.init(TRI: *TRI); |
| 2381 | for (auto &BB : MF) |
| 2382 | Changed |= tryToSinkCopy(CurBB&: BB, MF, TRI, TII); |
| 2383 | |
| 2384 | return Changed; |
| 2385 | } |
| 2386 | |
| 2387 | bool PostRAMachineSinkingLegacy::runOnMachineFunction(MachineFunction &MF) { |
| 2388 | if (skipFunction(F: MF.getFunction())) |
| 2389 | return false; |
| 2390 | |
| 2391 | return PostRAMachineSinkingImpl().run(MF); |
| 2392 | } |
| 2393 | |
| 2394 | PreservedAnalyses |
| 2395 | PostRAMachineSinkingPass::run(MachineFunction &MF, |
| 2396 | MachineFunctionAnalysisManager &MFAM) { |
| 2397 | MFPropsModifier _(*this, MF); |
| 2398 | |
| 2399 | if (!PostRAMachineSinkingImpl().run(MF)) |
| 2400 | return PreservedAnalyses::all(); |
| 2401 | |
| 2402 | PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses(); |
| 2403 | PA.preserveSet<CFGAnalyses>(); |
| 2404 | return PA; |
| 2405 | } |
| 2406 | |