//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs global common subexpression elimination on machine
// instructions using a scoped hash table based value numbering scheme. It
// must be run while the machine function is still in SSA form.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineCSE.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "machine-cse"

STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpressions eliminated");
STATISTIC(NumPREs, "Number of partially redundant expressions"
                   " transformed to fully redundant ones");
STATISTIC(NumPhysCSEs,
          "Number of physreg-referencing common subexprs eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg-referencing CSes eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");

// Threshold to avoid excessive cost when computing isProfitableToCSE.
static cl::opt<int>
    CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
                    cl::desc("Threshold for the size of CSUses"));

static cl::opt<bool> AggressiveMachineCSE(
    "aggressive-machine-cse", cl::Hidden, cl::init(false),
    cl::desc("Override the profitability heuristics for Machine CSE"));

namespace {

class MachineCSEImpl {
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineDominatorTree *DT = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  MachineBlockFrequencyInfo *MBFI = nullptr;

public:
  MachineCSEImpl(MachineDominatorTree *DT, MachineBlockFrequencyInfo *MBFI)
      : DT(DT), MBFI(MBFI) {}
  bool run(MachineFunction &MF);

private:
  using AllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MachineInstr *, unsigned>>;
  using ScopedHTType =
      ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                      AllocatorTy>;
  using ScopeType = ScopedHTType::ScopeTy;
  using PhysDefVector = SmallVector<std::pair<unsigned, Register>, 2>;

  unsigned LookAheadLimit = 0;
  DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
  DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
      PREMap;
  ScopedHTType VNT;
  SmallVector<MachineInstr *, 64> Exps;
  unsigned CurrVN = 0;

  bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB);
  bool isPhysDefTriviallyDead(MCRegister Reg,
                              MachineBasicBlock::const_iterator I,
                              MachineBasicBlock::const_iterator E) const;
  bool hasLivePhysRegDefUses(const MachineInstr *MI,
                             const MachineBasicBlock *MBB,
                             SmallSet<MCRegister, 8> &PhysRefs,
                             PhysDefVector &PhysDefs, bool &PhysUseDef) const;
  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                        const SmallSet<MCRegister, 8> &PhysRefs,
                        const PhysDefVector &PhysDefs, bool &NonLocal) const;
  bool isCSECandidate(MachineInstr *MI);
  bool isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB,
                         MachineInstr *MI);
  void EnterScope(MachineBasicBlock *MBB);
  void ExitScope(MachineBasicBlock *MBB);
  bool ProcessBlockCSE(MachineBasicBlock *MBB);
  void ExitScopeIfDone(MachineDomTreeNode *Node,
                       DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren);
  bool PerformCSE(MachineDomTreeNode *Node);

  bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
  bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
  bool PerformSimplePRE(MachineDominatorTree *DT);
  /// Heuristic to see if it's profitable to move common computations of MBB
  /// and MBB1 to CandidateBB.
  bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
                               MachineBasicBlock *MBB, MachineBasicBlock *MBB1);
  void releaseMemory();
};

class MachineCSELegacy : public MachineFunctionPass {
public:
  static char ID; // Pass identification

  MachineCSELegacy() : MachineFunctionPass(ID) {
    initializeMachineCSELegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addPreservedID(MachineLoopInfoID);
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }
};
} // end anonymous namespace

char MachineCSELegacy::ID = 0;

char &llvm::MachineCSELegacyID = MachineCSELegacy::ID;

INITIALIZE_PASS_BEGIN(MachineCSELegacy, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(MachineCSELegacy, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination", false, false)

/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed.
bool MachineCSEImpl::PerformTrivialCopyPropagation(MachineInstr *MI,
                                                   MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineOperand &MO : MI->all_uses()) {
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI || !DefMI->isCopy())
      continue;
    Register SrcReg = DefMI->getOperand(1).getReg();
    if (!SrcReg.isVirtual())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //   RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                      SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way to find a constrained
    // subregister class given a super-reg class and subreg index.
    if (DefMI->getOperand(1).getSubReg())
      continue;
    if (!MRI->constrainRegAttrs(SrcReg, Reg))
      continue;
    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
    LLVM_DEBUG(dbgs() << "*** to: " << *MI);

    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    MRI->clearKillFlags(SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      // If (and only if) we've eliminated all uses of the copy, also
      // copy-propagate to any debug-users of MI, or they'll be left using
      // an undefined value.
      DefMI->changeDebugValuesDefReg(SrcReg);

      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}

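/// Return true if the given physical register def is trivially dead at this
/// point: scanning forward from I (up to LookAheadLimit instructions), Reg or
/// an alias is redefined before any use is seen. Conservatively returns false
/// if the end of the block or the scan limit is reached first.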
bool MachineCSEImpl::isPhysDefTriviallyDead(
    MCRegister Reg, MachineBasicBlock::const_iterator I,
    MachineBasicBlock::const_iterator E) const {
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    I = skipDebugInstructionsForward(I, E);

    if (I == E)
      // Reached end of block, we don't know if register is dead or not.
      return false;

    bool SeenDef = false;
    for (const MachineOperand &MO : I->operands()) {
      if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
        SeenDef = true;
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (!TRI->regsOverlap(MO.getReg(), Reg))
        continue;
      if (MO.isUse())
        // Found a use!
        return false;
      SeenDef = true;
    }
    if (SeenDef)
      // Saw a def of Reg (or an alias) before encountering any use; it's
      // trivially dead.
      return true;

    --LookAheadLeft;
    ++I;
  }
  return false;
}

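/// Return true if reads of the given physical register can be assumed to
/// always yield the same value here: the register is caller preserved, the
/// use is ignorable to the target, or the register is a constant physreg.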
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
                                            const MachineOperand &MO,
                                            const MachineFunction &MF,
                                            const TargetRegisterInfo &TRI,
                                            const TargetInstrInfo &TII) {
  // MachineRegisterInfo::isConstantPhysReg, called directly below, expects
  // the reserved registers to be frozen. That doesn't cause a problem
  // post-ISel as most (if not all) targets freeze reserved registers right
  // after ISel.
  //
  // It does cause issues mid-GlobalISel, however, hence the additional
  // reservedRegsFrozen check.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
         (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
}

/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
/// writes physical registers (except for dead defs of physical registers).
/// Any physical register defs that are not trivially dead are collected in
/// PhysDefs, and PhysUseDef is set if the instruction both uses and defines
/// the same physical register.
bool MachineCSEImpl::hasLivePhysRegDefUses(const MachineInstr *MI,
                                           const MachineBasicBlock *MBB,
                                           SmallSet<MCRegister, 8> &PhysRefs,
                                           PhysDefVector &PhysDefs,
                                           bool &PhysUseDef) const {
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->all_uses()) {
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isVirtual())
      continue;
    // Reading either caller preserved or constant physregs is ok.
    if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
                                         *TII))
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }

  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (const auto &MOP : llvm::enumerate(MI->operands())) {
    const MachineOperand &MO = MOP.value();
    if (!MO.isReg() || !MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isVirtual())
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg.asMCReg()))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead".
    // That's common since this pass is run before livevariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
      PhysDefs.emplace_back(MOP.index(), Reg);
  }

  // Finally, add all defs to PhysRefs as well.
  for (const auto &Def : PhysDefs)
    for (MCRegAliasIterator AI(Def.second, TRI, true); AI.isValid(); ++AI)
      PhysRefs.insert(*AI);

  return !PhysRefs.empty();
}

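/// Return true if none of the physical registers in PhysRefs is clobbered
/// between the common subexpression CSMI and MI, i.e. the physical register
/// values produced at CSMI still reach MI. NonLocal is set to true if the
/// scan had to cross from CSMI's block into MI's block. Conservatively
/// returns false if the look-ahead limit is exceeded.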
bool MachineCSEImpl::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                      const SmallSet<MCRegister, 8> &PhysRefs,
                                      const PhysDefVector &PhysDefs,
                                      bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (const auto &PhysDef : PhysDefs) {
      if (MRI->isAllocatable(PhysDef.second) ||
          MRI->isReserved(PhysDef.second))
        // Avoid extending live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugInstr())
      ++I;

    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register MOReg = MO.getReg();
      if (MOReg.isVirtual())
        continue;
      if (PhysRefs.count(MOReg.asMCReg()))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}

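/// Return true if this instruction may legally be considered for CSE: it is
/// not a copy, position marker, or other special instruction, has no
/// unmodeled side effects, and any load it performs is a dereferenceable
/// invariant load.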
bool MachineCSEImpl::isCSECandidate(MachineInstr *MI) {
  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
      MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() ||
      MI->isFakeUse())
    return false;

  // Ignore copies.
  if (MI->isCopyLike())
    return false;

  // Ignore stuff that we obviously can't move.
  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
      MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
    return false;

  if (MI->mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, the
    // load can be CSEd like any other instruction.
    if (!MI->isDereferenceableInvariantLoad())
      // FIXME: we should be able to hoist loads with no other side effects if
      // there are no other instructions which can change memory in this loop.
      // This is a trivial form of alias analysis.
      return false;
  }

  // Ignore stack guard loads, otherwise the register that holds the CSEed
  // value may be spilled and get loaded back with corrupted data.
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
    return false;

  return true;
}

/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg. CSBB is the basic block where CSReg is
/// defined.
bool MachineCSEImpl::isProfitableToCSE(Register CSReg, Register Reg,
                                       MachineBasicBlock *CSBB,
                                       MachineInstr *MI) {
  if (AggressiveMachineCSE)
    return true;

  // FIXME: Heuristic that works around the lack of live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (CSReg.isVirtual() && Reg.isVirtual()) {
    MayIncreasePressure = false;
    SmallPtrSet<MachineInstr*, 8> CSUses;
    int NumOfUses = 0;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
      CSUses.insert(&MI);
      // Too costly to compute if NumOfUses is very large. Conservatively
      // assume MayIncreasePressure to avoid spending too much time here.
      if (++NumOfUses > CSUsesThreshold) {
        MayIncreasePressure = true;
        break;
      }
    }
    if (!MayIncreasePressure)
      for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
        if (!CSUses.count(&MI)) {
          MayIncreasePressure = true;
          break;
        }
      }
  }
  if (!MayIncreasePressure) return true;

  // Heuristic #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (TII->isAsCheapAsAMove(*MI)) {
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }

  // Heuristic #2: If the expression doesn't use a virtual register and the
  // only uses of the redundant computation are copies, do not CSE.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->all_uses()) {
    if (MO.getReg().isVirtual()) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristic #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
    HasPHI |= UseMI.isPHI();
    if (UseMI.getParent() == MI->getParent())
      return true;
  }

  return !HasPHI;
}

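/// Open a new value numbering scope for MBB so that expressions recorded
/// while processing the block (and its dominator tree descendants) can be
/// discarded once the block's subtree is done.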
void MachineCSEImpl::EnterScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  ScopeType *Scope = new ScopeType(VNT);
  ScopeMap[MBB] = Scope;
}

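/// Close and discard the value numbering scope previously opened for MBB.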
void MachineCSEImpl::ExitScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  assert(SI != ScopeMap.end());
  delete SI->second;
  ScopeMap.erase(SI);
}

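/// Perform CSE on the instructions of a single basic block: value number each
/// candidate, and when an equivalent instruction is already available in the
/// current scope and the replacement is both legal and profitable, rewrite
/// the defined registers and erase the instruction.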
bool MachineCSEImpl::ProcessBlockCSE(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<Register, Register>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<Register, 2> ImplicitDefs;
  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
    if (!isCSECandidate(&MI))
      continue;

    bool FoundCSE = VNT.count(&MI);
    if (!FoundCSE) {
      // Using trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(&MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI.isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(&MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI.isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != &MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<MCRegister, 8> PhysRefs;
    PhysDefVector PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE &&
        hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block
      // and it also defines the physical register which is not clobbered
      // in between and the physical register uses were not clobbered.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(&MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(&MI);
    MachineInstr *CSMI = Exps[CSVN];
    LLVM_DEBUG(dbgs() << "Examining: " << MI);
    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Prevent CSE-ing non-local convergent instructions.
    // LLVM's current definition of `isConvergent` does not necessarily prove
    // that non-local CSE is illegal. The following check extends the definition
    // of `isConvergent` to assume a convergent instruction is dependent not
    // only on additional conditions, but also on fewer conditions. LLVM does
    // not have a MachineInstr attribute which expresses this extended
    // definition, so it's necessary to use `isConvergent` to prevent illegally
    // CSE-ing the subset of `isConvergent` instructions which do fall into this
    // extended definition.
    if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
      LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
                           "different BBs, avoid CSE!\n");
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
      continue;
    }

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI.getNumDefs();

    for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI.getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register OldReg = MO.getReg();
      Register NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(OldReg.isVirtual() && NewReg.isVirtual() &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the new instruction cannot exist
      // within the constraints (register class, bank, or low-level type) of
      // the old instruction.
      if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
        LLVM_DEBUG(
            dbgs() << "*** Not the same register constraints, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.emplace_back(OldReg, NewReg);
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (const std::pair<Register, Register> &CSEPair : CSEPairs) {
        Register OldReg = CSEPair.first;
        Register NewReg = CSEPair.second;
        // OldReg may have been unused but is used now, clear the Dead flag.
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?");
        Def->clearRegisterDeads(NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(OldReg, NewReg);
        MRI->clearKillFlags(NewReg);
      }

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
      for (const auto &PhysDef : PhysDefs)
        if (!MI.getOperand(PhysDef.first).isDead())
          CSMI->getOperand(PhysDef.first).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... implicit-def %nzcv    <- CSMI
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
      //   csinc ... implicit killed %nzcv
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
      if (CSMI->getParent() == MI.getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, TRI, /*isKill=*/true))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          auto LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn.second))
            MBB->addLiveIn(LiveIn.second);
        }
        ++NumCrossBBCSEs;
      }

      MI.eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}

/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
/// dominator tree node if it's a leaf or all of its children are done. Walk
/// up the dominator tree to destroy ancestors which are now done.
void MachineCSEImpl::ExitScopeIfDone(
    MachineDomTreeNode *Node,
    DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose children are all done.
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}

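/// Walk the dominator tree rooted at Node in DFS order and run
/// ProcessBlockCSE on each block inside a scoped value numbering table, so
/// that expressions from dominating blocks are visible to dominated blocks.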
bool MachineCSEImpl::PerformCSE(MachineDomTreeNode *Node) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  CurrVN = 0;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(Node);
  do {
    Node = WorkList.pop_back_val();
    Scopes.push_back(Node);
    OpenChildren[Node] = Node->getNumChildren();
    append_range(WorkList, Node->children());
  } while (!WorkList.empty());

  // Now perform CSE.
  bool Changed = false;
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();
    EnterScope(MBB);
    Changed |= ProcessBlockCSE(MBB);
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren);
  }

  return Changed;
}

// We use stronger checks for PRE candidates than for CSE ones, so that the
// checks done inside ProcessBlockCSE(), not only those in isCSECandidate(),
// are covered. This helps exclude instructions created by PRE that won't be
// CSEed later.
bool MachineCSEImpl::isPRECandidate(MachineInstr *MI,
                                    SmallSet<MCRegister, 8> &PhysRefs) {
  if (!isCSECandidate(MI) ||
      MI->isNotDuplicable() ||
      MI->mayLoad() ||
      TII->isAsCheapAsAMove(*MI) ||
      MI->getNumDefs() != 1 ||
      MI->getNumExplicitDefs() != 1)
    return false;

  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && !MO.getReg().isVirtual()) {
      if (MO.isDef())
        return false;
      else
        PhysRefs.insert(MO.getReg());
    }
  }

  return true;
}

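/// Look for instructions in MBB that compute the same value as an instruction
/// recorded earlier in PREMap, and duplicate the computation into the nearest
/// common dominator so that the redundancy becomes full and can be removed by
/// the following CSE run.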
bool MachineCSEImpl::ProcessBlockPRE(MachineDominatorTree *DT,
                                     MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
    SmallSet<MCRegister, 8> PhysRefs;
    if (!isPRECandidate(&MI, PhysRefs))
      continue;

    auto [It, Inserted] = PREMap.try_emplace(&MI, MBB);
    if (Inserted)
      continue;

    auto *MBB1 = It->second;
    assert(
        !DT->properlyDominates(MBB, MBB1) &&
        "MBB cannot properly dominate MBB1 during a DFS over the dominator "
        "tree!");
    auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
    if (!CMBB->isLegalToHoistInto())
      continue;

    if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
      continue;

    // Two instructions are partially redundant if their basic blocks are
    // reachable from one another but neither dominates the other.
    if (CMBB != MBB1) {
      auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
      if (BB != nullptr && BB1 != nullptr &&
          (isPotentiallyReachable(BB1, BB) ||
           isPotentiallyReachable(BB, BB1))) {
        // The following check extends the definition of `isConvergent` to
        // assume a convergent instruction is dependent not only on additional
        // conditions, but also on fewer conditions. LLVM does not have a
        // MachineInstr attribute which expresses this extended definition, so
        // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
        // subset of `isConvergent` instructions which do fall into this
        // extended definition.
        if (MI.isConvergent() && CMBB != MBB)
          continue;

        // If this instruction uses physical registers then we can only do PRE
        // if it's using the value that is live at the place we're hoisting to.
        bool NonLocal;
        PhysDefVector PhysDefs;
        if (!PhysRefs.empty() &&
            !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
                              PhysDefs, NonLocal))
          continue;

        assert(MI.getOperand(0).isDef() &&
               "First operand of instr with one explicit def must be this def");
        Register VReg = MI.getOperand(0).getReg();
        Register NewReg = MRI->cloneVirtualRegister(VReg);
        if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
          continue;
        MachineInstr &NewMI =
            TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);

        // When hoisting, make sure we don't carry the debug location of
        // the original instruction, as that's not correct and can cause
        // unexpected jumps when debugging optimized code.
        auto EmptyDL = DebugLoc();
        NewMI.setDebugLoc(EmptyDL);

        NewMI.getOperand(0).setReg(NewReg);

        PREMap[&MI] = CMBB;
        ++NumPREs;
        Changed = true;
      }
    }
  }
  return Changed;
}

// This simple PRE (partial redundancy elimination) pass doesn't actually
// eliminate partial redundancy but transforms it to full redundancy,
// anticipating that the next CSE step will eliminate this created redundancy.
// If CSE doesn't eliminate this, then the created instruction will remain dead
// and be eliminated later by the Remove Dead Machine Instructions pass.
bool MachineCSEImpl::PerformSimplePRE(MachineDominatorTree *DT) {
  SmallVector<MachineDomTreeNode *, 32> BBs;

  PREMap.clear();
  bool Changed = false;
  BBs.push_back(DT->getRootNode());
  do {
    auto Node = BBs.pop_back_val();
    append_range(BBs, Node->children());

    MachineBasicBlock *MBB = Node->getBlock();
    Changed |= ProcessBlockPRE(DT, MBB);

  } while (!BBs.empty());

  return Changed;
}

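/// Hoisting is always considered profitable when optimizing for size;
/// otherwise it is profitable only if CandidateBB is not expected to execute
/// more often than MBB and MBB1 combined.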
bool MachineCSEImpl::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
                                             MachineBasicBlock *MBB,
                                             MachineBasicBlock *MBB1) {
  if (CandidateBB->getParent()->getFunction().hasMinSize())
    return true;
  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
  assert(DT->dominates(CandidateBB, MBB1) &&
         "CandidateBB should dominate MBB1");
  return MBFI->getBlockFreq(CandidateBB) <=
         MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
}

void MachineCSEImpl::releaseMemory() {
  ScopeMap.clear();
  PREMap.clear();
  Exps.clear();
}

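/// Run simple PRE first to turn partial redundancies into full ones, then run
/// dominator-tree scoped CSE over the whole machine function.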
bool MachineCSEImpl::run(MachineFunction &MF) {
  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
  bool ChangedPRE, ChangedCSE;
  ChangedPRE = PerformSimplePRE(DT);
  ChangedCSE = PerformCSE(DT->getRootNode());
  releaseMemory();
  return ChangedPRE || ChangedCSE;
}

PreservedAnalyses MachineCSEPass::run(MachineFunction &MF,
                                      MachineFunctionAnalysisManager &MFAM) {
  MFPropsModifier _(*this, MF);

  MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  MachineBlockFrequencyInfo &MBFI =
      MFAM.getResult<MachineBlockFrequencyAnalysis>(MF);
  MachineCSEImpl Impl(&MDT, &MBFI);
  bool Changed = Impl.run(MF);
  if (!Changed)
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserve<MachineLoopAnalysis>();
  PA.preserve<MachineDominatorTreeAnalysis>();
  PA.preserve<MachineBlockFrequencyAnalysis>();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

bool MachineCSELegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineDominatorTree &MDT =
      getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  MachineBlockFrequencyInfo &MBFI =
      getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
  MachineCSEImpl Impl(&MDT, &MBFI);
  return Impl.run(MF);
}
| 981 | |