| 1 | //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This pass performs global common subexpression elimination on machine |
| 10 | // instructions using a scoped hash table based value numbering scheme. It |
| 11 | // must be run while the machine function is still in SSA form. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "llvm/CodeGen/MachineCSE.h" |
| 16 | #include "llvm/ADT/DenseMap.h" |
| 17 | #include "llvm/ADT/ScopedHashTable.h" |
| 18 | #include "llvm/ADT/SmallPtrSet.h" |
| 19 | #include "llvm/ADT/SmallSet.h" |
| 20 | #include "llvm/ADT/SmallVector.h" |
| 21 | #include "llvm/ADT/Statistic.h" |
| 22 | #include "llvm/Analysis/CFG.h" |
| 23 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 24 | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
| 25 | #include "llvm/CodeGen/MachineDominators.h" |
| 26 | #include "llvm/CodeGen/MachineFunction.h" |
| 27 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 28 | #include "llvm/CodeGen/MachineInstr.h" |
| 29 | #include "llvm/CodeGen/MachineLoopInfo.h" |
| 30 | #include "llvm/CodeGen/MachineOperand.h" |
| 31 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 32 | #include "llvm/CodeGen/Passes.h" |
| 33 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 34 | #include "llvm/CodeGen/TargetOpcodes.h" |
| 35 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 36 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| 37 | #include "llvm/InitializePasses.h" |
| 38 | #include "llvm/MC/MCRegister.h" |
| 39 | #include "llvm/MC/MCRegisterInfo.h" |
| 40 | #include "llvm/Pass.h" |
| 41 | #include "llvm/Support/Allocator.h" |
| 42 | #include "llvm/Support/Debug.h" |
| 43 | #include "llvm/Support/RecyclingAllocator.h" |
| 44 | #include "llvm/Support/raw_ostream.h" |
| 45 | #include <cassert> |
| 46 | #include <iterator> |
| 47 | #include <utility> |
| 48 | |
| 49 | using namespace llvm; |
| 50 | |
| 51 | #define DEBUG_TYPE "machine-cse" |
| 52 | |
// Pass statistics, reported with -stats.
STATISTIC(NumCoalesces, "Number of copies coalesced" );
STATISTIC(NumCSEs, "Number of common subexpression eliminated" );
STATISTIC(NumPREs, "Number of partial redundant expression"
                   " transformed to fully redundant" );
STATISTIC(NumPhysCSEs,
          "Number of physreg referencing common subexpr eliminated" );
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg referencing CS eliminated" );
STATISTIC(NumCommutes, "Number of copies coalesced after commuting" );

// Threshold to avoid excessive cost to compute isProfitableToCSE.
static cl::opt<int>
    CSUsesThreshold("csuses-threshold" , cl::Hidden, cl::init(Val: 1024),
                    cl::desc("Threshold for the size of CSUses" ));

// When set, skip the profitability heuristics entirely and CSE whenever legal.
static cl::opt<bool> AggressiveMachineCSE(
    "aggressive-machine-cse" , cl::Hidden, cl::init(Val: false),
    cl::desc("Override the profitability heuristics for Machine CSE" ));
| 71 | |
| 72 | namespace { |
| 73 | |
/// Machine CSE implementation shared by the legacy and new pass managers.
/// Walks the dominator tree, value-numbering instructions in a scoped hash
/// table and replacing redundant computations with dominating occurrences.
class MachineCSEImpl {
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineDominatorTree *DT = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  MachineBlockFrequencyInfo *MBFI = nullptr;

public:
  MachineCSEImpl(MachineDominatorTree *DT, MachineBlockFrequencyInfo *MBFI)
      : DT(DT), MBFI(MBFI) {}
  bool run(MachineFunction &MF);

private:
  // Scoped hash table from an instruction (hashed as an expression via
  // MachineInstrExpressionTrait) to its value number. Scopes are pushed and
  // popped as the dominator tree is walked, so lookups only see dominating
  // entries.
  using AllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MachineInstr *, unsigned>>;
  using ScopedHTType =
      ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                      AllocatorTy>;
  using ScopeType = ScopedHTType::ScopeTy;
  // (operand index, physical register) pairs for live physreg defs of a
  // candidate instruction.
  using PhysDefVector = SmallVector<std::pair<unsigned, Register>, 2>;

  // Max number of instructions to scan when checking physreg liveness.
  unsigned LookAheadLimit = 0;
  // Currently-open value-numbering scope for each visited block.
  DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
  // Expression -> defining block map used by the simple PRE phase.
  DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
      PREMap;
  ScopedHTType VNT;
  // Exps[VN] is the instruction that was assigned value number VN.
  SmallVector<MachineInstr *, 64> Exps;
  unsigned CurrVN = 0;

  bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB);
  bool isPhysDefTriviallyDead(MCRegister Reg,
                              MachineBasicBlock::const_iterator I,
                              MachineBasicBlock::const_iterator E) const;
  bool hasLivePhysRegDefUses(const MachineInstr *MI,
                             const MachineBasicBlock *MBB,
                             SmallSet<MCRegister, 8> &PhysRefs,
                             PhysDefVector &PhysDefs, bool &PhysUseDef) const;
  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                        const SmallSet<MCRegister, 8> &PhysRefs,
                        const PhysDefVector &PhysDefs, bool &NonLocal) const;
  bool isCSECandidate(MachineInstr *MI);
  bool isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB,
                         MachineInstr *MI);
  void EnterScope(MachineBasicBlock *MBB);
  void ExitScope(MachineBasicBlock *MBB);
  bool ProcessBlockCSE(MachineBasicBlock *MBB);
  void ExitScopeIfDone(MachineDomTreeNode *Node,
                       DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren);
  bool PerformCSE(MachineDomTreeNode *Node);

  bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
  bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
  bool PerformSimplePRE(MachineDominatorTree *DT);
  /// Heuristics to see if it's profitable to move common computations of MBB
  /// and MBB1 to CandidateBB.
  bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
                               MachineBasicBlock *MBB, MachineBasicBlock *MBB1);
  void releaseMemory();
};
| 134 | |
/// Legacy pass-manager wrapper around MachineCSEImpl.
class MachineCSELegacy : public MachineFunctionPass {
public:
  static char ID; // Pass identification

  MachineCSELegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addPreservedID(ID&: MachineLoopInfoID);
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
  }

  // CSE relies on value numbering virtual-register defs, so the function must
  // still be in SSA form.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }
};
| 157 | } // end anonymous namespace |
| 158 | |
char MachineCSELegacy::ID = 0;

// Address other passes use to identify/preserve this pass.
char &llvm::MachineCSELegacyID = MachineCSELegacy::ID;

INITIALIZE_PASS_BEGIN(MachineCSELegacy, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination" , false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(MachineCSELegacy, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination" , false, false)
| 168 | |
/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed. Returns true if any operand of MI was rewritten.
bool MachineCSEImpl::PerformTrivialCopyPropagation(MachineInstr *MI,
                                                   MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineOperand &MO : MI->all_uses()) {
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;
    // NOTE: must be queried before MO.setReg() below mutates Reg's use list.
    bool OnlyOneUse = MRI->hasOneNonDBGUse(RegNo: Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI || !DefMI->isCopy())
      continue;
    Register SrcReg = DefMI->getOperand(i: 1).getReg();
    if (!SrcReg.isVirtual())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //   RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                      SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way find a constrained subregister
    // class given a super-reg class and subreg index.
    if (DefMI->getOperand(i: 1).getSubReg())
      continue;
    // The registers' classes/banks/types must be compatible for the rewrite.
    if (!MRI->constrainRegAttrs(Reg: SrcReg, ConstrainingReg: Reg))
      continue;
    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
    LLVM_DEBUG(dbgs() << "*** to: " << *MI);

    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    MRI->clearKillFlags(Reg: SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      // If (and only if) we've eliminated all uses of the copy, also
      // copy-propagate to any debug-users of MI, or they'll be left using
      // an undefined value.
      DefMI->changeDebugValuesDefReg(Reg: SrcReg);

      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}
| 224 | |
| 225 | bool MachineCSEImpl::isPhysDefTriviallyDead( |
| 226 | MCRegister Reg, MachineBasicBlock::const_iterator I, |
| 227 | MachineBasicBlock::const_iterator E) const { |
| 228 | unsigned LookAheadLeft = LookAheadLimit; |
| 229 | while (LookAheadLeft) { |
| 230 | // Skip over dbg_value's. |
| 231 | I = skipDebugInstructionsForward(It: I, End: E); |
| 232 | |
| 233 | if (I == E) |
| 234 | // Reached end of block, we don't know if register is dead or not. |
| 235 | return false; |
| 236 | |
| 237 | bool SeenDef = false; |
| 238 | for (const MachineOperand &MO : I->operands()) { |
| 239 | if (MO.isRegMask() && MO.clobbersPhysReg(PhysReg: Reg)) |
| 240 | SeenDef = true; |
| 241 | if (!MO.isReg() || !MO.getReg()) |
| 242 | continue; |
| 243 | if (!TRI->regsOverlap(RegA: MO.getReg(), RegB: Reg)) |
| 244 | continue; |
| 245 | if (MO.isUse()) |
| 246 | // Found a use! |
| 247 | return false; |
| 248 | SeenDef = true; |
| 249 | } |
| 250 | if (SeenDef) |
| 251 | // See a def of Reg (or an alias) before encountering any use, it's |
| 252 | // trivially dead. |
| 253 | return true; |
| 254 | |
| 255 | --LookAheadLeft; |
| 256 | ++I; |
| 257 | } |
| 258 | return false; |
| 259 | } |
| 260 | |
| 261 | static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, |
| 262 | const MachineOperand &MO, |
| 263 | const MachineFunction &MF, |
| 264 | const TargetRegisterInfo &TRI, |
| 265 | const TargetInstrInfo &TII) { |
| 266 | // MachineRegisterInfo::isConstantPhysReg directly called by |
| 267 | // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the |
| 268 | // reserved registers to be frozen. That doesn't cause a problem post-ISel as |
| 269 | // most (if not all) targets freeze reserved registers right after ISel. |
| 270 | // |
| 271 | // It does cause issues mid-GlobalISel, however, hence the additional |
| 272 | // reservedRegsFrozen check. |
| 273 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 274 | return TRI.isCallerPreservedPhysReg(PhysReg: Reg, MF) || TII.isIgnorableUse(MO) || |
| 275 | (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(PhysReg: Reg)); |
| 276 | } |
| 277 | |
/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
/// physical registers (except for dead defs of physical registers). It also
/// returns the physical register def by reference if it's the only one and the
/// instruction does not use a physical register.
bool MachineCSEImpl::hasLivePhysRegDefUses(const MachineInstr *MI,
                                           const MachineBasicBlock *MBB,
                                           SmallSet<MCRegister, 8> &PhysRefs,
                                           PhysDefVector &PhysDefs,
                                           bool &PhysUseDef) const {
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->all_uses()) {
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isVirtual())
      continue;
    // Reading either caller preserved or constant physregs is ok.
    if (!isCallerPreservedOrConstPhysReg(Reg: Reg.asMCReg(), MO, MF: *MI->getMF(), TRI: *TRI,
                                         TII: *TII))
      // Insert the register and all its aliases so later clobber checks
      // against PhysRefs also catch partial overlaps.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(V: *AI);
  }

  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(x: I);
  for (const auto &MOP : llvm::enumerate(First: MI->operands())) {
    const MachineOperand &MO = MOP.value();
    if (!MO.isReg() || !MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isVirtual())
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(V: Reg.asMCReg()))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead".
    // That's common since this pass is run before livevariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg: Reg.asMCReg(), I, E: MBB->end()))
      PhysDefs.emplace_back(Args: MOP.index(), Args&: Reg);
  }

  // Finally, add all defs to PhysRefs as well.
  for (const auto &Def : PhysDefs)
    for (MCRegAliasIterator AI(Def.second, TRI, true); AI.isValid(); ++AI)
      PhysRefs.insert(V: *AI);

  return !PhysRefs.empty();
}
| 331 | |
/// Return true if the physical registers in PhysRefs are not clobbered by any
/// instruction between the earlier occurrence CSMI and the candidate MI, so
/// CSMI's physreg defs still reach MI. Sets NonLocal if the scan crossed from
/// CSMI's block into MI's block.
bool MachineCSEImpl::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                      const SmallSet<MCRegister, 8> &PhysRefs,
                                      const PhysDefVector &PhysDefs,
                                      bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (const auto &PhysDef : PhysDefs) {
      if (MRI->isAllocatable(PhysReg: PhysDef.second) || MRI->isReserved(PhysReg: PhysDef.second))
        // Avoid extending live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  // Scan from just past CSMI up to MI; in the cross-MBB case the scan runs to
  // the end of CSMBB first, then resumes at the start of MBB.
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(x: I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugInstr())
      ++I;

    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?" );
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      // Continue the scan at the top of MI's block.
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      // Reached MI with no intervening clobber of any referenced physreg.
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register MOReg = MO.getReg();
      if (MOReg.isVirtual())
        continue;
      // PhysRefs is alias-closed, so a plain membership test suffices.
      if (PhysRefs.count(V: MOReg.asMCReg()))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  // Scan budget exhausted before reaching MI; be conservative.
  return false;
}
| 397 | |
| 398 | bool MachineCSEImpl::isCSECandidate(MachineInstr *MI) { |
| 399 | if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || |
| 400 | MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() || |
| 401 | MI->isFakeUse()) |
| 402 | return false; |
| 403 | |
| 404 | // Ignore copies. |
| 405 | if (MI->isCopyLike()) |
| 406 | return false; |
| 407 | |
| 408 | // Ignore stuff that we obviously can't move. |
| 409 | if (MI->mayStore() || MI->isCall() || MI->isTerminator() || |
| 410 | MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects()) |
| 411 | return false; |
| 412 | |
| 413 | if (MI->mayLoad()) { |
| 414 | // Okay, this instruction does a load. As a refinement, we allow the target |
| 415 | // to decide whether the loaded value is actually a constant. If so, we can |
| 416 | // actually use it as a load. |
| 417 | if (!MI->isDereferenceableInvariantLoad()) |
| 418 | // FIXME: we should be able to hoist loads with no other side effects if |
| 419 | // there are no other instructions which can change memory in this loop. |
| 420 | // This is a trivial form of alias analysis. |
| 421 | return false; |
| 422 | } |
| 423 | |
| 424 | // Ignore stack guard loads, otherwise the register that holds CSEed value may |
| 425 | // be spilled and get loaded back with corrupted data. |
| 426 | if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) |
| 427 | return false; |
| 428 | |
| 429 | return true; |
| 430 | } |
| 431 | |
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg. CSBB is basic block where CSReg is
/// defined.
bool MachineCSEImpl::isProfitableToCSE(Register CSReg, Register Reg,
                                       MachineBasicBlock *CSBB,
                                       MachineInstr *MI) {
  // -aggressive-machine-cse bypasses all heuristics below.
  if (AggressiveMachineCSE)
    return true;

  // FIXME: Heuristics that works around the lack the live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (CSReg.isVirtual() && Reg.isVirtual()) {
    MayIncreasePressure = false;
    SmallPtrSet<MachineInstr*, 8> CSUses;
    int NumOfUses = 0;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg: CSReg)) {
      CSUses.insert(Ptr: &MI);
      // Too costly to compute if NumOfUses is very large. Conservatively assume
      // MayIncreasePressure to avoid spending too much time here.
      if (++NumOfUses > CSUsesThreshold) {
        MayIncreasePressure = true;
        break;
      }
    }
    // Pressure can only stay flat if every user of Reg already uses CSReg.
    if (!MayIncreasePressure)
      for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
        if (!CSUses.count(Ptr: &MI)) {
          MayIncreasePressure = true;
          break;
        }
      }
  }
  if (!MayIncreasePressure) return true;

  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (TII->isAsCheapAsAMove(MI: *MI)) {
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(MBB: BB))
      return false;
  }

  // Heuristics #2: If the expression doesn't use a vr and the only use
  // of the redundant computation are copies, do not cse.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->all_uses()) {
    if (MO.getReg().isVirtual()) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg: CSReg)) {
    HasPHI |= UseMI.isPHI();
    if (UseMI.getParent() == MI->getParent())
      return true;
  }

  return !HasPHI;
}
| 511 | |
| 512 | void MachineCSEImpl::EnterScope(MachineBasicBlock *MBB) { |
| 513 | LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); |
| 514 | ScopeType *Scope = new ScopeType(VNT); |
| 515 | ScopeMap[MBB] = Scope; |
| 516 | } |
| 517 | |
| 518 | void MachineCSEImpl::ExitScope(MachineBasicBlock *MBB) { |
| 519 | LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); |
| 520 | DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(Val: MBB); |
| 521 | assert(SI != ScopeMap.end()); |
| 522 | delete SI->second; |
| 523 | ScopeMap.erase(I: SI); |
| 524 | } |
| 525 | |
/// Value-number every CSE candidate in MBB against the scoped hash table and
/// eliminate instructions whose expression is already available from a
/// dominating occurrence. Returns true if the block was modified.
bool MachineCSEImpl::ProcessBlockCSE(MachineBasicBlock *MBB) {
  bool Changed = false;

  // Per-instruction scratch state, cleared at the bottom of the loop.
  SmallVector<std::pair<Register, Register>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<Register, 2> ImplicitDefs;
  for (MachineInstr &MI : llvm::make_early_inc_range(Range&: *MBB)) {
    if (!isCSECandidate(MI: &MI))
      continue;

    bool FoundCSE = VNT.count(Key: &MI);
    if (!FoundCSE) {
      // Using trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(MI: &MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI.isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(Key: &MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI.isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
        Commuted = true;
        FoundCSE = VNT.count(Key: NewMI);
        if (NewMI != &MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<MCRegister, 8> PhysRefs;
    PhysDefVector PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE &&
        hasLivePhysRegDefUses(MI: &MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block
      // and it also defines the physical register which is not clobbered
      // in between and the physical register uses were not clobbered.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(Key: &MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, MI: &MI, PhysRefs, PhysDefs, NonLocal&: CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      // Not redundant: record MI as the canonical instruction for a new
      // value number so later instructions can CSE against it.
      VNT.insert(Key: &MI, Val: CurrVN++);
      Exps.push_back(Elt: &MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(Key: &MI);
    MachineInstr *CSMI = Exps[CSVN];
    LLVM_DEBUG(dbgs() << "Examining: " << MI);
    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Prevent CSE-ing non-local convergent instructions.
    // LLVM's current definition of `isConvergent` does not necessarily prove
    // that non-local CSE is illegal. The following check extends the definition
    // of `isConvergent` to assume a convergent instruction is dependent not
    // only on additional conditions, but also on fewer conditions. LLVM does
    // not have a MachineInstr attribute which expresses this extended
    // definition, so it's necessary to use `isConvergent` to prevent illegally
    // CSE-ing the subset of `isConvergent` instructions which do fall into this
    // extended definition.
    if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
      LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
                           "different BBs, avoid CSE!\n" );
      VNT.insert(Key: &MI, Val: CurrVN++);
      Exps.push_back(Elt: &MI);
      continue;
    }

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI.getNumDefs();

    // Walk MI's defs in parallel with CSMI's (same expression => same operand
    // layout), pairing up the registers to merge.
    for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI.getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register OldReg = MO.getReg();
      Register NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(Elt: i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(Elt: OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(OldReg.isVirtual() && NewReg.isVirtual() &&
             "Do not CSE physical register defs!" );

      if (!isProfitableToCSE(CSReg: NewReg, Reg: OldReg, CSBB: CSMI->getParent(), MI: &MI)) {
        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n" );
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the new instruction cannot exist
      // within the constraints (register class, bank, or low-level type) of
      // the old instruction.
      if (!MRI->constrainRegAttrs(Reg: NewReg, ConstrainingReg: OldReg)) {
        LLVM_DEBUG(
            dbgs() << "*** Not the same register constraints, avoid CSE!\n" );
        DoCSE = false;
        break;
      }

      CSEPairs.emplace_back(Args&: OldReg, Args&: NewReg);
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (const std::pair<Register, Register> &CSEPair : CSEPairs) {
        Register OldReg = CSEPair.first;
        Register NewReg = CSEPair.second;
        // OldReg may have been unused but is used now, clear the Dead flag
        MachineInstr *Def = MRI->getUniqueVRegDef(Reg: NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?" );
        Def->clearRegisterDeads(Reg: NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(FromReg: OldReg, ToReg: NewReg);
        MRI->clearKillFlags(Reg: NewReg);
      }

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(i: ImplicitDefToUpdate).setIsDead(false);
      for (const auto &PhysDef : PhysDefs)
        if (!MI.getOperand(i: PhysDef.first).isDead())
          CSMI->getOperand(i: PhysDef.first).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... implicit-def %nzcv    <- CSMI
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
      //   csinc ... implicit killed %nzcv
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because it's lifetime was extended.
      if (CSMI->getParent() == MI.getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    Reg: ImplicitDef, TRI, /*isKill=*/true))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(Reg: ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          auto LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(Reg: LiveIn.second))
            MBB->addLiveIn(PhysReg: LiveIn.second);
        }
        ++NumCrossBBCSEs;
      }

      MI.eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      // Judged unprofitable: keep MI and let it seed a new value number.
      VNT.insert(Key: &MI, Val: CurrVN++);
      Exps.push_back(Elt: &MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}
| 743 | |
/// ExitScopeIfDone - Destroy the scope for the MBB that corresponds to the
/// given dominator tree node if it's a leaf or all of its children are done.
/// Walk up the dominator tree to destroy ancestors which are now done.
| 747 | void MachineCSEImpl::ExitScopeIfDone( |
| 748 | MachineDomTreeNode *Node, |
| 749 | DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren) { |
| 750 | if (OpenChildren[Node]) |
| 751 | return; |
| 752 | |
| 753 | // Pop scope. |
| 754 | ExitScope(MBB: Node->getBlock()); |
| 755 | |
| 756 | // Now traverse upwards to pop ancestors whose offsprings are all done. |
| 757 | while (MachineDomTreeNode *Parent = Node->getIDom()) { |
| 758 | unsigned Left = --OpenChildren[Parent]; |
| 759 | if (Left != 0) |
| 760 | break; |
| 761 | ExitScope(MBB: Parent->getBlock()); |
| 762 | Node = Parent; |
| 763 | } |
| 764 | } |
| 765 | |
| 766 | bool MachineCSEImpl::PerformCSE(MachineDomTreeNode *Node) { |
| 767 | SmallVector<MachineDomTreeNode*, 32> Scopes; |
| 768 | SmallVector<MachineDomTreeNode*, 8> WorkList; |
| 769 | DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; |
| 770 | |
| 771 | CurrVN = 0; |
| 772 | |
| 773 | // Perform a DFS walk to determine the order of visit. |
| 774 | WorkList.push_back(Elt: Node); |
| 775 | do { |
| 776 | Node = WorkList.pop_back_val(); |
| 777 | Scopes.push_back(Elt: Node); |
| 778 | size_t WorkListSize = WorkList.size(); |
| 779 | append_range(C&: WorkList, R: Node->children()); |
| 780 | OpenChildren[Node] = WorkList.size() - WorkListSize; // Number of children. |
| 781 | } while (!WorkList.empty()); |
| 782 | |
| 783 | // Now perform CSE. |
| 784 | bool Changed = false; |
| 785 | for (MachineDomTreeNode *Node : Scopes) { |
| 786 | MachineBasicBlock *MBB = Node->getBlock(); |
| 787 | EnterScope(MBB); |
| 788 | Changed |= ProcessBlockCSE(MBB); |
| 789 | // If it's a leaf node, it's done. Traverse upwards to pop ancestors. |
| 790 | ExitScopeIfDone(Node, OpenChildren); |
| 791 | } |
| 792 | |
| 793 | return Changed; |
| 794 | } |
| 795 | |
| 796 | // We use stronger checks for PRE candidate rather than for CSE ones to embrace |
| 797 | // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps |
| 798 | // to exclude instrs created by PRE that won't be CSEed later. |
| 799 | bool MachineCSEImpl::isPRECandidate(MachineInstr *MI, |
| 800 | SmallSet<MCRegister, 8> &PhysRefs) { |
| 801 | if (!isCSECandidate(MI) || |
| 802 | MI->isNotDuplicable() || |
| 803 | MI->mayLoad() || |
| 804 | TII->isAsCheapAsAMove(MI: *MI) || |
| 805 | MI->getNumDefs() != 1 || |
| 806 | MI->getNumExplicitDefs() != 1) |
| 807 | return false; |
| 808 | |
| 809 | for (const MachineOperand &MO : MI->operands()) { |
| 810 | if (MO.isReg() && !MO.getReg().isVirtual()) { |
| 811 | if (MO.isDef()) |
| 812 | return false; |
| 813 | else |
| 814 | PhysRefs.insert(V: MO.getReg()); |
| 815 | } |
| 816 | } |
| 817 | |
| 818 | return true; |
| 819 | } |
| 820 | |
bool MachineCSEImpl::ProcessBlockPRE(MachineDominatorTree *DT,
                                     MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineInstr &MI : llvm::make_early_inc_range(Range&: *MBB)) {
    SmallSet<MCRegister, 8> PhysRefs;
    if (!isPRECandidate(MI: &MI, PhysRefs))
      continue;

    // First sighting of this expression: record which block it was seen in
    // and move on. A later identical instruction will find this entry.
    auto [It, Inserted] = PREMap.try_emplace(Key: &MI, Args&: MBB);
    if (Inserted)
      continue;

    // MBB1 is the block where an equivalent instruction was recorded earlier.
    auto *MBB1 = It->second;
    assert(
        !DT->properlyDominates(MBB, MBB1) &&
        "MBB cannot properly dominate MBB1 while DFS through dominators tree!" );
    // Candidate hoist destination: nearest common dominator of both blocks.
    auto CMBB = DT->findNearestCommonDominator(A: MBB, B: MBB1);
    if (!CMBB->isLegalToHoistInto())
      continue;

    if (!isProfitableToHoistInto(CandidateBB: CMBB, MBB, MBB1))
      continue;

    // Two instrs are partial redundant if their basic blocks are reachable
    // from one to another but one doesn't dominate another.
    if (CMBB != MBB1) {
      auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
      if (BB != nullptr && BB1 != nullptr &&
          (isPotentiallyReachable(From: BB1, To: BB) ||
           isPotentiallyReachable(From: BB, To: BB1))) {
        // The following check extends the definition of `isConvergent` to
        // assume a convergent instruction is dependent not only on additional
        // conditions, but also on fewer conditions. LLVM does not have a
        // MachineInstr attribute which expresses this extended definition, so
        // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
        // subset of `isConvergent` instructions which do fall into this
        // extended definition.
        if (MI.isConvergent() && CMBB != MBB)
          continue;

        // If this instruction uses physical registers then we can only do PRE
        // if it's using the value that is live at the place we're hoisting to.
        bool NonLocal;
        PhysDefVector PhysDefs;
        if (!PhysRefs.empty() &&
            !PhysRegDefsReach(CSMI: &*(CMBB->getFirstTerminator()), MI: &MI, PhysRefs,
                              PhysDefs, NonLocal))
          continue;

        assert(MI.getOperand(0).isDef() &&
               "First operand of instr with one explicit def must be this def" );
        // Clone the def into a fresh virtual register; the hoisted duplicate
        // defines NewReg so the subsequent CSE step can merge the originals.
        Register VReg = MI.getOperand(i: 0).getReg();
        Register NewReg = MRI->cloneVirtualRegister(VReg);
        if (!isProfitableToCSE(CSReg: NewReg, Reg: VReg, CSBB: CMBB, MI: &MI))
          continue;
        // Insert the duplicate just before CMBB's terminator so it dominates
        // both original occurrences.
        MachineInstr &NewMI =
            TII->duplicate(MBB&: *CMBB, InsertBefore: CMBB->getFirstTerminator(), Orig: MI);

        // When hoisting, make sure we don't carry the debug location of
        // the original instruction, as that's not correct and can cause
        // unexpected jumps when debugging optimized code.
        auto EmptyDL = DebugLoc();
        NewMI.setDebugLoc(EmptyDL);

        NewMI.getOperand(i: 0).setReg(NewReg);

        // Re-home the map entry to the hoist block so further occurrences of
        // this expression pair against the hoisted copy.
        PREMap[&MI] = CMBB;
        ++NumPREs;
        Changed = true;
      }
    }
  }
  return Changed;
}
| 895 | |
// This simple PRE (partial redundancy elimination) pass doesn't actually
// eliminate partial redundancy but transforms it to full redundancy,
// anticipating that the next CSE step will eliminate this created redundancy.
// If CSE doesn't eliminate it, then the created instruction will remain dead
// and will be eliminated later by the Remove Dead Machine Instructions pass.
| 901 | bool MachineCSEImpl::PerformSimplePRE(MachineDominatorTree *DT) { |
| 902 | SmallVector<MachineDomTreeNode *, 32> BBs; |
| 903 | |
| 904 | PREMap.clear(); |
| 905 | bool Changed = false; |
| 906 | BBs.push_back(Elt: DT->getRootNode()); |
| 907 | do { |
| 908 | auto Node = BBs.pop_back_val(); |
| 909 | append_range(C&: BBs, R: Node->children()); |
| 910 | |
| 911 | MachineBasicBlock *MBB = Node->getBlock(); |
| 912 | Changed |= ProcessBlockPRE(DT, MBB); |
| 913 | |
| 914 | } while (!BBs.empty()); |
| 915 | |
| 916 | return Changed; |
| 917 | } |
| 918 | |
| 919 | bool MachineCSEImpl::isProfitableToHoistInto(MachineBasicBlock *CandidateBB, |
| 920 | MachineBasicBlock *MBB, |
| 921 | MachineBasicBlock *MBB1) { |
| 922 | if (CandidateBB->getParent()->getFunction().hasMinSize()) |
| 923 | return true; |
| 924 | assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB" ); |
| 925 | assert(DT->dominates(CandidateBB, MBB1) && |
| 926 | "CandidateBB should dominate MBB1" ); |
| 927 | return MBFI->getBlockFreq(MBB: CandidateBB) <= |
| 928 | MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB: MBB1); |
| 929 | } |
| 930 | |
| 931 | void MachineCSEImpl::releaseMemory() { |
| 932 | ScopeMap.clear(); |
| 933 | PREMap.clear(); |
| 934 | Exps.clear(); |
| 935 | } |
| 936 | |
| 937 | bool MachineCSEImpl::run(MachineFunction &MF) { |
| 938 | TII = MF.getSubtarget().getInstrInfo(); |
| 939 | TRI = MF.getSubtarget().getRegisterInfo(); |
| 940 | MRI = &MF.getRegInfo(); |
| 941 | LookAheadLimit = TII->getMachineCSELookAheadLimit(); |
| 942 | bool ChangedPRE, ChangedCSE; |
| 943 | ChangedPRE = PerformSimplePRE(DT); |
| 944 | ChangedCSE = PerformCSE(Node: DT->getRootNode()); |
| 945 | releaseMemory(); |
| 946 | return ChangedPRE || ChangedCSE; |
| 947 | } |
| 948 | |
| 949 | PreservedAnalyses MachineCSEPass::run(MachineFunction &MF, |
| 950 | MachineFunctionAnalysisManager &MFAM) { |
| 951 | MFPropsModifier _(*this, MF); |
| 952 | |
| 953 | MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF); |
| 954 | MachineBlockFrequencyInfo &MBFI = |
| 955 | MFAM.getResult<MachineBlockFrequencyAnalysis>(IR&: MF); |
| 956 | MachineCSEImpl Impl(&MDT, &MBFI); |
| 957 | bool Changed = Impl.run(MF); |
| 958 | if (!Changed) |
| 959 | return PreservedAnalyses::all(); |
| 960 | |
| 961 | auto PA = getMachineFunctionPassPreservedAnalyses(); |
| 962 | PA.preserve<MachineLoopAnalysis>(); |
| 963 | PA.preserve<MachineDominatorTreeAnalysis>(); |
| 964 | PA.preserve<MachineBlockFrequencyAnalysis>(); |
| 965 | PA.preserveSet<CFGAnalyses>(); |
| 966 | return PA; |
| 967 | } |
| 968 | |
| 969 | bool MachineCSELegacy::runOnMachineFunction(MachineFunction &MF) { |
| 970 | if (skipFunction(F: MF.getFunction())) |
| 971 | return false; |
| 972 | |
| 973 | MachineDominatorTree &MDT = |
| 974 | getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
| 975 | MachineBlockFrequencyInfo &MBFI = |
| 976 | getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); |
| 977 | MachineCSEImpl Impl(&MDT, &MBFI); |
| 978 | return Impl.run(MF); |
| 979 | } |
| 980 | |