| 1 | //==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file Break False Dependency pass. |
| 10 | /// |
| 11 | /// Some instructions have false dependencies which cause unnecessary stalls. |
| 12 | /// For example, instructions may write part of a register and implicitly |
| 13 | /// need to read the other parts of the register. This may cause unwanted |
| 14 | /// stalls preventing otherwise unrelated instructions from executing in |
| 15 | /// parallel in an out-of-order CPU. |
| 16 | /// This pass is aimed at identifying and avoiding these dependencies. |
| 17 | // |
| 18 | //===----------------------------------------------------------------------===// |
| 19 | |
| 20 | #include "llvm/CodeGen/BreakFalseDeps.h" |
| 21 | #include "llvm/ADT/DepthFirstIterator.h" |
| 22 | #include "llvm/CodeGen/LivePhysRegs.h" |
| 23 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 24 | #include "llvm/CodeGen/ReachingDefAnalysis.h" |
| 25 | #include "llvm/CodeGen/RegisterClassInfo.h" |
| 26 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 27 | #include "llvm/InitializePasses.h" |
| 28 | #include "llvm/MC/MCInstrDesc.h" |
| 29 | #include "llvm/MC/MCRegister.h" |
| 30 | #include "llvm/MC/MCRegisterInfo.h" |
| 31 | #include "llvm/Support/Debug.h" |
| 32 | |
| 33 | using namespace llvm; |
| 34 | |
| 35 | namespace { |
| 36 | |
| 37 | class BreakFalseDeps { |
| 38 | private: |
| 39 | MachineFunction *MF = nullptr; |
| 40 | const TargetInstrInfo *TII = nullptr; |
| 41 | const TargetRegisterInfo *TRI = nullptr; |
| 42 | RegisterClassInfo RegClassInfo; |
| 43 | |
| 44 | /// List of undefined register reads in this block in forward order. |
| 45 | std::vector<std::pair<MachineInstr *, unsigned>> UndefReads; |
| 46 | |
| 47 | /// Storage for register unit liveness. |
| 48 | LivePhysRegs LiveRegSet; |
| 49 | |
| 50 | ReachingDefInfo *RDI = nullptr; |
| 51 | |
| 52 | /// True if the pass insert instructions or updates registers, false |
| 53 | /// otherwise. |
| 54 | bool Changed = false; |
| 55 | |
| 56 | public: |
| 57 | BreakFalseDeps(ReachingDefInfo *RDI) : RDI(RDI) {} |
| 58 | |
| 59 | bool run(MachineFunction &CurMF); |
| 60 | |
| 61 | private: |
| 62 | /// Process he given basic block. |
| 63 | void processBasicBlock(MachineBasicBlock *MBB); |
| 64 | |
| 65 | /// Update def-ages for registers defined by MI. |
| 66 | /// Also break dependencies on partial defs and undef uses. |
| 67 | void processDefs(MachineInstr *MI); |
| 68 | |
| 69 | /// Helps avoid false dependencies on undef registers by updating the |
| 70 | /// machine instructions' undef operand to use a register that the instruction |
| 71 | /// is truly dependent on, or use a register with clearance higher than Pref. |
| 72 | /// Returns true if it was able to find a true dependency, thus not requiring |
| 73 | /// a dependency breaking instruction regardless of clearance. |
| 74 | bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, |
| 75 | unsigned Pref); |
| 76 | |
| 77 | /// Return true to if it makes sense to break dependence on a partial |
| 78 | /// def or undef use. |
| 79 | bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref); |
| 80 | |
| 81 | /// Break false dependencies on undefined register reads. |
| 82 | /// Walk the block backward computing precise liveness. This is expensive, so |
| 83 | /// we only do it on demand. Note that the occurrence of undefined register |
| 84 | /// reads that should be broken is very rare, but when they occur we may have |
| 85 | /// many in a single block. |
| 86 | void processUndefReads(MachineBasicBlock *); |
| 87 | }; |
| 88 | |
| 89 | class BreakFalseDepsLegacy : public MachineFunctionPass { |
| 90 | public: |
| 91 | static char ID; // Pass identification, replacement for typeid |
| 92 | |
| 93 | BreakFalseDepsLegacy() : MachineFunctionPass(ID) {} |
| 94 | |
| 95 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 96 | AU.setPreservesCFG(); |
| 97 | AU.addRequired<ReachingDefInfoWrapperPass>(); |
| 98 | MachineFunctionPass::getAnalysisUsage(AU); |
| 99 | } |
| 100 | |
| 101 | bool runOnMachineFunction(MachineFunction &MF) override; |
| 102 | |
| 103 | MachineFunctionProperties getRequiredProperties() const override { |
| 104 | return MachineFunctionProperties().setNoVRegs(); |
| 105 | } |
| 106 | }; |
| 107 | |
| 108 | } // namespace |
| 109 | |
| 110 | #define DEBUG_TYPE "break-false-deps" |
| 111 | |
| 112 | char BreakFalseDepsLegacy::ID = 0; |
| 113 | INITIALIZE_PASS_BEGIN(BreakFalseDepsLegacy, DEBUG_TYPE, "BreakFalseDeps" , false, |
| 114 | false) |
| 115 | INITIALIZE_PASS_DEPENDENCY(ReachingDefInfoWrapperPass) |
| 116 | INITIALIZE_PASS_END(BreakFalseDepsLegacy, DEBUG_TYPE, "BreakFalseDeps" , false, |
| 117 | false) |
| 118 | |
| 119 | FunctionPass *llvm::createBreakFalseDepsLegacyPass() { |
| 120 | return new BreakFalseDepsLegacy(); |
| 121 | } |
| 122 | |
| 123 | bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, |
| 124 | unsigned Pref) { |
| 125 | |
| 126 | // We can't change tied operands. |
| 127 | if (MI->isRegTiedToDefOperand(UseOpIdx: OpIdx)) |
| 128 | return false; |
| 129 | |
| 130 | MachineOperand &MO = MI->getOperand(i: OpIdx); |
| 131 | assert(MO.isUndef() && "Expected undef machine operand" ); |
| 132 | |
| 133 | // We can't change registers that aren't renamable. |
| 134 | if (!MO.isRenamable()) |
| 135 | return false; |
| 136 | |
| 137 | MCRegister OriginalReg = MO.getReg().asMCReg(); |
| 138 | |
| 139 | // Update only undef operands that have reg units that are mapped to one root. |
| 140 | for (MCRegUnit Unit : TRI->regunits(Reg: OriginalReg)) { |
| 141 | unsigned NumRoots = 0; |
| 142 | for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) { |
| 143 | NumRoots++; |
| 144 | if (NumRoots > 1) |
| 145 | return false; |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | // Get the undef operand's register class |
| 150 | const TargetRegisterClass *OpRC = TII->getRegClass(MCID: MI->getDesc(), OpNum: OpIdx); |
| 151 | assert(OpRC && "Not a valid register class" ); |
| 152 | |
| 153 | // If the instruction has a true dependency, we can hide the false depdency |
| 154 | // behind it. |
| 155 | for (MachineOperand &CurrMO : MI->all_uses()) { |
| 156 | if (CurrMO.isUndef() || !OpRC->contains(Reg: CurrMO.getReg())) |
| 157 | continue; |
| 158 | // We found a true dependency - replace the undef register with the true |
| 159 | // dependency. |
| 160 | MO.setReg(CurrMO.getReg()); |
| 161 | Changed = true; |
| 162 | return true; |
| 163 | } |
| 164 | |
| 165 | // Go over all registers in the register class and find the register with |
| 166 | // max clearance or clearance higher than Pref. |
| 167 | unsigned MaxClearance = 0; |
| 168 | unsigned MaxClearanceReg = OriginalReg; |
| 169 | ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC: OpRC); |
| 170 | for (MCPhysReg Reg : Order) { |
| 171 | unsigned Clearance = RDI->getClearance(MI, Reg); |
| 172 | if (Clearance <= MaxClearance) |
| 173 | continue; |
| 174 | MaxClearance = Clearance; |
| 175 | MaxClearanceReg = Reg; |
| 176 | |
| 177 | if (MaxClearance > Pref) |
| 178 | break; |
| 179 | } |
| 180 | |
| 181 | // Update the operand if we found a register with better clearance. |
| 182 | if (MaxClearanceReg != OriginalReg) { |
| 183 | MO.setReg(MaxClearanceReg); |
| 184 | Changed = true; |
| 185 | } |
| 186 | |
| 187 | return false; |
| 188 | } |
| 189 | |
| 190 | bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, |
| 191 | unsigned Pref) { |
| 192 | MCRegister Reg = MI->getOperand(i: OpIdx).getReg().asMCReg(); |
| 193 | unsigned Clearance = RDI->getClearance(MI, Reg); |
| 194 | LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); |
| 195 | |
| 196 | if (Pref > Clearance) { |
| 197 | LLVM_DEBUG(dbgs() << ": Break dependency.\n" ); |
| 198 | return true; |
| 199 | } |
| 200 | LLVM_DEBUG(dbgs() << ": OK .\n" ); |
| 201 | return false; |
| 202 | } |
| 203 | |
| 204 | void BreakFalseDeps::processDefs(MachineInstr *MI) { |
| 205 | assert(!MI->isDebugInstr() && "Won't process debug values" ); |
| 206 | |
| 207 | const MCInstrDesc &MCID = MI->getDesc(); |
| 208 | |
| 209 | // Break dependence on undef uses. Do this before updating LiveRegs below. |
| 210 | // This can remove a false dependence with no additional instructions. |
| 211 | for (unsigned i = MCID.getNumDefs(), e = MCID.getNumOperands(); i != e; ++i) { |
| 212 | MachineOperand &MO = MI->getOperand(i); |
| 213 | if (!MO.isReg() || !MO.getReg() || !MO.isUse() || !MO.isUndef()) |
| 214 | continue; |
| 215 | |
| 216 | unsigned Pref = TII->getUndefRegClearance(MI: *MI, OpNum: i, TRI); |
| 217 | if (Pref) { |
| 218 | bool HadTrueDependency = pickBestRegisterForUndef(MI, OpIdx: i, Pref); |
| 219 | // We don't need to bother trying to break a dependency if this |
| 220 | // instruction has a true dependency on that register through another |
| 221 | // operand - we'll have to wait for it to be available regardless. |
| 222 | if (!HadTrueDependency && shouldBreakDependence(MI, OpIdx: i, Pref)) |
| 223 | UndefReads.push_back(x: std::make_pair(x&: MI, y&: i)); |
| 224 | } |
| 225 | } |
| 226 | |
| 227 | // The code below allows the target to create a new instruction to break the |
| 228 | // dependence. That opposes the goal of minimizing size, so bail out now. |
| 229 | if (MF->getFunction().hasMinSize()) |
| 230 | return; |
| 231 | |
| 232 | for (unsigned i = 0, |
| 233 | e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); |
| 234 | i != e; ++i) { |
| 235 | MachineOperand &MO = MI->getOperand(i); |
| 236 | if (!MO.isReg() || !MO.getReg()) |
| 237 | continue; |
| 238 | if (MO.isUse()) |
| 239 | continue; |
| 240 | // Check clearance before partial register updates. |
| 241 | unsigned Pref = TII->getPartialRegUpdateClearance(MI: *MI, OpNum: i, TRI); |
| 242 | if (Pref && shouldBreakDependence(MI, OpIdx: i, Pref)) { |
| 243 | TII->breakPartialRegDependency(MI&: *MI, OpNum: i, TRI); |
| 244 | Changed = true; |
| 245 | } |
| 246 | } |
| 247 | } |
| 248 | |
| 249 | void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { |
| 250 | if (UndefReads.empty()) |
| 251 | return; |
| 252 | |
| 253 | // The code below allows the target to create a new instruction to break the |
| 254 | // dependence. That opposes the goal of minimizing size, so bail out now. |
| 255 | if (MF->getFunction().hasMinSize()) |
| 256 | return; |
| 257 | |
| 258 | // Collect this block's live out register units. |
| 259 | LiveRegSet.init(TRI: *TRI); |
| 260 | // We do not need to care about pristine registers as they are just preserved |
| 261 | // but not actually used in the function. |
| 262 | LiveRegSet.addLiveOutsNoPristines(MBB: *MBB); |
| 263 | |
| 264 | MachineInstr *UndefMI = UndefReads.back().first; |
| 265 | unsigned OpIdx = UndefReads.back().second; |
| 266 | |
| 267 | for (MachineInstr &I : llvm::reverse(C&: *MBB)) { |
| 268 | // Update liveness, including the current instruction's defs. |
| 269 | LiveRegSet.stepBackward(MI: I); |
| 270 | |
| 271 | if (UndefMI == &I) { |
| 272 | if (!LiveRegSet.contains(Reg: UndefMI->getOperand(i: OpIdx).getReg())) { |
| 273 | TII->breakPartialRegDependency(MI&: *UndefMI, OpNum: OpIdx, TRI); |
| 274 | Changed = true; |
| 275 | } |
| 276 | |
| 277 | UndefReads.pop_back(); |
| 278 | if (UndefReads.empty()) |
| 279 | return; |
| 280 | |
| 281 | UndefMI = UndefReads.back().first; |
| 282 | OpIdx = UndefReads.back().second; |
| 283 | } |
| 284 | } |
| 285 | } |
| 286 | |
| 287 | void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) { |
| 288 | UndefReads.clear(); |
| 289 | // If this block is not done, it makes little sense to make any decisions |
| 290 | // based on clearance information. We need to make a second pass anyway, |
| 291 | // and by then we'll have better information, so we can avoid doing the work |
| 292 | // to try and break dependencies now. |
| 293 | for (MachineInstr &MI : *MBB) { |
| 294 | if (!MI.isDebugInstr()) |
| 295 | processDefs(MI: &MI); |
| 296 | } |
| 297 | processUndefReads(MBB); |
| 298 | } |
| 299 | |
| 300 | bool BreakFalseDeps::run(MachineFunction &CurMF) { |
| 301 | MF = &CurMF; |
| 302 | TII = MF->getSubtarget().getInstrInfo(); |
| 303 | TRI = MF->getSubtarget().getRegisterInfo(); |
| 304 | |
| 305 | RegClassInfo.runOnMachineFunction(MF: CurMF, /*Rev=*/true); |
| 306 | |
| 307 | LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n" ); |
| 308 | |
| 309 | // Skip Dead blocks due to ReachingDefAnalysis has no idea about instructions |
| 310 | // in them. |
| 311 | df_iterator_default_set<MachineBasicBlock *> Reachable; |
| 312 | for (MachineBasicBlock *MBB : depth_first_ext(G: &CurMF, S&: Reachable)) |
| 313 | (void)MBB /* Mark all reachable blocks */; |
| 314 | |
| 315 | // Traverse the basic blocks. |
| 316 | for (MachineBasicBlock &MBB : CurMF) |
| 317 | if (Reachable.count(Ptr: &MBB)) |
| 318 | processBasicBlock(MBB: &MBB); |
| 319 | |
| 320 | return Changed; |
| 321 | } |
| 322 | |
| 323 | bool BreakFalseDepsLegacy::runOnMachineFunction(MachineFunction &MF) { |
| 324 | if (skipFunction(F: MF.getFunction())) |
| 325 | return false; |
| 326 | |
| 327 | ReachingDefInfo *RDI = &getAnalysis<ReachingDefInfoWrapperPass>().getRDI(); |
| 328 | BreakFalseDeps Impl(RDI); |
| 329 | return Impl.run(CurMF&: MF); |
| 330 | } |
| 331 | |
| 332 | PreservedAnalyses |
| 333 | BreakFalseDepsPass::run(MachineFunction &MF, |
| 334 | MachineFunctionAnalysisManager &MFAM) { |
| 335 | MFPropsModifier _(*this, MF); |
| 336 | ReachingDefInfo *RDI = &MFAM.getResult<ReachingDefAnalysis>(IR&: MF); |
| 337 | if (BreakFalseDeps(RDI).run(CurMF&: MF)) { |
| 338 | PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses(); |
| 339 | PA.preserveSet<CFGAnalyses>(); |
| 340 | return PA; |
| 341 | } |
| 342 | return PreservedAnalyses::all(); |
| 343 | } |
| 344 | |