1 | //==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file Break False Dependency pass. |
10 | /// |
11 | /// Some instructions have false dependencies which cause unnecessary stalls. |
12 | /// For example, instructions may write part of a register and implicitly |
13 | /// need to read the other parts of the register. This may cause unwanted |
14 | /// stalls preventing otherwise unrelated instructions from executing in |
15 | /// parallel in an out-of-order CPU. |
16 | /// This pass is aimed at identifying and avoiding these dependencies. |
17 | // |
18 | //===----------------------------------------------------------------------===// |
19 | |
20 | #include "llvm/ADT/DepthFirstIterator.h" |
21 | #include "llvm/CodeGen/LivePhysRegs.h" |
22 | #include "llvm/CodeGen/MachineFunctionPass.h" |
23 | #include "llvm/CodeGen/ReachingDefAnalysis.h" |
24 | #include "llvm/CodeGen/RegisterClassInfo.h" |
25 | #include "llvm/CodeGen/TargetInstrInfo.h" |
26 | #include "llvm/InitializePasses.h" |
27 | #include "llvm/MC/MCInstrDesc.h" |
28 | #include "llvm/MC/MCRegister.h" |
29 | #include "llvm/MC/MCRegisterInfo.h" |
30 | #include "llvm/Support/Debug.h" |
31 | |
32 | using namespace llvm; |
33 | |
34 | namespace llvm { |
35 | |
36 | class BreakFalseDeps : public MachineFunctionPass { |
37 | private: |
38 | MachineFunction *MF = nullptr; |
39 | const TargetInstrInfo *TII = nullptr; |
40 | const TargetRegisterInfo *TRI = nullptr; |
41 | RegisterClassInfo RegClassInfo; |
42 | |
43 | /// List of undefined register reads in this block in forward order. |
44 | std::vector<std::pair<MachineInstr *, unsigned>> UndefReads; |
45 | |
46 | /// Storage for register unit liveness. |
47 | LivePhysRegs LiveRegSet; |
48 | |
49 | ReachingDefAnalysis *RDA = nullptr; |
50 | |
51 | public: |
52 | static char ID; // Pass identification, replacement for typeid |
53 | |
54 | BreakFalseDeps() : MachineFunctionPass(ID) { |
55 | initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry()); |
56 | } |
57 | |
58 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
59 | AU.setPreservesAll(); |
60 | AU.addRequired<ReachingDefAnalysis>(); |
61 | MachineFunctionPass::getAnalysisUsage(AU); |
62 | } |
63 | |
64 | bool runOnMachineFunction(MachineFunction &MF) override; |
65 | |
66 | MachineFunctionProperties getRequiredProperties() const override { |
67 | return MachineFunctionProperties().setNoVRegs(); |
68 | } |
69 | |
70 | private: |
71 | /// Process he given basic block. |
72 | void processBasicBlock(MachineBasicBlock *MBB); |
73 | |
74 | /// Update def-ages for registers defined by MI. |
75 | /// Also break dependencies on partial defs and undef uses. |
76 | void processDefs(MachineInstr *MI); |
77 | |
78 | /// Helps avoid false dependencies on undef registers by updating the |
79 | /// machine instructions' undef operand to use a register that the instruction |
80 | /// is truly dependent on, or use a register with clearance higher than Pref. |
81 | /// Returns true if it was able to find a true dependency, thus not requiring |
82 | /// a dependency breaking instruction regardless of clearance. |
83 | bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, |
84 | unsigned Pref); |
85 | |
86 | /// Return true to if it makes sense to break dependence on a partial |
87 | /// def or undef use. |
88 | bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref); |
89 | |
90 | /// Break false dependencies on undefined register reads. |
91 | /// Walk the block backward computing precise liveness. This is expensive, so |
92 | /// we only do it on demand. Note that the occurrence of undefined register |
93 | /// reads that should be broken is very rare, but when they occur we may have |
94 | /// many in a single block. |
95 | void processUndefReads(MachineBasicBlock *); |
96 | }; |
97 | |
98 | } // namespace llvm |
99 | |
100 | #define DEBUG_TYPE "break-false-deps" |
101 | |
102 | char BreakFalseDeps::ID = 0; |
103 | INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps" , false, false) |
104 | INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) |
105 | INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps" , false, false) |
106 | |
107 | FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); } |
108 | |
109 | bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, |
110 | unsigned Pref) { |
111 | |
112 | // We can't change tied operands. |
113 | if (MI->isRegTiedToDefOperand(UseOpIdx: OpIdx)) |
114 | return false; |
115 | |
116 | MachineOperand &MO = MI->getOperand(i: OpIdx); |
117 | assert(MO.isUndef() && "Expected undef machine operand" ); |
118 | |
119 | // We can't change registers that aren't renamable. |
120 | if (!MO.isRenamable()) |
121 | return false; |
122 | |
123 | MCRegister OriginalReg = MO.getReg().asMCReg(); |
124 | |
125 | // Update only undef operands that have reg units that are mapped to one root. |
126 | for (MCRegUnit Unit : TRI->regunits(Reg: OriginalReg)) { |
127 | unsigned NumRoots = 0; |
128 | for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) { |
129 | NumRoots++; |
130 | if (NumRoots > 1) |
131 | return false; |
132 | } |
133 | } |
134 | |
135 | // Get the undef operand's register class |
136 | const TargetRegisterClass *OpRC = |
137 | TII->getRegClass(MCID: MI->getDesc(), OpNum: OpIdx, TRI, MF: *MF); |
138 | assert(OpRC && "Not a valid register class" ); |
139 | |
140 | // If the instruction has a true dependency, we can hide the false depdency |
141 | // behind it. |
142 | for (MachineOperand &CurrMO : MI->all_uses()) { |
143 | if (CurrMO.isUndef() || !OpRC->contains(Reg: CurrMO.getReg())) |
144 | continue; |
145 | // We found a true dependency - replace the undef register with the true |
146 | // dependency. |
147 | MO.setReg(CurrMO.getReg()); |
148 | return true; |
149 | } |
150 | |
151 | // Go over all registers in the register class and find the register with |
152 | // max clearance or clearance higher than Pref. |
153 | unsigned MaxClearance = 0; |
154 | unsigned MaxClearanceReg = OriginalReg; |
155 | ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC: OpRC); |
156 | for (MCPhysReg Reg : Order) { |
157 | unsigned Clearance = RDA->getClearance(MI, Reg); |
158 | if (Clearance <= MaxClearance) |
159 | continue; |
160 | MaxClearance = Clearance; |
161 | MaxClearanceReg = Reg; |
162 | |
163 | if (MaxClearance > Pref) |
164 | break; |
165 | } |
166 | |
167 | // Update the operand if we found a register with better clearance. |
168 | if (MaxClearanceReg != OriginalReg) |
169 | MO.setReg(MaxClearanceReg); |
170 | |
171 | return false; |
172 | } |
173 | |
174 | bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, |
175 | unsigned Pref) { |
176 | MCRegister Reg = MI->getOperand(i: OpIdx).getReg().asMCReg(); |
177 | unsigned Clearance = RDA->getClearance(MI, Reg); |
178 | LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); |
179 | |
180 | if (Pref > Clearance) { |
181 | LLVM_DEBUG(dbgs() << ": Break dependency.\n" ); |
182 | return true; |
183 | } |
184 | LLVM_DEBUG(dbgs() << ": OK .\n" ); |
185 | return false; |
186 | } |
187 | |
188 | void BreakFalseDeps::processDefs(MachineInstr *MI) { |
189 | assert(!MI->isDebugInstr() && "Won't process debug values" ); |
190 | |
191 | const MCInstrDesc &MCID = MI->getDesc(); |
192 | |
193 | // Break dependence on undef uses. Do this before updating LiveRegs below. |
194 | // This can remove a false dependence with no additional instructions. |
195 | for (unsigned i = MCID.getNumDefs(), e = MCID.getNumOperands(); i != e; ++i) { |
196 | MachineOperand &MO = MI->getOperand(i); |
197 | if (!MO.isReg() || !MO.getReg() || !MO.isUse() || !MO.isUndef()) |
198 | continue; |
199 | |
200 | unsigned Pref = TII->getUndefRegClearance(MI: *MI, OpNum: i, TRI); |
201 | if (Pref) { |
202 | bool HadTrueDependency = pickBestRegisterForUndef(MI, OpIdx: i, Pref); |
203 | // We don't need to bother trying to break a dependency if this |
204 | // instruction has a true dependency on that register through another |
205 | // operand - we'll have to wait for it to be available regardless. |
206 | if (!HadTrueDependency && shouldBreakDependence(MI, OpIdx: i, Pref)) |
207 | UndefReads.push_back(x: std::make_pair(x&: MI, y&: i)); |
208 | } |
209 | } |
210 | |
211 | // The code below allows the target to create a new instruction to break the |
212 | // dependence. That opposes the goal of minimizing size, so bail out now. |
213 | if (MF->getFunction().hasMinSize()) |
214 | return; |
215 | |
216 | for (unsigned i = 0, |
217 | e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); |
218 | i != e; ++i) { |
219 | MachineOperand &MO = MI->getOperand(i); |
220 | if (!MO.isReg() || !MO.getReg()) |
221 | continue; |
222 | if (MO.isUse()) |
223 | continue; |
224 | // Check clearance before partial register updates. |
225 | unsigned Pref = TII->getPartialRegUpdateClearance(MI: *MI, OpNum: i, TRI); |
226 | if (Pref && shouldBreakDependence(MI, OpIdx: i, Pref)) |
227 | TII->breakPartialRegDependency(MI&: *MI, OpNum: i, TRI); |
228 | } |
229 | } |
230 | |
231 | void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { |
232 | if (UndefReads.empty()) |
233 | return; |
234 | |
235 | // The code below allows the target to create a new instruction to break the |
236 | // dependence. That opposes the goal of minimizing size, so bail out now. |
237 | if (MF->getFunction().hasMinSize()) |
238 | return; |
239 | |
240 | // Collect this block's live out register units. |
241 | LiveRegSet.init(TRI: *TRI); |
242 | // We do not need to care about pristine registers as they are just preserved |
243 | // but not actually used in the function. |
244 | LiveRegSet.addLiveOutsNoPristines(MBB: *MBB); |
245 | |
246 | MachineInstr *UndefMI = UndefReads.back().first; |
247 | unsigned OpIdx = UndefReads.back().second; |
248 | |
249 | for (MachineInstr &I : llvm::reverse(C&: *MBB)) { |
250 | // Update liveness, including the current instruction's defs. |
251 | LiveRegSet.stepBackward(MI: I); |
252 | |
253 | if (UndefMI == &I) { |
254 | if (!LiveRegSet.contains(Reg: UndefMI->getOperand(i: OpIdx).getReg())) |
255 | TII->breakPartialRegDependency(MI&: *UndefMI, OpNum: OpIdx, TRI); |
256 | |
257 | UndefReads.pop_back(); |
258 | if (UndefReads.empty()) |
259 | return; |
260 | |
261 | UndefMI = UndefReads.back().first; |
262 | OpIdx = UndefReads.back().second; |
263 | } |
264 | } |
265 | } |
266 | |
267 | void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) { |
268 | UndefReads.clear(); |
269 | // If this block is not done, it makes little sense to make any decisions |
270 | // based on clearance information. We need to make a second pass anyway, |
271 | // and by then we'll have better information, so we can avoid doing the work |
272 | // to try and break dependencies now. |
273 | for (MachineInstr &MI : *MBB) { |
274 | if (!MI.isDebugInstr()) |
275 | processDefs(MI: &MI); |
276 | } |
277 | processUndefReads(MBB); |
278 | } |
279 | |
280 | bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) { |
281 | if (skipFunction(F: mf.getFunction())) |
282 | return false; |
283 | MF = &mf; |
284 | TII = MF->getSubtarget().getInstrInfo(); |
285 | TRI = MF->getSubtarget().getRegisterInfo(); |
286 | RDA = &getAnalysis<ReachingDefAnalysis>(); |
287 | |
288 | RegClassInfo.runOnMachineFunction(MF: mf, /*Rev=*/true); |
289 | |
290 | LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n" ); |
291 | |
292 | // Skip Dead blocks due to ReachingDefAnalysis has no idea about instructions |
293 | // in them. |
294 | df_iterator_default_set<MachineBasicBlock *> Reachable; |
295 | for (MachineBasicBlock *MBB : depth_first_ext(G: &mf, S&: Reachable)) |
296 | (void)MBB /* Mark all reachable blocks */; |
297 | |
298 | // Traverse the basic blocks. |
299 | for (MachineBasicBlock &MBB : mf) |
300 | if (Reachable.count(Ptr: &MBB)) |
301 | processBasicBlock(MBB: &MBB); |
302 | |
303 | return false; |
304 | } |
305 | |