1 | //==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file Break False Dependency pass. |
10 | /// |
11 | /// Some instructions have false dependencies which cause unnecessary stalls. |
12 | /// For example, instructions may write part of a register and implicitly |
13 | /// need to read the other parts of the register. This may cause unwanted |
14 | /// stalls preventing otherwise unrelated instructions from executing in |
15 | /// parallel in an out-of-order CPU. |
16 | /// This pass is aimed at identifying and avoiding these dependencies. |
17 | // |
18 | //===----------------------------------------------------------------------===// |
19 | |
20 | #include "llvm/ADT/DepthFirstIterator.h" |
21 | #include "llvm/CodeGen/LivePhysRegs.h" |
22 | #include "llvm/CodeGen/MachineFunctionPass.h" |
23 | #include "llvm/CodeGen/ReachingDefAnalysis.h" |
24 | #include "llvm/CodeGen/RegisterClassInfo.h" |
25 | #include "llvm/CodeGen/TargetInstrInfo.h" |
26 | #include "llvm/InitializePasses.h" |
27 | #include "llvm/MC/MCInstrDesc.h" |
28 | #include "llvm/MC/MCRegister.h" |
29 | #include "llvm/MC/MCRegisterInfo.h" |
30 | #include "llvm/Support/Debug.h" |
31 | |
32 | using namespace llvm; |
33 | |
34 | namespace llvm { |
35 | |
36 | class BreakFalseDeps : public MachineFunctionPass { |
37 | private: |
38 | MachineFunction *MF = nullptr; |
39 | const TargetInstrInfo *TII = nullptr; |
40 | const TargetRegisterInfo *TRI = nullptr; |
41 | RegisterClassInfo RegClassInfo; |
42 | |
43 | /// List of undefined register reads in this block in forward order. |
44 | std::vector<std::pair<MachineInstr *, unsigned>> UndefReads; |
45 | |
46 | /// Storage for register unit liveness. |
47 | LivePhysRegs LiveRegSet; |
48 | |
49 | ReachingDefAnalysis *RDA = nullptr; |
50 | |
51 | public: |
52 | static char ID; // Pass identification, replacement for typeid |
53 | |
54 | BreakFalseDeps() : MachineFunctionPass(ID) { |
55 | initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry()); |
56 | } |
57 | |
58 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
59 | AU.setPreservesAll(); |
60 | AU.addRequired<ReachingDefAnalysis>(); |
61 | MachineFunctionPass::getAnalysisUsage(AU); |
62 | } |
63 | |
64 | bool runOnMachineFunction(MachineFunction &MF) override; |
65 | |
66 | MachineFunctionProperties getRequiredProperties() const override { |
67 | return MachineFunctionProperties().set( |
68 | MachineFunctionProperties::Property::NoVRegs); |
69 | } |
70 | |
71 | private: |
72 | /// Process he given basic block. |
73 | void processBasicBlock(MachineBasicBlock *MBB); |
74 | |
75 | /// Update def-ages for registers defined by MI. |
76 | /// Also break dependencies on partial defs and undef uses. |
77 | void processDefs(MachineInstr *MI); |
78 | |
79 | /// Helps avoid false dependencies on undef registers by updating the |
80 | /// machine instructions' undef operand to use a register that the instruction |
81 | /// is truly dependent on, or use a register with clearance higher than Pref. |
82 | /// Returns true if it was able to find a true dependency, thus not requiring |
83 | /// a dependency breaking instruction regardless of clearance. |
84 | bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, |
85 | unsigned Pref); |
86 | |
87 | /// Return true to if it makes sense to break dependence on a partial |
88 | /// def or undef use. |
89 | bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref); |
90 | |
91 | /// Break false dependencies on undefined register reads. |
92 | /// Walk the block backward computing precise liveness. This is expensive, so |
93 | /// we only do it on demand. Note that the occurrence of undefined register |
94 | /// reads that should be broken is very rare, but when they occur we may have |
95 | /// many in a single block. |
96 | void processUndefReads(MachineBasicBlock *); |
97 | }; |
98 | |
99 | } // namespace llvm |
100 | |
101 | #define DEBUG_TYPE "break-false-deps" |
102 | |
103 | char BreakFalseDeps::ID = 0; |
104 | INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps" , false, false) |
105 | INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) |
106 | INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps" , false, false) |
107 | |
108 | FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); } |
109 | |
110 | bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, |
111 | unsigned Pref) { |
112 | |
113 | // We can't change tied operands. |
114 | if (MI->isRegTiedToDefOperand(UseOpIdx: OpIdx)) |
115 | return false; |
116 | |
117 | MachineOperand &MO = MI->getOperand(i: OpIdx); |
118 | assert(MO.isUndef() && "Expected undef machine operand" ); |
119 | |
120 | // We can't change registers that aren't renamable. |
121 | if (!MO.isRenamable()) |
122 | return false; |
123 | |
124 | MCRegister OriginalReg = MO.getReg().asMCReg(); |
125 | |
126 | // Update only undef operands that have reg units that are mapped to one root. |
127 | for (MCRegUnit Unit : TRI->regunits(Reg: OriginalReg)) { |
128 | unsigned NumRoots = 0; |
129 | for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) { |
130 | NumRoots++; |
131 | if (NumRoots > 1) |
132 | return false; |
133 | } |
134 | } |
135 | |
136 | // Get the undef operand's register class |
137 | const TargetRegisterClass *OpRC = |
138 | TII->getRegClass(MCID: MI->getDesc(), OpNum: OpIdx, TRI, MF: *MF); |
139 | assert(OpRC && "Not a valid register class" ); |
140 | |
141 | // If the instruction has a true dependency, we can hide the false depdency |
142 | // behind it. |
143 | for (MachineOperand &CurrMO : MI->all_uses()) { |
144 | if (CurrMO.isUndef() || !OpRC->contains(Reg: CurrMO.getReg())) |
145 | continue; |
146 | // We found a true dependency - replace the undef register with the true |
147 | // dependency. |
148 | MO.setReg(CurrMO.getReg()); |
149 | return true; |
150 | } |
151 | |
152 | // Go over all registers in the register class and find the register with |
153 | // max clearance or clearance higher than Pref. |
154 | unsigned MaxClearance = 0; |
155 | unsigned MaxClearanceReg = OriginalReg; |
156 | ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC: OpRC); |
157 | for (MCPhysReg Reg : Order) { |
158 | unsigned Clearance = RDA->getClearance(MI, PhysReg: Reg); |
159 | if (Clearance <= MaxClearance) |
160 | continue; |
161 | MaxClearance = Clearance; |
162 | MaxClearanceReg = Reg; |
163 | |
164 | if (MaxClearance > Pref) |
165 | break; |
166 | } |
167 | |
168 | // Update the operand if we found a register with better clearance. |
169 | if (MaxClearanceReg != OriginalReg) |
170 | MO.setReg(MaxClearanceReg); |
171 | |
172 | return false; |
173 | } |
174 | |
175 | bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, |
176 | unsigned Pref) { |
177 | MCRegister Reg = MI->getOperand(i: OpIdx).getReg().asMCReg(); |
178 | unsigned Clearance = RDA->getClearance(MI, PhysReg: Reg); |
179 | LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); |
180 | |
181 | if (Pref > Clearance) { |
182 | LLVM_DEBUG(dbgs() << ": Break dependency.\n" ); |
183 | return true; |
184 | } |
185 | LLVM_DEBUG(dbgs() << ": OK .\n" ); |
186 | return false; |
187 | } |
188 | |
189 | void BreakFalseDeps::processDefs(MachineInstr *MI) { |
190 | assert(!MI->isDebugInstr() && "Won't process debug values" ); |
191 | |
192 | const MCInstrDesc &MCID = MI->getDesc(); |
193 | |
194 | // Break dependence on undef uses. Do this before updating LiveRegs below. |
195 | // This can remove a false dependence with no additional instructions. |
196 | for (unsigned i = MCID.getNumDefs(), e = MCID.getNumOperands(); i != e; ++i) { |
197 | MachineOperand &MO = MI->getOperand(i); |
198 | if (!MO.isReg() || !MO.getReg() || !MO.isUse() || !MO.isUndef()) |
199 | continue; |
200 | |
201 | unsigned Pref = TII->getUndefRegClearance(MI: *MI, OpNum: i, TRI); |
202 | if (Pref) { |
203 | bool HadTrueDependency = pickBestRegisterForUndef(MI, OpIdx: i, Pref); |
204 | // We don't need to bother trying to break a dependency if this |
205 | // instruction has a true dependency on that register through another |
206 | // operand - we'll have to wait for it to be available regardless. |
207 | if (!HadTrueDependency && shouldBreakDependence(MI, OpIdx: i, Pref)) |
208 | UndefReads.push_back(x: std::make_pair(x&: MI, y&: i)); |
209 | } |
210 | } |
211 | |
212 | // The code below allows the target to create a new instruction to break the |
213 | // dependence. That opposes the goal of minimizing size, so bail out now. |
214 | if (MF->getFunction().hasMinSize()) |
215 | return; |
216 | |
217 | for (unsigned i = 0, |
218 | e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); |
219 | i != e; ++i) { |
220 | MachineOperand &MO = MI->getOperand(i); |
221 | if (!MO.isReg() || !MO.getReg()) |
222 | continue; |
223 | if (MO.isUse()) |
224 | continue; |
225 | // Check clearance before partial register updates. |
226 | unsigned Pref = TII->getPartialRegUpdateClearance(MI: *MI, OpNum: i, TRI); |
227 | if (Pref && shouldBreakDependence(MI, OpIdx: i, Pref)) |
228 | TII->breakPartialRegDependency(MI&: *MI, OpNum: i, TRI); |
229 | } |
230 | } |
231 | |
232 | void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { |
233 | if (UndefReads.empty()) |
234 | return; |
235 | |
236 | // The code below allows the target to create a new instruction to break the |
237 | // dependence. That opposes the goal of minimizing size, so bail out now. |
238 | if (MF->getFunction().hasMinSize()) |
239 | return; |
240 | |
241 | // Collect this block's live out register units. |
242 | LiveRegSet.init(TRI: *TRI); |
243 | // We do not need to care about pristine registers as they are just preserved |
244 | // but not actually used in the function. |
245 | LiveRegSet.addLiveOutsNoPristines(MBB: *MBB); |
246 | |
247 | MachineInstr *UndefMI = UndefReads.back().first; |
248 | unsigned OpIdx = UndefReads.back().second; |
249 | |
250 | for (MachineInstr &I : llvm::reverse(C&: *MBB)) { |
251 | // Update liveness, including the current instruction's defs. |
252 | LiveRegSet.stepBackward(MI: I); |
253 | |
254 | if (UndefMI == &I) { |
255 | if (!LiveRegSet.contains(Reg: UndefMI->getOperand(i: OpIdx).getReg())) |
256 | TII->breakPartialRegDependency(MI&: *UndefMI, OpNum: OpIdx, TRI); |
257 | |
258 | UndefReads.pop_back(); |
259 | if (UndefReads.empty()) |
260 | return; |
261 | |
262 | UndefMI = UndefReads.back().first; |
263 | OpIdx = UndefReads.back().second; |
264 | } |
265 | } |
266 | } |
267 | |
268 | void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) { |
269 | UndefReads.clear(); |
270 | // If this block is not done, it makes little sense to make any decisions |
271 | // based on clearance information. We need to make a second pass anyway, |
272 | // and by then we'll have better information, so we can avoid doing the work |
273 | // to try and break dependencies now. |
274 | for (MachineInstr &MI : *MBB) { |
275 | if (!MI.isDebugInstr()) |
276 | processDefs(MI: &MI); |
277 | } |
278 | processUndefReads(MBB); |
279 | } |
280 | |
281 | bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) { |
282 | if (skipFunction(F: mf.getFunction())) |
283 | return false; |
284 | MF = &mf; |
285 | TII = MF->getSubtarget().getInstrInfo(); |
286 | TRI = MF->getSubtarget().getRegisterInfo(); |
287 | RDA = &getAnalysis<ReachingDefAnalysis>(); |
288 | |
289 | RegClassInfo.runOnMachineFunction(MF: mf); |
290 | |
291 | LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n" ); |
292 | |
293 | // Skip Dead blocks due to ReachingDefAnalysis has no idea about instructions |
294 | // in them. |
295 | df_iterator_default_set<MachineBasicBlock *> Reachable; |
296 | for (MachineBasicBlock *MBB : depth_first_ext(G: &mf, S&: Reachable)) |
297 | (void)MBB /* Mark all reachable blocks */; |
298 | |
299 | // Traverse the basic blocks. |
300 | for (MachineBasicBlock &MBB : mf) |
301 | if (Reachable.count(Ptr: &MBB)) |
302 | processBasicBlock(MBB: &MBB); |
303 | |
304 | return false; |
305 | } |
306 | |