1 | //===- AggressiveAntiDepBreaker.cpp - Anti-dep breaker --------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the AggressiveAntiDepBreaker class, which |
10 | // implements register anti-dependence breaking during post-RA |
11 | // scheduling. It attempts to break all anti-dependencies within a |
12 | // block. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "AggressiveAntiDepBreaker.h" |
17 | #include "llvm/ADT/ArrayRef.h" |
18 | #include "llvm/ADT/SmallSet.h" |
19 | #include "llvm/ADT/iterator_range.h" |
20 | #include "llvm/CodeGen/MachineBasicBlock.h" |
21 | #include "llvm/CodeGen/MachineFrameInfo.h" |
22 | #include "llvm/CodeGen/MachineFunction.h" |
23 | #include "llvm/CodeGen/MachineInstr.h" |
24 | #include "llvm/CodeGen/MachineOperand.h" |
25 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
26 | #include "llvm/CodeGen/RegisterClassInfo.h" |
27 | #include "llvm/CodeGen/ScheduleDAG.h" |
28 | #include "llvm/CodeGen/TargetInstrInfo.h" |
29 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
30 | #include "llvm/CodeGenTypes/MachineValueType.h" |
31 | #include "llvm/MC/MCInstrDesc.h" |
32 | #include "llvm/MC/MCRegisterInfo.h" |
33 | #include "llvm/Support/CommandLine.h" |
34 | #include "llvm/Support/Debug.h" |
35 | #include "llvm/Support/raw_ostream.h" |
36 | #include <cassert> |
37 | #include <utility> |
38 | |
39 | using namespace llvm; |
40 | |
41 | #define DEBUG_TYPE "post-RA-sched" |
42 | |
43 | // If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod |
44 | static cl::opt<int> |
45 | DebugDiv("agg-antidep-debugdiv" , |
46 | cl::desc("Debug control for aggressive anti-dep breaker" ), |
47 | cl::init(Val: 0), cl::Hidden); |
48 | |
49 | static cl::opt<int> |
50 | DebugMod("agg-antidep-debugmod" , |
51 | cl::desc("Debug control for aggressive anti-dep breaker" ), |
52 | cl::init(Val: 0), cl::Hidden); |
53 | |
54 | AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, |
55 | MachineBasicBlock *BB) |
56 | : NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0), |
57 | GroupNodeIndices(TargetRegs, 0), KillIndices(TargetRegs, 0), |
58 | DefIndices(TargetRegs, 0) { |
59 | const unsigned BBSize = BB->size(); |
60 | for (unsigned i = 0; i < NumTargetRegs; ++i) { |
61 | // Initialize all registers to be in their own group. Initially we |
62 | // assign the register to the same-indexed GroupNode. |
63 | GroupNodeIndices[i] = i; |
64 | // Initialize the indices to indicate that no registers are live. |
65 | KillIndices[i] = ~0u; |
66 | DefIndices[i] = BBSize; |
67 | } |
68 | } |
69 | |
70 | unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) { |
71 | unsigned Node = GroupNodeIndices[Reg]; |
72 | while (GroupNodes[Node] != Node) |
73 | Node = GroupNodes[Node]; |
74 | |
75 | return Node; |
76 | } |
77 | |
78 | void AggressiveAntiDepState::GetGroupRegs( |
79 | unsigned Group, |
80 | std::vector<unsigned> &Regs, |
81 | std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs) |
82 | { |
83 | for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) { |
84 | if ((GetGroup(Reg) == Group) && (RegRefs->count(x: Reg) > 0)) |
85 | Regs.push_back(x: Reg); |
86 | } |
87 | } |
88 | |
89 | unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) { |
90 | assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!" ); |
91 | assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!" ); |
92 | |
93 | // find group for each register |
94 | unsigned Group1 = GetGroup(Reg: Reg1); |
95 | unsigned Group2 = GetGroup(Reg: Reg2); |
96 | |
97 | // if either group is 0, then that must become the parent |
98 | unsigned Parent = (Group1 == 0) ? Group1 : Group2; |
99 | unsigned Other = (Parent == Group1) ? Group2 : Group1; |
100 | GroupNodes.at(n: Other) = Parent; |
101 | return Parent; |
102 | } |
103 | |
104 | unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) { |
105 | // Create a new GroupNode for Reg. Reg's existing GroupNode must |
106 | // stay as is because there could be other GroupNodes referring to |
107 | // it. |
108 | unsigned idx = GroupNodes.size(); |
109 | GroupNodes.push_back(x: idx); |
110 | GroupNodeIndices[Reg] = idx; |
111 | return idx; |
112 | } |
113 | |
114 | bool AggressiveAntiDepState::IsLive(unsigned Reg) { |
115 | // KillIndex must be defined and DefIndex not defined for a register |
116 | // to be live. |
117 | return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u)); |
118 | } |
119 | |
120 | AggressiveAntiDepBreaker::AggressiveAntiDepBreaker( |
121 | MachineFunction &MFi, const RegisterClassInfo &RCI, |
122 | TargetSubtargetInfo::RegClassVector &CriticalPathRCs) |
123 | : MF(MFi), MRI(MF.getRegInfo()), TII(MF.getSubtarget().getInstrInfo()), |
124 | TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI) { |
125 | /* Collect a bitset of all registers that are only broken if they |
126 | are on the critical path. */ |
127 | for (const TargetRegisterClass *RC : CriticalPathRCs) { |
128 | BitVector CPSet = TRI->getAllocatableSet(MF, RC); |
129 | if (CriticalPathSet.none()) |
130 | CriticalPathSet = CPSet; |
131 | else |
132 | CriticalPathSet |= CPSet; |
133 | } |
134 | |
135 | LLVM_DEBUG(dbgs() << "AntiDep Critical-Path Registers:" ); |
136 | LLVM_DEBUG(for (unsigned r |
137 | : CriticalPathSet.set_bits()) dbgs() |
138 | << " " << printReg(r, TRI)); |
139 | LLVM_DEBUG(dbgs() << '\n'); |
140 | } |
141 | |
142 | AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { |
143 | delete State; |
144 | } |
145 | |
146 | void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { |
147 | assert(!State); |
148 | State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); |
149 | |
150 | bool IsReturnBlock = BB->isReturnBlock(); |
151 | std::vector<unsigned> &KillIndices = State->GetKillIndices(); |
152 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
153 | |
154 | // Examine the live-in regs of all successors. |
155 | for (MachineBasicBlock *Succ : BB->successors()) |
156 | for (const auto &LI : Succ->liveins()) { |
157 | for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { |
158 | unsigned Reg = *AI; |
159 | State->UnionGroups(Reg1: Reg, Reg2: 0); |
160 | KillIndices[Reg] = BB->size(); |
161 | DefIndices[Reg] = ~0u; |
162 | } |
163 | } |
164 | |
165 | // Mark live-out callee-saved registers. In a return block this is |
166 | // all callee-saved registers. In non-return this is any |
167 | // callee-saved register that is not saved in the prolog. |
168 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
169 | BitVector Pristine = MFI.getPristineRegs(MF); |
170 | for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; |
171 | ++I) { |
172 | unsigned Reg = *I; |
173 | if (!IsReturnBlock && !Pristine.test(Idx: Reg)) |
174 | continue; |
175 | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { |
176 | unsigned AliasReg = *AI; |
177 | State->UnionGroups(Reg1: AliasReg, Reg2: 0); |
178 | KillIndices[AliasReg] = BB->size(); |
179 | DefIndices[AliasReg] = ~0u; |
180 | } |
181 | } |
182 | } |
183 | |
184 | void AggressiveAntiDepBreaker::FinishBlock() { |
185 | delete State; |
186 | State = nullptr; |
187 | } |
188 | |
189 | void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count, |
190 | unsigned InsertPosIndex) { |
191 | assert(Count < InsertPosIndex && "Instruction index out of expected range!" ); |
192 | |
193 | std::set<unsigned> PassthruRegs; |
194 | GetPassthruRegs(MI, PassthruRegs); |
195 | PrescanInstruction(MI, Count, PassthruRegs); |
196 | ScanInstruction(MI, Count); |
197 | |
198 | LLVM_DEBUG(dbgs() << "Observe: " ); |
199 | LLVM_DEBUG(MI.dump()); |
200 | LLVM_DEBUG(dbgs() << "\tRegs:" ); |
201 | |
202 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
203 | for (unsigned Reg = 1; Reg != TRI->getNumRegs(); ++Reg) { |
204 | // If Reg is current live, then mark that it can't be renamed as |
205 | // we don't know the extent of its live-range anymore (now that it |
206 | // has been scheduled). If it is not live but was defined in the |
207 | // previous schedule region, then set its def index to the most |
208 | // conservative location (i.e. the beginning of the previous |
209 | // schedule region). |
210 | if (State->IsLive(Reg)) { |
211 | LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() |
212 | << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg) |
213 | << "->g0(region live-out)" ); |
214 | State->UnionGroups(Reg1: Reg, Reg2: 0); |
215 | } else if ((DefIndices[Reg] < InsertPosIndex) |
216 | && (DefIndices[Reg] >= Count)) { |
217 | DefIndices[Reg] = Count; |
218 | } |
219 | } |
220 | LLVM_DEBUG(dbgs() << '\n'); |
221 | } |
222 | |
223 | bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI, |
224 | MachineOperand &MO) { |
225 | if (!MO.isReg() || !MO.isImplicit()) |
226 | return false; |
227 | |
228 | Register Reg = MO.getReg(); |
229 | if (Reg == 0) |
230 | return false; |
231 | |
232 | MachineOperand *Op = nullptr; |
233 | if (MO.isDef()) |
234 | Op = MI.findRegisterUseOperand(Reg, /*TRI=*/nullptr, isKill: true); |
235 | else |
236 | Op = MI.findRegisterDefOperand(Reg, /*TRI=*/nullptr); |
237 | |
238 | return(Op && Op->isImplicit()); |
239 | } |
240 | |
241 | void AggressiveAntiDepBreaker::GetPassthruRegs( |
242 | MachineInstr &MI, std::set<unsigned> &PassthruRegs) { |
243 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
244 | MachineOperand &MO = MI.getOperand(i); |
245 | if (!MO.isReg()) continue; |
246 | if ((MO.isDef() && MI.isRegTiedToUseOperand(DefOpIdx: i)) || |
247 | IsImplicitDefUse(MI, MO)) { |
248 | const Register Reg = MO.getReg(); |
249 | for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) |
250 | PassthruRegs.insert(x: SubReg); |
251 | } |
252 | } |
253 | } |
254 | |
255 | /// AntiDepEdges - Return in Edges the anti- and output- dependencies |
256 | /// in SU that we want to consider for breaking. |
257 | static void AntiDepEdges(const SUnit *SU, std::vector<const SDep *> &Edges) { |
258 | SmallSet<unsigned, 4> RegSet; |
259 | for (const SDep &Pred : SU->Preds) { |
260 | if ((Pred.getKind() == SDep::Anti) || (Pred.getKind() == SDep::Output)) { |
261 | if (RegSet.insert(V: Pred.getReg()).second) |
262 | Edges.push_back(x: &Pred); |
263 | } |
264 | } |
265 | } |
266 | |
267 | /// CriticalPathStep - Return the next SUnit after SU on the bottom-up |
268 | /// critical path. |
269 | static const SUnit *CriticalPathStep(const SUnit *SU) { |
270 | const SDep *Next = nullptr; |
271 | unsigned NextDepth = 0; |
272 | // Find the predecessor edge with the greatest depth. |
273 | if (SU) { |
274 | for (const SDep &Pred : SU->Preds) { |
275 | const SUnit *PredSU = Pred.getSUnit(); |
276 | unsigned PredLatency = Pred.getLatency(); |
277 | unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; |
278 | // In the case of a latency tie, prefer an anti-dependency edge over |
279 | // other types of edges. |
280 | if (NextDepth < PredTotalLatency || |
281 | (NextDepth == PredTotalLatency && Pred.getKind() == SDep::Anti)) { |
282 | NextDepth = PredTotalLatency; |
283 | Next = &Pred; |
284 | } |
285 | } |
286 | } |
287 | |
288 | return (Next) ? Next->getSUnit() : nullptr; |
289 | } |
290 | |
291 | void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, |
292 | const char *tag, |
293 | const char *, |
294 | const char *) { |
295 | std::vector<unsigned> &KillIndices = State->GetKillIndices(); |
296 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
297 | std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& |
298 | RegRefs = State->GetRegRefs(); |
299 | |
300 | // FIXME: We must leave subregisters of live super registers as live, so that |
301 | // we don't clear out the register tracking information for subregisters of |
302 | // super registers we're still tracking (and with which we're unioning |
303 | // subregister definitions). |
304 | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) |
305 | if (TRI->isSuperRegister(RegA: Reg, RegB: *AI) && State->IsLive(Reg: *AI)) { |
306 | LLVM_DEBUG(if (!header && footer) dbgs() << footer); |
307 | return; |
308 | } |
309 | |
310 | if (!State->IsLive(Reg)) { |
311 | KillIndices[Reg] = KillIdx; |
312 | DefIndices[Reg] = ~0u; |
313 | RegRefs.erase(x: Reg); |
314 | State->LeaveGroup(Reg); |
315 | LLVM_DEBUG(if (header) { |
316 | dbgs() << header << printReg(Reg, TRI); |
317 | header = nullptr; |
318 | }); |
319 | LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); |
320 | // Repeat for subregisters. Note that we only do this if the superregister |
321 | // was not live because otherwise, regardless whether we have an explicit |
322 | // use of the subregister, the subregister's contents are needed for the |
323 | // uses of the superregister. |
324 | for (MCPhysReg SubregReg : TRI->subregs(Reg)) { |
325 | if (!State->IsLive(Reg: SubregReg)) { |
326 | KillIndices[SubregReg] = KillIdx; |
327 | DefIndices[SubregReg] = ~0u; |
328 | RegRefs.erase(x: SubregReg); |
329 | State->LeaveGroup(Reg: SubregReg); |
330 | LLVM_DEBUG(if (header) { |
331 | dbgs() << header << printReg(Reg, TRI); |
332 | header = nullptr; |
333 | }); |
334 | LLVM_DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g" |
335 | << State->GetGroup(SubregReg) << tag); |
336 | } |
337 | } |
338 | } |
339 | |
340 | LLVM_DEBUG(if (!header && footer) dbgs() << footer); |
341 | } |
342 | |
343 | void AggressiveAntiDepBreaker::PrescanInstruction( |
344 | MachineInstr &MI, unsigned Count, std::set<unsigned> &PassthruRegs) { |
345 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
346 | std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& |
347 | RegRefs = State->GetRegRefs(); |
348 | |
349 | // Handle dead defs by simulating a last-use of the register just |
350 | // after the def. A dead def can occur because the def is truly |
351 | // dead, or because only a subregister is live at the def. If we |
352 | // don't do this the dead def will be incorrectly merged into the |
353 | // previous def. |
354 | for (const MachineOperand &MO : MI.all_defs()) { |
355 | Register Reg = MO.getReg(); |
356 | if (Reg == 0) continue; |
357 | |
358 | HandleLastUse(Reg, KillIdx: Count + 1, tag: "" , header: "\tDead Def: " , footer: "\n" ); |
359 | } |
360 | |
361 | LLVM_DEBUG(dbgs() << "\tDef Groups:" ); |
362 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
363 | MachineOperand &MO = MI.getOperand(i); |
364 | if (!MO.isReg() || !MO.isDef()) continue; |
365 | Register Reg = MO.getReg(); |
366 | if (Reg == 0) continue; |
367 | |
368 | LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" |
369 | << State->GetGroup(Reg)); |
370 | |
371 | // If MI's defs have a special allocation requirement, don't allow |
372 | // any def registers to be changed. Also assume all registers |
373 | // defined in a call must not be changed (ABI). Inline assembly may |
374 | // reference either system calls or the register directly. Skip it until we |
375 | // can tell user specified registers from compiler-specified. |
376 | if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI) || |
377 | MI.isInlineAsm()) { |
378 | LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)" ); |
379 | State->UnionGroups(Reg1: Reg, Reg2: 0); |
380 | } |
381 | |
382 | // Any aliased that are live at this point are completely or |
383 | // partially defined here, so group those aliases with Reg. |
384 | for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { |
385 | unsigned AliasReg = *AI; |
386 | if (State->IsLive(Reg: AliasReg)) { |
387 | State->UnionGroups(Reg1: Reg, Reg2: AliasReg); |
388 | LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " |
389 | << printReg(AliasReg, TRI) << ")" ); |
390 | } |
391 | } |
392 | |
393 | // Note register reference... |
394 | const TargetRegisterClass *RC = nullptr; |
395 | if (i < MI.getDesc().getNumOperands()) |
396 | RC = TII->getRegClass(MCID: MI.getDesc(), OpNum: i, TRI, MF); |
397 | AggressiveAntiDepState::RegisterReference RR = { .Operand: &MO, .RC: RC }; |
398 | RegRefs.insert(x: std::make_pair(x&: Reg, y&: RR)); |
399 | } |
400 | |
401 | LLVM_DEBUG(dbgs() << '\n'); |
402 | |
403 | // Scan the register defs for this instruction and update |
404 | // live-ranges. |
405 | for (const MachineOperand &MO : MI.operands()) { |
406 | if (!MO.isReg() || !MO.isDef()) continue; |
407 | Register Reg = MO.getReg(); |
408 | if (Reg == 0) continue; |
409 | // Ignore KILLs and passthru registers for liveness... |
410 | if (MI.isKill() || (PassthruRegs.count(x: Reg) != 0)) |
411 | continue; |
412 | |
413 | // Update def for Reg and aliases. |
414 | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { |
415 | // We need to be careful here not to define already-live super registers. |
416 | // If the super register is already live, then this definition is not |
417 | // a definition of the whole super register (just a partial insertion |
418 | // into it). Earlier subregister definitions (which we've not yet visited |
419 | // because we're iterating bottom-up) need to be linked to the same group |
420 | // as this definition. |
421 | if (TRI->isSuperRegister(RegA: Reg, RegB: *AI) && State->IsLive(Reg: *AI)) |
422 | continue; |
423 | |
424 | DefIndices[*AI] = Count; |
425 | } |
426 | } |
427 | } |
428 | |
429 | void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, |
430 | unsigned Count) { |
431 | LLVM_DEBUG(dbgs() << "\tUse Groups:" ); |
432 | std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& |
433 | RegRefs = State->GetRegRefs(); |
434 | |
435 | // If MI's uses have special allocation requirement, don't allow |
436 | // any use registers to be changed. Also assume all registers |
437 | // used in a call must not be changed (ABI). |
438 | // Inline Assembly register uses also cannot be safely changed. |
439 | // FIXME: The issue with predicated instruction is more complex. We are being |
440 | // conservatively here because the kill markers cannot be trusted after |
441 | // if-conversion: |
442 | // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14] |
443 | // ... |
444 | // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395] |
445 | // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12] |
446 | // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8) |
447 | // |
448 | // The first R6 kill is not really a kill since it's killed by a predicated |
449 | // instruction which may not be executed. The second R6 def may or may not |
450 | // re-define R6 so it's not safe to change it since the last R6 use cannot be |
451 | // changed. |
452 | bool Special = MI.isCall() || MI.hasExtraSrcRegAllocReq() || |
453 | TII->isPredicated(MI) || MI.isInlineAsm(); |
454 | |
455 | // Scan the register uses for this instruction and update |
456 | // live-ranges, groups and RegRefs. |
457 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
458 | MachineOperand &MO = MI.getOperand(i); |
459 | if (!MO.isReg() || !MO.isUse()) continue; |
460 | Register Reg = MO.getReg(); |
461 | if (Reg == 0) continue; |
462 | |
463 | LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" |
464 | << State->GetGroup(Reg)); |
465 | |
466 | // It wasn't previously live but now it is, this is a kill. Forget |
467 | // the previous live-range information and start a new live-range |
468 | // for the register. |
469 | HandleLastUse(Reg, KillIdx: Count, tag: "(last-use)" ); |
470 | |
471 | if (Special) { |
472 | LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)" ); |
473 | State->UnionGroups(Reg1: Reg, Reg2: 0); |
474 | } |
475 | |
476 | // Note register reference... |
477 | const TargetRegisterClass *RC = nullptr; |
478 | if (i < MI.getDesc().getNumOperands()) |
479 | RC = TII->getRegClass(MCID: MI.getDesc(), OpNum: i, TRI, MF); |
480 | AggressiveAntiDepState::RegisterReference RR = { .Operand: &MO, .RC: RC }; |
481 | RegRefs.insert(x: std::make_pair(x&: Reg, y&: RR)); |
482 | } |
483 | |
484 | LLVM_DEBUG(dbgs() << '\n'); |
485 | |
486 | // Form a group of all defs and uses of a KILL instruction to ensure |
487 | // that all registers are renamed as a group. |
488 | if (MI.isKill()) { |
489 | LLVM_DEBUG(dbgs() << "\tKill Group:" ); |
490 | |
491 | unsigned FirstReg = 0; |
492 | for (const MachineOperand &MO : MI.operands()) { |
493 | if (!MO.isReg()) continue; |
494 | Register Reg = MO.getReg(); |
495 | if (Reg == 0) continue; |
496 | |
497 | if (FirstReg != 0) { |
498 | LLVM_DEBUG(dbgs() << "=" << printReg(Reg, TRI)); |
499 | State->UnionGroups(Reg1: FirstReg, Reg2: Reg); |
500 | } else { |
501 | LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI)); |
502 | FirstReg = Reg; |
503 | } |
504 | } |
505 | |
506 | LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n'); |
507 | } |
508 | } |
509 | |
510 | BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { |
511 | BitVector BV(TRI->getNumRegs(), false); |
512 | bool first = true; |
513 | |
514 | // Check all references that need rewriting for Reg. For each, use |
515 | // the corresponding register class to narrow the set of registers |
516 | // that are appropriate for renaming. |
517 | for (const auto &Q : make_range(p: State->GetRegRefs().equal_range(x: Reg))) { |
518 | const TargetRegisterClass *RC = Q.second.RC; |
519 | if (!RC) continue; |
520 | |
521 | BitVector RCBV = TRI->getAllocatableSet(MF, RC); |
522 | if (first) { |
523 | BV |= RCBV; |
524 | first = false; |
525 | } else { |
526 | BV &= RCBV; |
527 | } |
528 | |
529 | LLVM_DEBUG(dbgs() << " " << TRI->getRegClassName(RC)); |
530 | } |
531 | |
532 | return BV; |
533 | } |
534 | |
535 | bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( |
536 | unsigned SuperReg, unsigned AntiDepGroupIndex, RenameOrderType &RenameOrder, |
537 | std::map<unsigned, unsigned> &RenameMap) { |
538 | std::vector<unsigned> &KillIndices = State->GetKillIndices(); |
539 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
540 | std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& |
541 | RegRefs = State->GetRegRefs(); |
542 | |
543 | // Collect all referenced registers in the same group as |
544 | // AntiDepReg. These all need to be renamed together if we are to |
545 | // break the anti-dependence. |
546 | std::vector<unsigned> Regs; |
547 | State->GetGroupRegs(Group: AntiDepGroupIndex, Regs, RegRefs: &RegRefs); |
548 | assert(!Regs.empty() && "Empty register group!" ); |
549 | if (Regs.empty()) |
550 | return false; |
551 | |
552 | // Collect the BitVector of registers that can be used to rename |
553 | // each register. |
554 | LLVM_DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex |
555 | << ":\n" ); |
556 | std::map<unsigned, BitVector> RenameRegisterMap; |
557 | for (unsigned Reg : Regs) { |
558 | // If Reg has any references, then collect possible rename regs |
559 | if (RegRefs.count(x: Reg) > 0) { |
560 | LLVM_DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":" ); |
561 | |
562 | BitVector &BV = RenameRegisterMap[Reg]; |
563 | assert(BV.empty()); |
564 | BV = GetRenameRegisters(Reg); |
565 | |
566 | LLVM_DEBUG({ |
567 | dbgs() << " ::" ; |
568 | for (unsigned r : BV.set_bits()) |
569 | dbgs() << " " << printReg(r, TRI); |
570 | dbgs() << "\n" ; |
571 | }); |
572 | } |
573 | } |
574 | |
575 | // All group registers should be a subreg of SuperReg. |
576 | for (unsigned Reg : Regs) { |
577 | if (Reg == SuperReg) continue; |
578 | bool IsSub = TRI->isSubRegister(RegA: SuperReg, RegB: Reg); |
579 | // FIXME: remove this once PR18663 has been properly fixed. For now, |
580 | // return a conservative answer: |
581 | // assert(IsSub && "Expecting group subregister"); |
582 | if (!IsSub) |
583 | return false; |
584 | } |
585 | |
586 | #ifndef NDEBUG |
587 | // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod |
588 | if (DebugDiv > 0) { |
589 | static int renamecnt = 0; |
590 | if (renamecnt++ % DebugDiv != DebugMod) |
591 | return false; |
592 | |
593 | dbgs() << "*** Performing rename " << printReg(SuperReg, TRI) |
594 | << " for debug ***\n" ; |
595 | } |
596 | #endif |
597 | |
598 | // Check each possible rename register for SuperReg in round-robin |
599 | // order. If that register is available, and the corresponding |
600 | // registers are available for the other group subregisters, then we |
601 | // can use those registers to rename. |
602 | |
603 | // FIXME: Using getMinimalPhysRegClass is very conservative. We should |
604 | // check every use of the register and find the largest register class |
605 | // that can be used in all of them. |
606 | const TargetRegisterClass *SuperRC = |
607 | TRI->getMinimalPhysRegClass(Reg: SuperReg, VT: MVT::Other); |
608 | |
609 | ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC: SuperRC); |
610 | if (Order.empty()) { |
611 | LLVM_DEBUG(dbgs() << "\tEmpty Super Regclass!!\n" ); |
612 | return false; |
613 | } |
614 | |
615 | LLVM_DEBUG(dbgs() << "\tFind Registers:" ); |
616 | |
617 | RenameOrder.insert(x: RenameOrderType::value_type(SuperRC, Order.size())); |
618 | |
619 | unsigned OrigR = RenameOrder[SuperRC]; |
620 | unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR); |
621 | unsigned R = OrigR; |
622 | do { |
623 | if (R == 0) R = Order.size(); |
624 | --R; |
625 | const unsigned NewSuperReg = Order[R]; |
626 | // Don't consider non-allocatable registers |
627 | if (!MRI.isAllocatable(PhysReg: NewSuperReg)) continue; |
628 | // Don't replace a register with itself. |
629 | if (NewSuperReg == SuperReg) continue; |
630 | |
631 | LLVM_DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':'); |
632 | RenameMap.clear(); |
633 | |
634 | // For each referenced group register (which must be a SuperReg or |
635 | // a subregister of SuperReg), find the corresponding subregister |
636 | // of NewSuperReg and make sure it is free to be renamed. |
637 | for (unsigned Reg : Regs) { |
638 | unsigned NewReg = 0; |
639 | if (Reg == SuperReg) { |
640 | NewReg = NewSuperReg; |
641 | } else { |
642 | unsigned NewSubRegIdx = TRI->getSubRegIndex(RegNo: SuperReg, SubRegNo: Reg); |
643 | if (NewSubRegIdx != 0) |
644 | NewReg = TRI->getSubReg(Reg: NewSuperReg, Idx: NewSubRegIdx); |
645 | } |
646 | |
647 | LLVM_DEBUG(dbgs() << " " << printReg(NewReg, TRI)); |
648 | |
649 | // Check if Reg can be renamed to NewReg. |
650 | if (!RenameRegisterMap[Reg].test(Idx: NewReg)) { |
651 | LLVM_DEBUG(dbgs() << "(no rename)" ); |
652 | goto next_super_reg; |
653 | } |
654 | |
655 | // If NewReg is dead and NewReg's most recent def is not before |
656 | // Regs's kill, it's safe to replace Reg with NewReg. We |
657 | // must also check all aliases of NewReg, because we can't define a |
658 | // register when any sub or super is already live. |
659 | if (State->IsLive(Reg: NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) { |
660 | LLVM_DEBUG(dbgs() << "(live)" ); |
661 | goto next_super_reg; |
662 | } else { |
663 | bool found = false; |
664 | for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) { |
665 | unsigned AliasReg = *AI; |
666 | if (State->IsLive(Reg: AliasReg) || |
667 | (KillIndices[Reg] > DefIndices[AliasReg])) { |
668 | LLVM_DEBUG(dbgs() |
669 | << "(alias " << printReg(AliasReg, TRI) << " live)" ); |
670 | found = true; |
671 | break; |
672 | } |
673 | } |
674 | if (found) |
675 | goto next_super_reg; |
676 | } |
677 | |
678 | // We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also |
679 | // defines 'NewReg' via an early-clobber operand. |
680 | for (const auto &Q : make_range(p: RegRefs.equal_range(x: Reg))) { |
681 | MachineInstr *UseMI = Q.second.Operand->getParent(); |
682 | int Idx = UseMI->findRegisterDefOperandIdx(Reg: NewReg, TRI, isDead: false, Overlap: true); |
683 | if (Idx == -1) |
684 | continue; |
685 | |
686 | if (UseMI->getOperand(i: Idx).isEarlyClobber()) { |
687 | LLVM_DEBUG(dbgs() << "(ec)" ); |
688 | goto next_super_reg; |
689 | } |
690 | } |
691 | |
692 | // Also, we cannot rename 'Reg' to 'NewReg' if the instruction defining |
693 | // 'Reg' is an early-clobber define and that instruction also uses |
694 | // 'NewReg'. |
695 | for (const auto &Q : make_range(p: RegRefs.equal_range(x: Reg))) { |
696 | if (!Q.second.Operand->isDef() || !Q.second.Operand->isEarlyClobber()) |
697 | continue; |
698 | |
699 | MachineInstr *DefMI = Q.second.Operand->getParent(); |
700 | if (DefMI->readsRegister(Reg: NewReg, TRI)) { |
701 | LLVM_DEBUG(dbgs() << "(ec)" ); |
702 | goto next_super_reg; |
703 | } |
704 | } |
705 | |
706 | // Record that 'Reg' can be renamed to 'NewReg'. |
707 | RenameMap.insert(x: std::pair<unsigned, unsigned>(Reg, NewReg)); |
708 | } |
709 | |
710 | // If we fall-out here, then every register in the group can be |
711 | // renamed, as recorded in RenameMap. |
712 | RenameOrder.erase(x: SuperRC); |
713 | RenameOrder.insert(x: RenameOrderType::value_type(SuperRC, R)); |
714 | LLVM_DEBUG(dbgs() << "]\n" ); |
715 | return true; |
716 | |
717 | next_super_reg: |
718 | LLVM_DEBUG(dbgs() << ']'); |
719 | } while (R != EndR); |
720 | |
721 | LLVM_DEBUG(dbgs() << '\n'); |
722 | |
723 | // No registers are free and available! |
724 | return false; |
725 | } |
726 | |
/// BreakAntiDependencies - Identify anti-dependencies within the
/// ScheduleDAG and break them by renaming registers.
729 | unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( |
730 | const std::vector<SUnit> &SUnits, |
731 | MachineBasicBlock::iterator Begin, |
732 | MachineBasicBlock::iterator End, |
733 | unsigned InsertPosIndex, |
734 | DbgValueVector &DbgValues) { |
735 | std::vector<unsigned> &KillIndices = State->GetKillIndices(); |
736 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
737 | std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& |
738 | RegRefs = State->GetRegRefs(); |
739 | |
740 | // The code below assumes that there is at least one instruction, |
741 | // so just duck out immediately if the block is empty. |
742 | if (SUnits.empty()) return 0; |
743 | |
744 | // For each regclass the next register to use for renaming. |
745 | RenameOrderType RenameOrder; |
746 | |
747 | // ...need a map from MI to SUnit. |
748 | std::map<MachineInstr *, const SUnit *> MISUnitMap; |
749 | for (const SUnit &SU : SUnits) |
750 | MISUnitMap.insert(x: std::make_pair(x: SU.getInstr(), y: &SU)); |
751 | |
752 | // Track progress along the critical path through the SUnit graph as |
753 | // we walk the instructions. This is needed for regclasses that only |
754 | // break critical-path anti-dependencies. |
755 | const SUnit *CriticalPathSU = nullptr; |
756 | MachineInstr *CriticalPathMI = nullptr; |
757 | if (CriticalPathSet.any()) { |
758 | for (const SUnit &SU : SUnits) { |
759 | if (!CriticalPathSU || |
760 | ((SU.getDepth() + SU.Latency) > |
761 | (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { |
762 | CriticalPathSU = &SU; |
763 | } |
764 | } |
765 | assert(CriticalPathSU && "Failed to find SUnit critical path" ); |
766 | CriticalPathMI = CriticalPathSU->getInstr(); |
767 | } |
768 | |
769 | #ifndef NDEBUG |
770 | LLVM_DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n" ); |
771 | LLVM_DEBUG(dbgs() << "Available regs:" ); |
772 | for (unsigned Reg = 1; Reg < TRI->getNumRegs(); ++Reg) { |
773 | if (!State->IsLive(Reg)) |
774 | LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI)); |
775 | } |
776 | LLVM_DEBUG(dbgs() << '\n'); |
777 | #endif |
778 | |
779 | BitVector RegAliases(TRI->getNumRegs()); |
780 | |
781 | // Attempt to break anti-dependence edges. Walk the instructions |
782 | // from the bottom up, tracking information about liveness as we go |
783 | // to help determine which registers are available. |
784 | unsigned Broken = 0; |
785 | unsigned Count = InsertPosIndex - 1; |
786 | for (MachineBasicBlock::iterator I = End, E = Begin; |
787 | I != E; --Count) { |
788 | MachineInstr &MI = *--I; |
789 | |
790 | if (MI.isDebugInstr()) |
791 | continue; |
792 | |
793 | LLVM_DEBUG(dbgs() << "Anti: " ); |
794 | LLVM_DEBUG(MI.dump()); |
795 | |
796 | std::set<unsigned> PassthruRegs; |
797 | GetPassthruRegs(MI, PassthruRegs); |
798 | |
799 | // Process the defs in MI... |
800 | PrescanInstruction(MI, Count, PassthruRegs); |
801 | |
802 | // The dependence edges that represent anti- and output- |
803 | // dependencies that are candidates for breaking. |
804 | std::vector<const SDep *> Edges; |
805 | const SUnit *PathSU = MISUnitMap[&MI]; |
806 | AntiDepEdges(SU: PathSU, Edges); |
807 | |
808 | // If MI is not on the critical path, then we don't rename |
809 | // registers in the CriticalPathSet. |
810 | BitVector *ExcludeRegs = nullptr; |
811 | if (&MI == CriticalPathMI) { |
812 | CriticalPathSU = CriticalPathStep(SU: CriticalPathSU); |
813 | CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : nullptr; |
814 | } else if (CriticalPathSet.any()) { |
815 | ExcludeRegs = &CriticalPathSet; |
816 | } |
817 | |
818 | // Ignore KILL instructions (they form a group in ScanInstruction |
819 | // but don't cause any anti-dependence breaking themselves) |
820 | if (!MI.isKill()) { |
821 | // Attempt to break each anti-dependency... |
822 | for (const SDep *Edge : Edges) { |
823 | SUnit *NextSU = Edge->getSUnit(); |
824 | |
825 | if ((Edge->getKind() != SDep::Anti) && |
826 | (Edge->getKind() != SDep::Output)) continue; |
827 | |
828 | unsigned AntiDepReg = Edge->getReg(); |
829 | LLVM_DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI)); |
830 | assert(AntiDepReg != 0 && "Anti-dependence on reg0?" ); |
831 | |
832 | if (!MRI.isAllocatable(PhysReg: AntiDepReg)) { |
833 | // Don't break anti-dependencies on non-allocatable registers. |
834 | LLVM_DEBUG(dbgs() << " (non-allocatable)\n" ); |
835 | continue; |
836 | } else if (ExcludeRegs && ExcludeRegs->test(Idx: AntiDepReg)) { |
837 | // Don't break anti-dependencies for critical path registers |
838 | // if not on the critical path |
839 | LLVM_DEBUG(dbgs() << " (not critical-path)\n" ); |
840 | continue; |
841 | } else if (PassthruRegs.count(x: AntiDepReg) != 0) { |
842 | // If the anti-dep register liveness "passes-thru", then |
843 | // don't try to change it. It will be changed along with |
844 | // the use if required to break an earlier antidep. |
845 | LLVM_DEBUG(dbgs() << " (passthru)\n" ); |
846 | continue; |
847 | } else { |
848 | // No anti-dep breaking for implicit deps |
849 | MachineOperand *AntiDepOp = |
850 | MI.findRegisterDefOperand(Reg: AntiDepReg, /*TRI=*/nullptr); |
851 | assert(AntiDepOp && "Can't find index for defined register operand" ); |
852 | if (!AntiDepOp || AntiDepOp->isImplicit()) { |
853 | LLVM_DEBUG(dbgs() << " (implicit)\n" ); |
854 | continue; |
855 | } |
856 | |
857 | // If the SUnit has other dependencies on the SUnit that |
858 | // it anti-depends on, don't bother breaking the |
859 | // anti-dependency since those edges would prevent such |
860 | // units from being scheduled past each other |
861 | // regardless. |
862 | // |
863 | // Also, if there are dependencies on other SUnits with the |
864 | // same register as the anti-dependency, don't attempt to |
865 | // break it. |
866 | for (const SDep &Pred : PathSU->Preds) { |
867 | if (Pred.getSUnit() == NextSU ? (Pred.getKind() != SDep::Anti || |
868 | Pred.getReg() != AntiDepReg) |
869 | : (Pred.getKind() == SDep::Data && |
870 | Pred.getReg() == AntiDepReg)) { |
871 | AntiDepReg = 0; |
872 | break; |
873 | } |
874 | } |
875 | for (const SDep &Pred : PathSU->Preds) { |
876 | if ((Pred.getSUnit() == NextSU) && (Pred.getKind() != SDep::Anti) && |
877 | (Pred.getKind() != SDep::Output)) { |
878 | LLVM_DEBUG(dbgs() << " (real dependency)\n" ); |
879 | AntiDepReg = 0; |
880 | break; |
881 | } else if ((Pred.getSUnit() != NextSU) && |
882 | (Pred.getKind() == SDep::Data) && |
883 | (Pred.getReg() == AntiDepReg)) { |
884 | LLVM_DEBUG(dbgs() << " (other dependency)\n" ); |
885 | AntiDepReg = 0; |
886 | break; |
887 | } |
888 | } |
889 | |
890 | if (AntiDepReg == 0) |
891 | continue; |
892 | } |
893 | |
894 | assert(AntiDepReg != 0); |
895 | |
896 | // Determine AntiDepReg's register group. |
897 | const unsigned GroupIndex = State->GetGroup(Reg: AntiDepReg); |
898 | if (GroupIndex == 0) { |
899 | LLVM_DEBUG(dbgs() << " (zero group)\n" ); |
900 | continue; |
901 | } |
902 | |
903 | LLVM_DEBUG(dbgs() << '\n'); |
904 | |
905 | // Look for a suitable register to use to break the anti-dependence. |
906 | std::map<unsigned, unsigned> RenameMap; |
907 | if (FindSuitableFreeRegisters(SuperReg: AntiDepReg, AntiDepGroupIndex: GroupIndex, RenameOrder, |
908 | RenameMap)) { |
909 | LLVM_DEBUG(dbgs() << "\tBreaking anti-dependence edge on " |
910 | << printReg(AntiDepReg, TRI) << ":" ); |
911 | |
912 | // Handle each group register... |
913 | for (const auto &P : RenameMap) { |
914 | unsigned CurrReg = P.first; |
915 | unsigned NewReg = P.second; |
916 | |
917 | LLVM_DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->" |
918 | << printReg(NewReg, TRI) << "(" |
919 | << RegRefs.count(CurrReg) << " refs)" ); |
920 | |
921 | // Update the references to the old register CurrReg to |
922 | // refer to the new register NewReg. |
923 | for (const auto &Q : make_range(p: RegRefs.equal_range(x: CurrReg))) { |
924 | Q.second.Operand->setReg(NewReg); |
925 | // If the SU for the instruction being updated has debug |
926 | // information related to the anti-dependency register, make |
927 | // sure to update that as well. |
928 | const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()]; |
929 | if (!SU) continue; |
930 | UpdateDbgValues(DbgValues, ParentMI: Q.second.Operand->getParent(), |
931 | OldReg: AntiDepReg, NewReg); |
932 | } |
933 | |
934 | // We just went back in time and modified history; the |
935 | // liveness information for CurrReg is now inconsistent. Set |
936 | // the state as if it were dead. |
937 | State->UnionGroups(Reg1: NewReg, Reg2: 0); |
938 | RegRefs.erase(x: NewReg); |
939 | DefIndices[NewReg] = DefIndices[CurrReg]; |
940 | KillIndices[NewReg] = KillIndices[CurrReg]; |
941 | |
942 | State->UnionGroups(Reg1: CurrReg, Reg2: 0); |
943 | RegRefs.erase(x: CurrReg); |
944 | DefIndices[CurrReg] = KillIndices[CurrReg]; |
945 | KillIndices[CurrReg] = ~0u; |
946 | assert(((KillIndices[CurrReg] == ~0u) != |
947 | (DefIndices[CurrReg] == ~0u)) && |
948 | "Kill and Def maps aren't consistent for AntiDepReg!" ); |
949 | } |
950 | |
951 | ++Broken; |
952 | LLVM_DEBUG(dbgs() << '\n'); |
953 | } |
954 | } |
955 | } |
956 | |
957 | ScanInstruction(MI, Count); |
958 | } |
959 | |
960 | return Broken; |
961 | } |
962 | |
963 | AntiDepBreaker *llvm::createAggressiveAntiDepBreaker( |
964 | MachineFunction &MFi, const RegisterClassInfo &RCI, |
965 | TargetSubtargetInfo::RegClassVector &CriticalPathRCs) { |
966 | return new AggressiveAntiDepBreaker(MFi, RCI, CriticalPathRCs); |
967 | } |
968 | |