1 | //===- AggressiveAntiDepBreaker.cpp - Anti-dep breaker --------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the AggressiveAntiDepBreaker class, which |
10 | // implements register anti-dependence breaking during post-RA |
11 | // scheduling. It attempts to break all anti-dependencies within a |
12 | // block. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "AggressiveAntiDepBreaker.h" |
17 | #include "llvm/ADT/ArrayRef.h" |
18 | #include "llvm/ADT/SmallSet.h" |
19 | #include "llvm/ADT/iterator_range.h" |
20 | #include "llvm/CodeGen/MachineBasicBlock.h" |
21 | #include "llvm/CodeGen/MachineFrameInfo.h" |
22 | #include "llvm/CodeGen/MachineFunction.h" |
23 | #include "llvm/CodeGen/MachineInstr.h" |
24 | #include "llvm/CodeGen/MachineOperand.h" |
25 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
26 | #include "llvm/CodeGen/RegisterClassInfo.h" |
27 | #include "llvm/CodeGen/ScheduleDAG.h" |
28 | #include "llvm/CodeGen/TargetInstrInfo.h" |
29 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
30 | #include "llvm/CodeGenTypes/MachineValueType.h" |
31 | #include "llvm/MC/MCInstrDesc.h" |
32 | #include "llvm/MC/MCRegisterInfo.h" |
33 | #include "llvm/Support/CommandLine.h" |
34 | #include "llvm/Support/Debug.h" |
35 | #include "llvm/Support/raw_ostream.h" |
36 | #include <cassert> |
37 | #include <utility> |
38 | |
39 | using namespace llvm; |
40 | |
41 | #define DEBUG_TYPE "post-RA-sched" |
42 | |
43 | // If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod |
44 | static cl::opt<int> |
45 | DebugDiv("agg-antidep-debugdiv" , |
46 | cl::desc("Debug control for aggressive anti-dep breaker" ), |
47 | cl::init(Val: 0), cl::Hidden); |
48 | |
49 | static cl::opt<int> |
50 | DebugMod("agg-antidep-debugmod" , |
51 | cl::desc("Debug control for aggressive anti-dep breaker" ), |
52 | cl::init(Val: 0), cl::Hidden); |
53 | |
54 | AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, |
55 | MachineBasicBlock *BB) |
56 | : NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0), |
57 | GroupNodeIndices(TargetRegs, 0), KillIndices(TargetRegs, 0), |
58 | DefIndices(TargetRegs, 0) { |
59 | const unsigned BBSize = BB->size(); |
60 | for (unsigned i = 0; i < NumTargetRegs; ++i) { |
61 | // Initialize all registers to be in their own group. Initially we |
62 | // assign the register to the same-indexed GroupNode. |
63 | GroupNodeIndices[i] = i; |
64 | // Initialize the indices to indicate that no registers are live. |
65 | KillIndices[i] = ~0u; |
66 | DefIndices[i] = BBSize; |
67 | } |
68 | } |
69 | |
70 | unsigned AggressiveAntiDepState::GetGroup(MCRegister Reg) { |
71 | unsigned Node = GroupNodeIndices[Reg]; |
72 | while (GroupNodes[Node] != Node) |
73 | Node = GroupNodes[Node]; |
74 | |
75 | return Node; |
76 | } |
77 | |
78 | void AggressiveAntiDepState::GetGroupRegs( |
79 | unsigned Group, std::vector<MCRegister> &Regs, |
80 | std::multimap<MCRegister, AggressiveAntiDepState::RegisterReference> |
81 | *RegRefs) { |
82 | for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) { |
83 | if ((GetGroup(Reg) == Group) && (RegRefs->count(x: Reg) > 0)) |
84 | Regs.push_back(x: Reg); |
85 | } |
86 | } |
87 | |
88 | unsigned AggressiveAntiDepState::UnionGroups(MCRegister Reg1, MCRegister Reg2) { |
89 | assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!" ); |
90 | assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!" ); |
91 | |
92 | // find group for each register |
93 | unsigned Group1 = GetGroup(Reg: Reg1); |
94 | unsigned Group2 = GetGroup(Reg: Reg2); |
95 | |
96 | // if either group is 0, then that must become the parent |
97 | unsigned Parent = (Group1 == 0) ? Group1 : Group2; |
98 | unsigned Other = (Parent == Group1) ? Group2 : Group1; |
99 | GroupNodes.at(n: Other) = Parent; |
100 | return Parent; |
101 | } |
102 | |
103 | unsigned AggressiveAntiDepState::LeaveGroup(MCRegister Reg) { |
104 | // Create a new GroupNode for Reg. Reg's existing GroupNode must |
105 | // stay as is because there could be other GroupNodes referring to |
106 | // it. |
107 | unsigned idx = GroupNodes.size(); |
108 | GroupNodes.push_back(x: idx); |
109 | GroupNodeIndices[Reg] = idx; |
110 | return idx; |
111 | } |
112 | |
113 | bool AggressiveAntiDepState::IsLive(MCRegister Reg) { |
114 | // KillIndex must be defined and DefIndex not defined for a register |
115 | // to be live. |
116 | return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u)); |
117 | } |
118 | |
119 | AggressiveAntiDepBreaker::AggressiveAntiDepBreaker( |
120 | MachineFunction &MFi, const RegisterClassInfo &RCI, |
121 | TargetSubtargetInfo::RegClassVector &CriticalPathRCs) |
122 | : MF(MFi), MRI(MF.getRegInfo()), TII(MF.getSubtarget().getInstrInfo()), |
123 | TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI) { |
124 | /* Collect a bitset of all registers that are only broken if they |
125 | are on the critical path. */ |
126 | for (const TargetRegisterClass *RC : CriticalPathRCs) { |
127 | BitVector CPSet = TRI->getAllocatableSet(MF, RC); |
128 | if (CriticalPathSet.none()) |
129 | CriticalPathSet = CPSet; |
130 | else |
131 | CriticalPathSet |= CPSet; |
132 | } |
133 | |
134 | LLVM_DEBUG(dbgs() << "AntiDep Critical-Path Registers:" ); |
135 | LLVM_DEBUG(for (unsigned r |
136 | : CriticalPathSet.set_bits()) dbgs() |
137 | << " " << printReg(r, TRI)); |
138 | LLVM_DEBUG(dbgs() << '\n'); |
139 | } |
140 | |
141 | AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { |
142 | delete State; |
143 | } |
144 | |
145 | void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { |
146 | assert(!State); |
147 | State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); |
148 | |
149 | bool IsReturnBlock = BB->isReturnBlock(); |
150 | std::vector<unsigned> &KillIndices = State->GetKillIndices(); |
151 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
152 | |
153 | // Examine the live-in regs of all successors. |
154 | for (MachineBasicBlock *Succ : BB->successors()) |
155 | for (const auto &LI : Succ->liveins()) { |
156 | for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { |
157 | unsigned Reg = *AI; |
158 | State->UnionGroups(Reg1: Reg, Reg2: 0); |
159 | KillIndices[Reg] = BB->size(); |
160 | DefIndices[Reg] = ~0u; |
161 | } |
162 | } |
163 | |
164 | // Mark live-out callee-saved registers. In a return block this is |
165 | // all callee-saved registers. In non-return this is any |
166 | // callee-saved register that is not saved in the prolog. |
167 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
168 | BitVector Pristine = MFI.getPristineRegs(MF); |
169 | for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; |
170 | ++I) { |
171 | unsigned Reg = *I; |
172 | if (!IsReturnBlock && !Pristine.test(Idx: Reg)) |
173 | continue; |
174 | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { |
175 | MCRegister AliasReg = *AI; |
176 | State->UnionGroups(Reg1: AliasReg, Reg2: 0); |
177 | KillIndices[AliasReg] = BB->size(); |
178 | DefIndices[AliasReg] = ~0u; |
179 | } |
180 | } |
181 | } |
182 | |
183 | void AggressiveAntiDepBreaker::FinishBlock() { |
184 | delete State; |
185 | State = nullptr; |
186 | } |
187 | |
188 | void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count, |
189 | unsigned InsertPosIndex) { |
190 | assert(Count < InsertPosIndex && "Instruction index out of expected range!" ); |
191 | |
192 | std::set<MCRegister> PassthruRegs; |
193 | GetPassthruRegs(MI, PassthruRegs); |
194 | PrescanInstruction(MI, Count, PassthruRegs); |
195 | ScanInstruction(MI, Count); |
196 | |
197 | LLVM_DEBUG(dbgs() << "Observe: " ); |
198 | LLVM_DEBUG(MI.dump()); |
199 | LLVM_DEBUG(dbgs() << "\tRegs:" ); |
200 | |
201 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
202 | for (unsigned Reg = 1; Reg != TRI->getNumRegs(); ++Reg) { |
203 | // If Reg is current live, then mark that it can't be renamed as |
204 | // we don't know the extent of its live-range anymore (now that it |
205 | // has been scheduled). If it is not live but was defined in the |
206 | // previous schedule region, then set its def index to the most |
207 | // conservative location (i.e. the beginning of the previous |
208 | // schedule region). |
209 | if (State->IsLive(Reg)) { |
210 | LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() |
211 | << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg) |
212 | << "->g0(region live-out)" ); |
213 | State->UnionGroups(Reg1: Reg, Reg2: 0); |
214 | } else if ((DefIndices[Reg] < InsertPosIndex) |
215 | && (DefIndices[Reg] >= Count)) { |
216 | DefIndices[Reg] = Count; |
217 | } |
218 | } |
219 | LLVM_DEBUG(dbgs() << '\n'); |
220 | } |
221 | |
222 | bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI, |
223 | MachineOperand &MO) { |
224 | if (!MO.isReg() || !MO.isImplicit()) |
225 | return false; |
226 | |
227 | Register Reg = MO.getReg(); |
228 | if (Reg == 0) |
229 | return false; |
230 | |
231 | MachineOperand *Op = nullptr; |
232 | if (MO.isDef()) |
233 | Op = MI.findRegisterUseOperand(Reg, /*TRI=*/nullptr, isKill: true); |
234 | else |
235 | Op = MI.findRegisterDefOperand(Reg, /*TRI=*/nullptr); |
236 | |
237 | return(Op && Op->isImplicit()); |
238 | } |
239 | |
240 | void AggressiveAntiDepBreaker::GetPassthruRegs( |
241 | MachineInstr &MI, std::set<MCRegister> &PassthruRegs) { |
242 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
243 | MachineOperand &MO = MI.getOperand(i); |
244 | if (!MO.isReg()) continue; |
245 | if ((MO.isDef() && MI.isRegTiedToUseOperand(DefOpIdx: i)) || |
246 | IsImplicitDefUse(MI, MO)) { |
247 | const Register Reg = MO.getReg(); |
248 | for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) |
249 | PassthruRegs.insert(x: SubReg); |
250 | } |
251 | } |
252 | } |
253 | |
254 | /// AntiDepEdges - Return in Edges the anti- and output- dependencies |
255 | /// in SU that we want to consider for breaking. |
256 | static void AntiDepEdges(const SUnit *SU, std::vector<const SDep *> &Edges) { |
257 | SmallSet<Register, 4> RegSet; |
258 | for (const SDep &Pred : SU->Preds) { |
259 | if ((Pred.getKind() == SDep::Anti) || (Pred.getKind() == SDep::Output)) { |
260 | if (RegSet.insert(V: Pred.getReg()).second) |
261 | Edges.push_back(x: &Pred); |
262 | } |
263 | } |
264 | } |
265 | |
266 | /// CriticalPathStep - Return the next SUnit after SU on the bottom-up |
267 | /// critical path. |
268 | static const SUnit *CriticalPathStep(const SUnit *SU) { |
269 | const SDep *Next = nullptr; |
270 | unsigned NextDepth = 0; |
271 | // Find the predecessor edge with the greatest depth. |
272 | if (SU) { |
273 | for (const SDep &Pred : SU->Preds) { |
274 | const SUnit *PredSU = Pred.getSUnit(); |
275 | unsigned PredLatency = Pred.getLatency(); |
276 | unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; |
277 | // In the case of a latency tie, prefer an anti-dependency edge over |
278 | // other types of edges. |
279 | if (NextDepth < PredTotalLatency || |
280 | (NextDepth == PredTotalLatency && Pred.getKind() == SDep::Anti)) { |
281 | NextDepth = PredTotalLatency; |
282 | Next = &Pred; |
283 | } |
284 | } |
285 | } |
286 | |
287 | return (Next) ? Next->getSUnit() : nullptr; |
288 | } |
289 | |
290 | void AggressiveAntiDepBreaker::HandleLastUse(MCRegister Reg, unsigned KillIdx, |
291 | const char *tag, |
292 | const char *, |
293 | const char *) { |
294 | std::vector<unsigned> &KillIndices = State->GetKillIndices(); |
295 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
296 | std::multimap<MCRegister, AggressiveAntiDepState::RegisterReference> |
297 | &RegRefs = State->GetRegRefs(); |
298 | |
299 | // FIXME: We must leave subregisters of live super registers as live, so that |
300 | // we don't clear out the register tracking information for subregisters of |
301 | // super registers we're still tracking (and with which we're unioning |
302 | // subregister definitions). |
303 | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) |
304 | if (TRI->isSuperRegister(RegA: Reg, RegB: *AI) && State->IsLive(Reg: *AI)) { |
305 | LLVM_DEBUG(if (!header && footer) dbgs() << footer); |
306 | return; |
307 | } |
308 | |
309 | if (!State->IsLive(Reg)) { |
310 | KillIndices[Reg] = KillIdx; |
311 | DefIndices[Reg] = ~0u; |
312 | RegRefs.erase(x: Reg); |
313 | State->LeaveGroup(Reg); |
314 | LLVM_DEBUG(if (header) { |
315 | dbgs() << header << printReg(Reg, TRI); |
316 | header = nullptr; |
317 | }); |
318 | LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); |
319 | // Repeat for subregisters. Note that we only do this if the superregister |
320 | // was not live because otherwise, regardless whether we have an explicit |
321 | // use of the subregister, the subregister's contents are needed for the |
322 | // uses of the superregister. |
323 | for (MCPhysReg SubregReg : TRI->subregs(Reg)) { |
324 | if (!State->IsLive(Reg: SubregReg)) { |
325 | KillIndices[SubregReg] = KillIdx; |
326 | DefIndices[SubregReg] = ~0u; |
327 | RegRefs.erase(x: SubregReg); |
328 | State->LeaveGroup(Reg: SubregReg); |
329 | LLVM_DEBUG(if (header) { |
330 | dbgs() << header << printReg(Reg, TRI); |
331 | header = nullptr; |
332 | }); |
333 | LLVM_DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g" |
334 | << State->GetGroup(SubregReg) << tag); |
335 | } |
336 | } |
337 | } |
338 | |
339 | LLVM_DEBUG(if (!header && footer) dbgs() << footer); |
340 | } |
341 | |
342 | void AggressiveAntiDepBreaker::PrescanInstruction( |
343 | MachineInstr &MI, unsigned Count, |
344 | const std::set<MCRegister> &PassthruRegs) { |
345 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
346 | std::multimap<MCRegister, AggressiveAntiDepState::RegisterReference> |
347 | &RegRefs = State->GetRegRefs(); |
348 | |
349 | // Handle dead defs by simulating a last-use of the register just |
350 | // after the def. A dead def can occur because the def is truly |
351 | // dead, or because only a subregister is live at the def. If we |
352 | // don't do this the dead def will be incorrectly merged into the |
353 | // previous def. |
354 | for (const MachineOperand &MO : MI.all_defs()) { |
355 | Register Reg = MO.getReg(); |
356 | if (!Reg) |
357 | continue; |
358 | |
359 | HandleLastUse(Reg: Reg.asMCReg(), KillIdx: Count + 1, tag: "" , header: "\tDead Def: " , footer: "\n" ); |
360 | } |
361 | |
362 | LLVM_DEBUG(dbgs() << "\tDef Groups:" ); |
363 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
364 | MachineOperand &MO = MI.getOperand(i); |
365 | if (!MO.isReg() || !MO.isDef()) continue; |
366 | Register Reg = MO.getReg(); |
367 | if (!Reg) |
368 | continue; |
369 | |
370 | LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" |
371 | << State->GetGroup(Reg)); |
372 | |
373 | // If MI's defs have a special allocation requirement, don't allow |
374 | // any def registers to be changed. Also assume all registers |
375 | // defined in a call must not be changed (ABI). Inline assembly may |
376 | // reference either system calls or the register directly. Skip it until we |
377 | // can tell user specified registers from compiler-specified. |
378 | if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI) || |
379 | MI.isInlineAsm()) { |
380 | LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)" ); |
381 | State->UnionGroups(Reg1: Reg, Reg2: 0); |
382 | } |
383 | |
384 | // Any aliased that are live at this point are completely or |
385 | // partially defined here, so group those aliases with Reg. |
386 | for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { |
387 | MCRegister AliasReg = *AI; |
388 | if (State->IsLive(Reg: AliasReg)) { |
389 | State->UnionGroups(Reg1: Reg, Reg2: AliasReg); |
390 | LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " |
391 | << printReg(AliasReg, TRI) << ")" ); |
392 | } |
393 | } |
394 | |
395 | // Note register reference... |
396 | const TargetRegisterClass *RC = nullptr; |
397 | if (i < MI.getDesc().getNumOperands()) |
398 | RC = TII->getRegClass(MCID: MI.getDesc(), OpNum: i, TRI, MF); |
399 | AggressiveAntiDepState::RegisterReference RR = { .Operand: &MO, .RC: RC }; |
400 | RegRefs.emplace(args: Reg.asMCReg(), args&: RR); |
401 | } |
402 | |
403 | LLVM_DEBUG(dbgs() << '\n'); |
404 | |
405 | // Scan the register defs for this instruction and update |
406 | // live-ranges. |
407 | for (const MachineOperand &MO : MI.all_defs()) { |
408 | Register Reg = MO.getReg(); |
409 | if (!Reg) |
410 | continue; |
411 | // Ignore KILLs and passthru registers for liveness... |
412 | if (MI.isKill() || (PassthruRegs.count(x: Reg) != 0)) |
413 | continue; |
414 | |
415 | // Update def for Reg and aliases. |
416 | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { |
417 | // We need to be careful here not to define already-live super registers. |
418 | // If the super register is already live, then this definition is not |
419 | // a definition of the whole super register (just a partial insertion |
420 | // into it). Earlier subregister definitions (which we've not yet visited |
421 | // because we're iterating bottom-up) need to be linked to the same group |
422 | // as this definition. |
423 | if (TRI->isSuperRegister(RegA: Reg, RegB: *AI) && State->IsLive(Reg: *AI)) |
424 | continue; |
425 | |
426 | DefIndices[(*AI).id()] = Count; |
427 | } |
428 | } |
429 | } |
430 | |
431 | void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, |
432 | unsigned Count) { |
433 | LLVM_DEBUG(dbgs() << "\tUse Groups:" ); |
434 | std::multimap<MCRegister, AggressiveAntiDepState::RegisterReference> |
435 | &RegRefs = State->GetRegRefs(); |
436 | |
437 | // If MI's uses have special allocation requirement, don't allow |
438 | // any use registers to be changed. Also assume all registers |
439 | // used in a call must not be changed (ABI). |
440 | // Inline Assembly register uses also cannot be safely changed. |
441 | // FIXME: The issue with predicated instruction is more complex. We are being |
442 | // conservatively here because the kill markers cannot be trusted after |
443 | // if-conversion: |
444 | // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14] |
445 | // ... |
446 | // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395] |
447 | // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12] |
448 | // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8) |
449 | // |
450 | // The first R6 kill is not really a kill since it's killed by a predicated |
451 | // instruction which may not be executed. The second R6 def may or may not |
452 | // re-define R6 so it's not safe to change it since the last R6 use cannot be |
453 | // changed. |
454 | bool Special = MI.isCall() || MI.hasExtraSrcRegAllocReq() || |
455 | TII->isPredicated(MI) || MI.isInlineAsm(); |
456 | |
457 | // Scan the register uses for this instruction and update |
458 | // live-ranges, groups and RegRefs. |
459 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
460 | MachineOperand &MO = MI.getOperand(i); |
461 | if (!MO.isReg() || !MO.isUse()) continue; |
462 | Register Reg = MO.getReg(); |
463 | if (!Reg) |
464 | continue; |
465 | |
466 | LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" |
467 | << State->GetGroup(Reg)); |
468 | |
469 | // It wasn't previously live but now it is, this is a kill. Forget |
470 | // the previous live-range information and start a new live-range |
471 | // for the register. |
472 | HandleLastUse(Reg: Reg.asMCReg(), KillIdx: Count, tag: "(last-use)" ); |
473 | |
474 | if (Special) { |
475 | LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)" ); |
476 | State->UnionGroups(Reg1: Reg, Reg2: 0); |
477 | } |
478 | |
479 | // Note register reference... |
480 | const TargetRegisterClass *RC = nullptr; |
481 | if (i < MI.getDesc().getNumOperands()) |
482 | RC = TII->getRegClass(MCID: MI.getDesc(), OpNum: i, TRI, MF); |
483 | AggressiveAntiDepState::RegisterReference RR = { .Operand: &MO, .RC: RC }; |
484 | RegRefs.emplace(args: Reg.asMCReg(), args&: RR); |
485 | } |
486 | |
487 | LLVM_DEBUG(dbgs() << '\n'); |
488 | |
489 | // Form a group of all defs and uses of a KILL instruction to ensure |
490 | // that all registers are renamed as a group. |
491 | if (MI.isKill()) { |
492 | LLVM_DEBUG(dbgs() << "\tKill Group:" ); |
493 | |
494 | Register FirstReg; |
495 | for (const MachineOperand &MO : MI.operands()) { |
496 | if (!MO.isReg()) continue; |
497 | Register Reg = MO.getReg(); |
498 | if (!Reg) |
499 | continue; |
500 | |
501 | if (FirstReg) { |
502 | LLVM_DEBUG(dbgs() << "=" << printReg(Reg, TRI)); |
503 | State->UnionGroups(Reg1: FirstReg, Reg2: Reg); |
504 | } else { |
505 | LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI)); |
506 | FirstReg = Reg; |
507 | } |
508 | } |
509 | |
510 | LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n'); |
511 | } |
512 | } |
513 | |
514 | BitVector AggressiveAntiDepBreaker::GetRenameRegisters(MCRegister Reg) { |
515 | BitVector BV(TRI->getNumRegs(), false); |
516 | bool first = true; |
517 | |
518 | // Check all references that need rewriting for Reg. For each, use |
519 | // the corresponding register class to narrow the set of registers |
520 | // that are appropriate for renaming. |
521 | for (const auto &Q : make_range(p: State->GetRegRefs().equal_range(x: Reg))) { |
522 | const TargetRegisterClass *RC = Q.second.RC; |
523 | if (!RC) continue; |
524 | |
525 | BitVector RCBV = TRI->getAllocatableSet(MF, RC); |
526 | if (first) { |
527 | BV |= RCBV; |
528 | first = false; |
529 | } else { |
530 | BV &= RCBV; |
531 | } |
532 | |
533 | LLVM_DEBUG(dbgs() << " " << TRI->getRegClassName(RC)); |
534 | } |
535 | |
536 | return BV; |
537 | } |
538 | |
539 | bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( |
540 | MCRegister SuperReg, unsigned AntiDepGroupIndex, |
541 | RenameOrderType &RenameOrder, std::map<MCRegister, MCRegister> &RenameMap) { |
542 | std::vector<unsigned> &KillIndices = State->GetKillIndices(); |
543 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
544 | std::multimap<MCRegister, AggressiveAntiDepState::RegisterReference> |
545 | &RegRefs = State->GetRegRefs(); |
546 | |
547 | // Collect all referenced registers in the same group as |
548 | // AntiDepReg. These all need to be renamed together if we are to |
549 | // break the anti-dependence. |
550 | std::vector<MCRegister> Regs; |
551 | State->GetGroupRegs(Group: AntiDepGroupIndex, Regs, RegRefs: &RegRefs); |
552 | assert(!Regs.empty() && "Empty register group!" ); |
553 | if (Regs.empty()) |
554 | return false; |
555 | |
556 | // Collect the BitVector of registers that can be used to rename |
557 | // each register. |
558 | LLVM_DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex |
559 | << ":\n" ); |
560 | std::map<MCRegister, BitVector> RenameRegisterMap; |
561 | for (MCRegister Reg : Regs) { |
562 | // If Reg has any references, then collect possible rename regs |
563 | if (RegRefs.count(x: Reg) > 0) { |
564 | LLVM_DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":" ); |
565 | |
566 | BitVector &BV = RenameRegisterMap[Reg]; |
567 | assert(BV.empty()); |
568 | BV = GetRenameRegisters(Reg); |
569 | |
570 | LLVM_DEBUG({ |
571 | dbgs() << " ::" ; |
572 | for (unsigned r : BV.set_bits()) |
573 | dbgs() << " " << printReg(r, TRI); |
574 | dbgs() << "\n" ; |
575 | }); |
576 | } |
577 | } |
578 | |
579 | // All group registers should be a subreg of SuperReg. |
580 | for (MCRegister Reg : Regs) { |
581 | if (Reg == SuperReg) continue; |
582 | bool IsSub = TRI->isSubRegister(RegA: SuperReg, RegB: Reg); |
583 | // FIXME: remove this once PR18663 has been properly fixed. For now, |
584 | // return a conservative answer: |
585 | // assert(IsSub && "Expecting group subregister"); |
586 | if (!IsSub) |
587 | return false; |
588 | } |
589 | |
590 | #ifndef NDEBUG |
591 | // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod |
592 | if (DebugDiv > 0) { |
593 | static int renamecnt = 0; |
594 | if (renamecnt++ % DebugDiv != DebugMod) |
595 | return false; |
596 | |
597 | dbgs() << "*** Performing rename " << printReg(SuperReg, TRI) |
598 | << " for debug ***\n" ; |
599 | } |
600 | #endif |
601 | |
602 | // Check each possible rename register for SuperReg in round-robin |
603 | // order. If that register is available, and the corresponding |
604 | // registers are available for the other group subregisters, then we |
605 | // can use those registers to rename. |
606 | |
607 | // FIXME: Using getMinimalPhysRegClass is very conservative. We should |
608 | // check every use of the register and find the largest register class |
609 | // that can be used in all of them. |
610 | const TargetRegisterClass *SuperRC = |
611 | TRI->getMinimalPhysRegClass(Reg: SuperReg, VT: MVT::Other); |
612 | |
613 | ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC: SuperRC); |
614 | if (Order.empty()) { |
615 | LLVM_DEBUG(dbgs() << "\tEmpty Super Regclass!!\n" ); |
616 | return false; |
617 | } |
618 | |
619 | LLVM_DEBUG(dbgs() << "\tFind Registers:" ); |
620 | |
621 | RenameOrder.insert(x: RenameOrderType::value_type(SuperRC, Order.size())); |
622 | |
623 | unsigned OrigR = RenameOrder[SuperRC]; |
624 | unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR); |
625 | unsigned R = OrigR; |
626 | do { |
627 | if (R == 0) R = Order.size(); |
628 | --R; |
629 | const MCRegister NewSuperReg = Order[R]; |
630 | // Don't consider non-allocatable registers |
631 | if (!MRI.isAllocatable(PhysReg: NewSuperReg)) continue; |
632 | // Don't replace a register with itself. |
633 | if (NewSuperReg == SuperReg) continue; |
634 | |
635 | LLVM_DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':'); |
636 | RenameMap.clear(); |
637 | |
638 | // For each referenced group register (which must be a SuperReg or |
639 | // a subregister of SuperReg), find the corresponding subregister |
640 | // of NewSuperReg and make sure it is free to be renamed. |
641 | for (MCRegister Reg : Regs) { |
642 | MCRegister NewReg; |
643 | if (Reg == SuperReg) { |
644 | NewReg = NewSuperReg; |
645 | } else { |
646 | unsigned NewSubRegIdx = TRI->getSubRegIndex(RegNo: SuperReg, SubRegNo: Reg); |
647 | if (NewSubRegIdx != 0) |
648 | NewReg = TRI->getSubReg(Reg: NewSuperReg, Idx: NewSubRegIdx); |
649 | } |
650 | |
651 | LLVM_DEBUG(dbgs() << " " << printReg(NewReg, TRI)); |
652 | |
653 | // Check if Reg can be renamed to NewReg. |
654 | if (!RenameRegisterMap[Reg].test(Idx: NewReg)) { |
655 | LLVM_DEBUG(dbgs() << "(no rename)" ); |
656 | goto next_super_reg; |
657 | } |
658 | |
659 | // If NewReg is dead and NewReg's most recent def is not before |
660 | // Regs's kill, it's safe to replace Reg with NewReg. We |
661 | // must also check all aliases of NewReg, because we can't define a |
662 | // register when any sub or super is already live. |
663 | if (State->IsLive(Reg: NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) { |
664 | LLVM_DEBUG(dbgs() << "(live)" ); |
665 | goto next_super_reg; |
666 | } else { |
667 | bool found = false; |
668 | for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) { |
669 | MCRegister AliasReg = *AI; |
670 | if (State->IsLive(Reg: AliasReg) || |
671 | (KillIndices[Reg] > DefIndices[AliasReg])) { |
672 | LLVM_DEBUG(dbgs() |
673 | << "(alias " << printReg(AliasReg, TRI) << " live)" ); |
674 | found = true; |
675 | break; |
676 | } |
677 | } |
678 | if (found) |
679 | goto next_super_reg; |
680 | } |
681 | |
682 | // We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also |
683 | // defines 'NewReg' via an early-clobber operand. |
684 | for (const auto &Q : make_range(p: RegRefs.equal_range(x: Reg))) { |
685 | MachineInstr *UseMI = Q.second.Operand->getParent(); |
686 | int Idx = UseMI->findRegisterDefOperandIdx(Reg: NewReg, TRI, isDead: false, Overlap: true); |
687 | if (Idx == -1) |
688 | continue; |
689 | |
690 | if (UseMI->getOperand(i: Idx).isEarlyClobber()) { |
691 | LLVM_DEBUG(dbgs() << "(ec)" ); |
692 | goto next_super_reg; |
693 | } |
694 | } |
695 | |
696 | // Also, we cannot rename 'Reg' to 'NewReg' if the instruction defining |
697 | // 'Reg' is an early-clobber define and that instruction also uses |
698 | // 'NewReg'. |
699 | for (const auto &Q : make_range(p: RegRefs.equal_range(x: Reg))) { |
700 | if (!Q.second.Operand->isDef() || !Q.second.Operand->isEarlyClobber()) |
701 | continue; |
702 | |
703 | MachineInstr *DefMI = Q.second.Operand->getParent(); |
704 | if (DefMI->readsRegister(Reg: NewReg, TRI)) { |
705 | LLVM_DEBUG(dbgs() << "(ec)" ); |
706 | goto next_super_reg; |
707 | } |
708 | } |
709 | |
710 | // Record that 'Reg' can be renamed to 'NewReg'. |
711 | RenameMap.insert(x: std::make_pair(x&: Reg, y&: NewReg)); |
712 | } |
713 | |
714 | // If we fall-out here, then every register in the group can be |
715 | // renamed, as recorded in RenameMap. |
716 | RenameOrder.erase(x: SuperRC); |
717 | RenameOrder.insert(x: RenameOrderType::value_type(SuperRC, R)); |
718 | LLVM_DEBUG(dbgs() << "]\n" ); |
719 | return true; |
720 | |
721 | next_super_reg: |
722 | LLVM_DEBUG(dbgs() << ']'); |
723 | } while (R != EndR); |
724 | |
725 | LLVM_DEBUG(dbgs() << '\n'); |
726 | |
727 | // No registers are free and available! |
728 | return false; |
729 | } |
730 | |
731 | /// BreakAntiDependencies - Identifiy anti-dependencies within the |
732 | /// ScheduleDAG and break them by renaming registers. |
733 | unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( |
734 | const std::vector<SUnit> &SUnits, |
735 | MachineBasicBlock::iterator Begin, |
736 | MachineBasicBlock::iterator End, |
737 | unsigned InsertPosIndex, |
738 | DbgValueVector &DbgValues) { |
739 | std::vector<unsigned> &KillIndices = State->GetKillIndices(); |
740 | std::vector<unsigned> &DefIndices = State->GetDefIndices(); |
741 | std::multimap<MCRegister, AggressiveAntiDepState::RegisterReference> |
742 | &RegRefs = State->GetRegRefs(); |
743 | |
744 | // The code below assumes that there is at least one instruction, |
745 | // so just duck out immediately if the block is empty. |
746 | if (SUnits.empty()) return 0; |
747 | |
748 | // For each regclass the next register to use for renaming. |
749 | RenameOrderType RenameOrder; |
750 | |
751 | // ...need a map from MI to SUnit. |
752 | std::map<MachineInstr *, const SUnit *> MISUnitMap; |
753 | for (const SUnit &SU : SUnits) |
754 | MISUnitMap.insert(x: std::make_pair(x: SU.getInstr(), y: &SU)); |
755 | |
756 | // Track progress along the critical path through the SUnit graph as |
757 | // we walk the instructions. This is needed for regclasses that only |
758 | // break critical-path anti-dependencies. |
759 | const SUnit *CriticalPathSU = nullptr; |
760 | MachineInstr *CriticalPathMI = nullptr; |
761 | if (CriticalPathSet.any()) { |
762 | for (const SUnit &SU : SUnits) { |
763 | if (!CriticalPathSU || |
764 | ((SU.getDepth() + SU.Latency) > |
765 | (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { |
766 | CriticalPathSU = &SU; |
767 | } |
768 | } |
769 | assert(CriticalPathSU && "Failed to find SUnit critical path" ); |
770 | CriticalPathMI = CriticalPathSU->getInstr(); |
771 | } |
772 | |
773 | #ifndef NDEBUG |
774 | LLVM_DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n" ); |
775 | LLVM_DEBUG(dbgs() << "Available regs:" ); |
776 | for (unsigned Reg = 1; Reg < TRI->getNumRegs(); ++Reg) { |
777 | if (!State->IsLive(Reg)) |
778 | LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI)); |
779 | } |
780 | LLVM_DEBUG(dbgs() << '\n'); |
781 | #endif |
782 | |
783 | BitVector RegAliases(TRI->getNumRegs()); |
784 | |
785 | // Attempt to break anti-dependence edges. Walk the instructions |
786 | // from the bottom up, tracking information about liveness as we go |
787 | // to help determine which registers are available. |
788 | unsigned Broken = 0; |
789 | unsigned Count = InsertPosIndex - 1; |
790 | for (MachineBasicBlock::iterator I = End, E = Begin; |
791 | I != E; --Count) { |
792 | MachineInstr &MI = *--I; |
793 | |
794 | if (MI.isDebugInstr()) |
795 | continue; |
796 | |
797 | LLVM_DEBUG(dbgs() << "Anti: " ); |
798 | LLVM_DEBUG(MI.dump()); |
799 | |
800 | std::set<MCRegister> PassthruRegs; |
801 | GetPassthruRegs(MI, PassthruRegs); |
802 | |
803 | // Process the defs in MI... |
804 | PrescanInstruction(MI, Count, PassthruRegs); |
805 | |
806 | // The dependence edges that represent anti- and output- |
807 | // dependencies that are candidates for breaking. |
808 | std::vector<const SDep *> Edges; |
809 | const SUnit *PathSU = MISUnitMap[&MI]; |
810 | AntiDepEdges(SU: PathSU, Edges); |
811 | |
812 | // If MI is not on the critical path, then we don't rename |
813 | // registers in the CriticalPathSet. |
814 | BitVector *ExcludeRegs = nullptr; |
815 | if (&MI == CriticalPathMI) { |
816 | CriticalPathSU = CriticalPathStep(SU: CriticalPathSU); |
817 | CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : nullptr; |
818 | } else if (CriticalPathSet.any()) { |
819 | ExcludeRegs = &CriticalPathSet; |
820 | } |
821 | |
822 | // Ignore KILL instructions (they form a group in ScanInstruction |
823 | // but don't cause any anti-dependence breaking themselves) |
824 | if (!MI.isKill()) { |
825 | // Attempt to break each anti-dependency... |
826 | for (const SDep *Edge : Edges) { |
827 | SUnit *NextSU = Edge->getSUnit(); |
828 | |
829 | if ((Edge->getKind() != SDep::Anti) && |
830 | (Edge->getKind() != SDep::Output)) continue; |
831 | |
832 | MCRegister AntiDepReg = Edge->getReg().asMCReg(); |
833 | LLVM_DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI)); |
834 | assert(AntiDepReg && "Anti-dependence on reg0?" ); |
835 | |
836 | if (!MRI.isAllocatable(PhysReg: AntiDepReg)) { |
837 | // Don't break anti-dependencies on non-allocatable registers. |
838 | LLVM_DEBUG(dbgs() << " (non-allocatable)\n" ); |
839 | continue; |
840 | } else if (ExcludeRegs && ExcludeRegs->test(Idx: AntiDepReg.id())) { |
841 | // Don't break anti-dependencies for critical path registers |
842 | // if not on the critical path |
843 | LLVM_DEBUG(dbgs() << " (not critical-path)\n" ); |
844 | continue; |
845 | } else if (PassthruRegs.count(x: AntiDepReg) != 0) { |
846 | // If the anti-dep register liveness "passes-thru", then |
847 | // don't try to change it. It will be changed along with |
848 | // the use if required to break an earlier antidep. |
849 | LLVM_DEBUG(dbgs() << " (passthru)\n" ); |
850 | continue; |
851 | } else { |
852 | // No anti-dep breaking for implicit deps |
853 | MachineOperand *AntiDepOp = |
854 | MI.findRegisterDefOperand(Reg: AntiDepReg, /*TRI=*/nullptr); |
855 | assert(AntiDepOp && "Can't find index for defined register operand" ); |
856 | if (!AntiDepOp || AntiDepOp->isImplicit()) { |
857 | LLVM_DEBUG(dbgs() << " (implicit)\n" ); |
858 | continue; |
859 | } |
860 | |
861 | // If the SUnit has other dependencies on the SUnit that |
862 | // it anti-depends on, don't bother breaking the |
863 | // anti-dependency since those edges would prevent such |
864 | // units from being scheduled past each other |
865 | // regardless. |
866 | // |
867 | // Also, if there are dependencies on other SUnits with the |
868 | // same register as the anti-dependency, don't attempt to |
869 | // break it. |
870 | for (const SDep &Pred : PathSU->Preds) { |
871 | if (Pred.getSUnit() == NextSU ? (Pred.getKind() != SDep::Anti || |
872 | Pred.getReg() != AntiDepReg) |
873 | : (Pred.getKind() == SDep::Data && |
874 | Pred.getReg() == AntiDepReg)) { |
875 | AntiDepReg = MCRegister(); |
876 | break; |
877 | } |
878 | } |
879 | for (const SDep &Pred : PathSU->Preds) { |
880 | if ((Pred.getSUnit() == NextSU) && (Pred.getKind() != SDep::Anti) && |
881 | (Pred.getKind() != SDep::Output)) { |
882 | LLVM_DEBUG(dbgs() << " (real dependency)\n" ); |
883 | AntiDepReg = MCRegister(); |
884 | break; |
885 | } else if ((Pred.getSUnit() != NextSU) && |
886 | (Pred.getKind() == SDep::Data) && |
887 | (Pred.getReg() == AntiDepReg)) { |
888 | LLVM_DEBUG(dbgs() << " (other dependency)\n" ); |
889 | AntiDepReg = MCRegister(); |
890 | break; |
891 | } |
892 | } |
893 | |
894 | if (!AntiDepReg) |
895 | continue; |
896 | } |
897 | |
898 | assert(AntiDepReg); |
899 | |
900 | // Determine AntiDepReg's register group. |
901 | const unsigned GroupIndex = State->GetGroup(Reg: AntiDepReg); |
902 | if (GroupIndex == 0) { |
903 | LLVM_DEBUG(dbgs() << " (zero group)\n" ); |
904 | continue; |
905 | } |
906 | |
907 | LLVM_DEBUG(dbgs() << '\n'); |
908 | |
909 | // Look for a suitable register to use to break the anti-dependence. |
910 | std::map<MCRegister, MCRegister> RenameMap; |
911 | if (FindSuitableFreeRegisters(SuperReg: AntiDepReg, AntiDepGroupIndex: GroupIndex, RenameOrder, |
912 | RenameMap)) { |
913 | LLVM_DEBUG(dbgs() << "\tBreaking anti-dependence edge on " |
914 | << printReg(AntiDepReg, TRI) << ":" ); |
915 | |
916 | // Handle each group register... |
917 | for (const auto &P : RenameMap) { |
918 | MCRegister CurrReg = P.first; |
919 | MCRegister NewReg = P.second; |
920 | |
921 | LLVM_DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->" |
922 | << printReg(NewReg, TRI) << "(" |
923 | << RegRefs.count(CurrReg) << " refs)" ); |
924 | |
925 | // Update the references to the old register CurrReg to |
926 | // refer to the new register NewReg. |
927 | for (const auto &Q : make_range(p: RegRefs.equal_range(x: CurrReg))) { |
928 | Q.second.Operand->setReg(NewReg); |
929 | // If the SU for the instruction being updated has debug |
930 | // information related to the anti-dependency register, make |
931 | // sure to update that as well. |
932 | const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()]; |
933 | if (!SU) continue; |
934 | UpdateDbgValues(DbgValues, ParentMI: Q.second.Operand->getParent(), |
935 | OldReg: AntiDepReg, NewReg); |
936 | } |
937 | |
938 | // We just went back in time and modified history; the |
939 | // liveness information for CurrReg is now inconsistent. Set |
940 | // the state as if it were dead. |
941 | State->UnionGroups(Reg1: NewReg, Reg2: 0); |
942 | RegRefs.erase(x: NewReg); |
943 | DefIndices[NewReg] = DefIndices[CurrReg]; |
944 | KillIndices[NewReg] = KillIndices[CurrReg]; |
945 | |
946 | State->UnionGroups(Reg1: CurrReg, Reg2: 0); |
947 | RegRefs.erase(x: CurrReg); |
948 | DefIndices[CurrReg] = KillIndices[CurrReg]; |
949 | KillIndices[CurrReg] = ~0u; |
950 | assert(((KillIndices[CurrReg] == ~0u) != |
951 | (DefIndices[CurrReg] == ~0u)) && |
952 | "Kill and Def maps aren't consistent for AntiDepReg!" ); |
953 | } |
954 | |
955 | ++Broken; |
956 | LLVM_DEBUG(dbgs() << '\n'); |
957 | } |
958 | } |
959 | } |
960 | |
961 | ScanInstruction(MI, Count); |
962 | } |
963 | |
964 | return Broken; |
965 | } |
966 | |
967 | AntiDepBreaker *llvm::createAggressiveAntiDepBreaker( |
968 | MachineFunction &MFi, const RegisterClassInfo &RCI, |
969 | TargetSubtargetInfo::RegClassVector &CriticalPathRCs) { |
970 | return new AggressiveAntiDepBreaker(MFi, RCI, CriticalPathRCs); |
971 | } |
972 | |