1 | //===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/CodeGen/ExecutionDomainFix.h" |
10 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
11 | #include "llvm/CodeGen/TargetInstrInfo.h" |
12 | #include "llvm/Support/Debug.h" |
13 | |
14 | using namespace llvm; |
15 | |
16 | #define DEBUG_TYPE "execution-deps-fix" |
17 | |
18 | iterator_range<SmallVectorImpl<int>::const_iterator> |
19 | ExecutionDomainFix::regIndices(unsigned Reg) const { |
20 | assert(Reg < AliasMap.size() && "Invalid register" ); |
21 | const auto &Entry = AliasMap[Reg]; |
22 | return make_range(x: Entry.begin(), y: Entry.end()); |
23 | } |
24 | |
25 | DomainValue *ExecutionDomainFix::alloc(int domain) { |
26 | DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue |
27 | : Avail.pop_back_val(); |
28 | if (domain >= 0) |
29 | dv->addDomain(domain); |
30 | assert(dv->Refs == 0 && "Reference count wasn't cleared" ); |
31 | assert(!dv->Next && "Chained DomainValue shouldn't have been recycled" ); |
32 | return dv; |
33 | } |
34 | |
35 | void ExecutionDomainFix::release(DomainValue *DV) { |
36 | while (DV) { |
37 | assert(DV->Refs && "Bad DomainValue" ); |
38 | if (--DV->Refs) |
39 | return; |
40 | |
41 | // There are no more DV references. Collapse any contained instructions. |
42 | if (DV->AvailableDomains && !DV->isCollapsed()) |
43 | collapse(dv: DV, domain: DV->getFirstDomain()); |
44 | |
45 | DomainValue *Next = DV->Next; |
46 | DV->clear(); |
47 | Avail.push_back(Elt: DV); |
48 | // Also release the next DomainValue in the chain. |
49 | DV = Next; |
50 | } |
51 | } |
52 | |
53 | DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) { |
54 | DomainValue *DV = DVRef; |
55 | if (!DV || !DV->Next) |
56 | return DV; |
57 | |
58 | // DV has a chain. Find the end. |
59 | do |
60 | DV = DV->Next; |
61 | while (DV->Next); |
62 | |
63 | // Update DVRef to point to DV. |
64 | retain(DV); |
65 | release(DV: DVRef); |
66 | DVRef = DV; |
67 | return DV; |
68 | } |
69 | |
70 | void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) { |
71 | assert(unsigned(rx) < NumRegs && "Invalid index" ); |
72 | assert(!LiveRegs.empty() && "Must enter basic block first." ); |
73 | |
74 | if (LiveRegs[rx] == dv) |
75 | return; |
76 | if (LiveRegs[rx]) |
77 | release(DV: LiveRegs[rx]); |
78 | LiveRegs[rx] = retain(DV: dv); |
79 | } |
80 | |
81 | void ExecutionDomainFix::kill(int rx) { |
82 | assert(unsigned(rx) < NumRegs && "Invalid index" ); |
83 | assert(!LiveRegs.empty() && "Must enter basic block first." ); |
84 | if (!LiveRegs[rx]) |
85 | return; |
86 | |
87 | release(DV: LiveRegs[rx]); |
88 | LiveRegs[rx] = nullptr; |
89 | } |
90 | |
91 | void ExecutionDomainFix::force(int rx, unsigned domain) { |
92 | assert(unsigned(rx) < NumRegs && "Invalid index" ); |
93 | assert(!LiveRegs.empty() && "Must enter basic block first." ); |
94 | if (DomainValue *dv = LiveRegs[rx]) { |
95 | if (dv->isCollapsed()) |
96 | dv->addDomain(domain); |
97 | else if (dv->hasDomain(domain)) |
98 | collapse(dv, domain); |
99 | else { |
100 | // This is an incompatible open DomainValue. Collapse it to whatever and |
101 | // force the new value into domain. This costs a domain crossing. |
102 | collapse(dv, domain: dv->getFirstDomain()); |
103 | assert(LiveRegs[rx] && "Not live after collapse?" ); |
104 | LiveRegs[rx]->addDomain(domain); |
105 | } |
106 | } else { |
107 | // Set up basic collapsed DomainValue. |
108 | setLiveReg(rx, dv: alloc(domain)); |
109 | } |
110 | } |
111 | |
112 | void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) { |
113 | assert(dv->hasDomain(domain) && "Cannot collapse" ); |
114 | |
115 | // Collapse all the instructions. |
116 | while (!dv->Instrs.empty()) |
117 | TII->setExecutionDomain(MI&: *dv->Instrs.pop_back_val(), Domain: domain); |
118 | dv->setSingleDomain(domain); |
119 | |
120 | // If there are multiple users, give them new, unique DomainValues. |
121 | if (!LiveRegs.empty() && dv->Refs > 1) |
122 | for (unsigned rx = 0; rx != NumRegs; ++rx) |
123 | if (LiveRegs[rx] == dv) |
124 | setLiveReg(rx, dv: alloc(domain)); |
125 | } |
126 | |
127 | bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) { |
128 | assert(!A->isCollapsed() && "Cannot merge into collapsed" ); |
129 | assert(!B->isCollapsed() && "Cannot merge from collapsed" ); |
130 | if (A == B) |
131 | return true; |
132 | // Restrict to the domains that A and B have in common. |
133 | unsigned common = A->getCommonDomains(mask: B->AvailableDomains); |
134 | if (!common) |
135 | return false; |
136 | A->AvailableDomains = common; |
137 | A->Instrs.append(in_start: B->Instrs.begin(), in_end: B->Instrs.end()); |
138 | |
139 | // Clear the old DomainValue so we won't try to swizzle instructions twice. |
140 | B->clear(); |
141 | // All uses of B are referred to A. |
142 | B->Next = retain(DV: A); |
143 | |
144 | for (unsigned rx = 0; rx != NumRegs; ++rx) { |
145 | assert(!LiveRegs.empty() && "no space allocated for live registers" ); |
146 | if (LiveRegs[rx] == B) |
147 | setLiveReg(rx, dv: A); |
148 | } |
149 | return true; |
150 | } |
151 | |
152 | void ExecutionDomainFix::enterBasicBlock( |
153 | const LoopTraversal::TraversedMBBInfo &TraversedMBB) { |
154 | |
155 | MachineBasicBlock *MBB = TraversedMBB.MBB; |
156 | |
157 | // Set up LiveRegs to represent registers entering MBB. |
158 | // Set default domain values to 'no domain' (nullptr) |
159 | if (LiveRegs.empty()) |
160 | LiveRegs.assign(n: NumRegs, val: nullptr); |
161 | |
162 | // This is the entry block. |
163 | if (MBB->pred_empty()) { |
164 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n" ); |
165 | return; |
166 | } |
167 | |
168 | // Try to coalesce live-out registers from predecessors. |
169 | for (MachineBasicBlock *pred : MBB->predecessors()) { |
170 | assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && |
171 | "Should have pre-allocated MBBInfos for all MBBs" ); |
172 | LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()]; |
173 | // Incoming is null if this is a backedge from a BB |
174 | // we haven't processed yet |
175 | if (Incoming.empty()) |
176 | continue; |
177 | |
178 | for (unsigned rx = 0; rx != NumRegs; ++rx) { |
179 | DomainValue *pdv = resolve(DVRef&: Incoming[rx]); |
180 | if (!pdv) |
181 | continue; |
182 | if (!LiveRegs[rx]) { |
183 | setLiveReg(rx, dv: pdv); |
184 | continue; |
185 | } |
186 | |
187 | // We have a live DomainValue from more than one predecessor. |
188 | if (LiveRegs[rx]->isCollapsed()) { |
189 | // We are already collapsed, but predecessor is not. Force it. |
190 | unsigned Domain = LiveRegs[rx]->getFirstDomain(); |
191 | if (!pdv->isCollapsed() && pdv->hasDomain(domain: Domain)) |
192 | collapse(dv: pdv, domain: Domain); |
193 | continue; |
194 | } |
195 | |
196 | // Currently open, merge in predecessor. |
197 | if (!pdv->isCollapsed()) |
198 | merge(A: LiveRegs[rx], B: pdv); |
199 | else |
200 | force(rx, domain: pdv->getFirstDomain()); |
201 | } |
202 | } |
203 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) |
204 | << (!TraversedMBB.IsDone ? ": incomplete\n" |
205 | : ": all preds known\n" )); |
206 | } |
207 | |
208 | void ExecutionDomainFix::leaveBasicBlock( |
209 | const LoopTraversal::TraversedMBBInfo &TraversedMBB) { |
210 | assert(!LiveRegs.empty() && "Must enter basic block first." ); |
211 | unsigned MBBNumber = TraversedMBB.MBB->getNumber(); |
212 | assert(MBBNumber < MBBOutRegsInfos.size() && |
213 | "Unexpected basic block number." ); |
214 | // Save register clearances at end of MBB - used by enterBasicBlock(). |
215 | for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) { |
216 | release(DV: OldLiveReg); |
217 | } |
218 | MBBOutRegsInfos[MBBNumber] = LiveRegs; |
219 | LiveRegs.clear(); |
220 | } |
221 | |
222 | bool ExecutionDomainFix::visitInstr(MachineInstr *MI) { |
223 | // Update instructions with explicit execution domains. |
224 | std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI: *MI); |
225 | if (DomP.first) { |
226 | if (DomP.second) |
227 | visitSoftInstr(MI, mask: DomP.second); |
228 | else |
229 | visitHardInstr(MI, domain: DomP.first); |
230 | } |
231 | |
232 | return !DomP.first; |
233 | } |
234 | |
235 | void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) { |
236 | assert(!MI->isDebugInstr() && "Won't process debug values" ); |
237 | const MCInstrDesc &MCID = MI->getDesc(); |
238 | for (unsigned i = 0, |
239 | e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); |
240 | i != e; ++i) { |
241 | MachineOperand &MO = MI->getOperand(i); |
242 | if (!MO.isReg()) |
243 | continue; |
244 | if (MO.isUse()) |
245 | continue; |
246 | for (int rx : regIndices(Reg: MO.getReg())) { |
247 | // This instruction explicitly defines rx. |
248 | LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI); |
249 | |
250 | // Kill off domains redefined by generic instructions. |
251 | if (Kill) |
252 | kill(rx); |
253 | } |
254 | } |
255 | } |
256 | |
257 | void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) { |
258 | // Collapse all uses. |
259 | for (unsigned i = mi->getDesc().getNumDefs(), |
260 | e = mi->getDesc().getNumOperands(); |
261 | i != e; ++i) { |
262 | MachineOperand &mo = mi->getOperand(i); |
263 | if (!mo.isReg()) |
264 | continue; |
265 | for (int rx : regIndices(Reg: mo.getReg())) { |
266 | force(rx, domain); |
267 | } |
268 | } |
269 | |
270 | // Kill all defs and force them. |
271 | for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { |
272 | MachineOperand &mo = mi->getOperand(i); |
273 | if (!mo.isReg()) |
274 | continue; |
275 | for (int rx : regIndices(Reg: mo.getReg())) { |
276 | kill(rx); |
277 | force(rx, domain); |
278 | } |
279 | } |
280 | } |
281 | |
282 | void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { |
283 | // Bitmask of available domains for this instruction after taking collapsed |
284 | // operands into account. |
285 | unsigned available = mask; |
286 | |
287 | // Scan the explicit use operands for incoming domains. |
288 | SmallVector<int, 4> used; |
289 | if (!LiveRegs.empty()) |
290 | for (unsigned i = mi->getDesc().getNumDefs(), |
291 | e = mi->getDesc().getNumOperands(); |
292 | i != e; ++i) { |
293 | MachineOperand &mo = mi->getOperand(i); |
294 | if (!mo.isReg()) |
295 | continue; |
296 | for (int rx : regIndices(Reg: mo.getReg())) { |
297 | DomainValue *dv = LiveRegs[rx]; |
298 | if (dv == nullptr) |
299 | continue; |
300 | // Bitmask of domains that dv and available have in common. |
301 | unsigned common = dv->getCommonDomains(mask: available); |
302 | // Is it possible to use this collapsed register for free? |
303 | if (dv->isCollapsed()) { |
304 | // Restrict available domains to the ones in common with the operand. |
305 | // If there are no common domains, we must pay the cross-domain |
306 | // penalty for this operand. |
307 | if (common) |
308 | available = common; |
309 | } else if (common) |
310 | // Open DomainValue is compatible, save it for merging. |
311 | used.push_back(Elt: rx); |
312 | else |
313 | // Open DomainValue is not compatible with instruction. It is useless |
314 | // now. |
315 | kill(rx); |
316 | } |
317 | } |
318 | |
319 | // If the collapsed operands force a single domain, propagate the collapse. |
320 | if (isPowerOf2_32(Value: available)) { |
321 | unsigned domain = llvm::countr_zero(Val: available); |
322 | TII->setExecutionDomain(MI&: *mi, Domain: domain); |
323 | visitHardInstr(mi, domain); |
324 | return; |
325 | } |
326 | |
327 | // Kill off any remaining uses that don't match available, and build a list of |
328 | // incoming DomainValues that we want to merge. |
329 | SmallVector<int, 4> Regs; |
330 | for (int rx : used) { |
331 | assert(!LiveRegs.empty() && "no space allocated for live registers" ); |
332 | DomainValue *&LR = LiveRegs[rx]; |
333 | // This useless DomainValue could have been missed above. |
334 | if (!LR->getCommonDomains(mask: available)) { |
335 | kill(rx); |
336 | continue; |
337 | } |
338 | // Sorted insertion. |
339 | // Enables giving priority to the latest domains during merging. |
340 | const int Def = RDA->getReachingDef(MI: mi, PhysReg: RC->getRegister(i: rx)); |
341 | auto I = partition_point(Range&: Regs, P: [&](int I) { |
342 | return RDA->getReachingDef(MI: mi, PhysReg: RC->getRegister(i: I)) <= Def; |
343 | }); |
344 | Regs.insert(I, Elt: rx); |
345 | } |
346 | |
347 | // doms are now sorted in order of appearance. Try to merge them all, giving |
348 | // priority to the latest ones. |
349 | DomainValue *dv = nullptr; |
350 | while (!Regs.empty()) { |
351 | if (!dv) { |
352 | dv = LiveRegs[Regs.pop_back_val()]; |
353 | // Force the first dv to match the current instruction. |
354 | dv->AvailableDomains = dv->getCommonDomains(mask: available); |
355 | assert(dv->AvailableDomains && "Domain should have been filtered" ); |
356 | continue; |
357 | } |
358 | |
359 | DomainValue *Latest = LiveRegs[Regs.pop_back_val()]; |
360 | // Skip already merged values. |
361 | if (Latest == dv || Latest->Next) |
362 | continue; |
363 | if (merge(A: dv, B: Latest)) |
364 | continue; |
365 | |
366 | // If latest didn't merge, it is useless now. Kill all registers using it. |
367 | for (int i : used) { |
368 | assert(!LiveRegs.empty() && "no space allocated for live registers" ); |
369 | if (LiveRegs[i] == Latest) |
370 | kill(rx: i); |
371 | } |
372 | } |
373 | |
374 | // dv is the DomainValue we are going to use for this instruction. |
375 | if (!dv) { |
376 | dv = alloc(); |
377 | dv->AvailableDomains = available; |
378 | } |
379 | dv->Instrs.push_back(Elt: mi); |
380 | |
381 | // Finally set all defs and non-collapsed uses to dv. We must iterate through |
382 | // all the operators, including imp-def ones. |
383 | for (const MachineOperand &mo : mi->operands()) { |
384 | if (!mo.isReg()) |
385 | continue; |
386 | for (int rx : regIndices(Reg: mo.getReg())) { |
387 | if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) { |
388 | kill(rx); |
389 | setLiveReg(rx, dv); |
390 | } |
391 | } |
392 | } |
393 | } |
394 | |
395 | void ExecutionDomainFix::processBasicBlock( |
396 | const LoopTraversal::TraversedMBBInfo &TraversedMBB) { |
397 | enterBasicBlock(TraversedMBB); |
398 | // If this block is not done, it makes little sense to make any decisions |
399 | // based on clearance information. We need to make a second pass anyway, |
400 | // and by then we'll have better information, so we can avoid doing the work |
401 | // to try and break dependencies now. |
402 | for (MachineInstr &MI : *TraversedMBB.MBB) { |
403 | if (!MI.isDebugInstr()) { |
404 | bool Kill = false; |
405 | if (TraversedMBB.PrimaryPass) |
406 | Kill = visitInstr(MI: &MI); |
407 | processDefs(MI: &MI, Kill); |
408 | } |
409 | } |
410 | leaveBasicBlock(TraversedMBB); |
411 | } |
412 | |
413 | bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) { |
414 | if (skipFunction(F: mf.getFunction())) |
415 | return false; |
416 | MF = &mf; |
417 | TII = MF->getSubtarget().getInstrInfo(); |
418 | TRI = MF->getSubtarget().getRegisterInfo(); |
419 | LiveRegs.clear(); |
420 | assert(NumRegs == RC->getNumRegs() && "Bad regclass" ); |
421 | |
422 | LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: " |
423 | << TRI->getRegClassName(RC) << " **********\n" ); |
424 | |
425 | // If no relevant registers are used in the function, we can skip it |
426 | // completely. |
427 | bool anyregs = false; |
428 | const MachineRegisterInfo &MRI = mf.getRegInfo(); |
429 | for (unsigned Reg : *RC) { |
430 | if (MRI.isPhysRegUsed(PhysReg: Reg)) { |
431 | anyregs = true; |
432 | break; |
433 | } |
434 | } |
435 | if (!anyregs) |
436 | return false; |
437 | |
438 | RDA = &getAnalysis<ReachingDefAnalysis>(); |
439 | |
440 | // Initialize the AliasMap on the first use. |
441 | if (AliasMap.empty()) { |
442 | // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and |
443 | // therefore the LiveRegs array. |
444 | AliasMap.resize(new_size: TRI->getNumRegs()); |
445 | for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) |
446 | for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid(); |
447 | ++AI) |
448 | AliasMap[*AI].push_back(Elt: i); |
449 | } |
450 | |
451 | // Initialize the MBBOutRegsInfos |
452 | MBBOutRegsInfos.resize(N: mf.getNumBlockIDs()); |
453 | |
454 | // Traverse the basic blocks. |
455 | LoopTraversal Traversal; |
456 | LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(MF&: mf); |
457 | for (const LoopTraversal::TraversedMBBInfo &TraversedMBB : TraversedMBBOrder) |
458 | processBasicBlock(TraversedMBB); |
459 | |
460 | for (const LiveRegsDVInfo &OutLiveRegs : MBBOutRegsInfos) |
461 | for (DomainValue *OutLiveReg : OutLiveRegs) |
462 | if (OutLiveReg) |
463 | release(DV: OutLiveReg); |
464 | |
465 | MBBOutRegsInfos.clear(); |
466 | Avail.clear(); |
467 | Allocator.DestroyAll(); |
468 | |
469 | return false; |
470 | } |
471 | |