| 1 | //===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "llvm/CodeGen/ExecutionDomainFix.h" |
| 10 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 11 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 12 | #include "llvm/Support/Debug.h" |
| 13 | |
| 14 | using namespace llvm; |
| 15 | |
| 16 | #define DEBUG_TYPE "execution-deps-fix" |
| 17 | |
| 18 | iterator_range<SmallVectorImpl<int>::const_iterator> |
| 19 | ExecutionDomainFix::regIndices(MCRegister Reg) const { |
| 20 | assert(Reg < AliasMap.size() && "Invalid register" ); |
| 21 | const auto &Entry = AliasMap[Reg]; |
| 22 | return make_range(x: Entry.begin(), y: Entry.end()); |
| 23 | } |
| 24 | |
| 25 | DomainValue *ExecutionDomainFix::alloc(int domain) { |
| 26 | DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue |
| 27 | : Avail.pop_back_val(); |
| 28 | if (domain >= 0) |
| 29 | dv->addDomain(domain); |
| 30 | assert(dv->Refs == 0 && "Reference count wasn't cleared" ); |
| 31 | assert(!dv->Next && "Chained DomainValue shouldn't have been recycled" ); |
| 32 | return dv; |
| 33 | } |
| 34 | |
| 35 | void ExecutionDomainFix::release(DomainValue *DV) { |
| 36 | while (DV) { |
| 37 | assert(DV->Refs && "Bad DomainValue" ); |
| 38 | if (--DV->Refs) |
| 39 | return; |
| 40 | |
| 41 | // There are no more DV references. Collapse any contained instructions. |
| 42 | if (DV->AvailableDomains && !DV->isCollapsed()) |
| 43 | collapse(dv: DV, domain: DV->getFirstDomain()); |
| 44 | |
| 45 | DomainValue *Next = DV->Next; |
| 46 | DV->clear(); |
| 47 | Avail.push_back(Elt: DV); |
| 48 | // Also release the next DomainValue in the chain. |
| 49 | DV = Next; |
| 50 | } |
| 51 | } |
| 52 | |
| 53 | DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) { |
| 54 | DomainValue *DV = DVRef; |
| 55 | if (!DV || !DV->Next) |
| 56 | return DV; |
| 57 | |
| 58 | // DV has a chain. Find the end. |
| 59 | do |
| 60 | DV = DV->Next; |
| 61 | while (DV->Next); |
| 62 | |
| 63 | // Update DVRef to point to DV. |
| 64 | retain(DV); |
| 65 | release(DV: DVRef); |
| 66 | DVRef = DV; |
| 67 | return DV; |
| 68 | } |
| 69 | |
| 70 | void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) { |
| 71 | assert(unsigned(rx) < NumRegs && "Invalid index" ); |
| 72 | assert(!LiveRegs.empty() && "Must enter basic block first." ); |
| 73 | |
| 74 | if (LiveRegs[rx] == dv) |
| 75 | return; |
| 76 | if (LiveRegs[rx]) |
| 77 | release(DV: LiveRegs[rx]); |
| 78 | LiveRegs[rx] = retain(DV: dv); |
| 79 | } |
| 80 | |
| 81 | void ExecutionDomainFix::kill(int rx) { |
| 82 | assert(unsigned(rx) < NumRegs && "Invalid index" ); |
| 83 | assert(!LiveRegs.empty() && "Must enter basic block first." ); |
| 84 | if (!LiveRegs[rx]) |
| 85 | return; |
| 86 | |
| 87 | release(DV: LiveRegs[rx]); |
| 88 | LiveRegs[rx] = nullptr; |
| 89 | } |
| 90 | |
| 91 | void ExecutionDomainFix::force(int rx, unsigned domain) { |
| 92 | assert(unsigned(rx) < NumRegs && "Invalid index" ); |
| 93 | assert(!LiveRegs.empty() && "Must enter basic block first." ); |
| 94 | if (DomainValue *dv = LiveRegs[rx]) { |
| 95 | if (dv->isCollapsed()) |
| 96 | dv->addDomain(domain); |
| 97 | else if (dv->hasDomain(domain)) |
| 98 | collapse(dv, domain); |
| 99 | else { |
| 100 | // This is an incompatible open DomainValue. Collapse it to whatever and |
| 101 | // force the new value into domain. This costs a domain crossing. |
| 102 | collapse(dv, domain: dv->getFirstDomain()); |
| 103 | assert(LiveRegs[rx] && "Not live after collapse?" ); |
| 104 | LiveRegs[rx]->addDomain(domain); |
| 105 | } |
| 106 | } else { |
| 107 | // Set up basic collapsed DomainValue. |
| 108 | setLiveReg(rx, dv: alloc(domain)); |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) { |
| 113 | assert(dv->hasDomain(domain) && "Cannot collapse" ); |
| 114 | |
| 115 | // Collapse all the instructions. |
| 116 | while (!dv->Instrs.empty()) |
| 117 | TII->setExecutionDomain(MI&: *dv->Instrs.pop_back_val(), Domain: domain); |
| 118 | dv->setSingleDomain(domain); |
| 119 | |
| 120 | // If there are multiple users, give them new, unique DomainValues. |
| 121 | if (!LiveRegs.empty() && dv->Refs > 1) |
| 122 | for (unsigned rx = 0; rx != NumRegs; ++rx) |
| 123 | if (LiveRegs[rx] == dv) |
| 124 | setLiveReg(rx, dv: alloc(domain)); |
| 125 | } |
| 126 | |
| 127 | bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) { |
| 128 | assert(!A->isCollapsed() && "Cannot merge into collapsed" ); |
| 129 | assert(!B->isCollapsed() && "Cannot merge from collapsed" ); |
| 130 | if (A == B) |
| 131 | return true; |
| 132 | // Restrict to the domains that A and B have in common. |
| 133 | unsigned common = A->getCommonDomains(mask: B->AvailableDomains); |
| 134 | if (!common) |
| 135 | return false; |
| 136 | A->AvailableDomains = common; |
| 137 | A->Instrs.append(in_start: B->Instrs.begin(), in_end: B->Instrs.end()); |
| 138 | |
| 139 | // Clear the old DomainValue so we won't try to swizzle instructions twice. |
| 140 | B->clear(); |
| 141 | // All uses of B are referred to A. |
| 142 | B->Next = retain(DV: A); |
| 143 | |
| 144 | for (unsigned rx = 0; rx != NumRegs; ++rx) { |
| 145 | assert(!LiveRegs.empty() && "no space allocated for live registers" ); |
| 146 | if (LiveRegs[rx] == B) |
| 147 | setLiveReg(rx, dv: A); |
| 148 | } |
| 149 | return true; |
| 150 | } |
| 151 | |
| 152 | void ExecutionDomainFix::enterBasicBlock( |
| 153 | const LoopTraversal::TraversedMBBInfo &TraversedMBB) { |
| 154 | |
| 155 | MachineBasicBlock *MBB = TraversedMBB.MBB; |
| 156 | |
| 157 | // Set up LiveRegs to represent registers entering MBB. |
| 158 | // Set default domain values to 'no domain' (nullptr) |
| 159 | if (LiveRegs.empty()) |
| 160 | LiveRegs.assign(n: NumRegs, val: nullptr); |
| 161 | |
| 162 | // This is the entry block. |
| 163 | if (MBB->pred_empty()) { |
| 164 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n" ); |
| 165 | return; |
| 166 | } |
| 167 | |
| 168 | // Try to coalesce live-out registers from predecessors. |
| 169 | for (MachineBasicBlock *pred : MBB->predecessors()) { |
| 170 | assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && |
| 171 | "Should have pre-allocated MBBInfos for all MBBs" ); |
| 172 | LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()]; |
| 173 | // Incoming is null if this is a backedge from a BB |
| 174 | // we haven't processed yet |
| 175 | if (Incoming.empty()) |
| 176 | continue; |
| 177 | |
| 178 | for (unsigned rx = 0; rx != NumRegs; ++rx) { |
| 179 | DomainValue *pdv = resolve(DVRef&: Incoming[rx]); |
| 180 | if (!pdv) |
| 181 | continue; |
| 182 | if (!LiveRegs[rx]) { |
| 183 | setLiveReg(rx, dv: pdv); |
| 184 | continue; |
| 185 | } |
| 186 | |
| 187 | // We have a live DomainValue from more than one predecessor. |
| 188 | if (LiveRegs[rx]->isCollapsed()) { |
| 189 | // We are already collapsed, but predecessor is not. Force it. |
| 190 | unsigned Domain = LiveRegs[rx]->getFirstDomain(); |
| 191 | if (!pdv->isCollapsed() && pdv->hasDomain(domain: Domain)) |
| 192 | collapse(dv: pdv, domain: Domain); |
| 193 | continue; |
| 194 | } |
| 195 | |
| 196 | // Currently open, merge in predecessor. |
| 197 | if (!pdv->isCollapsed()) |
| 198 | merge(A: LiveRegs[rx], B: pdv); |
| 199 | else |
| 200 | force(rx, domain: pdv->getFirstDomain()); |
| 201 | } |
| 202 | } |
| 203 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) |
| 204 | << (!TraversedMBB.IsDone ? ": incomplete\n" |
| 205 | : ": all preds known\n" )); |
| 206 | } |
| 207 | |
| 208 | void ExecutionDomainFix::leaveBasicBlock( |
| 209 | const LoopTraversal::TraversedMBBInfo &TraversedMBB) { |
| 210 | assert(!LiveRegs.empty() && "Must enter basic block first." ); |
| 211 | unsigned MBBNumber = TraversedMBB.MBB->getNumber(); |
| 212 | assert(MBBNumber < MBBOutRegsInfos.size() && |
| 213 | "Unexpected basic block number." ); |
| 214 | // Save register clearances at end of MBB - used by enterBasicBlock(). |
| 215 | for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) { |
| 216 | release(DV: OldLiveReg); |
| 217 | } |
| 218 | MBBOutRegsInfos[MBBNumber] = LiveRegs; |
| 219 | LiveRegs.clear(); |
| 220 | } |
| 221 | |
| 222 | bool ExecutionDomainFix::visitInstr(MachineInstr *MI) { |
| 223 | // Update instructions with explicit execution domains. |
| 224 | std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI: *MI); |
| 225 | if (DomP.first) { |
| 226 | if (DomP.second) |
| 227 | visitSoftInstr(MI, mask: DomP.second); |
| 228 | else |
| 229 | visitHardInstr(MI, domain: DomP.first); |
| 230 | } |
| 231 | |
| 232 | return !DomP.first; |
| 233 | } |
| 234 | |
| 235 | void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) { |
| 236 | assert(!MI->isDebugInstr() && "Won't process debug values" ); |
| 237 | const MCInstrDesc &MCID = MI->getDesc(); |
| 238 | for (unsigned i = 0, |
| 239 | e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); |
| 240 | i != e; ++i) { |
| 241 | MachineOperand &MO = MI->getOperand(i); |
| 242 | if (!MO.isReg()) |
| 243 | continue; |
| 244 | if (MO.isUse()) |
| 245 | continue; |
| 246 | for (int rx : regIndices(Reg: MO.getReg())) { |
| 247 | // This instruction explicitly defines rx. |
| 248 | LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI); |
| 249 | |
| 250 | // Kill off domains redefined by generic instructions. |
| 251 | if (Kill) |
| 252 | kill(rx); |
| 253 | } |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) { |
| 258 | // Collapse all uses. |
| 259 | for (unsigned i = mi->getDesc().getNumDefs(), |
| 260 | e = mi->getDesc().getNumOperands(); |
| 261 | i != e; ++i) { |
| 262 | MachineOperand &mo = mi->getOperand(i); |
| 263 | if (!mo.isReg()) |
| 264 | continue; |
| 265 | for (int rx : regIndices(Reg: mo.getReg())) { |
| 266 | force(rx, domain); |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | // Kill all defs and force them. |
| 271 | for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { |
| 272 | MachineOperand &mo = mi->getOperand(i); |
| 273 | if (!mo.isReg()) |
| 274 | continue; |
| 275 | for (int rx : regIndices(Reg: mo.getReg())) { |
| 276 | kill(rx); |
| 277 | force(rx, domain); |
| 278 | } |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { |
| 283 | // Bitmask of available domains for this instruction after taking collapsed |
| 284 | // operands into account. |
| 285 | unsigned available = mask; |
| 286 | |
| 287 | // Scan the explicit use operands for incoming domains. |
| 288 | SmallVector<int, 4> used; |
| 289 | if (!LiveRegs.empty()) |
| 290 | for (unsigned i = mi->getDesc().getNumDefs(), |
| 291 | e = mi->getDesc().getNumOperands(); |
| 292 | i != e; ++i) { |
| 293 | MachineOperand &mo = mi->getOperand(i); |
| 294 | if (!mo.isReg()) |
| 295 | continue; |
| 296 | for (int rx : regIndices(Reg: mo.getReg())) { |
| 297 | DomainValue *dv = LiveRegs[rx]; |
| 298 | if (dv == nullptr) |
| 299 | continue; |
| 300 | // Bitmask of domains that dv and available have in common. |
| 301 | unsigned common = dv->getCommonDomains(mask: available); |
| 302 | // Is it possible to use this collapsed register for free? |
| 303 | if (dv->isCollapsed()) { |
| 304 | // Restrict available domains to the ones in common with the operand. |
| 305 | // If there are no common domains, we must pay the cross-domain |
| 306 | // penalty for this operand. |
| 307 | if (common) |
| 308 | available = common; |
| 309 | } else if (common) |
| 310 | // Open DomainValue is compatible, save it for merging. |
| 311 | used.push_back(Elt: rx); |
| 312 | else |
| 313 | // Open DomainValue is not compatible with instruction. It is useless |
| 314 | // now. |
| 315 | kill(rx); |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | // If the collapsed operands force a single domain, propagate the collapse. |
| 320 | if (isPowerOf2_32(Value: available)) { |
| 321 | unsigned domain = llvm::countr_zero(Val: available); |
| 322 | TII->setExecutionDomain(MI&: *mi, Domain: domain); |
| 323 | visitHardInstr(mi, domain); |
| 324 | return; |
| 325 | } |
| 326 | |
| 327 | // Kill off any remaining uses that don't match available, and build a list of |
| 328 | // incoming DomainValues that we want to merge. |
| 329 | SmallVector<int, 4> Regs; |
| 330 | for (int rx : used) { |
| 331 | assert(!LiveRegs.empty() && "no space allocated for live registers" ); |
| 332 | DomainValue *&LR = LiveRegs[rx]; |
| 333 | // This useless DomainValue could have been missed above. |
| 334 | if (!LR->getCommonDomains(mask: available)) { |
| 335 | kill(rx); |
| 336 | continue; |
| 337 | } |
| 338 | // Sorted insertion. |
| 339 | // Enables giving priority to the latest domains during merging. |
| 340 | const int Def = RDA->getReachingDef(MI: mi, Reg: RC->getRegister(i: rx)); |
| 341 | auto I = partition_point(Range&: Regs, P: [&](int I) { |
| 342 | return RDA->getReachingDef(MI: mi, Reg: RC->getRegister(i: I)) <= Def; |
| 343 | }); |
| 344 | Regs.insert(I, Elt: rx); |
| 345 | } |
| 346 | |
| 347 | // doms are now sorted in order of appearance. Try to merge them all, giving |
| 348 | // priority to the latest ones. |
| 349 | DomainValue *dv = nullptr; |
| 350 | while (!Regs.empty()) { |
| 351 | if (!dv) { |
| 352 | dv = LiveRegs[Regs.pop_back_val()]; |
| 353 | // Force the first dv to match the current instruction. |
| 354 | dv->AvailableDomains = dv->getCommonDomains(mask: available); |
| 355 | assert(dv->AvailableDomains && "Domain should have been filtered" ); |
| 356 | continue; |
| 357 | } |
| 358 | |
| 359 | DomainValue *Latest = LiveRegs[Regs.pop_back_val()]; |
| 360 | // Skip already merged values. |
| 361 | if (Latest == dv || Latest->Next) |
| 362 | continue; |
| 363 | if (merge(A: dv, B: Latest)) |
| 364 | continue; |
| 365 | |
| 366 | // If latest didn't merge, it is useless now. Kill all registers using it. |
| 367 | for (int i : used) { |
| 368 | assert(!LiveRegs.empty() && "no space allocated for live registers" ); |
| 369 | if (LiveRegs[i] == Latest) |
| 370 | kill(rx: i); |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | // dv is the DomainValue we are going to use for this instruction. |
| 375 | if (!dv) { |
| 376 | dv = alloc(); |
| 377 | dv->AvailableDomains = available; |
| 378 | } |
| 379 | dv->Instrs.push_back(Elt: mi); |
| 380 | |
| 381 | // Finally set all defs and non-collapsed uses to dv. We must iterate through |
| 382 | // all the operators, including imp-def ones. |
| 383 | for (const MachineOperand &mo : mi->operands()) { |
| 384 | if (!mo.isReg()) |
| 385 | continue; |
| 386 | for (int rx : regIndices(Reg: mo.getReg())) { |
| 387 | if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) { |
| 388 | kill(rx); |
| 389 | setLiveReg(rx, dv); |
| 390 | } |
| 391 | } |
| 392 | } |
| 393 | } |
| 394 | |
| 395 | void ExecutionDomainFix::processBasicBlock( |
| 396 | const LoopTraversal::TraversedMBBInfo &TraversedMBB) { |
| 397 | enterBasicBlock(TraversedMBB); |
| 398 | // If this block is not done, it makes little sense to make any decisions |
| 399 | // based on clearance information. We need to make a second pass anyway, |
| 400 | // and by then we'll have better information, so we can avoid doing the work |
| 401 | // to try and break dependencies now. |
| 402 | for (MachineInstr &MI : *TraversedMBB.MBB) { |
| 403 | if (!MI.isDebugInstr()) { |
| 404 | bool Kill = false; |
| 405 | if (TraversedMBB.PrimaryPass) |
| 406 | Kill = visitInstr(MI: &MI); |
| 407 | processDefs(MI: &MI, Kill); |
| 408 | } |
| 409 | } |
| 410 | leaveBasicBlock(TraversedMBB); |
| 411 | } |
| 412 | |
| 413 | bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) { |
| 414 | if (skipFunction(F: mf.getFunction())) |
| 415 | return false; |
| 416 | MF = &mf; |
| 417 | TII = MF->getSubtarget().getInstrInfo(); |
| 418 | TRI = MF->getSubtarget().getRegisterInfo(); |
| 419 | LiveRegs.clear(); |
| 420 | assert(NumRegs == RC->getNumRegs() && "Bad regclass" ); |
| 421 | |
| 422 | LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: " |
| 423 | << TRI->getRegClassName(RC) << " **********\n" ); |
| 424 | |
| 425 | // If no relevant registers are used in the function, we can skip it |
| 426 | // completely. |
| 427 | bool anyregs = false; |
| 428 | const MachineRegisterInfo &MRI = mf.getRegInfo(); |
| 429 | for (unsigned Reg : *RC) { |
| 430 | if (MRI.isPhysRegUsed(PhysReg: Reg)) { |
| 431 | anyregs = true; |
| 432 | break; |
| 433 | } |
| 434 | } |
| 435 | if (!anyregs) |
| 436 | return false; |
| 437 | |
| 438 | RDA = &getAnalysis<ReachingDefAnalysis>(); |
| 439 | |
| 440 | // Initialize the AliasMap on the first use. |
| 441 | if (AliasMap.empty()) { |
| 442 | // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and |
| 443 | // therefore the LiveRegs array. |
| 444 | AliasMap.resize(new_size: TRI->getNumRegs()); |
| 445 | for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) |
| 446 | for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid(); |
| 447 | ++AI) |
| 448 | AliasMap[(*AI).id()].push_back(Elt: i); |
| 449 | } |
| 450 | |
| 451 | // Initialize the MBBOutRegsInfos |
| 452 | MBBOutRegsInfos.resize(N: mf.getNumBlockIDs()); |
| 453 | |
| 454 | // Traverse the basic blocks. |
| 455 | LoopTraversal Traversal; |
| 456 | LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(MF&: mf); |
| 457 | for (const LoopTraversal::TraversedMBBInfo &TraversedMBB : TraversedMBBOrder) |
| 458 | processBasicBlock(TraversedMBB); |
| 459 | |
| 460 | for (const LiveRegsDVInfo &OutLiveRegs : MBBOutRegsInfos) |
| 461 | for (DomainValue *OutLiveReg : OutLiveRegs) |
| 462 | if (OutLiveReg) |
| 463 | release(DV: OutLiveReg); |
| 464 | |
| 465 | MBBOutRegsInfos.clear(); |
| 466 | Avail.clear(); |
| 467 | Allocator.DestroyAll(); |
| 468 | |
| 469 | return false; |
| 470 | } |
| 471 | |