| 1 | //===--- HexagonAggressiveRDFCopy.cpp -------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // RDF-based aggressive copy propagation. |
| 10 | // |
| 11 | // This optimization extends the standard RDF copy propagation with support for |
| 12 | // super-register and sub-register copy propagation. It determines candidates |
| 13 | // for copy propagation by verifying that both the copy instruction and all |
| 14 | // reached uses have the same reaching definitions for the source register(s). |
| 15 | // |
| 16 | // Key differences: |
| 17 | // 1. Super-register handling: Can propagate copies involving super-registers |
| 18 | // and their sub-registers (e.g., double-register pairs on Hexagon). |
| 19 | // 2. Sub-register coverage: Verifies that all sub-registers of the source |
| 20 | // register have consistent reaching definitions before propagating. |
| 21 | // 3. Combine instruction support: Handles A2_combinew instructions that |
| 22 | // combine two 32-bit registers into a 64-bit register pair. |
| 23 | // |
| 24 | // Algorithm: |
| 25 | // 1. Scan all basic blocks in dominator tree order, maintaining a stack of |
| 26 | // reaching definitions for each register. |
| 27 | // 2. For each copy instruction: |
| 28 | // a. Record the copy and its source/destination register mapping. |
| 29 | // b. Find the reaching definition for the source register at the copy. |
| 30 | // c. For each use reached by the copy's destination register: |
| 31 | // - Check if all sub-registers of the source have the same reaching |
| 32 | // definition at both the copy and the use. |
| 33 | // - If yes, mark the use as replaceable. |
| 34 | // 3. Replace all marked uses with the source register of the copy. |
| 35 | // |
| 36 | // Example: |
| 37 | // BB1: |
| 38 | // R1 = ... // Def1 |
| 39 | // D0 = A2_combinew R1, R0 // Copy: D0 = {R1, R0} |
| 40 | // ... = D0 // Use of D0 |
| 41 | // |
| 42 | // If R1 and R0 have the same reaching definitions at both the copy and the |
| 43 | // use, the use of D0 can be replaced with the original source registers. |
| 44 | // D0 is super-register corresponding to R1:0. |
| 45 | // |
| 46 | //===----------------------------------------------------------------------===// |
| 47 | |
| 48 | #include "HexagonAggressiveRDFCopy.h" |
| 49 | #include "llvm/CodeGen/MachineDominators.h" |
| 50 | #include "llvm/CodeGen/MachineInstr.h" |
| 51 | #include "llvm/CodeGen/MachineOperand.h" |
| 52 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 53 | #include "llvm/CodeGen/RDFGraph.h" |
| 54 | #include "llvm/CodeGen/RDFLiveness.h" |
| 55 | #include "llvm/CodeGen/RDFRegisters.h" |
| 56 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 57 | #include "llvm/CodeGen/TargetOpcodes.h" |
| 58 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 59 | #include "llvm/MC/MCRegisterInfo.h" |
| 60 | #include "llvm/Support/CommandLine.h" |
| 61 | #include "llvm/Support/Debug.h" |
| 62 | #include "llvm/Support/ErrorHandling.h" |
| 63 | #include "llvm/Support/raw_ostream.h" |
| 64 | |
| 65 | #include <cassert> |
| 66 | #include <cstdint> |
| 67 | #include <utility> |
| 68 | |
| 69 | using namespace llvm; |
| 70 | using namespace rdf; |
| 71 | |
| 72 | #ifndef NDEBUG |
| 73 | extern cl::opt<unsigned> RDFCpLimit; |
| 74 | static unsigned RDFCpCount = 0; |
| 75 | #endif |
| 76 | |
| 77 | // Record destination and source registers in EqualityMap |
| 78 | // if this is a copy instruction |
| 79 | bool AggressiveCopyPropagation::interpretAsCopy(const MachineInstr *MI, |
| 80 | EqualityMap &EM) { |
| 81 | unsigned Opc = MI->getOpcode(); |
| 82 | switch (Opc) { |
| 83 | case TargetOpcode::COPY: { |
| 84 | const MachineOperand &Dst = MI->getOperand(i: 0); |
| 85 | const MachineOperand &Src = MI->getOperand(i: 1); |
| 86 | RegisterRef DstR = DFG.makeRegRef(Reg: Dst.getReg(), Sub: Dst.getSubReg()); |
| 87 | RegisterRef SrcR = DFG.makeRegRef(Reg: Src.getReg(), Sub: Src.getSubReg()); |
| 88 | assert(Register::isPhysicalRegister(DstR.Id)); |
| 89 | assert(Register::isPhysicalRegister(SrcR.Id)); |
| 90 | if (HRI.isFakeReg(Reg: DstR.Id) || HRI.isFakeReg(Reg: SrcR.Id)) |
| 91 | return false; |
| 92 | if (TRI.getMinimalPhysRegClass(Reg: DstR.Id) != |
| 93 | TRI.getMinimalPhysRegClass(Reg: SrcR.Id)) |
| 94 | return false; |
| 95 | if (!DFG.isTracked(RR: SrcR) || !DFG.isTracked(RR: DstR)) |
| 96 | return false; |
| 97 | EM.insert(x: std::make_pair(x&: DstR, y&: SrcR)); |
| 98 | return true; |
| 99 | } |
| 100 | case TargetOpcode::REG_SEQUENCE: |
| 101 | llvm_unreachable("Unexpected REG_SEQUENCE" ); |
| 102 | } |
| 103 | return false; |
| 104 | } |
| 105 | |
| 106 | // Track instructions determined to be copies along with their uses. |
| 107 | // The register, sub-register copy pairs are given by EqualityMap |
| 108 | // Find the reaching def from DefM stack for source (LHS) registers in each |
| 109 | // copy. ReachedUseToCopyMap stores each reached use (UseNode) of a copy along |
| 110 | // with the copy DefNode and source register |
| 111 | void AggressiveCopyPropagation::recordCopy(NodeAddr<StmtNode *> SA, |
| 112 | EqualityMap &EM) { |
| 113 | if (trace()) |
| 114 | CopyMap.insert(x: std::make_pair(x&: SA.Id, y&: EM)); |
| 115 | |
| 116 | // Find and store reaching def for each source register |
| 117 | // EqualityMap should also contain subregs |
| 118 | for (auto I : EM) { |
| 119 | if (PRI.equal_to(A: I.first, B: I.second)) |
| 120 | continue; |
| 121 | NodeId RDefId = 0; |
| 122 | auto FS = DefM.find(Val: I.second.Id); |
| 123 | if (FS != DefM.end() && !FS->second.empty()) { |
| 124 | auto Def = FS->second.top()->Addr->getRegRef(G: DFG); |
| 125 | // Avoid adding subreg as reaching def for superreg |
| 126 | if (Register::isPhysicalRegister(Reg: Def.Id) && |
| 127 | TRI.isSuperRegister(RegA: Def.Id, RegB: I.second.Id)) |
| 128 | continue; |
| 129 | RDefId = FS->second.top()->Id; |
| 130 | } |
| 131 | RDefMap[I.second][SA.Id] = RDefId; |
| 132 | } |
| 133 | for (NodeAddr<DefNode *> DA : SA.Addr->members_if(P: DFG.IsDef, G: DFG)) { |
| 134 | RegisterRef DR = DA.Addr->getRegRef(G: DFG); |
| 135 | auto FR = EM.find(x: DR); |
| 136 | if (FR == EM.end()) |
| 137 | continue; |
| 138 | // Iterate over DR and its subregisters |
| 139 | // if present in EqualityMap, find its reached uses |
| 140 | for (MCPhysReg SubDReg : TRI.subregs_inclusive(Reg: DR.Id)) { |
| 141 | auto SubDR = DFG.makeRegRef(Reg: SubDReg, Sub: 0); |
| 142 | auto FR = EM.find(x: SubDR); |
| 143 | if (FR == EM.end()) |
| 144 | continue; |
| 145 | RegisterRef SR = FR->second; |
| 146 | // Redundant copy |
| 147 | if (PRI.equal_to(A: SubDR, B: SR)) |
| 148 | continue; |
| 149 | for (NodeId N = DA.Addr->getReachedUse(), NextN; N; N = NextN) { |
| 150 | auto UA = DFG.addr<UseNode *>(N); |
| 151 | NextN = UA.Addr->getSibling(); |
| 152 | uint16_t F = UA.Addr->getFlags(); |
| 153 | // Skip phi node uses |
| 154 | // Skip shadow uses. When shadow nodes are present, the register has |
| 155 | // multiple reaching defs. |
| 156 | if ((F & NodeAttrs::PhiRef) || (F & NodeAttrs::Fixed) || |
| 157 | (F & NodeAttrs::Shadow)) |
| 158 | continue; |
| 159 | if (!PRI.equal_to(A: UA.Addr->getRegRef(G: DFG), B: SubDR)) |
| 160 | continue; |
| 161 | MachineOperand &Op = UA.Addr->getOp(); |
| 162 | // Skip operand if def and use of a register happens in same instruction |
| 163 | if (Op.isTied()) |
| 164 | continue; |
| 165 | if (ReachedUseToCopyMap.find(Val: UA.Id) != ReachedUseToCopyMap.end()) |
| 166 | llvm_unreachable("Multiple copy instructions reach this use" ); |
| 167 | ReachedUseToCopyMap.insert( |
| 168 | KV: std::make_pair(x&: UA.Id, y: std::make_pair(x&: DA, y&: SR))); |
| 169 | } |
| 170 | } |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | // Now that we can obtain reaching def for uses from DefM, |
| 175 | // check that reaching defs for source register and subregisters at the use |
| 176 | // instruction, are the same as reaching defs for the copy. Uses that satisfy |
| 177 | // this check can be replaced with the source registers of the copy. |
| 178 | void AggressiveCopyPropagation::recordReplacableUses(NodeAddr<InstrNode *> IA) { |
| 179 | for (NodeAddr<UseNode *> UA : IA.Addr->members_if(P: DFG.IsUse, G: DFG)) { |
| 180 | // Check if any uses of the instruction are reached by a copy |
| 181 | auto CopyUseIt = ReachedUseToCopyMap.find(Val: UA.Id); |
| 182 | if (CopyUseIt == ReachedUseToCopyMap.end()) |
| 183 | continue; |
| 184 | [[maybe_unused]] auto UseReg = UA.Addr->getRegRef(G: DFG); |
| 185 | auto DA = CopyUseIt->second.first; |
| 186 | auto SR = CopyUseIt->second.second; |
| 187 | [[maybe_unused]] auto DefReg = DA.Addr->getRegRef(G: DFG); |
| 188 | assert(PRI.equal_to(DefReg, UseReg)); |
| 189 | NodeAddr<InstrNode *> DefI = DA.Addr->getOwner(G: DFG); |
| 190 | // Aggr of subregs that have same reaching def (at IA) as DefI |
| 191 | RegisterAggr RRs(PRI); |
| 192 | // Registers that need to be added as use nodes in updated IA |
| 193 | SmallVector<RegisterRef, 4> UseRefs; |
| 194 | for (MCPhysReg S : TRI.subregs_inclusive(Reg: SR.Id)) { |
| 195 | auto SRef = DFG.makeRegRef(Reg: S, Sub: 0); |
| 196 | NodeId RDefId = 0; |
| 197 | // If there is no reaching def for SRef at DefI, |
| 198 | // do not check if SRef can be propagated |
| 199 | auto RDefIt = RDefMap.find(x: SRef); |
| 200 | if (RDefIt == RDefMap.end()) |
| 201 | continue; |
| 202 | auto DefIIt = RDefIt->second.find(x: DefI.Id); |
| 203 | if (DefIIt == RDefIt->second.end()) |
| 204 | continue; |
| 205 | // If we already have reaching def for SRef at IA, |
| 206 | // use it instead of searching DefM. |
| 207 | auto IAIt = RDefIt->second.find(x: IA.Id); |
| 208 | if (IAIt != RDefIt->second.end()) { |
| 209 | RDefId = IAIt->second; |
| 210 | } else { |
| 211 | auto F = DefM.find(Val: S); |
| 212 | if (F != DefM.end() && !F->second.empty()) { |
| 213 | auto Def = F->second.top()->Addr->getRegRef(G: DFG); |
| 214 | // Avoid adding subreg as reaching def for superreg |
| 215 | if (Register::isPhysicalRegister(Reg: Def.Id) && |
| 216 | TRI.isSuperRegister(RegA: Def.Id, RegB: S)) |
| 217 | continue; |
| 218 | RDefId = F->second.top()->Id; |
| 219 | } |
| 220 | } |
| 221 | // If reaching def for SRef at DefI is not same as IA, |
| 222 | // SRef can not propagated to IA. |
| 223 | if (DefIIt->second != RDefId) |
| 224 | continue; |
| 225 | RRs.insert(RR: SRef); |
| 226 | UseRefs.push_back(Elt: SRef); |
| 227 | RDefIt->second[IA.Id] = RDefId; |
| 228 | // If registers or sub-registers that can be propagated cover SR, |
| 229 | // the use node is a candidate for copy propagation |
| 230 | if (RRs.hasCoverOf(RR: SR)) |
| 231 | break; |
| 232 | } |
| 233 | // Use node can be replaced with new use nodes created from UseRefs |
| 234 | if (RRs.hasCoverOf(RR: SR)) |
| 235 | ReplacableUses.push_back(Elt: std::make_pair(x&: UA, y&: UseRefs)); |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | // Recursively process all children in the dominator tree. |
| 240 | // Find copy instructions and reached uses that are candidates for propagation |
| 241 | void AggressiveCopyPropagation::scanBlock(MachineBasicBlock *B) { |
| 242 | NodeAddr<BlockNode *> BA = DFG.findBlock(BB: B); |
| 243 | DFG.markBlock(B: BA.Id, DefM); |
| 244 | |
| 245 | for (NodeAddr<InstrNode *> IA : BA.Addr->members(G: DFG)) { |
| 246 | if (DFG.IsCode<NodeAttrs::Stmt>(BA: IA)) { |
| 247 | NodeAddr<StmtNode *> SA = IA; |
| 248 | EqualityMap EM(RegisterRefLess(DFG.getPRI())); |
| 249 | if (interpretAsCopy(MI: SA.Addr->getCode(), EM)) |
| 250 | recordCopy(SA, EM); |
| 251 | recordReplacableUses(IA); |
| 252 | } |
| 253 | DFG.pushAllDefs(IA, DM&: DefM); |
| 254 | } |
| 255 | |
| 256 | MachineDomTreeNode *N = MDT.getNode(BB: B); |
| 257 | for (auto *I : *N) |
| 258 | scanBlock(B: I->getBlock()); |
| 259 | |
| 260 | DFG.releaseBlock(B: BA.Id, DefM); |
| 261 | return; |
| 262 | } |
| 263 | |
| 264 | bool AggressiveCopyPropagation::run() { |
| 265 | scanBlock(B: MDT.getRootNode()->getBlock()); |
| 266 | |
| 267 | if (trace()) { |
| 268 | dbgs() << "Copies:\n" ; |
| 269 | for (auto &C : CopyMap) { |
| 270 | dbgs() << "Instr: " << *DFG.addr<StmtNode *>(N: C.first).Addr->getCode(); |
| 271 | dbgs() << " eq: {" ; |
| 272 | for (auto J : C.second) |
| 273 | dbgs() << ' ' << Print<RegisterRef>(J.first, DFG) << '=' |
| 274 | << Print<RegisterRef>(J.second, DFG); |
| 275 | dbgs() << " }\n" ; |
| 276 | } |
| 277 | dbgs() << "\nCopy def-use:\n" ; |
| 278 | for (auto &U : ReachedUseToCopyMap) { |
| 279 | auto DA = U.second.first; |
| 280 | auto DefI = DA.Addr->getOwner(G: DFG); |
| 281 | auto UseI = DFG.addr<UseNode *>(N: U.first).Addr->getOwner(G: DFG); |
| 282 | dbgs() << "Copy def: " << *DFG.addr<StmtNode *>(N: DefI.Id).Addr->getCode(); |
| 283 | dbgs() << "Copy use: " << *DFG.addr<StmtNode *>(N: UseI.Id).Addr->getCode(); |
| 284 | } |
| 285 | dbgs() << "\nRDef map:\n" ; |
| 286 | for (auto R : RDefMap) { |
| 287 | dbgs() << Print<RegisterRef>(R.first, DFG) << " -> {" ; |
| 288 | for (auto &M : R.second) |
| 289 | dbgs() << ' ' << Print<NodeId>(M.first, DFG) << ':' |
| 290 | << Print<NodeId>(M.second, DFG); |
| 291 | dbgs() << " }\n" ; |
| 292 | } |
| 293 | } |
| 294 | |
| 295 | bool Changed = false; |
| 296 | #ifndef NDEBUG |
| 297 | bool HasLimit = RDFCpLimit.getNumOccurrences() > 0; |
| 298 | #endif |
| 299 | |
| 300 | auto MinPhysReg = [this](RegisterRef RR) -> unsigned { |
| 301 | const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(Reg: RR.Id); |
| 302 | if ((RC.LaneMask & RR.Mask) == RC.LaneMask) |
| 303 | return RR.Id; |
| 304 | for (MCSubRegIndexIterator S(RR.Id, &TRI); S.isValid(); ++S) |
| 305 | if (RR.Mask == TRI.getSubRegIndexLaneMask(SubIdx: S.getSubRegIndex())) |
| 306 | return S.getSubReg(); |
| 307 | llvm_unreachable("Should have found a register" ); |
| 308 | return 0; |
| 309 | }; |
| 310 | |
| 311 | // Iterate over all candidate uses found and replace with source register of |
| 312 | // copy |
| 313 | for (auto P : ReplacableUses) { |
| 314 | #ifndef NDEBUG |
| 315 | if (HasLimit && RDFCpCount >= RDFCpLimit) |
| 316 | break; |
| 317 | #endif |
| 318 | NodeAddr<UseNode *> UA = P.first; |
| 319 | SmallVector<RegisterRef, 4> UseRefs = P.second; |
| 320 | |
| 321 | // UseRefs should never be empty if RRs.hasCoverOf(SR) was true |
| 322 | assert(!UseRefs.empty() && |
| 323 | "UseRefs should not be empty for replaceable use" ); |
| 324 | |
| 325 | auto IA = UA.Addr->getOwner(G: DFG); |
| 326 | auto DR = UA.Addr->getRegRef(G: DFG); |
| 327 | auto SR = ReachedUseToCopyMap[UA.Id].second; |
| 328 | if (HRI.isFakeReg(Reg: SR.Id)) |
| 329 | continue; |
| 330 | |
| 331 | if (trace()) { |
| 332 | dbgs() << "Can replace " << Print<RegisterRef>(DR, DFG) << " with " |
| 333 | << Print<RegisterRef>(SR, DFG) << " in " |
| 334 | << *NodeAddr<StmtNode *>(IA).Addr->getCode(); |
| 335 | } |
| 336 | |
| 337 | // Update existing use node to use the source register |
| 338 | MachineOperand &Op = UA.Addr->getOp(); |
| 339 | unsigned NewReg = MinPhysReg(SR); |
| 340 | Op.setReg(NewReg); |
| 341 | Op.setSubReg(0); |
| 342 | DFG.unlinkUse(UA, RemoveFromOwner: false); |
| 343 | bool firstUseNode = true; |
| 344 | |
| 345 | for (auto UR : UseRefs) { |
| 346 | // If we have more than one use (such as multiple subregs), |
| 347 | // add a new shadow use node |
| 348 | if (!firstUseNode) { |
| 349 | UA.Addr->setFlags(UA.Addr->getFlags() | NodeAttrs::Shadow); |
| 350 | UA = DFG.getNextShadow(IA, RA: UA, Create: true); |
| 351 | } |
| 352 | if (RDefMap[UR][IA.Id] != 0) { |
| 353 | UA.Addr->linkToDef(Self: UA.Id, DA: DFG.addr<DefNode *>(N: RDefMap[UR][IA.Id])); |
| 354 | } else { |
| 355 | // No reaching def present |
| 356 | UA.Addr->setReachingDef(0); |
| 357 | UA.Addr->setSibling(0); |
| 358 | } |
| 359 | firstUseNode = false; |
| 360 | } |
| 361 | |
| 362 | Changed = true; |
| 363 | #ifndef NDEBUG |
| 364 | if (HasLimit && RDFCpCount >= RDFCpLimit) |
| 365 | break; |
| 366 | RDFCpCount++; |
| 367 | #endif |
| 368 | |
| 369 | } // for (UA in replacable uses) |
| 370 | |
| 371 | return Changed; |
| 372 | } |
| 373 | |