| 1 | //===- R600MergeVectorRegisters.cpp ---------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file |
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregs, using swizzle abilities.
| 12 | /// |
| 13 | /// For instance let's consider the following pseudo code : |
| 14 | /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3 |
| 15 | /// ... |
| 16 | /// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3 |
| 17 | /// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3 |
| 18 | /// |
| 19 | /// is turned into : |
| 20 | /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3 |
| 21 | /// ... |
| 22 | /// %7 = INSERT_SUBREG %4, sub3 |
| 23 | /// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3 |
| 24 | /// |
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce the MOV count.
| 27 | //===----------------------------------------------------------------------===// |
| 28 | |
| 29 | #include "MCTargetDesc/R600MCTargetDesc.h" |
| 30 | #include "R600.h" |
| 31 | #include "R600Defines.h" |
| 32 | #include "R600Subtarget.h" |
| 33 | #include "llvm/CodeGen/MachineDominators.h" |
| 34 | #include "llvm/CodeGen/MachineLoopInfo.h" |
| 35 | |
| 36 | using namespace llvm; |
| 37 | |
| 38 | #define DEBUG_TYPE "vec-merger" |
| 39 | |
| 40 | static bool isImplicitlyDef(MachineRegisterInfo &MRI, Register Reg) { |
| 41 | if (Reg.isPhysical()) |
| 42 | return false; |
| 43 | const MachineInstr *MI = MRI.getUniqueVRegDef(Reg); |
| 44 | return MI && MI->isImplicitDef(); |
| 45 | } |
| 46 | |
| 47 | namespace { |
| 48 | |
| 49 | class RegSeqInfo { |
| 50 | public: |
| 51 | MachineInstr *Instr; |
| 52 | DenseMap<Register, unsigned> RegToChan; |
| 53 | std::vector<Register> UndefReg; |
| 54 | |
| 55 | RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { |
| 56 | assert(MI->getOpcode() == R600::REG_SEQUENCE); |
| 57 | for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { |
| 58 | MachineOperand &MO = Instr->getOperand(i); |
| 59 | unsigned Chan = Instr->getOperand(i: i + 1).getImm(); |
| 60 | if (isImplicitlyDef(MRI, Reg: MO.getReg())) |
| 61 | UndefReg.emplace_back(args&: Chan); |
| 62 | else |
| 63 | RegToChan[MO.getReg()] = Chan; |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | RegSeqInfo() = default; |
| 68 | |
| 69 | bool operator==(const RegSeqInfo &RSI) const { |
| 70 | return RSI.Instr == Instr; |
| 71 | } |
| 72 | }; |
| 73 | |
| 74 | class R600VectorRegMerger : public MachineFunctionPass { |
| 75 | private: |
| 76 | using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>; |
| 77 | |
| 78 | MachineRegisterInfo *MRI; |
| 79 | const R600InstrInfo *TII = nullptr; |
| 80 | DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; |
| 81 | InstructionSetMap PreviousRegSeqByReg; |
| 82 | InstructionSetMap PreviousRegSeqByUndefCount; |
| 83 | |
| 84 | bool canSwizzle(const MachineInstr &MI) const; |
| 85 | bool areAllUsesSwizzeable(Register Reg) const; |
| 86 | void SwizzleInput(MachineInstr &, |
| 87 | const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const; |
| 88 | bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge, |
| 89 | std::vector<std::pair<unsigned, unsigned>> &Remap) const; |
| 90 | bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, |
| 91 | std::vector<std::pair<unsigned, unsigned>> &RemapChan); |
| 92 | bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, |
| 93 | std::vector<std::pair<unsigned, unsigned>> &RemapChan); |
| 94 | MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec, |
| 95 | const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const; |
| 96 | void RemoveMI(MachineInstr *); |
| 97 | void trackRSI(const RegSeqInfo &RSI); |
| 98 | |
| 99 | public: |
| 100 | static char ID; |
| 101 | |
| 102 | R600VectorRegMerger() : MachineFunctionPass(ID) {} |
| 103 | |
| 104 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 105 | AU.setPreservesCFG(); |
| 106 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
| 107 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
| 108 | AU.addRequired<MachineLoopInfoWrapperPass>(); |
| 109 | AU.addPreserved<MachineLoopInfoWrapperPass>(); |
| 110 | MachineFunctionPass::getAnalysisUsage(AU); |
| 111 | } |
| 112 | |
| 113 | MachineFunctionProperties getRequiredProperties() const override { |
| 114 | return MachineFunctionProperties().setIsSSA(); |
| 115 | } |
| 116 | |
| 117 | StringRef getPassName() const override { |
| 118 | return "R600 Vector Registers Merge Pass" ; |
| 119 | } |
| 120 | |
| 121 | bool runOnMachineFunction(MachineFunction &Fn) override; |
| 122 | }; |
| 123 | |
| 124 | } // end anonymous namespace |
| 125 | |
| 126 | INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE, |
| 127 | "R600 Vector Reg Merger" , false, false) |
| 128 | INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE, |
| 129 | "R600 Vector Reg Merger" , false, false) |
| 130 | |
| 131 | char R600VectorRegMerger::ID = 0; |
| 132 | |
| 133 | char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID; |
| 134 | |
| 135 | bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) |
| 136 | const { |
| 137 | if (TII->get(Opcode: MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) |
| 138 | return true; |
| 139 | switch (MI.getOpcode()) { |
| 140 | case R600::R600_ExportSwz: |
| 141 | case R600::EG_ExportSwz: |
| 142 | return true; |
| 143 | default: |
| 144 | return false; |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, |
| 149 | RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap) |
| 150 | const { |
| 151 | unsigned CurrentUndexIdx = 0; |
| 152 | for (auto &It : ToMerge->RegToChan) { |
| 153 | DenseMap<Register, unsigned>::const_iterator PosInUntouched = |
| 154 | Untouched->RegToChan.find(Val: It.first); |
| 155 | if (PosInUntouched != Untouched->RegToChan.end()) { |
| 156 | Remap.emplace_back(args&: It.second, args: (*PosInUntouched).second); |
| 157 | continue; |
| 158 | } |
| 159 | if (CurrentUndexIdx >= Untouched->UndefReg.size()) |
| 160 | return false; |
| 161 | Remap.emplace_back(args&: It.second, args: Untouched->UndefReg[CurrentUndexIdx++]); |
| 162 | } |
| 163 | |
| 164 | return true; |
| 165 | } |
| 166 | |
| 167 | static |
| 168 | unsigned getReassignedChan( |
| 169 | const std::vector<std::pair<unsigned, unsigned>> &RemapChan, |
| 170 | unsigned Chan) { |
| 171 | for (const auto &J : RemapChan) { |
| 172 | if (J.first == Chan) |
| 173 | return J.second; |
| 174 | } |
| 175 | llvm_unreachable("Chan wasn't reassigned" ); |
| 176 | } |
| 177 | |
| 178 | MachineInstr *R600VectorRegMerger::RebuildVector( |
| 179 | RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, |
| 180 | const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const { |
| 181 | Register Reg = RSI->Instr->getOperand(i: 0).getReg(); |
| 182 | MachineBasicBlock::iterator Pos = RSI->Instr; |
| 183 | MachineBasicBlock &MBB = *Pos->getParent(); |
| 184 | const DebugLoc &DL = Pos->getDebugLoc(); |
| 185 | |
| 186 | Register SrcVec = BaseRSI->Instr->getOperand(i: 0).getReg(); |
| 187 | DenseMap<Register, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; |
| 188 | std::vector<Register> UpdatedUndef = BaseRSI->UndefReg; |
| 189 | for (const auto &It : RSI->RegToChan) { |
| 190 | Register DstReg = MRI->createVirtualRegister(RegClass: &R600::R600_Reg128RegClass); |
| 191 | unsigned SubReg = It.first; |
| 192 | unsigned Swizzle = It.second; |
| 193 | unsigned Chan = getReassignedChan(RemapChan, Chan: Swizzle); |
| 194 | |
| 195 | MachineInstr *Tmp = BuildMI(BB&: MBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: R600::INSERT_SUBREG), |
| 196 | DestReg: DstReg) |
| 197 | .addReg(RegNo: SrcVec) |
| 198 | .addReg(RegNo: SubReg) |
| 199 | .addImm(Val: Chan); |
| 200 | UpdatedRegToChan[SubReg] = Chan; |
| 201 | std::vector<Register>::iterator ChanPos = llvm::find(Range&: UpdatedUndef, Val: Chan); |
| 202 | if (ChanPos != UpdatedUndef.end()) |
| 203 | UpdatedUndef.erase(position: ChanPos); |
| 204 | assert(!is_contained(UpdatedUndef, Chan) && |
| 205 | "UpdatedUndef shouldn't contain Chan more than once!" ); |
| 206 | LLVM_DEBUG(dbgs() << " ->" ; Tmp->dump();); |
| 207 | (void)Tmp; |
| 208 | SrcVec = DstReg; |
| 209 | } |
| 210 | MachineInstr *NewMI = |
| 211 | BuildMI(BB&: MBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: R600::COPY), DestReg: Reg).addReg(RegNo: SrcVec); |
| 212 | LLVM_DEBUG(dbgs() << " ->" ; NewMI->dump();); |
| 213 | |
| 214 | LLVM_DEBUG(dbgs() << " Updating Swizzle:\n" ); |
| 215 | for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(RegNo: Reg), |
| 216 | E = MRI->use_instr_end(); It != E; ++It) { |
| 217 | LLVM_DEBUG(dbgs() << " " ; (*It).dump(); dbgs() << " ->" ); |
| 218 | SwizzleInput(*It, RemapChan); |
| 219 | LLVM_DEBUG((*It).dump()); |
| 220 | } |
| 221 | RSI->Instr->eraseFromParent(); |
| 222 | |
| 223 | // Update RSI |
| 224 | RSI->Instr = NewMI; |
| 225 | RSI->RegToChan = std::move(UpdatedRegToChan); |
| 226 | RSI->UndefReg = std::move(UpdatedUndef); |
| 227 | |
| 228 | return NewMI; |
| 229 | } |
| 230 | |
| 231 | void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { |
| 232 | for (auto &It : PreviousRegSeqByReg) { |
| 233 | std::vector<MachineInstr *> &MIs = It.second; |
| 234 | MIs.erase(first: llvm::find(Range&: MIs, Val: MI), last: MIs.end()); |
| 235 | } |
| 236 | for (auto &It : PreviousRegSeqByUndefCount) { |
| 237 | std::vector<MachineInstr *> &MIs = It.second; |
| 238 | MIs.erase(first: llvm::find(Range&: MIs, Val: MI), last: MIs.end()); |
| 239 | } |
| 240 | } |
| 241 | |
| 242 | void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, |
| 243 | const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const { |
| 244 | unsigned Offset; |
| 245 | if (TII->get(Opcode: MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) |
| 246 | Offset = 2; |
| 247 | else |
| 248 | Offset = 3; |
| 249 | for (unsigned i = 0; i < 4; i++) { |
| 250 | unsigned Swizzle = MI.getOperand(i: i + Offset).getImm() + 1; |
| 251 | for (const auto &J : RemapChan) { |
| 252 | if (J.first == Swizzle) { |
| 253 | MI.getOperand(i: i + Offset).setImm(J.second - 1); |
| 254 | break; |
| 255 | } |
| 256 | } |
| 257 | } |
| 258 | } |
| 259 | |
| 260 | bool R600VectorRegMerger::areAllUsesSwizzeable(Register Reg) const { |
| 261 | return llvm::all_of(Range: MRI->use_instructions(Reg), |
| 262 | P: [&](const MachineInstr &MI) { return canSwizzle(MI); }); |
| 263 | } |
| 264 | |
| 265 | bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, |
| 266 | RegSeqInfo &CompatibleRSI, |
| 267 | std::vector<std::pair<unsigned, unsigned>> &RemapChan) { |
| 268 | for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), |
| 269 | MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { |
| 270 | if (!MOp->isReg()) |
| 271 | continue; |
| 272 | auto &Insts = PreviousRegSeqByReg[MOp->getReg()]; |
| 273 | if (Insts.empty()) |
| 274 | continue; |
| 275 | for (MachineInstr *MI : Insts) { |
| 276 | CompatibleRSI = PreviousRegSeq[MI]; |
| 277 | if (RSI == CompatibleRSI) |
| 278 | continue; |
| 279 | if (tryMergeVector(Untouched: &CompatibleRSI, ToMerge: &RSI, Remap&: RemapChan)) |
| 280 | return true; |
| 281 | } |
| 282 | } |
| 283 | return false; |
| 284 | } |
| 285 | |
| 286 | bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, |
| 287 | RegSeqInfo &CompatibleRSI, |
| 288 | std::vector<std::pair<unsigned, unsigned>> &RemapChan) { |
| 289 | unsigned NeededUndefs = 4 - RSI.UndefReg.size(); |
| 290 | std::vector<MachineInstr *> &MIs = |
| 291 | PreviousRegSeqByUndefCount[NeededUndefs]; |
| 292 | if (MIs.empty()) |
| 293 | return false; |
| 294 | CompatibleRSI = PreviousRegSeq[MIs.back()]; |
| 295 | tryMergeVector(Untouched: &CompatibleRSI, ToMerge: &RSI, Remap&: RemapChan); |
| 296 | return true; |
| 297 | } |
| 298 | |
| 299 | void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { |
| 300 | for (DenseMap<Register, unsigned>::const_iterator |
| 301 | It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { |
| 302 | PreviousRegSeqByReg[(*It).first].push_back(x: RSI.Instr); |
| 303 | } |
| 304 | PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(x: RSI.Instr); |
| 305 | PreviousRegSeq[RSI.Instr] = RSI; |
| 306 | } |
| 307 | |
| 308 | bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { |
| 309 | if (skipFunction(F: Fn.getFunction())) |
| 310 | return false; |
| 311 | |
| 312 | const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); |
| 313 | TII = ST.getInstrInfo(); |
| 314 | MRI = &Fn.getRegInfo(); |
| 315 | |
| 316 | for (MachineBasicBlock &MB : Fn) { |
| 317 | PreviousRegSeq.clear(); |
| 318 | PreviousRegSeqByReg.clear(); |
| 319 | PreviousRegSeqByUndefCount.clear(); |
| 320 | |
| 321 | for (MachineBasicBlock::iterator MII = MB.begin(), MIIE = MB.end(); |
| 322 | MII != MIIE; ++MII) { |
| 323 | MachineInstr &MI = *MII; |
| 324 | if (MI.getOpcode() != R600::REG_SEQUENCE) { |
| 325 | if (TII->get(Opcode: MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { |
| 326 | Register Reg = MI.getOperand(i: 1).getReg(); |
| 327 | for (MachineInstr &DefMI : MRI->def_instructions(Reg)) |
| 328 | RemoveMI(MI: &DefMI); |
| 329 | } |
| 330 | continue; |
| 331 | } |
| 332 | |
| 333 | RegSeqInfo RSI(*MRI, &MI); |
| 334 | |
| 335 | // All uses of MI are swizzeable ? |
| 336 | Register Reg = MI.getOperand(i: 0).getReg(); |
| 337 | if (!areAllUsesSwizzeable(Reg)) |
| 338 | continue; |
| 339 | |
| 340 | LLVM_DEBUG({ |
| 341 | dbgs() << "Trying to optimize " ; |
| 342 | MI.dump(); |
| 343 | }); |
| 344 | |
| 345 | RegSeqInfo CandidateRSI; |
| 346 | std::vector<std::pair<unsigned, unsigned>> RemapChan; |
| 347 | LLVM_DEBUG(dbgs() << "Using common slots...\n" ;); |
| 348 | if (tryMergeUsingCommonSlot(RSI, CompatibleRSI&: CandidateRSI, RemapChan)) { |
| 349 | // Remove CandidateRSI mapping |
| 350 | RemoveMI(MI: CandidateRSI.Instr); |
| 351 | MII = RebuildVector(RSI: &RSI, BaseRSI: &CandidateRSI, RemapChan); |
| 352 | trackRSI(RSI); |
| 353 | continue; |
| 354 | } |
| 355 | LLVM_DEBUG(dbgs() << "Using free slots...\n" ;); |
| 356 | RemapChan.clear(); |
| 357 | if (tryMergeUsingFreeSlot(RSI, CompatibleRSI&: CandidateRSI, RemapChan)) { |
| 358 | RemoveMI(MI: CandidateRSI.Instr); |
| 359 | MII = RebuildVector(RSI: &RSI, BaseRSI: &CandidateRSI, RemapChan); |
| 360 | trackRSI(RSI); |
| 361 | continue; |
| 362 | } |
| 363 | //Failed to merge |
| 364 | trackRSI(RSI); |
| 365 | } |
| 366 | } |
| 367 | return false; |
| 368 | } |
| 369 | |
| 370 | llvm::FunctionPass *llvm::createR600VectorRegMerger() { |
| 371 | return new R600VectorRegMerger(); |
| 372 | } |
| 373 | |