| 1 | //===-- SIPostRABundler.cpp -----------------------------------------------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | /// \file | 
|---|
| 10 | /// This pass creates bundles of memory instructions to protect adjacent loads | 
|---|
| 11 | /// and stores from being rescheduled apart from each other post-RA. | 
|---|
| 12 | /// | 
|---|
| 13 | //===----------------------------------------------------------------------===// | 
|---|
| 14 |  | 
|---|
| 15 | #include "SIPostRABundler.h" | 
|---|
| 16 | #include "AMDGPU.h" | 
|---|
| 17 | #include "GCNSubtarget.h" | 
|---|
| 18 | #include "llvm/ADT/SmallSet.h" | 
|---|
| 19 | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|---|
| 20 |  | 
|---|
| 21 | using namespace llvm; | 
|---|
| 22 |  | 
|---|
| 23 | #define DEBUG_TYPE "si-post-ra-bundler" | 
|---|
| 24 |  | 
|---|
| 25 | namespace { | 
|---|
| 26 |  | 
|---|
| 27 | class SIPostRABundlerLegacy : public MachineFunctionPass { | 
|---|
| 28 | public: | 
|---|
| 29 | static char ID; | 
|---|
| 30 |  | 
|---|
| 31 | public: | 
|---|
| 32 | SIPostRABundlerLegacy() : MachineFunctionPass(ID) { | 
|---|
| 33 | initializeSIPostRABundlerLegacyPass(*PassRegistry::getPassRegistry()); | 
|---|
| 34 | } | 
|---|
| 35 |  | 
|---|
| 36 | bool runOnMachineFunction(MachineFunction &MF) override; | 
|---|
| 37 |  | 
|---|
| 38 | StringRef getPassName() const override { | 
|---|
| 39 | return "SI post-RA bundler"; | 
|---|
| 40 | } | 
|---|
| 41 |  | 
|---|
| 42 | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|---|
| 43 | AU.setPreservesAll(); | 
|---|
| 44 | MachineFunctionPass::getAnalysisUsage(AU); | 
|---|
| 45 | } | 
|---|
| 46 | }; | 
|---|
| 47 |  | 
|---|
| 48 | class SIPostRABundler { | 
|---|
| 49 | public: | 
|---|
| 50 | bool run(MachineFunction &MF); | 
|---|
| 51 |  | 
|---|
| 52 | private: | 
|---|
| 53 | const SIRegisterInfo *TRI; | 
|---|
| 54 |  | 
|---|
| 55 | SmallSet<Register, 16> Defs; | 
|---|
| 56 |  | 
|---|
| 57 | void collectUsedRegUnits(const MachineInstr &MI, | 
|---|
| 58 | BitVector &UsedRegUnits) const; | 
|---|
| 59 |  | 
|---|
| 60 | bool isBundleCandidate(const MachineInstr &MI) const; | 
|---|
| 61 | bool isDependentLoad(const MachineInstr &MI) const; | 
|---|
| 62 | bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const; | 
|---|
| 63 | }; | 
|---|
| 64 |  | 
|---|
| 65 | constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF | | 
|---|
| 66 | SIInstrFlags::SMRD | SIInstrFlags::DS | | 
|---|
| 67 | SIInstrFlags::FLAT | SIInstrFlags::MIMG | | 
|---|
| 68 | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; | 
|---|
| 69 |  | 
|---|
| 70 | } // End anonymous namespace. | 
|---|
| 71 |  | 
|---|
| 72 | INITIALIZE_PASS(SIPostRABundlerLegacy, DEBUG_TYPE, "SI post-RA bundler", false, | 
|---|
| 73 | false) | 
|---|
| 74 |  | 
|---|
| 75 | char SIPostRABundlerLegacy::ID = 0; | 
|---|
| 76 |  | 
|---|
| 77 | char &llvm::SIPostRABundlerLegacyID = SIPostRABundlerLegacy::ID; | 
|---|
| 78 |  | 
|---|
| 79 | FunctionPass *llvm::createSIPostRABundlerPass() { | 
|---|
| 80 | return new SIPostRABundlerLegacy(); | 
|---|
| 81 | } | 
|---|
| 82 |  | 
|---|
| 83 | bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const { | 
|---|
| 84 | if (!MI.mayLoad()) | 
|---|
| 85 | return false; | 
|---|
| 86 |  | 
|---|
| 87 | for (const MachineOperand &Op : MI.explicit_operands()) { | 
|---|
| 88 | if (!Op.isReg()) | 
|---|
| 89 | continue; | 
|---|
| 90 | Register Reg = Op.getReg(); | 
|---|
| 91 | for (Register Def : Defs) | 
|---|
| 92 | if (TRI->regsOverlap(RegA: Reg, RegB: Def)) | 
|---|
| 93 | return true; | 
|---|
| 94 | } | 
|---|
| 95 |  | 
|---|
| 96 | return false; | 
|---|
| 97 | } | 
|---|
| 98 |  | 
|---|
| 99 | void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI, | 
|---|
| 100 | BitVector &UsedRegUnits) const { | 
|---|
| 101 | if (MI.isDebugInstr()) | 
|---|
| 102 | return; | 
|---|
| 103 |  | 
|---|
| 104 | for (const MachineOperand &Op : MI.operands()) { | 
|---|
| 105 | if (!Op.isReg() || !Op.readsReg()) | 
|---|
| 106 | continue; | 
|---|
| 107 |  | 
|---|
| 108 | Register Reg = Op.getReg(); | 
|---|
| 109 | assert(!Op.getSubReg() && | 
|---|
| 110 | "subregister indexes should not be present after RA"); | 
|---|
| 111 |  | 
|---|
| 112 | for (MCRegUnit Unit : TRI->regunits(Reg)) | 
|---|
| 113 | UsedRegUnits.set(Unit); | 
|---|
| 114 | } | 
|---|
| 115 | } | 
|---|
| 116 |  | 
|---|
| 117 | bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const { | 
|---|
| 118 | const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags; | 
|---|
| 119 | return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled(); | 
|---|
| 120 | } | 
|---|
| 121 |  | 
|---|
| 122 | bool SIPostRABundler::canBundle(const MachineInstr &MI, | 
|---|
| 123 | const MachineInstr &NextMI) const { | 
|---|
| 124 | const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags; | 
|---|
| 125 |  | 
|---|
| 126 | return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() && | 
|---|
| 127 | NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() && | 
|---|
| 128 | ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) && | 
|---|
| 129 | !isDependentLoad(MI: NextMI)); | 
|---|
| 130 | } | 
|---|
| 131 |  | 
|---|
| 132 | bool SIPostRABundlerLegacy::runOnMachineFunction(MachineFunction &MF) { | 
|---|
| 133 | if (skipFunction(F: MF.getFunction())) | 
|---|
| 134 | return false; | 
|---|
| 135 | return SIPostRABundler().run(MF); | 
|---|
| 136 | } | 
|---|
| 137 |  | 
|---|
| 138 | PreservedAnalyses SIPostRABundlerPass::run(MachineFunction &MF, | 
|---|
| 139 | MachineFunctionAnalysisManager &) { | 
|---|
| 140 | SIPostRABundler().run(MF); | 
|---|
| 141 | return PreservedAnalyses::all(); | 
|---|
| 142 | } | 
|---|
| 143 |  | 
|---|
| 144 | bool SIPostRABundler::run(MachineFunction &MF) { | 
|---|
| 145 |  | 
|---|
| 146 | TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); | 
|---|
| 147 | BitVector BundleUsedRegUnits(TRI->getNumRegUnits()); | 
|---|
| 148 | BitVector KillUsedRegUnits(TRI->getNumRegUnits()); | 
|---|
| 149 |  | 
|---|
| 150 | bool Changed = false; | 
|---|
| 151 | for (MachineBasicBlock &MBB : MF) { | 
|---|
| 152 | bool HasIGLPInstrs = llvm::any_of(Range: MBB.instrs(), P: [](MachineInstr &MI) { | 
|---|
| 153 | unsigned Opc = MI.getOpcode(); | 
|---|
| 154 | return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT; | 
|---|
| 155 | }); | 
|---|
| 156 |  | 
|---|
| 157 | // Don't cluster with IGLP instructions. | 
|---|
| 158 | if (HasIGLPInstrs) | 
|---|
| 159 | continue; | 
|---|
| 160 |  | 
|---|
| 161 | MachineBasicBlock::instr_iterator Next; | 
|---|
| 162 | MachineBasicBlock::instr_iterator B = MBB.instr_begin(); | 
|---|
| 163 | MachineBasicBlock::instr_iterator E = MBB.instr_end(); | 
|---|
| 164 |  | 
|---|
| 165 | for (auto I = B; I != E; I = Next) { | 
|---|
| 166 | Next = std::next(x: I); | 
|---|
| 167 | if (!isBundleCandidate(MI: *I)) | 
|---|
| 168 | continue; | 
|---|
| 169 |  | 
|---|
| 170 | assert(Defs.empty()); | 
|---|
| 171 |  | 
|---|
| 172 | if (I->getNumExplicitDefs() != 0) | 
|---|
| 173 | Defs.insert(V: I->defs().begin()->getReg()); | 
|---|
| 174 |  | 
|---|
| 175 | MachineBasicBlock::instr_iterator BundleStart = I; | 
|---|
| 176 | MachineBasicBlock::instr_iterator BundleEnd = I; | 
|---|
| 177 | unsigned ClauseLength = 1; | 
|---|
| 178 | for (I = Next; I != E; I = Next) { | 
|---|
| 179 | Next = std::next(x: I); | 
|---|
| 180 |  | 
|---|
| 181 | assert(BundleEnd != I); | 
|---|
| 182 | if (canBundle(MI: *BundleEnd, NextMI: *I)) { | 
|---|
| 183 | BundleEnd = I; | 
|---|
| 184 | if (I->getNumExplicitDefs() != 0) | 
|---|
| 185 | Defs.insert(V: I->defs().begin()->getReg()); | 
|---|
| 186 | ++ClauseLength; | 
|---|
| 187 | } else if (!I->isMetaInstruction()) { | 
|---|
| 188 | // Allow meta instructions in between bundle candidates, but do not | 
|---|
| 189 | // start or end a bundle on one. | 
|---|
| 190 | // | 
|---|
| 191 | // TODO: It may be better to move meta instructions like dbg_value | 
|---|
| 192 | // after the bundle. We're relying on the memory legalizer to unbundle | 
|---|
| 193 | // these. | 
|---|
| 194 | break; | 
|---|
| 195 | } | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|
| 198 | Next = std::next(x: BundleEnd); | 
|---|
| 199 | if (ClauseLength > 1) { | 
|---|
| 200 | Changed = true; | 
|---|
| 201 |  | 
|---|
| 202 | // Before register allocation, kills are inserted after potential soft | 
|---|
| 203 | // clauses to hint register allocation. Look for kills that look like | 
|---|
| 204 | // this, and erase them. | 
|---|
| 205 | if (Next != E && Next->isKill()) { | 
|---|
| 206 |  | 
|---|
| 207 | // TODO: Should maybe back-propagate kill flags to the bundle. | 
|---|
| 208 | for (const MachineInstr &BundleMI : make_range(x: BundleStart, y: Next)) | 
|---|
| 209 | collectUsedRegUnits(MI: BundleMI, UsedRegUnits&: BundleUsedRegUnits); | 
|---|
| 210 |  | 
|---|
| 211 | BundleUsedRegUnits.flip(); | 
|---|
| 212 |  | 
|---|
| 213 | while (Next != E && Next->isKill()) { | 
|---|
| 214 | MachineInstr &Kill = *Next; | 
|---|
| 215 | collectUsedRegUnits(MI: Kill, UsedRegUnits&: KillUsedRegUnits); | 
|---|
| 216 |  | 
|---|
| 217 | KillUsedRegUnits &= BundleUsedRegUnits; | 
|---|
| 218 |  | 
|---|
| 219 | // Erase the kill if it's a subset of the used registers. | 
|---|
| 220 | // | 
|---|
| 221 | // TODO: Should we just remove all kills? Is there any real reason to | 
|---|
| 222 | // keep them after RA? | 
|---|
| 223 | if (KillUsedRegUnits.none()) { | 
|---|
| 224 | ++Next; | 
|---|
| 225 | Kill.eraseFromParent(); | 
|---|
| 226 | } else | 
|---|
| 227 | break; | 
|---|
| 228 |  | 
|---|
| 229 | KillUsedRegUnits.reset(); | 
|---|
| 230 | } | 
|---|
| 231 |  | 
|---|
| 232 | BundleUsedRegUnits.reset(); | 
|---|
| 233 | } | 
|---|
| 234 |  | 
|---|
| 235 | finalizeBundle(MBB, FirstMI: BundleStart, LastMI: Next); | 
|---|
| 236 | } | 
|---|
| 237 |  | 
|---|
| 238 | Defs.clear(); | 
|---|
| 239 | } | 
|---|
| 240 | } | 
|---|
| 241 |  | 
|---|
| 242 | return Changed; | 
|---|
| 243 | } | 
|---|
| 244 |  | 
|---|