| 1 | //===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file |
| 10 | /// Pass to temporarily raise the wave priority beginning at the start of |
| 11 | /// the shader function until its last VMEM instructions to allow younger |
| 12 | /// waves to issue their VMEM instructions as well. |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #include "AMDGPU.h" |
| 17 | #include "GCNSubtarget.h" |
| 18 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| 19 | #include "SIInstrInfo.h" |
| 20 | #include "llvm/ADT/PostOrderIterator.h" |
| 21 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 22 | #include "llvm/CodeGen/MachinePassManager.h" |
| 23 | |
| 24 | using namespace llvm; |
| 25 | |
| 26 | #define DEBUG_TYPE "amdgpu-set-wave-priority" |
| 27 | |
| 28 | static cl::opt<unsigned> DefaultVALUInstsThreshold( |
| 29 | "amdgpu-set-wave-priority-valu-insts-threshold" , |
| 30 | cl::desc("VALU instruction count threshold for adjusting wave priority" ), |
| 31 | cl::init(Val: 100), cl::Hidden); |
| 32 | |
| 33 | namespace { |
| 34 | |
| 35 | struct MBBInfo { |
| 36 | MBBInfo() = default; |
| 37 | unsigned NumVALUInstsAtStart = 0; |
| 38 | bool MayReachVMEMLoad = false; |
| 39 | MachineInstr *LastVMEMLoad = nullptr; |
| 40 | }; |
| 41 | |
| 42 | using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>; |
| 43 | |
| 44 | class AMDGPUSetWavePriority { |
| 45 | public: |
| 46 | bool run(MachineFunction &MF); |
| 47 | |
| 48 | private: |
| 49 | MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB, |
| 50 | MachineBasicBlock::iterator I, |
| 51 | unsigned priority) const; |
| 52 | |
| 53 | const SIInstrInfo *TII; |
| 54 | }; |
| 55 | |
| 56 | class AMDGPUSetWavePriorityLegacy : public MachineFunctionPass { |
| 57 | public: |
| 58 | static char ID; |
| 59 | |
| 60 | AMDGPUSetWavePriorityLegacy() : MachineFunctionPass(ID) {} |
| 61 | |
| 62 | StringRef getPassName() const override { return "Set wave priority" ; } |
| 63 | |
| 64 | bool runOnMachineFunction(MachineFunction &MF) override { |
| 65 | if (skipFunction(F: MF.getFunction())) |
| 66 | return false; |
| 67 | |
| 68 | return AMDGPUSetWavePriority().run(MF); |
| 69 | } |
| 70 | }; |
| 71 | |
| 72 | } // End anonymous namespace. |
| 73 | |
| 74 | INITIALIZE_PASS(AMDGPUSetWavePriorityLegacy, DEBUG_TYPE, "Set wave priority" , |
| 75 | false, false) |
| 76 | |
| 77 | char AMDGPUSetWavePriorityLegacy::ID = 0; |
| 78 | |
| 79 | FunctionPass *llvm::createAMDGPUSetWavePriorityPass() { |
| 80 | return new AMDGPUSetWavePriorityLegacy(); |
| 81 | } |
| 82 | |
| 83 | MachineInstr * |
| 84 | AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB, |
| 85 | MachineBasicBlock::iterator I, |
| 86 | unsigned priority) const { |
| 87 | return BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: TII->get(Opcode: AMDGPU::S_SETPRIO)) |
| 88 | .addImm(Val: priority); |
| 89 | } |
| 90 | |
| 91 | // Checks that for every predecessor Pred that can reach a VMEM load, |
| 92 | // none of Pred's successors can reach a VMEM load. |
| 93 | static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB, |
| 94 | MBBInfoSet &MBBInfos) { |
| 95 | for (const MachineBasicBlock *Pred : MBB.predecessors()) { |
| 96 | if (!MBBInfos[Pred].MayReachVMEMLoad) |
| 97 | continue; |
| 98 | for (const MachineBasicBlock *Succ : Pred->successors()) { |
| 99 | if (MBBInfos[Succ].MayReachVMEMLoad) |
| 100 | return false; |
| 101 | } |
| 102 | } |
| 103 | return true; |
| 104 | } |
| 105 | |
| 106 | static bool isVMEMLoad(const MachineInstr &MI) { |
| 107 | return SIInstrInfo::isVMEM(MI) && MI.mayLoad(); |
| 108 | } |
| 109 | |
| 110 | PreservedAnalyses |
| 111 | llvm::AMDGPUSetWavePriorityPass::run(MachineFunction &MF, |
| 112 | MachineFunctionAnalysisManager &MFAM) { |
| 113 | if (!AMDGPUSetWavePriority().run(MF)) |
| 114 | return PreservedAnalyses::all(); |
| 115 | |
| 116 | return getMachineFunctionPassPreservedAnalyses(); |
| 117 | } |
| 118 | |
| 119 | bool AMDGPUSetWavePriority::run(MachineFunction &MF) { |
| 120 | const unsigned HighPriority = 3; |
| 121 | const unsigned LowPriority = 0; |
| 122 | |
| 123 | Function &F = MF.getFunction(); |
| 124 | if (!AMDGPU::isEntryFunctionCC(CC: F.getCallingConv())) |
| 125 | return false; |
| 126 | |
| 127 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 128 | TII = ST.getInstrInfo(); |
| 129 | |
| 130 | unsigned VALUInstsThreshold = DefaultVALUInstsThreshold; |
| 131 | Attribute A = F.getFnAttribute(Kind: "amdgpu-wave-priority-threshold" ); |
| 132 | if (A.isValid()) |
| 133 | A.getValueAsString().getAsInteger(Radix: 0, Result&: VALUInstsThreshold); |
| 134 | |
| 135 | // Find VMEM loads that may be executed before long-enough sequences of |
| 136 | // VALU instructions. We currently assume that backedges/loops, branch |
| 137 | // probabilities and other details can be ignored, so we essentially |
| 138 | // determine the largest number of VALU instructions along every |
| 139 | // possible path from the start of the function that may potentially be |
| 140 | // executed provided no backedge is ever taken. |
| 141 | MBBInfoSet MBBInfos; |
| 142 | for (MachineBasicBlock *MBB : post_order(G: &MF)) { |
| 143 | bool AtStart = true; |
| 144 | unsigned MaxNumVALUInstsInMiddle = 0; |
| 145 | unsigned NumVALUInstsAtEnd = 0; |
| 146 | for (MachineInstr &MI : *MBB) { |
| 147 | if (isVMEMLoad(MI)) { |
| 148 | AtStart = false; |
| 149 | MBBInfo &Info = MBBInfos[MBB]; |
| 150 | Info.NumVALUInstsAtStart = 0; |
| 151 | MaxNumVALUInstsInMiddle = 0; |
| 152 | NumVALUInstsAtEnd = 0; |
| 153 | Info.LastVMEMLoad = &MI; |
| 154 | } else if (SIInstrInfo::isDS(MI)) { |
| 155 | AtStart = false; |
| 156 | MaxNumVALUInstsInMiddle = |
| 157 | std::max(a: MaxNumVALUInstsInMiddle, b: NumVALUInstsAtEnd); |
| 158 | NumVALUInstsAtEnd = 0; |
| 159 | } else if (SIInstrInfo::isVALU(MI)) { |
| 160 | if (AtStart) |
| 161 | ++MBBInfos[MBB].NumVALUInstsAtStart; |
| 162 | ++NumVALUInstsAtEnd; |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | bool SuccsMayReachVMEMLoad = false; |
| 167 | unsigned NumFollowingVALUInsts = 0; |
| 168 | for (const MachineBasicBlock *Succ : MBB->successors()) { |
| 169 | const MBBInfo &SuccInfo = MBBInfos[Succ]; |
| 170 | SuccsMayReachVMEMLoad |= SuccInfo.MayReachVMEMLoad; |
| 171 | NumFollowingVALUInsts = |
| 172 | std::max(a: NumFollowingVALUInsts, b: SuccInfo.NumVALUInstsAtStart); |
| 173 | } |
| 174 | MBBInfo &Info = MBBInfos[MBB]; |
| 175 | if (AtStart) |
| 176 | Info.NumVALUInstsAtStart += NumFollowingVALUInsts; |
| 177 | NumVALUInstsAtEnd += NumFollowingVALUInsts; |
| 178 | |
| 179 | unsigned MaxNumVALUInsts = |
| 180 | std::max(a: MaxNumVALUInstsInMiddle, b: NumVALUInstsAtEnd); |
| 181 | Info.MayReachVMEMLoad = |
| 182 | SuccsMayReachVMEMLoad || |
| 183 | (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold); |
| 184 | } |
| 185 | |
| 186 | MachineBasicBlock &Entry = MF.front(); |
| 187 | if (!MBBInfos[&Entry].MayReachVMEMLoad) |
| 188 | return false; |
| 189 | |
| 190 | // Raise the priority at the beginning of the shader. |
| 191 | MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end(); |
| 192 | while (I != E && !SIInstrInfo::isVALU(MI: *I) && !I->isTerminator()) |
| 193 | ++I; |
| 194 | BuildSetprioMI(MBB&: Entry, I, priority: HighPriority); |
| 195 | |
| 196 | // Lower the priority on edges where control leaves blocks from which |
| 197 | // the VMEM loads are reachable. |
| 198 | SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks; |
| 199 | for (MachineBasicBlock &MBB : MF) { |
| 200 | if (MBBInfos[&MBB].MayReachVMEMLoad) { |
| 201 | if (MBB.succ_empty()) |
| 202 | PriorityLoweringBlocks.insert(Ptr: &MBB); |
| 203 | continue; |
| 204 | } |
| 205 | |
| 206 | if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) { |
| 207 | for (MachineBasicBlock *Pred : MBB.predecessors()) { |
| 208 | if (MBBInfos[Pred].MayReachVMEMLoad) |
| 209 | PriorityLoweringBlocks.insert(Ptr: Pred); |
| 210 | } |
| 211 | continue; |
| 212 | } |
| 213 | |
| 214 | // Where lowering the priority in predecessors is not possible, the |
| 215 | // block receiving control either was not part of a loop in the first |
| 216 | // place or the loop simplification/canonicalization pass should have |
| 217 | // already tried to split the edge and insert a preheader, and if for |
| 218 | // whatever reason it failed to do so, then this leaves us with the |
| 219 | // only option of lowering the priority within the loop. |
| 220 | PriorityLoweringBlocks.insert(Ptr: &MBB); |
| 221 | } |
| 222 | |
| 223 | for (MachineBasicBlock *MBB : PriorityLoweringBlocks) { |
| 224 | MachineInstr *LastVMEMLoad = MBBInfos[MBB].LastVMEMLoad; |
| 225 | BuildSetprioMI(MBB&: *MBB, |
| 226 | I: LastVMEMLoad |
| 227 | ? std::next(x: MachineBasicBlock::iterator(LastVMEMLoad)) |
| 228 | : MBB->begin(), |
| 229 | priority: LowPriority); |
| 230 | } |
| 231 | |
| 232 | return true; |
| 233 | } |
| 234 | |