1 | //===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// Pass to temporarily raise the wave priority beginning the start of |
11 | /// the shader function until its last VMEM instructions to allow younger |
12 | /// waves to issue their VMEM instructions as well. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "AMDGPU.h" |
17 | #include "GCNSubtarget.h" |
18 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
19 | #include "SIInstrInfo.h" |
20 | #include "llvm/ADT/PostOrderIterator.h" |
21 | #include "llvm/CodeGen/MachineFunctionPass.h" |
22 | #include "llvm/CodeGen/MachinePassManager.h" |
23 | |
24 | using namespace llvm; |
25 | |
26 | #define DEBUG_TYPE "amdgpu-set-wave-priority" |
27 | |
28 | static cl::opt<unsigned> DefaultVALUInstsThreshold( |
29 | "amdgpu-set-wave-priority-valu-insts-threshold" , |
30 | cl::desc("VALU instruction count threshold for adjusting wave priority" ), |
31 | cl::init(Val: 100), cl::Hidden); |
32 | |
33 | namespace { |
34 | |
35 | struct MBBInfo { |
36 | MBBInfo() = default; |
37 | unsigned NumVALUInstsAtStart = 0; |
38 | bool MayReachVMEMLoad = false; |
39 | MachineInstr *LastVMEMLoad = nullptr; |
40 | }; |
41 | |
42 | using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>; |
43 | |
44 | class AMDGPUSetWavePriority { |
45 | public: |
46 | bool run(MachineFunction &MF); |
47 | |
48 | private: |
49 | MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB, |
50 | MachineBasicBlock::iterator I, |
51 | unsigned priority) const; |
52 | |
53 | const SIInstrInfo *TII; |
54 | }; |
55 | |
56 | class AMDGPUSetWavePriorityLegacy : public MachineFunctionPass { |
57 | public: |
58 | static char ID; |
59 | |
60 | AMDGPUSetWavePriorityLegacy() : MachineFunctionPass(ID) {} |
61 | |
62 | StringRef getPassName() const override { return "Set wave priority" ; } |
63 | |
64 | bool runOnMachineFunction(MachineFunction &MF) override { |
65 | if (skipFunction(F: MF.getFunction())) |
66 | return false; |
67 | |
68 | return AMDGPUSetWavePriority().run(MF); |
69 | } |
70 | }; |
71 | |
72 | } // End anonymous namespace. |
73 | |
74 | INITIALIZE_PASS(AMDGPUSetWavePriorityLegacy, DEBUG_TYPE, "Set wave priority" , |
75 | false, false) |
76 | |
77 | char AMDGPUSetWavePriorityLegacy::ID = 0; |
78 | |
79 | FunctionPass *llvm::createAMDGPUSetWavePriorityPass() { |
80 | return new AMDGPUSetWavePriorityLegacy(); |
81 | } |
82 | |
83 | MachineInstr * |
84 | AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB, |
85 | MachineBasicBlock::iterator I, |
86 | unsigned priority) const { |
87 | return BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: TII->get(Opcode: AMDGPU::S_SETPRIO)) |
88 | .addImm(Val: priority); |
89 | } |
90 | |
91 | // Checks that for every predecessor Pred that can reach a VMEM load, |
92 | // none of Pred's successors can reach a VMEM load. |
93 | static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB, |
94 | MBBInfoSet &MBBInfos) { |
95 | for (const MachineBasicBlock *Pred : MBB.predecessors()) { |
96 | if (!MBBInfos[Pred].MayReachVMEMLoad) |
97 | continue; |
98 | for (const MachineBasicBlock *Succ : Pred->successors()) { |
99 | if (MBBInfos[Succ].MayReachVMEMLoad) |
100 | return false; |
101 | } |
102 | } |
103 | return true; |
104 | } |
105 | |
106 | static bool isVMEMLoad(const MachineInstr &MI) { |
107 | return SIInstrInfo::isVMEM(MI) && MI.mayLoad(); |
108 | } |
109 | |
110 | PreservedAnalyses |
111 | llvm::AMDGPUSetWavePriorityPass::run(MachineFunction &MF, |
112 | MachineFunctionAnalysisManager &MFAM) { |
113 | if (!AMDGPUSetWavePriority().run(MF)) |
114 | return PreservedAnalyses::all(); |
115 | |
116 | return getMachineFunctionPassPreservedAnalyses(); |
117 | } |
118 | |
119 | bool AMDGPUSetWavePriority::run(MachineFunction &MF) { |
120 | const unsigned HighPriority = 3; |
121 | const unsigned LowPriority = 0; |
122 | |
123 | Function &F = MF.getFunction(); |
124 | if (!AMDGPU::isEntryFunctionCC(CC: F.getCallingConv())) |
125 | return false; |
126 | |
127 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
128 | TII = ST.getInstrInfo(); |
129 | |
130 | unsigned VALUInstsThreshold = DefaultVALUInstsThreshold; |
131 | Attribute A = F.getFnAttribute(Kind: "amdgpu-wave-priority-threshold" ); |
132 | if (A.isValid()) |
133 | A.getValueAsString().getAsInteger(Radix: 0, Result&: VALUInstsThreshold); |
134 | |
135 | // Find VMEM loads that may be executed before long-enough sequences of |
136 | // VALU instructions. We currently assume that backedges/loops, branch |
137 | // probabilities and other details can be ignored, so we essentially |
138 | // determine the largest number of VALU instructions along every |
139 | // possible path from the start of the function that may potentially be |
140 | // executed provided no backedge is ever taken. |
141 | MBBInfoSet MBBInfos; |
142 | for (MachineBasicBlock *MBB : post_order(G: &MF)) { |
143 | bool AtStart = true; |
144 | unsigned MaxNumVALUInstsInMiddle = 0; |
145 | unsigned NumVALUInstsAtEnd = 0; |
146 | for (MachineInstr &MI : *MBB) { |
147 | if (isVMEMLoad(MI)) { |
148 | AtStart = false; |
149 | MBBInfo &Info = MBBInfos[MBB]; |
150 | Info.NumVALUInstsAtStart = 0; |
151 | MaxNumVALUInstsInMiddle = 0; |
152 | NumVALUInstsAtEnd = 0; |
153 | Info.LastVMEMLoad = &MI; |
154 | } else if (SIInstrInfo::isDS(MI)) { |
155 | AtStart = false; |
156 | MaxNumVALUInstsInMiddle = |
157 | std::max(a: MaxNumVALUInstsInMiddle, b: NumVALUInstsAtEnd); |
158 | NumVALUInstsAtEnd = 0; |
159 | } else if (SIInstrInfo::isVALU(MI)) { |
160 | if (AtStart) |
161 | ++MBBInfos[MBB].NumVALUInstsAtStart; |
162 | ++NumVALUInstsAtEnd; |
163 | } |
164 | } |
165 | |
166 | bool SuccsMayReachVMEMLoad = false; |
167 | unsigned NumFollowingVALUInsts = 0; |
168 | for (const MachineBasicBlock *Succ : MBB->successors()) { |
169 | const MBBInfo &SuccInfo = MBBInfos[Succ]; |
170 | SuccsMayReachVMEMLoad |= SuccInfo.MayReachVMEMLoad; |
171 | NumFollowingVALUInsts = |
172 | std::max(a: NumFollowingVALUInsts, b: SuccInfo.NumVALUInstsAtStart); |
173 | } |
174 | MBBInfo &Info = MBBInfos[MBB]; |
175 | if (AtStart) |
176 | Info.NumVALUInstsAtStart += NumFollowingVALUInsts; |
177 | NumVALUInstsAtEnd += NumFollowingVALUInsts; |
178 | |
179 | unsigned MaxNumVALUInsts = |
180 | std::max(a: MaxNumVALUInstsInMiddle, b: NumVALUInstsAtEnd); |
181 | Info.MayReachVMEMLoad = |
182 | SuccsMayReachVMEMLoad || |
183 | (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold); |
184 | } |
185 | |
186 | MachineBasicBlock &Entry = MF.front(); |
187 | if (!MBBInfos[&Entry].MayReachVMEMLoad) |
188 | return false; |
189 | |
190 | // Raise the priority at the beginning of the shader. |
191 | MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end(); |
192 | while (I != E && !SIInstrInfo::isVALU(MI: *I) && !I->isTerminator()) |
193 | ++I; |
194 | BuildSetprioMI(MBB&: Entry, I, priority: HighPriority); |
195 | |
196 | // Lower the priority on edges where control leaves blocks from which |
197 | // the VMEM loads are reachable. |
198 | SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks; |
199 | for (MachineBasicBlock &MBB : MF) { |
200 | if (MBBInfos[&MBB].MayReachVMEMLoad) { |
201 | if (MBB.succ_empty()) |
202 | PriorityLoweringBlocks.insert(Ptr: &MBB); |
203 | continue; |
204 | } |
205 | |
206 | if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) { |
207 | for (MachineBasicBlock *Pred : MBB.predecessors()) { |
208 | if (MBBInfos[Pred].MayReachVMEMLoad) |
209 | PriorityLoweringBlocks.insert(Ptr: Pred); |
210 | } |
211 | continue; |
212 | } |
213 | |
214 | // Where lowering the priority in predecessors is not possible, the |
215 | // block receiving control either was not part of a loop in the first |
216 | // place or the loop simplification/canonicalization pass should have |
217 | // already tried to split the edge and insert a preheader, and if for |
218 | // whatever reason it failed to do so, then this leaves us with the |
219 | // only option of lowering the priority within the loop. |
220 | PriorityLoweringBlocks.insert(Ptr: &MBB); |
221 | } |
222 | |
223 | for (MachineBasicBlock *MBB : PriorityLoweringBlocks) { |
224 | MachineInstr *LastVMEMLoad = MBBInfos[MBB].LastVMEMLoad; |
225 | BuildSetprioMI(MBB&: *MBB, |
226 | I: LastVMEMLoad |
227 | ? std::next(x: MachineBasicBlock::iterator(LastVMEMLoad)) |
228 | : MBB->begin(), |
229 | priority: LowPriority); |
230 | } |
231 | |
232 | return true; |
233 | } |
234 | |