1//===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
/// Pass to temporarily raise the wave priority from the start of the
/// shader function until its last VMEM instructions, to allow younger
/// waves to issue their VMEM instructions as well.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPU.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19#include "SIInstrInfo.h"
20#include "llvm/ADT/PostOrderIterator.h"
21#include "llvm/CodeGen/MachineFunctionPass.h"
22#include "llvm/InitializePasses.h"
23#include "llvm/Support/Allocator.h"
24
25using namespace llvm;
26
27#define DEBUG_TYPE "amdgpu-set-wave-priority"
28
29static cl::opt<unsigned> DefaultVALUInstsThreshold(
30 "amdgpu-set-wave-priority-valu-insts-threshold",
31 cl::desc("VALU instruction count threshold for adjusting wave priority"),
32 cl::init(Val: 100), cl::Hidden);
33
34namespace {
35
36struct MBBInfo {
37 MBBInfo() = default;
38 unsigned NumVALUInstsAtStart = 0;
39 bool MayReachVMEMLoad = false;
40 MachineInstr *LastVMEMLoad = nullptr;
41};
42
43using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;
44
45class AMDGPUSetWavePriority : public MachineFunctionPass {
46public:
47 static char ID;
48
49 AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}
50
51 StringRef getPassName() const override { return "Set wave priority"; }
52
53 bool runOnMachineFunction(MachineFunction &MF) override;
54
55private:
56 MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
57 MachineBasicBlock::iterator I,
58 unsigned priority) const;
59
60 const SIInstrInfo *TII;
61};
62
63} // End anonymous namespace.
64
65INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
66 false)
67
68char AMDGPUSetWavePriority::ID = 0;
69
70FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
71 return new AMDGPUSetWavePriority();
72}
73
74MachineInstr *
75AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB,
76 MachineBasicBlock::iterator I,
77 unsigned priority) const {
78 return BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: TII->get(Opcode: AMDGPU::S_SETPRIO))
79 .addImm(Val: priority);
80}
81
82// Checks that for every predecessor Pred that can reach a VMEM load,
83// none of Pred's successors can reach a VMEM load.
84static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB,
85 MBBInfoSet &MBBInfos) {
86 for (const MachineBasicBlock *Pred : MBB.predecessors()) {
87 if (!MBBInfos[Pred].MayReachVMEMLoad)
88 continue;
89 for (const MachineBasicBlock *Succ : Pred->successors()) {
90 if (MBBInfos[Succ].MayReachVMEMLoad)
91 return false;
92 }
93 }
94 return true;
95}
96
97static bool isVMEMLoad(const MachineInstr &MI) {
98 return SIInstrInfo::isVMEM(MI) && MI.mayLoad();
99}
100
101bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {
102 const unsigned HighPriority = 3;
103 const unsigned LowPriority = 0;
104
105 Function &F = MF.getFunction();
106 if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(CC: F.getCallingConv()))
107 return false;
108
109 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
110 TII = ST.getInstrInfo();
111
112 unsigned VALUInstsThreshold = DefaultVALUInstsThreshold;
113 Attribute A = F.getFnAttribute(Kind: "amdgpu-wave-priority-threshold");
114 if (A.isValid())
115 A.getValueAsString().getAsInteger(Radix: 0, Result&: VALUInstsThreshold);
116
117 // Find VMEM loads that may be executed before long-enough sequences of
118 // VALU instructions. We currently assume that backedges/loops, branch
119 // probabilities and other details can be ignored, so we essentially
120 // determine the largest number of VALU instructions along every
121 // possible path from the start of the function that may potentially be
122 // executed provided no backedge is ever taken.
123 MBBInfoSet MBBInfos;
124 for (MachineBasicBlock *MBB : post_order(G: &MF)) {
125 bool AtStart = true;
126 unsigned MaxNumVALUInstsInMiddle = 0;
127 unsigned NumVALUInstsAtEnd = 0;
128 for (MachineInstr &MI : *MBB) {
129 if (isVMEMLoad(MI)) {
130 AtStart = false;
131 MBBInfo &Info = MBBInfos[MBB];
132 Info.NumVALUInstsAtStart = 0;
133 MaxNumVALUInstsInMiddle = 0;
134 NumVALUInstsAtEnd = 0;
135 Info.LastVMEMLoad = &MI;
136 } else if (SIInstrInfo::isDS(MI)) {
137 AtStart = false;
138 MaxNumVALUInstsInMiddle =
139 std::max(a: MaxNumVALUInstsInMiddle, b: NumVALUInstsAtEnd);
140 NumVALUInstsAtEnd = 0;
141 } else if (SIInstrInfo::isVALU(MI)) {
142 if (AtStart)
143 ++MBBInfos[MBB].NumVALUInstsAtStart;
144 ++NumVALUInstsAtEnd;
145 }
146 }
147
148 bool SuccsMayReachVMEMLoad = false;
149 unsigned NumFollowingVALUInsts = 0;
150 for (const MachineBasicBlock *Succ : MBB->successors()) {
151 SuccsMayReachVMEMLoad |= MBBInfos[Succ].MayReachVMEMLoad;
152 NumFollowingVALUInsts =
153 std::max(a: NumFollowingVALUInsts, b: MBBInfos[Succ].NumVALUInstsAtStart);
154 }
155 MBBInfo &Info = MBBInfos[MBB];
156 if (AtStart)
157 Info.NumVALUInstsAtStart += NumFollowingVALUInsts;
158 NumVALUInstsAtEnd += NumFollowingVALUInsts;
159
160 unsigned MaxNumVALUInsts =
161 std::max(a: MaxNumVALUInstsInMiddle, b: NumVALUInstsAtEnd);
162 Info.MayReachVMEMLoad =
163 SuccsMayReachVMEMLoad ||
164 (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold);
165 }
166
167 MachineBasicBlock &Entry = MF.front();
168 if (!MBBInfos[&Entry].MayReachVMEMLoad)
169 return false;
170
171 // Raise the priority at the beginning of the shader.
172 MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end();
173 while (I != E && !SIInstrInfo::isVALU(MI: *I) && !I->isTerminator())
174 ++I;
175 BuildSetprioMI(MBB&: Entry, I, priority: HighPriority);
176
177 // Lower the priority on edges where control leaves blocks from which
178 // the VMEM loads are reachable.
179 SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;
180 for (MachineBasicBlock &MBB : MF) {
181 if (MBBInfos[&MBB].MayReachVMEMLoad) {
182 if (MBB.succ_empty())
183 PriorityLoweringBlocks.insert(Ptr: &MBB);
184 continue;
185 }
186
187 if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) {
188 for (MachineBasicBlock *Pred : MBB.predecessors()) {
189 if (MBBInfos[Pred].MayReachVMEMLoad)
190 PriorityLoweringBlocks.insert(Ptr: Pred);
191 }
192 continue;
193 }
194
195 // Where lowering the priority in predecessors is not possible, the
196 // block receiving control either was not part of a loop in the first
197 // place or the loop simplification/canonicalization pass should have
198 // already tried to split the edge and insert a preheader, and if for
199 // whatever reason it failed to do so, then this leaves us with the
200 // only option of lowering the priority within the loop.
201 PriorityLoweringBlocks.insert(Ptr: &MBB);
202 }
203
204 for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {
205 BuildSetprioMI(
206 MBB&: *MBB,
207 I: MBBInfos[MBB].LastVMEMLoad
208 ? std::next(x: MachineBasicBlock::iterator(MBBInfos[MBB].LastVMEMLoad))
209 : MBB->begin(),
210 priority: LowPriority);
211 }
212
213 return true;
214}
215