1//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Insert s_clause instructions to form hard clauses.
11///
12/// Clausing load instructions can give cache coherency benefits. Before gfx10,
13/// the hardware automatically detected "soft clauses", which were sequences of
14/// memory instructions of the same type. In gfx10 this detection was removed,
15/// and the s_clause instruction was introduced to explicitly mark "hard
16/// clauses".
17///
18/// It's the scheduler's job to form the clauses by putting similar memory
19/// instructions next to each other. Our job is just to insert an s_clause
20/// instruction to mark the start of each clause.
21///
22/// Note that hard clauses are very similar to, but logically distinct from, the
23/// groups of instructions that have to be restartable when XNACK is enabled.
24/// The rules are slightly different in each case. For example an s_nop
25/// instruction breaks a restartable group, but can appear in the middle of a
26/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
27/// "soft clauses" or just "clauses".)
28///
29/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
30/// groups, not hard clauses.
31//
32//===----------------------------------------------------------------------===//
33
34#include "AMDGPU.h"
35#include "GCNSubtarget.h"
36#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
37#include "llvm/ADT/SmallVector.h"
38#include "llvm/CodeGen/MachineFunctionPass.h"
39#include "llvm/CodeGen/MachinePassManager.h"
40
41using namespace llvm;
42
43#define DEBUG_TYPE "si-insert-hard-clauses"
44
// Hidden command-line cap on the number of instructions placed in one hard
// clause. When the option is given explicitly, run() uses it in place of the
// "amdgpu-hard-clause-length-limit" function attribute; either value is then
// clamped to the subtarget's maximum hard clause length.
static cl::opt<unsigned>
    HardClauseLengthLimit("amdgpu-hard-clause-length-limit",
                          cl::desc("Maximum number of memory instructions to "
                                   "place in the same hard clause"),
                          cl::Hidden);
50
51namespace {
52
/// Classification of an instruction with respect to hard clauses.
///
/// The enumerator order is significant: every value up to and including
/// LAST_REAL_HARDCLAUSE_TYPE names a kind of instruction that can be a member
/// of a clause, and the values after it are bookkeeping categories.
enum HardClauseType {
  //===--- GFX10 only ---------------------------------------------------===//

  /// Texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM = 0,
  /// Flat (not global or scratch) memory instructions.
  HARDCLAUSE_FLAT,

  //===--- GFX11 only ---------------------------------------------------===//

  /// Texture memory instructions, split by access kind.
  HARDCLAUSE_MIMG_LOAD,
  HARDCLAUSE_MIMG_STORE,
  HARDCLAUSE_MIMG_ATOMIC,
  HARDCLAUSE_MIMG_SAMPLE,
  /// Buffer, global or scratch memory instructions, split by access kind.
  HARDCLAUSE_VMEM_LOAD,
  HARDCLAUSE_VMEM_STORE,
  HARDCLAUSE_VMEM_ATOMIC,
  /// Flat (not global or scratch) memory instructions, split by access kind.
  HARDCLAUSE_FLAT_LOAD,
  HARDCLAUSE_FLAT_STORE,
  HARDCLAUSE_FLAT_ATOMIC,
  /// BVH instructions.
  HARDCLAUSE_BVH,

  //===--- Common to all generations ------------------------------------===//

  /// Instructions that access LDS.
  HARDCLAUSE_LDS,
  /// Scalar memory instructions.
  HARDCLAUSE_SMEM,
  /// VALU instructions.
  HARDCLAUSE_VALU,
  /// Marker for the last category that can be a clause member.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  /// Internal instructions, which may sit in the middle of a hard clause
  /// (s_waitcnt is the exception and is not allowed).
  HARDCLAUSE_INTERNAL,
  /// Meta instructions, such as KILL, that do not result in any ISA.
  HARDCLAUSE_IGNORE,
  /// Everything that must not appear in a hard clause: SALU, export, branch,
  /// message, GDS, s_waitcnt and anything else not mentioned above.
  HARDCLAUSE_ILLEGAL,
};
98
99class SIInsertHardClauses {
100public:
101 const GCNSubtarget *ST = nullptr;
102
103 HardClauseType getHardClauseType(const MachineInstr &MI) {
104 if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
105 if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
106 if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
107 SIInstrInfo::isSegmentSpecificFLAT(MI)) {
108 if (ST->hasNSAClauseBug()) {
109 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode());
110 if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
111 return HARDCLAUSE_ILLEGAL;
112 }
113 return HARDCLAUSE_VMEM;
114 }
115 if (SIInstrInfo::isFLAT(MI))
116 return HARDCLAUSE_FLAT;
117 } else {
118 assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
119 if (SIInstrInfo::isMIMG(MI)) {
120 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode());
121 const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
122 AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
123 if (BaseInfo->BVH)
124 return HARDCLAUSE_BVH;
125 if (BaseInfo->Sampler || BaseInfo->MSAA)
126 return HARDCLAUSE_MIMG_SAMPLE;
127 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
128 : HARDCLAUSE_MIMG_LOAD
129 : HARDCLAUSE_MIMG_STORE;
130 }
131 if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
132 SIInstrInfo::isSegmentSpecificFLAT(MI)) {
133 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
134 : HARDCLAUSE_VMEM_LOAD
135 : HARDCLAUSE_VMEM_STORE;
136 }
137 if (SIInstrInfo::isFLAT(MI)) {
138 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
139 : HARDCLAUSE_FLAT_LOAD
140 : HARDCLAUSE_FLAT_STORE;
141 }
142 }
143 // TODO: LDS
144 if (SIInstrInfo::isSMRD(MI))
145 return HARDCLAUSE_SMEM;
146 }
147
148 // Don't form VALU clauses. It's not clear what benefit they give, if any.
149
150 // In practice s_nop is the only internal instruction we're likely to see.
151 // It's safe to treat the rest as illegal.
152 if (MI.getOpcode() == AMDGPU::S_NOP)
153 return HARDCLAUSE_INTERNAL;
154 if (MI.isMetaInstruction())
155 return HARDCLAUSE_IGNORE;
156 return HARDCLAUSE_ILLEGAL;
157 }
158
159 // Track information about a clause as we discover it.
160 struct ClauseInfo {
161 // The type of all (non-internal) instructions in the clause.
162 HardClauseType Type = HARDCLAUSE_ILLEGAL;
163 // The first (necessarily non-internal) instruction in the clause.
164 MachineInstr *First = nullptr;
165 // The last non-internal instruction in the clause.
166 MachineInstr *Last = nullptr;
167 // The length of the clause including any internal instructions in the
168 // middle (but not at the end) of the clause.
169 unsigned Length = 0;
170 // Internal instructions at the and of a clause should not be included in
171 // the clause. Count them in TrailingInternalLength until a new memory
172 // instruction is added.
173 unsigned TrailingInternalLength = 0;
174 // The base operands of *Last.
175 SmallVector<const MachineOperand *, 4> BaseOps;
176 };
177
178 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
179 if (CI.First == CI.Last)
180 return false;
181 assert(CI.Length <= ST->maxHardClauseLength() &&
182 "Hard clause is too long!");
183
184 auto &MBB = *CI.First->getParent();
185 auto ClauseMI =
186 BuildMI(BB&: MBB, I&: *CI.First, MIMD: DebugLoc(), MCID: SII->get(Opcode: AMDGPU::S_CLAUSE))
187 .addImm(Val: CI.Length - 1);
188 finalizeBundle(MBB, FirstMI: ClauseMI->getIterator(),
189 LastMI: std::next(x: CI.Last->getIterator()));
190 return true;
191 }
192
193 bool run(MachineFunction &MF) {
194 ST = &MF.getSubtarget<GCNSubtarget>();
195 if (!ST->hasHardClauses())
196 return false;
197
198 unsigned MaxClauseLength = MF.getFunction().getFnAttributeAsParsedInteger(
199 Kind: "amdgpu-hard-clause-length-limit", Default: 255);
200 if (HardClauseLengthLimit.getNumOccurrences())
201 MaxClauseLength = HardClauseLengthLimit;
202 MaxClauseLength = std::min(a: MaxClauseLength, b: ST->maxHardClauseLength());
203 if (MaxClauseLength <= 1)
204 return false;
205
206 const SIInstrInfo *SII = ST->getInstrInfo();
207 const TargetRegisterInfo *TRI = ST->getRegisterInfo();
208
209 bool Changed = false;
210 for (auto &MBB : MF) {
211 ClauseInfo CI;
212 for (auto &MI : MBB) {
213 HardClauseType Type = getHardClauseType(MI);
214
215 int64_t Dummy1;
216 bool Dummy2;
217 LocationSize Dummy3 = LocationSize::precise(Value: 0);
218 SmallVector<const MachineOperand *, 4> BaseOps;
219 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
220 if (!SII->getMemOperandsWithOffsetWidth(LdSt: MI, BaseOps, Offset&: Dummy1, OffsetIsScalable&: Dummy2,
221 Width&: Dummy3, TRI)) {
222 // We failed to get the base operands, so we'll never clause this
223 // instruction with any other, so pretend it's illegal.
224 Type = HARDCLAUSE_ILLEGAL;
225 }
226 }
227
228 if (CI.Length == MaxClauseLength ||
229 (CI.Length && Type != HARDCLAUSE_INTERNAL &&
230 Type != HARDCLAUSE_IGNORE &&
231 (Type != CI.Type ||
232 // Note that we lie to shouldClusterMemOps about the size of the
233 // cluster. When shouldClusterMemOps is called from the machine
234 // scheduler it limits the size of the cluster to avoid increasing
235 // register pressure too much, but this pass runs after register
236 // allocation so there is no need for that kind of limit.
237 // We also lie about the Offset and OffsetIsScalable parameters,
238 // as they aren't used in the SIInstrInfo implementation.
239 !SII->shouldClusterMemOps(BaseOps1: CI.BaseOps, Offset1: 0, OffsetIsScalable1: false, BaseOps2: BaseOps, Offset2: 0, OffsetIsScalable2: false,
240 ClusterSize: 2, NumBytes: 2)))) {
241 // Finish the current clause.
242 Changed |= emitClause(CI, SII);
243 CI = ClauseInfo();
244 }
245
246 if (CI.Length) {
247 // Extend the current clause.
248 if (Type != HARDCLAUSE_IGNORE) {
249 if (Type == HARDCLAUSE_INTERNAL) {
250 ++CI.TrailingInternalLength;
251 } else {
252 ++CI.Length;
253 CI.Length += CI.TrailingInternalLength;
254 CI.TrailingInternalLength = 0;
255 CI.Last = &MI;
256 CI.BaseOps = std::move(BaseOps);
257 }
258 }
259 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
260 // Start a new clause.
261 CI = ClauseInfo{.Type: Type, .First: &MI, .Last: &MI, .Length: 1, .TrailingInternalLength: 0, .BaseOps: std::move(BaseOps)};
262 }
263 }
264
265 // Finish the last clause in the basic block if any.
266 if (CI.Length)
267 Changed |= emitClause(CI, SII);
268 }
269
270 return Changed;
271 }
272};
273
274class SIInsertHardClausesLegacy : public MachineFunctionPass {
275public:
276 static char ID;
277 SIInsertHardClausesLegacy() : MachineFunctionPass(ID) {}
278
279 bool runOnMachineFunction(MachineFunction &MF) override {
280 if (skipFunction(F: MF.getFunction()))
281 return false;
282
283 return SIInsertHardClauses().run(MF);
284 }
285
286 void getAnalysisUsage(AnalysisUsage &AU) const override {
287 AU.setPreservesCFG();
288 MachineFunctionPass::getAnalysisUsage(AU);
289 }
290};
291
292} // namespace
293
294PreservedAnalyses
295llvm::SIInsertHardClausesPass::run(MachineFunction &MF,
296 MachineFunctionAnalysisManager &MFAM) {
297 if (!SIInsertHardClauses().run(MF))
298 return PreservedAnalyses::all();
299
300 auto PA = getMachineFunctionPassPreservedAnalyses();
301 PA.preserveSet<CFGAnalyses>();
302 return PA;
303}
304
// Definition of the legacy pass's static ID member declared in the class.
char SIInsertHardClausesLegacy::ID = 0;

// Reference to the legacy pass ID, defined in namespace llvm so that it is
// visible outside this file's anonymous namespace.
char &llvm::SIInsertHardClausesID = SIInsertHardClausesLegacy::ID;

// Register the legacy pass under the name DEBUG_TYPE
// ("si-insert-hard-clauses"). The two trailing flags are the macro's
// CFG-only and is-analysis arguments, both false here.
INITIALIZE_PASS(SIInsertHardClausesLegacy, DEBUG_TYPE, "SI Insert Hard Clauses",
                false, false)
311