//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_clause instructions to form hard clauses.
///
/// Clausing load instructions can give cache coherency benefits. Before gfx10,
/// the hardware automatically detected "soft clauses", which were sequences of
/// memory instructions of the same type. In gfx10 this detection was removed,
/// and the s_clause instruction was introduced to explicitly mark "hard
/// clauses".
///
/// It's the scheduler's job to form the clauses by putting similar memory
/// instructions next to each other. Our job is just to insert an s_clause
/// instruction to mark the start of each clause.
///
/// Note that hard clauses are very similar to, but logically distinct from, the
/// groups of instructions that have to be restartable when XNACK is enabled.
/// The rules are slightly different in each case. For example an s_nop
/// instruction breaks a restartable group, but can appear in the middle of a
/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
/// "soft clauses" or just "clauses".)
///
/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
/// groups, not hard clauses.
//
//===----------------------------------------------------------------------===//
33
34#include "AMDGPU.h"
35#include "GCNSubtarget.h"
36#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
37#include "llvm/ADT/SmallVector.h"
38#include "llvm/CodeGen/MachineFunctionPass.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "si-insert-hard-clauses"
43
44namespace {
45
/// Classification of an instruction with respect to hard clauses.
///
/// The enumerator order is significant: every value up to and including
/// LAST_REAL_HARDCLAUSE_TYPE names a kind of instruction that can be a member
/// of a clause, and the pass tests for that with
/// `Type <= LAST_REAL_HARDCLAUSE_TYPE`. The values after it are bookkeeping
/// categories and must stay at the end.
enum HardClauseType {
  // For GFX10:

  // Texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM,
  // Flat (not global or scratch) memory instructions.
  HARDCLAUSE_FLAT,

  // For GFX11:

  // Texture memory instructions.
  HARDCLAUSE_MIMG_LOAD,
  HARDCLAUSE_MIMG_STORE,
  HARDCLAUSE_MIMG_ATOMIC,
  HARDCLAUSE_MIMG_SAMPLE,
  // Buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM_LOAD,
  HARDCLAUSE_VMEM_STORE,
  HARDCLAUSE_VMEM_ATOMIC,
  // Flat (not global or scratch) memory instructions.
  HARDCLAUSE_FLAT_LOAD,
  HARDCLAUSE_FLAT_STORE,
  HARDCLAUSE_FLAT_ATOMIC,
  // BVH instructions.
  HARDCLAUSE_BVH,

  // Common:

  // Instructions that access LDS.
  HARDCLAUSE_LDS,
  // Scalar memory instructions.
  HARDCLAUSE_SMEM,
  // VALU instructions.
  HARDCLAUSE_VALU,
  // Sentinel: the last enumerator that denotes a clause member.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions, which are allowed in the middle of a hard clause,
  // except for s_waitcnt.
  HARDCLAUSE_INTERNAL,
  // Meta instructions that do not result in any ISA like KILL.
  HARDCLAUSE_IGNORE,
  // Instructions that are not allowed in a hard clause: SALU, export, branch,
  // message, GDS, s_waitcnt and anything else not mentioned above.
  HARDCLAUSE_ILLEGAL,
};
91
92class SIInsertHardClauses : public MachineFunctionPass {
93public:
94 static char ID;
95 const GCNSubtarget *ST = nullptr;
96
97 SIInsertHardClauses() : MachineFunctionPass(ID) {}
98
99 void getAnalysisUsage(AnalysisUsage &AU) const override {
100 AU.setPreservesCFG();
101 MachineFunctionPass::getAnalysisUsage(AU);
102 }
103
104 HardClauseType getHardClauseType(const MachineInstr &MI) {
105 if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
106 if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
107 if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
108 if (ST->hasNSAClauseBug()) {
109 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode());
110 if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
111 return HARDCLAUSE_ILLEGAL;
112 }
113 return HARDCLAUSE_VMEM;
114 }
115 if (SIInstrInfo::isFLAT(MI))
116 return HARDCLAUSE_FLAT;
117 } else {
118 assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
119 if (SIInstrInfo::isMIMG(MI)) {
120 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode());
121 const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
122 AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
123 if (BaseInfo->BVH)
124 return HARDCLAUSE_BVH;
125 if (BaseInfo->Sampler)
126 return HARDCLAUSE_MIMG_SAMPLE;
127 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
128 : HARDCLAUSE_MIMG_LOAD
129 : HARDCLAUSE_MIMG_STORE;
130 }
131 if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
132 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
133 : HARDCLAUSE_VMEM_LOAD
134 : HARDCLAUSE_VMEM_STORE;
135 }
136 if (SIInstrInfo::isFLAT(MI)) {
137 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
138 : HARDCLAUSE_FLAT_LOAD
139 : HARDCLAUSE_FLAT_STORE;
140 }
141 }
142 // TODO: LDS
143 if (SIInstrInfo::isSMRD(MI))
144 return HARDCLAUSE_SMEM;
145 }
146
147 // Don't form VALU clauses. It's not clear what benefit they give, if any.
148
149 // In practice s_nop is the only internal instruction we're likely to see.
150 // It's safe to treat the rest as illegal.
151 if (MI.getOpcode() == AMDGPU::S_NOP)
152 return HARDCLAUSE_INTERNAL;
153 if (MI.isMetaInstruction())
154 return HARDCLAUSE_IGNORE;
155 return HARDCLAUSE_ILLEGAL;
156 }
157
158 // Track information about a clause as we discover it.
159 struct ClauseInfo {
160 // The type of all (non-internal) instructions in the clause.
161 HardClauseType Type = HARDCLAUSE_ILLEGAL;
162 // The first (necessarily non-internal) instruction in the clause.
163 MachineInstr *First = nullptr;
164 // The last non-internal instruction in the clause.
165 MachineInstr *Last = nullptr;
166 // The length of the clause including any internal instructions in the
167 // middle (but not at the end) of the clause.
168 unsigned Length = 0;
169 // Internal instructions at the and of a clause should not be included in
170 // the clause. Count them in TrailingInternalLength until a new memory
171 // instruction is added.
172 unsigned TrailingInternalLength = 0;
173 // The base operands of *Last.
174 SmallVector<const MachineOperand *, 4> BaseOps;
175 };
176
177 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
178 if (CI.First == CI.Last)
179 return false;
180 assert(CI.Length <= ST->maxHardClauseLength() &&
181 "Hard clause is too long!");
182
183 auto &MBB = *CI.First->getParent();
184 auto ClauseMI =
185 BuildMI(BB&: MBB, I&: *CI.First, MIMD: DebugLoc(), MCID: SII->get(Opcode: AMDGPU::S_CLAUSE))
186 .addImm(Val: CI.Length - 1);
187 finalizeBundle(MBB, FirstMI: ClauseMI->getIterator(),
188 LastMI: std::next(x: CI.Last->getIterator()));
189 return true;
190 }
191
192 bool runOnMachineFunction(MachineFunction &MF) override {
193 if (skipFunction(F: MF.getFunction()))
194 return false;
195
196 ST = &MF.getSubtarget<GCNSubtarget>();
197 if (!ST->hasHardClauses())
198 return false;
199
200 const SIInstrInfo *SII = ST->getInstrInfo();
201 const TargetRegisterInfo *TRI = ST->getRegisterInfo();
202
203 bool Changed = false;
204 for (auto &MBB : MF) {
205 ClauseInfo CI;
206 for (auto &MI : MBB) {
207 HardClauseType Type = getHardClauseType(MI);
208
209 int64_t Dummy1;
210 bool Dummy2;
211 LocationSize Dummy3 = 0;
212 SmallVector<const MachineOperand *, 4> BaseOps;
213 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
214 if (!SII->getMemOperandsWithOffsetWidth(LdSt: MI, BaseOps, Offset&: Dummy1, OffsetIsScalable&: Dummy2,
215 Width&: Dummy3, TRI)) {
216 // We failed to get the base operands, so we'll never clause this
217 // instruction with any other, so pretend it's illegal.
218 Type = HARDCLAUSE_ILLEGAL;
219 }
220 }
221
222 if (CI.Length == ST->maxHardClauseLength() ||
223 (CI.Length && Type != HARDCLAUSE_INTERNAL &&
224 Type != HARDCLAUSE_IGNORE &&
225 (Type != CI.Type ||
226 // Note that we lie to shouldClusterMemOps about the size of the
227 // cluster. When shouldClusterMemOps is called from the machine
228 // scheduler it limits the size of the cluster to avoid increasing
229 // register pressure too much, but this pass runs after register
230 // allocation so there is no need for that kind of limit.
231 // We also lie about the Offset and OffsetIsScalable parameters,
232 // as they aren't used in the SIInstrInfo implementation.
233 !SII->shouldClusterMemOps(BaseOps1: CI.BaseOps, Offset1: 0, OffsetIsScalable1: false, BaseOps2: BaseOps, Offset2: 0, OffsetIsScalable2: false,
234 ClusterSize: 2, NumBytes: 2)))) {
235 // Finish the current clause.
236 Changed |= emitClause(CI, SII);
237 CI = ClauseInfo();
238 }
239
240 if (CI.Length) {
241 // Extend the current clause.
242 if (Type != HARDCLAUSE_IGNORE) {
243 if (Type == HARDCLAUSE_INTERNAL) {
244 ++CI.TrailingInternalLength;
245 } else {
246 ++CI.Length;
247 CI.Length += CI.TrailingInternalLength;
248 CI.TrailingInternalLength = 0;
249 CI.Last = &MI;
250 CI.BaseOps = std::move(BaseOps);
251 }
252 }
253 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
254 // Start a new clause.
255 CI = ClauseInfo{.Type: Type, .First: &MI, .Last: &MI, .Length: 1, .TrailingInternalLength: 0, .BaseOps: std::move(BaseOps)};
256 }
257 }
258
259 // Finish the last clause in the basic block if any.
260 if (CI.Length)
261 Changed |= emitClause(CI, SII);
262 }
263
264 return Changed;
265 }
266};
267
268} // namespace
269
270char SIInsertHardClauses::ID = 0;
271
272char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
273
274INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
275 false, false)
276