1 | //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// Insert s_clause instructions to form hard clauses. |
11 | /// |
12 | /// Clausing load instructions can give cache coherency benefits. Before gfx10, |
13 | /// the hardware automatically detected "soft clauses", which were sequences of |
14 | /// memory instructions of the same type. In gfx10 this detection was removed, |
15 | /// and the s_clause instruction was introduced to explicitly mark "hard |
16 | /// clauses". |
17 | /// |
18 | /// It's the scheduler's job to form the clauses by putting similar memory |
19 | /// instructions next to each other. Our job is just to insert an s_clause |
20 | /// instruction to mark the start of each clause. |
21 | /// |
22 | /// Note that hard clauses are very similar to, but logically distinct from, the |
23 | /// groups of instructions that have to be restartable when XNACK is enabled. |
24 | /// The rules are slightly different in each case. For example an s_nop |
25 | /// instruction breaks a restartable group, but can appear in the middle of a |
26 | /// hard clause. (Before gfx10 there wasn't a distinction, and both were called |
27 | /// "soft clauses" or just "clauses".) |
28 | /// |
29 | /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable |
30 | /// groups, not hard clauses. |
31 | // |
32 | //===----------------------------------------------------------------------===// |
33 | |
34 | #include "AMDGPU.h" |
35 | #include "GCNSubtarget.h" |
36 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
37 | #include "llvm/ADT/SmallVector.h" |
38 | #include "llvm/CodeGen/MachineFunctionPass.h" |
39 | |
40 | using namespace llvm; |
41 | |
42 | #define DEBUG_TYPE "si-insert-hard-clauses" |
43 | |
44 | namespace { |
45 | |
/// Classification of a machine instruction for the purpose of forming hard
/// clauses.
///
/// The declaration order is significant. Every enumerator up to and including
/// LAST_REAL_HARDCLAUSE_TYPE names a kind of instruction that may start or
/// extend a clause; code in this file tests for that with
/// `Type <= LAST_REAL_HARDCLAUSE_TYPE`. The enumerators that follow it are
/// bookkeeping categories and must stay after it.
enum HardClauseType {
  // For GFX10:

  // Texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM,
  // Flat (not global or scratch) memory instructions.
  HARDCLAUSE_FLAT,

  // For GFX11:

  // Texture memory instructions.
  HARDCLAUSE_MIMG_LOAD,
  HARDCLAUSE_MIMG_STORE,
  HARDCLAUSE_MIMG_ATOMIC,
  HARDCLAUSE_MIMG_SAMPLE,
  // Buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM_LOAD,
  HARDCLAUSE_VMEM_STORE,
  HARDCLAUSE_VMEM_ATOMIC,
  // Flat (not global or scratch) memory instructions.
  HARDCLAUSE_FLAT_LOAD,
  HARDCLAUSE_FLAT_STORE,
  HARDCLAUSE_FLAT_ATOMIC,
  // BVH instructions.
  HARDCLAUSE_BVH,

  // Common:

  // Instructions that access LDS.
  HARDCLAUSE_LDS,
  // Scalar memory instructions.
  HARDCLAUSE_SMEM,
  // VALU instructions.
  HARDCLAUSE_VALU,
  // Alias for the last enumerator that denotes a clausable instruction kind.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions, which are allowed in the middle of a hard clause,
  // except for s_waitcnt.
  HARDCLAUSE_INTERNAL,
  // Meta instructions that do not result in any ISA like KILL.
  HARDCLAUSE_IGNORE,
  // Instructions that are not allowed in a hard clause: SALU, export, branch,
  // message, GDS, s_waitcnt and anything else not mentioned above.
  HARDCLAUSE_ILLEGAL,
};
91 | |
92 | class SIInsertHardClauses : public MachineFunctionPass { |
93 | public: |
94 | static char ID; |
95 | const GCNSubtarget *ST = nullptr; |
96 | |
97 | SIInsertHardClauses() : MachineFunctionPass(ID) {} |
98 | |
99 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
100 | AU.setPreservesCFG(); |
101 | MachineFunctionPass::getAnalysisUsage(AU); |
102 | } |
103 | |
104 | HardClauseType getHardClauseType(const MachineInstr &MI) { |
105 | if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) { |
106 | if (ST->getGeneration() == AMDGPUSubtarget::GFX10) { |
107 | if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { |
108 | if (ST->hasNSAClauseBug()) { |
109 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode()); |
110 | if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) |
111 | return HARDCLAUSE_ILLEGAL; |
112 | } |
113 | return HARDCLAUSE_VMEM; |
114 | } |
115 | if (SIInstrInfo::isFLAT(MI)) |
116 | return HARDCLAUSE_FLAT; |
117 | } else { |
118 | assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11); |
119 | if (SIInstrInfo::isMIMG(MI)) { |
120 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode()); |
121 | const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo = |
122 | AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode); |
123 | if (BaseInfo->BVH) |
124 | return HARDCLAUSE_BVH; |
125 | if (BaseInfo->Sampler) |
126 | return HARDCLAUSE_MIMG_SAMPLE; |
127 | return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC |
128 | : HARDCLAUSE_MIMG_LOAD |
129 | : HARDCLAUSE_MIMG_STORE; |
130 | } |
131 | if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { |
132 | return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC |
133 | : HARDCLAUSE_VMEM_LOAD |
134 | : HARDCLAUSE_VMEM_STORE; |
135 | } |
136 | if (SIInstrInfo::isFLAT(MI)) { |
137 | return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC |
138 | : HARDCLAUSE_FLAT_LOAD |
139 | : HARDCLAUSE_FLAT_STORE; |
140 | } |
141 | } |
142 | // TODO: LDS |
143 | if (SIInstrInfo::isSMRD(MI)) |
144 | return HARDCLAUSE_SMEM; |
145 | } |
146 | |
147 | // Don't form VALU clauses. It's not clear what benefit they give, if any. |
148 | |
149 | // In practice s_nop is the only internal instruction we're likely to see. |
150 | // It's safe to treat the rest as illegal. |
151 | if (MI.getOpcode() == AMDGPU::S_NOP) |
152 | return HARDCLAUSE_INTERNAL; |
153 | if (MI.isMetaInstruction()) |
154 | return HARDCLAUSE_IGNORE; |
155 | return HARDCLAUSE_ILLEGAL; |
156 | } |
157 | |
158 | // Track information about a clause as we discover it. |
159 | struct ClauseInfo { |
160 | // The type of all (non-internal) instructions in the clause. |
161 | HardClauseType Type = HARDCLAUSE_ILLEGAL; |
162 | // The first (necessarily non-internal) instruction in the clause. |
163 | MachineInstr *First = nullptr; |
164 | // The last non-internal instruction in the clause. |
165 | MachineInstr *Last = nullptr; |
166 | // The length of the clause including any internal instructions in the |
167 | // middle (but not at the end) of the clause. |
168 | unsigned Length = 0; |
169 | // Internal instructions at the and of a clause should not be included in |
170 | // the clause. Count them in TrailingInternalLength until a new memory |
171 | // instruction is added. |
172 | unsigned TrailingInternalLength = 0; |
173 | // The base operands of *Last. |
174 | SmallVector<const MachineOperand *, 4> BaseOps; |
175 | }; |
176 | |
177 | bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { |
178 | if (CI.First == CI.Last) |
179 | return false; |
180 | assert(CI.Length <= ST->maxHardClauseLength() && |
181 | "Hard clause is too long!" ); |
182 | |
183 | auto &MBB = *CI.First->getParent(); |
184 | auto ClauseMI = |
185 | BuildMI(BB&: MBB, I&: *CI.First, MIMD: DebugLoc(), MCID: SII->get(Opcode: AMDGPU::S_CLAUSE)) |
186 | .addImm(Val: CI.Length - 1); |
187 | finalizeBundle(MBB, FirstMI: ClauseMI->getIterator(), |
188 | LastMI: std::next(x: CI.Last->getIterator())); |
189 | return true; |
190 | } |
191 | |
192 | bool runOnMachineFunction(MachineFunction &MF) override { |
193 | if (skipFunction(F: MF.getFunction())) |
194 | return false; |
195 | |
196 | ST = &MF.getSubtarget<GCNSubtarget>(); |
197 | if (!ST->hasHardClauses()) |
198 | return false; |
199 | |
200 | const SIInstrInfo *SII = ST->getInstrInfo(); |
201 | const TargetRegisterInfo *TRI = ST->getRegisterInfo(); |
202 | |
203 | bool Changed = false; |
204 | for (auto &MBB : MF) { |
205 | ClauseInfo CI; |
206 | for (auto &MI : MBB) { |
207 | HardClauseType Type = getHardClauseType(MI); |
208 | |
209 | int64_t Dummy1; |
210 | bool Dummy2; |
211 | LocationSize Dummy3 = 0; |
212 | SmallVector<const MachineOperand *, 4> BaseOps; |
213 | if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { |
214 | if (!SII->getMemOperandsWithOffsetWidth(LdSt: MI, BaseOps, Offset&: Dummy1, OffsetIsScalable&: Dummy2, |
215 | Width&: Dummy3, TRI)) { |
216 | // We failed to get the base operands, so we'll never clause this |
217 | // instruction with any other, so pretend it's illegal. |
218 | Type = HARDCLAUSE_ILLEGAL; |
219 | } |
220 | } |
221 | |
222 | if (CI.Length == ST->maxHardClauseLength() || |
223 | (CI.Length && Type != HARDCLAUSE_INTERNAL && |
224 | Type != HARDCLAUSE_IGNORE && |
225 | (Type != CI.Type || |
226 | // Note that we lie to shouldClusterMemOps about the size of the |
227 | // cluster. When shouldClusterMemOps is called from the machine |
228 | // scheduler it limits the size of the cluster to avoid increasing |
229 | // register pressure too much, but this pass runs after register |
230 | // allocation so there is no need for that kind of limit. |
231 | // We also lie about the Offset and OffsetIsScalable parameters, |
232 | // as they aren't used in the SIInstrInfo implementation. |
233 | !SII->shouldClusterMemOps(BaseOps1: CI.BaseOps, Offset1: 0, OffsetIsScalable1: false, BaseOps2: BaseOps, Offset2: 0, OffsetIsScalable2: false, |
234 | ClusterSize: 2, NumBytes: 2)))) { |
235 | // Finish the current clause. |
236 | Changed |= emitClause(CI, SII); |
237 | CI = ClauseInfo(); |
238 | } |
239 | |
240 | if (CI.Length) { |
241 | // Extend the current clause. |
242 | if (Type != HARDCLAUSE_IGNORE) { |
243 | if (Type == HARDCLAUSE_INTERNAL) { |
244 | ++CI.TrailingInternalLength; |
245 | } else { |
246 | ++CI.Length; |
247 | CI.Length += CI.TrailingInternalLength; |
248 | CI.TrailingInternalLength = 0; |
249 | CI.Last = &MI; |
250 | CI.BaseOps = std::move(BaseOps); |
251 | } |
252 | } |
253 | } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { |
254 | // Start a new clause. |
255 | CI = ClauseInfo{.Type: Type, .First: &MI, .Last: &MI, .Length: 1, .TrailingInternalLength: 0, .BaseOps: std::move(BaseOps)}; |
256 | } |
257 | } |
258 | |
259 | // Finish the last clause in the basic block if any. |
260 | if (CI.Length) |
261 | Changed |= emitClause(CI, SII); |
262 | } |
263 | |
264 | return Changed; |
265 | } |
266 | }; |
267 | |
268 | } // namespace |
269 | |
270 | char SIInsertHardClauses::ID = 0; |
271 | |
272 | char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; |
273 | |
274 | INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses" , |
275 | false, false) |
276 | |