//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Add CF_ALU. R600 ALU instructions are grouped in clauses, each of which can
/// hold up to 128 ALU instructions; these instructions can access up to 4
/// prefetched lines of 16 registers each from constant buffers. Such ALU
/// clauses are initiated by CF_ALU instructions.
//===----------------------------------------------------------------------===//
15
16#include "MCTargetDesc/R600MCTargetDesc.h"
17#include "R600.h"
18#include "R600Defines.h"
19#include "R600Subtarget.h"
20#include "llvm/CodeGen/MachineFunctionPass.h"
21
22using namespace llvm;
23
24namespace {
25
26class R600EmitClauseMarkers : public MachineFunctionPass {
27private:
28 const R600InstrInfo *TII = nullptr;
29 int Address = 0;
30
31 unsigned OccupiedDwords(MachineInstr &MI) const {
32 switch (MI.getOpcode()) {
33 case R600::INTERP_PAIR_XY:
34 case R600::INTERP_PAIR_ZW:
35 case R600::INTERP_VEC_LOAD:
36 case R600::DOT_4:
37 return 4;
38 case R600::KILL:
39 return 0;
40 default:
41 break;
42 }
43
44 // These will be expanded to two ALU instructions in the
45 // ExpandSpecialInstructions pass.
46 if (TII->isLDSRetInstr(Opcode: MI.getOpcode()))
47 return 2;
48
49 if (TII->isVector(MI) || TII->isCubeOp(opcode: MI.getOpcode()) ||
50 TII->isReductionOp(opcode: MI.getOpcode()))
51 return 4;
52
53 unsigned NumLiteral = 0;
54 for (MachineInstr::mop_iterator It = MI.operands_begin(),
55 E = MI.operands_end();
56 It != E; ++It) {
57 MachineOperand &MO = *It;
58 if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
59 ++NumLiteral;
60 }
61 return 1 + NumLiteral;
62 }
63
64 bool isALU(const MachineInstr &MI) const {
65 if (TII->isALUInstr(Opcode: MI.getOpcode()))
66 return true;
67 if (TII->isVector(MI) || TII->isCubeOp(opcode: MI.getOpcode()))
68 return true;
69 switch (MI.getOpcode()) {
70 case R600::PRED_X:
71 case R600::INTERP_PAIR_XY:
72 case R600::INTERP_PAIR_ZW:
73 case R600::INTERP_VEC_LOAD:
74 case R600::COPY:
75 case R600::DOT_4:
76 return true;
77 default:
78 return false;
79 }
80 }
81
82 bool IsTrivialInst(MachineInstr &MI) const {
83 switch (MI.getOpcode()) {
84 case R600::KILL:
85 case R600::RETURN:
86 case R600::IMPLICIT_DEF:
87 return true;
88 default:
89 return false;
90 }
91 }
92
93 std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
94 // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
95 // (See also R600ISelLowering.cpp)
96 // ConstIndex value is in [0, 4095];
97 return std::pair<unsigned, unsigned>(
98 ((Sel >> 2) - 512) >> 12, // KC_BANK
99 // Line Number of ConstIndex
100 // A line contains 16 constant registers however KCX bank can lock
101 // two line at the same time ; thus we want to get an even line number.
102 // Line number can be retrieved with (>>4), using (>>5) <<1 generates
103 // an even number.
104 ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
105 }
106
107 bool
108 SubstituteKCacheBank(MachineInstr &MI,
109 std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
110 bool UpdateInstr = true) const {
111 std::vector<std::pair<unsigned, unsigned>> UsedKCache;
112
113 if (!TII->isALUInstr(Opcode: MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
114 return true;
115
116 const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
117 TII->getSrcs(MI);
118 assert(
119 (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
120 "Can't assign Const");
121 for (auto &[Op, Sel] : Consts) {
122 if (Op->getReg() != R600::ALU_CONST)
123 continue;
124 unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
125 unsigned KCacheIndex = Index * 4 + Chan;
126 const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
127 if (CachedConsts.empty()) {
128 CachedConsts.push_back(x: BankLine);
129 UsedKCache.emplace_back(args: 0, args&: KCacheIndex);
130 continue;
131 }
132 if (CachedConsts[0] == BankLine) {
133 UsedKCache.emplace_back(args: 0, args&: KCacheIndex);
134 continue;
135 }
136 if (CachedConsts.size() == 1) {
137 CachedConsts.push_back(x: BankLine);
138 UsedKCache.emplace_back(args: 1, args&: KCacheIndex);
139 continue;
140 }
141 if (CachedConsts[1] == BankLine) {
142 UsedKCache.emplace_back(args: 1, args&: KCacheIndex);
143 continue;
144 }
145 return false;
146 }
147
148 if (!UpdateInstr)
149 return true;
150
151 unsigned j = 0;
152 for (auto &[Op, Sel] : Consts) {
153 if (Op->getReg() != R600::ALU_CONST)
154 continue;
155 switch (UsedKCache[j].first) {
156 case 0:
157 Op->setReg(R600::R600_KC0RegClass.getRegister(i: UsedKCache[j].second));
158 break;
159 case 1:
160 Op->setReg(R600::R600_KC1RegClass.getRegister(i: UsedKCache[j].second));
161 break;
162 default:
163 llvm_unreachable("Wrong Cache Line");
164 }
165 j++;
166 }
167 return true;
168 }
169
170 bool canClauseLocalKillFitInClause(
171 unsigned AluInstCount,
172 std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
173 MachineBasicBlock::iterator Def,
174 MachineBasicBlock::iterator BBEnd) {
175 const R600RegisterInfo &TRI = TII->getRegisterInfo();
176 //TODO: change this to defs?
177 for (MachineOperand &MO : Def->all_defs()) {
178 if (TRI.isPhysRegLiveAcrossClauses(Reg: MO.getReg()))
179 continue;
180
181 // Def defines a clause local register, so check that its use will fit
182 // in the clause.
183 unsigned LastUseCount = 0;
184 for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
185 AluInstCount += OccupiedDwords(MI&: *UseI);
186 // Make sure we won't need to end the clause due to KCache limitations.
187 if (!SubstituteKCacheBank(MI&: *UseI, CachedConsts&: KCacheBanks, UpdateInstr: false))
188 return false;
189
190 // We have reached the maximum instruction limit before finding the
191 // use that kills this register, so we cannot use this def in the
192 // current clause.
193 if (AluInstCount >= TII->getMaxAlusPerClause())
194 return false;
195
196 // TODO: Is this true? kill flag appears to work OK below
197 // Register kill flags have been cleared by the time we get to this
198 // pass, but it is safe to assume that all uses of this register
199 // occur in the same basic block as its definition, because
200 // it is illegal for the scheduler to schedule them in
201 // different blocks.
202 if (UseI->readsRegister(Reg: MO.getReg(), TRI: &TRI))
203 LastUseCount = AluInstCount;
204
205 // Exit early if the current use kills the register
206 if (UseI != Def && UseI->killsRegister(Reg: MO.getReg(), TRI: &TRI))
207 break;
208 }
209 if (LastUseCount)
210 return LastUseCount <= TII->getMaxAlusPerClause();
211 llvm_unreachable("Clause local register live at end of clause.");
212 }
213 return true;
214 }
215
216 MachineBasicBlock::iterator
217 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
218 MachineBasicBlock::iterator ClauseHead = I;
219 std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
220 bool PushBeforeModifier = false;
221 unsigned AluInstCount = 0;
222 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
223 if (IsTrivialInst(MI&: *I))
224 continue;
225 if (!isALU(MI: *I))
226 break;
227 if (AluInstCount > TII->getMaxAlusPerClause())
228 break;
229 if (I->getOpcode() == R600::PRED_X) {
230 // We put PRED_X in its own clause to ensure that ifcvt won't create
231 // clauses with more than 128 insts.
232 // IfCvt is indeed checking that "then" and "else" branches of an if
233 // statement have less than ~60 insts thus converted clauses can't be
234 // bigger than ~121 insts (predicate setter needs to be in the same
235 // clause as predicated alus).
236 if (AluInstCount > 0)
237 break;
238 if (TII->getFlagOp(MI&: *I).getImm() & MO_FLAG_PUSH)
239 PushBeforeModifier = true;
240 AluInstCount ++;
241 continue;
242 }
243 // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
244 //
245 // * KILL or INTERP instructions
246 // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
247 // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
248 //
249 // XXX: These checks have not been implemented yet.
250 if (TII->mustBeLastInClause(Opcode: I->getOpcode())) {
251 I++;
252 break;
253 }
254
255 // If this instruction defines a clause local register, make sure
256 // its use can fit in this clause.
257 if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, Def: I, BBEnd: E))
258 break;
259
260 if (!SubstituteKCacheBank(MI&: *I, CachedConsts&: KCacheBanks))
261 break;
262 AluInstCount += OccupiedDwords(MI&: *I);
263 }
264 unsigned Opcode = PushBeforeModifier ?
265 R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
266 BuildMI(BB&: MBB, I: ClauseHead, MIMD: MBB.findDebugLoc(MBBI: ClauseHead), MCID: TII->get(Opcode))
267 // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
268 // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
269 // pass may assume that identical ALU clause starter at the beginning of a
270 // true and false branch can be factorized which is not the case.
271 .addImm(Val: Address++) // ADDR
272 .addImm(Val: KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
273 .addImm(Val: (KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
274 .addImm(Val: KCacheBanks.empty()?0:2) // KM0
275 .addImm(Val: (KCacheBanks.size() < 2)?0:2) // KM1
276 .addImm(Val: KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
277 .addImm(Val: (KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
278 .addImm(Val: AluInstCount) // COUNT
279 .addImm(Val: 1); // Enabled
280 return I;
281 }
282
283public:
284 static char ID;
285
286 R600EmitClauseMarkers() : MachineFunctionPass(ID) {}
287
288 bool runOnMachineFunction(MachineFunction &MF) override {
289 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
290 TII = ST.getInstrInfo();
291
292 for (MachineBasicBlock &MBB : MF) {
293 MachineBasicBlock::iterator I = MBB.begin();
294 if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
295 continue; // BB was already parsed
296 for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
297 if (isALU(MI: *I)) {
298 auto next = MakeALUClause(MBB, I);
299 assert(next != I);
300 I = next;
301 } else
302 ++I;
303 }
304 }
305 return false;
306 }
307
308 StringRef getPassName() const override {
309 return "R600 Emit Clause Markers Pass";
310 }
311};
312
313char R600EmitClauseMarkers::ID = 0;
314
315} // end anonymous namespace
316
317INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
318 "R600 Emit Clause Markers", false, false)
319INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
320 "R600 Emit Clause Markers", false, false)
321
322FunctionPass *llvm::createR600EmitClauseMarkers() {
323 return new R600EmitClauseMarkers();
324}
325