| 1 | //===- AMDGPULowerVGPREncoding.cpp - lower VGPRs above v255 ---------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file |
| 10 | /// Lower VGPRs above first 256 on gfx1250. |
| 11 | /// |
| 12 | /// The pass scans used VGPRs and inserts S_SET_VGPR_MSB instructions to switch |
| 13 | /// VGPR addressing mode. The mode change is effective until the next change. |
| 14 | /// This instruction provides high bits of a VGPR address for four of the |
| 15 | /// operands: vdst, src0, src1, and src2, or other 4 operands depending on the |
| 16 | /// instruction encoding. If bits are set they are added as MSB to the |
| 17 | /// corresponding operand VGPR number. |
| 18 | /// |
| 19 | /// There is no need to replace actual register operands because encoding of the |
| 20 | /// high and low VGPRs is the same. I.e. v0 has the encoding 0x100, so does |
| 21 | /// v256. v1 has the encoding 0x101 and v257 has the same encoding. So high |
| 22 | /// VGPRs will survive until actual encoding and will result in a same actual |
| 23 | /// bit encoding. |
| 24 | /// |
| 25 | /// As a result the pass only inserts S_SET_VGPR_MSB to provide an actual offset |
| 26 | /// to a VGPR address of the subseqent instructions. The InstPrinter will take |
| 27 | /// care of the printing a low VGPR instead of a high one. In prinicple this |
| 28 | /// shall be viable to print actual high VGPR numbers, but that would disagree |
| 29 | /// with a disasm printing and create a situation where asm text is not |
| 30 | /// deterministic. |
| 31 | /// |
| 32 | /// This pass creates a convention where non-fall through basic blocks shall |
| 33 | /// start with all 4 MSBs zero. Otherwise a disassembly would not be readable. |
| 34 | /// An optimization here is possible but deemed not desirable because of the |
| 35 | /// readbility concerns. |
| 36 | /// |
| 37 | /// Consequentially the ABI is set to expect all 4 MSBs to be zero on entry. |
| 38 | /// The pass must run very late in the pipeline to make sure no changes to VGPR |
| 39 | /// operands will be made after it. |
| 40 | // |
| 41 | //===----------------------------------------------------------------------===// |
| 42 | |
| 43 | #include "AMDGPULowerVGPREncoding.h" |
| 44 | #include "AMDGPU.h" |
| 45 | #include "GCNSubtarget.h" |
| 46 | #include "SIDefines.h" |
| 47 | #include "SIInstrInfo.h" |
| 48 | #include "llvm/ADT/bit.h" |
| 49 | #include "llvm/Support/Debug.h" |
| 50 | #include "llvm/Support/MathExtras.h" |
| 51 | |
| 52 | using namespace llvm; |
| 53 | |
| 54 | #define DEBUG_TYPE "amdgpu-lower-vgpr-encoding" |
| 55 | |
| 56 | namespace { |
| 57 | |
/// Tracks the VGPR MSB addressing mode across a machine function and inserts
/// S_SET_VGPR_MSB instructions where the mode has to change. See the file
/// comment for the overall scheme.
class AMDGPULowerVGPREncoding {
  /// Number of operand slots a single mode change controls (vdst, src0, src1
  /// and src2, or another 4 operands depending on the encoding).
  static constexpr unsigned OpNum = 4;
  /// Width of one operand's MSB field in the packed mode value.
  static constexpr unsigned BitsPerField = 2;
  /// Number of MSB fields in the packed mode value.
  static constexpr unsigned NumFields = 4;
  /// Total width of the packed mode value in bits.
  static constexpr unsigned ModeWidth = NumFields * BitsPerField;
  /// Mask covering the whole packed mode value.
  static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
  /// Bit position of the VGPR MSB field inside the MODE hardware register,
  /// derived from the field mask rather than hard-coded.
  static constexpr unsigned VGPRMSBShift =
      llvm::countr_zero_constexpr<unsigned>(Val: AMDGPU::Hwreg::DST_VGPR_MSB);

  /// MSB state of a single operand slot.
  struct OpMode {
    // No MSBs set means they are not required to be of a particular value.
    std::optional<unsigned> MSBits;

    /// Merge \p New into this slot. \returns true if the effective value
    /// (unset treated as 0) changed. Sets \p Rewritten if a previously known
    /// value was replaced, i.e. an already emitted mode set cannot simply be
    /// extended in place.
    bool update(const OpMode &New, bool &Rewritten) {
      bool Updated = false;
      if (New.MSBits) {
        if (*New.MSBits != MSBits.value_or(u: 0)) {
          Updated = true;
          Rewritten |= MSBits.has_value();
        }
        MSBits = New.MSBits;
      }
      return Updated;
    }
  };

  /// Packed MSB state for all four operand slots.
  struct ModeTy {
    OpMode Ops[OpNum];

    /// Merge \p New slot by slot. See OpMode::update for the meaning of the
    /// return value and \p Rewritten.
    bool update(const ModeTy &New, bool &Rewritten) {
      bool Updated = false;
      for (unsigned I : seq(Size: OpNum))
        Updated |= Ops[I].update(New: New.Ops[I], Rewritten);
      return Updated;
    }

    /// Pack this mode into the S_SET_VGPR_MSB immediate encoding; unset
    /// fields encode as 0.
    unsigned encode() const {
      // Layout: [src0 msb, src1 msb, src2 msb, dst msb].
      unsigned V = 0;
      for (const auto &[I, Op] : enumerate(First: Ops))
        V |= Op.MSBits.value_or(u: 0) << (I * 2);
      return V;
    }

    /// Debug-printing helper; unset fields print as '?'.
    void print(raw_ostream &OS) const {
      static const char *FieldNames[] = {"src0", "src1", "src2", "dst"};
      OS << '{';
      for (const auto &[I, Op] : enumerate(First: Ops)) {
        if (I)
          OS << ", ";
        OS << FieldNames[I] << '=';
        if (Op.MSBits)
          OS << *Op.MSBits;
        else
          OS << '?';
      }
      OS << '}';
    }

    // Check if this mode is compatible with required \p NewMode without
    // modification. Fields \p NewMode does not require are ignored; unset
    // fields on this side compare as 0.
    bool isCompatible(const ModeTy NewMode) const {
      for (unsigned I : seq(Size: OpNum)) {
        if (!NewMode.Ops[I].MSBits.has_value())
          continue;
        if (Ops[I].MSBits.value_or(u: 0) != NewMode.Ops[I].MSBits.value_or(u: 0))
          return false;
      }
      return true;
    }
  };

public:
  bool run(MachineFunction &MF);

private:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;

  // Current basic block.
  MachineBasicBlock *MBB;

  /// Most recent s_set_* instruction, used to piggyback later mode changes
  /// instead of emitting new instructions. Null if none is reusable.
  MachineInstr *MostRecentModeSet;

  /// Current mode bits.
  ModeTy CurrentMode;

  /// Number of current hard clause instructions.
  unsigned ClauseLen;

  /// Number of hard clause instructions remaining.
  unsigned ClauseRemaining;

  /// Clause group breaks.
  unsigned ClauseBreaks;

  /// Last hard clause instruction.
  MachineInstr *Clause;

  /// S_SET_VGPR_MSB immediately after S_SETREG_IMM32_B32 targeting MODE is
  /// silently dropped on GFX1250. When set, the next S_SET_VGPR_MSB insertion
  /// must be preceded by S_NOP to avoid the hazard.
  bool NeedNopBeforeSetVGPRMSB;

  /// Insert mode change before \p I. \returns true if mode was changed.
  bool setMode(ModeTy NewMode, MachineBasicBlock::instr_iterator I);

  /// Reset mode to default, i.e. all four MSB fields explicitly zero.
  void resetMode(MachineBasicBlock::instr_iterator I) {
    ModeTy Mode;
    for (OpMode &Op : Mode.Ops)
      Op.MSBits = 0;
    setMode(NewMode: Mode, I);
  }

  /// If \p MO references VGPRs, return the MSBs. Otherwise, return nullopt.
  std::optional<unsigned> getMSBs(const MachineOperand &MO) const;

  /// Handle single \p MI. \return true if changed.
  bool runOnMachineInstr(MachineInstr &MI);

  /// Compute the mode for a single \p MI given \p Ops operands
  /// bit mapping. Optionally takes second array \p Ops2 for VOPD.
  /// If provided and an operand from \p Ops is not a VGPR, then \p Ops2
  /// is checked.
  void computeMode(ModeTy &NewMode, const MachineInstr &MI,
                   const AMDGPU::OpName Ops[OpNum],
                   const AMDGPU::OpName *Ops2 = nullptr);

  /// Check if an instruction \p I is within a clause and returns a suitable
  /// iterator to insert mode change. It may also modify the S_CLAUSE
  /// instruction to extend it or drop the clause if it cannot be adjusted.
  MachineBasicBlock::instr_iterator
  handleClause(MachineBasicBlock::instr_iterator I);

  /// Check if an instruction \p I is immediately after another program state
  /// instruction which it cannot coissue with. If so, insert before that
  /// instruction to encourage more coissuing.
  MachineBasicBlock::instr_iterator
  handleCoissue(MachineBasicBlock::instr_iterator I);

  /// Handle S_SETREG_IMM32_B32 targeting MODE register. On certain hardware,
  /// this instruction clobbers VGPR MSB bits[12:19], so we need to restore
  /// the current mode. \returns true if the instruction was modified or a
  /// new one was inserted.
  bool handleSetregMode(MachineInstr &MI);

  /// Update bits[12:19] of the imm operand in S_SETREG_IMM32_B32 to contain
  /// the VGPR MSB mode value. \returns true if the immediate was changed.
  bool updateSetregModeImm(MachineInstr &MI, int64_t ModeValue);
};
| 210 | |
/// Make the mode required by \p NewMode effective at \p I, either by
/// piggybacking onto the most recent mode-setting instruction or by inserting
/// a new S_SET_VGPR_MSB. \returns true if anything was changed.
bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode,
                                      MachineBasicBlock::instr_iterator I) {
  LLVM_DEBUG({
    dbgs() << " setMode: NewMode=";
    NewMode.print(dbgs());
    dbgs() << " CurrentMode=";
    CurrentMode.print(dbgs());
    dbgs() << " MostRecentModeSet=" << (MostRecentModeSet ? "yes" : "null");
    if (I != MBB->instr_end())
      dbgs() << " before: " << *I;
    else
      dbgs() << " at end\n";
  });

  // Record previous mode into high 8 bits of the immediate; the new mode
  // occupies the low 8 bits when the instruction is built below.
  int64_t OldModeBits = CurrentMode.encode() << ModeWidth;

  bool Rewritten = false;
  if (!CurrentMode.update(New: NewMode, Rewritten)) {
    LLVM_DEBUG(dbgs() << " -> no change needed\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << " Rewritten=" << Rewritten << " after update\n");

  // If the merge only added new information (no already-set field was
  // rewritten), fold the bits into the previous mode-setting instruction
  // instead of emitting another one.
  if (MostRecentModeSet && !Rewritten) {
    // Update MostRecentModeSet with the new mode. It can be either
    // S_SET_VGPR_MSB or S_SETREG_IMM32_B32 (with Size <= 12).
    if (MostRecentModeSet->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
      MachineOperand &Op = MostRecentModeSet->getOperand(i: 0);
      // Carry old mode bits from the existing instruction. This deliberately
      // shadows the outer OldModeBits: the previous mode recorded in that
      // instruction is the one that was current when it was emitted.
      int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);
      Op.setImm(CurrentMode.encode() | OldModeBits);
      LLVM_DEBUG(dbgs() << " -> piggybacked onto S_SET_VGPR_MSB: "
                        << *MostRecentModeSet);
    } else {
      assert(MostRecentModeSet->getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
             "unexpected MostRecentModeSet opcode");
      updateSetregModeImm(MI&: *MostRecentModeSet, ModeValue: CurrentMode.encode());
      LLVM_DEBUG(dbgs() << " -> piggybacked onto S_SETREG_IMM32_B32: "
                        << *MostRecentModeSet);
    }

    return true;
  }

  // Find a legal insertion point w.r.t. hard clauses and coissue rules.
  I = handleClause(I);
  I = handleCoissue(I);
  // Case 2 match in handleSetregMode: the setreg's imm[12:19] matched
  // current MSBs, but the next VALU needs different MSBs, so this
  // S_SET_VGPR_MSB would land right after the setreg. Insert S_NOP to
  // prevent it from being silently dropped.
  if (NeedNopBeforeSetVGPRMSB) {
    BuildMI(BB&: *MBB, I, MIMD: {}, MCID: TII->get(Opcode: AMDGPU::S_NOP)).addImm(Val: 0);
    NeedNopBeforeSetVGPRMSB = false;
  }
  MostRecentModeSet = BuildMI(BB&: *MBB, I, MIMD: {}, MCID: TII->get(Opcode: AMDGPU::S_SET_VGPR_MSB))
                          .addImm(Val: NewMode.encode() | OldModeBits);
  LLVM_DEBUG(dbgs() << " -> inserted new S_SET_VGPR_MSB: "
                    << *MostRecentModeSet);

  // The new instruction encodes NewMode with unset fields as 0, so track
  // NewMode itself rather than the merged state computed by update() above.
  CurrentMode = NewMode;
  return true;
}
| 275 | |
| 276 | std::optional<unsigned> |
| 277 | AMDGPULowerVGPREncoding::getMSBs(const MachineOperand &MO) const { |
| 278 | if (!MO.isReg()) |
| 279 | return std::nullopt; |
| 280 | |
| 281 | MCRegister Reg = MO.getReg(); |
| 282 | const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg); |
| 283 | if (!RC || !TRI->isVGPRClass(RC)) |
| 284 | return std::nullopt; |
| 285 | |
| 286 | unsigned Idx = TRI->getHWRegIndex(Reg); |
| 287 | return Idx >> 8; |
| 288 | } |
| 289 | |
/// Compute the MSB mode required by \p MI: for each of the four controlled
/// operand slots, record the MSBs of any VGPR operand found via \p Ops (or,
/// failing that, \p Ops2 for VOPD). Slots with no VGPR stay unset.
void AMDGPULowerVGPREncoding::computeMode(ModeTy &NewMode,
                                          const MachineInstr &MI,
                                          const AMDGPU::OpName Ops[OpNum],
                                          const AMDGPU::OpName *Ops2) {
  // Start from an all-unset mode; only VGPR operands constrain the result.
  NewMode = {};

  for (unsigned I = 0; I < OpNum; ++I) {
    const MachineOperand *Op = TII->getNamedOperand(MI, OperandName: Ops[I]);

    std::optional<unsigned> MSBits;
    if (Op)
      MSBits = getMSBs(MO: *Op);

#if !defined(NDEBUG)
    // Sanity check: when both operand tables resolve for this slot, the two
    // VOPD halves must agree on the MSBs, since one field covers both.
    if (MSBits.has_value() && Ops2) {
      const MachineOperand *Op2 = TII->getNamedOperand(MI, Ops2[I]);
      if (Op2) {
        std::optional<unsigned> MSBits2;
        MSBits2 = getMSBs(*Op2);
        if (MSBits2.has_value() && MSBits != MSBits2)
          llvm_unreachable("Invalid VOPD pair was created");
      }
    }
#endif

    // Fall back to the second (VOPD) table if the first one did not yield a
    // VGPR for this slot.
    if (!MSBits.has_value() && Ops2) {
      Op = TII->getNamedOperand(MI, OperandName: Ops2[I]);
      if (Op)
        MSBits = getMSBs(MO: *Op);
    }

    if (!MSBits.has_value())
      continue;

    // Skip tied uses of src2 of VOP2, these will be handled along with defs and
    // only vdst bit affects these operands. We cannot skip tied uses of VOP3,
    // these uses are real even if must match the vdst.
    if (Ops[I] == AMDGPU::OpName::src2 && !Op->isDef() && Op->isTied() &&
        (SIInstrInfo::isVOP2(MI) ||
         (SIInstrInfo::isVOP3(MI) &&
          TII->hasVALU32BitEncoding(Opcode: MI.getOpcode()))))
      continue;

    NewMode.Ops[I].MSBits = MSBits.value();
  }
}
| 336 | |
/// Process one instruction: compute the mode it needs and make it effective,
/// possibly commuting the instruction to avoid a mode switch.
/// \returns true if MI or the surrounding code was changed.
bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {
  // Ops.first is null for instructions without lowerable VGPR operands.
  auto Ops = AMDGPU::getVGPRLoweringOperandTables(Desc: MI.getDesc());
  if (Ops.first) {
    ModeTy NewMode;
    computeMode(NewMode, MI, Ops: Ops.first, Ops2: Ops.second);
    LLVM_DEBUG({
      dbgs() << " runOnMachineInstr: ";
      MI.print(dbgs());
      dbgs() << " computed NewMode=";
      NewMode.print(dbgs());
      dbgs() << " compatible=" << CurrentMode.isCompatible(NewMode) << '\n';
    });
    // If the required mode conflicts with the current one, try commuting:
    // the commuted operand order may fit the current mode and save a switch.
    if (!CurrentMode.isCompatible(NewMode) && MI.isCommutable() &&
        TII->commuteInstruction(MI)) {
      ModeTy NewModeCommuted;
      computeMode(NewMode&: NewModeCommuted, MI, Ops: Ops.first, Ops2: Ops.second);
      LLVM_DEBUG({
        dbgs() << " commuted NewMode=";
        NewModeCommuted.print(dbgs());
        dbgs() << " compatible=" << CurrentMode.isCompatible(NewModeCommuted)
               << '\n';
      });
      if (CurrentMode.isCompatible(NewMode: NewModeCommuted)) {
        // Update CurrentMode with mode bits the commuted instruction relies on.
        // This prevents later instructions from piggybacking and corrupting
        // those bits (e.g., a nullopt src treated as 0 could be overwritten).
        bool Unused = false;
        CurrentMode.update(New: NewModeCommuted, Rewritten&: Unused);
        // MI was modified by the commute above.
        return true;
      }
      // Commute back.
      if (!TII->commuteInstruction(MI))
        llvm_unreachable("Failed to restore commuted instruction.");
    }
    return setMode(NewMode, I: MI.getIterator());
  }
  // Instructions without an operand table must not use VGPRs at all (or be
  // meta/pseudo instructions that never reach encoding).
  assert(!TII->hasVGPRUses(MI) || MI.isMetaInstruction() || MI.isPseudo());

  return false;
}
| 378 | |
/// Adjust the insertion point (and, if needed, the S_CLAUSE instruction) so
/// that a mode change at \p I does not break hard-clause rules.
MachineBasicBlock::instr_iterator
AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
  // Not inside a hard clause: insert right at \p I.
  if (!ClauseRemaining)
    return I;

  // A clause cannot start with a special instruction, place it right before
  // the clause.
  if (ClauseRemaining == ClauseLen) {
    I = Clause->getPrevNode()->getIterator();
    assert(I->isBundle());
    return I;
  }

  // If a clause defines breaks each group cannot start with a mode change,
  // just drop the clause.
  if (ClauseBreaks) {
    Clause->eraseFromBundle();
    ClauseRemaining = 0;
    return I;
  }

  // Otherwise adjust a number of instructions in the clause if it fits.
  // If it does not clause will just become shorter. Since the length
  // recorded in the clause is one less, increment the length after the
  // update. Note that SIMM16[5:0] must be 1-62, not 0 or 63.
  if (ClauseLen < 63)
    Clause->getOperand(i: 0).setImm(ClauseLen | (ClauseBreaks << 8));

  ++ClauseLen;

  return I;
}
| 411 | |
| 412 | MachineBasicBlock::instr_iterator |
| 413 | AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) { |
| 414 | if (I.isEnd()) |
| 415 | return I; |
| 416 | |
| 417 | // "Program State instructions" are instructions which are used to control |
| 418 | // operation of the GPU rather than performing arithmetic. Such instructions |
| 419 | // have different coissuing rules w.r.t s_set_vgpr_msb. |
| 420 | auto isProgramStateInstr = [this](MachineInstr *MI) { |
| 421 | unsigned Opc = MI->getOpcode(); |
| 422 | return TII->isBarrier(Opcode: Opc) || TII->isWaitcnt(Opcode: Opc) || |
| 423 | Opc == AMDGPU::S_DELAY_ALU; |
| 424 | }; |
| 425 | |
| 426 | while (!I.isEnd() && I != I->getParent()->begin()) { |
| 427 | auto Prev = std::prev(x: I); |
| 428 | if (!isProgramStateInstr(&*Prev)) |
| 429 | return I; |
| 430 | I = Prev; |
| 431 | } |
| 432 | |
| 433 | return I; |
| 434 | } |
| 435 | |
/// Convert mode value from S_SET_VGPR_MSB format to MODE register format.
/// S_SET_VGPR_MSB uses: (src0[0-1], src1[2-3], src2[4-5], dst[6-7])
/// MODE register uses: (dst[0-1], src0[2-3], src1[4-5], src2[6-7])
/// This is a left rotation by 2 bits on an 8-bit value.
static int64_t convertModeToSetregFormat(int64_t Mode) {
  assert(static_cast<uint64_t>(Mode) <= 0xFF && "Mode expected to be 8-bit");
  const uint8_t Bits = static_cast<uint8_t>(Mode);
  // rotl(Bits, 2) spelled with shifts; the cast truncates back to 8 bits.
  return static_cast<uint8_t>((Bits << 2) | (Bits >> 6));
}
| 444 | |
| 445 | bool AMDGPULowerVGPREncoding::updateSetregModeImm(MachineInstr &MI, |
| 446 | int64_t ModeValue) { |
| 447 | assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32); |
| 448 | |
| 449 | // Convert from S_SET_VGPR_MSB format to MODE register format |
| 450 | int64_t SetregMode = convertModeToSetregFormat(Mode: ModeValue); |
| 451 | |
| 452 | MachineOperand *ImmOp = TII->getNamedOperand(MI, OperandName: AMDGPU::OpName::imm); |
| 453 | int64_t OldImm = ImmOp->getImm(); |
| 454 | int64_t NewImm = |
| 455 | (OldImm & ~AMDGPU::Hwreg::VGPR_MSB_MASK) | (SetregMode << VGPRMSBShift); |
| 456 | ImmOp->setImm(NewImm); |
| 457 | return NewImm != OldImm; |
| 458 | } |
| 459 | |
/// Post-process an S_SETREG_IMM32_B32. If it writes the MODE register it
/// clobbers the VGPR MSB field (bits[12:19]), so either fold the current
/// mode into its immediate or follow it with a fix-up instruction.
bool AMDGPULowerVGPREncoding::handleSetregMode(MachineInstr &MI) {
  using namespace AMDGPU::Hwreg;

  assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
         "only S_SETREG_IMM32_B32 needs to be handled");

  LLVM_DEBUG(dbgs() << " handleSetregMode: " << MI);

  MachineOperand *SIMM16Op = TII->getNamedOperand(MI, OperandName: AMDGPU::OpName::simm16);
  assert(SIMM16Op && "SIMM16Op must be present");

  // Decode which hardware register and bit range the setreg targets.
  auto [HwRegId, Offset, Size] = HwregEncoding::decode(Encoded: SIMM16Op->getImm());
  (void)Offset;
  LLVM_DEBUG(dbgs() << " HwRegId=" << HwRegId << " Offset=" << Offset
                    << " Size=" << Size << '\n');
  if (HwRegId != ID_MODE) {
    LLVM_DEBUG(dbgs() << " -> not ID_MODE, skipping\n");
    return false;
  }

  int64_t ModeValue = CurrentMode.encode();
  LLVM_DEBUG({
    dbgs() << " CurrentMode=";
    CurrentMode.print(dbgs());
    dbgs() << " encoded=0x" << Twine::utohexstr(ModeValue)
           << " VGPRMSBShift=" << VGPRMSBShift << '\n';
  });

  // Case 1: Size <= 12 - the original instruction uses imm32[0:Size-1], so
  // imm32[12:19] is unused. Safe to set imm32[12:19] to the correct VGPR
  // MSBs.
  if (Size <= VGPRMSBShift) {
    LLVM_DEBUG(dbgs() << " Case 1: Size(" << Size << ") <= VGPRMSBShift("
                      << VGPRMSBShift
                      << "), treating as mode scope boundary\n");
    // This instruction is at the boundary of the old mode's control range.
    // Reset CurrentMode so that the next setMode call can freely piggyback
    // the required mode into bits[12:19] without triggering Rewritten.
    MostRecentModeSet = &MI;
    CurrentMode = {};
    bool Changed = updateSetregModeImm(MI, ModeValue: 0);
    LLVM_DEBUG(dbgs() << " -> reset CurrentMode, cleared bits[12:19]: "
                      << MI);
    return Changed;
  }

  // Case 2: Size > 12 - the original instruction uses bits beyond 11, so we
  // cannot arbitrarily modify imm32[12:19]. Check if it already matches VGPR
  // MSBs. Note: imm32[12:19] is in MODE register format, while ModeValue is
  // in S_SET_VGPR_MSB format, so we need to convert before comparing.
  MachineOperand *ImmOp = TII->getNamedOperand(MI, OperandName: AMDGPU::OpName::imm);
  assert(ImmOp && "ImmOp must be present");
  int64_t ImmBits12To19 = (ImmOp->getImm() & VGPR_MSB_MASK) >> VGPRMSBShift;
  int64_t SetregModeValue = convertModeToSetregFormat(Mode: ModeValue);
  LLVM_DEBUG(dbgs() << " Case 2: Size(" << Size << ") > VGPRMSBShift, "
                    << "ImmBits12To19=0x" << Twine::utohexstr(ImmBits12To19)
                    << " SetregModeValue=0x"
                    << Twine::utohexstr(SetregModeValue) << '\n');
  if (ImmBits12To19 == SetregModeValue) {
    // Already correct, but we must invalidate MostRecentModeSet because this
    // instruction will overwrite mode[12:19]. We can't update this instruction
    // via piggybacking (bits[12:19] are meaningful), so if CurrentMode changes,
    // a new s_set_vgpr_msb will be inserted after this instruction.
    MostRecentModeSet = nullptr;
    NeedNopBeforeSetVGPRMSB = true;
    LLVM_DEBUG(dbgs() << " -> bits[12:19] already correct, "
                         "invalidated MostRecentModeSet\n");
    return false;
  }

  // imm32[12:19] doesn't match VGPR MSBs - insert s_set_vgpr_msb after
  // the original instruction to restore the correct value. Insert S_NOP
  // to avoid the GFX1250 hazard where S_SET_VGPR_MSB immediately after
  // S_SETREG_IMM32_B32(MODE) is silently dropped.
  MachineBasicBlock::iterator InsertPt = std::next(x: MI.getIterator());
  BuildMI(BB&: *MBB, I: InsertPt, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AMDGPU::S_NOP)).addImm(Val: 0);
  MostRecentModeSet = BuildMI(BB&: *MBB, I: InsertPt, MIMD: MI.getDebugLoc(),
                              MCID: TII->get(Opcode: AMDGPU::S_SET_VGPR_MSB))
                          .addImm(Val: ModeValue);
  LLVM_DEBUG(dbgs() << " -> inserted S_SET_VGPR_MSB after setreg: "
                    << *MostRecentModeSet);
  return true;
}
| 543 | |
/// Driver: walk every instruction of \p MF, tracking and switching the VGPR
/// MSB mode as needed. \returns true if the function was changed.
bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  // Only subtargets with more than 256 addressable VGPRs need lowering.
  if (!ST.has1024AddressableVGPRs())
    return false;

  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();

  LLVM_DEBUG(dbgs() << "*** AMDGPULowerVGPREncoding on " << MF.getName()
                    << " ***\n");

  bool Changed = false;
  ClauseLen = ClauseRemaining = 0;
  // Per the ABI convention all 4 MSBs are zero on entry.
  CurrentMode = {};
  for (auto &MBB : MF) {
    // Piggybacking and the setreg hazard do not carry across blocks.
    MostRecentModeSet = nullptr;
    NeedNopBeforeSetVGPRMSB = false;
    this->MBB = &MBB;

    LLVM_DEBUG(dbgs() << "BB#" << MBB.getNumber() << ' ' << MBB.getName()
                      << ":\n");

    for (auto &MI : llvm::make_early_inc_range(Range: MBB.instrs())) {
      if (MI.isMetaInstruction())
        continue;

      if (MI.isTerminator() || MI.isCall()) {
        LLVM_DEBUG(dbgs() << " terminator/call: " << MI);
        // Nothing executes after an endpgm, just forget the mode; otherwise
        // restore the all-zero convention before control leaves the block.
        if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
            MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED)
          CurrentMode = {};
        else
          resetMode(I: MI.getIterator());
        NeedNopBeforeSetVGPRMSB = false;
        continue;
      }

      if (MI.isInlineAsm()) {
        LLVM_DEBUG(dbgs() << " inline asm: " << MI);
        // Force the default mode before inline asm that touches VGPRs.
        if (TII->hasVGPRUses(MI))
          resetMode(I: MI.getIterator());
        NeedNopBeforeSetVGPRMSB = false;
        continue;
      }

      if (MI.getOpcode() == AMDGPU::S_CLAUSE) {
        assert(!ClauseRemaining && "Nested clauses are not supported");
        // SIMM16[5:0] is the clause length minus one, SIMM16[11:8] the
        // number of group breaks (see handleClause for re-encoding).
        ClauseLen = MI.getOperand(i: 0).getImm();
        ClauseBreaks = (ClauseLen >> 8) & 15;
        ClauseLen = ClauseRemaining = (ClauseLen & 63) + 1;
        Clause = &MI;
        LLVM_DEBUG(dbgs() << " clause: len=" << ClauseLen
                          << " breaks=" << ClauseBreaks << '\n');
        continue;
      }

      if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
          ST.hasSetregVGPRMSBFixup()) {
        Changed |= handleSetregMode(MI);
        continue;
      }

      Changed |= runOnMachineInstr(MI);
      NeedNopBeforeSetVGPRMSB = false;

      if (ClauseRemaining)
        --ClauseRemaining;
    }

    // Reset the mode if we are falling through.
    LLVM_DEBUG(dbgs() << " end of BB, resetting mode\n");
    resetMode(I: MBB.instr_end());
  }

  return Changed;
}
| 620 | |
/// Legacy pass manager wrapper around AMDGPULowerVGPREncoding.
class AMDGPULowerVGPREncodingLegacy : public MachineFunctionPass {
public:
  static char ID; // Pass identification.

  AMDGPULowerVGPREncodingLegacy() : MachineFunctionPass(ID) {}

  /// Run the lowering on \p MF. \returns true if the function was changed.
  bool runOnMachineFunction(MachineFunction &MF) override {
    return AMDGPULowerVGPREncoding().run(MF);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // The pass only inserts/rewrites instructions inside blocks.
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
| 636 | |
| 637 | } // namespace |
| 638 | |
// Legacy pass-manager registration boilerplate.
char AMDGPULowerVGPREncodingLegacy::ID = 0;

// Exported handle used by the target to reference this pass by ID.
char &llvm::AMDGPULowerVGPREncodingLegacyID = AMDGPULowerVGPREncodingLegacy::ID;

INITIALIZE_PASS(AMDGPULowerVGPREncodingLegacy, DEBUG_TYPE,
                "AMDGPU Lower VGPR Encoding", false, false)
| 645 | |
| 646 | PreservedAnalyses |
| 647 | AMDGPULowerVGPREncodingPass::run(MachineFunction &MF, |
| 648 | MachineFunctionAnalysisManager &MFAM) { |
| 649 | if (!AMDGPULowerVGPREncoding().run(MF)) |
| 650 | return PreservedAnalyses::all(); |
| 651 | |
| 652 | return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>(); |
| 653 | } |
| 654 | |