//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// The pass tries to use the 32-bit encoding for instructions when possible.
//===----------------------------------------------------------------------===//
//

#include "SIShrinkInstructions.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

#define DEBUG_TYPE "si-shrink-instructions"

STATISTIC(NumInstructionsShrunk,
          "Number of 64-bit instructions reduced to 32-bit.");
STATISTIC(NumLiteralConstantsFolded,
          "Number of literal constants folded into 32-bit instructions.");

using namespace llvm;

namespace {

class SIShrinkInstructions {
  MachineFunction *MF;
  MachineRegisterInfo *MRI;
  const GCNSubtarget *ST;
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  bool IsPostRA;

  bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
  bool shouldShrinkTrue16(MachineInstr &MI) const;
  bool isKImmOperand(const MachineOperand &Src) const;
  bool isKUImmOperand(const MachineOperand &Src) const;
  bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
  void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
  void shrinkScalarCompare(MachineInstr &MI) const;
  void shrinkMIMG(MachineInstr &MI) const;
  void shrinkMadFma(MachineInstr &MI) const;
  bool shrinkScalarLogicOp(MachineInstr &MI) const;
  bool tryReplaceDeadSDST(MachineInstr &MI) const;
  bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
                     Register Reg, unsigned SubReg) const;
  bool instReadsReg(const MachineInstr *MI, unsigned Reg,
                    unsigned SubReg) const;
  bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
                       unsigned SubReg) const;
  TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
                                                   unsigned I) const;
  void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
  MachineInstr *matchSwap(MachineInstr &MovT) const;

public:
  SIShrinkInstructions() = default;
  bool run(MachineFunction &MF);
};

class SIShrinkInstructionsLegacy : public MachineFunctionPass {

public:
  static char ID;

  SIShrinkInstructionsLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Shrink Instructions"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIShrinkInstructionsLegacy, DEBUG_TYPE,
                "SI Shrink Instructions", false, false)

char SIShrinkInstructionsLegacy::ID = 0;

FunctionPass *llvm::createSIShrinkInstructionsLegacyPass() {
  return new SIShrinkInstructionsLegacy();
}

/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
                                          bool TryToCommute) const {
  assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));

  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

  // Try to fold Src0
  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  if (Src0.isReg()) {
    Register Reg = Src0.getReg();
    if (Reg.isVirtual()) {
      MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
      if (Def && Def->isMoveImmediate()) {
        MachineOperand &MovSrc = Def->getOperand(1);
        bool ConstantFolded = false;

        if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
          if (MovSrc.isImm()) {
            Src0.ChangeToImmediate(MovSrc.getImm());
            ConstantFolded = true;
          } else if (MovSrc.isFI()) {
            Src0.ChangeToFrameIndex(MovSrc.getIndex());
            ConstantFolded = true;
          } else if (MovSrc.isGlobal()) {
            Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
                            MovSrc.getTargetFlags());
            ConstantFolded = true;
          }
        }

        if (ConstantFolded) {
          if (MRI->use_nodbg_empty(Reg))
            Def->eraseFromParent();
          ++NumLiteralConstantsFolded;
          return true;
        }
      }
    }
  }

  // We have failed to fold src0, so commute the instruction and try again.
  if (TryToCommute && MI.isCommutable()) {
    if (TII->commuteInstruction(MI)) {
      if (foldImmediates(MI, false))
        return true;

      // Commute back.
      TII->commuteInstruction(MI);
    }
  }

  return false;
}

/// Do not shrink the instruction if its registers are not expressible in the
/// shrunk encoding.
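/// True16 instructions in their shrunk 32-bit encodings can only address the
/// low half of the VGPR file, so every register operand must be in the Lo128
/// register classes.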
bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
  for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
    const MachineOperand &MO = MI.getOperand(I);
    if (MO.isReg()) {
      Register Reg = MO.getReg();
      assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
                                 "True16 Instructions post-RA");
      if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
          !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
        return false;

      if (AMDGPU::VGPR_16RegClass.contains(Reg) &&
          !AMDGPU::VGPR_16_Lo128RegClass.contains(Reg))
        return false;
    }
  }
  return true;
}

bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
  return isInt<16>(SignExtend64(Src.getImm(), 32)) &&
         !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
}

bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
  return isUInt<16>(Src.getImm()) &&
         !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
}

bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
                                                bool &IsUnsigned) const {
  if (isInt<16>(SignExtend64(Src.getImm(), 32))) {
    IsUnsigned = false;
    return !TII->isInlineConstant(Src);
  }

  if (isUInt<16>(Src.getImm())) {
    IsUnsigned = true;
    return !TII->isInlineConstant(Src);
  }

  return false;
}

/// \returns the opcode of an instruction a move immediate of the constant \p
/// Src can be replaced with if the constant is replaced with \p ModifiedImm.
/// i.e.
///
/// If the bitreverse of a constant is an inline immediate, reverse the
/// immediate and return the bitreverse opcode.
///
/// If the bitwise negation of a constant is an inline immediate, invert the
/// immediate and return the bitwise not opcode.
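///
/// For example, a move of the literal 0x3fffffff can instead be encoded as
/// "v_bfrev_b32 -4", and a move of 0xffffffc0 as "v_not_b32 63"; both
/// replacement source operands are inline constants.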
static unsigned canModifyToInlineImmOp32(const SIInstrInfo *TII,
                                         const MachineOperand &Src,
                                         int32_t &ModifiedImm, bool Scalar) {
  if (TII->isInlineConstant(Src))
    return 0;
  int32_t SrcImm = static_cast<int32_t>(Src.getImm());

  if (!Scalar) {
    // We could handle the scalar case here, but we would need to check that
    // SCC is not live as S_NOT_B32 clobbers it. It's probably not worth it,
    // as the reasonable values are already covered by s_movk_i32.
    ModifiedImm = ~SrcImm;
    if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
      return AMDGPU::V_NOT_B32_e32;
  }

  ModifiedImm = reverseBits<int32_t>(SrcImm);
  if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
    return Scalar ? AMDGPU::S_BREV_B32 : AMDGPU::V_BFREV_B32_e32;

  return 0;
}

/// Copy implicit register operands from specified instruction to this
/// instruction that are not part of the instruction definition.
void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
                                                MachineInstr &MI) const {
  MachineFunction &MF = *MI.getMF();
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().implicit_uses().size() +
                    MI.getDesc().implicit_defs().size(),
                e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
      NewMI.addOperand(MF, MO);
  }
}

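// Shrink scalar compares to the SOPK (s_cmpk_*) forms when the immediate fits
// in 16 bits, e.g. "s_cmp_eq_u32 s0, 0x1234" becomes "s_cmpk_eq_u32 s0,
// 0x1234", dropping the 32-bit literal dword.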
void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
  if (!ST->hasSCmpK())
    return;

  // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction
  // to get constants on the RHS.
  if (!MI.getOperand(0).isReg())
    TII->commuteInstruction(MI, false, 0, 1);

  // cmpk requires src0 to be a register
  const MachineOperand &Src0 = MI.getOperand(0);
  if (!Src0.isReg())
    return;

  MachineOperand &Src1 = MI.getOperand(1);
  if (!Src1.isImm())
    return;

  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
  if (SOPKOpc == -1)
    return;

  // eq/ne is special because the imm16 can be treated as signed or unsigned,
  // and initially selected to the unsigned versions.
  if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
    bool HasUImm;
    if (isKImmOrKUImmOperand(Src1, HasUImm)) {
      if (!HasUImm) {
        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ? AMDGPU::S_CMPK_EQ_I32
                                                     : AMDGPU::S_CMPK_LG_I32;
        Src1.setImm(SignExtend32(Src1.getImm(), 32));
      }

      MI.setDesc(TII->get(SOPKOpc));
    }

    return;
  }

  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);

  if ((SIInstrInfo::sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
      (!SIInstrInfo::sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
    if (!SIInstrInfo::sopkIsZext(SOPKOpc))
      Src1.setImm(SignExtend64(Src1.getImm(), 32));
    MI.setDesc(NewDesc);
  }
}

// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
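// For example, if the NSA form "image_sample v0, [v4, v5, v6], ..." happens to
// have its address registers allocated contiguously, it can be re-encoded in
// the shorter non-NSA form "image_sample v0, v[4:6], ...".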
void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  if (!Info)
    return;

  uint8_t NewEncoding;
  switch (Info->MIMGEncoding) {
  case AMDGPU::MIMGEncGfx10NSA:
    NewEncoding = AMDGPU::MIMGEncGfx10Default;
    break;
  case AMDGPU::MIMGEncGfx11NSA:
    NewEncoding = AMDGPU::MIMGEncGfx11Default;
    break;
  default:
    return;
  }

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  unsigned NewAddrDwords = Info->VAddrDwords;
  const TargetRegisterClass *RC;

  if (Info->VAddrDwords == 2) {
    RC = &AMDGPU::VReg_64RegClass;
  } else if (Info->VAddrDwords == 3) {
    RC = &AMDGPU::VReg_96RegClass;
  } else if (Info->VAddrDwords == 4) {
    RC = &AMDGPU::VReg_128RegClass;
  } else if (Info->VAddrDwords == 5) {
    RC = &AMDGPU::VReg_160RegClass;
  } else if (Info->VAddrDwords == 6) {
    RC = &AMDGPU::VReg_192RegClass;
  } else if (Info->VAddrDwords == 7) {
    RC = &AMDGPU::VReg_224RegClass;
  } else if (Info->VAddrDwords == 8) {
    RC = &AMDGPU::VReg_256RegClass;
  } else if (Info->VAddrDwords == 9) {
    RC = &AMDGPU::VReg_288RegClass;
  } else if (Info->VAddrDwords == 10) {
    RC = &AMDGPU::VReg_320RegClass;
  } else if (Info->VAddrDwords == 11) {
    RC = &AMDGPU::VReg_352RegClass;
  } else if (Info->VAddrDwords == 12) {
    RC = &AMDGPU::VReg_384RegClass;
  } else {
    RC = &AMDGPU::VReg_512RegClass;
    NewAddrDwords = 16;
  }

  unsigned VgprBase = 0;
  unsigned NextVgpr = 0;
  bool IsUndef = true;
  bool IsKill = NewAddrDwords == Info->VAddrDwords;
  const unsigned NSAMaxSize = ST->getNSAMaxSize();
  const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
  const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize : Info->VAddrOperands;
  for (unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
    unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
    unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
    assert(Dwords > 0 && "Un-implemented for less than 32 bit regs");

    if (Idx == 0) {
      VgprBase = Vgpr;
      NextVgpr = Vgpr + Dwords;
    } else if (Vgpr == NextVgpr) {
      NextVgpr = Vgpr + Dwords;
    } else {
      return;
    }

    if (!Op.isUndef())
      IsUndef = false;
    if (!Op.isKill())
      IsKill = false;
  }

  if (VgprBase + NewAddrDwords > 256)
    return;

  // Further check for an implicit tied operand - this may be present if TFE
  // is enabled
  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
  int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
  unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
  unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
  int ToUntie = -1;
  if (TFEVal || LWEVal) {
    // TFE/LWE is enabled so we need to deal with an implicit tied operand
    for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
      if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
          MI.getOperand(i).isImplicit()) {
        // This is the tied operand
        assert(
            ToUntie == -1 &&
            "found more than one tied implicit operand when expecting only 1");
        ToUntie = i;
        MI.untieRegOperand(ToUntie);
      }
    }
  }

  unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding,
                                             Info->VDataDwords, NewAddrDwords);
  MI.setDesc(TII->get(NewOpcode));
  MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
  MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
  MI.getOperand(VAddr0Idx).setIsKill(IsKill);

  for (unsigned i = 1; i < EndVAddr; ++i)
    MI.removeOperand(VAddr0Idx + 1);

  if (ToUntie >= 0) {
    MI.tieOperands(
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
        ToUntie - (EndVAddr - 1));
  }
}

// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
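// For example, with a non-inline 32-bit literal K:
//   v_fma_f32 v0, v1, v2, K  ->  v_fmaak_f32 v0, v1, v2, K
//   v_fma_f32 v0, v1, K, v2  ->  v_fmamk_f32 v0, v1, K, v2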
void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
  // Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so
  // there is no reason to try to shrink them.
  if (!ST->hasVOP3Literal())
    return;

  // There is no advantage to doing this pre-RA.
  if (!IsPostRA)
    return;

  if (TII->hasAnyModifiersSet(MI))
    return;

  const unsigned Opcode = MI.getOpcode();
  MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1);
  MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
  unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;

  bool Swap;

  // Detect "Dst = VSrc * VGPR + Imm" and convert to AK form.
  if (Src2.isImm() && !TII->isInlineConstant(Src2)) {
    if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg()))
      Swap = false;
    else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
      Swap = true;
    else
      return;

    switch (Opcode) {
    default:
      llvm_unreachable("Unexpected mad/fma opcode!");
    case AMDGPU::V_MAD_F32_e64:
      NewOpcode = AMDGPU::V_MADAK_F32;
      break;
    case AMDGPU::V_FMA_F32_e64:
      NewOpcode = AMDGPU::V_FMAAK_F32;
      break;
    case AMDGPU::V_MAD_F16_e64:
      NewOpcode = AMDGPU::V_MADAK_F16;
      break;
    case AMDGPU::V_FMA_F16_e64:
    case AMDGPU::V_FMA_F16_gfx9_e64:
      NewOpcode = AMDGPU::V_FMAAK_F16;
      break;
    case AMDGPU::V_FMA_F16_gfx9_t16_e64:
      NewOpcode = AMDGPU::V_FMAAK_F16_t16;
      break;
    case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
      NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
      break;
    }
  }

  // Detect "Dst = VSrc * Imm + VGPR" and convert to MK form.
  if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) {
    if (Src1.isImm() && !TII->isInlineConstant(Src1))
      Swap = false;
    else if (Src0.isImm() && !TII->isInlineConstant(Src0))
      Swap = true;
    else
      return;

    switch (Opcode) {
    default:
      llvm_unreachable("Unexpected mad/fma opcode!");
    case AMDGPU::V_MAD_F32_e64:
      NewOpcode = AMDGPU::V_MADMK_F32;
      break;
    case AMDGPU::V_FMA_F32_e64:
      NewOpcode = AMDGPU::V_FMAMK_F32;
      break;
    case AMDGPU::V_MAD_F16_e64:
      NewOpcode = AMDGPU::V_MADMK_F16;
      break;
    case AMDGPU::V_FMA_F16_e64:
    case AMDGPU::V_FMA_F16_gfx9_e64:
      NewOpcode = AMDGPU::V_FMAMK_F16;
      break;
    case AMDGPU::V_FMA_F16_gfx9_t16_e64:
      NewOpcode = AMDGPU::V_FMAMK_F16_t16;
      break;
    case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
      NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
      break;
    }
  }

  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
    return;

  if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
    return;

  if (Swap) {
    // Swap Src0 and Src1 by building a new instruction.
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode),
            MI.getOperand(0).getReg())
        .add(Src1)
        .add(Src0)
        .add(Src2)
        .setMIFlags(MI.getFlags());
    MI.eraseFromParent();
  } else {
    TII->removeModOperands(MI);
    MI.setDesc(TII->get(NewOpcode));
  }
}

/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
/// \returns true if the caller should continue the machine function iterator
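///
/// For example, "s_and_b32 s0, s0, 0xffffdfff" can be rewritten as
/// "s_bitset0_b32 s0, 13", and "s_xor_b32 s0, s0, 0xfffffff0" as
/// "s_xnor_b32 s0, s0, 15".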
bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  const MachineOperand *Dest = &MI.getOperand(0);
  MachineOperand *Src0 = &MI.getOperand(1);
  MachineOperand *Src1 = &MI.getOperand(2);
  MachineOperand *SrcReg = Src0;
  MachineOperand *SrcImm = Src1;

  if (!SrcImm->isImm() ||
      AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
    return false;

  uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
  uint32_t NewImm = 0;

  if (Opc == AMDGPU::S_AND_B32) {
    if (isPowerOf2_32(~Imm)) {
      NewImm = llvm::countr_one(Imm);
      Opc = AMDGPU::S_BITSET0_B32;
    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_ANDN2_B32;
    }
  } else if (Opc == AMDGPU::S_OR_B32) {
    if (isPowerOf2_32(Imm)) {
      NewImm = llvm::countr_zero(Imm);
      Opc = AMDGPU::S_BITSET1_B32;
    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_ORN2_B32;
    }
  } else if (Opc == AMDGPU::S_XOR_B32) {
    if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_XNOR_B32;
    }
  } else {
    llvm_unreachable("unexpected opcode");
  }

  if (NewImm != 0) {
    if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
      MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
      MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
      return true;
    }

    if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
      const bool IsUndef = SrcReg->isUndef();
      const bool IsKill = SrcReg->isKill();
      MI.setDesc(TII->get(Opc));
      if (Opc == AMDGPU::S_BITSET0_B32 || Opc == AMDGPU::S_BITSET1_B32) {
        Src0->ChangeToImmediate(NewImm);
        // Remove the immediate and add the tied input.
        MI.getOperand(2).ChangeToRegister(Dest->getReg(), /*IsDef*/ false,
                                          /*isImp*/ false, IsKill,
                                          /*isDead*/ false, IsUndef);
        MI.tieOperands(0, 2);
      } else {
        SrcImm->setImm(NewImm);
      }
    }
  }

  return false;
}

// This is the same as MachineInstr::readsRegister/modifiesRegister except
// it takes subregs into account.
bool SIShrinkInstructions::instAccessReg(
    iterator_range<MachineInstr::const_mop_iterator> &&R, Register Reg,
    unsigned SubReg) const {
  for (const MachineOperand &MO : R) {
    if (!MO.isReg())
      continue;

    if (Reg.isPhysical() && MO.getReg().isPhysical()) {
      if (TRI->regsOverlap(Reg, MO.getReg()))
        return true;
    } else if (MO.getReg() == Reg && Reg.isVirtual()) {
      LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
                            TRI->getSubRegIndexLaneMask(MO.getSubReg());
      if (Overlap.any())
        return true;
    }
  }
  return false;
}

bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
                                        unsigned SubReg) const {
  return instAccessReg(MI->uses(), Reg, SubReg);
}

bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
                                           unsigned SubReg) const {
  return instAccessReg(MI->defs(), Reg, SubReg);
}

TargetInstrInfo::RegSubRegPair
SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
                                        unsigned I) const {
  if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
    if (Reg.isPhysical()) {
      Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
    } else {
      Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
    }
  }
  return TargetInstrInfo::RegSubRegPair(Reg, Sub);
}

void SIShrinkInstructions::dropInstructionKeepingImpDefs(
    MachineInstr &MI) const {
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().implicit_uses().size() +
                    MI.getDesc().implicit_defs().size(),
                e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &Op = MI.getOperand(i);
    if (!Op.isDef())
      continue;
    BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
            TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
  }

  MI.eraseFromParent();
}

// Match:
// mov t, x
// mov x, y
// mov y, t
//
// =>
//
// mov t, x (t is potentially dead and move eliminated)
// v_swap_b32 x, y
//
// Returns a pointer to the next valid instruction if it was able to create
// v_swap_b32.
//
// This should not be done too early, so as not to prevent folding which may
// remove the matched moves. It is preferably done before RA to release saved
// registers, and also possibly after RA, which can insert copies too.
//
// This is really just a generic peephole that is not a canonical shrinking,
// although requirements match the pass placement and it reduces code size too.
MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
         MovT.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
         MovT.getOpcode() == AMDGPU::COPY);

  Register T = MovT.getOperand(0).getReg();
  unsigned Tsub = MovT.getOperand(0).getSubReg();
  MachineOperand &Xop = MovT.getOperand(1);

  if (!Xop.isReg())
    return nullptr;
  Register X = Xop.getReg();
  unsigned Xsub = Xop.getSubReg();

  unsigned Size = TII->getOpSize(MovT, 0);

  // We can't match v_swap_b16 pre-RA, because VGPR_16_Lo128 registers
  // are not allocatable.
  if (Size == 2 && X.isVirtual())
    return nullptr;

  if (!TRI->isVGPR(*MRI, X))
    return nullptr;

  const unsigned SearchLimit = 16;
  unsigned Count = 0;
  bool KilledT = false;
  for (auto Iter = std::next(MovT.getIterator()),
            E = MovT.getParent()->instr_end();
       Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {

    MachineInstr *MovY = &*Iter;
    KilledT = MovY->killsRegister(T, TRI);

    if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
         MovY->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
         MovY->getOpcode() != AMDGPU::COPY) ||
        !MovY->getOperand(1).isReg() || MovY->getOperand(1).getReg() != T ||
        MovY->getOperand(1).getSubReg() != Tsub)
      continue;

    Register Y = MovY->getOperand(0).getReg();
    unsigned Ysub = MovY->getOperand(0).getSubReg();

    if (!TRI->isVGPR(*MRI, Y))
      continue;

    MachineInstr *MovX = nullptr;
    for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
         I != IY; ++I) {
      if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) ||
          instModifiesReg(&*I, T, Tsub) ||
          (MovX && instModifiesReg(&*I, X, Xsub))) {
        MovX = nullptr;
        break;
      }
      if (!instReadsReg(&*I, Y, Ysub)) {
        if (!MovX && instModifiesReg(&*I, X, Xsub)) {
          MovX = nullptr;
          break;
        }
        continue;
      }
      if (MovX ||
          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
           I->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
           I->getOpcode() != AMDGPU::COPY) ||
          I->getOperand(0).getReg() != X ||
          I->getOperand(0).getSubReg() != Xsub) {
        MovX = nullptr;
        break;
      }

      if (Size > 4 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
        continue;

      MovX = &*I;
    }

    if (!MovX)
      continue;

    LLVM_DEBUG(dbgs() << "Matched v_swap:\n" << MovT << *MovX << *MovY);

    MachineBasicBlock &MBB = *MovT.getParent();
    SmallVector<MachineInstr *, 4> Swaps;
    if (Size == 2) {
      auto *MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
                          TII->get(AMDGPU::V_SWAP_B16))
                      .addDef(X)
                      .addDef(Y)
                      .addReg(Y)
                      .addReg(X)
                      .getInstr();
      Swaps.push_back(MIB);
    } else {
      assert(Size > 0 && Size % 4 == 0);
      for (unsigned I = 0; I < Size / 4; ++I) {
        TargetInstrInfo::RegSubRegPair X1, Y1;
        X1 = getSubRegForIndex(X, Xsub, I);
        Y1 = getSubRegForIndex(Y, Ysub, I);
        auto *MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
                            TII->get(AMDGPU::V_SWAP_B32))
                        .addDef(X1.Reg, 0, X1.SubReg)
                        .addDef(Y1.Reg, 0, Y1.SubReg)
                        .addReg(Y1.Reg, 0, Y1.SubReg)
                        .addReg(X1.Reg, 0, X1.SubReg)
                        .getInstr();
        Swaps.push_back(MIB);
      }
    }
    // Drop implicit EXEC.
    if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      for (MachineInstr *Swap : Swaps) {
        Swap->removeOperand(Swap->getNumExplicitOperands());
        Swap->copyImplicitOps(*MBB.getParent(), *MovX);
      }
    }
    MovX->eraseFromParent();
    dropInstructionKeepingImpDefs(*MovY);
    MachineInstr *Next = &*std::next(MovT.getIterator());

    if (T.isVirtual() && MRI->use_nodbg_empty(T)) {
      dropInstructionKeepingImpDefs(MovT);
    } else {
      Xop.setIsKill(false);
      for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I) {
        unsigned OpNo = MovT.getNumExplicitOperands() + I;
        const MachineOperand &Op = MovT.getOperand(OpNo);
        if (Op.isKill() && TRI->regsOverlap(X, Op.getReg()))
          MovT.removeOperand(OpNo);
      }
    }

    return Next;
  }

  return nullptr;
}

// If an instruction has a dead sdst, replace it with the NULL register on
// gfx1030+.
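// For example, an unused carry-out of v_add_co_u32 can be written to the null
// register instead of occupying an SGPR (pair).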
bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
  if (!ST->hasGFX10_3Insts())
    return false;

  MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
  if (!Op)
    return false;
  Register SDstReg = Op->getReg();
  if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
    return false;

  Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
  return true;
}

bool SIShrinkInstructions::run(MachineFunction &MF) {

  this->MF = &MF;
  MRI = &MF.getRegInfo();
  ST = &MF.getSubtarget<GCNSubtarget>();
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();
  IsPostRA = MF.getProperties().hasNoVRegs();

  unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
        // If this has a literal constant source that is the same as the
        // reversed bits of an inline immediate, replace with a bitreverse of
        // that constant. This saves 4 bytes in the common case of
        // materializing sign bits.

        // Test if we are after regalloc. We only want to do this after any
        // optimizations happen because this will confuse them.
        MachineOperand &Src = MI.getOperand(1);
        if (Src.isImm() && IsPostRA) {
          int32_t ModImm;
          unsigned ModOpcode =
              canModifyToInlineImmOp32(TII, Src, ModImm, /*Scalar=*/false);
          if (ModOpcode != 0) {
            MI.setDesc(TII->get(ModOpcode));
            Src.setImm(static_cast<int64_t>(ModImm));
            continue;
          }
        }
      }

      if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                            MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
                            MI.getOpcode() == AMDGPU::COPY)) {
        if (auto *NextMI = matchSwap(MI)) {
          Next = NextMI->getIterator();
          continue;
        }
      }

      // Try to use S_ADDK_I32 and S_MULK_I32.
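      // For example, "s_add_i32 s0, s0, 0x1234" becomes the tied two-operand
      // form "s_addk_i32 s0, 0x1234".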
      if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
          MI.getOpcode() == AMDGPU::S_MUL_I32) {
        const MachineOperand *Dest = &MI.getOperand(0);
        MachineOperand *Src0 = &MI.getOperand(1);
        MachineOperand *Src1 = &MI.getOperand(2);

        if (!Src0->isReg() && Src1->isReg()) {
          if (TII->commuteInstruction(MI, false, 1, 2))
            std::swap(Src0, Src1);
        }

        // FIXME: This could work better if hints worked with subregisters. If
        // we have a vector add of a constant, we usually don't get the correct
        // allocation due to the subregister usage.
        if (Dest->getReg().isVirtual() && Src0->isReg()) {
          MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
          MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
          continue;
        }

        if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
          if (Src1->isImm() && isKImmOperand(*Src1)) {
            unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32)
                               ? AMDGPU::S_ADDK_I32
                               : AMDGPU::S_MULK_I32;

            Src1->setImm(SignExtend64(Src1->getImm(), 32));
            MI.setDesc(TII->get(Opc));
            MI.tieOperands(0, 1);
          }
        }
      }

      // Try to use s_cmpk_*
      if (MI.isCompare() && TII->isSOPC(MI)) {
        shrinkScalarCompare(MI);
        continue;
      }

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
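      // For example, "s_mov_b32 s0, 0xffff8000" becomes "s_movk_i32 s0,
      // 0x8000"; immediates whose bitwise-not or bit-reverse is an inline
      // constant are handled by canModifyToInlineImmOp32 below.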
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Dst = MI.getOperand(0);
        MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm() && Dst.getReg().isPhysical()) {
          unsigned ModOpc;
          int32_t ModImm;
          if (isKImmOperand(Src)) {
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
            Src.setImm(SignExtend64(Src.getImm(), 32));
          } else if ((ModOpc = canModifyToInlineImmOp32(TII, Src, ModImm,
                                                        /*Scalar=*/true))) {
            MI.setDesc(TII->get(ModOpc));
            Src.setImm(static_cast<int64_t>(ModImm));
          }
        }

        continue;
      }

      // Shrink scalar logic operations.
      if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
          MI.getOpcode() == AMDGPU::S_OR_B32 ||
          MI.getOpcode() == AMDGPU::S_XOR_B32) {
        if (shrinkScalarLogicOp(MI))
          continue;
      }

      if (IsPostRA && TII->isMIMG(MI.getOpcode()) &&
          ST->getGeneration() >= AMDGPUSubtarget::GFX10) {
        shrinkMIMG(MI);
        continue;
      }

      if (!TII->isVOP3(MI))
        continue;

      if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
          MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
        shrinkMadFma(MI);
        continue;
      }

      // If there is no chance we will shrink it and use VCC as sdst to get
      // a 32-bit form, try to replace the dead sdst with NULL.
      if (TII->isVOP3(MI.getOpcode())) {
        tryReplaceDeadSDST(MI);
        if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
          continue;
        }
      }

      if (!TII->canShrink(MI, *MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
            !TII->canShrink(MI, *MRI)) {
          tryReplaceDeadSDST(MI);
          continue;
        }
      }

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        MachineOperand &Op0 = MI.getOperand(0);
        if (Op0.isReg()) {
          // Exclude VOPCX instructions as these don't explicitly write a
          // dst.
          Register DstReg = Op0.getReg();
          if (DstReg.isVirtual()) {
            // VOPC instructions can only write to the VCC register. We can't
            // force them to use VCC here, because this is only one register
            // and cannot deal with sequences which would require multiple
            // copies of VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
            //
            // So, instead of forcing the instruction to write to VCC, we
            // provide a hint to the register allocator to use VCC and then we
            // will run this pass again after RA and shrink it if it outputs to
            // VCC.
            MRI->setRegAllocationHint(DstReg, 0, VCCReg);
            continue;
          }
          if (DstReg != VCCReg)
            continue;
        }
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        Register SReg = Src2->getReg();
        if (SReg.isVirtual()) {
          MRI->setRegAllocationHint(SReg, 0, VCCReg);
          continue;
        }
        if (SReg != VCCReg)
          continue;
      }

      // Check for the bool flag output for instructions like V_ADD_I32_e64.
      const MachineOperand *SDst =
          TII->getNamedOperand(MI, AMDGPU::OpName::sdst);

      if (SDst) {
        bool Next = false;

        if (SDst->getReg() != VCCReg) {
          if (SDst->getReg().isVirtual())
            MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg);
          Next = true;
        }

        // All of the instructions with carry outs also have an SGPR input in
        // src2.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (Src2 && Src2->getReg() != VCCReg) {
          if (Src2->getReg().isVirtual())
            MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg);
          Next = true;
        }

        if (Next)
          continue;
      }

      // Pre-GFX10, shrinking VOP3 instructions pre-RA gave us the chance to
      // fold an immediate into the shrunk instruction as a literal operand. In
      // GFX10 VOP3 instructions can take a literal operand anyway, so there is
      // no advantage to doing this.
      if (ST->hasVOP3Literal() && !IsPostRA)
        continue;

      if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
          !shouldShrinkTrue16(MI))
        continue;

      // We can shrink this instruction
      LLVM_DEBUG(dbgs() << "Shrinking " << MI);

      MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
      ++NumInstructionsShrunk;

      // Copy extra operands not present in the instruction definition.
      copyExtraImplicitOps(*Inst32, MI);

      // Copy deadness from the old explicit vcc def to the new implicit def.
      if (SDst && SDst->isDead())
        Inst32->findRegisterDefOperand(VCCReg, /*TRI=*/nullptr)->setIsDead();

      MI.eraseFromParent();
      foldImmediates(*Inst32);

      LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
    }
  }
  return false;
}

bool SIShrinkInstructionsLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  return SIShrinkInstructions().run(MF);
}

PreservedAnalyses
SIShrinkInstructionsPass::run(MachineFunction &MF,
                              MachineFunctionAnalysisManager &) {
  if (MF.getFunction().hasOptNone() || !SIShrinkInstructions().run(MF))
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}