GCNVOPDUtils.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp]

1	//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file This file contains the AMDGPU DAG scheduling
10	/// mutation to pair VOPD instructions back to back. It also contains
11	/// subroutines useful in the creation of VOPD instructions.
12	//
13	//===----------------------------------------------------------------------===//
14
15	#include "GCNVOPDUtils.h"
16	#include "AMDGPUSubtarget.h"
17	#include "GCNSubtarget.h"
18	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19	#include "SIInstrInfo.h"
20	#include "Utils/AMDGPUBaseInfo.h"
21	#include "llvm/ADT/STLExtras.h"
22	#include "llvm/ADT/SmallVector.h"
23	#include "llvm/CodeGen/MachineBasicBlock.h"
24	#include "llvm/CodeGen/MachineInstr.h"
25	#include "llvm/CodeGen/MachineOperand.h"
26	#include "llvm/CodeGen/MachineRegisterInfo.h"
27	#include "llvm/CodeGen/MacroFusion.h"
28	#include "llvm/CodeGen/ScheduleDAG.h"
29	#include "llvm/CodeGen/ScheduleDAGMutation.h"
30	#include "llvm/CodeGen/TargetInstrInfo.h"
31	#include "llvm/MC/MCInst.h"
32
33	using namespace llvm;
34
35	#define DEBUG_TYPE "gcn-vopd-utils"
36
37	bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
38	const MachineInstr &MIX,
39	const MachineInstr &MIY, bool IsVOPD3) {
40	namespace VOPD = AMDGPU::VOPD;
41
42	const MachineFunction *MF = MIX.getMF();
43	const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
44
45	if (IsVOPD3 && !ST.hasVOPD3())
46	return false;
47	if (!IsVOPD3 && (TII.isVOP3(MI: MIX) \|\| TII.isVOP3(MI: MIY)))
48	return false;
49	if (TII.isDPP(MI: MIX) \|\| TII.isDPP(MI: MIY))
50	return false;
51
52	const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(Val: ST.getRegisterInfo());
53	const MachineRegisterInfo &MRI = MF->getRegInfo();
54	// Literals also count against scalar bus limit
55	SmallVector<const MachineOperand *> UniqueLiterals;
56	auto addLiteral = [&](const MachineOperand &Op) {
57	for (auto &Literal : UniqueLiterals) {
58	if (Literal->isIdenticalTo(Other: Op))
59	return;
60	}
61	UniqueLiterals.push_back(Elt: &Op);
62	};
63	SmallVector<Register> UniqueScalarRegs;
64
65	// MIX must not modify any registers used by MIY.
66	for (const auto &Use : MIY.uses())
67	if (Use.isReg() && MIX.modifiesRegister(Reg: Use.getReg(), TRI))
68	return false;
69
70	auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
71	const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? MIX : MIY;
72	const MachineOperand &Operand = MI.getOperand(i: OperandIdx);
73	if (Operand.isReg() && TRI->isVectorRegister(MRI, Reg: Operand.getReg()))
74	return Operand.getReg();
75	return Register ();
76	};
77
78	auto InstInfo = AMDGPU::getVOPDInstInfo(OpX: MIX.getDesc(), OpY: MIY.getDesc());
79
80	for (auto CompIdx : VOPD::COMPONENTS) {
81	const MachineInstr &MI = (CompIdx == VOPD::X) ? MIX : MIY;
82
83	const MachineOperand &Src0 = *TII.getNamedOperand(MI, OperandName: AMDGPU::OpName::src0);
84	if (Src0.isReg()) {
85	if (!TRI->isVectorRegister(MRI, Reg: Src0.getReg())) {
86	if (!is_contained(Range&: UniqueScalarRegs, Element: Src0.getReg()))
87	UniqueScalarRegs.push_back(Elt: Src0.getReg());
88	}
89	} else if (!TII.isInlineConstant(MO: Src0)) {
90	if (IsVOPD3)
91	return false;
92	addLiteral (Src0);
93	}
94
95	if (InstInfo [CompIdx].hasMandatoryLiteral()) {
96	if (IsVOPD3)
97	return false;
98
99	auto CompOprIdx = InstInfo [CompIdx].getMandatoryLiteralCompOperandIndex();
100	addLiteral (MI.getOperand(i: CompOprIdx));
101	}
102	if (MI.getDesc().hasImplicitUseOfPhysReg(Reg: AMDGPU::VCC))
103	UniqueScalarRegs.push_back(Elt: AMDGPU::VCC_LO);
104
105	if (IsVOPD3) {
106	for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
107	const MachineOperand *Src = TII.getNamedOperand(MI, OperandName: OpName);
108	if (!Src)
109	continue;
110	if (OpName == AMDGPU::OpName::src2) {
111	if (AMDGPU::hasNamedOperand(Opcode: MI.getOpcode(), NamedIdx: AMDGPU::OpName::bitop3))
112	continue;
113	if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
114	UniqueScalarRegs.push_back(Elt: Src->getReg());
115	continue;
116	}
117	}
118	if (!Src->isReg() \|\| !TRI->isVGPR(MRI, Reg: Src->getReg()))
119	return false;
120	}
121
122	for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
123	AMDGPU::OpName::op_sel}) {
124	if (TII.hasModifiersSet(MI, OpName))
125	return false;
126	}
127
128	// Neg is allowed, other modifiers are not. NB: even though sext has the
129	// same value as neg, there are no combinable instructions with sext.
130	for (auto OpName :
131	{AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
132	AMDGPU::OpName::src2_modifiers}) {
133	const MachineOperand *Mods = TII.getNamedOperand(MI, OperandName: OpName);
134	if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
135	return false;
136	}
137	}
138	}
139
140	if (UniqueLiterals.size() > `1`)
141	return false;
142	if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > `2`)
143	return false;
144
145	// On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
146	// source-cache.
147	bool SkipSrc = (ST.hasGFX11_7Insts() \|\| ST.hasGFX12Insts()) &&
148	MIX.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
149	MIY.getOpcode() == AMDGPU::V_MOV_B32_e32;
150	bool AllowSameVGPR = ST.hasGFX1250Insts();
151
152	if (InstInfo.hasInvalidOperand(GetRegIdx: getVRegIdx, MRI: *TRI, SkipSrc, AllowSameVGPR,
153	VOPD3: IsVOPD3))
154	return false;
155
156	if (IsVOPD3) {
157	// BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
158	// MIX check is only relevant to scheduling?
159	if (AMDGPU::hasNamedOperand(Opcode: MIX.getOpcode(), NamedIdx: AMDGPU::OpName::bitop3)) {
160	const MachineOperand &Src2 =
161	*TII.getNamedOperand(MI: MIX, OperandName: AMDGPU::OpName::src2);
162	if (!Src2.isImm() \|\| Src2.getImm())
163	return false;
164	}
165	if (AMDGPU::hasNamedOperand(Opcode: MIY.getOpcode(), NamedIdx: AMDGPU::OpName::bitop3)) {
166	const MachineOperand &Src2 =
167	*TII.getNamedOperand(MI: MIY, OperandName: AMDGPU::OpName::src2);
168	if (!Src2.isImm() \|\| Src2.getImm())
169	return false;
170	}
171	}
172
173	LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << MIX
174	<< "\n\tY: " << MIY << "\n");
175	return true;
176	}
177
178	/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
179	/// together. Given SecondMI, when FirstMI is unspecified, then check if
180	/// SecondMI may be part of a fused pair at all.
181	static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
182	const TargetSubtargetInfo &TSI,
183	const MachineInstr *FirstMI,
184	const MachineInstr &SecondMI) {
185	const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
186	const GCNSubtarget &ST = STII.getSubtarget();
187	unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
188	unsigned Opc2 = SecondMI.getOpcode();
189
190	const auto checkVOPD = [&](bool VOPD3) -> bool {
191	auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc: Opc2, EncodingFamily, VOPD3);
192
193	// One instruction case
194	if (!FirstMI)
195	return SecondCanBeVOPD.Y \|\| SecondCanBeVOPD.X;
196
197	unsigned Opc = FirstMI->getOpcode();
198	auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
199
200	if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) \|\|
201	(FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
202	return false;
203
204	#ifdef EXPENSIVE_CHECKS
205	assert([&]() -> bool {
206	for (auto MII = MachineBasicBlock::const_iterator(FirstMI);
207	MII != FirstMI->getParent()->instr_end(); ++MII) {
208	if (&*MII == &SecondMI)
209	return true;
210	}
211	return false;
212	}() && "Expected FirstMI to precede SecondMI");
213	#endif
214
215	return checkVOPDRegConstraints(TII: STII, MIX: *FirstMI, MIY: SecondMI, IsVOPD3: VOPD3);
216	};
217
218	return checkVOPD (false) \|\| (ST.hasVOPD3() && checkVOPD (true));
219	}
220
221	namespace {
222	/// Adapts design from MacroFusion
223	/// Puts valid candidate instructions back-to-back so they can easily
224	/// be turned into VOPD instructions
225	/// Greedily pairs instruction candidates. O(n^2) algorithm.
226	struct VOPDPairingMutation : ScheduleDAGMutation {
227	MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
228
229	VOPDPairingMutation(
230	MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
231	: shouldScheduleAdjacent(shouldScheduleAdjacent) {}
232
233	void apply(ScheduleDAGInstrs *DAG) override {
234	const TargetInstrInfo &TII = *DAG->TII;
235	const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
236	if (!AMDGPU::hasVOPD(STI: ST) \|\| !ST.isWave32()) {
237	LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
238	return;
239	}
240
241	std::vector<SUnit>::iterator ISUI, JSUI;
242	for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
243	const MachineInstr *IMI = ISUI ->getInstr();
244	if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
245	continue;
246	if (!hasLessThanNumFused(SU: *ISUI, FuseLimit: `2`))
247	continue;
248
249	for (JSUI = ISUI + `1`; JSUI != DAG->SUnits.end(); ++JSUI) {
250	if (JSUI ->isBoundaryNode())
251	continue;
252	const MachineInstr *JMI = JSUI ->getInstr();
253	if (!hasLessThanNumFused(SU: *JSUI, FuseLimit: `2`) \|\|
254	!shouldScheduleAdjacent(TII, ST, IMI, *JMI))
255	continue;
256	if (fuseInstructionPair(DAG&: DAG, FirstSU&: ISUI, SecondSU&: *JSUI))
257	break;
258	}
259	}
260	LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
261	}
262	};
263	} // namespace
264
265	std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
266	return std::make_unique<VOPDPairingMutation>(args&: shouldScheduleVOPDAdjacent);
267	}
268

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp