//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains the AMDGPU DAG scheduling
/// mutation to pair VOPD instructions back to back. It also contains
/// subroutines useful in the creation of VOPD instructions.
//
//===----------------------------------------------------------------------===//

#include "GCNVOPDUtils.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCInst.h"

using namespace llvm;

#define DEBUG_TYPE "gcn-vopd-utils"

bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
                                   const MachineInstr &MIX,
                                   const MachineInstr &MIY, bool IsVOPD3) {
  namespace VOPD = AMDGPU::VOPD;

  const MachineFunction *MF = MIX.getMF();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

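  // The VOPD3 encoding requires subtarget support; the classic VOPD encoding
  // cannot hold VOP3-only instructions, and DPP is never pairable.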
  if (IsVOPD3 && !ST.hasVOPD3())
    return false;
  if (!IsVOPD3 && (TII.isVOP3(MIX) || TII.isVOP3(MIY)))
    return false;
  if (TII.isDPP(MIX) || TII.isDPP(MIY))
    return false;

  const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  // Literals also count against scalar bus limit
  SmallVector<const MachineOperand *> UniqueLiterals;
  auto addLiteral = [&](const MachineOperand &Op) {
    for (auto &Literal : UniqueLiterals) {
      if (Literal->isIdenticalTo(Op))
        return;
    }
    UniqueLiterals.push_back(&Op);
  };
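  // Unique SGPRs read by the pair; together with UniqueLiterals these are
  // checked against the shared scalar bus limit below.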
  SmallVector<Register> UniqueScalarRegs;

  // MIX must not modify any registers used by MIY.
  for (const auto &Use : MIY.uses())
    if (Use.isReg() && MIX.modifiesRegister(Use.getReg(), TRI))
      return false;

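  // Resolve a VOPD component/operand index pair to the VGPR it references,
  // or a null Register for non-vector operands.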
  auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
    const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? MIX : MIY;
    const MachineOperand &Operand = MI.getOperand(OperandIdx);
    if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
      return Operand.getReg();
    return Register();
  };

  auto InstInfo = AMDGPU::getVOPDInstInfo(MIX.getDesc(), MIY.getDesc());

  for (auto CompIdx : VOPD::COMPONENTS) {
    const MachineInstr &MI = (CompIdx == VOPD::X) ? MIX : MIY;

    const MachineOperand &Src0 =
        *TII.getNamedOperand(MI, AMDGPU::OpName::src0);
    if (Src0.isReg()) {
      if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
        if (!is_contained(UniqueScalarRegs, Src0.getReg()))
          UniqueScalarRegs.push_back(Src0.getReg());
      }
    } else if (!TII.isInlineConstant(Src0)) {
      if (IsVOPD3)
        return false;
      addLiteral(Src0);
    }

    if (InstInfo[CompIdx].hasMandatoryLiteral()) {
      if (IsVOPD3)
        return false;

      auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
      addLiteral(MI.getOperand(CompOprIdx));
    }
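    // An implicit use of VCC (e.g. a carry-in) also occupies the scalar bus.
    // Pairing is only done in wave32 (see VOPDPairingMutation::apply), so
    // VCC_LO stands in for the full mask.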
    if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
      UniqueScalarRegs.push_back(AMDGPU::VCC_LO);

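    // VOPD3 is stricter about the remaining sources: src1/src2 must be VGPRs
    // (modulo the exceptions below), and clamp/omod/op_sel or any source
    // modifier other than neg disqualifies the pair.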
    if (IsVOPD3) {
      for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
        const MachineOperand *Src = TII.getNamedOperand(MI, OpName);
        if (!Src)
          continue;
        if (OpName == AMDGPU::OpName::src2) {
          if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3))
            continue;
          if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
            UniqueScalarRegs.push_back(Src->getReg());
            continue;
          }
        }
        if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg()))
          return false;
      }

      for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
                          AMDGPU::OpName::op_sel}) {
        if (TII.hasModifiersSet(MI, OpName))
          return false;
      }

      // Neg is allowed, other modifiers are not. NB: even though sext has the
      // same value as neg, there are no combinable instructions with sext.
      for (auto OpName :
           {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
            AMDGPU::OpName::src2_modifiers}) {
        const MachineOperand *Mods = TII.getNamedOperand(MI, OpName);
        if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
          return false;
      }
    }
  }

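  // Enforce the shared scalar bus limit: at most one unique literal, and at
  // most two unique scalar operands (literals plus SGPRs) across the pair.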
  if (UniqueLiterals.size() > 1)
    return false;
  if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
    return false;

  // On GFX12+, if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2
  // source-cache.
  bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
                 MIX.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
                 MIY.getOpcode() == AMDGPU::V_MOV_B32_e32;
  bool AllowSameVGPR = ST.hasGFX1250Insts();

  if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR,
                                 IsVOPD3))
    return false;

  if (IsVOPD3) {
    // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
    // MIX check is only relevant to scheduling?
    if (AMDGPU::hasNamedOperand(MIX.getOpcode(), AMDGPU::OpName::bitop3)) {
      const MachineOperand &Src2 =
          *TII.getNamedOperand(MIX, AMDGPU::OpName::src2);
      if (!Src2.isImm() || Src2.getImm())
        return false;
    }
    if (AMDGPU::hasNamedOperand(MIY.getOpcode(), AMDGPU::OpName::bitop3)) {
      const MachineOperand &Src2 =
          *TII.getNamedOperand(MIY, AMDGPU::OpName::src2);
      if (!Src2.isImm() || Src2.getImm())
        return false;
    }
  }

  LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << MIX
                    << "\n\tY: " << MIY << "\n");
  return true;
}

/// Check if the instruction pair, FirstMI and SecondMI, should be scheduled
/// together. When FirstMI is unspecified, check whether SecondMI may be part
/// of a fused pair at all.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
                                       const TargetSubtargetInfo &TSI,
                                       const MachineInstr *FirstMI,
                                       const MachineInstr &SecondMI) {
  const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
  const GCNSubtarget &ST = STII.getSubtarget();
  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
  unsigned Opc2 = SecondMI.getOpcode();

  const auto checkVOPD = [&](bool VOPD3) -> bool {
    auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);

    // One instruction case.
    if (!FirstMI)
      return SecondCanBeVOPD.Y || SecondCanBeVOPD.X;

    unsigned Opc = FirstMI->getOpcode();
    auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);

    if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
          (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
      return false;

    assert([&]() -> bool {
      for (auto MII = MachineBasicBlock::const_iterator(FirstMI);
           MII != FirstMI->getParent()->instr_end(); ++MII) {
        if (&*MII == &SecondMI)
          return true;
      }
      return false;
    }() && "Expected FirstMI to precede SecondMI");

    return checkVOPDRegConstraints(STII, *FirstMI, SecondMI, VOPD3);
  };

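  // Try the classic VOPD encoding first; fall back to VOPD3 where the
  // subtarget supports it.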
  return checkVOPD(false) || (ST.hasVOPD3() && checkVOPD(true));
}

namespace {
/// Adapts the design from MacroFusion: greedily pairs candidate instructions
/// back-to-back so they can easily be turned into VOPD instructions.
/// O(n^2) algorithm.
struct VOPDPairingMutation : ScheduleDAGMutation {
  MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer

  VOPDPairingMutation(
      MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
      : shouldScheduleAdjacent(shouldScheduleAdjacent) {}

  void apply(ScheduleDAGInstrs *DAG) override {
    const TargetInstrInfo &TII = *DAG->TII;
    const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
    if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
      LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
      return;
    }

    std::vector<SUnit>::iterator ISUI, JSUI;
    for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
      const MachineInstr *IMI = ISUI->getInstr();
      if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
        continue;
      if (!hasLessThanNumFused(*ISUI, 2))
        continue;

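      // Greedily take the first later unit that can legally pair with ISUI.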
      for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
        if (JSUI->isBoundaryNode())
          continue;
        const MachineInstr *JMI = JSUI->getInstr();
        if (!hasLessThanNumFused(*JSUI, 2) ||
            !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
          continue;
        if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
          break;
      }
    }
    LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
  }
};
} // namespace

std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
  return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
}