1//===- GCNCreateVOPD.cpp - Create VOPD Instructions ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
/// Combine pairs of adjacent VALU instructions into single VOPD instructions.
/// This only works in wave32 mode.
/// VOPD has register requirements; a pair is not combined when those
/// requirements are not met.
/// The shouldCombineVOPD mutator in the postRA machine scheduler places
/// candidate instructions for VOPD back-to-back.
16///
17//
18//===----------------------------------------------------------------------===//
19
20#include "AMDGPU.h"
21#include "GCNSubtarget.h"
22#include "GCNVOPDUtils.h"
23#include "SIInstrInfo.h"
24#include "Utils/AMDGPUBaseInfo.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/CodeGen/MachineBasicBlock.h"
28#include "llvm/CodeGen/MachineInstr.h"
29#include "llvm/CodeGen/MachineOperand.h"
30#include "llvm/CodeGen/MachinePassManager.h"
31#include "llvm/Support/Debug.h"
32
// Name used for this pass's debug output and statistics; it is also the
// argument string handed to INITIALIZE_PASS at the bottom of this file.
#define DEBUG_TYPE "gcn-create-vopd"
// Incremented once per fused pair in GCNCreateVOPD::doReplace().
STATISTIC(NumVOPDCreated, "Number of VOPD Insts Created.");

using namespace llvm;
37
38namespace {
39
40class GCNCreateVOPD {
41private:
42 class VOPDCombineInfo {
43 public:
44 VOPDCombineInfo() = default;
45 VOPDCombineInfo(MachineInstr *First, MachineInstr *Second,
46 bool VOPD3 = false)
47 : FirstMI(First), SecondMI(Second), IsVOPD3(VOPD3) {}
48
49 MachineInstr *FirstMI;
50 MachineInstr *SecondMI;
51 bool IsVOPD3;
52 };
53
54public:
55 const GCNSubtarget *ST = nullptr;
56
57 bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
58 auto *FirstMI = CI.FirstMI;
59 auto *SecondMI = CI.SecondMI;
60 unsigned Opc1 = FirstMI->getOpcode();
61 unsigned Opc2 = SecondMI->getOpcode();
62 unsigned EncodingFamily =
63 AMDGPU::getVOPDEncodingFamily(ST: SII->getSubtarget());
64 int NewOpcode = AMDGPU::getVOPDFull(OpX: AMDGPU::getVOPDOpcode(Opc: Opc1, VOPD3: CI.IsVOPD3),
65 OpY: AMDGPU::getVOPDOpcode(Opc: Opc2, VOPD3: CI.IsVOPD3),
66 EncodingFamily, VOPD3: CI.IsVOPD3);
67 assert(NewOpcode != -1 &&
68 "Should have previously determined this as a possible VOPD\n");
69
70 auto VOPDInst = BuildMI(BB&: *FirstMI->getParent(), I: FirstMI,
71 MIMD: FirstMI->getDebugLoc(), MCID: SII->get(Opcode: NewOpcode))
72 .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
73
74 namespace VOPD = AMDGPU::VOPD;
75 MachineInstr *MI[] = {FirstMI, SecondMI};
76 auto InstInfo =
77 AMDGPU::getVOPDInstInfo(OpX: FirstMI->getDesc(), OpY: SecondMI->getDesc());
78
79 for (auto CompIdx : VOPD::COMPONENTS) {
80 auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
81 VOPDInst.add(MO: MI[CompIdx]->getOperand(i: MCOprIdx));
82 }
83
84 const AMDGPU::OpName Mods[2][3] = {
85 {AMDGPU::OpName::src0X_modifiers, AMDGPU::OpName::vsrc1X_modifiers,
86 AMDGPU::OpName::vsrc2X_modifiers},
87 {AMDGPU::OpName::src0Y_modifiers, AMDGPU::OpName::vsrc1Y_modifiers,
88 AMDGPU::OpName::vsrc2Y_modifiers}};
89 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
90 AMDGPU::OpName::src1_modifiers,
91 AMDGPU::OpName::src2_modifiers};
92 const unsigned VOPDOpc = VOPDInst->getOpcode();
93
94 for (auto CompIdx : VOPD::COMPONENTS) {
95 auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
96 bool IsVOP3 = SII->isVOP3(MI: *MI[CompIdx]);
97 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
98 if (AMDGPU::hasNamedOperand(Opcode: VOPDOpc, NamedIdx: Mods[CompIdx][CompSrcIdx])) {
99 const MachineOperand *Mod =
100 SII->getNamedOperand(MI&: *MI[CompIdx], OperandName: SrcMods[CompSrcIdx]);
101 VOPDInst.addImm(Val: Mod ? Mod->getImm() : 0);
102 }
103 auto MCOprIdx =
104 InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3: IsVOP3);
105 VOPDInst.add(MO: MI[CompIdx]->getOperand(i: MCOprIdx));
106 }
107 if (MI[CompIdx]->getOpcode() == AMDGPU::V_CNDMASK_B32_e32 && CI.IsVOPD3)
108 VOPDInst.addReg(RegNo: AMDGPU::VCC_LO);
109 }
110
111 if (CI.IsVOPD3) {
112 if (unsigned BitOp2 = AMDGPU::getBitOp2(Opc: Opc2))
113 VOPDInst.addImm(Val: BitOp2);
114 }
115
116 SII->fixImplicitOperands(MI&: *VOPDInst);
117 for (auto CompIdx : VOPD::COMPONENTS)
118 VOPDInst.copyImplicitOps(OtherMI: *MI[CompIdx]);
119
120 LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
121 << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
122
123 for (auto CompIdx : VOPD::COMPONENTS)
124 MI[CompIdx]->eraseFromParent();
125
126 ++NumVOPDCreated;
127 return true;
128 }
129
130 bool run(MachineFunction &MF) {
131 ST = &MF.getSubtarget<GCNSubtarget>();
132 if (!AMDGPU::hasVOPD(STI: *ST) || !ST->isWave32())
133 return false;
134 LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
135
136 const SIInstrInfo *SII = ST->getInstrInfo();
137 const SIRegisterInfo *TRI = ST->getRegisterInfo();
138 bool Changed = false;
139 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST: *ST);
140 bool HasVOPD3 = ST->hasVOPD3();
141
142 SmallVector<VOPDCombineInfo> ReplaceCandidates;
143
144 for (auto &MBB : MF) {
145 auto MII = MBB.begin(), E = MBB.end();
146 while (MII != E) {
147 auto *FirstMI = &*MII;
148 MII = next_nodbg(It: MII, End: MBB.end());
149 if (MII == MBB.end())
150 break;
151 if (FirstMI->isDebugInstr())
152 continue;
153 auto *SecondMI = &*MII;
154 unsigned Opc = FirstMI->getOpcode();
155 unsigned Opc2 = SecondMI->getOpcode();
156 VOPDCombineInfo CI;
157
158 const auto checkVOPD = [&](bool VOPD3) -> bool {
159 llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD =
160 AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
161 llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD =
162 AMDGPU::getCanBeVOPD(Opc: Opc2, EncodingFamily, VOPD3);
163
164 if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y &&
165 llvm::checkVOPDRegConstraints(TII: *SII, FirstMI: *FirstMI, SecondMI: *SecondMI, IsVOPD3: VOPD3)) {
166 CI = VOPDCombineInfo(FirstMI, SecondMI, VOPD3);
167 return true;
168 }
169 // We can try swapping the order of the instructions, but in that case
170 // neither instruction can write to a register the other reads from.
171 // OpX cannot write something OpY reads because that is the hardware
172 // rule, and OpY cannot write what OpX reads because that would
173 // violate the data dependency in the original order.
174 for (const auto &Use : SecondMI->uses())
175 if (Use.isReg() && FirstMI->modifiesRegister(Reg: Use.getReg(), TRI))
176 return false;
177 if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X &&
178 llvm::checkVOPDRegConstraints(TII: *SII, FirstMI: *SecondMI, SecondMI: *FirstMI, IsVOPD3: VOPD3)) {
179 CI = VOPDCombineInfo(SecondMI, FirstMI, VOPD3);
180 return true;
181 }
182 return false;
183 };
184
185 if (checkVOPD(false) || (HasVOPD3 && checkVOPD(true))) {
186 ReplaceCandidates.push_back(Elt: CI);
187 ++MII;
188 }
189 }
190 }
191 for (auto &CI : ReplaceCandidates) {
192 Changed |= doReplace(SII, CI);
193 }
194
195 return Changed;
196 }
197};
198
199class GCNCreateVOPDLegacy : public MachineFunctionPass {
200public:
201 static char ID;
202 GCNCreateVOPDLegacy() : MachineFunctionPass(ID) {}
203
204 void getAnalysisUsage(AnalysisUsage &AU) const override {
205 AU.setPreservesCFG();
206 MachineFunctionPass::getAnalysisUsage(AU);
207 }
208
209 StringRef getPassName() const override {
210 return "GCN Create VOPD Instructions";
211 }
212 bool runOnMachineFunction(MachineFunction &MF) override {
213 if (skipFunction(F: MF.getFunction()))
214 return false;
215
216 return GCNCreateVOPD().run(MF);
217 }
218};
219
220} // namespace
221
222PreservedAnalyses
223llvm::GCNCreateVOPDPass::run(MachineFunction &MF,
224 MachineFunctionAnalysisManager &AM) {
225 if (!GCNCreateVOPD().run(MF))
226 return PreservedAnalyses::all();
227 return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
228}
229
// Definition of the legacy pass's static ID member, which its constructor
// passes to MachineFunctionPass.
char GCNCreateVOPDLegacy::ID = 0;

// Expose the legacy pass ID as llvm::GCNCreateVOPDID so code outside this
// file's anonymous namespace can refer to the pass.
char &llvm::GCNCreateVOPDID = GCNCreateVOPDLegacy::ID;

// Register the legacy pass under the DEBUG_TYPE argument string.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG-only" and "is analysis" flags of INITIALIZE_PASS — confirm against
// llvm/PassSupport.h.
INITIALIZE_PASS(GCNCreateVOPDLegacy, DEBUG_TYPE, "GCN Create VOPD Instructions",
                false, false)
236