| 1 | //===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file This file contains the AMDGPU DAG scheduling |
| 10 | /// mutation to pair VOPD instructions back to back. It also contains |
| 11 | /// subroutines useful in the creation of VOPD instructions. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "GCNVOPDUtils.h" |
| 16 | #include "AMDGPUSubtarget.h" |
| 17 | #include "GCNSubtarget.h" |
| 18 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| 19 | #include "SIInstrInfo.h" |
| 20 | #include "Utils/AMDGPUBaseInfo.h" |
| 21 | #include "llvm/ADT/STLExtras.h" |
| 22 | #include "llvm/ADT/SmallVector.h" |
| 23 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 24 | #include "llvm/CodeGen/MachineInstr.h" |
| 25 | #include "llvm/CodeGen/MachineOperand.h" |
| 26 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 27 | #include "llvm/CodeGen/MacroFusion.h" |
| 28 | #include "llvm/CodeGen/ScheduleDAG.h" |
| 29 | #include "llvm/CodeGen/ScheduleDAGMutation.h" |
| 30 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 31 | #include "llvm/MC/MCInst.h" |
| 32 | |
| 33 | using namespace llvm; |
| 34 | |
| 35 | #define DEBUG_TYPE "gcn-vopd-utils" |
| 36 | |
| 37 | bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII, |
| 38 | const MachineInstr &FirstMI, |
| 39 | const MachineInstr &SecondMI) { |
| 40 | namespace VOPD = AMDGPU::VOPD; |
| 41 | |
| 42 | const MachineFunction *MF = FirstMI.getMF(); |
| 43 | const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); |
| 44 | const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(Val: ST.getRegisterInfo()); |
| 45 | const MachineRegisterInfo &MRI = MF->getRegInfo(); |
| 46 | // Literals also count against scalar bus limit |
| 47 | SmallVector<const MachineOperand *> UniqueLiterals; |
| 48 | auto addLiteral = [&](const MachineOperand &Op) { |
| 49 | for (auto &Literal : UniqueLiterals) { |
| 50 | if (Literal->isIdenticalTo(Other: Op)) |
| 51 | return; |
| 52 | } |
| 53 | UniqueLiterals.push_back(Elt: &Op); |
| 54 | }; |
| 55 | SmallVector<Register> UniqueScalarRegs; |
| 56 | assert([&]() -> bool { |
| 57 | for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); |
| 58 | MII != FirstMI.getParent()->instr_end(); ++MII) { |
| 59 | if (&*MII == &SecondMI) |
| 60 | return true; |
| 61 | } |
| 62 | return false; |
| 63 | }() && "Expected FirstMI to precede SecondMI" ); |
| 64 | // Cannot pair dependent instructions |
| 65 | for (const auto &Use : SecondMI.uses()) |
| 66 | if (Use.isReg() && FirstMI.modifiesRegister(Reg: Use.getReg(), TRI)) |
| 67 | return false; |
| 68 | |
| 69 | auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) { |
| 70 | const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI; |
| 71 | const MachineOperand &Operand = MI.getOperand(i: OperandIdx); |
| 72 | if (Operand.isReg() && TRI->isVectorRegister(MRI, Reg: Operand.getReg())) |
| 73 | return Operand.getReg(); |
| 74 | return Register(); |
| 75 | }; |
| 76 | |
| 77 | auto InstInfo = |
| 78 | AMDGPU::getVOPDInstInfo(OpX: FirstMI.getDesc(), OpY: SecondMI.getDesc()); |
| 79 | |
| 80 | for (auto CompIdx : VOPD::COMPONENTS) { |
| 81 | const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI; |
| 82 | |
| 83 | const MachineOperand &Src0 = MI.getOperand(i: VOPD::Component::SRC0); |
| 84 | if (Src0.isReg()) { |
| 85 | if (!TRI->isVectorRegister(MRI, Reg: Src0.getReg())) { |
| 86 | if (!is_contained(Range&: UniqueScalarRegs, Element: Src0.getReg())) |
| 87 | UniqueScalarRegs.push_back(Elt: Src0.getReg()); |
| 88 | } |
| 89 | } else { |
| 90 | if (!TII.isInlineConstant(MI, OpIdx: VOPD::Component::SRC0)) |
| 91 | addLiteral(Src0); |
| 92 | } |
| 93 | |
| 94 | if (InstInfo[CompIdx].hasMandatoryLiteral()) { |
| 95 | auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex(); |
| 96 | addLiteral(MI.getOperand(i: CompOprIdx)); |
| 97 | } |
| 98 | if (MI.getDesc().hasImplicitUseOfPhysReg(Reg: AMDGPU::VCC)) |
| 99 | UniqueScalarRegs.push_back(Elt: AMDGPU::VCC_LO); |
| 100 | } |
| 101 | |
| 102 | if (UniqueLiterals.size() > 1) |
| 103 | return false; |
| 104 | if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2) |
| 105 | return false; |
| 106 | |
| 107 | // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. |
| 108 | bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 && |
| 109 | FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 && |
| 110 | SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32; |
| 111 | |
| 112 | if (InstInfo.hasInvalidOperand(GetRegIdx: getVRegIdx, SkipSrc)) |
| 113 | return false; |
| 114 | |
| 115 | LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI |
| 116 | << "\n\tY: " << SecondMI << "\n" ); |
| 117 | return true; |
| 118 | } |
| 119 | |
| 120 | /// Check if the instr pair, FirstMI and SecondMI, should be scheduled |
| 121 | /// together. Given SecondMI, when FirstMI is unspecified, then check if |
| 122 | /// SecondMI may be part of a fused pair at all. |
| 123 | static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, |
| 124 | const TargetSubtargetInfo &TSI, |
| 125 | const MachineInstr *FirstMI, |
| 126 | const MachineInstr &SecondMI) { |
| 127 | const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII); |
| 128 | unsigned Opc2 = SecondMI.getOpcode(); |
| 129 | auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc: Opc2); |
| 130 | |
| 131 | // One instruction case |
| 132 | if (!FirstMI) |
| 133 | return SecondCanBeVOPD.Y; |
| 134 | |
| 135 | unsigned Opc = FirstMI->getOpcode(); |
| 136 | auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); |
| 137 | |
| 138 | if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) || |
| 139 | (FirstCanBeVOPD.Y && SecondCanBeVOPD.X))) |
| 140 | return false; |
| 141 | |
| 142 | return checkVOPDRegConstraints(TII: STII, FirstMI: *FirstMI, SecondMI); |
| 143 | } |
| 144 | |
| 145 | namespace { |
| 146 | /// Adapts design from MacroFusion |
| 147 | /// Puts valid candidate instructions back-to-back so they can easily |
| 148 | /// be turned into VOPD instructions |
| 149 | /// Greedily pairs instruction candidates. O(n^2) algorithm. |
| 150 | struct VOPDPairingMutation : ScheduleDAGMutation { |
| 151 | MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer |
| 152 | |
| 153 | VOPDPairingMutation( |
| 154 | MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer |
| 155 | : shouldScheduleAdjacent(shouldScheduleAdjacent) {} |
| 156 | |
| 157 | void apply(ScheduleDAGInstrs *DAG) override { |
| 158 | const TargetInstrInfo &TII = *DAG->TII; |
| 159 | const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>(); |
| 160 | if (!AMDGPU::hasVOPD(STI: ST) || !ST.isWave32()) { |
| 161 | LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n" ); |
| 162 | return; |
| 163 | } |
| 164 | |
| 165 | std::vector<SUnit>::iterator ISUI, JSUI; |
| 166 | for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) { |
| 167 | const MachineInstr *IMI = ISUI->getInstr(); |
| 168 | if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI)) |
| 169 | continue; |
| 170 | if (!hasLessThanNumFused(SU: *ISUI, FuseLimit: 2)) |
| 171 | continue; |
| 172 | |
| 173 | for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) { |
| 174 | if (JSUI->isBoundaryNode()) |
| 175 | continue; |
| 176 | const MachineInstr *JMI = JSUI->getInstr(); |
| 177 | if (!hasLessThanNumFused(SU: *JSUI, FuseLimit: 2) || |
| 178 | !shouldScheduleAdjacent(TII, ST, IMI, *JMI)) |
| 179 | continue; |
| 180 | if (fuseInstructionPair(DAG&: *DAG, FirstSU&: *ISUI, SecondSU&: *JSUI)) |
| 181 | break; |
| 182 | } |
| 183 | } |
| 184 | LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n" ); |
| 185 | } |
| 186 | }; |
| 187 | } // namespace |
| 188 | |
| 189 | std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() { |
| 190 | return std::make_unique<VOPDPairingMutation>(args&: shouldScheduleVOPDAdjacent); |
| 191 | } |
| 192 | |