1//===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
/// \file
/// Lowers the WWM_COPY instructions for various register classes.
/// The AMDGPU target generates WWM_COPY instructions to differentiate WWM
/// copies from ordinary COPYs. This pass generates the exec mask manipulation
/// instructions necessary to replicate 'Whole Wave Mode' and lowers each
/// WWM_COPY back to a COPY.
15//
16//===----------------------------------------------------------------------===//
17
18#include "SILowerWWMCopies.h"
19#include "AMDGPU.h"
20#include "GCNSubtarget.h"
21#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22#include "SIMachineFunctionInfo.h"
23#include "llvm/CodeGen/LiveIntervals.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/VirtRegMap.h"
26#include "llvm/InitializePasses.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "si-lower-wwm-copies"
31
32namespace {
33
34class SILowerWWMCopies {
35public:
36 SILowerWWMCopies(LiveIntervals *LIS, SlotIndexes *SI, VirtRegMap *VRM)
37 : LIS(LIS), Indexes(SI), VRM(VRM) {}
38 bool run(MachineFunction &MF);
39
40private:
41 bool isSCCLiveAtMI(const MachineInstr &MI);
42 void addToWWMSpills(MachineFunction &MF, Register Reg);
43
44 LiveIntervals *LIS;
45 SlotIndexes *Indexes;
46 VirtRegMap *VRM;
47 const SIRegisterInfo *TRI;
48 const MachineRegisterInfo *MRI;
49 SIMachineFunctionInfo *MFI;
50};
51
52class SILowerWWMCopiesLegacy : public MachineFunctionPass {
53public:
54 static char ID;
55
56 SILowerWWMCopiesLegacy() : MachineFunctionPass(ID) {
57 initializeSILowerWWMCopiesLegacyPass(*PassRegistry::getPassRegistry());
58 }
59
60 bool runOnMachineFunction(MachineFunction &MF) override;
61
62 StringRef getPassName() const override { return "SI Lower WWM Copies"; }
63
64 void getAnalysisUsage(AnalysisUsage &AU) const override {
65 AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
66 AU.addUsedIfAvailable<SlotIndexesWrapperPass>();
67 AU.addUsedIfAvailable<VirtRegMapWrapperLegacy>();
68 AU.setPreservesAll();
69 MachineFunctionPass::getAnalysisUsage(AU);
70 }
71};
72
73} // End anonymous namespace.
74
75INITIALIZE_PASS_BEGIN(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies",
76 false, false)
77INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
78INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
79INITIALIZE_PASS_END(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies",
80 false, false)
81
82char SILowerWWMCopiesLegacy::ID = 0;
83
84char &llvm::SILowerWWMCopiesLegacyID = SILowerWWMCopiesLegacy::ID;
85
86bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) {
87 // We can't determine the liveness info if LIS isn't available. Early return
88 // in that case and always assume SCC is live.
89 if (!LIS)
90 return true;
91
92 LiveRange &LR =
93 LIS->getRegUnit(Unit: *MCRegUnitIterator(MCRegister::from(Val: AMDGPU::SCC), TRI));
94 SlotIndex Idx = LIS->getInstructionIndex(Instr: MI);
95 return LR.liveAt(index: Idx);
96}
97
98// If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills
99// for preserving its entire lanes at function prolog/epilog.
100void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) {
101 if (Reg.isPhysical())
102 return;
103
104 // FIXME: VRM may be null here.
105 MCRegister PhysReg = VRM->getPhys(virtReg: Reg);
106 assert(PhysReg && "should have allocated a physical register");
107
108 MFI->allocateWWMSpill(MF, VGPR: PhysReg);
109}
110
111bool SILowerWWMCopiesLegacy::runOnMachineFunction(MachineFunction &MF) {
112 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
113 auto *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
114
115 auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
116 auto *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
117
118 auto *VRMWrapper = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>();
119 auto *VRM = VRMWrapper ? &VRMWrapper->getVRM() : nullptr;
120
121 SILowerWWMCopies Impl(LIS, Indexes, VRM);
122 return Impl.run(MF);
123}
124
125PreservedAnalyses
126SILowerWWMCopiesPass::run(MachineFunction &MF,
127 MachineFunctionAnalysisManager &MFAM) {
128 auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(IR&: MF);
129 auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(IR&: MF);
130 auto *VRM = MFAM.getCachedResult<VirtRegMapAnalysis>(IR&: MF);
131
132 SILowerWWMCopies Impl(LIS, Indexes, VRM);
133 Impl.run(MF);
134 return PreservedAnalyses::all();
135}
136
137bool SILowerWWMCopies::run(MachineFunction &MF) {
138 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
139 const SIInstrInfo *TII = ST.getInstrInfo();
140
141 MFI = MF.getInfo<SIMachineFunctionInfo>();
142 TRI = ST.getRegisterInfo();
143 MRI = &MF.getRegInfo();
144
145 if (!MFI->hasVRegFlags())
146 return false;
147
148 bool Changed = false;
149 for (MachineBasicBlock &MBB : MF) {
150 for (MachineInstr &MI : MBB) {
151 if (MI.getOpcode() != AMDGPU::WWM_COPY)
152 continue;
153
154 // TODO: Club adjacent WWM ops between same exec save/restore
155 assert(TII->isVGPRCopy(MI));
156
157 // For WWM vector copies, manipulate the exec mask around the copy
158 // instruction.
159 const DebugLoc &DL = MI.getDebugLoc();
160 MachineBasicBlock::iterator InsertPt = MI.getIterator();
161 Register RegForExecCopy = MFI->getSGPRForEXECCopy();
162 TII->insertScratchExecCopy(MF, MBB, MBBI: InsertPt, DL, Reg: RegForExecCopy,
163 IsSCCLive: isSCCLiveAtMI(MI), Indexes);
164 TII->restoreExec(MF, MBB, MBBI: ++InsertPt, DL, Reg: RegForExecCopy, Indexes);
165 addToWWMSpills(MF, Reg: MI.getOperand(i: 0).getReg());
166 LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI);
167
168 // Lower WWM_COPY back to COPY
169 MI.setDesc(TII->get(Opcode: AMDGPU::COPY));
170 Changed |= true;
171 }
172 }
173
174 return Changed;
175}
176