1 | //===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
/// Lowers the WWM_COPY instructions for various register classes.
/// The AMDGPU target generates a WWM_COPY instruction to distinguish a WWM
/// copy from a plain COPY. This pass generates the exec mask manipulation
/// instructions needed to replicate 'Whole Wave Mode' and lowers WWM_COPY back
/// to COPY.
15 | // |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #include "SILowerWWMCopies.h" |
19 | #include "AMDGPU.h" |
20 | #include "GCNSubtarget.h" |
21 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
22 | #include "SIMachineFunctionInfo.h" |
23 | #include "llvm/CodeGen/LiveIntervals.h" |
24 | #include "llvm/CodeGen/MachineFunctionPass.h" |
25 | #include "llvm/CodeGen/VirtRegMap.h" |
26 | #include "llvm/InitializePasses.h" |
27 | |
28 | using namespace llvm; |
29 | |
30 | #define DEBUG_TYPE "si-lower-wwm-copies" |
31 | |
32 | namespace { |
33 | |
34 | class SILowerWWMCopies { |
35 | public: |
36 | SILowerWWMCopies(LiveIntervals *LIS, SlotIndexes *SI, VirtRegMap *VRM) |
37 | : LIS(LIS), Indexes(SI), VRM(VRM) {} |
38 | bool run(MachineFunction &MF); |
39 | |
40 | private: |
41 | bool isSCCLiveAtMI(const MachineInstr &MI); |
42 | void addToWWMSpills(MachineFunction &MF, Register Reg); |
43 | |
44 | LiveIntervals *LIS; |
45 | SlotIndexes *Indexes; |
46 | VirtRegMap *VRM; |
47 | const SIRegisterInfo *TRI; |
48 | const MachineRegisterInfo *MRI; |
49 | SIMachineFunctionInfo *MFI; |
50 | }; |
51 | |
52 | class SILowerWWMCopiesLegacy : public MachineFunctionPass { |
53 | public: |
54 | static char ID; |
55 | |
56 | SILowerWWMCopiesLegacy() : MachineFunctionPass(ID) { |
57 | initializeSILowerWWMCopiesLegacyPass(*PassRegistry::getPassRegistry()); |
58 | } |
59 | |
60 | bool runOnMachineFunction(MachineFunction &MF) override; |
61 | |
62 | StringRef getPassName() const override { return "SI Lower WWM Copies" ; } |
63 | |
64 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
65 | AU.addUsedIfAvailable<LiveIntervalsWrapperPass>(); |
66 | AU.addUsedIfAvailable<SlotIndexesWrapperPass>(); |
67 | AU.addUsedIfAvailable<VirtRegMapWrapperLegacy>(); |
68 | AU.setPreservesAll(); |
69 | MachineFunctionPass::getAnalysisUsage(AU); |
70 | } |
71 | }; |
72 | |
73 | } // End anonymous namespace. |
74 | |
75 | INITIALIZE_PASS_BEGIN(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies" , |
76 | false, false) |
77 | INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) |
78 | INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) |
79 | INITIALIZE_PASS_END(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies" , |
80 | false, false) |
81 | |
82 | char SILowerWWMCopiesLegacy::ID = 0; |
83 | |
84 | char &llvm::SILowerWWMCopiesLegacyID = SILowerWWMCopiesLegacy::ID; |
85 | |
86 | bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) { |
87 | // We can't determine the liveness info if LIS isn't available. Early return |
88 | // in that case and always assume SCC is live. |
89 | if (!LIS) |
90 | return true; |
91 | |
92 | LiveRange &LR = |
93 | LIS->getRegUnit(Unit: *MCRegUnitIterator(MCRegister::from(Val: AMDGPU::SCC), TRI)); |
94 | SlotIndex Idx = LIS->getInstructionIndex(Instr: MI); |
95 | return LR.liveAt(index: Idx); |
96 | } |
97 | |
98 | // If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills |
99 | // for preserving its entire lanes at function prolog/epilog. |
100 | void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) { |
101 | if (Reg.isPhysical()) |
102 | return; |
103 | |
104 | // FIXME: VRM may be null here. |
105 | MCRegister PhysReg = VRM->getPhys(virtReg: Reg); |
106 | assert(PhysReg && "should have allocated a physical register" ); |
107 | |
108 | MFI->allocateWWMSpill(MF, VGPR: PhysReg); |
109 | } |
110 | |
111 | bool SILowerWWMCopiesLegacy::runOnMachineFunction(MachineFunction &MF) { |
112 | auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); |
113 | auto *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; |
114 | |
115 | auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>(); |
116 | auto *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr; |
117 | |
118 | auto *VRMWrapper = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>(); |
119 | auto *VRM = VRMWrapper ? &VRMWrapper->getVRM() : nullptr; |
120 | |
121 | SILowerWWMCopies Impl(LIS, Indexes, VRM); |
122 | return Impl.run(MF); |
123 | } |
124 | |
125 | PreservedAnalyses |
126 | SILowerWWMCopiesPass::run(MachineFunction &MF, |
127 | MachineFunctionAnalysisManager &MFAM) { |
128 | auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(IR&: MF); |
129 | auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(IR&: MF); |
130 | auto *VRM = MFAM.getCachedResult<VirtRegMapAnalysis>(IR&: MF); |
131 | |
132 | SILowerWWMCopies Impl(LIS, Indexes, VRM); |
133 | Impl.run(MF); |
134 | return PreservedAnalyses::all(); |
135 | } |
136 | |
137 | bool SILowerWWMCopies::run(MachineFunction &MF) { |
138 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
139 | const SIInstrInfo *TII = ST.getInstrInfo(); |
140 | |
141 | MFI = MF.getInfo<SIMachineFunctionInfo>(); |
142 | TRI = ST.getRegisterInfo(); |
143 | MRI = &MF.getRegInfo(); |
144 | |
145 | if (!MFI->hasVRegFlags()) |
146 | return false; |
147 | |
148 | bool Changed = false; |
149 | for (MachineBasicBlock &MBB : MF) { |
150 | for (MachineInstr &MI : MBB) { |
151 | if (MI.getOpcode() != AMDGPU::WWM_COPY) |
152 | continue; |
153 | |
154 | // TODO: Club adjacent WWM ops between same exec save/restore |
155 | assert(TII->isVGPRCopy(MI)); |
156 | |
157 | // For WWM vector copies, manipulate the exec mask around the copy |
158 | // instruction. |
159 | const DebugLoc &DL = MI.getDebugLoc(); |
160 | MachineBasicBlock::iterator InsertPt = MI.getIterator(); |
161 | Register RegForExecCopy = MFI->getSGPRForEXECCopy(); |
162 | TII->insertScratchExecCopy(MF, MBB, MBBI: InsertPt, DL, Reg: RegForExecCopy, |
163 | IsSCCLive: isSCCLiveAtMI(MI), Indexes); |
164 | TII->restoreExec(MF, MBB, MBBI: ++InsertPt, DL, Reg: RegForExecCopy, Indexes); |
165 | addToWWMSpills(MF, Reg: MI.getOperand(i: 0).getReg()); |
166 | LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI); |
167 | |
168 | // Lower WWM_COPY back to COPY |
169 | MI.setDesc(TII->get(Opcode: AMDGPU::COPY)); |
170 | Changed |= true; |
171 | } |
172 | } |
173 | |
174 | return Changed; |
175 | } |
176 | |