1 | //===-- SILateBranchLowering.cpp - Final preparation of branches ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This pass mainly lowers early terminate pseudo instructions. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "AMDGPU.h" |
15 | #include "GCNSubtarget.h" |
16 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
17 | #include "SIMachineFunctionInfo.h" |
18 | #include "llvm/CodeGen/MachineDominators.h" |
19 | #include "llvm/InitializePasses.h" |
20 | |
21 | using namespace llvm; |
22 | |
23 | #define DEBUG_TYPE "si-late-branch-lowering" |
24 | |
25 | namespace { |
26 | |
27 | class SILateBranchLowering : public MachineFunctionPass { |
28 | private: |
29 | const SIRegisterInfo *TRI = nullptr; |
30 | const SIInstrInfo *TII = nullptr; |
31 | MachineDominatorTree *MDT = nullptr; |
32 | |
33 | void expandChainCall(MachineInstr &MI); |
34 | void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock); |
35 | |
36 | public: |
37 | static char ID; |
38 | |
39 | unsigned MovOpc; |
40 | Register ExecReg; |
41 | |
42 | SILateBranchLowering() : MachineFunctionPass(ID) {} |
43 | |
44 | bool runOnMachineFunction(MachineFunction &MF) override; |
45 | |
46 | StringRef getPassName() const override { |
47 | return "SI Final Branch Preparation" ; |
48 | } |
49 | |
50 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
51 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
52 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
53 | MachineFunctionPass::getAnalysisUsage(AU); |
54 | } |
55 | }; |
56 | |
57 | } // end anonymous namespace |
58 | |
59 | char SILateBranchLowering::ID = 0; |
60 | |
61 | INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE, |
62 | "SI insert s_cbranch_execz instructions" , false, false) |
63 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) |
64 | INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE, |
65 | "SI insert s_cbranch_execz instructions" , false, false) |
66 | |
67 | char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID; |
68 | |
69 | static void generateEndPgm(MachineBasicBlock &MBB, |
70 | MachineBasicBlock::iterator I, DebugLoc DL, |
71 | const SIInstrInfo *TII, MachineFunction &MF) { |
72 | const Function &F = MF.getFunction(); |
73 | bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS; |
74 | |
75 | // Check if hardware has been configured to expect color or depth exports. |
76 | bool HasColorExports = AMDGPU::getHasColorExport(F); |
77 | bool HasDepthExports = AMDGPU::getHasDepthExport(F); |
78 | bool HasExports = HasColorExports || HasDepthExports; |
79 | |
80 | // Prior to GFX10, hardware always expects at least one export for PS. |
81 | bool MustExport = !AMDGPU::isGFX10Plus(STI: TII->getSubtarget()); |
82 | |
83 | if (IsPS && (HasExports || MustExport)) { |
84 | // Generate "null export" if hardware is expecting PS to export. |
85 | const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>(); |
86 | int Target = |
87 | ST.hasNullExportTarget() |
88 | ? AMDGPU::Exp::ET_NULL |
89 | : (HasColorExports ? AMDGPU::Exp::ET_MRT0 : AMDGPU::Exp::ET_MRTZ); |
90 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: AMDGPU::EXP_DONE)) |
91 | .addImm(Val: Target) |
92 | .addReg(RegNo: AMDGPU::VGPR0, flags: RegState::Undef) |
93 | .addReg(RegNo: AMDGPU::VGPR0, flags: RegState::Undef) |
94 | .addReg(RegNo: AMDGPU::VGPR0, flags: RegState::Undef) |
95 | .addReg(RegNo: AMDGPU::VGPR0, flags: RegState::Undef) |
96 | .addImm(Val: 1) // vm |
97 | .addImm(Val: 0) // compr |
98 | .addImm(Val: 0); // en |
99 | } |
100 | |
101 | // s_endpgm |
102 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: AMDGPU::S_ENDPGM)).addImm(Val: 0); |
103 | } |
104 | |
105 | static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, |
106 | MachineDominatorTree *MDT) { |
107 | MachineBasicBlock *SplitBB = MBB.splitAt(SplitInst&: MI, /*UpdateLiveIns*/ true); |
108 | |
109 | // Update dominator tree |
110 | using DomTreeT = DomTreeBase<MachineBasicBlock>; |
111 | SmallVector<DomTreeT::UpdateType, 16> DTUpdates; |
112 | for (MachineBasicBlock *Succ : SplitBB->successors()) { |
113 | DTUpdates.push_back(Elt: {DomTreeT::Insert, SplitBB, Succ}); |
114 | DTUpdates.push_back(Elt: {DomTreeT::Delete, &MBB, Succ}); |
115 | } |
116 | DTUpdates.push_back(Elt: {DomTreeT::Insert, &MBB, SplitBB}); |
117 | MDT->getBase().applyUpdates(Updates: DTUpdates); |
118 | } |
119 | |
120 | void SILateBranchLowering::expandChainCall(MachineInstr &MI) { |
121 | // This is a tail call that needs to be expanded into at least |
122 | // 2 instructions, one for setting EXEC and one for the actual tail call. |
123 | constexpr unsigned ExecIdx = 3; |
124 | |
125 | BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: MovOpc), DestReg: ExecReg) |
126 | ->addOperand(Op: MI.getOperand(i: ExecIdx)); |
127 | MI.removeOperand(OpNo: ExecIdx); |
128 | |
129 | MI.setDesc(TII->get(Opcode: AMDGPU::SI_TCRETURN)); |
130 | } |
131 | |
132 | void SILateBranchLowering::earlyTerm(MachineInstr &MI, |
133 | MachineBasicBlock *EarlyExitBlock) { |
134 | MachineBasicBlock &MBB = *MI.getParent(); |
135 | const DebugLoc DL = MI.getDebugLoc(); |
136 | |
137 | auto BranchMI = BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: AMDGPU::S_CBRANCH_SCC0)) |
138 | .addMBB(MBB: EarlyExitBlock); |
139 | auto Next = std::next(x: MI.getIterator()); |
140 | |
141 | if (Next != MBB.end() && !Next->isTerminator()) |
142 | splitBlock(MBB, MI&: *BranchMI, MDT); |
143 | |
144 | MBB.addSuccessor(Succ: EarlyExitBlock); |
145 | MDT->getBase().insertEdge(From: &MBB, To: EarlyExitBlock); |
146 | } |
147 | |
148 | bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) { |
149 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
150 | TII = ST.getInstrInfo(); |
151 | TRI = &TII->getRegisterInfo(); |
152 | MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
153 | |
154 | MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; |
155 | ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
156 | |
157 | SmallVector<MachineInstr *, 4> EarlyTermInstrs; |
158 | SmallVector<MachineInstr *, 1> EpilogInstrs; |
159 | bool MadeChange = false; |
160 | |
161 | for (MachineBasicBlock &MBB : MF) { |
162 | for (MachineInstr &MI : llvm::make_early_inc_range(Range&: MBB)) { |
163 | switch (MI.getOpcode()) { |
164 | case AMDGPU::S_BRANCH: |
165 | // Optimize out branches to the next block. |
166 | // This only occurs in -O0 when BranchFolding is not executed. |
167 | if (MBB.isLayoutSuccessor(MBB: MI.getOperand(i: 0).getMBB())) { |
168 | assert(&MI == &MBB.back()); |
169 | MI.eraseFromParent(); |
170 | MadeChange = true; |
171 | } |
172 | break; |
173 | |
174 | case AMDGPU::SI_CS_CHAIN_TC_W32: |
175 | case AMDGPU::SI_CS_CHAIN_TC_W64: |
176 | expandChainCall(MI); |
177 | MadeChange = true; |
178 | break; |
179 | |
180 | case AMDGPU::SI_EARLY_TERMINATE_SCC0: |
181 | EarlyTermInstrs.push_back(Elt: &MI); |
182 | break; |
183 | |
184 | case AMDGPU::SI_RETURN_TO_EPILOG: |
185 | EpilogInstrs.push_back(Elt: &MI); |
186 | break; |
187 | |
188 | default: |
189 | break; |
190 | } |
191 | } |
192 | } |
193 | |
194 | // Lower any early exit branches first |
195 | if (!EarlyTermInstrs.empty()) { |
196 | MachineBasicBlock *EarlyExitBlock = MF.CreateMachineBasicBlock(); |
197 | DebugLoc DL; |
198 | |
199 | MF.insert(MBBI: MF.end(), MBB: EarlyExitBlock); |
200 | BuildMI(BB&: *EarlyExitBlock, I: EarlyExitBlock->end(), MIMD: DL, MCID: TII->get(Opcode: MovOpc), |
201 | DestReg: ExecReg) |
202 | .addImm(Val: 0); |
203 | generateEndPgm(MBB&: *EarlyExitBlock, I: EarlyExitBlock->end(), DL, TII, MF); |
204 | |
205 | for (MachineInstr *Instr : EarlyTermInstrs) { |
206 | // Early termination in GS does nothing |
207 | if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS) |
208 | earlyTerm(MI&: *Instr, EarlyExitBlock); |
209 | Instr->eraseFromParent(); |
210 | } |
211 | |
212 | EarlyTermInstrs.clear(); |
213 | MadeChange = true; |
214 | } |
215 | |
216 | // Now check return to epilog instructions occur at function end |
217 | if (!EpilogInstrs.empty()) { |
218 | MachineBasicBlock *EmptyMBBAtEnd = nullptr; |
219 | assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid()); |
220 | |
221 | // If there are multiple returns to epilog then all will |
222 | // become jumps to new empty end block. |
223 | if (EpilogInstrs.size() > 1) { |
224 | EmptyMBBAtEnd = MF.CreateMachineBasicBlock(); |
225 | MF.insert(MBBI: MF.end(), MBB: EmptyMBBAtEnd); |
226 | } |
227 | |
228 | for (auto *MI : EpilogInstrs) { |
229 | auto MBB = MI->getParent(); |
230 | if (MBB == &MF.back() && MI == &MBB->back()) |
231 | continue; |
232 | |
233 | // SI_RETURN_TO_EPILOG is not the last instruction. |
234 | // Jump to empty block at function end. |
235 | if (!EmptyMBBAtEnd) { |
236 | EmptyMBBAtEnd = MF.CreateMachineBasicBlock(); |
237 | MF.insert(MBBI: MF.end(), MBB: EmptyMBBAtEnd); |
238 | } |
239 | |
240 | MBB->addSuccessor(Succ: EmptyMBBAtEnd); |
241 | MDT->getBase().insertEdge(From: MBB, To: EmptyMBBAtEnd); |
242 | BuildMI(BB&: *MBB, I: MI, MIMD: MI->getDebugLoc(), MCID: TII->get(Opcode: AMDGPU::S_BRANCH)) |
243 | .addMBB(MBB: EmptyMBBAtEnd); |
244 | MI->eraseFromParent(); |
245 | MadeChange = true; |
246 | } |
247 | |
248 | EpilogInstrs.clear(); |
249 | } |
250 | |
251 | return MadeChange; |
252 | } |
253 | |