1 | //===-- SIPostRABundler.cpp -----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This pass creates bundles of memory instructions to protect adjacent loads |
11 | /// and stores from being rescheduled apart from each other post-RA. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "SIPostRABundler.h" |
16 | #include "AMDGPU.h" |
17 | #include "GCNSubtarget.h" |
18 | #include "llvm/ADT/SmallSet.h" |
19 | #include "llvm/CodeGen/MachineFunctionPass.h" |
20 | |
21 | using namespace llvm; |
22 | |
23 | #define DEBUG_TYPE "si-post-ra-bundler" |
24 | |
25 | namespace { |
26 | |
27 | class SIPostRABundlerLegacy : public MachineFunctionPass { |
28 | public: |
29 | static char ID; |
30 | |
31 | public: |
32 | SIPostRABundlerLegacy() : MachineFunctionPass(ID) { |
33 | initializeSIPostRABundlerLegacyPass(*PassRegistry::getPassRegistry()); |
34 | } |
35 | |
36 | bool runOnMachineFunction(MachineFunction &MF) override; |
37 | |
38 | StringRef getPassName() const override { |
39 | return "SI post-RA bundler" ; |
40 | } |
41 | |
42 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
43 | AU.setPreservesAll(); |
44 | MachineFunctionPass::getAnalysisUsage(AU); |
45 | } |
46 | }; |
47 | |
48 | class SIPostRABundler { |
49 | public: |
50 | bool run(MachineFunction &MF); |
51 | |
52 | private: |
53 | const SIRegisterInfo *TRI; |
54 | |
55 | SmallSet<Register, 16> Defs; |
56 | |
57 | void collectUsedRegUnits(const MachineInstr &MI, |
58 | BitVector &UsedRegUnits) const; |
59 | |
60 | bool isBundleCandidate(const MachineInstr &MI) const; |
61 | bool isDependentLoad(const MachineInstr &MI) const; |
62 | bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const; |
63 | }; |
64 | |
65 | constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF | |
66 | SIInstrFlags::SMRD | SIInstrFlags::DS | |
67 | SIInstrFlags::FLAT | SIInstrFlags::MIMG | |
68 | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; |
69 | |
70 | } // End anonymous namespace. |
71 | |
72 | INITIALIZE_PASS(SIPostRABundlerLegacy, DEBUG_TYPE, "SI post-RA bundler" , false, |
73 | false) |
74 | |
75 | char SIPostRABundlerLegacy::ID = 0; |
76 | |
77 | char &llvm::SIPostRABundlerLegacyID = SIPostRABundlerLegacy::ID; |
78 | |
79 | FunctionPass *llvm::createSIPostRABundlerPass() { |
80 | return new SIPostRABundlerLegacy(); |
81 | } |
82 | |
83 | bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const { |
84 | if (!MI.mayLoad()) |
85 | return false; |
86 | |
87 | for (const MachineOperand &Op : MI.explicit_operands()) { |
88 | if (!Op.isReg()) |
89 | continue; |
90 | Register Reg = Op.getReg(); |
91 | for (Register Def : Defs) |
92 | if (TRI->regsOverlap(RegA: Reg, RegB: Def)) |
93 | return true; |
94 | } |
95 | |
96 | return false; |
97 | } |
98 | |
99 | void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI, |
100 | BitVector &UsedRegUnits) const { |
101 | if (MI.isDebugInstr()) |
102 | return; |
103 | |
104 | for (const MachineOperand &Op : MI.operands()) { |
105 | if (!Op.isReg() || !Op.readsReg()) |
106 | continue; |
107 | |
108 | Register Reg = Op.getReg(); |
109 | assert(!Op.getSubReg() && |
110 | "subregister indexes should not be present after RA" ); |
111 | |
112 | for (MCRegUnit Unit : TRI->regunits(Reg)) |
113 | UsedRegUnits.set(Unit); |
114 | } |
115 | } |
116 | |
117 | bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const { |
118 | const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags; |
119 | return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled(); |
120 | } |
121 | |
122 | bool SIPostRABundler::canBundle(const MachineInstr &MI, |
123 | const MachineInstr &NextMI) const { |
124 | const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags; |
125 | |
126 | return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() && |
127 | NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() && |
128 | ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) && |
129 | !isDependentLoad(MI: NextMI)); |
130 | } |
131 | |
132 | bool SIPostRABundlerLegacy::runOnMachineFunction(MachineFunction &MF) { |
133 | if (skipFunction(F: MF.getFunction())) |
134 | return false; |
135 | return SIPostRABundler().run(MF); |
136 | } |
137 | |
138 | PreservedAnalyses SIPostRABundlerPass::run(MachineFunction &MF, |
139 | MachineFunctionAnalysisManager &) { |
140 | SIPostRABundler().run(MF); |
141 | return PreservedAnalyses::all(); |
142 | } |
143 | |
144 | bool SIPostRABundler::run(MachineFunction &MF) { |
145 | |
146 | TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); |
147 | BitVector BundleUsedRegUnits(TRI->getNumRegUnits()); |
148 | BitVector KillUsedRegUnits(TRI->getNumRegUnits()); |
149 | |
150 | bool Changed = false; |
151 | for (MachineBasicBlock &MBB : MF) { |
152 | bool HasIGLPInstrs = llvm::any_of(Range: MBB.instrs(), P: [](MachineInstr &MI) { |
153 | unsigned Opc = MI.getOpcode(); |
154 | return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT; |
155 | }); |
156 | |
157 | // Don't cluster with IGLP instructions. |
158 | if (HasIGLPInstrs) |
159 | continue; |
160 | |
161 | MachineBasicBlock::instr_iterator Next; |
162 | MachineBasicBlock::instr_iterator B = MBB.instr_begin(); |
163 | MachineBasicBlock::instr_iterator E = MBB.instr_end(); |
164 | |
165 | for (auto I = B; I != E; I = Next) { |
166 | Next = std::next(x: I); |
167 | if (!isBundleCandidate(MI: *I)) |
168 | continue; |
169 | |
170 | assert(Defs.empty()); |
171 | |
172 | if (I->getNumExplicitDefs() != 0) |
173 | Defs.insert(V: I->defs().begin()->getReg()); |
174 | |
175 | MachineBasicBlock::instr_iterator BundleStart = I; |
176 | MachineBasicBlock::instr_iterator BundleEnd = I; |
177 | unsigned ClauseLength = 1; |
178 | for (I = Next; I != E; I = Next) { |
179 | Next = std::next(x: I); |
180 | |
181 | assert(BundleEnd != I); |
182 | if (canBundle(MI: *BundleEnd, NextMI: *I)) { |
183 | BundleEnd = I; |
184 | if (I->getNumExplicitDefs() != 0) |
185 | Defs.insert(V: I->defs().begin()->getReg()); |
186 | ++ClauseLength; |
187 | } else if (!I->isMetaInstruction()) { |
188 | // Allow meta instructions in between bundle candidates, but do not |
189 | // start or end a bundle on one. |
190 | // |
191 | // TODO: It may be better to move meta instructions like dbg_value |
192 | // after the bundle. We're relying on the memory legalizer to unbundle |
193 | // these. |
194 | break; |
195 | } |
196 | } |
197 | |
198 | Next = std::next(x: BundleEnd); |
199 | if (ClauseLength > 1) { |
200 | Changed = true; |
201 | |
202 | // Before register allocation, kills are inserted after potential soft |
203 | // clauses to hint register allocation. Look for kills that look like |
204 | // this, and erase them. |
205 | if (Next != E && Next->isKill()) { |
206 | |
207 | // TODO: Should maybe back-propagate kill flags to the bundle. |
208 | for (const MachineInstr &BundleMI : make_range(x: BundleStart, y: Next)) |
209 | collectUsedRegUnits(MI: BundleMI, UsedRegUnits&: BundleUsedRegUnits); |
210 | |
211 | BundleUsedRegUnits.flip(); |
212 | |
213 | while (Next != E && Next->isKill()) { |
214 | MachineInstr &Kill = *Next; |
215 | collectUsedRegUnits(MI: Kill, UsedRegUnits&: KillUsedRegUnits); |
216 | |
217 | KillUsedRegUnits &= BundleUsedRegUnits; |
218 | |
219 | // Erase the kill if it's a subset of the used registers. |
220 | // |
221 | // TODO: Should we just remove all kills? Is there any real reason to |
222 | // keep them after RA? |
223 | if (KillUsedRegUnits.none()) { |
224 | ++Next; |
225 | Kill.eraseFromParent(); |
226 | } else |
227 | break; |
228 | |
229 | KillUsedRegUnits.reset(); |
230 | } |
231 | |
232 | BundleUsedRegUnits.reset(); |
233 | } |
234 | |
235 | finalizeBundle(MBB, FirstMI: BundleStart, LastMI: Next); |
236 | } |
237 | |
238 | Defs.clear(); |
239 | } |
240 | } |
241 | |
242 | return Changed; |
243 | } |
244 | |