1 | //===-- SIPostRABundler.cpp -----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This pass creates bundles of memory instructions to protect adjacent loads |
11 | /// and stores from being rescheduled apart from each other post-RA. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AMDGPU.h" |
16 | #include "GCNSubtarget.h" |
17 | #include "llvm/ADT/SmallSet.h" |
18 | #include "llvm/CodeGen/MachineFunctionPass.h" |
19 | |
20 | using namespace llvm; |
21 | |
22 | #define DEBUG_TYPE "si-post-ra-bundler" |
23 | |
24 | namespace { |
25 | |
26 | class SIPostRABundler : public MachineFunctionPass { |
27 | public: |
28 | static char ID; |
29 | |
30 | public: |
31 | SIPostRABundler() : MachineFunctionPass(ID) { |
32 | initializeSIPostRABundlerPass(*PassRegistry::getPassRegistry()); |
33 | } |
34 | |
35 | bool runOnMachineFunction(MachineFunction &MF) override; |
36 | |
37 | StringRef getPassName() const override { |
38 | return "SI post-RA bundler" ; |
39 | } |
40 | |
41 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
42 | AU.setPreservesAll(); |
43 | MachineFunctionPass::getAnalysisUsage(AU); |
44 | } |
45 | |
46 | private: |
47 | const SIRegisterInfo *TRI; |
48 | |
49 | SmallSet<Register, 16> Defs; |
50 | |
51 | void collectUsedRegUnits(const MachineInstr &MI, |
52 | BitVector &UsedRegUnits) const; |
53 | |
54 | bool isBundleCandidate(const MachineInstr &MI) const; |
55 | bool isDependentLoad(const MachineInstr &MI) const; |
56 | bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const; |
57 | }; |
58 | |
59 | constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF | |
60 | SIInstrFlags::SMRD | SIInstrFlags::DS | |
61 | SIInstrFlags::FLAT | SIInstrFlags::MIMG; |
62 | |
63 | } // End anonymous namespace. |
64 | |
65 | INITIALIZE_PASS(SIPostRABundler, DEBUG_TYPE, "SI post-RA bundler" , false, false) |
66 | |
67 | char SIPostRABundler::ID = 0; |
68 | |
69 | char &llvm::SIPostRABundlerID = SIPostRABundler::ID; |
70 | |
71 | FunctionPass *llvm::createSIPostRABundlerPass() { |
72 | return new SIPostRABundler(); |
73 | } |
74 | |
75 | bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const { |
76 | if (!MI.mayLoad()) |
77 | return false; |
78 | |
79 | for (const MachineOperand &Op : MI.explicit_operands()) { |
80 | if (!Op.isReg()) |
81 | continue; |
82 | Register Reg = Op.getReg(); |
83 | for (Register Def : Defs) |
84 | if (TRI->regsOverlap(RegA: Reg, RegB: Def)) |
85 | return true; |
86 | } |
87 | |
88 | return false; |
89 | } |
90 | |
91 | void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI, |
92 | BitVector &UsedRegUnits) const { |
93 | if (MI.isDebugInstr()) |
94 | return; |
95 | |
96 | for (const MachineOperand &Op : MI.operands()) { |
97 | if (!Op.isReg() || !Op.readsReg()) |
98 | continue; |
99 | |
100 | Register Reg = Op.getReg(); |
101 | assert(!Op.getSubReg() && |
102 | "subregister indexes should not be present after RA" ); |
103 | |
104 | for (MCRegUnit Unit : TRI->regunits(Reg)) |
105 | UsedRegUnits.set(Unit); |
106 | } |
107 | } |
108 | |
109 | bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const { |
110 | const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags; |
111 | return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled(); |
112 | } |
113 | |
114 | bool SIPostRABundler::canBundle(const MachineInstr &MI, |
115 | const MachineInstr &NextMI) const { |
116 | const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags; |
117 | |
118 | return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() && |
119 | NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() && |
120 | ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) && |
121 | !isDependentLoad(MI: NextMI)); |
122 | } |
123 | |
124 | bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) { |
125 | if (skipFunction(F: MF.getFunction())) |
126 | return false; |
127 | |
128 | TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); |
129 | BitVector BundleUsedRegUnits(TRI->getNumRegUnits()); |
130 | BitVector KillUsedRegUnits(TRI->getNumRegUnits()); |
131 | |
132 | bool Changed = false; |
133 | for (MachineBasicBlock &MBB : MF) { |
134 | bool HasIGLPInstrs = llvm::any_of(Range: MBB.instrs(), P: [](MachineInstr &MI) { |
135 | unsigned Opc = MI.getOpcode(); |
136 | return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT; |
137 | }); |
138 | |
139 | // Don't cluster with IGLP instructions. |
140 | if (HasIGLPInstrs) |
141 | continue; |
142 | |
143 | MachineBasicBlock::instr_iterator Next; |
144 | MachineBasicBlock::instr_iterator B = MBB.instr_begin(); |
145 | MachineBasicBlock::instr_iterator E = MBB.instr_end(); |
146 | |
147 | for (auto I = B; I != E; I = Next) { |
148 | Next = std::next(x: I); |
149 | if (!isBundleCandidate(MI: *I)) |
150 | continue; |
151 | |
152 | assert(Defs.empty()); |
153 | |
154 | if (I->getNumExplicitDefs() != 0) |
155 | Defs.insert(V: I->defs().begin()->getReg()); |
156 | |
157 | MachineBasicBlock::instr_iterator BundleStart = I; |
158 | MachineBasicBlock::instr_iterator BundleEnd = I; |
159 | unsigned ClauseLength = 1; |
160 | for (I = Next; I != E; I = Next) { |
161 | Next = std::next(x: I); |
162 | |
163 | assert(BundleEnd != I); |
164 | if (canBundle(MI: *BundleEnd, NextMI: *I)) { |
165 | BundleEnd = I; |
166 | if (I->getNumExplicitDefs() != 0) |
167 | Defs.insert(V: I->defs().begin()->getReg()); |
168 | ++ClauseLength; |
169 | } else if (!I->isMetaInstruction()) { |
170 | // Allow meta instructions in between bundle candidates, but do not |
171 | // start or end a bundle on one. |
172 | // |
173 | // TODO: It may be better to move meta instructions like dbg_value |
174 | // after the bundle. We're relying on the memory legalizer to unbundle |
175 | // these. |
176 | break; |
177 | } |
178 | } |
179 | |
180 | Next = std::next(x: BundleEnd); |
181 | if (ClauseLength > 1) { |
182 | Changed = true; |
183 | |
184 | // Before register allocation, kills are inserted after potential soft |
185 | // clauses to hint register allocation. Look for kills that look like |
186 | // this, and erase them. |
187 | if (Next != E && Next->isKill()) { |
188 | |
189 | // TODO: Should maybe back-propagate kill flags to the bundle. |
190 | for (const MachineInstr &BundleMI : make_range(x: BundleStart, y: Next)) |
191 | collectUsedRegUnits(MI: BundleMI, UsedRegUnits&: BundleUsedRegUnits); |
192 | |
193 | BundleUsedRegUnits.flip(); |
194 | |
195 | while (Next != E && Next->isKill()) { |
196 | MachineInstr &Kill = *Next; |
197 | collectUsedRegUnits(MI: Kill, UsedRegUnits&: KillUsedRegUnits); |
198 | |
199 | KillUsedRegUnits &= BundleUsedRegUnits; |
200 | |
201 | // Erase the kill if it's a subset of the used registers. |
202 | // |
203 | // TODO: Should we just remove all kills? Is there any real reason to |
204 | // keep them after RA? |
205 | if (KillUsedRegUnits.none()) { |
206 | ++Next; |
207 | Kill.eraseFromParent(); |
208 | } else |
209 | break; |
210 | |
211 | KillUsedRegUnits.reset(); |
212 | } |
213 | |
214 | BundleUsedRegUnits.reset(); |
215 | } |
216 | |
217 | finalizeBundle(MBB, FirstMI: BundleStart, LastMI: Next); |
218 | } |
219 | |
220 | Defs.clear(); |
221 | } |
222 | } |
223 | |
224 | return Changed; |
225 | } |
226 | |