//===-- SIPostRABundler.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass creates bundles of memory instructions to protect adjacent loads
/// and stores from being rescheduled apart from each other post-RA.
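///
/// For example (schematic post-RA MIR; exact operands vary by opcode), two
/// adjacent loads of the same kind
///   $vgpr0 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0
///   $vgpr1 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0
/// become a single BUNDLE, which later passes treat as one scheduling unit.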
///
//===----------------------------------------------------------------------===//

#include "SIPostRABundler.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

#define DEBUG_TYPE "si-post-ra-bundler"

namespace {

class SIPostRABundlerLegacy : public MachineFunctionPass {
public:
  static char ID;

  SIPostRABundlerLegacy() : MachineFunctionPass(ID) {
    initializeSIPostRABundlerLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI post-RA bundler";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

class SIPostRABundler {
public:
  bool run(MachineFunction &MF);

private:
  const SIRegisterInfo *TRI;

  SmallSet<Register, 16> Defs;

  void collectUsedRegUnits(const MachineInstr &MI,
                           BitVector &UsedRegUnits) const;

  bool isBundleCandidate(const MachineInstr &MI) const;
  bool isDependentLoad(const MachineInstr &MI) const;
  bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
};

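// TSFlags encodings that mark an instruction as a memory access eligible for
// bundling: buffer (MTBUF/MUBUF), scalar (SMRD), LDS (DS), flat/global/scratch
// (FLAT), and image (MIMG/VIMAGE/VSAMPLE) operations.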
constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
                              SIInstrFlags::SMRD | SIInstrFlags::DS |
                              SIInstrFlags::FLAT | SIInstrFlags::MIMG |
                              SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;

} // End anonymous namespace.

INITIALIZE_PASS(SIPostRABundlerLegacy, DEBUG_TYPE, "SI post-RA bundler", false,
                false)

char SIPostRABundlerLegacy::ID = 0;

char &llvm::SIPostRABundlerLegacyID = SIPostRABundlerLegacy::ID;

FunctionPass *llvm::createSIPostRABundlerPass() {
  return new SIPostRABundlerLegacy();
}

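// Return true if \p MI is a load that reads a register defined earlier in the
// clause being formed (tracked in Defs). Such a load would create an in-clause
// dependency and must not be appended.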
bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
  if (!MI.mayLoad())
    return false;

  for (const MachineOperand &Op : MI.explicit_operands()) {
    if (!Op.isReg())
      continue;
    Register Reg = Op.getReg();
    for (Register Def : Defs)
      if (TRI->regsOverlap(Reg, Def))
        return true;
  }

  return false;
}

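// Record in \p UsedRegUnits every register unit read by \p MI. Working at
// register-unit granularity keeps the later subset test exact in the presence
// of overlapping physical registers.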
void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
                                          BitVector &UsedRegUnits) const {
  if (MI.isDebugInstr())
    return;

  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg() || !Op.readsReg())
      continue;

    Register Reg = Op.getReg();
    assert(!Op.getSubReg() &&
           "subregister indexes should not be present after RA");

    for (MCRegUnit Unit : TRI->regunits(Reg))
      UsedRegUnits.set(Unit);
  }
}

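// A bundle candidate is an unbundled memory access: it carries one of the
// MemFlags encodings and may actually load or store.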
bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
  const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
  return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
}

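// Return true if \p NextMI can extend the clause ending at \p MI: both must be
// the same kind of memory access (identical MemFlags encoding, matching
// load/store behavior), NextMI must not already be bundled, and it must not be
// a load depending on a register defined earlier in the clause.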
bool SIPostRABundler::canBundle(const MachineInstr &MI,
                                const MachineInstr &NextMI) const {
  const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;

  return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() &&
          NextMI.mayLoad() == MI.mayLoad() &&
          NextMI.mayStore() == MI.mayStore() &&
          ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) &&
          !isDependentLoad(NextMI));
}

bool SIPostRABundlerLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  return SIPostRABundler().run(MF);
}

PreservedAnalyses SIPostRABundlerPass::run(MachineFunction &MF,
                                           MachineFunctionAnalysisManager &) {
  SIPostRABundler().run(MF);
  return PreservedAnalyses::all();
}

bool SIPostRABundler::run(MachineFunction &MF) {
  TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
  BitVector KillUsedRegUnits(TRI->getNumRegUnits());

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF) {
    bool HasIGLPInstrs = llvm::any_of(MBB.instrs(), [](MachineInstr &MI) {
      unsigned Opc = MI.getOpcode();
      return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
    });

    // IGLP scheduling directives dictate their own instruction grouping;
    // don't form bundles in blocks that contain them.
    if (HasIGLPInstrs)
      continue;

    MachineBasicBlock::instr_iterator Next;
    MachineBasicBlock::instr_iterator B = MBB.instr_begin();
    MachineBasicBlock::instr_iterator E = MBB.instr_end();

    for (auto I = B; I != E; I = Next) {
      Next = std::next(I);
      if (!isBundleCandidate(*I))
        continue;

      assert(Defs.empty());

      if (I->getNumExplicitDefs() != 0)
        Defs.insert(I->defs().begin()->getReg());

      MachineBasicBlock::instr_iterator BundleStart = I;
      MachineBasicBlock::instr_iterator BundleEnd = I;
      unsigned ClauseLength = 1;
      for (I = Next; I != E; I = Next) {
        Next = std::next(I);

        assert(BundleEnd != I);
        if (canBundle(*BundleEnd, *I)) {
          BundleEnd = I;
          if (I->getNumExplicitDefs() != 0)
            Defs.insert(I->defs().begin()->getReg());
          ++ClauseLength;
        } else if (!I->isMetaInstruction()) {
          // Allow meta instructions in between bundle candidates, but do not
          // start or end a bundle on one.
          //
          // TODO: It may be better to move meta instructions like dbg_value
          // after the bundle. We're relying on the memory legalizer to
          // unbundle these.
          break;
        }
      }

      Next = std::next(BundleEnd);
      if (ClauseLength > 1) {
        Changed = true;

        // Before register allocation, kills are inserted after potential soft
        // clauses to hint register allocation. Look for kills that look like
        // this, and erase them.
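        //
        // Schematically (illustrative MIR, not from a real compilation):
        //   BUNDLE implicit-def $vgpr0, implicit $vgpr2_vgpr3, ...
        //   KILL killed renamable $vgpr2_vgpr3
        // The KILL is erased below when every register unit it reads is
        // already read within the bundle.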
        if (Next != E && Next->isKill()) {

          // TODO: Should maybe back-propagate kill flags to the bundle.
          for (const MachineInstr &BundleMI : make_range(BundleStart, Next))
            collectUsedRegUnits(BundleMI, BundleUsedRegUnits);

          BundleUsedRegUnits.flip();

          while (Next != E && Next->isKill()) {
            MachineInstr &Kill = *Next;
            collectUsedRegUnits(Kill, KillUsedRegUnits);

            KillUsedRegUnits &= BundleUsedRegUnits;

            // Erase the kill if it's a subset of the used registers.
            //
            // TODO: Should we just remove all kills? Is there any real reason
            // to keep them after RA?
            if (KillUsedRegUnits.none()) {
              ++Next;
              Kill.eraseFromParent();
            } else
              break;

            KillUsedRegUnits.reset();
          }

          BundleUsedRegUnits.reset();
        }

        finalizeBundle(MBB, BundleStart, Next);
      }

      Defs.clear();
    }
  }

  return Changed;
}