1//===-- SIPostRABundler.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass creates bundles of memory instructions to protect adjacent loads
11/// and stores from being rescheduled apart from each other post-RA.
12///
13//===----------------------------------------------------------------------===//
14
15#include "SIPostRABundler.h"
16#include "AMDGPU.h"
17#include "GCNSubtarget.h"
18#include "llvm/ADT/SmallSet.h"
19#include "llvm/CodeGen/MachineFunctionPass.h"
20
21using namespace llvm;
22
23#define DEBUG_TYPE "si-post-ra-bundler"
24
25namespace {
26
27class SIPostRABundlerLegacy : public MachineFunctionPass {
28public:
29 static char ID;
30
31public:
32 SIPostRABundlerLegacy() : MachineFunctionPass(ID) {}
33
34 bool runOnMachineFunction(MachineFunction &MF) override;
35
36 StringRef getPassName() const override {
37 return "SI post-RA bundler";
38 }
39
40 void getAnalysisUsage(AnalysisUsage &AU) const override {
41 AU.setPreservesAll();
42 MachineFunctionPass::getAnalysisUsage(AU);
43 }
44};
45
46class SIPostRABundler {
47public:
48 bool run(MachineFunction &MF);
49
50private:
51 const SIRegisterInfo *TRI;
52
53 SmallSet<Register, 16> Defs;
54
55 void collectUsedRegUnits(const MachineInstr &MI,
56 BitVector &UsedRegUnits) const;
57
58 bool isBundleCandidate(const MachineInstr &MI) const;
59 bool isDependentLoad(const MachineInstr &MI) const;
60 bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
61};
62
63constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
64 SIInstrFlags::SMRD | SIInstrFlags::DS |
65 SIInstrFlags::FLAT | SIInstrFlags::MIMG |
66 SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
67
68} // End anonymous namespace.
69
70INITIALIZE_PASS(SIPostRABundlerLegacy, DEBUG_TYPE, "SI post-RA bundler", false,
71 false)
72
73char SIPostRABundlerLegacy::ID = 0;
74
75char &llvm::SIPostRABundlerLegacyID = SIPostRABundlerLegacy::ID;
76
77FunctionPass *llvm::createSIPostRABundlerPass() {
78 return new SIPostRABundlerLegacy();
79}
80
81bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
82 if (!MI.mayLoad())
83 return false;
84
85 for (const MachineOperand &Op : MI.explicit_operands()) {
86 if (!Op.isReg())
87 continue;
88 Register Reg = Op.getReg();
89 for (Register Def : Defs)
90 if (TRI->regsOverlap(RegA: Reg, RegB: Def))
91 return true;
92 }
93
94 return false;
95}
96
97void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
98 BitVector &UsedRegUnits) const {
99 if (MI.isDebugInstr())
100 return;
101
102 for (const MachineOperand &Op : MI.operands()) {
103 if (!Op.isReg() || !Op.readsReg())
104 continue;
105
106 Register Reg = Op.getReg();
107 assert(!Op.getSubReg() &&
108 "subregister indexes should not be present after RA");
109
110 for (MCRegUnit Unit : TRI->regunits(Reg))
111 UsedRegUnits.set(static_cast<unsigned>(Unit));
112 }
113}
114
115bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
116 const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
117 return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
118}
119
120bool SIPostRABundler::canBundle(const MachineInstr &MI,
121 const MachineInstr &NextMI) const {
122 const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
123
124 return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() &&
125 NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() &&
126 ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) &&
127 !isDependentLoad(MI: NextMI));
128}
129
130bool SIPostRABundlerLegacy::runOnMachineFunction(MachineFunction &MF) {
131 if (skipFunction(F: MF.getFunction()))
132 return false;
133 return SIPostRABundler().run(MF);
134}
135
136PreservedAnalyses SIPostRABundlerPass::run(MachineFunction &MF,
137 MachineFunctionAnalysisManager &) {
138 SIPostRABundler().run(MF);
139 return PreservedAnalyses::all();
140}
141
142bool SIPostRABundler::run(MachineFunction &MF) {
143
144 TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
145 BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
146 BitVector KillUsedRegUnits(TRI->getNumRegUnits());
147
148 bool Changed = false;
149 for (MachineBasicBlock &MBB : MF) {
150 bool HasIGLPInstrs = llvm::any_of(Range: MBB.instrs(), P: [](MachineInstr &MI) {
151 unsigned Opc = MI.getOpcode();
152 return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
153 });
154
155 // Don't cluster with IGLP instructions.
156 if (HasIGLPInstrs)
157 continue;
158
159 MachineBasicBlock::instr_iterator Next;
160 MachineBasicBlock::instr_iterator B = MBB.instr_begin();
161 MachineBasicBlock::instr_iterator E = MBB.instr_end();
162
163 for (auto I = B; I != E; I = Next) {
164 Next = std::next(x: I);
165 if (!isBundleCandidate(MI: *I))
166 continue;
167
168 assert(Defs.empty());
169
170 if (I->getNumExplicitDefs() != 0)
171 Defs.insert(V: I->defs().begin()->getReg());
172
173 MachineBasicBlock::instr_iterator BundleStart = I;
174 MachineBasicBlock::instr_iterator BundleEnd = I;
175 unsigned ClauseLength = 1;
176 for (I = Next; I != E; I = Next) {
177 Next = std::next(x: I);
178
179 assert(BundleEnd != I);
180 if (canBundle(MI: *BundleEnd, NextMI: *I)) {
181 BundleEnd = I;
182 if (I->getNumExplicitDefs() != 0)
183 Defs.insert(V: I->defs().begin()->getReg());
184 ++ClauseLength;
185 } else if (!I->isMetaInstruction() ||
186 I->getOpcode() == AMDGPU::SCHED_BARRIER) {
187 // SCHED_BARRIER is not bundled to be honored by scheduler later.
188 // Allow other meta instructions in between bundle candidates, but do
189 // not start or end a bundle on one.
190 //
191 // TODO: It may be better to move meta instructions like dbg_value
192 // after the bundle. We're relying on the memory legalizer to unbundle
193 // these.
194 break;
195 }
196 }
197
198 Next = std::next(x: BundleEnd);
199 if (ClauseLength > 1) {
200 Changed = true;
201
202 // Before register allocation, kills are inserted after potential soft
203 // clauses to hint register allocation. Look for kills that look like
204 // this, and erase them.
205 if (Next != E && Next->isKill()) {
206
207 // TODO: Should maybe back-propagate kill flags to the bundle.
208 for (const MachineInstr &BundleMI : make_range(x: BundleStart, y: Next))
209 collectUsedRegUnits(MI: BundleMI, UsedRegUnits&: BundleUsedRegUnits);
210
211 BundleUsedRegUnits.flip();
212
213 while (Next != E && Next->isKill()) {
214 MachineInstr &Kill = *Next;
215 collectUsedRegUnits(MI: Kill, UsedRegUnits&: KillUsedRegUnits);
216
217 KillUsedRegUnits &= BundleUsedRegUnits;
218
219 // Erase the kill if it's a subset of the used registers.
220 //
221 // TODO: Should we just remove all kills? Is there any real reason to
222 // keep them after RA?
223 if (KillUsedRegUnits.none()) {
224 ++Next;
225 Kill.eraseFromParent();
226 } else
227 break;
228
229 KillUsedRegUnits.reset();
230 }
231
232 BundleUsedRegUnits.reset();
233 }
234
235 finalizeBundle(MBB, FirstMI: BundleStart, LastMI: Next);
236 }
237
238 Defs.clear();
239 }
240 }
241
242 return Changed;
243}
244