1 | //===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Load/Store Pairing: It identifies pairs of load or store instructions |
10 | // operating on consecutive memory locations and merges them into a single |
11 | // paired instruction, leveraging hardware support for paired memory accesses. |
12 | // Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass. |
13 | // |
14 | // NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as |
15 | // merging zero store instructions, promoting loads that read directly from a |
16 | // preceding store, and merging base register updates with load/store |
17 | // instructions (via pre-/post-indexed addressing). These advanced |
18 | // transformations are not yet implemented in the RISC-V pass but represent |
19 | // potential future enhancements for further optimizing RISC-V memory |
20 | // operations. |
21 | // |
22 | //===----------------------------------------------------------------------===// |
23 | |
24 | #include "RISCV.h" |
25 | #include "RISCVTargetMachine.h" |
26 | #include "llvm/Analysis/AliasAnalysis.h" |
27 | #include "llvm/CodeGen/Passes.h" |
28 | #include "llvm/MC/TargetRegistry.h" |
29 | #include "llvm/Support/Debug.h" |
30 | #include "llvm/Target/TargetOptions.h" |
31 | |
32 | using namespace llvm; |
33 | |
34 | #define DEBUG_TYPE "riscv-load-store-opt" |
35 | #define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer" |
36 | |
37 | // The LdStLimit limits number of instructions how far we search for load/store |
38 | // pairs. |
39 | static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit" , cl::init(Val: 128), |
40 | cl::Hidden); |
41 | |
42 | namespace { |
43 | |
44 | struct RISCVLoadStoreOpt : public MachineFunctionPass { |
45 | static char ID; |
46 | bool runOnMachineFunction(MachineFunction &Fn) override; |
47 | |
48 | RISCVLoadStoreOpt() : MachineFunctionPass(ID) {} |
49 | |
50 | MachineFunctionProperties getRequiredProperties() const override { |
51 | return MachineFunctionProperties().setNoVRegs(); |
52 | } |
53 | |
54 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
55 | AU.addRequired<AAResultsWrapperPass>(); |
56 | MachineFunctionPass::getAnalysisUsage(AU); |
57 | } |
58 | |
59 | StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; } |
60 | |
61 | // Find and pair load/store instructions. |
62 | bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); |
63 | |
64 | // Convert load/store pairs to single instructions. |
65 | bool tryConvertToLdStPair(MachineBasicBlock::iterator First, |
66 | MachineBasicBlock::iterator Second); |
67 | |
68 | // Scan the instructions looking for a load/store that can be combined |
69 | // with the current instruction into a load/store pair. |
70 | // Return the matching instruction if one is found, else MBB->end(). |
71 | MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, |
72 | bool &MergeForward); |
73 | |
74 | MachineBasicBlock::iterator |
75 | mergePairedInsns(MachineBasicBlock::iterator I, |
76 | MachineBasicBlock::iterator Paired, bool MergeForward); |
77 | |
78 | private: |
79 | AliasAnalysis *AA; |
80 | MachineRegisterInfo *MRI; |
81 | const RISCVInstrInfo *TII; |
82 | const RISCVRegisterInfo *TRI; |
83 | LiveRegUnits ModifiedRegUnits, UsedRegUnits; |
84 | }; |
85 | } // end anonymous namespace |
86 | |
87 | char RISCVLoadStoreOpt::ID = 0; |
88 | INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false, |
89 | false) |
90 | |
91 | bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { |
92 | if (skipFunction(F: Fn.getFunction())) |
93 | return false; |
94 | const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>(); |
95 | if (!Subtarget.useLoadStorePairs()) |
96 | return false; |
97 | |
98 | bool MadeChange = false; |
99 | TII = Subtarget.getInstrInfo(); |
100 | TRI = Subtarget.getRegisterInfo(); |
101 | MRI = &Fn.getRegInfo(); |
102 | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
103 | ModifiedRegUnits.init(TRI: *TRI); |
104 | UsedRegUnits.init(TRI: *TRI); |
105 | |
106 | for (MachineBasicBlock &MBB : Fn) { |
107 | LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n" ); |
108 | |
109 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
110 | MBBI != E;) { |
111 | if (TII->isPairableLdStInstOpc(Opc: MBBI->getOpcode()) && |
112 | tryToPairLdStInst(MBBI)) |
113 | MadeChange = true; |
114 | else |
115 | ++MBBI; |
116 | } |
117 | } |
118 | return MadeChange; |
119 | } |
120 | |
121 | // Find loads and stores that can be merged into a single load or store pair |
122 | // instruction. |
123 | bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { |
124 | MachineInstr &MI = *MBBI; |
125 | |
126 | // If this is volatile, it is not a candidate. |
127 | if (MI.hasOrderedMemoryRef()) |
128 | return false; |
129 | |
130 | if (!TII->isLdStSafeToPair(LdSt: MI, TRI)) |
131 | return false; |
132 | |
133 | // Look ahead for a pairable instruction. |
134 | MachineBasicBlock::iterator E = MI.getParent()->end(); |
135 | bool MergeForward; |
136 | MachineBasicBlock::iterator Paired = findMatchingInsn(I: MBBI, MergeForward); |
137 | if (Paired != E) { |
138 | MBBI = mergePairedInsns(I: MBBI, Paired, MergeForward); |
139 | return true; |
140 | } |
141 | return false; |
142 | } |
143 | |
144 | // Merge two adjacent load/store instructions into a paired instruction |
145 | // (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of |
146 | // SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the |
147 | // appropriate paired opcode, verifies that the memory operand is properly |
148 | // aligned, and checks that the offset is valid. If all conditions are met, it |
149 | // builds and inserts the paired instruction. |
150 | bool RISCVLoadStoreOpt::tryConvertToLdStPair( |
151 | MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) { |
152 | unsigned PairOpc; |
153 | Align RequiredAlignment; |
154 | switch (First->getOpcode()) { |
155 | default: |
156 | llvm_unreachable("Unsupported load/store instruction for pairing" ); |
157 | case RISCV::SW: |
158 | PairOpc = RISCV::MIPS_SWP; |
159 | RequiredAlignment = Align(8); |
160 | break; |
161 | case RISCV::LW: |
162 | PairOpc = RISCV::MIPS_LWP; |
163 | RequiredAlignment = Align(8); |
164 | break; |
165 | case RISCV::SD: |
166 | PairOpc = RISCV::MIPS_SDP; |
167 | RequiredAlignment = Align(16); |
168 | break; |
169 | case RISCV::LD: |
170 | PairOpc = RISCV::MIPS_LDP; |
171 | RequiredAlignment = Align(16); |
172 | break; |
173 | } |
174 | |
175 | MachineFunction *MF = First->getMF(); |
176 | const MachineMemOperand *MMO = *First->memoperands_begin(); |
177 | Align MMOAlign = MMO->getAlign(); |
178 | |
179 | if (MMOAlign < RequiredAlignment) |
180 | return false; |
181 | |
182 | int64_t Offset = First->getOperand(i: 2).getImm(); |
183 | if (!isUInt<7>(x: Offset)) |
184 | return false; |
185 | |
186 | MachineInstrBuilder MIB = BuildMI( |
187 | MF&: *MF, MIMD: First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(), |
188 | MCID: TII->get(Opcode: PairOpc)); |
189 | MIB.add(MO: First->getOperand(i: 0)) |
190 | .add(MO: Second->getOperand(i: 0)) |
191 | .add(MO: First->getOperand(i: 1)) |
192 | .add(MO: First->getOperand(i: 2)) |
193 | .cloneMergedMemRefs(OtherMIs: {&*First, &*Second}); |
194 | |
195 | First->getParent()->insert(I: First, MI: MIB); |
196 | |
197 | First->removeFromParent(); |
198 | Second->removeFromParent(); |
199 | |
200 | return true; |
201 | } |
202 | |
203 | static bool mayAlias(MachineInstr &MIa, |
204 | SmallVectorImpl<MachineInstr *> &MemInsns, |
205 | AliasAnalysis *AA) { |
206 | for (MachineInstr *MIb : MemInsns) |
207 | if (MIa.mayAlias(AA, Other: *MIb, /*UseTBAA*/ false)) |
208 | return true; |
209 | |
210 | return false; |
211 | } |
212 | |
213 | // Scan the instructions looking for a load/store that can be combined with the |
214 | // current instruction into a wider equivalent or a load/store pair. |
215 | // TODO: Extend pairing logic to consider reordering both instructions |
216 | // to a safe "middle" position rather than only merging forward/backward. |
217 | // This requires more sophisticated checks for aliasing, register |
218 | // liveness, and potential scheduling hazards. |
219 | MachineBasicBlock::iterator |
220 | RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, |
221 | bool &MergeForward) { |
222 | MachineBasicBlock::iterator E = I->getParent()->end(); |
223 | MachineBasicBlock::iterator MBBI = I; |
224 | MachineInstr &FirstMI = *I; |
225 | MBBI = next_nodbg(It: MBBI, End: E); |
226 | |
227 | bool MayLoad = FirstMI.mayLoad(); |
228 | Register Reg = FirstMI.getOperand(i: 0).getReg(); |
229 | Register BaseReg = FirstMI.getOperand(i: 1).getReg(); |
230 | int64_t Offset = FirstMI.getOperand(i: 2).getImm(); |
231 | int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue(); |
232 | |
233 | MergeForward = false; |
234 | |
235 | // Track which register units have been modified and used between the first |
236 | // insn (inclusive) and the second insn. |
237 | ModifiedRegUnits.clear(); |
238 | UsedRegUnits.clear(); |
239 | |
240 | // Remember any instructions that read/write memory between FirstMI and MI. |
241 | SmallVector<MachineInstr *, 4> MemInsns; |
242 | |
243 | for (unsigned Count = 0; MBBI != E && Count < LdStLimit; |
244 | MBBI = next_nodbg(It: MBBI, End: E)) { |
245 | MachineInstr &MI = *MBBI; |
246 | |
247 | // Don't count transient instructions towards the search limit since there |
248 | // may be different numbers of them if e.g. debug information is present. |
249 | if (!MI.isTransient()) |
250 | ++Count; |
251 | |
252 | if (MI.getOpcode() == FirstMI.getOpcode() && |
253 | TII->isLdStSafeToPair(LdSt: MI, TRI)) { |
254 | Register MIBaseReg = MI.getOperand(i: 1).getReg(); |
255 | int64_t MIOffset = MI.getOperand(i: 2).getImm(); |
256 | |
257 | if (BaseReg == MIBaseReg) { |
258 | if ((Offset != MIOffset + OffsetStride) && |
259 | (Offset + OffsetStride != MIOffset)) { |
260 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
261 | TRI); |
262 | MemInsns.push_back(Elt: &MI); |
263 | continue; |
264 | } |
265 | |
266 | // If the destination register of one load is the same register or a |
267 | // sub/super register of the other load, bail and keep looking. |
268 | if (MayLoad && |
269 | TRI->isSuperOrSubRegisterEq(RegA: Reg, RegB: MI.getOperand(i: 0).getReg())) { |
270 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
271 | TRI); |
272 | MemInsns.push_back(Elt: &MI); |
273 | continue; |
274 | } |
275 | |
276 | // If the BaseReg has been modified, then we cannot do the optimization. |
277 | if (!ModifiedRegUnits.available(Reg: BaseReg)) |
278 | return E; |
279 | |
280 | // If the Rt of the second instruction was not modified or used between |
281 | // the two instructions and none of the instructions between the second |
282 | // and first alias with the second, we can combine the second into the |
283 | // first. |
284 | if (ModifiedRegUnits.available(Reg: MI.getOperand(i: 0).getReg()) && |
285 | !(MI.mayLoad() && |
286 | !UsedRegUnits.available(Reg: MI.getOperand(i: 0).getReg())) && |
287 | !mayAlias(MIa&: MI, MemInsns, AA)) { |
288 | |
289 | MergeForward = false; |
290 | return MBBI; |
291 | } |
292 | |
293 | // Likewise, if the Rt of the first instruction is not modified or used |
294 | // between the two instructions and none of the instructions between the |
295 | // first and the second alias with the first, we can combine the first |
296 | // into the second. |
297 | if (!(MayLoad && |
298 | !UsedRegUnits.available(Reg: FirstMI.getOperand(i: 0).getReg())) && |
299 | !mayAlias(MIa&: FirstMI, MemInsns, AA)) { |
300 | |
301 | if (ModifiedRegUnits.available(Reg: FirstMI.getOperand(i: 0).getReg())) { |
302 | MergeForward = true; |
303 | return MBBI; |
304 | } |
305 | } |
306 | // Unable to combine these instructions due to interference in between. |
307 | // Keep looking. |
308 | } |
309 | } |
310 | |
311 | // If the instruction wasn't a matching load or store. Stop searching if we |
312 | // encounter a call instruction that might modify memory. |
313 | if (MI.isCall()) |
314 | return E; |
315 | |
316 | // Update modified / uses register units. |
317 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); |
318 | |
319 | // Otherwise, if the base register is modified, we have no match, so |
320 | // return early. |
321 | if (!ModifiedRegUnits.available(Reg: BaseReg)) |
322 | return E; |
323 | |
324 | // Update list of instructions that read/write memory. |
325 | if (MI.mayLoadOrStore()) |
326 | MemInsns.push_back(Elt: &MI); |
327 | } |
328 | return E; |
329 | } |
330 | |
331 | MachineBasicBlock::iterator |
332 | RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, |
333 | MachineBasicBlock::iterator Paired, |
334 | bool MergeForward) { |
335 | MachineBasicBlock::iterator E = I->getParent()->end(); |
336 | MachineBasicBlock::iterator NextI = next_nodbg(It: I, End: E); |
337 | // If NextI is the second of the two instructions to be merged, we need |
338 | // to skip one further. Either way we merge will invalidate the iterator, |
339 | // and we don't need to scan the new instruction, as it's a pairwise |
340 | // instruction, which we're not considering for further action anyway. |
341 | if (NextI == Paired) |
342 | NextI = next_nodbg(It: NextI, End: E); |
343 | |
344 | // Insert our new paired instruction after whichever of the paired |
345 | // instructions MergeForward indicates. |
346 | MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; |
347 | MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired; |
348 | int Offset = I->getOperand(i: 2).getImm(); |
349 | int PairedOffset = Paired->getOperand(i: 2).getImm(); |
350 | bool InsertAfter = (Offset < PairedOffset) ^ MergeForward; |
351 | |
352 | if (!MergeForward) |
353 | Paired->getOperand(i: 1).setIsKill(false); |
354 | |
355 | // Kill flags may become invalid when moving stores for pairing. |
356 | if (I->getOperand(i: 0).isUse()) { |
357 | if (!MergeForward) { |
358 | // Check if the Paired store's source register has a kill flag and clear |
359 | // it only if there are intermediate uses between I and Paired. |
360 | MachineOperand &PairedRegOp = Paired->getOperand(i: 0); |
361 | if (PairedRegOp.isKill()) { |
362 | for (auto It = std::next(x: I); It != Paired; ++It) { |
363 | if (It->readsRegister(Reg: PairedRegOp.getReg(), TRI)) { |
364 | PairedRegOp.setIsKill(false); |
365 | break; |
366 | } |
367 | } |
368 | } |
369 | } else { |
370 | // Clear kill flags of the first store's register in the forward |
371 | // direction. |
372 | Register Reg = I->getOperand(i: 0).getReg(); |
373 | for (MachineInstr &MI : make_range(x: std::next(x: I), y: std::next(x: Paired))) |
374 | MI.clearRegisterKills(Reg, RegInfo: TRI); |
375 | } |
376 | } |
377 | |
378 | MachineInstr *ToInsert = DeletionPoint->removeFromParent(); |
379 | MachineBasicBlock &MBB = *InsertionPoint->getParent(); |
380 | MachineBasicBlock::iterator First, Second; |
381 | |
382 | if (!InsertAfter) { |
383 | First = MBB.insert(I: InsertionPoint, MI: ToInsert); |
384 | Second = InsertionPoint; |
385 | } else { |
386 | Second = MBB.insertAfter(I: InsertionPoint, MI: ToInsert); |
387 | First = InsertionPoint; |
388 | } |
389 | |
390 | if (tryConvertToLdStPair(First, Second)) { |
391 | LLVM_DEBUG(dbgs() << "Pairing load/store:\n " ); |
392 | LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs())); |
393 | } |
394 | |
395 | return NextI; |
396 | } |
397 | |
398 | // Returns an instance of the Load / Store Optimization pass. |
399 | FunctionPass *llvm::createRISCVLoadStoreOptPass() { |
400 | return new RISCVLoadStoreOpt(); |
401 | } |
402 | |