1//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Load/Store Pairing: It identifies pairs of load or store instructions
10// operating on consecutive memory locations and merges them into a single
11// paired instruction, leveraging hardware support for paired memory accesses.
12// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
13//
14// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
15// merging zero store instructions, promoting loads that read directly from a
16// preceding store, and merging base register updates with load/store
17// instructions (via pre-/post-indexed addressing). These advanced
18// transformations are not yet implemented in the RISC-V pass but represent
19// potential future enhancements for further optimizing RISC-V memory
20// operations.
21//
22//===----------------------------------------------------------------------===//
23
24#include "RISCV.h"
25#include "RISCVTargetMachine.h"
26#include "llvm/Analysis/AliasAnalysis.h"
27#include "llvm/CodeGen/Passes.h"
28#include "llvm/MC/TargetRegistry.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Target/TargetOptions.h"
31
32using namespace llvm;
33
34#define DEBUG_TYPE "riscv-load-store-opt"
35#define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer"
36
37// The LdStLimit limits number of instructions how far we search for load/store
38// pairs.
39static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(Val: 128),
40 cl::Hidden);
41
42namespace {
43
44struct RISCVLoadStoreOpt : public MachineFunctionPass {
45 static char ID;
46 bool runOnMachineFunction(MachineFunction &Fn) override;
47
48 RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
49
50 MachineFunctionProperties getRequiredProperties() const override {
51 return MachineFunctionProperties().setNoVRegs();
52 }
53
54 void getAnalysisUsage(AnalysisUsage &AU) const override {
55 AU.addRequired<AAResultsWrapperPass>();
56 MachineFunctionPass::getAnalysisUsage(AU);
57 }
58
59 StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
60
61 // Find and pair load/store instructions.
62 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
63
64 // Convert load/store pairs to single instructions.
65 bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
66 MachineBasicBlock::iterator Second);
67
68 // Scan the instructions looking for a load/store that can be combined
69 // with the current instruction into a load/store pair.
70 // Return the matching instruction if one is found, else MBB->end().
71 MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
72 bool &MergeForward);
73
74 MachineBasicBlock::iterator
75 mergePairedInsns(MachineBasicBlock::iterator I,
76 MachineBasicBlock::iterator Paired, bool MergeForward);
77
78private:
79 AliasAnalysis *AA;
80 MachineRegisterInfo *MRI;
81 const RISCVInstrInfo *TII;
82 const RISCVRegisterInfo *TRI;
83 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
84};
85} // end anonymous namespace
86
87char RISCVLoadStoreOpt::ID = 0;
88INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
89 false)
90
91bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
92 if (skipFunction(F: Fn.getFunction()))
93 return false;
94 const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
95 if (!Subtarget.useLoadStorePairs())
96 return false;
97
98 bool MadeChange = false;
99 TII = Subtarget.getInstrInfo();
100 TRI = Subtarget.getRegisterInfo();
101 MRI = &Fn.getRegInfo();
102 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
103 ModifiedRegUnits.init(TRI: *TRI);
104 UsedRegUnits.init(TRI: *TRI);
105
106 for (MachineBasicBlock &MBB : Fn) {
107 LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
108
109 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
110 MBBI != E;) {
111 if (TII->isPairableLdStInstOpc(Opc: MBBI->getOpcode()) &&
112 tryToPairLdStInst(MBBI))
113 MadeChange = true;
114 else
115 ++MBBI;
116 }
117 }
118 return MadeChange;
119}
120
121// Find loads and stores that can be merged into a single load or store pair
122// instruction.
123bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
124 MachineInstr &MI = *MBBI;
125
126 // If this is volatile, it is not a candidate.
127 if (MI.hasOrderedMemoryRef())
128 return false;
129
130 if (!TII->isLdStSafeToPair(LdSt: MI, TRI))
131 return false;
132
133 // Look ahead for a pairable instruction.
134 MachineBasicBlock::iterator E = MI.getParent()->end();
135 bool MergeForward;
136 MachineBasicBlock::iterator Paired = findMatchingInsn(I: MBBI, MergeForward);
137 if (Paired != E) {
138 MBBI = mergePairedInsns(I: MBBI, Paired, MergeForward);
139 return true;
140 }
141 return false;
142}
143
144// Merge two adjacent load/store instructions into a paired instruction
145// (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of
146// SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the
147// appropriate paired opcode, verifies that the memory operand is properly
148// aligned, and checks that the offset is valid. If all conditions are met, it
149// builds and inserts the paired instruction.
150bool RISCVLoadStoreOpt::tryConvertToLdStPair(
151 MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
152 unsigned PairOpc;
153 Align RequiredAlignment;
154 switch (First->getOpcode()) {
155 default:
156 llvm_unreachable("Unsupported load/store instruction for pairing");
157 case RISCV::SW:
158 PairOpc = RISCV::MIPS_SWP;
159 RequiredAlignment = Align(8);
160 break;
161 case RISCV::LW:
162 PairOpc = RISCV::MIPS_LWP;
163 RequiredAlignment = Align(8);
164 break;
165 case RISCV::SD:
166 PairOpc = RISCV::MIPS_SDP;
167 RequiredAlignment = Align(16);
168 break;
169 case RISCV::LD:
170 PairOpc = RISCV::MIPS_LDP;
171 RequiredAlignment = Align(16);
172 break;
173 }
174
175 MachineFunction *MF = First->getMF();
176 const MachineMemOperand *MMO = *First->memoperands_begin();
177 Align MMOAlign = MMO->getAlign();
178
179 if (MMOAlign < RequiredAlignment)
180 return false;
181
182 int64_t Offset = First->getOperand(i: 2).getImm();
183 if (!isUInt<7>(x: Offset))
184 return false;
185
186 MachineInstrBuilder MIB = BuildMI(
187 MF&: *MF, MIMD: First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(),
188 MCID: TII->get(Opcode: PairOpc));
189 MIB.add(MO: First->getOperand(i: 0))
190 .add(MO: Second->getOperand(i: 0))
191 .add(MO: First->getOperand(i: 1))
192 .add(MO: First->getOperand(i: 2))
193 .cloneMergedMemRefs(OtherMIs: {&*First, &*Second});
194
195 First->getParent()->insert(I: First, MI: MIB);
196
197 First->removeFromParent();
198 Second->removeFromParent();
199
200 return true;
201}
202
203static bool mayAlias(MachineInstr &MIa,
204 SmallVectorImpl<MachineInstr *> &MemInsns,
205 AliasAnalysis *AA) {
206 for (MachineInstr *MIb : MemInsns)
207 if (MIa.mayAlias(AA, Other: *MIb, /*UseTBAA*/ false))
208 return true;
209
210 return false;
211}
212
213// Scan the instructions looking for a load/store that can be combined with the
214// current instruction into a wider equivalent or a load/store pair.
215// TODO: Extend pairing logic to consider reordering both instructions
216// to a safe "middle" position rather than only merging forward/backward.
217// This requires more sophisticated checks for aliasing, register
218// liveness, and potential scheduling hazards.
219MachineBasicBlock::iterator
220RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
221 bool &MergeForward) {
222 MachineBasicBlock::iterator E = I->getParent()->end();
223 MachineBasicBlock::iterator MBBI = I;
224 MachineInstr &FirstMI = *I;
225 MBBI = next_nodbg(It: MBBI, End: E);
226
227 bool MayLoad = FirstMI.mayLoad();
228 Register Reg = FirstMI.getOperand(i: 0).getReg();
229 Register BaseReg = FirstMI.getOperand(i: 1).getReg();
230 int64_t Offset = FirstMI.getOperand(i: 2).getImm();
231 int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
232
233 MergeForward = false;
234
235 // Track which register units have been modified and used between the first
236 // insn (inclusive) and the second insn.
237 ModifiedRegUnits.clear();
238 UsedRegUnits.clear();
239
240 // Remember any instructions that read/write memory between FirstMI and MI.
241 SmallVector<MachineInstr *, 4> MemInsns;
242
243 for (unsigned Count = 0; MBBI != E && Count < LdStLimit;
244 MBBI = next_nodbg(It: MBBI, End: E)) {
245 MachineInstr &MI = *MBBI;
246
247 // Don't count transient instructions towards the search limit since there
248 // may be different numbers of them if e.g. debug information is present.
249 if (!MI.isTransient())
250 ++Count;
251
252 if (MI.getOpcode() == FirstMI.getOpcode() &&
253 TII->isLdStSafeToPair(LdSt: MI, TRI)) {
254 Register MIBaseReg = MI.getOperand(i: 1).getReg();
255 int64_t MIOffset = MI.getOperand(i: 2).getImm();
256
257 if (BaseReg == MIBaseReg) {
258 if ((Offset != MIOffset + OffsetStride) &&
259 (Offset + OffsetStride != MIOffset)) {
260 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
261 TRI);
262 MemInsns.push_back(Elt: &MI);
263 continue;
264 }
265
266 // If the destination register of one load is the same register or a
267 // sub/super register of the other load, bail and keep looking.
268 if (MayLoad &&
269 TRI->isSuperOrSubRegisterEq(RegA: Reg, RegB: MI.getOperand(i: 0).getReg())) {
270 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
271 TRI);
272 MemInsns.push_back(Elt: &MI);
273 continue;
274 }
275
276 // If the BaseReg has been modified, then we cannot do the optimization.
277 if (!ModifiedRegUnits.available(Reg: BaseReg))
278 return E;
279
280 // If the Rt of the second instruction was not modified or used between
281 // the two instructions and none of the instructions between the second
282 // and first alias with the second, we can combine the second into the
283 // first.
284 if (ModifiedRegUnits.available(Reg: MI.getOperand(i: 0).getReg()) &&
285 !(MI.mayLoad() &&
286 !UsedRegUnits.available(Reg: MI.getOperand(i: 0).getReg())) &&
287 !mayAlias(MIa&: MI, MemInsns, AA)) {
288
289 MergeForward = false;
290 return MBBI;
291 }
292
293 // Likewise, if the Rt of the first instruction is not modified or used
294 // between the two instructions and none of the instructions between the
295 // first and the second alias with the first, we can combine the first
296 // into the second.
297 if (!(MayLoad &&
298 !UsedRegUnits.available(Reg: FirstMI.getOperand(i: 0).getReg())) &&
299 !mayAlias(MIa&: FirstMI, MemInsns, AA)) {
300
301 if (ModifiedRegUnits.available(Reg: FirstMI.getOperand(i: 0).getReg())) {
302 MergeForward = true;
303 return MBBI;
304 }
305 }
306 // Unable to combine these instructions due to interference in between.
307 // Keep looking.
308 }
309 }
310
311 // If the instruction wasn't a matching load or store. Stop searching if we
312 // encounter a call instruction that might modify memory.
313 if (MI.isCall())
314 return E;
315
316 // Update modified / uses register units.
317 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
318
319 // Otherwise, if the base register is modified, we have no match, so
320 // return early.
321 if (!ModifiedRegUnits.available(Reg: BaseReg))
322 return E;
323
324 // Update list of instructions that read/write memory.
325 if (MI.mayLoadOrStore())
326 MemInsns.push_back(Elt: &MI);
327 }
328 return E;
329}
330
331MachineBasicBlock::iterator
332RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
333 MachineBasicBlock::iterator Paired,
334 bool MergeForward) {
335 MachineBasicBlock::iterator E = I->getParent()->end();
336 MachineBasicBlock::iterator NextI = next_nodbg(It: I, End: E);
337 // If NextI is the second of the two instructions to be merged, we need
338 // to skip one further. Either way we merge will invalidate the iterator,
339 // and we don't need to scan the new instruction, as it's a pairwise
340 // instruction, which we're not considering for further action anyway.
341 if (NextI == Paired)
342 NextI = next_nodbg(It: NextI, End: E);
343
344 // Insert our new paired instruction after whichever of the paired
345 // instructions MergeForward indicates.
346 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
347 MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired;
348 int Offset = I->getOperand(i: 2).getImm();
349 int PairedOffset = Paired->getOperand(i: 2).getImm();
350 bool InsertAfter = (Offset < PairedOffset) ^ MergeForward;
351
352 if (!MergeForward)
353 Paired->getOperand(i: 1).setIsKill(false);
354
355 // Kill flags may become invalid when moving stores for pairing.
356 if (I->getOperand(i: 0).isUse()) {
357 if (!MergeForward) {
358 // Check if the Paired store's source register has a kill flag and clear
359 // it only if there are intermediate uses between I and Paired.
360 MachineOperand &PairedRegOp = Paired->getOperand(i: 0);
361 if (PairedRegOp.isKill()) {
362 for (auto It = std::next(x: I); It != Paired; ++It) {
363 if (It->readsRegister(Reg: PairedRegOp.getReg(), TRI)) {
364 PairedRegOp.setIsKill(false);
365 break;
366 }
367 }
368 }
369 } else {
370 // Clear kill flags of the first store's register in the forward
371 // direction.
372 Register Reg = I->getOperand(i: 0).getReg();
373 for (MachineInstr &MI : make_range(x: std::next(x: I), y: std::next(x: Paired)))
374 MI.clearRegisterKills(Reg, RegInfo: TRI);
375 }
376 }
377
378 MachineInstr *ToInsert = DeletionPoint->removeFromParent();
379 MachineBasicBlock &MBB = *InsertionPoint->getParent();
380 MachineBasicBlock::iterator First, Second;
381
382 if (!InsertAfter) {
383 First = MBB.insert(I: InsertionPoint, MI: ToInsert);
384 Second = InsertionPoint;
385 } else {
386 Second = MBB.insertAfter(I: InsertionPoint, MI: ToInsert);
387 First = InsertionPoint;
388 }
389
390 if (tryConvertToLdStPair(First, Second)) {
391 LLVM_DEBUG(dbgs() << "Pairing load/store:\n ");
392 LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
393 }
394
395 return NextI;
396}
397
398// Returns an instance of the Load / Store Optimization pass.
399FunctionPass *llvm::createRISCVLoadStoreOptPass() {
400 return new RISCVLoadStoreOpt();
401}
402