1//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Load/Store Pairing: It identifies pairs of load or store instructions
10// operating on consecutive memory locations and merges them into a single
11// paired instruction, leveraging hardware support for paired memory accesses.
12// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
13//
14// Post-allocation Zilsd decomposition: Fixes invalid LD/SD instructions if
15// register allocation didn't provide suitable consecutive registers.
16//
17// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
18// merging zero store instructions, promoting loads that read directly from a
19// preceding store, and merging base register updates with load/store
20// instructions (via pre-/post-indexed addressing). These advanced
21// transformations are not yet implemented in the RISC-V pass but represent
22// potential future enhancements for further optimizing RISC-V memory
23// operations.
24//
25//===----------------------------------------------------------------------===//
26
27#include "RISCV.h"
28#include "RISCVTargetMachine.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/Analysis/AliasAnalysis.h"
32#include "llvm/CodeGen/Passes.h"
33#include "llvm/MC/TargetRegistry.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Target/TargetOptions.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "riscv-load-store-opt"
40#define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer"
41
42// The LdStLimit limits number of instructions how far we search for load/store
43// pairs.
44static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(Val: 128),
45 cl::Hidden);
46STATISTIC(NumLD2LW, "Number of LD instructions split back to LW");
47STATISTIC(NumSD2SW, "Number of SD instructions split back to SW");
48
49namespace {
50
51struct RISCVLoadStoreOpt : public MachineFunctionPass {
52 static char ID;
53 bool runOnMachineFunction(MachineFunction &Fn) override;
54
55 RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
56
57 MachineFunctionProperties getRequiredProperties() const override {
58 return MachineFunctionProperties().setNoVRegs();
59 }
60
61 void getAnalysisUsage(AnalysisUsage &AU) const override {
62 AU.addRequired<AAResultsWrapperPass>();
63 MachineFunctionPass::getAnalysisUsage(AU);
64 }
65
66 StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
67
68 // Find and pair load/store instructions.
69 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
70
71 // Convert load/store pairs to single instructions.
72 bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
73 MachineBasicBlock::iterator Second);
74 bool tryConvertToXqcilsmLdStPair(MachineFunction *MF,
75 MachineBasicBlock::iterator First,
76 MachineBasicBlock::iterator Second);
77 bool tryConvertToXqcilsmMultiLdSt(MachineBasicBlock::iterator &First);
78 bool tryConvertToMIPSLdStPair(MachineFunction *MF,
79 MachineBasicBlock::iterator First,
80 MachineBasicBlock::iterator Second);
81
82 // Scan the instructions looking for a load/store that can be combined
83 // with the current instruction into a load/store pair.
84 // Return the matching instruction if one is found, else MBB->end().
85 MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
86 bool &MergeForward);
87
88 MachineBasicBlock::iterator
89 mergePairedInsns(MachineBasicBlock::iterator I,
90 MachineBasicBlock::iterator Paired, bool MergeForward);
91
92 // Post reg-alloc zilsd part
93 bool fixInvalidRegPairOp(MachineBasicBlock &MBB,
94 MachineBasicBlock::iterator &MBBI);
95 bool isValidZilsdRegPair(Register First, Register Second);
96 void splitLdSdIntoTwo(MachineBasicBlock &MBB,
97 MachineBasicBlock::iterator &MBBI, bool IsLoad);
98
99private:
100 AliasAnalysis *AA;
101 MachineRegisterInfo *MRI;
102 const RISCVInstrInfo *TII;
103 const RISCVRegisterInfo *TRI;
104 const RISCVSubtarget *STI = nullptr;
105 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
106};
107} // end anonymous namespace
108
109char RISCVLoadStoreOpt::ID = 0;
110INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
111 false)
112
113bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
114 if (skipFunction(F: Fn.getFunction()))
115 return false;
116
117 bool MadeChange = false;
118 STI = &Fn.getSubtarget<RISCVSubtarget>();
119 TII = STI->getInstrInfo();
120 TRI = STI->getRegisterInfo();
121 MRI = &Fn.getRegInfo();
122 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
123 ModifiedRegUnits.init(TRI: *TRI);
124 UsedRegUnits.init(TRI: *TRI);
125
126 if (STI->useMIPSLoadStorePairs() || STI->hasVendorXqcilsm()) {
127 for (MachineBasicBlock &MBB : Fn) {
128 LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
129
130 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
131 MBBI != E;) {
132 if (TII->isPairableLdStInstOpc(Opc: MBBI->getOpcode()) &&
133 tryToPairLdStInst(MBBI))
134 MadeChange = true;
135 else
136 ++MBBI;
137 }
138 }
139 }
140
141 if (!STI->is64Bit() && STI->hasStdExtZilsd()) {
142 for (auto &MBB : Fn) {
143 for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
144 if (fixInvalidRegPairOp(MBB, MBBI)) {
145 MadeChange = true;
146 // Iterator was updated by fixInvalidRegPairOp
147 } else {
148 ++MBBI;
149 }
150 }
151 }
152 }
153
154 return MadeChange;
155}
156
157// Find loads and stores that can be merged into a single load or store pair
158// instruction.
159bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
160 MachineInstr &MI = *MBBI;
161
162 // If this is volatile, it is not a candidate.
163 if (MI.hasOrderedMemoryRef())
164 return false;
165
166 if (!TII->isLdStSafeToPair(LdSt: MI, TRI))
167 return false;
168
169 // If Xqcilsm is available, first try to form a multi-instruction group (>2).
170 if (!STI->is64Bit() && STI->hasVendorXqcilsm()) {
171 if (tryConvertToXqcilsmMultiLdSt(First&: MBBI))
172 return true;
173 }
174
175 // Look ahead for a pairable instruction.
176 MachineBasicBlock::iterator E = MI.getParent()->end();
177 bool MergeForward;
178 MachineBasicBlock::iterator Paired = findMatchingInsn(I: MBBI, MergeForward);
179 if (Paired != E) {
180 MBBI = mergePairedInsns(I: MBBI, Paired, MergeForward);
181 return true;
182 }
183 return false;
184}
185
186static bool isMemOpAligned(MachineInstr &MI, Align RequiredAlignment) {
187 const MachineMemOperand *MMO = *MI.memoperands_begin();
188 Align MMOAlign = MMO->getAlign();
189 return MMOAlign >= RequiredAlignment;
190}
191
192// Convert set of 3 or more LW/SW instructions to QC_LWMI/QC_SWMI/QC_SETWMI.
193// For now this only handles consecutive loads and stores traversing the basic
194// block top-down.
195// TODO: Traverse the basic block bottom-up as well.
196bool RISCVLoadStoreOpt::tryConvertToXqcilsmMultiLdSt(
197 MachineBasicBlock::iterator &FirstIt) {
198 MachineInstr &FirstMI = *FirstIt;
199 MachineFunction *MF = FirstMI.getMF();
200
201 if (STI->is64Bit() || !STI->hasVendorXqcilsm())
202 return false;
203
204 unsigned Opc = FirstMI.getOpcode();
205 if (Opc != RISCV::LW && Opc != RISCV::SW)
206 return false;
207
208 if (!FirstMI.hasOneMemOperand())
209 return false;
210
211 if (!isMemOpAligned(MI&: FirstMI, RequiredAlignment: Align(4)))
212 return false;
213
214 // Require simple reg+imm addressing.
215 const MachineOperand &BaseOp = FirstMI.getOperand(i: 1);
216 const MachineOperand &OffOp = FirstMI.getOperand(i: 2);
217 if (!BaseOp.isReg() || !OffOp.isImm())
218 return false;
219
220 Register Base = BaseOp.getReg();
221 int64_t BaseOff = OffOp.getImm();
222
223 if (!isShiftedUInt<5, 2>(x: BaseOff))
224 return false;
225
226 Register StartReg = FirstMI.getOperand(i: 0).getReg();
227 bool IsLoad = (Opc == RISCV::LW);
228
229 // Load rd cannot be x0 and must not clobber the base register.
230 if (IsLoad) {
231 if (StartReg == RISCV::X0)
232 return false;
233 if (StartReg == Base)
234 return false;
235 }
236
237 // Collect a set of consecutive matching instructions.
238 SmallVector<MachineInstr *, 8> Group;
239 Group.push_back(Elt: &FirstMI);
240
241 MachineBasicBlock::iterator E = FirstIt->getParent()->end();
242 MachineBasicBlock::iterator It = next_nodbg(It: FirstIt, End: E);
243 int64_t ExpectedOff = BaseOff + 4;
244 unsigned Index = 1;
245 enum class StoreMode { Unknown, Setwmi, Swmi };
246 StoreMode SMode = StoreMode::Unknown;
247
248 while (It != E) {
249 MachineInstr &MI = *It;
250
251 if (!TII->isPairableLdStInstOpc(Opc: MI.getOpcode()))
252 break;
253 if (MI.getOpcode() != Opc)
254 break;
255 if (!TII->isLdStSafeToPair(LdSt: MI, TRI))
256 break;
257 if (!MI.hasOneMemOperand())
258 break;
259 if (!isMemOpAligned(MI, RequiredAlignment: Align(4)))
260 break;
261
262 const MachineOperand &BaseMIOp = MI.getOperand(i: 1);
263 const MachineOperand &OffsetMIOp = MI.getOperand(i: 2);
264 if (!BaseMIOp.isReg() || !OffsetMIOp.isImm())
265 break;
266 if (BaseMIOp.getReg() != Base)
267 break;
268 int64_t Off = OffsetMIOp.getImm();
269 if (Off != ExpectedOff)
270 break;
271
272 Register Reg = MI.getOperand(i: 0).getReg();
273 if (IsLoad) {
274 // For loads, require consecutive destination registers.
275 if (Reg != StartReg + Index)
276 break;
277 if (Reg == Base)
278 break;
279 } else {
280 // For stores, decide mode based on the second instruction and then
281 // enforce the same for the rest.
282 if (SMode == StoreMode::Unknown) {
283 if (Reg == StartReg)
284 SMode = StoreMode::Setwmi;
285 else if (Reg == StartReg + 1)
286 SMode = StoreMode::Swmi;
287 else
288 break;
289 } else if (SMode == StoreMode::Setwmi) {
290 if (Reg != StartReg)
291 break;
292 } else {
293 if (Reg != StartReg + Index)
294 break;
295 }
296 }
297
298 // Passed checks, extend the group.
299 Group.push_back(Elt: &MI);
300 ++Index;
301 ExpectedOff += 4;
302 It = next_nodbg(It, End: E);
303 }
304
305 // We only handle more than 2 here. Pairs are handled in
306 // tryConvertToXqcilsmLdStPair.
307 unsigned Len = Group.size();
308 if (Len < 3 || Len > 31)
309 return false;
310
311 unsigned NewOpc;
312 RegState StartRegState;
313 bool AddImplicitRegs = true;
314
315 if (IsLoad) {
316 NewOpc = RISCV::QC_LWMI;
317 StartRegState = RegState::Define;
318 } else {
319 assert(SMode != StoreMode::Unknown &&
320 "Group should be large enough to know the store mode");
321 if (SMode == StoreMode::Setwmi) {
322 NewOpc = RISCV::QC_SETWMI;
323 // Kill if any of the individual stores killed the reg.
324 bool StartKill = false;
325 for (MachineInstr *MI : Group)
326 StartKill |= MI->getOperand(i: 0).isKill();
327 StartRegState = getKillRegState(B: StartKill);
328 AddImplicitRegs = false;
329 } else {
330 // SWMI requires consecutive source regs and rd != x0.
331 if (StartReg == RISCV::X0)
332 return false;
333 NewOpc = RISCV::QC_SWMI;
334 StartRegState = getKillRegState(B: Group.front()->getOperand(i: 0).isKill());
335 }
336 }
337
338 // Aggregate kill on base.
339 bool BaseKill = false;
340 for (MachineInstr *MI : Group)
341 BaseKill |= MI->getOperand(i: 1).isKill();
342
343 // Build the new instruction.
344 DebugLoc DL = FirstMI.getDebugLoc();
345 if (!DL)
346 DL = Group.back()->getDebugLoc();
347 MachineInstrBuilder MIB = BuildMI(MF&: *MF, MIMD: DL, MCID: TII->get(Opcode: NewOpc));
348 MIB.addReg(RegNo: StartReg, Flags: StartRegState)
349 .addReg(RegNo: Base, Flags: getKillRegState(B: BaseKill))
350 .addImm(Val: Len)
351 .addImm(Val: BaseOff);
352
353 // Merge memory references.
354 MIB.cloneMergedMemRefs(OtherMIs: Group);
355
356 if (AddImplicitRegs) {
357 // Add implicit operands for the additional registers.
358 for (unsigned i = 1; i < Len; ++i) {
359 Register R = StartReg + i;
360 RegState State;
361 if (IsLoad)
362 State = RegState::ImplicitDefine;
363 else
364 State = RegState::Implicit |
365 getKillRegState(B: Group[i]->getOperand(i: 0).isKill());
366 MIB.addReg(RegNo: R, Flags: State);
367 }
368 }
369
370 // Insert before the first instruction and remove all in the group.
371 MachineBasicBlock *MBB = FirstIt->getParent();
372 MachineBasicBlock::iterator NewIt = MBB->insert(I: FirstIt, MI: MIB);
373 for (MachineInstr *MI : Group)
374 MI->removeFromParent();
375
376 // Advance the cursor to the next non-debug instruction after the group.
377 FirstIt = next_nodbg(It: NewIt, End: MBB->end());
378 return true;
379}
380
381bool RISCVLoadStoreOpt::tryConvertToXqcilsmLdStPair(
382 MachineFunction *MF, MachineBasicBlock::iterator First,
383 MachineBasicBlock::iterator Second) {
384 unsigned Opc = First->getOpcode();
385 if ((Opc != RISCV::LW && Opc != RISCV::SW) || Second->getOpcode() != Opc)
386 return false;
387
388 const auto &FirstOp1 = First->getOperand(i: 1);
389 const auto &SecondOp1 = Second->getOperand(i: 1);
390 const auto &FirstOp2 = First->getOperand(i: 2);
391 const auto &SecondOp2 = Second->getOperand(i: 2);
392
393 // Require simple reg+imm addressing for both.
394 if (!FirstOp1.isReg() || !SecondOp1.isReg() || !FirstOp2.isImm() ||
395 !SecondOp2.isImm())
396 return false;
397
398 Register Base1 = FirstOp1.getReg();
399 Register Base2 = SecondOp1.getReg();
400
401 if (Base1 != Base2)
402 return false;
403
404 if (!First->hasOneMemOperand() || !Second->hasOneMemOperand())
405 return false;
406
407 if (!isMemOpAligned(MI&: *First, RequiredAlignment: Align(4)) || !isMemOpAligned(MI&: *Second, RequiredAlignment: Align(4)))
408 return false;
409
410 auto &FirstOp0 = First->getOperand(i: 0);
411 auto &SecondOp0 = Second->getOperand(i: 0);
412
413 int64_t Off1 = FirstOp2.getImm();
414 int64_t Off2 = SecondOp2.getImm();
415
416 if (Off2 < Off1) {
417 std::swap(a&: FirstOp0, b&: SecondOp0);
418 std::swap(a&: Off1, b&: Off2);
419 }
420
421 if (!isShiftedUInt<5, 2>(x: Off1) || (Off2 - Off1 != 4))
422 return false;
423
424 Register StartReg = FirstOp0.getReg();
425 Register NextReg = SecondOp0.getReg();
426
427 unsigned XqciOpc;
428 RegState StartRegState;
429 RegState NextRegState = {};
430 bool AddNextReg = true;
431
432 if (Opc == RISCV::LW) {
433
434 if (StartReg == RISCV::X0)
435 return false;
436
437 // If the base reg gets overwritten by one of the loads bail out.
438 if (StartReg == Base1 || NextReg == Base1)
439 return false;
440
441 // The registers need to be consecutive.
442 if (NextReg != StartReg + 1)
443 return false;
444
445 XqciOpc = RISCV::QC_LWMI;
446 StartRegState = RegState::Define;
447 NextRegState = RegState::ImplicitDefine;
448 } else {
449 assert(Opc == RISCV::SW && "Expected a SW instruction");
450 if (StartReg == NextReg) {
451 XqciOpc = RISCV::QC_SETWMI;
452 StartRegState = getKillRegState(B: FirstOp0.isKill() || SecondOp0.isKill());
453 AddNextReg = false;
454 } else if (NextReg == StartReg + 1 && StartReg != RISCV::X0) {
455 XqciOpc = RISCV::QC_SWMI;
456 StartRegState = getKillRegState(B: FirstOp0.isKill());
457 NextRegState = RegState::Implicit | getKillRegState(B: SecondOp0.isKill());
458 } else {
459 return false;
460 }
461 }
462
463 DebugLoc DL =
464 First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc();
465 MachineInstrBuilder MIB = BuildMI(MF&: *MF, MIMD: DL, MCID: TII->get(Opcode: XqciOpc));
466 MIB.addReg(RegNo: StartReg, Flags: StartRegState)
467 .addReg(RegNo: Base1, Flags: getKillRegState(B: FirstOp1.isKill() || SecondOp1.isKill()))
468 .addImm(Val: 2)
469 .addImm(Val: Off1)
470 .cloneMergedMemRefs(OtherMIs: {&*First, &*Second});
471
472 if (AddNextReg)
473 MIB.addReg(RegNo: NextReg, Flags: NextRegState);
474
475 First->getParent()->insert(I: First, MI: MIB);
476 First->removeFromParent();
477 Second->removeFromParent();
478
479 return true;
480}
481
482bool RISCVLoadStoreOpt::tryConvertToMIPSLdStPair(
483 MachineFunction *MF, MachineBasicBlock::iterator First,
484 MachineBasicBlock::iterator Second) {
485 // Try converting to SWP/LWP/LDP/SDP.
486 // SWP/LWP requires 8-byte alignment whereas LDP/SDP needs 16-byte alignment.
487 unsigned PairOpc;
488 Align RequiredAlignment;
489 switch (First->getOpcode()) {
490 default:
491 llvm_unreachable("Unsupported load/store instruction for pairing");
492 case RISCV::SW:
493 PairOpc = RISCV::MIPS_SWP;
494 RequiredAlignment = Align(8);
495 break;
496 case RISCV::LW:
497 PairOpc = RISCV::MIPS_LWP;
498 RequiredAlignment = Align(8);
499 break;
500 case RISCV::SD:
501 PairOpc = RISCV::MIPS_SDP;
502 RequiredAlignment = Align(16);
503 break;
504 case RISCV::LD:
505 PairOpc = RISCV::MIPS_LDP;
506 RequiredAlignment = Align(16);
507 break;
508 }
509
510 if (!First->hasOneMemOperand())
511 return false;
512
513 if (!isMemOpAligned(MI&: *First, RequiredAlignment))
514 return false;
515
516 int64_t Offset = First->getOperand(i: 2).getImm();
517 if (!isUInt<7>(x: Offset))
518 return false;
519
520 MachineInstrBuilder MIB = BuildMI(
521 MF&: *MF, MIMD: First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(),
522 MCID: TII->get(Opcode: PairOpc));
523 MIB.add(MO: First->getOperand(i: 0))
524 .add(MO: Second->getOperand(i: 0))
525 .add(MO: First->getOperand(i: 1))
526 .add(MO: First->getOperand(i: 2))
527 .cloneMergedMemRefs(OtherMIs: {&*First, &*Second});
528
529 First->getParent()->insert(I: First, MI: MIB);
530
531 First->removeFromParent();
532 Second->removeFromParent();
533
534 return true;
535}
536
537// Merge two adjacent load/store instructions into a paired instruction.
538// This function calls the vendor specific implementation that seelects the
539// appropriate paired opcode, verifies that the memory operand is properly
540// aligned, and checks that the offset is valid. If all conditions are met, it
541// builds and inserts the paired instruction.
542bool RISCVLoadStoreOpt::tryConvertToLdStPair(
543 MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
544 MachineFunction *MF = First->getMF();
545
546 // Try converting to QC_LWMI/QC_SWMI if the XQCILSM extension is enabled.
547 if (!STI->is64Bit() && STI->hasVendorXqcilsm())
548 return tryConvertToXqcilsmLdStPair(MF, First, Second);
549
550 // Else try to convert them into MIPS Paired Loads/Stores.
551 return tryConvertToMIPSLdStPair(MF, First, Second);
552}
553
554static bool mayAlias(MachineInstr &MIa,
555 SmallVectorImpl<MachineInstr *> &MemInsns,
556 AliasAnalysis *AA) {
557 for (MachineInstr *MIb : MemInsns)
558 if (MIa.mayAlias(AA, Other: *MIb, /*UseTBAA*/ false))
559 return true;
560
561 return false;
562}
563
564// Scan the instructions looking for a load/store that can be combined with the
565// current instruction into a wider equivalent or a load/store pair.
566// TODO: Extend pairing logic to consider reordering both instructions
567// to a safe "middle" position rather than only merging forward/backward.
568// This requires more sophisticated checks for aliasing, register
569// liveness, and potential scheduling hazards.
570MachineBasicBlock::iterator
571RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
572 bool &MergeForward) {
573 MachineBasicBlock::iterator E = I->getParent()->end();
574 MachineBasicBlock::iterator MBBI = I;
575 MachineInstr &FirstMI = *I;
576 MBBI = next_nodbg(It: MBBI, End: E);
577
578 bool MayLoad = FirstMI.mayLoad();
579 Register Reg = FirstMI.getOperand(i: 0).getReg();
580 Register BaseReg = FirstMI.getOperand(i: 1).getReg();
581 int64_t Offset = FirstMI.getOperand(i: 2).getImm();
582 int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
583
584 MergeForward = false;
585
586 // Track which register units have been modified and used between the first
587 // insn (inclusive) and the second insn.
588 ModifiedRegUnits.clear();
589 UsedRegUnits.clear();
590
591 // Remember any instructions that read/write memory between FirstMI and MI.
592 SmallVector<MachineInstr *, 4> MemInsns;
593
594 for (unsigned Count = 0; MBBI != E && Count < LdStLimit;
595 MBBI = next_nodbg(It: MBBI, End: E)) {
596 MachineInstr &MI = *MBBI;
597
598 // Don't count transient instructions towards the search limit since there
599 // may be different numbers of them if e.g. debug information is present.
600 if (!MI.isTransient())
601 ++Count;
602
603 if (MI.getOpcode() == FirstMI.getOpcode() &&
604 TII->isLdStSafeToPair(LdSt: MI, TRI)) {
605 Register MIBaseReg = MI.getOperand(i: 1).getReg();
606 int64_t MIOffset = MI.getOperand(i: 2).getImm();
607
608 if (BaseReg == MIBaseReg) {
609 if ((Offset != MIOffset + OffsetStride) &&
610 (Offset + OffsetStride != MIOffset)) {
611 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
612 TRI);
613 MemInsns.push_back(Elt: &MI);
614 continue;
615 }
616
617 // If the destination register of one load is the same register or a
618 // sub/super register of the other load, bail and keep looking.
619 if (MayLoad &&
620 TRI->isSuperOrSubRegisterEq(RegA: Reg, RegB: MI.getOperand(i: 0).getReg())) {
621 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
622 TRI);
623 MemInsns.push_back(Elt: &MI);
624 continue;
625 }
626
627 // If the BaseReg has been modified, then we cannot do the optimization.
628 if (!ModifiedRegUnits.available(Reg: BaseReg))
629 return E;
630
631 // If the Rt of the second instruction was not modified or used between
632 // the two instructions and none of the instructions between the second
633 // and first alias with the second, we can combine the second into the
634 // first.
635 if (ModifiedRegUnits.available(Reg: MI.getOperand(i: 0).getReg()) &&
636 !(MI.mayLoad() &&
637 !UsedRegUnits.available(Reg: MI.getOperand(i: 0).getReg())) &&
638 !mayAlias(MIa&: MI, MemInsns, AA)) {
639
640 MergeForward = false;
641 return MBBI;
642 }
643
644 // Likewise, if the Rt of the first instruction is not modified or used
645 // between the two instructions and none of the instructions between the
646 // first and the second alias with the first, we can combine the first
647 // into the second.
648 if (!(MayLoad &&
649 !UsedRegUnits.available(Reg: FirstMI.getOperand(i: 0).getReg())) &&
650 !mayAlias(MIa&: FirstMI, MemInsns, AA)) {
651
652 if (ModifiedRegUnits.available(Reg: FirstMI.getOperand(i: 0).getReg())) {
653 MergeForward = true;
654 return MBBI;
655 }
656 }
657 // Unable to combine these instructions due to interference in between.
658 // Keep looking.
659 }
660 }
661
662 // If the instruction wasn't a matching load or store. Stop searching if we
663 // encounter a call instruction that might modify memory.
664 if (MI.isCall())
665 return E;
666
667 // Update modified / uses register units.
668 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
669
670 // Otherwise, if the base register is modified, we have no match, so
671 // return early.
672 if (!ModifiedRegUnits.available(Reg: BaseReg))
673 return E;
674
675 // Update list of instructions that read/write memory.
676 if (MI.mayLoadOrStore())
677 MemInsns.push_back(Elt: &MI);
678 }
679 return E;
680}
681
682MachineBasicBlock::iterator
683RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
684 MachineBasicBlock::iterator Paired,
685 bool MergeForward) {
686 MachineBasicBlock::iterator E = I->getParent()->end();
687 MachineBasicBlock::iterator NextI = next_nodbg(It: I, End: E);
688 // If NextI is the second of the two instructions to be merged, skip one
689 // further for now. For the MIPS load/store, the merge will invalidate the
690 // iterator, and we don't need to scan the new instruction, as it's a pairwise
691 // instruction, which we're not considering for further action anyway. For the
692 // Xqcilsm load/store, we may not want to do this as the second instruction
693 // could possibly be the first in another pair if we do not merge here. This
694 // is handled in the else block after the call to tryConvertToLdStPair below.
695 if (NextI == Paired)
696 NextI = next_nodbg(It: NextI, End: E);
697
698 // Insert our new paired instruction after whichever of the paired
699 // instructions MergeForward indicates.
700 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
701 MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired;
702 int Offset = I->getOperand(i: 2).getImm();
703 int PairedOffset = Paired->getOperand(i: 2).getImm();
704 bool InsertAfter = (Offset < PairedOffset) ^ MergeForward;
705
706 if (!MergeForward)
707 Paired->getOperand(i: 1).setIsKill(false);
708
709 // Kill flags may become invalid when moving stores for pairing.
710 if (I->getOperand(i: 0).isUse()) {
711 if (!MergeForward) {
712 // Check if the Paired store's source register has a kill flag and clear
713 // it only if there are intermediate uses between I and Paired.
714 MachineOperand &PairedRegOp = Paired->getOperand(i: 0);
715 if (PairedRegOp.isKill()) {
716 for (auto It = std::next(x: I); It != Paired; ++It) {
717 if (It->readsRegister(Reg: PairedRegOp.getReg(), TRI)) {
718 PairedRegOp.setIsKill(false);
719 break;
720 }
721 }
722 }
723 } else {
724 // Clear kill flags of the first store's register in the forward
725 // direction.
726 Register Reg = I->getOperand(i: 0).getReg();
727 for (MachineInstr &MI : make_range(x: std::next(x: I), y: std::next(x: Paired)))
728 MI.clearRegisterKills(Reg, RegInfo: TRI);
729 }
730 }
731
732 MachineInstr *ToInsert = DeletionPoint->removeFromParent();
733 MachineBasicBlock &MBB = *InsertionPoint->getParent();
734 MachineBasicBlock::iterator First, Second;
735
736 if (!InsertAfter) {
737 First = MBB.insert(I: InsertionPoint, MI: ToInsert);
738 Second = InsertionPoint;
739 } else {
740 Second = MBB.insertAfter(I: InsertionPoint, MI: ToInsert);
741 First = InsertionPoint;
742 }
743
744 if (tryConvertToLdStPair(First, Second)) {
745 LLVM_DEBUG(dbgs() << "Pairing load/store:\n ");
746 LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
747 } else if (!STI->is64Bit() && STI->hasVendorXqcilsm()) {
748 // We were unable to form the pair, so use the next non-debug instruction
749 // after the first instruction we had wanted to merge.
750 NextI = next_nodbg(It: I, End: E);
751 }
752
753 return NextI;
754}
755
756//===----------------------------------------------------------------------===//
757// Post reg-alloc zilsd pass implementation
758//===----------------------------------------------------------------------===//
759
760bool RISCVLoadStoreOpt::isValidZilsdRegPair(Register First, Register Second) {
761 // Special case: First register can not be zero unless both registers are
762 // zeros.
763 // Spec says: LD instructions with destination x0 are processed as any other
764 // load, but the result is discarded entirely and x1 is not written. If using
765 // x0 as src of SD, the entire 64-bit operand is zero — i.e., register x1 is
766 // not accessed.
767 if (First == RISCV::X0)
768 return Second == RISCV::X0;
769
770 // Check if registers form a valid even/odd pair for Zilsd
771 unsigned FirstNum = TRI->getEncodingValue(Reg: First);
772 unsigned SecondNum = TRI->getEncodingValue(Reg: Second);
773
774 // Must be consecutive and first must be even
775 return (FirstNum % 2 == 0) && (SecondNum == FirstNum + 1);
776}
777
778void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
779 MachineBasicBlock::iterator &MBBI,
780 bool IsLoad) {
781 MachineInstr *MI = &*MBBI;
782 DebugLoc DL = MI->getDebugLoc();
783
784 const MachineOperand &FirstOp = MI->getOperand(i: 0);
785 const MachineOperand &SecondOp = MI->getOperand(i: 1);
786 const MachineOperand &BaseOp = MI->getOperand(i: 2);
787 Register FirstReg = FirstOp.getReg();
788 Register SecondReg = SecondOp.getReg();
789 Register BaseReg = BaseOp.getReg();
790
791 // Handle both immediate and symbolic operands for offset
792 const MachineOperand &OffsetOp = MI->getOperand(i: 3);
793 int BaseOffset;
794 if (OffsetOp.isImm())
795 BaseOffset = OffsetOp.getImm();
796 else
797 // For symbolic operands, extract the embedded offset
798 BaseOffset = OffsetOp.getOffset();
799
800 unsigned Opc = IsLoad ? RISCV::LW : RISCV::SW;
801 MachineInstrBuilder MIB1, MIB2;
802
803 // Create two separate instructions
804 if (IsLoad) {
805 // It's possible that first register is same as base register, when we split
806 // it becomes incorrect because base register is overwritten, e.g.
807 // X10, X13 = PseudoLD_RV32_OPT killed X10, 0
808 // =>
809 // X10 = LW X10, 0
810 // X13 = LW killed X10, 4
811 // we can just switch the order to resolve that:
812 // X13 = LW X10, 4
813 // X10 = LW killed X10, 0
814 if (FirstReg == BaseReg) {
815 MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc))
816 .addReg(RegNo: SecondReg,
817 Flags: RegState::Define | getDeadRegState(B: SecondOp.isDead()))
818 .addReg(RegNo: BaseReg);
819 MIB1 = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc))
820 .addReg(RegNo: FirstReg,
821 Flags: RegState::Define | getDeadRegState(B: FirstOp.isDead()))
822 .addReg(RegNo: BaseReg, Flags: getKillRegState(B: BaseOp.isKill()));
823
824 } else {
825 MIB1 = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc))
826 .addReg(RegNo: FirstReg,
827 Flags: RegState::Define | getDeadRegState(B: FirstOp.isDead()))
828 .addReg(RegNo: BaseReg);
829
830 MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc))
831 .addReg(RegNo: SecondReg,
832 Flags: RegState::Define | getDeadRegState(B: SecondOp.isDead()))
833 .addReg(RegNo: BaseReg, Flags: getKillRegState(B: BaseOp.isKill()));
834 }
835
836 ++NumLD2LW;
837 LLVM_DEBUG(dbgs() << "Split LD back to two LW instructions\n");
838 } else {
839 assert(
840 FirstReg != SecondReg &&
841 "First register and second register is impossible to be same register");
842 MIB1 = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc))
843 .addReg(RegNo: FirstReg, Flags: getKillRegState(B: FirstOp.isKill()))
844 .addReg(RegNo: BaseReg);
845
846 MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc))
847 .addReg(RegNo: SecondReg, Flags: getKillRegState(B: SecondOp.isKill()))
848 .addReg(RegNo: BaseReg, Flags: getKillRegState(B: BaseOp.isKill()));
849
850 ++NumSD2SW;
851 LLVM_DEBUG(dbgs() << "Split SD back to two SW instructions\n");
852 }
853
854 // Add offset operands - preserve symbolic references
855 MIB1.add(MO: OffsetOp);
856 if (OffsetOp.isImm())
857 MIB2.addImm(Val: BaseOffset + 4);
858 else if (OffsetOp.isGlobal())
859 MIB2.addGlobalAddress(GV: OffsetOp.getGlobal(), Offset: BaseOffset + 4,
860 TargetFlags: OffsetOp.getTargetFlags());
861 else if (OffsetOp.isCPI())
862 MIB2.addConstantPoolIndex(Idx: OffsetOp.getIndex(), Offset: BaseOffset + 4,
863 TargetFlags: OffsetOp.getTargetFlags());
864 else if (OffsetOp.isBlockAddress())
865 MIB2.addBlockAddress(BA: OffsetOp.getBlockAddress(), Offset: BaseOffset + 4,
866 TargetFlags: OffsetOp.getTargetFlags());
867
868 // Copy memory operands if the original instruction had them
869 // FIXME: This is overly conservative; the new instruction accesses 4 bytes,
870 // not 8.
871 MIB1.cloneMemRefs(OtherMI: *MI);
872 MIB2.cloneMemRefs(OtherMI: *MI);
873
874 // Remove the original paired instruction and update iterator
875 MBBI = MBB.erase(I: MBBI);
876}
877
878bool RISCVLoadStoreOpt::fixInvalidRegPairOp(MachineBasicBlock &MBB,
879 MachineBasicBlock::iterator &MBBI) {
880 MachineInstr *MI = &*MBBI;
881 unsigned Opcode = MI->getOpcode();
882
883 // Check if this is a Zilsd pseudo that needs fixing
884 if (Opcode != RISCV::PseudoLD_RV32_OPT && Opcode != RISCV::PseudoSD_RV32_OPT)
885 return false;
886
887 bool IsLoad = Opcode == RISCV::PseudoLD_RV32_OPT;
888
889 const MachineOperand &FirstOp = MI->getOperand(i: 0);
890 const MachineOperand &SecondOp = MI->getOperand(i: 1);
891 Register FirstReg = FirstOp.getReg();
892 Register SecondReg = SecondOp.getReg();
893
894 if (!isValidZilsdRegPair(First: FirstReg, Second: SecondReg)) {
895 // Need to split back into two instructions
896 splitLdSdIntoTwo(MBB, MBBI, IsLoad);
897 return true;
898 }
899
900 // Registers are valid, convert to real LD/SD instruction
901 const MachineOperand &BaseOp = MI->getOperand(i: 2);
902 Register BaseReg = BaseOp.getReg();
903 DebugLoc DL = MI->getDebugLoc();
904 // Handle both immediate and symbolic operands for offset
905 const MachineOperand &OffsetOp = MI->getOperand(i: 3);
906
907 unsigned RealOpc = IsLoad ? RISCV::LD_RV32 : RISCV::SD_RV32;
908
909 // Create register pair from the two individual registers
910 unsigned RegPair = TRI->getMatchingSuperReg(Reg: FirstReg, SubIdx: RISCV::sub_gpr_even,
911 RC: &RISCV::GPRPairRegClass);
912 // Create the real LD/SD instruction with register pair
913 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RealOpc));
914
915 if (IsLoad) {
916 // For LD, the register pair is the destination
917 MIB.addReg(RegNo: RegPair, Flags: RegState::Define | getDeadRegState(B: FirstOp.isDead() &&
918 SecondOp.isDead()));
919 } else {
920 // For SD, the register pair is the source
921 MIB.addReg(RegNo: RegPair, Flags: getKillRegState(B: FirstOp.isKill() && SecondOp.isKill()));
922 }
923
924 MIB.addReg(RegNo: BaseReg, Flags: getKillRegState(B: BaseOp.isKill()))
925 .add(MO: OffsetOp)
926 .cloneMemRefs(OtherMI: *MI);
927
928 LLVM_DEBUG(dbgs() << "Converted pseudo to real instruction: " << *MIB
929 << "\n");
930
931 // Remove the pseudo instruction and update iterator
932 MBBI = MBB.erase(I: MBBI);
933
934 return true;
935}
936
937// Returns an instance of the Load / Store Optimization pass.
938FunctionPass *llvm::createRISCVLoadStoreOptPass() {
939 return new RISCVLoadStoreOpt();
940}
941