//===-- RISCVZilsdOptimizer.cpp - RISC-V Zilsd Load/Store Optimizer ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load/store optimizations for the
// RISC-V Zilsd extension. It combines pairs of 32-bit load/store instructions
// into single 64-bit LD/SD instructions when possible.
//
// The pass runs in two phases:
// 1. Pre-allocation: Reschedules loads/stores to bring consecutive memory
//    accesses closer together and forms LD/SD pairs with register hints.
// 2. Post-allocation: Fixes invalid LD/SD instructions if register allocation
//    didn't provide suitable consecutive registers.
//
// Note: second phase is integrated into RISCVLoadStoreOptimizer
//
//===----------------------------------------------------------------------===//
22
23#include "RISCV.h"
24#include "RISCVInstrInfo.h"
25#include "RISCVRegisterInfo.h"
26#include "RISCVSubtarget.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/Statistic.h"
30#include "llvm/Analysis/AliasAnalysis.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineDominators.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineFunctionPass.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/InitializePasses.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/Debug.h"
41#include <algorithm>
42
43using namespace llvm;
44
45#define DEBUG_TYPE "riscv-zilsd-opt"
46
47STATISTIC(NumLDFormed, "Number of LD instructions formed");
48STATISTIC(NumSDFormed, "Number of SD instructions formed");
49
50static cl::opt<bool>
51 DisableZilsdOpt("disable-riscv-zilsd-opt", cl::Hidden, cl::init(Val: false),
52 cl::desc("Disable Zilsd load/store optimization"));
53
54static cl::opt<unsigned> MaxRescheduleDistance(
55 "riscv-zilsd-max-reschedule-distance", cl::Hidden, cl::init(Val: 10),
56 cl::desc("Maximum distance for rescheduling load/store instructions"));
57
58namespace {
59
60//===----------------------------------------------------------------------===//
61// Pre-allocation Zilsd optimization pass
62//===----------------------------------------------------------------------===//
// Pre-RA pass: finds LW/SW pairs at consecutive offsets off the same base,
// reschedules them next to each other, and fuses them into
// PseudoLD_RV32_OPT / PseudoSD_RV32_OPT with even/odd register-pair hints.
class RISCVPreAllocZilsdOpt : public MachineFunctionPass {
public:
  static char ID;

  RISCVPreAllocZilsdOpt() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "RISC-V pre-allocation Zilsd load/store optimization";
  }

  // Runs before register allocation, so virtual registers are still in SSA.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
  // Classifies what kind of symbol/value a memory operand's offset is relative
  // to; offsets are only comparable within the same kind (and same base).
  enum class MemoryOffsetKind {
    Imm = 0,
    Global = 1,
    CPI = 2,
    BlockAddr = 3,
    FrameIdx = 4,
    Unknown = 5,
  };
  // (kind, numeric offset) pair describing one LW/SW address.
  using MemOffset = std::pair<MemoryOffsetKind, int>;
  // (base register id or frame index, offset kind) used as a grouping key.
  using BaseRegInfo = std::pair<unsigned, MemoryOffsetKind>;

private:
  bool isMemoryOp(const MachineInstr &MI);
  bool rescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  bool canFormLdSdPair(MachineInstr *MI0, MachineInstr *MI1);
  bool rescheduleOps(MachineBasicBlock *MBB,
                     SmallVectorImpl<MachineInstr *> &MIs, BaseRegInfo Base,
                     bool IsLoad,
                     DenseMap<MachineInstr *, unsigned> &MI2LocMap);
  bool isSafeToMove(MachineInstr *MI, MachineInstr *Target, bool MoveForward);
  MemOffset getMemoryOpOffset(const MachineInstr &MI);

  const RISCVSubtarget *STI;
  const RISCVInstrInfo *TII;
  const RISCVRegisterInfo *TRI;
  MachineRegisterInfo *MRI;
  AliasAnalysis *AA;
  MachineDominatorTree *DT;
  // Minimum alignment a pair's memory operand must have (from subtarget).
  Align RequiredAlign;
};
115
116} // end anonymous namespace
117
char RISCVPreAllocZilsdOpt::ID = 0;

// Register the pass and its analysis dependencies with the legacy pass
// manager.
INITIALIZE_PASS_BEGIN(RISCVPreAllocZilsdOpt, "riscv-prera-zilsd-opt",
                      "RISC-V pre-allocation Zilsd optimization", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(RISCVPreAllocZilsdOpt, "riscv-prera-zilsd-opt",
                    "RISC-V pre-allocation Zilsd optimization", false, false)
126
127//===----------------------------------------------------------------------===//
128// Pre-allocation pass implementation
129//===----------------------------------------------------------------------===//
130
131bool RISCVPreAllocZilsdOpt::runOnMachineFunction(MachineFunction &MF) {
132
133 if (DisableZilsdOpt || skipFunction(F: MF.getFunction()))
134 return false;
135
136 STI = &MF.getSubtarget<RISCVSubtarget>();
137
138 // Only run on RV32 with Zilsd extension
139 if (STI->is64Bit() || !STI->hasStdExtZilsd())
140 return false;
141
142 TII = STI->getInstrInfo();
143 TRI = STI->getRegisterInfo();
144 MRI = &MF.getRegInfo();
145 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
146 DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
147
148 // Check alignment: default is 8-byte, but allow 4-byte with tune feature
149 // If unaligned scalar memory is enabled, allow any alignment
150 RequiredAlign = STI->getZilsdAlign();
151 bool Modified = false;
152 for (auto &MBB : MF) {
153 Modified |= rescheduleLoadStoreInstrs(MBB: &MBB);
154 }
155
156 return Modified;
157}
158
159RISCVPreAllocZilsdOpt::MemOffset
160RISCVPreAllocZilsdOpt::getMemoryOpOffset(const MachineInstr &MI) {
161 switch (MI.getOpcode()) {
162 case RISCV::LW:
163 case RISCV::SW: {
164 // For LW/SW, the base is in operand 1 and offset is in operand 2
165 const MachineOperand &BaseOp = MI.getOperand(i: 1);
166 const MachineOperand &OffsetOp = MI.getOperand(i: 2);
167
168 // Handle immediate offset
169 if (OffsetOp.isImm()) {
170 if (BaseOp.isFI())
171 return std::make_pair(x: MemoryOffsetKind::FrameIdx, y: OffsetOp.getImm());
172 return std::make_pair(x: MemoryOffsetKind::Imm, y: OffsetOp.getImm());
173 }
174
175 // Handle symbolic operands with MO_LO flag (from MergeBaseOffset)
176 if (OffsetOp.getTargetFlags() & RISCVII::MO_LO) {
177 if (OffsetOp.isGlobal())
178 return std::make_pair(x: MemoryOffsetKind::Global, y: OffsetOp.getOffset());
179 if (OffsetOp.isCPI())
180 return std::make_pair(x: MemoryOffsetKind::CPI, y: OffsetOp.getOffset());
181 if (OffsetOp.isBlockAddress())
182 return std::make_pair(x: MemoryOffsetKind::BlockAddr,
183 y: OffsetOp.getOffset());
184 }
185
186 break;
187 }
188 default:
189 break;
190 }
191
192 return std::make_pair(x: MemoryOffsetKind::Unknown, y: 0);
193}
194
195bool RISCVPreAllocZilsdOpt::canFormLdSdPair(MachineInstr *MI0,
196 MachineInstr *MI1) {
197 if (!MI0->hasOneMemOperand() || !MI1->hasOneMemOperand())
198 return false;
199
200 // Get offsets and check they are consecutive
201 int Offset0 = getMemoryOpOffset(MI: *MI0).second;
202 int Offset1 = getMemoryOpOffset(MI: *MI1).second;
203
204 // Offsets must be 4 bytes apart
205 if (Offset1 - Offset0 != 4)
206 return false;
207
208 // We need to guarantee the alignment(base + offset) is legal.
209 const MachineMemOperand *MMO = *MI0->memoperands_begin();
210 if (MMO->getAlign() < RequiredAlign)
211 return false;
212
213 // Check that the two destination/source registers are different for
214 // load/store respectively.
215 // The only case two destinations/sources can be same is (x0, x0). This pass
216 // is run before register coalescer so it will be the form of:
217 // %0 = COPY $x0
218 // SW %0, %ptr
219 // instead of:
220 // SW $x0, %ptr
221 Register FirstReg = MI0->getOperand(i: 0).getReg();
222 Register SecondReg = MI1->getOperand(i: 0).getReg();
223 if (FirstReg == SecondReg) {
224 const MachineInstr *FirstOpDefInst = MRI->getUniqueVRegDef(Reg: FirstReg);
225 if (FirstOpDefInst->isCopy() &&
226 FirstOpDefInst->getOperand(i: 1).getReg() == RISCV::X0)
227 return true;
228 return false;
229 }
230
231 return true;
232}
233
234bool RISCVPreAllocZilsdOpt::isSafeToMove(MachineInstr *MI, MachineInstr *Target,
235 bool MoveForward) {
236 MachineBasicBlock *MBB = MI->getParent();
237 MachineBasicBlock::iterator Start = MI->getIterator();
238 MachineBasicBlock::iterator End = Target->getIterator();
239
240 if (!MoveForward)
241 std::swap(a&: Start, b&: End);
242
243 // Increment Start to skip the current instruction
244 if (Start != MBB->end())
245 ++Start;
246
247 Register DefReg = MI->getOperand(i: 0).getReg();
248 const MachineOperand &BaseOp = MI->getOperand(i: 1);
249
250 unsigned ScanCount = 0;
251 for (auto It = Start; It != End; ++It, ++ScanCount) {
252 // Don't move across calls or terminators
253 if (It->isCall() || It->isTerminator()) {
254 LLVM_DEBUG(dbgs() << "Cannot move across call/terminator: " << *It);
255 return false;
256 }
257
258 // Don't move across instructions that modify memory barrier
259 if (It->hasUnmodeledSideEffects()) {
260 LLVM_DEBUG(dbgs() << "Cannot move across instruction with side effects: "
261 << *It);
262 return false;
263 }
264
265 // Check if the base register is modified
266 if (BaseOp.isReg() && It->modifiesRegister(Reg: BaseOp.getReg(), TRI)) {
267 LLVM_DEBUG(dbgs() << "Base register " << BaseOp.getReg()
268 << " modified by: " << *It);
269 return false;
270 }
271
272 // For loads, check if the loaded value is used
273 if (MI->mayLoad() &&
274 (It->readsRegister(Reg: DefReg, TRI) || It->modifiesRegister(Reg: DefReg, TRI))) {
275 LLVM_DEBUG(dbgs() << "Destination register " << DefReg
276 << " used by: " << *It);
277 return false;
278 }
279
280 // For stores, check if the stored register is modified
281 if (MI->mayStore() && It->modifiesRegister(Reg: DefReg, TRI)) {
282 LLVM_DEBUG(dbgs() << "Source register " << DefReg
283 << " modified by: " << *It);
284 return false;
285 }
286
287 // Check for memory operation interference
288 if (It->mayLoadOrStore() && It->mayAlias(AA, Other: *MI, /*UseTBAA*/ false)) {
289 LLVM_DEBUG(dbgs() << "Memory operation interference detected\n");
290 return false;
291 }
292 }
293
294 return true;
295}
296
297bool RISCVPreAllocZilsdOpt::rescheduleOps(
298 MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &MIs,
299 BaseRegInfo Base, bool IsLoad,
300 DenseMap<MachineInstr *, unsigned> &MI2LocMap) {
301 // Sort by offset, at this point it ensure base reg and MemoryOffsetKind are
302 // same, so we just need to simply sort by offset value.
303 llvm::sort(Start: MIs.begin(), End: MIs.end(), Comp: [this](MachineInstr *A, MachineInstr *B) {
304 return getMemoryOpOffset(MI: *A).second < getMemoryOpOffset(MI: *B).second;
305 });
306
307 bool Modified = false;
308
309 // Try to pair consecutive operations
310 for (size_t i = 0; i + 1 < MIs.size(); i++) {
311 MachineInstr *MI0 = MIs[i];
312 MachineInstr *MI1 = MIs[i + 1];
313
314 Register FirstReg = MI0->getOperand(i: 0).getReg();
315 Register SecondReg = MI1->getOperand(i: 0).getReg();
316 const MachineOperand &BaseOp = MI0->getOperand(i: 1);
317 const MachineOperand &OffsetOp = MI0->getOperand(i: 2);
318 assert((BaseOp.isReg() || BaseOp.isFI()) &&
319 "Base register should be register or frame index");
320
321 // At this point, MI0 and MI1 are:
322 // 1. both either LW or SW.
323 // 2. guaranteed to have same memory kind.
324 // 3. guaranteed to have same base register.
325 // 4. already be sorted by offset value.
326 // so we don't have to check these in canFormLdSdPair.
327 if (!canFormLdSdPair(MI0, MI1))
328 continue;
329
330 // Use MI2LocMap to determine which instruction appears later in program
331 // order
332 bool MI1IsLater = MI2LocMap[MI1] > MI2LocMap[MI0];
333
334 // For loads: move later instruction up (backwards) to earlier instruction
335 // For stores: move earlier instruction down (forwards) to later instruction
336 MachineInstr *MoveInstr, *TargetInstr;
337 if (IsLoad) {
338 // For loads: move the later instruction to the earlier one
339 MoveInstr = MI1IsLater ? MI1 : MI0;
340 TargetInstr = MI1IsLater ? MI0 : MI1;
341 } else {
342 // For stores: move the earlier instruction to the later one
343 MoveInstr = MI1IsLater ? MI0 : MI1;
344 TargetInstr = MI1IsLater ? MI1 : MI0;
345 }
346
347 unsigned Distance = MI1IsLater ? MI2LocMap[MI1] - MI2LocMap[MI0]
348 : MI2LocMap[MI0] - MI2LocMap[MI1];
349 if (!isSafeToMove(MI: MoveInstr, Target: TargetInstr, MoveForward: !IsLoad) ||
350 Distance > MaxRescheduleDistance)
351 continue;
352
353 // Move the instruction to the target position
354 MachineBasicBlock::iterator InsertPos = TargetInstr->getIterator();
355 ++InsertPos;
356
357 // If we need to move an instruction, do it now
358 if (MoveInstr != TargetInstr)
359 MBB->splice(Where: InsertPos, Other: MBB, From: MoveInstr->getIterator());
360
361 // Create the paired instruction
362 MachineInstrBuilder MIB;
363 DebugLoc DL = MI0->getDebugLoc();
364
365 if (IsLoad) {
366 MIB = BuildMI(BB&: *MBB, I: InsertPos, MIMD: DL, MCID: TII->get(Opcode: RISCV::PseudoLD_RV32_OPT))
367 .addReg(RegNo: FirstReg, Flags: RegState::Define)
368 .addReg(RegNo: SecondReg, Flags: RegState::Define);
369 ++NumLDFormed;
370 LLVM_DEBUG(dbgs() << "Formed LD: " << *MIB << "\n");
371 } else {
372 MIB = BuildMI(BB&: *MBB, I: InsertPos, MIMD: DL, MCID: TII->get(Opcode: RISCV::PseudoSD_RV32_OPT))
373 .addReg(RegNo: FirstReg)
374 .addReg(RegNo: SecondReg);
375 ++NumSDFormed;
376 LLVM_DEBUG(dbgs() << "Formed SD: " << *MIB << "\n");
377 }
378
379 if (BaseOp.isReg())
380 MIB = MIB.addReg(RegNo: BaseOp.getReg());
381 else
382 MIB = MIB.addFrameIndex(Idx: BaseOp.getIndex());
383 MIB = MIB.add(MO: OffsetOp);
384
385 // Copy memory operands
386 MIB.cloneMergedMemRefs(OtherMIs: {MI0, MI1});
387
388 // Add register allocation hints for consecutive registers
389 // RISC-V Zilsd requires even/odd register pairs
390 // Only set hints for virtual registers (physical registers already have
391 // encoding)
392 if (FirstReg.isVirtual() && SecondReg.isVirtual()) {
393 // For virtual registers, we can't determine even/odd yet, but we can hint
394 // that they should be allocated as a consecutive pair
395 MRI->setRegAllocationHint(VReg: FirstReg, Type: RISCVRI::RegPairEven, PrefReg: SecondReg);
396 MRI->setRegAllocationHint(VReg: SecondReg, Type: RISCVRI::RegPairOdd, PrefReg: FirstReg);
397 }
398
399 // Remove the original instructions
400 MI0->eraseFromParent();
401 MI1->eraseFromParent();
402
403 Modified = true;
404
405 // Skip the next instruction since we've already processed it
406 i++;
407 }
408
409 return Modified;
410}
411
412bool RISCVPreAllocZilsdOpt::isMemoryOp(const MachineInstr &MI) {
413 unsigned Opcode = MI.getOpcode();
414 if (Opcode != RISCV::LW && Opcode != RISCV::SW)
415 return false;
416
417 if (!MI.getOperand(i: 1).isReg() && !MI.getOperand(i: 1).isFI())
418 return false;
419
420 // When no memory operands are present, conservatively assume unaligned,
421 // volatile, unfoldable.
422 if (!MI.hasOneMemOperand())
423 return false;
424
425 const MachineMemOperand *MMO = *MI.memoperands_begin();
426
427 if (MMO->isVolatile() || MMO->isAtomic())
428 return false;
429
430 // sw <undef> could probably be eliminated entirely, but for now we just want
431 // to avoid making a mess of it.
432 if (MI.getOperand(i: 0).isReg() && MI.getOperand(i: 0).isUndef())
433 return false;
434
435 // Likewise don't mess with references to undefined addresses.
436 if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 1).isUndef())
437 return false;
438
439 return true;
440}
441
442bool RISCVPreAllocZilsdOpt::rescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
443 bool Modified = false;
444
445 // Process the basic block in windows delimited by calls, terminators,
446 // or instructions with duplicate base+offset pairs
447 MachineBasicBlock::iterator MBBI = MBB->begin();
448 MachineBasicBlock::iterator E = MBB->end();
449
450 while (MBBI != E) {
451 // Map from instruction to its location in the current window
452 DenseMap<MachineInstr *, unsigned> MI2LocMap;
453
454 // Map from base register to list of load/store instructions
455 using Base2InstMap = DenseMap<BaseRegInfo, SmallVector<MachineInstr *, 4>>;
456 using BaseVec = SmallVector<BaseRegInfo, 4>;
457 Base2InstMap Base2LdsMap;
458 Base2InstMap Base2StsMap;
459 BaseVec LdBases;
460 BaseVec StBases;
461
462 unsigned Loc = 0;
463
464 // Build the current window of instructions
465 for (; MBBI != E; ++MBBI) {
466 MachineInstr &MI = *MBBI;
467
468 // Stop at barriers (calls and terminators)
469 if (MI.isCall() || MI.isTerminator()) {
470 // Move past the barrier for next iteration
471 ++MBBI;
472 break;
473 }
474
475 // Track instruction location in window
476 if (!MI.isDebugInstr())
477 MI2LocMap[&MI] = ++Loc;
478
479 MemOffset Offset = getMemoryOpOffset(MI);
480 // Skip non-memory operations or it's not a valid memory offset kind.
481 if (!isMemoryOp(MI) || Offset.first == MemoryOffsetKind::Unknown)
482 continue;
483
484 bool IsLd = (MI.getOpcode() == RISCV::LW);
485 const MachineOperand &BaseOp = MI.getOperand(i: 1);
486 unsigned Base;
487 if (BaseOp.isReg())
488 Base = BaseOp.getReg().id();
489 else
490 Base = BaseOp.getIndex();
491 bool StopHere = false;
492
493 // Lambda to find or add base register entries
494 auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
495 auto [BI, Inserted] = Base2Ops.try_emplace(Key: {Base, Offset.first});
496 if (Inserted) {
497 // First time seeing this base register
498 BI->second.push_back(Elt: &MI);
499 Bases.push_back(Elt: {Base, Offset.first});
500 return;
501 }
502 // Check if we've seen this exact base+offset before
503 if (any_of(Range&: BI->second, P: [&](const MachineInstr *PrevMI) {
504 return Offset == getMemoryOpOffset(MI: *PrevMI);
505 })) {
506 // Found duplicate base+offset - stop here to process current window
507 StopHere = true;
508 } else {
509 BI->second.push_back(Elt: &MI);
510 }
511 };
512
513 if (IsLd)
514 FindBases(Base2LdsMap, LdBases);
515 else
516 FindBases(Base2StsMap, StBases);
517
518 if (StopHere) {
519 // Found a duplicate (a base+offset combination that's seen earlier).
520 // Backtrack to process the current window.
521 --Loc;
522 break;
523 }
524 }
525
526 // Process the current window - reschedule loads
527 for (auto Base : LdBases) {
528 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
529 if (Lds.size() > 1) {
530 Modified |= rescheduleOps(MBB, MIs&: Lds, Base, IsLoad: true, MI2LocMap);
531 }
532 }
533
534 // Process the current window - reschedule stores
535 for (auto Base : StBases) {
536 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
537 if (Sts.size() > 1) {
538 Modified |= rescheduleOps(MBB, MIs&: Sts, Base, IsLoad: false, MI2LocMap);
539 }
540 }
541 }
542
543 return Modified;
544}
545
546//===----------------------------------------------------------------------===//
547// Pass creation functions
548//===----------------------------------------------------------------------===//
549
// Factory used by the RISC-V target's pass pipeline setup.
FunctionPass *llvm::createRISCVPreAllocZilsdOptPass() {
  return new RISCVPreAllocZilsdOpt();
}
553