| 1 | //===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Load/Store Pairing: It identifies pairs of load or store instructions |
| 10 | // operating on consecutive memory locations and merges them into a single |
| 11 | // paired instruction, leveraging hardware support for paired memory accesses. |
| 12 | // Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass. |
| 13 | // |
| 14 | // NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as |
| 15 | // merging zero store instructions, promoting loads that read directly from a |
| 16 | // preceding store, and merging base register updates with load/store |
| 17 | // instructions (via pre-/post-indexed addressing). These advanced |
| 18 | // transformations are not yet implemented in the RISC-V pass but represent |
| 19 | // potential future enhancements for further optimizing RISC-V memory |
| 20 | // operations. |
| 21 | // |
| 22 | //===----------------------------------------------------------------------===// |
| 23 | |
| 24 | #include "RISCV.h" |
| 25 | #include "RISCVTargetMachine.h" |
| 26 | #include "llvm/Analysis/AliasAnalysis.h" |
| 27 | #include "llvm/CodeGen/Passes.h" |
| 28 | #include "llvm/MC/TargetRegistry.h" |
| 29 | #include "llvm/Support/Debug.h" |
| 30 | #include "llvm/Target/TargetOptions.h" |
| 31 | |
| 32 | using namespace llvm; |
| 33 | |
| 34 | #define DEBUG_TYPE "riscv-load-store-opt" |
| 35 | #define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer" |
| 36 | |
| 37 | // The LdStLimit limits number of instructions how far we search for load/store |
| 38 | // pairs. |
| 39 | static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit" , cl::init(Val: 128), |
| 40 | cl::Hidden); |
| 41 | |
| 42 | namespace { |
| 43 | |
| 44 | struct RISCVLoadStoreOpt : public MachineFunctionPass { |
| 45 | static char ID; |
| 46 | bool runOnMachineFunction(MachineFunction &Fn) override; |
| 47 | |
| 48 | RISCVLoadStoreOpt() : MachineFunctionPass(ID) {} |
| 49 | |
| 50 | MachineFunctionProperties getRequiredProperties() const override { |
| 51 | return MachineFunctionProperties().setNoVRegs(); |
| 52 | } |
| 53 | |
| 54 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 55 | AU.addRequired<AAResultsWrapperPass>(); |
| 56 | MachineFunctionPass::getAnalysisUsage(AU); |
| 57 | } |
| 58 | |
| 59 | StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; } |
| 60 | |
| 61 | // Find and pair load/store instructions. |
| 62 | bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); |
| 63 | |
| 64 | // Convert load/store pairs to single instructions. |
| 65 | bool tryConvertToLdStPair(MachineBasicBlock::iterator First, |
| 66 | MachineBasicBlock::iterator Second); |
| 67 | |
| 68 | // Scan the instructions looking for a load/store that can be combined |
| 69 | // with the current instruction into a load/store pair. |
| 70 | // Return the matching instruction if one is found, else MBB->end(). |
| 71 | MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, |
| 72 | bool &MergeForward); |
| 73 | |
| 74 | MachineBasicBlock::iterator |
| 75 | mergePairedInsns(MachineBasicBlock::iterator I, |
| 76 | MachineBasicBlock::iterator Paired, bool MergeForward); |
| 77 | |
| 78 | private: |
| 79 | AliasAnalysis *AA; |
| 80 | MachineRegisterInfo *MRI; |
| 81 | const RISCVInstrInfo *TII; |
| 82 | const RISCVRegisterInfo *TRI; |
| 83 | LiveRegUnits ModifiedRegUnits, UsedRegUnits; |
| 84 | }; |
| 85 | } // end anonymous namespace |
| 86 | |
| 87 | char RISCVLoadStoreOpt::ID = 0; |
| 88 | INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false, |
| 89 | false) |
| 90 | |
| 91 | bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { |
| 92 | if (skipFunction(F: Fn.getFunction())) |
| 93 | return false; |
| 94 | const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>(); |
| 95 | if (!Subtarget.useLoadStorePairs()) |
| 96 | return false; |
| 97 | |
| 98 | bool MadeChange = false; |
| 99 | TII = Subtarget.getInstrInfo(); |
| 100 | TRI = Subtarget.getRegisterInfo(); |
| 101 | MRI = &Fn.getRegInfo(); |
| 102 | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
| 103 | ModifiedRegUnits.init(TRI: *TRI); |
| 104 | UsedRegUnits.init(TRI: *TRI); |
| 105 | |
| 106 | for (MachineBasicBlock &MBB : Fn) { |
| 107 | LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n" ); |
| 108 | |
| 109 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
| 110 | MBBI != E;) { |
| 111 | if (TII->isPairableLdStInstOpc(Opc: MBBI->getOpcode()) && |
| 112 | tryToPairLdStInst(MBBI)) |
| 113 | MadeChange = true; |
| 114 | else |
| 115 | ++MBBI; |
| 116 | } |
| 117 | } |
| 118 | return MadeChange; |
| 119 | } |
| 120 | |
| 121 | // Find loads and stores that can be merged into a single load or store pair |
| 122 | // instruction. |
| 123 | bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { |
| 124 | MachineInstr &MI = *MBBI; |
| 125 | |
| 126 | // If this is volatile, it is not a candidate. |
| 127 | if (MI.hasOrderedMemoryRef()) |
| 128 | return false; |
| 129 | |
| 130 | if (!TII->isLdStSafeToPair(LdSt: MI, TRI)) |
| 131 | return false; |
| 132 | |
| 133 | // Look ahead for a pairable instruction. |
| 134 | MachineBasicBlock::iterator E = MI.getParent()->end(); |
| 135 | bool MergeForward; |
| 136 | MachineBasicBlock::iterator Paired = findMatchingInsn(I: MBBI, MergeForward); |
| 137 | if (Paired != E) { |
| 138 | MBBI = mergePairedInsns(I: MBBI, Paired, MergeForward); |
| 139 | return true; |
| 140 | } |
| 141 | return false; |
| 142 | } |
| 143 | |
| 144 | // Merge two adjacent load/store instructions into a paired instruction |
| 145 | // (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of |
| 146 | // SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the |
| 147 | // appropriate paired opcode, verifies that the memory operand is properly |
| 148 | // aligned, and checks that the offset is valid. If all conditions are met, it |
| 149 | // builds and inserts the paired instruction. |
| 150 | bool RISCVLoadStoreOpt::tryConvertToLdStPair( |
| 151 | MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) { |
| 152 | unsigned PairOpc; |
| 153 | Align RequiredAlignment; |
| 154 | switch (First->getOpcode()) { |
| 155 | default: |
| 156 | llvm_unreachable("Unsupported load/store instruction for pairing" ); |
| 157 | case RISCV::SW: |
| 158 | PairOpc = RISCV::MIPS_SWP; |
| 159 | RequiredAlignment = Align(8); |
| 160 | break; |
| 161 | case RISCV::LW: |
| 162 | PairOpc = RISCV::MIPS_LWP; |
| 163 | RequiredAlignment = Align(8); |
| 164 | break; |
| 165 | case RISCV::SD: |
| 166 | PairOpc = RISCV::MIPS_SDP; |
| 167 | RequiredAlignment = Align(16); |
| 168 | break; |
| 169 | case RISCV::LD: |
| 170 | PairOpc = RISCV::MIPS_LDP; |
| 171 | RequiredAlignment = Align(16); |
| 172 | break; |
| 173 | } |
| 174 | |
| 175 | MachineFunction *MF = First->getMF(); |
| 176 | const MachineMemOperand *MMO = *First->memoperands_begin(); |
| 177 | Align MMOAlign = MMO->getAlign(); |
| 178 | |
| 179 | if (MMOAlign < RequiredAlignment) |
| 180 | return false; |
| 181 | |
| 182 | int64_t Offset = First->getOperand(i: 2).getImm(); |
| 183 | if (!isUInt<7>(x: Offset)) |
| 184 | return false; |
| 185 | |
| 186 | MachineInstrBuilder MIB = BuildMI( |
| 187 | MF&: *MF, MIMD: First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(), |
| 188 | MCID: TII->get(Opcode: PairOpc)); |
| 189 | MIB.add(MO: First->getOperand(i: 0)) |
| 190 | .add(MO: Second->getOperand(i: 0)) |
| 191 | .add(MO: First->getOperand(i: 1)) |
| 192 | .add(MO: First->getOperand(i: 2)) |
| 193 | .cloneMergedMemRefs(OtherMIs: {&*First, &*Second}); |
| 194 | |
| 195 | First->getParent()->insert(I: First, MI: MIB); |
| 196 | |
| 197 | First->removeFromParent(); |
| 198 | Second->removeFromParent(); |
| 199 | |
| 200 | return true; |
| 201 | } |
| 202 | |
| 203 | static bool mayAlias(MachineInstr &MIa, |
| 204 | SmallVectorImpl<MachineInstr *> &MemInsns, |
| 205 | AliasAnalysis *AA) { |
| 206 | for (MachineInstr *MIb : MemInsns) |
| 207 | if (MIa.mayAlias(AA, Other: *MIb, /*UseTBAA*/ false)) |
| 208 | return true; |
| 209 | |
| 210 | return false; |
| 211 | } |
| 212 | |
| 213 | // Scan the instructions looking for a load/store that can be combined with the |
| 214 | // current instruction into a wider equivalent or a load/store pair. |
| 215 | // TODO: Extend pairing logic to consider reordering both instructions |
| 216 | // to a safe "middle" position rather than only merging forward/backward. |
| 217 | // This requires more sophisticated checks for aliasing, register |
| 218 | // liveness, and potential scheduling hazards. |
| 219 | MachineBasicBlock::iterator |
| 220 | RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, |
| 221 | bool &MergeForward) { |
| 222 | MachineBasicBlock::iterator E = I->getParent()->end(); |
| 223 | MachineBasicBlock::iterator MBBI = I; |
| 224 | MachineInstr &FirstMI = *I; |
| 225 | MBBI = next_nodbg(It: MBBI, End: E); |
| 226 | |
| 227 | bool MayLoad = FirstMI.mayLoad(); |
| 228 | Register Reg = FirstMI.getOperand(i: 0).getReg(); |
| 229 | Register BaseReg = FirstMI.getOperand(i: 1).getReg(); |
| 230 | int64_t Offset = FirstMI.getOperand(i: 2).getImm(); |
| 231 | int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue(); |
| 232 | |
| 233 | MergeForward = false; |
| 234 | |
| 235 | // Track which register units have been modified and used between the first |
| 236 | // insn (inclusive) and the second insn. |
| 237 | ModifiedRegUnits.clear(); |
| 238 | UsedRegUnits.clear(); |
| 239 | |
| 240 | // Remember any instructions that read/write memory between FirstMI and MI. |
| 241 | SmallVector<MachineInstr *, 4> MemInsns; |
| 242 | |
| 243 | for (unsigned Count = 0; MBBI != E && Count < LdStLimit; |
| 244 | MBBI = next_nodbg(It: MBBI, End: E)) { |
| 245 | MachineInstr &MI = *MBBI; |
| 246 | |
| 247 | // Don't count transient instructions towards the search limit since there |
| 248 | // may be different numbers of them if e.g. debug information is present. |
| 249 | if (!MI.isTransient()) |
| 250 | ++Count; |
| 251 | |
| 252 | if (MI.getOpcode() == FirstMI.getOpcode() && |
| 253 | TII->isLdStSafeToPair(LdSt: MI, TRI)) { |
| 254 | Register MIBaseReg = MI.getOperand(i: 1).getReg(); |
| 255 | int64_t MIOffset = MI.getOperand(i: 2).getImm(); |
| 256 | |
| 257 | if (BaseReg == MIBaseReg) { |
| 258 | if ((Offset != MIOffset + OffsetStride) && |
| 259 | (Offset + OffsetStride != MIOffset)) { |
| 260 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
| 261 | TRI); |
| 262 | MemInsns.push_back(Elt: &MI); |
| 263 | continue; |
| 264 | } |
| 265 | |
| 266 | // If the destination register of one load is the same register or a |
| 267 | // sub/super register of the other load, bail and keep looking. |
| 268 | if (MayLoad && |
| 269 | TRI->isSuperOrSubRegisterEq(RegA: Reg, RegB: MI.getOperand(i: 0).getReg())) { |
| 270 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
| 271 | TRI); |
| 272 | MemInsns.push_back(Elt: &MI); |
| 273 | continue; |
| 274 | } |
| 275 | |
| 276 | // If the BaseReg has been modified, then we cannot do the optimization. |
| 277 | if (!ModifiedRegUnits.available(Reg: BaseReg)) |
| 278 | return E; |
| 279 | |
| 280 | // If the Rt of the second instruction was not modified or used between |
| 281 | // the two instructions and none of the instructions between the second |
| 282 | // and first alias with the second, we can combine the second into the |
| 283 | // first. |
| 284 | if (ModifiedRegUnits.available(Reg: MI.getOperand(i: 0).getReg()) && |
| 285 | !(MI.mayLoad() && |
| 286 | !UsedRegUnits.available(Reg: MI.getOperand(i: 0).getReg())) && |
| 287 | !mayAlias(MIa&: MI, MemInsns, AA)) { |
| 288 | |
| 289 | MergeForward = false; |
| 290 | return MBBI; |
| 291 | } |
| 292 | |
| 293 | // Likewise, if the Rt of the first instruction is not modified or used |
| 294 | // between the two instructions and none of the instructions between the |
| 295 | // first and the second alias with the first, we can combine the first |
| 296 | // into the second. |
| 297 | if (!(MayLoad && |
| 298 | !UsedRegUnits.available(Reg: FirstMI.getOperand(i: 0).getReg())) && |
| 299 | !mayAlias(MIa&: FirstMI, MemInsns, AA)) { |
| 300 | |
| 301 | if (ModifiedRegUnits.available(Reg: FirstMI.getOperand(i: 0).getReg())) { |
| 302 | MergeForward = true; |
| 303 | return MBBI; |
| 304 | } |
| 305 | } |
| 306 | // Unable to combine these instructions due to interference in between. |
| 307 | // Keep looking. |
| 308 | } |
| 309 | } |
| 310 | |
| 311 | // If the instruction wasn't a matching load or store. Stop searching if we |
| 312 | // encounter a call instruction that might modify memory. |
| 313 | if (MI.isCall()) |
| 314 | return E; |
| 315 | |
| 316 | // Update modified / uses register units. |
| 317 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); |
| 318 | |
| 319 | // Otherwise, if the base register is modified, we have no match, so |
| 320 | // return early. |
| 321 | if (!ModifiedRegUnits.available(Reg: BaseReg)) |
| 322 | return E; |
| 323 | |
| 324 | // Update list of instructions that read/write memory. |
| 325 | if (MI.mayLoadOrStore()) |
| 326 | MemInsns.push_back(Elt: &MI); |
| 327 | } |
| 328 | return E; |
| 329 | } |
| 330 | |
| 331 | MachineBasicBlock::iterator |
| 332 | RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, |
| 333 | MachineBasicBlock::iterator Paired, |
| 334 | bool MergeForward) { |
| 335 | MachineBasicBlock::iterator E = I->getParent()->end(); |
| 336 | MachineBasicBlock::iterator NextI = next_nodbg(It: I, End: E); |
| 337 | // If NextI is the second of the two instructions to be merged, we need |
| 338 | // to skip one further. Either way we merge will invalidate the iterator, |
| 339 | // and we don't need to scan the new instruction, as it's a pairwise |
| 340 | // instruction, which we're not considering for further action anyway. |
| 341 | if (NextI == Paired) |
| 342 | NextI = next_nodbg(It: NextI, End: E); |
| 343 | |
| 344 | // Insert our new paired instruction after whichever of the paired |
| 345 | // instructions MergeForward indicates. |
| 346 | MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; |
| 347 | MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired; |
| 348 | int Offset = I->getOperand(i: 2).getImm(); |
| 349 | int PairedOffset = Paired->getOperand(i: 2).getImm(); |
| 350 | bool InsertAfter = (Offset < PairedOffset) ^ MergeForward; |
| 351 | |
| 352 | if (!MergeForward) |
| 353 | Paired->getOperand(i: 1).setIsKill(false); |
| 354 | |
| 355 | // Kill flags may become invalid when moving stores for pairing. |
| 356 | if (I->getOperand(i: 0).isUse()) { |
| 357 | if (!MergeForward) { |
| 358 | // Check if the Paired store's source register has a kill flag and clear |
| 359 | // it only if there are intermediate uses between I and Paired. |
| 360 | MachineOperand &PairedRegOp = Paired->getOperand(i: 0); |
| 361 | if (PairedRegOp.isKill()) { |
| 362 | for (auto It = std::next(x: I); It != Paired; ++It) { |
| 363 | if (It->readsRegister(Reg: PairedRegOp.getReg(), TRI)) { |
| 364 | PairedRegOp.setIsKill(false); |
| 365 | break; |
| 366 | } |
| 367 | } |
| 368 | } |
| 369 | } else { |
| 370 | // Clear kill flags of the first store's register in the forward |
| 371 | // direction. |
| 372 | Register Reg = I->getOperand(i: 0).getReg(); |
| 373 | for (MachineInstr &MI : make_range(x: std::next(x: I), y: std::next(x: Paired))) |
| 374 | MI.clearRegisterKills(Reg, RegInfo: TRI); |
| 375 | } |
| 376 | } |
| 377 | |
| 378 | MachineInstr *ToInsert = DeletionPoint->removeFromParent(); |
| 379 | MachineBasicBlock &MBB = *InsertionPoint->getParent(); |
| 380 | MachineBasicBlock::iterator First, Second; |
| 381 | |
| 382 | if (!InsertAfter) { |
| 383 | First = MBB.insert(I: InsertionPoint, MI: ToInsert); |
| 384 | Second = InsertionPoint; |
| 385 | } else { |
| 386 | Second = MBB.insertAfter(I: InsertionPoint, MI: ToInsert); |
| 387 | First = InsertionPoint; |
| 388 | } |
| 389 | |
| 390 | if (tryConvertToLdStPair(First, Second)) { |
| 391 | LLVM_DEBUG(dbgs() << "Pairing load/store:\n " ); |
| 392 | LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs())); |
| 393 | } |
| 394 | |
| 395 | return NextI; |
| 396 | } |
| 397 | |
| 398 | // Returns an instance of the Load / Store Optimization pass. |
| 399 | FunctionPass *llvm::createRISCVLoadStoreOptPass() { |
| 400 | return new RISCVLoadStoreOpt(); |
| 401 | } |
| 402 | |