| 1 | //===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Merge the offset of address calculation into the offset field |
| 10 | // of instructions in a global address lowering sequence. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "RISCV.h" |
| 15 | #include "RISCVTargetMachine.h" |
| 16 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 17 | #include "llvm/CodeGen/Passes.h" |
| 18 | #include "llvm/MC/TargetRegistry.h" |
| 19 | #include "llvm/Support/Debug.h" |
| 20 | #include "llvm/Target/TargetOptions.h" |
| 21 | #include <optional> |
| 22 | using namespace llvm; |
| 23 | |
| 24 | #define DEBUG_TYPE "riscv-merge-base-offset" |
| 25 | #define RISCV_MERGE_BASE_OFFSET_NAME "RISC-V Merge Base Offset" |
| 26 | namespace { |
| 27 | |
class RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
  // Subtarget of the function being processed; set in runOnMachineFunction.
  const RISCVSubtarget *ST = nullptr;
  // Register info for the current function. The pass requires SSA form
  // (see getRequiredProperties), so single-use/def queries are meaningful.
  MachineRegisterInfo *MRI;

public:
  static char ID;
  bool runOnMachineFunction(MachineFunction &Fn) override;
  // Recognize a Hi(+Lo) address-materialization sequence rooted at Hi.
  // On success, Lo points at the low-part instruction (or at Hi itself for
  // the single-instruction PseudoMovAddr/QC_E_LI forms).
  bool detectFoldable(MachineInstr &Hi, MachineInstr *&Lo);

  // Try to fold an offset computed by arithmetic on Lo's result back into
  // the symbol operands of the Hi/Lo pair.
  bool detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Lo);
  // Move Offset into the symbol operands of Hi/Lo and erase Tail,
  // redirecting Tail's users to Lo's result.
  bool foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
                  int64_t Offset);
  // Handle offsets too large for an ADDI immediate, materialized with
  // LUI(+ADDI(W)) and combined via an ADD.
  bool foldLargeOffset(MachineInstr &Hi, MachineInstr &Lo,
                       MachineInstr &TailAdd, Register GSReg);
  // Handle small constants scaled into place via SH1ADD/SH2ADD/SH3ADD.
  bool foldShiftedOffset(MachineInstr &Hi, MachineInstr &Lo,
                         MachineInstr &TailShXAdd, Register GSReg);

  // If every use of Lo's result is a memory op with a common immediate
  // offset, fold that offset into the lowering and rewrite the memory ops.
  bool foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo);
  // Rewrite QC_E_LI + SHXADD + load/store into Xqcisls scaled memory ops.
  bool foldShxaddIntoScaledMemory(MachineInstr &Hi, MachineInstr &Lo);

  RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return RISCV_MERGE_BASE_OFFSET_NAME;
  }
};
| 63 | } // end anonymous namespace |
| 64 | |
char RISCVMergeBaseOffsetOpt::ID = 0;
// Register the pass with the legacy pass manager under the debug-type flag.
INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
                RISCV_MERGE_BASE_OFFSET_NAME, false, false)
| 68 | |
| 69 | // Detect either of the patterns: |
| 70 | // |
| 71 | // 1. (medlow pattern): |
| 72 | // a. lui vreg1, %hi(s) |
| 73 | // addi vreg2, vreg1, %lo(s) |
| 74 | // |
| 75 | // b. qc.e.li vreg1, s |
| 76 | // |
| 77 | // 2. (medany pattern): |
| 78 | // .Lpcrel_hi1: |
| 79 | // auipc vreg1, %pcrel_hi(s) |
| 80 | // addi vreg2, vreg1, %pcrel_lo(.Lpcrel_hi1) |
| 81 | // |
| 82 | // The pattern is only accepted if: |
| 83 | // 1) The first instruction has only one use, which is the ADDI. |
| 84 | // 2) The address operands have the appropriate type, reflecting the |
| 85 | // lowering of a global address or constant pool using medlow or medany. |
| 86 | // 3) The offset value in the Global Address or Constant Pool is 0. |
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
                                             MachineInstr *&Lo) {
  auto HiOpc = Hi.getOpcode();
  // Only these opcodes can begin an address-materialization sequence.
  if (HiOpc != RISCV::LUI && HiOpc != RISCV::AUIPC &&
      HiOpc != RISCV::PseudoMovAddr && HiOpc != RISCV::QC_E_LI)
    return false;

  // Operand 1 carries the symbol. Its relocation flag must match the Hi
  // form: pcrel-hi for AUIPC, absolute hi for LUI/PseudoMovAddr, and no
  // flag for the full-width QC_E_LI immediate load.
  const MachineOperand &HiOp1 = Hi.getOperand(i: 1);
  unsigned ExpectedFlags = HiOpc == RISCV::AUIPC ? RISCVII::MO_PCREL_HI
                           : HiOpc == RISCV::QC_E_LI ? RISCVII::MO_None
                                                     : RISCVII::MO_HI;
  if (HiOp1.getTargetFlags() != ExpectedFlags)
    return false;

  // Accept only global / constant-pool / block-address symbols whose
  // current offset is zero; a nonzero offset means folding already ran.
  if (!(HiOp1.isGlobal() || HiOp1.isCPI() || HiOp1.isBlockAddress()) ||
      HiOp1.getOffset() != 0)
    return false;

  if (HiOpc == RISCV::PseudoMovAddr || HiOpc == RISCV::QC_E_LI) {
    // Most of the code should handle it correctly without modification by
    // setting Lo and Hi both point to PseudoMovAddr/QC_E_LI
    Lo = &Hi;
  } else {
    // The hi part must feed exactly one instruction, the lo-part ADDI.
    Register HiDestReg = Hi.getOperand(i: 0).getReg();
    if (!MRI->hasOneUse(RegNo: HiDestReg))
      return false;

    Lo = &*MRI->use_instr_begin(RegNo: HiDestReg);
    if (Lo->getOpcode() != RISCV::ADDI)
      return false;
  }

  // Validate the lo-part symbol operand (QC_E_LI has no separate lo part).
  if (HiOpc != RISCV::QC_E_LI) {
    const MachineOperand &LoOp2 = Lo->getOperand(i: 2);
    if (HiOpc == RISCV::LUI || HiOpc == RISCV::PseudoMovAddr) {
      // %lo must name the same kind of symbol and carry a zero offset.
      if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
          !(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
          LoOp2.getOffset() != 0)
        return false;
    } else {
      assert(HiOpc == RISCV::AUIPC);
      // %pcrel_lo refers back to the AUIPC's label via an MCSymbol operand.
      if (LoOp2.getTargetFlags() != RISCVII::MO_PCREL_LO ||
          LoOp2.getType() != MachineOperand::MO_MCSymbol)
        return false;
    }
  }

  if (HiOp1.isGlobal()) {
    LLVM_DEBUG(dbgs() << " Found lowered global address: "
                      << *HiOp1.getGlobal() << "\n");
  } else if (HiOp1.isBlockAddress()) {
    LLVM_DEBUG(dbgs() << " Found lowered basic address: "
                      << *HiOp1.getBlockAddress() << "\n");
  } else if (HiOp1.isCPI()) {
    LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << HiOp1.getIndex()
                      << "\n");
  }

  return true;
}
| 147 | |
| 148 | // Update the offset in Hi and Lo instructions. |
| 149 | // Delete the tail instruction and update all the uses to use the |
| 150 | // output from Lo. |
| 151 | bool RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo, |
| 152 | MachineInstr &Tail, int64_t Offset) { |
| 153 | assert(isInt<32>(Offset) && "Unexpected offset" ); |
| 154 | |
| 155 | // If Hi is an AUIPC, don't fold the offset if it is outside the bounds of |
| 156 | // the global object. The object may be within 2GB of the PC, but addresses |
| 157 | // outside of the object might not be. |
| 158 | auto HiOpc = Hi.getOpcode(); |
| 159 | if (HiOpc == RISCV::AUIPC && Hi.getOperand(i: 1).isGlobal()) { |
| 160 | const GlobalValue *GV = Hi.getOperand(i: 1).getGlobal(); |
| 161 | Type *Ty = GV->getValueType(); |
| 162 | if (!Ty->isSized() || Offset < 0 || |
| 163 | (uint64_t)Offset > GV->getDataLayout().getTypeAllocSize(Ty)) |
| 164 | return false; |
| 165 | } |
| 166 | |
| 167 | // Put the offset back in Hi and the Lo |
| 168 | Hi.getOperand(i: 1).setOffset(Offset); |
| 169 | if (Hi.getOpcode() != RISCV::AUIPC && Hi.getOpcode() != RISCV::QC_E_LI) |
| 170 | Lo.getOperand(i: 2).setOffset(Offset); |
| 171 | // Delete the tail instruction. |
| 172 | Register LoOp0Reg = Lo.getOperand(i: 0).getReg(); |
| 173 | Register TailOp0Reg = Tail.getOperand(i: 0).getReg(); |
| 174 | MRI->constrainRegClass(Reg: LoOp0Reg, RC: MRI->getRegClass(Reg: TailOp0Reg)); |
| 175 | MRI->replaceRegWith(FromReg: TailOp0Reg, ToReg: LoOp0Reg); |
| 176 | Tail.eraseFromParent(); |
| 177 | LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n" |
| 178 | << " " << Hi << " " << Lo;); |
| 179 | return true; |
| 180 | } |
| 181 | |
| 182 | // Detect patterns for large offsets that are passed into an ADD instruction. |
| 183 | // If the pattern is found, updates the offset in Hi and Lo instructions |
| 184 | // and deletes TailAdd and the instructions that produced the offset. |
| 185 | // |
| 186 | // Base address lowering is of the form: |
| 187 | // Hi: lui vreg1, %hi(s) |
| 188 | // Lo: addi vreg2, vreg1, %lo(s) |
| 189 | // / \ |
| 190 | // / \ |
| 191 | // / \ |
| 192 | // / The large offset can be of two forms: \ |
| 193 | // 1) Offset that has non zero bits in lower 2) Offset that has non zero |
| 194 | // 12 bits and upper 20 bits bits in upper 20 bits only |
| 195 | // OffseLUI: lui vreg3, 4 |
| 196 | // OffsetTail: addi voff, vreg3, 188 OffsetTail: lui voff, 128 |
| 197 | // \ / |
| 198 | // \ / |
| 199 | // \ / |
| 200 | // \ / |
| 201 | // TailAdd: add vreg4, vreg2, voff |
bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
                                              MachineInstr &Lo,
                                              MachineInstr &TailAdd,
                                              Register GAReg) {
  assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
  // One ADD operand is the lowered address (GAReg); the other produces the
  // offset being added.
  Register Rs = TailAdd.getOperand(i: 1).getReg();
  Register Rt = TailAdd.getOperand(i: 2).getReg();
  Register Reg = Rs == GAReg ? Rt : Rs;

  // Can't fold if the register has more than one use.
  if (!Reg.isVirtual() || !MRI->hasOneUse(RegNo: Reg))
    return false;
  // This can point to an ADDI(W) or a LUI:
  MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
  auto OffsetTailOpc = OffsetTail.getOpcode();
  if (OffsetTailOpc == RISCV::ADDI || OffsetTailOpc == RISCV::ADDIW) {
    // The offset value has non zero bits in both %hi and %lo parts.
    // Detect an ADDI that feeds from a LUI instruction.
    MachineOperand &AddiImmOp = OffsetTail.getOperand(i: 2);
    // Only plain (non-relocated) immediates can be folded.
    if (AddiImmOp.getTargetFlags() != RISCVII::MO_None)
      return false;
    Register AddiReg = OffsetTail.getOperand(i: 1).getReg();
    int64_t OffLo = AddiImmOp.getImm();

    // Handle rs1 of ADDI is X0.
    // In that case the ADDI alone materializes the (simm12) constant.
    if (AddiReg == RISCV::X0) {
      LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail);
      if (!foldOffset(Hi, Lo, Tail&: TailAdd, Offset: OffLo))
        return false;
      OffsetTail.eraseFromParent();
      return true;
    }

    // Otherwise the ADDI must consume a single-use LUI with a plain
    // immediate; together they materialize a 32-bit constant.
    MachineInstr &OffsetLui = *MRI->getVRegDef(Reg: AddiReg);
    MachineOperand &LuiImmOp = OffsetLui.getOperand(i: 1);
    if (OffsetLui.getOpcode() != RISCV::LUI ||
        LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
        !MRI->hasOneUse(RegNo: OffsetLui.getOperand(i: 0).getReg()))
      return false;
    // Recombine the hi20/lo12 pieces into the full constant.
    int64_t Offset = SignExtend64<32>(x: LuiImmOp.getImm() << 12);
    Offset += OffLo;
    // RV32 ignores the upper 32 bits. ADDIW sign extends the result.
    if (!ST->is64Bit() || OffsetTailOpc == RISCV::ADDIW)
      Offset = SignExtend64<32>(x: Offset);
    // We can only fold simm32 offsets.
    if (!isInt<32>(x: Offset))
      return false;
    LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
                      << " " << OffsetLui);
    // Fold first; only erase the offset-producing instrs on success so a
    // failed fold leaves the function unchanged.
    if (!foldOffset(Hi, Lo, Tail&: TailAdd, Offset))
      return false;
    OffsetTail.eraseFromParent();
    OffsetLui.eraseFromParent();
    return true;
  } else if (OffsetTailOpc == RISCV::LUI) {
    // The offset value has all zero bits in the lower 12 bits. Only LUI
    // exists.
    LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
    int64_t Offset = SignExtend64<32>(x: OffsetTail.getOperand(i: 1).getImm() << 12);
    if (!foldOffset(Hi, Lo, Tail&: TailAdd, Offset))
      return false;
    OffsetTail.eraseFromParent();
    return true;
  }
  return false;
}
| 268 | |
| 269 | // Detect patterns for offsets that are passed into a SHXADD instruction. |
| 270 | // The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, simm15. |
| 271 | // The constant is created with addi voff, x0, C, and shXadd is used to |
| 272 | // fill insert the trailing zeros and do the addition. |
| 273 | // If the pattern is found, updates the offset in Hi and Lo instructions |
| 274 | // and deletes TailShXAdd and the instructions that produced the offset. |
| 275 | // |
| 276 | // Hi: lui vreg1, %hi(s) |
| 277 | // Lo: addi vreg2, vreg1, %lo(s) |
| 278 | // OffsetTail: addi voff, x0, C |
| 279 | // TailAdd: shXadd vreg4, voff, vreg2 |
| 280 | bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr &Hi, |
| 281 | MachineInstr &Lo, |
| 282 | MachineInstr &TailShXAdd, |
| 283 | Register GAReg) { |
| 284 | assert((TailShXAdd.getOpcode() == RISCV::SH1ADD || |
| 285 | TailShXAdd.getOpcode() == RISCV::SH2ADD || |
| 286 | TailShXAdd.getOpcode() == RISCV::SH3ADD) && |
| 287 | "Expected SHXADD instruction!" ); |
| 288 | |
| 289 | if (GAReg != TailShXAdd.getOperand(i: 2).getReg()) |
| 290 | return false; |
| 291 | |
| 292 | // The first source is the shifted operand. |
| 293 | Register Rs1 = TailShXAdd.getOperand(i: 1).getReg(); |
| 294 | |
| 295 | // Can't fold if the register has more than one use. |
| 296 | if (!Rs1.isVirtual() || !MRI->hasOneUse(RegNo: Rs1)) |
| 297 | return false; |
| 298 | // This can point to an ADDI X0, C. |
| 299 | MachineInstr &OffsetTail = *MRI->getVRegDef(Reg: Rs1); |
| 300 | if (OffsetTail.getOpcode() != RISCV::ADDI) |
| 301 | return false; |
| 302 | if (!OffsetTail.getOperand(i: 1).isReg() || |
| 303 | OffsetTail.getOperand(i: 1).getReg() != RISCV::X0 || |
| 304 | !OffsetTail.getOperand(i: 2).isImm()) |
| 305 | return false; |
| 306 | |
| 307 | int64_t Offset = OffsetTail.getOperand(i: 2).getImm(); |
| 308 | assert(isInt<12>(Offset) && "Unexpected offset" ); |
| 309 | |
| 310 | unsigned ShAmt; |
| 311 | switch (TailShXAdd.getOpcode()) { |
| 312 | default: llvm_unreachable("Unexpected opcode" ); |
| 313 | case RISCV::SH1ADD: ShAmt = 1; break; |
| 314 | case RISCV::SH2ADD: ShAmt = 2; break; |
| 315 | case RISCV::SH3ADD: ShAmt = 3; break; |
| 316 | } |
| 317 | |
| 318 | Offset = (uint64_t)Offset << ShAmt; |
| 319 | |
| 320 | LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail); |
| 321 | if (!foldOffset(Hi, Lo, Tail&: TailShXAdd, Offset)) |
| 322 | return false; |
| 323 | OffsetTail.eraseFromParent(); |
| 324 | return true; |
| 325 | } |
| 326 | |
bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
                                                  MachineInstr &Lo) {
  Register DestReg = Lo.getOperand(i: 0).getReg();

  // Look for arithmetic instructions we can get an offset from.
  // We might be able to remove the arithmetic instructions by folding the
  // offset into the LUI+ADDI.
  if (!MRI->hasOneUse(RegNo: DestReg))
    return false;

  // Lo has only one use.
  MachineInstr &Tail = *MRI->use_instr_begin(RegNo: DestReg);
  switch (Tail.getOpcode()) {
  default:
    LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr: "
                      << Tail);
    break;
  case RISCV::ADDI:
  case RISCV::QC_E_ADDI:
  case RISCV::QC_E_ADDAI: {
    // Offset is simply an immediate operand.
    int64_t Offset = Tail.getOperand(i: 2).getImm();
    if (Tail.getOpcode() == RISCV::ADDI) {
      // We might have two ADDIs in a row.
      Register TailDestReg = Tail.getOperand(i: 0).getReg();
      if (MRI->hasOneUse(RegNo: TailDestReg)) {
        MachineInstr &TailTail = *MRI->use_instr_begin(RegNo: TailDestReg);
        if (TailTail.getOpcode() == RISCV::ADDI) {
          // Fold the combined offset into TailTail (which foldOffset
          // erases), then erase the first ADDI ourselves.
          Offset += TailTail.getOperand(i: 2).getImm();
          LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
          if (!foldOffset(Hi, Lo, Tail&: TailTail, Offset))
            return false;
          Tail.eraseFromParent();
          return true;
        }
      }
    }

    LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
    return foldOffset(Hi, Lo, Tail, Offset);
  }
  case RISCV::ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // This can be in two forms:
    // 1) LUI hi_Offset followed by:
    //    ADDI lo_offset
    //    This happens in case the offset has non zero bits in
    //    both hi 20 and lo 12 bits.
    // 2) LUI (offset20)
    //    This happens in case the lower 12 bits of the offset are zeros.
    return foldLargeOffset(Hi, Lo, TailAdd&: Tail, GAReg: DestReg);
  case RISCV::SH1ADD:
  case RISCV::SH2ADD:
  case RISCV::SH3ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
    // (SH3ADD (ADDI X0, C), DestReg).
    return foldShiftedOffset(Hi, Lo, TailShXAdd&: Tail, GAReg: DestReg);
  }

  return false;
}
| 389 | |
| 390 | static void overwriteMachineOperandInPlace(MachineOperand &MO, |
| 391 | const MachineOperand &ImmOp) { |
| 392 | switch (ImmOp.getType()) { |
| 393 | case MachineOperand::MO_ConstantPoolIndex: |
| 394 | MO.ChangeToCPI(Idx: ImmOp.getIndex(), Offset: ImmOp.getOffset(), TargetFlags: ImmOp.getTargetFlags()); |
| 395 | break; |
| 396 | case MachineOperand::MO_GlobalAddress: |
| 397 | MO.ChangeToGA(GV: ImmOp.getGlobal(), Offset: ImmOp.getOffset(), TargetFlags: ImmOp.getTargetFlags()); |
| 398 | break; |
| 399 | case MachineOperand::MO_MCSymbol: |
| 400 | MO.ChangeToMCSymbol(Sym: ImmOp.getMCSymbol(), TargetFlags: ImmOp.getTargetFlags()); |
| 401 | MO.setOffset(ImmOp.getOffset()); |
| 402 | break; |
| 403 | case MachineOperand::MO_BlockAddress: |
| 404 | MO.ChangeToBA(BA: ImmOp.getBlockAddress(), Offset: ImmOp.getOffset(), |
| 405 | TargetFlags: ImmOp.getTargetFlags()); |
| 406 | break; |
| 407 | default: |
| 408 | report_fatal_error(reason: "unsupported machine operand type" ); |
| 409 | break; |
| 410 | } |
| 411 | } |
| 412 | |
| 413 | bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, |
| 414 | MachineInstr &Lo) { |
| 415 | Register DestReg = Lo.getOperand(i: 0).getReg(); |
| 416 | |
| 417 | // If all the uses are memory ops with the same offset, we can transform: |
| 418 | // |
| 419 | // 1. (medlow pattern): |
| 420 | // a. Hi: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8) |
| 421 | // Lo: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1) |
| 422 | // Tail: lw vreg3, 8(vreg2) |
| 423 | // |
| 424 | // b. Hi: qc.e.li vreg1, foo ---> qc.e.li vreg1, foo+8 |
| 425 | // Tail: lw vreg2, 8(vreg1) ---> lw vreg2, 0(vreg1) |
| 426 | // |
| 427 | // 2. (medany pattern): |
| 428 | // Hi: 1:auipc vreg1, %pcrel_hi(s) ---> auipc vreg1, %pcrel_hi(foo+8) |
| 429 | // Lo: addi vreg2, vreg1, %pcrel_lo(1b) ---> lw vreg3, %pcrel_lo(1b)(vreg1) |
| 430 | // Tail: lw vreg3, 8(vreg2) |
| 431 | |
| 432 | std::optional<int64_t> CommonOffset; |
| 433 | DenseMap<const MachineInstr *, SmallVector<unsigned>> |
| 434 | InlineAsmMemoryOpIndexesMap; |
| 435 | for (const MachineInstr &UseMI : MRI->use_instructions(Reg: DestReg)) { |
| 436 | switch (UseMI.getOpcode()) { |
| 437 | default: |
| 438 | LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI); |
| 439 | return false; |
| 440 | case RISCV::LB: |
| 441 | case RISCV::LH: |
| 442 | case RISCV::LH_INX: |
| 443 | case RISCV::LW: |
| 444 | case RISCV::LW_INX: |
| 445 | case RISCV::LBU: |
| 446 | case RISCV::LHU: |
| 447 | case RISCV::LWU: |
| 448 | case RISCV::LD: |
| 449 | case RISCV::LD_RV32: |
| 450 | case RISCV::FLH: |
| 451 | case RISCV::FLW: |
| 452 | case RISCV::FLD: |
| 453 | case RISCV::SB: |
| 454 | case RISCV::SH: |
| 455 | case RISCV::SH_INX: |
| 456 | case RISCV::SW: |
| 457 | case RISCV::SW_INX: |
| 458 | case RISCV::SD: |
| 459 | case RISCV::SD_RV32: |
| 460 | case RISCV::FSH: |
| 461 | case RISCV::FSW: |
| 462 | case RISCV::FSD: { |
| 463 | if (UseMI.getOperand(i: 1).isFI()) |
| 464 | return false; |
| 465 | // Register defined by Lo should not be the value register. |
| 466 | if (DestReg == UseMI.getOperand(i: 0).getReg()) |
| 467 | return false; |
| 468 | assert(DestReg == UseMI.getOperand(1).getReg() && |
| 469 | "Expected base address use" ); |
| 470 | // All load/store instructions must use the same offset. |
| 471 | int64_t Offset = UseMI.getOperand(i: 2).getImm(); |
| 472 | if (CommonOffset && Offset != CommonOffset) |
| 473 | return false; |
| 474 | CommonOffset = Offset; |
| 475 | break; |
| 476 | } |
| 477 | case RISCV::PseudoCCLD: |
| 478 | case RISCV::PseudoCCLW: |
| 479 | case RISCV::PseudoCCLWU: |
| 480 | case RISCV::PseudoCCLH: |
| 481 | case RISCV::PseudoCCLHU: |
| 482 | case RISCV::PseudoCCLB: |
| 483 | case RISCV::PseudoCCLBU: { |
| 484 | // The SFB Pseudos are like their non-SFB counterparts but have more |
| 485 | // operands. |
| 486 | if (UseMI.getOperand(i: 2).isFI()) |
| 487 | return false; |
| 488 | // Register defined by Lo should not be the (tied) false value, or a |
| 489 | // register used in the branch predicate. |
| 490 | if (DestReg == UseMI.getOperand(i: 1).getReg() || |
| 491 | DestReg == UseMI.getOperand(i: 5).getReg()) |
| 492 | return false; |
| 493 | if (UseMI.getOperand(i: 6).isReg() && |
| 494 | DestReg == UseMI.getOperand(i: 6).getReg()) |
| 495 | return false; |
| 496 | assert(DestReg == UseMI.getOperand(2).getReg() && |
| 497 | "Expected base address use" ); |
| 498 | // All load/store instructions must use the same offset. |
| 499 | int64_t Offset = UseMI.getOperand(i: 3).getImm(); |
| 500 | if (CommonOffset && Offset != CommonOffset) |
| 501 | return false; |
| 502 | CommonOffset = Offset; |
| 503 | break; |
| 504 | } |
| 505 | case RISCV::INLINEASM: |
| 506 | case RISCV::INLINEASM_BR: { |
| 507 | SmallVector<unsigned> InlineAsmMemoryOpIndexes; |
| 508 | unsigned NumOps = 0; |
| 509 | for (unsigned I = InlineAsm::MIOp_FirstOperand; |
| 510 | I < UseMI.getNumOperands(); I += 1 + NumOps) { |
| 511 | const MachineOperand &FlagsMO = UseMI.getOperand(i: I); |
| 512 | // Should be an imm. |
| 513 | if (!FlagsMO.isImm()) |
| 514 | continue; |
| 515 | |
| 516 | const InlineAsm::Flag Flags(FlagsMO.getImm()); |
| 517 | NumOps = Flags.getNumOperandRegisters(); |
| 518 | |
| 519 | // Memory constraints have two operands. |
| 520 | if (NumOps != 2 || !Flags.isMemKind()) { |
| 521 | // If the register is used by something other than a memory |
| 522 | // constraint, we should not fold. |
| 523 | for (unsigned J = 0; J < NumOps; ++J) { |
| 524 | const MachineOperand &MO = UseMI.getOperand(i: I + 1 + J); |
| 525 | if (MO.isReg() && MO.getReg() == DestReg) |
| 526 | return false; |
| 527 | } |
| 528 | continue; |
| 529 | } |
| 530 | |
| 531 | // We can't do this for constraint A because AMO instructions don't have |
| 532 | // an immediate offset field. |
| 533 | if (Flags.getMemoryConstraintID() == InlineAsm::ConstraintCode::A) |
| 534 | return false; |
| 535 | |
| 536 | const MachineOperand &AddrMO = UseMI.getOperand(i: I + 1); |
| 537 | if (!AddrMO.isReg() || AddrMO.getReg() != DestReg) |
| 538 | continue; |
| 539 | |
| 540 | const MachineOperand &OffsetMO = UseMI.getOperand(i: I + 2); |
| 541 | if (!OffsetMO.isImm()) |
| 542 | continue; |
| 543 | |
| 544 | // All inline asm memory operands must use the same offset. |
| 545 | int64_t Offset = OffsetMO.getImm(); |
| 546 | if (CommonOffset && Offset != CommonOffset) |
| 547 | return false; |
| 548 | CommonOffset = Offset; |
| 549 | InlineAsmMemoryOpIndexes.push_back(Elt: I + 1); |
| 550 | } |
| 551 | InlineAsmMemoryOpIndexesMap.insert( |
| 552 | KV: std::make_pair(x: &UseMI, y&: InlineAsmMemoryOpIndexes)); |
| 553 | break; |
| 554 | } |
| 555 | } |
| 556 | } |
| 557 | |
| 558 | // We found a common offset. |
| 559 | // Update the offsets in global address lowering. |
| 560 | // We may have already folded some arithmetic so we need to add to any |
| 561 | // existing offset. |
| 562 | int64_t NewOffset = Hi.getOperand(i: 1).getOffset() + *CommonOffset; |
| 563 | // RV32 ignores the upper 32 bits. |
| 564 | if (!ST->is64Bit()) |
| 565 | NewOffset = SignExtend64<32>(x: NewOffset); |
| 566 | // We can only fold simm32 offsets. |
| 567 | if (!isInt<32>(x: NewOffset)) |
| 568 | return false; |
| 569 | |
| 570 | Hi.getOperand(i: 1).setOffset(NewOffset); |
| 571 | MachineOperand &ImmOp = |
| 572 | Hi.getOpcode() == RISCV::QC_E_LI ? Lo.getOperand(i: 1) : Lo.getOperand(i: 2); |
| 573 | auto HiOpc = Hi.getOpcode(); |
| 574 | // Expand PseudoMovAddr into LUI |
| 575 | if (HiOpc == RISCV::PseudoMovAddr) { |
| 576 | auto *TII = ST->getInstrInfo(); |
| 577 | Hi.setDesc(TII->get(Opcode: RISCV::LUI)); |
| 578 | Hi.removeOperand(OpNo: 2); |
| 579 | } |
| 580 | |
| 581 | if (HiOpc != RISCV::AUIPC) |
| 582 | ImmOp.setOffset(NewOffset); |
| 583 | |
| 584 | // Update the immediate in the load/store instructions to add the offset. |
| 585 | for (MachineInstr &UseMI : |
| 586 | llvm::make_early_inc_range(Range: MRI->use_instructions(Reg: DestReg))) { |
| 587 | if (UseMI.getOpcode() == RISCV::INLINEASM || |
| 588 | UseMI.getOpcode() == RISCV::INLINEASM_BR) { |
| 589 | auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI]; |
| 590 | for (unsigned I : InlineAsmMemoryOpIndexes) { |
| 591 | MachineOperand &MO = UseMI.getOperand(i: I + 1); |
| 592 | overwriteMachineOperandInPlace(MO, ImmOp); |
| 593 | } |
| 594 | } else { |
| 595 | unsigned ImmIdx; |
| 596 | switch (UseMI.getOpcode()) { |
| 597 | case RISCV::INLINEASM: |
| 598 | case RISCV::INLINEASM_BR: |
| 599 | llvm_unreachable("Should have been dealt with before this else" ); |
| 600 | case RISCV::PseudoCCLD: |
| 601 | case RISCV::PseudoCCLW: |
| 602 | case RISCV::PseudoCCLWU: |
| 603 | case RISCV::PseudoCCLH: |
| 604 | case RISCV::PseudoCCLHU: |
| 605 | case RISCV::PseudoCCLB: |
| 606 | case RISCV::PseudoCCLBU: |
| 607 | ImmIdx = 3; |
| 608 | break; |
| 609 | case RISCV::LB: |
| 610 | case RISCV::LH: |
| 611 | case RISCV::LH_INX: |
| 612 | case RISCV::LW: |
| 613 | case RISCV::LW_INX: |
| 614 | case RISCV::LBU: |
| 615 | case RISCV::LHU: |
| 616 | case RISCV::LWU: |
| 617 | case RISCV::LD: |
| 618 | case RISCV::LD_RV32: |
| 619 | case RISCV::FLH: |
| 620 | case RISCV::FLW: |
| 621 | case RISCV::FLD: |
| 622 | case RISCV::SB: |
| 623 | case RISCV::SH: |
| 624 | case RISCV::SH_INX: |
| 625 | case RISCV::SW: |
| 626 | case RISCV::SW_INX: |
| 627 | case RISCV::SD: |
| 628 | case RISCV::SD_RV32: |
| 629 | case RISCV::FSH: |
| 630 | case RISCV::FSW: |
| 631 | case RISCV::FSD: |
| 632 | ImmIdx = 2; |
| 633 | break; |
| 634 | default: |
| 635 | llvm_unreachable("Unknown Instruction" ); |
| 636 | } |
| 637 | |
| 638 | MachineOperand &MO = UseMI.getOperand(i: ImmIdx); |
| 639 | if (Hi.getOpcode() == RISCV::QC_E_LI) { |
| 640 | MO.ChangeToImmediate(ImmVal: 0); |
| 641 | } else { |
| 642 | overwriteMachineOperandInPlace(MO, ImmOp); |
| 643 | } |
| 644 | } |
| 645 | } |
| 646 | |
| 647 | // Prevent Lo (originally PseudoMovAddr, which is also pointed by Hi) from |
| 648 | // being erased |
| 649 | if (&Lo == &Hi) |
| 650 | return true; |
| 651 | |
| 652 | MRI->replaceRegWith(FromReg: Lo.getOperand(i: 0).getReg(), ToReg: Hi.getOperand(i: 0).getReg()); |
| 653 | Lo.eraseFromParent(); |
| 654 | return true; |
| 655 | } |
| 656 | |
| 657 | // Try to fold sequences of the form: |
| 658 | // Hi/lo: qc.e.li vreg1, s -> qc.e.li vreg1, s+imm |
| 659 | // TailAdd: shxadd vreg2, vreg3, vreg1 -> deleted |
| 660 | // Tail: lx vreg4, imm(vreg2) -> qc.lrx vreg4, vreg1, vreg3, (1-7) |
bool RISCVMergeBaseOffsetOpt::foldShxaddIntoScaledMemory(MachineInstr &Hi,
                                                         MachineInstr &Lo) {
  // Requires the Xqcisls scaled load/store extension, which is RV32-only.
  if (!ST->hasVendorXqcisls() || ST->is64Bit())
    return false;

  // Only the qc.e.li form materializes the full address in one register.
  if (Hi.getOpcode() != RISCV::QC_E_LI)
    return false;

  // The base address must have exactly one user: the shift-add below.
  Register BaseReg = Hi.getOperand(i: 0).getReg();
  if (!BaseReg.isVirtual() || !MRI->hasOneUse(RegNo: BaseReg))
    return false;

  MachineInstr &ShxAdd = *MRI->use_instr_begin(RegNo: BaseReg);
  unsigned ShxOpc = ShxAdd.getOpcode();
  unsigned ShAmt = 0;
  switch (ShxOpc) {
  default:
    return false;
  case RISCV::SH1ADD:
    ShAmt = 1;
    break;
  case RISCV::SH2ADD:
    ShAmt = 2;
    break;
  case RISCV::SH3ADD:
    ShAmt = 3;
    break;
  case RISCV::QC_SHLADD:
    // QC_SHLADD carries its shift amount as an operand; the scaled
    // memory instructions only encode shift amounts up to 7.
    uint8_t ShlImm = ShxAdd.getOperand(i: 3).getImm();
    if (ShlImm > 7)
      return false;
    ShAmt = ShlImm;
    break;
  }

  // shxadd Rd, Rs1, Rs2
  Register ScaledReg = ShxAdd.getOperand(i: 0).getReg();
  Register IndexReg = ShxAdd.getOperand(i: 1).getReg();

  if (!IndexReg.isVirtual())
    return false;

  // The unshifted addend must be the materialized base address.
  if (ShxAdd.getOperand(i: 2).getReg() != BaseReg)
    return false;

  // The scaled address must feed exactly one memory instruction.
  if (!ScaledReg.isVirtual() || !MRI->hasOneUse(RegNo: ScaledReg))
    return false;

  MachineInstr &TailMem = *MRI->use_instr_begin(RegNo: ScaledReg);
  unsigned Opc = TailMem.getOpcode();
  unsigned NewOpc = 0;

  // Map the plain load/store onto its Xqcisls register-scaled counterpart.
  switch (Opc) {
  case RISCV::LB:
    NewOpc = RISCV::QC_LRB;
    break;
  case RISCV::LBU:
    NewOpc = RISCV::QC_LRBU;
    break;
  case RISCV::LH:
    NewOpc = RISCV::QC_LRH;
    break;
  case RISCV::LHU:
    NewOpc = RISCV::QC_LRHU;
    break;
  case RISCV::LW:
    NewOpc = RISCV::QC_LRW;
    break;
  case RISCV::SB:
    NewOpc = RISCV::QC_SRB;
    break;
  case RISCV::SH:
    NewOpc = RISCV::QC_SRH;
    break;
  case RISCV::SW:
    NewOpc = RISCV::QC_SRW;
    break;
  default:
    return false;
  }

  // The scaled address must be the base operand with an immediate offset.
  if (!TailMem.getOperand(i: 1).isReg() ||
      TailMem.getOperand(i: 1).getReg() != ScaledReg)
    return false;
  if (!TailMem.getOperand(i: 2).isImm())
    return false;
  int64_t Imm = TailMem.getOperand(i: 2).getImm();

  // Update QC_E_LI offset.
  // Fold the memory op's immediate into the materialized address (RV32, so
  // the result is truncated/sign-extended to 32 bits).
  int64_t NewOffset = SignExtend64<32>(x: Hi.getOperand(i: 1).getOffset() + Imm);

  Hi.getOperand(i: 1).setOffset(NewOffset);

  // Build scaled load/store.
  auto *TII = ST->getInstrInfo();
  auto *MBB = TailMem.getParent();

  // Ensure index register satisfies GPRNoX0 class required by QC_LR*/QC_SR*.
  MRI->constrainRegClass(Reg: IndexReg, RC: &RISCV::GPRNoX0RegClass);

  // Preserve kill flags from the SHXADD sources on the new memory op.
  BuildMI(BB&: *MBB, I&: TailMem, MIMD: TailMem.getDebugLoc(), MCID: TII->get(Opcode: NewOpc))
      .add(MO: TailMem.getOperand(i: 0))
      .addReg(RegNo: BaseReg, Flags: getKillRegState(B: ShxAdd.getOperand(i: 2).isKill()))
      .addReg(RegNo: IndexReg, Flags: getKillRegState(B: ShxAdd.getOperand(i: 1).isKill()))
      .addImm(Val: ShAmt)
      .cloneMemRefs(OtherMI: TailMem);

  TailMem.eraseFromParent();
  ShxAdd.eraseFromParent();
  return true;
}
| 772 | |
| 773 | bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) { |
| 774 | if (skipFunction(F: Fn.getFunction())) |
| 775 | return false; |
| 776 | |
| 777 | ST = &Fn.getSubtarget<RISCVSubtarget>(); |
| 778 | |
| 779 | bool MadeChange = false; |
| 780 | MRI = &Fn.getRegInfo(); |
| 781 | for (MachineBasicBlock &MBB : Fn) { |
| 782 | LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n" ); |
| 783 | for (MachineInstr &Hi : MBB) { |
| 784 | MachineInstr *Lo = nullptr; |
| 785 | if (!detectFoldable(Hi, Lo)) |
| 786 | continue; |
| 787 | MadeChange |= detectAndFoldOffset(Hi, Lo&: *Lo); |
| 788 | MadeChange |= foldIntoMemoryOps(Hi, Lo&: *Lo); |
| 789 | MadeChange |= foldShxaddIntoScaledMemory(Hi, Lo&: *Lo); |
| 790 | } |
| 791 | } |
| 792 | |
| 793 | return MadeChange; |
| 794 | } |
| 795 | |
/// Returns an instance of the Merge Base Offset Optimization pass.
FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() {
  // Ownership of the pass object transfers to the caller (the pass manager).
  return new RISCVMergeBaseOffsetOpt();
}
| 800 | |