//===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>
using namespace llvm;

#define DEBUG_TYPE "riscv-merge-base-offset"
#define RISCV_MERGE_BASE_OFFSET_NAME "RISC-V Merge Base Offset"
namespace {

class RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
  const RISCVSubtarget *ST = nullptr;
  MachineRegisterInfo *MRI;

public:
  static char ID;
  bool runOnMachineFunction(MachineFunction &Fn) override;
  bool detectFoldable(MachineInstr &Hi, MachineInstr *&Lo);

  bool detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Lo);
  bool foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
                  int64_t Offset);
  bool foldLargeOffset(MachineInstr &Hi, MachineInstr &Lo,
                       MachineInstr &TailAdd, Register GAReg);
  bool foldShiftedOffset(MachineInstr &Hi, MachineInstr &Lo,
                         MachineInstr &TailShXAdd, Register GAReg);

  bool foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo);

  RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return RISCV_MERGE_BASE_OFFSET_NAME;
  }
};
} // end anonymous namespace

char RISCVMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
                RISCV_MERGE_BASE_OFFSET_NAME, false, false)

// Detect either of the patterns:
//
// 1. (medlow pattern):
//   lui   vreg1, %hi(s)
//   addi  vreg2, vreg1, %lo(s)
//
// 2. (medany pattern):
// .Lpcrel_hi1:
//   auipc vreg1, %pcrel_hi(s)
//   addi  vreg2, vreg1, %pcrel_lo(.Lpcrel_hi1)
//
// The pattern is only accepted if:
//   1) The first instruction has only one use, which is the ADDI.
//   2) The address operands have the appropriate type, reflecting the
//      lowering of a global address or constant pool using medlow or medany.
//   3) The offset value in the Global Address or Constant Pool is 0.
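//
// A PseudoMovAddr that has not yet been expanded into LUI+ADDI is also
// accepted; in that case Lo is set to point at the same instruction as Hi
// (see the PseudoMovAddr handling below).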
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
                                             MachineInstr *&Lo) {
  if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC &&
      Hi.getOpcode() != RISCV::PseudoMovAddr)
    return false;

  const MachineOperand &HiOp1 = Hi.getOperand(1);
  unsigned ExpectedFlags =
      Hi.getOpcode() == RISCV::AUIPC ? RISCVII::MO_PCREL_HI : RISCVII::MO_HI;
  if (HiOp1.getTargetFlags() != ExpectedFlags)
    return false;

  if (!(HiOp1.isGlobal() || HiOp1.isCPI() || HiOp1.isBlockAddress()) ||
      HiOp1.getOffset() != 0)
    return false;

  if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
    // Most of the code should handle it correctly without modification by
    // having both Lo and Hi point to the PseudoMovAddr.
    Lo = &Hi;
  } else {
    Register HiDestReg = Hi.getOperand(0).getReg();
    if (!MRI->hasOneUse(HiDestReg))
      return false;

    Lo = &*MRI->use_instr_begin(HiDestReg);
    if (Lo->getOpcode() != RISCV::ADDI)
      return false;
  }

  const MachineOperand &LoOp2 = Lo->getOperand(2);
  if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoMovAddr) {
    if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
        !(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
        LoOp2.getOffset() != 0)
      return false;
  } else {
    assert(Hi.getOpcode() == RISCV::AUIPC);
    if (LoOp2.getTargetFlags() != RISCVII::MO_PCREL_LO ||
        LoOp2.getType() != MachineOperand::MO_MCSymbol)
      return false;
  }

  if (HiOp1.isGlobal()) {
    LLVM_DEBUG(dbgs() << " Found lowered global address: "
                      << *HiOp1.getGlobal() << "\n");
  } else if (HiOp1.isBlockAddress()) {
    LLVM_DEBUG(dbgs() << " Found lowered block address: "
                      << *HiOp1.getBlockAddress() << "\n");
  } else if (HiOp1.isCPI()) {
    LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << HiOp1.getIndex()
                      << "\n");
  }

  return true;
}

// Update the offset in Hi and Lo instructions.
// Delete the tail instruction and update all the uses to use the
// output from Lo.
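//
// Illustrative example (hypothetical symbol `s`, offset 8, medlow lowering):
//   Hi:   lui  vreg1, %hi(s)          --->  lui  vreg1, %hi(s+8)
//   Lo:   addi vreg2, vreg1, %lo(s)   --->  addi vreg2, vreg1, %lo(s+8)
//   Tail: addi vreg3, vreg2, 8        --->  deleted; uses of vreg3 use vreg2
// For the AUIPC (medany) form only Hi carries the offset, since Lo refers to
// the %pcrel_lo label rather than to the symbol itself.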
bool RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
                                         MachineInstr &Tail, int64_t Offset) {
  assert(isInt<32>(Offset) && "Unexpected offset");

  // If Hi is an AUIPC, don't fold the offset if it is outside the bounds of
  // the global object. The object may be within 2GB of the PC, but addresses
  // outside of the object might not be.
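  // For example (illustrative), if the global is a small object sitting near
  // the edge of the +/-2GB auipc range, folding a large positive offset could
  // produce an address that %pcrel_hi/%pcrel_lo can no longer reach.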
  if (Hi.getOpcode() == RISCV::AUIPC && Hi.getOperand(1).isGlobal()) {
    const GlobalValue *GV = Hi.getOperand(1).getGlobal();
    Type *Ty = GV->getValueType();
    if (!Ty->isSized() || Offset < 0 ||
        (uint64_t)Offset > GV->getDataLayout().getTypeAllocSize(Ty))
      return false;
  }

  // Put the offset back in Hi and Lo.
  Hi.getOperand(1).setOffset(Offset);
  if (Hi.getOpcode() != RISCV::AUIPC)
    Lo.getOperand(2).setOffset(Offset);
  // Delete the tail instruction.
  MRI->constrainRegClass(Lo.getOperand(0).getReg(),
                         MRI->getRegClass(Tail.getOperand(0).getReg()));
  MRI->replaceRegWith(Tail.getOperand(0).getReg(), Lo.getOperand(0).getReg());
  Tail.eraseFromParent();
  LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
                    << " " << Hi << " " << Lo;);
  return true;
}

// Detect patterns for large offsets that are passed into an ADD instruction.
// If the pattern is found, updates the offset in Hi and Lo instructions
// and deletes TailAdd and the instructions that produced the offset.
//
// Base address lowering is of the form:
//   Hi:  lui vreg1, %hi(s)
//   Lo:  addi vreg2, vreg1, %lo(s)
//   /                                  \
//  /                                    \
// /                                      \
// /  The large offset can be of two forms: \
//  1) Offset that has non zero bits in lower   2) Offset that has non zero
//     12 bits and upper 20 bits                   bits in upper 20 bits only
//   OffsetLui:  lui  vreg3, 4
//   OffsetTail: addi voff, vreg3, 188           OffsetTail: lui voff, 128
//                    \                                 /
//                     \                               /
//                      \                             /
//                       \                           /
//                        TailAdd: add vreg4, vreg2, voff
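//
// Worked example for form 1 (illustrative values): with
//   OffsetLui:  lui  vreg3, 4          ; contributes 4 << 12 = 16384
//   OffsetTail: addi voff, vreg3, 188  ; contributes 188
// the folded offset is 16384 + 188 = 16572, so Hi/Lo are rewritten to
// %hi(s+16572)/%lo(s+16572) and OffsetLui, OffsetTail and TailAdd are erased.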
bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
                                              MachineInstr &Lo,
                                              MachineInstr &TailAdd,
                                              Register GAReg) {
  assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
  Register Rs = TailAdd.getOperand(1).getReg();
  Register Rt = TailAdd.getOperand(2).getReg();
  Register Reg = Rs == GAReg ? Rt : Rs;

  // Can't fold if the register has more than one use.
  if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
    return false;
  // This can point to an ADDI(W) or a LUI:
  MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
  if (OffsetTail.getOpcode() == RISCV::ADDI ||
      OffsetTail.getOpcode() == RISCV::ADDIW) {
    // The offset value has non zero bits in both %hi and %lo parts.
    // Detect an ADDI that feeds from a LUI instruction.
    MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
    if (AddiImmOp.getTargetFlags() != RISCVII::MO_None)
      return false;
    Register AddiReg = OffsetTail.getOperand(1).getReg();
    int64_t OffLo = AddiImmOp.getImm();

    // Handle the case where rs1 of the ADDI is X0.
    if (AddiReg == RISCV::X0) {
      LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail);
      if (!foldOffset(Hi, Lo, TailAdd, OffLo))
        return false;
      OffsetTail.eraseFromParent();
      return true;
    }

    MachineInstr &OffsetLui = *MRI->getVRegDef(AddiReg);
    MachineOperand &LuiImmOp = OffsetLui.getOperand(1);
    if (OffsetLui.getOpcode() != RISCV::LUI ||
        LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
        !MRI->hasOneUse(OffsetLui.getOperand(0).getReg()))
      return false;
    int64_t Offset = SignExtend64<32>(LuiImmOp.getImm() << 12);
    Offset += OffLo;
    // RV32 ignores the upper 32 bits. ADDIW sign extends the result.
    if (!ST->is64Bit() || OffsetTail.getOpcode() == RISCV::ADDIW)
      Offset = SignExtend64<32>(Offset);
    // We can only fold simm32 offsets.
    if (!isInt<32>(Offset))
      return false;
    LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
                      << " " << OffsetLui);
    if (!foldOffset(Hi, Lo, TailAdd, Offset))
      return false;
    OffsetTail.eraseFromParent();
    OffsetLui.eraseFromParent();
    return true;
  } else if (OffsetTail.getOpcode() == RISCV::LUI) {
    // The offset value has all zero bits in the lower 12 bits. Only LUI
    // exists.
    LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
    int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
    if (!foldOffset(Hi, Lo, TailAdd, Offset))
      return false;
    OffsetTail.eraseFromParent();
    return true;
  }
  return false;
}

// Detect patterns for offsets that are passed into a SHXADD instruction.
// The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, simm15.
// The constant is created with addi voff, x0, C, and shXadd is used to
// fill in the trailing zeros and do the addition.
// If the pattern is found, updates the offset in Hi and Lo instructions
// and deletes TailShXAdd and the instructions that produced the offset.
//
//   Hi:         lui  vreg1, %hi(s)
//   Lo:         addi vreg2, vreg1, %lo(s)
//   OffsetTail: addi voff, x0, C
//   TailShXAdd: shXadd vreg4, voff, vreg2
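//
// Worked example (illustrative values): for
//   OffsetTail: addi   voff, x0, 100
//   TailShXAdd: sh2add vreg4, voff, vreg2
// the folded offset is 100 << 2 = 400, i.e. Hi/Lo become %hi(s+400)/%lo(s+400)
// and OffsetTail and TailShXAdd are erased.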
bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr &Hi,
                                                MachineInstr &Lo,
                                                MachineInstr &TailShXAdd,
                                                Register GAReg) {
  assert((TailShXAdd.getOpcode() == RISCV::SH1ADD ||
          TailShXAdd.getOpcode() == RISCV::SH2ADD ||
          TailShXAdd.getOpcode() == RISCV::SH3ADD) &&
         "Expected SHXADD instruction!");

  if (GAReg != TailShXAdd.getOperand(2).getReg())
    return false;

  // The first source is the shifted operand.
  Register Rs1 = TailShXAdd.getOperand(1).getReg();

  // Can't fold if the register has more than one use.
  if (!Rs1.isVirtual() || !MRI->hasOneUse(Rs1))
    return false;
  // This can point to an ADDI X0, C.
  MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1);
  if (OffsetTail.getOpcode() != RISCV::ADDI)
    return false;
  if (!OffsetTail.getOperand(1).isReg() ||
      OffsetTail.getOperand(1).getReg() != RISCV::X0 ||
      !OffsetTail.getOperand(2).isImm())
    return false;

  int64_t Offset = OffsetTail.getOperand(2).getImm();
  assert(isInt<12>(Offset) && "Unexpected offset");

  unsigned ShAmt;
  switch (TailShXAdd.getOpcode()) {
  default: llvm_unreachable("Unexpected opcode");
  case RISCV::SH1ADD: ShAmt = 1; break;
  case RISCV::SH2ADD: ShAmt = 2; break;
  case RISCV::SH3ADD: ShAmt = 3; break;
  }

  Offset = (uint64_t)Offset << ShAmt;

  LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
  if (!foldOffset(Hi, Lo, TailShXAdd, Offset))
    return false;
  OffsetTail.eraseFromParent();
  return true;
}

bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
                                                  MachineInstr &Lo) {
  Register DestReg = Lo.getOperand(0).getReg();

  // Look for arithmetic instructions we can get an offset from.
  // We might be able to remove the arithmetic instructions by folding the
  // offset into the LUI+ADDI.
  if (!MRI->hasOneUse(DestReg))
    return false;

  // Lo has only one use.
  MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
  switch (Tail.getOpcode()) {
  default:
    LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
                      << Tail);
    break;
  case RISCV::ADDI: {
    // Offset is simply an immediate operand.
    int64_t Offset = Tail.getOperand(2).getImm();

    // We might have two ADDIs in a row.
    Register TailDestReg = Tail.getOperand(0).getReg();
    if (MRI->hasOneUse(TailDestReg)) {
      MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
      if (TailTail.getOpcode() == RISCV::ADDI) {
        Offset += TailTail.getOperand(2).getImm();
        LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
        if (!foldOffset(Hi, Lo, TailTail, Offset))
          return false;
        Tail.eraseFromParent();
        return true;
      }
    }

    LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
    return foldOffset(Hi, Lo, Tail, Offset);
  }
  case RISCV::ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // This can be in two forms:
    // 1) LUI hi_Offset followed by:
    //    ADDI lo_offset
    //    This happens in case the offset has non zero bits in
    //    both hi 20 and lo 12 bits.
    // 2) LUI (offset20)
    //    This happens in case the lower 12 bits of the offset are zeros.
    return foldLargeOffset(Hi, Lo, Tail, DestReg);
  case RISCV::SH1ADD:
  case RISCV::SH2ADD:
  case RISCV::SH3ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
    // (SH3ADD (ADDI X0, C), DestReg).
    return foldShiftedOffset(Hi, Lo, Tail, DestReg);
  }

  return false;
}

bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
                                                MachineInstr &Lo) {
  Register DestReg = Lo.getOperand(0).getReg();

  // If all the uses are memory ops with the same offset, we can transform:
  //
  // 1. (medlow pattern):
  //   Hi:   lui vreg1, %hi(foo)              --->  lui vreg1, %hi(foo+8)
  //   Lo:   addi vreg2, vreg1, %lo(foo)      --->  lw vreg3, %lo(foo+8)(vreg1)
  //   Tail: lw vreg3, 8(vreg2)
  //
  // 2. (medany pattern):
  //   Hi: 1:auipc vreg1, %pcrel_hi(foo)      --->  auipc vreg1, %pcrel_hi(foo+8)
  //   Lo:   addi vreg2, vreg1, %pcrel_lo(1b) --->  lw vreg3, %pcrel_lo(1b)(vreg1)
  //   Tail: lw vreg3, 8(vreg2)
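  //
  // INLINEASM/INLINEASM_BR uses with a memory ("m") constraint are treated
  // similarly: the immediate offset operand of each matching memory operand
  // is rewritten to the symbolic low-part operand carrying the combined
  // offset (see the inline-asm handling below).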

  std::optional<int64_t> CommonOffset;
  DenseMap<const MachineInstr *, SmallVector<unsigned>>
      InlineAsmMemoryOpIndexesMap;
  for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
    switch (UseMI.getOpcode()) {
    default:
      LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
      return false;
    case RISCV::LB:
    case RISCV::LH:
    case RISCV::LH_INX:
    case RISCV::LW:
    case RISCV::LW_INX:
    case RISCV::LBU:
    case RISCV::LHU:
    case RISCV::LWU:
    case RISCV::LD:
    case RISCV::LD_RV32:
    case RISCV::FLH:
    case RISCV::FLW:
    case RISCV::FLD:
    case RISCV::SB:
    case RISCV::SH:
    case RISCV::SH_INX:
    case RISCV::SW:
    case RISCV::SW_INX:
    case RISCV::SD:
    case RISCV::SD_RV32:
    case RISCV::FSH:
    case RISCV::FSW:
    case RISCV::FSD: {
      if (UseMI.getOperand(1).isFI())
        return false;
      // Register defined by Lo should not be the value register.
      if (DestReg == UseMI.getOperand(0).getReg())
        return false;
      assert(DestReg == UseMI.getOperand(1).getReg() &&
             "Expected base address use");
      // All load/store instructions must use the same offset.
      int64_t Offset = UseMI.getOperand(2).getImm();
      if (CommonOffset && Offset != CommonOffset)
        return false;
      CommonOffset = Offset;
      break;
    }
    case RISCV::INLINEASM:
    case RISCV::INLINEASM_BR: {
      SmallVector<unsigned> InlineAsmMemoryOpIndexes;
      unsigned NumOps = 0;
      for (unsigned I = InlineAsm::MIOp_FirstOperand;
           I < UseMI.getNumOperands(); I += 1 + NumOps) {
        const MachineOperand &FlagsMO = UseMI.getOperand(I);
        // Should be an imm.
        if (!FlagsMO.isImm())
          continue;

        const InlineAsm::Flag Flags(FlagsMO.getImm());
        NumOps = Flags.getNumOperandRegisters();

        // Memory constraints have two operands.
        if (NumOps != 2 || !Flags.isMemKind()) {
          // If the register is used by something other than a memory
          // constraint, we should not fold.
          for (unsigned J = 0; J < NumOps; ++J) {
            const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
            if (MO.isReg() && MO.getReg() == DestReg)
              return false;
          }
          continue;
        }

        // We can't do this for constraint A because AMO instructions don't have
        // an immediate offset field.
        if (Flags.getMemoryConstraintID() == InlineAsm::ConstraintCode::A)
          return false;

        const MachineOperand &AddrMO = UseMI.getOperand(I + 1);
        if (!AddrMO.isReg() || AddrMO.getReg() != DestReg)
          continue;

        const MachineOperand &OffsetMO = UseMI.getOperand(I + 2);
        if (!OffsetMO.isImm())
          continue;

        // All inline asm memory operands must use the same offset.
        int64_t Offset = OffsetMO.getImm();
        if (CommonOffset && Offset != CommonOffset)
          return false;
        CommonOffset = Offset;
        InlineAsmMemoryOpIndexes.push_back(I + 1);
      }
      InlineAsmMemoryOpIndexesMap.insert(
          std::make_pair(&UseMI, InlineAsmMemoryOpIndexes));
      break;
    }
    }
  }

  // We found a common offset.
  // Update the offsets in global address lowering.
  // We may have already folded some arithmetic so we need to add to any
  // existing offset.
  int64_t NewOffset = Hi.getOperand(1).getOffset() + *CommonOffset;
  // RV32 ignores the upper 32 bits.
  if (!ST->is64Bit())
    NewOffset = SignExtend64<32>(NewOffset);
  // We can only fold simm32 offsets.
  if (!isInt<32>(NewOffset))
    return false;

  Hi.getOperand(1).setOffset(NewOffset);
  MachineOperand &ImmOp = Lo.getOperand(2);
  // Expand PseudoMovAddr into LUI
  if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
    auto *TII = ST->getInstrInfo();
    Hi.setDesc(TII->get(RISCV::LUI));
    Hi.removeOperand(2);
  }

  if (Hi.getOpcode() != RISCV::AUIPC)
    ImmOp.setOffset(NewOffset);

  // Update the immediate in the load/store instructions to add the offset.
  for (MachineInstr &UseMI :
       llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
    if (UseMI.getOpcode() == RISCV::INLINEASM ||
        UseMI.getOpcode() == RISCV::INLINEASM_BR) {
      auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI];
      for (unsigned I : InlineAsmMemoryOpIndexes) {
        MachineOperand &MO = UseMI.getOperand(I + 1);
        switch (ImmOp.getType()) {
        case MachineOperand::MO_GlobalAddress:
          MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
                        ImmOp.getTargetFlags());
          break;
        case MachineOperand::MO_MCSymbol:
          MO.ChangeToMCSymbol(ImmOp.getMCSymbol(), ImmOp.getTargetFlags());
          MO.setOffset(ImmOp.getOffset());
          break;
        case MachineOperand::MO_BlockAddress:
          MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
                        ImmOp.getTargetFlags());
          break;
        default:
          report_fatal_error("unsupported machine operand type");
          break;
        }
      }
    } else {
      UseMI.removeOperand(2);
      UseMI.addOperand(ImmOp);
    }
  }

  // Prevent Lo (which, for PseudoMovAddr, is the same instruction as Hi) from
  // being erased.
  if (&Lo == &Hi)
    return true;

  MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg());
  Lo.eraseFromParent();
  return true;
}

bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  ST = &Fn.getSubtarget<RISCVSubtarget>();

  bool MadeChange = false;
  MRI = &Fn.getRegInfo();
  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &Hi : MBB) {
      MachineInstr *Lo = nullptr;
      if (!detectFoldable(Hi, Lo))
        continue;
      MadeChange |= detectAndFoldOffset(Hi, *Lo);
      MadeChange |= foldIntoMemoryOps(Hi, *Lo);
    }
  }

  return MadeChange;
}

/// Returns an instance of the Merge Base Offset Optimization pass.
FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() {
  return new RISCVMergeBaseOffsetOpt();
}