//===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>
using namespace llvm;

#define DEBUG_TYPE "riscv-merge-base-offset"
#define RISCV_MERGE_BASE_OFFSET_NAME "RISC-V Merge Base Offset"
namespace {

class RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
  const RISCVSubtarget *ST = nullptr;
  MachineRegisterInfo *MRI;

public:
  static char ID;
  bool runOnMachineFunction(MachineFunction &Fn) override;
  bool detectFoldable(MachineInstr &Hi, MachineInstr *&Lo);

  bool detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Lo);
  bool foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
                  int64_t Offset);
  bool foldLargeOffset(MachineInstr &Hi, MachineInstr &Lo,
                       MachineInstr &TailAdd, Register GSReg);
  bool foldShiftedOffset(MachineInstr &Hi, MachineInstr &Lo,
                         MachineInstr &TailShXAdd, Register GSReg);

  bool foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo);
  bool foldShxaddIntoScaledMemory(MachineInstr &Hi, MachineInstr &Lo);

  RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}

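  // The fold logic walks SSA def-use chains through MachineRegisterInfo, so
  // the machine function must still be in SSA form when this pass runs.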
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return RISCV_MERGE_BASE_OFFSET_NAME;
  }
};
} // end anonymous namespace

char RISCVMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
                RISCV_MERGE_BASE_OFFSET_NAME, false, false)

// Detect either of the patterns:
//
// 1. (medlow pattern):
//    a. lui   vreg1, %hi(s)
//       addi  vreg2, vreg1, %lo(s)
//
//    b. qc.e.li vreg1, s
//
// 2. (medany pattern):
//    .Lpcrel_hi1:
//      auipc vreg1, %pcrel_hi(s)
//      addi  vreg2, vreg1, %pcrel_lo(.Lpcrel_hi1)
//
// The pattern is only accepted if:
//   1) The first instruction has only one use, which is the ADDI.
//   2) The address operands have the appropriate type, reflecting the
//      lowering of a global address or constant pool using medlow or medany.
//   3) The offset value in the Global Address or Constant Pool is 0.
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
                                             MachineInstr *&Lo) {
  auto HiOpc = Hi.getOpcode();
  if (HiOpc != RISCV::LUI && HiOpc != RISCV::AUIPC &&
      HiOpc != RISCV::PseudoMovAddr && HiOpc != RISCV::QC_E_LI)
    return false;

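  // LUI and PseudoMovAddr carry a %hi relocation, AUIPC a %pcrel_hi
  // relocation; QC_E_LI holds the full absolute address and has no
  // relocation flag on its operand.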
  const MachineOperand &HiOp1 = Hi.getOperand(1);
  unsigned ExpectedFlags = HiOpc == RISCV::AUIPC     ? RISCVII::MO_PCREL_HI
                           : HiOpc == RISCV::QC_E_LI ? RISCVII::MO_None
                                                     : RISCVII::MO_HI;
  if (HiOp1.getTargetFlags() != ExpectedFlags)
    return false;

  if (!(HiOp1.isGlobal() || HiOp1.isCPI() || HiOp1.isBlockAddress()) ||
      HiOp1.getOffset() != 0)
    return false;

  if (HiOpc == RISCV::PseudoMovAddr || HiOpc == RISCV::QC_E_LI) {
    // Most of the code handles this correctly without modification by
    // setting both Lo and Hi to point to the PseudoMovAddr/QC_E_LI.
    Lo = &Hi;
  } else {
    Register HiDestReg = Hi.getOperand(0).getReg();
    if (!MRI->hasOneUse(HiDestReg))
      return false;

    Lo = &*MRI->use_instr_begin(HiDestReg);
    if (Lo->getOpcode() != RISCV::ADDI)
      return false;
  }

  if (HiOpc != RISCV::QC_E_LI) {
    const MachineOperand &LoOp2 = Lo->getOperand(2);
    if (HiOpc == RISCV::LUI || HiOpc == RISCV::PseudoMovAddr) {
      if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
          !(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
          LoOp2.getOffset() != 0)
        return false;
    } else {
      assert(HiOpc == RISCV::AUIPC);
      if (LoOp2.getTargetFlags() != RISCVII::MO_PCREL_LO ||
          LoOp2.getType() != MachineOperand::MO_MCSymbol)
        return false;
    }
  }

  if (HiOp1.isGlobal()) {
    LLVM_DEBUG(dbgs() << "  Found lowered global address: "
                      << *HiOp1.getGlobal() << "\n");
  } else if (HiOp1.isBlockAddress()) {
    LLVM_DEBUG(dbgs() << "  Found lowered block address: "
                      << *HiOp1.getBlockAddress() << "\n");
  } else if (HiOp1.isCPI()) {
    LLVM_DEBUG(dbgs() << "  Found lowered constant pool: " << HiOp1.getIndex()
                      << "\n");
  }

  return true;
}

// Update the offset in Hi and Lo instructions.
// Delete the tail instruction and update all the uses to use the
// output from Lo.
bool RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
                                         MachineInstr &Tail, int64_t Offset) {
  assert(isInt<32>(Offset) && "Unexpected offset");

  // If Hi is an AUIPC, don't fold the offset if it is outside the bounds of
  // the global object. The object may be within 2GB of the PC, but addresses
  // outside of the object might not be.
  auto HiOpc = Hi.getOpcode();
  if (HiOpc == RISCV::AUIPC && Hi.getOperand(1).isGlobal()) {
    const GlobalValue *GV = Hi.getOperand(1).getGlobal();
    Type *Ty = GV->getValueType();
    if (!Ty->isSized() || Offset < 0 ||
        (uint64_t)Offset > GV->getDataLayout().getTypeAllocSize(Ty))
      return false;
  }

  // Put the offset back into Hi and Lo.
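  // For AUIPC the Lo operand is the %pcrel_lo label symbol, and for QC_E_LI
  // Hi and Lo are the same instruction, so in those cases only Hi's operand
  // carries the offset.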
  Hi.getOperand(1).setOffset(Offset);
  if (Hi.getOpcode() != RISCV::AUIPC && Hi.getOpcode() != RISCV::QC_E_LI)
    Lo.getOperand(2).setOffset(Offset);
  // Delete the tail instruction.
  Register LoOp0Reg = Lo.getOperand(0).getReg();
  Register TailOp0Reg = Tail.getOperand(0).getReg();
  MRI->constrainRegClass(LoOp0Reg, MRI->getRegClass(TailOp0Reg));
  MRI->replaceRegWith(TailOp0Reg, LoOp0Reg);
  Tail.eraseFromParent();
  LLVM_DEBUG(dbgs() << "  Merged offset " << Offset << " into base.\n"
                    << "     " << Hi << "     " << Lo);
  return true;
}

// Detect patterns for large offsets that are passed into an ADD instruction.
// If the pattern is found, updates the offset in Hi and Lo instructions
// and deletes TailAdd and the instructions that produced the offset.
//
// Base address lowering is of the form:
//   Hi:  lui   vreg1, %hi(s)
//   Lo:  addi  vreg2, vreg1, %lo(s)
//   /                                  \
//  /                                    \
// /                                      \
// /  The large offset can be of two forms: \
//  1) Offset that has non-zero bits in lower    2) Offset that has non-zero
//     12 bits and upper 20 bits                    bits in upper 20 bits only
//   OffsetLui:  lui  vreg3, 4
//   OffsetTail: addi voff, vreg3, 188            OffsetTail: lui voff, 128
//                    \                           /
//                     \                         /
//                      \                       /
//                       \                     /
//                TailAdd: add vreg4, vreg2, voff
bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
                                              MachineInstr &Lo,
                                              MachineInstr &TailAdd,
                                              Register GAReg) {
  assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
  Register Rs = TailAdd.getOperand(1).getReg();
  Register Rt = TailAdd.getOperand(2).getReg();
  Register Reg = Rs == GAReg ? Rt : Rs;

  // Can't fold if the register has more than one use.
  if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
    return false;
  // This can point to an ADDI(W) or a LUI:
  MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
  auto OffsetTailOpc = OffsetTail.getOpcode();
  if (OffsetTailOpc == RISCV::ADDI || OffsetTailOpc == RISCV::ADDIW) {
    // The offset value has non-zero bits in both %hi and %lo parts.
    // Detect an ADDI that feeds from a LUI instruction.
    MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
    if (AddiImmOp.getTargetFlags() != RISCVII::MO_None)
      return false;
    Register AddiReg = OffsetTail.getOperand(1).getReg();
    int64_t OffLo = AddiImmOp.getImm();

    // Handle the case where rs1 of the ADDI is X0.
    if (AddiReg == RISCV::X0) {
      LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail);
      if (!foldOffset(Hi, Lo, TailAdd, OffLo))
        return false;
      OffsetTail.eraseFromParent();
      return true;
    }

    MachineInstr &OffsetLui = *MRI->getVRegDef(AddiReg);
    MachineOperand &LuiImmOp = OffsetLui.getOperand(1);
    if (OffsetLui.getOpcode() != RISCV::LUI ||
        LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
        !MRI->hasOneUse(OffsetLui.getOperand(0).getReg()))
      return false;
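    // Reconstruct the full offset: the LUI immediate supplies bits 31:12 and
    // the ADDI immediate the low 12 bits.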
    int64_t Offset = SignExtend64<32>(LuiImmOp.getImm() << 12);
    Offset += OffLo;
    // RV32 ignores the upper 32 bits. ADDIW sign extends the result.
    if (!ST->is64Bit() || OffsetTailOpc == RISCV::ADDIW)
      Offset = SignExtend64<32>(Offset);
    // We can only fold simm32 offsets.
    if (!isInt<32>(Offset))
      return false;
    LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail
                      << "                 " << OffsetLui);
    if (!foldOffset(Hi, Lo, TailAdd, Offset))
      return false;
    OffsetTail.eraseFromParent();
    OffsetLui.eraseFromParent();
    return true;
  } else if (OffsetTailOpc == RISCV::LUI) {
    // The offset value has all zero bits in the lower 12 bits. Only LUI
    // exists.
    LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
    int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
    if (!foldOffset(Hi, Lo, TailAdd, Offset))
      return false;
    OffsetTail.eraseFromParent();
    return true;
  }
  return false;
}

// Detect patterns for offsets that are passed into a SHXADD instruction.
// The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, or
// simm15 respectively. The constant is created with addi voff, x0, C, and
// shXadd is used to fill in the trailing zeros and do the addition.
// If the pattern is found, updates the offset in Hi and Lo instructions
// and deletes TailShXAdd and the instructions that produced the offset.
//
//   Hi:         lui    vreg1, %hi(s)
//   Lo:         addi   vreg2, vreg1, %lo(s)
//   OffsetTail: addi   voff, x0, C
//   TailAdd:    shXadd vreg4, voff, vreg2
bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr &Hi,
                                                MachineInstr &Lo,
                                                MachineInstr &TailShXAdd,
                                                Register GAReg) {
  assert((TailShXAdd.getOpcode() == RISCV::SH1ADD ||
          TailShXAdd.getOpcode() == RISCV::SH2ADD ||
          TailShXAdd.getOpcode() == RISCV::SH3ADD) &&
         "Expected SHXADD instruction!");

  if (GAReg != TailShXAdd.getOperand(2).getReg())
    return false;

  // The first source is the shifted operand.
  Register Rs1 = TailShXAdd.getOperand(1).getReg();

  // Can't fold if the register has more than one use.
  if (!Rs1.isVirtual() || !MRI->hasOneUse(Rs1))
    return false;
  // This can point to an ADDI X0, C.
  MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1);
  if (OffsetTail.getOpcode() != RISCV::ADDI)
    return false;
  if (!OffsetTail.getOperand(1).isReg() ||
      OffsetTail.getOperand(1).getReg() != RISCV::X0 ||
      !OffsetTail.getOperand(2).isImm())
    return false;

  int64_t Offset = OffsetTail.getOperand(2).getImm();
  assert(isInt<12>(Offset) && "Unexpected offset");

  unsigned ShAmt;
  switch (TailShXAdd.getOpcode()) {
  default: llvm_unreachable("Unexpected opcode");
  case RISCV::SH1ADD: ShAmt = 1; break;
  case RISCV::SH2ADD: ShAmt = 2; break;
  case RISCV::SH3ADD: ShAmt = 3; break;
  }

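  // Scale the constant by the shift amount; do the shift in unsigned
  // arithmetic since the simm12 value may be negative.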
  Offset = (uint64_t)Offset << ShAmt;

  LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
  if (!foldOffset(Hi, Lo, TailShXAdd, Offset))
    return false;
  OffsetTail.eraseFromParent();
  return true;
}

bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
                                                  MachineInstr &Lo) {
  Register DestReg = Lo.getOperand(0).getReg();

  // Look for arithmetic instructions we can get an offset from.
  // We might be able to remove the arithmetic instructions by folding the
  // offset into the LUI+ADDI.
  if (!MRI->hasOneUse(DestReg))
    return false;

  // Lo has only one use.
  MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
  switch (Tail.getOpcode()) {
  default:
    LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
                      << Tail);
    break;
  case RISCV::ADDI:
  case RISCV::QC_E_ADDI:
  case RISCV::QC_E_ADDAI: {
    // Offset is simply an immediate operand.
    int64_t Offset = Tail.getOperand(2).getImm();
    if (Tail.getOpcode() == RISCV::ADDI) {
      // We might have two ADDIs in a row.
      Register TailDestReg = Tail.getOperand(0).getReg();
      if (MRI->hasOneUse(TailDestReg)) {
        MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
        if (TailTail.getOpcode() == RISCV::ADDI) {
          Offset += TailTail.getOperand(2).getImm();
          LLVM_DEBUG(dbgs() << "  Offset Instrs: " << Tail << TailTail);
          if (!foldOffset(Hi, Lo, TailTail, Offset))
            return false;
          Tail.eraseFromParent();
          return true;
        }
      }
    }

    LLVM_DEBUG(dbgs() << "  Offset Instr: " << Tail);
    return foldOffset(Hi, Lo, Tail, Offset);
  }
  case RISCV::ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // This can be in two forms:
    // 1) LUI hi_offset followed by:
    //    ADDI lo_offset
    //    This happens in case the offset has non-zero bits in
    //    both hi 20 and lo 12 bits.
    // 2) LUI (offset20)
    //    This happens in case the lower 12 bits of the offset are zeros.
    return foldLargeOffset(Hi, Lo, Tail, DestReg);
  case RISCV::SH1ADD:
  case RISCV::SH2ADD:
  case RISCV::SH3ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
    // (SH3ADD (ADDI X0, C), DestReg).
    return foldShiftedOffset(Hi, Lo, Tail, DestReg);
  }

  return false;
}

bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
                                                MachineInstr &Lo) {
  Register DestReg = Lo.getOperand(0).getReg();

  // If all the uses are memory ops with the same offset, we can transform:
  //
  // 1. (medlow pattern):
  //    a. Hi:   lui vreg1, %hi(foo)          --->  lui vreg1, %hi(foo+8)
  //       Lo:   addi vreg2, vreg1, %lo(foo)  --->  lw vreg3, %lo(foo+8)(vreg1)
  //       Tail: lw vreg3, 8(vreg2)
  //
  //    b. Hi:   qc.e.li vreg1, foo           --->  qc.e.li vreg1, foo+8
  //       Tail: lw vreg2, 8(vreg1)           --->  lw vreg2, 0(vreg1)
  //
  // 2. (medany pattern):
  //    Hi:   1:auipc vreg1, %pcrel_hi(foo)    ---> auipc vreg1, %pcrel_hi(foo+8)
  //    Lo:   addi vreg2, vreg1, %pcrel_lo(1b) ---> lw vreg3, %pcrel_lo(1b)(vreg1)
  //    Tail: lw vreg3, 8(vreg2)

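  // Track the single offset shared by every memory use, and remember which
  // inline asm operand indices are memory operands so they can be rewritten
  // once a common offset is established.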
  std::optional<int64_t> CommonOffset;
  DenseMap<const MachineInstr *, SmallVector<unsigned>>
      InlineAsmMemoryOpIndexesMap;
  for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
    switch (UseMI.getOpcode()) {
    default:
      LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
      return false;
    case RISCV::LB:
    case RISCV::LH:
    case RISCV::LH_INX:
    case RISCV::LW:
    case RISCV::LW_INX:
    case RISCV::LBU:
    case RISCV::LHU:
    case RISCV::LWU:
    case RISCV::LD:
    case RISCV::LD_RV32:
    case RISCV::FLH:
    case RISCV::FLW:
    case RISCV::FLD:
    case RISCV::SB:
    case RISCV::SH:
    case RISCV::SH_INX:
    case RISCV::SW:
    case RISCV::SW_INX:
    case RISCV::SD:
    case RISCV::SD_RV32:
    case RISCV::FSH:
    case RISCV::FSW:
    case RISCV::FSD: {
      if (UseMI.getOperand(1).isFI())
        return false;
      // Register defined by Lo should not be the value register.
      if (DestReg == UseMI.getOperand(0).getReg())
        return false;
      assert(DestReg == UseMI.getOperand(1).getReg() &&
             "Expected base address use");
      // All load/store instructions must use the same offset.
      int64_t Offset = UseMI.getOperand(2).getImm();
      if (CommonOffset && Offset != CommonOffset)
        return false;
      CommonOffset = Offset;
      break;
    }
    case RISCV::INLINEASM:
    case RISCV::INLINEASM_BR: {
      SmallVector<unsigned> InlineAsmMemoryOpIndexes;
      unsigned NumOps = 0;
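      // Walk the inline asm operands group by group; each group is described
      // by a flag operand followed by that many register/immediate operands.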
      for (unsigned I = InlineAsm::MIOp_FirstOperand;
           I < UseMI.getNumOperands(); I += 1 + NumOps) {
        const MachineOperand &FlagsMO = UseMI.getOperand(I);
        // Should be an imm.
        if (!FlagsMO.isImm())
          continue;

        const InlineAsm::Flag Flags(FlagsMO.getImm());
        NumOps = Flags.getNumOperandRegisters();

        // Memory constraints have two operands.
        if (NumOps != 2 || !Flags.isMemKind()) {
          // If the register is used by something other than a memory
          // constraint, we should not fold.
          for (unsigned J = 0; J < NumOps; ++J) {
            const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
            if (MO.isReg() && MO.getReg() == DestReg)
              return false;
          }
          continue;
        }

        // We can't do this for constraint A because AMO instructions don't
        // have an immediate offset field.
        if (Flags.getMemoryConstraintID() == InlineAsm::ConstraintCode::A)
          return false;

        const MachineOperand &AddrMO = UseMI.getOperand(I + 1);
        if (!AddrMO.isReg() || AddrMO.getReg() != DestReg)
          continue;

        const MachineOperand &OffsetMO = UseMI.getOperand(I + 2);
        if (!OffsetMO.isImm())
          continue;

        // All inline asm memory operands must use the same offset.
        int64_t Offset = OffsetMO.getImm();
        if (CommonOffset && Offset != CommonOffset)
          return false;
        CommonOffset = Offset;
        InlineAsmMemoryOpIndexes.push_back(I + 1);
      }
      InlineAsmMemoryOpIndexesMap.insert(
          std::make_pair(&UseMI, InlineAsmMemoryOpIndexes));
      break;
    }
    }
  }

  // We found a common offset.
  // Update the offsets in global address lowering.
  // We may have already folded some arithmetic so we need to add to any
  // existing offset.
  int64_t NewOffset = Hi.getOperand(1).getOffset() + *CommonOffset;
  // RV32 ignores the upper 32 bits.
  if (!ST->is64Bit())
    NewOffset = SignExtend64<32>(NewOffset);
  // We can only fold simm32 offsets.
  if (!isInt<32>(NewOffset))
    return false;

  Hi.getOperand(1).setOffset(NewOffset);
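  // The symbol operand to rewrite: for QC_E_LI (where Hi and Lo are the same
  // instruction) it is operand 1, otherwise it is the Lo ADDI's operand 2.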
  MachineOperand &ImmOp =
      Hi.getOpcode() == RISCV::QC_E_LI ? Lo.getOperand(1) : Lo.getOperand(2);
  auto HiOpc = Hi.getOpcode();
  // Expand PseudoMovAddr into LUI.
  if (HiOpc == RISCV::PseudoMovAddr) {
    auto *TII = ST->getInstrInfo();
    Hi.setDesc(TII->get(RISCV::LUI));
    Hi.removeOperand(2);
  }

  if (HiOpc != RISCV::AUIPC)
    ImmOp.setOffset(NewOffset);

  // Update the immediate in the load/store instructions to add the offset.
  for (MachineInstr &UseMI :
       llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
    if (UseMI.getOpcode() == RISCV::INLINEASM ||
        UseMI.getOpcode() == RISCV::INLINEASM_BR) {
      auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI];
      for (unsigned I : InlineAsmMemoryOpIndexes) {
        MachineOperand &MO = UseMI.getOperand(I + 1);
        switch (ImmOp.getType()) {
        case MachineOperand::MO_GlobalAddress:
          MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
                        ImmOp.getTargetFlags());
          break;
        case MachineOperand::MO_MCSymbol:
          MO.ChangeToMCSymbol(ImmOp.getMCSymbol(), ImmOp.getTargetFlags());
          MO.setOffset(ImmOp.getOffset());
          break;
        case MachineOperand::MO_BlockAddress:
          MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
                        ImmOp.getTargetFlags());
          break;
        default:
          report_fatal_error("unsupported machine operand type");
          break;
        }
      }
    } else {
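      // QC_E_LI materializes the full absolute address, so the memory
      // operand's immediate folds to zero; otherwise the %lo/%pcrel_lo
      // operand with the merged offset becomes the memory operand.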
      if (Hi.getOpcode() == RISCV::QC_E_LI) {
        UseMI.getOperand(2).ChangeToImmediate(0);
      } else {
        UseMI.removeOperand(2);
        UseMI.addOperand(ImmOp);
      }
    }
  }

  // Prevent Lo (originally PseudoMovAddr or QC_E_LI, which Hi also points to)
  // from being erased.
  if (&Lo == &Hi)
    return true;

  MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg());
  Lo.eraseFromParent();
  return true;
}

// Try to fold sequences of the form:
//   Hi/Lo:   qc.e.li vreg1, s            ->  qc.e.li vreg1, s+imm
//   TailAdd: shxadd vreg2, vreg3, vreg1  ->  deleted
//   Tail:    lx vreg4, imm(vreg2)        ->  qc.lrx vreg4, vreg1, vreg3, (1-7)
bool RISCVMergeBaseOffsetOpt::foldShxaddIntoScaledMemory(MachineInstr &Hi,
                                                         MachineInstr &Lo) {
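  // The scaled load/store forms are only used on RV32 with the Xqcisls
  // vendor extension enabled.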
  if (!ST->hasVendorXqcisls() || ST->is64Bit())
    return false;

  if (Hi.getOpcode() != RISCV::QC_E_LI)
    return false;

  Register BaseReg = Hi.getOperand(0).getReg();
  if (!BaseReg.isVirtual() || !MRI->hasOneUse(BaseReg))
    return false;

  MachineInstr &ShxAdd = *MRI->use_instr_begin(BaseReg);
  unsigned ShxOpc = ShxAdd.getOpcode();
  unsigned ShAmt = 0;
  switch (ShxOpc) {
  default:
    return false;
  case RISCV::SH1ADD:
    ShAmt = 1;
    break;
  case RISCV::SH2ADD:
    ShAmt = 2;
    break;
  case RISCV::SH3ADD:
    ShAmt = 3;
    break;
  case RISCV::QC_SHLADD:
    uint8_t ShlImm = ShxAdd.getOperand(3).getImm();
    if (ShlImm > 7)
      return false;
    ShAmt = ShlImm;
    break;
  }

  // shxadd Rd, Rs1, Rs2
  Register ScaledReg = ShxAdd.getOperand(0).getReg();
  Register IndexReg = ShxAdd.getOperand(1).getReg();

  if (!IndexReg.isVirtual())
    return false;

  if (ShxAdd.getOperand(2).getReg() != BaseReg)
    return false;

  if (!ScaledReg.isVirtual() || !MRI->hasOneUse(ScaledReg))
    return false;

  MachineInstr &TailMem = *MRI->use_instr_begin(ScaledReg);
  unsigned Opc = TailMem.getOpcode();
  unsigned NewOpc = 0;

  switch (Opc) {
  case RISCV::LB:
    NewOpc = RISCV::QC_LRB;
    break;
  case RISCV::LBU:
    NewOpc = RISCV::QC_LRBU;
    break;
  case RISCV::LH:
    NewOpc = RISCV::QC_LRH;
    break;
  case RISCV::LHU:
    NewOpc = RISCV::QC_LRHU;
    break;
  case RISCV::LW:
    NewOpc = RISCV::QC_LRW;
    break;
  case RISCV::SB:
    NewOpc = RISCV::QC_SRB;
    break;
  case RISCV::SH:
    NewOpc = RISCV::QC_SRH;
    break;
  case RISCV::SW:
    NewOpc = RISCV::QC_SRW;
    break;
  default:
    return false;
  }

  if (!TailMem.getOperand(1).isReg() ||
      TailMem.getOperand(1).getReg() != ScaledReg)
    return false;
  if (!TailMem.getOperand(2).isImm())
    return false;
  int64_t Imm = TailMem.getOperand(2).getImm();

  // Update QC_E_LI offset.
  int64_t NewOffset = SignExtend64<32>(Hi.getOperand(1).getOffset() + Imm);

  Hi.getOperand(1).setOffset(NewOffset);

  // Build scaled load/store.
  auto *TII = ST->getInstrInfo();
  auto *MBB = TailMem.getParent();

  // Ensure index register satisfies GPRNoX0 class required by QC_LR*/QC_SR*.
  MRI->constrainRegClass(IndexReg, &RISCV::GPRNoX0RegClass);

  BuildMI(*MBB, TailMem, TailMem.getDebugLoc(), TII->get(NewOpc))
      .add(TailMem.getOperand(0))
      .addReg(BaseReg, getKillRegState(ShxAdd.getOperand(2).isKill()))
      .addReg(IndexReg, getKillRegState(ShxAdd.getOperand(1).isKill()))
      .addImm(ShAmt)
      .cloneMemRefs(TailMem);

  TailMem.eraseFromParent();
  ShxAdd.eraseFromParent();
  return true;
}

bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  ST = &Fn.getSubtarget<RISCVSubtarget>();

  bool MadeChange = false;
  MRI = &Fn.getRegInfo();
  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &Hi : MBB) {
      MachineInstr *Lo = nullptr;
      if (!detectFoldable(Hi, Lo))
        continue;
      MadeChange |= detectAndFoldOffset(Hi, *Lo);
      MadeChange |= foldIntoMemoryOps(Hi, *Lo);
      MadeChange |= foldShxaddIntoScaledMemory(Hi, *Lo);
    }
  }

  return MadeChange;
}

/// Returns an instance of the Merge Base Offset Optimization pass.
FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() {
  return new RISCVMergeBaseOffsetOpt();
}