//===-- RISCVMakeCompressible.cpp - Make more instructions compressible ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass searches for instructions that are prevented from being compressed
// by one of the following:
//
// 1. The use of a single uncompressed register.
// 2. A base register + offset where the offset is too large to be compressed
//    and the base register may or may not be compressed.
//
//
// For case 1, if a compressed register is available, then the uncompressed
// register is copied to the compressed register and its uses are replaced.
//
// For example, storing zero uses the incompressible zero register:
//   sw zero, 0(a0)   # if zero
//   sw zero, 8(a0)   # if zero
//   sw zero, 4(a0)   # if zero
//   sw zero, 24(a0)  # if zero
//
// If a compressed register (e.g. a1) is available, the above can be transformed
// to the following to improve code size:
//   li a1, 0
//   c.sw a1, 0(a0)
//   c.sw a1, 8(a0)
//   c.sw a1, 4(a0)
//   c.sw a1, 24(a0)
//
//
// For case 2, if a compressed register is available, then the original base
// is copied and adjusted such that:
//
//   new_base_register = base_register + adjustment
//   base_register + large_offset = new_base_register + small_offset
//
// For example, the following offsets are too large for c.sw:
//   lui a2, 983065
//   sw  a1, -236(a2)
//   sw  a1, -240(a2)
//   sw  a1, -244(a2)
//   sw  a1, -248(a2)
//   sw  a1, -252(a2)
//   sw  a0, -256(a2)
//
// If a compressed register is available (e.g. a3), a new base could be created
// such that the addresses can be accessed with a compressible offset, thus
// improving code size:
//   lui a2, 983065
//   addi a3, a2, -256
//   c.sw a1, 20(a3)
//   c.sw a1, 16(a3)
//   c.sw a1, 12(a3)
//   c.sw a1, 8(a3)
//   c.sw a1, 4(a3)
//   c.sw a0, 0(a3)
//
//
// This optimization is only applied if there are enough uses of the copied
// register for code size to be reduced.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-make-compressible"
#define RISCV_COMPRESS_INSTRS_NAME "RISC-V Make Compressible"

namespace {

struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
  static char ID;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
};
} // namespace

char RISCVMakeCompressibleOpt::ID = 0;
INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible",
                RISCV_COMPRESS_INSTRS_NAME, false, false)
// Return log2(widthInBytes) of load/store done by Opcode.
static unsigned log2LdstWidth(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::LBU:
  case RISCV::SB:
  case RISCV::QC_E_LBU:
  case RISCV::QC_E_SB:
    return 0;
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::SH:
  case RISCV::SH_INX:
  case RISCV::QC_E_LH:
  case RISCV::QC_E_LHU:
  case RISCV::QC_E_SH:
    return 1;
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FLW:
  case RISCV::FSW:
  case RISCV::QC_E_LW:
  case RISCV::QC_E_SW:
    return 2;
  case RISCV::LD:
  case RISCV::LD_RV32:
  case RISCV::SD:
  case RISCV::SD_RV32:
  case RISCV::FLD:
  case RISCV::FSD:
    return 3;
  }
}

// Return a mask of the offset bits supported by the compressed form of Opcode,
// before scaling by the access width.
static unsigned offsetMask(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::LBU:
  case RISCV::SB:
  case RISCV::QC_E_LBU:
  case RISCV::QC_E_SB:
    return maskTrailingOnes<unsigned>(2U);
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::SH:
  case RISCV::SH_INX:
  case RISCV::QC_E_LH:
  case RISCV::QC_E_LHU:
  case RISCV::QC_E_SH:
    return maskTrailingOnes<unsigned>(1U);
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FLW:
  case RISCV::FSW:
  case RISCV::LD:
  case RISCV::LD_RV32:
  case RISCV::SD:
  case RISCV::SD_RV32:
  case RISCV::FLD:
  case RISCV::FSD:
  case RISCV::QC_E_LW:
  case RISCV::QC_E_SW:
    return maskTrailingOnes<unsigned>(5U);
  }
}

// Return a mask for the offset bits of a non-stack-pointer based compressed
// load/store.
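// As an illustrative example (assuming the standard RVC encodings): for
// RISCV::SW, offsetMask() is 0b11111 and log2LdstWidth() is 2, so this returns
// 0x7c, i.e. c.sw can reach byte offsets 0..124 in multiples of 4.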
static uint8_t compressedLDSTOffsetMask(unsigned Opcode) {
  return offsetMask(Opcode) << log2LdstWidth(Opcode);
}

// Return true if Offset fits within a compressed stack-pointer based
// load/store.
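// As a point of reference (assuming the standard RVC encodings): the word
// forms (c.lwsp/c.swsp) accept offsets 0..252 in multiples of 4, and the
// doubleword forms (c.ldsp/c.sdsp) accept offsets 0..504 in multiples of 8,
// which is what the isShiftedUInt checks below express.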
static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) {
  // Compressed sp-based loads and stores only work for 32/64 bits.
  switch (log2LdstWidth(Opcode)) {
  case 2:
    return isShiftedUInt<6, 2>(Offset);
  case 3:
    return isShiftedUInt<6, 3>(Offset);
  }
  return false;
}

// Given an offset for a load/store, return the adjustment required to the base
// register such that the address can be accessed with a compressible offset.
// This will return 0 if the offset is already compressible.
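// For example (mirroring the case in the file header), an sw offset of -240
// keeps only -240 & 0x7c = 16 as the compressible part, so this returns
// -240 & ~0x7c = -256: adding -256 to the base leaves a c.sw offset of 16.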
static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
  // Return the excess bits that do not fit in a compressible offset.
  return Offset & ~compressedLDSTOffsetMask(Opcode);
}

// Return true if Reg is in a compressed register class.
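// (These are the registers reachable from the 3-bit register fields of
// compressed instructions: x8-x15 for the GPR-based classes and f8-f15 for
// the FPR classes.)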
static bool isCompressedReg(Register Reg) {
  return RISCV::GPRCRegClass.contains(Reg) ||
         RISCV::GPRF16CRegClass.contains(Reg) ||
         RISCV::GPRF32CRegClass.contains(Reg) ||
         RISCV::FPR32CRegClass.contains(Reg) ||
         RISCV::FPR64CRegClass.contains(Reg) ||
         RISCV::GPRPairCRegClass.contains(Reg);
}

// Return true if MI is a load for which there exists a compressed version.
static bool isCompressibleLoad(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();

  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::LBU:
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
    return STI.hasStdExtZcb();
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::LD:
    return STI.hasStdExtZca();
  case RISCV::LD_RV32:
    return STI.hasStdExtZclsd();
  case RISCV::FLW:
    return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
  case RISCV::FLD:
    return STI.hasStdExtCOrZcd();
  // For the Xqcilo loads, we only mark them as compressible if Xqcilia is also
  // enabled so that QC_E_ADDI can be used to create the new base.
  case RISCV::QC_E_LBU:
  case RISCV::QC_E_LH:
  case RISCV::QC_E_LHU:
    return !STI.is64Bit() && STI.hasVendorXqcilo() && STI.hasVendorXqcilia() &&
           STI.hasStdExtZcb();
  case RISCV::QC_E_LW:
    return !STI.is64Bit() && STI.hasVendorXqcilo() && STI.hasVendorXqcilia();
  }
}

// Return true if MI is a store for which there exists a compressed version.
static bool isCompressibleStore(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();

  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SH_INX:
    return STI.hasStdExtZcb();
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::SD:
    return STI.hasStdExtZca();
  case RISCV::SD_RV32:
    return STI.hasStdExtZclsd();
  case RISCV::FSW:
    return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
  case RISCV::FSD:
    return STI.hasStdExtCOrZcd();
  // For the Xqcilo stores, we only mark them as compressible if Xqcilia is
  // also enabled so that QC_E_ADDI can be used to create the new base.
  case RISCV::QC_E_SB:
  case RISCV::QC_E_SH:
    return !STI.is64Bit() && STI.hasVendorXqcilo() && STI.hasVendorXqcilia() &&
           STI.hasStdExtZcb();
  case RISCV::QC_E_SW:
    return !STI.is64Bit() && STI.hasVendorXqcilo() && STI.hasVendorXqcilia();
  }
}

// Find a single register and/or large offset which, if compressible, would
// allow the given instruction to be compressed.
//
// Possible return values:
//
//   {Reg, 0}               - Uncompressed Reg needs replacing with a
//                            compressed register.
//   {Reg, N}               - Reg needs replacing with a compressed register
//                            and N needs adding to the new register. (Reg may
//                            be compressed or uncompressed).
//   {RISCV::NoRegister, 0} - No suitable optimization found for this
//                            instruction.
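// To tie this to the file header (illustrative only): "sw zero, 0(a0)" yields
// {zero, 0} (case 1, the value register is not compressible), while
// "sw a1, -240(a2)" yields {a2, -256} (case 2, the offset needs a base
// adjustment).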
static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
  const unsigned Opcode = MI.getOpcode();

  if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
    const MachineOperand &MOImm = MI.getOperand(2);
    if (!MOImm.isImm())
      return RegImmPair(Register(), 0);

    int64_t Offset = MOImm.getImm();
    int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
    Register Base = MI.getOperand(1).getReg();

    // Memory accesses via the stack pointer do not have a requirement for
    // either of the registers to be compressible and can take a larger offset.
    if (RISCV::SPRegClass.contains(Base)) {
      if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust)
        return RegImmPair(Base, NewBaseAdjust);
    } else {
      Register SrcDest = MI.getOperand(0).getReg();
      bool SrcDestCompressed = isCompressedReg(SrcDest);
      bool BaseCompressed = isCompressedReg(Base);

      // If only Base and/or offset prevent compression, then return Base and
      // any adjustment required to make the offset compressible.
      if ((!BaseCompressed || NewBaseAdjust) && SrcDestCompressed)
        return RegImmPair(Base, NewBaseAdjust);

      // For loads, we can only change the base register since dest is defined
      // rather than used.
      //
      // For stores, we can change SrcDest (and Base if SrcDest == Base) but
      // cannot resolve an incompressible offset in this case.
      if (isCompressibleStore(MI)) {
        if (!SrcDestCompressed && (BaseCompressed || SrcDest == Base) &&
            !NewBaseAdjust)
          return RegImmPair(SrcDest, NewBaseAdjust);
      }
    }
  }
  return RegImmPair(Register(), 0);
}

static bool isXqciloLdSt(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::QC_E_SB:
  case RISCV::QC_E_SH:
  case RISCV::QC_E_SW:
  case RISCV::QC_E_LBU:
  case RISCV::QC_E_LH:
  case RISCV::QC_E_LHU:
  case RISCV::QC_E_LW:
    return true;
  }
}

// Check all uses after FirstMI of the given register, keeping a vector of
// instructions that would be compressible if the given register (and offset if
// applicable) were compressible.
//
// If there are enough uses for this optimization to improve code size and a
// compressed register is available, return that compressed register.
static Register analyzeCompressibleUses(MachineInstr &FirstMI,
                                        RegImmPair RegImm,
                                        SmallVectorImpl<MachineInstr *> &MIs) {
  MachineBasicBlock &MBB = *FirstMI.getParent();
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  bool XqciloLdSt = false;

  for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
                                         E = MBB.instr_end();
       I != E; ++I) {
    MachineInstr &MI = *I;

    // Determine if this is an instruction which would benefit from using the
    // new register.
    RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
    if (CandidateRegImm.Reg == RegImm.Reg &&
        CandidateRegImm.Imm == RegImm.Imm) {
      XqciloLdSt |= isXqciloLdSt(MI);
      MIs.push_back(&MI);
    }

    // If RegImm.Reg is modified by this instruction, then we cannot optimize
    // past this instruction. If the register is already compressed, then it
    // may still be possible to optimize a large offset in the current
    // instruction - this will have been detected by the preceding call to
    // getRegImmPairPreventingCompression.
    if (MI.modifiesRegister(RegImm.Reg, TRI))
      break;
  }

  // Adjusting the base costs:
  // a. --> addi (uncompressed 4 bytes)
  //        lw/sw (4 bytes) --> compressed to 2 bytes
  //        lw/sw (4 bytes) --> compressed to 2 bytes
  //        lw/sw (4 bytes) --> compressed to 2 bytes
  //    at least three lw/sw instructions for code size reduction.
  //
  // b. --> qc.e.addi (uncompressed 6 bytes)
  //        qc.e.lw/sw (6 bytes) --> compressed to 2 bytes
  //        qc.e.lw/sw (6 bytes) --> compressed to 2 bytes
  //    at least two qc.e.lw/sw instructions for code size reduction.
  //
  // If no base adjustment is required, then copying the register costs one new
  // c.mv (or c.li Rd, 0 for "copying" the zero register) and therefore two uses
  // are required for a code size reduction. For GPR pairs, we need 2 ADDIs to
  // copy so we need three users.
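  //
  // Put as a rough byte tally (illustrative, assuming 2-byte compressed
  // forms): an ADDI costs 4 bytes and each rewritten lw/sw saves 2, so three
  // rewrites net 2 bytes; a QC.E.ADDI costs 6 bytes and each rewritten
  // qc.e.lw/sw saves 4, so two rewrites net 2 bytes; a c.mv/c.li costs 2
  // bytes, so two rewrites also net 2 bytes.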
  unsigned BaseCost = XqciloLdSt ? 2 : 3;
  unsigned CopyCost = RISCV::GPRPairRegClass.contains(RegImm.Reg) ? 2 : 1;
  assert((RegImm.Imm == 0 || CopyCost == 1) && "GPRPair should have zero imm");
  if (MIs.size() <= CopyCost || (RegImm.Imm != 0 && MIs.size() < BaseCost))
    return Register();

  // Find a compressible register which will be available from the first
  // instruction we care about to the last.
  const TargetRegisterClass *RCToScavenge;

  // Work out the compressed register class from which to scavenge.
  if (RISCV::GPRRegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRCRegClass;
  else if (RISCV::GPRF16RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRF16CRegClass;
  else if (RISCV::GPRF32RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRF32CRegClass;
  else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR32CRegClass;
  else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR64CRegClass;
  else if (RISCV::GPRPairRegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRPairCRegClass;
  else
    return Register();

  RegScavenger RS;
  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MIs.back()->getIterator()));
  return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
                                      /*RestoreAfter=*/false, /*SPAdj=*/0,
                                      /*AllowSpill=*/false);
}

// Update uses of the old register in the given instruction to the new
// register.
static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
                           Register NewReg) {
  unsigned Opcode = MI.getOpcode();

  // If this pass is extended to support more instructions, the check for
  // definedness may need to be strengthened.
  assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) &&
         "Unsupported instruction for this optimization.");

  int SkipN = 0;

  // Skip the first (value) operand to a store instruction (except if the store
  // offset is zero) in order to avoid an incorrect transformation.
  // e.g. sd a0, 808(a0) to addi a2, a0, 768; sd a2, 40(a2)
  if (isCompressibleStore(MI) && OldRegImm.Imm != 0)
    SkipN = 1;

  // Update registers
  for (MachineOperand &MO : drop_begin(MI.operands(), SkipN))
    if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
      // Do not update operands that define the old register.
      //
      // The new register was scavenged for the range of instructions that are
      // being updated, therefore it should not be defined within this range
      // except possibly in the final instruction.
      if (MO.isDef()) {
        assert(isCompressibleLoad(MI));
        continue;
      }
      // Update reg
      MO.setReg(NewReg);
    }

  // Update offset
  MachineOperand &MOImm = MI.getOperand(2);
  int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode);
  MOImm.setImm(NewOffset);
}

bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
  // This is a size optimization.
  if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasMinSize())
    return false;

  const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo &TII = *STI.getInstrInfo();

  // This optimization only makes sense if compressed instructions are emitted.
  if (!STI.hasStdExtZca())
    return false;

  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &MI : MBB) {
      // Determine if this instruction would otherwise be compressed if not for
      // an incompressible register or offset.
      RegImmPair RegImm = getRegImmPairPreventingCompression(MI);
      if (!RegImm.Reg && RegImm.Imm == 0)
        continue;

      // Determine if there is a set of instructions for which replacing this
      // register with a compressed register (and compressible offset if
      // applicable) is possible and will allow compression.
      SmallVector<MachineInstr *, 8> MIs;
      Register NewReg = analyzeCompressibleUses(MI, RegImm, MIs);
      if (!NewReg)
        continue;

      // Create the appropriate copy and/or offset.
      if (RISCV::GPRRegClass.contains(RegImm.Reg)) {
        if (isInt<12>(RegImm.Imm)) {
          BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg)
              .addReg(RegImm.Reg)
              .addImm(RegImm.Imm);
        } else {
          assert(STI.hasVendorXqcilia() && isInt<26>(RegImm.Imm));
          BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::QC_E_ADDI), NewReg)
              .addReg(RegImm.Reg)
              .addImm(RegImm.Imm);
        }
      } else {
        assert(RegImm.Imm == 0);
        TII.copyPhysReg(MBB, MI, MI.getDebugLoc(), NewReg, RegImm.Reg,
                        /*KillSrc*/ false);
      }

      // Update the set of instructions to use the compressed register and
      // compressible offset instead. These instructions should now be
      // compressible.
      // TODO: Update all uses if RegImm.Imm == 0? Not just those that are
      // expected to become compressible.
      for (MachineInstr *UpdateMI : MIs)
        updateOperands(*UpdateMI, RegImm, NewReg);
    }
  }
  return true;
}

/// Returns an instance of the Make Compressible Optimization pass.
FunctionPass *llvm::createRISCVMakeCompressibleOptPass() {
  return new RISCVMakeCompressibleOpt();
}