//===-- RISCVMakeCompressible.cpp - Make more instructions compressible ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass searches for instructions that are prevented from being compressed
// by one of the following:
//
//   1. The use of a single uncompressed register.
//   2. A base register + offset where the offset is too large to be compressed
//      and the base register may or may not be compressed.
//
//
// For case 1, if a compressed register is available, then the uncompressed
// register is copied to the compressed register and its uses are replaced.
//
// For example, storing zero uses the incompressible zero register:
//   sw zero, 0(a0)
//   sw zero, 8(a0)
//   sw zero, 4(a0)
//   sw zero, 24(a0)
//
// If a compressed register (e.g. a1) is available, the above can be
// transformed to the following to improve code size:
//   li a1, 0
//   c.sw a1, 0(a0)
//   c.sw a1, 8(a0)
//   c.sw a1, 4(a0)
//   c.sw a1, 24(a0)
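//
// Each compressed store above is 2 bytes rather than 4, and li a1, 0 can
// itself assemble to the 2-byte c.li, so the sequence shrinks from 16 bytes
// to 10: a net saving of 6 bytes.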
//
//
// For case 2, if a compressed register is available, then the original base
// is copied and adjusted such that:
//
//   new_base_register = base_register + adjustment
//   base_register + large_offset = new_base_register + small_offset
//
// For example, the following offsets are too large for c.sw:
//   lui a2, 983065
//   sw  a1, -236(a2)
//   sw  a1, -240(a2)
//   sw  a1, -244(a2)
//   sw  a1, -248(a2)
//   sw  a1, -252(a2)
//   sw  a0, -256(a2)
//
// If a compressed register is available (e.g. a3), a new base could be created
// such that the addresses can be accessed with a compressible offset, thus
// improving code size:
//   lui a2, 983065
//   addi a3, a2, -256
//   c.sw a1, 20(a3)
//   c.sw a1, 16(a3)
//   c.sw a1, 12(a3)
//   c.sw a1, 8(a3)
//   c.sw a1, 4(a3)
//   c.sw a0, 0(a3)
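//
// Here the addi cannot itself be compressed (c.addi requires rd == rs1), so
// it costs a full 4 bytes, but the six stores shrink from 24 bytes to 12: a
// net saving of 8 bytes.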
//
//
// This optimization is only applied if there are enough uses of the copied
// register for code size to be reduced.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-make-compressible"
#define RISCV_COMPRESS_INSTRS_NAME "RISC-V Make Compressible"

namespace {

struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
  static char ID;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
};
} // namespace

char RISCVMakeCompressibleOpt::ID = 0;
INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible",
                RISCV_COMPRESS_INSTRS_NAME, false, false)
// Return log2(widthInBytes) of load/store done by Opcode.
static unsigned log2LdstWidth(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::LBU:
  case RISCV::SB:
    return 0;
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::SH:
  case RISCV::SH_INX:
    return 1;
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FLW:
  case RISCV::FSW:
    return 2;
  case RISCV::LD:
  case RISCV::LD_RV32:
  case RISCV::SD:
  case RISCV::SD_RV32:
  case RISCV::FLD:
  case RISCV::FSD:
    return 3;
  }
}

// Return bit field size of immediate operand of Opcode.
static unsigned offsetMask(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::LBU:
  case RISCV::SB:
    return maskTrailingOnes<unsigned>(2U);
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::SH:
  case RISCV::SH_INX:
    return maskTrailingOnes<unsigned>(1U);
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FLW:
  case RISCV::FSW:
  case RISCV::LD:
  case RISCV::LD_RV32:
  case RISCV::SD:
  case RISCV::SD_RV32:
  case RISCV::FLD:
  case RISCV::FSD:
    return maskTrailingOnes<unsigned>(5U);
  }
}

// Return a mask for the offset bits of a non-stack-pointer based compressed
// load/store.
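// For example, c.sw has a 5-bit offset field scaled by the 4-byte access
// width, giving a mask of 0b11111 << 2 == 0x7c (word-aligned offsets from 0 to
// 124); for c.sd the mask is 0b11111 << 3 == 0xf8 (multiples of 8 up to 248).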
static uint8_t compressedLDSTOffsetMask(unsigned Opcode) {
  return offsetMask(Opcode) << log2LdstWidth(Opcode);
}

// Return true if Offset fits within a compressed stack-pointer based
// load/store.
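// c.lwsp/c.swsp encode a 6-bit unsigned offset scaled by 4 (0 to 252), and
// c.ldsp/c.sdsp scale by 8 (0 to 504); byte and halfword accesses have no
// sp-relative compressed forms, hence the default of false below.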
static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) {
  // Compressed sp-based loads and stores only work for 32/64 bits.
  switch (log2LdstWidth(Opcode)) {
  case 2:
    return isShiftedUInt<6, 2>(Offset);
  case 3:
    return isShiftedUInt<6, 3>(Offset);
  }
  return false;
}

// Given an offset for a load/store, return the adjustment required to the base
// register such that the address can be accessed with a compressible offset.
// This will return 0 if the offset is already compressible.
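// For example, for sd with Offset == 808 the compressible offset field covers
// 0xf8, so the adjustment is 808 & ~0xf8 == 768, leaving 808 - 768 == 40 as a
// compressible offset (the sd example shown in updateOperands below).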
static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
  // Return the excess bits that do not fit in a compressible offset.
  return Offset & ~compressedLDSTOffsetMask(Opcode);
}

// Return true if Reg is in a compressed register class.
static bool isCompressedReg(Register Reg) {
  return RISCV::GPRCRegClass.contains(Reg) ||
         RISCV::GPRF16CRegClass.contains(Reg) ||
         RISCV::GPRF32CRegClass.contains(Reg) ||
         RISCV::FPR32CRegClass.contains(Reg) ||
         RISCV::FPR64CRegClass.contains(Reg) ||
         RISCV::GPRPairCRegClass.contains(Reg);
}

// Return true if MI is a load for which there exists a compressed version.
static bool isCompressibleLoad(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();

  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::LBU:
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
    return STI.hasStdExtZcb();
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::LD:
    return STI.hasStdExtZca();
  case RISCV::LD_RV32:
    return STI.hasStdExtZclsd();
  case RISCV::FLW:
    return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
  case RISCV::FLD:
    return STI.hasStdExtCOrZcd();
  }
}

// Return true if MI is a store for which there exists a compressed version.
static bool isCompressibleStore(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();

  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SH_INX:
    return STI.hasStdExtZcb();
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::SD:
    return STI.hasStdExtZca();
  case RISCV::SD_RV32:
    return STI.hasStdExtZclsd();
  case RISCV::FSW:
    return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
  case RISCV::FSD:
    return STI.hasStdExtCOrZcd();
  }
}

// Find a single register and/or large offset which, if compressible, would
// allow the given instruction to be compressed.
//
// Possible return values:
//
//   {Reg, 0}               - Uncompressed Reg needs replacing with a
//                            compressed register.
//   {Reg, N}               - Reg needs replacing with a compressed register
//                            and N needs adding to the new register. (Reg may
//                            be compressed or uncompressed).
//   {RISCV::NoRegister, 0} - No suitable optimization found for this
//                            instruction.
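// For example, `sw a1, -236(a2)` from the file header returns {a2, -256}:
// both a1 and a2 are compressible registers, but the offset is not, and
// -236 == -256 + 20 where 20 fits the compressed sw offset field.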
static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
  const unsigned Opcode = MI.getOpcode();

  if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
    const MachineOperand &MOImm = MI.getOperand(2);
    if (!MOImm.isImm())
      return RegImmPair(RISCV::NoRegister, 0);

    int64_t Offset = MOImm.getImm();
    int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
    Register Base = MI.getOperand(1).getReg();

    // Memory accesses via the stack pointer do not have a requirement for
    // either of the registers to be compressible and can take a larger offset.
    if (RISCV::SPRegClass.contains(Base)) {
      if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust)
        return RegImmPair(Base, NewBaseAdjust);
    } else {
      Register SrcDest = MI.getOperand(0).getReg();
      bool SrcDestCompressed = isCompressedReg(SrcDest);
      bool BaseCompressed = isCompressedReg(Base);

      // If only Base and/or offset prevent compression, then return Base and
      // any adjustment required to make the offset compressible.
      if ((!BaseCompressed || NewBaseAdjust) && SrcDestCompressed)
        return RegImmPair(Base, NewBaseAdjust);

      // For loads, we can only change the base register since dest is defined
      // rather than used.
      //
      // For stores, we can change SrcDest (and Base if SrcDest == Base) but
      // cannot resolve an incompressible offset in this case.
      if (isCompressibleStore(MI)) {
        if (!SrcDestCompressed && (BaseCompressed || SrcDest == Base) &&
            !NewBaseAdjust)
          return RegImmPair(SrcDest, NewBaseAdjust);
      }
    }
  }
  return RegImmPair(RISCV::NoRegister, 0);
}

// Check all uses after FirstMI of the given register, keeping a vector of
// instructions that would be compressible if the given register (and offset if
// applicable) were compressible.
//
// If there are enough uses for this optimization to improve code size and a
// compressed register is available, return that compressed register.
static Register analyzeCompressibleUses(MachineInstr &FirstMI,
                                        RegImmPair RegImm,
                                        SmallVectorImpl<MachineInstr *> &MIs) {
  MachineBasicBlock &MBB = *FirstMI.getParent();
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();

  for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
                                         E = MBB.instr_end();
       I != E; ++I) {
    MachineInstr &MI = *I;

    // Determine if this is an instruction which would benefit from using the
    // new register.
    RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
    if (CandidateRegImm.Reg == RegImm.Reg && CandidateRegImm.Imm == RegImm.Imm)
      MIs.push_back(&MI);

    // If RegImm.Reg is modified by this instruction, then we cannot optimize
    // past this instruction. If the register is already compressed, then it
    // may be possible to optimize a large offset in the current instruction -
    // this will have been detected by the preceding call to
    // getRegImmPairPreventingCompression.
    if (MI.modifiesRegister(RegImm.Reg, TRI))
      break;
  }

  // Adjusting the base costs one new uncompressed addi and therefore three
  // uses are required for a code size reduction. If no base adjustment is
  // required, then copying the register costs one new c.mv (or c.li Rd, 0 for
  // "copying" the zero register) and therefore two uses are required for a
  // code size reduction. For GPR pairs, we need 2 ADDIs to copy so we need
  // three users.
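  // In byte terms: each newly compressible use saves 2 bytes, while a c.mv or
  // c.li costs 2 bytes and an uncompressed addi costs 4, so the two-use (copy)
  // and three-use (base adjustment) thresholds each leave a net saving of at
  // least 2 bytes.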
  unsigned CopyCost = RISCV::GPRPairRegClass.contains(RegImm.Reg) ? 2 : 1;
  assert((RegImm.Imm == 0 || CopyCost == 1) && "GPRPair should have zero imm");
  if (MIs.size() <= CopyCost || (RegImm.Imm != 0 && MIs.size() <= 2))
    return Register();

  // Find a compressible register which will be available from the first
  // instruction we care about to the last.
  const TargetRegisterClass *RCToScavenge;

  // Work out the compressed register class from which to scavenge.
  if (RISCV::GPRRegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRCRegClass;
  else if (RISCV::GPRF16RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRF16CRegClass;
  else if (RISCV::GPRF32RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRF32CRegClass;
  else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR32CRegClass;
  else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR64CRegClass;
  else if (RISCV::GPRPairRegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRPairCRegClass;
  else
    return Register();

  RegScavenger RS;
  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MIs.back()->getIterator()));
  return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
                                      /*RestoreAfter=*/false, /*SPAdj=*/0,
                                      /*AllowSpill=*/false);
}

// Update uses of the old register in the given instruction to the new
// register.
static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
                           Register NewReg) {
  unsigned Opcode = MI.getOpcode();

  // If this pass is extended to support more instructions, the check for
  // definedness may need to be strengthened.
  assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) &&
         "Unsupported instruction for this optimization.");

  int SkipN = 0;

  // Skip the first (value) operand to a store instruction (except if the store
  // offset is zero) in order to avoid an incorrect transformation.
  // e.g. sd a0, 808(a0) to addi a2, a0, 768; sd a2, 40(a2)
  if (isCompressibleStore(MI) && OldRegImm.Imm != 0)
    SkipN = 1;

  // Update registers
  for (MachineOperand &MO : drop_begin(MI.operands(), SkipN))
    if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
      // Do not update operands that define the old register.
      //
      // The new register was scavenged for the range of instructions that are
      // being updated, therefore it should not be defined within this range
      // except possibly in the final instruction.
      if (MO.isDef()) {
        assert(isCompressibleLoad(MI));
        continue;
      }
      // Update reg
      MO.setReg(NewReg);
    }

  // Update offset
  MachineOperand &MOImm = MI.getOperand(2);
  int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode);
  MOImm.setImm(NewOffset);
}

bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
  // This is a size optimization.
  if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasMinSize())
    return false;

  const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo &TII = *STI.getInstrInfo();

  // This optimization only makes sense if compressed instructions are emitted.
  if (!STI.hasStdExtZca())
    return false;

  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &MI : MBB) {
      // Determine if this instruction would otherwise be compressed if not for
      // an incompressible register or offset.
      RegImmPair RegImm = getRegImmPairPreventingCompression(MI);
      if (!RegImm.Reg && RegImm.Imm == 0)
        continue;

      // Determine if there is a set of instructions for which replacing this
      // register with a compressed register (and compressible offset if
      // applicable) is possible and will allow compression.
      SmallVector<MachineInstr *, 8> MIs;
      Register NewReg = analyzeCompressibleUses(MI, RegImm, MIs);
      if (!NewReg)
        continue;

      // Create the appropriate copy and/or offset.
      if (RISCV::GPRRegClass.contains(RegImm.Reg)) {
        assert(isInt<12>(RegImm.Imm));
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg)
            .addReg(RegImm.Reg)
            .addImm(RegImm.Imm);
      } else {
        assert(RegImm.Imm == 0);
        TII.copyPhysReg(MBB, MI, MI.getDebugLoc(), NewReg, RegImm.Reg,
                        /*KillSrc*/ false);
      }

      // Update the set of instructions to use the compressed register and
      // compressible offset instead. These instructions should now be
      // compressible.
      // TODO: Update all uses if RegImm.Imm == 0? Not just those that are
      // expected to become compressible.
      for (MachineInstr *UpdateMI : MIs)
        updateOperands(*UpdateMI, RegImm, NewReg);
    }
  }
  return true;
}

/// Returns an instance of the Make Compressible Optimization pass.
FunctionPass *llvm::createRISCVMakeCompressibleOptPass() {
  return new RISCVMakeCompressibleOpt();
}