RISCVMakeCompressible.cpp source code [llvm_projects/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp]

1	//===-- RISCVMakeCompressible.cpp - Make more instructions compressible ---===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass searches for instructions that are prevented from being compressed
10	// by one of the following:
11	//
12	// 1. The use of a single uncompressed register.
13	// 2. A base register + offset where the offset is too large to be compressed
14	// and the base register may or may not be compressed.
15	//
16	//
17	// For case 1, if a compressed register is available, then the uncompressed
18	// register is copied to the compressed register and its uses are replaced.
19	//
20	// For example, storing zero uses the incompressible zero register:
21	// sw zero, 0(a0) # if zero
22	// sw zero, 8(a0) # if zero
23	// sw zero, 4(a0) # if zero
24	// sw zero, 24(a0) # if zero
25	//
26	// If a compressed register (e.g. a1) is available, the above can be transformed
27	// to the following to improve code size:
28	// li a1, 0
29	// c.sw a1, 0(a0)
30	// c.sw a1, 8(a0)
31	// c.sw a1, 4(a0)
32	// c.sw a1, 24(a0)
33	//
34	//
35	// For case 2, if a compressed register is available, then the original base
36	// is copied and adjusted such that:
37	//
38	// new_base_register = base_register + adjustment
39	// base_register + large_offset = new_base_register + small_offset
40	//
41	// For example, the following offsets are too large for c.sw:
42	// lui a2, 983065
43	// sw a1, -236(a2)
44	// sw a1, -240(a2)
45	// sw a1, -244(a2)
46	// sw a1, -248(a2)
47	// sw a1, -252(a2)
48	// sw a0, -256(a2)
49	//
50	// If a compressed register is available (e.g. a3), a new base could be created
51	// such that the addresses can accessed with a compressible offset, thus
52	// improving code size:
53	// lui a2, 983065
54	// addi a3, a2, -256
55	// c.sw a1, 20(a3)
56	// c.sw a1, 16(a3)
57	// c.sw a1, 12(a3)
58	// c.sw a1, 8(a3)
59	// c.sw a1, 4(a3)
60	// c.sw a0, 0(a3)
61	//
62	//
63	// This optimization is only applied if there are enough uses of the copied
64	// register for code size to be reduced.
65	//
66	//===----------------------------------------------------------------------===//
67
68	#include "RISCV.h"
69	#include "RISCVSubtarget.h"
70	#include "llvm/CodeGen/Passes.h"
71	#include "llvm/CodeGen/RegisterScavenging.h"
72	#include "llvm/MC/TargetRegistry.h"
73	#include "llvm/Support/Debug.h"
74
75	using namespace llvm;
76
77	#define DEBUG_TYPE "riscv-make-compressible"
78	#define RISCV_COMPRESS_INSTRS_NAME "RISC-V Make Compressible"
79
80	namespace {
81
82	struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
83	static char ID;
84
85	bool runOnMachineFunction(MachineFunction &Fn) override;
86
87	RISCVMakeCompressibleOpt() : MachineFunctionPass (ID) {}
88
89	StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
90	};
91	} // namespace
92
93	char RISCVMakeCompressibleOpt::ID = `0`;
94	INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible",
95	RISCV_COMPRESS_INSTRS_NAME, false, false)
96
97	// Return log2(widthInBytes) of load/store done by Opcode.
98	static unsigned log2LdstWidth(unsigned Opcode) {
99	switch (Opcode) {
100	default:
101	llvm_unreachable("Unexpected opcode");
102	case RISCV::LBU:
103	case RISCV::SB:
104	return `0`;
105	case RISCV::LH:
106	case RISCV::LH_INX:
107	case RISCV::LHU:
108	case RISCV::SH:
109	case RISCV::SH_INX:
110	return `1`;
111	case RISCV::LW:
112	case RISCV::LW_INX:
113	case RISCV::SW:
114	case RISCV::SW_INX:
115	case RISCV::FLW:
116	case RISCV::FSW:
117	return `2`;
118	case RISCV::LD:
119	case RISCV::LD_RV32:
120	case RISCV::SD:
121	case RISCV::SD_RV32:
122	case RISCV::FLD:
123	case RISCV::FSD:
124	return `3`;
125	}
126	}
127
128	// Return bit field size of immediate operand of Opcode.
129	static unsigned offsetMask(unsigned Opcode) {
130	switch (Opcode) {
131	default:
132	llvm_unreachable("Unexpected opcode");
133	case RISCV::LBU:
134	case RISCV::SB:
135	return maskTrailingOnes<unsigned>(N: `2U`);
136	case RISCV::LH:
137	case RISCV::LH_INX:
138	case RISCV::LHU:
139	case RISCV::SH:
140	case RISCV::SH_INX:
141	return maskTrailingOnes<unsigned>(N: `1U`);
142	case RISCV::LW:
143	case RISCV::LW_INX:
144	case RISCV::SW:
145	case RISCV::SW_INX:
146	case RISCV::FLW:
147	case RISCV::FSW:
148	case RISCV::LD:
149	case RISCV::LD_RV32:
150	case RISCV::SD:
151	case RISCV::SD_RV32:
152	case RISCV::FLD:
153	case RISCV::FSD:
154	return maskTrailingOnes<unsigned>(N: `5U`);
155	}
156	}
157
158	// Return a mask for the offset bits of a non-stack-pointer based compressed
159	// load/store.
160	static uint8_t compressedLDSTOffsetMask(unsigned Opcode) {
161	return offsetMask(Opcode) << log2LdstWidth(Opcode);
162	}
163
164	// Return true if Offset fits within a compressed stack-pointer based
165	// load/store.
166	static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) {
167	// Compressed sp-based loads and stores only work for 32/64 bits.
168	switch (log2LdstWidth(Opcode)) {
169	case `2`:
170	return isShiftedUInt<`6`, `2`>(x: Offset);
171	case `3`:
172	return isShiftedUInt<`6`, `3`>(x: Offset);
173	}
174	return false;
175	}
176
177	// Given an offset for a load/store, return the adjustment required to the base
178	// register such that the address can be accessed with a compressible offset.
179	// This will return 0 if the offset is already compressible.
180	static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
181	// Return the excess bits that do not fit in a compressible offset.
182	return Offset & ~compressedLDSTOffsetMask(Opcode);
183	}
184
185	// Return true if Reg is in a compressed register class.
186	static bool isCompressedReg(Register Reg) {
187	return RISCV::GPRCRegClass.contains(Reg) \|\|
188	RISCV::GPRF16CRegClass.contains(Reg) \|\|
189	RISCV::GPRF32CRegClass.contains(Reg) \|\|
190	RISCV::FPR32CRegClass.contains(Reg) \|\|
191	RISCV::FPR64CRegClass.contains(Reg) \|\|
192	RISCV::GPRPairCRegClass.contains(Reg);
193	}
194
195	// Return true if MI is a load for which there exists a compressed version.
196	static bool isCompressibleLoad(const MachineInstr &MI) {
197	const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
198
199	switch (MI.getOpcode()) {
200	default:
201	return false;
202	case RISCV::LBU:
203	case RISCV::LH:
204	case RISCV::LH_INX:
205	case RISCV::LHU:
206	return STI.hasStdExtZcb();
207	case RISCV::LW:
208	case RISCV::LW_INX:
209	case RISCV::LD:
210	return STI.hasStdExtZca();
211	case RISCV::LD_RV32:
212	return STI.hasStdExtZclsd();
213	case RISCV::FLW:
214	return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
215	case RISCV::FLD:
216	return STI.hasStdExtCOrZcd();
217	}
218	}
219
220	// Return true if MI is a store for which there exists a compressed version.
221	static bool isCompressibleStore(const MachineInstr &MI) {
222	const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
223
224	switch (MI.getOpcode()) {
225	default:
226	return false;
227	case RISCV::SB:
228	case RISCV::SH:
229	case RISCV::SH_INX:
230	return STI.hasStdExtZcb();
231	case RISCV::SW:
232	case RISCV::SW_INX:
233	case RISCV::SD:
234	return STI.hasStdExtZca();
235	case RISCV::SD_RV32:
236	return STI.hasStdExtZclsd();
237	case RISCV::FSW:
238	return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
239	case RISCV::FSD:
240	return STI.hasStdExtCOrZcd();
241	}
242	}
243
244	// Find a single register and/or large offset which, if compressible, would
245	// allow the given instruction to be compressed.
246	//
247	// Possible return values:
248	//
249	// {Reg, 0} - Uncompressed Reg needs replacing with a compressed
250	// register.
251	// {Reg, N} - Reg needs replacing with a compressed register and
252	// N needs adding to the new register. (Reg may be
253	// compressed or uncompressed).
254	// {RISCV::NoRegister, 0} - No suitable optimization found for this
255	// instruction.
256	static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
257	const unsigned Opcode = MI.getOpcode();
258
259	if (isCompressibleLoad(MI) \|\| isCompressibleStore(MI)) {
260	const MachineOperand &MOImm = MI.getOperand(i: `2`);
261	if (!MOImm.isImm())
262	return RegImmPair (RISCV::NoRegister, `0`);
263
264	int64_t Offset = MOImm.getImm();
265	int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
266	Register Base = MI.getOperand(i: `1`).getReg();
267
268	// Memory accesses via the stack pointer do not have a requirement for
269	// either of the registers to be compressible and can take a larger offset.
270	if (RISCV::SPRegClass.contains(Reg: Base)) {
271	if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust)
272	return RegImmPair (Base, NewBaseAdjust);
273	} else {
274	Register SrcDest = MI.getOperand(i: `0`).getReg();
275	bool SrcDestCompressed = isCompressedReg(Reg: SrcDest);
276	bool BaseCompressed = isCompressedReg(Reg: Base);
277
278	// If only Base and/or offset prevent compression, then return Base and
279	// any adjustment required to make the offset compressible.
280	if ((!BaseCompressed \|\| NewBaseAdjust) && SrcDestCompressed)
281	return RegImmPair (Base, NewBaseAdjust);
282
283	// For loads, we can only change the base register since dest is defined
284	// rather than used.
285	//
286	// For stores, we can change SrcDest (and Base if SrcDest == Base) but
287	// cannot resolve an incompressible offset in this case.
288	if (isCompressibleStore(MI)) {
289	if (!SrcDestCompressed && (BaseCompressed \|\| SrcDest == Base) &&
290	!NewBaseAdjust)
291	return RegImmPair (SrcDest, NewBaseAdjust);
292	}
293	}
294	}
295	return RegImmPair (RISCV::NoRegister, `0`);
296	}
297
298	// Check all uses after FirstMI of the given register, keeping a vector of
299	// instructions that would be compressible if the given register (and offset if
300	// applicable) were compressible.
301	//
302	// If there are enough uses for this optimization to improve code size and a
303	// compressed register is available, return that compressed register.
304	static Register analyzeCompressibleUses(MachineInstr &FirstMI,
305	RegImmPair RegImm,
306	SmallVectorImpl<MachineInstr *> &MIs) {
307	MachineBasicBlock &MBB = *FirstMI.getParent();
308	const TargetRegisterInfo *TRI =
309	MBB.getParent()->getSubtarget().getRegisterInfo();
310
311	for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
312	E = MBB.instr_end();
313	I != E; ++I) {
314	MachineInstr &MI = *I;
315
316	// Determine if this is an instruction which would benefit from using the
317	// new register.
318	RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
319	if (CandidateRegImm.Reg == RegImm.Reg && CandidateRegImm.Imm == RegImm.Imm)
320	MIs.push_back(Elt: &MI);
321
322	// If RegImm.Reg is modified by this instruction, then we cannot optimize
323	// past this instruction. If the register is already compressed, then it may
324	// possible to optimize a large offset in the current instruction - this
325	// will have been detected by the preceding call to
326	// getRegImmPairPreventingCompression.
327	if (MI.modifiesRegister(Reg: RegImm.Reg, TRI))
328	break;
329	}
330
331	// Adjusting the base costs one new uncompressed addi and therefore three uses
332	// are required for a code size reduction. If no base adjustment is required,
333	// then copying the register costs one new c.mv (or c.li Rd, 0 for "copying"
334	// the zero register) and therefore two uses are required for a code size
335	// reduction. For GPR pairs, we need 2 ADDIs to copy so we need three users.
336	unsigned CopyCost = RISCV::GPRPairRegClass.contains(Reg: RegImm.Reg) ? `2` : `1`;
337	assert((RegImm.Imm == `0` \|\| CopyCost == `1`) && "GPRPair should have zero imm");
338	if (MIs.size() <= CopyCost \|\| (RegImm.Imm != `0` && MIs.size() <= `2`))
339	return Register ();
340
341	// Find a compressible register which will be available from the first
342	// instruction we care about to the last.
343	const TargetRegisterClass *RCToScavenge;
344
345	// Work out the compressed register class from which to scavenge.
346	if (RISCV::GPRRegClass.contains(Reg: RegImm.Reg))
347	RCToScavenge = &RISCV::GPRCRegClass;
348	else if (RISCV::GPRF16RegClass.contains(Reg: RegImm.Reg))
349	RCToScavenge = &RISCV::GPRF16CRegClass;
350	else if (RISCV::GPRF32RegClass.contains(Reg: RegImm.Reg))
351	RCToScavenge = &RISCV::GPRF32CRegClass;
352	else if (RISCV::FPR32RegClass.contains(Reg: RegImm.Reg))
353	RCToScavenge = &RISCV::FPR32CRegClass;
354	else if (RISCV::FPR64RegClass.contains(Reg: RegImm.Reg))
355	RCToScavenge = &RISCV::FPR64CRegClass;
356	else if (RISCV::GPRPairRegClass.contains(Reg: RegImm.Reg))
357	RCToScavenge = &RISCV::GPRPairCRegClass;
358	else
359	return Register ();
360
361	RegScavenger RS;
362	RS.enterBasicBlockEnd(MBB);
363	RS.backward(I: std::next(x: MIs.back()->getIterator()));
364	return RS.scavengeRegisterBackwards(RC: *RCToScavenge, To: FirstMI.getIterator(),
365	/RestoreAfter=/false, /SPAdj=/`0`,
366	/AllowSpill=/false);
367	}
368
369	// Update uses of the old register in the given instruction to the new register.
370	static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
371	Register NewReg) {
372	unsigned Opcode = MI.getOpcode();
373
374	// If this pass is extended to support more instructions, the check for
375	// definedness may need to be strengthened.
376	assert((isCompressibleLoad(MI) \|\| isCompressibleStore(MI)) &&
377	"Unsupported instruction for this optimization.");
378
379	int SkipN = `0`;
380
381	// Skip the first (value) operand to a store instruction (except if the store
382	// offset is zero) in order to avoid an incorrect transformation.
383	// e.g. sd a0, 808(a0) to addi a2, a0, 768; sd a2, 40(a2)
384	if (isCompressibleStore(MI) && OldRegImm.Imm != `0`)
385	SkipN = `1`;
386
387	// Update registers
388	for (MachineOperand &MO : drop_begin(RangeOrContainer: MI.operands(), N: SkipN))
389	if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
390	// Do not update operands that define the old register.
391	//
392	// The new register was scavenged for the range of instructions that are
393	// being updated, therefore it should not be defined within this range
394	// except possibly in the final instruction.
395	if (MO.isDef()) {
396	assert(isCompressibleLoad(MI));
397	continue;
398	}
399	// Update reg
400	MO.setReg(NewReg);
401	}
402
403	// Update offset
404	MachineOperand &MOImm = MI.getOperand(i: `2`);
405	int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode);
406	MOImm.setImm(NewOffset);
407	}
408
409	bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
410	// This is a size optimization.
411	if (skipFunction(F: Fn.getFunction()) \|\| !Fn.getFunction().hasMinSize())
412	return false;
413
414	const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
415	const RISCVInstrInfo &TII = *STI.getInstrInfo();
416
417	// This optimization only makes sense if compressed instructions are emitted.
418	if (!STI.hasStdExtZca())
419	return false;
420
421	for (MachineBasicBlock &MBB : Fn) {
422	LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
423	for (MachineInstr &MI : MBB) {
424	// Determine if this instruction would otherwise be compressed if not for
425	// an incompressible register or offset.
426	RegImmPair RegImm = getRegImmPairPreventingCompression(MI);
427	if (!RegImm.Reg && RegImm.Imm == `0`)
428	continue;
429
430	// Determine if there is a set of instructions for which replacing this
431	// register with a compressed register (and compressible offset if
432	// applicable) is possible and will allow compression.
433	SmallVector<MachineInstr *, `8`> MIs;
434	Register NewReg = analyzeCompressibleUses(FirstMI&: MI, RegImm, MIs);
435	if (!NewReg)
436	continue;
437
438	// Create the appropriate copy and/or offset.
439	if (RISCV::GPRRegClass.contains(Reg: RegImm.Reg)) {
440	assert(isInt<`12`>(RegImm.Imm));
441	BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII.get(Opcode: RISCV::ADDI), DestReg: NewReg)
442	.addReg(RegNo: RegImm.Reg)
443	.addImm(Val: RegImm.Imm);
444	} else {
445	assert(RegImm.Imm == `0`);
446	TII.copyPhysReg(MBB, MBBI: MI, DL: MI.getDebugLoc(), DstReg: NewReg, SrcReg: RegImm.Reg,
447	/KillSrc/ false);
448	}
449
450	// Update the set of instructions to use the compressed register and
451	// compressible offset instead. These instructions should now be
452	// compressible.
453	// TODO: Update all uses if RegImm.Imm == 0? Not just those that are
454	// expected to become compressible.
455	for (MachineInstr *UpdateMI : MIs)
456	updateOperands(MI&: *UpdateMI, OldRegImm: RegImm, NewReg);
457	}
458	}
459	return true;
460	}
461
462	/// Returns an instance of the Make Compressible Optimization pass.
463	FunctionPass *llvm::createRISCVMakeCompressibleOptPass() {
464	return new RISCVMakeCompressibleOpt ();
465	}
466

Browse the source code of llvm_projects/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp