AArch64SRLTDefineSuperRegs.cpp source code [llvm_projects/llvm/lib/Target/AArch64/AArch64SRLTDefineSuperRegs.cpp]

1	//===- AArch64SRLTDefineSuperRegs.cpp -------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
// When SubRegister Liveness Tracking (SRLT) is enabled, this pass adds
// extra implicit-def's to instructions that define the low N bits of
// a GPR/FPR register to also define the top bits, because all AArch64
// instructions that write the low bits of a GPR/FPR also implicitly zero
// the top bits. For example, 'mov w0, w1' writes zeroes to the top 32 bits of
// x0, so this pass adds an `implicit-def $x0` after register allocation.
15	//
// These semantics are originally represented in the MIR using `SUBREG_TO_REG`,
// which expresses that the top bits have been defined by the preceding
// instructions, but during register coalescing this information is lost and,
// in contrast to when SRLT is disabled, the implicit-defs are not added to the
// instruction when rewriting virtual -> physical registers.
21	//
22	// There have been several attempts to fix this in the coalescer [1], but each
23	// iteration has exposed new bugs and the patch had to be reverted.
24	// Additionally, the concept of adding 'implicit-def' of a virtual register is
25	// particularly fragile and many places don't expect it (for example in
26	// `X86::commuteInstructionImpl` the code only looks at specific operands and
27	// does not consider implicit-defs. Similar in `SplitEditor::addDeadDef` where
28	// it traverses operand 'defs' rather than 'all_defs').
29	//
30	// We want a temporary solution that doesn't impact other targets and is simpler
31	// and less intrusive than the patch proposed for the register coalescer [1], so
32	// that we can enable SRLT for AArch64.
33	//
34	// The approach here is to just add the 'implicit-def' manually after rewriting
35	// virtual regs -> physical regs. This still means that during the register
36	// allocation process the dependences are not accurately represented in the MIR
37	// and LiveIntervals, but there are several reasons why we believe this isn't a
38	// problem in practice:
39	// (A) The register allocator only spills entire virtual registers.
40	// This is additionally guarded by code in
41	// AArch64InstrInfo::storeRegToStackSlot/loadRegFromStackSlot
42	// where it checks if a register matches the expected register class.
43	// (B) Rematerialization only happens when the instruction writes the full
44	// register.
45	// (C) The high bits of the AArch64 register cannot be written independently.
46	// (D) Instructions that write only part of a register always take that same
47	// register as a tied input operand, to indicate it's a merging operation.
48	//
49	// (A) means that for two virtual registers of regclass GPR32 and GPR64, if the
50	// GPR32 register is coalesced into the GPR64 vreg then the full GPR64 would
51	// be spilled/filled even if only the low 32-bits would be required for the
52	// given liverange. (B) means that the top bits of a GPR64 would never be
53	// overwritten by rematerialising a GPR32 sub-register for a given liverange.
54	// (C-D) means that we can assume that the MIR as input to the register
55	// allocator correctly expresses the instruction behaviour and dependences
56	// between values, so unless the register allocator would violate (A) or (B),
57	// the MIR is otherwise sound.
58	//
59	// Alternative approaches have also been considered, such as:
60	// (1) Changing the AArch64 instruction definitions to write all bits and
61	// extract the low N bits for the result.
62	// (2) Disabling coalescing of SUBREG_TO_REG and using regalloc hints to tell
63	// the register allocator to favour the same register for the input/output.
64	// (3) Adding a new coalescer guard node with a tied-operand constraint, such
65	// that when the SUBREG_TO_REG is removed, something still represents that
66	// the top bits are defined. The node would get removed before rewriting
67	// virtregs.
68	// (4) Using an explicit INSERT_SUBREG into a zero value and try to optimize
69	// away the INSERT_SUBREG (this is a more explicit variant of (2) and (3))
70	// (5) Adding a new MachineOperand flag that represents the top bits would be
71	// defined, but are not read nor undef.
72	//
73	// (1) would be the best approach but would be a significant effort as it
74	// requires rewriting most/all instruction definitions and fixing MIR passes
75	// that rely on the current definitions, whereas (2-4) result in sub-optimal
76	// code that can't really be avoided because the explicit nodes would stop
77	// rematerialization. (5) might be a way to mitigate the
78	// fragility of implicit-def's of virtual registers if we want to pursue
79	// landing [1], but then we'd rather choose approach (1) to avoid using
80	// SUBREG_TO_REG entirely.
81	//
82	// [1] https://github.com/llvm/llvm-project/pull/168353
83	//===----------------------------------------------------------------------===//
84
85	#include "AArch64InstrInfo.h"
86	#include "AArch64MachineFunctionInfo.h"
87	#include "AArch64Subtarget.h"
88	#include "MCTargetDesc/AArch64AddressingModes.h"
89	#include "llvm/ADT/BitVector.h"
90	#include "llvm/ADT/SmallSet.h"
91	#include "llvm/CodeGen/MachineBasicBlock.h"
92	#include "llvm/CodeGen/MachineFunctionPass.h"
93	#include "llvm/CodeGen/MachineRegisterInfo.h"
94	#include "llvm/CodeGen/TargetRegisterInfo.h"
95	#include "llvm/Support/Debug.h"
96
97	using namespace llvm;
98
99	#define DEBUG_TYPE "aarch64-srlt-define-superregs"
100	#define PASS_NAME "AArch64 SRLT Define Super-Regs Pass"
101
102	namespace {
103
104	struct AArch64SRLTDefineSuperRegs : public MachineFunctionPass {
105	inline static char ID = `0`;
106
107	AArch64SRLTDefineSuperRegs() : MachineFunctionPass (ID) {}
108
109	bool runOnMachineFunction(MachineFunction &MF) override;
110
111	Register getWidestSuperReg(Register R, const BitVector &RequiredBaseRegUnits,
112	const BitVector &QHiRegUnits);
113
114	StringRef getPassName() const override { return PASS_NAME; }
115
116	void getAnalysisUsage(AnalysisUsage &AU) const override {
117	AU.setPreservesCFG();
118	AU.addPreservedID(ID&: MachineLoopInfoID);
119	AU.addPreservedID(ID&: MachineDominatorsID);
120	MachineFunctionPass::getAnalysisUsage(AU);
121	}
122
123	private:
124	MachineFunction MF = nullptr*;
125	const AArch64Subtarget Subtarget = nullptr*;
126	const AArch64RegisterInfo TRI = nullptr*;
127	};
128
129	} // end anonymous namespace
130
131	INITIALIZE_PASS(AArch64SRLTDefineSuperRegs, DEBUG_TYPE, PASS_NAME, false, false)
132
133	// Returns the widest super-reg for a given reg, or NoRegister if no suitable
134	// wider super-reg has been found. For example:
135	// W0 -> X0
136	// B1 -> Q1 (without SVE)
137	// -> Z1 (with SVE)
138	// W1_W2 -> X1_X2
139	// D0_D1 -> Q0_Q1 (without SVE)
140	// -> Z0_Z1 (with SVE)
141	Register AArch64SRLTDefineSuperRegs::getWidestSuperReg(
142	Register R, const BitVector &RequiredBaseRegUnits,
143	const BitVector &QHiRegUnits) {
144	assert(R.isPhysical() &&
145	"Expected to be run straight after virtregrewriter!");
146
147	BitVector Units(TRI->getNumRegUnits());
148	for (MCRegUnit U : TRI->regunits(Reg: R))
149	Units.set((unsigned)U);
150
151	auto IsSuitableSuperReg = [&](Register SR) {
152	for (MCRegUnit U : TRI->regunits(Reg: SR)) {
153	// Avoid choosing z1 as super-reg of d1 if SVE is not available.
154	// Q_HI registers are only set for SVE registers, as those consist*
155	// of the Q register for the low 128 bits and the Q_HI (artificial)
156	// register for the top (vscale-1) 128 bits.*
157	if (QHiRegUnits.test(Idx: (unsigned)U) &&
158	!Subtarget->isSVEorStreamingSVEAvailable())
159	return false;
160	// We consider a super-reg as unsuitable if any of its reg units is not
161	// artificial and not shared, as that would imply that U is a unit for a
162	// different register, which means the candidate super-reg is likely
163	// a register tuple.
164	if (!TRI->isArtificialRegUnit(Unit: U) &&
165	(!Units.test(Idx: (unsigned)U) \|\| !RequiredBaseRegUnits.test(Idx: (unsigned)U)))
166	return false;
167	}
168	return true;
169	};
170
171	Register LargestSuperReg = AArch64::NoRegister;
172	for (Register SR : TRI->superregs(Reg: R))
173	if (IsSuitableSuperReg (SR) && (LargestSuperReg == AArch64::NoRegister \|\|
174	TRI->isSuperRegister(RegA: LargestSuperReg, RegB: SR)))
175	LargestSuperReg = SR;
176
177	return LargestSuperReg;
178	}
179
180	bool AArch64SRLTDefineSuperRegs::runOnMachineFunction(MachineFunction &MF) {
181	this->MF = &MF;
182	Subtarget = &MF.getSubtarget<AArch64Subtarget>();
183	TRI = Subtarget->getRegisterInfo();
184	const MachineRegisterInfo *MRI = &MF.getRegInfo();
185
186	if (!MRI->subRegLivenessEnabled())
187	return false;
188
189	assert(!MRI->isSSA() && "Expected to be run after breaking down SSA form!");
190
191	auto XRegs = seq_inclusive<unsigned>(Begin: AArch64::X0, End: AArch64::X28);
192	auto ZRegs = seq_inclusive<unsigned>(Begin: AArch64::Z0, End: AArch64::Z31);
193	constexpr unsigned FixedRegs[] = {AArch64::FP, AArch64::LR, AArch64::SP};
194
195	BitVector RequiredBaseRegUnits(TRI->getNumRegUnits());
196	for (Register R : concat<unsigned>(Ranges&: XRegs, Ranges&: ZRegs, Ranges: FixedRegs))
197	for (MCRegUnit U : TRI->regunits(Reg: R))
198	RequiredBaseRegUnits.set((unsigned)U);
199
200	BitVector QHiRegUnits(TRI->getNumRegUnits());
201	for (Register R : seq_inclusive<unsigned>(Begin: AArch64::Q0_HI, End: AArch64::Q31_HI))
202	for (MCRegUnit U : TRI->regunits(Reg: R))
203	QHiRegUnits.set((unsigned)U);
204
205	bool Changed = false;
206	for (MachineBasicBlock &MBB : MF) {
207	for (MachineInstr &MI : MBB) {
208	// PATCHPOINT may have a 'def' that's not a register, avoid this.
209	if (MI.getOpcode() == TargetOpcode::PATCHPOINT)
210	continue;
211	// For each partial register write, also add an implicit-def for top bits
212	// of the register (e.g. for w0 add a def of x0).
213	SmallSet<Register, `8`> SuperRegs;
214	for (const MachineOperand &DefOp : MI.defs())
215	if (Register R = getWidestSuperReg(R: DefOp.getReg(), RequiredBaseRegUnits,
216	QHiRegUnits);
217	R != AArch64::NoRegister)
218	SuperRegs.insert(V: R);
219
220	if (!SuperRegs.size())
221	continue;
222
223	LLVM_DEBUG(dbgs() << "Adding implicit-defs to: " << MI);
224	for (Register R : SuperRegs) {
225	LLVM_DEBUG(dbgs() << " " << printReg(R, TRI) << "\n");
226	bool IsRenamable = any_of(Range: MI.defs(), P: [&](const MachineOperand &MO) {
227	return MO.isRenamable() && TRI->regsOverlap(RegA: MO.getReg(), RegB: R);
228	});
229	bool IsDead = any_of(Range: MI.defs(), P: [&](const MachineOperand &MO) {
230	return MO.isDead() && TRI->regsOverlap(RegA: MO.getReg(), RegB: R);
231	});
232	MachineOperand DefOp = MachineOperand::CreateReg(
233	Reg: R, /isDef=/true, /isImp=/true, /isKill=/false,
234	/isDead=/IsDead, /isUndef=/false, /isEarlyClobber=/false,
235	/SubReg=/`0`, /isDebug=/false, /isInternalRead=/false,
236	/isRenamable=/IsRenamable);
237	MI.addOperand(Op: DefOp);
238	}
239	Changed = true;
240	}
241	}
242
243	return Changed;
244	}
245
246	FunctionPass *llvm::createAArch64SRLTDefineSuperRegsPass() {
247	return new AArch64SRLTDefineSuperRegs ();
248	}
249

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/AArch64SRLTDefineSuperRegs.cpp