| 1 | //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all |
| 10 | // SGPR spills, so must insert CSR SGPR spills as well as expand them. |
| 11 | // |
| 12 | // This pass must never create new SGPR virtual registers. |
| 13 | // |
| 14 | // FIXME: Must stop RegScavenger spills in later passes. |
| 15 | // |
| 16 | //===----------------------------------------------------------------------===// |
| 17 | |
| 18 | #include "SILowerSGPRSpills.h" |
| 19 | #include "AMDGPU.h" |
| 20 | #include "GCNSubtarget.h" |
| 21 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| 22 | #include "SIMachineFunctionInfo.h" |
| 23 | #include "llvm/CodeGen/LiveIntervals.h" |
| 24 | #include "llvm/CodeGen/MachineDominators.h" |
| 25 | #include "llvm/CodeGen/MachineFrameInfo.h" |
| 26 | #include "llvm/CodeGen/RegisterScavenging.h" |
| 27 | |
| 28 | using namespace llvm; |
| 29 | |
| 30 | #define DEBUG_TYPE "si-lower-sgpr-spills" |
| 31 | |
| 32 | using MBBVector = SmallVector<MachineBasicBlock *, 4>; |
| 33 | |
| 34 | namespace { |
| 35 | |
// Cap on how many physical VGPRs are set aside for whole-wave-mode (WWM)
// register allocation; the complement remains available for per-thread VGPR
// virtual registers (see determineRegsForWWMAllocation below).
static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation(
    "amdgpu-num-vgprs-for-wwm-alloc",
    cl::desc("Max num VGPRs for whole-wave register allocation."),
    cl::ReallyHidden, cl::init(Val: 10));
| 40 | |
/// Implementation shared by the legacy and new-PM wrappers. Takes the place
/// of PrologEpilogInserter for SGPR spills: inserts CSR SGPR save/restore
/// code and lowers SGPR spill pseudos into VGPR lane writes/reads.
class SILowerSGPRSpills {
private:
  const SIRegisterInfo *TRI = nullptr;
  const SIInstrInfo *TII = nullptr;
  // Optional analyses; updated in place only when the caller provided them.
  LiveIntervals *LIS = nullptr;
  SlotIndexes *Indexes = nullptr;
  MachineDominatorTree *MDT = nullptr;

  // Save and Restore blocks of the current function. Typically there is a
  // single save block, unless Windows EH funclets are involved.
  MBBVector SaveBlocks;
  MBBVector RestoreBlocks;

public:
  SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes,
                    MachineDominatorTree *MDT)
      : LIS(LIS), Indexes(Indexes), MDT(MDT) {}
  // Entry point; returns true if the function was modified.
  bool run(MachineFunction &MF);
  // Populate SaveBlocks/RestoreBlocks for prolog/epilog placement.
  void calculateSaveRestoreBlocks(MachineFunction &MF);
  // Insert CSR SGPR save/restore code; records the created spill frame
  // indices in CalleeSavedFIs. Returns true if any CSR code was inserted.
  bool spillCalleeSavedRegs(MachineFunction &MF,
                            SmallVectorImpl<int> &CalleeSavedFIs);
  // Track, per lane VGPR, where its IMPLICIT_DEF must be inserted so the def
  // dominates all of its spill uses.
  void updateLaneVGPRDomInstr(
      int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
      DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr);
  // Choose the physical VGPRs reserved for whole-wave-mode allocation.
  void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask);
};
| 67 | |
/// Legacy pass-manager wrapper around SILowerSGPRSpills.
class SILowerSGPRSpillsLegacy : public MachineFunctionPass {
public:
  static char ID;

  SILowerSGPRSpillsLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    // All analyses are preserved: LiveIntervals/SlotIndexes, when present,
    // are repaired in place by the lowering code rather than invalidated.
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getClearedProperties() const override {
    // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs.
    return MachineFunctionProperties().setIsSSA().setNoVRegs();
  }
};
| 87 | |
| 88 | } // end anonymous namespace |
| 89 | |
char SILowerSGPRSpillsLegacy::ID = 0;

// Register the legacy pass. LiveIntervals and SlotIndexes are declared as
// (optional) dependencies; MachineDominatorTree is required above.
INITIALIZE_PASS_BEGIN(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
                      "SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
                    "SI lower SGPR spill instructions", false, false)

// Exported ID other passes use to reference this pass by address.
char &llvm::SILowerSGPRSpillsLegacyID = SILowerSGPRSpillsLegacy::ID;
| 101 | |
| 102 | static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB, |
| 103 | const TargetRegisterInfo *TRI) { |
| 104 | for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) { |
| 105 | if (MBB.isLiveIn(Reg: *R)) { |
| 106 | return true; |
| 107 | } |
| 108 | } |
| 109 | return false; |
| 110 | } |
| 111 | |
/// Insert spill code for the callee-saved registers used in the function.
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
                           ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
                           LiveIntervals *LIS) {
  MachineFunction &MF = *SaveBlock.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();

  // Give target frame lowering the first chance to emit the saves; fall back
  // to generic per-register stack-slot stores if it declines.
  MachineBasicBlock::iterator I = SaveBlock.begin();
  if (!TFI->spillCalleeSavedRegisters(MBB&: SaveBlock, MI: I, CSI, TRI)) {
    for (const CalleeSavedInfo &CS : CSI) {
      // Insert the spill to the stack frame.
      MCRegister Reg = CS.getReg();

      MachineInstrSpan MIS(I, &SaveBlock);
      // The return address register is spilled as a 64-bit value; all other
      // CSRs here are treated as 32-bit registers.
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
          Reg, VT: Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);

      // If this value was already livein, we probably have a direct use of the
      // incoming register value, so don't kill at the spill point. This happens
      // since we pass some special inputs (workgroup IDs) in the callee saved
      // range.
      const bool IsLiveIn = isLiveIntoMBB(Reg, MBB&: SaveBlock, TRI);
      TII.storeRegToStackSlot(MBB&: SaveBlock, MI: I, SrcReg: Reg, isKill: !IsLiveIn, FrameIndex: CS.getFrameIdx(),
                              RC, TRI, VReg: Register());

      // Keep SlotIndexes in sync with the newly inserted store.
      if (Indexes) {
        assert(std::distance(MIS.begin(), I) == 1);
        MachineInstr &Inst = *std::prev(x: I);
        Indexes->insertMachineInstrInMaps(MI&: Inst);
      }

      // The spilled CSR's live range is now stale; drop it.
      if (LIS)
        LIS->removeAllRegUnitsForPhysReg(Reg);
    }
  } else {
    // TFI doesn't update Indexes and LIS, so we have to do it separately.
    if (Indexes)
      Indexes->repairIndexesInRange(MBB: &SaveBlock, Begin: SaveBlock.begin(), End: I);

    if (LIS)
      for (const CalleeSavedInfo &CS : CSI)
        LIS->removeAllRegUnitsForPhysReg(Reg: CS.getReg());
  }
}
| 160 | |
/// Insert restore code for the callee-saved registers used in the function.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
                              MutableArrayRef<CalleeSavedInfo> CSI,
                              SlotIndexes *Indexes, LiveIntervals *LIS) {
  MachineFunction &MF = *RestoreBlock.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  // Restore all registers immediately before the return and any
  // terminators that precede it.
  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
  // Remember the instruction just before the insertion point so the fallback
  // path below knows which range of indexes needs repairing.
  const MachineBasicBlock::iterator BeforeRestoresI =
      I == RestoreBlock.begin() ? I : std::prev(x: I);

  // FIXME: Just emit the readlane/writelane directly
  if (!TFI->restoreCalleeSavedRegisters(MBB&: RestoreBlock, MI: I, CSI, TRI)) {
    for (const CalleeSavedInfo &CI : reverse(C&: CSI)) {
      // Insert in reverse order. loadRegFromStackSlot can insert
      // multiple instructions.
      TFI->restoreCalleeSavedRegister(MBB&: RestoreBlock, MI: I, CS: CI, TII: &TII, TRI);

      // Keep SlotIndexes in sync with the newly inserted restore.
      if (Indexes) {
        MachineInstr &Inst = *std::prev(x: I);
        Indexes->insertMachineInstrInMaps(MI&: Inst);
      }

      // The restored CSR's live range is now stale; drop it.
      if (LIS)
        LIS->removeAllRegUnitsForPhysReg(Reg: CI.getReg());
    }
  } else {
    // TFI doesn't update Indexes and LIS, so we have to do it separately.
    if (Indexes)
      Indexes->repairIndexesInRange(MBB: &RestoreBlock, Begin: BeforeRestoresI,
                                    End: RestoreBlock.getFirstTerminator());

    if (LIS)
      for (const CalleeSavedInfo &CS : CSI)
        LIS->removeAllRegUnitsForPhysReg(Reg: CS.getReg());
  }
}
| 201 | |
/// Compute the sets of entry and return blocks for saving and restoring
/// callee-saved registers, and placing prolog and epilog code.
void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Even when we do not change any CSR, we still want to insert the
  // prologue and epilogue of the function.
  // So set the save points for those.

  // Use the points found by shrink-wrapping, if any.
  if (MFI.getSavePoint()) {
    SaveBlocks.push_back(Elt: MFI.getSavePoint());
    assert(MFI.getRestorePoint() && "Both restore and save must be set");
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    // If RestoreBlock does not have any successor and is not a return block
    // then the end point is unreachable and we do not need to insert any
    // epilogue.
    if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
      RestoreBlocks.push_back(Elt: RestoreBlock);
    return;
  }

  // Save refs to entry and return blocks.
  // Without shrink-wrap points: save in the entry block (and every EH funclet
  // entry), restore in every return block.
  SaveBlocks.push_back(Elt: &MF.front());
  for (MachineBasicBlock &MBB : MF) {
    if (MBB.isEHFuncletEntry())
      SaveBlocks.push_back(Elt: &MBB);
    if (MBB.isReturnBlock())
      RestoreBlocks.push_back(Elt: &MBB);
  }
}
| 233 | |
| 234 | // TODO: To support shrink wrapping, this would need to copy |
| 235 | // PrologEpilogInserter's updateLiveness. |
| 236 | static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) { |
| 237 | MachineBasicBlock &EntryBB = MF.front(); |
| 238 | |
| 239 | for (const CalleeSavedInfo &CSIReg : CSI) |
| 240 | EntryBB.addLiveIn(PhysReg: CSIReg.getReg()); |
| 241 | EntryBB.sortUniqueLiveIns(); |
| 242 | } |
| 243 | |
// Insert save/restore code for all callee-saved SGPRs used by the function.
// Records the spill frame index created for each saved CSR in CalleeSavedFIs
// so run() can later distinguish CSR spills from other SGPR spills. Returns
// true if any CSR spill code was inserted.
bool SILowerSGPRSpills::spillCalleeSavedRegs(
    MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIFrameLowering *TFI = ST.getFrameLowering();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  RegScavenger *RS = nullptr;

  // Determine which of the registers in the callee save list should be saved.
  BitVector SavedRegs;
  TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);

  // Add the code to save and restore the callee saved registers.
  // Naked functions get no prologue/epilogue, hence no CSR handling.
  if (!F.hasFnAttribute(Kind: Attribute::Naked)) {
    // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
    // necessary for verifier liveness checks.
    MFI.setCalleeSavedInfoValid(true);

    std::vector<CalleeSavedInfo> CSI;
    const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

    // Create a stack object for each CSR that must be saved; the slot exists
    // mainly to carry a frame index ("JunkFI") for the spill bookkeeping.
    for (unsigned I = 0; CSRegs[I]; ++I) {
      MCRegister Reg = CSRegs[I];

      if (SavedRegs.test(Idx: Reg)) {
        const TargetRegisterClass *RC =
            TRI->getMinimalPhysRegClass(Reg, VT: MVT::i32);
        int JunkFI = MFI.CreateStackObject(Size: TRI->getSpillSize(RC: *RC),
                                           Alignment: TRI->getSpillAlign(RC: *RC), isSpillSlot: true);

        CSI.emplace_back(args&: Reg, args&: JunkFI);
        CalleeSavedFIs.push_back(Elt: JunkFI);
      }
    }

    if (!CSI.empty()) {
      for (MachineBasicBlock *SaveBlock : SaveBlocks)
        insertCSRSaves(SaveBlock&: *SaveBlock, CSI, Indexes, LIS);

      // Add live ins to save blocks.
      assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
      updateLiveness(MF, CSI);

      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
        insertCSRRestores(RestoreBlock&: *RestoreBlock, CSI, Indexes, LIS);
      return true;
    }
  }

  return false;
}
| 296 | |
// Record/refine, for each lane VGPR used by frame index \p FI, the program
// point where its IMPLICIT_DEF must eventually be inserted (see run()).
void SILowerSGPRSpills::updateLaneVGPRDomInstr(
    int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
    DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {
  // For the Def of a virtual LaneVPGR to dominate all its uses, we should
  // insert an IMPLICIT_DEF before the dominating spill. Switching to a
  // depth first order doesn't really help since the machine function can be in
  // the unstructured control flow post-SSA. For each virtual register, hence
  // finding the common dominator to get either the dominating spill or a block
  // dominating all spills.
  SIMachineFunctionInfo *FuncInfo =
      MBB->getParent()->getInfo<SIMachineFunctionInfo>();
  ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills =
      FuncInfo->getSGPRSpillToVirtualVGPRLanes(FrameIndex: FI);
  Register PrevLaneVGPR;
  for (auto &Spill : VGPRSpills) {
    // All lanes of the same VGPR share one insertion point; process each VGPR
    // only once per frame index.
    if (PrevLaneVGPR == Spill.VGPR)
      continue;

    PrevLaneVGPR = Spill.VGPR;
    auto I = LaneVGPRDomInstr.find(Val: Spill.VGPR);
    if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {
      // Initially add the spill instruction itself for Insertion point.
      LaneVGPRDomInstr[Spill.VGPR] = InsertPt;
    } else {
      assert(I != LaneVGPRDomInstr.end());
      auto PrevInsertPt = I->second;
      MachineBasicBlock *DomMBB = PrevInsertPt->getParent();
      if (DomMBB == MBB) {
        // The insertion point earlier selected in a predecessor block whose
        // spills are currently being lowered. The earlier InsertPt would be
        // the one just before the block terminator and it should be changed
        // if we insert any new spill in it.
        if (MDT->dominates(A: &*InsertPt, B: &*PrevInsertPt))
          I->second = InsertPt;

        continue;
      }

      // Find the common dominator block between PrevInsertPt and the
      // current spill.
      DomMBB = MDT->findNearestCommonDominator(A: DomMBB, B: MBB);
      if (DomMBB == MBB)
        I->second = InsertPt;
      else if (DomMBB != PrevInsertPt->getParent())
        // Neither point dominates the other: fall back to the end of their
        // nearest common dominator block.
        I->second = &(*DomMBB->getFirstTerminator());
    }
  }
}
| 345 | |
| 346 | void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF, |
| 347 | BitVector &RegMask) { |
| 348 | // Determine an optimal number of VGPRs for WWM allocation. The complement |
| 349 | // list will be available for allocating other VGPR virtual registers. |
| 350 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 351 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 352 | BitVector ReservedRegs = TRI->getReservedRegs(MF); |
| 353 | BitVector NonWwmAllocMask(TRI->getNumRegs()); |
| 354 | |
| 355 | // FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future |
| 356 | // to have a balanced allocation between WWM values and per-thread vector |
| 357 | // register operands. |
| 358 | unsigned NumRegs = MaxNumVGPRsForWwmAllocation; |
| 359 | NumRegs = |
| 360 | std::min(a: static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), b: NumRegs); |
| 361 | |
| 362 | auto [MaxNumVGPRs, MaxNumAGPRs] = TRI->getMaxNumVectorRegs(MF); |
| 363 | // Try to use the highest available registers for now. Later after |
| 364 | // vgpr-regalloc, they can be shifted to the lowest range. |
| 365 | unsigned I = 0; |
| 366 | for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1; |
| 367 | (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) { |
| 368 | if (!ReservedRegs.test(Idx: Reg) && |
| 369 | !MRI.isPhysRegUsed(PhysReg: Reg, /*SkipRegMaskTest=*/true)) { |
| 370 | TRI->markSuperRegs(RegisterSet&: RegMask, Reg); |
| 371 | ++I; |
| 372 | } |
| 373 | } |
| 374 | |
| 375 | if (I != NumRegs) { |
| 376 | // Reserve an arbitrary register and report the error. |
| 377 | TRI->markSuperRegs(RegisterSet&: RegMask, Reg: AMDGPU::VGPR0); |
| 378 | MF.getFunction().getContext().emitError( |
| 379 | ErrorStr: "can't find enough VGPRs for wwm-regalloc" ); |
| 380 | } |
| 381 | } |
| 382 | |
| 383 | bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) { |
| 384 | auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); |
| 385 | LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; |
| 386 | auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>(); |
| 387 | SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr; |
| 388 | MachineDominatorTree *MDT = |
| 389 | &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
| 390 | return SILowerSGPRSpills(LIS, Indexes, MDT).run(MF); |
| 391 | } |
| 392 | |
// Main entry point: insert CSR SGPR save/restore code, then lower every SGPR
// spill pseudo into VGPR lane writes/reads (physical lanes for CSRs, virtual
// lanes otherwise). Returns true if the function was modified.
bool SILowerSGPRSpills::run(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();

  assert(SaveBlocks.empty() && RestoreBlocks.empty());

  // First, expose any CSR SGPR spills. This is mostly the same as what PEI
  // does, but somewhat simpler.
  calculateSaveRestoreBlocks(MF);
  SmallVector<int> CalleeSavedFIs;
  bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // Nothing to lower: no stack objects and no CSR spill code was inserted.
  if (!MFI.hasStackObjects() && !HasCSRs) {
    SaveBlocks.clear();
    RestoreBlocks.clear();
    return false;
  }

  bool MadeChange = false;
  bool SpilledToVirtVGPRLanes = false;

  // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
  // handled as SpilledToReg in regular PrologEpilogInserter.
  const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
                                  (HasCSRs || FuncInfo->hasSpilledSGPRs());
  if (HasSGPRSpillToVGPR) {
    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
    // are spilled to VGPRs, in which case we can eliminate the stack usage.
    //
    // This operates under the assumption that only other SGPR spills are users
    // of the frame index.

    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);

    // To track the IMPLICIT_DEF insertion point for the lane vgprs.
    DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(Range&: MBB)) {
        if (!TII->isSGPRSpill(MI))
          continue;

        // Spilling an undef value has no effect; just delete the pseudo.
        if (MI.getOperand(i: 0).isUndef()) {
          if (Indexes)
            Indexes->removeMachineInstrFromMaps(MI);
          MI.eraseFromParent();
          continue;
        }

        int FI = TII->getNamedOperand(MI, OperandName: AMDGPU::OpName::addr)->getIndex();
        assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);

        // CSR spills are recognized by the frame indices created in
        // spillCalleeSavedRegs().
        bool IsCalleeSaveSGPRSpill = llvm::is_contained(Range&: CalleeSavedFIs, Element: FI);
        if (IsCalleeSaveSGPRSpill) {
          // Spill callee-saved SGPRs into physical VGPR lanes.

          // TODO: This is to ensure the CFIs are static for efficient frame
          // unwinding in the debugger. Spilling them into virtual VGPR lanes
          // involve regalloc to allocate the physical VGPRs and that might
          // cause intermediate spill/split of such liveranges for successful
          // allocation. This would result in broken CFI encoding unless the
          // regalloc aware CFI generation to insert new CFIs along with the
          // intermediate spills is implemented. There is no such support
          // currently exist in the LLVM compiler.
          if (FuncInfo->allocateSGPRSpillToVGPRLane(
                  MF, FI, /*SpillToPhysVGPRLane=*/true)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, RS: nullptr, Indexes, LIS, SpillToPhysVGPRLane: true);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to physical VGPR lane when allocated");
          }
        } else {
          MachineInstrSpan MIS(&MI, &MBB);
          if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, RS: nullptr, Indexes, LIS);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to virtual VGPR lane when allocated");
            SpillFIs.set(FI);
            // Record where this lane VGPR's IMPLICIT_DEF must go so its def
            // dominates every lowered spill (inserted below).
            updateLaneVGPRDomInstr(FI, MBB: &MBB, InsertPt: MIS.begin(), LaneVGPRDomInstr);
            SpilledToVirtVGPRLanes = true;
          }
        }
      }
    }

    for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
      auto InsertPt = LaneVGPRDomInstr[Reg];
      // Insert the IMPLICIT_DEF at the identified points.
      MachineBasicBlock &Block = *InsertPt->getParent();
      DebugLoc DL = Block.findDebugLoc(MBBI: InsertPt);
      auto MIB =
          BuildMI(BB&: Block, I&: *InsertPt, MIMD: DL, MCID: TII->get(Opcode: AMDGPU::IMPLICIT_DEF), DestReg: Reg);

      // Add WWM flag to the virtual register.
      FuncInfo->setFlag(Reg, Flag: AMDGPU::VirtRegFlag::WWM_REG);

      // Set SGPR_SPILL asm printer flag
      MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
      if (LIS) {
        LIS->InsertMachineInstrInMaps(MI&: *MIB);
        LIS->createAndComputeVirtRegInterval(Reg);
      }
    }

    // Determine the registers for WWM allocation and also compute the register
    // mask for non-wwm VGPR allocation.
    if (FuncInfo->getSGPRSpillVGPRs().size()) {
      BitVector WwmRegMask(TRI->getNumRegs());

      determineRegsForWWMAllocation(MF, RegMask&: WwmRegMask);

      BitVector NonWwmRegMask(WwmRegMask);
      NonWwmRegMask.flip().clearBitsNotInMask(Mask: TRI->getAllVGPRRegMask());

      // The complement set will be the registers for non-wwm (per-thread) vgpr
      // allocation.
      FuncInfo->updateNonWWMRegMask(RegMask&: NonWwmRegMask);
    }

    for (MachineBasicBlock &MBB : MF) {
      // FIXME: The dead frame indices are replaced with a null register from
      // the debug value instructions. We should instead, update it with the
      // correct register value. But not sure the register value alone is
      // adequate to lower the DIExpression. It should be worked out later.
      for (MachineInstr &MI : MBB) {
        if (MI.isDebugValue()) {
          // DBG_VALUE_LIST carries its location operands starting at index 2;
          // plain DBG_VALUE at index 0.
          uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
          if (MI.getOperand(i: StackOperandIdx).isFI() &&
              !MFI.isFixedObjectIndex(
                  ObjectIdx: MI.getOperand(i: StackOperandIdx).getIndex()) &&
              SpillFIs[MI.getOperand(i: StackOperandIdx).getIndex()]) {
            MI.getOperand(i: StackOperandIdx)
                .ChangeToRegister(Reg: Register(), isDef: false /*isDef*/);
          }
        }
      }
    }

    // All those frame indices which are dead by now should be removed from the
    // function frame. Otherwise, there is a side effect such as re-mapping of
    // free frame index ids by the later pass(es) like "stack slot coloring"
    // which in turn could mess-up with the book keeping of "frame index to VGPR
    // lane".
    FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);

    MadeChange = true;
  }

  if (SpilledToVirtVGPRLanes) {
    const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
    // Shift back the reserved SGPR for EXEC copy into the lowest range.
    // This SGPR is reserved to handle the whole-wave spill/copy operations
    // that might get inserted during vgpr regalloc.
    Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
    if (UnusedLowSGPR && TRI->getHWRegIndex(Reg: UnusedLowSGPR) <
                             TRI->getHWRegIndex(Reg: FuncInfo->getSGPRForEXECCopy()))
      FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
  } else {
    // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
    // spills/copies. Reset the SGPR reserved for EXEC copy.
    FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  SaveBlocks.clear();
  RestoreBlocks.clear();

  return MadeChange;
}
| 570 | |
/// New pass-manager entry point; mirrors the legacy wrapper.
PreservedAnalyses
SILowerSGPRSpillsPass::run(MachineFunction &MF,
                           MachineFunctionAnalysisManager &MFAM) {
  // RAII helper that applies this pass's declared property changes to MF.
  MFPropsModifier _(*this, MF);
  // LiveIntervals/SlotIndexes are used only if already cached; the dominator
  // tree is computed on demand.
  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(IR&: MF);
  auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(IR&: MF);
  MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
  SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
  return PreservedAnalyses::all();
}
| 581 | |