SILowerSGPRSpills.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp]

1	//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10	// SGPR spills, so must insert CSR SGPR spills as well as expand them.
11	//
12	// This pass must never create new SGPR virtual registers.
13	//
14	// FIXME: Must stop RegScavenger spills in later passes.
15	//
16	//===----------------------------------------------------------------------===//
17
18	#include "SILowerSGPRSpills.h"
19	#include "AMDGPU.h"
20	#include "GCNSubtarget.h"
21	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22	#include "SIMachineFunctionInfo.h"
23	#include "llvm/CodeGen/LiveIntervals.h"
24	#include "llvm/CodeGen/MachineDominators.h"
25	#include "llvm/CodeGen/MachineFrameInfo.h"
26	#include "llvm/CodeGen/RegisterScavenging.h"
27	#include "llvm/InitializePasses.h"
28
29	using namespace llvm;
30
31	#define DEBUG_TYPE "si-lower-sgpr-spills"
32
33	using MBBVector = SmallVector<MachineBasicBlock *, `4`>;
34
35	namespace {
36
37	static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation(
38	"amdgpu-num-vgprs-for-wwm-alloc",
39	cl::desc ("Max num VGPRs for whole-wave register allocation."),
40	cl::ReallyHidden, cl::init(Val: `10`));
41
42	class SILowerSGPRSpills {
43	private:
44	const SIRegisterInfo TRI = nullptr*;
45	const SIInstrInfo TII = nullptr*;
46	LiveIntervals LIS = nullptr*;
47	SlotIndexes Indexes = nullptr*;
48	MachineDominatorTree MDT = nullptr*;
49
50	// Save and Restore blocks of the current function. Typically there is a
51	// single save block, unless Windows EH funclets are involved.
52	MBBVector SaveBlocks;
53	MBBVector RestoreBlocks;
54
55	public:
56	SILowerSGPRSpills(LiveIntervals LIS, SlotIndexes Indexes,
57	MachineDominatorTree *MDT)
58	: LIS(LIS), Indexes(Indexes), MDT(MDT) {}
59	bool run(MachineFunction &MF);
60	void calculateSaveRestoreBlocks(MachineFunction &MF);
61	bool spillCalleeSavedRegs(MachineFunction &MF,
62	SmallVectorImpl<int> &CalleeSavedFIs);
63	void updateLaneVGPRDomInstr(
64	int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
65	DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr);
66	void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask);
67	};
68
69	class SILowerSGPRSpillsLegacy : public MachineFunctionPass {
70	public:
71	static char ID;
72
73	SILowerSGPRSpillsLegacy() : MachineFunctionPass (ID) {}
74
75	bool runOnMachineFunction(MachineFunction &MF) override;
76
77	void getAnalysisUsage(AnalysisUsage &AU) const override {
78	AU.addRequired<MachineDominatorTreeWrapperPass>();
79	AU.setPreservesAll();
80	MachineFunctionPass::getAnalysisUsage(AU);
81	}
82
83	MachineFunctionProperties getClearedProperties() const override {
84	// SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs.
85	return MachineFunctionProperties ().setIsSSA().setNoVRegs();
86	}
87	};
88
89	} // end anonymous namespace
90
91	char SILowerSGPRSpillsLegacy::ID = `0`;
92
93	INITIALIZE_PASS_BEGIN(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
94	"SI lower SGPR spill instructions", false, false)
95	INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
96	INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
97	INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
98	INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
99	"SI lower SGPR spill instructions", false, false)
100
101	char &llvm::SILowerSGPRSpillsLegacyID = SILowerSGPRSpillsLegacy::ID;
102
103	static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,
104	const TargetRegisterInfo *TRI) {
105	for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) {
106	if (MBB.isLiveIn(Reg: *R)) {
107	return true;
108	}
109	}
110	return false;
111	}
112
113	/// Insert spill code for the callee-saved registers used in the function.
114	static void insertCSRSaves(MachineBasicBlock &SaveBlock,
115	ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
116	LiveIntervals *LIS) {
117	MachineFunction &MF = *SaveBlock.getParent();
118	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
119	const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
120	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
121	const SIRegisterInfo *RI = ST.getRegisterInfo();
122
123	MachineBasicBlock::iterator I = SaveBlock.begin();
124	if (!TFI->spillCalleeSavedRegisters(MBB&: SaveBlock, MI: I, CSI, TRI: RI)) {
125	for (const CalleeSavedInfo &CS : CSI) {
126	// Insert the spill to the stack frame.
127	MCRegister Reg = CS.getReg();
128
129	MachineInstrSpan MIS(I, &SaveBlock);
130	const TargetRegisterClass *RC = RI->getMinimalPhysRegClass(
131	Reg, VT: Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
132
133	// If this value was already livein, we probably have a direct use of the
134	// incoming register value, so don't kill at the spill point. This happens
135	// since we pass some special inputs (workgroup IDs) in the callee saved
136	// range.
137	const bool IsLiveIn = isLiveIntoMBB(Reg, MBB&: SaveBlock, TRI: RI);
138	TII.storeRegToStackSlot(MBB&: SaveBlock, MI: I, SrcReg: Reg, isKill: !IsLiveIn, FrameIndex: CS.getFrameIdx(),
139	RC, VReg: Register ());
140
141	if (Indexes) {
142	assert(std::distance(MIS.begin(), I) == `1`);
143	MachineInstr &Inst = *std::prev(x: I);
144	Indexes->insertMachineInstrInMaps(MI&: Inst);
145	}
146
147	if (LIS)
148	LIS->removeAllRegUnitsForPhysReg(Reg);
149	}
150	} else {
151	// TFI doesn't update Indexes and LIS, so we have to do it separately.
152	if (Indexes)
153	Indexes->repairIndexesInRange(MBB: &SaveBlock, Begin: SaveBlock.begin(), End: I);
154
155	if (LIS)
156	for (const CalleeSavedInfo &CS : CSI)
157	LIS->removeAllRegUnitsForPhysReg(Reg: CS.getReg());
158	}
159	}
160
161	/// Insert restore code for the callee-saved registers used in the function.
162	static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
163	MutableArrayRef<CalleeSavedInfo> CSI,
164	SlotIndexes Indexes, LiveIntervals LIS) {
165	MachineFunction &MF = *RestoreBlock.getParent();
166	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
167	const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
168	const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
169	// Restore all registers immediately before the return and any
170	// terminators that precede it.
171	MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
172	const MachineBasicBlock::iterator BeforeRestoresI =
173	I == RestoreBlock.begin() ? I : std::prev(x: I);
174
175	// FIXME: Just emit the readlane/writelane directly
176	if (!TFI->restoreCalleeSavedRegisters(MBB&: RestoreBlock, MI: I, CSI, TRI)) {
177	for (const CalleeSavedInfo &CI : reverse(C&: CSI)) {
178	// Insert in reverse order. loadRegFromStackSlot can insert
179	// multiple instructions.
180	TFI->restoreCalleeSavedRegister(MBB&: RestoreBlock, MI: I, CS: CI, TII: &TII, TRI);
181
182	if (Indexes) {
183	MachineInstr &Inst = *std::prev(x: I);
184	Indexes->insertMachineInstrInMaps(MI&: Inst);
185	}
186
187	if (LIS)
188	LIS->removeAllRegUnitsForPhysReg(Reg: CI.getReg());
189	}
190	} else {
191	// TFI doesn't update Indexes and LIS, so we have to do it separately.
192	if (Indexes)
193	Indexes->repairIndexesInRange(MBB: &RestoreBlock, Begin: BeforeRestoresI,
194	End: RestoreBlock.getFirstTerminator());
195
196	if (LIS)
197	for (const CalleeSavedInfo &CS : CSI)
198	LIS->removeAllRegUnitsForPhysReg(Reg: CS.getReg());
199	}
200	}
201
202	/// Compute the sets of entry and return blocks for saving and restoring
203	/// callee-saved registers, and placing prolog and epilog code.
204	void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
205	const MachineFrameInfo &MFI = MF.getFrameInfo();
206
207	// Even when we do not change any CSR, we still want to insert the
208	// prologue and epilogue of the function.
209	// So set the save points for those.
210
211	// Use the points found by shrink-wrapping, if any.
212	if (!MFI.getSavePoints().empty()) {
213	assert(MFI.getSavePoints().size() == `1` &&
214	"Multiple save points not yet supported!");
215	const auto &SavePoint = *MFI.getSavePoints().begin();
216	SaveBlocks.push_back(Elt: SavePoint.first);
217	assert(MFI.getRestorePoints().size() == `1` &&
218	"Multiple restore points not yet supported!");
219	const auto &RestorePoint = *MFI.getRestorePoints().begin();
220	MachineBasicBlock *RestoreBlock = RestorePoint.first;
221	// If RestoreBlock does not have any successor and is not a return block
222	// then the end point is unreachable and we do not need to insert any
223	// epilogue.
224	if (!RestoreBlock->succ_empty() \|\| RestoreBlock->isReturnBlock())
225	RestoreBlocks.push_back(Elt: RestoreBlock);
226	return;
227	}
228
229	// Save refs to entry and return blocks.
230	SaveBlocks.push_back(Elt: &MF.front());
231	for (MachineBasicBlock &MBB : MF) {
232	if (MBB.isEHFuncletEntry())
233	SaveBlocks.push_back(Elt: &MBB);
234	if (MBB.isReturnBlock())
235	RestoreBlocks.push_back(Elt: &MBB);
236	}
237	}
238
239	// TODO: To support shrink wrapping, this would need to copy
240	// PrologEpilogInserter's updateLiveness.
241	static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
242	MachineBasicBlock &EntryBB = MF.front();
243
244	for (const CalleeSavedInfo &CSIReg : CSI)
245	EntryBB.addLiveIn(PhysReg: CSIReg.getReg());
246	EntryBB.sortUniqueLiveIns();
247	}
248
249	bool SILowerSGPRSpills::spillCalleeSavedRegs(
250	MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
251	MachineRegisterInfo &MRI = MF.getRegInfo();
252	const Function &F = MF.getFunction();
253	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
254	const SIFrameLowering *TFI = ST.getFrameLowering();
255	MachineFrameInfo &MFI = MF.getFrameInfo();
256	RegScavenger RS = nullptr*;
257
258	// Determine which of the registers in the callee save list should be saved.
259	BitVector SavedRegs;
260	TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
261
262	// Add the code to save and restore the callee saved registers.
263	if (!F.hasFnAttribute(Kind: Attribute::Naked)) {
264	// FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
265	// necessary for verifier liveness checks.
266	MFI.setCalleeSavedInfoValid(true);
267
268	std::vector<CalleeSavedInfo> CSI;
269	const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
270
271	for (unsigned I = `0`; CSRegs[I]; ++I) {
272	MCRegister Reg = CSRegs[I];
273
274	if (SavedRegs.test(Idx: Reg)) {
275	const TargetRegisterClass *RC =
276	TRI->getMinimalPhysRegClass(Reg, VT: MVT::i32);
277	int JunkFI = MFI.CreateStackObject(Size: TRI->getSpillSize(RC: *RC),
278	Alignment: TRI->getSpillAlign(RC: RC), isSpillSlot: true*);
279
280	CSI.emplace_back(args&: Reg, args&: JunkFI);
281	CalleeSavedFIs.push_back(Elt: JunkFI);
282	}
283	}
284
285	if (!CSI.empty()) {
286	for (MachineBasicBlock *SaveBlock : SaveBlocks)
287	insertCSRSaves(SaveBlock&: *SaveBlock, CSI, Indexes, LIS);
288
289	// Add live ins to save blocks.
290	assert(SaveBlocks.size() == `1` && "shrink wrapping not fully implemented");
291	updateLiveness(MF, CSI);
292
293	for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
294	insertCSRRestores(RestoreBlock&: *RestoreBlock, CSI, Indexes, LIS);
295	return true;
296	}
297	}
298
299	return false;
300	}
301
302	void SILowerSGPRSpills::updateLaneVGPRDomInstr(
303	int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
304	DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {
305	// For the Def of a virtual LaneVGPR to dominate all its uses, we should
306	// insert an IMPLICIT_DEF before the dominating spill. Switching to a
307	// depth first order doesn't really help since the machine function can be in
308	// the unstructured control flow post-SSA. For each virtual register, hence
309	// finding the common dominator to get either the dominating spill or a block
310	// dominating all spills.
311	SIMachineFunctionInfo *FuncInfo =
312	MBB->getParent()->getInfo<SIMachineFunctionInfo>();
313	ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills =
314	FuncInfo->getSGPRSpillToVirtualVGPRLanes(FrameIndex: FI);
315	Register PrevLaneVGPR;
316	for (auto &Spill : VGPRSpills) {
317	if (PrevLaneVGPR == Spill.VGPR)
318	continue;
319
320	PrevLaneVGPR = Spill.VGPR;
321	auto I = LaneVGPRDomInstr.find(Val: Spill.VGPR);
322	if (Spill.Lane == `0` && I == LaneVGPRDomInstr.end()) {
323	// Initially add the spill instruction itself for Insertion point.
324	LaneVGPRDomInstr [Spill.VGPR] = InsertPt;
325	} else {
326	assert(I != LaneVGPRDomInstr.end());
327	auto PrevInsertPt = I ->second;
328	MachineBasicBlock *DomMBB = PrevInsertPt ->getParent();
329	if (DomMBB == MBB) {
330	// The insertion point earlier selected in a predecessor block whose
331	// spills are currently being lowered. The earlier InsertPt would be
332	// the one just before the block terminator and it should be changed
333	// if we insert any new spill in it.
334	if (MDT->dominates(A: &InsertPt, B: &PrevInsertPt))
335	I ->second = InsertPt;
336
337	continue;
338	}
339
340	// Find the common dominator block between PrevInsertPt and the
341	// current spill.
342	DomMBB = MDT->findNearestCommonDominator(A: DomMBB, B: MBB);
343	if (DomMBB == MBB)
344	I ->second = InsertPt;
345	else if (DomMBB != PrevInsertPt ->getParent())
346	I ->second = &(*DomMBB->getFirstTerminator());
347	}
348	}
349	}
350
351	void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
352	BitVector &RegMask) {
353	// Determine an optimal number of VGPRs for WWM allocation. The complement
354	// list will be available for allocating other VGPR virtual registers.
355	SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
356	MachineRegisterInfo &MRI = MF.getRegInfo();
357	BitVector ReservedRegs = TRI->getReservedRegs(MF);
358	BitVector NonWwmAllocMask(TRI->getNumRegs());
359	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
360
361	// FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future
362	// to have a balanced allocation between WWM values and per-thread vector
363	// register operands.
364	unsigned NumRegs = MaxNumVGPRsForWwmAllocation;
365	NumRegs =
366	std::min(a: static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), b: NumRegs);
367
368	auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(F: MF.getFunction());
369	// Try to use the highest available registers for now. Later after
370	// vgpr-regalloc, they can be shifted to the lowest range.
371	unsigned I = `0`;
372	for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - `1`;
373	(I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) {
374	if (!ReservedRegs.test(Idx: Reg) &&
375	!MRI.isPhysRegUsed(PhysReg: Reg, /SkipRegMaskTest=/true)) {
376	TRI->markSuperRegs(RegisterSet&: RegMask, Reg);
377	++I;
378	}
379	}
380
381	if (I != NumRegs) {
382	// Reserve an arbitrary register and report the error.
383	TRI->markSuperRegs(RegisterSet&: RegMask, Reg: AMDGPU::VGPR0);
384	MF.getFunction().getContext().emitError(
385	ErrorStr: "cannot find enough VGPRs for wwm-regalloc");
386	}
387	}
388
389	bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) {
390	auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
391	LiveIntervals LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr*;
392	auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
393	SlotIndexes Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr*;
394	MachineDominatorTree *MDT =
395	&getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
396	return SILowerSGPRSpills (LIS, Indexes, MDT).run(MF);
397	}
398
399	bool SILowerSGPRSpills::run(MachineFunction &MF) {
400	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
401	TII = ST.getInstrInfo();
402	TRI = &TII->getRegisterInfo();
403
404	assert(SaveBlocks.empty() && RestoreBlocks.empty());
405
406	// First, expose any CSR SGPR spills. This is mostly the same as what PEI
407	// does, but somewhat simpler.
408	calculateSaveRestoreBlocks(MF);
409	SmallVector<int> CalleeSavedFIs;
410	bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);
411
412	MachineFrameInfo &MFI = MF.getFrameInfo();
413	MachineRegisterInfo &MRI = MF.getRegInfo();
414	SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
415
416	if (!MFI.hasStackObjects() && !HasCSRs) {
417	SaveBlocks.clear();
418	RestoreBlocks.clear();
419	return false;
420	}
421
422	bool MadeChange = false;
423	bool SpilledToVirtVGPRLanes = false;
424
425	// TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
426	// handled as SpilledToReg in regular PrologEpilogInserter.
427	const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
428	(HasCSRs \|\| FuncInfo->hasSpilledSGPRs());
429	if (HasSGPRSpillToVGPR) {
430	// Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
431	// are spilled to VGPRs, in which case we can eliminate the stack usage.
432	//
433	// This operates under the assumption that only other SGPR spills are users
434	// of the frame index.
435
436	// To track the spill frame indices handled in this pass.
437	BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
438
439	// To track the IMPLICIT_DEF insertion point for the lane vgprs.
440	DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;
441
442	for (MachineBasicBlock &MBB : MF) {
443	for (MachineInstr &MI : llvm::make_early_inc_range(Range&: MBB)) {
444	if (!TII->isSGPRSpill(MI))
445	continue;
446
447	if (MI.getOperand(i: `0`).isUndef()) {
448	if (Indexes)
449	Indexes->removeMachineInstrFromMaps(MI);
450	MI.eraseFromParent();
451	continue;
452	}
453
454	int FI = TII->getNamedOperand(MI, OperandName: AMDGPU::OpName::addr)->getIndex();
455	assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
456
457	bool IsCalleeSaveSGPRSpill = llvm::is_contained(Range&: CalleeSavedFIs, Element: FI);
458	if (IsCalleeSaveSGPRSpill) {
459	// Spill callee-saved SGPRs into physical VGPR lanes.
460
461	// TODO: This is to ensure the CFIs are static for efficient frame
462	// unwinding in the debugger. Spilling them into virtual VGPR lanes
463	// involve regalloc to allocate the physical VGPRs and that might
464	// cause intermediate spill/split of such liveranges for successful
465	// allocation. This would result in broken CFI encoding unless the
466	// regalloc aware CFI generation to insert new CFIs along with the
467	// intermediate spills is implemented. There is no such support
468	// currently exist in the LLVM compiler.
469	if (FuncInfo->allocateSGPRSpillToVGPRLane(
470	MF, FI, /SpillToPhysVGPRLane=/true)) {
471	bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
472	MI, FI, RS: nullptr, Indexes, LIS, SpillToPhysVGPRLane: true);
473	if (!Spilled)
474	llvm_unreachable(
475	"failed to spill SGPR to physical VGPR lane when allocated");
476	}
477	} else {
478	MachineInstrSpan MIS(&MI, &MBB);
479	if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
480	bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
481	MI, FI, RS: nullptr, Indexes, LIS);
482	if (!Spilled)
483	llvm_unreachable(
484	"failed to spill SGPR to virtual VGPR lane when allocated");
485	SpillFIs.set(FI);
486	updateLaneVGPRDomInstr(FI, MBB: &MBB, InsertPt: MIS.begin(), LaneVGPRDomInstr);
487	SpilledToVirtVGPRLanes = true;
488	}
489	}
490	}
491	}
492
493	for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
494	auto InsertPt = LaneVGPRDomInstr [Reg];
495	// Insert the IMPLICIT_DEF at the identified points.
496	MachineBasicBlock &Block = *InsertPt ->getParent();
497	DebugLoc DL = Block.findDebugLoc(MBBI: InsertPt);
498	auto MIB =
499	BuildMI(BB&: Block, I&: *InsertPt, MIMD: DL, MCID: TII->get(Opcode: AMDGPU::IMPLICIT_DEF), DestReg: Reg);
500
501	// Add WWM flag to the virtual register.
502	FuncInfo->setFlag(Reg, Flag: AMDGPU::VirtRegFlag::WWM_REG);
503
504	// Set SGPR_SPILL asm printer flag
505	MIB ->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
506	if (LIS) {
507	LIS->InsertMachineInstrInMaps(MI&: *MIB);
508	LIS->createAndComputeVirtRegInterval(Reg);
509	}
510	}
511
512	// Determine the registers for WWM allocation and also compute the register
513	// mask for non-wwm VGPR allocation.
514	if (FuncInfo->getSGPRSpillVGPRs().size()) {
515	BitVector WwmRegMask(TRI->getNumRegs());
516
517	determineRegsForWWMAllocation(MF, RegMask&: WwmRegMask);
518
519	BitVector NonWwmRegMask(WwmRegMask);
520	NonWwmRegMask.flip().clearBitsNotInMask(Mask: TRI->getAllVGPRRegMask());
521
522	// The complement set will be the registers for non-wwm (per-thread) vgpr
523	// allocation.
524	FuncInfo->updateNonWWMRegMask(RegMask&: NonWwmRegMask);
525	}
526
527	for (MachineBasicBlock &MBB : MF) {
528	// FIXME: The dead frame indices are replaced with a null register from
529	// the debug value instructions. We should instead, update it with the
530	// correct register value. But not sure the register value alone is
531	// adequate to lower the DIExpression. It should be worked out later.
532	for (MachineInstr &MI : MBB) {
533	if (MI.isDebugValue()) {
534	uint32_t StackOperandIdx = MI.isDebugValueList() ? `2` : `0`;
535	if (MI.getOperand(i: StackOperandIdx).isFI() &&
536	!MFI.isFixedObjectIndex(
537	ObjectIdx: MI.getOperand(i: StackOperandIdx).getIndex()) &&
538	SpillFIs [MI.getOperand(i: StackOperandIdx).getIndex()]) {
539	MI.getOperand(i: StackOperandIdx)
540	.ChangeToRegister(Reg: Register (), isDef: false /isDef/);
541	}
542	}
543	}
544	}
545
546	// All those frame indices which are dead by now should be removed from the
547	// function frame. Otherwise, there is a side effect such as re-mapping of
548	// free frame index ids by the later pass(es) like "stack slot coloring"
549	// which in turn could mess-up with the book keeping of "frame index to VGPR
550	// lane".
551	FuncInfo->removeDeadFrameIndices(MFI, /ResetSGPRSpillStackIDs/ false);
552
553	MadeChange = true;
554	}
555
556	if (SpilledToVirtVGPRLanes) {
557	const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
558	// Shift back the reserved SGPR for EXEC copy into the lowest range.
559	// This SGPR is reserved to handle the whole-wave spill/copy operations
560	// that might get inserted during vgpr regalloc.
561	Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
562	if (UnusedLowSGPR && TRI->getHWRegIndex(Reg: UnusedLowSGPR) <
563	TRI->getHWRegIndex(Reg: FuncInfo->getSGPRForEXECCopy()))
564	FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
565	} else {
566	// No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
567	// spills/copies. Reset the SGPR reserved for EXEC copy.
568	FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
569	}
570
571	SaveBlocks.clear();
572	RestoreBlocks.clear();
573
574	return MadeChange;
575	}
576
577	PreservedAnalyses
578	SILowerSGPRSpillsPass::run(MachineFunction &MF,
579	MachineFunctionAnalysisManager &MFAM) {
580	MFPropsModifier _(*this, MF);
581	auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(IR&: MF);
582	auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(IR&: MF);
583	MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
584	SILowerSGPRSpills (LIS, Indexes, MDT).run(MF);
585	return PreservedAnalyses::all();
586	}
587

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp