1//===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11/// structures and waterfall loops.
12///
13/// When we do structurization, we usually transform an if-else into two
14/// successive if-then (with a flow block to do predicate inversion). Consider a
15/// simple case after structurization: A divergent value %a was defined before
16/// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17/// bb.if:
18/// %a = ...
19/// ...
20/// bb.then:
21/// ... = op %a
22/// ... // %a can be dead here
23/// bb.flow:
24/// ...
25/// bb.else:
26/// ... = %a
27/// ...
28/// bb.endif
29///
30/// As register allocator has no idea of the thread-control-flow, it will just
31/// assume %a would be alive in the whole range of bb.then because of a later
32/// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33/// to exec mask. For this if-else case, the lanes active in bb.then will be
34/// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35/// after the last use in bb.then until the end of the block. The reason is
36/// the instructions in bb.then will only overwrite lanes that will never be
37/// accessed in bb.else.
38///
39/// This pass aims to tell register allocator that %a is in-fact dead,
40/// through inserting a phi-node in bb.flow saying that %a is undef when coming
41/// from bb.then, and then replace the uses in the bb.else with the result of
42/// newly inserted phi.
43///
44/// Two key conditions must be met to ensure correctness:
45/// 1.) The def-point should be in the same loop-level as if-else-endif to make
46/// sure the second loop iteration still get correct data.
47/// 2.) There should be no further uses after the IF-ELSE region.
48///
49///
50/// Waterfall loops get inserted around instructions that use divergent values
51/// but can only be executed with a uniform value. For example an indirect call
52/// to a divergent address:
53/// bb.start:
54/// %a = ...
55/// %fun = ...
56/// ...
57/// bb.loop:
58/// call %fun (%a)
59/// ... // %a can be dead here
60/// loop %bb.loop
61///
62/// The loop block is executed multiple times, but it is run exactly once for
63/// each active lane. Similar to the if-else case, the register allocator
64/// assumes that %a is live throughout the loop as it is used again in the next
65/// iteration. If %a is a VGPR that is unused after the loop, it does not need
66/// to be live after its last use in the loop block. By inserting a phi-node at
67/// the start of bb.loop that is undef when coming from bb.loop, the register
68/// allocation knows that the value of %a does not need to be preserved through
69/// iterations of the loop.
70///
71//
72//===----------------------------------------------------------------------===//
73
74#include "SIOptimizeVGPRLiveRange.h"
75#include "AMDGPU.h"
76#include "GCNSubtarget.h"
77#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
78#include "SIMachineFunctionInfo.h"
79#include "llvm/CodeGen/LiveVariables.h"
80#include "llvm/CodeGen/MachineDominators.h"
81#include "llvm/CodeGen/MachineLoopInfo.h"
82#include "llvm/CodeGen/TargetRegisterInfo.h"
83
84using namespace llvm;
85
86#define DEBUG_TYPE "si-opt-vgpr-liverange"
87
88namespace {
89
90class SIOptimizeVGPRLiveRange {
91private:
92 const SIRegisterInfo *TRI = nullptr;
93 const SIInstrInfo *TII = nullptr;
94 LiveVariables *LV = nullptr;
95 MachineDominatorTree *MDT = nullptr;
96 const MachineLoopInfo *Loops = nullptr;
97 MachineRegisterInfo *MRI = nullptr;
98
99public:
100 SIOptimizeVGPRLiveRange(LiveVariables *LV, MachineDominatorTree *MDT,
101 MachineLoopInfo *Loops)
102 : LV(LV), MDT(MDT), Loops(Loops) {}
103 bool run(MachineFunction &MF);
104
105 MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
106
107 void collectElseRegionBlocks(MachineBasicBlock *Flow,
108 MachineBasicBlock *Endif,
109 SmallSetVector<MachineBasicBlock *, 16> &) const;
110
111 void
112 collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
113 MachineBasicBlock *Endif,
114 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
115 SmallVectorImpl<Register> &CandidateRegs) const;
116
117 void collectWaterfallCandidateRegisters(
118 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
119 SmallSetVector<Register, 16> &CandidateRegs,
120 SmallSetVector<MachineBasicBlock *, 2> &Blocks,
121 SmallVectorImpl<MachineInstr *> &Instructions) const;
122
123 void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
124 SmallVectorImpl<MachineInstr *> &Uses) const;
125
126 void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
127 MachineBasicBlock *Flow) const;
128
129 void updateLiveRangeInElseRegion(
130 Register Reg, Register NewReg, MachineBasicBlock *Flow,
131 MachineBasicBlock *Endif,
132 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
133
134 void
135 optimizeLiveRange(Register Reg, MachineBasicBlock *If,
136 MachineBasicBlock *Flow, MachineBasicBlock *Endif,
137 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
138
139 void optimizeWaterfallLiveRange(
140 Register Reg, MachineBasicBlock *LoopHeader,
141 SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks,
142 SmallVectorImpl<MachineInstr *> &Instructions) const;
143};
144
145class SIOptimizeVGPRLiveRangeLegacy : public MachineFunctionPass {
146public:
147 static char ID;
148
149 SIOptimizeVGPRLiveRangeLegacy() : MachineFunctionPass(ID) {}
150
151 bool runOnMachineFunction(MachineFunction &MF) override;
152
153 StringRef getPassName() const override {
154 return "SI Optimize VGPR LiveRange";
155 }
156
157 void getAnalysisUsage(AnalysisUsage &AU) const override {
158 AU.setPreservesCFG();
159 AU.addRequired<LiveVariablesWrapperPass>();
160 AU.addRequired<MachineDominatorTreeWrapperPass>();
161 AU.addRequired<MachineLoopInfoWrapperPass>();
162 AU.addPreserved<LiveVariablesWrapperPass>();
163 AU.addPreserved<MachineDominatorTreeWrapperPass>();
164 AU.addPreserved<MachineLoopInfoWrapperPass>();
165 MachineFunctionPass::getAnalysisUsage(AU);
166 }
167
168 MachineFunctionProperties getRequiredProperties() const override {
169 return MachineFunctionProperties().setIsSSA();
170 }
171
172 MachineFunctionProperties getClearedProperties() const override {
173 return MachineFunctionProperties().setNoPHIs();
174 }
175};
176
177} // end anonymous namespace
178
179// Check whether the MBB is a else flow block and get the branching target which
180// is the Endif block
181MachineBasicBlock *
182SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
183 for (auto &BR : MBB->terminators()) {
184 if (BR.getOpcode() == AMDGPU::SI_ELSE)
185 return BR.getOperand(i: 2).getMBB();
186 }
187 return nullptr;
188}
189
190void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
191 MachineBasicBlock *Flow, MachineBasicBlock *Endif,
192 SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
193 assert(Flow != Endif);
194
195 MachineBasicBlock *MBB = Endif;
196 unsigned Cur = 0;
197 while (MBB) {
198 for (auto *Pred : MBB->predecessors()) {
199 if (Pred != Flow)
200 Blocks.insert(X: Pred);
201 }
202
203 if (Cur < Blocks.size())
204 MBB = Blocks[Cur++];
205 else
206 MBB = nullptr;
207 }
208
209 LLVM_DEBUG({
210 dbgs() << "Found Else blocks: ";
211 for (auto *MBB : Blocks)
212 dbgs() << printMBBReference(*MBB) << ' ';
213 dbgs() << '\n';
214 });
215}
216
217/// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
218void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
219 Register Reg, MachineBasicBlock *MBB,
220 SmallVectorImpl<MachineInstr *> &Uses) const {
221 for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
222 if (UseMI.getParent() == MBB && !UseMI.isPHI())
223 Uses.push_back(Elt: &UseMI);
224 }
225}
226
227/// Collect the killed registers in the ELSE region which are not alive through
228/// the whole THEN region.
229void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
230 MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
231 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
232 SmallVectorImpl<Register> &CandidateRegs) const {
233
234 SmallSet<Register, 8> KillsInElse;
235
236 for (auto *Else : ElseBlocks) {
237 for (auto &MI : Else->instrs()) {
238 if (MI.isDebugInstr())
239 continue;
240
241 for (auto &MO : MI.operands()) {
242 if (!MO.isReg() || !MO.getReg() || MO.isDef())
243 continue;
244
245 Register MOReg = MO.getReg();
246 // We can only optimize AGPR/VGPR virtual register
247 if (MOReg.isPhysical() || !TRI->isVectorRegister(MRI: *MRI, Reg: MOReg))
248 continue;
249
250 if (MO.readsReg()) {
251 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg: MOReg);
252 const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg: MOReg)->getParent();
253 // Make sure two conditions are met:
254 // a.) the value is defined before/in the IF block
255 // b.) should be defined in the same loop-level.
256 if ((VI.AliveBlocks.test(Idx: If->getNumber()) || DefMBB == If) &&
257 Loops->getLoopFor(BB: DefMBB) == Loops->getLoopFor(BB: If)) {
258 // Check if the register is live into the endif block. If not,
259 // consider it killed in the else region.
260 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg: MOReg);
261 if (!VI.isLiveIn(MBB: *Endif, Reg: MOReg, MRI&: *MRI)) {
262 KillsInElse.insert(V: MOReg);
263 } else {
264 LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
265 << " as Live in Endif\n");
266 }
267 }
268 }
269 }
270 }
271 }
272
273 // Check the phis in the Endif, looking for value coming from the ELSE
274 // region. Make sure the phi-use is the last use.
275 for (auto &MI : Endif->phis()) {
276 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
277 auto &MO = MI.getOperand(i: Idx);
278 auto *Pred = MI.getOperand(i: Idx + 1).getMBB();
279 if (Pred == Flow)
280 continue;
281 assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
282
283 if (!MO.isReg() || !MO.getReg() || MO.isUndef())
284 continue;
285
286 Register Reg = MO.getReg();
287 if (Reg.isPhysical() || !TRI->isVectorRegister(MRI: *MRI, Reg))
288 continue;
289
290 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
291
292 if (VI.isLiveIn(MBB: *Endif, Reg, MRI&: *MRI)) {
293 LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
294 << " as Live in Endif\n");
295 continue;
296 }
297 // Make sure two conditions are met:
298 // a.) the value is defined before/in the IF block
299 // b.) should be defined in the same loop-level.
300 const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
301 if ((VI.AliveBlocks.test(Idx: If->getNumber()) || DefMBB == If) &&
302 Loops->getLoopFor(BB: DefMBB) == Loops->getLoopFor(BB: If))
303 KillsInElse.insert(V: Reg);
304 }
305 }
306
307 auto IsLiveThroughThen = [&](Register Reg) {
308 for (auto I = MRI->use_nodbg_begin(RegNo: Reg), E = MRI->use_nodbg_end(); I != E;
309 ++I) {
310 if (!I->readsReg())
311 continue;
312 auto *UseMI = I->getParent();
313 auto *UseMBB = UseMI->getParent();
314 if (UseMBB == Flow || UseMBB == Endif) {
315 if (!UseMI->isPHI())
316 return true;
317
318 auto *IncomingMBB = UseMI->getOperand(i: I.getOperandNo() + 1).getMBB();
319 // The register is live through the path If->Flow or Flow->Endif.
320 // we should not optimize for such cases.
321 if ((UseMBB == Flow && IncomingMBB != If) ||
322 (UseMBB == Endif && IncomingMBB == Flow))
323 return true;
324 }
325 }
326 return false;
327 };
328
329 for (auto Reg : KillsInElse) {
330 if (!IsLiveThroughThen(Reg))
331 CandidateRegs.push_back(Elt: Reg);
332 }
333}
334
335/// Collect the registers used in the waterfall loop block that are defined
336/// before.
337void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
338 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
339 SmallSetVector<Register, 16> &CandidateRegs,
340 SmallSetVector<MachineBasicBlock *, 2> &Blocks,
341 SmallVectorImpl<MachineInstr *> &Instructions) const {
342
343 // Collect loop instructions, potentially spanning multiple blocks
344 auto *MBB = LoopHeader;
345 for (;;) {
346 Blocks.insert(X: MBB);
347 for (auto &MI : *MBB) {
348 if (MI.isDebugInstr())
349 continue;
350 Instructions.push_back(Elt: &MI);
351 }
352 if (MBB == LoopEnd)
353 break;
354
355 if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
356 (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
357 LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
358 return;
359 }
360
361 MBB = *MBB->succ_begin();
362 }
363
364 for (auto *I : Instructions) {
365 auto &MI = *I;
366
367 for (auto &MO : MI.all_uses()) {
368 if (!MO.getReg())
369 continue;
370
371 Register MOReg = MO.getReg();
372 // We can only optimize AGPR/VGPR virtual register
373 if (MOReg.isPhysical() || !TRI->isVectorRegister(MRI: *MRI, Reg: MOReg))
374 continue;
375
376 if (MO.readsReg()) {
377 MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg: MOReg)->getParent();
378 // Make sure the value is defined before the LOOP block
379 if (!Blocks.contains(key: DefMBB) && !CandidateRegs.contains(key: MOReg)) {
380 // If the variable is used after the loop, the register coalescer will
381 // merge the newly created register and remove the phi node again.
382 // Just do nothing in that case.
383 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg: MOReg);
384 bool IsUsed = false;
385 for (auto *Succ : LoopEnd->successors()) {
386 if (!Blocks.contains(key: Succ) &&
387 OldVarInfo.isLiveIn(MBB: *Succ, Reg: MOReg, MRI&: *MRI)) {
388 IsUsed = true;
389 break;
390 }
391 }
392 if (!IsUsed) {
393 LLVM_DEBUG(dbgs() << "Found candidate reg: "
394 << printReg(MOReg, TRI, 0, MRI) << '\n');
395 CandidateRegs.insert(X: MOReg);
396 } else {
397 LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
398 << printReg(MOReg, TRI, 0, MRI) << '\n');
399 }
400 }
401 }
402 }
403 }
404}
405
406// Re-calculate the liveness of \p Reg in the THEN-region
407void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
408 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
409 SetVector<MachineBasicBlock *> Blocks;
410 SmallVector<MachineBasicBlock *> WorkList({If});
411
412 // Collect all successors until we see the flow block, where we should
413 // reconverge.
414 while (!WorkList.empty()) {
415 auto *MBB = WorkList.pop_back_val();
416 for (auto *Succ : MBB->successors()) {
417 if (Succ != Flow && Blocks.insert(X: Succ))
418 WorkList.push_back(Elt: Succ);
419 }
420 }
421
422 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
423 for (MachineBasicBlock *MBB : Blocks) {
424 // Clear Live bit, as we will recalculate afterwards
425 LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
426 << '\n');
427 OldVarInfo.AliveBlocks.reset(Idx: MBB->getNumber());
428 }
429
430 SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
431
432 // Get the blocks the Reg should be alive through
433 for (auto I = MRI->use_nodbg_begin(RegNo: Reg), E = MRI->use_nodbg_end(); I != E;
434 ++I) {
435 auto *UseMI = I->getParent();
436 if (UseMI->isPHI() && I->readsReg()) {
437 if (Blocks.contains(key: UseMI->getParent()))
438 PHIIncoming.insert(Ptr: UseMI->getOperand(i: I.getOperandNo() + 1).getMBB());
439 }
440 }
441
442 for (MachineBasicBlock *MBB : Blocks) {
443 SmallVector<MachineInstr *> Uses;
444 // PHI instructions has been processed before.
445 findNonPHIUsesInBlock(Reg, MBB, Uses);
446
447 if (Uses.size() == 1) {
448 LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
449 << printMBBReference(*MBB) << '\n');
450 LV->HandleVirtRegUse(reg: Reg, MBB, MI&: *(*Uses.begin()));
451 } else if (Uses.size() > 1) {
452 // Process the instructions in-order
453 LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
454 << printMBBReference(*MBB) << '\n');
455 for (MachineInstr &MI : *MBB) {
456 if (llvm::is_contained(Range&: Uses, Element: &MI))
457 LV->HandleVirtRegUse(reg: Reg, MBB, MI);
458 }
459 }
460
461 // Mark Reg alive through the block if this is a PHI incoming block
462 if (PHIIncoming.contains(Ptr: MBB))
463 LV->MarkVirtRegAliveInBlock(VRInfo&: OldVarInfo, DefBlock: MRI->getVRegDef(Reg)->getParent(),
464 BB: MBB);
465 }
466
467 // Set the isKilled flag if we get new Kills in the THEN region.
468 for (auto *MI : OldVarInfo.Kills) {
469 if (Blocks.contains(key: MI->getParent()))
470 MI->addRegisterKilled(IncomingReg: Reg, RegInfo: TRI);
471 }
472}
473
474void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
475 Register Reg, Register NewReg, MachineBasicBlock *Flow,
476 MachineBasicBlock *Endif,
477 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
478 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(Reg: NewReg);
479 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
480
481 // Transfer aliveBlocks from Reg to NewReg
482 for (auto *MBB : ElseBlocks) {
483 unsigned BBNum = MBB->getNumber();
484 if (OldVarInfo.AliveBlocks.test(Idx: BBNum)) {
485 NewVarInfo.AliveBlocks.set(BBNum);
486 LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
487 << '\n');
488 OldVarInfo.AliveBlocks.reset(Idx: BBNum);
489 }
490 }
491
492 // Transfer the possible Kills in ElseBlocks from Reg to NewReg
493 auto I = OldVarInfo.Kills.begin();
494 while (I != OldVarInfo.Kills.end()) {
495 if (ElseBlocks.contains(key: (*I)->getParent())) {
496 NewVarInfo.Kills.push_back(x: *I);
497 I = OldVarInfo.Kills.erase(position: I);
498 } else {
499 ++I;
500 }
501 }
502}
503
504void SIOptimizeVGPRLiveRange::optimizeLiveRange(
505 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
506 MachineBasicBlock *Endif,
507 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
508 // Insert a new PHI, marking the value from the THEN region being
509 // undef.
510 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
511 const auto *RC = MRI->getRegClass(Reg);
512 Register NewReg = MRI->createVirtualRegister(RegClass: RC);
513 Register UndefReg = MRI->createVirtualRegister(RegClass: RC);
514 MachineInstrBuilder PHI = BuildMI(BB&: *Flow, I: Flow->getFirstNonPHI(), MIMD: DebugLoc(),
515 MCID: TII->get(Opcode: TargetOpcode::PHI), DestReg: NewReg);
516 for (auto *Pred : Flow->predecessors()) {
517 if (Pred == If)
518 PHI.addReg(RegNo: Reg).addMBB(MBB: Pred);
519 else
520 PHI.addReg(RegNo: UndefReg, flags: RegState::Undef).addMBB(MBB: Pred);
521 }
522
523 // Replace all uses in the ELSE region or the PHIs in ENDIF block
524 // Use early increment range because setReg() will update the linked list.
525 for (auto &O : make_early_inc_range(Range: MRI->use_operands(Reg))) {
526 auto *UseMI = O.getParent();
527 auto *UseBlock = UseMI->getParent();
528 // Replace uses in Endif block
529 if (UseBlock == Endif) {
530 if (UseMI->isPHI())
531 O.setReg(NewReg);
532 else if (UseMI->isDebugInstr())
533 continue;
534 else {
535 // DetectDeadLanes may mark register uses as undef without removing
536 // them, in which case a non-phi instruction using the original register
537 // may exist in the Endif block even though the register is not live
538 // into it.
539 assert(!O.readsReg());
540 }
541 continue;
542 }
543
544 // Replace uses in Else region
545 if (ElseBlocks.contains(key: UseBlock))
546 O.setReg(NewReg);
547 }
548
549 // The optimized Reg is not alive through Flow blocks anymore.
550 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
551 OldVarInfo.AliveBlocks.reset(Idx: Flow->getNumber());
552
553 updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
554 updateLiveRangeInThenRegion(Reg, If, Flow);
555}
556
557void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
558 Register Reg, MachineBasicBlock *LoopHeader,
559 SmallSetVector<MachineBasicBlock *, 2> &Blocks,
560 SmallVectorImpl<MachineInstr *> &Instructions) const {
561 // Insert a new PHI, marking the value from the last loop iteration undef.
562 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
563 const auto *RC = MRI->getRegClass(Reg);
564 Register NewReg = MRI->createVirtualRegister(RegClass: RC);
565 Register UndefReg = MRI->createVirtualRegister(RegClass: RC);
566
567 // Replace all uses in the LOOP region
568 // Use early increment range because setReg() will update the linked list.
569 for (auto &O : make_early_inc_range(Range: MRI->use_operands(Reg))) {
570 auto *UseMI = O.getParent();
571 auto *UseBlock = UseMI->getParent();
572 // Replace uses in Loop blocks
573 if (Blocks.contains(key: UseBlock))
574 O.setReg(NewReg);
575 }
576
577 MachineInstrBuilder PHI =
578 BuildMI(BB&: *LoopHeader, I: LoopHeader->getFirstNonPHI(), MIMD: DebugLoc(),
579 MCID: TII->get(Opcode: TargetOpcode::PHI), DestReg: NewReg);
580 for (auto *Pred : LoopHeader->predecessors()) {
581 if (Blocks.contains(key: Pred))
582 PHI.addReg(RegNo: UndefReg, flags: RegState::Undef).addMBB(MBB: Pred);
583 else
584 PHI.addReg(RegNo: Reg).addMBB(MBB: Pred);
585 }
586
587 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(Reg: NewReg);
588 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
589
590 // Find last use and mark as kill
591 MachineInstr *Kill = nullptr;
592 for (auto *MI : reverse(C&: Instructions)) {
593 if (MI->readsRegister(Reg: NewReg, TRI)) {
594 MI->addRegisterKilled(IncomingReg: NewReg, RegInfo: TRI);
595 NewVarInfo.Kills.push_back(x: MI);
596 Kill = MI;
597 break;
598 }
599 }
600 assert(Kill && "Failed to find last usage of register in loop");
601
602 MachineBasicBlock *KillBlock = Kill->getParent();
603 bool PostKillBlock = false;
604 for (auto *Block : Blocks) {
605 auto BBNum = Block->getNumber();
606
607 // collectWaterfallCandidateRegisters only collects registers that are dead
608 // after the loop. So we know that the old reg is no longer live throughout
609 // the waterfall loop.
610 OldVarInfo.AliveBlocks.reset(Idx: BBNum);
611
612 // The new register is live up to (and including) the block that kills it.
613 PostKillBlock |= (Block == KillBlock);
614 if (PostKillBlock) {
615 NewVarInfo.AliveBlocks.reset(Idx: BBNum);
616 } else if (Block != LoopHeader) {
617 NewVarInfo.AliveBlocks.set(BBNum);
618 }
619 }
620}
621
622char SIOptimizeVGPRLiveRangeLegacy::ID = 0;
623
624INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE,
625 "SI Optimize VGPR LiveRange", false, false)
626INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
627INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
628INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass)
629INITIALIZE_PASS_END(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE,
630 "SI Optimize VGPR LiveRange", false, false)
631
632char &llvm::SIOptimizeVGPRLiveRangeLegacyID = SIOptimizeVGPRLiveRangeLegacy::ID;
633
634FunctionPass *llvm::createSIOptimizeVGPRLiveRangeLegacyPass() {
635 return new SIOptimizeVGPRLiveRangeLegacy();
636}
637
638bool SIOptimizeVGPRLiveRangeLegacy::runOnMachineFunction(MachineFunction &MF) {
639 if (skipFunction(F: MF.getFunction()))
640 return false;
641
642 LiveVariables *LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
643 MachineDominatorTree *MDT =
644 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
645 MachineLoopInfo *Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
646 return SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF);
647}
648
649PreservedAnalyses
650SIOptimizeVGPRLiveRangePass::run(MachineFunction &MF,
651 MachineFunctionAnalysisManager &MFAM) {
652 MFPropsModifier _(*this, MF);
653 LiveVariables *LV = &MFAM.getResult<LiveVariablesAnalysis>(IR&: MF);
654 MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
655 MachineLoopInfo *Loops = &MFAM.getResult<MachineLoopAnalysis>(IR&: MF);
656
657 bool Changed = SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF);
658 if (!Changed)
659 return PreservedAnalyses::all();
660
661 auto PA = getMachineFunctionPassPreservedAnalyses();
662 PA.preserve<LiveVariablesAnalysis>();
663 PA.preserve<DominatorTreeAnalysis>();
664 PA.preserve<MachineLoopAnalysis>();
665 PA.preserveSet<CFGAnalyses>();
666 return PA;
667}
668
669bool SIOptimizeVGPRLiveRange::run(MachineFunction &MF) {
670 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
671 TII = ST.getInstrInfo();
672 TRI = &TII->getRegisterInfo();
673 MRI = &MF.getRegInfo();
674
675 bool MadeChange = false;
676
677 // TODO: we need to think about the order of visiting the blocks to get
678 // optimal result for nesting if-else cases.
679 for (MachineBasicBlock &MBB : MF) {
680 for (auto &MI : MBB.terminators()) {
681 // Detect the if-else blocks
682 if (MI.getOpcode() == AMDGPU::SI_IF) {
683 MachineBasicBlock *IfTarget = MI.getOperand(i: 2).getMBB();
684 auto *Endif = getElseTarget(MBB: IfTarget);
685 if (!Endif)
686 continue;
687
688 // Skip unexpected control flow.
689 if (!MDT->dominates(A: &MBB, B: IfTarget) || !MDT->dominates(A: IfTarget, B: Endif))
690 continue;
691
692 SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
693 SmallVector<Register> CandidateRegs;
694
695 LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
696 << printMBBReference(MBB) << ' '
697 << printMBBReference(*IfTarget) << ' '
698 << printMBBReference(*Endif) << '\n');
699
700 // Collect all the blocks in the ELSE region
701 collectElseRegionBlocks(Flow: IfTarget, Endif, Blocks&: ElseBlocks);
702
703 // Collect the registers can be optimized
704 collectCandidateRegisters(If: &MBB, Flow: IfTarget, Endif, ElseBlocks,
705 CandidateRegs);
706 MadeChange |= !CandidateRegs.empty();
707 // Now we are safe to optimize.
708 for (auto Reg : CandidateRegs)
709 optimizeLiveRange(Reg, If: &MBB, Flow: IfTarget, Endif, ElseBlocks);
710 } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
711 auto *LoopHeader = MI.getOperand(i: 0).getMBB();
712 auto *LoopEnd = &MBB;
713
714 LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
715 << printMBBReference(*LoopHeader) << '\n');
716
717 SmallSetVector<Register, 16> CandidateRegs;
718 SmallVector<MachineInstr *, 16> Instructions;
719 SmallSetVector<MachineBasicBlock *, 2> Blocks;
720
721 collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
722 Blocks, Instructions);
723 MadeChange |= !CandidateRegs.empty();
724 // Now we are safe to optimize.
725 for (auto Reg : CandidateRegs)
726 optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
727 }
728 }
729 }
730
731 return MadeChange;
732}
733