1 | //===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else |
11 | /// structures and waterfall loops. |
12 | /// |
13 | /// When we do structurization, we usually transform an if-else into two |
14 | /// successive if-then (with a flow block to do predicate inversion). Consider a |
15 | /// simple case after structurization: A divergent value %a was defined before |
16 | /// if-else and used in both THEN (use in THEN is optional) and ELSE part: |
17 | /// bb.if: |
18 | /// %a = ... |
19 | /// ... |
20 | /// bb.then: |
21 | /// ... = op %a |
22 | /// ... // %a can be dead here |
23 | /// bb.flow: |
24 | /// ... |
25 | /// bb.else: |
26 | /// ... = %a |
27 | /// ... |
28 | /// bb.endif |
29 | /// |
30 | /// As register allocator has no idea of the thread-control-flow, it will just |
31 | /// assume %a would be alive in the whole range of bb.then because of a later |
32 | /// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect |
33 | /// to exec mask. For this if-else case, the lanes active in bb.then will be |
34 | /// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead |
35 | /// after the last use in bb.then until the end of the block. The reason is |
36 | /// the instructions in bb.then will only overwrite lanes that will never be |
37 | /// accessed in bb.else. |
38 | /// |
39 | /// This pass aims to tell register allocator that %a is in-fact dead, |
40 | /// through inserting a phi-node in bb.flow saying that %a is undef when coming |
41 | /// from bb.then, and then replace the uses in the bb.else with the result of |
42 | /// newly inserted phi. |
43 | /// |
44 | /// Two key conditions must be met to ensure correctness: |
45 | /// 1.) The def-point should be in the same loop-level as if-else-endif to make |
46 | /// sure the second loop iteration still get correct data. |
47 | /// 2.) There should be no further uses after the IF-ELSE region. |
48 | /// |
49 | /// |
50 | /// Waterfall loops get inserted around instructions that use divergent values |
51 | /// but can only be executed with a uniform value. For example an indirect call |
52 | /// to a divergent address: |
53 | /// bb.start: |
54 | /// %a = ... |
55 | /// %fun = ... |
56 | /// ... |
57 | /// bb.loop: |
58 | /// call %fun (%a) |
59 | /// ... // %a can be dead here |
60 | /// loop %bb.loop |
61 | /// |
62 | /// The loop block is executed multiple times, but it is run exactly once for |
63 | /// each active lane. Similar to the if-else case, the register allocator |
64 | /// assumes that %a is live throughout the loop as it is used again in the next |
65 | /// iteration. If %a is a VGPR that is unused after the loop, it does not need |
66 | /// to be live after its last use in the loop block. By inserting a phi-node at |
67 | /// the start of bb.loop that is undef when coming from bb.loop, the register |
68 | /// allocation knows that the value of %a does not need to be preserved through |
69 | /// iterations of the loop. |
70 | /// |
71 | // |
72 | //===----------------------------------------------------------------------===// |
73 | |
74 | #include "SIOptimizeVGPRLiveRange.h" |
75 | #include "AMDGPU.h" |
76 | #include "GCNSubtarget.h" |
77 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
78 | #include "SIMachineFunctionInfo.h" |
79 | #include "llvm/CodeGen/LiveVariables.h" |
80 | #include "llvm/CodeGen/MachineDominators.h" |
81 | #include "llvm/CodeGen/MachineLoopInfo.h" |
82 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
83 | |
84 | using namespace llvm; |
85 | |
86 | #define DEBUG_TYPE "si-opt-vgpr-liverange" |
87 | |
88 | namespace { |
89 | |
90 | class SIOptimizeVGPRLiveRange { |
91 | private: |
92 | const SIRegisterInfo *TRI = nullptr; |
93 | const SIInstrInfo *TII = nullptr; |
94 | LiveVariables *LV = nullptr; |
95 | MachineDominatorTree *MDT = nullptr; |
96 | const MachineLoopInfo *Loops = nullptr; |
97 | MachineRegisterInfo *MRI = nullptr; |
98 | |
99 | public: |
100 | SIOptimizeVGPRLiveRange(LiveVariables *LV, MachineDominatorTree *MDT, |
101 | MachineLoopInfo *Loops) |
102 | : LV(LV), MDT(MDT), Loops(Loops) {} |
103 | bool run(MachineFunction &MF); |
104 | |
105 | MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const; |
106 | |
107 | void collectElseRegionBlocks(MachineBasicBlock *Flow, |
108 | MachineBasicBlock *Endif, |
109 | SmallSetVector<MachineBasicBlock *, 16> &) const; |
110 | |
111 | void |
112 | collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow, |
113 | MachineBasicBlock *Endif, |
114 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, |
115 | SmallVectorImpl<Register> &CandidateRegs) const; |
116 | |
117 | void collectWaterfallCandidateRegisters( |
118 | MachineBasicBlock *, MachineBasicBlock *LoopEnd, |
119 | SmallSetVector<Register, 16> &CandidateRegs, |
120 | SmallSetVector<MachineBasicBlock *, 2> &Blocks, |
121 | SmallVectorImpl<MachineInstr *> &Instructions) const; |
122 | |
123 | void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB, |
124 | SmallVectorImpl<MachineInstr *> &Uses) const; |
125 | |
126 | void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If, |
127 | MachineBasicBlock *Flow) const; |
128 | |
129 | void updateLiveRangeInElseRegion( |
130 | Register Reg, Register NewReg, MachineBasicBlock *Flow, |
131 | MachineBasicBlock *Endif, |
132 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; |
133 | |
134 | void |
135 | optimizeLiveRange(Register Reg, MachineBasicBlock *If, |
136 | MachineBasicBlock *Flow, MachineBasicBlock *Endif, |
137 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; |
138 | |
139 | void optimizeWaterfallLiveRange( |
140 | Register Reg, MachineBasicBlock *, |
141 | SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks, |
142 | SmallVectorImpl<MachineInstr *> &Instructions) const; |
143 | }; |
144 | |
145 | class SIOptimizeVGPRLiveRangeLegacy : public MachineFunctionPass { |
146 | public: |
147 | static char ID; |
148 | |
149 | SIOptimizeVGPRLiveRangeLegacy() : MachineFunctionPass(ID) {} |
150 | |
151 | bool runOnMachineFunction(MachineFunction &MF) override; |
152 | |
153 | StringRef getPassName() const override { |
154 | return "SI Optimize VGPR LiveRange" ; |
155 | } |
156 | |
157 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
158 | AU.setPreservesCFG(); |
159 | AU.addRequired<LiveVariablesWrapperPass>(); |
160 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
161 | AU.addRequired<MachineLoopInfoWrapperPass>(); |
162 | AU.addPreserved<LiveVariablesWrapperPass>(); |
163 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
164 | AU.addPreserved<MachineLoopInfoWrapperPass>(); |
165 | MachineFunctionPass::getAnalysisUsage(AU); |
166 | } |
167 | |
168 | MachineFunctionProperties getRequiredProperties() const override { |
169 | return MachineFunctionProperties().setIsSSA(); |
170 | } |
171 | |
172 | MachineFunctionProperties getClearedProperties() const override { |
173 | return MachineFunctionProperties().setNoPHIs(); |
174 | } |
175 | }; |
176 | |
177 | } // end anonymous namespace |
178 | |
179 | // Check whether the MBB is a else flow block and get the branching target which |
180 | // is the Endif block |
181 | MachineBasicBlock * |
182 | SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const { |
183 | for (auto &BR : MBB->terminators()) { |
184 | if (BR.getOpcode() == AMDGPU::SI_ELSE) |
185 | return BR.getOperand(i: 2).getMBB(); |
186 | } |
187 | return nullptr; |
188 | } |
189 | |
190 | void SIOptimizeVGPRLiveRange::collectElseRegionBlocks( |
191 | MachineBasicBlock *Flow, MachineBasicBlock *Endif, |
192 | SmallSetVector<MachineBasicBlock *, 16> &Blocks) const { |
193 | assert(Flow != Endif); |
194 | |
195 | MachineBasicBlock *MBB = Endif; |
196 | unsigned Cur = 0; |
197 | while (MBB) { |
198 | for (auto *Pred : MBB->predecessors()) { |
199 | if (Pred != Flow) |
200 | Blocks.insert(X: Pred); |
201 | } |
202 | |
203 | if (Cur < Blocks.size()) |
204 | MBB = Blocks[Cur++]; |
205 | else |
206 | MBB = nullptr; |
207 | } |
208 | |
209 | LLVM_DEBUG({ |
210 | dbgs() << "Found Else blocks: " ; |
211 | for (auto *MBB : Blocks) |
212 | dbgs() << printMBBReference(*MBB) << ' '; |
213 | dbgs() << '\n'; |
214 | }); |
215 | } |
216 | |
217 | /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg. |
218 | void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock( |
219 | Register Reg, MachineBasicBlock *MBB, |
220 | SmallVectorImpl<MachineInstr *> &Uses) const { |
221 | for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) { |
222 | if (UseMI.getParent() == MBB && !UseMI.isPHI()) |
223 | Uses.push_back(Elt: &UseMI); |
224 | } |
225 | } |
226 | |
227 | /// Collect the killed registers in the ELSE region which are not alive through |
228 | /// the whole THEN region. |
229 | void SIOptimizeVGPRLiveRange::collectCandidateRegisters( |
230 | MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif, |
231 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, |
232 | SmallVectorImpl<Register> &CandidateRegs) const { |
233 | |
234 | SmallSet<Register, 8> KillsInElse; |
235 | |
236 | for (auto *Else : ElseBlocks) { |
237 | for (auto &MI : Else->instrs()) { |
238 | if (MI.isDebugInstr()) |
239 | continue; |
240 | |
241 | for (auto &MO : MI.operands()) { |
242 | if (!MO.isReg() || !MO.getReg() || MO.isDef()) |
243 | continue; |
244 | |
245 | Register MOReg = MO.getReg(); |
246 | // We can only optimize AGPR/VGPR virtual register |
247 | if (MOReg.isPhysical() || !TRI->isVectorRegister(MRI: *MRI, Reg: MOReg)) |
248 | continue; |
249 | |
250 | if (MO.readsReg()) { |
251 | LiveVariables::VarInfo &VI = LV->getVarInfo(Reg: MOReg); |
252 | const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg: MOReg)->getParent(); |
253 | // Make sure two conditions are met: |
254 | // a.) the value is defined before/in the IF block |
255 | // b.) should be defined in the same loop-level. |
256 | if ((VI.AliveBlocks.test(Idx: If->getNumber()) || DefMBB == If) && |
257 | Loops->getLoopFor(BB: DefMBB) == Loops->getLoopFor(BB: If)) { |
258 | // Check if the register is live into the endif block. If not, |
259 | // consider it killed in the else region. |
260 | LiveVariables::VarInfo &VI = LV->getVarInfo(Reg: MOReg); |
261 | if (!VI.isLiveIn(MBB: *Endif, Reg: MOReg, MRI&: *MRI)) { |
262 | KillsInElse.insert(V: MOReg); |
263 | } else { |
264 | LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI) |
265 | << " as Live in Endif\n" ); |
266 | } |
267 | } |
268 | } |
269 | } |
270 | } |
271 | } |
272 | |
273 | // Check the phis in the Endif, looking for value coming from the ELSE |
274 | // region. Make sure the phi-use is the last use. |
275 | for (auto &MI : Endif->phis()) { |
276 | for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { |
277 | auto &MO = MI.getOperand(i: Idx); |
278 | auto *Pred = MI.getOperand(i: Idx + 1).getMBB(); |
279 | if (Pred == Flow) |
280 | continue; |
281 | assert(ElseBlocks.contains(Pred) && "Should be from Else region\n" ); |
282 | |
283 | if (!MO.isReg() || !MO.getReg() || MO.isUndef()) |
284 | continue; |
285 | |
286 | Register Reg = MO.getReg(); |
287 | if (Reg.isPhysical() || !TRI->isVectorRegister(MRI: *MRI, Reg)) |
288 | continue; |
289 | |
290 | LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); |
291 | |
292 | if (VI.isLiveIn(MBB: *Endif, Reg, MRI&: *MRI)) { |
293 | LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI) |
294 | << " as Live in Endif\n" ); |
295 | continue; |
296 | } |
297 | // Make sure two conditions are met: |
298 | // a.) the value is defined before/in the IF block |
299 | // b.) should be defined in the same loop-level. |
300 | const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent(); |
301 | if ((VI.AliveBlocks.test(Idx: If->getNumber()) || DefMBB == If) && |
302 | Loops->getLoopFor(BB: DefMBB) == Loops->getLoopFor(BB: If)) |
303 | KillsInElse.insert(V: Reg); |
304 | } |
305 | } |
306 | |
307 | auto IsLiveThroughThen = [&](Register Reg) { |
308 | for (auto I = MRI->use_nodbg_begin(RegNo: Reg), E = MRI->use_nodbg_end(); I != E; |
309 | ++I) { |
310 | if (!I->readsReg()) |
311 | continue; |
312 | auto *UseMI = I->getParent(); |
313 | auto *UseMBB = UseMI->getParent(); |
314 | if (UseMBB == Flow || UseMBB == Endif) { |
315 | if (!UseMI->isPHI()) |
316 | return true; |
317 | |
318 | auto *IncomingMBB = UseMI->getOperand(i: I.getOperandNo() + 1).getMBB(); |
319 | // The register is live through the path If->Flow or Flow->Endif. |
320 | // we should not optimize for such cases. |
321 | if ((UseMBB == Flow && IncomingMBB != If) || |
322 | (UseMBB == Endif && IncomingMBB == Flow)) |
323 | return true; |
324 | } |
325 | } |
326 | return false; |
327 | }; |
328 | |
329 | for (auto Reg : KillsInElse) { |
330 | if (!IsLiveThroughThen(Reg)) |
331 | CandidateRegs.push_back(Elt: Reg); |
332 | } |
333 | } |
334 | |
335 | /// Collect the registers used in the waterfall loop block that are defined |
336 | /// before. |
337 | void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( |
338 | MachineBasicBlock *, MachineBasicBlock *LoopEnd, |
339 | SmallSetVector<Register, 16> &CandidateRegs, |
340 | SmallSetVector<MachineBasicBlock *, 2> &Blocks, |
341 | SmallVectorImpl<MachineInstr *> &Instructions) const { |
342 | |
343 | // Collect loop instructions, potentially spanning multiple blocks |
344 | auto *MBB = LoopHeader; |
345 | for (;;) { |
346 | Blocks.insert(X: MBB); |
347 | for (auto &MI : *MBB) { |
348 | if (MI.isDebugInstr()) |
349 | continue; |
350 | Instructions.push_back(Elt: &MI); |
351 | } |
352 | if (MBB == LoopEnd) |
353 | break; |
354 | |
355 | if ((MBB != LoopHeader && MBB->pred_size() != 1) || |
356 | (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) { |
357 | LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n" ); |
358 | return; |
359 | } |
360 | |
361 | MBB = *MBB->succ_begin(); |
362 | } |
363 | |
364 | for (auto *I : Instructions) { |
365 | auto &MI = *I; |
366 | |
367 | for (auto &MO : MI.all_uses()) { |
368 | if (!MO.getReg()) |
369 | continue; |
370 | |
371 | Register MOReg = MO.getReg(); |
372 | // We can only optimize AGPR/VGPR virtual register |
373 | if (MOReg.isPhysical() || !TRI->isVectorRegister(MRI: *MRI, Reg: MOReg)) |
374 | continue; |
375 | |
376 | if (MO.readsReg()) { |
377 | MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg: MOReg)->getParent(); |
378 | // Make sure the value is defined before the LOOP block |
379 | if (!Blocks.contains(key: DefMBB) && !CandidateRegs.contains(key: MOReg)) { |
380 | // If the variable is used after the loop, the register coalescer will |
381 | // merge the newly created register and remove the phi node again. |
382 | // Just do nothing in that case. |
383 | LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg: MOReg); |
384 | bool IsUsed = false; |
385 | for (auto *Succ : LoopEnd->successors()) { |
386 | if (!Blocks.contains(key: Succ) && |
387 | OldVarInfo.isLiveIn(MBB: *Succ, Reg: MOReg, MRI&: *MRI)) { |
388 | IsUsed = true; |
389 | break; |
390 | } |
391 | } |
392 | if (!IsUsed) { |
393 | LLVM_DEBUG(dbgs() << "Found candidate reg: " |
394 | << printReg(MOReg, TRI, 0, MRI) << '\n'); |
395 | CandidateRegs.insert(X: MOReg); |
396 | } else { |
397 | LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: " |
398 | << printReg(MOReg, TRI, 0, MRI) << '\n'); |
399 | } |
400 | } |
401 | } |
402 | } |
403 | } |
404 | } |
405 | |
406 | // Re-calculate the liveness of \p Reg in the THEN-region |
407 | void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion( |
408 | Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const { |
409 | SetVector<MachineBasicBlock *> Blocks; |
410 | SmallVector<MachineBasicBlock *> WorkList({If}); |
411 | |
412 | // Collect all successors until we see the flow block, where we should |
413 | // reconverge. |
414 | while (!WorkList.empty()) { |
415 | auto *MBB = WorkList.pop_back_val(); |
416 | for (auto *Succ : MBB->successors()) { |
417 | if (Succ != Flow && Blocks.insert(X: Succ)) |
418 | WorkList.push_back(Elt: Succ); |
419 | } |
420 | } |
421 | |
422 | LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); |
423 | for (MachineBasicBlock *MBB : Blocks) { |
424 | // Clear Live bit, as we will recalculate afterwards |
425 | LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB) |
426 | << '\n'); |
427 | OldVarInfo.AliveBlocks.reset(Idx: MBB->getNumber()); |
428 | } |
429 | |
430 | SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming; |
431 | |
432 | // Get the blocks the Reg should be alive through |
433 | for (auto I = MRI->use_nodbg_begin(RegNo: Reg), E = MRI->use_nodbg_end(); I != E; |
434 | ++I) { |
435 | auto *UseMI = I->getParent(); |
436 | if (UseMI->isPHI() && I->readsReg()) { |
437 | if (Blocks.contains(key: UseMI->getParent())) |
438 | PHIIncoming.insert(Ptr: UseMI->getOperand(i: I.getOperandNo() + 1).getMBB()); |
439 | } |
440 | } |
441 | |
442 | for (MachineBasicBlock *MBB : Blocks) { |
443 | SmallVector<MachineInstr *> Uses; |
444 | // PHI instructions has been processed before. |
445 | findNonPHIUsesInBlock(Reg, MBB, Uses); |
446 | |
447 | if (Uses.size() == 1) { |
448 | LLVM_DEBUG(dbgs() << "Found one Non-PHI use in " |
449 | << printMBBReference(*MBB) << '\n'); |
450 | LV->HandleVirtRegUse(reg: Reg, MBB, MI&: *(*Uses.begin())); |
451 | } else if (Uses.size() > 1) { |
452 | // Process the instructions in-order |
453 | LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in " |
454 | << printMBBReference(*MBB) << '\n'); |
455 | for (MachineInstr &MI : *MBB) { |
456 | if (llvm::is_contained(Range&: Uses, Element: &MI)) |
457 | LV->HandleVirtRegUse(reg: Reg, MBB, MI); |
458 | } |
459 | } |
460 | |
461 | // Mark Reg alive through the block if this is a PHI incoming block |
462 | if (PHIIncoming.contains(Ptr: MBB)) |
463 | LV->MarkVirtRegAliveInBlock(VRInfo&: OldVarInfo, DefBlock: MRI->getVRegDef(Reg)->getParent(), |
464 | BB: MBB); |
465 | } |
466 | |
467 | // Set the isKilled flag if we get new Kills in the THEN region. |
468 | for (auto *MI : OldVarInfo.Kills) { |
469 | if (Blocks.contains(key: MI->getParent())) |
470 | MI->addRegisterKilled(IncomingReg: Reg, RegInfo: TRI); |
471 | } |
472 | } |
473 | |
474 | void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion( |
475 | Register Reg, Register NewReg, MachineBasicBlock *Flow, |
476 | MachineBasicBlock *Endif, |
477 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { |
478 | LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(Reg: NewReg); |
479 | LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); |
480 | |
481 | // Transfer aliveBlocks from Reg to NewReg |
482 | for (auto *MBB : ElseBlocks) { |
483 | unsigned BBNum = MBB->getNumber(); |
484 | if (OldVarInfo.AliveBlocks.test(Idx: BBNum)) { |
485 | NewVarInfo.AliveBlocks.set(BBNum); |
486 | LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB) |
487 | << '\n'); |
488 | OldVarInfo.AliveBlocks.reset(Idx: BBNum); |
489 | } |
490 | } |
491 | |
492 | // Transfer the possible Kills in ElseBlocks from Reg to NewReg |
493 | auto I = OldVarInfo.Kills.begin(); |
494 | while (I != OldVarInfo.Kills.end()) { |
495 | if (ElseBlocks.contains(key: (*I)->getParent())) { |
496 | NewVarInfo.Kills.push_back(x: *I); |
497 | I = OldVarInfo.Kills.erase(position: I); |
498 | } else { |
499 | ++I; |
500 | } |
501 | } |
502 | } |
503 | |
504 | void SIOptimizeVGPRLiveRange::optimizeLiveRange( |
505 | Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow, |
506 | MachineBasicBlock *Endif, |
507 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { |
508 | // Insert a new PHI, marking the value from the THEN region being |
509 | // undef. |
510 | LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); |
511 | const auto *RC = MRI->getRegClass(Reg); |
512 | Register NewReg = MRI->createVirtualRegister(RegClass: RC); |
513 | Register UndefReg = MRI->createVirtualRegister(RegClass: RC); |
514 | MachineInstrBuilder PHI = BuildMI(BB&: *Flow, I: Flow->getFirstNonPHI(), MIMD: DebugLoc(), |
515 | MCID: TII->get(Opcode: TargetOpcode::PHI), DestReg: NewReg); |
516 | for (auto *Pred : Flow->predecessors()) { |
517 | if (Pred == If) |
518 | PHI.addReg(RegNo: Reg).addMBB(MBB: Pred); |
519 | else |
520 | PHI.addReg(RegNo: UndefReg, flags: RegState::Undef).addMBB(MBB: Pred); |
521 | } |
522 | |
523 | // Replace all uses in the ELSE region or the PHIs in ENDIF block |
524 | // Use early increment range because setReg() will update the linked list. |
525 | for (auto &O : make_early_inc_range(Range: MRI->use_operands(Reg))) { |
526 | auto *UseMI = O.getParent(); |
527 | auto *UseBlock = UseMI->getParent(); |
528 | // Replace uses in Endif block |
529 | if (UseBlock == Endif) { |
530 | if (UseMI->isPHI()) |
531 | O.setReg(NewReg); |
532 | else if (UseMI->isDebugInstr()) |
533 | continue; |
534 | else { |
535 | // DetectDeadLanes may mark register uses as undef without removing |
536 | // them, in which case a non-phi instruction using the original register |
537 | // may exist in the Endif block even though the register is not live |
538 | // into it. |
539 | assert(!O.readsReg()); |
540 | } |
541 | continue; |
542 | } |
543 | |
544 | // Replace uses in Else region |
545 | if (ElseBlocks.contains(key: UseBlock)) |
546 | O.setReg(NewReg); |
547 | } |
548 | |
549 | // The optimized Reg is not alive through Flow blocks anymore. |
550 | LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); |
551 | OldVarInfo.AliveBlocks.reset(Idx: Flow->getNumber()); |
552 | |
553 | updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks); |
554 | updateLiveRangeInThenRegion(Reg, If, Flow); |
555 | } |
556 | |
557 | void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( |
558 | Register Reg, MachineBasicBlock *, |
559 | SmallSetVector<MachineBasicBlock *, 2> &Blocks, |
560 | SmallVectorImpl<MachineInstr *> &Instructions) const { |
561 | // Insert a new PHI, marking the value from the last loop iteration undef. |
562 | LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); |
563 | const auto *RC = MRI->getRegClass(Reg); |
564 | Register NewReg = MRI->createVirtualRegister(RegClass: RC); |
565 | Register UndefReg = MRI->createVirtualRegister(RegClass: RC); |
566 | |
567 | // Replace all uses in the LOOP region |
568 | // Use early increment range because setReg() will update the linked list. |
569 | for (auto &O : make_early_inc_range(Range: MRI->use_operands(Reg))) { |
570 | auto *UseMI = O.getParent(); |
571 | auto *UseBlock = UseMI->getParent(); |
572 | // Replace uses in Loop blocks |
573 | if (Blocks.contains(key: UseBlock)) |
574 | O.setReg(NewReg); |
575 | } |
576 | |
577 | MachineInstrBuilder PHI = |
578 | BuildMI(BB&: *LoopHeader, I: LoopHeader->getFirstNonPHI(), MIMD: DebugLoc(), |
579 | MCID: TII->get(Opcode: TargetOpcode::PHI), DestReg: NewReg); |
580 | for (auto *Pred : LoopHeader->predecessors()) { |
581 | if (Blocks.contains(key: Pred)) |
582 | PHI.addReg(RegNo: UndefReg, flags: RegState::Undef).addMBB(MBB: Pred); |
583 | else |
584 | PHI.addReg(RegNo: Reg).addMBB(MBB: Pred); |
585 | } |
586 | |
587 | LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(Reg: NewReg); |
588 | LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); |
589 | |
590 | // Find last use and mark as kill |
591 | MachineInstr *Kill = nullptr; |
592 | for (auto *MI : reverse(C&: Instructions)) { |
593 | if (MI->readsRegister(Reg: NewReg, TRI)) { |
594 | MI->addRegisterKilled(IncomingReg: NewReg, RegInfo: TRI); |
595 | NewVarInfo.Kills.push_back(x: MI); |
596 | Kill = MI; |
597 | break; |
598 | } |
599 | } |
600 | assert(Kill && "Failed to find last usage of register in loop" ); |
601 | |
602 | MachineBasicBlock *KillBlock = Kill->getParent(); |
603 | bool PostKillBlock = false; |
604 | for (auto *Block : Blocks) { |
605 | auto BBNum = Block->getNumber(); |
606 | |
607 | // collectWaterfallCandidateRegisters only collects registers that are dead |
608 | // after the loop. So we know that the old reg is no longer live throughout |
609 | // the waterfall loop. |
610 | OldVarInfo.AliveBlocks.reset(Idx: BBNum); |
611 | |
612 | // The new register is live up to (and including) the block that kills it. |
613 | PostKillBlock |= (Block == KillBlock); |
614 | if (PostKillBlock) { |
615 | NewVarInfo.AliveBlocks.reset(Idx: BBNum); |
616 | } else if (Block != LoopHeader) { |
617 | NewVarInfo.AliveBlocks.set(BBNum); |
618 | } |
619 | } |
620 | } |
621 | |
622 | char SIOptimizeVGPRLiveRangeLegacy::ID = 0; |
623 | |
624 | INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE, |
625 | "SI Optimize VGPR LiveRange" , false, false) |
626 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) |
627 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) |
628 | INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass) |
629 | INITIALIZE_PASS_END(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE, |
630 | "SI Optimize VGPR LiveRange" , false, false) |
631 | |
632 | char &llvm::SIOptimizeVGPRLiveRangeLegacyID = SIOptimizeVGPRLiveRangeLegacy::ID; |
633 | |
634 | FunctionPass *llvm::createSIOptimizeVGPRLiveRangeLegacyPass() { |
635 | return new SIOptimizeVGPRLiveRangeLegacy(); |
636 | } |
637 | |
638 | bool SIOptimizeVGPRLiveRangeLegacy::runOnMachineFunction(MachineFunction &MF) { |
639 | if (skipFunction(F: MF.getFunction())) |
640 | return false; |
641 | |
642 | LiveVariables *LV = &getAnalysis<LiveVariablesWrapperPass>().getLV(); |
643 | MachineDominatorTree *MDT = |
644 | &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
645 | MachineLoopInfo *Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); |
646 | return SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF); |
647 | } |
648 | |
649 | PreservedAnalyses |
650 | SIOptimizeVGPRLiveRangePass::run(MachineFunction &MF, |
651 | MachineFunctionAnalysisManager &MFAM) { |
652 | MFPropsModifier _(*this, MF); |
653 | LiveVariables *LV = &MFAM.getResult<LiveVariablesAnalysis>(IR&: MF); |
654 | MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF); |
655 | MachineLoopInfo *Loops = &MFAM.getResult<MachineLoopAnalysis>(IR&: MF); |
656 | |
657 | bool Changed = SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF); |
658 | if (!Changed) |
659 | return PreservedAnalyses::all(); |
660 | |
661 | auto PA = getMachineFunctionPassPreservedAnalyses(); |
662 | PA.preserve<LiveVariablesAnalysis>(); |
663 | PA.preserve<DominatorTreeAnalysis>(); |
664 | PA.preserve<MachineLoopAnalysis>(); |
665 | PA.preserveSet<CFGAnalyses>(); |
666 | return PA; |
667 | } |
668 | |
669 | bool SIOptimizeVGPRLiveRange::run(MachineFunction &MF) { |
670 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
671 | TII = ST.getInstrInfo(); |
672 | TRI = &TII->getRegisterInfo(); |
673 | MRI = &MF.getRegInfo(); |
674 | |
675 | bool MadeChange = false; |
676 | |
677 | // TODO: we need to think about the order of visiting the blocks to get |
678 | // optimal result for nesting if-else cases. |
679 | for (MachineBasicBlock &MBB : MF) { |
680 | for (auto &MI : MBB.terminators()) { |
681 | // Detect the if-else blocks |
682 | if (MI.getOpcode() == AMDGPU::SI_IF) { |
683 | MachineBasicBlock *IfTarget = MI.getOperand(i: 2).getMBB(); |
684 | auto *Endif = getElseTarget(MBB: IfTarget); |
685 | if (!Endif) |
686 | continue; |
687 | |
688 | // Skip unexpected control flow. |
689 | if (!MDT->dominates(A: &MBB, B: IfTarget) || !MDT->dominates(A: IfTarget, B: Endif)) |
690 | continue; |
691 | |
692 | SmallSetVector<MachineBasicBlock *, 16> ElseBlocks; |
693 | SmallVector<Register> CandidateRegs; |
694 | |
695 | LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: " |
696 | << printMBBReference(MBB) << ' ' |
697 | << printMBBReference(*IfTarget) << ' ' |
698 | << printMBBReference(*Endif) << '\n'); |
699 | |
700 | // Collect all the blocks in the ELSE region |
701 | collectElseRegionBlocks(Flow: IfTarget, Endif, Blocks&: ElseBlocks); |
702 | |
703 | // Collect the registers can be optimized |
704 | collectCandidateRegisters(If: &MBB, Flow: IfTarget, Endif, ElseBlocks, |
705 | CandidateRegs); |
706 | MadeChange |= !CandidateRegs.empty(); |
707 | // Now we are safe to optimize. |
708 | for (auto Reg : CandidateRegs) |
709 | optimizeLiveRange(Reg, If: &MBB, Flow: IfTarget, Endif, ElseBlocks); |
710 | } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) { |
711 | auto * = MI.getOperand(i: 0).getMBB(); |
712 | auto *LoopEnd = &MBB; |
713 | |
714 | LLVM_DEBUG(dbgs() << "Checking Waterfall loop: " |
715 | << printMBBReference(*LoopHeader) << '\n'); |
716 | |
717 | SmallSetVector<Register, 16> CandidateRegs; |
718 | SmallVector<MachineInstr *, 16> Instructions; |
719 | SmallSetVector<MachineBasicBlock *, 2> Blocks; |
720 | |
721 | collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs, |
722 | Blocks, Instructions); |
723 | MadeChange |= !CandidateRegs.empty(); |
724 | // Now we are safe to optimize. |
725 | for (auto Reg : CandidateRegs) |
726 | optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions); |
727 | } |
728 | } |
729 | } |
730 | |
731 | return MadeChange; |
732 | } |
733 | |