1 | //===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else |
11 | /// structures and waterfall loops. |
12 | /// |
13 | /// When we do structurization, we usually transform an if-else into two |
14 | /// successive if-then (with a flow block to do predicate inversion). Consider a |
15 | /// simple case after structurization: A divergent value %a was defined before |
16 | /// if-else and used in both THEN (use in THEN is optional) and ELSE part: |
17 | /// bb.if: |
18 | /// %a = ... |
19 | /// ... |
20 | /// bb.then: |
21 | /// ... = op %a |
22 | /// ... // %a can be dead here |
23 | /// bb.flow: |
24 | /// ... |
25 | /// bb.else: |
26 | /// ... = %a |
27 | /// ... |
28 | /// bb.endif |
29 | /// |
30 | /// As register allocator has no idea of the thread-control-flow, it will just |
31 | /// assume %a would be alive in the whole range of bb.then because of a later |
32 | /// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect |
33 | /// to exec mask. For this if-else case, the lanes active in bb.then will be |
34 | /// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead |
35 | /// after the last use in bb.then until the end of the block. The reason is |
36 | /// the instructions in bb.then will only overwrite lanes that will never be |
37 | /// accessed in bb.else. |
38 | /// |
39 | /// This pass aims to tell register allocator that %a is in-fact dead, |
40 | /// through inserting a phi-node in bb.flow saying that %a is undef when coming |
41 | /// from bb.then, and then replace the uses in the bb.else with the result of |
42 | /// newly inserted phi. |
43 | /// |
44 | /// Two key conditions must be met to ensure correctness: |
45 | /// 1.) The def-point should be in the same loop-level as if-else-endif to make |
46 | /// sure the second loop iteration still get correct data. |
47 | /// 2.) There should be no further uses after the IF-ELSE region. |
48 | /// |
49 | /// |
50 | /// Waterfall loops get inserted around instructions that use divergent values |
51 | /// but can only be executed with a uniform value. For example an indirect call |
52 | /// to a divergent address: |
53 | /// bb.start: |
54 | /// %a = ... |
55 | /// %fun = ... |
56 | /// ... |
57 | /// bb.loop: |
58 | /// call %fun (%a) |
59 | /// ... // %a can be dead here |
60 | /// loop %bb.loop |
61 | /// |
62 | /// The loop block is executed multiple times, but it is run exactly once for |
63 | /// each active lane. Similar to the if-else case, the register allocator |
64 | /// assumes that %a is live throughout the loop as it is used again in the next |
65 | /// iteration. If %a is a VGPR that is unused after the loop, it does not need |
66 | /// to be live after its last use in the loop block. By inserting a phi-node at |
67 | /// the start of bb.loop that is undef when coming from bb.loop, the register |
68 | /// allocation knows that the value of %a does not need to be preserved through |
69 | /// iterations of the loop. |
70 | /// |
71 | // |
72 | //===----------------------------------------------------------------------===// |
73 | |
74 | #include "AMDGPU.h" |
75 | #include "GCNSubtarget.h" |
76 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
77 | #include "SIMachineFunctionInfo.h" |
78 | #include "llvm/CodeGen/LiveVariables.h" |
79 | #include "llvm/CodeGen/MachineDominators.h" |
80 | #include "llvm/CodeGen/MachineLoopInfo.h" |
81 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
82 | #include "llvm/InitializePasses.h" |
83 | |
84 | using namespace llvm; |
85 | |
86 | #define DEBUG_TYPE "si-opt-vgpr-liverange" |
87 | |
88 | namespace { |
89 | |
90 | class SIOptimizeVGPRLiveRange : public MachineFunctionPass { |
91 | private: |
92 | const SIRegisterInfo *TRI = nullptr; |
93 | const SIInstrInfo *TII = nullptr; |
94 | LiveVariables *LV = nullptr; |
95 | MachineDominatorTree *MDT = nullptr; |
96 | const MachineLoopInfo *Loops = nullptr; |
97 | MachineRegisterInfo *MRI = nullptr; |
98 | |
99 | public: |
100 | static char ID; |
101 | |
102 | MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const; |
103 | |
104 | void collectElseRegionBlocks(MachineBasicBlock *Flow, |
105 | MachineBasicBlock *Endif, |
106 | SmallSetVector<MachineBasicBlock *, 16> &) const; |
107 | |
108 | void |
109 | collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow, |
110 | MachineBasicBlock *Endif, |
111 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, |
112 | SmallVectorImpl<Register> &CandidateRegs) const; |
113 | |
114 | void collectWaterfallCandidateRegisters( |
115 | MachineBasicBlock *, MachineBasicBlock *LoopEnd, |
116 | SmallSetVector<Register, 16> &CandidateRegs, |
117 | SmallSetVector<MachineBasicBlock *, 2> &Blocks, |
118 | SmallVectorImpl<MachineInstr *> &Instructions) const; |
119 | |
120 | void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB, |
121 | SmallVectorImpl<MachineInstr *> &Uses) const; |
122 | |
123 | void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If, |
124 | MachineBasicBlock *Flow) const; |
125 | |
126 | void updateLiveRangeInElseRegion( |
127 | Register Reg, Register NewReg, MachineBasicBlock *Flow, |
128 | MachineBasicBlock *Endif, |
129 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; |
130 | |
131 | void |
132 | optimizeLiveRange(Register Reg, MachineBasicBlock *If, |
133 | MachineBasicBlock *Flow, MachineBasicBlock *Endif, |
134 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; |
135 | |
136 | void optimizeWaterfallLiveRange( |
137 | Register Reg, MachineBasicBlock *, |
138 | SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks, |
139 | SmallVectorImpl<MachineInstr *> &Instructions) const; |
140 | |
141 | SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {} |
142 | |
143 | bool runOnMachineFunction(MachineFunction &MF) override; |
144 | |
145 | StringRef getPassName() const override { |
146 | return "SI Optimize VGPR LiveRange" ; |
147 | } |
148 | |
149 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
150 | AU.addRequired<LiveVariablesWrapperPass>(); |
151 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
152 | AU.addRequired<MachineLoopInfoWrapperPass>(); |
153 | AU.addPreserved<LiveVariablesWrapperPass>(); |
154 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
155 | AU.addPreserved<MachineLoopInfoWrapperPass>(); |
156 | MachineFunctionPass::getAnalysisUsage(AU); |
157 | } |
158 | |
159 | MachineFunctionProperties getRequiredProperties() const override { |
160 | return MachineFunctionProperties().set( |
161 | MachineFunctionProperties::Property::IsSSA); |
162 | } |
163 | |
164 | MachineFunctionProperties getClearedProperties() const override { |
165 | return MachineFunctionProperties().set( |
166 | MachineFunctionProperties::Property::NoPHIs); |
167 | } |
168 | }; |
169 | |
170 | } // end anonymous namespace |
171 | |
172 | // Check whether the MBB is a else flow block and get the branching target which |
173 | // is the Endif block |
174 | MachineBasicBlock * |
175 | SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const { |
176 | for (auto &BR : MBB->terminators()) { |
177 | if (BR.getOpcode() == AMDGPU::SI_ELSE) |
178 | return BR.getOperand(i: 2).getMBB(); |
179 | } |
180 | return nullptr; |
181 | } |
182 | |
183 | void SIOptimizeVGPRLiveRange::collectElseRegionBlocks( |
184 | MachineBasicBlock *Flow, MachineBasicBlock *Endif, |
185 | SmallSetVector<MachineBasicBlock *, 16> &Blocks) const { |
186 | assert(Flow != Endif); |
187 | |
188 | MachineBasicBlock *MBB = Endif; |
189 | unsigned Cur = 0; |
190 | while (MBB) { |
191 | for (auto *Pred : MBB->predecessors()) { |
192 | if (Pred != Flow && !Blocks.contains(key: Pred)) |
193 | Blocks.insert(X: Pred); |
194 | } |
195 | |
196 | if (Cur < Blocks.size()) |
197 | MBB = Blocks[Cur++]; |
198 | else |
199 | MBB = nullptr; |
200 | } |
201 | |
202 | LLVM_DEBUG({ |
203 | dbgs() << "Found Else blocks: " ; |
204 | for (auto *MBB : Blocks) |
205 | dbgs() << printMBBReference(*MBB) << ' '; |
206 | dbgs() << '\n'; |
207 | }); |
208 | } |
209 | |
210 | /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg. |
211 | void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock( |
212 | Register Reg, MachineBasicBlock *MBB, |
213 | SmallVectorImpl<MachineInstr *> &Uses) const { |
214 | for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) { |
215 | if (UseMI.getParent() == MBB && !UseMI.isPHI()) |
216 | Uses.push_back(Elt: &UseMI); |
217 | } |
218 | } |
219 | |
220 | /// Collect the killed registers in the ELSE region which are not alive through |
221 | /// the whole THEN region. |
222 | void SIOptimizeVGPRLiveRange::collectCandidateRegisters( |
223 | MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif, |
224 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, |
225 | SmallVectorImpl<Register> &CandidateRegs) const { |
226 | |
227 | SmallSet<Register, 8> KillsInElse; |
228 | |
229 | for (auto *Else : ElseBlocks) { |
230 | for (auto &MI : Else->instrs()) { |
231 | if (MI.isDebugInstr()) |
232 | continue; |
233 | |
234 | for (auto &MO : MI.operands()) { |
235 | if (!MO.isReg() || !MO.getReg() || MO.isDef()) |
236 | continue; |
237 | |
238 | Register MOReg = MO.getReg(); |
239 | // We can only optimize AGPR/VGPR virtual register |
240 | if (MOReg.isPhysical() || !TRI->isVectorRegister(MRI: *MRI, Reg: MOReg)) |
241 | continue; |
242 | |
243 | if (MO.readsReg()) { |
244 | LiveVariables::VarInfo &VI = LV->getVarInfo(Reg: MOReg); |
245 | const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg: MOReg)->getParent(); |
246 | // Make sure two conditions are met: |
247 | // a.) the value is defined before/in the IF block |
248 | // b.) should be defined in the same loop-level. |
249 | if ((VI.AliveBlocks.test(Idx: If->getNumber()) || DefMBB == If) && |
250 | Loops->getLoopFor(BB: DefMBB) == Loops->getLoopFor(BB: If)) { |
251 | // Check if the register is live into the endif block. If not, |
252 | // consider it killed in the else region. |
253 | LiveVariables::VarInfo &VI = LV->getVarInfo(Reg: MOReg); |
254 | if (!VI.isLiveIn(MBB: *Endif, Reg: MOReg, MRI&: *MRI)) { |
255 | KillsInElse.insert(V: MOReg); |
256 | } else { |
257 | LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI) |
258 | << " as Live in Endif\n" ); |
259 | } |
260 | } |
261 | } |
262 | } |
263 | } |
264 | } |
265 | |
266 | // Check the phis in the Endif, looking for value coming from the ELSE |
267 | // region. Make sure the phi-use is the last use. |
268 | for (auto &MI : Endif->phis()) { |
269 | for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { |
270 | auto &MO = MI.getOperand(i: Idx); |
271 | auto *Pred = MI.getOperand(i: Idx + 1).getMBB(); |
272 | if (Pred == Flow) |
273 | continue; |
274 | assert(ElseBlocks.contains(Pred) && "Should be from Else region\n" ); |
275 | |
276 | if (!MO.isReg() || !MO.getReg() || MO.isUndef()) |
277 | continue; |
278 | |
279 | Register Reg = MO.getReg(); |
280 | if (Reg.isPhysical() || !TRI->isVectorRegister(MRI: *MRI, Reg)) |
281 | continue; |
282 | |
283 | LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); |
284 | |
285 | if (VI.isLiveIn(MBB: *Endif, Reg, MRI&: *MRI)) { |
286 | LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI) |
287 | << " as Live in Endif\n" ); |
288 | continue; |
289 | } |
290 | // Make sure two conditions are met: |
291 | // a.) the value is defined before/in the IF block |
292 | // b.) should be defined in the same loop-level. |
293 | const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent(); |
294 | if ((VI.AliveBlocks.test(Idx: If->getNumber()) || DefMBB == If) && |
295 | Loops->getLoopFor(BB: DefMBB) == Loops->getLoopFor(BB: If)) |
296 | KillsInElse.insert(V: Reg); |
297 | } |
298 | } |
299 | |
300 | auto IsLiveThroughThen = [&](Register Reg) { |
301 | for (auto I = MRI->use_nodbg_begin(RegNo: Reg), E = MRI->use_nodbg_end(); I != E; |
302 | ++I) { |
303 | if (!I->readsReg()) |
304 | continue; |
305 | auto *UseMI = I->getParent(); |
306 | auto *UseMBB = UseMI->getParent(); |
307 | if (UseMBB == Flow || UseMBB == Endif) { |
308 | if (!UseMI->isPHI()) |
309 | return true; |
310 | |
311 | auto *IncomingMBB = UseMI->getOperand(i: I.getOperandNo() + 1).getMBB(); |
312 | // The register is live through the path If->Flow or Flow->Endif. |
313 | // we should not optimize for such cases. |
314 | if ((UseMBB == Flow && IncomingMBB != If) || |
315 | (UseMBB == Endif && IncomingMBB == Flow)) |
316 | return true; |
317 | } |
318 | } |
319 | return false; |
320 | }; |
321 | |
322 | for (auto Reg : KillsInElse) { |
323 | if (!IsLiveThroughThen(Reg)) |
324 | CandidateRegs.push_back(Elt: Reg); |
325 | } |
326 | } |
327 | |
328 | /// Collect the registers used in the waterfall loop block that are defined |
329 | /// before. |
330 | void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( |
331 | MachineBasicBlock *, MachineBasicBlock *LoopEnd, |
332 | SmallSetVector<Register, 16> &CandidateRegs, |
333 | SmallSetVector<MachineBasicBlock *, 2> &Blocks, |
334 | SmallVectorImpl<MachineInstr *> &Instructions) const { |
335 | |
336 | // Collect loop instructions, potentially spanning multiple blocks |
337 | auto *MBB = LoopHeader; |
338 | for (;;) { |
339 | Blocks.insert(X: MBB); |
340 | for (auto &MI : *MBB) { |
341 | if (MI.isDebugInstr()) |
342 | continue; |
343 | Instructions.push_back(Elt: &MI); |
344 | } |
345 | if (MBB == LoopEnd) |
346 | break; |
347 | |
348 | if ((MBB != LoopHeader && MBB->pred_size() != 1) || |
349 | (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) { |
350 | LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n" ); |
351 | return; |
352 | } |
353 | |
354 | MBB = *MBB->succ_begin(); |
355 | } |
356 | |
357 | for (auto *I : Instructions) { |
358 | auto &MI = *I; |
359 | |
360 | for (auto &MO : MI.all_uses()) { |
361 | if (!MO.getReg()) |
362 | continue; |
363 | |
364 | Register MOReg = MO.getReg(); |
365 | // We can only optimize AGPR/VGPR virtual register |
366 | if (MOReg.isPhysical() || !TRI->isVectorRegister(MRI: *MRI, Reg: MOReg)) |
367 | continue; |
368 | |
369 | if (MO.readsReg()) { |
370 | MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg: MOReg)->getParent(); |
371 | // Make sure the value is defined before the LOOP block |
372 | if (!Blocks.contains(key: DefMBB) && !CandidateRegs.contains(key: MOReg)) { |
373 | // If the variable is used after the loop, the register coalescer will |
374 | // merge the newly created register and remove the phi node again. |
375 | // Just do nothing in that case. |
376 | LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg: MOReg); |
377 | bool IsUsed = false; |
378 | for (auto *Succ : LoopEnd->successors()) { |
379 | if (!Blocks.contains(key: Succ) && |
380 | OldVarInfo.isLiveIn(MBB: *Succ, Reg: MOReg, MRI&: *MRI)) { |
381 | IsUsed = true; |
382 | break; |
383 | } |
384 | } |
385 | if (!IsUsed) { |
386 | LLVM_DEBUG(dbgs() << "Found candidate reg: " |
387 | << printReg(MOReg, TRI, 0, MRI) << '\n'); |
388 | CandidateRegs.insert(X: MOReg); |
389 | } else { |
390 | LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: " |
391 | << printReg(MOReg, TRI, 0, MRI) << '\n'); |
392 | } |
393 | } |
394 | } |
395 | } |
396 | } |
397 | } |
398 | |
399 | // Re-calculate the liveness of \p Reg in the THEN-region |
400 | void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion( |
401 | Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const { |
402 | SetVector<MachineBasicBlock *> Blocks; |
403 | SmallVector<MachineBasicBlock *> WorkList({If}); |
404 | |
405 | // Collect all successors until we see the flow block, where we should |
406 | // reconverge. |
407 | while (!WorkList.empty()) { |
408 | auto *MBB = WorkList.pop_back_val(); |
409 | for (auto *Succ : MBB->successors()) { |
410 | if (Succ != Flow && !Blocks.contains(key: Succ)) { |
411 | WorkList.push_back(Elt: Succ); |
412 | Blocks.insert(X: Succ); |
413 | } |
414 | } |
415 | } |
416 | |
417 | LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); |
418 | for (MachineBasicBlock *MBB : Blocks) { |
419 | // Clear Live bit, as we will recalculate afterwards |
420 | LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB) |
421 | << '\n'); |
422 | OldVarInfo.AliveBlocks.reset(Idx: MBB->getNumber()); |
423 | } |
424 | |
425 | SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming; |
426 | |
427 | // Get the blocks the Reg should be alive through |
428 | for (auto I = MRI->use_nodbg_begin(RegNo: Reg), E = MRI->use_nodbg_end(); I != E; |
429 | ++I) { |
430 | auto *UseMI = I->getParent(); |
431 | if (UseMI->isPHI() && I->readsReg()) { |
432 | if (Blocks.contains(key: UseMI->getParent())) |
433 | PHIIncoming.insert(Ptr: UseMI->getOperand(i: I.getOperandNo() + 1).getMBB()); |
434 | } |
435 | } |
436 | |
437 | for (MachineBasicBlock *MBB : Blocks) { |
438 | SmallVector<MachineInstr *> Uses; |
439 | // PHI instructions has been processed before. |
440 | findNonPHIUsesInBlock(Reg, MBB, Uses); |
441 | |
442 | if (Uses.size() == 1) { |
443 | LLVM_DEBUG(dbgs() << "Found one Non-PHI use in " |
444 | << printMBBReference(*MBB) << '\n'); |
445 | LV->HandleVirtRegUse(reg: Reg, MBB, MI&: *(*Uses.begin())); |
446 | } else if (Uses.size() > 1) { |
447 | // Process the instructions in-order |
448 | LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in " |
449 | << printMBBReference(*MBB) << '\n'); |
450 | for (MachineInstr &MI : *MBB) { |
451 | if (llvm::is_contained(Range&: Uses, Element: &MI)) |
452 | LV->HandleVirtRegUse(reg: Reg, MBB, MI); |
453 | } |
454 | } |
455 | |
456 | // Mark Reg alive through the block if this is a PHI incoming block |
457 | if (PHIIncoming.contains(Ptr: MBB)) |
458 | LV->MarkVirtRegAliveInBlock(VRInfo&: OldVarInfo, DefBlock: MRI->getVRegDef(Reg)->getParent(), |
459 | BB: MBB); |
460 | } |
461 | |
462 | // Set the isKilled flag if we get new Kills in the THEN region. |
463 | for (auto *MI : OldVarInfo.Kills) { |
464 | if (Blocks.contains(key: MI->getParent())) |
465 | MI->addRegisterKilled(IncomingReg: Reg, RegInfo: TRI); |
466 | } |
467 | } |
468 | |
469 | void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion( |
470 | Register Reg, Register NewReg, MachineBasicBlock *Flow, |
471 | MachineBasicBlock *Endif, |
472 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { |
473 | LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(Reg: NewReg); |
474 | LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); |
475 | |
476 | // Transfer aliveBlocks from Reg to NewReg |
477 | for (auto *MBB : ElseBlocks) { |
478 | unsigned BBNum = MBB->getNumber(); |
479 | if (OldVarInfo.AliveBlocks.test(Idx: BBNum)) { |
480 | NewVarInfo.AliveBlocks.set(BBNum); |
481 | LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB) |
482 | << '\n'); |
483 | OldVarInfo.AliveBlocks.reset(Idx: BBNum); |
484 | } |
485 | } |
486 | |
487 | // Transfer the possible Kills in ElseBlocks from Reg to NewReg |
488 | auto I = OldVarInfo.Kills.begin(); |
489 | while (I != OldVarInfo.Kills.end()) { |
490 | if (ElseBlocks.contains(key: (*I)->getParent())) { |
491 | NewVarInfo.Kills.push_back(x: *I); |
492 | I = OldVarInfo.Kills.erase(position: I); |
493 | } else { |
494 | ++I; |
495 | } |
496 | } |
497 | } |
498 | |
499 | void SIOptimizeVGPRLiveRange::optimizeLiveRange( |
500 | Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow, |
501 | MachineBasicBlock *Endif, |
502 | SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { |
503 | // Insert a new PHI, marking the value from the THEN region being |
504 | // undef. |
505 | LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); |
506 | const auto *RC = MRI->getRegClass(Reg); |
507 | Register NewReg = MRI->createVirtualRegister(RegClass: RC); |
508 | Register UndefReg = MRI->createVirtualRegister(RegClass: RC); |
509 | MachineInstrBuilder PHI = BuildMI(BB&: *Flow, I: Flow->getFirstNonPHI(), MIMD: DebugLoc(), |
510 | MCID: TII->get(Opcode: TargetOpcode::PHI), DestReg: NewReg); |
511 | for (auto *Pred : Flow->predecessors()) { |
512 | if (Pred == If) |
513 | PHI.addReg(RegNo: Reg).addMBB(MBB: Pred); |
514 | else |
515 | PHI.addReg(RegNo: UndefReg, flags: RegState::Undef).addMBB(MBB: Pred); |
516 | } |
517 | |
518 | // Replace all uses in the ELSE region or the PHIs in ENDIF block |
519 | // Use early increment range because setReg() will update the linked list. |
520 | for (auto &O : make_early_inc_range(Range: MRI->use_operands(Reg))) { |
521 | auto *UseMI = O.getParent(); |
522 | auto *UseBlock = UseMI->getParent(); |
523 | // Replace uses in Endif block |
524 | if (UseBlock == Endif) { |
525 | if (UseMI->isPHI()) |
526 | O.setReg(NewReg); |
527 | else if (UseMI->isDebugInstr()) |
528 | continue; |
529 | else { |
530 | // DetectDeadLanes may mark register uses as undef without removing |
531 | // them, in which case a non-phi instruction using the original register |
532 | // may exist in the Endif block even though the register is not live |
533 | // into it. |
534 | assert(!O.readsReg()); |
535 | } |
536 | continue; |
537 | } |
538 | |
539 | // Replace uses in Else region |
540 | if (ElseBlocks.contains(key: UseBlock)) |
541 | O.setReg(NewReg); |
542 | } |
543 | |
544 | // The optimized Reg is not alive through Flow blocks anymore. |
545 | LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); |
546 | OldVarInfo.AliveBlocks.reset(Idx: Flow->getNumber()); |
547 | |
548 | updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks); |
549 | updateLiveRangeInThenRegion(Reg, If, Flow); |
550 | } |
551 | |
552 | void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( |
553 | Register Reg, MachineBasicBlock *, |
554 | SmallSetVector<MachineBasicBlock *, 2> &Blocks, |
555 | SmallVectorImpl<MachineInstr *> &Instructions) const { |
556 | // Insert a new PHI, marking the value from the last loop iteration undef. |
557 | LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); |
558 | const auto *RC = MRI->getRegClass(Reg); |
559 | Register NewReg = MRI->createVirtualRegister(RegClass: RC); |
560 | Register UndefReg = MRI->createVirtualRegister(RegClass: RC); |
561 | |
562 | // Replace all uses in the LOOP region |
563 | // Use early increment range because setReg() will update the linked list. |
564 | for (auto &O : make_early_inc_range(Range: MRI->use_operands(Reg))) { |
565 | auto *UseMI = O.getParent(); |
566 | auto *UseBlock = UseMI->getParent(); |
567 | // Replace uses in Loop blocks |
568 | if (Blocks.contains(key: UseBlock)) |
569 | O.setReg(NewReg); |
570 | } |
571 | |
572 | MachineInstrBuilder PHI = |
573 | BuildMI(BB&: *LoopHeader, I: LoopHeader->getFirstNonPHI(), MIMD: DebugLoc(), |
574 | MCID: TII->get(Opcode: TargetOpcode::PHI), DestReg: NewReg); |
575 | for (auto *Pred : LoopHeader->predecessors()) { |
576 | if (Blocks.contains(key: Pred)) |
577 | PHI.addReg(RegNo: UndefReg, flags: RegState::Undef).addMBB(MBB: Pred); |
578 | else |
579 | PHI.addReg(RegNo: Reg).addMBB(MBB: Pred); |
580 | } |
581 | |
582 | LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(Reg: NewReg); |
583 | LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); |
584 | |
585 | // Find last use and mark as kill |
586 | MachineInstr *Kill = nullptr; |
587 | for (auto *MI : reverse(C&: Instructions)) { |
588 | if (MI->readsRegister(Reg: NewReg, TRI)) { |
589 | MI->addRegisterKilled(IncomingReg: NewReg, RegInfo: TRI); |
590 | NewVarInfo.Kills.push_back(x: MI); |
591 | Kill = MI; |
592 | break; |
593 | } |
594 | } |
595 | assert(Kill && "Failed to find last usage of register in loop" ); |
596 | |
597 | MachineBasicBlock *KillBlock = Kill->getParent(); |
598 | bool PostKillBlock = false; |
599 | for (auto *Block : Blocks) { |
600 | auto BBNum = Block->getNumber(); |
601 | |
602 | // collectWaterfallCandidateRegisters only collects registers that are dead |
603 | // after the loop. So we know that the old reg is no longer live throughout |
604 | // the waterfall loop. |
605 | OldVarInfo.AliveBlocks.reset(Idx: BBNum); |
606 | |
607 | // The new register is live up to (and including) the block that kills it. |
608 | PostKillBlock |= (Block == KillBlock); |
609 | if (PostKillBlock) { |
610 | NewVarInfo.AliveBlocks.reset(Idx: BBNum); |
611 | } else if (Block != LoopHeader) { |
612 | NewVarInfo.AliveBlocks.set(BBNum); |
613 | } |
614 | } |
615 | } |
616 | |
617 | char SIOptimizeVGPRLiveRange::ID = 0; |
618 | |
619 | INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE, |
620 | "SI Optimize VGPR LiveRange" , false, false) |
621 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) |
622 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) |
623 | INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass) |
624 | INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE, |
625 | "SI Optimize VGPR LiveRange" , false, false) |
626 | |
627 | char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID; |
628 | |
629 | FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() { |
630 | return new SIOptimizeVGPRLiveRange(); |
631 | } |
632 | |
633 | bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) { |
634 | |
635 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
636 | TII = ST.getInstrInfo(); |
637 | TRI = &TII->getRegisterInfo(); |
638 | MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
639 | Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); |
640 | LV = &getAnalysis<LiveVariablesWrapperPass>().getLV(); |
641 | MRI = &MF.getRegInfo(); |
642 | |
643 | if (skipFunction(F: MF.getFunction())) |
644 | return false; |
645 | |
646 | bool MadeChange = false; |
647 | |
648 | // TODO: we need to think about the order of visiting the blocks to get |
649 | // optimal result for nesting if-else cases. |
650 | for (MachineBasicBlock &MBB : MF) { |
651 | for (auto &MI : MBB.terminators()) { |
652 | // Detect the if-else blocks |
653 | if (MI.getOpcode() == AMDGPU::SI_IF) { |
654 | MachineBasicBlock *IfTarget = MI.getOperand(i: 2).getMBB(); |
655 | auto *Endif = getElseTarget(MBB: IfTarget); |
656 | if (!Endif) |
657 | continue; |
658 | |
659 | // Skip unexpected control flow. |
660 | if (!MDT->dominates(A: &MBB, B: IfTarget) || !MDT->dominates(A: IfTarget, B: Endif)) |
661 | continue; |
662 | |
663 | SmallSetVector<MachineBasicBlock *, 16> ElseBlocks; |
664 | SmallVector<Register> CandidateRegs; |
665 | |
666 | LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: " |
667 | << printMBBReference(MBB) << ' ' |
668 | << printMBBReference(*IfTarget) << ' ' |
669 | << printMBBReference(*Endif) << '\n'); |
670 | |
671 | // Collect all the blocks in the ELSE region |
672 | collectElseRegionBlocks(Flow: IfTarget, Endif, Blocks&: ElseBlocks); |
673 | |
674 | // Collect the registers can be optimized |
675 | collectCandidateRegisters(If: &MBB, Flow: IfTarget, Endif, ElseBlocks, |
676 | CandidateRegs); |
677 | MadeChange |= !CandidateRegs.empty(); |
678 | // Now we are safe to optimize. |
679 | for (auto Reg : CandidateRegs) |
680 | optimizeLiveRange(Reg, If: &MBB, Flow: IfTarget, Endif, ElseBlocks); |
681 | } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) { |
682 | auto * = MI.getOperand(i: 0).getMBB(); |
683 | auto *LoopEnd = &MBB; |
684 | |
685 | LLVM_DEBUG(dbgs() << "Checking Waterfall loop: " |
686 | << printMBBReference(*LoopHeader) << '\n'); |
687 | |
688 | SmallSetVector<Register, 16> CandidateRegs; |
689 | SmallVector<MachineInstr *, 16> Instructions; |
690 | SmallSetVector<MachineBasicBlock *, 2> Blocks; |
691 | |
692 | collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs, |
693 | Blocks, Instructions); |
694 | MadeChange |= !CandidateRegs.empty(); |
695 | // Now we are safe to optimize. |
696 | for (auto Reg : CandidateRegs) |
697 | optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions); |
698 | } |
699 | } |
700 | } |
701 | |
702 | return MadeChange; |
703 | } |
704 | |