1 | //===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // This file implements the idea to rewrite undef incoming operand for certain |
9 | // PHIs in structurized CFG. This pass only works on IR that has gone through |
10 | // StructurizedCFG pass, and this pass has some additional limitation that make |
11 | // it can only run after SIAnnotateControlFlow. |
12 | // |
13 | // To achieve optimal code generation for AMDGPU, we assume that uniformity |
14 | // analysis reports the PHI in join block of divergent branch as uniform if |
15 | // it has one unique uniform value plus additional undefined/poisoned incoming |
16 | // value. That is to say the later compiler pipeline will ensure such PHI always |
17 | // return uniform value and ensure it work correctly. Let's take a look at two |
18 | // typical patterns in structured CFG that need to be taken care: (In both |
19 | // patterns, block %if terminate with divergent branch.) |
20 | // |
21 | // Pattern A: Block with undefined incoming value dominates defined predecessor |
22 | // %if |
23 | // | \ |
24 | // | %then |
25 | // | / |
26 | // %endif: %phi = phi [%undef, %if], [%uniform, %then] |
27 | // |
28 | // Pattern B: Block with defined incoming value dominates undefined predecessor |
29 | // %if |
30 | // | \ |
31 | // | %then |
32 | // | / |
33 | // %endif: %phi = phi [%uniform, %if], [%undef, %then] |
34 | // |
35 | // For pattern A, by reporting %phi as uniform, the later pipeline need to make |
36 | // sure it be handled correctly. The backend usually allocates a scalar register |
37 | // and if any thread in a wave takes %then path, the scalar register will get |
38 | // the %uniform value. |
39 | // |
40 | // For pattern B, we will replace the undef operand with the other defined value |
41 | // in this pass. So the scalar register allocated for such PHI will get correct |
42 | // liveness. Without this transformation, the scalar register may be overwritten |
43 | // in the %then block. |
44 | // |
45 | // Limitation note: |
46 | // If the join block of divergent threads is a loop header, the pass cannot |
47 | // handle it correctly right now. For below case, the undef in %phi should also |
48 | // be rewritten. Currently we depend on SIAnnotateControlFlow to split %header |
49 | // block to get a separate join block, then we can rewrite the undef correctly. |
50 | // %if |
51 | // | \ |
52 | // | %then |
53 | // | / |
54 | // -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header] |
55 | // | | |
56 | // \--- |
57 | |
58 | #include "AMDGPU.h" |
59 | #include "llvm/Analysis/UniformityAnalysis.h" |
60 | #include "llvm/IR/BasicBlock.h" |
61 | #include "llvm/IR/Constants.h" |
62 | #include "llvm/IR/Dominators.h" |
63 | #include "llvm/IR/Instructions.h" |
64 | #include "llvm/InitializePasses.h" |
65 | |
66 | using namespace llvm; |
67 | |
68 | #define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi" |
69 | |
70 | namespace { |
71 | |
72 | class AMDGPURewriteUndefForPHILegacy : public FunctionPass { |
73 | public: |
74 | static char ID; |
75 | AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) {} |
76 | bool runOnFunction(Function &F) override; |
77 | StringRef getPassName() const override { |
78 | return "AMDGPU Rewrite Undef for PHI" ; |
79 | } |
80 | |
81 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
82 | AU.addRequired<UniformityInfoWrapperPass>(); |
83 | AU.addRequired<DominatorTreeWrapperPass>(); |
84 | |
85 | AU.addPreserved<DominatorTreeWrapperPass>(); |
86 | AU.setPreservesCFG(); |
87 | } |
88 | }; |
89 | |
90 | } // end anonymous namespace |
91 | char AMDGPURewriteUndefForPHILegacy::ID = 0; |
92 | |
93 | INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE, |
94 | "Rewrite undef for PHI" , false, false) |
95 | INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) |
96 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
97 | INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE, |
98 | "Rewrite undef for PHI" , false, false) |
99 | |
100 | bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) { |
101 | bool Changed = false; |
102 | SmallVector<PHINode *> ToBeDeleted; |
103 | for (auto &BB : F) { |
104 | for (auto &PHI : BB.phis()) { |
105 | if (UA.isDivergent(I: &PHI)) |
106 | continue; |
107 | |
108 | // The unique incoming value except undef/poison for the PHI node. |
109 | Value *UniqueDefinedIncoming = nullptr; |
110 | // The divergent block with defined incoming value that dominates all |
111 | // other block with the same incoming value. |
112 | BasicBlock *DominateBB = nullptr; |
113 | // Predecessors with undefined incoming value (excluding loop backedge). |
114 | SmallVector<BasicBlock *> Undefs; |
115 | |
116 | for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { |
117 | Value *Incoming = PHI.getIncomingValue(i); |
118 | BasicBlock *IncomingBB = PHI.getIncomingBlock(i); |
119 | |
120 | if (Incoming == &PHI) |
121 | continue; |
122 | |
123 | if (isa<UndefValue>(Val: Incoming)) { |
124 | // Undef from loop backedge will not be replaced. |
125 | if (!DT->dominates(A: &BB, B: IncomingBB)) |
126 | Undefs.push_back(Elt: IncomingBB); |
127 | continue; |
128 | } |
129 | |
130 | if (!UniqueDefinedIncoming) { |
131 | UniqueDefinedIncoming = Incoming; |
132 | DominateBB = IncomingBB; |
133 | } else if (Incoming == UniqueDefinedIncoming) { |
134 | // Update DominateBB if necessary. |
135 | if (DT->dominates(A: IncomingBB, B: DominateBB)) |
136 | DominateBB = IncomingBB; |
137 | } else { |
138 | UniqueDefinedIncoming = nullptr; |
139 | break; |
140 | } |
141 | } |
142 | // We only need to replace the undef for the PHI which is merging |
143 | // defined/undefined values from divergent threads. |
144 | // TODO: We should still be able to replace undef value if the unique |
145 | // value is a Constant. |
146 | if (!UniqueDefinedIncoming || Undefs.empty() || |
147 | !UA.isDivergent(I: DominateBB->getTerminator())) |
148 | continue; |
149 | |
150 | // We only replace the undef when DominateBB truly dominates all the |
151 | // other predecessors with undefined incoming value. Make sure DominateBB |
152 | // dominates BB so that UniqueDefinedIncoming is available in BB and |
153 | // afterwards. |
154 | if (DT->dominates(A: DominateBB, B: &BB) && all_of(Range&: Undefs, P: [&](BasicBlock *UD) { |
155 | return DT->dominates(A: DominateBB, B: UD); |
156 | })) { |
157 | PHI.replaceAllUsesWith(V: UniqueDefinedIncoming); |
158 | ToBeDeleted.push_back(Elt: &PHI); |
159 | Changed = true; |
160 | } |
161 | } |
162 | } |
163 | |
164 | for (auto *PHI : ToBeDeleted) |
165 | PHI->eraseFromParent(); |
166 | |
167 | return Changed; |
168 | } |
169 | |
170 | bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) { |
171 | UniformityInfo &UA = |
172 | getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); |
173 | DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
174 | return rewritePHIs(F, UA, DT); |
175 | } |
176 | |
177 | PreservedAnalyses |
178 | AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) { |
179 | UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(IR&: F); |
180 | DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F); |
181 | bool Changed = rewritePHIs(F, UA, DT); |
182 | if (Changed) { |
183 | PreservedAnalyses PA; |
184 | PA.preserveSet<CFGAnalyses>(); |
185 | return PA; |
186 | } |
187 | |
188 | return PreservedAnalyses::all(); |
189 | } |
190 | |
191 | FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() { |
192 | return new AMDGPURewriteUndefForPHILegacy(); |
193 | } |
194 | |