1 | //===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // This file implements the idea to rewrite undef incoming operand for certain |
9 | // PHIs in structurized CFG. This pass only works on IR that has gone through |
10 | // StructurizedCFG pass, and this pass has some additional limitation that make |
11 | // it can only run after SIAnnotateControlFlow. |
12 | // |
13 | // To achieve optimal code generation for AMDGPU, we assume that uniformity |
14 | // analysis reports the PHI in join block of divergent branch as uniform if |
15 | // it has one unique uniform value plus additional undefined/poisoned incoming |
16 | // value. That is to say the later compiler pipeline will ensure such PHI always |
17 | // return uniform value and ensure it work correctly. Let's take a look at two |
18 | // typical patterns in structured CFG that need to be taken care: (In both |
19 | // patterns, block %if terminate with divergent branch.) |
20 | // |
21 | // Pattern A: Block with undefined incoming value dominates defined predecessor |
22 | // %if |
23 | // | \ |
24 | // | %then |
25 | // | / |
26 | // %endif: %phi = phi [%undef, %if], [%uniform, %then] |
27 | // |
28 | // Pattern B: Block with defined incoming value dominates undefined predecessor |
29 | // %if |
30 | // | \ |
31 | // | %then |
32 | // | / |
33 | // %endif: %phi = phi [%uniform, %if], [%undef, %then] |
34 | // |
35 | // For pattern A, by reporting %phi as uniform, the later pipeline need to make |
36 | // sure it be handled correctly. The backend usually allocates a scalar register |
37 | // and if any thread in a wave takes %then path, the scalar register will get |
38 | // the %uniform value. |
39 | // |
40 | // For pattern B, we will replace the undef operand with the other defined value |
41 | // in this pass. So the scalar register allocated for such PHI will get correct |
42 | // liveness. Without this transformation, the scalar register may be overwritten |
43 | // in the %then block. |
44 | // |
45 | // Limitation note: |
46 | // If the join block of divergent threads is a loop header, the pass cannot |
47 | // handle it correctly right now. For below case, the undef in %phi should also |
48 | // be rewritten. Currently we depend on SIAnnotateControlFlow to split %header |
49 | // block to get a separate join block, then we can rewrite the undef correctly. |
50 | // %if |
51 | // | \ |
52 | // | %then |
53 | // | / |
54 | // -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header] |
55 | // | | |
56 | // \--- |
57 | |
58 | #include "AMDGPU.h" |
59 | #include "llvm/Analysis/UniformityAnalysis.h" |
60 | #include "llvm/IR/BasicBlock.h" |
61 | #include "llvm/IR/Constants.h" |
62 | #include "llvm/IR/Dominators.h" |
63 | #include "llvm/IR/Instructions.h" |
64 | #include "llvm/InitializePasses.h" |
65 | |
66 | using namespace llvm; |
67 | |
68 | #define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi" |
69 | |
70 | namespace { |
71 | |
72 | class AMDGPURewriteUndefForPHILegacy : public FunctionPass { |
73 | public: |
74 | static char ID; |
75 | AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) { |
76 | initializeAMDGPURewriteUndefForPHILegacyPass(*PassRegistry::getPassRegistry()); |
77 | } |
78 | bool runOnFunction(Function &F) override; |
79 | StringRef getPassName() const override { |
80 | return "AMDGPU Rewrite Undef for PHI" ; |
81 | } |
82 | |
83 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
84 | AU.addRequired<UniformityInfoWrapperPass>(); |
85 | AU.addRequired<DominatorTreeWrapperPass>(); |
86 | |
87 | AU.addPreserved<DominatorTreeWrapperPass>(); |
88 | AU.setPreservesCFG(); |
89 | } |
90 | }; |
91 | |
92 | } // end anonymous namespace |
93 | char AMDGPURewriteUndefForPHILegacy::ID = 0; |
94 | |
95 | INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE, |
96 | "Rewrite undef for PHI" , false, false) |
97 | INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) |
98 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
99 | INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE, |
100 | "Rewrite undef for PHI" , false, false) |
101 | |
102 | bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) { |
103 | bool Changed = false; |
104 | SmallVector<PHINode *> ToBeDeleted; |
105 | for (auto &BB : F) { |
106 | for (auto &PHI : BB.phis()) { |
107 | if (UA.isDivergent(I: &PHI)) |
108 | continue; |
109 | |
110 | // The unique incoming value except undef/poison for the PHI node. |
111 | Value *UniqueDefinedIncoming = nullptr; |
112 | // The divergent block with defined incoming value that dominates all |
113 | // other block with the same incoming value. |
114 | BasicBlock *DominateBB = nullptr; |
115 | // Predecessors with undefined incoming value (excluding loop backedge). |
116 | SmallVector<BasicBlock *> Undefs; |
117 | |
118 | for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { |
119 | Value *Incoming = PHI.getIncomingValue(i); |
120 | BasicBlock *IncomingBB = PHI.getIncomingBlock(i); |
121 | |
122 | if (Incoming == &PHI) |
123 | continue; |
124 | |
125 | if (isa<UndefValue>(Val: Incoming)) { |
126 | // Undef from loop backedge will not be replaced. |
127 | if (!DT->dominates(A: &BB, B: IncomingBB)) |
128 | Undefs.push_back(Elt: IncomingBB); |
129 | continue; |
130 | } |
131 | |
132 | if (!UniqueDefinedIncoming) { |
133 | UniqueDefinedIncoming = Incoming; |
134 | DominateBB = IncomingBB; |
135 | } else if (Incoming == UniqueDefinedIncoming) { |
136 | // Update DominateBB if necessary. |
137 | if (DT->dominates(A: IncomingBB, B: DominateBB)) |
138 | DominateBB = IncomingBB; |
139 | } else { |
140 | UniqueDefinedIncoming = nullptr; |
141 | break; |
142 | } |
143 | } |
144 | // We only need to replace the undef for the PHI which is merging |
145 | // defined/undefined values from divergent threads. |
146 | // TODO: We should still be able to replace undef value if the unique |
147 | // value is a Constant. |
148 | if (!UniqueDefinedIncoming || Undefs.empty() || |
149 | !UA.isDivergent(I: DominateBB->getTerminator())) |
150 | continue; |
151 | |
152 | // We only replace the undef when DominateBB truly dominates all the |
153 | // other predecessors with undefined incoming value. Make sure DominateBB |
154 | // dominates BB so that UniqueDefinedIncoming is available in BB and |
155 | // afterwards. |
156 | if (DT->dominates(A: DominateBB, B: &BB) && all_of(Range&: Undefs, P: [&](BasicBlock *UD) { |
157 | return DT->dominates(A: DominateBB, B: UD); |
158 | })) { |
159 | PHI.replaceAllUsesWith(V: UniqueDefinedIncoming); |
160 | ToBeDeleted.push_back(Elt: &PHI); |
161 | Changed = true; |
162 | } |
163 | } |
164 | } |
165 | |
166 | for (auto *PHI : ToBeDeleted) |
167 | PHI->eraseFromParent(); |
168 | |
169 | return Changed; |
170 | } |
171 | |
172 | bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) { |
173 | UniformityInfo &UA = |
174 | getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); |
175 | DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
176 | return rewritePHIs(F, UA, DT); |
177 | } |
178 | |
179 | PreservedAnalyses |
180 | AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) { |
181 | UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(IR&: F); |
182 | DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F); |
183 | bool Changed = rewritePHIs(F, UA, DT); |
184 | if (Changed) { |
185 | PreservedAnalyses PA; |
186 | PA.preserveSet<CFGAnalyses>(); |
187 | return PA; |
188 | } |
189 | |
190 | return PreservedAnalyses::all(); |
191 | } |
192 | |
193 | FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() { |
194 | return new AMDGPURewriteUndefForPHILegacy(); |
195 | } |
196 | |