1//===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file implements the idea to rewrite undef incoming operand for certain
9// PHIs in structurized CFG. This pass only works on IR that has gone through
10// StructurizedCFG pass, and this pass has some additional limitation that make
11// it can only run after SIAnnotateControlFlow.
12//
13// To achieve optimal code generation for AMDGPU, we assume that uniformity
14// analysis reports the PHI in join block of divergent branch as uniform if
15// it has one unique uniform value plus additional undefined/poisoned incoming
16// value. That is to say the later compiler pipeline will ensure such PHI always
17// return uniform value and ensure it work correctly. Let's take a look at two
18// typical patterns in structured CFG that need to be taken care: (In both
19// patterns, block %if terminate with divergent branch.)
20//
21// Pattern A: Block with undefined incoming value dominates defined predecessor
22// %if
23// | \
24// | %then
25// | /
26// %endif: %phi = phi [%undef, %if], [%uniform, %then]
27//
28// Pattern B: Block with defined incoming value dominates undefined predecessor
29// %if
30// | \
31// | %then
32// | /
33// %endif: %phi = phi [%uniform, %if], [%undef, %then]
34//
35// For pattern A, by reporting %phi as uniform, the later pipeline need to make
36// sure it be handled correctly. The backend usually allocates a scalar register
37// and if any thread in a wave takes %then path, the scalar register will get
38// the %uniform value.
39//
40// For pattern B, we will replace the undef operand with the other defined value
41// in this pass. So the scalar register allocated for such PHI will get correct
42// liveness. Without this transformation, the scalar register may be overwritten
43// in the %then block.
44//
45// Limitation note:
46// If the join block of divergent threads is a loop header, the pass cannot
47// handle it correctly right now. For below case, the undef in %phi should also
48// be rewritten. Currently we depend on SIAnnotateControlFlow to split %header
49// block to get a separate join block, then we can rewrite the undef correctly.
50// %if
51// | \
52// | %then
53// | /
54// -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
55// | |
56// \---
57
58#include "AMDGPU.h"
59#include "llvm/Analysis/UniformityAnalysis.h"
60#include "llvm/IR/BasicBlock.h"
61#include "llvm/IR/Constants.h"
62#include "llvm/IR/Dominators.h"
63#include "llvm/IR/Instructions.h"
64#include "llvm/InitializePasses.h"
65
66using namespace llvm;
67
68#define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"
69
70namespace {
71
72class AMDGPURewriteUndefForPHILegacy : public FunctionPass {
73public:
74 static char ID;
75 AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) {
76 initializeAMDGPURewriteUndefForPHILegacyPass(*PassRegistry::getPassRegistry());
77 }
78 bool runOnFunction(Function &F) override;
79 StringRef getPassName() const override {
80 return "AMDGPU Rewrite Undef for PHI";
81 }
82
83 void getAnalysisUsage(AnalysisUsage &AU) const override {
84 AU.addRequired<UniformityInfoWrapperPass>();
85 AU.addRequired<DominatorTreeWrapperPass>();
86
87 AU.addPreserved<DominatorTreeWrapperPass>();
88 AU.setPreservesCFG();
89 }
90};
91
92} // end anonymous namespace
93char AMDGPURewriteUndefForPHILegacy::ID = 0;
94
95INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
96 "Rewrite undef for PHI", false, false)
97INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
98INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
99INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
100 "Rewrite undef for PHI", false, false)
101
102bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) {
103 bool Changed = false;
104 SmallVector<PHINode *> ToBeDeleted;
105 for (auto &BB : F) {
106 for (auto &PHI : BB.phis()) {
107 if (UA.isDivergent(I: &PHI))
108 continue;
109
110 // The unique incoming value except undef/poison for the PHI node.
111 Value *UniqueDefinedIncoming = nullptr;
112 // The divergent block with defined incoming value that dominates all
113 // other block with the same incoming value.
114 BasicBlock *DominateBB = nullptr;
115 // Predecessors with undefined incoming value (excluding loop backedge).
116 SmallVector<BasicBlock *> Undefs;
117
118 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
119 Value *Incoming = PHI.getIncomingValue(i);
120 BasicBlock *IncomingBB = PHI.getIncomingBlock(i);
121
122 if (Incoming == &PHI)
123 continue;
124
125 if (isa<UndefValue>(Val: Incoming)) {
126 // Undef from loop backedge will not be replaced.
127 if (!DT->dominates(A: &BB, B: IncomingBB))
128 Undefs.push_back(Elt: IncomingBB);
129 continue;
130 }
131
132 if (!UniqueDefinedIncoming) {
133 UniqueDefinedIncoming = Incoming;
134 DominateBB = IncomingBB;
135 } else if (Incoming == UniqueDefinedIncoming) {
136 // Update DominateBB if necessary.
137 if (DT->dominates(A: IncomingBB, B: DominateBB))
138 DominateBB = IncomingBB;
139 } else {
140 UniqueDefinedIncoming = nullptr;
141 break;
142 }
143 }
144 // We only need to replace the undef for the PHI which is merging
145 // defined/undefined values from divergent threads.
146 // TODO: We should still be able to replace undef value if the unique
147 // value is a Constant.
148 if (!UniqueDefinedIncoming || Undefs.empty() ||
149 !UA.isDivergent(I: DominateBB->getTerminator()))
150 continue;
151
152 // We only replace the undef when DominateBB truly dominates all the
153 // other predecessors with undefined incoming value. Make sure DominateBB
154 // dominates BB so that UniqueDefinedIncoming is available in BB and
155 // afterwards.
156 if (DT->dominates(A: DominateBB, B: &BB) && all_of(Range&: Undefs, P: [&](BasicBlock *UD) {
157 return DT->dominates(A: DominateBB, B: UD);
158 })) {
159 PHI.replaceAllUsesWith(V: UniqueDefinedIncoming);
160 ToBeDeleted.push_back(Elt: &PHI);
161 Changed = true;
162 }
163 }
164 }
165
166 for (auto *PHI : ToBeDeleted)
167 PHI->eraseFromParent();
168
169 return Changed;
170}
171
172bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) {
173 UniformityInfo &UA =
174 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
175 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
176 return rewritePHIs(F, UA, DT);
177}
178
179PreservedAnalyses
180AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) {
181 UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(IR&: F);
182 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F);
183 bool Changed = rewritePHIs(F, UA, DT);
184 if (Changed) {
185 PreservedAnalyses PA;
186 PA.preserveSet<CFGAnalyses>();
187 return PA;
188 }
189
190 return PreservedAnalyses::all();
191}
192
193FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() {
194 return new AMDGPURewriteUndefForPHILegacy();
195}
196