//===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file implements rewriting of undef incoming operands for certain PHIs
// in a structurized CFG. The pass only works on IR that has gone through the
// StructurizeCFG pass, and an additional limitation means it can only run
// after SIAnnotateControlFlow.
//
// To achieve optimal code generation for AMDGPU, we assume that uniformity
// analysis reports a PHI in the join block of a divergent branch as uniform if
// it has one unique uniform incoming value plus additional undefined/poisoned
// incoming values. That is to say, the later compiler pipeline must ensure
// such a PHI always returns a uniform value and works correctly. Let's take a
// look at two typical patterns in a structurized CFG that need to be taken
// care of. (In both patterns, block %if terminates with a divergent branch.)
//
// Pattern A: Block with undefined incoming value dominates defined predecessor
//   %if
//   | \
//   | %then
//   | /
//   %endif: %phi = phi [%undef, %if], [%uniform, %then]
//
// Pattern B: Block with defined incoming value dominates undefined predecessor
//   %if
//   | \
//   | %then
//   | /
//   %endif: %phi = phi [%uniform, %if], [%undef, %then]
//
// For pattern A, by reporting %phi as uniform, the later pipeline needs to
// make sure it is handled correctly. The backend usually allocates a scalar
// register, and if any thread in a wave takes the %then path, the scalar
// register will get the %uniform value.
//
// For pattern B, this pass replaces the undef operand with the other defined
// value, so the scalar register allocated for such a PHI gets the correct
// liveness. Without this transformation, the scalar register may be
// overwritten in the %then block.
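//
// As an illustration (hypothetical IR; value names match the diagram above),
// a pattern B PHI before this pass might look like:
//
//   %endif:
//     %phi = phi i32 [ %uniform, %if ], [ undef, %then ]
//     %use = add i32 %phi, 1
//
// After this pass, every use of %phi refers to %uniform directly and %phi is
// erased:
//
//   %endif:
//     %use = add i32 %uniform, 1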
//
// Limitation note:
// If the join block of divergent threads is a loop header, the pass cannot
// handle it correctly right now. For the case below, the undef in %phi should
// also be rewritten. Currently we depend on SIAnnotateControlFlow to split the
// %header block to get a separate join block, and then we can rewrite the
// undef correctly.
//      %if
//      | \
//      | %then
//      | /
// ->   %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
// |     |
// \-----

#include "AMDGPU.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"

namespace {

class AMDGPURewriteUndefForPHILegacy : public FunctionPass {
public:
  static char ID;
  AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Rewrite Undef for PHI";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<UniformityInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();

    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace
char AMDGPURewriteUndefForPHILegacy::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
                      "Rewrite undef for PHI", false, false)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
                    "Rewrite undef for PHI", false, false)

bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) {
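  // Overview: a PHI is rewritten when it is uniform, merges exactly one
  // defined incoming value with one or more undef/poison incomings, and the
  // block providing the defined value ends in a divergent branch and dominates
  // both the PHI's block and the undef predecessors. In that case all uses of
  // the PHI are replaced with the defined value; the PHIs themselves are
  // collected and erased after the scan to keep the iteration valid.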
  bool Changed = false;
  SmallVector<PHINode *> ToBeDeleted;
  for (auto &BB : F) {
    for (auto &PHI : BB.phis()) {
      if (UA.isDivergent(&PHI))
        continue;

      // The unique incoming value except undef/poison for the PHI node.
      Value *UniqueDefinedIncoming = nullptr;
      // The divergent block with defined incoming value that dominates all
      // other blocks with the same incoming value.
      BasicBlock *DominateBB = nullptr;
      // Predecessors with undefined incoming value (excluding loop backedge).
      SmallVector<BasicBlock *> Undefs;

      for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
        Value *Incoming = PHI.getIncomingValue(i);
        BasicBlock *IncomingBB = PHI.getIncomingBlock(i);
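        // A PHI may list itself as an incoming value, typically through a loop
        // backedge; such an incoming is neither a defined value to keep nor an
        // undef we can rewrite, so skip it.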
        if (Incoming == &PHI)
          continue;

        if (isa<UndefValue>(Incoming)) {
          // Undef from loop backedge will not be replaced.
          if (!DT->dominates(&BB, IncomingBB))
            Undefs.push_back(IncomingBB);
          continue;
        }

        if (!UniqueDefinedIncoming) {
          UniqueDefinedIncoming = Incoming;
          DominateBB = IncomingBB;
        } else if (Incoming == UniqueDefinedIncoming) {
          // Update DominateBB if necessary.
          if (DT->dominates(IncomingBB, DominateBB))
            DominateBB = IncomingBB;
        } else {
          UniqueDefinedIncoming = nullptr;
          break;
        }
      }
      // We only need to replace the undef for the PHI which is merging
      // defined/undefined values from divergent threads.
      // TODO: We should still be able to replace undef value if the unique
      // value is a Constant.
      if (!UniqueDefinedIncoming || Undefs.empty() ||
          !UA.isDivergent(DominateBB->getTerminator()))
        continue;

      // We only replace the undef when DominateBB truly dominates all the
      // other predecessors with undefined incoming value. Make sure DominateBB
      // dominates BB so that UniqueDefinedIncoming is available in BB and
      // afterwards.
      if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
            return DT->dominates(DominateBB, UD);
          })) {
        PHI.replaceAllUsesWith(UniqueDefinedIncoming);
        ToBeDeleted.push_back(&PHI);
        Changed = true;
      }
    }
  }

  for (auto *PHI : ToBeDeleted)
    PHI->eraseFromParent();

  return Changed;
}

bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) {
  UniformityInfo &UA =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  return rewritePHIs(F, UA, DT);
}

PreservedAnalyses
AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) {
  UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(F);
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  bool Changed = rewritePHIs(F, UA, DT);
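  // The rewrite only replaces uses of PHIs and erases the PHIs themselves; it
  // never changes the CFG, so CFG analyses (including the dominator tree)
  // remain valid.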
  if (Changed) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }

  return PreservedAnalyses::all();
}

FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() {
  return new AMDGPURewriteUndefForPHILegacy();
}