| 1 | //===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // This file implements the idea to rewrite undef incoming operand for certain |
| 9 | // PHIs in structurized CFG. This pass only works on IR that has gone through |
| 10 | // StructurizedCFG pass, and this pass has some additional limitation that make |
| 11 | // it can only run after SIAnnotateControlFlow. |
| 12 | // |
| 13 | // To achieve optimal code generation for AMDGPU, we assume that uniformity |
| 14 | // analysis reports the PHI in join block of divergent branch as uniform if |
| 15 | // it has one unique uniform value plus additional undefined/poisoned incoming |
| 16 | // value. That is to say the later compiler pipeline will ensure such PHI always |
| 17 | // return uniform value and ensure it work correctly. Let's take a look at two |
| 18 | // typical patterns in structured CFG that need to be taken care: (In both |
| 19 | // patterns, block %if terminate with divergent branch.) |
| 20 | // |
| 21 | // Pattern A: Block with undefined incoming value dominates defined predecessor |
| 22 | // %if |
| 23 | // | \ |
| 24 | // | %then |
| 25 | // | / |
| 26 | // %endif: %phi = phi [%undef, %if], [%uniform, %then] |
| 27 | // |
| 28 | // Pattern B: Block with defined incoming value dominates undefined predecessor |
| 29 | // %if |
| 30 | // | \ |
| 31 | // | %then |
| 32 | // | / |
| 33 | // %endif: %phi = phi [%uniform, %if], [%undef, %then] |
| 34 | // |
| 35 | // For pattern A, by reporting %phi as uniform, the later pipeline need to make |
| 36 | // sure it be handled correctly. The backend usually allocates a scalar register |
| 37 | // and if any thread in a wave takes %then path, the scalar register will get |
| 38 | // the %uniform value. |
| 39 | // |
| 40 | // For pattern B, we will replace the undef operand with the other defined value |
| 41 | // in this pass. So the scalar register allocated for such PHI will get correct |
| 42 | // liveness. Without this transformation, the scalar register may be overwritten |
| 43 | // in the %then block. |
| 44 | // |
| 45 | // Limitation note: |
| 46 | // If the join block of divergent threads is a loop header, the pass cannot |
| 47 | // handle it correctly right now. For below case, the undef in %phi should also |
| 48 | // be rewritten. Currently we depend on SIAnnotateControlFlow to split %header |
| 49 | // block to get a separate join block, then we can rewrite the undef correctly. |
| 50 | // %if |
| 51 | // | \ |
| 52 | // | %then |
| 53 | // | / |
| 54 | // -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header] |
| 55 | // | | |
| 56 | // \--- |
| 57 | |
| 58 | #include "AMDGPU.h" |
| 59 | #include "llvm/Analysis/UniformityAnalysis.h" |
| 60 | #include "llvm/IR/BasicBlock.h" |
| 61 | #include "llvm/IR/Constants.h" |
| 62 | #include "llvm/IR/Dominators.h" |
| 63 | #include "llvm/IR/Instructions.h" |
| 64 | #include "llvm/InitializePasses.h" |
| 65 | |
| 66 | using namespace llvm; |
| 67 | |
| 68 | #define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi" |
| 69 | |
| 70 | namespace { |
| 71 | |
| 72 | class AMDGPURewriteUndefForPHILegacy : public FunctionPass { |
| 73 | public: |
| 74 | static char ID; |
| 75 | AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) {} |
| 76 | bool runOnFunction(Function &F) override; |
| 77 | StringRef getPassName() const override { |
| 78 | return "AMDGPU Rewrite Undef for PHI" ; |
| 79 | } |
| 80 | |
| 81 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 82 | AU.addRequired<UniformityInfoWrapperPass>(); |
| 83 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 84 | |
| 85 | AU.addPreserved<DominatorTreeWrapperPass>(); |
| 86 | AU.setPreservesCFG(); |
| 87 | } |
| 88 | }; |
| 89 | |
| 90 | } // end anonymous namespace |
| 91 | char AMDGPURewriteUndefForPHILegacy::ID = 0; |
| 92 | |
| 93 | INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE, |
| 94 | "Rewrite undef for PHI" , false, false) |
| 95 | INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) |
| 96 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
| 97 | INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE, |
| 98 | "Rewrite undef for PHI" , false, false) |
| 99 | |
| 100 | bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) { |
| 101 | bool Changed = false; |
| 102 | SmallVector<PHINode *> ToBeDeleted; |
| 103 | for (auto &BB : F) { |
| 104 | for (auto &PHI : BB.phis()) { |
| 105 | if (UA.isDivergent(I: &PHI)) |
| 106 | continue; |
| 107 | |
| 108 | // The unique incoming value except undef/poison for the PHI node. |
| 109 | Value *UniqueDefinedIncoming = nullptr; |
| 110 | // The divergent block with defined incoming value that dominates all |
| 111 | // other block with the same incoming value. |
| 112 | BasicBlock *DominateBB = nullptr; |
| 113 | // Predecessors with undefined incoming value (excluding loop backedge). |
| 114 | SmallVector<BasicBlock *> Undefs; |
| 115 | |
| 116 | for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { |
| 117 | Value *Incoming = PHI.getIncomingValue(i); |
| 118 | BasicBlock *IncomingBB = PHI.getIncomingBlock(i); |
| 119 | |
| 120 | if (Incoming == &PHI) |
| 121 | continue; |
| 122 | |
| 123 | if (isa<UndefValue>(Val: Incoming)) { |
| 124 | // Undef from loop backedge will not be replaced. |
| 125 | if (!DT->dominates(A: &BB, B: IncomingBB)) |
| 126 | Undefs.push_back(Elt: IncomingBB); |
| 127 | continue; |
| 128 | } |
| 129 | |
| 130 | if (!UniqueDefinedIncoming) { |
| 131 | UniqueDefinedIncoming = Incoming; |
| 132 | DominateBB = IncomingBB; |
| 133 | } else if (Incoming == UniqueDefinedIncoming) { |
| 134 | // Update DominateBB if necessary. |
| 135 | if (DT->dominates(A: IncomingBB, B: DominateBB)) |
| 136 | DominateBB = IncomingBB; |
| 137 | } else { |
| 138 | UniqueDefinedIncoming = nullptr; |
| 139 | break; |
| 140 | } |
| 141 | } |
| 142 | // We only need to replace the undef for the PHI which is merging |
| 143 | // defined/undefined values from divergent threads. |
| 144 | // TODO: We should still be able to replace undef value if the unique |
| 145 | // value is a Constant. |
| 146 | if (!UniqueDefinedIncoming || Undefs.empty() || |
| 147 | !UA.isDivergent(I: DominateBB->getTerminator())) |
| 148 | continue; |
| 149 | |
| 150 | // We only replace the undef when DominateBB truly dominates all the |
| 151 | // other predecessors with undefined incoming value. Make sure DominateBB |
| 152 | // dominates BB so that UniqueDefinedIncoming is available in BB and |
| 153 | // afterwards. |
| 154 | if (DT->dominates(A: DominateBB, B: &BB) && all_of(Range&: Undefs, P: [&](BasicBlock *UD) { |
| 155 | return DT->dominates(A: DominateBB, B: UD); |
| 156 | })) { |
| 157 | PHI.replaceAllUsesWith(V: UniqueDefinedIncoming); |
| 158 | ToBeDeleted.push_back(Elt: &PHI); |
| 159 | Changed = true; |
| 160 | } |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | for (auto *PHI : ToBeDeleted) |
| 165 | PHI->eraseFromParent(); |
| 166 | |
| 167 | return Changed; |
| 168 | } |
| 169 | |
| 170 | bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) { |
| 171 | UniformityInfo &UA = |
| 172 | getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); |
| 173 | DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
| 174 | return rewritePHIs(F, UA, DT); |
| 175 | } |
| 176 | |
| 177 | PreservedAnalyses |
| 178 | AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) { |
| 179 | UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(IR&: F); |
| 180 | DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F); |
| 181 | bool Changed = rewritePHIs(F, UA, DT); |
| 182 | if (Changed) { |
| 183 | PreservedAnalyses PA; |
| 184 | PA.preserveSet<CFGAnalyses>(); |
| 185 | return PA; |
| 186 | } |
| 187 | |
| 188 | return PreservedAnalyses::all(); |
| 189 | } |
| 190 | |
| 191 | FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() { |
| 192 | return new AMDGPURewriteUndefForPHILegacy(); |
| 193 | } |
| 194 | |