//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass recursively promotes generic pointer arguments of a kernel
/// into the global address space.
///
/// The pass walks a kernel's pointer arguments and the loads from them. If a
/// loaded value is itself a pointer and that pointer is not clobbered in the
/// kernel before the load, the loaded pointer is promoted to the global
/// address space. The process then continues recursively on the promoted
/// pointer.
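///
/// For illustration, given a hypothetical kernel such as (simplified IR,
/// names are examples only):
///
///   define amdgpu_kernel void @k(ptr %arg) {
///     %p = load ptr, ptr %arg
///     %v = load float, ptr %p
///     ...
///   }
///
/// both %arg and the pointer %p loaded from it are cast to ptr addrspace(1)
/// and back to flat, and the loads are annotated with !amdgpu.noclobber, so
/// that InferAddressSpaces can later rewrite the accesses to use the global
/// address space.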
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"

using namespace llvm;

namespace {

class AMDGPUPromoteKernelArguments : public FunctionPass {
  MemorySSA *MSSA;

  AliasAnalysis *AA;

  Instruction *ArgCastInsertPt;

  SmallVector<Value *> Ptrs;

  void enqueueUsers(Value *Ptr);

  bool promotePointer(Value *Ptr);

  bool promoteLoad(LoadInst *LI);

public:
  static char ID;

  AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}

  bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

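// Collect loads from Ptr, looking through in-bounds GEPs, addrspacecasts and
// bitcasts, that are not clobbered before the load, and queue the loaded
// values for promotion.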
void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
  SmallVector<User *> PtrUsers(Ptr->users());

  while (!PtrUsers.empty()) {
    Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
    if (!U)
      continue;

    switch (U->getOpcode()) {
    default:
      break;
    case Instruction::Load: {
      LoadInst *LD = cast<LoadInst>(U);
      if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
          !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
        Ptrs.push_back(LD);

      break;
    }
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
    case Instruction::BitCast:
      if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
        PtrUsers.append(U->user_begin(), U->user_end());
      break;
    }
  }
}

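// Promote a single queued pointer: mark its defining load (if any) as
// unclobbered, queue loads of pointers reachable from it, and, if it is a
// flat pointer, insert an addrspacecast to global and back so that
// InferAddressSpaces can rewrite its users.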
bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
  bool Changed = false;

  LoadInst *LI = dyn_cast<LoadInst>(Ptr);
  if (LI)
    Changed |= promoteLoad(LI);

  PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
  if (!PT)
    return Changed;

  if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    enqueueUsers(Ptr);

  if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
    return Changed;

  IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
                   : ArgCastInsertPt);

  // Cast the pointer to the global address space and back to flat, and let
  // the InferAddressSpaces pass do all the necessary rewriting.
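  // For a pointer %p this produces, roughly (illustrative IR only):
  //   %p.global = addrspacecast ptr %p to ptr addrspace(1)
  //   %p.flat   = addrspacecast ptr addrspace(1) %p.global to ptr
  // and all other uses of %p are redirected to %p.flat.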
  PointerType *NewPT =
      PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
  Value *Cast =
      B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
  Value *CastBack =
      B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
  Ptr->replaceUsesWithIf(CastBack,
                         [Cast](Use &U) { return U.getUser() != Cast; });

  return true;
}

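// Annotate a simple (non-atomic, non-volatile) load with !amdgpu.noclobber
// metadata, recording that its memory is not clobbered within the kernel.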
bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
  if (!LI->isSimple())
    return false;

  LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
  return true;
}

// Skip over the static allocas at the start of the entry block to find the
// insertion point for the argument casts.
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
  BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
  for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);

    // If this is a dynamic alloca, the value may depend on the loaded
    // kernargs, so loads will need to be inserted before it.
    if (!AI || !AI->isStaticAlloca())
      break;
  }

  return InsPt;
}

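// Promote the flat/global/constant pointer arguments of a kernel and,
// transitively, the unclobbered pointers loaded from them.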
bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
                                       AliasAnalysis &AA) {
  if (skipFunction(F))
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  ArgCastInsertPt = &*getInsertPt(*F.begin());
  this->MSSA = &MSSA;
  this->AA = &AA;

  for (Argument &Arg : F.args()) {
    if (Arg.use_empty())
      continue;

    PointerType *PT = dyn_cast<PointerType>(Arg.getType());
    if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
      continue;

    Ptrs.push_back(&Arg);
  }

  bool Changed = false;
  while (!Ptrs.empty()) {
    Value *Ptr = Ptrs.pop_back_val();
    Changed |= promotePointer(Ptr);
  }

  return Changed;
}

bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  return run(F, MSSA, AA);
}

INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                      "AMDGPU Promote Kernel Arguments", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                    "AMDGPU Promote Kernel Arguments", false, false)

char AMDGPUPromoteKernelArguments::ID = 0;

FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
  return new AMDGPUPromoteKernelArguments();
}

PreservedAnalyses
AMDGPUPromoteKernelArgumentsPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    PA.preserve<MemorySSAAnalysis>();
    return PA;
  }
  return PreservedAnalyses::all();
}