//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass recursively promotes generic pointer arguments of a kernel
/// into the global address space.
///
/// The pass walks a kernel's pointer arguments and the loads made through
/// them. If a loaded value is itself a pointer and the memory it is loaded
/// from is not clobbered in the kernel before the load, the loaded pointer is
/// promoted to the global address space. The promotion then continues
/// recursively through the newly promoted pointers.
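///
/// As a rough sketch (illustrative only, not an exact test case): for a flat
/// pointer kernel argument %arg, a simple load of a pointer through it,
///
///   %p = load ptr, ptr %arg
///
/// provided the loaded memory is not clobbered beforehand, is annotated with
/// !amdgpu.noclobber metadata and followed by a round-trip address space cast
///
///   %p.global = addrspacecast ptr %p to ptr addrspace(1)
///   %p.flat = addrspacecast ptr addrspace(1) %p.global to ptr
///
/// after which the remaining uses of %p are rewritten to use %p.flat. The
/// actual pointer rewriting is left to the InferAddressSpaces pass.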
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"

using namespace llvm;

namespace {

class AMDGPUPromoteKernelArguments : public FunctionPass {
  MemorySSA *MSSA;

  AliasAnalysis *AA;

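  // Insertion point for the address space casts of promoted kernel arguments:
  // the start of the entry block, past any static allocas.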
  Instruction *ArgCastInsertPt;

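  // Worklist of pointer values that are candidates for promotion.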
  SmallVector<Value *> Ptrs;

  void enqueueUsers(Value *Ptr);

  bool promotePointer(Value *Ptr);

  bool promoteLoad(LoadInst *LI);

public:
  static char ID;

  AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}

  bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

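// Walk the users of Ptr, looking through GEPs, address space casts and
// bitcasts, and queue for promotion any load from Ptr whose memory is not
// clobbered in the function before the load executes.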
void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
  SmallVector<User *> PtrUsers(Ptr->users());

  while (!PtrUsers.empty()) {
    Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
    if (!U)
      continue;

    switch (U->getOpcode()) {
    default:
      break;
    case Instruction::Load: {
      LoadInst *LD = cast<LoadInst>(U);
      if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
          !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
        Ptrs.push_back(LD);

      break;
    }
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
    case Instruction::BitCast:
      if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
        PtrUsers.append(U->user_begin(), U->user_end());
      break;
    }
  }
}

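// Promote a single worklist entry: if it is a load, mark it as unclobbered;
// queue the unclobbered loads made through it; and, if it is a flat pointer,
// cast it to the global address space and back so that InferAddressSpaces can
// rewrite its users.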
bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
  bool Changed = false;

  LoadInst *LI = dyn_cast<LoadInst>(Ptr);
  if (LI)
    Changed |= promoteLoad(LI);

  PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
  if (!PT)
    return Changed;

  if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    enqueueUsers(Ptr);

  if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
    return Changed;

  IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
                   : ArgCastInsertPt);

  // Cast the pointer to the global address space and back to flat, and let
  // the InferAddressSpaces pass do all the necessary rewriting.
  PointerType *NewPT =
      PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
  Value *Cast =
      B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
  Value *CastBack =
      B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
  Ptr->replaceUsesWithIf(CastBack,
                         [Cast](Use &U) { return U.getUser() != Cast; });

  return true;
}

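// Record that a simple load is not clobbered before it executes in this
// kernel by attaching empty !amdgpu.noclobber metadata to it.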
bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
  if (!LI->isSimple())
    return false;

  LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
  return true;
}

// Pick an insertion point in the entry block that is past any leading static
// allocas.
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
  BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
  for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);

    // If this is a dynamic alloca, the value may depend on the loaded
    // kernargs, so loads will need to be inserted before it.
    if (!AI || !AI->isStaticAlloca())
      break;
  }

  return InsPt;
}

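// Seed the worklist with the kernel's flat, global and constant pointer
// arguments, then promote entries until the worklist is empty.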
bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
                                       AliasAnalysis &AA) {
  if (skipFunction(F))
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  ArgCastInsertPt = &*getInsertPt(*F.begin());
  this->MSSA = &MSSA;
  this->AA = &AA;

  for (Argument &Arg : F.args()) {
    if (Arg.use_empty())
      continue;

    PointerType *PT = dyn_cast<PointerType>(Arg.getType());
    if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
      continue;

    Ptrs.push_back(&Arg);
  }

  bool Changed = false;
  while (!Ptrs.empty()) {
    Value *Ptr = Ptrs.pop_back_val();
    Changed |= promotePointer(Ptr);
  }

  return Changed;
}

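// Legacy pass manager entry point.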
bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  return run(F, MSSA, AA);
}

INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                      "AMDGPU Promote Kernel Arguments", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                    "AMDGPU Promote Kernel Arguments", false, false)

char AMDGPUPromoteKernelArguments::ID = 0;

FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
  return new AMDGPUPromoteKernelArguments();
}

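// New pass manager entry point: if the promotion changed anything, only the
// CFG analyses and MemorySSA are reported as preserved.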
PreservedAnalyses
AMDGPUPromoteKernelArgumentsPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    PA.preserve<MemorySSAAnalysis>();
    return PA;
  }
  return PreservedAnalyses::all();
}