1 | //===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // This file implements two passes that enable HIP C++ Standard Parallelism |
9 | // Support: |
10 | // |
11 | // 1. AcceleratorCodeSelection (required): Given that only algorithms are |
12 | // accelerated, and that the accelerated implementation exists in the form of |
13 | // a compute kernel, we assume that only the kernel, and all functions |
14 | // reachable from it, constitute code that the user expects the accelerator |
15 | // to execute. Thus, we identify the set of all functions reachable from |
16 | // kernels, and then remove all unreachable ones. This last part is necessary |
17 | // because it is possible for code that the user did not expect to execute on |
18 | // an accelerator to contain constructs that cannot be handled by the target |
19 | // BE, which cannot be provably demonstrated to be dead code in general, and |
20 | // thus can lead to mis-compilation. The degenerate case of this is when a |
21 | // Module contains no kernels (the parent TU had no algorithm invocations fit |
22 | // for acceleration), which we handle by completely emptying said module. |
23 | // **NOTE**: The above does not handle indirectly reachable functions i.e. |
24 | // it is possible to obtain a case where the target of an indirect |
25 | // call is otherwise unreachable and thus is removed; this |
26 | // restriction is aligned with the current `-hipstdpar` limitations |
27 | // and will be relaxed in the future. |
28 | // |
29 | // 2. AllocationInterposition (required only when on-demand paging is |
30 | // unsupported): Some accelerators or operating systems might not support |
31 | // transparent on-demand paging. Thus, they would only be able to access |
32 | // memory that is allocated by an accelerator-aware mechanism. For such cases |
33 | // the user can opt into enabling allocation / deallocation interposition, |
34 | // whereby we replace calls to known allocation / deallocation functions with |
35 | // calls to runtime implemented equivalents that forward the requests to |
36 | // accelerator-aware interfaces. We also support freeing system allocated |
37 | // memory that ends up in one of the runtime equivalents, since this can |
38 | // happen if e.g. a library that was compiled without interposition returns |
39 | // an allocation that can be validly passed to `free`. |
40 | //===----------------------------------------------------------------------===// |
41 | |
42 | #include "llvm/Transforms/HipStdPar/HipStdPar.h" |
43 | |
44 | #include "llvm/ADT/STLExtras.h" |
45 | #include "llvm/ADT/SmallPtrSet.h" |
46 | #include "llvm/ADT/SmallVector.h" |
47 | #include "llvm/Analysis/CallGraph.h" |
48 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
49 | #include "llvm/IR/Constants.h" |
50 | #include "llvm/IR/Function.h" |
51 | #include "llvm/IR/Module.h" |
52 | #include "llvm/Transforms/Utils/ModuleUtils.h" |
53 | |
54 | #include <cassert> |
55 | #include <string> |
56 | #include <utility> |
57 | |
58 | using namespace llvm; |
59 | |
60 | template<typename T> |
61 | static inline void eraseFromModule(T &ToErase) { |
62 | ToErase.replaceAllUsesWith(PoisonValue::get(T: ToErase.getType())); |
63 | ToErase.eraseFromParent(); |
64 | } |
65 | |
66 | static inline bool checkIfSupported(GlobalVariable &G) { |
67 | if (!G.isThreadLocal()) |
68 | return true; |
69 | |
70 | G.dropDroppableUses(); |
71 | |
72 | if (!G.isConstantUsed()) |
73 | return true; |
74 | |
75 | std::string W; |
76 | raw_string_ostream OS(W); |
77 | |
78 | OS << "Accelerator does not support the thread_local variable " |
79 | << G.getName(); |
80 | |
81 | Instruction *I = nullptr; |
82 | SmallVector<User *> Tmp(G.users()); |
83 | SmallPtrSet<User *, 5> Visited; |
84 | do { |
85 | auto U = std::move(Tmp.back()); |
86 | Tmp.pop_back(); |
87 | |
88 | if (!Visited.insert(Ptr: U).second) |
89 | continue; |
90 | |
91 | if (isa<Instruction>(Val: U)) |
92 | I = cast<Instruction>(Val: U); |
93 | else |
94 | Tmp.insert(I: Tmp.end(), From: U->user_begin(), To: U->user_end()); |
95 | } while (!I && !Tmp.empty()); |
96 | |
97 | assert(I && "thread_local global should have at least one non-constant use." ); |
98 | |
99 | G.getContext().diagnose( |
100 | DI: DiagnosticInfoUnsupported(*I->getParent()->getParent(), W, |
101 | I->getDebugLoc(), DS_Error)); |
102 | |
103 | return false; |
104 | } |
105 | |
106 | static inline void clearModule(Module &M) { // TODO: simplify. |
107 | while (!M.functions().empty()) |
108 | eraseFromModule(ToErase&: *M.begin()); |
109 | while (!M.globals().empty()) |
110 | eraseFromModule(ToErase&: *M.globals().begin()); |
111 | while (!M.aliases().empty()) |
112 | eraseFromModule(ToErase&: *M.aliases().begin()); |
113 | while (!M.ifuncs().empty()) |
114 | eraseFromModule(ToErase&: *M.ifuncs().begin()); |
115 | } |
116 | |
117 | static inline void maybeHandleGlobals(Module &M) { |
118 | unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace(); |
119 | for (auto &&G : M.globals()) { // TODO: should we handle these in the FE? |
120 | if (!checkIfSupported(G)) |
121 | return clearModule(M); |
122 | |
123 | if (G.isThreadLocal()) |
124 | continue; |
125 | if (G.isConstant()) |
126 | continue; |
127 | if (G.getAddressSpace() != GlobAS) |
128 | continue; |
129 | if (G.getLinkage() != GlobalVariable::ExternalLinkage) |
130 | continue; |
131 | |
132 | G.setLinkage(GlobalVariable::ExternalWeakLinkage); |
133 | G.setInitializer(nullptr); |
134 | G.setExternallyInitialized(true); |
135 | } |
136 | } |
137 | |
138 | template<unsigned N> |
139 | static inline void removeUnreachableFunctions( |
140 | const SmallPtrSet<const Function *, N>& Reachable, Module &M) { |
141 | removeFromUsedLists(M, [&](Constant *C) { |
142 | if (auto F = dyn_cast<Function>(Val: C)) |
143 | return !Reachable.contains(F); |
144 | |
145 | return false; |
146 | }); |
147 | |
148 | SmallVector<std::reference_wrapper<Function>> ToRemove; |
149 | copy_if(M, std::back_inserter(x&: ToRemove), [&](auto &&F) { |
150 | return !F.isIntrinsic() && !Reachable.contains(&F); |
151 | }); |
152 | |
153 | for_each(Range&: ToRemove, F: eraseFromModule<Function>); |
154 | } |
155 | |
156 | static inline bool isAcceleratorExecutionRoot(const Function *F) { |
157 | if (!F) |
158 | return false; |
159 | |
160 | return F->getCallingConv() == CallingConv::AMDGPU_KERNEL; |
161 | } |
162 | |
163 | static inline bool checkIfSupported(const Function *F, const CallBase *CB) { |
164 | const auto Dx = F->getName().rfind(Str: "__hipstdpar_unsupported" ); |
165 | |
166 | if (Dx == StringRef::npos) |
167 | return true; |
168 | |
169 | const auto N = F->getName().substr(Start: 0, N: Dx); |
170 | |
171 | std::string W; |
172 | raw_string_ostream OS(W); |
173 | |
174 | if (N == "__ASM" ) |
175 | OS << "Accelerator does not support the ASM block:\n" |
176 | << cast<ConstantDataArray>(Val: CB->getArgOperand(i: 0))->getAsCString(); |
177 | else |
178 | OS << "Accelerator does not support the " << N << " function." ; |
179 | |
180 | auto Caller = CB->getParent()->getParent(); |
181 | |
182 | Caller->getContext().diagnose( |
183 | DI: DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error)); |
184 | |
185 | return false; |
186 | } |
187 | |
188 | PreservedAnalyses |
189 | HipStdParAcceleratorCodeSelectionPass::run(Module &M, |
190 | ModuleAnalysisManager &MAM) { |
191 | auto &CGA = MAM.getResult<CallGraphAnalysis>(IR&: M); |
192 | |
193 | SmallPtrSet<const Function *, 32> Reachable; |
194 | for (auto &&CGN : CGA) { |
195 | if (!isAcceleratorExecutionRoot(F: CGN.first)) |
196 | continue; |
197 | |
198 | Reachable.insert(Ptr: CGN.first); |
199 | |
200 | SmallVector<const Function *> Tmp({CGN.first}); |
201 | do { |
202 | auto F = std::move(Tmp.back()); |
203 | Tmp.pop_back(); |
204 | |
205 | for (auto &&N : *CGA[F]) { |
206 | if (!N.second) |
207 | continue; |
208 | if (!N.second->getFunction()) |
209 | continue; |
210 | if (Reachable.contains(Ptr: N.second->getFunction())) |
211 | continue; |
212 | |
213 | if (!checkIfSupported(F: N.second->getFunction(), |
214 | CB: dyn_cast<CallBase>(Val&: *N.first))) |
215 | return PreservedAnalyses::none(); |
216 | |
217 | Reachable.insert(Ptr: N.second->getFunction()); |
218 | Tmp.push_back(Elt: N.second->getFunction()); |
219 | } |
220 | } while (!std::empty(cont: Tmp)); |
221 | } |
222 | |
223 | if (std::empty(cont: Reachable)) |
224 | clearModule(M); |
225 | else |
226 | removeUnreachableFunctions(Reachable, M); |
227 | |
228 | maybeHandleGlobals(M); |
229 | |
230 | return PreservedAnalyses::none(); |
231 | } |
232 | |
233 | static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{ |
234 | {"aligned_alloc" , "__hipstdpar_aligned_alloc" }, |
235 | {"calloc" , "__hipstdpar_calloc" }, |
236 | {"free" , "__hipstdpar_free" }, |
237 | {"malloc" , "__hipstdpar_malloc" }, |
238 | {"memalign" , "__hipstdpar_aligned_alloc" }, |
239 | {"mmap" , "__hipstdpar_mmap" }, |
240 | {"munmap" , "__hipstdpar_munmap" }, |
241 | {"posix_memalign" , "__hipstdpar_posix_aligned_alloc" }, |
242 | {"realloc" , "__hipstdpar_realloc" }, |
243 | {"reallocarray" , "__hipstdpar_realloc_array" }, |
244 | {"_ZdaPv" , "__hipstdpar_operator_delete" }, |
245 | {"_ZdaPvm" , "__hipstdpar_operator_delete_sized" }, |
246 | {"_ZdaPvSt11align_val_t" , "__hipstdpar_operator_delete_aligned" }, |
247 | {"_ZdaPvmSt11align_val_t" , "__hipstdpar_operator_delete_aligned_sized" }, |
248 | {"_ZdlPv" , "__hipstdpar_operator_delete" }, |
249 | {"_ZdlPvm" , "__hipstdpar_operator_delete_sized" }, |
250 | {"_ZdlPvSt11align_val_t" , "__hipstdpar_operator_delete_aligned" }, |
251 | {"_ZdlPvmSt11align_val_t" , "__hipstdpar_operator_delete_aligned_sized" }, |
252 | {"_Znam" , "__hipstdpar_operator_new" }, |
253 | {"_ZnamRKSt9nothrow_t" , "__hipstdpar_operator_new_nothrow" }, |
254 | {"_ZnamSt11align_val_t" , "__hipstdpar_operator_new_aligned" }, |
255 | {"_ZnamSt11align_val_tRKSt9nothrow_t" , |
256 | "__hipstdpar_operator_new_aligned_nothrow" }, |
257 | |
258 | {"_Znwm" , "__hipstdpar_operator_new" }, |
259 | {"_ZnwmRKSt9nothrow_t" , "__hipstdpar_operator_new_nothrow" }, |
260 | {"_ZnwmSt11align_val_t" , "__hipstdpar_operator_new_aligned" }, |
261 | {"_ZnwmSt11align_val_tRKSt9nothrow_t" , |
262 | "__hipstdpar_operator_new_aligned_nothrow" }, |
263 | {"__builtin_calloc" , "__hipstdpar_calloc" }, |
264 | {"__builtin_free" , "__hipstdpar_free" }, |
265 | {"__builtin_malloc" , "__hipstdpar_malloc" }, |
266 | {"__builtin_operator_delete" , "__hipstdpar_operator_delete" }, |
267 | {"__builtin_operator_new" , "__hipstdpar_operator_new" }, |
268 | {"__builtin_realloc" , "__hipstdpar_realloc" }, |
269 | {"__libc_calloc" , "__hipstdpar_calloc" }, |
270 | {"__libc_free" , "__hipstdpar_free" }, |
271 | {"__libc_malloc" , "__hipstdpar_malloc" }, |
272 | {"__libc_memalign" , "__hipstdpar_aligned_alloc" }, |
273 | {"__libc_realloc" , "__hipstdpar_realloc" }}; |
274 | |
275 | static constexpr std::pair<StringLiteral, StringLiteral> HiddenMap[]{ |
276 | // hidden_malloc and hidden_free are only kept for backwards compatibility / |
277 | // legacy purposes, and we should remove them in the future |
278 | {"__hipstdpar_hidden_malloc" , "__libc_malloc" }, |
279 | {"__hipstdpar_hidden_free" , "__libc_free" }, |
280 | {"__hipstdpar_hidden_memalign" , "__libc_memalign" }, |
281 | {"__hipstdpar_hidden_mmap" , "mmap" }, |
282 | {"__hipstdpar_hidden_munmap" , "munmap" }}; |
283 | |
284 | PreservedAnalyses |
285 | HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) { |
286 | SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(cont: ReplaceMap), |
287 | std::cend(cont: ReplaceMap)); |
288 | |
289 | for (auto &&F : M) { |
290 | if (!F.hasName()) |
291 | continue; |
292 | auto It = AllocReplacements.find(Val: F.getName()); |
293 | if (It == AllocReplacements.end()) |
294 | continue; |
295 | |
296 | if (auto R = M.getFunction(Name: It->second)) { |
297 | F.replaceAllUsesWith(V: R); |
298 | } else { |
299 | std::string W; |
300 | raw_string_ostream OS(W); |
301 | |
302 | OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()] |
303 | << ". Tried to run the allocation interposition pass without the " |
304 | << "replacement functions available." ; |
305 | |
306 | F.getContext().diagnose(DI: DiagnosticInfoUnsupported(F, W, |
307 | F.getSubprogram(), |
308 | DS_Warning)); |
309 | } |
310 | } |
311 | |
312 | for (auto &&HR : HiddenMap) { |
313 | if (auto F = M.getFunction(Name: HR.first)) { |
314 | auto R = M.getOrInsertFunction(Name: HR.second, T: F->getFunctionType(), |
315 | AttributeList: F->getAttributes()); |
316 | F->replaceAllUsesWith(V: R.getCallee()); |
317 | |
318 | eraseFromModule(ToErase&: *F); |
319 | } |
320 | } |
321 | |
322 | return PreservedAnalyses::none(); |
323 | } |
324 | |