1//===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
// This file implements three passes that enable HIP C++ Standard Parallelism
9// Support:
10//
11// 1. AcceleratorCodeSelection (required): Given that only algorithms are
12// accelerated, and that the accelerated implementation exists in the form of
13// a compute kernel, we assume that only the kernel, and all functions
14// reachable from it, constitute code that the user expects the accelerator
15// to execute. Thus, we identify the set of all functions reachable from
16// kernels, and then remove all unreachable ones. This last part is necessary
17// because it is possible for code that the user did not expect to execute on
18// an accelerator to contain constructs that cannot be handled by the target
19// BE, which cannot be provably demonstrated to be dead code in general, and
20// thus can lead to mis-compilation. The degenerate case of this is when a
21// Module contains no kernels (the parent TU had no algorithm invocations fit
22// for acceleration), which we handle by completely emptying said module.
23// **NOTE**: The above does not handle indirectly reachable functions i.e.
24// it is possible to obtain a case where the target of an indirect
25// call is otherwise unreachable and thus is removed; this
26// restriction is aligned with the current `-hipstdpar` limitations
27// and will be relaxed in the future.
28//
29// 2. AllocationInterposition (required only when on-demand paging is
30// unsupported): Some accelerators or operating systems might not support
31// transparent on-demand paging. Thus, they would only be able to access
32// memory that is allocated by an accelerator-aware mechanism. For such cases
33// the user can opt into enabling allocation / deallocation interposition,
34// whereby we replace calls to known allocation / deallocation functions with
35// calls to runtime implemented equivalents that forward the requests to
36// accelerator-aware interfaces. We also support freeing system allocated
37// memory that ends up in one of the runtime equivalents, since this can
38// happen if e.g. a library that was compiled without interposition returns
39// an allocation that can be validly passed to `free`.
40//
41// 3. MathFixup (required): Some accelerators might have an incomplete
42// implementation for the intrinsics used to implement some of the math
43// functions in <cmath> / their corresponding libcall lowerings. Since this
44// can vary quite significantly between accelerators, we replace calls to a
45// set of intrinsics / lib functions known to be problematic with calls to a
// HIPSTDPAR specific forwarding layer, which gives a uniform interface for
47// accelerators to implement in their own runtime components. This pass
48// should run before AcceleratorCodeSelection so as to prevent the spurious
49// removal of the HIPSTDPAR specific forwarding functions.
50//===----------------------------------------------------------------------===//
51
52#include "llvm/Transforms/HipStdPar/HipStdPar.h"
53
54#include "llvm/ADT/STLExtras.h"
55#include "llvm/ADT/SmallPtrSet.h"
56#include "llvm/ADT/SmallVector.h"
57#include "llvm/Analysis/CallGraph.h"
58#include "llvm/Analysis/OptimizationRemarkEmitter.h"
59#include "llvm/IR/Constants.h"
60#include "llvm/IR/Function.h"
61#include "llvm/IR/IRBuilder.h"
62#include "llvm/IR/Intrinsics.h"
63#include "llvm/IR/Module.h"
64#include "llvm/Transforms/Utils/ModuleUtils.h"
65
66#include <cassert>
67#include <string>
68#include <utility>
69
70using namespace llvm;
71
72template<typename T>
73static inline void eraseFromModule(T &ToErase) {
74 ToErase.replaceAllUsesWith(PoisonValue::get(T: ToErase.getType()));
75 ToErase.eraseFromParent();
76}
77
78static bool checkIfSupported(GlobalVariable &G) {
79 if (!G.isThreadLocal())
80 return true;
81
82 G.dropDroppableUses();
83
84 if (!G.isConstantUsed())
85 return true;
86
87 std::string W;
88 raw_string_ostream OS(W);
89
90 OS << "Accelerator does not support the thread_local variable "
91 << G.getName();
92
93 Instruction *I = nullptr;
94 SmallVector<User *> Tmp(G.users());
95 SmallPtrSet<User *, 5> Visited;
96 do {
97 auto U = std::move(Tmp.back());
98 Tmp.pop_back();
99
100 if (!Visited.insert(Ptr: U).second)
101 continue;
102
103 if (isa<Instruction>(Val: U))
104 I = cast<Instruction>(Val: U);
105 else
106 Tmp.insert(I: Tmp.end(), From: U->user_begin(), To: U->user_end());
107 } while (!I && !Tmp.empty());
108
109 assert(I && "thread_local global should have at least one non-constant use.");
110
111 G.getContext().diagnose(
112 DI: DiagnosticInfoUnsupported(*I->getParent()->getParent(), W,
113 I->getDebugLoc(), DS_Error));
114
115 return false;
116}
117
118static inline void clearModule(Module &M) { // TODO: simplify.
119 while (!M.functions().empty())
120 eraseFromModule(ToErase&: *M.begin());
121 while (!M.globals().empty())
122 eraseFromModule(ToErase&: *M.globals().begin());
123 while (!M.aliases().empty())
124 eraseFromModule(ToErase&: *M.aliases().begin());
125 while (!M.ifuncs().empty())
126 eraseFromModule(ToErase&: *M.ifuncs().begin());
127}
128
129static SmallVector<std::reference_wrapper<Use>>
130collectIndirectableUses(GlobalVariable *G) {
131 // We are interested only in use chains that end in an Instruction.
132 SmallVector<std::reference_wrapper<Use>> Uses;
133
134 SmallVector<std::reference_wrapper<Use>> Stack(G->use_begin(), G->use_end());
135 while (!Stack.empty()) {
136 Use &U = Stack.pop_back_val();
137 if (isa<Instruction>(Val: U.getUser()))
138 Uses.emplace_back(Args&: U);
139 else
140 transform(Range: U.getUser()->uses(), d_first: std::back_inserter(x&: Stack),
141 F: [](auto &&U) { return std::ref(U); });
142 }
143
144 return Uses;
145}
146
147static inline GlobalVariable *getGlobalForName(GlobalVariable *G) {
148 // Create an anonymous global which stores the variable's name, which will be
149 // used by the HIPSTDPAR runtime to look up the program-wide symbol.
150 LLVMContext &Ctx = G->getContext();
151 auto *CDS = ConstantDataArray::getString(Context&: Ctx, Initializer: G->getName());
152
153 GlobalVariable *N = G->getParent()->getOrInsertGlobal(Name: "", Ty: CDS->getType());
154 N->setInitializer(CDS);
155 N->setLinkage(GlobalValue::LinkageTypes::PrivateLinkage);
156 N->setConstant(true);
157
158 return N;
159}
160
161static inline GlobalVariable *getIndirectionGlobal(Module *M) {
162 // Create an anonymous global which stores a pointer to a pointer, which will
163 // be externally initialised by the HIPSTDPAR runtime with the address of the
164 // program-wide symbol.
165 Type *PtrTy = PointerType::get(
166 C&: M->getContext(), AddressSpace: M->getDataLayout().getDefaultGlobalsAddressSpace());
167 GlobalVariable *NewG = M->getOrInsertGlobal(Name: "", Ty: PtrTy);
168
169 NewG->setInitializer(PoisonValue::get(T: NewG->getValueType()));
170 NewG->setLinkage(GlobalValue::LinkageTypes::PrivateLinkage);
171 NewG->setConstant(true);
172 NewG->setExternallyInitialized(true);
173
174 return NewG;
175}
176
177static Constant *
178appendIndirectedGlobal(const GlobalVariable *IndirectionTable,
179 SmallVector<Constant *> &SymbolIndirections,
180 GlobalVariable *ToIndirect) {
181 Module *M = ToIndirect->getParent();
182
183 auto *InitTy = cast<StructType>(Val: IndirectionTable->getValueType());
184 auto *SymbolListTy = cast<StructType>(Val: InitTy->getStructElementType(N: 2));
185 Type *NameTy = SymbolListTy->getElementType(N: 0);
186 Type *IndirectTy = SymbolListTy->getElementType(N: 1);
187
188 Constant *NameG = getGlobalForName(G: ToIndirect);
189 Constant *IndirectG = getIndirectionGlobal(M);
190 Constant *Entry = ConstantStruct::get(
191 T: SymbolListTy, V: {ConstantExpr::getAddrSpaceCast(C: NameG, Ty: NameTy),
192 ConstantExpr::getAddrSpaceCast(C: IndirectG, Ty: IndirectTy)});
193 SymbolIndirections.push_back(Elt: Entry);
194
195 return IndirectG;
196}
197
198static void fillIndirectionTable(GlobalVariable *IndirectionTable,
199 SmallVector<Constant *> Indirections) {
200 Module *M = IndirectionTable->getParent();
201 size_t SymCnt = Indirections.size();
202
203 auto *InitTy = cast<StructType>(Val: IndirectionTable->getValueType());
204 Type *SymbolListTy = InitTy->getStructElementType(N: 1);
205 auto *SymbolTy = cast<StructType>(Val: InitTy->getStructElementType(N: 2));
206
207 Constant *Count = ConstantInt::get(Ty: InitTy->getStructElementType(N: 0), V: SymCnt);
208 M->removeGlobalVariable(GV: IndirectionTable);
209 GlobalVariable *Symbols =
210 M->getOrInsertGlobal(Name: "", Ty: ArrayType::get(ElementType: SymbolTy, NumElements: SymCnt));
211 Symbols->setLinkage(GlobalValue::LinkageTypes::PrivateLinkage);
212 Symbols->setInitializer(
213 ConstantArray::get(T: ArrayType::get(ElementType: SymbolTy, NumElements: SymCnt), V: {Indirections}));
214 Symbols->setConstant(true);
215
216 Constant *ASCSymbols = ConstantExpr::getAddrSpaceCast(C: Symbols, Ty: SymbolListTy);
217 Constant *Init = ConstantStruct::get(
218 T: InitTy, V: {Count, ASCSymbols, PoisonValue::get(T: SymbolTy)});
219 M->insertGlobalVariable(GV: IndirectionTable);
220 IndirectionTable->setInitializer(Init);
221}
222
223static void replaceWithIndirectUse(const Use &U, const GlobalVariable *G,
224 Constant *IndirectedG) {
225 auto *I = cast<Instruction>(Val: U.getUser());
226
227 IRBuilder<> Builder(I);
228 unsigned OpIdx = U.getOperandNo();
229 Value *Op = I->getOperand(i: OpIdx);
230
231 // We walk back up the use chain, which could be an arbitrarily long sequence
232 // of constexpr AS casts, ptr-to-int and GEP instructions, until we reach the
233 // indirected global.
234 while (auto *CE = dyn_cast<ConstantExpr>(Val: Op)) {
235 assert((CE->getOpcode() == Instruction::GetElementPtr ||
236 CE->getOpcode() == Instruction::AddrSpaceCast ||
237 CE->getOpcode() == Instruction::PtrToInt) &&
238 "Only GEP, ASCAST or PTRTOINT constant uses supported!");
239
240 Instruction *NewI = Builder.Insert(I: CE->getAsInstruction());
241 I->replaceUsesOfWith(From: Op, To: NewI);
242 I = NewI;
243 Op = I->getOperand(i: 0);
244 OpIdx = 0;
245 Builder.SetInsertPoint(I);
246 }
247
248 assert(Op == G && "Must reach indirected global!");
249
250 I->setOperand(i: OpIdx, Val: Builder.CreateLoad(Ty: G->getType(), Ptr: IndirectedG));
251}
252
253static inline bool isValidIndirectionTable(GlobalVariable *IndirectionTable) {
254 std::string W;
255 raw_string_ostream OS(W);
256
257 Type *Ty = IndirectionTable->getValueType();
258 bool Valid = false;
259
260 if (!isa<StructType>(Val: Ty)) {
261 OS << "The Indirection Table must be a struct type; ";
262 Ty->print(O&: OS);
263 OS << " is incorrect.\n";
264 } else if (cast<StructType>(Val: Ty)->getNumElements() != 3u) {
265 OS << "The Indirection Table must have 3 elements; "
266 << cast<StructType>(Val: Ty)->getNumElements() << " is incorrect.\n";
267 } else if (!isa<IntegerType>(Val: cast<StructType>(Val: Ty)->getStructElementType(N: 0))) {
268 OS << "The first element in the Indirection Table must be an integer; ";
269 cast<StructType>(Val: Ty)->getStructElementType(N: 0)->print(O&: OS);
270 OS << " is incorrect.\n";
271 } else if (!isa<PointerType>(Val: cast<StructType>(Val: Ty)->getStructElementType(N: 1))) {
272 OS << "The second element in the Indirection Table must be a pointer; ";
273 cast<StructType>(Val: Ty)->getStructElementType(N: 1)->print(O&: OS);
274 OS << " is incorrect.\n";
275 } else if (!isa<StructType>(Val: cast<StructType>(Val: Ty)->getStructElementType(N: 2))) {
276 OS << "The third element in the Indirection Table must be a struct type; ";
277 cast<StructType>(Val: Ty)->getStructElementType(N: 2)->print(O&: OS);
278 OS << " is incorrect.\n";
279 } else {
280 Valid = true;
281 }
282
283 if (!Valid)
284 IndirectionTable->getContext().diagnose(DI: DiagnosticInfoGeneric(W, DS_Error));
285
286 return Valid;
287}
288
289static void indirectGlobals(GlobalVariable *IndirectionTable,
290 SmallVector<GlobalVariable *> ToIndirect) {
291 // We replace globals with an indirected access via a pointer that will get
292 // set by the HIPSTDPAR runtime, using their accessible, program-wide unique
293 // address as set by the host linker-loader.
294 SmallVector<Constant *> SymbolIndirections;
295 for (auto &&G : ToIndirect) {
296 SmallVector<std::reference_wrapper<Use>> Uses = collectIndirectableUses(G);
297
298 if (Uses.empty())
299 continue;
300
301 Constant *IndirectedGlobal =
302 appendIndirectedGlobal(IndirectionTable, SymbolIndirections, ToIndirect: G);
303
304 for_each(Range&: Uses,
305 F: [=](auto &&U) { replaceWithIndirectUse(U, G, IndirectedGlobal); });
306
307 eraseFromModule(ToErase&: *G);
308 }
309
310 if (SymbolIndirections.empty())
311 return;
312
313 fillIndirectionTable(IndirectionTable, Indirections: std::move(SymbolIndirections));
314}
315
316static inline void maybeHandleGlobals(Module &M) {
317 unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
318
319 SmallVector<GlobalVariable *> ToIndirect;
320 for (auto &&G : M.globals()) {
321 if (!checkIfSupported(G))
322 return clearModule(M);
323 if (G.getAddressSpace() != GlobAS)
324 continue;
325 if (G.isConstant() && G.hasInitializer() && G.hasAtLeastLocalUnnamedAddr())
326 continue;
327
328 ToIndirect.push_back(Elt: &G);
329 }
330
331 if (ToIndirect.empty())
332 return;
333
334 if (auto *IT = M.getNamedGlobal(Name: "__hipstdpar_symbol_indirection_table")) {
335 if (!isValidIndirectionTable(IndirectionTable: IT))
336 return clearModule(M);
337 return indirectGlobals(IndirectionTable: IT, ToIndirect: std::move(ToIndirect));
338 } else {
339 for (auto &&G : ToIndirect) {
340 // We will internalise these, so we provide a poison initialiser.
341 if (!G->hasInitializer())
342 G->setInitializer(PoisonValue::get(T: G->getValueType()));
343 }
344 }
345}
346
347template<unsigned N>
348static inline void removeUnreachableFunctions(
349 const SmallPtrSet<const Function *, N>& Reachable, Module &M) {
350 removeFromUsedLists(M, [&](Constant *C) {
351 if (auto F = dyn_cast<Function>(Val: C))
352 return !Reachable.contains(F);
353
354 return false;
355 });
356
357 SmallVector<std::reference_wrapper<Function>> ToRemove;
358 copy_if(M, std::back_inserter(x&: ToRemove), [&](auto &&F) {
359 return !F.isIntrinsic() && !Reachable.contains(&F);
360 });
361
362 for_each(Range&: ToRemove, F: eraseFromModule<Function>);
363}
364
365static inline bool isAcceleratorExecutionRoot(const Function *F) {
366 if (!F)
367 return false;
368
369 return F->getCallingConv() == CallingConv::AMDGPU_KERNEL;
370}
371
372static inline bool checkIfSupported(const Function *F, const CallBase *CB) {
373 const auto Dx = F->getName().rfind(Str: "__hipstdpar_unsupported");
374
375 if (Dx == StringRef::npos)
376 return true;
377
378 const auto N = F->getName().substr(Start: 0, N: Dx);
379
380 std::string W;
381 raw_string_ostream OS(W);
382
383 if (N == "__ASM")
384 OS << "Accelerator does not support the ASM block:\n"
385 << cast<ConstantDataArray>(Val: CB->getArgOperand(i: 0))->getAsCString();
386 else
387 OS << "Accelerator does not support the " << N << " function.";
388
389 auto Caller = CB->getParent()->getParent();
390
391 Caller->getContext().diagnose(
392 DI: DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error));
393
394 return false;
395}
396
397PreservedAnalyses
398 HipStdParAcceleratorCodeSelectionPass::run(Module &M,
399 ModuleAnalysisManager &MAM) {
400 auto &CGA = MAM.getResult<CallGraphAnalysis>(IR&: M);
401
402 SmallPtrSet<const Function *, 32> Reachable;
403 for (auto &&CGN : CGA) {
404 if (!isAcceleratorExecutionRoot(F: CGN.first))
405 continue;
406
407 Reachable.insert(Ptr: CGN.first);
408
409 SmallVector<const Function *> Tmp({CGN.first});
410 do {
411 auto F = std::move(Tmp.back());
412 Tmp.pop_back();
413
414 for (auto &&N : *CGA[F]) {
415 if (!N.second)
416 continue;
417 if (!N.second->getFunction())
418 continue;
419 if (Reachable.contains(Ptr: N.second->getFunction()))
420 continue;
421
422 if (!checkIfSupported(F: N.second->getFunction(),
423 CB: dyn_cast<CallBase>(Val&: *N.first)))
424 return PreservedAnalyses::none();
425
426 Reachable.insert(Ptr: N.second->getFunction());
427 Tmp.push_back(Elt: N.second->getFunction());
428 }
429 } while (!std::empty(cont: Tmp));
430 }
431
432 if (std::empty(cont: Reachable))
433 clearModule(M);
434 else
435 removeUnreachableFunctions(Reachable, M);
436
437 maybeHandleGlobals(M);
438
439 return PreservedAnalyses::none();
440}
441
442static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{
443 {"aligned_alloc", "__hipstdpar_aligned_alloc"},
444 {"calloc", "__hipstdpar_calloc"},
445 {"free", "__hipstdpar_free"},
446 {"malloc", "__hipstdpar_malloc"},
447 {"memalign", "__hipstdpar_aligned_alloc"},
448 {"mmap", "__hipstdpar_mmap"},
449 {"munmap", "__hipstdpar_munmap"},
450 {"posix_memalign", "__hipstdpar_posix_aligned_alloc"},
451 {"realloc", "__hipstdpar_realloc"},
452 {"reallocarray", "__hipstdpar_realloc_array"},
453 {"_ZdaPv", "__hipstdpar_operator_delete"},
454 {"_ZdaPvm", "__hipstdpar_operator_delete_sized"},
455 {"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
456 {"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
457 {"_ZdlPv", "__hipstdpar_operator_delete"},
458 {"_ZdlPvm", "__hipstdpar_operator_delete_sized"},
459 {"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
460 {"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
461 {"_Znam", "__hipstdpar_operator_new"},
462 {"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
463 {"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"},
464 {"_ZnamSt11align_val_tRKSt9nothrow_t",
465 "__hipstdpar_operator_new_aligned_nothrow"},
466
467 {"_Znwm", "__hipstdpar_operator_new"},
468 {"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
469 {"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"},
470 {"_ZnwmSt11align_val_tRKSt9nothrow_t",
471 "__hipstdpar_operator_new_aligned_nothrow"},
472 {"__builtin_calloc", "__hipstdpar_calloc"},
473 {"__builtin_free", "__hipstdpar_free"},
474 {"__builtin_malloc", "__hipstdpar_malloc"},
475 {"__builtin_operator_delete", "__hipstdpar_operator_delete"},
476 {"__builtin_operator_new", "__hipstdpar_operator_new"},
477 {"__builtin_realloc", "__hipstdpar_realloc"},
478 {"__libc_calloc", "__hipstdpar_calloc"},
479 {"__libc_free", "__hipstdpar_free"},
480 {"__libc_malloc", "__hipstdpar_malloc"},
481 {"__libc_memalign", "__hipstdpar_aligned_alloc"},
482 {"__libc_realloc", "__hipstdpar_realloc"}};
483
484static constexpr std::pair<StringLiteral, StringLiteral> HiddenMap[]{
485 // hidden_malloc and hidden_free are only kept for backwards compatibility /
486 // legacy purposes, and we should remove them in the future
487 {"__hipstdpar_hidden_malloc", "__libc_malloc"},
488 {"__hipstdpar_hidden_free", "__libc_free"},
489 {"__hipstdpar_hidden_memalign", "__libc_memalign"},
490 {"__hipstdpar_hidden_mmap", "mmap"},
491 {"__hipstdpar_hidden_munmap", "munmap"}};
492
493PreservedAnalyses
494HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
495 SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(cont: ReplaceMap),
496 std::cend(cont: ReplaceMap));
497
498 for (auto &&F : M) {
499 if (!F.hasName())
500 continue;
501 auto It = AllocReplacements.find(Val: F.getName());
502 if (It == AllocReplacements.end())
503 continue;
504
505 if (auto R = M.getFunction(Name: It->second)) {
506 F.replaceAllUsesWith(V: R);
507 } else {
508 std::string W;
509 raw_string_ostream OS(W);
510
511 OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()]
512 << ". Tried to run the allocation interposition pass without the "
513 << "replacement functions available.";
514
515 F.getContext().diagnose(DI: DiagnosticInfoUnsupported(F, W,
516 F.getSubprogram(),
517 DS_Warning));
518 }
519 }
520
521 for (auto &&HR : HiddenMap) {
522 if (auto F = M.getFunction(Name: HR.first)) {
523 auto R = M.getOrInsertFunction(Name: HR.second, T: F->getFunctionType(),
524 AttributeList: F->getAttributes());
525 F->replaceAllUsesWith(V: R.getCallee());
526
527 eraseFromModule(ToErase&: *F);
528 }
529 }
530
531 return PreservedAnalyses::none();
532}
533
534static constexpr std::pair<StringLiteral, StringLiteral> MathLibToHipStdPar[]{
535 {"acosh", "__hipstdpar_acosh_f64"},
536 {"acoshf", "__hipstdpar_acosh_f32"},
537 {"asinh", "__hipstdpar_asinh_f64"},
538 {"asinhf", "__hipstdpar_asinh_f32"},
539 {"atanh", "__hipstdpar_atanh_f64"},
540 {"atanhf", "__hipstdpar_atanh_f32"},
541 {"cbrt", "__hipstdpar_cbrt_f64"},
542 {"cbrtf", "__hipstdpar_cbrt_f32"},
543 {"erf", "__hipstdpar_erf_f64"},
544 {"erff", "__hipstdpar_erf_f32"},
545 {"erfc", "__hipstdpar_erfc_f64"},
546 {"erfcf", "__hipstdpar_erfc_f32"},
547 {"fdim", "__hipstdpar_fdim_f64"},
548 {"fdimf", "__hipstdpar_fdim_f32"},
549 {"expm1", "__hipstdpar_expm1_f64"},
550 {"expm1f", "__hipstdpar_expm1_f32"},
551 {"hypot", "__hipstdpar_hypot_f64"},
552 {"hypotf", "__hipstdpar_hypot_f32"},
553 {"ilogb", "__hipstdpar_ilogb_f64"},
554 {"ilogbf", "__hipstdpar_ilogb_f32"},
555 {"lgamma", "__hipstdpar_lgamma_f64"},
556 {"lgammaf", "__hipstdpar_lgamma_f32"},
557 {"log1p", "__hipstdpar_log1p_f64"},
558 {"log1pf", "__hipstdpar_log1p_f32"},
559 {"logb", "__hipstdpar_logb_f64"},
560 {"logbf", "__hipstdpar_logb_f32"},
561 {"nextafter", "__hipstdpar_nextafter_f64"},
562 {"nextafterf", "__hipstdpar_nextafter_f32"},
563 {"nexttoward", "__hipstdpar_nexttoward_f64"},
564 {"nexttowardf", "__hipstdpar_nexttoward_f32"},
565 {"remainder", "__hipstdpar_remainder_f64"},
566 {"remainderf", "__hipstdpar_remainder_f32"},
567 {"remquo", "__hipstdpar_remquo_f64"},
568 {"remquof", "__hipstdpar_remquo_f32"},
569 {"scalbln", "__hipstdpar_scalbln_f64"},
570 {"scalblnf", "__hipstdpar_scalbln_f32"},
571 {"scalbn", "__hipstdpar_scalbn_f64"},
572 {"scalbnf", "__hipstdpar_scalbn_f32"},
573 {"tgamma", "__hipstdpar_tgamma_f64"},
574 {"tgammaf", "__hipstdpar_tgamma_f32"}};
575
576PreservedAnalyses HipStdParMathFixupPass::run(Module &M,
577 ModuleAnalysisManager &) {
578 if (M.empty())
579 return PreservedAnalyses::all();
580
581 SmallVector<std::pair<Function *, std::string>> ToReplace;
582 for (auto &&F : M) {
583 if (!F.hasName())
584 continue;
585
586 StringRef N = F.getName();
587 Intrinsic::ID ID = F.getIntrinsicID();
588
589 switch (ID) {
590 case Intrinsic::not_intrinsic: {
591 auto It =
592 find_if(Range: MathLibToHipStdPar, P: [&](auto &&M) { return M.first == N; });
593 if (It == std::cend(cont: MathLibToHipStdPar))
594 continue;
595 ToReplace.emplace_back(Args: &F, Args: It->second);
596 break;
597 }
598 case Intrinsic::acos:
599 case Intrinsic::asin:
600 case Intrinsic::atan:
601 case Intrinsic::atan2:
602 case Intrinsic::cosh:
603 case Intrinsic::modf:
604 case Intrinsic::sinh:
605 case Intrinsic::tan:
606 case Intrinsic::tanh:
607 break;
608 default: {
609 if (F.getReturnType()->isDoubleTy()) {
610 switch (ID) {
611 case Intrinsic::cos:
612 case Intrinsic::exp:
613 case Intrinsic::exp2:
614 case Intrinsic::log:
615 case Intrinsic::log10:
616 case Intrinsic::log2:
617 case Intrinsic::pow:
618 case Intrinsic::sin:
619 break;
620 default:
621 continue;
622 }
623 break;
624 }
625 continue;
626 }
627 }
628
629 ToReplace.emplace_back(Args: &F, Args&: N);
630 llvm::replace(Range&: ToReplace.back().second, OldValue: '.', NewValue: '_');
631 StringRef Prefix = "llvm";
632 ToReplace.back().second.replace(pos: 0, n1: Prefix.size(), s: "__hipstdpar");
633 }
634 for (auto &&[F, NewF] : ToReplace)
635 F->replaceAllUsesWith(
636 V: M.getOrInsertFunction(Name: NewF, T: F->getFunctionType()).getCallee());
637
638 return PreservedAnalyses::none();
639}
640