| 1 | //===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // This file implements three passes that enable HIP C++ Standard Parallelism |
| 9 | // Support: |
| 10 | // |
| 11 | // 1. AcceleratorCodeSelection (required): Given that only algorithms are |
| 12 | // accelerated, and that the accelerated implementation exists in the form of |
| 13 | // a compute kernel, we assume that only the kernel, and all functions |
| 14 | // reachable from it, constitute code that the user expects the accelerator |
| 15 | // to execute. Thus, we identify the set of all functions reachable from |
| 16 | // kernels, and then remove all unreachable ones. This last part is necessary |
| 17 | // because it is possible for code that the user did not expect to execute on |
| 18 | // an accelerator to contain constructs that cannot be handled by the target |
| 19 | // BE, which cannot be provably demonstrated to be dead code in general, and |
| 20 | // thus can lead to mis-compilation. The degenerate case of this is when a |
| 21 | // Module contains no kernels (the parent TU had no algorithm invocations fit |
| 22 | // for acceleration), which we handle by completely emptying said module. |
| 23 | // **NOTE**: The above does not handle indirectly reachable functions i.e. |
| 24 | // it is possible to obtain a case where the target of an indirect |
| 25 | // call is otherwise unreachable and thus is removed; this |
| 26 | // restriction is aligned with the current `-hipstdpar` limitations |
| 27 | // and will be relaxed in the future. |
| 28 | // |
| 29 | // 2. AllocationInterposition (required only when on-demand paging is |
| 30 | // unsupported): Some accelerators or operating systems might not support |
| 31 | // transparent on-demand paging. Thus, they would only be able to access |
| 32 | // memory that is allocated by an accelerator-aware mechanism. For such cases |
| 33 | // the user can opt into enabling allocation / deallocation interposition, |
| 34 | // whereby we replace calls to known allocation / deallocation functions with |
| 35 | // calls to runtime implemented equivalents that forward the requests to |
| 36 | // accelerator-aware interfaces. We also support freeing system allocated |
| 37 | // memory that ends up in one of the runtime equivalents, since this can |
| 38 | // happen if e.g. a library that was compiled without interposition returns |
| 39 | // an allocation that can be validly passed to `free`. |
| 40 | // |
| 41 | // 3. MathFixup (required): Some accelerators might have an incomplete |
| 42 | // implementation for the intrinsics used to implement some of the math |
| 43 | // functions in <cmath> / their corresponding libcall lowerings. Since this |
| 44 | // can vary quite significantly between accelerators, we replace calls to a |
| 45 | // set of intrinsics / lib functions known to be problematic with calls to a |
| 46 | // HIPSTDPAR specific forwarding layer, which gives a uniform interface for |
| 47 | // accelerators to implement in their own runtime components. This pass |
| 48 | // should run before AcceleratorCodeSelection so as to prevent the spurious |
| 49 | // removal of the HIPSTDPAR specific forwarding functions. |
| 50 | //===----------------------------------------------------------------------===// |
| 51 | |
| 52 | #include "llvm/Transforms/HipStdPar/HipStdPar.h" |
| 53 | |
| 54 | #include "llvm/ADT/STLExtras.h" |
| 55 | #include "llvm/ADT/SmallPtrSet.h" |
| 56 | #include "llvm/ADT/SmallVector.h" |
| 57 | #include "llvm/Analysis/CallGraph.h" |
| 58 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| 59 | #include "llvm/IR/Constants.h" |
| 60 | #include "llvm/IR/Function.h" |
| 61 | #include "llvm/IR/IRBuilder.h" |
| 62 | #include "llvm/IR/Intrinsics.h" |
| 63 | #include "llvm/IR/Module.h" |
| 64 | #include "llvm/Transforms/Utils/ModuleUtils.h" |
| 65 | |
| 66 | #include <cassert> |
| 67 | #include <string> |
| 68 | #include <utility> |
| 69 | |
| 70 | using namespace llvm; |
| 71 | |
| 72 | template<typename T> |
| 73 | static inline void eraseFromModule(T &ToErase) { |
| 74 | ToErase.replaceAllUsesWith(PoisonValue::get(T: ToErase.getType())); |
| 75 | ToErase.eraseFromParent(); |
| 76 | } |
| 77 | |
| 78 | static bool checkIfSupported(GlobalVariable &G) { |
| 79 | if (!G.isThreadLocal()) |
| 80 | return true; |
| 81 | |
| 82 | G.dropDroppableUses(); |
| 83 | |
| 84 | if (!G.isConstantUsed()) |
| 85 | return true; |
| 86 | |
| 87 | std::string W; |
| 88 | raw_string_ostream OS(W); |
| 89 | |
| 90 | OS << "Accelerator does not support the thread_local variable " |
| 91 | << G.getName(); |
| 92 | |
| 93 | Instruction *I = nullptr; |
| 94 | SmallVector<User *> Tmp(G.users()); |
| 95 | SmallPtrSet<User *, 5> Visited; |
| 96 | do { |
| 97 | auto U = std::move(Tmp.back()); |
| 98 | Tmp.pop_back(); |
| 99 | |
| 100 | if (!Visited.insert(Ptr: U).second) |
| 101 | continue; |
| 102 | |
| 103 | if (isa<Instruction>(Val: U)) |
| 104 | I = cast<Instruction>(Val: U); |
| 105 | else |
| 106 | Tmp.insert(I: Tmp.end(), From: U->user_begin(), To: U->user_end()); |
| 107 | } while (!I && !Tmp.empty()); |
| 108 | |
| 109 | assert(I && "thread_local global should have at least one non-constant use." ); |
| 110 | |
| 111 | G.getContext().diagnose( |
| 112 | DI: DiagnosticInfoUnsupported(*I->getParent()->getParent(), W, |
| 113 | I->getDebugLoc(), DS_Error)); |
| 114 | |
| 115 | return false; |
| 116 | } |
| 117 | |
| 118 | static inline void clearModule(Module &M) { // TODO: simplify. |
| 119 | while (!M.functions().empty()) |
| 120 | eraseFromModule(ToErase&: *M.begin()); |
| 121 | while (!M.globals().empty()) |
| 122 | eraseFromModule(ToErase&: *M.globals().begin()); |
| 123 | while (!M.aliases().empty()) |
| 124 | eraseFromModule(ToErase&: *M.aliases().begin()); |
| 125 | while (!M.ifuncs().empty()) |
| 126 | eraseFromModule(ToErase&: *M.ifuncs().begin()); |
| 127 | } |
| 128 | |
| 129 | static SmallVector<std::reference_wrapper<Use>> |
| 130 | collectIndirectableUses(GlobalVariable *G) { |
| 131 | // We are interested only in use chains that end in an Instruction. |
| 132 | SmallVector<std::reference_wrapper<Use>> Uses; |
| 133 | |
| 134 | SmallVector<std::reference_wrapper<Use>> Stack(G->use_begin(), G->use_end()); |
| 135 | while (!Stack.empty()) { |
| 136 | Use &U = Stack.pop_back_val(); |
| 137 | if (isa<Instruction>(Val: U.getUser())) |
| 138 | Uses.emplace_back(Args&: U); |
| 139 | else |
| 140 | transform(Range: U.getUser()->uses(), d_first: std::back_inserter(x&: Stack), |
| 141 | F: [](auto &&U) { return std::ref(U); }); |
| 142 | } |
| 143 | |
| 144 | return Uses; |
| 145 | } |
| 146 | |
| 147 | static inline GlobalVariable *getGlobalForName(GlobalVariable *G) { |
| 148 | // Create an anonymous global which stores the variable's name, which will be |
| 149 | // used by the HIPSTDPAR runtime to look up the program-wide symbol. |
| 150 | LLVMContext &Ctx = G->getContext(); |
| 151 | auto *CDS = ConstantDataArray::getString(Context&: Ctx, Initializer: G->getName()); |
| 152 | |
| 153 | GlobalVariable *N = G->getParent()->getOrInsertGlobal(Name: "" , Ty: CDS->getType()); |
| 154 | N->setInitializer(CDS); |
| 155 | N->setLinkage(GlobalValue::LinkageTypes::PrivateLinkage); |
| 156 | N->setConstant(true); |
| 157 | |
| 158 | return N; |
| 159 | } |
| 160 | |
| 161 | static inline GlobalVariable *getIndirectionGlobal(Module *M) { |
| 162 | // Create an anonymous global which stores a pointer to a pointer, which will |
| 163 | // be externally initialised by the HIPSTDPAR runtime with the address of the |
| 164 | // program-wide symbol. |
| 165 | Type *PtrTy = PointerType::get( |
| 166 | C&: M->getContext(), AddressSpace: M->getDataLayout().getDefaultGlobalsAddressSpace()); |
| 167 | GlobalVariable *NewG = M->getOrInsertGlobal(Name: "" , Ty: PtrTy); |
| 168 | |
| 169 | NewG->setInitializer(PoisonValue::get(T: NewG->getValueType())); |
| 170 | NewG->setLinkage(GlobalValue::LinkageTypes::PrivateLinkage); |
| 171 | NewG->setConstant(true); |
| 172 | NewG->setExternallyInitialized(true); |
| 173 | |
| 174 | return NewG; |
| 175 | } |
| 176 | |
| 177 | static Constant * |
| 178 | appendIndirectedGlobal(const GlobalVariable *IndirectionTable, |
| 179 | SmallVector<Constant *> &SymbolIndirections, |
| 180 | GlobalVariable *ToIndirect) { |
| 181 | Module *M = ToIndirect->getParent(); |
| 182 | |
| 183 | auto *InitTy = cast<StructType>(Val: IndirectionTable->getValueType()); |
| 184 | auto *SymbolListTy = cast<StructType>(Val: InitTy->getStructElementType(N: 2)); |
| 185 | Type *NameTy = SymbolListTy->getElementType(N: 0); |
| 186 | Type *IndirectTy = SymbolListTy->getElementType(N: 1); |
| 187 | |
| 188 | Constant *NameG = getGlobalForName(G: ToIndirect); |
| 189 | Constant *IndirectG = getIndirectionGlobal(M); |
| 190 | Constant *Entry = ConstantStruct::get( |
| 191 | T: SymbolListTy, V: {ConstantExpr::getAddrSpaceCast(C: NameG, Ty: NameTy), |
| 192 | ConstantExpr::getAddrSpaceCast(C: IndirectG, Ty: IndirectTy)}); |
| 193 | SymbolIndirections.push_back(Elt: Entry); |
| 194 | |
| 195 | return IndirectG; |
| 196 | } |
| 197 | |
| 198 | static void fillIndirectionTable(GlobalVariable *IndirectionTable, |
| 199 | SmallVector<Constant *> Indirections) { |
| 200 | Module *M = IndirectionTable->getParent(); |
| 201 | size_t SymCnt = Indirections.size(); |
| 202 | |
| 203 | auto *InitTy = cast<StructType>(Val: IndirectionTable->getValueType()); |
| 204 | Type *SymbolListTy = InitTy->getStructElementType(N: 1); |
| 205 | auto *SymbolTy = cast<StructType>(Val: InitTy->getStructElementType(N: 2)); |
| 206 | |
| 207 | Constant *Count = ConstantInt::get(Ty: InitTy->getStructElementType(N: 0), V: SymCnt); |
| 208 | M->removeGlobalVariable(GV: IndirectionTable); |
| 209 | GlobalVariable *Symbols = |
| 210 | M->getOrInsertGlobal(Name: "" , Ty: ArrayType::get(ElementType: SymbolTy, NumElements: SymCnt)); |
| 211 | Symbols->setLinkage(GlobalValue::LinkageTypes::PrivateLinkage); |
| 212 | Symbols->setInitializer( |
| 213 | ConstantArray::get(T: ArrayType::get(ElementType: SymbolTy, NumElements: SymCnt), V: {Indirections})); |
| 214 | Symbols->setConstant(true); |
| 215 | |
| 216 | Constant *ASCSymbols = ConstantExpr::getAddrSpaceCast(C: Symbols, Ty: SymbolListTy); |
| 217 | Constant *Init = ConstantStruct::get( |
| 218 | T: InitTy, V: {Count, ASCSymbols, PoisonValue::get(T: SymbolTy)}); |
| 219 | M->insertGlobalVariable(GV: IndirectionTable); |
| 220 | IndirectionTable->setInitializer(Init); |
| 221 | } |
| 222 | |
| 223 | static void replaceWithIndirectUse(const Use &U, const GlobalVariable *G, |
| 224 | Constant *IndirectedG) { |
| 225 | auto *I = cast<Instruction>(Val: U.getUser()); |
| 226 | |
| 227 | IRBuilder<> Builder(I); |
| 228 | unsigned OpIdx = U.getOperandNo(); |
| 229 | Value *Op = I->getOperand(i: OpIdx); |
| 230 | |
| 231 | // We walk back up the use chain, which could be an arbitrarily long sequence |
| 232 | // of constexpr AS casts, ptr-to-int and GEP instructions, until we reach the |
| 233 | // indirected global. |
| 234 | while (auto *CE = dyn_cast<ConstantExpr>(Val: Op)) { |
| 235 | assert((CE->getOpcode() == Instruction::GetElementPtr || |
| 236 | CE->getOpcode() == Instruction::AddrSpaceCast || |
| 237 | CE->getOpcode() == Instruction::PtrToInt) && |
| 238 | "Only GEP, ASCAST or PTRTOINT constant uses supported!" ); |
| 239 | |
| 240 | Instruction *NewI = Builder.Insert(I: CE->getAsInstruction()); |
| 241 | I->replaceUsesOfWith(From: Op, To: NewI); |
| 242 | I = NewI; |
| 243 | Op = I->getOperand(i: 0); |
| 244 | OpIdx = 0; |
| 245 | Builder.SetInsertPoint(I); |
| 246 | } |
| 247 | |
| 248 | assert(Op == G && "Must reach indirected global!" ); |
| 249 | |
| 250 | I->setOperand(i: OpIdx, Val: Builder.CreateLoad(Ty: G->getType(), Ptr: IndirectedG)); |
| 251 | } |
| 252 | |
| 253 | static inline bool isValidIndirectionTable(GlobalVariable *IndirectionTable) { |
| 254 | std::string W; |
| 255 | raw_string_ostream OS(W); |
| 256 | |
| 257 | Type *Ty = IndirectionTable->getValueType(); |
| 258 | bool Valid = false; |
| 259 | |
| 260 | if (!isa<StructType>(Val: Ty)) { |
| 261 | OS << "The Indirection Table must be a struct type; " ; |
| 262 | Ty->print(O&: OS); |
| 263 | OS << " is incorrect.\n" ; |
| 264 | } else if (cast<StructType>(Val: Ty)->getNumElements() != 3u) { |
| 265 | OS << "The Indirection Table must have 3 elements; " |
| 266 | << cast<StructType>(Val: Ty)->getNumElements() << " is incorrect.\n" ; |
| 267 | } else if (!isa<IntegerType>(Val: cast<StructType>(Val: Ty)->getStructElementType(N: 0))) { |
| 268 | OS << "The first element in the Indirection Table must be an integer; " ; |
| 269 | cast<StructType>(Val: Ty)->getStructElementType(N: 0)->print(O&: OS); |
| 270 | OS << " is incorrect.\n" ; |
| 271 | } else if (!isa<PointerType>(Val: cast<StructType>(Val: Ty)->getStructElementType(N: 1))) { |
| 272 | OS << "The second element in the Indirection Table must be a pointer; " ; |
| 273 | cast<StructType>(Val: Ty)->getStructElementType(N: 1)->print(O&: OS); |
| 274 | OS << " is incorrect.\n" ; |
| 275 | } else if (!isa<StructType>(Val: cast<StructType>(Val: Ty)->getStructElementType(N: 2))) { |
| 276 | OS << "The third element in the Indirection Table must be a struct type; " ; |
| 277 | cast<StructType>(Val: Ty)->getStructElementType(N: 2)->print(O&: OS); |
| 278 | OS << " is incorrect.\n" ; |
| 279 | } else { |
| 280 | Valid = true; |
| 281 | } |
| 282 | |
| 283 | if (!Valid) |
| 284 | IndirectionTable->getContext().diagnose(DI: DiagnosticInfoGeneric(W, DS_Error)); |
| 285 | |
| 286 | return Valid; |
| 287 | } |
| 288 | |
| 289 | static void indirectGlobals(GlobalVariable *IndirectionTable, |
| 290 | SmallVector<GlobalVariable *> ToIndirect) { |
| 291 | // We replace globals with an indirected access via a pointer that will get |
| 292 | // set by the HIPSTDPAR runtime, using their accessible, program-wide unique |
| 293 | // address as set by the host linker-loader. |
| 294 | SmallVector<Constant *> SymbolIndirections; |
| 295 | for (auto &&G : ToIndirect) { |
| 296 | SmallVector<std::reference_wrapper<Use>> Uses = collectIndirectableUses(G); |
| 297 | |
| 298 | if (Uses.empty()) |
| 299 | continue; |
| 300 | |
| 301 | Constant *IndirectedGlobal = |
| 302 | appendIndirectedGlobal(IndirectionTable, SymbolIndirections, ToIndirect: G); |
| 303 | |
| 304 | for_each(Range&: Uses, |
| 305 | F: [=](auto &&U) { replaceWithIndirectUse(U, G, IndirectedGlobal); }); |
| 306 | |
| 307 | eraseFromModule(ToErase&: *G); |
| 308 | } |
| 309 | |
| 310 | if (SymbolIndirections.empty()) |
| 311 | return; |
| 312 | |
| 313 | fillIndirectionTable(IndirectionTable, Indirections: std::move(SymbolIndirections)); |
| 314 | } |
| 315 | |
| 316 | static inline void maybeHandleGlobals(Module &M) { |
| 317 | unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace(); |
| 318 | |
| 319 | SmallVector<GlobalVariable *> ToIndirect; |
| 320 | for (auto &&G : M.globals()) { |
| 321 | if (!checkIfSupported(G)) |
| 322 | return clearModule(M); |
| 323 | if (G.getAddressSpace() != GlobAS) |
| 324 | continue; |
| 325 | if (G.isConstant() && G.hasInitializer() && G.hasAtLeastLocalUnnamedAddr()) |
| 326 | continue; |
| 327 | |
| 328 | ToIndirect.push_back(Elt: &G); |
| 329 | } |
| 330 | |
| 331 | if (ToIndirect.empty()) |
| 332 | return; |
| 333 | |
| 334 | if (auto *IT = M.getNamedGlobal(Name: "__hipstdpar_symbol_indirection_table" )) { |
| 335 | if (!isValidIndirectionTable(IndirectionTable: IT)) |
| 336 | return clearModule(M); |
| 337 | return indirectGlobals(IndirectionTable: IT, ToIndirect: std::move(ToIndirect)); |
| 338 | } else { |
| 339 | for (auto &&G : ToIndirect) { |
| 340 | // We will internalise these, so we provide a poison initialiser. |
| 341 | if (!G->hasInitializer()) |
| 342 | G->setInitializer(PoisonValue::get(T: G->getValueType())); |
| 343 | } |
| 344 | } |
| 345 | } |
| 346 | |
| 347 | template<unsigned N> |
| 348 | static inline void removeUnreachableFunctions( |
| 349 | const SmallPtrSet<const Function *, N>& Reachable, Module &M) { |
| 350 | removeFromUsedLists(M, [&](Constant *C) { |
| 351 | if (auto F = dyn_cast<Function>(Val: C)) |
| 352 | return !Reachable.contains(F); |
| 353 | |
| 354 | return false; |
| 355 | }); |
| 356 | |
| 357 | SmallVector<std::reference_wrapper<Function>> ToRemove; |
| 358 | copy_if(M, std::back_inserter(x&: ToRemove), [&](auto &&F) { |
| 359 | return !F.isIntrinsic() && !Reachable.contains(&F); |
| 360 | }); |
| 361 | |
| 362 | for_each(Range&: ToRemove, F: eraseFromModule<Function>); |
| 363 | } |
| 364 | |
| 365 | static inline bool isAcceleratorExecutionRoot(const Function *F) { |
| 366 | if (!F) |
| 367 | return false; |
| 368 | |
| 369 | return F->getCallingConv() == CallingConv::AMDGPU_KERNEL; |
| 370 | } |
| 371 | |
| 372 | static inline bool checkIfSupported(const Function *F, const CallBase *CB) { |
| 373 | const auto Dx = F->getName().rfind(Str: "__hipstdpar_unsupported" ); |
| 374 | |
| 375 | if (Dx == StringRef::npos) |
| 376 | return true; |
| 377 | |
| 378 | const auto N = F->getName().substr(Start: 0, N: Dx); |
| 379 | |
| 380 | std::string W; |
| 381 | raw_string_ostream OS(W); |
| 382 | |
| 383 | if (N == "__ASM" ) |
| 384 | OS << "Accelerator does not support the ASM block:\n" |
| 385 | << cast<ConstantDataArray>(Val: CB->getArgOperand(i: 0))->getAsCString(); |
| 386 | else |
| 387 | OS << "Accelerator does not support the " << N << " function." ; |
| 388 | |
| 389 | auto Caller = CB->getParent()->getParent(); |
| 390 | |
| 391 | Caller->getContext().diagnose( |
| 392 | DI: DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error)); |
| 393 | |
| 394 | return false; |
| 395 | } |
| 396 | |
| 397 | PreservedAnalyses |
| 398 | HipStdParAcceleratorCodeSelectionPass::run(Module &M, |
| 399 | ModuleAnalysisManager &MAM) { |
| 400 | auto &CGA = MAM.getResult<CallGraphAnalysis>(IR&: M); |
| 401 | |
| 402 | SmallPtrSet<const Function *, 32> Reachable; |
| 403 | for (auto &&CGN : CGA) { |
| 404 | if (!isAcceleratorExecutionRoot(F: CGN.first)) |
| 405 | continue; |
| 406 | |
| 407 | Reachable.insert(Ptr: CGN.first); |
| 408 | |
| 409 | SmallVector<const Function *> Tmp({CGN.first}); |
| 410 | do { |
| 411 | auto F = std::move(Tmp.back()); |
| 412 | Tmp.pop_back(); |
| 413 | |
| 414 | for (auto &&N : *CGA[F]) { |
| 415 | if (!N.second) |
| 416 | continue; |
| 417 | if (!N.second->getFunction()) |
| 418 | continue; |
| 419 | if (Reachable.contains(Ptr: N.second->getFunction())) |
| 420 | continue; |
| 421 | |
| 422 | if (!checkIfSupported(F: N.second->getFunction(), |
| 423 | CB: dyn_cast<CallBase>(Val&: *N.first))) |
| 424 | return PreservedAnalyses::none(); |
| 425 | |
| 426 | Reachable.insert(Ptr: N.second->getFunction()); |
| 427 | Tmp.push_back(Elt: N.second->getFunction()); |
| 428 | } |
| 429 | } while (!std::empty(cont: Tmp)); |
| 430 | } |
| 431 | |
| 432 | if (std::empty(cont: Reachable)) |
| 433 | clearModule(M); |
| 434 | else |
| 435 | removeUnreachableFunctions(Reachable, M); |
| 436 | |
| 437 | maybeHandleGlobals(M); |
| 438 | |
| 439 | return PreservedAnalyses::none(); |
| 440 | } |
| 441 | |
| 442 | static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{ |
| 443 | {"aligned_alloc" , "__hipstdpar_aligned_alloc" }, |
| 444 | {"calloc" , "__hipstdpar_calloc" }, |
| 445 | {"free" , "__hipstdpar_free" }, |
| 446 | {"malloc" , "__hipstdpar_malloc" }, |
| 447 | {"memalign" , "__hipstdpar_aligned_alloc" }, |
| 448 | {"mmap" , "__hipstdpar_mmap" }, |
| 449 | {"munmap" , "__hipstdpar_munmap" }, |
| 450 | {"posix_memalign" , "__hipstdpar_posix_aligned_alloc" }, |
| 451 | {"realloc" , "__hipstdpar_realloc" }, |
| 452 | {"reallocarray" , "__hipstdpar_realloc_array" }, |
| 453 | {"_ZdaPv" , "__hipstdpar_operator_delete" }, |
| 454 | {"_ZdaPvm" , "__hipstdpar_operator_delete_sized" }, |
| 455 | {"_ZdaPvSt11align_val_t" , "__hipstdpar_operator_delete_aligned" }, |
| 456 | {"_ZdaPvmSt11align_val_t" , "__hipstdpar_operator_delete_aligned_sized" }, |
| 457 | {"_ZdlPv" , "__hipstdpar_operator_delete" }, |
| 458 | {"_ZdlPvm" , "__hipstdpar_operator_delete_sized" }, |
| 459 | {"_ZdlPvSt11align_val_t" , "__hipstdpar_operator_delete_aligned" }, |
| 460 | {"_ZdlPvmSt11align_val_t" , "__hipstdpar_operator_delete_aligned_sized" }, |
| 461 | {"_Znam" , "__hipstdpar_operator_new" }, |
| 462 | {"_ZnamRKSt9nothrow_t" , "__hipstdpar_operator_new_nothrow" }, |
| 463 | {"_ZnamSt11align_val_t" , "__hipstdpar_operator_new_aligned" }, |
| 464 | {"_ZnamSt11align_val_tRKSt9nothrow_t" , |
| 465 | "__hipstdpar_operator_new_aligned_nothrow" }, |
| 466 | |
| 467 | {"_Znwm" , "__hipstdpar_operator_new" }, |
| 468 | {"_ZnwmRKSt9nothrow_t" , "__hipstdpar_operator_new_nothrow" }, |
| 469 | {"_ZnwmSt11align_val_t" , "__hipstdpar_operator_new_aligned" }, |
| 470 | {"_ZnwmSt11align_val_tRKSt9nothrow_t" , |
| 471 | "__hipstdpar_operator_new_aligned_nothrow" }, |
| 472 | {"__builtin_calloc" , "__hipstdpar_calloc" }, |
| 473 | {"__builtin_free" , "__hipstdpar_free" }, |
| 474 | {"__builtin_malloc" , "__hipstdpar_malloc" }, |
| 475 | {"__builtin_operator_delete" , "__hipstdpar_operator_delete" }, |
| 476 | {"__builtin_operator_new" , "__hipstdpar_operator_new" }, |
| 477 | {"__builtin_realloc" , "__hipstdpar_realloc" }, |
| 478 | {"__libc_calloc" , "__hipstdpar_calloc" }, |
| 479 | {"__libc_free" , "__hipstdpar_free" }, |
| 480 | {"__libc_malloc" , "__hipstdpar_malloc" }, |
| 481 | {"__libc_memalign" , "__hipstdpar_aligned_alloc" }, |
| 482 | {"__libc_realloc" , "__hipstdpar_realloc" }}; |
| 483 | |
| 484 | static constexpr std::pair<StringLiteral, StringLiteral> HiddenMap[]{ |
| 485 | // hidden_malloc and hidden_free are only kept for backwards compatibility / |
| 486 | // legacy purposes, and we should remove them in the future |
| 487 | {"__hipstdpar_hidden_malloc" , "__libc_malloc" }, |
| 488 | {"__hipstdpar_hidden_free" , "__libc_free" }, |
| 489 | {"__hipstdpar_hidden_memalign" , "__libc_memalign" }, |
| 490 | {"__hipstdpar_hidden_mmap" , "mmap" }, |
| 491 | {"__hipstdpar_hidden_munmap" , "munmap" }}; |
| 492 | |
| 493 | PreservedAnalyses |
| 494 | HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) { |
| 495 | SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(cont: ReplaceMap), |
| 496 | std::cend(cont: ReplaceMap)); |
| 497 | |
| 498 | for (auto &&F : M) { |
| 499 | if (!F.hasName()) |
| 500 | continue; |
| 501 | auto It = AllocReplacements.find(Val: F.getName()); |
| 502 | if (It == AllocReplacements.end()) |
| 503 | continue; |
| 504 | |
| 505 | if (auto R = M.getFunction(Name: It->second)) { |
| 506 | F.replaceAllUsesWith(V: R); |
| 507 | } else { |
| 508 | std::string W; |
| 509 | raw_string_ostream OS(W); |
| 510 | |
| 511 | OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()] |
| 512 | << ". Tried to run the allocation interposition pass without the " |
| 513 | << "replacement functions available." ; |
| 514 | |
| 515 | F.getContext().diagnose(DI: DiagnosticInfoUnsupported(F, W, |
| 516 | F.getSubprogram(), |
| 517 | DS_Warning)); |
| 518 | } |
| 519 | } |
| 520 | |
| 521 | for (auto &&HR : HiddenMap) { |
| 522 | if (auto F = M.getFunction(Name: HR.first)) { |
| 523 | auto R = M.getOrInsertFunction(Name: HR.second, T: F->getFunctionType(), |
| 524 | AttributeList: F->getAttributes()); |
| 525 | F->replaceAllUsesWith(V: R.getCallee()); |
| 526 | |
| 527 | eraseFromModule(ToErase&: *F); |
| 528 | } |
| 529 | } |
| 530 | |
| 531 | return PreservedAnalyses::none(); |
| 532 | } |
| 533 | |
| 534 | static constexpr std::pair<StringLiteral, StringLiteral> MathLibToHipStdPar[]{ |
| 535 | {"acosh" , "__hipstdpar_acosh_f64" }, |
| 536 | {"acoshf" , "__hipstdpar_acosh_f32" }, |
| 537 | {"asinh" , "__hipstdpar_asinh_f64" }, |
| 538 | {"asinhf" , "__hipstdpar_asinh_f32" }, |
| 539 | {"atanh" , "__hipstdpar_atanh_f64" }, |
| 540 | {"atanhf" , "__hipstdpar_atanh_f32" }, |
| 541 | {"cbrt" , "__hipstdpar_cbrt_f64" }, |
| 542 | {"cbrtf" , "__hipstdpar_cbrt_f32" }, |
| 543 | {"erf" , "__hipstdpar_erf_f64" }, |
| 544 | {"erff" , "__hipstdpar_erf_f32" }, |
| 545 | {"erfc" , "__hipstdpar_erfc_f64" }, |
| 546 | {"erfcf" , "__hipstdpar_erfc_f32" }, |
| 547 | {"fdim" , "__hipstdpar_fdim_f64" }, |
| 548 | {"fdimf" , "__hipstdpar_fdim_f32" }, |
| 549 | {"expm1" , "__hipstdpar_expm1_f64" }, |
| 550 | {"expm1f" , "__hipstdpar_expm1_f32" }, |
| 551 | {"hypot" , "__hipstdpar_hypot_f64" }, |
| 552 | {"hypotf" , "__hipstdpar_hypot_f32" }, |
| 553 | {"ilogb" , "__hipstdpar_ilogb_f64" }, |
| 554 | {"ilogbf" , "__hipstdpar_ilogb_f32" }, |
| 555 | {"lgamma" , "__hipstdpar_lgamma_f64" }, |
| 556 | {"lgammaf" , "__hipstdpar_lgamma_f32" }, |
| 557 | {"log1p" , "__hipstdpar_log1p_f64" }, |
| 558 | {"log1pf" , "__hipstdpar_log1p_f32" }, |
| 559 | {"logb" , "__hipstdpar_logb_f64" }, |
| 560 | {"logbf" , "__hipstdpar_logb_f32" }, |
| 561 | {"nextafter" , "__hipstdpar_nextafter_f64" }, |
| 562 | {"nextafterf" , "__hipstdpar_nextafter_f32" }, |
| 563 | {"nexttoward" , "__hipstdpar_nexttoward_f64" }, |
| 564 | {"nexttowardf" , "__hipstdpar_nexttoward_f32" }, |
| 565 | {"remainder" , "__hipstdpar_remainder_f64" }, |
| 566 | {"remainderf" , "__hipstdpar_remainder_f32" }, |
| 567 | {"remquo" , "__hipstdpar_remquo_f64" }, |
| 568 | {"remquof" , "__hipstdpar_remquo_f32" }, |
| 569 | {"scalbln" , "__hipstdpar_scalbln_f64" }, |
| 570 | {"scalblnf" , "__hipstdpar_scalbln_f32" }, |
| 571 | {"scalbn" , "__hipstdpar_scalbn_f64" }, |
| 572 | {"scalbnf" , "__hipstdpar_scalbn_f32" }, |
| 573 | {"tgamma" , "__hipstdpar_tgamma_f64" }, |
| 574 | {"tgammaf" , "__hipstdpar_tgamma_f32" }}; |
| 575 | |
| 576 | PreservedAnalyses HipStdParMathFixupPass::run(Module &M, |
| 577 | ModuleAnalysisManager &) { |
| 578 | if (M.empty()) |
| 579 | return PreservedAnalyses::all(); |
| 580 | |
| 581 | SmallVector<std::pair<Function *, std::string>> ToReplace; |
| 582 | for (auto &&F : M) { |
| 583 | if (!F.hasName()) |
| 584 | continue; |
| 585 | |
| 586 | StringRef N = F.getName(); |
| 587 | Intrinsic::ID ID = F.getIntrinsicID(); |
| 588 | |
| 589 | switch (ID) { |
| 590 | case Intrinsic::not_intrinsic: { |
| 591 | auto It = |
| 592 | find_if(Range: MathLibToHipStdPar, P: [&](auto &&M) { return M.first == N; }); |
| 593 | if (It == std::cend(cont: MathLibToHipStdPar)) |
| 594 | continue; |
| 595 | ToReplace.emplace_back(Args: &F, Args: It->second); |
| 596 | break; |
| 597 | } |
| 598 | case Intrinsic::acos: |
| 599 | case Intrinsic::asin: |
| 600 | case Intrinsic::atan: |
| 601 | case Intrinsic::atan2: |
| 602 | case Intrinsic::cosh: |
| 603 | case Intrinsic::modf: |
| 604 | case Intrinsic::sinh: |
| 605 | case Intrinsic::tan: |
| 606 | case Intrinsic::tanh: |
| 607 | break; |
| 608 | default: { |
| 609 | if (F.getReturnType()->isDoubleTy()) { |
| 610 | switch (ID) { |
| 611 | case Intrinsic::cos: |
| 612 | case Intrinsic::exp: |
| 613 | case Intrinsic::exp2: |
| 614 | case Intrinsic::log: |
| 615 | case Intrinsic::log10: |
| 616 | case Intrinsic::log2: |
| 617 | case Intrinsic::pow: |
| 618 | case Intrinsic::sin: |
| 619 | break; |
| 620 | default: |
| 621 | continue; |
| 622 | } |
| 623 | break; |
| 624 | } |
| 625 | continue; |
| 626 | } |
| 627 | } |
| 628 | |
| 629 | ToReplace.emplace_back(Args: &F, Args&: N); |
| 630 | llvm::replace(Range&: ToReplace.back().second, OldValue: '.', NewValue: '_'); |
| 631 | StringRef Prefix = "llvm" ; |
| 632 | ToReplace.back().second.replace(pos: 0, n1: Prefix.size(), s: "__hipstdpar" ); |
| 633 | } |
| 634 | for (auto &&[F, NewF] : ToReplace) |
| 635 | F->replaceAllUsesWith( |
| 636 | V: M.getOrInsertFunction(Name: NewF, T: F->getFunctionType()).getCallee()); |
| 637 | |
| 638 | return PreservedAnalyses::none(); |
| 639 | } |
| 640 | |