| 1 | //===-- NVPTXCtorDtorLowering.cpp - Handle global ctors and dtors --------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | /// | 
|---|
| 9 | /// \file | 
|---|
| 10 | /// This pass creates a unified init and fini kernel with the required metadata | 
|---|
| 11 | //===----------------------------------------------------------------------===// | 
|---|
| 12 |  | 
|---|
| 13 | #include "NVPTXCtorDtorLowering.h" | 
|---|
| 14 | #include "MCTargetDesc/NVPTXBaseInfo.h" | 
|---|
| 15 | #include "NVPTX.h" | 
|---|
| 16 | #include "llvm/ADT/StringExtras.h" | 
|---|
| 17 | #include "llvm/IR/CallingConv.h" | 
|---|
| 18 | #include "llvm/IR/Constants.h" | 
|---|
| 19 | #include "llvm/IR/Function.h" | 
|---|
| 20 | #include "llvm/IR/GlobalVariable.h" | 
|---|
| 21 | #include "llvm/IR/IRBuilder.h" | 
|---|
| 22 | #include "llvm/IR/Module.h" | 
|---|
| 23 | #include "llvm/IR/Value.h" | 
|---|
| 24 | #include "llvm/Pass.h" | 
|---|
| 25 | #include "llvm/Support/CommandLine.h" | 
|---|
| 26 | #include "llvm/Support/MD5.h" | 
|---|
| 27 | #include "llvm/Transforms/Utils/ModuleUtils.h" | 
|---|
| 28 |  | 
|---|
| 29 | using namespace llvm; | 
|---|
| 30 |  | 
|---|
| 31 | #define DEBUG_TYPE "nvptx-lower-ctor-dtor" | 
|---|
| 32 |  | 
|---|
| 33 | static cl::opt<std::string> | 
|---|
| 34 | GlobalStr( "nvptx-lower-global-ctor-dtor-id", | 
|---|
| 35 | cl::desc( "Override unique ID of ctor/dtor globals."), | 
|---|
| 36 | cl::init(Val: ""), cl::Hidden); | 
|---|
| 37 |  | 
|---|
| 38 | static cl::opt<bool> | 
|---|
| 39 | CreateKernels( "nvptx-emit-init-fini-kernel", | 
|---|
| 40 | cl::desc( "Emit kernels to call ctor/dtor globals."), | 
|---|
| 41 | cl::init(Val: true), cl::Hidden); | 
|---|
| 42 |  | 
|---|
| 43 | namespace { | 
|---|
| 44 |  | 
|---|
| 45 | static std::string getHash(StringRef Str) { | 
|---|
| 46 | llvm::MD5 Hasher; | 
|---|
| 47 | llvm::MD5::MD5Result Hash; | 
|---|
| 48 | Hasher.update(Str); | 
|---|
| 49 | Hasher.final(Result&: Hash); | 
|---|
| 50 | return llvm::utohexstr(X: Hash.low(), /*LowerCase=*/true); | 
|---|
| 51 | } | 
|---|
| 52 |  | 
|---|
| 53 | static void addKernelAttrs(Function *F) { | 
|---|
| 54 | F->addFnAttr(Kind: "nvvm.maxclusterrank", Val: "1"); | 
|---|
| 55 | F->addFnAttr(Kind: "nvvm.maxntid", Val: "1"); | 
|---|
| 56 | F->setCallingConv(CallingConv::PTX_Kernel); | 
|---|
| 57 | } | 
|---|
| 58 |  | 
|---|
| 59 | static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) { | 
|---|
| 60 | StringRef InitOrFiniKernelName = | 
|---|
| 61 | IsCtor ? "nvptx$device$init": "nvptx$device$fini"; | 
|---|
| 62 | if (M.getFunction(Name: InitOrFiniKernelName)) | 
|---|
| 63 | return nullptr; | 
|---|
| 64 |  | 
|---|
| 65 | Function *InitOrFiniKernel = Function::createWithDefaultAttr( | 
|---|
| 66 | Ty: FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()), isVarArg: false), | 
|---|
| 67 | Linkage: GlobalValue::WeakODRLinkage, AddrSpace: 0, N: InitOrFiniKernelName, M: &M); | 
|---|
| 68 | addKernelAttrs(F: InitOrFiniKernel); | 
|---|
| 69 |  | 
|---|
| 70 | return InitOrFiniKernel; | 
|---|
| 71 | } | 
|---|
| 72 |  | 
|---|
| 73 | // We create the IR required to call each callback in this section. This is | 
|---|
| 74 | // equivalent to the following code. Normally, the linker would provide us with | 
|---|
| 75 | // the definitions of the init and fini array sections. The 'nvlink' linker does | 
|---|
| 76 | // not do this so initializing these values is done by the runtime. | 
|---|
| 77 | // | 
|---|
| 78 | // extern "C" void **__init_array_start = nullptr; | 
|---|
| 79 | // extern "C" void **__init_array_end = nullptr; | 
|---|
| 80 | // extern "C" void **__fini_array_start = nullptr; | 
|---|
| 81 | // extern "C" void **__fini_array_end = nullptr; | 
|---|
| 82 | // | 
|---|
| 83 | // using InitCallback = void(); | 
|---|
| 84 | // using FiniCallback = void(); | 
|---|
| 85 | // | 
|---|
| 86 | // void call_init_array_callbacks() { | 
|---|
| 87 | //   for (auto start = __init_array_start; start != __init_array_end; ++start) | 
|---|
| 88 | //     reinterpret_cast<InitCallback *>(*start)(); | 
|---|
| 89 | // } | 
|---|
| 90 | // | 
|---|
| 91 | // void call_fini_array_callbacks() { | 
|---|
| 92 | //   size_t fini_array_size = __fini_array_end - __fini_array_start; | 
|---|
| 93 | //   for (size_t i = fini_array_size; i > 0; --i) | 
|---|
| 94 | //     reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])(); | 
|---|
| 95 | // } | 
|---|
| 96 | static void createInitOrFiniCalls(Function &F, bool IsCtor) { | 
|---|
| 97 | Module &M = *F.getParent(); | 
|---|
| 98 | LLVMContext &C = M.getContext(); | 
|---|
| 99 |  | 
|---|
| 100 | IRBuilder<> IRB(BasicBlock::Create(Context&: C, Name: "entry", Parent: &F)); | 
|---|
| 101 | auto *LoopBB = BasicBlock::Create(Context&: C, Name: "while.entry", Parent: &F); | 
|---|
| 102 | auto *ExitBB = BasicBlock::Create(Context&: C, Name: "while.end", Parent: &F); | 
|---|
| 103 | Type *PtrTy = IRB.getPtrTy(AddrSpace: llvm::ADDRESS_SPACE_GLOBAL); | 
|---|
| 104 |  | 
|---|
| 105 | auto *Begin = M.getOrInsertGlobal( | 
|---|
| 106 | Name: IsCtor ? "__init_array_start": "__fini_array_start", | 
|---|
| 107 | Ty: PointerType::get(C, AddressSpace: 0), CreateGlobalCallback: [&]() { | 
|---|
| 108 | auto *GV = new GlobalVariable( | 
|---|
| 109 | M, PointerType::get(C, AddressSpace: 0), | 
|---|
| 110 | /*isConstant=*/false, GlobalValue::WeakAnyLinkage, | 
|---|
| 111 | Constant::getNullValue(Ty: PointerType::get(C, AddressSpace: 0)), | 
|---|
| 112 | IsCtor ? "__init_array_start": "__fini_array_start", | 
|---|
| 113 | /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal, | 
|---|
| 114 | /*AddressSpace=*/llvm::ADDRESS_SPACE_GLOBAL); | 
|---|
| 115 | GV->setVisibility(GlobalVariable::ProtectedVisibility); | 
|---|
| 116 | return GV; | 
|---|
| 117 | }); | 
|---|
| 118 | auto *End = M.getOrInsertGlobal( | 
|---|
| 119 | Name: IsCtor ? "__init_array_end": "__fini_array_end", Ty: PointerType::get(C, AddressSpace: 0), | 
|---|
| 120 | CreateGlobalCallback: [&]() { | 
|---|
| 121 | auto *GV = new GlobalVariable( | 
|---|
| 122 | M, PointerType::get(C, AddressSpace: 0), | 
|---|
| 123 | /*isConstant=*/false, GlobalValue::WeakAnyLinkage, | 
|---|
| 124 | Constant::getNullValue(Ty: PointerType::get(C, AddressSpace: 0)), | 
|---|
| 125 | IsCtor ? "__init_array_end": "__fini_array_end", | 
|---|
| 126 | /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal, | 
|---|
| 127 | /*AddressSpace=*/llvm::ADDRESS_SPACE_GLOBAL); | 
|---|
| 128 | GV->setVisibility(GlobalVariable::ProtectedVisibility); | 
|---|
| 129 | return GV; | 
|---|
| 130 | }); | 
|---|
| 131 |  | 
|---|
| 132 | // The constructor type is suppoed to allow using the argument vectors, but | 
|---|
| 133 | // for now we just call them with no arguments. | 
|---|
| 134 | auto *CallBackTy = FunctionType::get(Result: IRB.getVoidTy(), isVarArg: {}); | 
|---|
| 135 |  | 
|---|
| 136 | // The destructor array must be called in reverse order. Get an expression to | 
|---|
| 137 | // the end of the array and iterate backwards in that case. | 
|---|
| 138 | Value *BeginVal = IRB.CreateLoad(Ty: Begin->getType(), Ptr: Begin, Name: "begin"); | 
|---|
| 139 | Value *EndVal = IRB.CreateLoad(Ty: Begin->getType(), Ptr: End, Name: "stop"); | 
|---|
| 140 | if (!IsCtor) { | 
|---|
| 141 | auto *BeginInt = IRB.CreatePtrToInt(V: BeginVal, DestTy: IntegerType::getInt64Ty(C)); | 
|---|
| 142 | auto *EndInt = IRB.CreatePtrToInt(V: EndVal, DestTy: IntegerType::getInt64Ty(C)); | 
|---|
| 143 | auto *SubInst = IRB.CreateSub(LHS: EndInt, RHS: BeginInt); | 
|---|
| 144 | auto *Offset = IRB.CreateAShr( | 
|---|
| 145 | LHS: SubInst, RHS: ConstantInt::get(Ty: IntegerType::getInt64Ty(C), V: 3), Name: "offset", | 
|---|
| 146 | /*IsExact=*/isExact: true); | 
|---|
| 147 | auto *ValuePtr = IRB.CreateGEP(Ty: PointerType::get(C, AddressSpace: 0), Ptr: BeginVal, | 
|---|
| 148 | IdxList: ArrayRef<Value *>({Offset})); | 
|---|
| 149 | EndVal = BeginVal; | 
|---|
| 150 | BeginVal = IRB.CreateInBoundsGEP( | 
|---|
| 151 | Ty: PointerType::get(C, AddressSpace: 0), Ptr: ValuePtr, | 
|---|
| 152 | IdxList: ArrayRef<Value *>(ConstantInt::get(Ty: IntegerType::getInt64Ty(C), V: -1)), | 
|---|
| 153 | Name: "start"); | 
|---|
| 154 | } | 
|---|
| 155 | IRB.CreateCondBr( | 
|---|
| 156 | Cond: IRB.CreateCmp(Pred: IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGT, LHS: BeginVal, | 
|---|
| 157 | RHS: EndVal), | 
|---|
| 158 | True: LoopBB, False: ExitBB); | 
|---|
| 159 | IRB.SetInsertPoint(LoopBB); | 
|---|
| 160 | auto *CallBackPHI = IRB.CreatePHI(Ty: PtrTy, NumReservedValues: 2, Name: "ptr"); | 
|---|
| 161 | auto *CallBack = IRB.CreateLoad(Ty: IRB.getPtrTy(AddrSpace: F.getAddressSpace()), | 
|---|
| 162 | Ptr: CallBackPHI, Name: "callback"); | 
|---|
| 163 | IRB.CreateCall(FTy: CallBackTy, Callee: CallBack); | 
|---|
| 164 | auto *NewCallBack = | 
|---|
| 165 | IRB.CreateConstGEP1_64(Ty: PtrTy, Ptr: CallBackPHI, Idx0: IsCtor ? 1 : -1, Name: "next"); | 
|---|
| 166 | auto *EndCmp = IRB.CreateCmp(Pred: IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT, | 
|---|
| 167 | LHS: NewCallBack, RHS: EndVal, Name: "end"); | 
|---|
| 168 | CallBackPHI->addIncoming(V: BeginVal, BB: &F.getEntryBlock()); | 
|---|
| 169 | CallBackPHI->addIncoming(V: NewCallBack, BB: LoopBB); | 
|---|
| 170 | IRB.CreateCondBr(Cond: EndCmp, True: ExitBB, False: LoopBB); | 
|---|
| 171 | IRB.SetInsertPoint(ExitBB); | 
|---|
| 172 | IRB.CreateRetVoid(); | 
|---|
| 173 | } | 
|---|
| 174 |  | 
|---|
| 175 | static bool createInitOrFiniGlobals(Module &M, GlobalVariable *GV, | 
|---|
| 176 | bool IsCtor) { | 
|---|
| 177 | ConstantArray *GA = dyn_cast<ConstantArray>(Val: GV->getInitializer()); | 
|---|
| 178 | if (!GA || GA->getNumOperands() == 0) | 
|---|
| 179 | return false; | 
|---|
| 180 |  | 
|---|
| 181 | // NVPTX has no way to emit variables at specific sections or support for | 
|---|
| 182 | // the traditional constructor sections. Instead, we emit mangled global | 
|---|
| 183 | // names so the runtime can build the list manually. | 
|---|
| 184 | for (Value *V : GA->operands()) { | 
|---|
| 185 | auto *CS = cast<ConstantStruct>(Val: V); | 
|---|
| 186 | auto *F = cast<Constant>(Val: CS->getOperand(i_nocapture: 1)); | 
|---|
| 187 | uint64_t Priority = cast<ConstantInt>(Val: CS->getOperand(i_nocapture: 0))->getSExtValue(); | 
|---|
| 188 | std::string PriorityStr = "."+ std::to_string(val: Priority); | 
|---|
| 189 | // We append a semi-unique hash and the priority to the global name. | 
|---|
| 190 | std::string GlobalID = | 
|---|
| 191 | !GlobalStr.empty() ? GlobalStr : getHash(Str: M.getSourceFileName()); | 
|---|
| 192 | std::string NameStr = | 
|---|
| 193 | ((IsCtor ? "__init_array_object_": "__fini_array_object_") + | 
|---|
| 194 | F->getName() + "_"+ GlobalID + "_"+ std::to_string(val: Priority)) | 
|---|
| 195 | .str(); | 
|---|
| 196 | // PTX does not support exported names with '.' in them. | 
|---|
| 197 | llvm::transform(Range&: NameStr, d_first: NameStr.begin(), | 
|---|
| 198 | F: [](char c) { return c == '.' ? '_' : c; }); | 
|---|
| 199 |  | 
|---|
| 200 | auto *GV = new GlobalVariable(M, F->getType(), /*IsConstant=*/true, | 
|---|
| 201 | GlobalValue::ExternalLinkage, F, NameStr, | 
|---|
| 202 | nullptr, GlobalValue::NotThreadLocal, | 
|---|
| 203 | /*AddressSpace=*/4); | 
|---|
| 204 | // This isn't respected by Nvidia, simply put here for clarity. | 
|---|
| 205 | GV->setSection(IsCtor ? ".init_array"+ PriorityStr | 
|---|
| 206 | : ".fini_array"+ PriorityStr); | 
|---|
| 207 | GV->setVisibility(GlobalVariable::ProtectedVisibility); | 
|---|
| 208 | appendToUsed(M, Values: {GV}); | 
|---|
| 209 | } | 
|---|
| 210 |  | 
|---|
| 211 | return true; | 
|---|
| 212 | } | 
|---|
| 213 |  | 
|---|
| 214 | static bool createInitOrFiniKernel(Module &M, StringRef GlobalName, | 
|---|
| 215 | bool IsCtor) { | 
|---|
| 216 | GlobalVariable *GV = M.getGlobalVariable(Name: GlobalName); | 
|---|
| 217 | if (!GV || !GV->hasInitializer()) | 
|---|
| 218 | return false; | 
|---|
| 219 |  | 
|---|
| 220 | if (!createInitOrFiniGlobals(M, GV, IsCtor)) | 
|---|
| 221 | return false; | 
|---|
| 222 |  | 
|---|
| 223 | if (!CreateKernels) | 
|---|
| 224 | return true; | 
|---|
| 225 |  | 
|---|
| 226 | Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor); | 
|---|
| 227 | if (!InitOrFiniKernel) | 
|---|
| 228 | return false; | 
|---|
| 229 |  | 
|---|
| 230 | createInitOrFiniCalls(F&: *InitOrFiniKernel, IsCtor); | 
|---|
| 231 |  | 
|---|
| 232 | GV->eraseFromParent(); | 
|---|
| 233 | return true; | 
|---|
| 234 | } | 
|---|
| 235 |  | 
|---|
| 236 | static bool lowerCtorsAndDtors(Module &M) { | 
|---|
| 237 | bool Modified = false; | 
|---|
| 238 | Modified |= createInitOrFiniKernel(M, GlobalName: "llvm.global_ctors", /*IsCtor =*/true); | 
|---|
| 239 | Modified |= createInitOrFiniKernel(M, GlobalName: "llvm.global_dtors", /*IsCtor =*/false); | 
|---|
| 240 | return Modified; | 
|---|
| 241 | } | 
|---|
| 242 |  | 
|---|
| 243 | class NVPTXCtorDtorLoweringLegacy final : public ModulePass { | 
|---|
| 244 | public: | 
|---|
| 245 | static char ID; | 
|---|
| 246 | NVPTXCtorDtorLoweringLegacy() : ModulePass(ID) {} | 
|---|
| 247 | bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); } | 
|---|
| 248 | }; | 
|---|
| 249 |  | 
|---|
| 250 | } // End anonymous namespace | 
|---|
| 251 |  | 
|---|
| 252 | PreservedAnalyses NVPTXCtorDtorLoweringPass::run(Module &M, | 
|---|
| 253 | ModuleAnalysisManager &AM) { | 
|---|
| 254 | return lowerCtorsAndDtors(M) ? PreservedAnalyses::none() | 
|---|
| 255 | : PreservedAnalyses::all(); | 
|---|
| 256 | } | 
|---|
| 257 |  | 
|---|
| 258 | char NVPTXCtorDtorLoweringLegacy::ID = 0; | 
|---|
| 259 | INITIALIZE_PASS(NVPTXCtorDtorLoweringLegacy, DEBUG_TYPE, | 
|---|
| 260 | "Lower ctors and dtors for NVPTX", false, false) | 
|---|
| 261 |  | 
|---|
| 262 | ModulePass *llvm::createNVPTXCtorDtorLoweringLegacyPass() { | 
|---|
| 263 | return new NVPTXCtorDtorLoweringLegacy(); | 
|---|
| 264 | } | 
|---|
| 265 |  | 
|---|