| 1 | //===-- AMDGPUCtorDtorLowering.cpp - Handle global ctors and dtors --------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | /// | 
|---|
| 9 | /// \file | 
|---|
| 10 | /// This pass creates a unified init and fini kernel with the required metadata | 
|---|
| 11 | //===----------------------------------------------------------------------===// | 
|---|
| 12 |  | 
|---|
| 13 | #include "AMDGPUCtorDtorLowering.h" | 
|---|
| 14 | #include "AMDGPU.h" | 
|---|
| 15 | #include "llvm/IR/Constants.h" | 
|---|
| 16 | #include "llvm/IR/Function.h" | 
|---|
| 17 | #include "llvm/IR/GlobalVariable.h" | 
|---|
| 18 | #include "llvm/IR/IRBuilder.h" | 
|---|
| 19 | #include "llvm/IR/Module.h" | 
|---|
| 20 | #include "llvm/IR/Value.h" | 
|---|
| 21 | #include "llvm/Pass.h" | 
|---|
| 22 | #include "llvm/Transforms/Utils/ModuleUtils.h" | 
|---|
| 23 |  | 
|---|
| 24 | using namespace llvm; | 
|---|
| 25 |  | 
|---|
| 26 | #define DEBUG_TYPE "amdgpu-lower-ctor-dtor" | 
|---|
| 27 |  | 
|---|
| 28 | namespace { | 
|---|
| 29 |  | 
|---|
| 30 | static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) { | 
|---|
| 31 | StringRef InitOrFiniKernelName = "amdgcn.device.init"; | 
|---|
| 32 | if (!IsCtor) | 
|---|
| 33 | InitOrFiniKernelName = "amdgcn.device.fini"; | 
|---|
| 34 | if (M.getFunction(Name: InitOrFiniKernelName)) | 
|---|
| 35 | return nullptr; | 
|---|
| 36 |  | 
|---|
| 37 | Function *InitOrFiniKernel = Function::createWithDefaultAttr( | 
|---|
| 38 | Ty: FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()), isVarArg: false), | 
|---|
| 39 | Linkage: GlobalValue::WeakODRLinkage, AddrSpace: 0, N: InitOrFiniKernelName, M: &M); | 
|---|
| 40 | InitOrFiniKernel->setCallingConv(CallingConv::AMDGPU_KERNEL); | 
|---|
| 41 | InitOrFiniKernel->addFnAttr(Kind: "amdgpu-flat-work-group-size", Val: "1,1"); | 
|---|
| 42 | if (IsCtor) | 
|---|
| 43 | InitOrFiniKernel->addFnAttr(Kind: "device-init"); | 
|---|
| 44 | else | 
|---|
| 45 | InitOrFiniKernel->addFnAttr(Kind: "device-fini"); | 
|---|
| 46 | return InitOrFiniKernel; | 
|---|
| 47 | } | 
|---|
| 48 |  | 
|---|
| 49 | // The linker will provide the associated symbols to allow us to traverse the | 
|---|
| 50 | // global constructors / destructors in priority order. We create the IR | 
|---|
| 51 | // required to call each callback in this section. This is equivalent to the | 
|---|
| 52 | // following code. | 
|---|
| 53 | // | 
|---|
| 54 | // extern "C" void * __init_array_start[]; | 
|---|
| 55 | // extern "C" void * __init_array_end[]; | 
|---|
| 56 | // extern "C" void * __fini_array_start[]; | 
|---|
| 57 | // extern "C" void * __fini_array_end[]; | 
|---|
| 58 | // | 
|---|
| 59 | // using InitCallback = void(); | 
|---|
| 60 | // using FiniCallback = void(void); | 
|---|
| 61 | // | 
|---|
| 62 | // void call_init_array_callbacks() { | 
|---|
| 63 | //   for (auto start = __init_array_start; start != __init_array_end; ++start) | 
|---|
| 64 | //     reinterpret_cast<InitCallback *>(*start)(); | 
|---|
| 65 | // } | 
|---|
| 66 | // | 
|---|
| 67 | // void call_fini_array_callbacks() { | 
|---|
| 68 | //  size_t fini_array_size = __fini_array_end - __fini_array_start; | 
|---|
| 69 | //  for (size_t i = fini_array_size; i > 0; --i) | 
|---|
| 70 | //    reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])(); | 
|---|
| 71 | // } | 
|---|
| 72 | static void createInitOrFiniCalls(Function &F, bool IsCtor) { | 
|---|
| 73 | Module &M = *F.getParent(); | 
|---|
| 74 | LLVMContext &C = M.getContext(); | 
|---|
| 75 |  | 
|---|
| 76 | IRBuilder<> IRB(BasicBlock::Create(Context&: C, Name: "entry", Parent: &F)); | 
|---|
| 77 | auto *LoopBB = BasicBlock::Create(Context&: C, Name: "while.entry", Parent: &F); | 
|---|
| 78 | auto *ExitBB = BasicBlock::Create(Context&: C, Name: "while.end", Parent: &F); | 
|---|
| 79 | Type *PtrTy = IRB.getPtrTy(AddrSpace: AMDGPUAS::GLOBAL_ADDRESS); | 
|---|
| 80 | ArrayType *PtrArrayTy = ArrayType::get(ElementType: PtrTy, NumElements: 0); | 
|---|
| 81 |  | 
|---|
| 82 | auto *Begin = M.getOrInsertGlobal( | 
|---|
| 83 | Name: IsCtor ? "__init_array_start": "__fini_array_start", Ty: PtrArrayTy, CreateGlobalCallback: [&]() { | 
|---|
| 84 | return new GlobalVariable( | 
|---|
| 85 | M, PtrArrayTy, | 
|---|
| 86 | /*isConstant=*/true, GlobalValue::ExternalLinkage, | 
|---|
| 87 | /*Initializer=*/nullptr, | 
|---|
| 88 | IsCtor ? "__init_array_start": "__fini_array_start", | 
|---|
| 89 | /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal, | 
|---|
| 90 | /*AddressSpace=*/AMDGPUAS::GLOBAL_ADDRESS); | 
|---|
| 91 | }); | 
|---|
| 92 | auto *End = M.getOrInsertGlobal( | 
|---|
| 93 | Name: IsCtor ? "__init_array_end": "__fini_array_end", Ty: PtrArrayTy, CreateGlobalCallback: [&]() { | 
|---|
| 94 | return new GlobalVariable( | 
|---|
| 95 | M, PtrArrayTy, | 
|---|
| 96 | /*isConstant=*/true, GlobalValue::ExternalLinkage, | 
|---|
| 97 | /*Initializer=*/nullptr, | 
|---|
| 98 | IsCtor ? "__init_array_end": "__fini_array_end", | 
|---|
| 99 | /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal, | 
|---|
| 100 | /*AddressSpace=*/AMDGPUAS::GLOBAL_ADDRESS); | 
|---|
| 101 | }); | 
|---|
| 102 |  | 
|---|
| 103 | // The constructor type is suppoed to allow using the argument vectors, but | 
|---|
| 104 | // for now we just call them with no arguments. | 
|---|
| 105 | auto *CallBackTy = FunctionType::get(Result: IRB.getVoidTy(), isVarArg: {}); | 
|---|
| 106 |  | 
|---|
| 107 | Value *Start = Begin; | 
|---|
| 108 | Value *Stop = End; | 
|---|
| 109 | // The destructor array must be called in reverse order. Get a constant | 
|---|
| 110 | // expression to the end of the array and iterate backwards instead. | 
|---|
| 111 | if (!IsCtor) { | 
|---|
| 112 | Type *Int64Ty = IntegerType::getInt64Ty(C); | 
|---|
| 113 | auto *EndPtr = IRB.CreatePtrToInt(V: End, DestTy: Int64Ty); | 
|---|
| 114 | auto *BeginPtr = IRB.CreatePtrToInt(V: Begin, DestTy: Int64Ty); | 
|---|
| 115 | auto *ByteSize = IRB.CreateSub(LHS: EndPtr, RHS: BeginPtr, Name: "", /*HasNUW=*/true, | 
|---|
| 116 | /*HasNSW=*/true); | 
|---|
| 117 | auto *Size = IRB.CreateAShr(LHS: ByteSize, RHS: ConstantInt::get(Ty: Int64Ty, V: 3), Name: "", | 
|---|
| 118 | /*isExact=*/true); | 
|---|
| 119 | auto *Offset = | 
|---|
| 120 | IRB.CreateSub(LHS: Size, RHS: ConstantInt::get(Ty: Int64Ty, V: 1), Name: "", /*HasNUW=*/true, | 
|---|
| 121 | /*HasNSW=*/true); | 
|---|
| 122 | Start = IRB.CreateInBoundsGEP( | 
|---|
| 123 | Ty: PtrArrayTy, Ptr: Begin, | 
|---|
| 124 | IdxList: ArrayRef<Value *>({ConstantInt::get(Ty: Int64Ty, V: 0), Offset})); | 
|---|
| 125 | Stop = Begin; | 
|---|
| 126 | } | 
|---|
| 127 |  | 
|---|
| 128 | IRB.CreateCondBr( | 
|---|
| 129 | Cond: IRB.CreateCmp(Pred: IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, LHS: Start, | 
|---|
| 130 | RHS: Stop), | 
|---|
| 131 | True: LoopBB, False: ExitBB); | 
|---|
| 132 | IRB.SetInsertPoint(LoopBB); | 
|---|
| 133 | auto *CallBackPHI = IRB.CreatePHI(Ty: PtrTy, NumReservedValues: 2, Name: "ptr"); | 
|---|
| 134 | auto *CallBack = IRB.CreateLoad(Ty: F.getType(), Ptr: CallBackPHI, Name: "callback"); | 
|---|
| 135 | IRB.CreateCall(FTy: CallBackTy, Callee: CallBack); | 
|---|
| 136 | auto *NewCallBack = | 
|---|
| 137 | IRB.CreateConstGEP1_64(Ty: PtrTy, Ptr: CallBackPHI, Idx0: IsCtor ? 1 : -1, Name: "next"); | 
|---|
| 138 | auto *EndCmp = IRB.CreateCmp(Pred: IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT, | 
|---|
| 139 | LHS: NewCallBack, RHS: Stop, Name: "end"); | 
|---|
| 140 | CallBackPHI->addIncoming(V: Start, BB: &F.getEntryBlock()); | 
|---|
| 141 | CallBackPHI->addIncoming(V: NewCallBack, BB: LoopBB); | 
|---|
| 142 | IRB.CreateCondBr(Cond: EndCmp, True: ExitBB, False: LoopBB); | 
|---|
| 143 | IRB.SetInsertPoint(ExitBB); | 
|---|
| 144 | IRB.CreateRetVoid(); | 
|---|
| 145 | } | 
|---|
| 146 |  | 
|---|
| 147 | static bool createInitOrFiniKernel(Module &M, StringRef GlobalName, | 
|---|
| 148 | bool IsCtor) { | 
|---|
| 149 | GlobalVariable *GV = M.getGlobalVariable(Name: GlobalName); | 
|---|
| 150 | if (!GV || !GV->hasInitializer()) | 
|---|
| 151 | return false; | 
|---|
| 152 | ConstantArray *GA = dyn_cast<ConstantArray>(Val: GV->getInitializer()); | 
|---|
| 153 | if (!GA || GA->getNumOperands() == 0) | 
|---|
| 154 | return false; | 
|---|
| 155 |  | 
|---|
| 156 | Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor); | 
|---|
| 157 | if (!InitOrFiniKernel) | 
|---|
| 158 | return false; | 
|---|
| 159 |  | 
|---|
| 160 | createInitOrFiniCalls(F&: *InitOrFiniKernel, IsCtor); | 
|---|
| 161 |  | 
|---|
| 162 | appendToUsed(M, Values: {InitOrFiniKernel}); | 
|---|
| 163 | return true; | 
|---|
| 164 | } | 
|---|
| 165 |  | 
|---|
| 166 | static bool lowerCtorsAndDtors(Module &M) { | 
|---|
| 167 | bool Modified = false; | 
|---|
| 168 | Modified |= createInitOrFiniKernel(M, GlobalName: "llvm.global_ctors", /*IsCtor =*/true); | 
|---|
| 169 | Modified |= createInitOrFiniKernel(M, GlobalName: "llvm.global_dtors", /*IsCtor =*/false); | 
|---|
| 170 | return Modified; | 
|---|
| 171 | } | 
|---|
| 172 |  | 
|---|
| 173 | class AMDGPUCtorDtorLoweringLegacy final : public ModulePass { | 
|---|
| 174 | public: | 
|---|
| 175 | static char ID; | 
|---|
| 176 | AMDGPUCtorDtorLoweringLegacy() : ModulePass(ID) {} | 
|---|
| 177 | bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); } | 
|---|
| 178 | }; | 
|---|
| 179 |  | 
|---|
| 180 | } // End anonymous namespace | 
|---|
| 181 |  | 
|---|
| 182 | PreservedAnalyses AMDGPUCtorDtorLoweringPass::run(Module &M, | 
|---|
| 183 | ModuleAnalysisManager &AM) { | 
|---|
| 184 | return lowerCtorsAndDtors(M) ? PreservedAnalyses::none() | 
|---|
| 185 | : PreservedAnalyses::all(); | 
|---|
| 186 | } | 
|---|
| 187 |  | 
|---|
| 188 | char AMDGPUCtorDtorLoweringLegacy::ID = 0; | 
|---|
| 189 | char &llvm::AMDGPUCtorDtorLoweringLegacyPassID = | 
|---|
| 190 | AMDGPUCtorDtorLoweringLegacy::ID; | 
|---|
| 191 | INITIALIZE_PASS(AMDGPUCtorDtorLoweringLegacy, DEBUG_TYPE, | 
|---|
| 192 | "Lower ctors and dtors for AMDGPU", false, false) | 
|---|
| 193 |  | 
|---|
| 194 | ModulePass *llvm::createAMDGPUCtorDtorLoweringLegacyPass() { | 
|---|
| 195 | return new AMDGPUCtorDtorLoweringLegacy(); | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|