1//===- OffloadWrapper.cpp ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
#include "llvm/Frontend/Offloading/OffloadWrapper.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <cassert>
#include <memory>
#include <utility>
33
34using namespace llvm;
35using namespace llvm::object;
36using namespace llvm::offloading;
37
38namespace {
/// Magic number that begins the section containing the CUDA fatbinary.
constexpr unsigned CudaFatMagic = 0x466243b1;
/// Magic number stored in the wrapper of a HIP fatbinary; the bytes spell
/// "HIPF" in ASCII.
constexpr unsigned HIPFatMagic = 0x48495046;
42
43IntegerType *getSizeTTy(Module &M) {
44 return M.getDataLayout().getIntPtrType(C&: M.getContext());
45}
46
47// struct __tgt_device_image {
48// void *ImageStart;
49// void *ImageEnd;
50// __tgt_offload_entry *EntriesBegin;
51// __tgt_offload_entry *EntriesEnd;
52// };
53StructType *getDeviceImageTy(Module &M) {
54 LLVMContext &C = M.getContext();
55 StructType *ImageTy = StructType::getTypeByName(C, Name: "__tgt_device_image");
56 if (!ImageTy)
57 ImageTy =
58 StructType::create(Name: "__tgt_device_image", elt1: PointerType::getUnqual(C),
59 elts: PointerType::getUnqual(C), elts: PointerType::getUnqual(C),
60 elts: PointerType::getUnqual(C));
61 return ImageTy;
62}
63
64PointerType *getDeviceImagePtrTy(Module &M) {
65 return PointerType::getUnqual(C&: M.getContext());
66}
67
68// struct __tgt_bin_desc {
69// int32_t NumDeviceImages;
70// __tgt_device_image *DeviceImages;
71// __tgt_offload_entry *HostEntriesBegin;
72// __tgt_offload_entry *HostEntriesEnd;
73// };
74StructType *getBinDescTy(Module &M) {
75 LLVMContext &C = M.getContext();
76 StructType *DescTy = StructType::getTypeByName(C, Name: "__tgt_bin_desc");
77 if (!DescTy)
78 DescTy = StructType::create(
79 Name: "__tgt_bin_desc", elt1: Type::getInt32Ty(C), elts: getDeviceImagePtrTy(M),
80 elts: PointerType::getUnqual(C), elts: PointerType::getUnqual(C));
81 return DescTy;
82}
83
84PointerType *getBinDescPtrTy(Module &M) {
85 return PointerType::getUnqual(C&: M.getContext());
86}
87
88/// Creates binary descriptor for the given device images. Binary descriptor
89/// is an object that is passed to the offloading runtime at program startup
90/// and it describes all device images available in the executable or shared
91/// library. It is defined as follows
92///
93/// __attribute__((visibility("hidden")))
94/// extern __tgt_offload_entry *__start_omp_offloading_entries;
95/// __attribute__((visibility("hidden")))
96/// extern __tgt_offload_entry *__stop_omp_offloading_entries;
97///
98/// static const char Image0[] = { <Bufs.front() contents> };
99/// ...
100/// static const char ImageN[] = { <Bufs.back() contents> };
101///
102/// static const __tgt_device_image Images[] = {
103/// {
104/// Image0, /*ImageStart*/
105/// Image0 + sizeof(Image0), /*ImageEnd*/
106/// __start_omp_offloading_entries, /*EntriesBegin*/
107/// __stop_omp_offloading_entries /*EntriesEnd*/
108/// },
109/// ...
110/// {
111/// ImageN, /*ImageStart*/
112/// ImageN + sizeof(ImageN), /*ImageEnd*/
113/// __start_omp_offloading_entries, /*EntriesBegin*/
114/// __stop_omp_offloading_entries /*EntriesEnd*/
115/// }
116/// };
117///
118/// static const __tgt_bin_desc BinDesc = {
119/// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/
120/// Images, /*DeviceImages*/
121/// __start_omp_offloading_entries, /*HostEntriesBegin*/
122/// __stop_omp_offloading_entries /*HostEntriesEnd*/
123/// };
124///
125/// Global variable that represents BinDesc is returned.
126GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs,
127 EntryArrayTy EntryArray, StringRef Suffix,
128 bool Relocatable) {
129 LLVMContext &C = M.getContext();
130 auto [EntriesB, EntriesE] = EntryArray;
131
132 auto *Zero = ConstantInt::get(Ty: getSizeTTy(M), V: 0u);
133
134 // Create initializer for the images array.
135 SmallVector<Constant *, 4u> ImagesInits;
136 ImagesInits.reserve(N: Bufs.size());
137 for (ArrayRef<char> Buf : Bufs) {
138 // We embed the full offloading entry so the binary utilities can parse it.
139 auto *Data = ConstantDataArray::get(Context&: C, Elts: Buf);
140 auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant=*/true,
141 GlobalVariable::InternalLinkage, Data,
142 ".omp_offloading.device_image" + Suffix);
143 Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
144 Image->setSection(Relocatable ? ".llvm.offloading.relocatable"
145 : ".llvm.offloading");
146 Image->setAlignment(Align(object::OffloadBinary::getAlignment()));
147
148 StringRef Binary(Buf.data(), Buf.size());
149
150 uint64_t BeginOffset = 0;
151 uint64_t EndOffset = Binary.size();
152
153 // Optionally use an offload binary for its offload dumping support.
154 // The device image struct contains the pointer to the beginning and end of
155 // the image stored inside of the offload binary. There should only be one
156 // of these for each buffer so we parse it out manually.
157 if (identify_magic(magic: Binary) == file_magic::offload_binary) {
158 const auto *Header =
159 reinterpret_cast<const object::OffloadBinary::Header *>(
160 Binary.bytes_begin());
161 const auto *Entry =
162 reinterpret_cast<const object::OffloadBinary::Entry *>(
163 Binary.bytes_begin() + Header->EntriesOffset);
164 BeginOffset = Entry->ImageOffset;
165 EndOffset = Entry->ImageOffset + Entry->ImageSize;
166 }
167
168 auto *Begin = ConstantInt::get(Ty: getSizeTTy(M), V: BeginOffset);
169 auto *Size = ConstantInt::get(Ty: getSizeTTy(M), V: EndOffset);
170 Constant *ZeroBegin[] = {Zero, Begin};
171 Constant *ZeroSize[] = {Zero, Size};
172
173 auto *ImageB =
174 ConstantExpr::getGetElementPtr(Ty: Image->getValueType(), C: Image, IdxList: ZeroBegin);
175 auto *ImageE =
176 ConstantExpr::getGetElementPtr(Ty: Image->getValueType(), C: Image, IdxList: ZeroSize);
177
178 ImagesInits.push_back(Elt: ConstantStruct::get(T: getDeviceImageTy(M), Vs: ImageB,
179 Vs: ImageE, Vs: EntriesB, Vs: EntriesE));
180 }
181
182 // Then create images array.
183 auto *ImagesData = ConstantArray::get(
184 T: ArrayType::get(ElementType: getDeviceImageTy(M), NumElements: ImagesInits.size()), V: ImagesInits);
185
186 auto *Images =
187 new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
188 GlobalValue::InternalLinkage, ImagesData,
189 ".omp_offloading.device_images" + Suffix);
190 Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
191
192 // And finally create the binary descriptor object.
193 auto *DescInit = ConstantStruct::get(
194 T: getBinDescTy(M),
195 Vs: ConstantInt::get(Ty: Type::getInt32Ty(C), V: ImagesInits.size()), Vs: Images,
196 Vs: EntriesB, Vs: EntriesE);
197
198 return new GlobalVariable(M, DescInit->getType(), /*isConstant=*/true,
199 GlobalValue::InternalLinkage, DescInit,
200 ".omp_offloading.descriptor" + Suffix);
201}
202
203Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
204 StringRef Suffix) {
205 LLVMContext &C = M.getContext();
206 auto *FuncTy = FunctionType::get(Result: Type::getVoidTy(C), /*isVarArg*/ false);
207 auto *Func =
208 Function::Create(Ty: FuncTy, Linkage: GlobalValue::InternalLinkage,
209 N: ".omp_offloading.descriptor_unreg" + Suffix, M: &M);
210 Func->setSection(".text.startup");
211
212 // Get __tgt_unregister_lib function declaration.
213 auto *UnRegFuncTy = FunctionType::get(Result: Type::getVoidTy(C), Params: getBinDescPtrTy(M),
214 /*isVarArg*/ false);
215 FunctionCallee UnRegFuncC =
216 M.getOrInsertFunction(Name: "__tgt_unregister_lib", T: UnRegFuncTy);
217
218 // Construct function body
219 IRBuilder<> Builder(BasicBlock::Create(Context&: C, Name: "entry", Parent: Func));
220 Builder.CreateCall(Callee: UnRegFuncC, Args: BinDesc);
221 Builder.CreateRetVoid();
222
223 return Func;
224}
225
226void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
227 StringRef Suffix) {
228 LLVMContext &C = M.getContext();
229 auto *FuncTy = FunctionType::get(Result: Type::getVoidTy(C), /*isVarArg*/ false);
230 auto *Func = Function::Create(Ty: FuncTy, Linkage: GlobalValue::InternalLinkage,
231 N: ".omp_offloading.descriptor_reg" + Suffix, M: &M);
232 Func->setSection(".text.startup");
233
234 // Get __tgt_register_lib function declaration.
235 auto *RegFuncTy = FunctionType::get(Result: Type::getVoidTy(C), Params: getBinDescPtrTy(M),
236 /*isVarArg*/ false);
237 FunctionCallee RegFuncC =
238 M.getOrInsertFunction(Name: "__tgt_register_lib", T: RegFuncTy);
239
240 auto *AtExitTy = FunctionType::get(
241 Result: Type::getInt32Ty(C), Params: PointerType::getUnqual(C), /*isVarArg=*/false);
242 FunctionCallee AtExit = M.getOrInsertFunction(Name: "atexit", T: AtExitTy);
243
244 Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix);
245
246 // Construct function body
247 IRBuilder<> Builder(BasicBlock::Create(Context&: C, Name: "entry", Parent: Func));
248
249 Builder.CreateCall(Callee: RegFuncC, Args: BinDesc);
250
251 // Register the destructors with 'atexit'. This is expected by the CUDA
252 // runtime and ensures that we clean up before dynamic objects are destroyed.
253 // This needs to be done after plugin initialization to ensure that it is
254 // called before the plugin runtime is destroyed.
255 Builder.CreateCall(Callee: AtExit, Args: UnregFunc);
256 Builder.CreateRetVoid();
257
258 // Add this function to constructors.
259 appendToGlobalCtors(M, F: Func, /*Priority=*/101);
260}
261
262// struct fatbin_wrapper {
263// int32_t magic;
264// int32_t version;
265// void *image;
266// void *reserved;
267//};
268StructType *getFatbinWrapperTy(Module &M) {
269 LLVMContext &C = M.getContext();
270 StructType *FatbinTy = StructType::getTypeByName(C, Name: "fatbin_wrapper");
271 if (!FatbinTy)
272 FatbinTy = StructType::create(
273 Name: "fatbin_wrapper", elt1: Type::getInt32Ty(C), elts: Type::getInt32Ty(C),
274 elts: PointerType::getUnqual(C), elts: PointerType::getUnqual(C));
275 return FatbinTy;
276}
277
278/// Embed the image \p Image into the module \p M so it can be found by the
279/// runtime.
280GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
281 StringRef Suffix) {
282 LLVMContext &C = M.getContext();
283 llvm::Type *Int8PtrTy = PointerType::getUnqual(C);
284 const llvm::Triple &Triple = M.getTargetTriple();
285
286 // Create the global string containing the fatbinary.
287 StringRef FatbinConstantSection =
288 IsHIP ? ".hip_fatbin"
289 : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
290 auto *Data = ConstantDataArray::get(Context&: C, Elts: Image);
291 auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
292 GlobalVariable::InternalLinkage, Data,
293 ".fatbin_image" + Suffix);
294 Fatbin->setSection(FatbinConstantSection);
295
296 // Create the fatbinary wrapper
297 StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment"
298 : Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
299 : ".nvFatBinSegment";
300 Constant *FatbinWrapper[] = {
301 ConstantInt::get(Ty: Type::getInt32Ty(C), V: IsHIP ? HIPFatMagic : CudaFatMagic),
302 ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1),
303 ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: Fatbin, Ty: Int8PtrTy),
304 ConstantPointerNull::get(T: PointerType::getUnqual(C))};
305
306 Constant *FatbinInitializer =
307 ConstantStruct::get(T: getFatbinWrapperTy(M), V: FatbinWrapper);
308
309 auto *FatbinDesc =
310 new GlobalVariable(M, getFatbinWrapperTy(M),
311 /*isConstant*/ true, GlobalValue::InternalLinkage,
312 FatbinInitializer, ".fatbin_wrapper" + Suffix);
313 FatbinDesc->setSection(FatbinWrapperSection);
314 FatbinDesc->setAlignment(Align(8));
315
316 return FatbinDesc;
317}
318
319/// Create the register globals function. We will iterate all of the offloading
320/// entries stored at the begin / end symbols and register them according to
321/// their type. This creates the following function in IR:
322///
323/// extern struct __tgt_offload_entry __start_cuda_offloading_entries;
324/// extern struct __tgt_offload_entry __stop_cuda_offloading_entries;
325///
326/// extern void __cudaRegisterFunction(void **, void *, void *, void *, int,
327/// void *, void *, void *, void *, int *);
328/// extern void __cudaRegisterVar(void **, void *, void *, void *, int32_t,
329/// int64_t, int32_t, int32_t);
330///
331/// void __cudaRegisterTest(void **fatbinHandle) {
332/// for (struct __tgt_offload_entry *entry = &__start_cuda_offloading_entries;
333/// entry != &__stop_cuda_offloading_entries; ++entry) {
334/// if (entry->Kind != OFK_CUDA)
335/// continue
336///
337/// if (!entry->Size)
338/// __cudaRegisterFunction(fatbinHandle, entry->addr, entry->name,
339/// entry->name, -1, 0, 0, 0, 0, 0);
340/// else
341/// __cudaRegisterVar(fatbinHandle, entry->addr, entry->name, entry->name,
342/// 0, entry->size, 0, 0);
343/// }
344/// }
345Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
346 EntryArrayTy EntryArray,
347 StringRef Suffix,
348 bool EmitSurfacesAndTextures) {
349 LLVMContext &C = M.getContext();
350 auto [EntriesB, EntriesE] = EntryArray;
351
352 // Get the __cudaRegisterFunction function declaration.
353 PointerType *Int8PtrTy = PointerType::get(C, AddressSpace: 0);
354 PointerType *Int8PtrPtrTy = PointerType::get(C, AddressSpace: 0);
355 PointerType *Int32PtrTy = PointerType::get(C, AddressSpace: 0);
356 auto *RegFuncTy = FunctionType::get(
357 Result: Type::getInt32Ty(C),
358 Params: {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
359 Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy},
360 /*isVarArg*/ false);
361 FunctionCallee RegFunc = M.getOrInsertFunction(
362 Name: IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", T: RegFuncTy);
363
364 // Get the __cudaRegisterVar function declaration.
365 auto *RegVarTy = FunctionType::get(
366 Result: Type::getVoidTy(C),
367 Params: {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
368 getSizeTTy(M), Type::getInt32Ty(C), Type::getInt32Ty(C)},
369 /*isVarArg*/ false);
370 FunctionCallee RegVar = M.getOrInsertFunction(
371 Name: IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", T: RegVarTy);
372
373 // Get the __cudaRegisterSurface function declaration.
374 FunctionType *RegManagedVarTy =
375 FunctionType::get(Result: Type::getVoidTy(C),
376 Params: {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
377 getSizeTTy(M), Type::getInt32Ty(C)},
378 /*isVarArg=*/false);
379 FunctionCallee RegManagedVar = M.getOrInsertFunction(
380 Name: IsHIP ? "__hipRegisterManagedVar" : "__cudaRegisterManagedVar",
381 T: RegManagedVarTy);
382
383 // Get the __cudaRegisterSurface function declaration.
384 FunctionType *RegSurfaceTy =
385 FunctionType::get(Result: Type::getVoidTy(C),
386 Params: {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
387 Type::getInt32Ty(C), Type::getInt32Ty(C)},
388 /*isVarArg=*/false);
389 FunctionCallee RegSurface = M.getOrInsertFunction(
390 Name: IsHIP ? "__hipRegisterSurface" : "__cudaRegisterSurface", T: RegSurfaceTy);
391
392 // Get the __cudaRegisterTexture function declaration.
393 FunctionType *RegTextureTy = FunctionType::get(
394 Result: Type::getVoidTy(C),
395 Params: {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
396 Type::getInt32Ty(C), Type::getInt32Ty(C)},
397 /*isVarArg=*/false);
398 FunctionCallee RegTexture = M.getOrInsertFunction(
399 Name: IsHIP ? "__hipRegisterTexture" : "__cudaRegisterTexture", T: RegTextureTy);
400
401 auto *RegGlobalsTy = FunctionType::get(Result: Type::getVoidTy(C), Params: Int8PtrPtrTy,
402 /*isVarArg*/ false);
403 auto *RegGlobalsFn =
404 Function::Create(Ty: RegGlobalsTy, Linkage: GlobalValue::InternalLinkage,
405 N: IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", M: &M);
406 RegGlobalsFn->setSection(".text.startup");
407
408 // Create the loop to register all the entries.
409 IRBuilder<> Builder(BasicBlock::Create(Context&: C, Name: "entry", Parent: RegGlobalsFn));
410 auto *EntryBB = BasicBlock::Create(Context&: C, Name: "while.entry", Parent: RegGlobalsFn);
411 auto *IfKindBB = BasicBlock::Create(Context&: C, Name: "if.kind", Parent: RegGlobalsFn);
412 auto *IfThenBB = BasicBlock::Create(Context&: C, Name: "if.then", Parent: RegGlobalsFn);
413 auto *IfElseBB = BasicBlock::Create(Context&: C, Name: "if.else", Parent: RegGlobalsFn);
414 auto *SwGlobalBB = BasicBlock::Create(Context&: C, Name: "sw.global", Parent: RegGlobalsFn);
415 auto *SwManagedBB = BasicBlock::Create(Context&: C, Name: "sw.managed", Parent: RegGlobalsFn);
416 auto *SwSurfaceBB = BasicBlock::Create(Context&: C, Name: "sw.surface", Parent: RegGlobalsFn);
417 auto *SwTextureBB = BasicBlock::Create(Context&: C, Name: "sw.texture", Parent: RegGlobalsFn);
418 auto *IfEndBB = BasicBlock::Create(Context&: C, Name: "if.end", Parent: RegGlobalsFn);
419 auto *ExitBB = BasicBlock::Create(Context&: C, Name: "while.end", Parent: RegGlobalsFn);
420
421 auto *EntryCmp = Builder.CreateICmpNE(LHS: EntriesB, RHS: EntriesE);
422 Builder.CreateCondBr(Cond: EntryCmp, True: EntryBB, False: ExitBB);
423 Builder.SetInsertPoint(EntryBB);
424 auto *Entry = Builder.CreatePHI(Ty: PointerType::getUnqual(C), NumReservedValues: 2, Name: "entry");
425 auto *AddrPtr =
426 Builder.CreateInBoundsGEP(Ty: offloading::getEntryTy(M), Ptr: Entry,
427 IdxList: {ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0),
428 ConstantInt::get(Ty: Type::getInt32Ty(C), V: 4)});
429 auto *Addr = Builder.CreateLoad(Ty: Int8PtrTy, Ptr: AddrPtr, Name: "addr");
430 auto *AuxAddrPtr =
431 Builder.CreateInBoundsGEP(Ty: offloading::getEntryTy(M), Ptr: Entry,
432 IdxList: {ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0),
433 ConstantInt::get(Ty: Type::getInt32Ty(C), V: 8)});
434 auto *AuxAddr = Builder.CreateLoad(Ty: Int8PtrTy, Ptr: AuxAddrPtr, Name: "aux_addr");
435 auto *KindPtr =
436 Builder.CreateInBoundsGEP(Ty: offloading::getEntryTy(M), Ptr: Entry,
437 IdxList: {ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0),
438 ConstantInt::get(Ty: Type::getInt32Ty(C), V: 2)});
439 auto *Kind = Builder.CreateLoad(Ty: Type::getInt16Ty(C), Ptr: KindPtr, Name: "kind");
440 auto *NamePtr =
441 Builder.CreateInBoundsGEP(Ty: offloading::getEntryTy(M), Ptr: Entry,
442 IdxList: {ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0),
443 ConstantInt::get(Ty: Type::getInt32Ty(C), V: 5)});
444 auto *Name = Builder.CreateLoad(Ty: Int8PtrTy, Ptr: NamePtr, Name: "name");
445 auto *SizePtr =
446 Builder.CreateInBoundsGEP(Ty: offloading::getEntryTy(M), Ptr: Entry,
447 IdxList: {ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0),
448 ConstantInt::get(Ty: Type::getInt32Ty(C), V: 6)});
449 auto *Size = Builder.CreateLoad(Ty: Type::getInt64Ty(C), Ptr: SizePtr, Name: "size");
450 auto *FlagsPtr =
451 Builder.CreateInBoundsGEP(Ty: offloading::getEntryTy(M), Ptr: Entry,
452 IdxList: {ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0),
453 ConstantInt::get(Ty: Type::getInt32Ty(C), V: 3)});
454 auto *Flags = Builder.CreateLoad(Ty: Type::getInt32Ty(C), Ptr: FlagsPtr, Name: "flags");
455 auto *DataPtr =
456 Builder.CreateInBoundsGEP(Ty: offloading::getEntryTy(M), Ptr: Entry,
457 IdxList: {ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0),
458 ConstantInt::get(Ty: Type::getInt32Ty(C), V: 7)});
459 auto *Data = Builder.CreateTrunc(
460 V: Builder.CreateLoad(Ty: Type::getInt64Ty(C), Ptr: DataPtr, Name: "data"),
461 DestTy: Type::getInt32Ty(C));
462 auto *Type = Builder.CreateAnd(
463 LHS: Flags, RHS: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0x7), Name: "type");
464
465 // Extract the flags stored in the bit-field and convert them to C booleans.
466 auto *ExternBit = Builder.CreateAnd(
467 LHS: Flags, RHS: ConstantInt::get(Ty: Type::getInt32Ty(C),
468 V: llvm::offloading::OffloadGlobalExtern));
469 auto *Extern = Builder.CreateLShr(
470 LHS: ExternBit, RHS: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 3), Name: "extern");
471 auto *ConstantBit = Builder.CreateAnd(
472 LHS: Flags, RHS: ConstantInt::get(Ty: Type::getInt32Ty(C),
473 V: llvm::offloading::OffloadGlobalConstant));
474 auto *Const = Builder.CreateLShr(
475 LHS: ConstantBit, RHS: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 4), Name: "constant");
476 auto *NormalizedBit = Builder.CreateAnd(
477 LHS: Flags, RHS: ConstantInt::get(Ty: Type::getInt32Ty(C),
478 V: llvm::offloading::OffloadGlobalNormalized));
479 auto *Normalized = Builder.CreateLShr(
480 LHS: NormalizedBit, RHS: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 5), Name: "normalized");
481 auto *KindCond = Builder.CreateICmpEQ(
482 LHS: Kind, RHS: ConstantInt::get(Ty: Type::getInt16Ty(C),
483 V: IsHIP ? object::OffloadKind::OFK_HIP
484 : object::OffloadKind::OFK_Cuda));
485 Builder.CreateCondBr(Cond: KindCond, True: IfKindBB, False: IfEndBB);
486 Builder.SetInsertPoint(IfKindBB);
487 auto *FnCond = Builder.CreateICmpEQ(
488 LHS: Size, RHS: ConstantInt::getNullValue(Ty: Type::getInt64Ty(C)));
489 Builder.CreateCondBr(Cond: FnCond, True: IfThenBB, False: IfElseBB);
490
491 // Create kernel registration code.
492 Builder.SetInsertPoint(IfThenBB);
493 Builder.CreateCall(
494 Callee: RegFunc,
495 Args: {RegGlobalsFn->arg_begin(), Addr, Name, Name,
496 ConstantInt::getAllOnesValue(Ty: Type::getInt32Ty(C)),
497 ConstantPointerNull::get(T: Int8PtrTy), ConstantPointerNull::get(T: Int8PtrTy),
498 ConstantPointerNull::get(T: Int8PtrTy), ConstantPointerNull::get(T: Int8PtrTy),
499 ConstantPointerNull::get(T: Int32PtrTy)});
500 Builder.CreateBr(Dest: IfEndBB);
501 Builder.SetInsertPoint(IfElseBB);
502
503 auto *Switch = Builder.CreateSwitch(V: Type, Dest: IfEndBB);
504 // Create global variable registration code.
505 Builder.SetInsertPoint(SwGlobalBB);
506 Builder.CreateCall(Callee: RegVar,
507 Args: {RegGlobalsFn->arg_begin(), Addr, Name, Name, Extern, Size,
508 Const, ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0)});
509 Builder.CreateBr(Dest: IfEndBB);
510 Switch->addCase(OnVal: Builder.getInt32(C: llvm::offloading::OffloadGlobalEntry),
511 Dest: SwGlobalBB);
512
513 // Create managed variable registration code.
514 Builder.SetInsertPoint(SwManagedBB);
515 Builder.CreateCall(Callee: RegManagedVar, Args: {RegGlobalsFn->arg_begin(), AuxAddr, Addr,
516 Name, Size, Data});
517 Builder.CreateBr(Dest: IfEndBB);
518 Switch->addCase(OnVal: Builder.getInt32(C: llvm::offloading::OffloadGlobalManagedEntry),
519 Dest: SwManagedBB);
520 // Create surface variable registration code.
521 Builder.SetInsertPoint(SwSurfaceBB);
522 if (EmitSurfacesAndTextures)
523 Builder.CreateCall(Callee: RegSurface, Args: {RegGlobalsFn->arg_begin(), Addr, Name, Name,
524 Data, Extern});
525 Builder.CreateBr(Dest: IfEndBB);
526 Switch->addCase(OnVal: Builder.getInt32(C: llvm::offloading::OffloadGlobalSurfaceEntry),
527 Dest: SwSurfaceBB);
528
529 // Create texture variable registration code.
530 Builder.SetInsertPoint(SwTextureBB);
531 if (EmitSurfacesAndTextures)
532 Builder.CreateCall(Callee: RegTexture, Args: {RegGlobalsFn->arg_begin(), Addr, Name, Name,
533 Data, Normalized, Extern});
534 Builder.CreateBr(Dest: IfEndBB);
535 Switch->addCase(OnVal: Builder.getInt32(C: llvm::offloading::OffloadGlobalTextureEntry),
536 Dest: SwTextureBB);
537
538 Builder.SetInsertPoint(IfEndBB);
539 auto *NewEntry = Builder.CreateInBoundsGEP(
540 Ty: offloading::getEntryTy(M), Ptr: Entry, IdxList: ConstantInt::get(Ty: getSizeTTy(M), V: 1));
541 auto *Cmp = Builder.CreateICmpEQ(LHS: NewEntry, RHS: EntriesE);
542 Entry->addIncoming(V: EntriesB, BB: &RegGlobalsFn->getEntryBlock());
543 Entry->addIncoming(V: NewEntry, BB: IfEndBB);
544 Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: EntryBB);
545 Builder.SetInsertPoint(ExitBB);
546 Builder.CreateRetVoid();
547
548 return RegGlobalsFn;
549}
550
551// Create the constructor and destructor to register the fatbinary with the CUDA
552// runtime.
553void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
554 bool IsHIP, EntryArrayTy EntryArray,
555 StringRef Suffix,
556 bool EmitSurfacesAndTextures) {
557 LLVMContext &C = M.getContext();
558 auto *CtorFuncTy = FunctionType::get(Result: Type::getVoidTy(C), /*isVarArg*/ false);
559 auto *CtorFunc = Function::Create(
560 Ty: CtorFuncTy, Linkage: GlobalValue::InternalLinkage,
561 N: (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, M: &M);
562 CtorFunc->setSection(".text.startup");
563
564 auto *DtorFuncTy = FunctionType::get(Result: Type::getVoidTy(C), /*isVarArg*/ false);
565 auto *DtorFunc = Function::Create(
566 Ty: DtorFuncTy, Linkage: GlobalValue::InternalLinkage,
567 N: (IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, M: &M);
568 DtorFunc->setSection(".text.startup");
569
570 auto *PtrTy = PointerType::getUnqual(C);
571
572 // Get the __cudaRegisterFatBinary function declaration.
573 auto *RegFatTy = FunctionType::get(Result: PtrTy, Params: PtrTy, /*isVarArg=*/false);
574 FunctionCallee RegFatbin = M.getOrInsertFunction(
575 Name: IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", T: RegFatTy);
576 // Get the __cudaRegisterFatBinaryEnd function declaration.
577 auto *RegFatEndTy =
578 FunctionType::get(Result: Type::getVoidTy(C), Params: PtrTy, /*isVarArg=*/false);
579 FunctionCallee RegFatbinEnd =
580 M.getOrInsertFunction(Name: "__cudaRegisterFatBinaryEnd", T: RegFatEndTy);
581 // Get the __cudaUnregisterFatBinary function declaration.
582 auto *UnregFatTy =
583 FunctionType::get(Result: Type::getVoidTy(C), Params: PtrTy, /*isVarArg=*/false);
584 FunctionCallee UnregFatbin = M.getOrInsertFunction(
585 Name: IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary",
586 T: UnregFatTy);
587
588 auto *AtExitTy =
589 FunctionType::get(Result: Type::getInt32Ty(C), Params: PtrTy, /*isVarArg=*/false);
590 FunctionCallee AtExit = M.getOrInsertFunction(Name: "atexit", T: AtExitTy);
591
592 auto *BinaryHandleGlobal = new llvm::GlobalVariable(
593 M, PtrTy, false, llvm::GlobalValue::InternalLinkage,
594 llvm::ConstantPointerNull::get(T: PtrTy),
595 (IsHIP ? ".hip.binary_handle" : ".cuda.binary_handle") + Suffix);
596
597 // Create the constructor to register this image with the runtime.
598 IRBuilder<> CtorBuilder(BasicBlock::Create(Context&: C, Name: "entry", Parent: CtorFunc));
599 CallInst *Handle = CtorBuilder.CreateCall(
600 Callee: RegFatbin,
601 Args: ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: FatbinDesc, Ty: PtrTy));
602 CtorBuilder.CreateAlignedStore(
603 Val: Handle, Ptr: BinaryHandleGlobal,
604 Align: Align(M.getDataLayout().getPointerTypeSize(Ty: PtrTy)));
605 CtorBuilder.CreateCall(Callee: createRegisterGlobalsFunction(M, IsHIP, EntryArray,
606 Suffix,
607 EmitSurfacesAndTextures),
608 Args: Handle);
609 if (!IsHIP)
610 CtorBuilder.CreateCall(Callee: RegFatbinEnd, Args: Handle);
611 CtorBuilder.CreateCall(Callee: AtExit, Args: DtorFunc);
612 CtorBuilder.CreateRetVoid();
613
614 // Create the destructor to unregister the image with the runtime. We cannot
615 // use a standard global destructor after CUDA 9.2 so this must be called by
616 // `atexit()` instead.
617 IRBuilder<> DtorBuilder(BasicBlock::Create(Context&: C, Name: "entry", Parent: DtorFunc));
618 LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad(
619 Ty: PtrTy, Ptr: BinaryHandleGlobal,
620 Align: Align(M.getDataLayout().getPointerTypeSize(Ty: PtrTy)));
621 DtorBuilder.CreateCall(Callee: UnregFatbin, Args: BinaryHandle);
622 DtorBuilder.CreateRetVoid();
623
624 // Add this function to constructors.
625 appendToGlobalCtors(M, F: CtorFunc, /*Priority=*/101);
626}
627
628/// SYCLWrapper helper class that creates all LLVM IRs wrapping given images.
629class SYCLWrapper {
630public:
631 SYCLWrapper(Module &M, const SYCLJITOptions &Options)
632 : M(M), C(M.getContext()), Options(Options) {
633 EntryTy = offloading::getEntryTy(M);
634 SyclDeviceImageTy = getSyclDeviceImageTy();
635 SyclBinDescTy = getSyclBinDescTy();
636 }
637
638 /// Creates binary descriptor for the given device images. Binary descriptor
639 /// is an object that is passed to the offloading runtime at program startup
640 /// and it describes all device images available in the executable or shared
641 /// library. It is defined as follows:
642 ///
643 /// \code
644 /// __attribute__((visibility("hidden")))
645 /// __tgt_offload_entry *__sycl_offload_entries_arr0[];
646 /// ...
647 /// __attribute__((visibility("hidden")))
648 /// __tgt_offload_entry *__sycl_offload_entries_arrN[];
649 ///
650 /// __attribute__((visibility("hidden")))
651 /// extern const char *CompileOptions = "...";
652 /// ...
653 /// __attribute__((visibility("hidden")))
654 /// extern const char *LinkOptions = "...";
655 /// ...
656 ///
657 /// static const char Image0[] = { ... };
658 /// ...
659 /// static const char ImageN[] = { ... };
660 ///
661 /// static const __sycl.tgt_device_image Images[] = {
662 /// {
663 /// Version, // Version
664 /// OffloadKind, // OffloadKind
665 /// Format, // Format of the image.
666 // TripleString, // Arch
667 /// CompileOptions, // CompileOptions
668 /// LinkOptions, // LinkOptions
669 /// Image0, // ImageStart
670 /// Image0 + IMAGE0_SIZE, // ImageEnd
671 /// __sycl_offload_entries_arr0, // EntriesBegin
672 /// __sycl_offload_entries_arr0 + ENTRIES0_SIZE, // EntriesEnd
673 /// NULL, // PropertiesBegin
674 /// NULL, // PropertiesEnd
675 /// },
676 /// ...
677 /// };
678 ///
679 /// static const __sycl.tgt_bin_desc FatbinDesc = {
680 /// Version, //Version
681 /// sizeof(Images) / sizeof(Images[0]), //NumDeviceImages
682 /// Images, //DeviceImages
683 /// NULL, //HostEntriesBegin
684 /// NULL //HostEntriesEnd
685 /// };
686 /// \endcode
687 ///
688 /// \returns Global variable that represents FatbinDesc.
689 GlobalVariable *createFatbinDesc(ArrayRef<OffloadFile> OffloadFiles) {
690 StringRef OffloadKindTag = ".sycl_offloading.";
691 SmallVector<Constant *> WrappedImages;
692 WrappedImages.reserve(N: OffloadFiles.size());
693 for (size_t I = 0, E = OffloadFiles.size(); I != E; ++I)
694 WrappedImages.push_back(
695 Elt: wrapImage(OB: *OffloadFiles[I].getBinary(), ImageID: Twine(I), OffloadKindTag));
696
697 return combineWrappedImages(WrappedImages, OffloadKindTag);
698 }
699
700 void createRegisterFatbinFunction(GlobalVariable *FatbinDesc) {
701 FunctionType *FuncTy =
702 FunctionType::get(Result: Type::getVoidTy(C), /*isVarArg*/ false);
703 Function *Func = Function::Create(Ty: FuncTy, Linkage: GlobalValue::InternalLinkage,
704 N: Twine("sycl") + ".descriptor_reg", M: &M);
705 Func->setSection(".text.startup");
706
707 // Get RegFuncName function declaration.
708 FunctionType *RegFuncTy =
709 FunctionType::get(Result: Type::getVoidTy(C), Params: PointerType::getUnqual(C),
710 /*isVarArg=*/false);
711 FunctionCallee RegFuncC =
712 M.getOrInsertFunction(Name: "__sycl_register_lib", T: RegFuncTy);
713
714 // Construct function body.
715 IRBuilder Builder(BasicBlock::Create(Context&: C, Name: "entry", Parent: Func));
716 Builder.CreateCall(Callee: RegFuncC, Args: FatbinDesc);
717 Builder.CreateRetVoid();
718
719 // Add this function to constructors.
720 appendToGlobalCtors(M, F: Func, /*Priority*/ 1);
721 }
722
723 void createUnregisterFunction(GlobalVariable *FatbinDesc) {
724 FunctionType *FuncTy =
725 FunctionType::get(Result: Type::getVoidTy(C), /*isVarArg*/ false);
726 Function *Func = Function::Create(Ty: FuncTy, Linkage: GlobalValue::InternalLinkage,
727 N: "sycl.descriptor_unreg", M: &M);
728 Func->setSection(".text.startup");
729
730 // Get UnregFuncName function declaration.
731 FunctionType *UnRegFuncTy =
732 FunctionType::get(Result: Type::getVoidTy(C), Params: PointerType::getUnqual(C),
733 /*isVarArg=*/false);
734 FunctionCallee UnRegFuncC =
735 M.getOrInsertFunction(Name: "__sycl_unregister_lib", T: UnRegFuncTy);
736
737 // Construct function body
738 IRBuilder<> Builder(BasicBlock::Create(Context&: C, Name: "entry", Parent: Func));
739 Builder.CreateCall(Callee: UnRegFuncC, Args: FatbinDesc);
740 Builder.CreateRetVoid();
741
742 // Add this function to global destructors.
743 appendToGlobalDtors(M, F: Func, /*Priority*/ 1);
744 }
745
746private:
747 IntegerType *getSizeTTy() {
748 switch (M.getDataLayout().getPointerSize()) {
749 case 4:
750 return Type::getInt32Ty(C);
751 case 8:
752 return Type::getInt64Ty(C);
753 }
754 llvm_unreachable("unsupported pointer type size");
755 }
756
757 SmallVector<Constant *, 2> getSizetConstPair(size_t First, size_t Second) {
758 IntegerType *SizeTTy = getSizeTTy();
759 return SmallVector<Constant *, 2>{ConstantInt::get(Ty: SizeTTy, V: First),
760 ConstantInt::get(Ty: SizeTTy, V: Second)};
761 }
762
763 /// Note: Properties aren't supported and the support is going
764 /// to be added later.
765 /// Creates a structure corresponding to:
766 /// SYCL specific image descriptor type.
767 /// \code
768 /// struct __sycl.tgt_device_image {
769 /// // Version of this structure - for backward compatibility;
770 /// // all modifications which change order/type/offsets of existing fields
771 /// // should increment the version.
772 /// uint16_t Version;
773 /// // The kind of offload model the image employs.
774 /// uint8_t OffloadKind;
775 /// // Format of the image data - SPIRV, LLVMIR bitcode, etc.
776 /// uint8_t Format;
777 /// // Null-terminated string representation of the device's target
778 /// // architecture.
779 /// const char *Arch;
780 /// // A null-terminated string; target- and compiler-specific options
781 /// // which are passed to the device compiler at runtime.
782 /// const char *CompileOptions;
783 /// // A null-terminated string; target- and compiler-specific options
784 /// // which are passed to the device linker at runtime.
785 /// const char *LinkOptions;
786 /// // Pointer to the device binary image start.
787 /// void *ImageStart;
788 /// // Pointer to the device binary image end.
789 /// void *ImageEnd;
790 /// // The entry table.
791 /// __tgt_offload_entry *EntriesBegin;
792 /// __tgt_offload_entry *EntriesEnd;
793 /// const char *PropertiesBegin;
794 /// const char *PropertiesEnd;
795 /// };
796 /// \endcode
797 StructType *getSyclDeviceImageTy() {
798 return StructType::create(
799 Elements: {
800 Type::getInt16Ty(C), // Version
801 Type::getInt8Ty(C), // OffloadKind
802 Type::getInt8Ty(C), // Format
803 PointerType::getUnqual(C), // Arch
804 PointerType::getUnqual(C), // CompileOptions
805 PointerType::getUnqual(C), // LinkOptions
806 PointerType::getUnqual(C), // ImageStart
807 PointerType::getUnqual(C), // ImageEnd
808 PointerType::getUnqual(C), // EntriesBegin
809 PointerType::getUnqual(C), // EntriesEnd
810 PointerType::getUnqual(C), // PropertiesBegin
811 PointerType::getUnqual(C) // PropertiesEnd
812 },
813 Name: "__sycl.tgt_device_image");
814 }
815
816 /// Creates a structure for SYCL specific binary descriptor type. Corresponds
817 /// to:
818 ///
819 /// \code
820 /// struct __sycl.tgt_bin_desc {
821 /// // version of this structure - for backward compatibility;
822 /// // all modifications which change order/type/offsets of existing fields
823 /// // should increment the version.
824 /// uint16_t Version;
825 /// uint16_t NumDeviceImages;
826 /// __sycl.tgt_device_image *DeviceImages;
827 /// // the offload entry table
828 /// __tgt_offload_entry *HostEntriesBegin;
829 /// __tgt_offload_entry *HostEntriesEnd;
830 /// };
831 /// \endcode
832 StructType *getSyclBinDescTy() {
833 return StructType::create(
834 Elements: {Type::getInt16Ty(C), Type::getInt16Ty(C), PointerType::getUnqual(C),
835 PointerType::getUnqual(C), PointerType::getUnqual(C)},
836 Name: "__sycl.tgt_bin_desc");
837 }
838
839 /// Adds a global readonly variable that is initialized by given
840 /// \p Initializer to the module.
841 GlobalVariable *addGlobalArrayVariable(const Twine &Name,
842 ArrayRef<char> Initializer,
843 const Twine &Section = "") {
844 Constant *Arr = ConstantDataArray::get(Context&: M.getContext(), Elts: Initializer);
845 GlobalVariable *Var =
846 new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
847 GlobalVariable::InternalLinkage, Arr, Name);
848 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
849
850 SmallVector<char, 32> NameBuf;
851 StringRef SectionName = Section.toStringRef(Out&: NameBuf);
852 if (!SectionName.empty())
853 Var->setSection(SectionName);
854 return Var;
855 }
856
857 /// Adds given \p Buf as a global variable into the module.
858 /// \returns Pair of pointers that point at the beginning and the end of the
859 /// variable.
860 std::pair<Constant *, Constant *>
861 addArrayToModule(ArrayRef<char> Buf, const Twine &Name,
862 const Twine &Section = "") {
863 GlobalVariable *Var = addGlobalArrayVariable(Name, Initializer: Buf, Section);
864 Constant *ImageB = ConstantExpr::getGetElementPtr(Ty: Var->getValueType(), C: Var,
865 IdxList: getSizetConstPair(First: 0, Second: 0));
866 Constant *ImageE = ConstantExpr::getGetElementPtr(
867 Ty: Var->getValueType(), C: Var, IdxList: getSizetConstPair(First: 0, Second: Buf.size()));
868 return std::make_pair(x&: ImageB, y&: ImageE);
869 }
870
871 /// Adds given \p Data as constant byte array in the module.
872 /// \returns Constant pointer to the added data. The pointer type does not
873 /// carry size information.
874 Constant *addRawDataToModule(ArrayRef<char> Data, const Twine &Name) {
875 GlobalVariable *Var = addGlobalArrayVariable(Name, Initializer: Data);
876 Constant *DataPtr = ConstantExpr::getGetElementPtr(Ty: Var->getValueType(), C: Var,
877 IdxList: getSizetConstPair(First: 0, Second: 0));
878 return DataPtr;
879 }
880
881 /// Creates a global variable of const char* type and creates an
882 /// initializer that initializes it with \p Str.
883 ///
884 /// \returns Link-time constant pointer (constant expr) to that
885 /// variable.
886 Constant *addStringToModule(StringRef Str, const Twine &Name) {
887 Constant *Arr = ConstantDataArray::getString(Context&: C, Initializer: Str);
888 GlobalVariable *Var =
889 new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
890 GlobalVariable::InternalLinkage, Arr, Name);
891 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
892 return Var;
893 }
894
895 /// Each image contains its own set of symbols, which may contain different
896 /// symbols than other images. This function constructs an array of
897 /// symbol entries for a particular image.
898 ///
899 /// \returns Pointers to the beginning and end of the array.
900 std::pair<Constant *, Constant *>
901 initOffloadEntriesPerImage(StringRef Entries, const Twine &OffloadKindTag) {
902 SmallVector<Constant *> EntriesInits;
903 const char *Current = Entries.data();
904 const char *End = Current + Entries.size();
905 while (Current < End) {
906 StringRef Name(Current);
907 Current += Name.size() + 1;
908
909 if (Name.empty())
910 continue;
911
912 GlobalVariable *GV = emitOffloadingEntry(
913 M, /*Kind*/ OffloadKind::OFK_SYCL,
914 Addr: Constant::getNullValue(Ty: PointerType::getUnqual(C)), Name, /*Size*/ 0,
915 /*Flags*/ 0, /*Data*/ 0);
916 EntriesInits.push_back(Elt: GV->getInitializer());
917 }
918
919 Constant *Arr = ConstantArray::get(
920 T: ArrayType::get(ElementType: EntryTy, NumElements: EntriesInits.size()), V: EntriesInits);
921 GlobalVariable *EntriesGV = new GlobalVariable(
922 M, Arr->getType(), /*isConstant*/ true, GlobalVariable::InternalLinkage,
923 Arr, OffloadKindTag + "entries_arr");
924
925 Constant *EntriesB = ConstantExpr::getGetElementPtr(
926 Ty: EntriesGV->getValueType(), C: EntriesGV, IdxList: getSizetConstPair(First: 0, Second: 0));
927 Constant *EntriesE = ConstantExpr::getGetElementPtr(
928 Ty: EntriesGV->getValueType(), C: EntriesGV,
929 IdxList: getSizetConstPair(First: 0, Second: EntriesInits.size()));
930 return std::make_pair(x&: EntriesB, y&: EntriesE);
931 }
932
933 Constant *wrapImage(const OffloadBinary &OB, const Twine &ImageID,
934 StringRef OffloadKindTag) {
935 // Note: Intel DPC++ compiler had 2 versions of this structure
936 // and clang++ has a third different structure. To avoid ABI incompatibility
937 // between generated device images the Version here starts from 3.
938 constexpr uint16_t DeviceImageStructVersion = 3;
939 Constant *Version =
940 ConstantInt::get(Ty: Type::getInt16Ty(C), V: DeviceImageStructVersion);
941 Constant *OffloadKindConstant = ConstantInt::get(
942 Ty: Type::getInt8Ty(C), V: static_cast<uint8_t>(OB.getOffloadKind()));
943 Constant *ImageKindConstant = ConstantInt::get(
944 Ty: Type::getInt8Ty(C), V: static_cast<uint8_t>(OB.getImageKind()));
945 StringRef Triple = OB.getString(Key: "triple");
946 Constant *TripleConstant =
947 addStringToModule(Str: Triple, Name: Twine(OffloadKindTag) + "target." + ImageID);
948 Constant *CompileOptions =
949 addStringToModule(Str: Options.CompileOptions,
950 Name: Twine(OffloadKindTag) + "opts.compile." + ImageID);
951 Constant *LinkOptions = addStringToModule(
952 Str: Options.LinkOptions, Name: Twine(OffloadKindTag) + "opts.link." + ImageID);
953
954 // Note: NULL for now.
955 std::pair<Constant *, Constant *> PropertiesConstants = {
956 Constant::getNullValue(Ty: PointerType::getUnqual(C)),
957 Constant::getNullValue(Ty: PointerType::getUnqual(C))};
958
959 StringRef RawImage = OB.getImage();
960 std::pair<Constant *, Constant *> Binary = addArrayToModule(
961 Buf: ArrayRef<char>(RawImage.begin(), RawImage.end()),
962 Name: Twine(OffloadKindTag) + ImageID + ".data", Section: ".llvm.offloading");
963
964 // For SYCL images offload entries are defined here per image.
965 std::pair<Constant *, Constant *> ImageEntriesPtrs =
966 initOffloadEntriesPerImage(Entries: OB.getString(Key: "symbols"), OffloadKindTag);
967
968 // .first and .second arguments below correspond to start and end pointers
969 // respectively.
970 Constant *WrappedBinary = ConstantStruct::get(
971 T: SyclDeviceImageTy, Vs: Version, Vs: OffloadKindConstant, Vs: ImageKindConstant,
972 Vs: TripleConstant, Vs: CompileOptions, Vs: LinkOptions, Vs: Binary.first,
973 Vs: Binary.second, Vs: ImageEntriesPtrs.first, Vs: ImageEntriesPtrs.second,
974 Vs: PropertiesConstants.first, Vs: PropertiesConstants.second);
975
976 return WrappedBinary;
977 }
978
979 GlobalVariable *combineWrappedImages(ArrayRef<Constant *> WrappedImages,
980 StringRef OffloadKindTag) {
981 Constant *ImagesData = ConstantArray::get(
982 T: ArrayType::get(ElementType: SyclDeviceImageTy, NumElements: WrappedImages.size()), V: WrappedImages);
983 GlobalVariable *ImagesGV =
984 new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
985 GlobalValue::InternalLinkage, ImagesData,
986 Twine(OffloadKindTag) + "device_images");
987 ImagesGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
988
989 Constant *EntriesB = Constant::getNullValue(Ty: PointerType::getUnqual(C));
990 Constant *EntriesE = Constant::getNullValue(Ty: PointerType::getUnqual(C));
991 static constexpr uint16_t BinDescStructVersion = 1;
992 Constant *DescInit = ConstantStruct::get(
993 T: SyclBinDescTy,
994 Vs: ConstantInt::get(Ty: Type::getInt16Ty(C), V: BinDescStructVersion),
995 Vs: ConstantInt::get(Ty: Type::getInt16Ty(C), V: WrappedImages.size()), Vs: ImagesGV,
996 Vs: EntriesB, Vs: EntriesE);
997
998 return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true,
999 GlobalValue::InternalLinkage, DescInit,
1000 Twine(OffloadKindTag) + "descriptor");
1001 }
1002
1003 Module &M;
1004 LLVMContext &C;
1005 SYCLJITOptions Options;
1006
1007 StructType *EntryTy = nullptr;
1008 StructType *SyclDeviceImageTy = nullptr;
1009 StructType *SyclBinDescTy = nullptr;
1010}; // end of SYCLWrapper
1011
1012} // namespace
1013
1014Error offloading::wrapOpenMPBinaries(Module &M, ArrayRef<ArrayRef<char>> Images,
1015 EntryArrayTy EntryArray,
1016 llvm::StringRef Suffix, bool Relocatable) {
1017 GlobalVariable *Desc =
1018 createBinDesc(M, Bufs: Images, EntryArray, Suffix, Relocatable);
1019 if (!Desc)
1020 return createStringError(EC: inconvertibleErrorCode(),
1021 S: "No binary descriptors created.");
1022 createRegisterFunction(M, BinDesc: Desc, Suffix);
1023 return Error::success();
1024}
1025
1026Error offloading::wrapCudaBinary(Module &M, ArrayRef<char> Image,
1027 EntryArrayTy EntryArray,
1028 llvm::StringRef Suffix,
1029 bool EmitSurfacesAndTextures) {
1030 GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/IsHIP: false, Suffix);
1031 if (!Desc)
1032 return createStringError(EC: inconvertibleErrorCode(),
1033 S: "No fatbin section created.");
1034
1035 createRegisterFatbinFunction(M, FatbinDesc: Desc, /*IsHip=*/IsHIP: false, EntryArray, Suffix,
1036 EmitSurfacesAndTextures);
1037 return Error::success();
1038}
1039
1040Error offloading::wrapHIPBinary(Module &M, ArrayRef<char> Image,
1041 EntryArrayTy EntryArray, llvm::StringRef Suffix,
1042 bool EmitSurfacesAndTextures) {
1043 GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/IsHIP: true, Suffix);
1044 if (!Desc)
1045 return createStringError(EC: inconvertibleErrorCode(),
1046 S: "No fatbin section created.");
1047
1048 createRegisterFatbinFunction(M, FatbinDesc: Desc, /*IsHip=*/IsHIP: true, EntryArray, Suffix,
1049 EmitSurfacesAndTextures);
1050 return Error::success();
1051}
1052
1053Error llvm::offloading::wrapSYCLBinaries(llvm::Module &M, ArrayRef<char> Buffer,
1054 SYCLJITOptions Options) {
1055 SYCLWrapper W(M, Options);
1056 MemoryBufferRef MBR(StringRef(Buffer.begin(), Buffer.size()),
1057 /*Identifier*/ "");
1058 SmallVector<OffloadFile> OffloadFiles;
1059 if (Error E = extractOffloadBinaries(Buffer: MBR, Binaries&: OffloadFiles))
1060 return E;
1061
1062 GlobalVariable *Desc = W.createFatbinDesc(OffloadFiles);
1063 if (!Desc)
1064 return createStringError(EC: inconvertibleErrorCode(),
1065 S: "No binary descriptors created.");
1066
1067 W.createRegisterFatbinFunction(FatbinDesc: Desc);
1068 W.createUnregisterFunction(FatbinDesc: Desc);
1069 return Error::success();
1070}
1071