1//===- Utility.cpp ------ Collection of generic offloading utilities ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Frontend/Offloading/Utility.h"
10#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
11#include "llvm/BinaryFormat/ELF.h"
12#include "llvm/BinaryFormat/MsgPackDocument.h"
13#include "llvm/IR/Constants.h"
14#include "llvm/IR/GlobalValue.h"
15#include "llvm/IR/GlobalVariable.h"
16#include "llvm/IR/Value.h"
17#include "llvm/Object/ELFObjectFile.h"
18#include "llvm/Object/OffloadBinary.h"
19#include "llvm/ObjectYAML/ELFYAML.h"
20#include "llvm/ObjectYAML/yaml2obj.h"
21#include "llvm/Support/MemoryBufferRef.h"
22#include "llvm/Transforms/Utils/ModuleUtils.h"
23
24using namespace llvm;
25using namespace llvm::offloading;
26using namespace llvm::offloading::sycl;
27
28StructType *offloading::getEntryTy(Module &M) {
29 LLVMContext &C = M.getContext();
30 StructType *EntryTy =
31 StructType::getTypeByName(C, Name: "struct.__tgt_offload_entry");
32 if (!EntryTy)
33 EntryTy = StructType::create(
34 Name: "struct.__tgt_offload_entry", elt1: Type::getInt64Ty(C), elts: Type::getInt16Ty(C),
35 elts: Type::getInt16Ty(C), elts: Type::getInt32Ty(C), elts: PointerType::getUnqual(C),
36 elts: PointerType::getUnqual(C), elts: Type::getInt64Ty(C), elts: Type::getInt64Ty(C),
37 elts: PointerType::getUnqual(C));
38 return EntryTy;
39}
40
41std::pair<Constant *, GlobalVariable *>
42offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
43 Constant *Addr, StringRef Name,
44 uint64_t Size, uint32_t Flags,
45 uint64_t Data, Constant *AuxAddr) {
46 const llvm::Triple &Triple = M.getTargetTriple();
47 Type *PtrTy = PointerType::getUnqual(C&: M.getContext());
48 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
49 Type *Int32Ty = Type::getInt32Ty(C&: M.getContext());
50 Type *Int16Ty = Type::getInt16Ty(C&: M.getContext());
51
52 Constant *AddrName = ConstantDataArray::getString(Context&: M.getContext(), Initializer: Name);
53
54 StringRef Prefix =
55 Triple.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name";
56
57 // Create the constant string used to look up the symbol in the device.
58 auto *Str =
59 new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
60 GlobalValue::InternalLinkage, AddrName, Prefix);
61 StringRef SectionName = ".llvm.rodata.offloading";
62 Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
63 Str->setSection(SectionName);
64 Str->setAlignment(Align(1));
65
66 // Make a metadata node for these constants so it can be queried from IR.
67 NamedMDNode *MD = M.getOrInsertNamedMetadata(Name: "llvm.offloading.symbols");
68 Metadata *MDVals[] = {ConstantAsMetadata::get(C: Str)};
69 MD->addOperand(M: llvm::MDNode::get(Context&: M.getContext(), MDs: MDVals));
70
71 // Construct the offloading entry.
72 Constant *EntryData[] = {
73 ConstantExpr::getNullValue(Ty: Int64Ty),
74 ConstantInt::get(Ty: Int16Ty, V: 1),
75 ConstantInt::get(Ty: Int16Ty, V: Kind),
76 ConstantInt::get(Ty: Int32Ty, V: Flags),
77 ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: Addr, Ty: PtrTy),
78 ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: Str, Ty: PtrTy),
79 ConstantInt::get(Ty: Int64Ty, V: Size),
80 ConstantInt::get(Ty: Int64Ty, V: Data),
81 AuxAddr ? ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: AuxAddr, Ty: PtrTy)
82 : ConstantExpr::getNullValue(Ty: PtrTy)};
83 Constant *EntryInitializer = ConstantStruct::get(T: getEntryTy(M), V: EntryData);
84 return {EntryInitializer, Str};
85}
86
87StringRef offloading::getOffloadEntrySection(Module &M) {
88 return M.getTargetTriple().isOSBinFormatMachO() ? "__LLVM,offload_entries"
89 : "llvm_offload_entries";
90}
91
92/// Returns the start/end symbol names for iterating offloading entries in a
93/// given section. Mach-O uses \1section$start$/\1section$end$ convention;
94/// ELF/COFF use __start_/__stop_ prefixes.
95static std::pair<std::string, std::string>
96getOffloadEntryBoundarySymbols(const Triple &T, StringRef SectionName) {
97 if (T.isOSBinFormatMachO()) {
98 std::string SymSection = SectionName.str();
99 std::replace(first: SymSection.begin(), last: SymSection.end(), old_value: ',', new_value: '$');
100 return {"\1section$start$" + SymSection, "\1section$end$" + SymSection};
101 }
102 return {("__start_" + SectionName).str(), ("__stop_" + SectionName).str()};
103}
104
105GlobalVariable *offloading::emitOffloadingEntry(
106 Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name,
107 uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr) {
108 const llvm::Triple &Triple = M.getTargetTriple();
109 StringRef SectionName = getOffloadEntrySection(M);
110
111 auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
112 M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);
113
114 StringRef Prefix =
115 Triple.isNVPTX() ? "$offloading$entry$" : ".offloading.entry.";
116 auto *Entry = new GlobalVariable(
117 M, getEntryTy(M),
118 /*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer,
119 Prefix + Name, nullptr, GlobalValue::NotThreadLocal,
120 M.getDataLayout().getDefaultGlobalsAddressSpace());
121
122 // The entry has to be created in the section the linker expects it to be.
123 if (Triple.isOSBinFormatCOFF())
124 Entry->setSection((SectionName + "$OE").str());
125 else
126 Entry->setSection(SectionName);
127 Entry->setAlignment(Align(object::OffloadBinary::getAlignment()));
128 return Entry;
129}
130
131std::pair<GlobalVariable *, GlobalVariable *>
132offloading::getOffloadEntryArray(Module &M) {
133 const llvm::Triple &Triple = M.getTargetTriple();
134 StringRef SectionName = getOffloadEntrySection(M);
135
136 auto *ZeroInitilaizer =
137 ConstantAggregateZero::get(Ty: ArrayType::get(ElementType: getEntryTy(M), NumElements: 0u));
138 auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr;
139 auto *EntryType = ArrayType::get(ElementType: getEntryTy(M), NumElements: 0);
140 auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
141 : GlobalValue::ExternalLinkage;
142
143 auto [StartName, StopName] =
144 getOffloadEntryBoundarySymbols(T: Triple, SectionName);
145
146 auto *EntriesB = new GlobalVariable(M, EntryType, /*isConstant=*/true,
147 Linkage, EntryInit, StartName);
148 EntriesB->setVisibility(GlobalValue::HiddenVisibility);
149 auto *EntriesE = new GlobalVariable(M, EntryType, /*isConstant=*/true,
150 Linkage, EntryInit, StopName);
151 EntriesE->setVisibility(GlobalValue::HiddenVisibility);
152
153 if (Triple.isOSBinFormatELF()) {
154 // We assume that external begin/end symbols that we have created above will
155 // be defined by the linker. This is done whenever a section name with a
156 // valid C-identifier is present. We define a dummy variable here to force
157 // the linker to always provide these symbols.
158 auto *DummyEntry = new GlobalVariable(
159 M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
160 ZeroInitilaizer, "__dummy." + SectionName);
161 DummyEntry->setSection(SectionName);
162 DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
163 appendToCompilerUsed(M, Values: DummyEntry);
164 } else if (Triple.isOSBinFormatMachO()) {
165 // Mach-O needs a dummy variable in the section (like ELF) to ensure the
166 // linker provides the section boundary symbols.
167 auto *DummyEntry = new GlobalVariable(
168 M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
169 ZeroInitilaizer, "__dummy." + SectionName);
170 DummyEntry->setSection(SectionName);
171 DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
172 appendToCompilerUsed(M, Values: DummyEntry);
173 } else {
174 // The COFF linker will merge sections containing a '$' together into a
175 // single section. The order of entries in this section will be sorted
176 // alphabetically by the characters following the '$' in the name. Set the
177 // sections here to ensure that the beginning and end symbols are sorted.
178 EntriesB->setSection((SectionName + "$OA").str());
179 EntriesE->setSection((SectionName + "$OZ").str());
180 }
181
182 return std::make_pair(x&: EntriesB, y&: EntriesE);
183}
184
185bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
186 uint32_t ImageFlags,
187 StringRef EnvTargetID) {
188 using namespace llvm::ELF;
189 StringRef EnvArch = EnvTargetID.split(Separator: ":").first;
190
191 // Trivial check if the base processors match.
192 if (EnvArch != ImageArch)
193 return false;
194
195 // Check if the image is requesting xnack on or off.
196 switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
197 case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
198 // The image is 'xnack-' so the environment must be 'xnack-'.
199 if (!EnvTargetID.contains(Other: "xnack-"))
200 return false;
201 break;
202 case EF_AMDGPU_FEATURE_XNACK_ON_V4:
203 // The image is 'xnack+' so the environment must be 'xnack+'.
204 if (!EnvTargetID.contains(Other: "xnack+"))
205 return false;
206 break;
207 case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
208 case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
209 default:
210 break;
211 }
212
213 // Check if the image is requesting sramecc on or off.
214 switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
215 case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
216 // The image is 'sramecc-' so the environment must be 'sramecc-'.
217 if (!EnvTargetID.contains(Other: "sramecc-"))
218 return false;
219 break;
220 case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
221 // The image is 'sramecc+' so the environment must be 'sramecc+'.
222 if (!EnvTargetID.contains(Other: "sramecc+"))
223 return false;
224 break;
225 case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
226 case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
227 break;
228 }
229
230 return true;
231}
232
233namespace {
234/// Reads the AMDGPU specific per-kernel-metadata from an image.
235class KernelInfoReader {
236public:
237 KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
238 : KernelInfoMap(KIM) {}
239
240 /// Process ELF note to read AMDGPU metadata from respective information
241 /// fields.
242 Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
243 if (Note.getName() != "AMDGPU")
244 return Error::success(); // We are not interested in other things
245
246 assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
247 "Parse AMDGPU MetaData");
248 auto Desc = Note.getDesc(Align);
249 StringRef MsgPackString =
250 StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
251 msgpack::Document MsgPackDoc;
252 if (!MsgPackDoc.readFromBlob(Blob: MsgPackString, /*Multi=*/false))
253 return Error::success();
254
255 AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
256 if (!Verifier.verify(HSAMetadataRoot&: MsgPackDoc.getRoot()))
257 return Error::success();
258
259 auto RootMap = MsgPackDoc.getRoot().getMap(Convert: true);
260
261 if (auto Err = iterateAMDKernels(MDN&: RootMap))
262 return Err;
263
264 return Error::success();
265 }
266
267private:
268 /// Extracts the relevant information via simple string look-up in the msgpack
269 /// document elements.
270 Error
271 extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
272 std::string &KernelName,
273 offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
274 if (!V.first.isString())
275 return Error::success();
276
277 const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
278 return DK.getString() == SK;
279 };
280
281 const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
282 uint32_t *Vals) {
283 assert(DN.isArray() && "MsgPack DocNode is an array node");
284 auto DNA = DN.getArray();
285 assert(DNA.size() == 3 && "ArrayNode has at most three elements");
286
287 int I = 0;
288 for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
289 ++DNABegin) {
290 Vals[I++] = DNABegin->getUInt();
291 }
292 };
293
294 if (IsKey(V.first, ".name")) {
295 KernelName = V.second.toString();
296 } else if (IsKey(V.first, ".sgpr_count")) {
297 KernelData.SGPRCount = V.second.getUInt();
298 } else if (IsKey(V.first, ".sgpr_spill_count")) {
299 KernelData.SGPRSpillCount = V.second.getUInt();
300 } else if (IsKey(V.first, ".vgpr_count")) {
301 KernelData.VGPRCount = V.second.getUInt();
302 } else if (IsKey(V.first, ".vgpr_spill_count")) {
303 KernelData.VGPRSpillCount = V.second.getUInt();
304 } else if (IsKey(V.first, ".agpr_count")) {
305 KernelData.AGPRCount = V.second.getUInt();
306 } else if (IsKey(V.first, ".private_segment_fixed_size")) {
307 KernelData.PrivateSegmentSize = V.second.getUInt();
308 } else if (IsKey(V.first, ".group_segment_fixed_size")) {
309 KernelData.GroupSegmentList = V.second.getUInt();
310 } else if (IsKey(V.first, ".reqd_workgroup_size")) {
311 GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
312 } else if (IsKey(V.first, ".workgroup_size_hint")) {
313 GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
314 } else if (IsKey(V.first, ".wavefront_size")) {
315 KernelData.WavefrontSize = V.second.getUInt();
316 } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
317 KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
318 } else if (IsKey(V.first, ".args")) {
319 auto ArgsArray = V.second.getArray();
320 for (auto ArgIt = ArgsArray.begin(), ArgEnd = ArgsArray.end();
321 ArgIt != ArgEnd; ++ArgIt) {
322 auto ArgMap = ArgIt->getMap();
323
324 auto OffsetIt = ArgMap.find(Key: ".offset");
325 if (OffsetIt == ArgMap.end())
326 return createStringError(
327 EC: inconvertibleErrorCode(),
328 S: "Missing required .offset key in kernel argument metadata map");
329
330 auto SizeIt = ArgMap.find(Key: ".size");
331 if (SizeIt == ArgMap.end())
332 return createStringError(
333 EC: inconvertibleErrorCode(),
334 S: "Missing required .size key in kernel argument metadata map");
335
336 KernelData.ArgMDs.emplace_back(Args&: OffsetIt->second.getUInt(),
337 Args&: SizeIt->second.getUInt());
338 }
339 }
340
341 return Error::success();
342 }
343
344 /// Get the "amdhsa.kernels" element from the msgpack Document
345 Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
346 auto Res = MDN.find(Key: "amdhsa.kernels");
347 if (Res == MDN.end())
348 return createStringError(EC: inconvertibleErrorCode(),
349 S: "Could not find amdhsa.kernels key");
350
351 auto Pair = *Res;
352 assert(Pair.second.isArray() &&
353 "AMDGPU kernel entries are arrays of entries");
354
355 return Pair.second.getArray();
356 }
357
358 /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
359 /// MapDocNode that either maps a string to a single value (most of them) or
360 /// to another array of things. Currently, we only handle the case that maps
361 /// to scalar value.
362 Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
363 offloading::amdgpu::AMDGPUKernelMetaData KernelData;
364 std::string KernelName;
365 auto Entry = (*It).getMap();
366 for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
367 if (auto Err = extractKernelData(V: *MI, KernelName, KernelData))
368 return Err;
369
370 KernelInfoMap.insert(KV: {KernelName, KernelData});
371 return Error::success();
372 }
373
374 /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
375 Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
376 auto KernelsOrErr = getAMDKernelsArray(MDN);
377 if (auto Err = KernelsOrErr.takeError())
378 return Err;
379
380 auto KernelsArr = *KernelsOrErr;
381 for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
382 if (!It->isMap())
383 continue; // we expect <key,value> pairs
384
385 // Obtain the value for the different entries. Each array entry is a
386 // MapDocNode
387 if (auto Err = generateKernelInfo(It))
388 return Err;
389 }
390 return Error::success();
391 }
392
393 // Kernel names are the keys
394 StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
395};
396} // namespace
397
398Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
399 MemoryBufferRef MemBuffer,
400 StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
401 uint16_t &ELFABIVersion) {
402 Error Err = Error::success(); // Used later as out-parameter
403
404 auto ELFOrError = object::ELF64LEFile::create(Object: MemBuffer.getBuffer());
405 if (auto Err = ELFOrError.takeError())
406 return Err;
407
408 const object::ELF64LEFile ELFObj = ELFOrError.get();
409 Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
410 if (!Sections)
411 return Sections.takeError();
412 KernelInfoReader Reader(KernelInfoMap);
413
414 // Read the code object version from ELF image header
415 auto Header = ELFObj.getHeader();
416 ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
417 for (const auto &S : *Sections) {
418 if (S.sh_type != ELF::SHT_NOTE)
419 continue;
420
421 for (const auto N : ELFObj.notes(Shdr: S, Err)) {
422 if (Err)
423 return Err;
424 // Fills the KernelInfoTabel entries in the reader
425 if ((Err = Reader.processNote(Note: N, Align: S.sh_addralign)))
426 return Err;
427 }
428 }
429 return Error::success();
430}
431
432Error offloading::containerizeImage(std::unique_ptr<MemoryBuffer> &Img,
433 llvm::Triple Triple,
434 object::ImageKind ImageKind,
435 object::OffloadKind OffloadKind,
436 int32_t ImageFlags,
437 MapVector<StringRef, StringRef> &MetaData) {
438 using namespace object;
439
440 // Create inner OffloadBinary containing the raw image.
441 OffloadBinary::OffloadingImage InnerImage;
442 InnerImage.TheImageKind = ImageKind;
443 InnerImage.TheOffloadKind = OffloadKind;
444 InnerImage.Flags = ImageFlags;
445
446 InnerImage.StringData["triple"] = Triple.getTriple();
447 for (const auto &[Key, Value] : MetaData)
448 InnerImage.StringData[Key] = Value;
449
450 InnerImage.Image = std::move(Img);
451
452 SmallString<0> InnerBinaryData = OffloadBinary::write(OffloadingData: InnerImage);
453
454 Img = MemoryBuffer::getMemBufferCopy(InputData: InnerBinaryData);
455 return Error::success();
456}
457
458Error offloading::intel::containerizeOpenMPSPIRVImage(
459 std::unique_ptr<MemoryBuffer> &Binary, llvm::Triple Triple,
460 StringRef CompileOpts, StringRef LinkOpts) {
461 constexpr char INTEL_ONEOMP_OFFLOAD_VERSION[] = "1.0";
462
463 assert(Triple.isSPIRV() && Triple.getVendor() == llvm::Triple::Intel &&
464 "Expected SPIR-V triple with Intel vendor");
465
466 MapVector<StringRef, StringRef> MetaData;
467 MetaData["version"] = INTEL_ONEOMP_OFFLOAD_VERSION;
468 if (!CompileOpts.empty())
469 MetaData["compile-opts"] = CompileOpts;
470 if (!LinkOpts.empty())
471 MetaData["link-opts"] = LinkOpts;
472
473 return containerizeImage(Img&: Binary, Triple, ImageKind: object::ImageKind::IMG_SPIRV,
474 OffloadKind: object::OffloadKind::OFK_OpenMP, /*ImageFlags=*/0,
475 MetaData);
476}
477
478void sycl::writeSymbolTable(ArrayRef<StringRef> Names, SmallString<0> &Out) {
479 uint32_t Count = Names.size();
480
481 // Compute the byte offset where string data begins: right after the header
482 // and the entry array.
483 uint32_t StringDataOffset =
484 sizeof(SymbolTableHeader) + Count * sizeof(SymbolTableEntry);
485
486 // Compute total size and reserve to prevent reallocation while writing
487 // entries via pointer (append() could otherwise invalidate the pointer).
488 uint32_t TotalSize = StringDataOffset;
489 for (StringRef N : Names)
490 TotalSize += N.size() + 1;
491 Out.reserve(N: TotalSize);
492 Out.resize(N: StringDataOffset);
493
494 // Write the header.
495 auto *Header = reinterpret_cast<SymbolTableHeader *>(Out.data());
496 Header->Count = Count;
497
498 // Write each entry and append the corresponding null-terminated name.
499 auto *Entries = reinterpret_cast<SymbolTableEntry *>(Header + 1);
500 uint32_t CurrentOffset = StringDataOffset;
501 for (uint32_t I = 0; I < Count; ++I) {
502 Entries[I].OffsetToSymbol = CurrentOffset;
503 Entries[I].SymbolSize = Names[I].size();
504 Out.append(RHS: Names[I]);
505 Out.push_back(Elt: '\0');
506 CurrentOffset += Names[I].size() + 1;
507 }
508}
509