1//===- Utility.cpp ------ Collection of generic offloading utilities ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Frontend/Offloading/Utility.h"
10#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
11#include "llvm/BinaryFormat/ELF.h"
12#include "llvm/BinaryFormat/MsgPackDocument.h"
13#include "llvm/IR/Constants.h"
14#include "llvm/IR/GlobalValue.h"
15#include "llvm/IR/GlobalVariable.h"
16#include "llvm/IR/Value.h"
17#include "llvm/Object/ELFObjectFile.h"
18#include "llvm/ObjectYAML/ELFYAML.h"
19#include "llvm/ObjectYAML/yaml2obj.h"
20#include "llvm/Support/MemoryBufferRef.h"
21#include "llvm/Transforms/Utils/ModuleUtils.h"
22
23using namespace llvm;
24using namespace llvm::offloading;
25
26StructType *offloading::getEntryTy(Module &M) {
27 LLVMContext &C = M.getContext();
28 StructType *EntryTy =
29 StructType::getTypeByName(C, Name: "struct.__tgt_offload_entry");
30 if (!EntryTy)
31 EntryTy = StructType::create(
32 Name: "struct.__tgt_offload_entry", elt1: Type::getInt64Ty(C), elts: Type::getInt16Ty(C),
33 elts: Type::getInt16Ty(C), elts: Type::getInt32Ty(C), elts: PointerType::getUnqual(C),
34 elts: PointerType::getUnqual(C), elts: Type::getInt64Ty(C), elts: Type::getInt64Ty(C),
35 elts: PointerType::getUnqual(C));
36 return EntryTy;
37}
38
39std::pair<Constant *, GlobalVariable *>
40offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
41 Constant *Addr, StringRef Name,
42 uint64_t Size, uint32_t Flags,
43 uint64_t Data, Constant *AuxAddr) {
44 const llvm::Triple &Triple = M.getTargetTriple();
45 Type *PtrTy = PointerType::getUnqual(C&: M.getContext());
46 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
47 Type *Int32Ty = Type::getInt32Ty(C&: M.getContext());
48 Type *Int16Ty = Type::getInt16Ty(C&: M.getContext());
49
50 Constant *AddrName = ConstantDataArray::getString(Context&: M.getContext(), Initializer: Name);
51
52 StringRef Prefix =
53 Triple.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name";
54
55 // Create the constant string used to look up the symbol in the device.
56 auto *Str =
57 new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
58 GlobalValue::InternalLinkage, AddrName, Prefix);
59 StringRef SectionName = ".llvm.rodata.offloading";
60 Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
61 Str->setSection(SectionName);
62 Str->setAlignment(Align(1));
63
64 // Make a metadata node for these constants so it can be queried from IR.
65 NamedMDNode *MD = M.getOrInsertNamedMetadata(Name: "llvm.offloading.symbols");
66 Metadata *MDVals[] = {ConstantAsMetadata::get(C: Str)};
67 MD->addOperand(M: llvm::MDNode::get(Context&: M.getContext(), MDs: MDVals));
68
69 // Construct the offloading entry.
70 Constant *EntryData[] = {
71 ConstantExpr::getNullValue(Ty: Int64Ty),
72 ConstantInt::get(Ty: Int16Ty, V: 1),
73 ConstantInt::get(Ty: Int16Ty, V: Kind),
74 ConstantInt::get(Ty: Int32Ty, V: Flags),
75 ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: Addr, Ty: PtrTy),
76 ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: Str, Ty: PtrTy),
77 ConstantInt::get(Ty: Int64Ty, V: Size),
78 ConstantInt::get(Ty: Int64Ty, V: Data),
79 AuxAddr ? ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: AuxAddr, Ty: PtrTy)
80 : ConstantExpr::getNullValue(Ty: PtrTy)};
81 Constant *EntryInitializer = ConstantStruct::get(T: getEntryTy(M), V: EntryData);
82 return {EntryInitializer, Str};
83}
84
85GlobalVariable *
86offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
87 Constant *Addr, StringRef Name, uint64_t Size,
88 uint32_t Flags, uint64_t Data,
89 Constant *AuxAddr, StringRef SectionName) {
90 const llvm::Triple &Triple = M.getTargetTriple();
91
92 auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
93 M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);
94
95 StringRef Prefix =
96 Triple.isNVPTX() ? "$offloading$entry$" : ".offloading.entry.";
97 auto *Entry = new GlobalVariable(
98 M, getEntryTy(M),
99 /*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer,
100 Prefix + Name, nullptr, GlobalValue::NotThreadLocal,
101 M.getDataLayout().getDefaultGlobalsAddressSpace());
102
103 // The entry has to be created in the section the linker expects it to be.
104 if (Triple.isOSBinFormatCOFF())
105 Entry->setSection((SectionName + "$OE").str());
106 else
107 Entry->setSection(SectionName);
108 Entry->setAlignment(Align(object::OffloadBinary::getAlignment()));
109 return Entry;
110}
111
112std::pair<GlobalVariable *, GlobalVariable *>
113offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
114 const llvm::Triple &Triple = M.getTargetTriple();
115
116 auto *ZeroInitilaizer =
117 ConstantAggregateZero::get(Ty: ArrayType::get(ElementType: getEntryTy(M), NumElements: 0u));
118 auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr;
119 auto *EntryType = ArrayType::get(ElementType: getEntryTy(M), NumElements: 0);
120 auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
121 : GlobalValue::ExternalLinkage;
122
123 auto *EntriesB =
124 new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
125 "__start_" + SectionName);
126 EntriesB->setVisibility(GlobalValue::HiddenVisibility);
127 auto *EntriesE =
128 new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
129 "__stop_" + SectionName);
130 EntriesE->setVisibility(GlobalValue::HiddenVisibility);
131
132 if (Triple.isOSBinFormatELF()) {
133 // We assume that external begin/end symbols that we have created above will
134 // be defined by the linker. This is done whenever a section name with a
135 // valid C-identifier is present. We define a dummy variable here to force
136 // the linker to always provide these symbols.
137 auto *DummyEntry = new GlobalVariable(
138 M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
139 ZeroInitilaizer, "__dummy." + SectionName);
140 DummyEntry->setSection(SectionName);
141 DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
142 appendToCompilerUsed(M, Values: DummyEntry);
143 } else {
144 // The COFF linker will merge sections containing a '$' together into a
145 // single section. The order of entries in this section will be sorted
146 // alphabetically by the characters following the '$' in the name. Set the
147 // sections here to ensure that the beginning and end symbols are sorted.
148 EntriesB->setSection((SectionName + "$OA").str());
149 EntriesE->setSection((SectionName + "$OZ").str());
150 }
151
152 return std::make_pair(x&: EntriesB, y&: EntriesE);
153}
154
155bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
156 uint32_t ImageFlags,
157 StringRef EnvTargetID) {
158 using namespace llvm::ELF;
159 StringRef EnvArch = EnvTargetID.split(Separator: ":").first;
160
161 // Trivial check if the base processors match.
162 if (EnvArch != ImageArch)
163 return false;
164
165 // Check if the image is requesting xnack on or off.
166 switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
167 case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
168 // The image is 'xnack-' so the environment must be 'xnack-'.
169 if (!EnvTargetID.contains(Other: "xnack-"))
170 return false;
171 break;
172 case EF_AMDGPU_FEATURE_XNACK_ON_V4:
173 // The image is 'xnack+' so the environment must be 'xnack+'.
174 if (!EnvTargetID.contains(Other: "xnack+"))
175 return false;
176 break;
177 case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
178 case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
179 default:
180 break;
181 }
182
183 // Check if the image is requesting sramecc on or off.
184 switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
185 case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
186 // The image is 'sramecc-' so the environment must be 'sramecc-'.
187 if (!EnvTargetID.contains(Other: "sramecc-"))
188 return false;
189 break;
190 case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
191 // The image is 'sramecc+' so the environment must be 'sramecc+'.
192 if (!EnvTargetID.contains(Other: "sramecc+"))
193 return false;
194 break;
195 case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
196 case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
197 break;
198 }
199
200 return true;
201}
202
203namespace {
204/// Reads the AMDGPU specific per-kernel-metadata from an image.
205class KernelInfoReader {
206public:
207 KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
208 : KernelInfoMap(KIM) {}
209
210 /// Process ELF note to read AMDGPU metadata from respective information
211 /// fields.
212 Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
213 if (Note.getName() != "AMDGPU")
214 return Error::success(); // We are not interested in other things
215
216 assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
217 "Parse AMDGPU MetaData");
218 auto Desc = Note.getDesc(Align);
219 StringRef MsgPackString =
220 StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
221 msgpack::Document MsgPackDoc;
222 if (!MsgPackDoc.readFromBlob(Blob: MsgPackString, /*Multi=*/false))
223 return Error::success();
224
225 AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
226 if (!Verifier.verify(HSAMetadataRoot&: MsgPackDoc.getRoot()))
227 return Error::success();
228
229 auto RootMap = MsgPackDoc.getRoot().getMap(Convert: true);
230
231 if (auto Err = iterateAMDKernels(MDN&: RootMap))
232 return Err;
233
234 return Error::success();
235 }
236
237private:
238 /// Extracts the relevant information via simple string look-up in the msgpack
239 /// document elements.
240 Error
241 extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
242 std::string &KernelName,
243 offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
244 if (!V.first.isString())
245 return Error::success();
246
247 const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
248 return DK.getString() == SK;
249 };
250
251 const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
252 uint32_t *Vals) {
253 assert(DN.isArray() && "MsgPack DocNode is an array node");
254 auto DNA = DN.getArray();
255 assert(DNA.size() == 3 && "ArrayNode has at most three elements");
256
257 int I = 0;
258 for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
259 ++DNABegin) {
260 Vals[I++] = DNABegin->getUInt();
261 }
262 };
263
264 if (IsKey(V.first, ".name")) {
265 KernelName = V.second.toString();
266 } else if (IsKey(V.first, ".sgpr_count")) {
267 KernelData.SGPRCount = V.second.getUInt();
268 } else if (IsKey(V.first, ".sgpr_spill_count")) {
269 KernelData.SGPRSpillCount = V.second.getUInt();
270 } else if (IsKey(V.first, ".vgpr_count")) {
271 KernelData.VGPRCount = V.second.getUInt();
272 } else if (IsKey(V.first, ".vgpr_spill_count")) {
273 KernelData.VGPRSpillCount = V.second.getUInt();
274 } else if (IsKey(V.first, ".agpr_count")) {
275 KernelData.AGPRCount = V.second.getUInt();
276 } else if (IsKey(V.first, ".private_segment_fixed_size")) {
277 KernelData.PrivateSegmentSize = V.second.getUInt();
278 } else if (IsKey(V.first, ".group_segment_fixed_size")) {
279 KernelData.GroupSegmentList = V.second.getUInt();
280 } else if (IsKey(V.first, ".reqd_workgroup_size")) {
281 GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
282 } else if (IsKey(V.first, ".workgroup_size_hint")) {
283 GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
284 } else if (IsKey(V.first, ".wavefront_size")) {
285 KernelData.WavefrontSize = V.second.getUInt();
286 } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
287 KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
288 }
289
290 return Error::success();
291 }
292
293 /// Get the "amdhsa.kernels" element from the msgpack Document
294 Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
295 auto Res = MDN.find(Key: "amdhsa.kernels");
296 if (Res == MDN.end())
297 return createStringError(EC: inconvertibleErrorCode(),
298 S: "Could not find amdhsa.kernels key");
299
300 auto Pair = *Res;
301 assert(Pair.second.isArray() &&
302 "AMDGPU kernel entries are arrays of entries");
303
304 return Pair.second.getArray();
305 }
306
307 /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
308 /// MapDocNode that either maps a string to a single value (most of them) or
309 /// to another array of things. Currently, we only handle the case that maps
310 /// to scalar value.
311 Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
312 offloading::amdgpu::AMDGPUKernelMetaData KernelData;
313 std::string KernelName;
314 auto Entry = (*It).getMap();
315 for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
316 if (auto Err = extractKernelData(V: *MI, KernelName, KernelData))
317 return Err;
318
319 KernelInfoMap.insert(KV: {KernelName, KernelData});
320 return Error::success();
321 }
322
323 /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
324 Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
325 auto KernelsOrErr = getAMDKernelsArray(MDN);
326 if (auto Err = KernelsOrErr.takeError())
327 return Err;
328
329 auto KernelsArr = *KernelsOrErr;
330 for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
331 if (!It->isMap())
332 continue; // we expect <key,value> pairs
333
334 // Obtain the value for the different entries. Each array entry is a
335 // MapDocNode
336 if (auto Err = generateKernelInfo(It))
337 return Err;
338 }
339 return Error::success();
340 }
341
342 // Kernel names are the keys
343 StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
344};
345} // namespace
346
347Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
348 MemoryBufferRef MemBuffer,
349 StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
350 uint16_t &ELFABIVersion) {
351 Error Err = Error::success(); // Used later as out-parameter
352
353 auto ELFOrError = object::ELF64LEFile::create(Object: MemBuffer.getBuffer());
354 if (auto Err = ELFOrError.takeError())
355 return Err;
356
357 const object::ELF64LEFile ELFObj = ELFOrError.get();
358 Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
359 if (!Sections)
360 return Sections.takeError();
361 KernelInfoReader Reader(KernelInfoMap);
362
363 // Read the code object version from ELF image header
364 auto Header = ELFObj.getHeader();
365 ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
366 for (const auto &S : *Sections) {
367 if (S.sh_type != ELF::SHT_NOTE)
368 continue;
369
370 for (const auto N : ELFObj.notes(Shdr: S, Err)) {
371 if (Err)
372 return Err;
373 // Fills the KernelInfoTabel entries in the reader
374 if ((Err = Reader.processNote(Note: N, Align: S.sh_addralign)))
375 return Err;
376 }
377 }
378 return Error::success();
379}
380Error offloading::intel::containerizeOpenMPSPIRVImage(
381 std::unique_ptr<MemoryBuffer> &Img) {
382 constexpr char INTEL_ONEOMP_OFFLOAD_VERSION[] = "1.0";
383 constexpr int NT_INTEL_ONEOMP_OFFLOAD_VERSION = 1;
384 constexpr int NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT = 2;
385 constexpr int NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX = 3;
386
387 // Start creating notes for the ELF container.
388 std::vector<ELFYAML::NoteEntry> Notes;
389 std::string Version = toHex(Input: INTEL_ONEOMP_OFFLOAD_VERSION);
390 Notes.emplace_back(args: ELFYAML::NoteEntry{.Name: "INTELONEOMPOFFLOAD",
391 .Desc: yaml::BinaryRef(Version),
392 .Type: NT_INTEL_ONEOMP_OFFLOAD_VERSION});
393
394 // The AuxInfo string will hold auxiliary information for the image.
395 // ELFYAML::NoteEntry structures will hold references to the
396 // string, so we have to make sure the string is valid.
397 std::string AuxInfo;
398
399 // TODO: Pass compile/link opts
400 StringRef CompileOpts = "";
401 StringRef LinkOpts = "";
402
403 unsigned ImageFmt = 1; // SPIR-V format
404
405 AuxInfo = toHex(Input: (Twine(0) + Twine('\0') + Twine(ImageFmt) + Twine('\0') +
406 CompileOpts + Twine('\0') + LinkOpts)
407 .str());
408 Notes.emplace_back(args: ELFYAML::NoteEntry{.Name: "INTELONEOMPOFFLOAD",
409 .Desc: yaml::BinaryRef(AuxInfo),
410 .Type: NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX});
411
412 std::string ImgCount = toHex(Input: Twine(1).str()); // always one image per ELF
413 Notes.emplace_back(args: ELFYAML::NoteEntry{.Name: "INTELONEOMPOFFLOAD",
414 .Desc: yaml::BinaryRef(ImgCount),
415 .Type: NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT});
416
417 std::string YamlFile;
418 llvm::raw_string_ostream YamlFileStream(YamlFile);
419
420 // Write the YAML template file.
421
422 // We use 64-bit little-endian ELF currently.
423 ELFYAML::FileHeader Header{};
424 Header.Class = ELF::ELFCLASS64;
425 Header.Data = ELF::ELFDATA2LSB;
426 Header.Type = ELF::ET_DYN;
427 Header.Machine = ELF::EM_INTELGT;
428
429 // Create a section with notes.
430 ELFYAML::NoteSection Section{};
431 Section.Type = ELF::SHT_NOTE;
432 Section.AddressAlign = 0;
433 Section.Name = ".note.inteloneompoffload";
434 Section.Notes.emplace(args: std::move(Notes));
435
436 ELFYAML::Object Object{};
437 Object.Header = Header;
438 Object.Chunks.push_back(
439 x: std::make_unique<ELFYAML::NoteSection>(args: std::move(Section)));
440
441 // Create the section that will hold the image
442 ELFYAML::RawContentSection ImageSection{};
443 ImageSection.Type = ELF::SHT_PROGBITS;
444 ImageSection.AddressAlign = 0;
445 std::string Name = "__openmp_offload_spirv_0";
446 ImageSection.Name = Name;
447 ImageSection.Content =
448 llvm::yaml::BinaryRef(arrayRefFromStringRef(Input: Img->getBuffer()));
449 Object.Chunks.push_back(
450 x: std::make_unique<ELFYAML::RawContentSection>(args: std::move(ImageSection)));
451 Error Err = Error::success();
452 llvm::yaml::yaml2elf(
453 Doc&: Object, Out&: YamlFileStream,
454 EH: [&Err](const Twine &Msg) { Err = createStringError(S: Msg); }, UINT64_MAX);
455 if (Err)
456 return Err;
457
458 Img = MemoryBuffer::getMemBufferCopy(InputData: YamlFile);
459 return Error::success();
460}
461