1//===- Utility.cpp ------ Collection of generic offloading utilities ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Frontend/Offloading/Utility.h"
10#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
11#include "llvm/BinaryFormat/ELF.h"
12#include "llvm/BinaryFormat/MsgPackDocument.h"
13#include "llvm/IR/Constants.h"
14#include "llvm/IR/GlobalValue.h"
15#include "llvm/IR/GlobalVariable.h"
16#include "llvm/IR/Value.h"
17#include "llvm/Object/ELFObjectFile.h"
18#include "llvm/Object/OffloadBinary.h"
19#include "llvm/ObjectYAML/ELFYAML.h"
20#include "llvm/ObjectYAML/yaml2obj.h"
21#include "llvm/Support/MemoryBufferRef.h"
22#include "llvm/Transforms/Utils/ModuleUtils.h"
23
24using namespace llvm;
25using namespace llvm::offloading;
26
27StructType *offloading::getEntryTy(Module &M) {
28 LLVMContext &C = M.getContext();
29 StructType *EntryTy =
30 StructType::getTypeByName(C, Name: "struct.__tgt_offload_entry");
31 if (!EntryTy)
32 EntryTy = StructType::create(
33 Name: "struct.__tgt_offload_entry", elt1: Type::getInt64Ty(C), elts: Type::getInt16Ty(C),
34 elts: Type::getInt16Ty(C), elts: Type::getInt32Ty(C), elts: PointerType::getUnqual(C),
35 elts: PointerType::getUnqual(C), elts: Type::getInt64Ty(C), elts: Type::getInt64Ty(C),
36 elts: PointerType::getUnqual(C));
37 return EntryTy;
38}
39
40std::pair<Constant *, GlobalVariable *>
41offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
42 Constant *Addr, StringRef Name,
43 uint64_t Size, uint32_t Flags,
44 uint64_t Data, Constant *AuxAddr) {
45 const llvm::Triple &Triple = M.getTargetTriple();
46 Type *PtrTy = PointerType::getUnqual(C&: M.getContext());
47 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
48 Type *Int32Ty = Type::getInt32Ty(C&: M.getContext());
49 Type *Int16Ty = Type::getInt16Ty(C&: M.getContext());
50
51 Constant *AddrName = ConstantDataArray::getString(Context&: M.getContext(), Initializer: Name);
52
53 StringRef Prefix =
54 Triple.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name";
55
56 // Create the constant string used to look up the symbol in the device.
57 auto *Str =
58 new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
59 GlobalValue::InternalLinkage, AddrName, Prefix);
60 StringRef SectionName = ".llvm.rodata.offloading";
61 Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
62 Str->setSection(SectionName);
63 Str->setAlignment(Align(1));
64
65 // Make a metadata node for these constants so it can be queried from IR.
66 NamedMDNode *MD = M.getOrInsertNamedMetadata(Name: "llvm.offloading.symbols");
67 Metadata *MDVals[] = {ConstantAsMetadata::get(C: Str)};
68 MD->addOperand(M: llvm::MDNode::get(Context&: M.getContext(), MDs: MDVals));
69
70 // Construct the offloading entry.
71 Constant *EntryData[] = {
72 ConstantExpr::getNullValue(Ty: Int64Ty),
73 ConstantInt::get(Ty: Int16Ty, V: 1),
74 ConstantInt::get(Ty: Int16Ty, V: Kind),
75 ConstantInt::get(Ty: Int32Ty, V: Flags),
76 ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: Addr, Ty: PtrTy),
77 ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: Str, Ty: PtrTy),
78 ConstantInt::get(Ty: Int64Ty, V: Size),
79 ConstantInt::get(Ty: Int64Ty, V: Data),
80 AuxAddr ? ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: AuxAddr, Ty: PtrTy)
81 : ConstantExpr::getNullValue(Ty: PtrTy)};
82 Constant *EntryInitializer = ConstantStruct::get(T: getEntryTy(M), V: EntryData);
83 return {EntryInitializer, Str};
84}
85
86GlobalVariable *
87offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
88 Constant *Addr, StringRef Name, uint64_t Size,
89 uint32_t Flags, uint64_t Data,
90 Constant *AuxAddr, StringRef SectionName) {
91 const llvm::Triple &Triple = M.getTargetTriple();
92
93 auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
94 M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);
95
96 StringRef Prefix =
97 Triple.isNVPTX() ? "$offloading$entry$" : ".offloading.entry.";
98 auto *Entry = new GlobalVariable(
99 M, getEntryTy(M),
100 /*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer,
101 Prefix + Name, nullptr, GlobalValue::NotThreadLocal,
102 M.getDataLayout().getDefaultGlobalsAddressSpace());
103
104 // The entry has to be created in the section the linker expects it to be.
105 if (Triple.isOSBinFormatCOFF())
106 Entry->setSection((SectionName + "$OE").str());
107 else
108 Entry->setSection(SectionName);
109 Entry->setAlignment(Align(object::OffloadBinary::getAlignment()));
110 return Entry;
111}
112
113std::pair<GlobalVariable *, GlobalVariable *>
114offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
115 const llvm::Triple &Triple = M.getTargetTriple();
116
117 auto *ZeroInitilaizer =
118 ConstantAggregateZero::get(Ty: ArrayType::get(ElementType: getEntryTy(M), NumElements: 0u));
119 auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr;
120 auto *EntryType = ArrayType::get(ElementType: getEntryTy(M), NumElements: 0);
121 auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
122 : GlobalValue::ExternalLinkage;
123
124 auto *EntriesB =
125 new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
126 "__start_" + SectionName);
127 EntriesB->setVisibility(GlobalValue::HiddenVisibility);
128 auto *EntriesE =
129 new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
130 "__stop_" + SectionName);
131 EntriesE->setVisibility(GlobalValue::HiddenVisibility);
132
133 if (Triple.isOSBinFormatELF()) {
134 // We assume that external begin/end symbols that we have created above will
135 // be defined by the linker. This is done whenever a section name with a
136 // valid C-identifier is present. We define a dummy variable here to force
137 // the linker to always provide these symbols.
138 auto *DummyEntry = new GlobalVariable(
139 M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
140 ZeroInitilaizer, "__dummy." + SectionName);
141 DummyEntry->setSection(SectionName);
142 DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
143 appendToCompilerUsed(M, Values: DummyEntry);
144 } else {
145 // The COFF linker will merge sections containing a '$' together into a
146 // single section. The order of entries in this section will be sorted
147 // alphabetically by the characters following the '$' in the name. Set the
148 // sections here to ensure that the beginning and end symbols are sorted.
149 EntriesB->setSection((SectionName + "$OA").str());
150 EntriesE->setSection((SectionName + "$OZ").str());
151 }
152
153 return std::make_pair(x&: EntriesB, y&: EntriesE);
154}
155
156bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
157 uint32_t ImageFlags,
158 StringRef EnvTargetID) {
159 using namespace llvm::ELF;
160 StringRef EnvArch = EnvTargetID.split(Separator: ":").first;
161
162 // Trivial check if the base processors match.
163 if (EnvArch != ImageArch)
164 return false;
165
166 // Check if the image is requesting xnack on or off.
167 switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
168 case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
169 // The image is 'xnack-' so the environment must be 'xnack-'.
170 if (!EnvTargetID.contains(Other: "xnack-"))
171 return false;
172 break;
173 case EF_AMDGPU_FEATURE_XNACK_ON_V4:
174 // The image is 'xnack+' so the environment must be 'xnack+'.
175 if (!EnvTargetID.contains(Other: "xnack+"))
176 return false;
177 break;
178 case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
179 case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
180 default:
181 break;
182 }
183
184 // Check if the image is requesting sramecc on or off.
185 switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
186 case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
187 // The image is 'sramecc-' so the environment must be 'sramecc-'.
188 if (!EnvTargetID.contains(Other: "sramecc-"))
189 return false;
190 break;
191 case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
192 // The image is 'sramecc+' so the environment must be 'sramecc+'.
193 if (!EnvTargetID.contains(Other: "sramecc+"))
194 return false;
195 break;
196 case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
197 case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
198 break;
199 }
200
201 return true;
202}
203
204namespace {
205/// Reads the AMDGPU specific per-kernel-metadata from an image.
206class KernelInfoReader {
207public:
208 KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
209 : KernelInfoMap(KIM) {}
210
211 /// Process ELF note to read AMDGPU metadata from respective information
212 /// fields.
213 Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
214 if (Note.getName() != "AMDGPU")
215 return Error::success(); // We are not interested in other things
216
217 assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
218 "Parse AMDGPU MetaData");
219 auto Desc = Note.getDesc(Align);
220 StringRef MsgPackString =
221 StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
222 msgpack::Document MsgPackDoc;
223 if (!MsgPackDoc.readFromBlob(Blob: MsgPackString, /*Multi=*/false))
224 return Error::success();
225
226 AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
227 if (!Verifier.verify(HSAMetadataRoot&: MsgPackDoc.getRoot()))
228 return Error::success();
229
230 auto RootMap = MsgPackDoc.getRoot().getMap(Convert: true);
231
232 if (auto Err = iterateAMDKernels(MDN&: RootMap))
233 return Err;
234
235 return Error::success();
236 }
237
238private:
239 /// Extracts the relevant information via simple string look-up in the msgpack
240 /// document elements.
241 Error
242 extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
243 std::string &KernelName,
244 offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
245 if (!V.first.isString())
246 return Error::success();
247
248 const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
249 return DK.getString() == SK;
250 };
251
252 const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
253 uint32_t *Vals) {
254 assert(DN.isArray() && "MsgPack DocNode is an array node");
255 auto DNA = DN.getArray();
256 assert(DNA.size() == 3 && "ArrayNode has at most three elements");
257
258 int I = 0;
259 for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
260 ++DNABegin) {
261 Vals[I++] = DNABegin->getUInt();
262 }
263 };
264
265 if (IsKey(V.first, ".name")) {
266 KernelName = V.second.toString();
267 } else if (IsKey(V.first, ".sgpr_count")) {
268 KernelData.SGPRCount = V.second.getUInt();
269 } else if (IsKey(V.first, ".sgpr_spill_count")) {
270 KernelData.SGPRSpillCount = V.second.getUInt();
271 } else if (IsKey(V.first, ".vgpr_count")) {
272 KernelData.VGPRCount = V.second.getUInt();
273 } else if (IsKey(V.first, ".vgpr_spill_count")) {
274 KernelData.VGPRSpillCount = V.second.getUInt();
275 } else if (IsKey(V.first, ".agpr_count")) {
276 KernelData.AGPRCount = V.second.getUInt();
277 } else if (IsKey(V.first, ".private_segment_fixed_size")) {
278 KernelData.PrivateSegmentSize = V.second.getUInt();
279 } else if (IsKey(V.first, ".group_segment_fixed_size")) {
280 KernelData.GroupSegmentList = V.second.getUInt();
281 } else if (IsKey(V.first, ".reqd_workgroup_size")) {
282 GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
283 } else if (IsKey(V.first, ".workgroup_size_hint")) {
284 GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
285 } else if (IsKey(V.first, ".wavefront_size")) {
286 KernelData.WavefrontSize = V.second.getUInt();
287 } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
288 KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
289 }
290
291 return Error::success();
292 }
293
294 /// Get the "amdhsa.kernels" element from the msgpack Document
295 Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
296 auto Res = MDN.find(Key: "amdhsa.kernels");
297 if (Res == MDN.end())
298 return createStringError(EC: inconvertibleErrorCode(),
299 S: "Could not find amdhsa.kernels key");
300
301 auto Pair = *Res;
302 assert(Pair.second.isArray() &&
303 "AMDGPU kernel entries are arrays of entries");
304
305 return Pair.second.getArray();
306 }
307
308 /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
309 /// MapDocNode that either maps a string to a single value (most of them) or
310 /// to another array of things. Currently, we only handle the case that maps
311 /// to scalar value.
312 Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
313 offloading::amdgpu::AMDGPUKernelMetaData KernelData;
314 std::string KernelName;
315 auto Entry = (*It).getMap();
316 for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
317 if (auto Err = extractKernelData(V: *MI, KernelName, KernelData))
318 return Err;
319
320 KernelInfoMap.insert(KV: {KernelName, KernelData});
321 return Error::success();
322 }
323
324 /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
325 Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
326 auto KernelsOrErr = getAMDKernelsArray(MDN);
327 if (auto Err = KernelsOrErr.takeError())
328 return Err;
329
330 auto KernelsArr = *KernelsOrErr;
331 for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
332 if (!It->isMap())
333 continue; // we expect <key,value> pairs
334
335 // Obtain the value for the different entries. Each array entry is a
336 // MapDocNode
337 if (auto Err = generateKernelInfo(It))
338 return Err;
339 }
340 return Error::success();
341 }
342
343 // Kernel names are the keys
344 StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
345};
346} // namespace
347
348Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
349 MemoryBufferRef MemBuffer,
350 StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
351 uint16_t &ELFABIVersion) {
352 Error Err = Error::success(); // Used later as out-parameter
353
354 auto ELFOrError = object::ELF64LEFile::create(Object: MemBuffer.getBuffer());
355 if (auto Err = ELFOrError.takeError())
356 return Err;
357
358 const object::ELF64LEFile ELFObj = ELFOrError.get();
359 Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
360 if (!Sections)
361 return Sections.takeError();
362 KernelInfoReader Reader(KernelInfoMap);
363
364 // Read the code object version from ELF image header
365 auto Header = ELFObj.getHeader();
366 ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
367 for (const auto &S : *Sections) {
368 if (S.sh_type != ELF::SHT_NOTE)
369 continue;
370
371 for (const auto N : ELFObj.notes(Shdr: S, Err)) {
372 if (Err)
373 return Err;
374 // Fills the KernelInfoTabel entries in the reader
375 if ((Err = Reader.processNote(Note: N, Align: S.sh_addralign)))
376 return Err;
377 }
378 }
379 return Error::success();
380}
381
382Error offloading::containerizeImage(std::unique_ptr<MemoryBuffer> &Img,
383 llvm::Triple Triple,
384 object::ImageKind ImageKind,
385 object::OffloadKind OffloadKind,
386 int32_t ImageFlags,
387 MapVector<StringRef, StringRef> &MetaData) {
388 using namespace object;
389
390 // Create inner OffloadBinary containing the raw image.
391 OffloadBinary::OffloadingImage InnerImage;
392 InnerImage.TheImageKind = ImageKind;
393 InnerImage.TheOffloadKind = OffloadKind;
394 InnerImage.Flags = ImageFlags;
395
396 InnerImage.StringData["triple"] = Triple.getTriple();
397 for (const auto &[Key, Value] : MetaData)
398 InnerImage.StringData[Key] = Value;
399
400 InnerImage.Image = std::move(Img);
401
402 SmallString<0> InnerBinaryData = OffloadBinary::write(OffloadingData: InnerImage);
403
404 Img = MemoryBuffer::getMemBufferCopy(InputData: InnerBinaryData);
405 return Error::success();
406}
407
408Error offloading::intel::containerizeOpenMPSPIRVImage(
409 std::unique_ptr<MemoryBuffer> &Binary, llvm::Triple Triple,
410 StringRef CompileOpts, StringRef LinkOpts) {
411 constexpr char INTEL_ONEOMP_OFFLOAD_VERSION[] = "1.0";
412
413 assert(Triple.isSPIRV() && Triple.getVendor() == llvm::Triple::Intel &&
414 "Expected SPIR-V triple with Intel vendor");
415
416 MapVector<StringRef, StringRef> MetaData;
417 MetaData["version"] = INTEL_ONEOMP_OFFLOAD_VERSION;
418 if (!CompileOpts.empty())
419 MetaData["compile-opts"] = CompileOpts;
420 if (!LinkOpts.empty())
421 MetaData["link-opts"] = LinkOpts;
422
423 return containerizeImage(Img&: Binary, Triple, ImageKind: object::ImageKind::IMG_SPIRV,
424 OffloadKind: object::OffloadKind::OFK_OpenMP, /*ImageFlags=*/0,
425 MetaData);
426}
427