1//===- SPIR.cpp -----------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ABIInfoImpl.h"
10#include "HLSLBufferLayoutBuilder.h"
11#include "TargetInfo.h"
12#include "clang/Basic/LangOptions.h"
13#include "llvm/IR/DerivedTypes.h"
14
15#include <stdint.h>
16#include <utility>
17
18using namespace clang;
19using namespace clang::CodeGen;
20
21//===----------------------------------------------------------------------===//
22// Base ABI and target codegen info implementation common between SPIR and
23// SPIR-V.
24//===----------------------------------------------------------------------===//
25
26namespace {
// ABI implementation shared by SPIR and SPIR-V targets: DefaultABIInfo with
// the runtime calling convention switched to SPIR_FUNC.
class CommonSPIRABIInfo : public DefaultABIInfo {
public:
  CommonSPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }

private:
  // Installs SPIR_FUNC as the runtime calling convention; defined below.
  void setCCs();
};
34
// Generic SPIR-V ABI: DefaultABIInfo plus kernel-argument coercions and
// va_arg support.
class SPIRVABIInfo : public CommonSPIRABIInfo {
public:
  SPIRVABIInfo(CodeGenTypes &CGT) : CommonSPIRABIInfo(CGT) {}
  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;

private:
  // Classification applied to SPIR_KERNEL arguments only.
  ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
};
45
// ABI info used when targeting AMD-flavoured SPIR-V; it mirrors the AMDGPU
// argument classification rather than the generic SPIR-V one.
class AMDGCNSPIRVABIInfo : public SPIRVABIInfo {
  // TODO: this should be unified / shared with AMDGPU, ideally we'd like to
  // re-use AMDGPUABIInfo eventually, rather than duplicate.
  static constexpr unsigned MaxNumRegsForArgsRet = 16; // 16 32-bit registers
  // Remaining 32-bit register budget while classifying one function's
  // arguments; reset by computeInfo() before each argument walk.
  mutable unsigned NumRegsLeft = 0;

  // Estimate of how many 32-bit registers a value of type Ty occupies.
  uint64_t numRegsForType(QualType Ty) const;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    return true;
  }
  bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                         uint64_t Members) const override {
    // Registers needed per member, rounding the member size up to 32 bits.
    uint32_t NumRegs = (getContext().getTypeSize(T: Base) + 31) / 32;

    // Homogeneous Aggregates may occupy at most 16 registers.
    return Members * NumRegs <= MaxNumRegsForArgsRet;
  }

  // Coerce HIP scalar pointer arguments from generic pointers to global ones.
  llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
                                       unsigned ToAS) const;

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

public:
  AMDGCNSPIRVABIInfo(CodeGenTypes &CGT) : SPIRVABIInfo(CGT) {}
  void computeInfo(CGFunctionInfo &FI) const override;

  llvm::FixedVectorType *
  getOptimalVectorMemoryType(llvm::FixedVectorType *Ty,
                             const LangOptions &LangOpt) const override;
};
81} // end anonymous namespace
82namespace {
// TargetCodeGenInfo shared between SPIR and SPIR-V: device-kernel calling
// convention, OpenCL/HLSL builtin type lowering, and null-pointer
// materialization.
class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  CommonSPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<CommonSPIRABIInfo>(args&: CGT)) {}
  CommonSPIRTargetCodeGenInfo(std::unique_ptr<ABIInfo> ABIInfo)
      : TargetCodeGenInfo(std::move(ABIInfo)) {}

  // Map the DataLayout's alloca address space back to a language AS.
  LangAS getASTAllocaAddressSpace() const override {
    return getLangASFromTargetAS(
        TargetAS: getABIInfo().getDataLayout().getAllocaAddrSpace());
  }

  unsigned getDeviceKernelCallingConv() const override;
  llvm::Type *getOpenCLType(CodeGenModule &CGM, const Type *T) const override;
  llvm::Type *getHLSLType(CodeGenModule &CGM, const Type *Ty,
                          const CGHLSLOffsetInfo &OffsetInfo) const override;

  // HLSL buffer padding is represented with the spirv.Padding target
  // extension type, parameterized by its byte size.
  llvm::Type *getHLSLPadding(CodeGenModule &CGM,
                             CharUnits NumBytes) const override {
    unsigned Size = NumBytes.getQuantity();
    return llvm::TargetExtType::get(Context&: CGM.getLLVMContext(), Name: "spirv.Padding", Types: {},
                                    Ints: {Size});
  }

  // True iff Ty is the spirv.Padding target extension type made above.
  bool isHLSLPadding(llvm::Type *Ty) const override {
    if (auto *TET = dyn_cast<llvm::TargetExtType>(Val: Ty))
      return TET->getName() == "spirv.Padding";
    return false;
  }

  llvm::Type *getSPIRVImageTypeFromHLSLResource(
      const HLSLAttributedResourceType::Attributes &attributes,
      QualType SampledType, CodeGenModule &CGM) const;
  void
  setOCLKernelStubCallingConvention(const FunctionType *&FT) const override;
  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;
};
// SPIR-V-specific TargetCodeGenInfo. For AMD-vendor triples it plugs in the
// AMDGCN-flavoured ABI implementation instead of the generic SPIR-V one.
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
public:
  SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
      : CommonSPIRTargetCodeGenInfo(
            (CGT.getTarget().getTriple().getVendor() == llvm::Triple::AMD)
                ? std::make_unique<AMDGCNSPIRVABIInfo>(args&: CGT)
                : std::make_unique<SPIRVABIInfo>(args&: CGT)) {}
  void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
  LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
                                  const VarDecl *D) const override;
  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  StringRef getLLVMSyncScopeStr(const LangOptions &LangOpts, SyncScope Scope,
                                llvm::AtomicOrdering Ordering) const override;
  // Libcalls are disallowed for AMD-flavoured SPIR-V.
  bool supportsLibCall() const override {
    return getABIInfo().getTarget().getTriple().getVendor() !=
           llvm::Triple::AMD;
  }
};
141} // End anonymous namespace.
142
// Runtime helper functions use the SPIR_FUNC calling convention instead of
// the default C convention.
void CommonSPIRABIInfo::setCCs() {
  assert(getRuntimeCC() == llvm::CallingConv::C);
  RuntimeCC = llvm::CallingConv::SPIR_FUNC;
}
147
148ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
149 if (getContext().getLangOpts().isTargetDevice()) {
150 // Coerce pointer arguments with default address space to CrossWorkGroup
151 // pointers for target devices as default address space kernel arguments
152 // are not allowed. We use the opencl_global language address space which
153 // always maps to CrossWorkGroup.
154 llvm::Type *LTy = CGT.ConvertType(T: Ty);
155 auto DefaultAS = getContext().getTargetAddressSpace(AS: LangAS::Default);
156 auto GlobalAS = getContext().getTargetAddressSpace(AS: LangAS::opencl_global);
157 auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Val: LTy);
158 if (PtrTy && PtrTy->getAddressSpace() == DefaultAS) {
159 LTy = llvm::PointerType::get(C&: PtrTy->getContext(), AddressSpace: GlobalAS);
160 return ABIArgInfo::getDirect(T: LTy, Offset: 0, Padding: nullptr, CanBeFlattened: false);
161 }
162
163 if (isAggregateTypeForABI(T: Ty)) {
164 // Force copying aggregate type in kernel arguments by value when
165 // compiling CUDA targeting SPIR-V. This is required for the object
166 // copied to be valid on the device.
167 // This behavior follows the CUDA spec
168 // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
169 // and matches the NVPTX implementation. TODO: hardcoding to 0 should be
170 // revisited if HIPSPV / byval starts making use of the AS of an indirect
171 // arg.
172 return getNaturalAlignIndirect(Ty, /*AddrSpace=*/0, /*byval=*/ByVal: true);
173 }
174 }
175 return classifyArgumentType(RetTy: Ty);
176}
177
178void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
179 // The logic is same as in DefaultABIInfo with an exception on the kernel
180 // arguments handling.
181 llvm::CallingConv::ID CC = FI.getCallingConvention();
182
183 for (auto &&[ArgumentsCount, I] : llvm::enumerate(First: FI.arguments()))
184 I.info = ArgumentsCount < FI.getNumRequiredArgs()
185 ? classifyArgumentType(RetTy: I.type)
186 : ABIArgInfo::getDirect();
187
188 if (!getCXXABI().classifyReturnType(FI))
189 FI.getReturnInfo() = classifyReturnType(RetTy: FI.getReturnType());
190
191 for (auto &I : FI.arguments()) {
192 if (CC == llvm::CallingConv::SPIR_KERNEL) {
193 I.info = classifyKernelArgumentType(Ty: I.type);
194 } else {
195 I.info = classifyArgumentType(RetTy: I.type);
196 }
197 }
198}
199
// va_arg lowering via the generic void-pointer scheme: values are read from
// the va_list buffer with their natural size/alignment and a 1-byte slot
// granularity, allowing higher alignment where required.
RValue SPIRVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty, AggValueSlot Slot) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, ValueTy: Ty, /*IsIndirect=*/false,
                          ValueInfo: getContext().getTypeInfoInChars(T: Ty),
                          SlotSizeAndAlign: CharUnits::fromQuantity(Quantity: 1),
                          /*AllowHigherAlign=*/true, Slot);
}
207
208uint64_t AMDGCNSPIRVABIInfo::numRegsForType(QualType Ty) const {
209 // This duplicates the AMDGPUABI computation.
210 uint64_t NumRegs = 0;
211
212 if (const VectorType *VT = Ty->getAs<VectorType>()) {
213 // Compute from the number of elements. The reported size is based on the
214 // in-memory size, which includes the padding 4th element for 3-vectors.
215 QualType EltTy = VT->getElementType();
216 uint64_t EltSize = getContext().getTypeSize(T: EltTy);
217
218 // 16-bit element vectors should be passed as packed.
219 if (EltSize == 16)
220 return (VT->getNumElements() + 1) / 2;
221
222 uint64_t EltNumRegs = (EltSize + 31) / 32;
223 return EltNumRegs * VT->getNumElements();
224 }
225
226 if (const auto *RD = Ty->getAsRecordDecl()) {
227 assert(!RD->hasFlexibleArrayMember());
228
229 for (const FieldDecl *Field : RD->fields()) {
230 QualType FieldTy = Field->getType();
231 NumRegs += numRegsForType(Ty: FieldTy);
232 }
233
234 return NumRegs;
235 }
236
237 return (getContext().getTypeSize(T: Ty) + 31) / 32;
238}
239
240llvm::Type *AMDGCNSPIRVABIInfo::coerceKernelArgumentType(llvm::Type *Ty,
241 unsigned FromAS,
242 unsigned ToAS) const {
243 // Single value types.
244 auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Val: Ty);
245 if (PtrTy && PtrTy->getAddressSpace() == FromAS)
246 return llvm::PointerType::get(C&: Ty->getContext(), AddressSpace: ToAS);
247 return Ty;
248}
249
// Return-value classification mirroring AMDGPU: small aggregates are packed
// into integer registers, larger ones fall back to the default handling.
ABIArgInfo AMDGCNSPIRVABIInfo::classifyReturnType(QualType RetTy) const {
  if (!isAggregateTypeForABI(T: RetTy) || getRecordArgABI(T: RetTy, CXXABI&: getCXXABI()))
    return DefaultABIInfo::classifyReturnType(RetTy);

  // Ignore empty structs/unions.
  if (isEmptyRecord(Context&: getContext(), T: RetTy, AllowArrays: true))
    return ABIArgInfo::getIgnore();

  // Lower single-element structs to just return a regular value.
  if (const Type *SeltTy = isSingleElementStruct(T: RetTy, Context&: getContext()))
    return ABIArgInfo::getDirect(T: CGT.ConvertType(T: QualType(SeltTy, 0)));

  // Records with flexible array members have no fixed size; defer to the
  // default ABI.
  if (const auto *RD = RetTy->getAsRecordDecl();
      RD && RD->hasFlexibleArrayMember())
    return DefaultABIInfo::classifyReturnType(RetTy);

  // Pack aggregates <= 8 bytes into a single VGPR (i16/i32) or an i32 pair.
  uint64_t Size = getContext().getTypeSize(T: RetTy);
  if (Size <= 16)
    return ABIArgInfo::getDirect(T: llvm::Type::getInt16Ty(C&: getVMContext()));

  if (Size <= 32)
    return ABIArgInfo::getDirect(T: llvm::Type::getInt32Ty(C&: getVMContext()));

  // TODO: This carried over from AMDGPU oddity, we retain it to
  // ensure consistency, but it might be reasonable to return Int64.
  if (Size <= 64) {
    llvm::Type *I32Ty = llvm::Type::getInt32Ty(C&: getVMContext());
    return ABIArgInfo::getDirect(T: llvm::ArrayType::get(ElementType: I32Ty, NumElements: 2));
  }

  // Larger aggregates may still be returned directly if they fit in the
  // register budget; otherwise use the default (indirect) handling.
  if (numRegsForType(Ty: RetTy) <= MaxNumRegsForArgsRet)
    return ABIArgInfo::getDirect();
  return DefaultABIInfo::classifyReturnType(RetTy);
}
285
/// For kernels all parameters are really passed in a special buffer. It doesn't
/// make sense to pass anything byval, so everything must be direct.
ABIArgInfo AMDGCNSPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  // TODO: Can we omit empty structs?

  // Single-element structs are passed as their sole element.
  if (const Type *SeltTy = isSingleElementStruct(T: Ty, Context&: getContext()))
    Ty = QualType(SeltTy, 0);

  llvm::Type *OrigLTy = CGT.ConvertType(T: Ty);
  llvm::Type *LTy = OrigLTy;
  // On target devices, generic-AS scalar pointers are coerced to global.
  if (getContext().getLangOpts().isTargetDevice()) {
    LTy = coerceKernelArgumentType(
        Ty: OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(AS: LangAS::Default),
        /*ToAS=*/getContext().getTargetAddressSpace(AS: LangAS::opencl_global));
  }

  // FIXME: This doesn't apply the optimization of coercing pointers in structs
  // to global address space when using byref. This would require implementing a
  // new kind of coercion of the in-memory type when for indirect arguments.
  if (LTy == OrigLTy && isAggregateTypeForABI(T: Ty)) {
    return ABIArgInfo::getIndirectAliased(
        Alignment: getContext().getTypeAlignInChars(T: Ty),
        AddrSpace: getContext().getTargetAddressSpace(AS: LangAS::opencl_constant),
        Realign: false /*Realign*/, Padding: nullptr /*Padding*/);
  }

  // TODO: inhibiting flattening is an AMDGPU workaround for Clover, which might
  // be vestigial and should be revisited.
  return ABIArgInfo::getDirect(T: LTy, Offset: 0, Padding: nullptr, CanBeFlattened: false);
}
318
// Non-kernel argument classification mirroring AMDGPU. Consumes from the
// mutable NumRegsLeft budget (reset per function in computeInfo) to decide
// between direct and indirect passing.
ABIArgInfo AMDGCNSPIRVABIInfo::classifyArgumentType(QualType Ty) const {
  assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");

  Ty = useFirstFieldIfTransparentUnion(Ty);

  // TODO: support for variadics.

  if (!isAggregateTypeForABI(T: Ty)) {
    ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(RetTy: Ty);
    // Charge the register budget for anything passed directly, saturating
    // at zero rather than underflowing.
    if (!ArgInfo.isIndirect()) {
      uint64_t NumRegs = numRegsForType(Ty);
      NumRegsLeft -= std::min(a: NumRegs, b: uint64_t{NumRegsLeft});
    }

    return ArgInfo;
  }

  // Records with non-trivial destructors/copy-constructors should not be
  // passed by value.
  if (auto RAA = getRecordArgABI(T: Ty, CXXABI&: getCXXABI()))
    return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace(),
                                   ByVal: RAA == CGCXXABI::RAA_DirectInMemory);

  // Ignore empty structs/unions.
  if (isEmptyRecord(Context&: getContext(), T: Ty, AllowArrays: true))
    return ABIArgInfo::getIgnore();

  // Lower single-element structs to just pass a regular value. TODO: We
  // could do reasonable-size multiple-element structs too, using getExpand(),
  // though watch out for things like bitfields.
  if (const Type *SeltTy = isSingleElementStruct(T: Ty, Context&: getContext()))
    return ABIArgInfo::getDirect(T: CGT.ConvertType(T: QualType(SeltTy, 0)));

  // Flexible array members have no fixed size; defer to the default ABI.
  if (const auto *RD = Ty->getAsRecordDecl();
      RD && RD->hasFlexibleArrayMember())
    return DefaultABIInfo::classifyArgumentType(RetTy: Ty);

  uint64_t Size = getContext().getTypeSize(T: Ty);
  if (Size <= 64) {
    // Pack aggregates <= 8 bytes into single VGPR or pair.
    unsigned NumRegs = (Size + 31) / 32;
    NumRegsLeft -= std::min(a: NumRegsLeft, b: NumRegs);

    if (Size <= 16)
      return ABIArgInfo::getDirect(T: llvm::Type::getInt16Ty(C&: getVMContext()));

    if (Size <= 32)
      return ABIArgInfo::getDirect(T: llvm::Type::getInt32Ty(C&: getVMContext()));

    // TODO: This is an AMDGPU oddity, and might be vestigial, we retain it to
    // ensure consistency, but it should be revisited.
    llvm::Type *I32Ty = llvm::Type::getInt32Ty(C&: getVMContext());
    return ABIArgInfo::getDirect(T: llvm::ArrayType::get(ElementType: I32Ty, NumElements: 2));
  }

  // Larger aggregates go direct only while the register budget allows it.
  if (NumRegsLeft > 0) {
    uint64_t NumRegs = numRegsForType(Ty);
    if (NumRegsLeft >= NumRegs) {
      NumRegsLeft -= NumRegs;
      return ABIArgInfo::getDirect();
    }
  }

  // Use pass-by-reference instead of pass-by-value for struct arguments in
  // function ABI.
  return ABIArgInfo::getIndirectAliased(
      Alignment: getContext().getTypeAlignInChars(T: Ty),
      AddrSpace: getContext().getTargetAddressSpace(AS: LangAS::opencl_private));
}
388
389void AMDGCNSPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
390 llvm::CallingConv::ID CC = FI.getCallingConvention();
391
392 if (!getCXXABI().classifyReturnType(FI))
393 FI.getReturnInfo() = classifyReturnType(RetTy: FI.getReturnType());
394
395 NumRegsLeft = MaxNumRegsForArgsRet;
396 for (auto &I : FI.arguments()) {
397 if (CC == llvm::CallingConv::SPIR_KERNEL)
398 I.info = classifyKernelArgumentType(Ty: I.type);
399 else
400 I.info = classifyArgumentType(Ty: I.type);
401 }
402}
403
404llvm::FixedVectorType *AMDGCNSPIRVABIInfo::getOptimalVectorMemoryType(
405 llvm::FixedVectorType *Ty, const LangOptions &LangOpt) const {
406 // AMDGPU has legal instructions for 96-bit so 3x32 can be supported.
407 if (Ty->getNumElements() == 3 && getDataLayout().getTypeSizeInBits(Ty) == 96)
408 return Ty;
409 return DefaultABIInfo::getOptimalVectorMemoryType(T: Ty, Opt: LangOpt);
410}
411
412namespace clang {
413namespace CodeGen {
414void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
415 if (CGM.getTarget().getTriple().isSPIRV()) {
416 if (CGM.getTarget().getTriple().getVendor() == llvm::Triple::AMD)
417 AMDGCNSPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
418 else
419 SPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
420 } else {
421 CommonSPIRABIInfo(CGM.getTypes()).computeInfo(FI);
422 }
423}
424}
425}
426
// Device kernels use the SPIR_KERNEL calling convention for both SPIR and
// SPIR-V targets.
unsigned CommonSPIRTargetCodeGenInfo::getDeviceKernelCallingConv() const {
  return llvm::CallingConv::SPIR_KERNEL;
}
430
431void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
432 const FunctionType *&FT) const {
433 // Convert HIP kernels to SPIR-V kernels.
434 if (getABIInfo().getContext().getLangOpts().HIP) {
435 FT = getABIInfo().getContext().adjustFunctionType(
436 Fn: FT, EInfo: FT->getExtInfo().withCallingConv(cc: CC_DeviceKernel));
437 return;
438 }
439}
440
// OpenCL kernel stubs are emitted as ordinary SPIR functions; rewrite the
// function type's calling convention accordingly.
void CommonSPIRTargetCodeGenInfo::setOCLKernelStubCallingConvention(
    const FunctionType *&FT) const {
  FT = getABIInfo().getContext().adjustFunctionType(
      Fn: FT, EInfo: FT->getExtInfo().withCallingConv(cc: CC_SpirFunction));
}
446
447// LLVM currently assumes a null pointer has the bit pattern 0, but some GPU
448// targets use a non-zero encoding for null in certain address spaces.
449// Because SPIR(-V) is a generic target and the bit pattern of null in
450// non-generic AS is unspecified, materialize null in non-generic AS via an
451// addrspacecast from null in generic AS. This allows later lowering to
452// substitute the target's real sentinel value.
llvm::Constant *
CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                            llvm::PointerType *PT,
                                            QualType QT) const {
  // nullptr_t has no pointee type; treat it as a default (generic) AS
  // pointer.
  LangAS AS = QT->getUnqualifiedDesugaredType()->isNullPtrType()
                  ? LangAS::Default
                  : QT->getPointeeType().getAddressSpace();
  unsigned ASAsInt = static_cast<unsigned>(AS);
  unsigned FirstTargetASAsInt =
      static_cast<unsigned>(LangAS::FirstTargetAddressSpace);
  // Target AS 9 is CodeSectionINTEL from SPV_INTEL_function_pointers.
  unsigned CodeSectionINTELAS = FirstTargetASAsInt + 9;
  // As per SPV_INTEL_function_pointers, it is illegal to addrspacecast
  // function pointers to/from the generic AS.
  bool IsFunctionPtrAS =
      CGM.getTriple().isSPIRV() && ASAsInt == CodeSectionINTELAS;
  if (AS == LangAS::Default || AS == LangAS::opencl_generic ||
      AS == LangAS::opencl_constant || IsFunctionPtrAS)
    return llvm::ConstantPointerNull::get(T: PT);

  // Otherwise materialize null as a generic-AS null addrspacecast to the
  // requested pointer type, so later lowering can substitute the target's
  // real sentinel value.
  auto &Ctx = CGM.getContext();
  auto NPT = llvm::PointerType::get(
      C&: PT->getContext(), AddressSpace: Ctx.getTargetAddressSpace(AS: LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      C: llvm::ConstantPointerNull::get(T: NPT), Ty: PT);
}
478
479LangAS
480SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
481 const VarDecl *D) const {
482 assert(!CGM.getLangOpts().OpenCL &&
483 !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
484 "Address space agnostic languages only");
485 // If we're here it means that we're using the SPIRDefIsGen ASMap, hence for
486 // the global AS we can rely on either cuda_device or sycl_global to be
487 // correct; however, since this is not a CUDA Device context, we use
488 // sycl_global to prevent confusion with the assertion.
489 LangAS DefaultGlobalAS = getLangASFromTargetAS(
490 TargetAS: CGM.getContext().getTargetAddressSpace(AS: LangAS::sycl_global));
491 if (!D)
492 return DefaultGlobalAS;
493
494 LangAS AddrSpace = D->getType().getAddressSpace();
495 if (AddrSpace != LangAS::Default)
496 return AddrSpace;
497
498 return DefaultGlobalAS;
499}
500
// For HIP kernels targeting AMD-flavoured SPIR-V, attach "max_work_group_size"
// metadata carrying the maximum flat workgroup size so it can be reverse
// translated into the AMDGPU attribute.
void SPIRVTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D);
  if (!FD)
    return;

  llvm::Function *F = dyn_cast<llvm::Function>(Val: GV);
  assert(F && "Expected GlobalValue to be a Function");

  // Only HIP device code for the AMD vendor gets this metadata.
  if (!M.getLangOpts().HIP ||
      M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
    return;

  // Only kernels (__global__ functions) are annotated.
  if (!FD->hasAttr<CUDAGlobalAttr>())
    return;

  // Start from the language-level cap; an explicit
  // amdgpu_flat_work_group_size attribute's maximum overrides it.
  unsigned N = M.getLangOpts().GPUMaxThreadsPerBlock;
  if (auto FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>())
    N = FlatWGS->getMax()->EvaluateKnownConstInt(Ctx: M.getContext()).getExtValue();

  // We encode the maximum flat WG size in the first component of the 3D
  // max_work_group_size attribute, which will get reverse translated into the
  // original AMDGPU attribute when targeting AMDGPU.
  auto Int32Ty = llvm::IntegerType::getInt32Ty(C&: M.getLLVMContext());
  llvm::Metadata *AttrMDArgs[] = {
      llvm::ConstantAsMetadata::get(C: llvm::ConstantInt::get(Ty: Int32Ty, V: N)),
      llvm::ConstantAsMetadata::get(C: llvm::ConstantInt::get(Ty: Int32Ty, V: 1)),
      llvm::ConstantAsMetadata::get(C: llvm::ConstantInt::get(Ty: Int32Ty, V: 1))};

  F->setMetadata(Kind: "max_work_group_size",
                 Node: llvm::MDNode::get(Context&: M.getLLVMContext(), MDs: AttrMDArgs));
}
536
// Map language-level synchronization scopes onto LLVM sync-scope names for
// SPIR-V; system-wide scopes map to the empty (default) scope string.
StringRef SPIRVTargetCodeGenInfo::getLLVMSyncScopeStr(
    const LangOptions &, SyncScope Scope, llvm::AtomicOrdering) const {
  switch (Scope) {
  case SyncScope::HIPSingleThread:
  case SyncScope::SingleScope:
    return "singlethread";
  case SyncScope::HIPWavefront:
  case SyncScope::OpenCLSubGroup:
  case SyncScope::WavefrontScope:
    return "subgroup";
  case SyncScope::HIPCluster:
  case SyncScope::ClusterScope:
  case SyncScope::HIPWorkgroup:
  case SyncScope::OpenCLWorkGroup:
  case SyncScope::WorkgroupScope:
    return "workgroup";
  case SyncScope::HIPAgent:
  case SyncScope::OpenCLDevice:
  case SyncScope::DeviceScope:
    return "device";
  case SyncScope::SystemScope:
  case SyncScope::HIPSystem:
  case SyncScope::OpenCLAllSVMDevices:
    return "";
  }
  return "";
}
564
/// Construct a SPIR-V target extension type for the given OpenCL image type.
static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType,
                                     StringRef OpenCLName,
                                     unsigned AccessQualifier) {
  // These parameters compare to the operands of OpTypeImage (see
  // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpTypeImage
  // for more details). The first 6 integer parameters all default to 0, and
  // will be changed to 1 only for the image type(s) that set the parameter to
  // one. The 7th integer parameter is the access qualifier, which is tacked on
  // at the end.
  SmallVector<unsigned, 7> IntParams = {0, 0, 0, 0, 0, 0};

  // Choose the dimension of the image--this corresponds to the Dim enum in
  // SPIR-V (first integer parameter of OpTypeImage).
  if (OpenCLName.starts_with(Prefix: "image2d"))
    IntParams[0] = 1; // Dim2D
  else if (OpenCLName.starts_with(Prefix: "image3d"))
    IntParams[0] = 2; // Dim3D
  else if (OpenCLName == "image1d_buffer")
    IntParams[0] = 5; // Buffer
  else
    assert(OpenCLName.starts_with("image1d") && "Unknown image type");

  // Set the other integer parameters of OpTypeImage if necessary. Note that the
  // OpenCL image types don't provide any information for the Sampled or
  // Image Format parameters.
  if (OpenCLName.contains(Other: "_depth"))
    IntParams[1] = 1;
  if (OpenCLName.contains(Other: "_array"))
    IntParams[2] = 1;
  if (OpenCLName.contains(Other: "_msaa"))
    IntParams[3] = 1;

  // Access qualifier
  IntParams.push_back(Elt: AccessQualifier);

  // The sampled type of an OpenCL image is untracked, hence void.
  return llvm::TargetExtType::get(Context&: Ctx, Name: BaseType, Types: {llvm::Type::getVoidTy(C&: Ctx)},
                                  Ints: IntParams);
}
604
// Lower OpenCL builtin types (pipes, images, samplers, events, queues, AVC
// intrinsics) to their SPIR-V target extension types; returns nullptr for
// types with no special lowering.
llvm::Type *CommonSPIRTargetCodeGenInfo::getOpenCLType(CodeGenModule &CGM,
                                                       const Type *Ty) const {
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
  if (auto *PipeTy = dyn_cast<PipeType>(Val: Ty))
    return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.Pipe", Types: {},
                                    Ints: {!PipeTy->isReadOnly()});
  if (auto *BuiltinTy = dyn_cast<BuiltinType>(Val: Ty)) {
    // Values follow the SPIR-V AccessQualifier enumeration.
    enum AccessQualifier : unsigned { AQ_ro = 0, AQ_wo = 1, AQ_rw = 2 };
    switch (BuiltinTy->getKind()) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
  case BuiltinType::Id: \
    return getSPIRVImageType(Ctx, "spirv.Image", #ImgType, AQ_##Suffix);
#include "clang/Basic/OpenCLImageTypes.def"
    case BuiltinType::OCLSampler:
      return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.Sampler");
    case BuiltinType::OCLEvent:
      return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.Event");
    case BuiltinType::OCLClkEvent:
      return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.DeviceEvent");
    case BuiltinType::OCLQueue:
      return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.Queue");
    case BuiltinType::OCLReserveID:
      return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.ReserveId");
#define INTEL_SUBGROUP_AVC_TYPE(Name, Id) \
  case BuiltinType::OCLIntelSubgroupAVC##Id: \
    return llvm::TargetExtType::get(Ctx, "spirv.Avc" #Id "INTEL");
#include "clang/Basic/OpenCLExtensionTypes.def"
    default:
      return nullptr;
    }
  }

  return nullptr;
}
639
640// Gets a spirv.IntegralConstant or spirv.Literal. If IntegralType is present,
641// returns an IntegralConstant, otherwise returns a Literal.
642static llvm::Type *getInlineSpirvConstant(CodeGenModule &CGM,
643 llvm::Type *IntegralType,
644 llvm::APInt Value) {
645 llvm::LLVMContext &Ctx = CGM.getLLVMContext();
646
647 // Convert the APInt value to an array of uint32_t words
648 llvm::SmallVector<uint32_t> Words;
649
650 while (Value.ugt(RHS: 0)) {
651 uint32_t Word = Value.trunc(width: 32).getZExtValue();
652 Value.lshrInPlace(ShiftAmt: 32);
653
654 Words.push_back(Elt: Word);
655 }
656 if (Words.size() == 0)
657 Words.push_back(Elt: 0);
658
659 if (IntegralType)
660 return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.IntegralConstant",
661 Types: {IntegralType}, Ints: Words);
662 return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.Literal", Types: {}, Ints: Words);
663}
664
// Lower an HLSL inline SPIR-V type (vk::SpirvType) to a spirv.Type target
// extension type whose operands are the lowered SPIR-V operands.
static llvm::Type *getInlineSpirvType(CodeGenModule &CGM,
                                      const HLSLInlineSpirvType *SpirvType) {
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();

  llvm::SmallVector<llvm::Type *> Operands;

  for (auto &Operand : SpirvType->getOperands()) {
    using SpirvOperandKind = SpirvOperand::SpirvOperandKind;

    llvm::Type *Result = nullptr;
    switch (Operand.getKind()) {
    case SpirvOperandKind::ConstantId: {
      // A typed constant operand: wrap the value with its integral type.
      llvm::Type *IntegralType =
          CGM.getTypes().ConvertType(T: Operand.getResultType());

      Result = getInlineSpirvConstant(CGM, IntegralType, Value: Operand.getValue());
      break;
    }
    case SpirvOperandKind::Literal: {
      // An untyped literal operand.
      Result = getInlineSpirvConstant(CGM, IntegralType: nullptr, Value: Operand.getValue());
      break;
    }
    case SpirvOperandKind::TypeId: {
      QualType TypeOperand = Operand.getResultType();
      // For record types, substitute the first named data member when it is
      // an HLSL resource handle so the handle type is what gets lowered.
      if (const auto *RD = TypeOperand->getAsRecordDecl()) {
        assert(RD->isCompleteDefinition() &&
               "Type completion should have been required in Sema");

        const FieldDecl *HandleField = RD->findFirstNamedDataMember();
        if (HandleField) {
          QualType ResourceType = HandleField->getType();
          if (ResourceType->getAs<HLSLAttributedResourceType>()) {
            TypeOperand = ResourceType;
          }
        }
      }
      Result = CGM.getTypes().ConvertType(T: TypeOperand);
      break;
    }
    default:
      llvm_unreachable("HLSLInlineSpirvType had invalid operand!");
      break;
    }

    assert(Result);
    Operands.push_back(Elt: Result);
  }

  return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.Type", Types: Operands,
                                  Ints: {SpirvType->getOpcode(), SpirvType->getSize(),
                                        SpirvType->getAlignment()});
}
717
718llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType(
719 CodeGenModule &CGM, const Type *Ty,
720 const CGHLSLOffsetInfo &OffsetInfo) const {
721 llvm::LLVMContext &Ctx = CGM.getLLVMContext();
722
723 if (auto *SpirvType = dyn_cast<HLSLInlineSpirvType>(Val: Ty))
724 return getInlineSpirvType(CGM, SpirvType);
725
726 auto *ResType = dyn_cast<HLSLAttributedResourceType>(Val: Ty);
727 if (!ResType)
728 return nullptr;
729
730 const HLSLAttributedResourceType::Attributes &ResAttrs = ResType->getAttrs();
731 switch (ResAttrs.ResourceClass) {
732 case llvm::dxil::ResourceClass::UAV:
733 case llvm::dxil::ResourceClass::SRV: {
734 // TypedBuffer and RawBuffer both need element type
735 QualType ContainedTy = ResType->getContainedType();
736 if (ContainedTy.isNull())
737 return nullptr;
738
739 assert(!ResAttrs.IsROV &&
740 "Rasterizer order views not implemented for SPIR-V yet");
741
742 if (!ResAttrs.RawBuffer) {
743 // convert element type
744 return getSPIRVImageTypeFromHLSLResource(attributes: ResAttrs, SampledType: ContainedTy, CGM);
745 }
746
747 if (ResAttrs.IsCounter) {
748 llvm::Type *ElemType = llvm::Type::getInt32Ty(C&: Ctx);
749 uint32_t StorageClass = /* StorageBuffer storage class */ 12;
750 return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.VulkanBuffer", Types: {ElemType},
751 Ints: {StorageClass, true});
752 }
753 llvm::Type *ElemType = CGM.getTypes().ConvertTypeForMem(T: ContainedTy);
754 llvm::ArrayType *RuntimeArrayType = llvm::ArrayType::get(ElementType: ElemType, NumElements: 0);
755 uint32_t StorageClass = /* StorageBuffer storage class */ 12;
756 bool IsWritable = ResAttrs.ResourceClass == llvm::dxil::ResourceClass::UAV;
757 return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.VulkanBuffer",
758 Types: {RuntimeArrayType},
759 Ints: {StorageClass, IsWritable});
760 }
761 case llvm::dxil::ResourceClass::CBuffer: {
762 QualType ContainedTy = ResType->getContainedType();
763 if (ContainedTy.isNull() || !ContainedTy->isStructureType())
764 return nullptr;
765
766 llvm::StructType *BufferLayoutTy =
767 HLSLBufferLayoutBuilder(CGM).layOutStruct(
768 StructType: ContainedTy->getAsCanonical<RecordType>(), OffsetInfo);
769 uint32_t StorageClass = /* Uniform storage class */ 2;
770 return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.VulkanBuffer", Types: {BufferLayoutTy},
771 Ints: {StorageClass, false});
772 break;
773 }
774 case llvm::dxil::ResourceClass::Sampler:
775 return llvm::TargetExtType::get(Context&: Ctx, Name: "spirv.Sampler");
776 }
777 return nullptr;
778}
779
// Guess a SPIR-V ImageFormat enumerant (returned as its numeric value) for a
// UAV image from the sampled type and channel count; 0 means Unknown.
static unsigned
getImageFormat(const LangOptions &LangOpts,
               const HLSLAttributedResourceType::Attributes &attributes,
               llvm::Type *SampledType, QualType Ty, unsigned NumChannels) {
  // For images with `Sampled` operand equal to 2, there are restrictions on
  // using the Unknown image format. To avoid these restrictions in common
  // cases, we guess an image format for them based on the sampled type and the
  // number of channels. This is intended to match the behaviour of DXC.
  if (LangOpts.HLSLSpvUseUnknownImageFormat ||
      attributes.ResourceClass != llvm::dxil::ResourceClass::UAV) {
    return 0; // Unknown
  }

  if (SampledType->isIntegerTy(Bitwidth: 32)) {
    if (Ty->isSignedIntegerType()) {
      if (NumChannels == 1)
        return 24; // R32i
      if (NumChannels == 2)
        return 25; // Rg32i
      if (NumChannels == 4)
        return 21; // Rgba32i
    } else {
      if (NumChannels == 1)
        return 33; // R32ui
      if (NumChannels == 2)
        return 35; // Rg32ui
      if (NumChannels == 4)
        return 30; // Rgba32ui
    }
  } else if (SampledType->isIntegerTy(Bitwidth: 64)) {
    // 64-bit formats only exist for single-channel images.
    if (NumChannels == 1) {
      if (Ty->isSignedIntegerType()) {
        return 41; // R64i
      }
      return 40; // R64ui
    }
  } else if (SampledType->isFloatTy()) {
    if (NumChannels == 1)
      return 3; // R32f
    if (NumChannels == 2)
      return 6; // Rg32f
    if (NumChannels == 4)
      return 1; // Rgba32f
  }

  return 0; // Unknown
}
827
// Build the SPIR-V image target extension type for an HLSL typed-buffer /
// texture resource, deriving the sampled type, dimension, Sampled operand,
// and image format from the resource attributes and element type.
llvm::Type *CommonSPIRTargetCodeGenInfo::getSPIRVImageTypeFromHLSLResource(
    const HLSLAttributedResourceType::Attributes &attributes, QualType Ty,
    CodeGenModule &CGM) const {
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();

  // Peel vector element types off; the channel count feeds the format guess.
  unsigned NumChannels = 1;
  Ty = Ty->getCanonicalTypeUnqualified();
  if (const VectorType *V = dyn_cast<VectorType>(Val&: Ty)) {
    NumChannels = V->getNumElements();
    Ty = V->getElementType();
  }
  assert(!Ty->isVectorType() && "We still have a vector type.");

  llvm::Type *SampledType = CGM.getTypes().ConvertTypeForMem(T: Ty);

  assert((SampledType->isIntegerTy() || SampledType->isFloatingPointTy()) &&
         "The element type for a SPIR-V resource must be a scalar integer or "
         "floating point type.");

  // These parameters correspond to the operands to the OpTypeImage SPIR-V
  // instruction. See
  // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpTypeImage.
  SmallVector<unsigned, 6> IntParams(6, 0);

  // Signed integer images use a distinct extension type name so the backend
  // can distinguish them from unsigned ones.
  const char *Name =
      Ty->isSignedIntegerType() ? "spirv.SignedImage" : "spirv.Image";

  // Dim
  switch (attributes.ResourceDimension) {
  case llvm::dxil::ResourceDimension::Dim1D:
    IntParams[0] = 0;
    break;
  case llvm::dxil::ResourceDimension::Dim2D:
    IntParams[0] = 1;
    break;
  case llvm::dxil::ResourceDimension::Dim3D:
    IntParams[0] = 2;
    break;
  case llvm::dxil::ResourceDimension::Cube:
    IntParams[0] = 3;
    break;
  case llvm::dxil::ResourceDimension::Unknown:
    IntParams[0] = 5;
    break;
  }

  // Depth
  // HLSL does not indicate if it is a depth texture or not, so we use unknown.
  IntParams[1] = 2;

  // Arrayed
  IntParams[2] = 0;

  // MS
  IntParams[3] = 0;

  // Sampled: 2 (read/write without sampler) for UAVs, 1 (with sampler)
  // otherwise.
  IntParams[4] =
      attributes.ResourceClass == llvm::dxil::ResourceClass::UAV ? 2 : 1;

  // Image format.
  IntParams[5] = getImageFormat(LangOpts: CGM.getLangOpts(), attributes, SampledType, Ty,
                                NumChannels);

  llvm::TargetExtType *ImageType =
      llvm::TargetExtType::get(Context&: Ctx, Name, Types: {SampledType}, Ints: IntParams);
  return ImageType;
}
896
897std::unique_ptr<TargetCodeGenInfo>
898CodeGen::createCommonSPIRTargetCodeGenInfo(CodeGenModule &CGM) {
899 return std::make_unique<CommonSPIRTargetCodeGenInfo>(args&: CGM.getTypes());
900}
901
902std::unique_ptr<TargetCodeGenInfo>
903CodeGen::createSPIRVTargetCodeGenInfo(CodeGenModule &CGM) {
904 return std::make_unique<SPIRVTargetCodeGenInfo>(args&: CGM.getTypes());
905}
906