1//===------- CGHLSLBuiltins.cpp - Emit LLVM Code for HLSL builtins --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit HLSL Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGBuiltin.h"
14#include "CGHLSLRuntime.h"
15#include "CodeGenFunction.h"
16#include "clang/AST/HLSLResource.h"
17#include "clang/AST/MatrixUtils.h"
18#include "llvm/IR/MatrixBuilder.h"
19
20using namespace clang;
21using namespace CodeGen;
22using namespace llvm;
23
24static Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) {
25 assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
26 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
27 "asdouble operands types mismatch");
28 Value *OpLowBits = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
29 Value *OpHighBits = CGF.EmitScalarExpr(E: E->getArg(Arg: 1));
30
31 llvm::Type *ResultType = CGF.DoubleTy;
32 int N = 1;
33 if (auto *VTy = E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>()) {
34 N = VTy->getNumElements();
35 ResultType = llvm::FixedVectorType::get(ElementType: CGF.DoubleTy, NumElts: N);
36 }
37
38 if (CGF.CGM.getTarget().getTriple().isDXIL())
39 return CGF.Builder.CreateIntrinsic(
40 /*ReturnType=*/RetTy: ResultType, ID: Intrinsic::dx_asdouble,
41 Args: {OpLowBits, OpHighBits}, FMFSource: nullptr, Name: "hlsl.asdouble");
42
43 if (!E->getArg(Arg: 0)->getType()->isVectorType()) {
44 OpLowBits = CGF.Builder.CreateVectorSplat(NumElts: 1, V: OpLowBits);
45 OpHighBits = CGF.Builder.CreateVectorSplat(NumElts: 1, V: OpHighBits);
46 }
47
48 llvm::SmallVector<int> Mask;
49 for (int i = 0; i < N; i++) {
50 Mask.push_back(Elt: i);
51 Mask.push_back(Elt: i + N);
52 }
53
54 Value *BitVec = CGF.Builder.CreateShuffleVector(V1: OpLowBits, V2: OpHighBits, Mask);
55
56 return CGF.Builder.CreateBitCast(V: BitVec, DestTy: ResultType);
57}
58
59static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) {
60 Value *Op0 = CGF->EmitScalarExpr(E: E->getArg(Arg: 0));
61
62 Constant *FZeroConst = ConstantFP::getZero(Ty: CGF->FloatTy);
63 Value *CMP;
64 Value *LastInstr;
65
66 if (const auto *VecTy = E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>()) {
67 FZeroConst = ConstantVector::getSplat(
68 EC: ElementCount::getFixed(MinVal: VecTy->getNumElements()), Elt: FZeroConst);
69 auto *FCompInst = CGF->Builder.CreateFCmpOLT(LHS: Op0, RHS: FZeroConst);
70 CMP = CGF->Builder.CreateIntrinsic(
71 RetTy: CGF->Builder.getInt1Ty(), ID: CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
72 Args: {FCompInst});
73 } else {
74 CMP = CGF->Builder.CreateFCmpOLT(LHS: Op0, RHS: FZeroConst);
75 }
76
77 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
78 LastInstr = CGF->Builder.CreateIntrinsic(ID: Intrinsic::dx_discard, Args: {CMP});
79 } else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
80 BasicBlock *LT0 = CGF->createBasicBlock(name: "lt0", parent: CGF->CurFn);
81 BasicBlock *End = CGF->createBasicBlock(name: "end", parent: CGF->CurFn);
82
83 CGF->Builder.CreateCondBr(Cond: CMP, True: LT0, False: End);
84
85 CGF->Builder.SetInsertPoint(LT0);
86
87 CGF->Builder.CreateIntrinsic(ID: Intrinsic::spv_discard, Args: {});
88
89 LastInstr = CGF->Builder.CreateBr(Dest: End);
90 CGF->Builder.SetInsertPoint(End);
91 } else {
92 llvm_unreachable("Backend Codegen not supported.");
93 }
94
95 return LastInstr;
96}
97
98static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
99 Value *Op0 = CGF->EmitScalarExpr(E: E->getArg(Arg: 0));
100 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(Val: E->getArg(Arg: 1));
101 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(Val: E->getArg(Arg: 2));
102
103 CallArgList Args;
104 LValue Op1TmpLValue =
105 CGF->EmitHLSLOutArgExpr(E: OutArg1, Args, Ty: OutArg1->getType());
106 LValue Op2TmpLValue =
107 CGF->EmitHLSLOutArgExpr(E: OutArg2, Args, Ty: OutArg2->getType());
108
109 if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee())
110 Args.reverseWritebacks();
111
112 Value *LowBits = nullptr;
113 Value *HighBits = nullptr;
114
115 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
116 llvm::Type *RetElementTy = CGF->Int32Ty;
117 if (auto *Op0VecTy = E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>())
118 RetElementTy = llvm::VectorType::get(
119 ElementType: CGF->Int32Ty, EC: ElementCount::getFixed(MinVal: Op0VecTy->getNumElements()));
120 else if (auto *Op0MatTy =
121 E->getArg(Arg: 0)->getType()->getAs<ConstantMatrixType>())
122 RetElementTy = llvm::VectorType::get(
123 ElementType: CGF->Int32Ty, EC: ElementCount::getFixed(MinVal: Op0MatTy->getNumRows() *
124 Op0MatTy->getNumColumns()));
125
126 auto *RetTy = llvm::StructType::get(elt1: RetElementTy, elts: RetElementTy);
127
128 Value *CI = CGF->Builder.CreateIntrinsic(
129 RetTy, ID: Intrinsic::dx_splitdouble, Args: {Op0}, FMFSource: nullptr, Name: "hlsl.splitdouble");
130
131 LowBits = CGF->Builder.CreateExtractValue(Agg: CI, Idxs: 0);
132 HighBits = CGF->Builder.CreateExtractValue(Agg: CI, Idxs: 1);
133 } else {
134 // For Non DXIL targets we generate the instructions.
135
136 if (!Op0->getType()->isVectorTy()) {
137 FixedVectorType *DestTy = FixedVectorType::get(ElementType: CGF->Int32Ty, NumElts: 2);
138 Value *Bitcast = CGF->Builder.CreateBitCast(V: Op0, DestTy);
139
140 LowBits = CGF->Builder.CreateExtractElement(Vec: Bitcast, Idx: (uint64_t)0);
141 HighBits = CGF->Builder.CreateExtractElement(Vec: Bitcast, Idx: 1);
142 } else {
143 int NumElements = 1;
144 if (const auto *VecTy =
145 E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>())
146 NumElements = VecTy->getNumElements();
147 else if (const auto *MatTy =
148 E->getArg(Arg: 0)->getType()->getAs<ConstantMatrixType>())
149 NumElements = MatTy->getNumRows() * MatTy->getNumColumns();
150
151 FixedVectorType *Uint32VecTy =
152 FixedVectorType::get(ElementType: CGF->Int32Ty, NumElts: NumElements * 2);
153 Value *Uint32Vec = CGF->Builder.CreateBitCast(V: Op0, DestTy: Uint32VecTy);
154 if (NumElements == 1) {
155 LowBits = CGF->Builder.CreateExtractElement(Vec: Uint32Vec, Idx: (uint64_t)0);
156 HighBits = CGF->Builder.CreateExtractElement(Vec: Uint32Vec, Idx: 1);
157 } else {
158 SmallVector<int> EvenMask, OddMask;
159 for (int I = 0, E = NumElements; I != E; ++I) {
160 EvenMask.push_back(Elt: I * 2);
161 OddMask.push_back(Elt: I * 2 + 1);
162 }
163 LowBits = CGF->Builder.CreateShuffleVector(V: Uint32Vec, Mask: EvenMask);
164 HighBits = CGF->Builder.CreateShuffleVector(V: Uint32Vec, Mask: OddMask);
165 }
166 }
167 }
168 CGF->Builder.CreateStore(Val: LowBits, Addr: Op1TmpLValue.getAddress());
169 auto *LastInst =
170 CGF->Builder.CreateStore(Val: HighBits, Addr: Op2TmpLValue.getAddress());
171 CGF->EmitWritebacks(Args);
172 return LastInst;
173}
174
175static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF,
176 const CallExpr *E) {
177 Value *Cond = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
178 llvm::Type *I32 = CGF.Int32Ty;
179
180 llvm::Type *Vec4I32 = llvm::FixedVectorType::get(ElementType: I32, NumElts: 4);
181 [[maybe_unused]] llvm::StructType *Struct4I32 =
182 llvm::StructType::get(Context&: CGF.getLLVMContext(), Elements: {I32, I32, I32, I32});
183
184 if (CGF.CGM.getTarget().getTriple().isDXIL()) {
185 // Call DXIL intrinsic: returns { i32, i32, i32, i32 }
186 Value *StructVal =
187 CGF.EmitIntrinsicCall(ID: Intrinsic::dx_wave_ballot, Types: {I32}, Args: {Cond});
188 assert(StructVal->getType() == Struct4I32 &&
189 "dx.wave.ballot must return {i32,i32,i32,i32}");
190
191 // Reassemble struct to <4 x i32>
192 llvm::Value *VecVal = llvm::PoisonValue::get(T: Vec4I32);
193 for (unsigned I = 0; I < 4; ++I) {
194 Value *Elt = CGF.Builder.CreateExtractValue(Agg: StructVal, Idxs: I);
195 VecVal =
196 CGF.Builder.CreateInsertElement(Vec: VecVal, NewElt: Elt, Idx: CGF.Builder.getInt32(C: I));
197 }
198
199 return VecVal;
200 }
201
202 if (CGF.CGM.getTarget().getTriple().isSPIRV())
203 return CGF.EmitIntrinsicCall(ID: Intrinsic::spv_subgroup_ballot, Args: {Cond});
204
205 llvm_unreachable(
206 "WaveActiveBallot is only supported for DXIL and SPIRV targets");
207}
208
209static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
210 const CallExpr *E) {
211 Value *Op0 = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
212 QualType Op0Ty = E->getArg(Arg: 0)->getType();
213 llvm::Type *ResType = CGF.FloatTy;
214 uint64_t NumElements = 0;
215 if (Op0->getType()->isVectorTy()) {
216 NumElements =
217 E->getArg(Arg: 0)->getType()->castAs<clang::VectorType>()->getNumElements();
218 ResType =
219 llvm::VectorType::get(ElementType: ResType, EC: ElementCount::getFixed(MinVal: NumElements));
220 }
221 if (!Op0Ty->hasUnsignedIntegerRepresentation())
222 llvm_unreachable(
223 "f16tof32 operand must have an unsigned int representation");
224
225 if (CGF.CGM.getTriple().isDXIL())
226 return CGF.Builder.CreateIntrinsic(RetTy: ResType, ID: Intrinsic::dx_legacyf16tof32,
227 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
228 Name: "hlsl.f16tof32");
229
230 if (CGF.CGM.getTriple().isSPIRV()) {
231 // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
232 // Int16 and Float16 capabilities
233 auto *UnpackType =
234 llvm::VectorType::get(ElementType: CGF.FloatTy, EC: ElementCount::getFixed(MinVal: 2));
235
236 if (NumElements == 0) {
237 // a scalar input - simply extract the first element of the unpacked
238 // vector
239 Value *Unpack = CGF.Builder.CreateIntrinsic(
240 RetTy: UnpackType, ID: Intrinsic::spv_unpackhalf2x16, Args: ArrayRef<Value *>{Op0});
241 return CGF.Builder.CreateExtractElement(Vec: Unpack, Idx: (uint64_t)0);
242 }
243
244 // a vector input - build a congruent output vector by iterating through
245 // the input vector calling unpackhalf2x16 for each element
246 Value *Result = PoisonValue::get(T: ResType);
247 for (uint64_t I = 0; I < NumElements; I++) {
248 Value *InVal = CGF.Builder.CreateExtractElement(Vec: Op0, Idx: I);
249 Value *Unpack = CGF.Builder.CreateIntrinsic(
250 RetTy: UnpackType, ID: Intrinsic::spv_unpackhalf2x16, Args: ArrayRef<Value *>{InVal});
251 Value *Res = CGF.Builder.CreateExtractElement(Vec: Unpack, Idx: (uint64_t)0);
252 Result = CGF.Builder.CreateInsertElement(Vec: Result, NewElt: Res, Idx: I);
253 }
254 return Result;
255 }
256
257 llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
258}
259
260static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
261 const CallExpr *E) {
262 Value *Op0 = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
263 QualType Op0Ty = E->getArg(Arg: 0)->getType();
264 llvm::Type *ResType = CGF.IntTy;
265 uint64_t NumElements = 0;
266 if (Op0->getType()->isVectorTy()) {
267 NumElements =
268 E->getArg(Arg: 0)->getType()->castAs<clang::VectorType>()->getNumElements();
269 ResType =
270 llvm::VectorType::get(ElementType: ResType, EC: ElementCount::getFixed(MinVal: NumElements));
271 }
272 if (!Op0Ty->hasFloatingRepresentation())
273 llvm_unreachable("f32tof16 operand must have a float representation");
274
275 if (CGF.CGM.getTriple().isDXIL())
276 return CGF.Builder.CreateIntrinsic(RetTy: ResType, ID: Intrinsic::dx_legacyf32tof16,
277 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
278 Name: "hlsl.f32tof16");
279
280 if (CGF.CGM.getTriple().isSPIRV()) {
281 // We use the SPIRV PackHalf2x16 operation to avoid the need for the
282 // Int16 and Float16 capabilities
283 auto *PackType =
284 llvm::VectorType::get(ElementType: CGF.FloatTy, EC: ElementCount::getFixed(MinVal: 2));
285
286 if (NumElements == 0) {
287 // a scalar input - simply insert the scalar in the first element
288 // of the 2 element float vector
289 Value *Float2 = Constant::getNullValue(Ty: PackType);
290 Float2 = CGF.Builder.CreateInsertElement(Vec: Float2, NewElt: Op0, Idx: (uint64_t)0);
291 Value *Result = CGF.Builder.CreateIntrinsic(
292 RetTy: ResType, ID: Intrinsic::spv_packhalf2x16, Args: ArrayRef<Value *>{Float2});
293 return Result;
294 }
295
296 // a vector input - build a congruent output vector by iterating through
297 // the input vector calling packhalf2x16 for each element
298 Value *Result = PoisonValue::get(T: ResType);
299 for (uint64_t I = 0; I < NumElements; I++) {
300 Value *Float2 = Constant::getNullValue(Ty: PackType);
301 Value *InVal = CGF.Builder.CreateExtractElement(Vec: Op0, Idx: I);
302 Float2 = CGF.Builder.CreateInsertElement(Vec: Float2, NewElt: InVal, Idx: (uint64_t)0);
303 Value *Res = CGF.Builder.CreateIntrinsic(
304 RetTy: CGF.IntTy, ID: Intrinsic::spv_packhalf2x16, Args: ArrayRef<Value *>{Float2});
305 Result = CGF.Builder.CreateInsertElement(Vec: Result, NewElt: Res, Idx: I);
306 }
307 return Result;
308 }
309
310 llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
311}
312
313static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
314 LValue &Stride) {
315 // Figure out the stride of the buffer elements from the handle type.
316 auto *HandleTy =
317 cast<HLSLAttributedResourceType>(Val: HandleExpr->getType().getTypePtr());
318 QualType ElementTy = HandleTy->getContainedType();
319 Value *StrideValue = CGF->getTypeSize(Ty: ElementTy);
320 return CGF->Builder.CreateStore(Val: StrideValue, Addr: Stride.getAddress());
321}
322
323// Return dot product intrinsic that corresponds to the QT scalar type
324static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
325 if (QT->isFloatingType())
326 return RT.getFDotIntrinsic();
327 if (QT->isSignedIntegerType())
328 return RT.getSDotIntrinsic();
329 assert(QT->isUnsignedIntegerType());
330 return RT.getUDotIntrinsic();
331}
332
333static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
334 if (QT->hasSignedIntegerRepresentation()) {
335 return RT.getFirstBitSHighIntrinsic();
336 }
337
338 assert(QT->hasUnsignedIntegerRepresentation());
339 return RT.getFirstBitUHighIntrinsic();
340}
341
342// Return wave active sum that corresponds to the QT scalar type
343static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
344 QualType QT) {
345 switch (Arch) {
346 case llvm::Triple::spirv:
347 return Intrinsic::spv_wave_reduce_sum;
348 case llvm::Triple::dxil: {
349 if (QT->isUnsignedIntegerType())
350 return Intrinsic::dx_wave_reduce_usum;
351 return Intrinsic::dx_wave_reduce_sum;
352 }
353 default:
354 llvm_unreachable("Intrinsic WaveActiveSum"
355 " not supported by target architecture");
356 }
357}
358
359// Return wave active product that corresponds to the QT scalar type
360static Intrinsic::ID getWaveActiveProductIntrinsic(llvm::Triple::ArchType Arch,
361 QualType QT) {
362 switch (Arch) {
363 case llvm::Triple::spirv:
364 return Intrinsic::spv_wave_product;
365 case llvm::Triple::dxil: {
366 if (QT->isUnsignedIntegerType())
367 return Intrinsic::dx_wave_uproduct;
368 return Intrinsic::dx_wave_product;
369 }
370 default:
371 llvm_unreachable("Intrinsic WaveActiveProduct"
372 " not supported by target architecture");
373 }
374}
375
376static Intrinsic::ID getPrefixCountBitsIntrinsic(llvm::Triple::ArchType Arch) {
377 switch (Arch) {
378 case llvm::Triple::spirv:
379 return Intrinsic::spv_subgroup_prefix_bit_count;
380 case llvm::Triple::dxil: {
381 return Intrinsic::dx_wave_prefix_bit_count;
382 }
383 default:
384 llvm_unreachable(
385 "WavePrefixOp instruction not supported by target architecture");
386 }
387}
388
389// Return wave prefix sum that corresponds to the QT scalar type
390static Intrinsic::ID getWavePrefixSumIntrinsic(llvm::Triple::ArchType Arch,
391 QualType QT) {
392 switch (Arch) {
393 case llvm::Triple::spirv:
394 return Intrinsic::spv_wave_prefix_sum;
395 case llvm::Triple::dxil: {
396 if (QT->isUnsignedIntegerType())
397 return Intrinsic::dx_wave_prefix_usum;
398 return Intrinsic::dx_wave_prefix_sum;
399 }
400 default:
401 llvm_unreachable("Intrinsic WavePrefixSum"
402 " not supported by target architecture");
403 }
404}
405
406// Return wave prefix product that corresponds to the QT scalar type
407static Intrinsic::ID getWavePrefixProductIntrinsic(llvm::Triple::ArchType Arch,
408 QualType QT) {
409 switch (Arch) {
410 case llvm::Triple::spirv:
411 return Intrinsic::spv_wave_prefix_product;
412 case llvm::Triple::dxil: {
413 if (QT->isUnsignedIntegerType())
414 return Intrinsic::dx_wave_prefix_uproduct;
415 return Intrinsic::dx_wave_prefix_product;
416 }
417 default:
418 llvm_unreachable("Intrinsic WavePrefixProduct"
419 " not supported by target architecture");
420 }
421}
422
423// Returns the mangled name for a builtin function that the SPIR-V backend
424// will expand into a spec Constant.
425static std::string getSpecConstantFunctionName(clang::QualType SpecConstantType,
426 ASTContext &Context) {
427 // The parameter types for our conceptual intrinsic function.
428 QualType ClangParamTypes[] = {Context.IntTy, SpecConstantType};
429
430 // Create a temporary FunctionDecl for the builtin fuction. It won't be
431 // added to the AST.
432 FunctionProtoType::ExtProtoInfo EPI;
433 QualType FnType =
434 Context.getFunctionType(ResultTy: SpecConstantType, Args: ClangParamTypes, EPI);
435 DeclarationName FuncName = &Context.Idents.get(Name: "__spirv_SpecConstant");
436 FunctionDecl *FnDeclForMangling = FunctionDecl::Create(
437 C&: Context, DC: Context.getTranslationUnitDecl(), StartLoc: SourceLocation(),
438 NLoc: SourceLocation(), N: FuncName, T: FnType, /*TSI=*/TInfo: nullptr, SC: SC_Extern);
439
440 // Attach the created parameter declarations to the function declaration.
441 SmallVector<ParmVarDecl *, 2> ParamDecls;
442 for (QualType ParamType : ClangParamTypes) {
443 ParmVarDecl *PD = ParmVarDecl::Create(
444 C&: Context, DC: FnDeclForMangling, StartLoc: SourceLocation(), IdLoc: SourceLocation(),
445 /*IdentifierInfo*/ Id: nullptr, T: ParamType, /*TSI*/ TInfo: nullptr, S: SC_None,
446 /*DefaultArg*/ DefArg: nullptr);
447 ParamDecls.push_back(Elt: PD);
448 }
449 FnDeclForMangling->setParams(ParamDecls);
450
451 // Get the mangled name.
452 std::string Name;
453 llvm::raw_string_ostream MangledNameStream(Name);
454 std::unique_ptr<MangleContext> Mangler(Context.createMangleContext());
455 Mangler->mangleName(GD: FnDeclForMangling, MangledNameStream);
456 MangledNameStream.flush();
457
458 return Name;
459}
460
461static const HLSLAttributedResourceType *
462getHandleAttributedType(QualType HandleQT) {
463 if (const auto *RT = HandleQT->getAs<HLSLAttributedResourceType>())
464 return RT;
465 // If the expr is a texture/sampler record (or similar), peel to __handle.
466 if (const HLSLAttributedResourceType *RT =
467 HLSLAttributedResourceType::findHandleTypeOnResource(
468 RT: HandleQT.getTypePtr()))
469 return RT;
470 llvm_unreachable("attributed handle type not found");
471}
472
473static const HLSLAttributedResourceType *
474getRequiredHandleType(const CallExpr *E, unsigned ArgNo) {
475 return getHandleAttributedType(HandleQT: E->getArg(Arg: ArgNo)->getType());
476}
477
478static llvm::Type *getOffsetType(CodeGenModule &CGM,
479 const HLSLAttributedResourceType *RT) {
480 const auto &Attrs = RT->getAttrs();
481 unsigned OffsetSize =
482 clang::hlsl::getResourceDimensions(Dim: Attrs.ResourceDimension);
483 llvm::Type *Int32Ty = CGM.Int32Ty;
484 if (OffsetSize == 1)
485 return Int32Ty;
486 return llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: OffsetSize);
487}
488
489static Value *emitHlslOffset(CodeGenFunction &CGF, const CallExpr *E,
490 unsigned OffsetArgIndex, llvm::Type *OffsetTy) {
491 if (E->getNumArgs() > OffsetArgIndex)
492 return CGF.EmitScalarExpr(E: E->getArg(Arg: OffsetArgIndex));
493
494 return llvm::Constant::getNullValue(Ty: OffsetTy);
495}
496
497static Value *emitHlslClamp(CodeGenFunction &CGF, const CallExpr *E,
498 unsigned ClampArgIndex) {
499 Value *Clamp = CGF.EmitScalarExpr(E: E->getArg(Arg: ClampArgIndex));
500 // The builtin is defined with variadic arguments, so the clamp parameter
501 // might have been promoted to double. The intrinsic requires a 32-bit
502 // float.
503 if (Clamp->getType() != CGF.Builder.getFloatTy())
504 Clamp = CGF.Builder.CreateFPCast(V: Clamp, DestTy: CGF.Builder.getFloatTy());
505 return Clamp;
506}
507
508static Value *emitGetDimensions(CodeGenFunction &CGF, const CallExpr *E,
509 unsigned IntrinsicID, unsigned NumRetComps,
510 bool HasLod) {
511 Value *Handle = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
512
513 SmallVector<Value *> Args{Handle};
514 if (HasLod)
515 Args.push_back(Elt: CGF.EmitScalarExpr(E: E->getArg(Arg: 1)));
516
517 Value *DimValue =
518 CGF.Builder.CreateIntrinsic(ID: IntrinsicID, OverloadTypes: {Handle->getType()}, Args);
519
520 Value *LastStore = nullptr;
521 unsigned ArgIndex = HasLod ? 2 : 1;
522 for (unsigned i = 0; i < NumRetComps; ++i) {
523 const Expr *Arg = E->getArg(Arg: ArgIndex++);
524 LValue DimOut = CGF.EmitLValue(E: Arg);
525 Value *Elem = DimValue;
526 if (NumRetComps > 1)
527 Elem = CGF.Builder.CreateExtractElement(Vec: DimValue, Idx: i);
528
529 // Handle float casting if needed
530 if (Arg->getType()->isFloatingType())
531 Elem = CGF.Builder.CreateUIToFP(
532 V: Elem, DestTy: llvm::Type::getFloatTy(C&: CGF.getLLVMContext()));
533
534 LastStore = CGF.Builder.CreateStore(Val: Elem, Addr: DimOut.getAddress());
535 }
536 return LastStore;
537}
538
539Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
540 const CallExpr *E,
541 ReturnValueSlot ReturnValue) {
542 if (!getLangOpts().HLSL)
543 return nullptr;
544
545 switch (BuiltinID) {
546 case Builtin::BI__builtin_hlsl_adduint64: {
547 Value *OpA = EmitScalarExpr(E: E->getArg(Arg: 0));
548 Value *OpB = EmitScalarExpr(E: E->getArg(Arg: 1));
549 QualType Arg0Ty = E->getArg(Arg: 0)->getType();
550 uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements();
551 assert(Arg0Ty == E->getArg(1)->getType() &&
552 "AddUint64 operand types must match");
553 assert(Arg0Ty->hasIntegerRepresentation() &&
554 "AddUint64 operands must have an integer representation");
555 assert((NumElements == 2 || NumElements == 4) &&
556 "AddUint64 operands must have 2 or 4 elements");
557
558 llvm::Value *LowA;
559 llvm::Value *HighA;
560 llvm::Value *LowB;
561 llvm::Value *HighB;
562
563 // Obtain low and high words of inputs A and B
564 if (NumElements == 2) {
565 LowA = Builder.CreateExtractElement(Vec: OpA, Idx: (uint64_t)0, Name: "LowA");
566 HighA = Builder.CreateExtractElement(Vec: OpA, Idx: (uint64_t)1, Name: "HighA");
567 LowB = Builder.CreateExtractElement(Vec: OpB, Idx: (uint64_t)0, Name: "LowB");
568 HighB = Builder.CreateExtractElement(Vec: OpB, Idx: (uint64_t)1, Name: "HighB");
569 } else {
570 LowA = Builder.CreateShuffleVector(V: OpA, Mask: {0, 2}, Name: "LowA");
571 HighA = Builder.CreateShuffleVector(V: OpA, Mask: {1, 3}, Name: "HighA");
572 LowB = Builder.CreateShuffleVector(V: OpB, Mask: {0, 2}, Name: "LowB");
573 HighB = Builder.CreateShuffleVector(V: OpB, Mask: {1, 3}, Name: "HighB");
574 }
575
576 // Use an uadd_with_overflow to compute the sum of low words and obtain a
577 // carry value
578 llvm::Value *Carry;
579 llvm::Value *LowSum = EmitOverflowIntrinsic(
580 CGF&: *this, IntrinsicID: Intrinsic::uadd_with_overflow, X: LowA, Y: LowB, Carry);
581 llvm::Value *ZExtCarry =
582 Builder.CreateZExt(V: Carry, DestTy: HighA->getType(), Name: "CarryZExt");
583
584 // Sum the high words and the carry
585 llvm::Value *HighSum = Builder.CreateAdd(LHS: HighA, RHS: HighB, Name: "HighSum");
586 llvm::Value *HighSumPlusCarry =
587 Builder.CreateAdd(LHS: HighSum, RHS: ZExtCarry, Name: "HighSumPlusCarry");
588
589 if (NumElements == 4) {
590 return Builder.CreateShuffleVector(V1: LowSum, V2: HighSumPlusCarry, Mask: {0, 2, 1, 3},
591 Name: "hlsl.AddUint64");
592 }
593
594 llvm::Value *Result = PoisonValue::get(T: OpA->getType());
595 Result = Builder.CreateInsertElement(Vec: Result, NewElt: LowSum, Idx: (uint64_t)0,
596 Name: "hlsl.AddUint64.upto0");
597 Result = Builder.CreateInsertElement(Vec: Result, NewElt: HighSumPlusCarry, Idx: (uint64_t)1,
598 Name: "hlsl.AddUint64");
599 return Result;
600 }
601 case Builtin::BI__builtin_hlsl_resource_getpointer:
602 case Builtin::BI__builtin_hlsl_resource_getpointer_typed: {
603 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
604 bool IsIndexed =
605 BuiltinID == Builtin::BI__builtin_hlsl_resource_getpointer_typed ||
606 E->getNumArgs() > 1;
607
608 llvm::Type *RetTy = ConvertType(T: E->getType());
609 llvm::Function *IntrFn = nullptr;
610 llvm::CallInst *CI = nullptr;
611 if (IsIndexed) {
612 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 1));
613 IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
614 M: &CGM.getModule(),
615 id: CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
616 OverloadTys: {RetTy, HandleOp->getType(), IndexOp->getType()});
617 CI = EmitRuntimeCall(callee: IntrFn, args: {HandleOp, IndexOp});
618 } else {
619 IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
620 M: &CGM.getModule(),
621 id: CGM.getHLSLRuntime().getCreateResourceGetBasePointerIntrinsic(),
622 OverloadTys: {RetTy, HandleOp->getType()});
623 CI = EmitRuntimeCall(callee: IntrFn, args: {HandleOp});
624 }
625 CI->setCallingConv(IntrFn->getCallingConv());
626 return CI;
627 }
628 case Builtin::BI__builtin_hlsl_resource_sample: {
629 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
630 Value *SamplerOp = EmitScalarExpr(E: E->getArg(Arg: 1));
631 Value *CoordOp = EmitScalarExpr(E: E->getArg(Arg: 2));
632 const HLSLAttributedResourceType *RT = getRequiredHandleType(E, ArgNo: 0);
633
634 SmallVector<Value *, 4> Args;
635 Args.push_back(Elt: HandleOp);
636 Args.push_back(Elt: SamplerOp);
637 Args.push_back(Elt: CoordOp);
638 Args.push_back(Elt: emitHlslOffset(CGF&: *this, E, OffsetArgIndex: 3, OffsetTy: getOffsetType(CGM, RT)));
639
640 llvm::Type *RetTy = ConvertType(T: E->getType());
641 if (E->getNumArgs() <= 4) {
642 return Builder.CreateIntrinsic(
643 RetTy, ID: CGM.getHLSLRuntime().getSampleIntrinsic(), Args);
644 }
645
646 Args.push_back(Elt: emitHlslClamp(CGF&: *this, E, ClampArgIndex: 4));
647 return Builder.CreateIntrinsic(
648 RetTy, ID: CGM.getHLSLRuntime().getSampleClampIntrinsic(), Args);
649 }
650 case Builtin::BI__builtin_hlsl_resource_sample_bias: {
651 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
652 Value *SamplerOp = EmitScalarExpr(E: E->getArg(Arg: 1));
653 Value *CoordOp = EmitScalarExpr(E: E->getArg(Arg: 2));
654 Value *BiasOp = EmitScalarExpr(E: E->getArg(Arg: 3));
655 if (BiasOp->getType() != Builder.getFloatTy())
656 BiasOp = Builder.CreateFPCast(V: BiasOp, DestTy: Builder.getFloatTy());
657 const HLSLAttributedResourceType *RT = getRequiredHandleType(E, ArgNo: 0);
658
659 SmallVector<Value *, 6> Args; // Max 6 arguments for SampleBias
660 Args.push_back(Elt: HandleOp);
661 Args.push_back(Elt: SamplerOp);
662 Args.push_back(Elt: CoordOp);
663 Args.push_back(Elt: BiasOp);
664 Args.push_back(Elt: emitHlslOffset(CGF&: *this, E, OffsetArgIndex: 4, OffsetTy: getOffsetType(CGM, RT)));
665
666 llvm::Type *RetTy = ConvertType(T: E->getType());
667 if (E->getNumArgs() <= 5)
668 return Builder.CreateIntrinsic(
669 RetTy, ID: CGM.getHLSLRuntime().getSampleBiasIntrinsic(), Args);
670
671 Args.push_back(Elt: emitHlslClamp(CGF&: *this, E, ClampArgIndex: 5));
672 return Builder.CreateIntrinsic(
673 RetTy, ID: CGM.getHLSLRuntime().getSampleBiasClampIntrinsic(), Args);
674 }
675 case Builtin::BI__builtin_hlsl_resource_sample_grad: {
676 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
677 Value *SamplerOp = EmitScalarExpr(E: E->getArg(Arg: 1));
678 Value *CoordOp = EmitScalarExpr(E: E->getArg(Arg: 2));
679 Value *DDXOp = EmitScalarExpr(E: E->getArg(Arg: 3));
680 Value *DDYOp = EmitScalarExpr(E: E->getArg(Arg: 4));
681 const HLSLAttributedResourceType *RT = getRequiredHandleType(E, ArgNo: 0);
682
683 SmallVector<Value *, 7> Args;
684 Args.push_back(Elt: HandleOp);
685 Args.push_back(Elt: SamplerOp);
686 Args.push_back(Elt: CoordOp);
687 Args.push_back(Elt: DDXOp);
688 Args.push_back(Elt: DDYOp);
689 Args.push_back(Elt: emitHlslOffset(CGF&: *this, E, OffsetArgIndex: 5, OffsetTy: getOffsetType(CGM, RT)));
690
691 llvm::Type *RetTy = ConvertType(T: E->getType());
692
693 if (E->getNumArgs() <= 6) {
694 return Builder.CreateIntrinsic(
695 RetTy, ID: CGM.getHLSLRuntime().getSampleGradIntrinsic(), Args);
696 }
697
698 Args.push_back(Elt: emitHlslClamp(CGF&: *this, E, ClampArgIndex: 6));
699 return Builder.CreateIntrinsic(
700 RetTy, ID: CGM.getHLSLRuntime().getSampleGradClampIntrinsic(), Args);
701 }
702 case Builtin::BI__builtin_hlsl_resource_sample_level: {
703 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
704 Value *SamplerOp = EmitScalarExpr(E: E->getArg(Arg: 1));
705 Value *CoordOp = EmitScalarExpr(E: E->getArg(Arg: 2));
706 Value *LODOp = EmitScalarExpr(E: E->getArg(Arg: 3));
707 if (LODOp->getType() != Builder.getFloatTy())
708 LODOp = Builder.CreateFPCast(V: LODOp, DestTy: Builder.getFloatTy());
709 const HLSLAttributedResourceType *RT = getRequiredHandleType(E, ArgNo: 0);
710
711 SmallVector<Value *, 5> Args; // Max 5 arguments for SampleLevel
712 Args.push_back(Elt: HandleOp);
713 Args.push_back(Elt: SamplerOp);
714 Args.push_back(Elt: CoordOp);
715 Args.push_back(Elt: LODOp);
716 Args.push_back(Elt: emitHlslOffset(CGF&: *this, E, OffsetArgIndex: 4, OffsetTy: getOffsetType(CGM, RT)));
717
718 llvm::Type *RetTy = ConvertType(T: E->getType());
719 return Builder.CreateIntrinsic(
720 RetTy, ID: CGM.getHLSLRuntime().getSampleLevelIntrinsic(), Args);
721 }
722 case Builtin::BI__builtin_hlsl_resource_load_level: {
723 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
724 Value *CoordLODOp = EmitScalarExpr(E: E->getArg(Arg: 1));
725
726 auto *CoordLODVecTy = cast<llvm::FixedVectorType>(Val: CoordLODOp->getType());
727 unsigned NumElts = CoordLODVecTy->getNumElements();
728 assert(NumElts >= 2 && "CoordLOD must have at least 2 elements");
729
730 // Split CoordLOD into Coord and LOD
731 SmallVector<int, 4> Mask;
732 for (unsigned I = 0; I < NumElts - 1; ++I)
733 Mask.push_back(Elt: I);
734
735 Value *CoordOp =
736 Builder.CreateShuffleVector(V: CoordLODOp, Mask, Name: "hlsl.load.coord");
737 Value *LODOp =
738 Builder.CreateExtractElement(Vec: CoordLODOp, Idx: NumElts - 1, Name: "hlsl.load.lod");
739 const HLSLAttributedResourceType *RT = getRequiredHandleType(E, ArgNo: 0);
740
741 SmallVector<Value *, 4> Args;
742 Args.push_back(Elt: HandleOp);
743 Args.push_back(Elt: CoordOp);
744 Args.push_back(Elt: LODOp);
745 Args.push_back(Elt: emitHlslOffset(CGF&: *this, E, OffsetArgIndex: 2, OffsetTy: getOffsetType(CGM, RT)));
746
747 llvm::Type *RetTy = ConvertType(T: E->getType());
748 return Builder.CreateIntrinsic(
749 RetTy, ID: CGM.getHLSLRuntime().getLoadLevelIntrinsic(), Args);
750 }
751 case Builtin::BI__builtin_hlsl_resource_sample_cmp: {
752 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
753 Value *SamplerOp = EmitScalarExpr(E: E->getArg(Arg: 1));
754 Value *CoordOp = EmitScalarExpr(E: E->getArg(Arg: 2));
755 Value *CmpOp = EmitScalarExpr(E: E->getArg(Arg: 3));
756 if (CmpOp->getType() != Builder.getFloatTy())
757 CmpOp = Builder.CreateFPCast(V: CmpOp, DestTy: Builder.getFloatTy());
758 const HLSLAttributedResourceType *RT = getRequiredHandleType(E, ArgNo: 0);
759
760 SmallVector<Value *, 6> Args; // Max 6 arguments for SampleCmp
761 Args.push_back(Elt: HandleOp);
762 Args.push_back(Elt: SamplerOp);
763 Args.push_back(Elt: CoordOp);
764 Args.push_back(Elt: CmpOp);
765 Args.push_back(Elt: emitHlslOffset(CGF&: *this, E, OffsetArgIndex: 4, OffsetTy: getOffsetType(CGM, RT)));
766
767 llvm::Type *RetTy = ConvertType(T: E->getType());
768 if (E->getNumArgs() <= 5) {
769 return Builder.CreateIntrinsic(
770 RetTy, ID: CGM.getHLSLRuntime().getSampleCmpIntrinsic(), Args);
771 }
772
773 Args.push_back(Elt: emitHlslClamp(CGF&: *this, E, ClampArgIndex: 5));
774 return Builder.CreateIntrinsic(
775 RetTy, ID: CGM.getHLSLRuntime().getSampleCmpClampIntrinsic(), Args);
776 }
777 case Builtin::BI__builtin_hlsl_resource_sample_cmp_level_zero: {
778 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
779 Value *SamplerOp = EmitScalarExpr(E: E->getArg(Arg: 1));
780 Value *CoordOp = EmitScalarExpr(E: E->getArg(Arg: 2));
781 Value *CmpOp = EmitScalarExpr(E: E->getArg(Arg: 3));
782 if (CmpOp->getType() != Builder.getFloatTy())
783 CmpOp = Builder.CreateFPCast(V: CmpOp, DestTy: Builder.getFloatTy());
784 const HLSLAttributedResourceType *RT = getRequiredHandleType(E, ArgNo: 0);
785
786 SmallVector<Value *, 5> Args;
787 Args.push_back(Elt: HandleOp);
788 Args.push_back(Elt: SamplerOp);
789 Args.push_back(Elt: CoordOp);
790 Args.push_back(Elt: CmpOp);
791 Args.push_back(Elt: emitHlslOffset(CGF&: *this, E, OffsetArgIndex: 4, OffsetTy: getOffsetType(CGM, RT)));
792
793 llvm::Type *RetTy = ConvertType(T: E->getType());
794 return Builder.CreateIntrinsic(
795 RetTy, ID: CGM.getHLSLRuntime().getSampleCmpLevelZeroIntrinsic(), Args);
796 }
797 case Builtin::BI__builtin_hlsl_resource_calculate_lod: {
798 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
799 Value *SamplerOp = EmitScalarExpr(E: E->getArg(Arg: 1));
800 Value *CoordOp = EmitScalarExpr(E: E->getArg(Arg: 2));
801
802 return Builder.CreateIntrinsic(
803 RetTy: ConvertType(T: E->getType()),
804 ID: CGM.getHLSLRuntime().getCalculateLodIntrinsic(),
805 Args: {HandleOp, SamplerOp, CoordOp});
806 }
807 case Builtin::BI__builtin_hlsl_resource_calculate_lod_unclamped: {
808 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
809 Value *SamplerOp = EmitScalarExpr(E: E->getArg(Arg: 1));
810 Value *CoordOp = EmitScalarExpr(E: E->getArg(Arg: 2));
811
812 return Builder.CreateIntrinsic(
813 RetTy: ConvertType(T: E->getType()),
814 ID: CGM.getHLSLRuntime().getCalculateLodUnclampedIntrinsic(),
815 Args: {HandleOp, SamplerOp, CoordOp});
816 }
817 case Builtin::BI__builtin_hlsl_resource_gather: {
818 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
819 Value *SamplerOp = EmitScalarExpr(E: E->getArg(Arg: 1));
820 Value *CoordOp = EmitScalarExpr(E: E->getArg(Arg: 2));
821 Value *ComponentOp = EmitScalarExpr(E: E->getArg(Arg: 3));
822 if (ComponentOp->getType() != Builder.getInt32Ty())
823 ComponentOp = Builder.CreateIntCast(V: ComponentOp, DestTy: Builder.getInt32Ty(),
824 /*isSigned=*/false);
825 const HLSLAttributedResourceType *RT = getRequiredHandleType(E, ArgNo: 0);
826
827 SmallVector<Value *, 5> Args;
828 Args.push_back(Elt: HandleOp);
829 Args.push_back(Elt: SamplerOp);
830 Args.push_back(Elt: CoordOp);
831 Args.push_back(Elt: ComponentOp);
832 Args.push_back(Elt: emitHlslOffset(CGF&: *this, E, OffsetArgIndex: 4, OffsetTy: getOffsetType(CGM, RT)));
833
834 llvm::Type *RetTy = ConvertType(T: E->getType());
835 return Builder.CreateIntrinsic(
836 RetTy, ID: CGM.getHLSLRuntime().getGatherIntrinsic(), Args);
837 }
838 case Builtin::BI__builtin_hlsl_resource_gather_cmp: {
839 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
840 Value *SamplerOp = EmitScalarExpr(E: E->getArg(Arg: 1));
841 Value *CoordOp = EmitScalarExpr(E: E->getArg(Arg: 2));
842 Value *CompareOp = EmitScalarExpr(E: E->getArg(Arg: 3));
843 if (CompareOp->getType() != Builder.getFloatTy())
844 CompareOp = Builder.CreateFPCast(V: CompareOp, DestTy: Builder.getFloatTy());
845
846 SmallVector<Value *, 6> Args;
847 Args.push_back(Elt: HandleOp);
848 Args.push_back(Elt: SamplerOp);
849 Args.push_back(Elt: CoordOp);
850 Args.push_back(Elt: CompareOp);
851
852 if (CGM.getTarget().getTriple().isDXIL()) {
853 Value *ComponentOp = EmitScalarExpr(E: E->getArg(Arg: 4));
854 if (ComponentOp->getType() != Builder.getInt32Ty())
855 ComponentOp = Builder.CreateIntCast(V: ComponentOp, DestTy: Builder.getInt32Ty(),
856 /*isSigned=*/false);
857 Args.push_back(Elt: ComponentOp);
858 }
859
860 const HLSLAttributedResourceType *RT = getRequiredHandleType(E, ArgNo: 0);
861 Args.push_back(Elt: emitHlslOffset(CGF&: *this, E, OffsetArgIndex: 5, OffsetTy: getOffsetType(CGM, RT)));
862
863 llvm::Type *RetTy = ConvertType(T: E->getType());
864 return Builder.CreateIntrinsic(
865 RetTy, ID: CGM.getHLSLRuntime().getGatherCmpIntrinsic(), Args);
866 }
867 case Builtin::BI__builtin_hlsl_resource_load_with_status:
868 case Builtin::BI__builtin_hlsl_resource_load_with_status_typed: {
869 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
870 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 1));
871
872 // Get the *address* of the status argument to write to it by reference
873 LValue StatusLVal = EmitLValue(E: E->getArg(Arg: 2));
874 Address StatusAddr = StatusLVal.getAddress();
875
876 QualType HandleTy = E->getArg(Arg: 0)->getType();
877 const HLSLAttributedResourceType *RT =
878 HandleTy->getAs<HLSLAttributedResourceType>();
879 assert(CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil &&
880 "Only DXIL currently implements load with status");
881
882 Intrinsic::ID IntrID = RT->getAttrs().RawBuffer
883 ? llvm::Intrinsic::dx_resource_load_rawbuffer
884 : llvm::Intrinsic::dx_resource_load_typedbuffer;
885
886 llvm::Type *DataTy = ConvertType(T: E->getType());
887 llvm::Type *RetTy = llvm::StructType::get(Context&: Builder.getContext(),
888 Elements: {DataTy, Builder.getInt1Ty()});
889
890 SmallVector<Value *, 3> Args;
891 Args.push_back(Elt: HandleOp);
892 Args.push_back(Elt: IndexOp);
893
894 if (RT->isRaw()) {
895 Value *Offset = Builder.getInt32(C: 0);
896 // The offset parameter needs to be poison for ByteAddressBuffer
897 if (!RT->isStructured())
898 Offset = llvm::PoisonValue::get(T: Builder.getInt32Ty());
899 Args.push_back(Elt: Offset);
900 }
901
902 // The load intrinsics give us a (T value, i1 status) pair -
903 // shepherd these into the return value and out reference respectively.
904 Value *ResRet =
905 Builder.CreateIntrinsic(RetTy, ID: IntrID, Args, FMFSource: {}, Name: "ld.struct");
906 Value *LoadedValue = Builder.CreateExtractValue(Agg: ResRet, Idxs: {0}, Name: "ld.value");
907 Value *StatusBit = Builder.CreateExtractValue(Agg: ResRet, Idxs: {1}, Name: "ld.status");
908 Value *ExtendedStatus =
909 Builder.CreateZExt(V: StatusBit, DestTy: Builder.getInt32Ty(), Name: "ld.status.ext");
910 Builder.CreateStore(Val: ExtendedStatus, Addr: StatusAddr);
911
912 return LoadedValue;
913 }
914 case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: {
915 llvm::Type *HandleTy = CGM.getTypes().ConvertType(T: E->getType());
916 return llvm::PoisonValue::get(T: HandleTy);
917 }
918 case Builtin::BI__builtin_hlsl_resource_handlefrombinding: {
919 llvm::Type *HandleTy = CGM.getTypes().ConvertType(T: E->getType());
920 Value *RegisterOp = EmitScalarExpr(E: E->getArg(Arg: 1));
921 Value *SpaceOp = EmitScalarExpr(E: E->getArg(Arg: 2));
922 Value *RangeOp = EmitScalarExpr(E: E->getArg(Arg: 3));
923 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 4));
924 Value *Name = EmitScalarExpr(E: E->getArg(Arg: 5));
925 llvm::Intrinsic::ID IntrinsicID =
926 CGM.getHLSLRuntime().getCreateHandleFromBindingIntrinsic();
927 SmallVector<Value *> Args{SpaceOp, RegisterOp, RangeOp, IndexOp, Name};
928 return Builder.CreateIntrinsic(RetTy: HandleTy, ID: IntrinsicID, Args);
929 }
930 case Builtin::BI__builtin_hlsl_resource_handlefromimplicitbinding: {
931 llvm::Type *HandleTy = CGM.getTypes().ConvertType(T: E->getType());
932 Value *OrderID = EmitScalarExpr(E: E->getArg(Arg: 1));
933 Value *SpaceOp = EmitScalarExpr(E: E->getArg(Arg: 2));
934 Value *RangeOp = EmitScalarExpr(E: E->getArg(Arg: 3));
935 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 4));
936 Value *Name = EmitScalarExpr(E: E->getArg(Arg: 5));
937 llvm::Intrinsic::ID IntrinsicID =
938 CGM.getHLSLRuntime().getCreateHandleFromImplicitBindingIntrinsic();
939 SmallVector<Value *> Args{OrderID, SpaceOp, RangeOp, IndexOp, Name};
940 return Builder.CreateIntrinsic(RetTy: HandleTy, ID: IntrinsicID, Args);
941 }
942 case Builtin::BI__builtin_hlsl_resource_counterhandlefromimplicitbinding: {
943 Value *MainHandle = EmitScalarExpr(E: E->getArg(Arg: 0));
944 if (!CGM.getTriple().isSPIRV())
945 return MainHandle;
946
947 llvm::Type *HandleTy = CGM.getTypes().ConvertType(T: E->getType());
948 Value *OrderID = EmitScalarExpr(E: E->getArg(Arg: 1));
949 Value *SpaceOp = EmitScalarExpr(E: E->getArg(Arg: 2));
950 llvm::Intrinsic::ID IntrinsicID =
951 llvm::Intrinsic::spv_resource_counterhandlefromimplicitbinding;
952 SmallVector<Value *> Args{MainHandle, OrderID, SpaceOp};
953 return Builder.CreateIntrinsic(RetTy: HandleTy, ID: IntrinsicID, Args);
954 }
955 case Builtin::BI__builtin_hlsl_resource_nonuniformindex: {
956 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 0));
957 llvm::Type *RetTy = ConvertType(T: E->getType());
958 return Builder.CreateIntrinsic(
959 RetTy, ID: CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(),
960 Args: ArrayRef<Value *>{IndexOp});
961 }
962 case Builtin::BI__builtin_hlsl_resource_getdimensions_x:
963 case Builtin::BI__builtin_hlsl_resource_getdimensions_x_float:
964 return emitGetDimensions(CGF&: *this, E,
965 IntrinsicID: CGM.getHLSLRuntime().getGetDimensionsXIntrinsic(),
966 NumRetComps: 1, /*HasLod=*/false);
967 case Builtin::BI__builtin_hlsl_resource_getdimensions_xy:
968 case Builtin::BI__builtin_hlsl_resource_getdimensions_xy_float:
969 return emitGetDimensions(CGF&: *this, E,
970 IntrinsicID: CGM.getHLSLRuntime().getGetDimensionsXYIntrinsic(),
971 NumRetComps: 2, /*HasLod=*/false);
972 case Builtin::BI__builtin_hlsl_resource_getdimensions_levels_xy:
973 case Builtin::BI__builtin_hlsl_resource_getdimensions_levels_xy_float:
974 return emitGetDimensions(
975 CGF&: *this, E, IntrinsicID: CGM.getHLSLRuntime().getGetDimensionsLevelsXYIntrinsic(), NumRetComps: 3,
976 /*HasLod=*/true);
977 case Builtin::BI__builtin_hlsl_resource_getstride: {
978 LValue Stride = EmitLValue(E: E->getArg(Arg: 1));
979 return emitBufferStride(CGF: this, HandleExpr: E->getArg(Arg: 0), Stride);
980 }
981 case Builtin::BI__builtin_hlsl_all: {
982 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
983 return Builder.CreateIntrinsic(
984 /*ReturnType=*/RetTy: llvm::Type::getInt1Ty(C&: getLLVMContext()),
985 ID: CGM.getHLSLRuntime().getAllIntrinsic(), Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
986 Name: "hlsl.all");
987 }
988 case Builtin::BI__builtin_hlsl_and: {
989 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
990 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
991 return Builder.CreateAnd(LHS: Op0, RHS: Op1, Name: "hlsl.and");
992 }
993 case Builtin::BI__builtin_hlsl_or: {
994 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
995 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
996 return Builder.CreateOr(LHS: Op0, RHS: Op1, Name: "hlsl.or");
997 }
998 case Builtin::BI__builtin_hlsl_any: {
999 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1000 return Builder.CreateIntrinsic(
1001 /*ReturnType=*/RetTy: llvm::Type::getInt1Ty(C&: getLLVMContext()),
1002 ID: CGM.getHLSLRuntime().getAnyIntrinsic(), Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
1003 Name: "hlsl.any");
1004 }
1005 case Builtin::BI__builtin_hlsl_asdouble:
1006 return handleAsDoubleBuiltin(CGF&: *this, E);
1007 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
1008 Value *OpX = EmitScalarExpr(E: E->getArg(Arg: 0));
1009 Value *OpMin = EmitScalarExpr(E: E->getArg(Arg: 1));
1010 Value *OpMax = EmitScalarExpr(E: E->getArg(Arg: 2));
1011
1012 QualType Ty = E->getArg(Arg: 0)->getType();
1013 if (auto *VecTy = Ty->getAs<VectorType>())
1014 Ty = VecTy->getElementType();
1015
1016 Intrinsic::ID Intr;
1017 if (Ty->isFloatingType()) {
1018 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
1019 } else if (Ty->isUnsignedIntegerType()) {
1020 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
1021 } else {
1022 assert(Ty->isSignedIntegerType());
1023 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
1024 }
1025 return Builder.CreateIntrinsic(
1026 /*ReturnType=*/RetTy: OpX->getType(), ID: Intr,
1027 Args: ArrayRef<Value *>{OpX, OpMin, OpMax}, FMFSource: nullptr, Name: "hlsl.clamp");
1028 }
1029 case Builtin::BI__builtin_hlsl_crossf16:
1030 case Builtin::BI__builtin_hlsl_crossf32: {
1031 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1032 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
1033 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1034 E->getArg(1)->getType()->hasFloatingRepresentation() &&
1035 "cross operands must have a float representation");
1036 // make sure each vector has exactly 3 elements
1037 assert(
1038 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
1039 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
1040 "input vectors must have 3 elements each");
1041 return Builder.CreateIntrinsic(
1042 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getCrossIntrinsic(),
1043 Args: ArrayRef<Value *>{Op0, Op1}, FMFSource: nullptr, Name: "hlsl.cross");
1044 }
1045 case Builtin::BI__builtin_hlsl_dot: {
1046 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1047 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
1048 llvm::Type *T0 = Op0->getType();
1049 llvm::Type *T1 = Op1->getType();
1050
1051 // If the arguments are scalars, just emit a multiply
1052 if (!T0->isVectorTy() && !T1->isVectorTy()) {
1053 if (T0->isFloatingPointTy())
1054 return Builder.CreateFMul(L: Op0, R: Op1, Name: "hlsl.dot");
1055
1056 if (T0->isIntegerTy())
1057 return Builder.CreateMul(LHS: Op0, RHS: Op1, Name: "hlsl.dot");
1058
1059 llvm_unreachable(
1060 "Scalar dot product is only supported on ints and floats.");
1061 }
1062 // For vectors, validate types and emit the appropriate intrinsic
1063 assert(CGM.getContext().hasSameUnqualifiedType(E->getArg(0)->getType(),
1064 E->getArg(1)->getType()) &&
1065 "Dot product operands must have the same type.");
1066
1067 auto *VecTy0 = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
1068 assert(VecTy0 && "Dot product argument must be a vector.");
1069
1070 return Builder.CreateIntrinsic(
1071 /*ReturnType=*/RetTy: T0->getScalarType(),
1072 ID: getDotProductIntrinsic(RT&: CGM.getHLSLRuntime(), QT: VecTy0->getElementType()),
1073 Args: ArrayRef<Value *>{Op0, Op1}, FMFSource: nullptr, Name: "hlsl.dot");
1074 }
1075 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
1076 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
1077 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
1078 Value *Acc = EmitScalarExpr(E: E->getArg(Arg: 2));
1079
1080 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
1081 // Note that the argument order disagrees between the builtin and the
1082 // intrinsic here.
1083 return Builder.CreateIntrinsic(
1084 /*ReturnType=*/RetTy: Acc->getType(), ID, Args: ArrayRef<Value *>{Acc, X, Y},
1085 FMFSource: nullptr, Name: "hlsl.dot4add.i8packed");
1086 }
1087 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
1088 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
1089 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
1090 Value *Acc = EmitScalarExpr(E: E->getArg(Arg: 2));
1091
1092 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
1093 // Note that the argument order disagrees between the builtin and the
1094 // intrinsic here.
1095 return Builder.CreateIntrinsic(
1096 /*ReturnType=*/RetTy: Acc->getType(), ID, Args: ArrayRef<Value *>{Acc, X, Y},
1097 FMFSource: nullptr, Name: "hlsl.dot4add.u8packed");
1098 }
1099 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
1100 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
1101
1102 return Builder.CreateIntrinsic(
1103 /*ReturnType=*/RetTy: ConvertType(T: E->getType()),
1104 ID: getFirstBitHighIntrinsic(RT&: CGM.getHLSLRuntime(), QT: E->getArg(Arg: 0)->getType()),
1105 Args: ArrayRef<Value *>{X}, FMFSource: nullptr, Name: "hlsl.firstbithigh");
1106 }
1107 case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
1108 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
1109
1110 return Builder.CreateIntrinsic(
1111 /*ReturnType=*/RetTy: ConvertType(T: E->getType()),
1112 ID: CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), Args: ArrayRef<Value *>{X},
1113 FMFSource: nullptr, Name: "hlsl.firstbitlow");
1114 }
1115 case Builtin::BI__builtin_hlsl_lerp: {
1116 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
1117 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
1118 Value *S = EmitScalarExpr(E: E->getArg(Arg: 2));
1119 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1120 llvm_unreachable("lerp operand must have a float representation");
1121 return Builder.CreateIntrinsic(
1122 /*ReturnType=*/RetTy: X->getType(), ID: CGM.getHLSLRuntime().getLerpIntrinsic(),
1123 Args: ArrayRef<Value *>{X, Y, S}, FMFSource: nullptr, Name: "hlsl.lerp");
1124 }
1125 case Builtin::BI__builtin_hlsl_normalize: {
1126 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
1127
1128 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1129 "normalize operand must have a float representation");
1130
1131 return Builder.CreateIntrinsic(
1132 /*ReturnType=*/RetTy: X->getType(),
1133 ID: CGM.getHLSLRuntime().getNormalizeIntrinsic(), Args: ArrayRef<Value *>{X},
1134 FMFSource: nullptr, Name: "hlsl.normalize");
1135 }
1136 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
1137 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
1138
1139 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1140 "degree operand must have a float representation");
1141
1142 return Builder.CreateIntrinsic(
1143 /*ReturnType=*/RetTy: X->getType(), ID: CGM.getHLSLRuntime().getDegreesIntrinsic(),
1144 Args: ArrayRef<Value *>{X}, FMFSource: nullptr, Name: "hlsl.degrees");
1145 }
1146 case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
1147 return handleElementwiseF16ToF32(CGF&: *this, E);
1148 }
1149 case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
1150 return handleElementwiseF32ToF16(CGF&: *this, E);
1151 }
1152 case Builtin::BI__builtin_hlsl_elementwise_frac: {
1153 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1154 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1155 llvm_unreachable("frac operand must have a float representation");
1156 return Builder.CreateIntrinsic(
1157 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getFracIntrinsic(),
1158 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.frac");
1159 }
1160 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
1161 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1162 llvm::Type *Xty = Op0->getType();
1163 llvm::Type *retType = llvm::Type::getInt1Ty(C&: this->getLLVMContext());
1164 if (Xty->isVectorTy()) {
1165 auto *XVecTy = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
1166 retType = llvm::VectorType::get(
1167 ElementType: retType, EC: ElementCount::getFixed(MinVal: XVecTy->getNumElements()));
1168 }
1169 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1170 llvm_unreachable("isinf operand must have a float representation");
1171 return Builder.CreateIntrinsic(
1172 RetTy: retType, ID: CGM.getHLSLRuntime().getIsInfIntrinsic(),
1173 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.isinf");
1174 }
1175 case Builtin::BI__builtin_hlsl_elementwise_isnan: {
1176 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1177 llvm::Type *Xty = Op0->getType();
1178 llvm::Type *retType = llvm::Type::getInt1Ty(C&: this->getLLVMContext());
1179 if (Xty->isVectorTy()) {
1180 auto *XVecTy = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
1181 retType = llvm::VectorType::get(
1182 ElementType: retType, EC: ElementCount::getFixed(MinVal: XVecTy->getNumElements()));
1183 }
1184 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1185 llvm_unreachable("isnan operand must have a float representation");
1186 return Builder.CreateIntrinsic(
1187 RetTy: retType, ID: CGM.getHLSLRuntime().getIsNaNIntrinsic(),
1188 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.isnan");
1189 }
1190 case Builtin::BI__builtin_hlsl_mad: {
1191 Value *M = EmitScalarExpr(E: E->getArg(Arg: 0));
1192 Value *A = EmitScalarExpr(E: E->getArg(Arg: 1));
1193 Value *B = EmitScalarExpr(E: E->getArg(Arg: 2));
1194 if (E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1195 return Builder.CreateIntrinsic(
1196 /*ReturnType*/ RetTy: M->getType(), ID: Intrinsic::fmuladd,
1197 Args: ArrayRef<Value *>{M, A, B}, FMFSource: nullptr, Name: "hlsl.fmad");
1198
1199 if (E->getArg(Arg: 0)->getType()->hasSignedIntegerRepresentation()) {
1200 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
1201 return Builder.CreateIntrinsic(
1202 /*ReturnType*/ RetTy: M->getType(), ID: Intrinsic::dx_imad,
1203 Args: ArrayRef<Value *>{M, A, B}, FMFSource: nullptr, Name: "dx.imad");
1204
1205 Value *Mul = Builder.CreateNSWMul(LHS: M, RHS: A);
1206 return Builder.CreateNSWAdd(LHS: Mul, RHS: B);
1207 }
1208 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
1209 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
1210 return Builder.CreateIntrinsic(
1211 /*ReturnType=*/RetTy: M->getType(), ID: Intrinsic::dx_umad,
1212 Args: ArrayRef<Value *>{M, A, B}, FMFSource: nullptr, Name: "dx.umad");
1213
1214 Value *Mul = Builder.CreateNUWMul(LHS: M, RHS: A);
1215 return Builder.CreateNUWAdd(LHS: Mul, RHS: B);
1216 }
1217 case Builtin::BI__builtin_hlsl_mul: {
1218 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1219 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
1220 QualType QTy0 = E->getArg(Arg: 0)->getType();
1221 QualType QTy1 = E->getArg(Arg: 1)->getType();
1222
1223 bool IsVec0 = QTy0->isVectorType();
1224 bool IsVec1 = QTy1->isVectorType();
1225 bool IsMat0 = QTy0->isConstantMatrixType();
1226 bool IsMat1 = QTy1->isConstantMatrixType();
1227
1228 // The matrix multiply intrinsic only operates on column-major order
1229 // matrices. Therefore matrix memory layout transforms must be inserted
1230 // before and after matrix multiply intrinsics.
1231 // Use whichever operand is a matrix to discover its declared layout.
1232 bool IsRowMajorMat0 = IsMat0 && isMatrixRowMajor(LangOpts: getLangOpts(), T: QTy0);
1233 bool IsRowMajorMat1 = IsMat1 && isMatrixRowMajor(LangOpts: getLangOpts(), T: QTy1);
1234
1235 llvm::MatrixBuilder MB(Builder);
1236 if (IsVec0 && IsMat1) {
1237 unsigned N = QTy0->castAs<VectorType>()->getNumElements();
1238 auto *MatTy = QTy1->castAs<ConstantMatrixType>();
1239 unsigned Rows = MatTy->getNumRows();
1240 unsigned Cols = MatTy->getNumColumns();
1241 assert(N == Rows && "vector length must match matrix row count");
1242 if (IsRowMajorMat1)
1243 Op1 = MB.CreateRowMajorToColumnMajorTransform(Matrix: Op1, Rows, Columns: Cols);
1244 return MB.CreateMatrixMultiply(LHS: Op0, RHS: Op1, LHSRows: 1, LHSColumns: N, RHSColumns: Cols, Name: "hlsl.mul");
1245 }
1246 if (IsMat0 && IsVec1) {
1247 auto *MatTy = QTy0->castAs<ConstantMatrixType>();
1248 unsigned Rows = MatTy->getNumRows();
1249 unsigned Cols = MatTy->getNumColumns();
1250 assert(QTy1->castAs<VectorType>()->getNumElements() == Cols &&
1251 "vector length must match matrix column count");
1252 if (IsRowMajorMat0)
1253 Op0 = MB.CreateRowMajorToColumnMajorTransform(Matrix: Op0, Rows, Columns: Cols);
1254 return MB.CreateMatrixMultiply(LHS: Op0, RHS: Op1, LHSRows: Rows, LHSColumns: Cols, RHSColumns: 1, Name: "hlsl.mul");
1255 }
1256 assert(IsMat0 && IsMat1);
1257 auto *MatTy0 = QTy0->castAs<ConstantMatrixType>();
1258 auto *MatTy1 = QTy1->castAs<ConstantMatrixType>();
1259 unsigned Rows0 = MatTy0->getNumRows();
1260 unsigned Rows1 = MatTy1->getNumRows();
1261 unsigned Cols0 = MatTy0->getNumColumns();
1262 unsigned Cols1 = MatTy1->getNumColumns();
1263 assert(Cols0 == Rows1 &&
1264 "inner matrix dimensions must match for multiplication");
1265 if (IsRowMajorMat0)
1266 Op0 = MB.CreateRowMajorToColumnMajorTransform(Matrix: Op0, Rows: Rows0, Columns: Cols0);
1267 if (IsRowMajorMat1)
1268 Op1 = MB.CreateRowMajorToColumnMajorTransform(Matrix: Op1, Rows: Rows1, Columns: Cols1);
1269
1270 Value *Result =
1271 MB.CreateMatrixMultiply(LHS: Op0, RHS: Op1, LHSRows: Rows0, LHSColumns: Cols0, RHSColumns: Cols1, Name: "hlsl.mul");
1272
1273 bool IsResultRowMajor = isMatrixRowMajor(LangOpts: getLangOpts(), T: E->getType());
1274 if (IsResultRowMajor)
1275 Result = MB.CreateColumnMajorToRowMajorTransform(Matrix: Result, Rows: Rows0, Columns: Cols1);
1276 return Result;
1277 }
1278 case Builtin::BI__builtin_hlsl_transpose: {
1279 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1280 auto *MatTy = E->getArg(Arg: 0)->getType()->castAs<ConstantMatrixType>();
1281 unsigned Rows = MatTy->getNumRows();
1282 unsigned Cols = MatTy->getNumColumns();
1283 llvm::MatrixBuilder MB(Builder);
1284 // The correct lowering of a transpose depends on both the source layout
1285 // and the result layout.
1286 bool SrcRowMajor = isMatrixRowMajor(LangOpts: getLangOpts(), T: E->getArg(Arg: 0)->getType());
1287 bool DstRowMajor = isMatrixRowMajor(LangOpts: getLangOpts(), T: E->getType());
1288 // When the source & result layouts differ, the operand already holds the
1289 // transposed result, ie transpose is a no-op on the underlying vector.
1290 if (SrcRowMajor != DstRowMajor)
1291 return Op0;
1292 // When the source and result share a layout, emit a transpose.
1293 if (SrcRowMajor)
1294 // For row-major operands the dimensions are swapped
1295 return MB.CreateMatrixTranspose(Matrix: Op0, Rows: Cols, Columns: Rows);
1296 return MB.CreateMatrixTranspose(Matrix: Op0, Rows, Columns: Cols);
1297 }
1298 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
1299 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1300 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1301 llvm_unreachable("rcp operand must have a float representation");
1302 llvm::Type *Ty = Op0->getType();
1303 llvm::Type *EltTy = Ty->getScalarType();
1304 Constant *One = Ty->isVectorTy()
1305 ? ConstantVector::getSplat(
1306 EC: ElementCount::getFixed(
1307 MinVal: cast<FixedVectorType>(Val: Ty)->getNumElements()),
1308 Elt: ConstantFP::get(Ty: EltTy, V: 1.0))
1309 : ConstantFP::get(Ty: EltTy, V: 1.0);
1310 return Builder.CreateFDiv(L: One, R: Op0, Name: "hlsl.rcp");
1311 }
1312 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
1313 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1314 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1315 llvm_unreachable("rsqrt operand must have a float representation");
1316 return Builder.CreateIntrinsic(
1317 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getRsqrtIntrinsic(),
1318 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.rsqrt");
1319 }
1320 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
1321 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1322 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1323 "saturate operand must have a float representation");
1324 return Builder.CreateIntrinsic(
1325 /*ReturnType=*/RetTy: Op0->getType(),
1326 ID: CGM.getHLSLRuntime().getSaturateIntrinsic(), Args: ArrayRef<Value *>{Op0},
1327 FMFSource: nullptr, Name: "hlsl.saturate");
1328 }
1329 case Builtin::BI__builtin_hlsl_wave_prefix_count_bits: {
1330 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
1331 assert(Op->getType()->isIntegerTy(1) &&
1332 "WavePrefixBitCount operand must be a boolean type");
1333
1334 Intrinsic::ID IID =
1335 getPrefixCountBitsIntrinsic(Arch: getTarget().getTriple().getArch());
1336
1337 return EmitIntrinsicCall(ID: IID, Args: ArrayRef{Op}, Name: "hlsl.wave.prefix.bit.count");
1338 }
1339 case Builtin::BI__builtin_hlsl_select: {
1340 Value *OpCond = EmitScalarExpr(E: E->getArg(Arg: 0));
1341 RValue RValTrue = EmitAnyExpr(E: E->getArg(Arg: 1));
1342 Value *OpTrue =
1343 RValTrue.isScalar()
1344 ? RValTrue.getScalarVal()
1345 : Builder.CreateLoad(Addr: RValTrue.getAggregateAddress(), Name: "true_val");
1346 RValue RValFalse = EmitAnyExpr(E: E->getArg(Arg: 2));
1347 Value *OpFalse =
1348 RValFalse.isScalar()
1349 ? RValFalse.getScalarVal()
1350 : Builder.CreateLoad(Addr: RValFalse.getAggregateAddress(), Name: "false_val");
1351 if (auto *VTy = E->getType()->getAs<VectorType>()) {
1352 if (!OpTrue->getType()->isVectorTy())
1353 OpTrue =
1354 Builder.CreateVectorSplat(NumElts: VTy->getNumElements(), V: OpTrue, Name: "splat");
1355 if (!OpFalse->getType()->isVectorTy())
1356 OpFalse =
1357 Builder.CreateVectorSplat(NumElts: VTy->getNumElements(), V: OpFalse, Name: "splat");
1358 }
1359
1360 Value *SelectVal =
1361 Builder.CreateSelect(C: OpCond, True: OpTrue, False: OpFalse, Name: "hlsl.select");
1362 if (!RValTrue.isScalar())
1363 Builder.CreateStore(Val: SelectVal, Addr: ReturnValue.getAddress(),
1364 IsVolatile: ReturnValue.isVolatile());
1365
1366 return SelectVal;
1367 }
1368 case Builtin::BI__builtin_hlsl_step: {
1369 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1370 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
1371 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1372 E->getArg(1)->getType()->hasFloatingRepresentation() &&
1373 "step operands must have a float representation");
1374 return Builder.CreateIntrinsic(
1375 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getStepIntrinsic(),
1376 Args: ArrayRef<Value *>{Op0, Op1}, FMFSource: nullptr, Name: "hlsl.step");
1377 }
1378 case Builtin::BI__builtin_hlsl_wave_active_all_equal: {
1379 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
1380
1381 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllEqualIntrinsic();
1382 return EmitIntrinsicCall(ID, Types: {Op->getType()}, Args: {Op});
1383 }
1384 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
1385 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
1386 assert(Op->getType()->isIntegerTy(1) &&
1387 "Intrinsic WaveActiveAllTrue operand must be a bool");
1388
1389 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
1390 return EmitIntrinsicCall(ID, Args: {Op});
1391 }
1392 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
1393 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
1394 assert(Op->getType()->isIntegerTy(1) &&
1395 "Intrinsic WaveActiveAnyTrue operand must be a bool");
1396
1397 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
1398 return EmitIntrinsicCall(ID, Args: {Op});
1399 }
1400 case Builtin::BI__builtin_hlsl_wave_active_bit_or: {
1401 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
1402 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
1403 "Intrinsic WaveActiveBitOr operand must have an unsigned integer "
1404 "representation");
1405
1406 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitOrIntrinsic();
1407 return EmitIntrinsicCall(ID, Types: {Op->getType()}, Args: ArrayRef{Op},
1408 Name: "hlsl.wave.active.bit.or");
1409 }
1410 case Builtin::BI__builtin_hlsl_wave_active_bit_xor: {
1411 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
1412 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
1413 "Intrinsic WaveActiveBitXor operand must have an unsigned integer "
1414 "representation");
1415
1416 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitXorIntrinsic();
1417 return EmitIntrinsicCall(ID, Types: {Op->getType()}, Args: ArrayRef{Op},
1418 Name: "hlsl.wave.active.bit.xor");
1419 }
1420 case Builtin::BI__builtin_hlsl_wave_active_bit_and: {
1421 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
1422 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
1423 "Intrinsic WaveActiveBitAnd operand must have an unsigned integer "
1424 "representation");
1425
1426 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitAndIntrinsic();
1427 return EmitIntrinsicCall(ID, Types: {Op->getType()}, Args: ArrayRef{Op},
1428 Name: "hlsl.wave.active.bit.and");
1429 }
1430 case Builtin::BI__builtin_hlsl_interlocked_add: {
1431 // HLSL signatures (synthesized as overloads in HLSLExternalSemaSource):
1432 // void InterlockedAdd(groupshared|device T &dest, T value);
1433 // void InterlockedAdd(groupshared|device T &dest, T value,
1434 // T &original_value);
1435 // Both `dest` and `original_value` are plain references, so we can use
1436 // the underlying lvalue directly without HLSLOutArgExpr unwrapping.
1437 LValue DestLV = EmitLValue(E: E->getArg(Arg: 0));
1438 Value *Ptr = DestLV.getAddress().emitRawPointer(CGF&: *this);
1439 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
1440 assert(E->getArg(1)->getType()->isIntegerType() &&
1441 "Intrinsic InterlockedAdd value operand must be an integer");
1442
1443 Intrinsic::ID ID = CGM.getHLSLRuntime().getInterlockedAddIntrinsic();
1444 Value *Call = EmitRuntimeCall(
1445 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID,
1446 OverloadTys: {Val->getType(), Ptr->getType()}),
1447 args: ArrayRef<Value *>{Ptr, Val}, name: "hlsl.interlocked.add");
1448
1449 // The 3-arg overload writes the old value (the intrinsic's return value)
1450 // into the `original_value` reference parameter.
1451 if (E->getNumArgs() == 3) {
1452 LValue OrigLV = EmitLValue(E: E->getArg(Arg: 2));
1453 EmitStoreThroughLValue(Src: RValue::get(V: Call), Dst: OrigLV);
1454 }
1455 return Call;
1456 }
1457 case Builtin::BI__builtin_hlsl_wave_active_ballot: {
1458 [[maybe_unused]] Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
1459 assert(Op->getType()->isIntegerTy(1) &&
1460 "Intrinsic WaveActiveBallot operand must be a bool");
1461
1462 return handleHlslWaveActiveBallot(CGF&: *this, E);
1463 }
1464 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
1465 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1466 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
1467 return EmitIntrinsicCall(ID, Args: ArrayRef{OpExpr});
1468 }
1469 case Builtin::BI__builtin_hlsl_wave_active_sum: {
1470 // Due to the use of variadic arguments, explicitly retrieve argument
1471 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1472 Intrinsic::ID IID = getWaveActiveSumIntrinsic(
1473 Arch: getTarget().getTriple().getArch(), QT: E->getArg(Arg: 0)->getType());
1474
1475 return EmitIntrinsicCall(ID: IID, Types: {OpExpr->getType()}, Args: ArrayRef{OpExpr},
1476 Name: "hlsl.wave.active.sum");
1477 }
1478 case Builtin::BI__builtin_hlsl_wave_active_product: {
1479 // Due to the use of variadic arguments, explicitly retrieve argument
1480 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1481 Intrinsic::ID IID = getWaveActiveProductIntrinsic(
1482 Arch: getTarget().getTriple().getArch(), QT: E->getArg(Arg: 0)->getType());
1483
1484 return EmitIntrinsicCall(ID: IID, Types: {OpExpr->getType()}, Args: ArrayRef{OpExpr},
1485 Name: "hlsl.wave.active.product");
1486 }
1487 case Builtin::BI__builtin_hlsl_wave_active_max: {
1488 // Due to the use of variadic arguments, explicitly retrieve argument
1489 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1490 QualType QT = E->getArg(Arg: 0)->getType();
1491 Intrinsic::ID IID;
1492 if (QT->isUnsignedIntegerType())
1493 IID = CGM.getHLSLRuntime().getWaveActiveUMaxIntrinsic();
1494 else
1495 IID = CGM.getHLSLRuntime().getWaveActiveMaxIntrinsic();
1496
1497 return EmitIntrinsicCall(ID: IID, Types: {OpExpr->getType()}, Args: ArrayRef{OpExpr},
1498 Name: "hlsl.wave.active.max");
1499 }
1500 case Builtin::BI__builtin_hlsl_wave_active_min: {
1501 // Due to the use of variadic arguments, explicitly retrieve argument
1502 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1503 QualType QT = E->getArg(Arg: 0)->getType();
1504 Intrinsic::ID IID;
1505 if (QT->isUnsignedIntegerType())
1506 IID = CGM.getHLSLRuntime().getWaveActiveUMinIntrinsic();
1507 else
1508 IID = CGM.getHLSLRuntime().getWaveActiveMinIntrinsic();
1509
1510 return EmitIntrinsicCall(ID: IID, Types: {OpExpr->getType()}, Args: ArrayRef{OpExpr},
1511 Name: "hlsl.wave.active.min");
1512 }
1513 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
1514 // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in
1515 // defined in SPIRVBuiltins.td. So instead we manually get the matching name
1516 // for the DirectX intrinsic and the demangled builtin name
1517 switch (CGM.getTarget().getTriple().getArch()) {
1518 case llvm::Triple::dxil:
1519 return EmitIntrinsicCall(ID: Intrinsic::dx_wave_getlaneindex);
1520 case llvm::Triple::spirv:
1521 return EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(
1522 Ty: llvm::FunctionType::get(Result: IntTy, Params: {}, isVarArg: false),
1523 Name: "__hlsl_wave_get_lane_index", ExtraAttrs: {}, Local: false, AssumeConvergent: true));
1524 default:
1525 llvm_unreachable(
1526 "Intrinsic WaveGetLaneIndex not supported by target architecture");
1527 }
1528 }
1529 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
1530 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
1531 return EmitIntrinsicCall(ID);
1532 }
1533 case Builtin::BI__builtin_hlsl_wave_get_lane_count: {
1534 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic();
1535 return EmitIntrinsicCall(ID);
1536 }
1537 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
1538 // Due to the use of variadic arguments we must explicitly retrieve them and
1539 // create our function type.
1540 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1541 Value *OpIndex = EmitScalarExpr(E: E->getArg(Arg: 1));
1542 return EmitIntrinsicCall(ID: CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
1543 Types: {OpExpr->getType()}, Args: ArrayRef{OpExpr, OpIndex},
1544 Name: "hlsl.wave.readlane");
1545 }
1546 case Builtin::BI__builtin_hlsl_wave_prefix_sum: {
1547 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1548 Intrinsic::ID IID = getWavePrefixSumIntrinsic(
1549 Arch: getTarget().getTriple().getArch(), QT: E->getArg(Arg: 0)->getType());
1550 return EmitIntrinsicCall(ID: IID, Types: {OpExpr->getType()}, Args: ArrayRef{OpExpr},
1551 Name: "hlsl.wave.prefix.sum");
1552 }
1553 case Builtin::BI__builtin_hlsl_wave_prefix_product: {
1554 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1555 Intrinsic::ID IID = getWavePrefixProductIntrinsic(
1556 Arch: getTarget().getTriple().getArch(), QT: E->getArg(Arg: 0)->getType());
1557 return EmitIntrinsicCall(ID: IID, Types: {OpExpr->getType()}, Args: ArrayRef{OpExpr},
1558 Name: "hlsl.wave.prefix.product");
1559 }
1560 case Builtin::BI__builtin_hlsl_quad_read_across_x: {
1561 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1562 Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossXIntrinsic();
1563 return EmitIntrinsicCall(ID, Types: {OpExpr->getType()}, Args: ArrayRef{OpExpr},
1564 Name: "hlsl.quad.read.across.x");
1565 }
1566 case Builtin::BI__builtin_hlsl_quad_read_across_y: {
1567 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1568 Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossYIntrinsic();
1569 return EmitIntrinsicCall(ID, Types: {OpExpr->getType()}, Args: ArrayRef{OpExpr},
1570 Name: "hlsl.quad.read.across.y");
1571 }
1572 case Builtin::BI__builtin_hlsl_quad_read_across_diagonal: {
1573 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1574 Intrinsic::ID ID =
1575 CGM.getHLSLRuntime().getQuadReadAcrossDiagonalIntrinsic();
1576 return EmitRuntimeCall(callee: Intrinsic::getOrInsertDeclaration(
1577 M: &CGM.getModule(), id: ID, OverloadTys: {OpExpr->getType()}),
1578 args: ArrayRef{OpExpr}, name: "hlsl.quad.read.across.diagonal");
1579 }
1580 case Builtin::BI__builtin_hlsl_elementwise_sign: {
1581 auto *Arg0 = E->getArg(Arg: 0);
1582 Value *Op0 = EmitScalarExpr(E: Arg0);
1583 llvm::Type *Xty = Op0->getType();
1584 llvm::Type *retType = llvm::Type::getInt32Ty(C&: this->getLLVMContext());
1585 if (Xty->isVectorTy()) {
1586 auto *XVecTy = Arg0->getType()->castAs<VectorType>();
1587 retType = llvm::VectorType::get(
1588 ElementType: retType, EC: ElementCount::getFixed(MinVal: XVecTy->getNumElements()));
1589 }
1590 assert((Arg0->getType()->hasFloatingRepresentation() ||
1591 Arg0->getType()->hasIntegerRepresentation()) &&
1592 "sign operand must have a float or int representation");
1593
1594 if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
1595 Value *Cmp = Builder.CreateICmpEQ(LHS: Op0, RHS: ConstantInt::get(Ty: Xty, V: 0));
1596 return Builder.CreateSelect(C: Cmp, True: ConstantInt::get(Ty: retType, V: 0),
1597 False: ConstantInt::get(Ty: retType, V: 1), Name: "hlsl.sign");
1598 }
1599
1600 return Builder.CreateIntrinsic(
1601 RetTy: retType, ID: CGM.getHLSLRuntime().getSignIntrinsic(),
1602 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.sign");
1603 }
1604 case Builtin::BI__builtin_hlsl_elementwise_radians: {
1605 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1606 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1607 "radians operand must have a float representation");
1608 return Builder.CreateIntrinsic(
1609 /*ReturnType=*/RetTy: Op0->getType(),
1610 ID: CGM.getHLSLRuntime().getRadiansIntrinsic(), Args: ArrayRef<Value *>{Op0},
1611 FMFSource: nullptr, Name: "hlsl.radians");
1612 }
1613 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
1614 Value *ResHandle = EmitScalarExpr(E: E->getArg(Arg: 0));
1615 Value *Offset = EmitScalarExpr(E: E->getArg(Arg: 1));
1616 Value *OffsetI8 = Builder.CreateIntCast(V: Offset, DestTy: Int8Ty, isSigned: true);
1617 return Builder.CreateIntrinsic(
1618 /*ReturnType=*/RetTy: Offset->getType(),
1619 ID: CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
1620 Args: ArrayRef<Value *>{ResHandle, OffsetI8}, FMFSource: nullptr);
1621 }
1622 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
1623
1624 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
1625 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
1626 E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
1627 "asuint operands types mismatch");
1628 return handleHlslSplitdouble(E, CGF: this);
1629 }
1630 case Builtin::BI__builtin_hlsl_elementwise_clip:
1631 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1632 "clip operands types mismatch");
1633 return handleHlslClip(E, CGF: this);
1634 case Builtin::BI__builtin_hlsl_all_memory_barrier: {
1635 Intrinsic::ID ID = CGM.getHLSLRuntime().getAllMemoryBarrierIntrinsic();
1636 return EmitIntrinsicCall(ID);
1637 }
1638 case Builtin::BI__builtin_hlsl_all_memory_barrier_with_group_sync: {
1639 Intrinsic::ID ID =
1640 CGM.getHLSLRuntime().getAllMemoryBarrierWithGroupSyncIntrinsic();
1641 return EmitIntrinsicCall(ID);
1642 }
1643 case Builtin::BI__builtin_hlsl_device_memory_barrier: {
1644 Intrinsic::ID ID = CGM.getHLSLRuntime().getDeviceMemoryBarrierIntrinsic();
1645 return EmitIntrinsicCall(ID);
1646 }
1647 case Builtin::BI__builtin_hlsl_device_memory_barrier_with_group_sync: {
1648 Intrinsic::ID ID =
1649 CGM.getHLSLRuntime().getDeviceMemoryBarrierWithGroupSyncIntrinsic();
1650 return EmitIntrinsicCall(ID);
1651 }
1652 case Builtin::BI__builtin_hlsl_group_memory_barrier: {
1653 Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierIntrinsic();
1654 return EmitIntrinsicCall(ID);
1655 }
1656 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
1657 Intrinsic::ID ID =
1658 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
1659 return EmitIntrinsicCall(ID);
1660 }
1661 case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: {
1662 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1663 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1664 llvm_unreachable("ddx_coarse operand must have a float representation");
1665 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxCoarseIntrinsic();
1666 return Builder.CreateIntrinsic(/*ReturnType=*/RetTy: Op0->getType(), ID,
1667 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
1668 Name: "hlsl.ddx.coarse");
1669 }
1670 case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: {
1671 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1672 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1673 llvm_unreachable("ddy_coarse operand must have a float representation");
1674 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyCoarseIntrinsic();
1675 return Builder.CreateIntrinsic(/*ReturnType=*/RetTy: Op0->getType(), ID,
1676 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
1677 Name: "hlsl.ddy.coarse");
1678 }
1679 case Builtin::BI__builtin_hlsl_elementwise_ddx_fine: {
1680 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1681 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1682 llvm_unreachable("ddx_fine operand must have a float representation");
1683 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxFineIntrinsic();
1684 return Builder.CreateIntrinsic(/*ReturnType=*/RetTy: Op0->getType(), ID,
1685 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
1686 Name: "hlsl.ddx.fine");
1687 }
1688 case Builtin::BI__builtin_hlsl_elementwise_ddy_fine: {
1689 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1690 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1691 llvm_unreachable("ddy_fine operand must have a float representation");
1692 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyFineIntrinsic();
1693 return Builder.CreateIntrinsic(/*ReturnType=*/RetTy: Op0->getType(), ID,
1694 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
1695 Name: "hlsl.ddy.fine");
1696 }
1697 case Builtin::BI__builtin_get_spirv_spec_constant_bool:
1698 case Builtin::BI__builtin_get_spirv_spec_constant_short:
1699 case Builtin::BI__builtin_get_spirv_spec_constant_ushort:
1700 case Builtin::BI__builtin_get_spirv_spec_constant_int:
1701 case Builtin::BI__builtin_get_spirv_spec_constant_uint:
1702 case Builtin::BI__builtin_get_spirv_spec_constant_longlong:
1703 case Builtin::BI__builtin_get_spirv_spec_constant_ulonglong:
1704 case Builtin::BI__builtin_get_spirv_spec_constant_half:
1705 case Builtin::BI__builtin_get_spirv_spec_constant_float:
1706 case Builtin::BI__builtin_get_spirv_spec_constant_double: {
1707 llvm::Function *SpecConstantFn = getSpecConstantFunction(SpecConstantType: E->getType());
1708 llvm::Value *SpecId = EmitScalarExpr(E: E->getArg(Arg: 0));
1709 llvm::Value *DefaultVal = EmitScalarExpr(E: E->getArg(Arg: 1));
1710 llvm::Value *Args[] = {SpecId, DefaultVal};
1711 return Builder.CreateCall(Callee: SpecConstantFn, Args);
1712 }
1713 }
1714 return nullptr;
1715}
1716
1717llvm::Function *clang::CodeGen::CodeGenFunction::getSpecConstantFunction(
1718 const clang::QualType &SpecConstantType) {
1719
1720 // Find or create the declaration for the function.
1721 llvm::Module *M = &CGM.getModule();
1722 std::string MangledName =
1723 getSpecConstantFunctionName(SpecConstantType, Context&: getContext());
1724 llvm::Function *SpecConstantFn = M->getFunction(Name: MangledName);
1725
1726 if (!SpecConstantFn) {
1727 llvm::Type *IntType = ConvertType(T: getContext().IntTy);
1728 llvm::Type *RetTy = ConvertType(T: SpecConstantType);
1729 llvm::Type *ArgTypes[] = {IntType, RetTy};
1730 llvm::FunctionType *FnTy = llvm::FunctionType::get(Result: RetTy, Params: ArgTypes, isVarArg: false);
1731 SpecConstantFn = llvm::Function::Create(
1732 Ty: FnTy, Linkage: llvm::GlobalValue::ExternalLinkage, N: MangledName, M);
1733 }
1734 return SpecConstantFn;
1735}
1736