1//===------- CGHLSLBuiltins.cpp - Emit LLVM Code for HLSL builtins --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit HLSL Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGBuiltin.h"
14#include "CGHLSLRuntime.h"
15#include "CodeGenFunction.h"
16
17using namespace clang;
18using namespace CodeGen;
19using namespace llvm;
20
21static Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) {
22 assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
23 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
24 "asdouble operands types mismatch");
25 Value *OpLowBits = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
26 Value *OpHighBits = CGF.EmitScalarExpr(E: E->getArg(Arg: 1));
27
28 llvm::Type *ResultType = CGF.DoubleTy;
29 int N = 1;
30 if (auto *VTy = E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>()) {
31 N = VTy->getNumElements();
32 ResultType = llvm::FixedVectorType::get(ElementType: CGF.DoubleTy, NumElts: N);
33 }
34
35 if (CGF.CGM.getTarget().getTriple().isDXIL())
36 return CGF.Builder.CreateIntrinsic(
37 /*ReturnType=*/RetTy: ResultType, ID: Intrinsic::dx_asdouble,
38 Args: {OpLowBits, OpHighBits}, FMFSource: nullptr, Name: "hlsl.asdouble");
39
40 if (!E->getArg(Arg: 0)->getType()->isVectorType()) {
41 OpLowBits = CGF.Builder.CreateVectorSplat(NumElts: 1, V: OpLowBits);
42 OpHighBits = CGF.Builder.CreateVectorSplat(NumElts: 1, V: OpHighBits);
43 }
44
45 llvm::SmallVector<int> Mask;
46 for (int i = 0; i < N; i++) {
47 Mask.push_back(Elt: i);
48 Mask.push_back(Elt: i + N);
49 }
50
51 Value *BitVec = CGF.Builder.CreateShuffleVector(V1: OpLowBits, V2: OpHighBits, Mask);
52
53 return CGF.Builder.CreateBitCast(V: BitVec, DestTy: ResultType);
54}
55
56static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) {
57 Value *Op0 = CGF->EmitScalarExpr(E: E->getArg(Arg: 0));
58
59 Constant *FZeroConst = ConstantFP::getZero(Ty: CGF->FloatTy);
60 Value *CMP;
61 Value *LastInstr;
62
63 if (const auto *VecTy = E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>()) {
64 FZeroConst = ConstantVector::getSplat(
65 EC: ElementCount::getFixed(MinVal: VecTy->getNumElements()), Elt: FZeroConst);
66 auto *FCompInst = CGF->Builder.CreateFCmpOLT(LHS: Op0, RHS: FZeroConst);
67 CMP = CGF->Builder.CreateIntrinsic(
68 RetTy: CGF->Builder.getInt1Ty(), ID: CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
69 Args: {FCompInst});
70 } else {
71 CMP = CGF->Builder.CreateFCmpOLT(LHS: Op0, RHS: FZeroConst);
72 }
73
74 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
75 LastInstr = CGF->Builder.CreateIntrinsic(ID: Intrinsic::dx_discard, Args: {CMP});
76 } else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
77 BasicBlock *LT0 = CGF->createBasicBlock(name: "lt0", parent: CGF->CurFn);
78 BasicBlock *End = CGF->createBasicBlock(name: "end", parent: CGF->CurFn);
79
80 CGF->Builder.CreateCondBr(Cond: CMP, True: LT0, False: End);
81
82 CGF->Builder.SetInsertPoint(LT0);
83
84 CGF->Builder.CreateIntrinsic(ID: Intrinsic::spv_discard, Args: {});
85
86 LastInstr = CGF->Builder.CreateBr(Dest: End);
87 CGF->Builder.SetInsertPoint(End);
88 } else {
89 llvm_unreachable("Backend Codegen not supported.");
90 }
91
92 return LastInstr;
93}
94
95static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
96 Value *Op0 = CGF->EmitScalarExpr(E: E->getArg(Arg: 0));
97 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(Val: E->getArg(Arg: 1));
98 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(Val: E->getArg(Arg: 2));
99
100 CallArgList Args;
101 LValue Op1TmpLValue =
102 CGF->EmitHLSLOutArgExpr(E: OutArg1, Args, Ty: OutArg1->getType());
103 LValue Op2TmpLValue =
104 CGF->EmitHLSLOutArgExpr(E: OutArg2, Args, Ty: OutArg2->getType());
105
106 if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee())
107 Args.reverseWritebacks();
108
109 Value *LowBits = nullptr;
110 Value *HighBits = nullptr;
111
112 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
113 llvm::Type *RetElementTy = CGF->Int32Ty;
114 if (auto *Op0VecTy = E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>())
115 RetElementTy = llvm::VectorType::get(
116 ElementType: CGF->Int32Ty, EC: ElementCount::getFixed(MinVal: Op0VecTy->getNumElements()));
117 auto *RetTy = llvm::StructType::get(elt1: RetElementTy, elts: RetElementTy);
118
119 CallInst *CI = CGF->Builder.CreateIntrinsic(
120 RetTy, ID: Intrinsic::dx_splitdouble, Args: {Op0}, FMFSource: nullptr, Name: "hlsl.splitdouble");
121
122 LowBits = CGF->Builder.CreateExtractValue(Agg: CI, Idxs: 0);
123 HighBits = CGF->Builder.CreateExtractValue(Agg: CI, Idxs: 1);
124 } else {
125 // For Non DXIL targets we generate the instructions.
126
127 if (!Op0->getType()->isVectorTy()) {
128 FixedVectorType *DestTy = FixedVectorType::get(ElementType: CGF->Int32Ty, NumElts: 2);
129 Value *Bitcast = CGF->Builder.CreateBitCast(V: Op0, DestTy);
130
131 LowBits = CGF->Builder.CreateExtractElement(Vec: Bitcast, Idx: (uint64_t)0);
132 HighBits = CGF->Builder.CreateExtractElement(Vec: Bitcast, Idx: 1);
133 } else {
134 int NumElements = 1;
135 if (const auto *VecTy =
136 E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>())
137 NumElements = VecTy->getNumElements();
138
139 FixedVectorType *Uint32VecTy =
140 FixedVectorType::get(ElementType: CGF->Int32Ty, NumElts: NumElements * 2);
141 Value *Uint32Vec = CGF->Builder.CreateBitCast(V: Op0, DestTy: Uint32VecTy);
142 if (NumElements == 1) {
143 LowBits = CGF->Builder.CreateExtractElement(Vec: Uint32Vec, Idx: (uint64_t)0);
144 HighBits = CGF->Builder.CreateExtractElement(Vec: Uint32Vec, Idx: 1);
145 } else {
146 SmallVector<int> EvenMask, OddMask;
147 for (int I = 0, E = NumElements; I != E; ++I) {
148 EvenMask.push_back(Elt: I * 2);
149 OddMask.push_back(Elt: I * 2 + 1);
150 }
151 LowBits = CGF->Builder.CreateShuffleVector(V: Uint32Vec, Mask: EvenMask);
152 HighBits = CGF->Builder.CreateShuffleVector(V: Uint32Vec, Mask: OddMask);
153 }
154 }
155 }
156 CGF->Builder.CreateStore(Val: LowBits, Addr: Op1TmpLValue.getAddress());
157 auto *LastInst =
158 CGF->Builder.CreateStore(Val: HighBits, Addr: Op2TmpLValue.getAddress());
159 CGF->EmitWritebacks(Args);
160 return LastInst;
161}
162
163static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF,
164 const CallExpr *E) {
165 Value *Cond = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
166 llvm::Type *I32 = CGF.Int32Ty;
167
168 llvm::Type *Vec4I32 = llvm::FixedVectorType::get(ElementType: I32, NumElts: 4);
169 [[maybe_unused]] llvm::StructType *Struct4I32 =
170 llvm::StructType::get(Context&: CGF.getLLVMContext(), Elements: {I32, I32, I32, I32});
171
172 if (CGF.CGM.getTarget().getTriple().isDXIL()) {
173 // Call DXIL intrinsic: returns { i32, i32, i32, i32 }
174 llvm::Function *Fn = CGF.CGM.getIntrinsic(IID: Intrinsic::dx_wave_ballot, Tys: {I32});
175
176 Value *StructVal = CGF.EmitRuntimeCall(callee: Fn, args: Cond);
177 assert(StructVal->getType() == Struct4I32 &&
178 "dx.wave.ballot must return {i32,i32,i32,i32}");
179
180 // Reassemble struct to <4 x i32>
181 llvm::Value *VecVal = llvm::PoisonValue::get(T: Vec4I32);
182 for (unsigned I = 0; I < 4; ++I) {
183 Value *Elt = CGF.Builder.CreateExtractValue(Agg: StructVal, Idxs: I);
184 VecVal =
185 CGF.Builder.CreateInsertElement(Vec: VecVal, NewElt: Elt, Idx: CGF.Builder.getInt32(C: I));
186 }
187
188 return VecVal;
189 }
190
191 if (CGF.CGM.getTarget().getTriple().isSPIRV())
192 return CGF.EmitRuntimeCall(
193 callee: CGF.CGM.getIntrinsic(IID: Intrinsic::spv_subgroup_ballot), args: Cond);
194
195 llvm_unreachable(
196 "WaveActiveBallot is only supported for DXIL and SPIRV targets");
197}
198
199static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
200 const CallExpr *E) {
201 Value *Op0 = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
202 QualType Op0Ty = E->getArg(Arg: 0)->getType();
203 llvm::Type *ResType = CGF.FloatTy;
204 uint64_t NumElements = 0;
205 if (Op0->getType()->isVectorTy()) {
206 NumElements =
207 E->getArg(Arg: 0)->getType()->castAs<clang::VectorType>()->getNumElements();
208 ResType =
209 llvm::VectorType::get(ElementType: ResType, EC: ElementCount::getFixed(MinVal: NumElements));
210 }
211 if (!Op0Ty->hasUnsignedIntegerRepresentation())
212 llvm_unreachable(
213 "f16tof32 operand must have an unsigned int representation");
214
215 if (CGF.CGM.getTriple().isDXIL())
216 return CGF.Builder.CreateIntrinsic(RetTy: ResType, ID: Intrinsic::dx_legacyf16tof32,
217 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
218 Name: "hlsl.f16tof32");
219
220 if (CGF.CGM.getTriple().isSPIRV()) {
221 // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
222 // Int16 and Float16 capabilities
223 auto *UnpackType =
224 llvm::VectorType::get(ElementType: CGF.FloatTy, EC: ElementCount::getFixed(MinVal: 2));
225
226 if (NumElements == 0) {
227 // a scalar input - simply extract the first element of the unpacked
228 // vector
229 Value *Unpack = CGF.Builder.CreateIntrinsic(
230 RetTy: UnpackType, ID: Intrinsic::spv_unpackhalf2x16, Args: ArrayRef<Value *>{Op0});
231 return CGF.Builder.CreateExtractElement(Vec: Unpack, Idx: (uint64_t)0);
232 }
233
234 // a vector input - build a congruent output vector by iterating through
235 // the input vector calling unpackhalf2x16 for each element
236 Value *Result = PoisonValue::get(T: ResType);
237 for (uint64_t I = 0; I < NumElements; I++) {
238 Value *InVal = CGF.Builder.CreateExtractElement(Vec: Op0, Idx: I);
239 Value *Unpack = CGF.Builder.CreateIntrinsic(
240 RetTy: UnpackType, ID: Intrinsic::spv_unpackhalf2x16, Args: ArrayRef<Value *>{InVal});
241 Value *Res = CGF.Builder.CreateExtractElement(Vec: Unpack, Idx: (uint64_t)0);
242 Result = CGF.Builder.CreateInsertElement(Vec: Result, NewElt: Res, Idx: I);
243 }
244 return Result;
245 }
246
247 llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
248}
249
250static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
251 const CallExpr *E) {
252 Value *Op0 = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
253 QualType Op0Ty = E->getArg(Arg: 0)->getType();
254 llvm::Type *ResType = CGF.IntTy;
255 uint64_t NumElements = 0;
256 if (Op0->getType()->isVectorTy()) {
257 NumElements =
258 E->getArg(Arg: 0)->getType()->castAs<clang::VectorType>()->getNumElements();
259 ResType =
260 llvm::VectorType::get(ElementType: ResType, EC: ElementCount::getFixed(MinVal: NumElements));
261 }
262 if (!Op0Ty->hasFloatingRepresentation())
263 llvm_unreachable("f32tof16 operand must have a float representation");
264
265 if (CGF.CGM.getTriple().isDXIL())
266 return CGF.Builder.CreateIntrinsic(RetTy: ResType, ID: Intrinsic::dx_legacyf32tof16,
267 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
268 Name: "hlsl.f32tof16");
269
270 if (CGF.CGM.getTriple().isSPIRV()) {
271 // We use the SPIRV PackHalf2x16 operation to avoid the need for the
272 // Int16 and Float16 capabilities
273 auto *PackType =
274 llvm::VectorType::get(ElementType: CGF.FloatTy, EC: ElementCount::getFixed(MinVal: 2));
275
276 if (NumElements == 0) {
277 // a scalar input - simply insert the scalar in the first element
278 // of the 2 element float vector
279 Value *Float2 = Constant::getNullValue(Ty: PackType);
280 Float2 = CGF.Builder.CreateInsertElement(Vec: Float2, NewElt: Op0, Idx: (uint64_t)0);
281 Value *Result = CGF.Builder.CreateIntrinsic(
282 RetTy: ResType, ID: Intrinsic::spv_packhalf2x16, Args: ArrayRef<Value *>{Float2});
283 return Result;
284 }
285
286 // a vector input - build a congruent output vector by iterating through
287 // the input vector calling packhalf2x16 for each element
288 Value *Result = PoisonValue::get(T: ResType);
289 for (uint64_t I = 0; I < NumElements; I++) {
290 Value *Float2 = Constant::getNullValue(Ty: PackType);
291 Value *InVal = CGF.Builder.CreateExtractElement(Vec: Op0, Idx: I);
292 Float2 = CGF.Builder.CreateInsertElement(Vec: Float2, NewElt: InVal, Idx: (uint64_t)0);
293 Value *Res = CGF.Builder.CreateIntrinsic(
294 RetTy: CGF.IntTy, ID: Intrinsic::spv_packhalf2x16, Args: ArrayRef<Value *>{Float2});
295 Result = CGF.Builder.CreateInsertElement(Vec: Result, NewElt: Res, Idx: I);
296 }
297 return Result;
298 }
299
300 llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
301}
302
303static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
304 LValue &Stride) {
305 // Figure out the stride of the buffer elements from the handle type.
306 auto *HandleTy =
307 cast<HLSLAttributedResourceType>(Val: HandleExpr->getType().getTypePtr());
308 QualType ElementTy = HandleTy->getContainedType();
309 Value *StrideValue = CGF->getTypeSize(Ty: ElementTy);
310 return CGF->Builder.CreateStore(Val: StrideValue, Addr: Stride.getAddress());
311}
312
313// Return dot product intrinsic that corresponds to the QT scalar type
314static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
315 if (QT->isFloatingType())
316 return RT.getFDotIntrinsic();
317 if (QT->isSignedIntegerType())
318 return RT.getSDotIntrinsic();
319 assert(QT->isUnsignedIntegerType());
320 return RT.getUDotIntrinsic();
321}
322
323static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
324 if (QT->hasSignedIntegerRepresentation()) {
325 return RT.getFirstBitSHighIntrinsic();
326 }
327
328 assert(QT->hasUnsignedIntegerRepresentation());
329 return RT.getFirstBitUHighIntrinsic();
330}
331
332// Return wave active sum that corresponds to the QT scalar type
333static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
334 CGHLSLRuntime &RT, QualType QT) {
335 switch (Arch) {
336 case llvm::Triple::spirv:
337 return Intrinsic::spv_wave_reduce_sum;
338 case llvm::Triple::dxil: {
339 if (QT->isUnsignedIntegerType())
340 return Intrinsic::dx_wave_reduce_usum;
341 return Intrinsic::dx_wave_reduce_sum;
342 }
343 default:
344 llvm_unreachable("Intrinsic WaveActiveSum"
345 " not supported by target architecture");
346 }
347}
348
349static Intrinsic::ID getPrefixCountBitsIntrinsic(llvm::Triple::ArchType Arch) {
350 switch (Arch) {
351 case llvm::Triple::spirv:
352 return Intrinsic::spv_subgroup_prefix_bit_count;
353 case llvm::Triple::dxil: {
354 return Intrinsic::dx_wave_prefix_bit_count;
355 }
356 default:
357 llvm_unreachable(
358 "WavePrefixOp instruction not supported by target architecture");
359 }
360}
361
362// Return wave prefix sum that corresponds to the QT scalar type
363static Intrinsic::ID getWavePrefixSumIntrinsic(llvm::Triple::ArchType Arch,
364 CGHLSLRuntime &RT, QualType QT) {
365 switch (Arch) {
366 case llvm::Triple::spirv:
367 return Intrinsic::spv_wave_prefix_sum;
368 case llvm::Triple::dxil: {
369 if (QT->isUnsignedIntegerType())
370 return Intrinsic::dx_wave_prefix_usum;
371 return Intrinsic::dx_wave_prefix_sum;
372 }
373 default:
374 llvm_unreachable("Intrinsic WavePrefixSum"
375 " not supported by target architecture");
376 }
377}
378
379// Returns the mangled name for a builtin function that the SPIR-V backend
380// will expand into a spec Constant.
381static std::string getSpecConstantFunctionName(clang::QualType SpecConstantType,
382 ASTContext &Context) {
383 // The parameter types for our conceptual intrinsic function.
384 QualType ClangParamTypes[] = {Context.IntTy, SpecConstantType};
385
386 // Create a temporary FunctionDecl for the builtin fuction. It won't be
387 // added to the AST.
388 FunctionProtoType::ExtProtoInfo EPI;
389 QualType FnType =
390 Context.getFunctionType(ResultTy: SpecConstantType, Args: ClangParamTypes, EPI);
391 DeclarationName FuncName = &Context.Idents.get(Name: "__spirv_SpecConstant");
392 FunctionDecl *FnDeclForMangling = FunctionDecl::Create(
393 C&: Context, DC: Context.getTranslationUnitDecl(), StartLoc: SourceLocation(),
394 NLoc: SourceLocation(), N: FuncName, T: FnType, /*TSI=*/TInfo: nullptr, SC: SC_Extern);
395
396 // Attach the created parameter declarations to the function declaration.
397 SmallVector<ParmVarDecl *, 2> ParamDecls;
398 for (QualType ParamType : ClangParamTypes) {
399 ParmVarDecl *PD = ParmVarDecl::Create(
400 C&: Context, DC: FnDeclForMangling, StartLoc: SourceLocation(), IdLoc: SourceLocation(),
401 /*IdentifierInfo*/ Id: nullptr, T: ParamType, /*TSI*/ TInfo: nullptr, S: SC_None,
402 /*DefaultArg*/ DefArg: nullptr);
403 ParamDecls.push_back(Elt: PD);
404 }
405 FnDeclForMangling->setParams(ParamDecls);
406
407 // Get the mangled name.
408 std::string Name;
409 llvm::raw_string_ostream MangledNameStream(Name);
410 std::unique_ptr<MangleContext> Mangler(Context.createMangleContext());
411 Mangler->mangleName(GD: FnDeclForMangling, MangledNameStream);
412 MangledNameStream.flush();
413
414 return Name;
415}
416
417Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
418 const CallExpr *E,
419 ReturnValueSlot ReturnValue) {
420 if (!getLangOpts().HLSL)
421 return nullptr;
422
423 switch (BuiltinID) {
424 case Builtin::BI__builtin_hlsl_adduint64: {
425 Value *OpA = EmitScalarExpr(E: E->getArg(Arg: 0));
426 Value *OpB = EmitScalarExpr(E: E->getArg(Arg: 1));
427 QualType Arg0Ty = E->getArg(Arg: 0)->getType();
428 uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements();
429 assert(Arg0Ty == E->getArg(1)->getType() &&
430 "AddUint64 operand types must match");
431 assert(Arg0Ty->hasIntegerRepresentation() &&
432 "AddUint64 operands must have an integer representation");
433 assert((NumElements == 2 || NumElements == 4) &&
434 "AddUint64 operands must have 2 or 4 elements");
435
436 llvm::Value *LowA;
437 llvm::Value *HighA;
438 llvm::Value *LowB;
439 llvm::Value *HighB;
440
441 // Obtain low and high words of inputs A and B
442 if (NumElements == 2) {
443 LowA = Builder.CreateExtractElement(Vec: OpA, Idx: (uint64_t)0, Name: "LowA");
444 HighA = Builder.CreateExtractElement(Vec: OpA, Idx: (uint64_t)1, Name: "HighA");
445 LowB = Builder.CreateExtractElement(Vec: OpB, Idx: (uint64_t)0, Name: "LowB");
446 HighB = Builder.CreateExtractElement(Vec: OpB, Idx: (uint64_t)1, Name: "HighB");
447 } else {
448 LowA = Builder.CreateShuffleVector(V: OpA, Mask: {0, 2}, Name: "LowA");
449 HighA = Builder.CreateShuffleVector(V: OpA, Mask: {1, 3}, Name: "HighA");
450 LowB = Builder.CreateShuffleVector(V: OpB, Mask: {0, 2}, Name: "LowB");
451 HighB = Builder.CreateShuffleVector(V: OpB, Mask: {1, 3}, Name: "HighB");
452 }
453
454 // Use an uadd_with_overflow to compute the sum of low words and obtain a
455 // carry value
456 llvm::Value *Carry;
457 llvm::Value *LowSum = EmitOverflowIntrinsic(
458 CGF&: *this, IntrinsicID: Intrinsic::uadd_with_overflow, X: LowA, Y: LowB, Carry);
459 llvm::Value *ZExtCarry =
460 Builder.CreateZExt(V: Carry, DestTy: HighA->getType(), Name: "CarryZExt");
461
462 // Sum the high words and the carry
463 llvm::Value *HighSum = Builder.CreateAdd(LHS: HighA, RHS: HighB, Name: "HighSum");
464 llvm::Value *HighSumPlusCarry =
465 Builder.CreateAdd(LHS: HighSum, RHS: ZExtCarry, Name: "HighSumPlusCarry");
466
467 if (NumElements == 4) {
468 return Builder.CreateShuffleVector(V1: LowSum, V2: HighSumPlusCarry, Mask: {0, 2, 1, 3},
469 Name: "hlsl.AddUint64");
470 }
471
472 llvm::Value *Result = PoisonValue::get(T: OpA->getType());
473 Result = Builder.CreateInsertElement(Vec: Result, NewElt: LowSum, Idx: (uint64_t)0,
474 Name: "hlsl.AddUint64.upto0");
475 Result = Builder.CreateInsertElement(Vec: Result, NewElt: HighSumPlusCarry, Idx: (uint64_t)1,
476 Name: "hlsl.AddUint64");
477 return Result;
478 }
479 case Builtin::BI__builtin_hlsl_resource_getpointer: {
480 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
481 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 1));
482
483 llvm::Type *RetTy = ConvertType(T: E->getType());
484 return Builder.CreateIntrinsic(
485 RetTy, ID: CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
486 Args: ArrayRef<Value *>{HandleOp, IndexOp});
487 }
488 case Builtin::BI__builtin_hlsl_resource_sample: {
489 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
490 Value *SamplerOp = EmitScalarExpr(E: E->getArg(Arg: 1));
491 Value *CoordOp = EmitScalarExpr(E: E->getArg(Arg: 2));
492
493 SmallVector<Value *, 4> Args;
494 Args.push_back(Elt: HandleOp);
495 Args.push_back(Elt: SamplerOp);
496 Args.push_back(Elt: CoordOp);
497 if (E->getNumArgs() > 3) {
498 Args.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 3)));
499 } else {
500 // Default offset is 0.
501 // We need to know the type of the offset. It should be a vector of i32
502 // with the same number of elements as the coordinate, or scalar i32.
503 llvm::Type *CoordTy = CoordOp->getType();
504 llvm::Type *Int32Ty = Builder.getInt32Ty();
505 llvm::Type *OffsetTy = Int32Ty;
506 if (auto *VT = dyn_cast<llvm::FixedVectorType>(Val: CoordTy))
507 OffsetTy = llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: VT->getNumElements());
508 Args.push_back(Elt: llvm::Constant::getNullValue(Ty: OffsetTy));
509 }
510
511 llvm::Type *RetTy = ConvertType(T: E->getType());
512 if (E->getNumArgs() <= 4) {
513 return Builder.CreateIntrinsic(
514 RetTy, ID: CGM.getHLSLRuntime().getSampleIntrinsic(), Args);
515 }
516
517 llvm::Value *Clamp = EmitScalarExpr(E: E->getArg(Arg: 4));
518 // The builtin is defined with variadic arguments, so the clamp parameter
519 // might have been promoted to double. The intrinsic requires a 32-bit
520 // float.
521 if (Clamp->getType() != Builder.getFloatTy())
522 Clamp = Builder.CreateFPCast(V: Clamp, DestTy: Builder.getFloatTy());
523 Args.push_back(Elt: Clamp);
524 return Builder.CreateIntrinsic(
525 RetTy, ID: CGM.getHLSLRuntime().getSampleClampIntrinsic(), Args);
526 }
527 case Builtin::BI__builtin_hlsl_resource_load_with_status: {
528 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
529 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 1));
530
531 // Get the *address* of the status argument to write to it by reference
532 LValue StatusLVal = EmitLValue(E: E->getArg(Arg: 2));
533 Address StatusAddr = StatusLVal.getAddress();
534
535 QualType HandleTy = E->getArg(Arg: 0)->getType();
536 const HLSLAttributedResourceType *RT =
537 HandleTy->getAs<HLSLAttributedResourceType>();
538 assert(CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil &&
539 "Only DXIL currently implements load with status");
540
541 Intrinsic::ID IntrID = RT->getAttrs().RawBuffer
542 ? llvm::Intrinsic::dx_resource_load_rawbuffer
543 : llvm::Intrinsic::dx_resource_load_typedbuffer;
544
545 llvm::Type *DataTy = ConvertType(T: E->getType());
546 llvm::Type *RetTy = llvm::StructType::get(Context&: Builder.getContext(),
547 Elements: {DataTy, Builder.getInt1Ty()});
548
549 SmallVector<Value *, 3> Args;
550 Args.push_back(Elt: HandleOp);
551 Args.push_back(Elt: IndexOp);
552
553 if (RT->getAttrs().RawBuffer) {
554 Value *Offset = Builder.getInt32(C: 0);
555 Args.push_back(Elt: Offset);
556 }
557
558 // The load intrinsics give us a (T value, i1 status) pair -
559 // shepherd these into the return value and out reference respectively.
560 Value *ResRet =
561 Builder.CreateIntrinsic(RetTy, ID: IntrID, Args, FMFSource: {}, Name: "ld.struct");
562 Value *LoadedValue = Builder.CreateExtractValue(Agg: ResRet, Idxs: {0}, Name: "ld.value");
563 Value *StatusBit = Builder.CreateExtractValue(Agg: ResRet, Idxs: {1}, Name: "ld.status");
564 Value *ExtendedStatus =
565 Builder.CreateZExt(V: StatusBit, DestTy: Builder.getInt32Ty(), Name: "ld.status.ext");
566 Builder.CreateStore(Val: ExtendedStatus, Addr: StatusAddr);
567
568 return LoadedValue;
569 }
570 case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: {
571 llvm::Type *HandleTy = CGM.getTypes().ConvertType(T: E->getType());
572 return llvm::PoisonValue::get(T: HandleTy);
573 }
574 case Builtin::BI__builtin_hlsl_resource_handlefrombinding: {
575 llvm::Type *HandleTy = CGM.getTypes().ConvertType(T: E->getType());
576 Value *RegisterOp = EmitScalarExpr(E: E->getArg(Arg: 1));
577 Value *SpaceOp = EmitScalarExpr(E: E->getArg(Arg: 2));
578 Value *RangeOp = EmitScalarExpr(E: E->getArg(Arg: 3));
579 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 4));
580 Value *Name = EmitScalarExpr(E: E->getArg(Arg: 5));
581 llvm::Intrinsic::ID IntrinsicID =
582 CGM.getHLSLRuntime().getCreateHandleFromBindingIntrinsic();
583 SmallVector<Value *> Args{SpaceOp, RegisterOp, RangeOp, IndexOp, Name};
584 return Builder.CreateIntrinsic(RetTy: HandleTy, ID: IntrinsicID, Args);
585 }
586 case Builtin::BI__builtin_hlsl_resource_handlefromimplicitbinding: {
587 llvm::Type *HandleTy = CGM.getTypes().ConvertType(T: E->getType());
588 Value *OrderID = EmitScalarExpr(E: E->getArg(Arg: 1));
589 Value *SpaceOp = EmitScalarExpr(E: E->getArg(Arg: 2));
590 Value *RangeOp = EmitScalarExpr(E: E->getArg(Arg: 3));
591 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 4));
592 Value *Name = EmitScalarExpr(E: E->getArg(Arg: 5));
593 llvm::Intrinsic::ID IntrinsicID =
594 CGM.getHLSLRuntime().getCreateHandleFromImplicitBindingIntrinsic();
595 SmallVector<Value *> Args{OrderID, SpaceOp, RangeOp, IndexOp, Name};
596 return Builder.CreateIntrinsic(RetTy: HandleTy, ID: IntrinsicID, Args);
597 }
598 case Builtin::BI__builtin_hlsl_resource_counterhandlefromimplicitbinding: {
599 Value *MainHandle = EmitScalarExpr(E: E->getArg(Arg: 0));
600 if (!CGM.getTriple().isSPIRV())
601 return MainHandle;
602
603 llvm::Type *HandleTy = CGM.getTypes().ConvertType(T: E->getType());
604 Value *OrderID = EmitScalarExpr(E: E->getArg(Arg: 1));
605 Value *SpaceOp = EmitScalarExpr(E: E->getArg(Arg: 2));
606 llvm::Intrinsic::ID IntrinsicID =
607 llvm::Intrinsic::spv_resource_counterhandlefromimplicitbinding;
608 SmallVector<Value *> Args{MainHandle, OrderID, SpaceOp};
609 return Builder.CreateIntrinsic(RetTy: HandleTy, ID: IntrinsicID, Args);
610 }
611 case Builtin::BI__builtin_hlsl_resource_nonuniformindex: {
612 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 0));
613 llvm::Type *RetTy = ConvertType(T: E->getType());
614 return Builder.CreateIntrinsic(
615 RetTy, ID: CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(),
616 Args: ArrayRef<Value *>{IndexOp});
617 }
618 case Builtin::BI__builtin_hlsl_resource_getdimensions_x: {
619 Value *Handle = EmitScalarExpr(E: E->getArg(Arg: 0));
620 LValue Dim = EmitLValue(E: E->getArg(Arg: 1));
621 llvm::Type *RetTy = llvm::Type::getInt32Ty(C&: getLLVMContext());
622 Value *DimValue = Builder.CreateIntrinsic(
623 RetTy, ID: CGM.getHLSLRuntime().getGetDimensionsXIntrinsic(),
624 Args: ArrayRef<Value *>{Handle});
625 return Builder.CreateStore(Val: DimValue, Addr: Dim.getAddress());
626 }
627 case Builtin::BI__builtin_hlsl_resource_getstride: {
628 LValue Stride = EmitLValue(E: E->getArg(Arg: 1));
629 return emitBufferStride(CGF: this, HandleExpr: E->getArg(Arg: 0), Stride);
630 }
631 case Builtin::BI__builtin_hlsl_all: {
632 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
633 return Builder.CreateIntrinsic(
634 /*ReturnType=*/RetTy: llvm::Type::getInt1Ty(C&: getLLVMContext()),
635 ID: CGM.getHLSLRuntime().getAllIntrinsic(), Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
636 Name: "hlsl.all");
637 }
638 case Builtin::BI__builtin_hlsl_and: {
639 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
640 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
641 return Builder.CreateAnd(LHS: Op0, RHS: Op1, Name: "hlsl.and");
642 }
643 case Builtin::BI__builtin_hlsl_or: {
644 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
645 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
646 return Builder.CreateOr(LHS: Op0, RHS: Op1, Name: "hlsl.or");
647 }
648 case Builtin::BI__builtin_hlsl_any: {
649 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
650 return Builder.CreateIntrinsic(
651 /*ReturnType=*/RetTy: llvm::Type::getInt1Ty(C&: getLLVMContext()),
652 ID: CGM.getHLSLRuntime().getAnyIntrinsic(), Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
653 Name: "hlsl.any");
654 }
655 case Builtin::BI__builtin_hlsl_asdouble:
656 return handleAsDoubleBuiltin(CGF&: *this, E);
657 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
658 Value *OpX = EmitScalarExpr(E: E->getArg(Arg: 0));
659 Value *OpMin = EmitScalarExpr(E: E->getArg(Arg: 1));
660 Value *OpMax = EmitScalarExpr(E: E->getArg(Arg: 2));
661
662 QualType Ty = E->getArg(Arg: 0)->getType();
663 if (auto *VecTy = Ty->getAs<VectorType>())
664 Ty = VecTy->getElementType();
665
666 Intrinsic::ID Intr;
667 if (Ty->isFloatingType()) {
668 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
669 } else if (Ty->isUnsignedIntegerType()) {
670 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
671 } else {
672 assert(Ty->isSignedIntegerType());
673 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
674 }
675 return Builder.CreateIntrinsic(
676 /*ReturnType=*/RetTy: OpX->getType(), ID: Intr,
677 Args: ArrayRef<Value *>{OpX, OpMin, OpMax}, FMFSource: nullptr, Name: "hlsl.clamp");
678 }
679 case Builtin::BI__builtin_hlsl_crossf16:
680 case Builtin::BI__builtin_hlsl_crossf32: {
681 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
682 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
683 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
684 E->getArg(1)->getType()->hasFloatingRepresentation() &&
685 "cross operands must have a float representation");
686 // make sure each vector has exactly 3 elements
687 assert(
688 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
689 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
690 "input vectors must have 3 elements each");
691 return Builder.CreateIntrinsic(
692 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getCrossIntrinsic(),
693 Args: ArrayRef<Value *>{Op0, Op1}, FMFSource: nullptr, Name: "hlsl.cross");
694 }
695 case Builtin::BI__builtin_hlsl_dot: {
696 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
697 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
698 llvm::Type *T0 = Op0->getType();
699 llvm::Type *T1 = Op1->getType();
700
701 // If the arguments are scalars, just emit a multiply
702 if (!T0->isVectorTy() && !T1->isVectorTy()) {
703 if (T0->isFloatingPointTy())
704 return Builder.CreateFMul(L: Op0, R: Op1, Name: "hlsl.dot");
705
706 if (T0->isIntegerTy())
707 return Builder.CreateMul(LHS: Op0, RHS: Op1, Name: "hlsl.dot");
708
709 llvm_unreachable(
710 "Scalar dot product is only supported on ints and floats.");
711 }
712 // For vectors, validate types and emit the appropriate intrinsic
713 assert(CGM.getContext().hasSameUnqualifiedType(E->getArg(0)->getType(),
714 E->getArg(1)->getType()) &&
715 "Dot product operands must have the same type.");
716
717 auto *VecTy0 = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
718 assert(VecTy0 && "Dot product argument must be a vector.");
719
720 return Builder.CreateIntrinsic(
721 /*ReturnType=*/RetTy: T0->getScalarType(),
722 ID: getDotProductIntrinsic(RT&: CGM.getHLSLRuntime(), QT: VecTy0->getElementType()),
723 Args: ArrayRef<Value *>{Op0, Op1}, FMFSource: nullptr, Name: "hlsl.dot");
724 }
725 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
726 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
727 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
728 Value *Acc = EmitScalarExpr(E: E->getArg(Arg: 2));
729
730 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
731 // Note that the argument order disagrees between the builtin and the
732 // intrinsic here.
733 return Builder.CreateIntrinsic(
734 /*ReturnType=*/RetTy: Acc->getType(), ID, Args: ArrayRef<Value *>{Acc, X, Y},
735 FMFSource: nullptr, Name: "hlsl.dot4add.i8packed");
736 }
737 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
738 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
739 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
740 Value *Acc = EmitScalarExpr(E: E->getArg(Arg: 2));
741
742 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
743 // Note that the argument order disagrees between the builtin and the
744 // intrinsic here.
745 return Builder.CreateIntrinsic(
746 /*ReturnType=*/RetTy: Acc->getType(), ID, Args: ArrayRef<Value *>{Acc, X, Y},
747 FMFSource: nullptr, Name: "hlsl.dot4add.u8packed");
748 }
749 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
750 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
751
752 return Builder.CreateIntrinsic(
753 /*ReturnType=*/RetTy: ConvertType(T: E->getType()),
754 ID: getFirstBitHighIntrinsic(RT&: CGM.getHLSLRuntime(), QT: E->getArg(Arg: 0)->getType()),
755 Args: ArrayRef<Value *>{X}, FMFSource: nullptr, Name: "hlsl.firstbithigh");
756 }
757 case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
758 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
759
760 return Builder.CreateIntrinsic(
761 /*ReturnType=*/RetTy: ConvertType(T: E->getType()),
762 ID: CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), Args: ArrayRef<Value *>{X},
763 FMFSource: nullptr, Name: "hlsl.firstbitlow");
764 }
765 case Builtin::BI__builtin_hlsl_lerp: {
766 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
767 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
768 Value *S = EmitScalarExpr(E: E->getArg(Arg: 2));
769 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
770 llvm_unreachable("lerp operand must have a float representation");
771 return Builder.CreateIntrinsic(
772 /*ReturnType=*/RetTy: X->getType(), ID: CGM.getHLSLRuntime().getLerpIntrinsic(),
773 Args: ArrayRef<Value *>{X, Y, S}, FMFSource: nullptr, Name: "hlsl.lerp");
774 }
775 case Builtin::BI__builtin_hlsl_normalize: {
776 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
777
778 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
779 "normalize operand must have a float representation");
780
781 return Builder.CreateIntrinsic(
782 /*ReturnType=*/RetTy: X->getType(),
783 ID: CGM.getHLSLRuntime().getNormalizeIntrinsic(), Args: ArrayRef<Value *>{X},
784 FMFSource: nullptr, Name: "hlsl.normalize");
785 }
786 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
787 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
788
789 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
790 "degree operand must have a float representation");
791
792 return Builder.CreateIntrinsic(
793 /*ReturnType=*/RetTy: X->getType(), ID: CGM.getHLSLRuntime().getDegreesIntrinsic(),
794 Args: ArrayRef<Value *>{X}, FMFSource: nullptr, Name: "hlsl.degrees");
795 }
796 case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
797 return handleElementwiseF16ToF32(CGF&: *this, E);
798 }
799 case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
800 return handleElementwiseF32ToF16(CGF&: *this, E);
801 }
802 case Builtin::BI__builtin_hlsl_elementwise_frac: {
803 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
804 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
805 llvm_unreachable("frac operand must have a float representation");
806 return Builder.CreateIntrinsic(
807 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getFracIntrinsic(),
808 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.frac");
809 }
810 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
811 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
812 llvm::Type *Xty = Op0->getType();
813 llvm::Type *retType = llvm::Type::getInt1Ty(C&: this->getLLVMContext());
814 if (Xty->isVectorTy()) {
815 auto *XVecTy = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
816 retType = llvm::VectorType::get(
817 ElementType: retType, EC: ElementCount::getFixed(MinVal: XVecTy->getNumElements()));
818 }
819 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
820 llvm_unreachable("isinf operand must have a float representation");
821 return Builder.CreateIntrinsic(
822 RetTy: retType, ID: CGM.getHLSLRuntime().getIsInfIntrinsic(),
823 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.isinf");
824 }
825 case Builtin::BI__builtin_hlsl_elementwise_isnan: {
826 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
827 llvm::Type *Xty = Op0->getType();
828 llvm::Type *retType = llvm::Type::getInt1Ty(C&: this->getLLVMContext());
829 if (Xty->isVectorTy()) {
830 auto *XVecTy = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
831 retType = llvm::VectorType::get(
832 ElementType: retType, EC: ElementCount::getFixed(MinVal: XVecTy->getNumElements()));
833 }
834 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
835 llvm_unreachable("isnan operand must have a float representation");
836 return Builder.CreateIntrinsic(
837 RetTy: retType, ID: CGM.getHLSLRuntime().getIsNaNIntrinsic(),
838 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.isnan");
839 }
840 case Builtin::BI__builtin_hlsl_mad: {
841 Value *M = EmitScalarExpr(E: E->getArg(Arg: 0));
842 Value *A = EmitScalarExpr(E: E->getArg(Arg: 1));
843 Value *B = EmitScalarExpr(E: E->getArg(Arg: 2));
844 if (E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
845 return Builder.CreateIntrinsic(
846 /*ReturnType*/ RetTy: M->getType(), ID: Intrinsic::fmuladd,
847 Args: ArrayRef<Value *>{M, A, B}, FMFSource: nullptr, Name: "hlsl.fmad");
848
849 if (E->getArg(Arg: 0)->getType()->hasSignedIntegerRepresentation()) {
850 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
851 return Builder.CreateIntrinsic(
852 /*ReturnType*/ RetTy: M->getType(), ID: Intrinsic::dx_imad,
853 Args: ArrayRef<Value *>{M, A, B}, FMFSource: nullptr, Name: "dx.imad");
854
855 Value *Mul = Builder.CreateNSWMul(LHS: M, RHS: A);
856 return Builder.CreateNSWAdd(LHS: Mul, RHS: B);
857 }
858 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
859 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
860 return Builder.CreateIntrinsic(
861 /*ReturnType=*/RetTy: M->getType(), ID: Intrinsic::dx_umad,
862 Args: ArrayRef<Value *>{M, A, B}, FMFSource: nullptr, Name: "dx.umad");
863
864 Value *Mul = Builder.CreateNUWMul(LHS: M, RHS: A);
865 return Builder.CreateNUWAdd(LHS: Mul, RHS: B);
866 }
867 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
868 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
869 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
870 llvm_unreachable("rcp operand must have a float representation");
871 llvm::Type *Ty = Op0->getType();
872 llvm::Type *EltTy = Ty->getScalarType();
873 Constant *One = Ty->isVectorTy()
874 ? ConstantVector::getSplat(
875 EC: ElementCount::getFixed(
876 MinVal: cast<FixedVectorType>(Val: Ty)->getNumElements()),
877 Elt: ConstantFP::get(Ty: EltTy, V: 1.0))
878 : ConstantFP::get(Ty: EltTy, V: 1.0);
879 return Builder.CreateFDiv(L: One, R: Op0, Name: "hlsl.rcp");
880 }
881 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
882 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
883 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
884 llvm_unreachable("rsqrt operand must have a float representation");
885 return Builder.CreateIntrinsic(
886 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getRsqrtIntrinsic(),
887 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.rsqrt");
888 }
889 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
890 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
891 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
892 "saturate operand must have a float representation");
893 return Builder.CreateIntrinsic(
894 /*ReturnType=*/RetTy: Op0->getType(),
895 ID: CGM.getHLSLRuntime().getSaturateIntrinsic(), Args: ArrayRef<Value *>{Op0},
896 FMFSource: nullptr, Name: "hlsl.saturate");
897 }
898 case Builtin::BI__builtin_hlsl_wave_prefix_count_bits: {
899 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
900 assert(Op->getType()->isIntegerTy(1) &&
901 "WavePrefixBitCount operand must be a boolean type");
902
903 Intrinsic::ID IID =
904 getPrefixCountBitsIntrinsic(Arch: getTarget().getTriple().getArch());
905
906 return EmitRuntimeCall(
907 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: IID), args: ArrayRef{Op},
908 name: "hlsl.wave.prefix.bit.count");
909 }
910 case Builtin::BI__builtin_hlsl_select: {
911 Value *OpCond = EmitScalarExpr(E: E->getArg(Arg: 0));
912 RValue RValTrue = EmitAnyExpr(E: E->getArg(Arg: 1));
913 Value *OpTrue =
914 RValTrue.isScalar()
915 ? RValTrue.getScalarVal()
916 : Builder.CreateLoad(Addr: RValTrue.getAggregateAddress(), Name: "true_val");
917 RValue RValFalse = EmitAnyExpr(E: E->getArg(Arg: 2));
918 Value *OpFalse =
919 RValFalse.isScalar()
920 ? RValFalse.getScalarVal()
921 : Builder.CreateLoad(Addr: RValFalse.getAggregateAddress(), Name: "false_val");
922 if (auto *VTy = E->getType()->getAs<VectorType>()) {
923 if (!OpTrue->getType()->isVectorTy())
924 OpTrue =
925 Builder.CreateVectorSplat(NumElts: VTy->getNumElements(), V: OpTrue, Name: "splat");
926 if (!OpFalse->getType()->isVectorTy())
927 OpFalse =
928 Builder.CreateVectorSplat(NumElts: VTy->getNumElements(), V: OpFalse, Name: "splat");
929 }
930
931 Value *SelectVal =
932 Builder.CreateSelect(C: OpCond, True: OpTrue, False: OpFalse, Name: "hlsl.select");
933 if (!RValTrue.isScalar())
934 Builder.CreateStore(Val: SelectVal, Addr: ReturnValue.getAddress(),
935 IsVolatile: ReturnValue.isVolatile());
936
937 return SelectVal;
938 }
939 case Builtin::BI__builtin_hlsl_step: {
940 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
941 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
942 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
943 E->getArg(1)->getType()->hasFloatingRepresentation() &&
944 "step operands must have a float representation");
945 return Builder.CreateIntrinsic(
946 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getStepIntrinsic(),
947 Args: ArrayRef<Value *>{Op0, Op1}, FMFSource: nullptr, Name: "hlsl.step");
948 }
949 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
950 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
951 assert(Op->getType()->isIntegerTy(1) &&
952 "Intrinsic WaveActiveAllTrue operand must be a bool");
953
954 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
955 return EmitRuntimeCall(
956 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID), args: {Op});
957 }
958 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
959 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
960 assert(Op->getType()->isIntegerTy(1) &&
961 "Intrinsic WaveActiveAnyTrue operand must be a bool");
962
963 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
964 return EmitRuntimeCall(
965 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID), args: {Op});
966 }
967 case Builtin::BI__builtin_hlsl_wave_active_ballot: {
968 [[maybe_unused]] Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
969 assert(Op->getType()->isIntegerTy(1) &&
970 "Intrinsic WaveActiveBallot operand must be a bool");
971
972 return handleHlslWaveActiveBallot(CGF&: *this, E);
973 }
974 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
975 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
976 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
977 return EmitRuntimeCall(
978 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID),
979 args: ArrayRef{OpExpr});
980 }
981 case Builtin::BI__builtin_hlsl_wave_active_sum: {
982 // Due to the use of variadic arguments, explicitly retrieve argument
983 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
984 Intrinsic::ID IID = getWaveActiveSumIntrinsic(
985 Arch: getTarget().getTriple().getArch(), RT&: CGM.getHLSLRuntime(),
986 QT: E->getArg(Arg: 0)->getType());
987
988 return EmitRuntimeCall(callee: Intrinsic::getOrInsertDeclaration(
989 M: &CGM.getModule(), id: IID, Tys: {OpExpr->getType()}),
990 args: ArrayRef{OpExpr}, name: "hlsl.wave.active.sum");
991 }
992 case Builtin::BI__builtin_hlsl_wave_active_max: {
993 // Due to the use of variadic arguments, explicitly retrieve argument
994 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
995 QualType QT = E->getArg(Arg: 0)->getType();
996 Intrinsic::ID IID;
997 if (QT->isUnsignedIntegerType())
998 IID = CGM.getHLSLRuntime().getWaveActiveUMaxIntrinsic();
999 else
1000 IID = CGM.getHLSLRuntime().getWaveActiveMaxIntrinsic();
1001
1002 return EmitRuntimeCall(callee: Intrinsic::getOrInsertDeclaration(
1003 M: &CGM.getModule(), id: IID, Tys: {OpExpr->getType()}),
1004 args: ArrayRef{OpExpr}, name: "hlsl.wave.active.max");
1005 }
1006 case Builtin::BI__builtin_hlsl_wave_active_min: {
1007 // Due to the use of variadic arguments, explicitly retrieve argument
1008 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1009 QualType QT = E->getArg(Arg: 0)->getType();
1010 Intrinsic::ID IID;
1011 if (QT->isUnsignedIntegerType())
1012 IID = CGM.getHLSLRuntime().getWaveActiveUMinIntrinsic();
1013 else
1014 IID = CGM.getHLSLRuntime().getWaveActiveMinIntrinsic();
1015
1016 return EmitRuntimeCall(callee: Intrinsic::getOrInsertDeclaration(
1017 M: &CGM.getModule(), id: IID, Tys: {OpExpr->getType()}),
1018 args: ArrayRef{OpExpr}, name: "hlsl.wave.active.min");
1019 }
1020 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
1021 // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in
1022 // defined in SPIRVBuiltins.td. So instead we manually get the matching name
1023 // for the DirectX intrinsic and the demangled builtin name
1024 switch (CGM.getTarget().getTriple().getArch()) {
1025 case llvm::Triple::dxil:
1026 return EmitRuntimeCall(callee: Intrinsic::getOrInsertDeclaration(
1027 M: &CGM.getModule(), id: Intrinsic::dx_wave_getlaneindex));
1028 case llvm::Triple::spirv:
1029 return EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(
1030 Ty: llvm::FunctionType::get(Result: IntTy, Params: {}, isVarArg: false),
1031 Name: "__hlsl_wave_get_lane_index", ExtraAttrs: {}, Local: false, AssumeConvergent: true));
1032 default:
1033 llvm_unreachable(
1034 "Intrinsic WaveGetLaneIndex not supported by target architecture");
1035 }
1036 }
1037 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
1038 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
1039 return EmitRuntimeCall(
1040 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID));
1041 }
1042 case Builtin::BI__builtin_hlsl_wave_get_lane_count: {
1043 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic();
1044 return EmitRuntimeCall(
1045 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID));
1046 }
1047 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
1048 // Due to the use of variadic arguments we must explicitly retrieve them and
1049 // create our function type.
1050 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1051 Value *OpIndex = EmitScalarExpr(E: E->getArg(Arg: 1));
1052 return EmitRuntimeCall(
1053 callee: Intrinsic::getOrInsertDeclaration(
1054 M: &CGM.getModule(), id: CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
1055 Tys: {OpExpr->getType()}),
1056 args: ArrayRef{OpExpr, OpIndex}, name: "hlsl.wave.readlane");
1057 }
1058 case Builtin::BI__builtin_hlsl_wave_prefix_sum: {
1059 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
1060 Intrinsic::ID IID = getWavePrefixSumIntrinsic(
1061 Arch: getTarget().getTriple().getArch(), RT&: CGM.getHLSLRuntime(),
1062 QT: E->getArg(Arg: 0)->getType());
1063 return EmitRuntimeCall(callee: Intrinsic::getOrInsertDeclaration(
1064 M: &CGM.getModule(), id: IID, Tys: {OpExpr->getType()}),
1065 args: ArrayRef{OpExpr}, name: "hlsl.wave.prefix.sum");
1066 }
1067 case Builtin::BI__builtin_hlsl_elementwise_sign: {
1068 auto *Arg0 = E->getArg(Arg: 0);
1069 Value *Op0 = EmitScalarExpr(E: Arg0);
1070 llvm::Type *Xty = Op0->getType();
1071 llvm::Type *retType = llvm::Type::getInt32Ty(C&: this->getLLVMContext());
1072 if (Xty->isVectorTy()) {
1073 auto *XVecTy = Arg0->getType()->castAs<VectorType>();
1074 retType = llvm::VectorType::get(
1075 ElementType: retType, EC: ElementCount::getFixed(MinVal: XVecTy->getNumElements()));
1076 }
1077 assert((Arg0->getType()->hasFloatingRepresentation() ||
1078 Arg0->getType()->hasIntegerRepresentation()) &&
1079 "sign operand must have a float or int representation");
1080
1081 if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
1082 Value *Cmp = Builder.CreateICmpEQ(LHS: Op0, RHS: ConstantInt::get(Ty: Xty, V: 0));
1083 return Builder.CreateSelect(C: Cmp, True: ConstantInt::get(Ty: retType, V: 0),
1084 False: ConstantInt::get(Ty: retType, V: 1), Name: "hlsl.sign");
1085 }
1086
1087 return Builder.CreateIntrinsic(
1088 RetTy: retType, ID: CGM.getHLSLRuntime().getSignIntrinsic(),
1089 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.sign");
1090 }
1091 case Builtin::BI__builtin_hlsl_elementwise_radians: {
1092 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1093 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1094 "radians operand must have a float representation");
1095 return Builder.CreateIntrinsic(
1096 /*ReturnType=*/RetTy: Op0->getType(),
1097 ID: CGM.getHLSLRuntime().getRadiansIntrinsic(), Args: ArrayRef<Value *>{Op0},
1098 FMFSource: nullptr, Name: "hlsl.radians");
1099 }
1100 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
1101 Value *ResHandle = EmitScalarExpr(E: E->getArg(Arg: 0));
1102 Value *Offset = EmitScalarExpr(E: E->getArg(Arg: 1));
1103 Value *OffsetI8 = Builder.CreateIntCast(V: Offset, DestTy: Int8Ty, isSigned: true);
1104 return Builder.CreateIntrinsic(
1105 /*ReturnType=*/RetTy: Offset->getType(),
1106 ID: CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
1107 Args: ArrayRef<Value *>{ResHandle, OffsetI8}, FMFSource: nullptr);
1108 }
1109 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
1110
1111 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
1112 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
1113 E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
1114 "asuint operands types mismatch");
1115 return handleHlslSplitdouble(E, CGF: this);
1116 }
1117 case Builtin::BI__builtin_hlsl_elementwise_clip:
1118 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1119 "clip operands types mismatch");
1120 return handleHlslClip(E, CGF: this);
1121 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
1122 Intrinsic::ID ID =
1123 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
1124 return EmitRuntimeCall(
1125 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID));
1126 }
1127 case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: {
1128 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1129 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1130 llvm_unreachable("ddx_coarse operand must have a float representation");
1131 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxCoarseIntrinsic();
1132 return Builder.CreateIntrinsic(/*ReturnType=*/RetTy: Op0->getType(), ID,
1133 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
1134 Name: "hlsl.ddx.coarse");
1135 }
1136 case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: {
1137 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1138 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1139 llvm_unreachable("ddy_coarse operand must have a float representation");
1140 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyCoarseIntrinsic();
1141 return Builder.CreateIntrinsic(/*ReturnType=*/RetTy: Op0->getType(), ID,
1142 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
1143 Name: "hlsl.ddy.coarse");
1144 }
1145 case Builtin::BI__builtin_hlsl_elementwise_ddx_fine: {
1146 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1147 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1148 llvm_unreachable("ddx_fine operand must have a float representation");
1149 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxFineIntrinsic();
1150 return Builder.CreateIntrinsic(/*ReturnType=*/RetTy: Op0->getType(), ID,
1151 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
1152 Name: "hlsl.ddx.fine");
1153 }
1154 case Builtin::BI__builtin_hlsl_elementwise_ddy_fine: {
1155 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1156 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
1157 llvm_unreachable("ddy_fine operand must have a float representation");
1158 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyFineIntrinsic();
1159 return Builder.CreateIntrinsic(/*ReturnType=*/RetTy: Op0->getType(), ID,
1160 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
1161 Name: "hlsl.ddy.fine");
1162 }
1163 case Builtin::BI__builtin_get_spirv_spec_constant_bool:
1164 case Builtin::BI__builtin_get_spirv_spec_constant_short:
1165 case Builtin::BI__builtin_get_spirv_spec_constant_ushort:
1166 case Builtin::BI__builtin_get_spirv_spec_constant_int:
1167 case Builtin::BI__builtin_get_spirv_spec_constant_uint:
1168 case Builtin::BI__builtin_get_spirv_spec_constant_longlong:
1169 case Builtin::BI__builtin_get_spirv_spec_constant_ulonglong:
1170 case Builtin::BI__builtin_get_spirv_spec_constant_half:
1171 case Builtin::BI__builtin_get_spirv_spec_constant_float:
1172 case Builtin::BI__builtin_get_spirv_spec_constant_double: {
1173 llvm::Function *SpecConstantFn = getSpecConstantFunction(SpecConstantType: E->getType());
1174 llvm::Value *SpecId = EmitScalarExpr(E: E->getArg(Arg: 0));
1175 llvm::Value *DefaultVal = EmitScalarExpr(E: E->getArg(Arg: 1));
1176 llvm::Value *Args[] = {SpecId, DefaultVal};
1177 return Builder.CreateCall(Callee: SpecConstantFn, Args);
1178 }
1179 }
1180 return nullptr;
1181}
1182
1183llvm::Function *clang::CodeGen::CodeGenFunction::getSpecConstantFunction(
1184 const clang::QualType &SpecConstantType) {
1185
1186 // Find or create the declaration for the function.
1187 llvm::Module *M = &CGM.getModule();
1188 std::string MangledName =
1189 getSpecConstantFunctionName(SpecConstantType, Context&: getContext());
1190 llvm::Function *SpecConstantFn = M->getFunction(Name: MangledName);
1191
1192 if (!SpecConstantFn) {
1193 llvm::Type *IntType = ConvertType(T: getContext().IntTy);
1194 llvm::Type *RetTy = ConvertType(T: SpecConstantType);
1195 llvm::Type *ArgTypes[] = {IntType, RetTy};
1196 llvm::FunctionType *FnTy = llvm::FunctionType::get(Result: RetTy, Params: ArgTypes, isVarArg: false);
1197 SpecConstantFn = llvm::Function::Create(
1198 Ty: FnTy, Linkage: llvm::GlobalValue::ExternalLinkage, N: MangledName, M);
1199 }
1200 return SpecConstantFn;
1201}
1202