//===------- AMDGPU.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGBuiltin.h"
#include "CodeGenFunction.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/Basic/SyncScope.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

using namespace clang;
using namespace CodeGen;
using namespace llvm;

namespace {

// Emits a call to an intrinsic that is overloaded on (mangled by) the types of
// both source operands.
static Value *
emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                                       Intrinsic::ID IntrinsicID,
                                       Intrinsic::ID ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  if (CGF.Builder.getIsFPConstrained()) {
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {Src0->getType(), Src1->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
  }

  Function *F =
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// If \p E is not a null pointer, insert an address space cast to match the
// return type of \p E if necessary.
Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
                             const CallExpr *E = nullptr) {
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
  auto *Call = CGF.Builder.CreateCall(F);
  Call->addRetAttr(
      Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
  if (!E)
    return Call;
  QualType BuiltinRetType = E->getType();
  auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
  if (RetTy == Call->getType())
    return Call;
  return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
}

Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
  auto *Call = CGF.Builder.CreateCall(F);
  Call->addRetAttr(
      Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
  return Call;
}

// \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
/// Emit code based on Code Object ABI version.
/// COV_4    : Emit code to use dispatch ptr
/// COV_5+   : Emit code to use implicitarg ptr
/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
///            and use its value for the COV_4 or COV_5+ approach. It is used
///            for compiling device libraries in an ABI-agnostic way.
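///
/// Illustrative sketch (not from the original source): for Index == 0 with a
/// fixed COV_5 ABI, the emitted IR is roughly
///   %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
///   %gep = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
///   %wgs.x = load i16, ptr addrspace(4) %gep, align 2
/// plus the !range, !noundef, and !invariant.load metadata attached below.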
Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
  llvm::LoadInst *LD;

  auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;

  if (Cov == CodeObjectVersionKind::COV_None) {
    StringRef Name = "__oclc_ABI_version";
    auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
    if (!ABIVersionC)
      ABIVersionC = new llvm::GlobalVariable(
          CGF.CGM.getModule(), CGF.Int32Ty, false,
          llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
          llvm::GlobalVariable::NotThreadLocal,
          CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));

    // This load will be eliminated by IPSCCP because it is constant
    // weak_odr without externally_initialized. Either changing it to weak or
    // adding externally_initialized will keep the load.
    Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
                                                      CGF.CGM.getIntAlign());

    Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
        ABIVersion,
        llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));

    // Indexing the implicit kernarg segment.
    Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
        CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);

    // Indexing the HSA kernel_dispatch_packet struct.
    Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
        CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);

    auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
    LD = CGF.Builder.CreateLoad(
        Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
  } else {
    Value *GEP = nullptr;
    if (Cov >= CodeObjectVersionKind::COV_5) {
      // Indexing the implicit kernarg segment.
      GEP = CGF.Builder.CreateConstGEP1_32(
          CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
    } else {
      // Indexing the HSA kernel_dispatch_packet struct.
      GEP = CGF.Builder.CreateConstGEP1_32(
          CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
    }
    LD = CGF.Builder.CreateLoad(
        Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
  }

  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(
      APInt(16, 1),
      APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
  LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
  LD->setMetadata(llvm::LLVMContext::MD_noundef,
                  llvm::MDNode::get(CGF.getLLVMContext(), {}));
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(CGF.getLLVMContext(), {}));
  return LD;
}

// \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
  const unsigned XOffset = 12;
  auto *DP = EmitAMDGPUDispatchPtr(CGF);
  // Indexing the HSA kernel_dispatch_packet struct.
  auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
  auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
  auto *LD = CGF.Builder.CreateLoad(
      Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));

  llvm::MDBuilder MDB(CGF.getLLVMContext());

  // Known non-zero.
  LD->setMetadata(llvm::LLVMContext::MD_range,
                  MDB.createRange(APInt(32, 1), APInt::getZero(32)));
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(CGF.getLLVMContext(), {}));
  return LD;
}
} // namespace

// Generates the IR for the __builtin_amdgcn_read_exec_* builtins.
// Lowers these builtins to the amdgcn.ballot intrinsic.
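// Illustrative sketch (not from the original source):
//   __builtin_amdgcn_read_exec() becomes
//     %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 true)
//   and __builtin_amdgcn_read_exec_hi() additionally emits
//     %shifted = lshr i64 %ballot, 32
//     %hi = trunc i64 %shifted to i32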
static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
                                      llvm::Type *RegisterType,
                                      llvm::Type *ValueType, bool isExecHi) {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
  CodeGen::CodeGenModule &CGM = CGF.CGM;

  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
  llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});

  if (isExecHi) {
    Value *Rt2 = Builder.CreateLShr(Call, 32);
    Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
    return Rt2;
  }

  return Call;
}

static llvm::Value *loadTextureDescPtrAsVec8I32(CodeGenFunction &CGF,
                                                llvm::Value *RsrcPtr) {
  auto &B = CGF.Builder;
  auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);

  if (RsrcPtr->getType() == VecTy)
    return RsrcPtr;

  if (RsrcPtr->getType()->isIntegerTy(32)) {
    llvm::PointerType *VecPtrTy =
        llvm::PointerType::get(CGF.getLLVMContext(), 8);
    llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
    return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
  }

  if (RsrcPtr->getType()->isPointerTy()) {
    auto *VecPtrTy = llvm::PointerType::get(
        CGF.getLLVMContext(), RsrcPtr->getType()->getPointerAddressSpace());
    llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");
    return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");
  }

  const auto &DL = CGF.CGM.getDataLayout();
  if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)
    return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");

  llvm::report_fatal_error("Unexpected texture resource argument form");
}

llvm::CallInst *
emitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
                                    const clang::CallExpr *E,
                                    unsigned IntrinsicID, bool IsImageStore) {
  auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned {
    QualType TexQT = CGF.getContext().AMDGPUTextureTy;
    for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) {
      QualType ArgTy = E->getArg(I)->getType();
      if (ArgTy == TexQT) {
        return I;
      }

      if (ArgTy.getCanonicalType() == TexQT.getCanonicalType()) {
        return I;
      }
    }

    return ~0U;
  };

  clang::SmallVector<llvm::Value *, 10> Args;
  unsigned RsrcIndex = findTextureDescIndex(E);

  if (RsrcIndex == ~0U) {
    llvm::report_fatal_error("Invalid argument count for image builtin");
  }

  for (unsigned I = 0; I < E->getNumArgs(); ++I) {
    llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I));
    if (I == RsrcIndex)
      V = loadTextureDescPtrAsVec8I32(CGF, V);
    Args.push_back(V);
  }

  llvm::Type *RetTy =
      IsImageStore ? CGF.VoidTy : CGF.ConvertType(E->getType());
  llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
  return Call;
}

// Emit an intrinsic that has one float or double operand and one integer
// operand.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) {
  if (AMDGCNScope == "agent")
    return "device";
  if (AMDGCNScope == "wavefront")
    return "subgroup";
  return AMDGCNScope;
}

// Processes the memory ordering and memory scope arguments of various
// amdgcn builtins.
// \p Order takes a C++11-compatible memory-ordering specifier and converts
// it into LLVM's memory ordering specifier using the atomic C ABI, writing
// the result to \p AO. \p Scope takes a const char * (or, for older builtins,
// an enum value) naming the memory scope, converts it into the AMDGCN-specific
// SyncScope ID, and writes it to \p SSID.
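// Illustrative sketch (not from the original source): for a call such as
//   __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup")
// this sets \p AO to SequentiallyConsistent and \p SSID to the "workgroup"
// sync scope, which the caller then uses to emit, e.g.,
//   fence syncscope("workgroup") seq_cst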
void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
                                              llvm::AtomicOrdering &AO,
                                              llvm::SyncScope::ID &SSID) {
  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();

  // Map C11/C++11 memory ordering to LLVM memory ordering
  assert(llvm::isValidAtomicOrderingCABI(ord));
  switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
  case llvm::AtomicOrderingCABI::acquire:
  case llvm::AtomicOrderingCABI::consume:
    AO = llvm::AtomicOrdering::Acquire;
    break;
  case llvm::AtomicOrderingCABI::release:
    AO = llvm::AtomicOrdering::Release;
    break;
  case llvm::AtomicOrderingCABI::acq_rel:
    AO = llvm::AtomicOrdering::AcquireRelease;
    break;
  case llvm::AtomicOrderingCABI::seq_cst:
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    break;
  case llvm::AtomicOrderingCABI::relaxed:
    AO = llvm::AtomicOrdering::Monotonic;
    break;
  }

  // Some of the atomic builtins take the scope as a string name.
  StringRef scp;
  if (llvm::getConstantStringInfo(Scope, scp)) {
    if (getTarget().getTriple().isSPIRV())
      scp = mapScopeToSPIRV(scp);
    SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
    return;
  }

  // Older builtins had an enum argument for the memory scope.
  const char *SSN = nullptr;
  int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
  switch (scope) {
  case AtomicScopeGenericModel::System: // __MEMORY_SCOPE_SYSTEM
    SSID = llvm::SyncScope::System;
    break;
  case AtomicScopeGenericModel::Device: // __MEMORY_SCOPE_DEVICE
    SSN = getTarget().getTriple().isSPIRV() ? "device" : "agent";
    break;
  case AtomicScopeGenericModel::Workgroup: // __MEMORY_SCOPE_WRKGRP
    SSN = "workgroup";
    break;
  case AtomicScopeGenericModel::Cluster: // __MEMORY_SCOPE_CLUSTR
    SSN = getTarget().getTriple().isSPIRV() ? "workgroup" : "cluster";
    break;
  case AtomicScopeGenericModel::Wavefront: // __MEMORY_SCOPE_WVFRNT
    SSN = getTarget().getTriple().isSPIRV() ? "subgroup" : "wavefront";
    break;
  case AtomicScopeGenericModel::Single: // __MEMORY_SCOPE_SINGLE
    SSID = llvm::SyncScope::SingleThread;
    break;
  default:
    SSID = llvm::SyncScope::System;
    break;
  }
  if (SSN)
    SSID = getLLVMContext().getOrInsertSyncScopeID(SSN);
}

void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
                                                     const CallExpr *E) {
  constexpr const char *Tag = "amdgpu-synchronize-as";
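  // Illustrative sketch (not from the original source): a call such as
  //   __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent", "local")
  // reaches the loop below with the single address-space argument "local", so
  // the emitted fence carries MMRA metadata of the form
  //   !{!"amdgpu-synchronize-as", !"local"}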

  LLVMContext &Ctx = Inst->getContext();
  SmallVector<MMRAMetadata::TagT, 3> MMRAs;
  for (unsigned K = 2; K < E->getNumArgs(); ++K) {
    llvm::Value *V = EmitScalarExpr(E->getArg(K));
    StringRef AS;
    if (llvm::getConstantStringInfo(V, AS)) {
      MMRAs.push_back({Tag, AS});
      // TODO: Delete the resulting unused constant?
      continue;
    }
    CGM.Error(E->getExprLoc(),
              "expected an address space name as a string literal");
  }

  llvm::sort(MMRAs);
  MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
  Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
}

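// Illustrative sketch (not from the original source): with the mapping below,
//   __builtin_amdgcn_wave_reduce_add_u32(v, 0)
// is lowered (further down in EmitAMDGPUBuiltinExpr) to roughly
//   call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %v, i32 0)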
static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
  switch (BuiltinID) {
  default:
    llvm_unreachable("Unknown BuiltinID for wave reduction");
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
    return Intrinsic::amdgcn_wave_reduce_add;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
    return Intrinsic::amdgcn_wave_reduce_fadd;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
    return Intrinsic::amdgcn_wave_reduce_sub;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
    return Intrinsic::amdgcn_wave_reduce_fsub;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
    return Intrinsic::amdgcn_wave_reduce_min;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
    return Intrinsic::amdgcn_wave_reduce_fmin;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
    return Intrinsic::amdgcn_wave_reduce_umin;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
    return Intrinsic::amdgcn_wave_reduce_max;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
    return Intrinsic::amdgcn_wave_reduce_fmax;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
    return Intrinsic::amdgcn_wave_reduce_umax;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
    return Intrinsic::amdgcn_wave_reduce_and;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
    return Intrinsic::amdgcn_wave_reduce_or;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
    return Intrinsic::amdgcn_wave_reduce_xor;
  }
}

Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
  llvm::SyncScope::ID SSID;
  switch (BuiltinID) {
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
  case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
    Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID);
    llvm::Value *Value = EmitScalarExpr(E->getArg(0));
    llvm::Value *Strategy = EmitScalarExpr(E->getArg(1));
    llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});
    return Builder.CreateCall(F, {Value, Strategy});
  }
  case AMDGPU::BI__builtin_amdgcn_div_scale:
  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
    // argument.

    Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));

    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));

    llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
                                              X->getType());

    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});

    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);

    llvm::Type *RealFlagType = FlagOutPtr.getElementType();

    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
    Builder.CreateStore(FlagExt, FlagOutPtr);
    return Result;
  }
  case AMDGPU::BI__builtin_amdgcn_div_fmas:
  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
                                         Src0->getType());
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
  }

  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
    return emitBuiltinWithOneOverloadedType<2>(*this, E,
                                               Intrinsic::amdgcn_ds_swizzle);
  case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
    llvm::SmallVector<llvm::Value *, 6> Args;
    // Find out if any arguments are required to be integer constant
    // expressions.
    unsigned ICEArguments = 0;
    ASTContext::GetBuiltinTypeError Error;
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
    assert(Error == ASTContext::GE_None && "Should not codegen an error");
    llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
    unsigned Size = DataTy->getPrimitiveSizeInBits();
    llvm::Type *IntTy =
        llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
    Function *F =
        CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
                             ? Intrinsic::amdgcn_mov_dpp8
                             : Intrinsic::amdgcn_update_dpp,
                         IntTy);
    assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
           E->getNumArgs() == 2);
    bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
    if (InsertOld)
      Args.push_back(llvm::PoisonValue::get(IntTy));
    for (unsigned I = 0; I != E->getNumArgs(); ++I) {
      llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
      if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
          Size < 32) {
        if (!DataTy->isIntegerTy())
          V = Builder.CreateBitCast(
              V, llvm::IntegerType::get(Builder.getContext(), Size));
        V = Builder.CreateZExtOrBitCast(V, IntTy);
      }
      llvm::Type *ExpTy =
          F->getFunctionType()->getFunctionParamType(I + InsertOld);
      Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
    }
    Value *V = Builder.CreateCall(F, Args);
    if (Size < 32 && !DataTy->isIntegerTy())
      V = Builder.CreateTrunc(
          V, llvm::IntegerType::get(Builder.getContext(), Size));
    return Builder.CreateTruncOrBitCast(V, DataTy);
  }
  case AMDGPU::BI__builtin_amdgcn_permlane16:
  case AMDGPU::BI__builtin_amdgcn_permlanex16:
    return emitBuiltinWithOneOverloadedType<6>(
        *this, E,
        BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
            ? Intrinsic::amdgcn_permlane16
            : Intrinsic::amdgcn_permlanex16);
  case AMDGPU::BI__builtin_amdgcn_permlane64:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_permlane64);
  case AMDGPU::BI__builtin_amdgcn_readlane:
    return emitBuiltinWithOneOverloadedType<2>(*this, E,
                                               Intrinsic::amdgcn_readlane);
  case AMDGPU::BI__builtin_amdgcn_readfirstlane:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_readfirstlane);
  case AMDGPU::BI__builtin_amdgcn_div_fixup:
  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_div_fixup);
  case AMDGPU::BI__builtin_amdgcn_trig_preop:
  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
  case AMDGPU::BI__builtin_amdgcn_rcp:
  case AMDGPU::BI__builtin_amdgcn_rcpf:
  case AMDGPU::BI__builtin_amdgcn_rcph:
  case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
  case AMDGPU::BI__builtin_amdgcn_sqrt:
  case AMDGPU::BI__builtin_amdgcn_sqrtf:
  case AMDGPU::BI__builtin_amdgcn_sqrth:
  case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_sqrt);
  case AMDGPU::BI__builtin_amdgcn_rsq:
  case AMDGPU::BI__builtin_amdgcn_rsqf:
  case AMDGPU::BI__builtin_amdgcn_rsqh:
  case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_rsq_clamp);
  case AMDGPU::BI__builtin_amdgcn_sinf:
  case AMDGPU::BI__builtin_amdgcn_sinh:
  case AMDGPU::BI__builtin_amdgcn_sin_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
  case AMDGPU::BI__builtin_amdgcn_cosf:
  case AMDGPU::BI__builtin_amdgcn_cosh:
  case AMDGPU::BI__builtin_amdgcn_cos_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
  case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
    return EmitAMDGPUDispatchPtr(*this, E);
  case AMDGPU::BI__builtin_amdgcn_logf:
  case AMDGPU::BI__builtin_amdgcn_log_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
  case AMDGPU::BI__builtin_amdgcn_exp2f:
  case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_exp2);
  case AMDGPU::BI__builtin_amdgcn_log_clampf:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_log_clamp);
  case AMDGPU::BI__builtin_amdgcn_ldexp:
  case AMDGPU::BI__builtin_amdgcn_ldexpf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
    return Builder.CreateCall(F, {Src0, Src1});
  }
  case AMDGPU::BI__builtin_amdgcn_ldexph: {
    // The raw instruction behaves differently for out-of-bounds exponent
    // values (implicit truncation instead of saturation to
    // short_min/short_max).
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
    return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
  }
  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_frexp_mant);
  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                   {Builder.getInt32Ty(), Src0->getType()});
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                   {Builder.getInt16Ty(), Src0->getType()});
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_fract:
  case AMDGPU::BI__builtin_amdgcn_fractf:
  case AMDGPU::BI__builtin_amdgcn_fracth:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_fract);
  case AMDGPU::BI__builtin_amdgcn_lerp:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_lerp);
  case AMDGPU::BI__builtin_amdgcn_ubfe:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_ubfe);
  case AMDGPU::BI__builtin_amdgcn_sbfe:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_sbfe);
  case AMDGPU::BI__builtin_amdgcn_ballot_w32:
  case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
    llvm::Type *ResultType = ConvertType(E->getType());
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
    return Builder.CreateCall(F, {Src});
  }
  case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
  case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
    Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
    return Builder.CreateCall(F, {Src});
  }
  case AMDGPU::BI__builtin_amdgcn_tanhf:
  case AMDGPU::BI__builtin_amdgcn_tanhh:
  case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_tanh);
  case AMDGPU::BI__builtin_amdgcn_uicmp:
  case AMDGPU::BI__builtin_amdgcn_uicmpl:
  case AMDGPU::BI__builtin_amdgcn_sicmp:
  case AMDGPU::BI__builtin_amdgcn_sicmpl: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));

    // FIXME-GFX10: How should a 32-bit mask be handled?
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
                                   {Builder.getInt64Ty(), Src0->getType()});
    return Builder.CreateCall(F, {Src0, Src1, Src2});
  }
  case AMDGPU::BI__builtin_amdgcn_fcmp:
  case AMDGPU::BI__builtin_amdgcn_fcmpf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));

    // FIXME-GFX10: How should a 32-bit mask be handled?
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
                                   {Builder.getInt64Ty(), Src0->getType()});
    return Builder.CreateCall(F, {Src0, Src1, Src2});
  }
  case AMDGPU::BI__builtin_amdgcn_class:
  case AMDGPU::BI__builtin_amdgcn_classf:
  case AMDGPU::BI__builtin_amdgcn_classh:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
  case AMDGPU::BI__builtin_amdgcn_fmed3f:
  case AMDGPU::BI__builtin_amdgcn_fmed3h:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_fmed3);
  case AMDGPU::BI__builtin_amdgcn_ds_append:
  case AMDGPU::BI__builtin_amdgcn_ds_consume: {
    Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
        Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrin, {Src0->getType()});
    return Builder.CreateCall(F, {Src0, Builder.getFalse()});
  }
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_global_load_tr_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
      IID = Intrinsic::amdgcn_global_load_tr_b128;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_global_load_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_global_load_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_load_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_ds_load_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_load_tr8_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
      IID = Intrinsic::amdgcn_ds_load_tr16_b128;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_read_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_read_tr8_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_ds_read_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
      IID = Intrinsic::amdgcn_ds_read_tr16_b64;
      break;
    }
    llvm::Type *LoadTy = ConvertType(E->getType());
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
    return Builder.CreateCall(F, {Addr});
  }
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {

    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
      IID = Intrinsic::amdgcn_global_load_monitor_b32;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
      IID = Intrinsic::amdgcn_global_load_monitor_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
      IID = Intrinsic::amdgcn_global_load_monitor_b128;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
      IID = Intrinsic::amdgcn_flat_load_monitor_b32;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
      IID = Intrinsic::amdgcn_flat_load_monitor_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
      IID = Intrinsic::amdgcn_flat_load_monitor_b128;
      break;
    }

    llvm::Type *LoadTy = ConvertType(E->getType());
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
    llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
    return Builder.CreateCall(F, {Addr, Val});
  }
  case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
  case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
  case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
      IID = Intrinsic::amdgcn_cluster_load_b32;
      break;
    case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
      IID = Intrinsic::amdgcn_cluster_load_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
      IID = Intrinsic::amdgcn_cluster_load_b128;
      break;
    }
    SmallVector<Value *, 3> Args;
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));
    llvm::Function *F = CGM.getIntrinsic(IID, {ConvertType(E->getType())});
    return Builder.CreateCall(F, {Args});
  }
  case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
    // Should this have asan instrumentation?
    return emitBuiltinWithOneOverloadedType<5>(*this, E,
                                               Intrinsic::amdgcn_load_to_lds);
  }
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
      IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
      break;
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
      IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
      break;
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
      IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
      break;
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
      IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
      break;
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
      IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
      break;
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
      IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
      break;
    }

    LLVMContext &Ctx = CGM.getLLVMContext();
    SmallVector<Value *, 5> Args;
    // The last argument is an MD string.
    const unsigned ScopeArg = E->getNumArgs() - 1;
    for (unsigned i = 0; i != ScopeArg; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));
    StringRef Arg = cast<StringLiteral>(E->getArg(ScopeArg)->IgnoreParenCasts())
                        ->getString();
    llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
    Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
    // The intrinsic is overloaded on the pointer address space; the pointer is
    // always the first argument.
    llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});
    return Builder.CreateCall(F, {Args});
  }
  case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
    Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
                                   {llvm::Type::getInt64Ty(getLLVMContext())});
    return Builder.CreateCall(F);
  }
  case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
    Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
                                   {llvm::Type::getInt64Ty(getLLVMContext())});
    llvm::Value *Env = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(F, {Env});
  }
  case AMDGPU::BI__builtin_amdgcn_read_exec:
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
    return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
    llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
    llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
    llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
    llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
    llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
    llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));

    // The builtins take these arguments as vec4 where the last element is
    // ignored. The intrinsic takes them as vec3.
    RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin, {0, 1, 2});
    RayDir = Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
    RayInverseDir =
        Builder.CreateShuffleVector(RayInverseDir, RayInverseDir, {0, 1, 2});

    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
                                   {NodePtr->getType(), RayDir->getType()});
    return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
                                  RayInverseDir, TextureDescr});
  }
  case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
      IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
      break;
    case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
      IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
      break;
    }
    llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
    llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
    llvm::Value *InstanceMask = EmitScalarExpr(E->getArg(2));
    llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(3));
    llvm::Value *RayDir = EmitScalarExpr(E->getArg(4));
    llvm::Value *Offset = EmitScalarExpr(E->getArg(5));
    llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(6));

    Address RetRayOriginPtr = EmitPointerWithAlignment(E->getArg(7));
    Address RetRayDirPtr = EmitPointerWithAlignment(E->getArg(8));

    llvm::Function *IntrinsicFunc = CGM.getIntrinsic(IID);

    llvm::CallInst *CI = Builder.CreateCall(
        IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
                        Offset, TextureDescr});

    llvm::Value *RetVData = Builder.CreateExtractValue(CI, 0);
    llvm::Value *RetRayOrigin = Builder.CreateExtractValue(CI, 1);
    llvm::Value *RetRayDir = Builder.CreateExtractValue(CI, 2);

    Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
    Builder.CreateStore(RetRayDir, RetRayDirPtr);

    return RetVData;
  }

  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
      break;
    }

    SmallVector<Value *, 4> Args;
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));

    Function *F = CGM.getIntrinsic(IID);
    Value *Call = Builder.CreateCall(F, Args);
    Value *Rtn = Builder.CreateExtractValue(Call, 0);
    Value *A = Builder.CreateExtractValue(Call, 1);
    llvm::Type *RetTy = ConvertType(E->getType());
    Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
                                            (uint64_t)0);
    // ds_bvh_stack_push8_pop2_rtn returns {i64, i32} but the builtin returns
    // <2 x i64>, so zero-extend the second value.
    if (A->getType()->getPrimitiveSizeInBits() <
        RetTy->getScalarType()->getPrimitiveSizeInBits())
      A = Builder.CreateZExt(A, RetTy->getScalarType());

    return Builder.CreateInsertElement(I0, A, 1);
  }
  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_1d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_1darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_2d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_2darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_3d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_cube, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
  case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_1d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_1darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_2d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_2darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_3d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_cube, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
  case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_1d, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_2d, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_3d, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_cube, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_1d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_d_1d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_2d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_d_2d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_3d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_d_3d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_cube, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false);
1214 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
1215 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
1216 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
1217 return emitAMDGCNImageOverloadedReturnType(
1218 CGF&: *this, E, IntrinsicID: Intrinsic::amdgcn_image_sample_d_2darray, IsImageStore: false);
1219 case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
1220 return emitAMDGCNImageOverloadedReturnType(
1221 CGF&: *this, E, IntrinsicID: Intrinsic::amdgcn_image_gather4_lz_2d, IsImageStore: false);
1222 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1223 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
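// Both scaled MFMA builtins take their A and B operands as packed <8 x i32>
// vectors, and the intrinsic is overloaded on those two operand types; every
// builtin argument is forwarded to the intrinsic unchanged.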
1224 llvm::FixedVectorType *VT = FixedVectorType::get(ElementType: Builder.getInt32Ty(), NumElts: 8);
1225 Function *F = CGM.getIntrinsic(
1226 IID: BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1227 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1228 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
1229 Tys: {VT, VT});
1230
1231 SmallVector<Value *, 9> Args;
1232 for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
1233 Args.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: I)));
1234 return Builder.CreateCall(Callee: F, Args);
1235 }
1236 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1237 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1238 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1239 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1240 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1241 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1242 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1243 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1244 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1245 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1246 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1247 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1248 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1249 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1250 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1251 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1252 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1253 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1254 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1255 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1256 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1257 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1258 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1259 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1260 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1261 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1262 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1263 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1264 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1265 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1266 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1267 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1268 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1269 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1270 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1271 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1272 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1273 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1274 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1275 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1276 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1277 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1278 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1279 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1280 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1281 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1282 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1283 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1284 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1285 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1286 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1287 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1288 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1289 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1290 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1291 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1292 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1293 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1294 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1295 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1296 // GFX1250 WMMA builtins
1297 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1298 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1299 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1300 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1301 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1302 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1303 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1304 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1305 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1306 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1307 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1308 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1309 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1310 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1311 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1312 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1313 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1314 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1315 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1316 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1317 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1318 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1319 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1320 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1321 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1322 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1323 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1324 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1325 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1326 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1327 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1328 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1329 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1330 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1331 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1332 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1333 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1334 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1335 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1336 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1337 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1338 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1339 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1340
1341 // These operations perform a matrix multiplication and accumulation of
1342 // the form:
1343 // D = A * B + C
1344 // We need to specify one type for the A/B matrices and one for the C/D matrices.
1345 // Sparse matrix operations can have different types for A and B, as well as
1346 // an additional type for the sparsity index.
1347 // The destination type must be listed before the types of the source operands.
1348 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
1349 // On GFX12, the intrinsics with a 16-bit accumulator use a packed layout.
1350 // There is no need for the variable opsel argument, so it is always set to
1351 // "false".
1352 bool AppendFalseForOpselArg = false;
1353 unsigned BuiltinWMMAOp;
1354 // Need return type when D and C are of different types.
1355 bool NeedReturnType = false;
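// For example (illustrative), __builtin_amdgcn_wmma_f32_16x16x16_f16_w32 below
// uses ArgsForMatchingMatrixTypes = {2, 0}: the types of argument 2 (the C/D
// accumulator) and argument 0 (the A/B operand) become the overload types used
// to mangle llvm.amdgcn.wmma.f32.16x16x16.f16.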
1356
1357 switch (BuiltinID) {
1358 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1359 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1360 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1361 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1362 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1363 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1364 break;
1365 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1366 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1367 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1368 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1369 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1370 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1371 break;
1372 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1373 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1374 AppendFalseForOpselArg = true;
1375 [[fallthrough]];
1376 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1377 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1378 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1379 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1380 break;
1381 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1382 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1383 AppendFalseForOpselArg = true;
1384 [[fallthrough]];
1385 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1386 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1387 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1388 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1389 break;
1390 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1391 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1392 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1393 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1394 break;
1395 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1396 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1397 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1398 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1399 break;
1400 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1401 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1402 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1403 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1404 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1405 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1406 break;
1407 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1408 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1409 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1410 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1411 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1412 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1413 break;
1414 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1415 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1416 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1417 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1418 break;
1419 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1420 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1421 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1422 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1423 break;
1424 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1425 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1426 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1427 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1428 break;
1429 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1430 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1431 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1432 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1433 break;
1434 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1435 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1436 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1437 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1438 break;
1439 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1440 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1441 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1442 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1443 break;
1444 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1445 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1446 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1447 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1448 break;
1449 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1450 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1451 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1452 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1453 break;
1454 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1455 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1456 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1457 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1458 break;
1459 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1460 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1461 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1462 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1463 break;
1464 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1465 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1466 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1467 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1468 break;
1469 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1470 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1471 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1472 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1473 break;
1474 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1475 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1476 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1477 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1478 break;
1479 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1480 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1481 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1482 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1483 break;
1484 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1485 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1486 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1487 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1488 break;
1489 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1490 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1491 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1492 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1493 break;
1494 // GFX1250 WMMA builtins
1495 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1496 ArgsForMatchingMatrixTypes = {5, 1};
1497 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1498 break;
1499 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1500 ArgsForMatchingMatrixTypes = {5, 1};
1501 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1502 break;
1503 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1504 ArgsForMatchingMatrixTypes = {5, 1};
1505 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1506 break;
1507 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1508 ArgsForMatchingMatrixTypes = {5, 1};
1509 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1510 break;
1511 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1512 ArgsForMatchingMatrixTypes = {5, 1};
1513 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1514 break;
1515 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1516 NeedReturnType = true;
1517 ArgsForMatchingMatrixTypes = {1, 5};
1518 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1519 break;
1520 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1521 ArgsForMatchingMatrixTypes = {3, 0};
1522 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1523 break;
1524 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1525 ArgsForMatchingMatrixTypes = {3, 0};
1526 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1527 break;
1528 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1529 ArgsForMatchingMatrixTypes = {3, 0};
1530 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1531 break;
1532 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1533 ArgsForMatchingMatrixTypes = {3, 0};
1534 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1535 break;
1536 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1537 ArgsForMatchingMatrixTypes = {3, 0};
1538 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1539 break;
1540 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1541 ArgsForMatchingMatrixTypes = {3, 0};
1542 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1543 break;
1544 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1545 ArgsForMatchingMatrixTypes = {3, 0};
1546 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1547 break;
1548 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1549 ArgsForMatchingMatrixTypes = {3, 0};
1550 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1551 break;
1552 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1553 ArgsForMatchingMatrixTypes = {3, 0};
1554 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1555 break;
1556 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1557 ArgsForMatchingMatrixTypes = {3, 0};
1558 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1559 break;
1560 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1561 ArgsForMatchingMatrixTypes = {3, 0};
1562 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1563 break;
1564 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1565 ArgsForMatchingMatrixTypes = {3, 0};
1566 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1567 break;
1568 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1569 ArgsForMatchingMatrixTypes = {3, 0};
1570 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1571 break;
1572 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1573 ArgsForMatchingMatrixTypes = {3, 0};
1574 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1575 break;
1576 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1577 ArgsForMatchingMatrixTypes = {3, 0};
1578 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1579 break;
1580 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1581 ArgsForMatchingMatrixTypes = {3, 0};
1582 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1583 break;
1584 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1585 ArgsForMatchingMatrixTypes = {4, 1};
1586 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1587 break;
1588 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1589 ArgsForMatchingMatrixTypes = {5, 1, 3};
1590 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1591 break;
1592 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1593 ArgsForMatchingMatrixTypes = {5, 1, 3};
1594 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1595 break;
1596 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1597 ArgsForMatchingMatrixTypes = {5, 1, 3};
1598 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1599 break;
1600 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1601 ArgsForMatchingMatrixTypes = {3, 0, 1};
1602 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1603 break;
1604 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1605 ArgsForMatchingMatrixTypes = {3, 0, 1};
1606 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1607 break;
1608 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1609 ArgsForMatchingMatrixTypes = {3, 0, 1};
1610 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1611 break;
1612 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1613 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1614 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1615 break;
1616 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1617 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1618 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1619 break;
1620 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1621 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1622 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1623 break;
1624 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1625 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1626 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1627 break;
1628 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1629 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1630 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1631 break;
1632 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1633 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1634 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1635 break;
1636 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1637 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1638 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1639 break;
1640 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1641 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1642 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1643 break;
1644 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1645 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1646 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1647 break;
1648 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1649 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1650 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1651 break;
1652 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1653 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1654 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1655 break;
1656 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1657 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1658 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1659 break;
1660 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1661 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1662 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1663 break;
1664 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1665 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1666 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1667 break;
1668 }
1669
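// Emit every builtin operand as a scalar. The gfx12 forms with a 16-bit
// accumulator do not expose the opsel operand, so a constant "false" is
// appended to complete the intrinsic's argument list.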
1670 SmallVector<Value *, 6> Args;
1671 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
1672 Args.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: i)));
1673 if (AppendFalseForOpselArg)
1674 Args.push_back(Elt: Builder.getFalse());
1675
1676 // Handle the optional clamp argument of the two builtins below: default it to false when omitted and normalize it to an i1.
1677 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
1678 if (Args.size() == 7)
1679 Args.push_back(Elt: Builder.getFalse());
1680 assert(Args.size() == 8 && "Expected 8 arguments");
1681 Args[7] = Builder.CreateZExtOrTrunc(V: Args[7], DestTy: Builder.getInt1Ty());
1682 } else if (BuiltinID ==
1683 AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
1684 if (Args.size() == 8)
1685 Args.push_back(Elt: Builder.getFalse());
1686 assert(Args.size() == 9 && "Expected 9 arguments");
1687 Args[8] = Builder.CreateZExtOrTrunc(V: Args[8], DestTy: Builder.getInt1Ty());
1688 }
1689
1690 SmallVector<llvm::Type *, 6> ArgTypes;
1691 if (NeedReturnType)
1692 ArgTypes.push_back(Elt: ConvertType(T: E->getType()));
1693 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
1694 ArgTypes.push_back(Elt: Args[ArgIdx]->getType());
1695
1696 Function *F = CGM.getIntrinsic(IID: BuiltinWMMAOp, Tys: ArgTypes);
1697 return Builder.CreateCall(Callee: F, Args);
1698 }
1699 // amdgcn workgroup size
1700 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1701 return EmitAMDGPUWorkGroupSize(CGF&: *this, Index: 0);
1702 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1703 return EmitAMDGPUWorkGroupSize(CGF&: *this, Index: 1);
1704 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1705 return EmitAMDGPUWorkGroupSize(CGF&: *this, Index: 2);
1706
1707 // amdgcn grid size
1708 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1709 return EmitAMDGPUGridSize(CGF&: *this, Index: 0);
1710 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1711 return EmitAMDGPUGridSize(CGF&: *this, Index: 1);
1712 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1713 return EmitAMDGPUGridSize(CGF&: *this, Index: 2);
1714
1715 // r600 intrinsics
1716 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1717 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1718 return emitBuiltinWithOneOverloadedType<1>(CGF&: *this, E,
1719 IntrinsicID: Intrinsic::r600_recipsqrt_ieee);
1720 case AMDGPU::BI__builtin_amdgcn_alignbit: {
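// v_alignbit_b32 extracts 32 bits from the 64-bit concatenation {Src0, Src1}
// shifted right by Src2, which matches the semantics of a funnel shift right,
// so it maps directly onto llvm.fshr.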
1721 llvm::Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1722 llvm::Value *Src1 = EmitScalarExpr(E: E->getArg(Arg: 1));
1723 llvm::Value *Src2 = EmitScalarExpr(E: E->getArg(Arg: 2));
1724 Function *F = CGM.getIntrinsic(IID: Intrinsic::fshr, Tys: Src0->getType());
1725 return Builder.CreateCall(Callee: F, Args: { Src0, Src1, Src2 });
1726 }
1727 case AMDGPU::BI__builtin_amdgcn_fence: {
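// Argument 0 is the memory order and argument 1 the synchronization scope;
// both are translated by ProcessOrderScopeAMDGCN. Any additional arguments
// name address spaces the fence applies to and are attached to the fence as
// MMRA metadata (see AddAMDGPUFenceAddressSpaceMMRA).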
1728 ProcessOrderScopeAMDGCN(Order: EmitScalarExpr(E: E->getArg(Arg: 0)),
1729 Scope: EmitScalarExpr(E: E->getArg(Arg: 1)), AO, SSID);
1730 FenceInst *Fence = Builder.CreateFence(Ordering: AO, SSID);
1731 if (E->getNumArgs() > 2)
1732 AddAMDGPUFenceAddressSpaceMMRA(Inst: Fence, E);
1733 return Fence;
1734 }
1735 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1736 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1737 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1738 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1739 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1740 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1741 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1742 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1743 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1744 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1745 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1746 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1747 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1748 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1749 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1750 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1751 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1752 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1753 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1754 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1755 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1756 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1757 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
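// Each of these builtins lowers to a single atomicrmw instruction. First
// select the IR binary operation; volatility, ordering, syncscope and
// metadata are derived below.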
1758 llvm::AtomicRMWInst::BinOp BinOp;
1759 switch (BuiltinID) {
1760 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1761 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1762 BinOp = llvm::AtomicRMWInst::UIncWrap;
1763 break;
1764 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1765 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1766 BinOp = llvm::AtomicRMWInst::UDecWrap;
1767 break;
1768 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1769 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1770 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1771 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1772 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1773 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1774 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1775 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1776 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1777 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1778 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1779 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1780 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1781 BinOp = llvm::AtomicRMWInst::FAdd;
1782 break;
1783 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1784 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1785 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1786 BinOp = llvm::AtomicRMWInst::FMin;
1787 break;
1788 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1789 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1790 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1791 BinOp = llvm::AtomicRMWInst::FMax;
1792 break;
1793 }
1794
1795 Address Ptr = CheckAtomicAlignment(CGF&: *this, E);
1796 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
1797 llvm::Type *OrigTy = Val->getType();
1798 QualType PtrTy = E->getArg(Arg: 0)->IgnoreImpCasts()->getType();
1799
1800 bool Volatile;
1801
1802 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1803 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1804 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1805 // __builtin_amdgcn_ds_faddf/fminf/fmaxf have an explicit volatile argument.
1806 Volatile =
1807 cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 4)))->getZExtValue();
1808 } else {
1809 // Infer volatile from the passed type.
1810 Volatile =
1811 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1812 }
1813
1814 if (E->getNumArgs() >= 4) {
1815 // Some of the builtins have explicit ordering and scope arguments.
1816 ProcessOrderScopeAMDGCN(Order: EmitScalarExpr(E: E->getArg(Arg: 2)),
1817 Scope: EmitScalarExpr(E: E->getArg(Arg: 3)), AO, SSID);
1818 } else {
1819 // Most of the builtins do not have syncscope/order arguments. For DS
1820 // atomics the scope doesn't really matter, as they implicitly operate at
1821 // workgroup scope.
1822 //
1823 // The global/flat cases need to use agent scope to consistently produce
1824 // the native instruction instead of a cmpxchg expansion.
1825 if (getTarget().getTriple().isSPIRV())
1826 SSID = getLLVMContext().getOrInsertSyncScopeID(SSN: "device");
1827 else
1828 SSID = getLLVMContext().getOrInsertSyncScopeID(SSN: "agent");
1829 AO = AtomicOrdering::Monotonic;
1830
1831 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
1832 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1833 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1834 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1835 llvm::Type *V2BF16Ty = FixedVectorType::get(
1836 ElementType: llvm::Type::getBFloatTy(C&: Builder.getContext()), NumElts: 2);
1837 Val = Builder.CreateBitCast(V: Val, DestTy: V2BF16Ty);
1838 }
1839 }
1840
1841 llvm::AtomicRMWInst *RMW =
1842 Builder.CreateAtomicRMW(Op: BinOp, Addr: Ptr, Val, Ordering: AO, SSID);
1843 if (Volatile)
1844 RMW->setVolatile(true);
1845
1846 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
1847 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1848 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
1849 // instruction for flat and global operations.
1850 llvm::MDTuple *EmptyMD = MDNode::get(Context&: getLLVMContext(), MDs: {});
1851 RMW->setMetadata(Kind: "amdgpu.no.fine.grained.memory", Node: EmptyMD);
1852
1853 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
1854 // instruction, but this only matters for float fadd.
1855 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
1856 RMW->setMetadata(Kind: "amdgpu.ignore.denormal.mode", Node: EmptyMD);
1857 }
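// For example (illustrative), __builtin_amdgcn_global_atomic_fadd_f32 on a
// global pointer becomes roughly
//   atomicrmw fadd ptr addrspace(1) %p, float %v syncscope("agent") monotonic
// with the two pieces of metadata above attached.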
1858
1859 return Builder.CreateBitCast(V: RMW, DestTy: OrigTy);
1860 }
1861 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
1862 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
1863 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
1864 llvm::Type *ResultType = ConvertType(T: E->getType());
1865 // s_sendmsg_rtn is mangled using the return type only.
1866 Function *F =
1867 CGM.getIntrinsic(IID: Intrinsic::amdgcn_s_sendmsg_rtn, Tys: {ResultType});
1868 return Builder.CreateCall(Callee: F, Args: {Arg});
1869 }
1870 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
1871 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
1872 // Because builtin return types are limited and the intrinsic returns a
1873 // struct (a pair of i32), marshal the result into a <2 x i32> vector.
1874 Value *VDstOld = EmitScalarExpr(E: E->getArg(Arg: 0));
1875 Value *VSrcOld = EmitScalarExpr(E: E->getArg(Arg: 1));
1876 Value *FI = EmitScalarExpr(E: E->getArg(Arg: 2));
1877 Value *BoundCtrl = EmitScalarExpr(E: E->getArg(Arg: 3));
1878 Function *F =
1879 CGM.getIntrinsic(IID: BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
1880 ? Intrinsic::amdgcn_permlane16_swap
1881 : Intrinsic::amdgcn_permlane32_swap);
1882 llvm::CallInst *Call =
1883 Builder.CreateCall(Callee: F, Args: {VDstOld, VSrcOld, FI, BoundCtrl});
1884
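// The intrinsic returns a pair of i32 values; extract both elements and
// repack them into the two-element vector type that the builtin returns.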
1885 llvm::Value *Elt0 = Builder.CreateExtractValue(Agg: Call, Idxs: 0);
1886 llvm::Value *Elt1 = Builder.CreateExtractValue(Agg: Call, Idxs: 1);
1887
1888 llvm::Type *ResultType = ConvertType(T: E->getType());
1889
1890 llvm::Value *Insert0 = Builder.CreateInsertElement(
1891 Vec: llvm::PoisonValue::get(T: ResultType), NewElt: Elt0, UINT64_C(0));
1892 llvm::Value *AsVector =
1893 Builder.CreateInsertElement(Vec: Insert0, NewElt: Elt1, UINT64_C(1));
1894 return AsVector;
1895 }
1896 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
1897 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
1898 return emitBuiltinWithOneOverloadedType<4>(CGF&: *this, E,
1899 IntrinsicID: Intrinsic::amdgcn_bitop3);
1900 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
1901 // TODO: LLVM has this overloaded to allow for fat pointers, but since
1902 // those haven't been plumbed through to Clang yet, default to creating the
1903 // resource type.
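// The intrinsic is overloaded on the returned buffer-resource pointer type
// (AMDGPUAS::BUFFER_RESOURCE) and on the type of the base pointer (argument
// 0); the remaining arguments are passed through unchanged.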
1904 SmallVector<Value *, 4> Args;
1905 for (unsigned I = 0; I < 4; ++I)
1906 Args.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: I)));
1907 llvm::PointerType *RetTy = llvm::PointerType::get(
1908 C&: Builder.getContext(), AddressSpace: llvm::AMDGPUAS::BUFFER_RESOURCE);
1909 Function *F = CGM.getIntrinsic(IID: Intrinsic::amdgcn_make_buffer_rsrc,
1910 Tys: {RetTy, Args[0]->getType()});
1911 return Builder.CreateCall(Callee: F, Args);
1912 }
1913 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
1914 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
1915 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
1916 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
1917 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
1918 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
1919 return emitBuiltinWithOneOverloadedType<5>(
1920 CGF&: *this, E, IntrinsicID: Intrinsic::amdgcn_raw_ptr_buffer_store);
1921 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1922 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1923 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1924 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1925 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1926 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
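// Pick the IR result type matching the requested load width: b8/b16/b32 load
// a single integer, while b64/b96/b128 load <2 x i32>, <3 x i32> and
// <4 x i32> respectively.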
1927 llvm::Type *RetTy = nullptr;
1928 switch (BuiltinID) {
1929 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1930 RetTy = Int8Ty;
1931 break;
1932 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1933 RetTy = Int16Ty;
1934 break;
1935 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1936 RetTy = Int32Ty;
1937 break;
1938 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1939 RetTy = llvm::FixedVectorType::get(ElementType: Int32Ty, /*NumElements=*/NumElts: 2);
1940 break;
1941 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1942 RetTy = llvm::FixedVectorType::get(ElementType: Int32Ty, /*NumElements=*/NumElts: 3);
1943 break;
1944 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
1945 RetTy = llvm::FixedVectorType::get(ElementType: Int32Ty, /*NumElements=*/NumElts: 4);
1946 break;
1947 }
1948 Function *F =
1949 CGM.getIntrinsic(IID: Intrinsic::amdgcn_raw_ptr_buffer_load, Tys: RetTy);
1950 return Builder.CreateCall(
1951 Callee: F, Args: {EmitScalarExpr(E: E->getArg(Arg: 0)), EmitScalarExpr(E: E->getArg(Arg: 1)),
1952 EmitScalarExpr(E: E->getArg(Arg: 2)), EmitScalarExpr(E: E->getArg(Arg: 3))});
1953 }
1954 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
1955 return emitBuiltinWithOneOverloadedType<5>(
1956 CGF&: *this, E, IntrinsicID: Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
1957 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1958 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
1959 return emitBuiltinWithOneOverloadedType<5>(
1960 CGF&: *this, E, IntrinsicID: Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
1961 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
1962 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
1963 return emitBuiltinWithOneOverloadedType<5>(
1964 CGF&: *this, E, IntrinsicID: Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
1965 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
1966 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1967 return emitBuiltinWithOneOverloadedType<5>(
1968 CGF&: *this, E, IntrinsicID: Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
1969 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
1970 return emitBuiltinWithOneOverloadedType<2>(
1971 CGF&: *this, E, IntrinsicID: Intrinsic::amdgcn_s_prefetch_data);
1972 case Builtin::BIlogbf:
1973 case Builtin::BI__builtin_logbf: {
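// logbf is expanded via llvm.frexp: frexp returns x = m * 2^e with |m| in
// [0.5, 1), so for finite, nonzero x, logb(x) == e - 1. Infinities and NaNs
// fall through to |x|, and logb(+/-0) yields -infinity. The double-precision
// logb case below follows the same pattern.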
1974 Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1975 Function *FrExpFunc = CGM.getIntrinsic(
1976 IID: Intrinsic::frexp, Tys: {Src0->getType(), Builder.getInt32Ty()});
1977 CallInst *FrExp = Builder.CreateCall(Callee: FrExpFunc, Args: Src0);
1978 Value *Exp = Builder.CreateExtractValue(Agg: FrExp, Idxs: 1);
1979 Value *Add = Builder.CreateAdd(
1980 LHS: Exp, RHS: ConstantInt::getSigned(Ty: Exp->getType(), V: -1), Name: "", HasNUW: false, HasNSW: true);
1981 Value *SIToFP = Builder.CreateSIToFP(V: Add, DestTy: Builder.getFloatTy());
1982 Value *Fabs =
1983 emitBuiltinWithOneOverloadedType<1>(CGF&: *this, E, IntrinsicID: Intrinsic::fabs);
1984 Value *FCmpONE = Builder.CreateFCmpONE(
1985 LHS: Fabs, RHS: ConstantFP::getInfinity(Ty: Builder.getFloatTy()));
1986 Value *Sel1 = Builder.CreateSelect(C: FCmpONE, True: SIToFP, False: Fabs);
1987 Value *FCmpOEQ =
1988 Builder.CreateFCmpOEQ(LHS: Src0, RHS: ConstantFP::getZero(Ty: Builder.getFloatTy()));
1989 Value *Sel2 = Builder.CreateSelect(
1990 C: FCmpOEQ,
1991 True: ConstantFP::getInfinity(Ty: Builder.getFloatTy(), /*Negative=*/true), False: Sel1);
1992 return Sel2;
1993 }
1994 case Builtin::BIlogb:
1995 case Builtin::BI__builtin_logb: {
1996 Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
1997 Function *FrExpFunc = CGM.getIntrinsic(
1998 IID: Intrinsic::frexp, Tys: {Src0->getType(), Builder.getInt32Ty()});
1999 CallInst *FrExp = Builder.CreateCall(Callee: FrExpFunc, Args: Src0);
2000 Value *Exp = Builder.CreateExtractValue(Agg: FrExp, Idxs: 1);
2001 Value *Add = Builder.CreateAdd(
2002 LHS: Exp, RHS: ConstantInt::getSigned(Ty: Exp->getType(), V: -1), Name: "", HasNUW: false, HasNSW: true);
2003 Value *SIToFP = Builder.CreateSIToFP(V: Add, DestTy: Builder.getDoubleTy());
2004 Value *Fabs =
2005 emitBuiltinWithOneOverloadedType<1>(CGF&: *this, E, IntrinsicID: Intrinsic::fabs);
2006 Value *FCmpONE = Builder.CreateFCmpONE(
2007 LHS: Fabs, RHS: ConstantFP::getInfinity(Ty: Builder.getDoubleTy()));
2008 Value *Sel1 = Builder.CreateSelect(C: FCmpONE, True: SIToFP, False: Fabs);
2009 Value *FCmpOEQ =
2010 Builder.CreateFCmpOEQ(LHS: Src0, RHS: ConstantFP::getZero(Ty: Builder.getDoubleTy()));
2011 Value *Sel2 = Builder.CreateSelect(
2012 C: FCmpOEQ,
2013 True: ConstantFP::getInfinity(Ty: Builder.getDoubleTy(), /*Negative=*/true),
2014 False: Sel1);
2015 return Sel2;
2016 }
2017 case Builtin::BIscalbnf:
2018 case Builtin::BI__builtin_scalbnf:
2019 case Builtin::BIscalbn:
2020 case Builtin::BI__builtin_scalbn:
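// With FLT_RADIX == 2 (as on these targets), scalbn(x, n) is exactly
// ldexp(x, n), so emit llvm.ldexp, or its constrained counterpart under
// strict floating point.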
2021 return emitBinaryExpMaybeConstrainedFPBuiltin(
2022 CGF&: *this, E, IntrinsicID: Intrinsic::ldexp, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_ldexp);
2023 default:
2024 return nullptr;
2025 }
2026}
2027