//===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the PTX-specific builtin function database. Users of
// this file must define the BUILTIN macro to make use of this information.
//
//===----------------------------------------------------------------------===//

// The format of this database matches clang/Basic/Builtins.def.

// Fallback for includers that only define BUILTIN: every TARGET_BUILTIN row is
// forwarded to BUILTIN and its FEATURE argument is dropped.  An includer that
// defines TARGET_BUILTIN itself keeps its own definition.
#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
#  define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif

// Required-architecture feature strings.
//
// Each SM_xx expands to a '|'-separated string literal naming sm_xx followed by
// every newer architecture defined here.  The chain is built by adjacent
// string-literal concatenation, starting from the newest entry (SM_90a) and
// working backwards, so adding a new architecture only requires one new link.
//
// Every name is saved with push_macro before being (re)defined.
// NOTE(review): the matching pop_macro pragmas are expected later in this
// file, outside the range visible here -- confirm when adding a name.
#pragma push_macro("SM_53")
#pragma push_macro("SM_60")
#pragma push_macro("SM_70")
#pragma push_macro("SM_72")
#pragma push_macro("SM_75")
#pragma push_macro("SM_80")
#pragma push_macro("SM_86")
#pragma push_macro("SM_87")
#pragma push_macro("SM_89")
#pragma push_macro("SM_90")
#pragma push_macro("SM_90a")
#define SM_90a "sm_90a"
#define SM_90 "sm_90|" SM_90a
#define SM_89 "sm_89|" SM_90
#define SM_87 "sm_87|" SM_89
#define SM_86 "sm_86|" SM_87
#define SM_80 "sm_80|" SM_86
#define SM_75 "sm_75|" SM_80
#define SM_72 "sm_72|" SM_75
#define SM_70 "sm_70|" SM_72
// SM_60 also covers sm_61 and sm_62, which have no macro of their own.
#define SM_60 "sm_60|sm_61|sm_62|" SM_70
#define SM_53 "sm_53|" SM_60

// Required-PTX-version feature strings.
//
// Each PTXnn expands to a '|'-separated string literal naming ptxnn followed
// by every newer PTX version defined here, again built by adjacent
// string-literal concatenation from the newest entry (PTX85) backwards.
//
// Every name is saved with push_macro before being (re)defined.
// NOTE(review): the matching pop_macro pragmas are expected later in this
// file, outside the range visible here -- confirm when adding a name.
#pragma push_macro("PTX42")
#pragma push_macro("PTX60")
#pragma push_macro("PTX61")
#pragma push_macro("PTX62")
#pragma push_macro("PTX63")
#pragma push_macro("PTX64")
#pragma push_macro("PTX65")
#pragma push_macro("PTX70")
#pragma push_macro("PTX71")
#pragma push_macro("PTX72")
#pragma push_macro("PTX73")
#pragma push_macro("PTX74")
#pragma push_macro("PTX75")
#pragma push_macro("PTX76")
#pragma push_macro("PTX77")
#pragma push_macro("PTX78")
#pragma push_macro("PTX80")
#pragma push_macro("PTX81")
#pragma push_macro("PTX82")
#pragma push_macro("PTX83")
#pragma push_macro("PTX84")
#pragma push_macro("PTX85")
#define PTX85 "ptx85"
#define PTX84 "ptx84|" PTX85
#define PTX83 "ptx83|" PTX84
#define PTX82 "ptx82|" PTX83
#define PTX81 "ptx81|" PTX82
#define PTX80 "ptx80|" PTX81
#define PTX78 "ptx78|" PTX80
#define PTX77 "ptx77|" PTX78
#define PTX76 "ptx76|" PTX77
#define PTX75 "ptx75|" PTX76
#define PTX74 "ptx74|" PTX75
#define PTX73 "ptx73|" PTX74
#define PTX72 "ptx72|" PTX73
#define PTX71 "ptx71|" PTX72
#define PTX70 "ptx70|" PTX71
#define PTX65 "ptx65|" PTX70
#define PTX64 "ptx64|" PTX65
#define PTX63 "ptx63|" PTX64
#define PTX62 "ptx62|" PTX63
#define PTX61 "ptx61|" PTX62
#define PTX60 "ptx60|" PTX61
#define PTX42 "ptx42|" PTX60

// AND(a, b) joins two feature lists into the single string "(a),(b)".
// Both arguments must be string literals (or macros expanding to string
// literals, such as SM_xx / PTXnn), because the result is formed by
// adjacent-literal concatenation.
// NOTE(review): the matching pop_macro("AND") is expected later in this file,
// outside the range visible here -- confirm.
#pragma push_macro("AND")
#define AND(a, b) "(" a "),(" b ")"

92// Special Registers
93
94BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc")
95BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc")
96BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc")
97BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc")
98
99BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc")
100BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc")
101BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc")
102BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc")
103
104BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc")
105BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc")
106BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc")
107BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc")
108
109BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc")
110BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
111BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
112BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
113
114TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_x, "i", "nc", AND(SM_90, PTX78))
115TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_y, "i", "nc", AND(SM_90, PTX78))
116TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_z, "i", "nc", AND(SM_90, PTX78))
117TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_w, "i", "nc", AND(SM_90, PTX78))
118
119TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_x, "i", "nc", AND(SM_90, PTX78))
120TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_y, "i", "nc", AND(SM_90, PTX78))
121TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_z, "i", "nc", AND(SM_90, PTX78))
122TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_w, "i", "nc", AND(SM_90, PTX78))
123
124TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_x, "i", "nc", AND(SM_90, PTX78))
125TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_y, "i", "nc", AND(SM_90, PTX78))
126TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_z, "i", "nc", AND(SM_90, PTX78))
127TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_w, "i", "nc", AND(SM_90, PTX78))
128
129TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_x, "i", "nc", AND(SM_90, PTX78))
130TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_y, "i", "nc", AND(SM_90, PTX78))
131TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_z, "i", "nc", AND(SM_90, PTX78))
132TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_w, "i", "nc", AND(SM_90, PTX78))
133
134TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctarank, "i", "nc", AND(SM_90, PTX78))
135TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctarank, "i", "nc", AND(SM_90, PTX78))
136
137TARGET_BUILTIN(__nvvm_is_explicit_cluster, "b", "nc", AND(SM_90, PTX78))
138
139BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
140BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
141BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
142
143BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc")
144BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc")
145BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc")
146
147BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc")
148BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc")
149BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc")
150BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc")
151BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
152
153BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
154BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
155BUILTIN(__nvvm_read_ptx_sreg_globaltimer, "LLi", "n")
156
157BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
158BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
159BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n")
160BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
161
162// MISC
163
164BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
165BUILTIN(__nvvm_exit, "v", "r")
166BUILTIN(__nvvm_reflect, "UicC*", "r")
167TARGET_BUILTIN(__nvvm_nanosleep, "vUi", "n", AND(SM_70, PTX63))
168
169// Min Max
170
171TARGET_BUILTIN(__nvvm_fmin_f16, "hhh", "", AND(SM_80, PTX70))
172TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh", "", AND(SM_80, PTX70))
173TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh", "", AND(SM_80, PTX70))
174TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
175TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
176TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
177TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
178TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh", "",
179 AND(SM_86, PTX72))
180TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
181TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
182TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
183TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
184TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h", "",
185 AND(SM_86, PTX72))
186TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
187 AND(SM_86, PTX72))
188TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
189 AND(SM_86, PTX72))
190TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
191 AND(SM_86, PTX72))
192TARGET_BUILTIN(__nvvm_fmin_bf16, "yyy", "", AND(SM_80, PTX70))
193TARGET_BUILTIN(__nvvm_fmin_ftz_bf16, "yyy", "", AND(SM_80, PTX70))
194TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "yyy", "", AND(SM_80, PTX70))
195TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70))
196TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72))
197TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "yyy", "",
198 AND(SM_86, PTX72))
199TARGET_BUILTIN(__nvvm_fmin_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
200TARGET_BUILTIN(__nvvm_fmin_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
201TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
202TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
203TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "V2yV2yV2y", "",
204 AND(SM_86, PTX72))
205TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "",
206 AND(SM_86, PTX72))
207BUILTIN(__nvvm_fmin_f, "fff", "")
208BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
209TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff", "", AND(SM_80, PTX70))
210TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
211TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
212TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
213TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
214TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
215BUILTIN(__nvvm_fmin_d, "ddd", "")
216
217TARGET_BUILTIN(__nvvm_fmax_f16, "hhh", "", AND(SM_80, PTX70))
218TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh", "", AND(SM_80, PTX70))
219TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh", "", AND(SM_80, PTX70))
220TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
221TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
222TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
223TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
224TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh", "",
225 AND(SM_86, PTX72))
226TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
227TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
228TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
229TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
230TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h", "",
231 AND(SM_86, PTX72))
232TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
233 AND(SM_86, PTX72))
234TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
235 AND(SM_86, PTX72))
236TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
237 AND(SM_86, PTX72))
238TARGET_BUILTIN(__nvvm_fmax_bf16, "yyy", "", AND(SM_80, PTX70))
239TARGET_BUILTIN(__nvvm_fmax_ftz_bf16, "yyy", "", AND(SM_80, PTX70))
240TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "yyy", "", AND(SM_80, PTX70))
241TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70))
242TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72))
243TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "yyy", "",
244 AND(SM_86, PTX72))
245TARGET_BUILTIN(__nvvm_fmax_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
246TARGET_BUILTIN(__nvvm_fmax_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
247TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
248TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
249TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "V2yV2yV2y", "",
250 AND(SM_86, PTX72))
251TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "",
252 AND(SM_86, PTX72))
253BUILTIN(__nvvm_fmax_f, "fff", "")
254BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
255TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff", "", AND(SM_80, PTX70))
256TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
257TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
258TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
259TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
260TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
261BUILTIN(__nvvm_fmax_d, "ddd", "")
262
263// Multiplication
264
265BUILTIN(__nvvm_mulhi_i, "iii", "")
266BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "")
267BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "")
268BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "")
269
270BUILTIN(__nvvm_mul_rn_ftz_f, "fff", "")
271BUILTIN(__nvvm_mul_rn_f, "fff", "")
272BUILTIN(__nvvm_mul_rz_ftz_f, "fff", "")
273BUILTIN(__nvvm_mul_rz_f, "fff", "")
274BUILTIN(__nvvm_mul_rm_ftz_f, "fff", "")
275BUILTIN(__nvvm_mul_rm_f, "fff", "")
276BUILTIN(__nvvm_mul_rp_ftz_f, "fff", "")
277BUILTIN(__nvvm_mul_rp_f, "fff", "")
278
279BUILTIN(__nvvm_mul_rn_d, "ddd", "")
280BUILTIN(__nvvm_mul_rz_d, "ddd", "")
281BUILTIN(__nvvm_mul_rm_d, "ddd", "")
282BUILTIN(__nvvm_mul_rp_d, "ddd", "")
283
284BUILTIN(__nvvm_mul24_i, "iii", "")
285BUILTIN(__nvvm_mul24_ui, "UiUiUi", "")
286
287// Div
288
289BUILTIN(__nvvm_div_approx_ftz_f, "fff", "")
290BUILTIN(__nvvm_div_approx_f, "fff", "")
291
292BUILTIN(__nvvm_div_rn_ftz_f, "fff", "")
293BUILTIN(__nvvm_div_rn_f, "fff", "")
294BUILTIN(__nvvm_div_rz_ftz_f, "fff", "")
295BUILTIN(__nvvm_div_rz_f, "fff", "")
296BUILTIN(__nvvm_div_rm_ftz_f, "fff", "")
297BUILTIN(__nvvm_div_rm_f, "fff", "")
298BUILTIN(__nvvm_div_rp_ftz_f, "fff", "")
299BUILTIN(__nvvm_div_rp_f, "fff", "")
300
301BUILTIN(__nvvm_div_rn_d, "ddd", "")
302BUILTIN(__nvvm_div_rz_d, "ddd", "")
303BUILTIN(__nvvm_div_rm_d, "ddd", "")
304BUILTIN(__nvvm_div_rp_d, "ddd", "")
305
306// Sad
307
308BUILTIN(__nvvm_sad_i, "iiii", "")
309BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "")
310
311// Floor, Ceil
312
313BUILTIN(__nvvm_floor_ftz_f, "ff", "")
314BUILTIN(__nvvm_floor_f, "ff", "")
315BUILTIN(__nvvm_floor_d, "dd", "")
316
317BUILTIN(__nvvm_ceil_ftz_f, "ff", "")
318BUILTIN(__nvvm_ceil_f, "ff", "")
319BUILTIN(__nvvm_ceil_d, "dd", "")
320
321// Abs
322
323BUILTIN(__nvvm_fabs_ftz_f, "ff", "")
324BUILTIN(__nvvm_fabs_f, "ff", "")
325BUILTIN(__nvvm_fabs_d, "dd", "")
326
327// Round
328
329BUILTIN(__nvvm_round_ftz_f, "ff", "")
330BUILTIN(__nvvm_round_f, "ff", "")
331BUILTIN(__nvvm_round_d, "dd", "")
332
333// Trunc
334
335BUILTIN(__nvvm_trunc_ftz_f, "ff", "")
336BUILTIN(__nvvm_trunc_f, "ff", "")
337BUILTIN(__nvvm_trunc_d, "dd", "")
338
339// Saturate
340
341BUILTIN(__nvvm_saturate_ftz_f, "ff", "")
342BUILTIN(__nvvm_saturate_f, "ff", "")
343BUILTIN(__nvvm_saturate_d, "dd", "")
344
345// Exp2, Log2
346
347BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
348BUILTIN(__nvvm_ex2_approx_f, "ff", "")
349BUILTIN(__nvvm_ex2_approx_d, "dd", "")
350TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
351TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
352
353BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
354BUILTIN(__nvvm_lg2_approx_f, "ff", "")
355BUILTIN(__nvvm_lg2_approx_d, "dd", "")
356
357// Sin, Cos
358
359BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "")
360BUILTIN(__nvvm_sin_approx_f, "ff", "")
361
362BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "")
363BUILTIN(__nvvm_cos_approx_f, "ff", "")
364
365// Fma
366
367TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh", "", AND(SM_53, PTX42))
368TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh", "", AND(SM_53, PTX42))
369TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh", "", AND(SM_53, PTX42))
370TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh", "", AND(SM_53, PTX42))
371TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh", "", AND(SM_80, PTX70))
372TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh", "", AND(SM_80, PTX70))
373TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
374TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
375TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
376TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
377TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
378TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
379TARGET_BUILTIN(__nvvm_fma_rn_bf16, "yyyy", "", AND(SM_80, PTX70))
380TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "yyyy", "", AND(SM_80, PTX70))
381TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70))
382TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70))
383BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
384BUILTIN(__nvvm_fma_rn_f, "ffff", "")
385BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "")
386BUILTIN(__nvvm_fma_rz_f, "ffff", "")
387BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "")
388BUILTIN(__nvvm_fma_rm_f, "ffff", "")
389BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "")
390BUILTIN(__nvvm_fma_rp_f, "ffff", "")
391BUILTIN(__nvvm_fma_rn_d, "dddd", "")
392BUILTIN(__nvvm_fma_rz_d, "dddd", "")
393BUILTIN(__nvvm_fma_rm_d, "dddd", "")
394BUILTIN(__nvvm_fma_rp_d, "dddd", "")
395
396// Rcp
397
398BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "")
399BUILTIN(__nvvm_rcp_rn_f, "ff", "")
400BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "")
401BUILTIN(__nvvm_rcp_rz_f, "ff", "")
402BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "")
403BUILTIN(__nvvm_rcp_rm_f, "ff", "")
404BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "")
405BUILTIN(__nvvm_rcp_rp_f, "ff", "")
406
407BUILTIN(__nvvm_rcp_rn_d, "dd", "")
408BUILTIN(__nvvm_rcp_rz_d, "dd", "")
409BUILTIN(__nvvm_rcp_rm_d, "dd", "")
410BUILTIN(__nvvm_rcp_rp_d, "dd", "")
411
412BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "")
413BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "")
414
415// Sqrt
416
417BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "")
418BUILTIN(__nvvm_sqrt_rn_f, "ff", "")
419BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "")
420BUILTIN(__nvvm_sqrt_rz_f, "ff", "")
421BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "")
422BUILTIN(__nvvm_sqrt_rm_f, "ff", "")
423BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "")
424BUILTIN(__nvvm_sqrt_rp_f, "ff", "")
425BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "")
426BUILTIN(__nvvm_sqrt_approx_f, "ff", "")
427
428BUILTIN(__nvvm_sqrt_rn_d, "dd", "")
429BUILTIN(__nvvm_sqrt_rz_d, "dd", "")
430BUILTIN(__nvvm_sqrt_rm_d, "dd", "")
431BUILTIN(__nvvm_sqrt_rp_d, "dd", "")
432
433// Rsqrt
434
435BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "")
436BUILTIN(__nvvm_rsqrt_approx_f, "ff", "")
437BUILTIN(__nvvm_rsqrt_approx_d, "dd", "")
438
439// Add
440
441BUILTIN(__nvvm_add_rn_ftz_f, "fff", "")
442BUILTIN(__nvvm_add_rn_f, "fff", "")
443BUILTIN(__nvvm_add_rz_ftz_f, "fff", "")
444BUILTIN(__nvvm_add_rz_f, "fff", "")
445BUILTIN(__nvvm_add_rm_ftz_f, "fff", "")
446BUILTIN(__nvvm_add_rm_f, "fff", "")
447BUILTIN(__nvvm_add_rp_ftz_f, "fff", "")
448BUILTIN(__nvvm_add_rp_f, "fff", "")
449
450BUILTIN(__nvvm_add_rn_d, "ddd", "")
451BUILTIN(__nvvm_add_rz_d, "ddd", "")
452BUILTIN(__nvvm_add_rm_d, "ddd", "")
453BUILTIN(__nvvm_add_rp_d, "ddd", "")
454
455// Convert
456
457BUILTIN(__nvvm_d2f_rn_ftz, "fd", "")
458BUILTIN(__nvvm_d2f_rn, "fd", "")
459BUILTIN(__nvvm_d2f_rz_ftz, "fd", "")
460BUILTIN(__nvvm_d2f_rz, "fd", "")
461BUILTIN(__nvvm_d2f_rm_ftz, "fd", "")
462BUILTIN(__nvvm_d2f_rm, "fd", "")
463BUILTIN(__nvvm_d2f_rp_ftz, "fd", "")
464BUILTIN(__nvvm_d2f_rp, "fd", "")
465
466BUILTIN(__nvvm_d2i_rn, "id", "")
467BUILTIN(__nvvm_d2i_rz, "id", "")
468BUILTIN(__nvvm_d2i_rm, "id", "")
469BUILTIN(__nvvm_d2i_rp, "id", "")
470
471BUILTIN(__nvvm_d2ui_rn, "Uid", "")
472BUILTIN(__nvvm_d2ui_rz, "Uid", "")
473BUILTIN(__nvvm_d2ui_rm, "Uid", "")
474BUILTIN(__nvvm_d2ui_rp, "Uid", "")
475
476BUILTIN(__nvvm_i2d_rn, "di", "")
477BUILTIN(__nvvm_i2d_rz, "di", "")
478BUILTIN(__nvvm_i2d_rm, "di", "")
479BUILTIN(__nvvm_i2d_rp, "di", "")
480
481BUILTIN(__nvvm_ui2d_rn, "dUi", "")
482BUILTIN(__nvvm_ui2d_rz, "dUi", "")
483BUILTIN(__nvvm_ui2d_rm, "dUi", "")
484BUILTIN(__nvvm_ui2d_rp, "dUi", "")
485
486BUILTIN(__nvvm_f2i_rn_ftz, "if", "")
487BUILTIN(__nvvm_f2i_rn, "if", "")
488BUILTIN(__nvvm_f2i_rz_ftz, "if", "")
489BUILTIN(__nvvm_f2i_rz, "if", "")
490BUILTIN(__nvvm_f2i_rm_ftz, "if", "")
491BUILTIN(__nvvm_f2i_rm, "if", "")
492BUILTIN(__nvvm_f2i_rp_ftz, "if", "")
493BUILTIN(__nvvm_f2i_rp, "if", "")
494
495BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "")
496BUILTIN(__nvvm_f2ui_rn, "Uif", "")
497BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "")
498BUILTIN(__nvvm_f2ui_rz, "Uif", "")
499BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "")
500BUILTIN(__nvvm_f2ui_rm, "Uif", "")
501BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "")
502BUILTIN(__nvvm_f2ui_rp, "Uif", "")
503
504BUILTIN(__nvvm_i2f_rn, "fi", "")
505BUILTIN(__nvvm_i2f_rz, "fi", "")
506BUILTIN(__nvvm_i2f_rm, "fi", "")
507BUILTIN(__nvvm_i2f_rp, "fi", "")
508
509BUILTIN(__nvvm_ui2f_rn, "fUi", "")
510BUILTIN(__nvvm_ui2f_rz, "fUi", "")
511BUILTIN(__nvvm_ui2f_rm, "fUi", "")
512BUILTIN(__nvvm_ui2f_rp, "fUi", "")
513
514BUILTIN(__nvvm_lohi_i2d, "dii", "")
515
516BUILTIN(__nvvm_d2i_lo, "id", "")
517BUILTIN(__nvvm_d2i_hi, "id", "")
518
519BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "")
520BUILTIN(__nvvm_f2ll_rn, "LLif", "")
521BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "")
522BUILTIN(__nvvm_f2ll_rz, "LLif", "")
523BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "")
524BUILTIN(__nvvm_f2ll_rm, "LLif", "")
525BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "")
526BUILTIN(__nvvm_f2ll_rp, "LLif", "")
527
528BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "")
529BUILTIN(__nvvm_f2ull_rn, "ULLif", "")
530BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "")
531BUILTIN(__nvvm_f2ull_rz, "ULLif", "")
532BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "")
533BUILTIN(__nvvm_f2ull_rm, "ULLif", "")
534BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "")
535BUILTIN(__nvvm_f2ull_rp, "ULLif", "")
536
537BUILTIN(__nvvm_d2ll_rn, "LLid", "")
538BUILTIN(__nvvm_d2ll_rz, "LLid", "")
539BUILTIN(__nvvm_d2ll_rm, "LLid", "")
540BUILTIN(__nvvm_d2ll_rp, "LLid", "")
541
542BUILTIN(__nvvm_d2ull_rn, "ULLid", "")
543BUILTIN(__nvvm_d2ull_rz, "ULLid", "")
544BUILTIN(__nvvm_d2ull_rm, "ULLid", "")
545BUILTIN(__nvvm_d2ull_rp, "ULLid", "")
546
547BUILTIN(__nvvm_ll2f_rn, "fLLi", "")
548BUILTIN(__nvvm_ll2f_rz, "fLLi", "")
549BUILTIN(__nvvm_ll2f_rm, "fLLi", "")
550BUILTIN(__nvvm_ll2f_rp, "fLLi", "")
551
552BUILTIN(__nvvm_ull2f_rn, "fULLi", "")
553BUILTIN(__nvvm_ull2f_rz, "fULLi", "")
554BUILTIN(__nvvm_ull2f_rm, "fULLi", "")
555BUILTIN(__nvvm_ull2f_rp, "fULLi", "")
556
557BUILTIN(__nvvm_ll2d_rn, "dLLi", "")
558BUILTIN(__nvvm_ll2d_rz, "dLLi", "")
559BUILTIN(__nvvm_ll2d_rm, "dLLi", "")
560BUILTIN(__nvvm_ll2d_rp, "dLLi", "")
561
562BUILTIN(__nvvm_ull2d_rn, "dULLi", "")
563BUILTIN(__nvvm_ull2d_rz, "dULLi", "")
564BUILTIN(__nvvm_ull2d_rm, "dULLi", "")
565BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
566
567BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "")
568BUILTIN(__nvvm_f2h_rn, "Usf", "")
569
570TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "V2yff", "", AND(SM_80,PTX70))
571TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "V2yff", "", AND(SM_80,PTX70))
572TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "V2yff", "", AND(SM_80,PTX70))
573TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "V2yff", "", AND(SM_80,PTX70))
574
575TARGET_BUILTIN(__nvvm_ff2f16x2_rn, "V2hff", "", AND(SM_80,PTX70))
576TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, "V2hff", "", AND(SM_80,PTX70))
577TARGET_BUILTIN(__nvvm_ff2f16x2_rz, "V2hff", "", AND(SM_80,PTX70))
578TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, "V2hff", "", AND(SM_80,PTX70))
579
580TARGET_BUILTIN(__nvvm_f2bf16_rn, "yf", "", AND(SM_80,PTX70))
581TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "yf", "", AND(SM_80,PTX70))
582TARGET_BUILTIN(__nvvm_f2bf16_rz, "yf", "", AND(SM_80,PTX70))
583TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "yf", "", AND(SM_80,PTX70))
584
585TARGET_BUILTIN(__nvvm_f2tf32_rna, "ZUif", "", AND(SM_80,PTX70))
586
587// Bitcast
588
589BUILTIN(__nvvm_bitcast_f2i, "if", "")
590BUILTIN(__nvvm_bitcast_i2f, "fi", "")
591
592BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
593BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
594
595// FNS
596TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60)
597
598// Sync
599
600BUILTIN(__syncthreads, "v", "")
601BUILTIN(__nvvm_bar0_popc, "ii", "")
602BUILTIN(__nvvm_bar0_and, "ii", "")
603BUILTIN(__nvvm_bar0_or, "ii", "")
604BUILTIN(__nvvm_bar_sync, "vi", "n")
605TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60)
606TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60)
607TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60)
608
609TARGET_BUILTIN(__nvvm_barrier_cluster_arrive, "v", "n", AND(SM_90,PTX78))
610TARGET_BUILTIN(__nvvm_barrier_cluster_arrive_relaxed, "v", "n", AND(SM_90,PTX80))
611TARGET_BUILTIN(__nvvm_barrier_cluster_wait, "v", "n", AND(SM_90,PTX78))
612TARGET_BUILTIN(__nvvm_fence_sc_cluster, "v", "n", AND(SM_90,PTX78))
613
614// Shuffle
615
616BUILTIN(__nvvm_shfl_down_i32, "iiii", "")
617BUILTIN(__nvvm_shfl_down_f32, "ffii", "")
618BUILTIN(__nvvm_shfl_up_i32, "iiii", "")
619BUILTIN(__nvvm_shfl_up_f32, "ffii", "")
620BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "")
621BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "")
622BUILTIN(__nvvm_shfl_idx_i32, "iiii", "")
623BUILTIN(__nvvm_shfl_idx_f32, "ffii", "")
624
625TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60)
626TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60)
627TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60)
628TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60)
629TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60)
630TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60)
631TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60)
632TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60)
633
634// Vote
635BUILTIN(__nvvm_vote_all, "bb", "")
636BUILTIN(__nvvm_vote_any, "bb", "")
637BUILTIN(__nvvm_vote_uni, "bb", "")
638BUILTIN(__nvvm_vote_ballot, "Uib", "")
639
640TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60)
641TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60)
642TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60)
643TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60)
644
645// Mask
646TARGET_BUILTIN(__nvvm_activemask, "Ui", "n", PTX62)
647
648// Match
649TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60))
650TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60))
651// These return a pair {value, predicate}, which requires custom lowering.
652TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60))
653TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60))
654
655// Redux
656TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70))
657TARGET_BUILTIN(__nvvm_redux_sync_min, "iii", "", AND(SM_80,PTX70))
658TARGET_BUILTIN(__nvvm_redux_sync_max, "iii", "", AND(SM_80,PTX70))
659TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii", "", AND(SM_80,PTX70))
660TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii", "", AND(SM_80,PTX70))
661TARGET_BUILTIN(__nvvm_redux_sync_and, "iii", "", AND(SM_80,PTX70))
662TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii", "", AND(SM_80,PTX70))
663TARGET_BUILTIN(__nvvm_redux_sync_or, "iii", "", AND(SM_80,PTX70))
664
665// Membar
666
667BUILTIN(__nvvm_membar_cta, "v", "")
668BUILTIN(__nvvm_membar_gl, "v", "")
669BUILTIN(__nvvm_membar_sys, "v", "")
670
671// mbarrier
672
673TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i", "", AND(SM_80,PTX70))
674TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i", "", AND(SM_80,PTX70))
675
676TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*", "", AND(SM_80,PTX70))
677TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3", "", AND(SM_80,PTX70))
678
679TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*", "", AND(SM_80,PTX70))
680TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3", "", AND(SM_80,PTX70))
681TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
682TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
683
684TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*", "", AND(SM_80,PTX70))
685TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3", "", AND(SM_80,PTX70))
686TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
687TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
688
689TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi", "", AND(SM_80,PTX70))
690TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi", "", AND(SM_80,PTX70))
691
692TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi", "", AND(SM_80,PTX70))
693
694// Memcpy, Memset
695
696BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","")
697BUILTIN(__nvvm_memset, "vUc*Uczi","")
698
699// Image
700
701BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "")
702BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "")
703BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "")
704BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "")
705
706BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "")
707BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "")
708BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "")
709BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "")
710
711BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "")
712BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "")
713BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "")
714BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "")
715BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "")
716BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "")
717BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "")
718BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "")
719
720// Atomic
721//
722// We need the atom intrinsics because
723// - they are used in converging analysis
724// - they are used in address space analysis and optimization
725// So it does not hurt to expose them as builtins.
726//
727BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n")
728TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60)
729TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60)
730BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n")
731TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60)
732TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60)
733BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n")
734TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
735TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
736BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n")
737TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60)
738TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60)
739TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60)
740TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60)
741TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60)
742
743BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n")
744BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n")
745BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n")
746
747BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n")
748TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60)
749TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60)
750BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n")
751TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60)
752TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60)
753BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n")
754TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
755TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
756
// Atomic max builtins. Signed (i, l, ll) and unsigned (ui, ul, ull) operand
// types get separate entries, and the type string of each entry uses the
// matching signed or unsigned element type. For every type there is an
// unprefixed plain BUILTIN plus _cta_ and _sys_ TARGET_BUILTINs gated on SM_60.
757BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n")
758TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60)
759TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60)
760BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n")
761TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60)
762TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60)
763BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n")
764TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60)
765TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60)
766BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n")
767TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
768TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
769BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n")
770TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
771TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
772BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n")
773TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
774TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
775
// Atomic min builtins. Signed (i, l, ll) and unsigned (ui, ul, ull) operand
// types get separate entries, and the type string of each entry uses the
// matching signed or unsigned element type. For every type there is an
// unprefixed plain BUILTIN plus _cta_ and _sys_ TARGET_BUILTINs gated on SM_60.
776BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n")
777TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60)
778TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60)
779BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n")
780TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60)
781TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60)
782BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n")
783TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60)
784TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60)
785BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n")
786TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
787TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
788BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n")
789TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
790TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
791BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n")
792TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
793TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
794
// Atomic increment and decrement builtins. Only the unsigned-int (ui) operand
// type is listed for each operation. The unprefixed forms are plain BUILTINs;
// the _cta_ and _sys_ forms are TARGET_BUILTINs gated on SM_60.
795BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n")
796TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
797TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
798BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n")
799TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
800TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
801
// Atomic bitwise-AND builtins for the i, l and ll operand widths. Per width:
// an unprefixed plain BUILTIN plus _cta_ and _sys_ TARGET_BUILTINs gated on
// SM_60.
802BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n")
803TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60)
804TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60)
805BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n")
806TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60)
807TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60)
808BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n")
809TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
810TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
811
// Atomic bitwise-OR builtins for the i, l and ll operand widths. Per width:
// an unprefixed plain BUILTIN plus _cta_ and _sys_ TARGET_BUILTINs gated on
// SM_60.
812BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n")
813TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60)
814TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60)
815BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n")
816TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60)
817TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60)
818BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n")
819TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
820TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
821
// Atomic bitwise-XOR builtins for the i, l and ll operand widths. Per width:
// an unprefixed plain BUILTIN plus _cta_ and _sys_ TARGET_BUILTINs gated on
// SM_60.
822BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n")
823TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60)
824TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60)
825BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n")
826TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60)
827TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60)
828BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n")
829TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
830TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
831
// Atomic compare-and-swap builtins for the i, l and ll operand widths. Unlike
// the other atomic groups, the type string carries two value operands after
// the pointer (e.g. "iiD*ii"). NOTE(review): presumably (compare, new value)
// in that order -- confirm against the builtin's lowering.
// Per width: an unprefixed plain BUILTIN plus _cta_ and _sys_ TARGET_BUILTINs
// gated on SM_60.
832BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n")
833TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60)
834TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60)
835BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n")
836TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
837TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
838BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n")
839TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
840TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
841
842// Compiler error and warning builtins.
// Both entries share the type string "vcC*4": a void return and a single
// pointer-to-const-char argument whose pointer carries the address-space
// suffix 4. NOTE(review): the argument is presumably the diagnostic message
// string -- confirm against the NVPTX backend.
843BUILTIN(__nvvm_compiler_error, "vcC*4", "n")
844BUILTIN(__nvvm_compiler_warn, "vcC*4", "n")
845
// __nvvm_ldu_* load builtins. Every entry takes one pointer-to-const argument
// and returns a value of the pointee type; all use an empty attribute string
// and plain BUILTIN (no feature requirement).
// NOTE(review): "ldu" presumably corresponds to the PTX ldu instruction --
// confirm against the PTX ISA documentation.
// Layout of the group, in order:
//   - plain/signed integer scalars (c, sc, s, i, l, ll)
//   - unsigned integer scalars (uc, us, ui, ul, ull)
//   - floating-point scalars (h, f, d)
//   - E2/E4 vector forms of the plain/signed integer types
//   - E2/E4 vector forms of the unsigned integer types
//   - E2/E4 vector forms of the floating-point types
// Only the vector widths listed below are provided (e.g. l/ll/ul/ull/h/d have
// a 2-element form only).
846BUILTIN(__nvvm_ldu_c, "ccC*", "")
847BUILTIN(__nvvm_ldu_sc, "ScScC*", "")
848BUILTIN(__nvvm_ldu_s, "ssC*", "")
849BUILTIN(__nvvm_ldu_i, "iiC*", "")
850BUILTIN(__nvvm_ldu_l, "LiLiC*", "")
851BUILTIN(__nvvm_ldu_ll, "LLiLLiC*", "")
852
853BUILTIN(__nvvm_ldu_uc, "UcUcC*", "")
854BUILTIN(__nvvm_ldu_us, "UsUsC*", "")
855BUILTIN(__nvvm_ldu_ui, "UiUiC*", "")
856BUILTIN(__nvvm_ldu_ul, "ULiULiC*", "")
857BUILTIN(__nvvm_ldu_ull, "ULLiULLiC*", "")
858
859BUILTIN(__nvvm_ldu_h, "hhC*", "")
860BUILTIN(__nvvm_ldu_f, "ffC*", "")
861BUILTIN(__nvvm_ldu_d, "ddC*", "")
862
863BUILTIN(__nvvm_ldu_c2, "E2cE2cC*", "")
864BUILTIN(__nvvm_ldu_sc2, "E2ScE2ScC*", "")
865BUILTIN(__nvvm_ldu_c4, "E4cE4cC*", "")
866BUILTIN(__nvvm_ldu_sc4, "E4ScE4ScC*", "")
867BUILTIN(__nvvm_ldu_s2, "E2sE2sC*", "")
868BUILTIN(__nvvm_ldu_s4, "E4sE4sC*", "")
869BUILTIN(__nvvm_ldu_i2, "E2iE2iC*", "")
870BUILTIN(__nvvm_ldu_i4, "E4iE4iC*", "")
871BUILTIN(__nvvm_ldu_l2, "E2LiE2LiC*", "")
872BUILTIN(__nvvm_ldu_ll2, "E2LLiE2LLiC*", "")
873
874BUILTIN(__nvvm_ldu_uc2, "E2UcE2UcC*", "")
875BUILTIN(__nvvm_ldu_uc4, "E4UcE4UcC*", "")
876BUILTIN(__nvvm_ldu_us2, "E2UsE2UsC*", "")
877BUILTIN(__nvvm_ldu_us4, "E4UsE4UsC*", "")
878BUILTIN(__nvvm_ldu_ui2, "E2UiE2UiC*", "")
879BUILTIN(__nvvm_ldu_ui4, "E4UiE4UiC*", "")
880BUILTIN(__nvvm_ldu_ul2, "E2ULiE2ULiC*", "")
881BUILTIN(__nvvm_ldu_ull2, "E2ULLiE2ULLiC*", "")
882
883BUILTIN(__nvvm_ldu_h2, "E2hE2hC*", "")
884BUILTIN(__nvvm_ldu_f2, "E2fE2fC*", "")
885BUILTIN(__nvvm_ldu_f4, "E4fE4fC*", "")
886BUILTIN(__nvvm_ldu_d2, "E2dE2dC*", "")
887
// __nvvm_ldg_* load builtins. Every entry takes one pointer-to-const argument
// and returns a value of the pointee type; all use an empty attribute string
// and plain BUILTIN (no feature requirement).
// NOTE(review): "ldg" presumably denotes a read-only global-memory load --
// confirm against the PTX ISA documentation.
// Layout of the group, in order:
//   - plain/signed integer scalars (c, sc, s, i, l, ll)
//   - unsigned integer scalars (uc, us, ui, ul, ull)
//   - floating-point scalars (h, f, d)
//   - E2/E4 vector forms of the plain/signed integer types
//   - E2/E4 vector forms of the unsigned integer types
//   - E2/E4 vector forms of the floating-point types
// Only the vector widths listed below are provided (e.g. l/ll/ul/ull/h/d have
// a 2-element form only).
888BUILTIN(__nvvm_ldg_c, "ccC*", "")
889BUILTIN(__nvvm_ldg_sc, "ScScC*", "")
890BUILTIN(__nvvm_ldg_s, "ssC*", "")
891BUILTIN(__nvvm_ldg_i, "iiC*", "")
892BUILTIN(__nvvm_ldg_l, "LiLiC*", "")
893BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "")
894
895BUILTIN(__nvvm_ldg_uc, "UcUcC*", "")
896BUILTIN(__nvvm_ldg_us, "UsUsC*", "")
897BUILTIN(__nvvm_ldg_ui, "UiUiC*", "")
898BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "")
899BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "")
900
901BUILTIN(__nvvm_ldg_h, "hhC*", "")
902BUILTIN(__nvvm_ldg_f, "ffC*", "")
903BUILTIN(__nvvm_ldg_d, "ddC*", "")
904
905BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "")
906BUILTIN(__nvvm_ldg_sc2, "E2ScE2ScC*", "")
907BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "")
908BUILTIN(__nvvm_ldg_sc4, "E4ScE4ScC*", "")
909BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "")
910BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "")
911BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "")
912BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "")
913BUILTIN(__nvvm_ldg_l2, "E2LiE2LiC*", "")
914BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "")
915
916BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "")
917BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "")
918BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "")
919BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "")
920BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "")
921BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "")
922BUILTIN(__nvvm_ldg_ul2, "E2ULiE2ULiC*", "")
923BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "")
924
925BUILTIN(__nvvm_ldg_h2, "E2hE2hC*", "")
926BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
927BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
928BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
929
930// Address space predicates.
// Each entry takes a pointer-to-const-void ("vC*") and returns bool ("b"),
// with attribute string "nc". The const, global, local and shared predicates
// are plain BUILTINs; the shared_cluster predicate is a TARGET_BUILTIN that
// additionally requires AND(SM_90,PTX78).
931BUILTIN(__nvvm_isspacep_const, "bvC*", "nc")
932BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
933BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
934BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
935TARGET_BUILTIN(__nvvm_isspacep_shared_cluster,"bvC*", "nc", AND(SM_90,PTX78))
936
937// Builtins to support WMMA instructions on sm_70
// Three shapes are covered: m16n16k16, m32n8k16 and m8n32k16. All entries
// require SM_70; the m16n16k16 entries pair it with PTX60, while the m32n8k16
// and m8n32k16 entries pair it with PTX61.
// Per shape there are six load/store entries (ld_a, ld_b, ld_c_f16, ld_c_f32,
// st_c_f16, st_c_f32), followed further down by four mma entries
// (f16f16, f32f16, f32f32, f16f32).
// The _f32 load/store entries use float pointers ("vf*fC*UiIi"); the others
// use int pointers ("vi*iC*UiIi"). The trailing "Ii" operands are integers
// that must be compile-time constants per the Builtins.def type encoding.
938TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
939TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
940TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
941TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
942TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
943TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
944
945TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
946TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
947TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
948TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
949TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
950TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
951
952TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
953TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
954TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
955TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
956TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
957TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
958
959TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
960TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
961TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
962TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
963
964TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
965TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
966TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
967TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
968
969TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
970TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
971TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
972TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
973
974// Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75
// Feature gates used in this group:
//   - __imma_* 8-bit entries (s8/u8; shapes m16n16k16, m32n8k16, m8n32k16):
//     AND(SM_72,PTX63)
//   - __imma_m8n8k32_* 4-bit entries (s4/u4): AND(SM_75,PTX63)
//   - __bmma_m8n8k128_* 1-bit entries: AND(SM_75,PTX63), except
//     __bmma_m8n8k128_mma_and_popc_b1, which requires AND(SM_80,PTX71).
// All load/store entries share the type string "vi*iC*UiIi". The __imma mma
// entries end in two constant-integer operands ("IiIi"), whereas the two
// __bmma mma entries end in a single one ("Ii").
975TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
976TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
977TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
978TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_80,PTX71))
979TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63))
980TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
981TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
982TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
983TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
984TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
985TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
986TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
987TARGET_BUILTIN(__imma_m16n16k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
988TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
989TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
990TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
991TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
992TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
993TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
994TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
995TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
996TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
997TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
998TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
999TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
1000TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
1001TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
1002TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
1003TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
1004TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
1005TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1006TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1007TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1008TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1009TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1010TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
1011TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
1012TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1013
1014// Builtins to support double and alternate float WMMA instructions on sm_80
// Every entry in this group is gated on AND(SM_80,PTX70). Sub-groups:
//   - __dmma_m8n8k4_*: double-precision; all pointers are double
//     ("vd*dC*..."), with ld_a/ld_b/ld_c/st_c_f64/mma_f64 entries.
//   - __mma_bf16_*: shapes m16n16k16, m8n32k16 and m32n8k16; only ld_a, ld_b
//     and mma_f32 are listed per shape (no ld_c/st_c entries in this group).
//   - __mma_tf32_m16n16k8_*: ld_a, ld_b, ld_c and mma_f32.
// Note that the m16n16k8 store entry is named __mma_m16n16k8_st_c_f32, without
// the _tf32_ infix its siblings carry. The builtin name is external interface
// and is intentionally left unchanged here.
1015TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70))
1016TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70))
1017TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70))
1018TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70))
1019TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70))
1020
1021TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1022TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1023TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1024TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1025TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1026TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1027TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1028TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1029TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1030
1031TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1032TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1033TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70))
1034TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70))
1035TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1036
1037// Async Copy
// Every entry in this group is gated on AND(SM_80,PTX70) and returns void.
//   - mbarrier_arrive[_noinc][_shared]: take a single "Wi*" pointer; the
//     _shared forms annotate that pointer with address space 3 ("Wi*3").
//   - ca/cg_shared_global_{4,8,16}: type string "vv*3vC*1." -- a void pointer
//     in address space 3, a const void pointer in address space 1, and a
//     trailing "." (variadic marker in the Builtins.def encoding).
//     NOTE(review): the numeric suffix is presumably the copy size in bytes,
//     and the extra variadic operand presumably an optional source size --
//     confirm against the builtin's codegen.
//   - commit_group / wait_all take no arguments; wait_group takes one
//     constant-integer operand ("Ii").
1038TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70))
1039TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70))
1040TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", "", AND(SM_80,PTX70))
1041TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70))
1042
1043TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1.", "", AND(SM_80,PTX70))
1044TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1.", "", AND(SM_80,PTX70))
1045TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70))
1046TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70))
1047
1048TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70))
1049TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70))
1050TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
1051
1052
1053// bf16, bf16x2 abs, neg
// Scalar forms take and return "y"; the x2 forms take and return a 2-element
// vector of it ("V2y"). All four entries are gated on AND(SM_80,PTX70).
1054TARGET_BUILTIN(__nvvm_abs_bf16, "yy", "", AND(SM_80,PTX70))
1055TARGET_BUILTIN(__nvvm_abs_bf16x2, "V2yV2y", "", AND(SM_80,PTX70))
1056TARGET_BUILTIN(__nvvm_neg_bf16, "yy", "", AND(SM_80,PTX70))
1057TARGET_BUILTIN(__nvvm_neg_bf16x2, "V2yV2y", "", AND(SM_80,PTX70))
1058
// mapa / getctarank builtins, all gated on SM_90 with PTX78.
//   - __nvvm_mapa takes a void pointer and an int and returns a void pointer;
//     the _shared_cluster form uses address-space-3 pointers for both the
//     argument and the result.
//   - __nvvm_getctarank takes a void pointer and returns an int; the
//     _shared_cluster form takes an address-space-3 pointer.
// NOTE(review): the int operand of mapa is presumably a CTA rank within the
// cluster -- confirm against the PTX ISA description of mapa.
1059TARGET_BUILTIN(__nvvm_mapa, "v*v*i", "", AND(SM_90, PTX78))
1060TARGET_BUILTIN(__nvvm_mapa_shared_cluster, "v*3v*3i", "", AND(SM_90, PTX78))
1061TARGET_BUILTIN(__nvvm_getctarank, "iv*", "", AND(SM_90, PTX78))
1062TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
1063
// End-of-file cleanup. BUILTIN and TARGET_BUILTIN are supplied (or derived) by
// the includer, so they are undefined here to let the next inclusion define
// them afresh. The helper macros used to build feature strings (AND, SM_*,
// PTX*) are restored to whatever definition the includer had before this file
// saved them with #pragma push_macro.
// NOTE(review): each pop_macro below should have a matching push_macro
// earlier in the file -- verify the two lists stay in sync when adding a new
// SM_* or PTX* level.
1064#undef BUILTIN
1065#undef TARGET_BUILTIN
1066#pragma pop_macro("AND")
1067#pragma pop_macro("SM_53")
1068#pragma pop_macro("SM_60")
1069#pragma pop_macro("SM_70")
1070#pragma pop_macro("SM_72")
1071#pragma pop_macro("SM_75")
1072#pragma pop_macro("SM_80")
1073#pragma pop_macro("SM_86")
1074#pragma pop_macro("SM_87")
1075#pragma pop_macro("SM_89")
1076#pragma pop_macro("SM_90")
1077#pragma pop_macro("SM_90a")
1078#pragma pop_macro("PTX42")
1079#pragma pop_macro("PTX60")
1080#pragma pop_macro("PTX61")
1081#pragma pop_macro("PTX62")
1082#pragma pop_macro("PTX63")
1083#pragma pop_macro("PTX64")
1084#pragma pop_macro("PTX65")
1085#pragma pop_macro("PTX70")
1086#pragma pop_macro("PTX71")
1087#pragma pop_macro("PTX72")
1088#pragma pop_macro("PTX73")
1089#pragma pop_macro("PTX74")
1090#pragma pop_macro("PTX75")
1091#pragma pop_macro("PTX76")
1092#pragma pop_macro("PTX77")
1093#pragma pop_macro("PTX78")
1094#pragma pop_macro("PTX80")
1095#pragma pop_macro("PTX81")
1096#pragma pop_macro("PTX82")
1097#pragma pop_macro("PTX83")
1098#pragma pop_macro("PTX84")
1099#pragma pop_macro("PTX85")
1100