1 | //===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the PTX-specific builtin function database. Users must |
10 | // define BUILTIN; if TARGET_BUILTIN is left undefined it falls back to BUILTIN. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | // The format of this database matches clang/Basic/Builtins.def. |
15 | |
// Fallback for includers that define only BUILTIN: TARGET_BUILTIN entries are
// routed through BUILTIN and their FEATURE argument is discarded. Includers
// that define TARGET_BUILTIN themselves are left untouched.
16 | #if defined(BUILTIN) && !defined(TARGET_BUILTIN) |
17 | # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) |
18 | #endif |
19 | |
// Feature-string helpers for SM architectures. Each SM_<N> expands, through
// adjacent string-literal concatenation, to a '|'-separated list that names
// sm_<N> followed by every newer architecture defined here (for example,
// SM_89 expands to "sm_89|sm_90|sm_90a").
// Any pre-existing definitions of these names are saved with push_macro
// before being redefined; the matching pop_macro is not visible in this part
// of the file.
// NOTE(review): '|' presumably means "any one of" to whatever consumes the
// feature string -- confirm against the feature-string parser.
20 | #pragma push_macro("SM_53") |
21 | #pragma push_macro("SM_70") |
22 | #pragma push_macro("SM_72") |
23 | #pragma push_macro("SM_75") |
24 | #pragma push_macro("SM_80") |
25 | #pragma push_macro("SM_86") |
26 | #pragma push_macro("SM_87") |
27 | #pragma push_macro("SM_89") |
28 | #pragma push_macro("SM_90") |
29 | #pragma push_macro("SM_90a") |
// Listed newest-first; each macro chains to the next newer one.
30 | #define SM_90a "sm_90a" |
31 | #define SM_90 "sm_90|" SM_90a |
32 | #define SM_89 "sm_89|" SM_90 |
33 | #define SM_87 "sm_87|" SM_89 |
34 | #define SM_86 "sm_86|" SM_87 |
35 | #define SM_80 "sm_80|" SM_86 |
36 | #define SM_75 "sm_75|" SM_80 |
37 | #define SM_72 "sm_72|" SM_75 |
38 | #define SM_70 "sm_70|" SM_72 |
39 | |
// SM_60 is saved separately from the group above. Its list also names sm_61
// and sm_62 before chaining to SM_70; SM_53 in turn chains to SM_60.
40 | #pragma push_macro("SM_60") |
41 | #define SM_60 "sm_60|sm_61|sm_62|" SM_70 |
42 | #define SM_53 "sm_53|" SM_60 |
43 | |
// Feature-string helpers for PTX ISA versions, built the same way as the SM_*
// macros: each PTX<NN> expands, through adjacent string-literal
// concatenation, to a '|'-separated list naming ptx<NN> and every newer
// version defined here, ending at "ptx85".
// Any pre-existing definitions of these names are saved with push_macro
// before being redefined; the matching pop_macro is not visible in this part
// of the file.
// NOTE(review): '|' presumably means "any one of" to whatever consumes the
// feature string -- confirm against the feature-string parser.
44 | #pragma push_macro("PTX42") |
45 | #pragma push_macro("PTX60") |
46 | #pragma push_macro("PTX61") |
47 | #pragma push_macro("PTX62") |
48 | #pragma push_macro("PTX63") |
49 | #pragma push_macro("PTX64") |
50 | #pragma push_macro("PTX65") |
51 | #pragma push_macro("PTX70") |
52 | #pragma push_macro("PTX71") |
53 | #pragma push_macro("PTX72") |
54 | #pragma push_macro("PTX73") |
55 | #pragma push_macro("PTX74") |
56 | #pragma push_macro("PTX75") |
57 | #pragma push_macro("PTX76") |
58 | #pragma push_macro("PTX77") |
59 | #pragma push_macro("PTX78") |
60 | #pragma push_macro("PTX80") |
61 | #pragma push_macro("PTX81") |
62 | #pragma push_macro("PTX82") |
63 | #pragma push_macro("PTX83") |
64 | #pragma push_macro("PTX84") |
65 | #pragma push_macro("PTX85") |
// Listed newest-first; each macro chains to the next newer one. The chain
// skips from PTX78 to PTX80, from PTX65 to PTX70 and from PTX42 to PTX60, so
// no macros exist for the versions in between.
66 | #define PTX85 "ptx85" |
67 | #define PTX84 "ptx84|" PTX85 |
68 | #define PTX83 "ptx83|" PTX84 |
69 | #define PTX82 "ptx82|" PTX83 |
70 | #define PTX81 "ptx81|" PTX82 |
71 | #define PTX80 "ptx80|" PTX81 |
72 | #define PTX78 "ptx78|" PTX80 |
73 | #define PTX77 "ptx77|" PTX78 |
74 | #define PTX76 "ptx76|" PTX77 |
75 | #define PTX75 "ptx75|" PTX76 |
76 | #define PTX74 "ptx74|" PTX75 |
77 | #define PTX73 "ptx73|" PTX74 |
78 | #define PTX72 "ptx72|" PTX73 |
79 | #define PTX71 "ptx71|" PTX72 |
80 | #define PTX70 "ptx70|" PTX71 |
81 | #define PTX65 "ptx65|" PTX70 |
82 | #define PTX64 "ptx64|" PTX65 |
83 | #define PTX63 "ptx63|" PTX64 |
84 | #define PTX62 "ptx62|" PTX63 |
85 | #define PTX61 "ptx61|" PTX62 |
86 | #define PTX60 "ptx60|" PTX61 |
87 | #define PTX42 "ptx42|" PTX60 |
88 | |
// AND(a, b) joins two feature strings into the single literal "(a),(b)" using
// adjacent string-literal concatenation; both arguments must therefore be
// string literals (or macros expanding to them, such as SM_* and PTX*).
// Any pre-existing AND macro is saved with push_macro first.
// NOTE(review): the name suggests ',' means "both required" to the
// feature-string consumer -- confirm against the feature-string parser.
89 | #pragma push_macro("AND") |
90 | #define AND(a, b) "(" a "),(" b ")" |
91 | |
92 | // Special Registers |
93 | |
94 | BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i" , "nc" ) |
95 | BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i" , "nc" ) |
96 | BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i" , "nc" ) |
97 | BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i" , "nc" ) |
98 | |
99 | BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i" , "nc" ) |
100 | BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i" , "nc" ) |
101 | BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i" , "nc" ) |
102 | BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i" , "nc" ) |
103 | |
104 | BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i" , "nc" ) |
105 | BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i" , "nc" ) |
106 | BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i" , "nc" ) |
107 | BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i" , "nc" ) |
108 | |
109 | BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i" , "nc" ) |
110 | BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i" , "nc" ) |
111 | BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i" , "nc" ) |
112 | BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i" , "nc" ) |
113 | |
114 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_x, "i" , "nc" , AND(SM_90, PTX78)) |
115 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_y, "i" , "nc" , AND(SM_90, PTX78)) |
116 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_z, "i" , "nc" , AND(SM_90, PTX78)) |
117 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_w, "i" , "nc" , AND(SM_90, PTX78)) |
118 | |
119 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_x, "i" , "nc" , AND(SM_90, PTX78)) |
120 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_y, "i" , "nc" , AND(SM_90, PTX78)) |
121 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_z, "i" , "nc" , AND(SM_90, PTX78)) |
122 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_w, "i" , "nc" , AND(SM_90, PTX78)) |
123 | |
124 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_x, "i" , "nc" , AND(SM_90, PTX78)) |
125 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_y, "i" , "nc" , AND(SM_90, PTX78)) |
126 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_z, "i" , "nc" , AND(SM_90, PTX78)) |
127 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_w, "i" , "nc" , AND(SM_90, PTX78)) |
128 | |
129 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_x, "i" , "nc" , AND(SM_90, PTX78)) |
130 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_y, "i" , "nc" , AND(SM_90, PTX78)) |
131 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_z, "i" , "nc" , AND(SM_90, PTX78)) |
132 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_w, "i" , "nc" , AND(SM_90, PTX78)) |
133 | |
134 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctarank, "i" , "nc" , AND(SM_90, PTX78)) |
135 | TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctarank, "i" , "nc" , AND(SM_90, PTX78)) |
136 | |
137 | TARGET_BUILTIN(__nvvm_is_explicit_cluster, "b" , "nc" , AND(SM_90, PTX78)) |
138 | |
139 | BUILTIN(__nvvm_read_ptx_sreg_laneid, "i" , "nc" ) |
140 | BUILTIN(__nvvm_read_ptx_sreg_warpid, "i" , "nc" ) |
141 | BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i" , "nc" ) |
142 | |
143 | BUILTIN(__nvvm_read_ptx_sreg_smid, "i" , "nc" ) |
144 | BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i" , "nc" ) |
145 | BUILTIN(__nvvm_read_ptx_sreg_gridid, "i" , "nc" ) |
146 | |
147 | BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i" , "nc" ) |
148 | BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i" , "nc" ) |
149 | BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i" , "nc" ) |
150 | BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i" , "nc" ) |
151 | BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i" , "nc" ) |
152 | |
153 | BUILTIN(__nvvm_read_ptx_sreg_clock, "i" , "n" ) |
154 | BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi" , "n" ) |
155 | BUILTIN(__nvvm_read_ptx_sreg_globaltimer, "LLi" , "n" ) |
156 | |
157 | BUILTIN(__nvvm_read_ptx_sreg_pm0, "i" , "n" ) |
158 | BUILTIN(__nvvm_read_ptx_sreg_pm1, "i" , "n" ) |
159 | BUILTIN(__nvvm_read_ptx_sreg_pm2, "i" , "n" ) |
160 | BUILTIN(__nvvm_read_ptx_sreg_pm3, "i" , "n" ) |
161 | |
162 | // MISC |
163 | |
164 | BUILTIN(__nvvm_prmt, "UiUiUiUi" , "" ) |
165 | BUILTIN(__nvvm_exit, "v" , "r" ) |
166 | BUILTIN(__nvvm_reflect, "UicC*" , "r" ) |
167 | TARGET_BUILTIN(__nvvm_nanosleep, "vUi" , "n" , AND(SM_70, PTX63)) |
168 | |
169 | // Min Max |
170 | |
171 | TARGET_BUILTIN(__nvvm_fmin_f16, "hhh" , "" , AND(SM_80, PTX70)) |
172 | TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh" , "" , AND(SM_80, PTX70)) |
173 | TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh" , "" , AND(SM_80, PTX70)) |
174 | TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh" , "" , AND(SM_80, PTX70)) |
175 | TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh" , "" , AND(SM_86, PTX72)) |
176 | TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh" , "" , AND(SM_86, PTX72)) |
177 | TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh" , "" , AND(SM_86, PTX72)) |
178 | TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh" , "" , |
179 | AND(SM_86, PTX72)) |
180 | TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h" , "" , AND(SM_80, PTX70)) |
181 | TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h" , "" , AND(SM_80, PTX70)) |
182 | TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h" , "" , AND(SM_80, PTX70)) |
183 | TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h" , "" , AND(SM_80, PTX70)) |
184 | TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h" , "" , |
185 | AND(SM_86, PTX72)) |
186 | TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h" , "" , |
187 | AND(SM_86, PTX72)) |
188 | TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h" , "" , |
189 | AND(SM_86, PTX72)) |
190 | TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h" , "" , |
191 | AND(SM_86, PTX72)) |
192 | TARGET_BUILTIN(__nvvm_fmin_bf16, "yyy" , "" , AND(SM_80, PTX70)) |
193 | TARGET_BUILTIN(__nvvm_fmin_ftz_bf16, "yyy" , "" , AND(SM_80, PTX70)) |
194 | TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "yyy" , "" , AND(SM_80, PTX70)) |
195 | TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16, "yyy" , "" , AND(SM_80, PTX70)) |
196 | TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "yyy" , "" , AND(SM_86, PTX72)) |
197 | TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "yyy" , "" , |
198 | AND(SM_86, PTX72)) |
199 | TARGET_BUILTIN(__nvvm_fmin_bf16x2, "V2yV2yV2y" , "" , AND(SM_80, PTX70)) |
200 | TARGET_BUILTIN(__nvvm_fmin_ftz_bf16x2, "V2yV2yV2y" , "" , AND(SM_80, PTX70)) |
201 | TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "V2yV2yV2y" , "" , AND(SM_80, PTX70)) |
202 | TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16x2, "V2yV2yV2y" , "" , AND(SM_80, PTX70)) |
203 | TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "V2yV2yV2y" , "" , |
204 | AND(SM_86, PTX72)) |
205 | TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "V2yV2yV2y" , "" , |
206 | AND(SM_86, PTX72)) |
207 | BUILTIN(__nvvm_fmin_f, "fff" , "" ) |
208 | BUILTIN(__nvvm_fmin_ftz_f, "fff" , "" ) |
209 | TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff" , "" , AND(SM_80, PTX70)) |
210 | TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff" , "" , AND(SM_80, PTX70)) |
211 | TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff" , "" , AND(SM_86, PTX72)) |
212 | TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff" , "" , AND(SM_86, PTX72)) |
213 | TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff" , "" , AND(SM_86, PTX72)) |
214 | TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff" , "" , AND(SM_86, PTX72)) |
215 | BUILTIN(__nvvm_fmin_d, "ddd" , "" ) |
216 | |
217 | TARGET_BUILTIN(__nvvm_fmax_f16, "hhh" , "" , AND(SM_80, PTX70)) |
218 | TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh" , "" , AND(SM_80, PTX70)) |
219 | TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh" , "" , AND(SM_80, PTX70)) |
220 | TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh" , "" , AND(SM_80, PTX70)) |
221 | TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh" , "" , AND(SM_86, PTX72)) |
222 | TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh" , "" , AND(SM_86, PTX72)) |
223 | TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh" , "" , AND(SM_86, PTX72)) |
224 | TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh" , "" , |
225 | AND(SM_86, PTX72)) |
226 | TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h" , "" , AND(SM_80, PTX70)) |
227 | TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h" , "" , AND(SM_80, PTX70)) |
228 | TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h" , "" , AND(SM_80, PTX70)) |
229 | TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h" , "" , AND(SM_80, PTX70)) |
230 | TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h" , "" , |
231 | AND(SM_86, PTX72)) |
232 | TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h" , "" , |
233 | AND(SM_86, PTX72)) |
234 | TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h" , "" , |
235 | AND(SM_86, PTX72)) |
236 | TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h" , "" , |
237 | AND(SM_86, PTX72)) |
238 | TARGET_BUILTIN(__nvvm_fmax_bf16, "yyy" , "" , AND(SM_80, PTX70)) |
239 | TARGET_BUILTIN(__nvvm_fmax_ftz_bf16, "yyy" , "" , AND(SM_80, PTX70)) |
240 | TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "yyy" , "" , AND(SM_80, PTX70)) |
241 | TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16, "yyy" , "" , AND(SM_80, PTX70)) |
242 | TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "yyy" , "" , AND(SM_86, PTX72)) |
243 | TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "yyy" , "" , |
244 | AND(SM_86, PTX72)) |
245 | TARGET_BUILTIN(__nvvm_fmax_bf16x2, "V2yV2yV2y" , "" , AND(SM_80, PTX70)) |
246 | TARGET_BUILTIN(__nvvm_fmax_ftz_bf16x2, "V2yV2yV2y" , "" , AND(SM_80, PTX70)) |
247 | TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "V2yV2yV2y" , "" , AND(SM_80, PTX70)) |
248 | TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16x2, "V2yV2yV2y" , "" , AND(SM_80, PTX70)) |
249 | TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "V2yV2yV2y" , "" , |
250 | AND(SM_86, PTX72)) |
251 | TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "V2yV2yV2y" , "" , |
252 | AND(SM_86, PTX72)) |
253 | BUILTIN(__nvvm_fmax_f, "fff" , "" ) |
254 | BUILTIN(__nvvm_fmax_ftz_f, "fff" , "" ) |
255 | TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff" , "" , AND(SM_80, PTX70)) |
256 | TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff" , "" , AND(SM_80, PTX70)) |
257 | TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff" , "" , AND(SM_86, PTX72)) |
258 | TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff" , "" , AND(SM_86, PTX72)) |
259 | TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff" , "" , AND(SM_86, PTX72)) |
260 | TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff" , "" , AND(SM_86, PTX72)) |
261 | BUILTIN(__nvvm_fmax_d, "ddd" , "" ) |
262 | |
263 | // Multiplication |
264 | |
265 | BUILTIN(__nvvm_mulhi_i, "iii" , "" ) |
266 | BUILTIN(__nvvm_mulhi_ui, "UiUiUi" , "" ) |
267 | BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi" , "" ) |
268 | BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi" , "" ) |
269 | |
270 | BUILTIN(__nvvm_mul_rn_ftz_f, "fff" , "" ) |
271 | BUILTIN(__nvvm_mul_rn_f, "fff" , "" ) |
272 | BUILTIN(__nvvm_mul_rz_ftz_f, "fff" , "" ) |
273 | BUILTIN(__nvvm_mul_rz_f, "fff" , "" ) |
274 | BUILTIN(__nvvm_mul_rm_ftz_f, "fff" , "" ) |
275 | BUILTIN(__nvvm_mul_rm_f, "fff" , "" ) |
276 | BUILTIN(__nvvm_mul_rp_ftz_f, "fff" , "" ) |
277 | BUILTIN(__nvvm_mul_rp_f, "fff" , "" ) |
278 | |
279 | BUILTIN(__nvvm_mul_rn_d, "ddd" , "" ) |
280 | BUILTIN(__nvvm_mul_rz_d, "ddd" , "" ) |
281 | BUILTIN(__nvvm_mul_rm_d, "ddd" , "" ) |
282 | BUILTIN(__nvvm_mul_rp_d, "ddd" , "" ) |
283 | |
284 | BUILTIN(__nvvm_mul24_i, "iii" , "" ) |
285 | BUILTIN(__nvvm_mul24_ui, "UiUiUi" , "" ) |
286 | |
287 | // Div |
288 | |
289 | BUILTIN(__nvvm_div_approx_ftz_f, "fff" , "" ) |
290 | BUILTIN(__nvvm_div_approx_f, "fff" , "" ) |
291 | |
292 | BUILTIN(__nvvm_div_rn_ftz_f, "fff" , "" ) |
293 | BUILTIN(__nvvm_div_rn_f, "fff" , "" ) |
294 | BUILTIN(__nvvm_div_rz_ftz_f, "fff" , "" ) |
295 | BUILTIN(__nvvm_div_rz_f, "fff" , "" ) |
296 | BUILTIN(__nvvm_div_rm_ftz_f, "fff" , "" ) |
297 | BUILTIN(__nvvm_div_rm_f, "fff" , "" ) |
298 | BUILTIN(__nvvm_div_rp_ftz_f, "fff" , "" ) |
299 | BUILTIN(__nvvm_div_rp_f, "fff" , "" ) |
300 | |
301 | BUILTIN(__nvvm_div_rn_d, "ddd" , "" ) |
302 | BUILTIN(__nvvm_div_rz_d, "ddd" , "" ) |
303 | BUILTIN(__nvvm_div_rm_d, "ddd" , "" ) |
304 | BUILTIN(__nvvm_div_rp_d, "ddd" , "" ) |
305 | |
306 | // Sad (sum of absolute differences) |
307 | |
308 | BUILTIN(__nvvm_sad_i, "iiii" , "" ) |
309 | BUILTIN(__nvvm_sad_ui, "UiUiUiUi" , "" ) |
310 | |
311 | // Floor, Ceil |
312 | |
313 | BUILTIN(__nvvm_floor_ftz_f, "ff" , "" ) |
314 | BUILTIN(__nvvm_floor_f, "ff" , "" ) |
315 | BUILTIN(__nvvm_floor_d, "dd" , "" ) |
316 | |
317 | BUILTIN(__nvvm_ceil_ftz_f, "ff" , "" ) |
318 | BUILTIN(__nvvm_ceil_f, "ff" , "" ) |
319 | BUILTIN(__nvvm_ceil_d, "dd" , "" ) |
320 | |
321 | // Abs |
322 | |
323 | BUILTIN(__nvvm_fabs_ftz_f, "ff" , "" ) |
324 | BUILTIN(__nvvm_fabs_f, "ff" , "" ) |
325 | BUILTIN(__nvvm_fabs_d, "dd" , "" ) |
326 | |
327 | // Round |
328 | |
329 | BUILTIN(__nvvm_round_ftz_f, "ff" , "" ) |
330 | BUILTIN(__nvvm_round_f, "ff" , "" ) |
331 | BUILTIN(__nvvm_round_d, "dd" , "" ) |
332 | |
333 | // Trunc |
334 | |
335 | BUILTIN(__nvvm_trunc_ftz_f, "ff" , "" ) |
336 | BUILTIN(__nvvm_trunc_f, "ff" , "" ) |
337 | BUILTIN(__nvvm_trunc_d, "dd" , "" ) |
338 | |
339 | // Saturate |
340 | |
341 | BUILTIN(__nvvm_saturate_ftz_f, "ff" , "" ) |
342 | BUILTIN(__nvvm_saturate_f, "ff" , "" ) |
343 | BUILTIN(__nvvm_saturate_d, "dd" , "" ) |
344 | |
345 | // Exp2, Log2 |
346 | |
347 | BUILTIN(__nvvm_ex2_approx_ftz_f, "ff" , "" ) |
348 | BUILTIN(__nvvm_ex2_approx_f, "ff" , "" ) |
349 | BUILTIN(__nvvm_ex2_approx_d, "dd" , "" ) |
350 | TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh" , "" , AND(SM_75, PTX70)) |
351 | TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h" , "" , AND(SM_75, PTX70)) |
352 | |
353 | BUILTIN(__nvvm_lg2_approx_ftz_f, "ff" , "" ) |
354 | BUILTIN(__nvvm_lg2_approx_f, "ff" , "" ) |
355 | BUILTIN(__nvvm_lg2_approx_d, "dd" , "" ) |
356 | |
357 | // Sin, Cos |
358 | |
359 | BUILTIN(__nvvm_sin_approx_ftz_f, "ff" , "" ) |
360 | BUILTIN(__nvvm_sin_approx_f, "ff" , "" ) |
361 | |
362 | BUILTIN(__nvvm_cos_approx_ftz_f, "ff" , "" ) |
363 | BUILTIN(__nvvm_cos_approx_f, "ff" , "" ) |
364 | |
365 | // Fma |
366 | |
367 | TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh" , "" , AND(SM_53, PTX42)) |
368 | TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh" , "" , AND(SM_53, PTX42)) |
369 | TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh" , "" , AND(SM_53, PTX42)) |
370 | TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh" , "" , AND(SM_53, PTX42)) |
371 | TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh" , "" , AND(SM_80, PTX70)) |
372 | TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh" , "" , AND(SM_80, PTX70)) |
373 | TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h" , "" , AND(SM_53, PTX42)) |
374 | TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h" , "" , AND(SM_53, PTX42)) |
375 | TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h" , "" , AND(SM_53, PTX42)) |
376 | TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h" , "" , AND(SM_53, PTX42)) |
377 | TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h" , "" , AND(SM_80, PTX70)) |
378 | TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h" , "" , AND(SM_80, PTX70)) |
379 | TARGET_BUILTIN(__nvvm_fma_rn_bf16, "yyyy" , "" , AND(SM_80, PTX70)) |
380 | TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "yyyy" , "" , AND(SM_80, PTX70)) |
381 | TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "V2yV2yV2yV2y" , "" , AND(SM_80, PTX70)) |
382 | TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "V2yV2yV2yV2y" , "" , AND(SM_80, PTX70)) |
383 | BUILTIN(__nvvm_fma_rn_ftz_f, "ffff" , "" ) |
384 | BUILTIN(__nvvm_fma_rn_f, "ffff" , "" ) |
385 | BUILTIN(__nvvm_fma_rz_ftz_f, "ffff" , "" ) |
386 | BUILTIN(__nvvm_fma_rz_f, "ffff" , "" ) |
387 | BUILTIN(__nvvm_fma_rm_ftz_f, "ffff" , "" ) |
388 | BUILTIN(__nvvm_fma_rm_f, "ffff" , "" ) |
389 | BUILTIN(__nvvm_fma_rp_ftz_f, "ffff" , "" ) |
390 | BUILTIN(__nvvm_fma_rp_f, "ffff" , "" ) |
391 | BUILTIN(__nvvm_fma_rn_d, "dddd" , "" ) |
392 | BUILTIN(__nvvm_fma_rz_d, "dddd" , "" ) |
393 | BUILTIN(__nvvm_fma_rm_d, "dddd" , "" ) |
394 | BUILTIN(__nvvm_fma_rp_d, "dddd" , "" ) |
395 | |
396 | // Rcp |
397 | |
398 | BUILTIN(__nvvm_rcp_rn_ftz_f, "ff" , "" ) |
399 | BUILTIN(__nvvm_rcp_rn_f, "ff" , "" ) |
400 | BUILTIN(__nvvm_rcp_rz_ftz_f, "ff" , "" ) |
401 | BUILTIN(__nvvm_rcp_rz_f, "ff" , "" ) |
402 | BUILTIN(__nvvm_rcp_rm_ftz_f, "ff" , "" ) |
403 | BUILTIN(__nvvm_rcp_rm_f, "ff" , "" ) |
404 | BUILTIN(__nvvm_rcp_rp_ftz_f, "ff" , "" ) |
405 | BUILTIN(__nvvm_rcp_rp_f, "ff" , "" ) |
406 | |
407 | BUILTIN(__nvvm_rcp_rn_d, "dd" , "" ) |
408 | BUILTIN(__nvvm_rcp_rz_d, "dd" , "" ) |
409 | BUILTIN(__nvvm_rcp_rm_d, "dd" , "" ) |
410 | BUILTIN(__nvvm_rcp_rp_d, "dd" , "" ) |
411 | |
412 | BUILTIN(__nvvm_rcp_approx_ftz_f, "ff" , "" ) |
413 | BUILTIN(__nvvm_rcp_approx_ftz_d, "dd" , "" ) |
414 | |
415 | // Sqrt |
416 | |
417 | BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff" , "" ) |
418 | BUILTIN(__nvvm_sqrt_rn_f, "ff" , "" ) |
419 | BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff" , "" ) |
420 | BUILTIN(__nvvm_sqrt_rz_f, "ff" , "" ) |
421 | BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff" , "" ) |
422 | BUILTIN(__nvvm_sqrt_rm_f, "ff" , "" ) |
423 | BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff" , "" ) |
424 | BUILTIN(__nvvm_sqrt_rp_f, "ff" , "" ) |
425 | BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff" , "" ) |
426 | BUILTIN(__nvvm_sqrt_approx_f, "ff" , "" ) |
427 | |
428 | BUILTIN(__nvvm_sqrt_rn_d, "dd" , "" ) |
429 | BUILTIN(__nvvm_sqrt_rz_d, "dd" , "" ) |
430 | BUILTIN(__nvvm_sqrt_rm_d, "dd" , "" ) |
431 | BUILTIN(__nvvm_sqrt_rp_d, "dd" , "" ) |
432 | |
433 | // Rsqrt |
434 | |
435 | BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff" , "" ) |
436 | BUILTIN(__nvvm_rsqrt_approx_f, "ff" , "" ) |
437 | BUILTIN(__nvvm_rsqrt_approx_d, "dd" , "" ) |
438 | |
439 | // Add |
440 | |
441 | BUILTIN(__nvvm_add_rn_ftz_f, "fff" , "" ) |
442 | BUILTIN(__nvvm_add_rn_f, "fff" , "" ) |
443 | BUILTIN(__nvvm_add_rz_ftz_f, "fff" , "" ) |
444 | BUILTIN(__nvvm_add_rz_f, "fff" , "" ) |
445 | BUILTIN(__nvvm_add_rm_ftz_f, "fff" , "" ) |
446 | BUILTIN(__nvvm_add_rm_f, "fff" , "" ) |
447 | BUILTIN(__nvvm_add_rp_ftz_f, "fff" , "" ) |
448 | BUILTIN(__nvvm_add_rp_f, "fff" , "" ) |
449 | |
450 | BUILTIN(__nvvm_add_rn_d, "ddd" , "" ) |
451 | BUILTIN(__nvvm_add_rz_d, "ddd" , "" ) |
452 | BUILTIN(__nvvm_add_rm_d, "ddd" , "" ) |
453 | BUILTIN(__nvvm_add_rp_d, "ddd" , "" ) |
454 | |
455 | // Convert |
456 | |
457 | BUILTIN(__nvvm_d2f_rn_ftz, "fd" , "" ) |
458 | BUILTIN(__nvvm_d2f_rn, "fd" , "" ) |
459 | BUILTIN(__nvvm_d2f_rz_ftz, "fd" , "" ) |
460 | BUILTIN(__nvvm_d2f_rz, "fd" , "" ) |
461 | BUILTIN(__nvvm_d2f_rm_ftz, "fd" , "" ) |
462 | BUILTIN(__nvvm_d2f_rm, "fd" , "" ) |
463 | BUILTIN(__nvvm_d2f_rp_ftz, "fd" , "" ) |
464 | BUILTIN(__nvvm_d2f_rp, "fd" , "" ) |
465 | |
466 | BUILTIN(__nvvm_d2i_rn, "id" , "" ) |
467 | BUILTIN(__nvvm_d2i_rz, "id" , "" ) |
468 | BUILTIN(__nvvm_d2i_rm, "id" , "" ) |
469 | BUILTIN(__nvvm_d2i_rp, "id" , "" ) |
470 | |
471 | BUILTIN(__nvvm_d2ui_rn, "Uid" , "" ) |
472 | BUILTIN(__nvvm_d2ui_rz, "Uid" , "" ) |
473 | BUILTIN(__nvvm_d2ui_rm, "Uid" , "" ) |
474 | BUILTIN(__nvvm_d2ui_rp, "Uid" , "" ) |
475 | |
476 | BUILTIN(__nvvm_i2d_rn, "di" , "" ) |
477 | BUILTIN(__nvvm_i2d_rz, "di" , "" ) |
478 | BUILTIN(__nvvm_i2d_rm, "di" , "" ) |
479 | BUILTIN(__nvvm_i2d_rp, "di" , "" ) |
480 | |
481 | BUILTIN(__nvvm_ui2d_rn, "dUi" , "" ) |
482 | BUILTIN(__nvvm_ui2d_rz, "dUi" , "" ) |
483 | BUILTIN(__nvvm_ui2d_rm, "dUi" , "" ) |
484 | BUILTIN(__nvvm_ui2d_rp, "dUi" , "" ) |
485 | |
486 | BUILTIN(__nvvm_f2i_rn_ftz, "if" , "" ) |
487 | BUILTIN(__nvvm_f2i_rn, "if" , "" ) |
488 | BUILTIN(__nvvm_f2i_rz_ftz, "if" , "" ) |
489 | BUILTIN(__nvvm_f2i_rz, "if" , "" ) |
490 | BUILTIN(__nvvm_f2i_rm_ftz, "if" , "" ) |
491 | BUILTIN(__nvvm_f2i_rm, "if" , "" ) |
492 | BUILTIN(__nvvm_f2i_rp_ftz, "if" , "" ) |
493 | BUILTIN(__nvvm_f2i_rp, "if" , "" ) |
494 | |
495 | BUILTIN(__nvvm_f2ui_rn_ftz, "Uif" , "" ) |
496 | BUILTIN(__nvvm_f2ui_rn, "Uif" , "" ) |
497 | BUILTIN(__nvvm_f2ui_rz_ftz, "Uif" , "" ) |
498 | BUILTIN(__nvvm_f2ui_rz, "Uif" , "" ) |
499 | BUILTIN(__nvvm_f2ui_rm_ftz, "Uif" , "" ) |
500 | BUILTIN(__nvvm_f2ui_rm, "Uif" , "" ) |
501 | BUILTIN(__nvvm_f2ui_rp_ftz, "Uif" , "" ) |
502 | BUILTIN(__nvvm_f2ui_rp, "Uif" , "" ) |
503 | |
504 | BUILTIN(__nvvm_i2f_rn, "fi" , "" ) |
505 | BUILTIN(__nvvm_i2f_rz, "fi" , "" ) |
506 | BUILTIN(__nvvm_i2f_rm, "fi" , "" ) |
507 | BUILTIN(__nvvm_i2f_rp, "fi" , "" ) |
508 | |
509 | BUILTIN(__nvvm_ui2f_rn, "fUi" , "" ) |
510 | BUILTIN(__nvvm_ui2f_rz, "fUi" , "" ) |
511 | BUILTIN(__nvvm_ui2f_rm, "fUi" , "" ) |
512 | BUILTIN(__nvvm_ui2f_rp, "fUi" , "" ) |
513 | |
514 | BUILTIN(__nvvm_lohi_i2d, "dii" , "" ) |
515 | |
516 | BUILTIN(__nvvm_d2i_lo, "id" , "" ) |
517 | BUILTIN(__nvvm_d2i_hi, "id" , "" ) |
518 | |
519 | BUILTIN(__nvvm_f2ll_rn_ftz, "LLif" , "" ) |
520 | BUILTIN(__nvvm_f2ll_rn, "LLif" , "" ) |
521 | BUILTIN(__nvvm_f2ll_rz_ftz, "LLif" , "" ) |
522 | BUILTIN(__nvvm_f2ll_rz, "LLif" , "" ) |
523 | BUILTIN(__nvvm_f2ll_rm_ftz, "LLif" , "" ) |
524 | BUILTIN(__nvvm_f2ll_rm, "LLif" , "" ) |
525 | BUILTIN(__nvvm_f2ll_rp_ftz, "LLif" , "" ) |
526 | BUILTIN(__nvvm_f2ll_rp, "LLif" , "" ) |
527 | |
528 | BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif" , "" ) |
529 | BUILTIN(__nvvm_f2ull_rn, "ULLif" , "" ) |
530 | BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif" , "" ) |
531 | BUILTIN(__nvvm_f2ull_rz, "ULLif" , "" ) |
532 | BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif" , "" ) |
533 | BUILTIN(__nvvm_f2ull_rm, "ULLif" , "" ) |
534 | BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif" , "" ) |
535 | BUILTIN(__nvvm_f2ull_rp, "ULLif" , "" ) |
536 | |
537 | BUILTIN(__nvvm_d2ll_rn, "LLid" , "" ) |
538 | BUILTIN(__nvvm_d2ll_rz, "LLid" , "" ) |
539 | BUILTIN(__nvvm_d2ll_rm, "LLid" , "" ) |
540 | BUILTIN(__nvvm_d2ll_rp, "LLid" , "" ) |
541 | |
542 | BUILTIN(__nvvm_d2ull_rn, "ULLid" , "" ) |
543 | BUILTIN(__nvvm_d2ull_rz, "ULLid" , "" ) |
544 | BUILTIN(__nvvm_d2ull_rm, "ULLid" , "" ) |
545 | BUILTIN(__nvvm_d2ull_rp, "ULLid" , "" ) |
546 | |
547 | BUILTIN(__nvvm_ll2f_rn, "fLLi" , "" ) |
548 | BUILTIN(__nvvm_ll2f_rz, "fLLi" , "" ) |
549 | BUILTIN(__nvvm_ll2f_rm, "fLLi" , "" ) |
550 | BUILTIN(__nvvm_ll2f_rp, "fLLi" , "" ) |
551 | |
552 | BUILTIN(__nvvm_ull2f_rn, "fULLi" , "" ) |
553 | BUILTIN(__nvvm_ull2f_rz, "fULLi" , "" ) |
554 | BUILTIN(__nvvm_ull2f_rm, "fULLi" , "" ) |
555 | BUILTIN(__nvvm_ull2f_rp, "fULLi" , "" ) |
556 | |
557 | BUILTIN(__nvvm_ll2d_rn, "dLLi" , "" ) |
558 | BUILTIN(__nvvm_ll2d_rz, "dLLi" , "" ) |
559 | BUILTIN(__nvvm_ll2d_rm, "dLLi" , "" ) |
560 | BUILTIN(__nvvm_ll2d_rp, "dLLi" , "" ) |
561 | |
562 | BUILTIN(__nvvm_ull2d_rn, "dULLi" , "" ) |
563 | BUILTIN(__nvvm_ull2d_rz, "dULLi" , "" ) |
564 | BUILTIN(__nvvm_ull2d_rm, "dULLi" , "" ) |
565 | BUILTIN(__nvvm_ull2d_rp, "dULLi" , "" ) |
566 | |
567 | BUILTIN(__nvvm_f2h_rn_ftz, "Usf" , "" ) |
568 | BUILTIN(__nvvm_f2h_rn, "Usf" , "" ) |
569 | |
570 | TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "V2yff" , "" , AND(SM_80,PTX70)) |
571 | TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "V2yff" , "" , AND(SM_80,PTX70)) |
572 | TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "V2yff" , "" , AND(SM_80,PTX70)) |
573 | TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "V2yff" , "" , AND(SM_80,PTX70)) |
574 | |
575 | TARGET_BUILTIN(__nvvm_ff2f16x2_rn, "V2hff" , "" , AND(SM_80,PTX70)) |
576 | TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, "V2hff" , "" , AND(SM_80,PTX70)) |
577 | TARGET_BUILTIN(__nvvm_ff2f16x2_rz, "V2hff" , "" , AND(SM_80,PTX70)) |
578 | TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, "V2hff" , "" , AND(SM_80,PTX70)) |
579 | |
580 | TARGET_BUILTIN(__nvvm_f2bf16_rn, "yf" , "" , AND(SM_80,PTX70)) |
581 | TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "yf" , "" , AND(SM_80,PTX70)) |
582 | TARGET_BUILTIN(__nvvm_f2bf16_rz, "yf" , "" , AND(SM_80,PTX70)) |
583 | TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "yf" , "" , AND(SM_80,PTX70)) |
584 | |
585 | TARGET_BUILTIN(__nvvm_f2tf32_rna, "ZUif" , "" , AND(SM_80,PTX70)) |
586 | |
587 | // Bitcast |
588 | |
589 | BUILTIN(__nvvm_bitcast_f2i, "if" , "" ) |
590 | BUILTIN(__nvvm_bitcast_i2f, "fi" , "" ) |
591 | |
592 | BUILTIN(__nvvm_bitcast_ll2d, "dLLi" , "" ) |
593 | BUILTIN(__nvvm_bitcast_d2ll, "LLid" , "" ) |
594 | |
595 | // FNS (find n-th set bit) |
596 | TARGET_BUILTIN(__nvvm_fns, "UiUiUii" , "n" , PTX60) |
597 | |
598 | // Sync |
599 | |
600 | BUILTIN(__syncthreads, "v" , "" ) |
601 | BUILTIN(__nvvm_bar0_popc, "ii" , "" ) |
602 | BUILTIN(__nvvm_bar0_and, "ii" , "" ) |
603 | BUILTIN(__nvvm_bar0_or, "ii" , "" ) |
604 | BUILTIN(__nvvm_bar_sync, "vi" , "n" ) |
605 | TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi" , "n" , PTX60) |
606 | TARGET_BUILTIN(__nvvm_barrier_sync, "vUi" , "n" , PTX60) |
607 | TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi" , "n" , PTX60) |
608 | |
609 | TARGET_BUILTIN(__nvvm_barrier_cluster_arrive, "v" , "n" , AND(SM_90,PTX78)) |
610 | TARGET_BUILTIN(__nvvm_barrier_cluster_arrive_relaxed, "v" , "n" , AND(SM_90,PTX80)) |
611 | TARGET_BUILTIN(__nvvm_barrier_cluster_wait, "v" , "n" , AND(SM_90,PTX78)) |
612 | TARGET_BUILTIN(__nvvm_fence_sc_cluster, "v" , "n" , AND(SM_90,PTX78)) |
613 | |
614 | // Shuffle |
615 | |
616 | BUILTIN(__nvvm_shfl_down_i32, "iiii" , "" ) |
617 | BUILTIN(__nvvm_shfl_down_f32, "ffii" , "" ) |
618 | BUILTIN(__nvvm_shfl_up_i32, "iiii" , "" ) |
619 | BUILTIN(__nvvm_shfl_up_f32, "ffii" , "" ) |
620 | BUILTIN(__nvvm_shfl_bfly_i32, "iiii" , "" ) |
621 | BUILTIN(__nvvm_shfl_bfly_f32, "ffii" , "" ) |
622 | BUILTIN(__nvvm_shfl_idx_i32, "iiii" , "" ) |
623 | BUILTIN(__nvvm_shfl_idx_f32, "ffii" , "" ) |
624 | |
625 | TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii" , "" , PTX60) |
626 | TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii" , "" , PTX60) |
627 | TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii" , "" , PTX60) |
628 | TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii" , "" , PTX60) |
629 | TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii" , "" , PTX60) |
630 | TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii" , "" , PTX60) |
631 | TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii" , "" , PTX60) |
632 | TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii" , "" , PTX60) |
633 | |
634 | // Vote |
635 | BUILTIN(__nvvm_vote_all, "bb" , "" ) |
636 | BUILTIN(__nvvm_vote_any, "bb" , "" ) |
637 | BUILTIN(__nvvm_vote_uni, "bb" , "" ) |
638 | BUILTIN(__nvvm_vote_ballot, "Uib" , "" ) |
639 | |
640 | TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib" , "" , PTX60) |
641 | TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib" , "" , PTX60) |
642 | TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib" , "" , PTX60) |
643 | TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib" , "" , PTX60) |
644 | |
645 | // Mask |
646 | TARGET_BUILTIN(__nvvm_activemask, "Ui" , "n" , PTX62) |
647 | |
648 | // Match |
649 | TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi" , "" , AND(SM_70,PTX60)) |
650 | TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi" , "" , AND(SM_70,PTX60)) |
651 | // These return a pair {value, predicate}, which requires custom lowering. |
652 | TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*" , "" , AND(SM_70,PTX60)) |
653 | TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*" , "" , AND(SM_70,PTX60)) |
654 | |
655 | // Redux |
656 | TARGET_BUILTIN(__nvvm_redux_sync_add, "iii" , "" , AND(SM_80,PTX70)) |
657 | TARGET_BUILTIN(__nvvm_redux_sync_min, "iii" , "" , AND(SM_80,PTX70)) |
658 | TARGET_BUILTIN(__nvvm_redux_sync_max, "iii" , "" , AND(SM_80,PTX70)) |
659 | TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii" , "" , AND(SM_80,PTX70)) |
660 | TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii" , "" , AND(SM_80,PTX70)) |
661 | TARGET_BUILTIN(__nvvm_redux_sync_and, "iii" , "" , AND(SM_80,PTX70)) |
662 | TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii" , "" , AND(SM_80,PTX70)) |
663 | TARGET_BUILTIN(__nvvm_redux_sync_or, "iii" , "" , AND(SM_80,PTX70)) |
664 | |
665 | // Membar |
666 | |
667 | BUILTIN(__nvvm_membar_cta, "v" , "" ) |
668 | BUILTIN(__nvvm_membar_gl, "v" , "" ) |
669 | BUILTIN(__nvvm_membar_sys, "v" , "" ) |
670 | |
671 | // mbarrier |
// Every entry is gated on AND(SM_80,PTX70). Each operation except
// pending_count is declared twice: once taking a generic pointer to int64_t
// ("Wi*") and once, with a _shared suffix, taking the same pointer in address
// space 3 ("Wi*3").
//   init            void(ptr, int)
//   inval           void(ptr)
//   arrive[_drop]   int64_t(ptr); the _noComplete forms take an extra int
//   test_wait       bool(ptr, int64_t)
//   pending_count   int(int64_t)
// NOTE(review): the int64_t returned by arrive and consumed by test_wait and
// pending_count looks like an opaque barrier-state token -- confirm.
672 | |
673 | TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i" , "" , AND(SM_80,PTX70)) |
674 | TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i" , "" , AND(SM_80,PTX70)) |
675 | |
676 | TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*" , "" , AND(SM_80,PTX70)) |
677 | TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3" , "" , AND(SM_80,PTX70)) |
678 | |
679 | TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*" , "" , AND(SM_80,PTX70)) |
680 | TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3" , "" , AND(SM_80,PTX70)) |
681 | TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i" , "" , AND(SM_80,PTX70)) |
682 | TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i" , "" , AND(SM_80,PTX70)) |
683 | |
684 | TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*" , "" , AND(SM_80,PTX70)) |
685 | TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3" , "" , AND(SM_80,PTX70)) |
686 | TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i" , "" , AND(SM_80,PTX70)) |
687 | TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i" , "" , AND(SM_80,PTX70)) |
688 | |
689 | TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi" , "" , AND(SM_80,PTX70)) |
690 | TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi" , "" , AND(SM_80,PTX70)) |
691 | |
692 | TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi" , "" , AND(SM_80,PTX70)) |
693 | |
694 | // Memcpy, Memset |
// Both return void and carry no target-feature gate.
//   __nvvm_memcpy(unsigned char *, unsigned char *, size_t, int)
//   __nvvm_memset(unsigned char *, unsigned char, size_t, int)
// NOTE(review): the trailing int is presumably an alignment hint -- confirm
// against the matching LLVM intrinsic.
695 | |
696 | BUILTIN(__nvvm_memcpy, "vUc*Uc*zi" ,"" ) |
697 | BUILTIN(__nvvm_memset, "vUc*Uczi" ,"" ) |
698 | |
699 | // Image |
// Image builtins, all declared with BUILTIN (no target-feature gate).
//   read_image2D<r><c>_  two leading ints, then two coordinates of type <c>
//                        (i = int, f = float); returns a 4-element vector of
//                        <r> ("V4f" or "V4i").
//   read_image3D<r><c>_  same, but with four coordinate operands.
//   write_image2D*_      void; three leading ints followed by four values of
//                        float, int or unsigned int.
//   get_image_*i_        int -> int queries.
// NOTE(review): the leading ints are presumably image / sampler handles --
// confirm, the type strings alone do not say.
700 | |
701 | BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii" , "" ) |
702 | BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff" , "" ) |
703 | BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii" , "" ) |
704 | BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff" , "" ) |
705 | |
706 | BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii" , "" ) |
707 | BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff" , "" ) |
708 | BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii" , "" ) |
709 | BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff" , "" ) |
710 | |
711 | BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff" , "" ) |
712 | BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii" , "" ) |
713 | BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi" , "" ) |
714 | BUILTIN(__builtin_ptx_get_image_depthi_, "ii" , "" ) |
715 | BUILTIN(__builtin_ptx_get_image_heighti_, "ii" , "" ) |
716 | BUILTIN(__builtin_ptx_get_image_widthi_, "ii" , "" ) |
717 | BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii" , "" ) |
718 | BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii" , "" ) |
719 | |
720 | // Atomic |
721 | // |
722 | // We need the atom intrinsics because |
723 | // - they are used in converging analysis |
724 | // - they are used in address space analysis and optimization |
725 | // So it does not hurt to expose them as builtins. |
726 | // |
// Shape shared by the add/sub entries below: T f(volatile T *, T), encoded as
// "TTD*T", with attribute "n" (nothrow). The suffix selects T: i = int,
// l = long, ll = long long, f = float, d = double.
//
// Feature gating: the unscoped integer and float forms use BUILTIN and are
// therefore ungated; the _cta_ and _sys_ forms, and every double form
// (including the unscoped __nvvm_atom_add_gen_d), require SM_60, which the
// macro chain at the top of this file expands to sm_60 and every newer listed
// SM. sub is declared only for the unscoped integer types.
727 | BUILTIN(__nvvm_atom_add_gen_i, "iiD*i" , "n" ) |
728 | TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i" , "n" , SM_60) |
729 | TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i" , "n" , SM_60) |
730 | BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li" , "n" ) |
731 | TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li" , "n" , SM_60) |
732 | TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li" , "n" , SM_60) |
733 | BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi" , "n" ) |
734 | TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
735 | TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
736 | BUILTIN(__nvvm_atom_add_gen_f, "ffD*f" , "n" ) |
737 | TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f" , "n" , SM_60) |
738 | TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f" , "n" , SM_60) |
739 | TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d" , "n" , SM_60) |
740 | TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d" , "n" , SM_60) |
741 | TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d" , "n" , SM_60) |
742 | |
743 | BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i" , "n" ) |
744 | BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li" , "n" ) |
745 | BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi" , "n" ) |
746 | |
// Atomic exchange: T f(volatile T *, T) for int, long and long long, all
// nothrow ("n"). The unscoped form of each type is ungated; the _cta_ and
// _sys_ forms require SM_60.
747 | BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i" , "n" ) |
748 | TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i" , "n" , SM_60) |
749 | TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i" , "n" , SM_60) |
750 | BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li" , "n" ) |
751 | TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li" , "n" , SM_60) |
752 | TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li" , "n" , SM_60) |
753 | BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi" , "n" ) |
754 | TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
755 | TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
756 | |
// Atomic max / min: T f(volatile T *, T), nothrow ("n"), declared for the
// signed and unsigned variants of int, long and long long (suffixes i, ui, l,
// ul, ll, ull). For each type the unscoped form is ungated and the _cta_ and
// _sys_ forms require SM_60. The min group mirrors the max group entry for
// entry.
757 | BUILTIN(__nvvm_atom_max_gen_i, "iiD*i" , "n" ) |
758 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i" , "n" , SM_60) |
759 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i" , "n" , SM_60) |
760 | BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui" , "n" ) |
761 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui" , "n" , SM_60) |
762 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui" , "n" , SM_60) |
763 | BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li" , "n" ) |
764 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li" , "n" , SM_60) |
765 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li" , "n" , SM_60) |
766 | BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi" , "n" ) |
767 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi" , "n" , SM_60) |
768 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi" , "n" , SM_60) |
769 | BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi" , "n" ) |
770 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
771 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
772 | BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi" , "n" ) |
773 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi" , "n" , SM_60) |
774 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi" , "n" , SM_60) |
775 | |
776 | BUILTIN(__nvvm_atom_min_gen_i, "iiD*i" , "n" ) |
777 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i" , "n" , SM_60) |
778 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i" , "n" , SM_60) |
779 | BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui" , "n" ) |
780 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui" , "n" , SM_60) |
781 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui" , "n" , SM_60) |
782 | BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li" , "n" ) |
783 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li" , "n" , SM_60) |
784 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li" , "n" , SM_60) |
785 | BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi" , "n" ) |
786 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi" , "n" , SM_60) |
787 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi" , "n" , SM_60) |
788 | BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi" , "n" ) |
789 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
790 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
791 | BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi" , "n" ) |
792 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi" , "n" , SM_60) |
793 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi" , "n" , SM_60) |
794 | |
// Atomic inc / dec: declared only for unsigned int, as
// unsigned f(volatile unsigned *, unsigned), nothrow ("n"). The unscoped form
// is ungated; the _cta_ and _sys_ forms require SM_60.
// NOTE(review): the second operand is presumably the wrap-around bound of the
// PTX atom.inc / atom.dec instructions rather than a step -- confirm.
795 | BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui" , "n" ) |
796 | TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui" , "n" , SM_60) |
797 | TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui" , "n" , SM_60) |
798 | BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui" , "n" ) |
799 | TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui" , "n" , SM_60) |
800 | TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui" , "n" , SM_60) |
801 | |
// Atomic bitwise and / or / xor: T f(volatile T *, T) for int, long and
// long long, nothrow ("n"). The three groups are identical apart from the
// operation name. Unscoped forms are ungated; _cta_ and _sys_ forms require
// SM_60.
802 | BUILTIN(__nvvm_atom_and_gen_i, "iiD*i" , "n" ) |
803 | TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i" , "n" , SM_60) |
804 | TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i" , "n" , SM_60) |
805 | BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li" , "n" ) |
806 | TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li" , "n" , SM_60) |
807 | TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li" , "n" , SM_60) |
808 | BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi" , "n" ) |
809 | TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
810 | TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
811 | |
812 | BUILTIN(__nvvm_atom_or_gen_i, "iiD*i" , "n" ) |
813 | TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i" , "n" , SM_60) |
814 | TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i" , "n" , SM_60) |
815 | BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li" , "n" ) |
816 | TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li" , "n" , SM_60) |
817 | TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li" , "n" , SM_60) |
818 | BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi" , "n" ) |
819 | TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
820 | TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
821 | |
822 | BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i" , "n" ) |
823 | TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i" , "n" , SM_60) |
824 | TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i" , "n" , SM_60) |
825 | BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li" , "n" ) |
826 | TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li" , "n" , SM_60) |
827 | TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li" , "n" , SM_60) |
828 | BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi" , "n" ) |
829 | TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
830 | TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi" , "n" , SM_60) |
831 | |
// Atomic compare-and-swap: T f(volatile T *, T, T) for int, long and
// long long, nothrow ("n"). Unscoped forms are ungated; _cta_ and _sys_ forms
// require SM_60.
// NOTE(review): the two value operands are presumably (compare, new) in that
// order -- confirm against the lowering before documenting it for users.
832 | BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii" , "n" ) |
833 | TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii" , "n" , SM_60) |
834 | TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii" , "n" , SM_60) |
835 | BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi" , "n" ) |
836 | TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi" , "n" , SM_60) |
837 | TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi" , "n" , SM_60) |
838 | BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi" , "n" ) |
839 | TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi" , "n" , SM_60) |
840 | TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi" , "n" , SM_60) |
841 | |
842 | // Compiler error and warning builtins |
// Both are void f(const char *) where the pointer lives in address space 4
// ("cC*4"), marked nothrow ("n") and ungated.
// NOTE(review): address space 4 is presumably the NVPTX constant space, i.e.
// the message must be a constant string -- confirm.
843 | BUILTIN(__nvvm_compiler_error, "vcC*4" , "n" ) |
844 | BUILTIN(__nvvm_compiler_warn, "vcC*4" , "n" ) |
845 | |
// ldu loads. Every entry has the shape T f(const T *) and is declared with
// BUILTIN (no target-feature gate, no attributes). The suffix selects T:
//   scalars  c, sc, s, i, l, ll / uc, us, ui, ul, ull / h (half), f, d
//   vectors  the same names followed by 2 or 4, returning a 2- or 4-element
//            ext_vector ("E2" / "E4") of that scalar
// Only 2-element vectors are declared for l, ll, ul, ull, h and d.
// NOTE(review): presumably these map to the PTX ld.global.u (uniform load)
// instruction -- confirm.
846 | BUILTIN(__nvvm_ldu_c, "ccC*" , "" ) |
847 | BUILTIN(__nvvm_ldu_sc, "ScScC*" , "" ) |
848 | BUILTIN(__nvvm_ldu_s, "ssC*" , "" ) |
849 | BUILTIN(__nvvm_ldu_i, "iiC*" , "" ) |
850 | BUILTIN(__nvvm_ldu_l, "LiLiC*" , "" ) |
851 | BUILTIN(__nvvm_ldu_ll, "LLiLLiC*" , "" ) |
852 | |
853 | BUILTIN(__nvvm_ldu_uc, "UcUcC*" , "" ) |
854 | BUILTIN(__nvvm_ldu_us, "UsUsC*" , "" ) |
855 | BUILTIN(__nvvm_ldu_ui, "UiUiC*" , "" ) |
856 | BUILTIN(__nvvm_ldu_ul, "ULiULiC*" , "" ) |
857 | BUILTIN(__nvvm_ldu_ull, "ULLiULLiC*" , "" ) |
858 | |
859 | BUILTIN(__nvvm_ldu_h, "hhC*" , "" ) |
860 | BUILTIN(__nvvm_ldu_f, "ffC*" , "" ) |
861 | BUILTIN(__nvvm_ldu_d, "ddC*" , "" ) |
862 | |
863 | BUILTIN(__nvvm_ldu_c2, "E2cE2cC*" , "" ) |
864 | BUILTIN(__nvvm_ldu_sc2, "E2ScE2ScC*" , "" ) |
865 | BUILTIN(__nvvm_ldu_c4, "E4cE4cC*" , "" ) |
866 | BUILTIN(__nvvm_ldu_sc4, "E4ScE4ScC*" , "" ) |
867 | BUILTIN(__nvvm_ldu_s2, "E2sE2sC*" , "" ) |
868 | BUILTIN(__nvvm_ldu_s4, "E4sE4sC*" , "" ) |
869 | BUILTIN(__nvvm_ldu_i2, "E2iE2iC*" , "" ) |
870 | BUILTIN(__nvvm_ldu_i4, "E4iE4iC*" , "" ) |
871 | BUILTIN(__nvvm_ldu_l2, "E2LiE2LiC*" , "" ) |
872 | BUILTIN(__nvvm_ldu_ll2, "E2LLiE2LLiC*" , "" ) |
873 | |
874 | BUILTIN(__nvvm_ldu_uc2, "E2UcE2UcC*" , "" ) |
875 | BUILTIN(__nvvm_ldu_uc4, "E4UcE4UcC*" , "" ) |
876 | BUILTIN(__nvvm_ldu_us2, "E2UsE2UsC*" , "" ) |
877 | BUILTIN(__nvvm_ldu_us4, "E4UsE4UsC*" , "" ) |
878 | BUILTIN(__nvvm_ldu_ui2, "E2UiE2UiC*" , "" ) |
879 | BUILTIN(__nvvm_ldu_ui4, "E4UiE4UiC*" , "" ) |
880 | BUILTIN(__nvvm_ldu_ul2, "E2ULiE2ULiC*" , "" ) |
881 | BUILTIN(__nvvm_ldu_ull2, "E2ULLiE2ULLiC*" , "" ) |
882 | |
883 | BUILTIN(__nvvm_ldu_h2, "E2hE2hC*" , "" ) |
884 | BUILTIN(__nvvm_ldu_f2, "E2fE2fC*" , "" ) |
885 | BUILTIN(__nvvm_ldu_f4, "E4fE4fC*" , "" ) |
886 | BUILTIN(__nvvm_ldu_d2, "E2dE2dC*" , "" ) |
887 | |
// ldg loads. Every entry has the shape T f(const T *) and is declared with
// BUILTIN (no target-feature gate, no attributes). The suffix selects T:
//   scalars  c, sc, s, i, l, ll / uc, us, ui, ul, ull / h (half), f, d
//   vectors  the same names followed by 2 or 4, returning a 2- or 4-element
//            ext_vector ("E2" / "E4") of that scalar
// Only 2-element vectors are declared for l, ll, ul, ull, h and d.
// NOTE(review): presumably these map to the PTX ld.global.nc (read-only
// cache load) instruction -- confirm.
888 | BUILTIN(__nvvm_ldg_c, "ccC*" , "" ) |
889 | BUILTIN(__nvvm_ldg_sc, "ScScC*" , "" ) |
890 | BUILTIN(__nvvm_ldg_s, "ssC*" , "" ) |
891 | BUILTIN(__nvvm_ldg_i, "iiC*" , "" ) |
892 | BUILTIN(__nvvm_ldg_l, "LiLiC*" , "" ) |
893 | BUILTIN(__nvvm_ldg_ll, "LLiLLiC*" , "" ) |
894 | |
895 | BUILTIN(__nvvm_ldg_uc, "UcUcC*" , "" ) |
896 | BUILTIN(__nvvm_ldg_us, "UsUsC*" , "" ) |
897 | BUILTIN(__nvvm_ldg_ui, "UiUiC*" , "" ) |
898 | BUILTIN(__nvvm_ldg_ul, "ULiULiC*" , "" ) |
899 | BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*" , "" ) |
900 | |
901 | BUILTIN(__nvvm_ldg_h, "hhC*" , "" ) |
902 | BUILTIN(__nvvm_ldg_f, "ffC*" , "" ) |
903 | BUILTIN(__nvvm_ldg_d, "ddC*" , "" ) |
904 | |
905 | BUILTIN(__nvvm_ldg_c2, "E2cE2cC*" , "" ) |
906 | BUILTIN(__nvvm_ldg_sc2, "E2ScE2ScC*" , "" ) |
907 | BUILTIN(__nvvm_ldg_c4, "E4cE4cC*" , "" ) |
908 | BUILTIN(__nvvm_ldg_sc4, "E4ScE4ScC*" , "" ) |
909 | BUILTIN(__nvvm_ldg_s2, "E2sE2sC*" , "" ) |
910 | BUILTIN(__nvvm_ldg_s4, "E4sE4sC*" , "" ) |
911 | BUILTIN(__nvvm_ldg_i2, "E2iE2iC*" , "" ) |
912 | BUILTIN(__nvvm_ldg_i4, "E4iE4iC*" , "" ) |
913 | BUILTIN(__nvvm_ldg_l2, "E2LiE2LiC*" , "" ) |
914 | BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*" , "" ) |
915 | |
916 | BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*" , "" ) |
917 | BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*" , "" ) |
918 | BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*" , "" ) |
919 | BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*" , "" ) |
920 | BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*" , "" ) |
921 | BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*" , "" ) |
922 | BUILTIN(__nvvm_ldg_ul2, "E2ULiE2ULiC*" , "" ) |
923 | BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*" , "" ) |
924 | |
925 | BUILTIN(__nvvm_ldg_h2, "E2hE2hC*" , "" ) |
926 | BUILTIN(__nvvm_ldg_f2, "E2fE2fC*" , "" ) |
927 | BUILTIN(__nvvm_ldg_f4, "E4fE4fC*" , "" ) |
928 | BUILTIN(__nvvm_ldg_d2, "E2dE2dC*" , "" ) |
929 | |
930 | // Address space predicates. |
// Each is bool f(const void *) with attributes "nc" (nothrow, const). The
// first four are ungated; the shared_cluster predicate additionally requires
// AND(SM_90,PTX78).
931 | BUILTIN(__nvvm_isspacep_const, "bvC*" , "nc" ) |
932 | BUILTIN(__nvvm_isspacep_global, "bvC*" , "nc" ) |
933 | BUILTIN(__nvvm_isspacep_local, "bvC*" , "nc" ) |
934 | BUILTIN(__nvvm_isspacep_shared, "bvC*" , "nc" ) |
935 | TARGET_BUILTIN(__nvvm_isspacep_shared_cluster,"bvC*" , "nc" , AND(SM_90,PTX78)) |
936 | |
937 | // Builtins to support WMMA instructions on sm_70 |
// Three tile shapes are declared: m16n16k16 (gated on AND(SM_70,PTX60)) and
// m32n8k16 / m8n32k16 (gated on AND(SM_70,PTX61)).
//
// Load/store entries are void f(T *, const T *, unsigned, <constant int>),
// where T is int for the a, b and f16 c fragments and float for the f32 c
// fragment; the trailing "Ii" operand must be an integer constant expression.
// NOTE(review): the unsigned is presumably the leading-dimension stride and
// the constant the row/column layout selector -- confirm.
//
// mma entries are void f(D *, const int *, const int *, const C *,
// <constant int>, <constant int>). In the name suffix mma_<d><c> the first
// type is the destination element type and the second the accumulator input
// type; f16 data travels through int pointers and f32 through float pointers.
938 | TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi" , "" , AND(SM_70,PTX60)) |
939 | TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi" , "" , AND(SM_70,PTX60)) |
940 | TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi" , "" , AND(SM_70,PTX60)) |
941 | TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi" , "" , AND(SM_70,PTX60)) |
942 | TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*iC*UiIi" , "" , AND(SM_70,PTX60)) |
943 | TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*fC*UiIi" , "" , AND(SM_70,PTX60)) |
944 | |
945 | TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi" , "" , AND(SM_70,PTX61)) |
946 | TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi" , "" , AND(SM_70,PTX61)) |
947 | TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi" , "" , AND(SM_70,PTX61)) |
948 | TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi" , "" , AND(SM_70,PTX61)) |
949 | TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*iC*UiIi" , "" , AND(SM_70,PTX61)) |
950 | TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*fC*UiIi" , "" , AND(SM_70,PTX61)) |
951 | |
952 | TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi" , "" , AND(SM_70,PTX61)) |
953 | TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi" , "" , AND(SM_70,PTX61)) |
954 | TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi" , "" , AND(SM_70,PTX61)) |
955 | TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi" , "" , AND(SM_70,PTX61)) |
956 | TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*iC*UiIi" , "" , AND(SM_70,PTX61)) |
957 | TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*fC*UiIi" , "" , AND(SM_70,PTX61)) |
958 | |
959 | TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi" , "" , AND(SM_70,PTX60)) |
960 | TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi" , "" , AND(SM_70,PTX60)) |
961 | TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi" , "" , AND(SM_70,PTX60)) |
962 | TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi" , "" , AND(SM_70,PTX60)) |
963 | |
964 | TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi" , "" , AND(SM_70,PTX61)) |
965 | TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi" , "" , AND(SM_70,PTX61)) |
966 | TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi" , "" , AND(SM_70,PTX61)) |
967 | TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi" , "" , AND(SM_70,PTX61)) |
968 | |
969 | TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi" , "" , AND(SM_70,PTX61)) |
970 | TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi" , "" , AND(SM_70,PTX61)) |
971 | TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi" , "" , AND(SM_70,PTX61)) |
972 | TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi" , "" , AND(SM_70,PTX61)) |
973 | |
974 | // Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75 |
// Feature gates in this group:
//   __imma_* s8/u8 (m16n16k16, m32n8k16, m8n32k16)   AND(SM_72,PTX63)
//   __imma_m8n8k32_* s4/u4                            AND(SM_75,PTX63)
//   __bmma_m8n8k128_* b1                              AND(SM_75,PTX63)
// Exception: __bmma_m8n8k128_mma_and_popc_b1 is gated on AND(SM_80,PTX71),
// so it is NOT available on sm_75 despite the section title.
//
// Load/store entries are void f(int *, const int *, unsigned, <constant int>).
// The imma mma entries end in two constant-int operands ("IiIi"); the bmma
// mma entries end in only one ("Ii").
975 | TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi" , "" , AND(SM_75,PTX63)) |
976 | TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi" , "" , AND(SM_75,PTX63)) |
977 | TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi" , "" , AND(SM_75,PTX63)) |
978 | TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii" , "" , AND(SM_80,PTX71)) |
979 | TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii" , "" , AND(SM_75,PTX63)) |
980 | TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi" , "" , AND(SM_75,PTX63)) |
981 | TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
982 | TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
983 | TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
984 | TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
985 | TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
986 | TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi" , "" , AND(SM_72,PTX63)) |
987 | TARGET_BUILTIN(__imma_m16n16k16_mma_u8, "vi*iC*iC*iC*IiIi" , "" , AND(SM_72,PTX63)) |
988 | TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
989 | TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
990 | TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
991 | TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
992 | TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
993 | TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
994 | TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi" , "" , AND(SM_72,PTX63)) |
995 | TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi" , "" , AND(SM_72,PTX63)) |
996 | TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
997 | TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
998 | TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
999 | TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
1000 | TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
1001 | TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
1002 | TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi" , "" , AND(SM_72,PTX63)) |
1003 | TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi" , "" , AND(SM_72,PTX63)) |
1004 | TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi" , "" , AND(SM_72,PTX63)) |
1005 | TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi" , "" , AND(SM_75,PTX63)) |
1006 | TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi" , "" , AND(SM_75,PTX63)) |
1007 | TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi" , "" , AND(SM_75,PTX63)) |
1008 | TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi" , "" , AND(SM_75,PTX63)) |
1009 | TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi" , "" , AND(SM_75,PTX63)) |
1010 | TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi" , "" , AND(SM_75,PTX63)) |
1011 | TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi" , "" , AND(SM_75,PTX63)) |
1012 | TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi" , "" , AND(SM_75,PTX63)) |
1013 | |
1014 | // Builtins to support double and alternate float WMMA instructions on sm_80 |
// Every entry is gated on AND(SM_80,PTX70).
//   __dmma_m8n8k4_*     f64 fragments: all pointers are double.
//   __mma_bf16_*        a/b fragments travel through int pointers and the
//                       accumulator through float pointers. Only ld_a, ld_b
//                       and mma are declared per shape; no ld_c / st_c entry
//                       exists in this group.
//   __mma_tf32_m16n16k8 a/b through int pointers, c through float pointers.
// Note the store for the tf32 shape is spelled __mma_m16n16k8_st_c_f32, without
// the tf32 infix used by its sibling entries.
// NOTE(review): bf16 users presumably reuse the __hmma_*_ld_c_f32 / st_c_f32
// builtins of the matching shape -- confirm against the CUDA headers.
1015 | TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi" , "" , AND(SM_80,PTX70)) |
1016 | TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi" , "" , AND(SM_80,PTX70)) |
1017 | TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi" , "" , AND(SM_80,PTX70)) |
1018 | TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi" , "" , AND(SM_80,PTX70)) |
1019 | TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi" , "" , AND(SM_80,PTX70)) |
1020 | |
1021 | TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi" , "" , AND(SM_80,PTX70)) |
1022 | TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi" , "" , AND(SM_80,PTX70)) |
1023 | TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi" , "" , AND(SM_80,PTX70)) |
1024 | TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi" , "" , AND(SM_80,PTX70)) |
1025 | TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi" , "" , AND(SM_80,PTX70)) |
1026 | TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi" , "" , AND(SM_80,PTX70)) |
1027 | TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi" , "" , AND(SM_80,PTX70)) |
1028 | TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi" , "" , AND(SM_80,PTX70)) |
1029 | TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi" , "" , AND(SM_80,PTX70)) |
1030 | |
1031 | TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi" , "" , AND(SM_80,PTX70)) |
1032 | TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi" , "" , AND(SM_80,PTX70)) |
1033 | TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi" , "" , AND(SM_80,PTX70)) |
1034 | TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi" , "" , AND(SM_80,PTX70)) |
1035 | TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi" , "" , AND(SM_80,PTX70)) |
1036 | |
1037 | // Async Copy |
// Every entry is gated on AND(SM_80,PTX70).
//   cp_async_mbarrier_arrive[_noinc]   void(int64_t *); the _shared forms take
//                                      the pointer in address space 3.
//   cp_async_{ca,cg}_shared_global_N   void(void * in address space 3,
//                                      const void * in address space 1, ...);
//                                      the trailing "." makes them variadic.
//   cp_async_commit_group / wait_all   void(void)
//   cp_async_wait_group                void(<constant int>)
// NOTE(review): the numeric suffix (4 / 8 / 16) is presumably the copy size in
// bytes and the variadic tail an optional source-size operand -- confirm.
1038 | TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*" , "" , AND(SM_80,PTX70)) |
1039 | TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3" , "" , AND(SM_80,PTX70)) |
1040 | TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*" , "" , AND(SM_80,PTX70)) |
1041 | TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3" , "" , AND(SM_80,PTX70)) |
1042 | |
1043 | TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1." , "" , AND(SM_80,PTX70)) |
1044 | TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1." , "" , AND(SM_80,PTX70)) |
1045 | TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1." , "" , AND(SM_80,PTX70)) |
1046 | TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1." , "" , AND(SM_80,PTX70)) |
1047 | |
1048 | TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v" , "" , AND(SM_80,PTX70)) |
1049 | TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi" , "" , AND(SM_80,PTX70)) |
1050 | TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v" , "" , AND(SM_80,PTX70)) |
1051 | |
1052 | |
1053 | // bf16, bf16x2 abs, neg |
// Scalar forms map __bf16 to __bf16 ("yy"); the x2 forms map a 2-element
// __bf16 vector to the same type ("V2yV2y"). All four are gated on
// AND(SM_80,PTX70).
1054 | TARGET_BUILTIN(__nvvm_abs_bf16, "yy" , "" , AND(SM_80,PTX70)) |
1055 | TARGET_BUILTIN(__nvvm_abs_bf16x2, "V2yV2y" , "" , AND(SM_80,PTX70)) |
1056 | TARGET_BUILTIN(__nvvm_neg_bf16, "yy" , "" , AND(SM_80,PTX70)) |
1057 | TARGET_BUILTIN(__nvvm_neg_bf16x2, "V2yV2y" , "" , AND(SM_80,PTX70)) |
1058 | |
// Cluster address helpers, all gated on SM_90 together with PTX78.
//   __nvvm_mapa        void *f(void *, int)
//   __nvvm_getctarank  int f(void *)
// The _shared_cluster forms use pointers in address space 3 for both the
// argument and, for mapa, the result.
// NOTE(review): the int passed to mapa is presumably the target CTA rank
// within the cluster -- confirm against the PTX ISA.
1059 | TARGET_BUILTIN(__nvvm_mapa, "v*v*i" , "" , AND(SM_90, PTX78)) |
1060 | TARGET_BUILTIN(__nvvm_mapa_shared_cluster, "v*3v*3i" , "" , AND(SM_90, PTX78)) |
1061 | TARGET_BUILTIN(__nvvm_getctarank, "iv*" , "" , AND(SM_90, PTX78)) |
1062 | TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3" , "" , AND(SM_90,PTX78)) |
1063 | |
// End of the database. First drop the includer-supplied BUILTIN and
// TARGET_BUILTIN macros, so every includer has to define them afresh. Then
// restore each helper macro used for the feature strings (AND, SM_*, PTX*) to
// whatever definition it had before this file was included. The matching
// push_macro pragmas for SM_* and the PTX* levels start near the top of the
// file; when a new SM or PTX level is added, add its push and its pop
// together.
1064 | #undef BUILTIN |
1065 | #undef TARGET_BUILTIN |
1066 | #pragma pop_macro("AND") |
1067 | #pragma pop_macro("SM_53") |
1068 | #pragma pop_macro("SM_60") |
1069 | #pragma pop_macro("SM_70") |
1070 | #pragma pop_macro("SM_72") |
1071 | #pragma pop_macro("SM_75") |
1072 | #pragma pop_macro("SM_80") |
1073 | #pragma pop_macro("SM_86") |
1074 | #pragma pop_macro("SM_87") |
1075 | #pragma pop_macro("SM_89") |
1076 | #pragma pop_macro("SM_90") |
1077 | #pragma pop_macro("SM_90a") |
1078 | #pragma pop_macro("PTX42") |
1079 | #pragma pop_macro("PTX60") |
1080 | #pragma pop_macro("PTX61") |
1081 | #pragma pop_macro("PTX62") |
1082 | #pragma pop_macro("PTX63") |
1083 | #pragma pop_macro("PTX64") |
1084 | #pragma pop_macro("PTX65") |
1085 | #pragma pop_macro("PTX70") |
1086 | #pragma pop_macro("PTX71") |
1087 | #pragma pop_macro("PTX72") |
1088 | #pragma pop_macro("PTX73") |
1089 | #pragma pop_macro("PTX74") |
1090 | #pragma pop_macro("PTX75") |
1091 | #pragma pop_macro("PTX76") |
1092 | #pragma pop_macro("PTX77") |
1093 | #pragma pop_macro("PTX78") |
1094 | #pragma pop_macro("PTX80") |
1095 | #pragma pop_macro("PTX81") |
1096 | #pragma pop_macro("PTX82") |
1097 | #pragma pop_macro("PTX83") |
1098 | #pragma pop_macro("PTX84") |
1099 | #pragma pop_macro("PTX85") |
1100 | |