//===------ SemaAMDGPU.cpp ------- AMDGPU target-specific routines --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements semantic analysis functions specific to AMDGPU.
//
//===----------------------------------------------------------------------===//

#include "clang/Sema/SemaAMDGPU.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Sema/Ownership.h"
#include "clang/Sema/Sema.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cstdint>

namespace clang {

SemaAMDGPU::SemaAMDGPU(Sema &S) : SemaBase(S) {}

bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
                                                CallExpr *TheCall) {
  // Position of the memory order and scope arguments in the builtin call.
  unsigned OrderIndex, ScopeIndex;

  const auto *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  assert(FD && "AMDGPU builtins should not be used outside of a function");
  llvm::StringMap<bool> CallerFeatureMap;
  getASTContext().getFunctionFeatureMap(CallerFeatureMap, FD);
  bool HasGFX950Insts =
      Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap);

  switch (BuiltinID) {
  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_async_lds:
  case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds:
  case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_async_lds:
  case AMDGPU::BI__builtin_amdgcn_load_to_lds:
  case AMDGPU::BI__builtin_amdgcn_load_async_to_lds:
  case AMDGPU::BI__builtin_amdgcn_global_load_lds:
  case AMDGPU::BI__builtin_amdgcn_global_load_async_lds: {
    constexpr int SizeIdx = 2;
    llvm::APSInt Size;
    Expr *ArgExpr = TheCall->getArg(SizeIdx);
    // Check for instantiation-dependent expressions (e.g., involving template
    // parameters). These will be checked again during template instantiation.
    if (ArgExpr->isInstantiationDependent())
      return false;
    [[maybe_unused]] ExprResult R =
        SemaRef.VerifyIntegerConstantExpression(ArgExpr, &Size);
    assert(!R.isInvalid());
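    // Valid sizes mirror the supported LDS DMA widths: 1, 2, and 4 bytes on
    // all targets, plus 12 and 16 bytes when gfx950-insts is available.
    // Illustrative call (argument layout simplified; see BuiltinsAMDGPU.def
    // for the authoritative prototypes):
    //   __builtin_amdgcn_load_to_lds(src, dst, /*size=*/16, /*offset=*/0,
    //                                /*aux=*/0); // requires gfx950-insts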
    switch (Size.getSExtValue()) {
    case 1:
    case 2:
    case 4:
      return false;
    case 12:
    case 16: {
      if (HasGFX950Insts)
        return false;
      [[fallthrough]];
    }
    default:
      SemaRef.targetDiag(ArgExpr->getExprLoc(),
                         diag::err_amdgcn_load_lds_size_invalid_value)
          << ArgExpr->getSourceRange();
      SemaRef.targetDiag(ArgExpr->getExprLoc(),
                         diag::note_amdgcn_load_lds_size_valid_value)
          << HasGFX950Insts << ArgExpr->getSourceRange();
      return true;
    }
  }
  case AMDGPU::BI__builtin_amdgcn_get_fpenv:
  case AMDGPU::BI__builtin_amdgcn_set_fpenv:
    return false;
  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
  case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
    OrderIndex = 2;
    ScopeIndex = 3;
    break;
  case AMDGPU::BI__builtin_amdgcn_fence:
    OrderIndex = 0;
    ScopeIndex = 1;
    break;
  case AMDGPU::BI__builtin_amdgcn_s_setreg:
    return SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/0, /*Low=*/0,
                                           /*High=*/UINT16_MAX);
  case AMDGPU::BI__builtin_amdgcn_s_wait_event: {
    llvm::APSInt Result;
    if (SemaRef.BuiltinConstantArg(TheCall, 0, Result))
      return true;

    bool IsGFX12Plus = Builtin::evaluateRequiredTargetFeatures(
        "gfx12-insts", CallerFeatureMap);

    // gfx11 -> gfx12 changed the interpretation of the bitmask: gfx12
    // inverted the interpretation of the export_ready bit and shifted the bit
    // used by 1. Thus waiting for the export_ready event can use a value of 2
    // universally.
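    // Consequently, the check below warns when the mask has no effect on the
    // current target: bit 0 set on pre-gfx12, bit 1 clear on gfx12+, or any
    // mask value above 2.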
    if (((IsGFX12Plus && !Result[1]) || (!IsGFX12Plus && Result[0])) ||
        Result.getZExtValue() > 2) {
      Expr *ArgExpr = TheCall->getArg(0);
      SemaRef.targetDiag(ArgExpr->getExprLoc(),
                         diag::warn_amdgpu_s_wait_event_mask_no_effect_target)
          << ArgExpr->getSourceRange();
      SemaRef.targetDiag(ArgExpr->getExprLoc(),
                         diag::note_amdgpu_s_wait_event_suggested_value)
          << ArgExpr->getSourceRange();
    }

    return false;
  }
  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
    return checkMovDPPFunctionCall(TheCall, 5, 1);
  case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
    return checkMovDPPFunctionCall(TheCall, 2, 1);
  case AMDGPU::BI__builtin_amdgcn_update_dpp:
    return checkMovDPPFunctionCall(TheCall, 6, 2);
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp8:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp8:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_bf8:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_bf8:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp4:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp4:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp8:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_bf8:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp4:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_fp6:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_fp6:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_bf6:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_bf6:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6:
  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6:
    return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 15);
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
    return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/false);
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
    return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/true);
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
    return checkAtomicMonitorLoad(TheCall);
  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
  case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: {
    StringRef FeatureList(
        getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
    if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,
                                                 CallerFeatureMap)) {
      Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
          << FD->getDeclName() << FeatureList;
      return false;
    }

    unsigned ArgCount = TheCall->getNumArgs() - 1;
    llvm::APSInt Result;

    // Complain about dmask values that are too large to fit into the 4 bits
    // the corresponding hardware instructions actually provide.
    constexpr unsigned DMaskArgNo = 0;
    constexpr int Low = 0;
    constexpr int High = 15;
    if (SemaRef.BuiltinConstantArg(TheCall, DMaskArgNo, Result) ||
        SemaRef.BuiltinConstantArgRange(TheCall, DMaskArgNo, Low, High,
                                        /*RangeIsError=*/true))
      return true;

    // Dmask indicates which elements should be returned, and it is not
    // possible to return more values than there are elements in the return
    // type.
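    // E.g. a dmask of 0b0111 requests three components, so a scalar or
    // 2-element return type would be diagnosed, while a 4-element vector is
    // fine.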
    int NumElementsInRetTy = 1;
    const Type *RetTy = TheCall->getType().getTypePtr();
    if (auto *VTy = dyn_cast<VectorType>(RetTy))
      NumElementsInRetTy = VTy->getNumElements();
    int NumActiveBitsInDMask =
        llvm::popcount(static_cast<uint8_t>(Result.getExtValue()));
    if (NumActiveBitsInDMask > NumElementsInRetTy) {
      Diag(TheCall->getBeginLoc(),
           diag::err_amdgcn_dmask_has_too_many_bits_set);
      return true;
    }

    // For gather, only one bit may be set, indicating which component to
    // return.
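    // That is, the dmask must be a power of two (1, 2, 4, or 8); anything
    // else fails the power-of-two check below.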
    bool ExtraGatherChecks =
        BuiltinID == AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32 &&
        SemaRef.BuiltinConstantArgPower2(TheCall, 0);

    return ExtraGatherChecks ||
           SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result) ||
           SemaRef.BuiltinConstantArg(TheCall, ArgCount - 1, Result);
  }
  case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
    StringRef FeatureList(
        getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
    if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,
                                                 CallerFeatureMap)) {
      Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
          << FD->getDeclName() << FeatureList;
      return false;
    }

    unsigned ArgCount = TheCall->getNumArgs() - 1;
    llvm::APSInt Result;

    // Complain about dmask values that are too large to fit into the 4 bits
    // the corresponding hardware instructions actually provide.
    constexpr unsigned DMaskArgNo = 1;
    return SemaRef.BuiltinConstantArgRange(TheCall, DMaskArgNo, /*Low=*/0,
                                           /*High=*/15,
                                           /*RangeIsError=*/true) ||
           SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result) ||
           SemaRef.BuiltinConstantArg(TheCall, ArgCount - 1, Result);
  }
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
    if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
      if (SemaRef.checkArgCountRange(TheCall, 7, 8))
        return true;
      if (TheCall->getNumArgs() == 7)
        return false;
    } else if (BuiltinID ==
               AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
      if (SemaRef.checkArgCountRange(TheCall, 8, 9))
        return true;
      if (TheCall->getNumArgs() == 8)
        return false;
    }
    // Check that the last argument (the clamp operand) is a constant
    // expression convertible to bool.
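    // E.g. a literal 0, 1, or true is accepted; a runtime value fails the
    // integer-constant-expression check below.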
    Expr *ClampArg = TheCall->getArg(TheCall->getNumArgs() - 1);
    // 1) Ensure the clamp argument is a constant expression.
    llvm::APSInt ClampValue;
    if (!SemaRef.VerifyIntegerConstantExpression(ClampArg, &ClampValue)
             .isUsable())
      return true;
    // 2) Check that the argument can be converted to bool.
    if (!SemaRef.Context.hasSameType(ClampArg->getType(),
                                     SemaRef.Context.BoolTy)) {
      // Try to convert to bool.
      QualType BoolTy = SemaRef.Context.BoolTy;
      ExprResult ClampExpr(ClampArg);
      SemaRef.CheckSingleAssignmentConstraints(BoolTy, ClampExpr);
      if (ClampExpr.isInvalid())
        return true;
    }
    return false;
  }
  default:
    return false;
  }

  ExprResult Arg = TheCall->getArg(OrderIndex);
  auto ArgExpr = Arg.get();
  Expr::EvalResult ArgResult;

  if (!ArgExpr->EvaluateAsInt(ArgResult, getASTContext()))
    return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
           << ArgExpr->getType();
  auto Ord = ArgResult.Val.getInt().getZExtValue();

  // Check the validity of the memory ordering as per C11 / C++11's memory
  // model. Only the fence builtin needs this check; atomic dec/inc allow all
  // memory orders.
  if (!llvm::isValidAtomicOrderingCABI(Ord))
    return Diag(ArgExpr->getBeginLoc(),
                diag::warn_atomic_op_has_invalid_memory_order)
           << 0 << ArgExpr->getSourceRange();
  switch (static_cast<llvm::AtomicOrderingCABI>(Ord)) {
  case llvm::AtomicOrderingCABI::relaxed:
  case llvm::AtomicOrderingCABI::consume:
    if (BuiltinID == AMDGPU::BI__builtin_amdgcn_fence)
      return Diag(ArgExpr->getBeginLoc(),
                  diag::warn_atomic_op_has_invalid_memory_order)
             << 0 << ArgExpr->getSourceRange();
    break;
  case llvm::AtomicOrderingCABI::acquire:
  case llvm::AtomicOrderingCABI::release:
  case llvm::AtomicOrderingCABI::acq_rel:
  case llvm::AtomicOrderingCABI::seq_cst:
    break;
  }
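  // E.g. __builtin_amdgcn_fence(__ATOMIC_RELAXED, "workgroup") is diagnosed,
  // while __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent") is accepted.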

  Arg = TheCall->getArg(ScopeIndex);
  ArgExpr = Arg.get();
  Expr::EvalResult ArgResult1;
  // Check that the synchronization scope is a constant literal.
  if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, getASTContext()))
    return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
           << ArgExpr->getType();

  return false;
}

bool SemaAMDGPU::checkAtomicOrderingCABIArg(Expr *E, bool MayLoad,
                                            bool MayStore) {
  Expr::EvalResult AtomicOrdArgRes;
  if (!E->EvaluateAsInt(AtomicOrdArgRes, getASTContext()))
    llvm_unreachable("Intrinsic requires imm for atomic ordering argument!");
  auto Ord =
      llvm::AtomicOrderingCABI(AtomicOrdArgRes.Val.getInt().getZExtValue());

  // The atomic ordering cannot be acq_rel unless the operation both loads and
  // stores, cannot be acquire for store-only operations, and cannot be
  // release for load-only operations.
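  // For a load-only intrinsic, for example, relaxed, acquire, and seq_cst are
  // accepted, while release and acq_rel are rejected.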
  if (!llvm::isValidAtomicOrderingCABI((unsigned)Ord) ||
      (!(MayLoad && MayStore) && (Ord == llvm::AtomicOrderingCABI::acq_rel)) ||
      (!MayLoad && Ord == llvm::AtomicOrderingCABI::acquire) ||
      (!MayStore && Ord == llvm::AtomicOrderingCABI::release)) {
    return Diag(E->getBeginLoc(),
                diag::warn_atomic_op_has_invalid_memory_order)
           << 0 << E->getSourceRange();
  }

  return false;
}

bool SemaAMDGPU::checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore) {
  bool Fail = false;

  // First argument is a global or generic pointer.
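  // In AMDGPU's address-space numbering those are the flat (0) and global (1)
  // address spaces; pointers to LDS, scratch, etc. are diagnosed below.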
  Expr *PtrArg = TheCall->getArg(0);
  QualType PtrTy = PtrArg->getType()->getPointeeType();
  unsigned AS = getASTContext().getTargetAddressSpace(PtrTy.getAddressSpace());
  if (AS != llvm::AMDGPUAS::FLAT_ADDRESS &&
      AS != llvm::AMDGPUAS::GLOBAL_ADDRESS) {
    Fail = true;
    Diag(TheCall->getBeginLoc(), diag::err_amdgcn_coop_atomic_invalid_as)
        << PtrArg->getSourceRange();
  }

  Expr *AO = TheCall->getArg(IsStore ? 2 : 1);
  Expr *Scope = TheCall->getArg(TheCall->getNumArgs() - 1);

  if (AO->isValueDependent() || Scope->isValueDependent())
    return false;

  // Check the atomic ordering.
  Fail |= checkAtomicOrderingCABIArg(AO, /*MayLoad=*/!IsStore,
                                     /*MayStore=*/IsStore);

  // Last argument is the syncscope as a string literal.
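  // e.g. "agent" or "workgroup" (illustrative scope names); a non-literal
  // scope argument is rejected here.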
  if (!isa<StringLiteral>(Scope->IgnoreParenImpCasts())) {
    Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal)
        << Scope->getSourceRange();
    Fail = true;
  }

  return Fail;
}

bool SemaAMDGPU::checkAtomicMonitorLoad(CallExpr *TheCall) {
  bool Fail = false;

  Expr *AO = TheCall->getArg(1);
  Expr *Scope = TheCall->getArg(TheCall->getNumArgs() - 1);

  if (AO->isValueDependent() || Scope->isValueDependent())
    return false;

  Fail |= checkAtomicOrderingCABIArg(AO, /*MayLoad=*/true,
                                     /*MayStore=*/false);

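  // Unlike the cooperative atomics, the scope operand here is an integer
  // constant validated against the generic atomic scope model rather than a
  // string syncscope.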
  auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
  if (std::optional<llvm::APSInt> Result =
          Scope->getIntegerConstantExpr(SemaRef.Context)) {
    if (!ScopeModel->isValid(Result->getZExtValue())) {
      Diag(Scope->getBeginLoc(), diag::err_atomic_op_has_invalid_sync_scope)
          << Scope->getSourceRange();
      Fail = true;
    }
  }

  return Fail;
}

bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
                                         unsigned NumDataArgs) {
  assert(NumDataArgs <= 2);
  if (SemaRef.checkArgCountRange(TheCall, NumArgs, NumArgs))
    return true;
  Expr *Args[2];
  QualType ArgTys[2];
  for (unsigned I = 0; I != NumDataArgs; ++I) {
    Args[I] = TheCall->getArg(I);
    ArgTys[I] = Args[I]->getType();
    // TODO: Vectors can also be supported.
    if (!ArgTys[I]->isArithmeticType() || ArgTys[I]->isAnyComplexType()) {
      SemaRef.Diag(Args[I]->getBeginLoc(),
                   diag::err_typecheck_cond_expect_int_float)
          << ArgTys[I] << Args[I]->getSourceRange();
      return true;
    }
  }
  if (NumDataArgs < 2)
    return false;

  if (getASTContext().hasSameUnqualifiedType(ArgTys[0], ArgTys[1]))
    return false;

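  // Tolerate a signed/unsigned mismatch as long as the types have the same
  // width, e.g. mixing int and unsigned data operands in
  // __builtin_amdgcn_update_dpp.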
  if (((ArgTys[0]->isUnsignedIntegerType() &&
        ArgTys[1]->isSignedIntegerType()) ||
       (ArgTys[0]->isSignedIntegerType() &&
        ArgTys[1]->isUnsignedIntegerType())) &&
      getASTContext().getTypeSize(ArgTys[0]) ==
          getASTContext().getTypeSize(ArgTys[1]))
    return false;

  SemaRef.Diag(Args[1]->getBeginLoc(),
               diag::err_typecheck_call_different_arg_types)
      << ArgTys[0] << ArgTys[1];
  return true;
}

static bool
checkAMDGPUFlatWorkGroupSizeArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr,
                                      const AMDGPUFlatWorkGroupSizeAttr &Attr) {
  // Accept template arguments for now as they depend on something else.
  // We'll get to check them when they eventually get instantiated.
  if (MinExpr->isValueDependent() || MaxExpr->isValueDependent())
    return false;

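  // The attribute carries a [Min, Max] range, e.g.
  //   __attribute__((amdgpu_flat_work_group_size(64, 256)))
  // Min may only be zero when Max is also zero, and Min must not exceed Max.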
  uint32_t Min = 0;
  if (!S.checkUInt32Argument(Attr, MinExpr, Min, 0))
    return true;

  uint32_t Max = 0;
  if (!S.checkUInt32Argument(Attr, MaxExpr, Max, 1))
    return true;

  if (Min == 0 && Max != 0) {
    S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
        << &Attr << 0;
    return true;
  }
  if (Min > Max) {
    S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
        << &Attr << 1;
    return true;
  }

  return false;
}

AMDGPUFlatWorkGroupSizeAttr *
SemaAMDGPU::CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI,
                                              Expr *MinExpr, Expr *MaxExpr) {
  ASTContext &Context = getASTContext();
  AMDGPUFlatWorkGroupSizeAttr TmpAttr(Context, CI, MinExpr, MaxExpr);

  if (checkAMDGPUFlatWorkGroupSizeArguments(SemaRef, MinExpr, MaxExpr, TmpAttr))
    return nullptr;
  return ::new (Context)
      AMDGPUFlatWorkGroupSizeAttr(Context, CI, MinExpr, MaxExpr);
}

void SemaAMDGPU::addAMDGPUFlatWorkGroupSizeAttr(Decl *D,
                                                const AttributeCommonInfo &CI,
                                                Expr *MinExpr, Expr *MaxExpr) {
  if (auto *Attr = CreateAMDGPUFlatWorkGroupSizeAttr(CI, MinExpr, MaxExpr))
    D->addAttr(Attr);
}

void SemaAMDGPU::handleAMDGPUFlatWorkGroupSizeAttr(Decl *D,
                                                   const ParsedAttr &AL) {
  Expr *MinExpr = AL.getArgAsExpr(0);
  Expr *MaxExpr = AL.getArgAsExpr(1);

  addAMDGPUFlatWorkGroupSizeAttr(D, AL, MinExpr, MaxExpr);
}

static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr,
                                           Expr *MaxExpr,
                                           const AMDGPUWavesPerEUAttr &Attr) {
  if (S.DiagnoseUnexpandedParameterPack(MinExpr) ||
      (MaxExpr && S.DiagnoseUnexpandedParameterPack(MaxExpr)))
    return true;

  // Accept template arguments for now as they depend on something else.
  // We'll get to check them when they eventually get instantiated.
  if (MinExpr->isValueDependent() || (MaxExpr && MaxExpr->isValueDependent()))
    return false;

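  // The maximum is optional, e.g.
  //   __attribute__((amdgpu_waves_per_eu(2)))
  //   __attribute__((amdgpu_waves_per_eu(2, 4)))
  // When both are given, a nonzero Max must be at least Min.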
  uint32_t Min = 0;
  if (!S.checkUInt32Argument(Attr, MinExpr, Min, 0))
    return true;

  uint32_t Max = 0;
  if (MaxExpr && !S.checkUInt32Argument(Attr, MaxExpr, Max, 1))
    return true;

  if (Min == 0 && Max != 0) {
    S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
        << &Attr << 0;
    return true;
  }
  if (Max != 0 && Min > Max) {
    S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
        << &Attr << 1;
    return true;
  }

  return false;
}

AMDGPUWavesPerEUAttr *
SemaAMDGPU::CreateAMDGPUWavesPerEUAttr(const AttributeCommonInfo &CI,
                                       Expr *MinExpr, Expr *MaxExpr) {
  ASTContext &Context = getASTContext();
  AMDGPUWavesPerEUAttr TmpAttr(Context, CI, MinExpr, MaxExpr);

  if (checkAMDGPUWavesPerEUArguments(SemaRef, MinExpr, MaxExpr, TmpAttr))
    return nullptr;

  return ::new (Context) AMDGPUWavesPerEUAttr(Context, CI, MinExpr, MaxExpr);
}

void SemaAMDGPU::addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI,
                                         Expr *MinExpr, Expr *MaxExpr) {
  if (auto *Attr = CreateAMDGPUWavesPerEUAttr(CI, MinExpr, MaxExpr))
    D->addAttr(Attr);
}

void SemaAMDGPU::handleAMDGPUWavesPerEUAttr(Decl *D, const ParsedAttr &AL) {
  if (!AL.checkAtLeastNumArgs(SemaRef, 1) || !AL.checkAtMostNumArgs(SemaRef, 2))
    return;

  Expr *MinExpr = AL.getArgAsExpr(0);
  Expr *MaxExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(1) : nullptr;

  addAMDGPUWavesPerEUAttr(D, AL, MinExpr, MaxExpr);
}

void SemaAMDGPU::handleAMDGPUNumSGPRAttr(Decl *D, const ParsedAttr &AL) {
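  // e.g. __attribute__((amdgpu_num_sgpr(32))); the argument is a single
  // 32-bit unsigned value. amdgpu_num_vgpr below is handled identically.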
  uint32_t NumSGPR = 0;
  Expr *NumSGPRExpr = AL.getArgAsExpr(0);
  if (!SemaRef.checkUInt32Argument(AL, NumSGPRExpr, NumSGPR))
    return;

  D->addAttr(::new (getASTContext())
                 AMDGPUNumSGPRAttr(getASTContext(), AL, NumSGPR));
}

void SemaAMDGPU::handleAMDGPUNumVGPRAttr(Decl *D, const ParsedAttr &AL) {
  uint32_t NumVGPR = 0;
  Expr *NumVGPRExpr = AL.getArgAsExpr(0);
  if (!SemaRef.checkUInt32Argument(AL, NumVGPRExpr, NumVGPR))
    return;

  D->addAttr(::new (getASTContext())
                 AMDGPUNumVGPRAttr(getASTContext(), AL, NumVGPR));
}

static bool
checkAMDGPUMaxNumWorkGroupsArguments(Sema &S, Expr *XExpr, Expr *YExpr,
                                     Expr *ZExpr,
                                     const AMDGPUMaxNumWorkGroupsAttr &Attr) {
  if (S.DiagnoseUnexpandedParameterPack(XExpr) ||
      (YExpr && S.DiagnoseUnexpandedParameterPack(YExpr)) ||
      (ZExpr && S.DiagnoseUnexpandedParameterPack(ZExpr)))
    return true;

  // Accept template arguments for now as they depend on something else.
  // We'll get to check them when they eventually get instantiated.
  if (XExpr->isValueDependent() || (YExpr && YExpr->isValueDependent()) ||
      (ZExpr && ZExpr->isValueDependent()))
    return false;

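  // e.g. __attribute__((amdgpu_max_num_work_groups(16, 8, 1))); the y and z
  // arguments are optional, and every argument that is given must be a
  // nonzero 32-bit unsigned value.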
  uint32_t NumWG = 0;
  Expr *Exprs[3] = {XExpr, YExpr, ZExpr};
  for (int i = 0; i < 3; i++) {
    if (Exprs[i]) {
      if (!S.checkUInt32Argument(Attr, Exprs[i], NumWG, i,
                                 /*StrictlyUnsigned=*/true))
        return true;
      if (NumWG == 0) {
        S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
            << &Attr << Exprs[i]->getSourceRange();
        return true;
      }
    }
  }

  return false;
}

AMDGPUMaxNumWorkGroupsAttr *SemaAMDGPU::CreateAMDGPUMaxNumWorkGroupsAttr(
    const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr) {
  ASTContext &Context = getASTContext();
  AMDGPUMaxNumWorkGroupsAttr TmpAttr(Context, CI, XExpr, YExpr, ZExpr);
  assert(!SemaRef.isSFINAEContext() &&
         "Can't produce SFINAE diagnostic pointing to temporary attribute");

  if (checkAMDGPUMaxNumWorkGroupsArguments(SemaRef, XExpr, YExpr, ZExpr,
                                           TmpAttr))
    return nullptr;

  return ::new (Context)
      AMDGPUMaxNumWorkGroupsAttr(Context, CI, XExpr, YExpr, ZExpr);
}

void SemaAMDGPU::addAMDGPUMaxNumWorkGroupsAttr(Decl *D,
                                               const AttributeCommonInfo &CI,
                                               Expr *XExpr, Expr *YExpr,
                                               Expr *ZExpr) {
  if (auto *Attr = CreateAMDGPUMaxNumWorkGroupsAttr(CI, XExpr, YExpr, ZExpr))
    D->addAttr(Attr);
}

void SemaAMDGPU::handleAMDGPUMaxNumWorkGroupsAttr(Decl *D,
                                                  const ParsedAttr &AL) {
  Expr *YExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(1) : nullptr;
  Expr *ZExpr = (AL.getNumArgs() > 2) ? AL.getArgAsExpr(2) : nullptr;
  addAMDGPUMaxNumWorkGroupsAttr(D, AL, AL.getArgAsExpr(0), YExpr, ZExpr);
}

} // namespace clang