1//===------ SemaAMDGPU.cpp ------- AMDGPU target-specific routines --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements semantic analysis functions specific to AMDGPU.
10//
11//===----------------------------------------------------------------------===//
12
13#include "clang/Sema/SemaAMDGPU.h"
14#include "clang/AST/Decl.h"
15#include "clang/AST/DynamicRecursiveASTVisitor.h"
16#include "clang/AST/Expr.h"
17#include "clang/Basic/DiagnosticFrontend.h"
18#include "clang/Basic/DiagnosticSema.h"
19#include "clang/Basic/TargetBuiltins.h"
20#include "clang/Basic/TargetInfo.h"
21#include "clang/Sema/Ownership.h"
22#include "clang/Sema/Scope.h"
23#include "clang/Sema/Sema.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/ADT/StringExtras.h"
26#include "llvm/ADT/StringMap.h"
27#include "llvm/Support/AMDGPUAddrSpace.h"
28#include "llvm/Support/AtomicOrdering.h"
29#include "llvm/TargetParser/AMDGPUTargetParser.h"
30#include <cstdint>
31#include <utility>
32
33namespace clang {
34
35SemaAMDGPU::SemaAMDGPU(Sema &S) : SemaBase(S) {}
36
37bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
38 CallExpr *TheCall) {
39 // position of memory order and scope arguments in the builtin
40 unsigned OrderIndex, ScopeIndex;
41
42 const auto *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
43 assert(FD && "AMDGPU builtins should not be used outside of a function");
44 llvm::StringMap<bool> CallerFeatureMap;
45 getASTContext().getFunctionFeatureMap(FeatureMap&: CallerFeatureMap, FD);
46 bool HasGFX950Insts =
47 Builtin::evaluateRequiredTargetFeatures(RequiredFatures: "gfx950-insts", TargetFetureMap: CallerFeatureMap);
48
49 switch (BuiltinID) {
50 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
51 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_async_lds:
52 case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds:
53 case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_async_lds:
54 case AMDGPU::BI__builtin_amdgcn_load_to_lds:
55 case AMDGPU::BI__builtin_amdgcn_load_async_to_lds:
56 case AMDGPU::BI__builtin_amdgcn_global_load_lds:
57 case AMDGPU::BI__builtin_amdgcn_global_load_async_lds: {
58 constexpr const int SizeIdx = 2;
59 llvm::APSInt Size;
60 Expr *ArgExpr = TheCall->getArg(Arg: SizeIdx);
61 // Check for instantiation-dependent expressions (e.g., involving template
62 // parameters). These will be checked again during template instantiation.
63 if (ArgExpr->isInstantiationDependent())
64 return false;
65 [[maybe_unused]] ExprResult R =
66 SemaRef.VerifyIntegerConstantExpression(E: ArgExpr, Result: &Size);
67 assert(!R.isInvalid());
68 switch (Size.getSExtValue()) {
69 case 1:
70 case 2:
71 case 4:
72 return false;
73 case 12:
74 case 16: {
75 if (HasGFX950Insts)
76 return false;
77 [[fallthrough]];
78 }
79 default:
80 SemaRef.targetDiag(Loc: ArgExpr->getExprLoc(),
81 DiagID: diag::err_amdgcn_load_lds_size_invalid_value)
82 << ArgExpr->getSourceRange();
83 SemaRef.targetDiag(Loc: ArgExpr->getExprLoc(),
84 DiagID: diag::note_amdgcn_load_lds_size_valid_value)
85 << HasGFX950Insts << ArgExpr->getSourceRange();
86 return true;
87 }
88 }
89 case AMDGPU::BI__builtin_amdgcn_get_fpenv:
90 case AMDGPU::BI__builtin_amdgcn_set_fpenv:
91 return false;
92 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
93 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
94 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
95 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
96 OrderIndex = 2;
97 ScopeIndex = 3;
98 break;
99 case AMDGPU::BI__builtin_amdgcn_fence:
100 OrderIndex = 0;
101 ScopeIndex = 1;
102 break;
103 case AMDGPU::BI__builtin_amdgcn_s_setreg:
104 return SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/0, /*Low=*/0,
105 /*High=*/UINT16_MAX);
106 case AMDGPU::BI__builtin_amdgcn_s_wait_event: {
107 llvm::APSInt Result;
108 if (SemaRef.BuiltinConstantArg(TheCall, ArgNum: 0, Result))
109 return true;
110
111 bool IsGFX12Plus = Builtin::evaluateRequiredTargetFeatures(
112 RequiredFatures: "gfx12-insts", TargetFetureMap: CallerFeatureMap);
113
114 // gfx11 -> gfx12 changed the interpretation of the bitmask. gfx12 inverted
115 // the intepretation for export_ready, but shifted the used bit by 1. Thus
116 // waiting for the export_ready event can use a value of 2 universally.
117 if (((IsGFX12Plus && !Result[1]) || (!IsGFX12Plus && Result[0])) ||
118 Result.getZExtValue() > 2) {
119 Expr *ArgExpr = TheCall->getArg(Arg: 0);
120 SemaRef.targetDiag(Loc: ArgExpr->getExprLoc(),
121 DiagID: diag::warn_amdgpu_s_wait_event_mask_no_effect_target)
122 << ArgExpr->getSourceRange();
123 SemaRef.targetDiag(Loc: ArgExpr->getExprLoc(),
124 DiagID: diag::note_amdgpu_s_wait_event_suggested_value)
125 << ArgExpr->getSourceRange();
126 }
127
128 return false;
129 }
130 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
131 return checkMovDPPFunctionCall(TheCall, NumArgs: 5, NumDataArgs: 1);
132 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
133 return checkMovDPPFunctionCall(TheCall, NumArgs: 2, NumDataArgs: 1);
134 case AMDGPU::BI__builtin_amdgcn_update_dpp:
135 return checkMovDPPFunctionCall(TheCall, NumArgs: 6, NumDataArgs: 2);
136 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp8:
137 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp8:
138 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_bf8:
139 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_bf8:
140 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp4:
141 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp4:
142 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp8:
143 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_bf8:
144 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp4:
145 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_fp6:
146 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_fp6:
147 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_bf6:
148 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_bf6:
149 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6:
150 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6:
151 return SemaRef.BuiltinConstantArgRange(TheCall, ArgNum: 2, Low: 0, High: 15);
152 case AMDGPU::BI__builtin_amdgcn_av_load_b128:
153 return checkAVLoadStore(TheCall, /*IsStore=*/false);
154 case AMDGPU::BI__builtin_amdgcn_av_store_b128:
155 return checkAVLoadStore(TheCall, /*IsStore=*/true);
156 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
157 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
158 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
159 return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/false);
160 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
161 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
162 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
163 return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/true);
164 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
165 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
166 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
167 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
168 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
169 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
170 return checkAtomicMonitorLoad(TheCall);
171 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
172 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
173 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
174 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
175 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
176 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
177 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
178 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
179 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
180 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
181 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
182 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
183 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
184 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
185 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
186 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
187 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
188 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
189 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
190 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
191 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
192 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
193 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
194 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
195 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
196 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
197 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
198 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
199 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
200 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
201 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
202 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
203 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
204 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
205 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
206 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
207 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
208 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
209 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
210 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
211 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
212 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
213 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
214 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
215 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
216 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
217 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
218 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
219 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
220 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
221 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
222 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
223 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
224 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
225 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
226 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
227 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
228 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
229 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
230 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
231 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
232 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
233 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
234 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
235 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
236 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
237 case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
238 case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
239 case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
240 case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
241 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
242 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
243 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
244 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
245 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
246 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
247 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
248 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
249 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
250 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
251 case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
252 case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
253 case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: {
254 StringRef FeatureList(
255 getASTContext().BuiltinInfo.getRequiredFeatures(ID: BuiltinID));
256 if (!Builtin::evaluateRequiredTargetFeatures(RequiredFatures: FeatureList,
257 TargetFetureMap: CallerFeatureMap)) {
258 Diag(Loc: TheCall->getBeginLoc(), DiagID: diag::err_builtin_needs_feature)
259 << FD->getDeclName() << FeatureList;
260 return false;
261 }
262
263 unsigned ArgCount = TheCall->getNumArgs() - 1;
264 llvm::APSInt Result;
265
266 // Compilain about dmask values which are too huge to fully fit into 4 bits
267 // (which is the actual size of the dmask in corresponding HW instructions).
268 constexpr unsigned DMaskArgNo = 0;
269 constexpr int Low = 0;
270 constexpr int High = 15;
271 if (SemaRef.BuiltinConstantArg(TheCall, ArgNum: DMaskArgNo, Result) ||
272 SemaRef.BuiltinConstantArgRange(TheCall, ArgNum: DMaskArgNo, Low, High,
273 /* RangeIsError = */ true))
274 return true;
275
276 // Dmask indicates which elements should be returned and it is not possible
277 // to return more values than there are elements in return type.
278 int NumElementsInRetTy = 1;
279 const Type *RetTy = TheCall->getType().getTypePtr();
280 if (auto *VTy = dyn_cast<VectorType>(Val: RetTy))
281 NumElementsInRetTy = VTy->getNumElements();
282 int NumActiveBitsInDMask =
283 llvm::popcount(Value: static_cast<uint8_t>(Result.getExtValue()));
284 if (NumActiveBitsInDMask > NumElementsInRetTy) {
285 Diag(Loc: TheCall->getBeginLoc(),
286 DiagID: diag::err_amdgcn_dmask_has_too_many_bits_set);
287 return true;
288 }
289
290 // For gather, only one bit can be set indicating which exact component to
291 // return.
292 bool ExtraGatherChecks =
293 BuiltinID == AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32 &&
294 SemaRef.BuiltinConstantArgPower2(TheCall, ArgNum: 0);
295
296 return ExtraGatherChecks ||
297 (SemaRef.BuiltinConstantArg(TheCall, ArgNum: ArgCount, Result)) ||
298 (SemaRef.BuiltinConstantArg(TheCall, ArgNum: (ArgCount - 1), Result));
299 }
300 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
301 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
302 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
303 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
304 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
305 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
306 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
307 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
308 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
309 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
310 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
311 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
312 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
313 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
314 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
315 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
316 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
317 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
318 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
319 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
320 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
321 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
322 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
323 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
324 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
325 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
326 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
327 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
328 StringRef FeatureList(
329 getASTContext().BuiltinInfo.getRequiredFeatures(ID: BuiltinID));
330 if (!Builtin::evaluateRequiredTargetFeatures(RequiredFatures: FeatureList,
331 TargetFetureMap: CallerFeatureMap)) {
332 Diag(Loc: TheCall->getBeginLoc(), DiagID: diag::err_builtin_needs_feature)
333 << FD->getDeclName() << FeatureList;
334 return false;
335 }
336
337 unsigned ArgCount = TheCall->getNumArgs() - 1;
338 llvm::APSInt Result;
339
340 // Complain about dmask values which are too huge to fully fit into 4 bits
341 // (which is the actual size of the dmask in corresponding HW instructions).
342 constexpr unsigned DMaskArgNo = 1;
343 return SemaRef.BuiltinConstantArgRange(TheCall, ArgNum: DMaskArgNo, /*Low=*/0,
344 /*High=*/15,
345 /*RangeIsError=*/true) ||
346 SemaRef.BuiltinConstantArg(TheCall, ArgNum: ArgCount, Result) ||
347 SemaRef.BuiltinConstantArg(TheCall, ArgNum: (ArgCount - 1), Result);
348 }
349 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
350 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
351 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
352 if (SemaRef.checkArgCountRange(Call: TheCall, MinArgCount: 7, MaxArgCount: 8))
353 return true;
354 if (TheCall->getNumArgs() == 7)
355 return false;
356 } else if (BuiltinID ==
357 AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
358 if (SemaRef.checkArgCountRange(Call: TheCall, MinArgCount: 8, MaxArgCount: 9))
359 return true;
360 if (TheCall->getNumArgs() == 8)
361 return false;
362 }
363 // Check if the last argument (clamp operand) is a constant and is
364 // convertible to bool.
365 Expr *ClampArg = TheCall->getArg(Arg: TheCall->getNumArgs() - 1);
366 // 1) Ensure clamp argument is a constant expression
367 llvm::APSInt ClampValue;
368 if (!SemaRef.VerifyIntegerConstantExpression(E: ClampArg, Result: &ClampValue)
369 .isUsable())
370 return true;
371 // 2) Check if the argument can be converted to bool type
372 if (!SemaRef.Context.hasSameType(T1: ClampArg->getType(),
373 T2: SemaRef.Context.BoolTy)) {
374 // Try to convert to bool
375 QualType BoolTy = SemaRef.Context.BoolTy;
376 ExprResult ClampExpr(ClampArg);
377 SemaRef.CheckSingleAssignmentConstraints(LHSType: BoolTy, RHS&: ClampExpr);
378 if (ClampExpr.isInvalid())
379 return true;
380 }
381 return false;
382 }
383 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
384 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
385 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
386 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
387 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
388 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
389 return SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/0, /*Low=*/0,
390 /*High=*/0) ||
391 SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/2, /*Low=*/0,
392 /*High=*/0);
393 default:
394 return false;
395 }
396
397 ExprResult Arg = TheCall->getArg(Arg: OrderIndex);
398 auto ArgExpr = Arg.get();
399 Expr::EvalResult ArgResult;
400
401 if (!ArgExpr->EvaluateAsInt(Result&: ArgResult, Ctx: getASTContext()))
402 return Diag(Loc: ArgExpr->getExprLoc(), DiagID: diag::err_typecheck_expect_int)
403 << ArgExpr->getType();
404 auto Ord = ArgResult.Val.getInt().getZExtValue();
405
406 // Check validity of memory ordering as per C11 / C++11's memory model.
407 // Only fence needs check. Atomic dec/inc allow all memory orders.
408 if (!llvm::isValidAtomicOrderingCABI(I: Ord))
409 return Diag(Loc: ArgExpr->getBeginLoc(),
410 DiagID: diag::warn_atomic_op_has_invalid_memory_order)
411 << 0 << ArgExpr->getSourceRange();
412 switch (static_cast<llvm::AtomicOrderingCABI>(Ord)) {
413 case llvm::AtomicOrderingCABI::relaxed:
414 case llvm::AtomicOrderingCABI::consume:
415 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_fence)
416 return Diag(Loc: ArgExpr->getBeginLoc(),
417 DiagID: diag::warn_atomic_op_has_invalid_memory_order)
418 << 0 << ArgExpr->getSourceRange();
419 break;
420 case llvm::AtomicOrderingCABI::acquire:
421 case llvm::AtomicOrderingCABI::release:
422 case llvm::AtomicOrderingCABI::acq_rel:
423 case llvm::AtomicOrderingCABI::seq_cst:
424 break;
425 }
426
427 Arg = TheCall->getArg(Arg: ScopeIndex);
428 ArgExpr = Arg.get();
429 Expr::EvalResult ArgResult1;
430 // Check that sync scope is a constant literal
431 if (!ArgExpr->EvaluateAsConstantExpr(Result&: ArgResult1, Ctx: getASTContext()))
432 return Diag(Loc: ArgExpr->getExprLoc(), DiagID: diag::err_expr_not_string_literal)
433 << ArgExpr->getType();
434
435 return false;
436}
437
438bool SemaAMDGPU::checkAtomicOrderingCABIArg(Expr *E, bool MayLoad,
439 bool MayStore) {
440 Expr::EvalResult AtomicOrdArgRes;
441 if (!E->EvaluateAsInt(Result&: AtomicOrdArgRes, Ctx: getASTContext()))
442 llvm_unreachable("Intrinsic requires imm for atomic ordering argument!");
443 auto Ord =
444 llvm::AtomicOrderingCABI(AtomicOrdArgRes.Val.getInt().getZExtValue());
445
446 // Atomic ordering cannot be acq_rel in any case, acquire for stores or
447 // release for loads.
448 if (!llvm::isValidAtomicOrderingCABI(I: (unsigned)Ord) ||
449 (!(MayLoad && MayStore) && (Ord == llvm::AtomicOrderingCABI::acq_rel)) ||
450 (!MayLoad && Ord == llvm::AtomicOrderingCABI::acquire) ||
451 (!MayStore && Ord == llvm::AtomicOrderingCABI::release)) {
452 return Diag(Loc: E->getBeginLoc(), DiagID: diag::warn_atomic_op_has_invalid_memory_order)
453 << 0 << E->getSourceRange();
454 }
455
456 return false;
457}
458
459// Check that the first argument to TheCall is a global or generic pointer.
460static bool checkGlobalOrFlatPointerArg(SemaAMDGPU &S, CallExpr *TheCall) {
461 Expr *PtrArg = TheCall->getArg(Arg: 0);
462 QualType PtrTy = PtrArg->getType()->getPointeeType();
463 unsigned AS =
464 S.getASTContext().getTargetAddressSpace(AS: PtrTy.getAddressSpace());
465 if (AS != llvm::AMDGPUAS::FLAT_ADDRESS &&
466 AS != llvm::AMDGPUAS::GLOBAL_ADDRESS) {
467 return S.Diag(Loc: TheCall->getBeginLoc(),
468 DiagID: diag::err_amdgcn_global_or_flat_pointer_required)
469 << PtrArg->getSourceRange();
470 }
471 return false;
472}
473
474static bool checkScopeAsInt(SemaAMDGPU &S, Expr *Scope) {
475 if (Scope->isValueDependent())
476 return false;
477 auto ScopeModel = AtomicScopeModel::create(K: AtomicScopeModelKind::Generic);
478 if (std::optional<llvm::APSInt> Result =
479 Scope->getIntegerConstantExpr(Ctx: S.SemaRef.Context)) {
480 if (!ScopeModel->isValid(S: Result->getZExtValue())) {
481 return S.Diag(Loc: Scope->getBeginLoc(),
482 DiagID: diag::err_atomic_op_has_invalid_sync_scope)
483 << Scope->getSourceRange();
484 }
485 }
486 return false;
487}
488
489bool SemaAMDGPU::checkAVLoadStore(CallExpr *TheCall, bool IsStore) {
490 if (checkGlobalOrFlatPointerArg(S&: *this, TheCall))
491 return true;
492
493 Expr *Scope = TheCall->getArg(Arg: TheCall->getNumArgs() - 1);
494 return checkScopeAsInt(S&: *this, Scope);
495}
496
497bool SemaAMDGPU::checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore) {
498 bool Fail = checkGlobalOrFlatPointerArg(S&: *this, TheCall);
499
500 Expr *AO = TheCall->getArg(Arg: IsStore ? 2 : 1);
501 Expr *Scope = TheCall->getArg(Arg: TheCall->getNumArgs() - 1);
502
503 if (AO->isValueDependent() || Scope->isValueDependent())
504 return false;
505
506 // Check atomic ordering
507 Fail |=
508 checkAtomicOrderingCABIArg(E: TheCall->getArg(Arg: IsStore ? 2 : 1),
509 /*MayLoad=*/!IsStore, /*MayStore=*/IsStore);
510
511 // Last argument is the syncscope as a string literal.
512 if (!isa<StringLiteral>(Val: Scope->IgnoreParenImpCasts())) {
513 Diag(Loc: TheCall->getBeginLoc(), DiagID: diag::err_expr_not_string_literal)
514 << Scope->getSourceRange();
515 Fail = true;
516 }
517
518 return Fail;
519}
520
521bool SemaAMDGPU::checkAtomicMonitorLoad(CallExpr *TheCall) {
522 Expr *AO = TheCall->getArg(Arg: 1);
523 Expr *Scope = TheCall->getArg(Arg: TheCall->getNumArgs() - 1);
524
525 if (AO->isValueDependent() || Scope->isValueDependent())
526 return false;
527
528 bool Fail = checkAtomicOrderingCABIArg(E: AO, /*MayLoad=*/true,
529 /*MayStore=*/false);
530 Fail |= checkScopeAsInt(S&: *this, Scope);
531 return Fail;
532}
533
534bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
535 unsigned NumDataArgs) {
536 assert(NumDataArgs <= 2);
537 if (SemaRef.checkArgCountRange(Call: TheCall, MinArgCount: NumArgs, MaxArgCount: NumArgs))
538 return true;
539 Expr *Args[2];
540 QualType ArgTys[2];
541 for (unsigned I = 0; I != NumDataArgs; ++I) {
542 Args[I] = TheCall->getArg(Arg: I);
543 ArgTys[I] = Args[I]->getType();
544 // TODO: Vectors can also be supported.
545 if (!ArgTys[I]->isArithmeticType() || ArgTys[I]->isAnyComplexType()) {
546 SemaRef.Diag(Loc: Args[I]->getBeginLoc(),
547 DiagID: diag::err_typecheck_cond_expect_int_float)
548 << ArgTys[I] << Args[I]->getSourceRange();
549 return true;
550 }
551 }
552 if (NumDataArgs < 2)
553 return false;
554
555 if (getASTContext().hasSameUnqualifiedType(T1: ArgTys[0], T2: ArgTys[1]))
556 return false;
557
558 if (((ArgTys[0]->isUnsignedIntegerType() &&
559 ArgTys[1]->isSignedIntegerType()) ||
560 (ArgTys[0]->isSignedIntegerType() &&
561 ArgTys[1]->isUnsignedIntegerType())) &&
562 getASTContext().getTypeSize(T: ArgTys[0]) ==
563 getASTContext().getTypeSize(T: ArgTys[1]))
564 return false;
565
566 SemaRef.Diag(Loc: Args[1]->getBeginLoc(),
567 DiagID: diag::err_typecheck_call_different_arg_types)
568 << ArgTys[0] << ArgTys[1];
569 return true;
570}
571
572static bool
573checkAMDGPUFlatWorkGroupSizeArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr,
574 const AMDGPUFlatWorkGroupSizeAttr &Attr) {
575 // Accept template arguments for now as they depend on something else.
576 // We'll get to check them when they eventually get instantiated.
577 if (MinExpr->isValueDependent() || MaxExpr->isValueDependent())
578 return false;
579
580 uint32_t Min = 0;
581 if (!S.checkUInt32Argument(AI: Attr, Expr: MinExpr, Val&: Min, Idx: 0))
582 return true;
583
584 uint32_t Max = 0;
585 if (!S.checkUInt32Argument(AI: Attr, Expr: MaxExpr, Val&: Max, Idx: 1))
586 return true;
587
588 if (Min == 0 && Max != 0) {
589 S.Diag(Loc: Attr.getLocation(), DiagID: diag::err_attribute_argument_invalid)
590 << &Attr << 0;
591 return true;
592 }
593 if (Min > Max) {
594 S.Diag(Loc: Attr.getLocation(), DiagID: diag::err_attribute_argument_invalid)
595 << &Attr << 1;
596 return true;
597 }
598
599 return false;
600}
601
602AMDGPUFlatWorkGroupSizeAttr *
603SemaAMDGPU::CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI,
604 Expr *MinExpr, Expr *MaxExpr) {
605 ASTContext &Context = getASTContext();
606 AMDGPUFlatWorkGroupSizeAttr TmpAttr(Context, CI, MinExpr, MaxExpr);
607
608 if (checkAMDGPUFlatWorkGroupSizeArguments(S&: SemaRef, MinExpr, MaxExpr, Attr: TmpAttr))
609 return nullptr;
610 return ::new (Context)
611 AMDGPUFlatWorkGroupSizeAttr(Context, CI, MinExpr, MaxExpr);
612}
613
614void SemaAMDGPU::addAMDGPUFlatWorkGroupSizeAttr(Decl *D,
615 const AttributeCommonInfo &CI,
616 Expr *MinExpr, Expr *MaxExpr) {
617 if (auto *Attr = CreateAMDGPUFlatWorkGroupSizeAttr(CI, MinExpr, MaxExpr))
618 D->addAttr(A: Attr);
619}
620
621void SemaAMDGPU::handleAMDGPUFlatWorkGroupSizeAttr(Decl *D,
622 const ParsedAttr &AL) {
623 Expr *MinExpr = AL.getArgAsExpr(Arg: 0);
624 Expr *MaxExpr = AL.getArgAsExpr(Arg: 1);
625
626 addAMDGPUFlatWorkGroupSizeAttr(D, CI: AL, MinExpr, MaxExpr);
627}
628
629static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr,
630 Expr *MaxExpr,
631 const AMDGPUWavesPerEUAttr &Attr) {
632 if (S.DiagnoseUnexpandedParameterPack(E: MinExpr) ||
633 (MaxExpr && S.DiagnoseUnexpandedParameterPack(E: MaxExpr)))
634 return true;
635
636 // Accept template arguments for now as they depend on something else.
637 // We'll get to check them when they eventually get instantiated.
638 if (MinExpr->isValueDependent() || (MaxExpr && MaxExpr->isValueDependent()))
639 return false;
640
641 uint32_t Min = 0;
642 if (!S.checkUInt32Argument(AI: Attr, Expr: MinExpr, Val&: Min, Idx: 0))
643 return true;
644
645 uint32_t Max = 0;
646 if (MaxExpr && !S.checkUInt32Argument(AI: Attr, Expr: MaxExpr, Val&: Max, Idx: 1))
647 return true;
648
649 if (Min == 0 && Max != 0) {
650 S.Diag(Loc: Attr.getLocation(), DiagID: diag::err_attribute_argument_invalid)
651 << &Attr << 0;
652 return true;
653 }
654 if (Max != 0 && Min > Max) {
655 S.Diag(Loc: Attr.getLocation(), DiagID: diag::err_attribute_argument_invalid)
656 << &Attr << 1;
657 return true;
658 }
659
660 return false;
661}
662
663AMDGPUWavesPerEUAttr *
664SemaAMDGPU::CreateAMDGPUWavesPerEUAttr(const AttributeCommonInfo &CI,
665 Expr *MinExpr, Expr *MaxExpr) {
666 ASTContext &Context = getASTContext();
667 AMDGPUWavesPerEUAttr TmpAttr(Context, CI, MinExpr, MaxExpr);
668
669 if (checkAMDGPUWavesPerEUArguments(S&: SemaRef, MinExpr, MaxExpr, Attr: TmpAttr))
670 return nullptr;
671
672 return ::new (Context) AMDGPUWavesPerEUAttr(Context, CI, MinExpr, MaxExpr);
673}
674
675void SemaAMDGPU::addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI,
676 Expr *MinExpr, Expr *MaxExpr) {
677 if (auto *Attr = CreateAMDGPUWavesPerEUAttr(CI, MinExpr, MaxExpr))
678 D->addAttr(A: Attr);
679}
680
681void SemaAMDGPU::handleAMDGPUWavesPerEUAttr(Decl *D, const ParsedAttr &AL) {
682 if (!AL.checkAtLeastNumArgs(S&: SemaRef, Num: 1) || !AL.checkAtMostNumArgs(S&: SemaRef, Num: 2))
683 return;
684
685 Expr *MinExpr = AL.getArgAsExpr(Arg: 0);
686 Expr *MaxExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(Arg: 1) : nullptr;
687
688 addAMDGPUWavesPerEUAttr(D, CI: AL, MinExpr, MaxExpr);
689}
690
691void SemaAMDGPU::handleAMDGPUNumSGPRAttr(Decl *D, const ParsedAttr &AL) {
692 Diag(Loc: AL.getLoc(), DiagID: diag::warn_amdgpu_num_reg_attr_deprecated) << AL;
693
694 uint32_t NumSGPR = 0;
695 Expr *NumSGPRExpr = AL.getArgAsExpr(Arg: 0);
696 if (!SemaRef.checkUInt32Argument(AI: AL, Expr: NumSGPRExpr, Val&: NumSGPR))
697 return;
698
699 D->addAttr(A: ::new (getASTContext())
700 AMDGPUNumSGPRAttr(getASTContext(), AL, NumSGPR));
701}
702
703void SemaAMDGPU::handleAMDGPUNumVGPRAttr(Decl *D, const ParsedAttr &AL) {
704 Diag(Loc: AL.getLoc(), DiagID: diag::warn_amdgpu_num_reg_attr_deprecated) << AL;
705
706 uint32_t NumVGPR = 0;
707 Expr *NumVGPRExpr = AL.getArgAsExpr(Arg: 0);
708 if (!SemaRef.checkUInt32Argument(AI: AL, Expr: NumVGPRExpr, Val&: NumVGPR))
709 return;
710
711 D->addAttr(A: ::new (getASTContext())
712 AMDGPUNumVGPRAttr(getASTContext(), AL, NumVGPR));
713}
714
715static bool
716checkAMDGPUMaxNumWorkGroupsArguments(Sema &S, Expr *XExpr, Expr *YExpr,
717 Expr *ZExpr,
718 const AMDGPUMaxNumWorkGroupsAttr &Attr) {
719 if (S.DiagnoseUnexpandedParameterPack(E: XExpr) ||
720 (YExpr && S.DiagnoseUnexpandedParameterPack(E: YExpr)) ||
721 (ZExpr && S.DiagnoseUnexpandedParameterPack(E: ZExpr)))
722 return true;
723
724 // Accept template arguments for now as they depend on something else.
725 // We'll get to check them when they eventually get instantiated.
726 if (XExpr->isValueDependent() || (YExpr && YExpr->isValueDependent()) ||
727 (ZExpr && ZExpr->isValueDependent()))
728 return false;
729
730 uint32_t NumWG = 0;
731 Expr *Exprs[3] = {XExpr, YExpr, ZExpr};
732 for (int i = 0; i < 3; i++) {
733 if (Exprs[i]) {
734 if (!S.checkUInt32Argument(AI: Attr, Expr: Exprs[i], Val&: NumWG, Idx: i,
735 /*StrictlyUnsigned=*/true))
736 return true;
737 if (NumWG == 0) {
738 S.Diag(Loc: Attr.getLoc(), DiagID: diag::err_attribute_argument_is_zero)
739 << &Attr << Exprs[i]->getSourceRange();
740 return true;
741 }
742 }
743 }
744
745 return false;
746}
747
748AMDGPUMaxNumWorkGroupsAttr *SemaAMDGPU::CreateAMDGPUMaxNumWorkGroupsAttr(
749 const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr) {
750 ASTContext &Context = getASTContext();
751 AMDGPUMaxNumWorkGroupsAttr TmpAttr(Context, CI, XExpr, YExpr, ZExpr);
752 assert(!SemaRef.isSFINAEContext() &&
753 "Can't produce SFINAE diagnostic pointing to temporary attribute");
754
755 if (checkAMDGPUMaxNumWorkGroupsArguments(S&: SemaRef, XExpr, YExpr, ZExpr,
756 Attr: TmpAttr))
757 return nullptr;
758
759 return ::new (Context)
760 AMDGPUMaxNumWorkGroupsAttr(Context, CI, XExpr, YExpr, ZExpr);
761}
762
763void SemaAMDGPU::addAMDGPUMaxNumWorkGroupsAttr(Decl *D,
764 const AttributeCommonInfo &CI,
765 Expr *XExpr, Expr *YExpr,
766 Expr *ZExpr) {
767 if (auto *Attr = CreateAMDGPUMaxNumWorkGroupsAttr(CI, XExpr, YExpr, ZExpr))
768 D->addAttr(A: Attr);
769}
770
771void SemaAMDGPU::handleAMDGPUMaxNumWorkGroupsAttr(Decl *D,
772 const ParsedAttr &AL) {
773 Expr *YExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(Arg: 1) : nullptr;
774 Expr *ZExpr = (AL.getNumArgs() > 2) ? AL.getArgAsExpr(Arg: 2) : nullptr;
775 addAMDGPUMaxNumWorkGroupsAttr(D, CI: AL, XExpr: AL.getArgAsExpr(Arg: 0), YExpr, ZExpr);
776}
777
778Expr *SemaAMDGPU::ExpandAMDGPUPredicateBuiltIn(Expr *E) {
779 CallExpr *CE = cast<CallExpr>(Val: E->IgnoreParens());
780 ASTContext &Ctx = getASTContext();
781 QualType BoolTy = Ctx.getLogicalOperationType();
782 SourceLocation Loc = CE->getExprLoc();
783
784 if (!CE->getBuiltinCallee())
785 return *ExpandedPredicates
786 .insert(Ptr: SemaRef.BuildBoolLiteral(Loc, Value: false).get())
787 .first;
788
789 bool P = false;
790 unsigned BI = CE->getBuiltinCallee();
791 if (Ctx.BuiltinInfo.isAuxBuiltinID(ID: BI))
792 BI = Ctx.BuiltinInfo.getAuxBuiltinID(ID: BI);
793
794 if (BI == AMDGPU::BI__builtin_amdgcn_processor_is) {
795 auto *GFX = dyn_cast<StringLiteral>(Val: CE->getArg(Arg: 0)->IgnoreParenCasts());
796 if (!GFX) {
797 Diag(Loc, DiagID: diag::err_amdgcn_processor_is_arg_not_literal);
798 return nullptr;
799 }
800
801 StringRef N = GFX->getString();
802 const TargetInfo &TI = Ctx.getTargetInfo();
803 if (llvm::AMDGPU::parseArchAMDGCN(CPU: N) == llvm::AMDGPU::GK_NONE) {
804 Diag(Loc, DiagID: diag::err_amdgcn_processor_is_arg_invalid_value) << N;
805 SmallVector<StringRef, 64> ValidList;
806 llvm::AMDGPU::fillValidArchListAMDGCN(Values&: ValidList);
807 if (!ValidList.empty())
808 Diag(Loc, DiagID: diag::note_amdgcn_processor_is_valid_options)
809 << llvm::join(R&: ValidList, Separator: ", ");
810 return nullptr;
811 }
812 if (TI.getTriple().isSPIRV()) {
813 CE->setType(BoolTy);
814 return *ExpandedPredicates.insert(Ptr: CE).first;
815 }
816
817 P = TI.isProcessorName(Name: N);
818 } else {
819 Expr *Arg = CE->getArg(Arg: 0);
820 if (!Arg || Arg->getType() != Ctx.BuiltinFnTy) {
821 Diag(Loc, DiagID: diag::err_amdgcn_is_invocable_arg_invalid_value) << Arg;
822 return nullptr;
823 }
824
825 if (Ctx.getTargetInfo().getTriple().isSPIRV()) {
826 CE->setType(BoolTy);
827 return *ExpandedPredicates.insert(Ptr: CE).first;
828 }
829
830 auto *FD = cast<FunctionDecl>(Val: Arg->getReferencedDeclOfCallee());
831
832 StringRef RF = Ctx.BuiltinInfo.getRequiredFeatures(ID: FD->getBuiltinID());
833 llvm::StringMap<bool> CF;
834 Ctx.getFunctionFeatureMap(FeatureMap&: CF, FD);
835
836 P = Builtin::evaluateRequiredTargetFeatures(RequiredFatures: RF, TargetFetureMap: CF);
837 }
838
839 return *ExpandedPredicates.insert(Ptr: SemaRef.BuildBoolLiteral(Loc, Value: P).get())
840 .first;
841}
842
843bool SemaAMDGPU::IsPredicate(Expr *E) const {
844 return ExpandedPredicates.contains(Ptr: E);
845}
846
847void SemaAMDGPU::AddPotentiallyUnguardedBuiltinUser(FunctionDecl *FD) {
848 PotentiallyUnguardedBuiltinUsers.insert(Ptr: FD);
849}
850
851bool SemaAMDGPU::HasPotentiallyUnguardedBuiltinUsage(FunctionDecl *FD) const {
852 return PotentiallyUnguardedBuiltinUsers.contains(Ptr: FD);
853}
854
855namespace {
856/// This class implements -Wamdgpu-unguarded-builtin-usage.
857///
858/// This is done with a traversal of the AST of a function that includes a
859/// call to a target specific builtin. Whenever we encounter an \c if of the
860/// form: \c if(__builtin_amdgcn_is_invocable), we consider the then statement
861/// guarded.
862class DiagnoseUnguardedBuiltins : public DynamicRecursiveASTVisitor {
863 // TODO: this could eventually be extended to consider what happens when there
864 // are multiple target architectures specified via target("arch=gfxXXX")
865 // target("arch=gfxyyy") etc., as well as feature disabling via "-XXX".
866 Sema &SemaRef;
867
868 SmallVector<StringRef> TargetFeatures;
869 SmallVector<std::pair<SourceLocation, StringRef>> CurrentGFXIP;
870 SmallVector<unsigned> GuardedBuiltins;
871
872 static Expr *FindPredicate(Expr *Cond) {
873 if (auto *CE = dyn_cast<CallExpr>(Val: Cond)) {
874 if (CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_is_invocable ||
875 CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is)
876 return Cond;
877 } else if (auto *UO = dyn_cast<UnaryOperator>(Val: Cond)) {
878 return FindPredicate(Cond: UO->getSubExpr());
879 } else if (auto *BO = dyn_cast<BinaryOperator>(Val: Cond)) {
880 if ((Cond = FindPredicate(Cond: BO->getLHS())))
881 return Cond;
882 return FindPredicate(Cond: BO->getRHS());
883 }
884 return nullptr;
885 }
886
887 bool EnterPredicateGuardedContext(CallExpr *P);
888 void ExitPredicateGuardedContext(bool WasProcessorCheck);
889 bool TraverseGuardedStmt(Stmt *S, CallExpr *P);
890
891public:
892 DiagnoseUnguardedBuiltins(Sema &SemaRef) : SemaRef(SemaRef) {
893 if (auto *TAT = SemaRef.getCurFunctionDecl(AllowLambda: true)->getAttr<TargetAttr>()) {
894 // We use the somewhat misnamed x86 accessors because they provide exactly
895 // what we require.
896 TAT->getX86AddedFeatures(Out&: TargetFeatures);
897 if (auto GFXIP = TAT->getX86Architecture())
898 CurrentGFXIP.emplace_back(Args: TAT->getLocation(), Args&: *GFXIP);
899 }
900 }
901
902 bool TraverseLambdaExpr(LambdaExpr *LE) override {
903 if (SemaRef.AMDGPU().HasPotentiallyUnguardedBuiltinUsage(
904 FD: LE->getCallOperator()))
905 return true; // We have already handled this.
906 return DynamicRecursiveASTVisitor::TraverseLambdaExpr(S: LE);
907 }
908
909 bool TraverseStmt(Stmt *S) override {
910 if (!S)
911 return true;
912 return DynamicRecursiveASTVisitor::TraverseStmt(S);
913 }
914
915 void IssueDiagnostics(Stmt *S) { TraverseStmt(S); }
916
917 bool TraverseIfStmt(IfStmt *If) override {
918 if (auto *CE = dyn_cast_or_null<CallExpr>(Val: FindPredicate(Cond: If->getCond())))
919 return TraverseGuardedStmt(S: If, P: CE);
920 return DynamicRecursiveASTVisitor::TraverseIfStmt(S: If);
921 }
922
923 bool TraverseCaseStmt(CaseStmt *CS) override {
924 return TraverseStmt(S: CS->getSubStmt());
925 }
926
927 bool TraverseConditionalOperator(ConditionalOperator *CO) override {
928 if (auto *CE = dyn_cast_or_null<CallExpr>(Val: FindPredicate(Cond: CO->getCond())))
929 return TraverseGuardedStmt(S: CO, P: CE);
930 return DynamicRecursiveASTVisitor::TraverseConditionalOperator(S: CO);
931 }
932
933 bool VisitAsmStmt(AsmStmt *ASM) override;
934 bool VisitCallExpr(CallExpr *CE) override;
935};
936
937bool DiagnoseUnguardedBuiltins::EnterPredicateGuardedContext(CallExpr *P) {
938 bool IsProcessorCheck =
939 P->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is;
940
941 if (IsProcessorCheck) {
942 StringRef G = cast<clang::StringLiteral>(Val: P->getArg(Arg: 0))->getString();
943 // TODO: handle generic ISAs.
944 if (!CurrentGFXIP.empty() && G != CurrentGFXIP.back().second) {
945 SemaRef.Diag(Loc: P->getExprLoc(),
946 DiagID: diag::err_amdgcn_conflicting_is_processor_options)
947 << P;
948 SemaRef.Diag(Loc: CurrentGFXIP.back().first,
949 DiagID: diag::note_amdgcn_previous_is_processor_guard);
950 }
951 CurrentGFXIP.emplace_back(Args: P->getExprLoc(), Args&: G);
952 } else {
953 auto *FD = cast<FunctionDecl>(
954 Val: cast<DeclRefExpr>(Val: P->getArg(Arg: 0))->getReferencedDeclOfCallee());
955 GuardedBuiltins.push_back(Elt: FD->getBuiltinID());
956 }
957
958 return IsProcessorCheck;
959}
960
961void DiagnoseUnguardedBuiltins::ExitPredicateGuardedContext(bool WasProcCheck) {
962 if (WasProcCheck)
963 CurrentGFXIP.pop_back();
964 else
965 GuardedBuiltins.pop_back();
966}
967
968inline std::pair<Stmt *, Stmt *> GetTraversalOrder(Stmt *S) {
969 std::pair<Stmt *, Stmt *> Ordered;
970 Expr *Condition = nullptr;
971
972 if (auto *CO = dyn_cast<ConditionalOperator>(Val: S)) {
973 Condition = CO->getCond();
974 Ordered = {CO->getTrueExpr(), CO->getFalseExpr()};
975 } else if (auto *If = dyn_cast<IfStmt>(Val: S)) {
976 Condition = If->getCond();
977 Ordered = {If->getThen(), If->getElse()};
978 }
979
980 if (auto *UO = dyn_cast<UnaryOperator>(Val: Condition))
981 if (UO->getOpcode() == UnaryOperatorKind::UO_LNot)
982 std::swap(a&: Ordered.first, b&: Ordered.second);
983
984 return Ordered;
985}
986
987bool DiagnoseUnguardedBuiltins::TraverseGuardedStmt(Stmt *S, CallExpr *P) {
988 assert(S && "Unexpected missing Statement!");
989 assert(P && "Unexpected missing Predicate!");
990
991 auto [Guarded, Unguarded] = GetTraversalOrder(S);
992
993 bool WasProcessorCheck = EnterPredicateGuardedContext(P);
994
995 bool Continue = TraverseStmt(S: Guarded);
996
997 ExitPredicateGuardedContext(WasProcCheck: WasProcessorCheck);
998
999 return Continue && TraverseStmt(S: Unguarded);
1000}
1001
1002bool DiagnoseUnguardedBuiltins::VisitAsmStmt(AsmStmt *ASM) {
1003 // TODO: should we check if the ASM is valid for the target? Can we?
1004 if (!CurrentGFXIP.empty())
1005 return true;
1006
1007 std::string S = ASM->generateAsmString(C: SemaRef.getASTContext());
1008 SemaRef.Diag(Loc: ASM->getAsmLoc(), DiagID: diag::warn_amdgcn_unguarded_asm_stmt) << S;
1009 SemaRef.Diag(Loc: ASM->getAsmLoc(), DiagID: diag::note_amdgcn_unguarded_asm_silence) << S;
1010
1011 return true;
1012}
1013
1014bool DiagnoseUnguardedBuiltins::VisitCallExpr(CallExpr *CE) {
1015 unsigned ID = CE->getBuiltinCallee();
1016 Builtin::Context &BInfo = SemaRef.getASTContext().BuiltinInfo;
1017
1018 if (!ID)
1019 return true;
1020 if (!BInfo.isTSBuiltin(ID))
1021 return true;
1022 if (ID == AMDGPU::BI__builtin_amdgcn_processor_is ||
1023 ID == AMDGPU::BI__builtin_amdgcn_is_invocable)
1024 return true;
1025 if (llvm::find(Range&: GuardedBuiltins, Val: ID) != GuardedBuiltins.end())
1026 return true;
1027
1028 StringRef FL(BInfo.getRequiredFeatures(ID));
1029 llvm::StringMap<bool> FeatureMap;
1030 if (CurrentGFXIP.empty()) {
1031 for (auto &&F : TargetFeatures)
1032 FeatureMap[F] = true;
1033 for (auto &&GID : GuardedBuiltins)
1034 for (auto &&F : llvm::split(Str: BInfo.getRequiredFeatures(ID: GID), Separator: ','))
1035 FeatureMap[F] = true;
1036 } else {
1037 static const llvm::Triple AMDGCN(llvm::Triple::amdgcn,
1038 llvm::Triple::NoSubArch, llvm::Triple::AMD,
1039 llvm::Triple::AMDHSA);
1040 llvm::AMDGPU::fillAMDGPUFeatureMap(GPU: CurrentGFXIP.back().second, T: AMDGCN,
1041 Features&: FeatureMap);
1042 }
1043
1044 FunctionDecl *BI = CE->getDirectCallee();
1045 SourceLocation BICallLoc = CE->getExprLoc();
1046 if (Builtin::evaluateRequiredTargetFeatures(RequiredFatures: FL, TargetFetureMap: FeatureMap)) {
1047 SemaRef.Diag(Loc: BICallLoc, DiagID: diag::warn_amdgcn_unguarded_builtin) << BI;
1048 SemaRef.Diag(Loc: BICallLoc, DiagID: diag::note_amdgcn_unguarded_builtin_silence) << BI;
1049 } else {
1050 StringRef GFXIP = CurrentGFXIP.empty() ? "" : CurrentGFXIP.back().second;
1051 SemaRef.Diag(Loc: BICallLoc, DiagID: diag::err_amdgcn_incompatible_builtin)
1052 << BI << FL << !CurrentGFXIP.empty() << GFXIP;
1053 if (!CurrentGFXIP.empty())
1054 SemaRef.Diag(Loc: CurrentGFXIP.back().first,
1055 DiagID: diag::note_amdgcn_previous_is_processor_guard);
1056 }
1057
1058 return true;
1059}
1060} // Unnamed namespace
1061
1062void SemaAMDGPU::DiagnoseUnguardedBuiltinUsage(FunctionDecl *FD) {
1063 DiagnoseUnguardedBuiltins(SemaRef).IssueDiagnostics(S: FD->getBody());
1064}
1065} // namespace clang
1066