1//===-- VerifierAMDGPU.cpp - AMDGPU-specific IR verification ---------------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains AMDGPU-specific IR verification logic that was extracted
10// from Verifier.cpp for code organization purposes only. These checks are
11// always compiled and linked as part of LLVMCore — this is not a target-
12// dependent IR verifier, which would require a different design.
13//
14// This file should only contain checks for AMDGPU-specific IR constructs
15// (e.g. amdgcn intrinsics, AMDGPU address spaces). It must not contain
16// checks for generic IR that might behave differently under AMDGPU.
17//
18//===----------------------------------------------------------------------===//
19
20#include "VerifierInternal.h"
21#include "llvm/ADT/StringExtras.h"
22#include "llvm/IR/CallingConv.h"
23#include "llvm/IR/Constants.h"
24#include "llvm/IR/DerivedTypes.h"
25#include "llvm/IR/Function.h"
26#include "llvm/IR/IntrinsicInst.h"
27#include "llvm/IR/IntrinsicsAMDGPU.h"
28#include "llvm/Support/AMDGPUAddrSpace.h"
29
30using namespace llvm;
31
/// Verifier assertion helper for this file: when condition \p C does not hold,
/// forward the remaining arguments to VS.CheckFailed and return from the
/// enclosing function. It expects a variable named `VS` in scope and can only
/// be used inside functions returning void.
#define Check(C, ...)                                                          \
  do {                                                                         \
    if (C)                                                                     \
      break;                                                                   \
    VS.CheckFailed(__VA_ARGS__);                                               \
    return;                                                                    \
  } while (false)
39
40void llvm::verifyAMDGPUModuleFlag(VerifierSupport &VS, const MDString *ID,
41 Module::ModFlagBehavior MFB,
42 const MDNode *Op) {
43 if (ID->getString() != "amdgpu.buffer.oob.mode" &&
44 ID->getString() != "amdgpu.tbuffer.oob.mode")
45 return;
46
47 Check(MFB == Module::Max,
48 "'" + ID->getString() + "' module flag must use 'max' merge behaviour");
49 ConstantInt *Value =
50 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 2));
51 Check(Value, "'" + ID->getString() +
52 "' module flag must have a constant integer value");
53 Check(Value->getZExtValue() <= 2,
54 "'" + ID->getString() + "' module flag must be 0, 1, or 2");
55}
56
57// Verify that when a function has !reqd_work_group_size metadata, it also has
58// an amdgpu-flat-work-group-size attribute that matches the product of the
59// reqd_work_group_size operands.
60static void verifyAMDGPUReqdWorkGroupSize(VerifierSupport &VS,
61 const Function &F) {
62 // This is not required for other targets so we only check for AMDGPU.
63 if (!VS.TT.isAMDGPU())
64 return;
65
66 MDNode *ReqdWorkGroupSize = F.getMetadata(Kind: "reqd_work_group_size");
67 if (!ReqdWorkGroupSize || ReqdWorkGroupSize->getNumOperands() != 3)
68 return;
69
70 uint64_t Product = 1;
71 for (const MDOperand &Op : ReqdWorkGroupSize->operands()) {
72 ConstantInt *C = mdconst::dyn_extract<ConstantInt>(MD: Op);
73 if (!C || C->getValue().getActiveBits() > 64)
74 return;
75 uint64_t Dim = C->getZExtValue();
76 if (Dim != 0 && Product > std::numeric_limits<uint64_t>::max() / Dim)
77 return;
78 Product *= Dim;
79 }
80
81 Attribute FlatWorkGroupSize = F.getFnAttribute(Kind: "amdgpu-flat-work-group-size");
82 if (!FlatWorkGroupSize.isValid()) {
83 VS.CheckFailed(Message: "reqd_work_group_size requires amdgpu-flat-work-group-size",
84 V1: &F, Vs: ReqdWorkGroupSize);
85 return;
86 }
87
88 if (!FlatWorkGroupSize.isStringAttribute()) {
89 VS.CheckFailed(Message: "amdgpu-flat-work-group-size must be a string attribute",
90 V1: &F);
91 return;
92 }
93
94 StringRef AttrValue = FlatWorkGroupSize.getValueAsString();
95 std::pair<StringRef, StringRef> Values = AttrValue.split(Separator: ',');
96 uint64_t Min = 0;
97 uint64_t Max = 0;
98 bool Parsed = !Values.second.contains(C: ',') &&
99 llvm::to_integer(S: Values.first.trim(), Num&: Min) &&
100 llvm::to_integer(S: Values.second.trim(), Num&: Max);
101 if (!Parsed) {
102 VS.CheckFailed(Message: "amdgpu-flat-work-group-size must be a pair of unsigned "
103 "integers",
104 V1: &F);
105 return;
106 }
107
108 if (Min != Product || Max != Product) {
109 VS.CheckFailed(Message: "amdgpu-flat-work-group-size must equal the product of "
110 "reqd_work_group_size operands",
111 V1: &F, Vs: ReqdWorkGroupSize);
112 }
113}
114
/// Entry point for AMDGPU-specific verification of function-level metadata.
/// Currently this only forwards to the !reqd_work_group_size check.
void llvm::verifyAMDGPUFunctionMetadata(VerifierSupport &VS,
                                        const Function &F) {
  verifyAMDGPUReqdWorkGroupSize(VS, F);
}
119
120void llvm::verifyAMDGPUAlloca(VerifierSupport &VS, const AllocaInst &AI) {
121 // This is not required for other targets so we only check for AMDGPU.
122 if (!VS.TT.isAMDGPU())
123 return;
124
125 if (AI.getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
126 VS.CheckFailed(Message: "alloca on amdgpu must be in addrspace(5)", V1: &AI);
127}
128
129bool llvm::isAMDGPUCallBrIntrinsic(Intrinsic::ID ID) {
130 switch (ID) {
131 default:
132 return false;
133 case Intrinsic::amdgcn_kill:
134 return true;
135 }
136}
137
138void llvm::verifyAMDGPUIntrinsicCall(VerifierSupport &VS, Intrinsic::ID ID,
139 CallBase &Call) {
140 switch (ID) {
141 default:
142 return;
143 case Intrinsic::amdgcn_kill: {
144 if (auto *CBI = dyn_cast<CallBrInst>(Val: &Call)) {
145 Check(CBI->getNumIndirectDests() == 1,
146 "callbr amdgcn_kill only supports one indirect dest");
147 bool Unreachable = isa<UnreachableInst>(Val: CBI->getIndirectDest(i: 0)->begin());
148 CallInst *CI = dyn_cast<CallInst>(Val: CBI->getIndirectDest(i: 0)->begin());
149 Check(Unreachable ||
150 (CI && CI->getIntrinsicID() == Intrinsic::amdgcn_unreachable),
151 "callbr amdgcn_kill indirect dest needs to be unreachable");
152 }
153 break;
154 }
155 case Intrinsic::amdgcn_cs_chain: {
156 CallingConv::ID CallerCC = Call.getCaller()->getCallingConv();
157 switch (CallerCC) {
158 case CallingConv::AMDGPU_CS:
159 case CallingConv::AMDGPU_CS_Chain:
160 case CallingConv::AMDGPU_CS_ChainPreserve:
161 case CallingConv::AMDGPU_ES:
162 case CallingConv::AMDGPU_GS:
163 case CallingConv::AMDGPU_HS:
164 case CallingConv::AMDGPU_LS:
165 case CallingConv::AMDGPU_VS:
166 break;
167 default:
168 VS.CheckFailed(Message: "Intrinsic cannot be called from functions with this "
169 "calling convention",
170 V1: &Call);
171 break;
172 }
173
174 Check(Call.paramHasAttr(2, Attribute::InReg),
175 "SGPR arguments must have the `inreg` attribute", &Call);
176 Check(!Call.paramHasAttr(3, Attribute::InReg),
177 "VGPR arguments must not have the `inreg` attribute", &Call);
178
179 ConstantInt *FlagsArg = cast<ConstantInt>(Val: Call.getArgOperand(i: 4));
180 Check(FlagsArg->getValue().ult(2),
181 "flags must be 0 or 1 for llvm.amdgcn.cs.chain", &Call);
182
183 Instruction *Next = Call.getNextNode();
184 bool IsAMDUnreachable = isa_and_nonnull<IntrinsicInst>(Val: Next) &&
185 cast<IntrinsicInst>(Val: Next)->getIntrinsicID() ==
186 Intrinsic::amdgcn_unreachable;
187 Check(Next && (isa<UnreachableInst>(Next) || IsAMDUnreachable),
188 "llvm.amdgcn.cs.chain must be followed by unreachable", &Call);
189 break;
190 }
191 case Intrinsic::amdgcn_init_exec_from_input: {
192 const Argument *Arg = dyn_cast<Argument>(Val: Call.getOperand(i_nocapture: 0));
193 Check(Arg && Arg->hasInRegAttr(),
194 "only inreg arguments to the parent function are valid as inputs to "
195 "this intrinsic",
196 &Call);
197 break;
198 }
199 case Intrinsic::amdgcn_set_inactive_chain_arg: {
200 CallingConv::ID CallerCC = Call.getCaller()->getCallingConv();
201 switch (CallerCC) {
202 case CallingConv::AMDGPU_CS_Chain:
203 case CallingConv::AMDGPU_CS_ChainPreserve:
204 break;
205 default:
206 VS.CheckFailed(Message: "Intrinsic can only be used from functions with the "
207 "amdgpu_cs_chain or amdgpu_cs_chain_preserve "
208 "calling conventions",
209 V1: &Call);
210 break;
211 }
212
213 unsigned InactiveIdx = 1;
214 Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg),
215 "Value for inactive lanes must not have the `inreg` attribute",
216 &Call);
217 Check(isa<Argument>(Call.getArgOperand(InactiveIdx)),
218 "Value for inactive lanes must be a function argument", &Call);
219 Check(!cast<Argument>(Call.getArgOperand(InactiveIdx))->hasInRegAttr(),
220 "Value for inactive lanes must be a VGPR function argument", &Call);
221 break;
222 }
223 case Intrinsic::amdgcn_call_whole_wave: {
224 Function *F = dyn_cast<Function>(Val: Call.getArgOperand(i: 0));
225 Check(F, "Indirect whole wave calls are not allowed", &Call);
226
227 CallingConv::ID CC = F->getCallingConv();
228 Check(CC == CallingConv::AMDGPU_Gfx_WholeWave,
229 "Callee must have the amdgpu_gfx_whole_wave calling convention",
230 &Call);
231
232 Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call);
233
234 Check(Call.arg_size() == F->arg_size(),
235 "Call argument count must match callee argument count", &Call);
236
237 Check(F->arg_begin()->getType()->isIntegerTy(1),
238 "Callee must have i1 as its first argument", &Call);
239 for (auto [CallArg, FuncArg] :
240 drop_begin(RangeOrContainer: zip_equal(t: Call.args(), u: F->args()))) {
241 Check(CallArg->getType() == FuncArg.getType(),
242 "Argument types must match", &Call);
243
244 Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) ==
245 FuncArg.hasInRegAttr(),
246 "Argument inreg attributes must match", &Call);
247 }
248 break;
249 }
250 case Intrinsic::amdgcn_s_prefetch_data: {
251 Check(
252 AMDGPU::isFlatGlobalAddrSpace(
253 Call.getArgOperand(0)->getType()->getPointerAddressSpace()),
254 "llvm.amdgcn.s.prefetch.data only supports global or constant memory");
255 break;
256 }
257 case Intrinsic::amdgcn_load_to_lds:
258 case Intrinsic::amdgcn_load_async_to_lds:
259 case Intrinsic::amdgcn_global_load_lds:
260 case Intrinsic::amdgcn_global_load_async_lds:
261 case Intrinsic::amdgcn_raw_buffer_load_lds:
262 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
263 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
264 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
265 case Intrinsic::amdgcn_struct_buffer_load_lds:
266 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
267 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
268 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds: {
269 uint64_t Size = cast<ConstantInt>(Val: Call.getArgOperand(i: 2))->getZExtValue();
270 Check(Size == 1 || Size == 2 || Size == 4 || Size == 12 || Size == 16,
271 "invalid data size for load-to-LDS intrinsic; must be 1, 2, 4, 12, "
272 "or 16",
273 &Call);
274 break;
275 }
276 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
277 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
278 Value *Src0 = Call.getArgOperand(i: 0);
279 Value *Src1 = Call.getArgOperand(i: 1);
280
281 uint64_t CBSZ = cast<ConstantInt>(Val: Call.getArgOperand(i: 3))->getZExtValue();
282 uint64_t BLGP = cast<ConstantInt>(Val: Call.getArgOperand(i: 4))->getZExtValue();
283 Check(CBSZ <= 4, "invalid value for cbsz format", Call,
284 Call.getArgOperand(3));
285 Check(BLGP <= 4, "invalid value for blgp format", Call,
286 Call.getArgOperand(4));
287
288 auto GetFormatNumRegs = [](unsigned FormatVal) {
289 switch (FormatVal) {
290 case 0:
291 case 1:
292 return 8u;
293 case 2:
294 case 3:
295 return 6u;
296 case 4:
297 return 4u;
298 default:
299 llvm_unreachable("invalid format value");
300 }
301 };
302
303 auto IsValidSrcASrcBVector = [](FixedVectorType *Ty) {
304 if (!Ty || !Ty->getElementType()->isIntegerTy(BitWidth: 32))
305 return false;
306 unsigned NumElts = Ty->getNumElements();
307 return NumElts == 4 || NumElts == 6 || NumElts == 8;
308 };
309
310 FixedVectorType *Src0Ty = dyn_cast<FixedVectorType>(Val: Src0->getType());
311 FixedVectorType *Src1Ty = dyn_cast<FixedVectorType>(Val: Src1->getType());
312 Check(IsValidSrcASrcBVector(Src0Ty),
313 "operand 0 must be 4, 6 or 8 element i32 vector", &Call, Src0);
314 Check(IsValidSrcASrcBVector(Src1Ty),
315 "operand 1 must be 4, 6 or 8 element i32 vector", &Call, Src1);
316
317 Check(Src0Ty->getNumElements() >= GetFormatNumRegs(CBSZ),
318 "invalid vector type for format", &Call, Src0, Call.getArgOperand(3));
319 Check(Src1Ty->getNumElements() >= GetFormatNumRegs(BLGP),
320 "invalid vector type for format", &Call, Src1, Call.getArgOperand(5));
321 break;
322 }
323 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
324 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
325 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
326 Value *Src0 = Call.getArgOperand(i: 1);
327 Value *Src1 = Call.getArgOperand(i: 3);
328
329 unsigned FmtA = cast<ConstantInt>(Val: Call.getArgOperand(i: 0))->getZExtValue();
330 unsigned FmtB = cast<ConstantInt>(Val: Call.getArgOperand(i: 2))->getZExtValue();
331 Check(FmtA <= 4, "invalid value for matrix format", Call,
332 Call.getArgOperand(0));
333 Check(FmtB <= 4, "invalid value for matrix format", Call,
334 Call.getArgOperand(2));
335
336 auto GetFormatNumRegs = [](unsigned FormatVal) {
337 switch (FormatVal) {
338 case 0:
339 case 1:
340 return 16u;
341 case 2:
342 case 3:
343 return 12u;
344 case 4:
345 return 8u;
346 default:
347 llvm_unreachable("invalid format value");
348 }
349 };
350
351 auto IsValidSrcASrcBVector = [](FixedVectorType *Ty) {
352 if (!Ty || !Ty->getElementType()->isIntegerTy(BitWidth: 32))
353 return false;
354 unsigned NumElts = Ty->getNumElements();
355 return NumElts == 16 || NumElts == 12 || NumElts == 8;
356 };
357
358 FixedVectorType *Src0Ty = dyn_cast<FixedVectorType>(Val: Src0->getType());
359 FixedVectorType *Src1Ty = dyn_cast<FixedVectorType>(Val: Src1->getType());
360 Check(IsValidSrcASrcBVector(Src0Ty),
361 "operand 1 must be 8, 12 or 16 element i32 vector", &Call, Src0);
362 Check(IsValidSrcASrcBVector(Src1Ty),
363 "operand 3 must be 8, 12 or 16 element i32 vector", &Call, Src1);
364
365 Check(Src0Ty->getNumElements() >= GetFormatNumRegs(FmtA),
366 "invalid vector type for format", &Call, Src0, Call.getArgOperand(0));
367 Check(Src1Ty->getNumElements() >= GetFormatNumRegs(FmtB),
368 "invalid vector type for format", &Call, Src1, Call.getArgOperand(2));
369 break;
370 }
371 case Intrinsic::amdgcn_cooperative_atomic_load_32x4B:
372 case Intrinsic::amdgcn_cooperative_atomic_load_16x8B:
373 case Intrinsic::amdgcn_cooperative_atomic_load_8x16B:
374 case Intrinsic::amdgcn_cooperative_atomic_store_32x4B:
375 case Intrinsic::amdgcn_cooperative_atomic_store_16x8B:
376 case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: {
377 Value *PtrArg = Call.getArgOperand(i: 0);
378 const unsigned AS = PtrArg->getType()->getPointerAddressSpace();
379 Check(AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS,
380 "cooperative atomic intrinsics require a generic or global pointer",
381 &Call, PtrArg);
382
383 MetadataAsValue *Op =
384 cast<MetadataAsValue>(Val: Call.getArgOperand(i: Call.arg_size() - 1));
385 MDNode *MD = cast<MDNode>(Val: Op->getMetadata());
386 Check((MD->getNumOperands() == 1) && isa<MDString>(MD->getOperand(0)),
387 "cooperative atomic intrinsics require that the last argument is a "
388 "metadata string",
389 &Call, Op);
390 break;
391 }
392 case Intrinsic::amdgcn_av_load_b128:
393 case Intrinsic::amdgcn_av_store_b128: {
394 MetadataAsValue *Op =
395 cast<MetadataAsValue>(Val: Call.getArgOperand(i: Call.arg_size() - 1));
396 MDNode *MD = dyn_cast<MDNode>(Val: Op->getMetadata());
397 Check(MD && (MD->getNumOperands() == 1) && isa<MDString>(MD->getOperand(0)),
398 "the last argument to av load/store intrinsics must be a "
399 "metadata string",
400 &Call, Op);
401 break;
402 }
403 }
404}
405
406#undef Check
407