VerifierAMDGPU.cpp source code [llvm_projects/llvm/lib/IR/VerifierAMDGPU.cpp]

1	//===-- VerifierAMDGPU.cpp - AMDGPU-specific IR verification ---------------==//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains AMDGPU-specific IR verification logic that was extracted
10	// from Verifier.cpp for code organization purposes only. These checks are
11	// always compiled and linked as part of LLVMCore — this is not a target-
12	// dependent IR verifier, which would require a different design.
13	//
14	// This file should only contain checks for AMDGPU-specific IR constructs
15	// (e.g. amdgcn intrinsics, AMDGPU address spaces). It must not contain
16	// checks for generic IR that might behave differently under AMDGPU.
17	//
18	//===----------------------------------------------------------------------===//
19
20	#include "VerifierInternal.h"
21	#include "llvm/ADT/StringExtras.h"
22	#include "llvm/IR/CallingConv.h"
23	#include "llvm/IR/Constants.h"
24	#include "llvm/IR/DerivedTypes.h"
25	#include "llvm/IR/Function.h"
26	#include "llvm/IR/IntrinsicInst.h"
27	#include "llvm/IR/IntrinsicsAMDGPU.h"
28	#include "llvm/Support/AMDGPUAddrSpace.h"
29
30	using namespace llvm;
31
// Verification helper local to this file. When condition C is false, the
// remaining arguments are forwarded to VS.CheckFailed() and the enclosing
// function returns. It therefore requires a variable named `VS` in scope and
// may only be used inside functions returning void. The do/while(false)
// wrapper makes an invocation behave like a single statement.
#define Check(C, ...)                                                          \
  do {                                                                         \
    if (!(C)) {                                                                \
      VS.CheckFailed(__VA_ARGS__);                                             \
      return;                                                                  \
    }                                                                          \
  } while (false)
39
40	void llvm::verifyAMDGPUModuleFlag(VerifierSupport &VS, const MDString *ID,
41	Module::ModFlagBehavior MFB,
42	const MDNode *Op) {
43	if (ID->getString() != "amdgpu.buffer.oob.mode" &&
44	ID->getString() != "amdgpu.tbuffer.oob.mode")
45	return;
46
47	Check(MFB == Module::Max,
48	"'" + ID->getString() + "' module flag must use 'max' merge behaviour");
49	ConstantInt *Value =
50	mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: `2`));
51	Check(Value, "'" + ID->getString() +
52	"' module flag must have a constant integer value");
53	Check(Value->getZExtValue() <= `2`,
54	"'" + ID->getString() + "' module flag must be 0, 1, or 2");
55	}
56
57	// Verify that when a function has !reqd_work_group_size metadata, it also has
58	// an amdgpu-flat-work-group-size attribute that matches the product of the
59	// reqd_work_group_size operands.
60	static void verifyAMDGPUReqdWorkGroupSize(VerifierSupport &VS,
61	const Function &F) {
62	// This is not required for other targets so we only check for AMDGPU.
63	if (!VS.TT.isAMDGPU())
64	return;
65
66	MDNode *ReqdWorkGroupSize = F.getMetadata(Kind: "reqd_work_group_size");
67	if (!ReqdWorkGroupSize \|\| ReqdWorkGroupSize->getNumOperands() != `3`)
68	return;
69
70	uint64_t Product = `1`;
71	for (const MDOperand &Op : ReqdWorkGroupSize->operands()) {
72	ConstantInt *C = mdconst::dyn_extract<ConstantInt>(MD: Op);
73	if (!C \|\| C->getValue().getActiveBits() > `64`)
74	return;
75	uint64_t Dim = C->getZExtValue();
76	if (Dim != `0` && Product > std::numeric_limits<uint64_t>::max() / Dim)
77	return;
78	Product *= Dim;
79	}
80
81	Attribute FlatWorkGroupSize = F.getFnAttribute(Kind: "amdgpu-flat-work-group-size");
82	if (!FlatWorkGroupSize.isValid()) {
83	VS.CheckFailed(Message: "reqd_work_group_size requires amdgpu-flat-work-group-size",
84	V1: &F, Vs: ReqdWorkGroupSize);
85	return;
86	}
87
88	if (!FlatWorkGroupSize.isStringAttribute()) {
89	VS.CheckFailed(Message: "amdgpu-flat-work-group-size must be a string attribute",
90	V1: &F);
91	return;
92	}
93
94	StringRef AttrValue = FlatWorkGroupSize.getValueAsString();
95	std::pair<StringRef, StringRef> Values = AttrValue.split(Separator: `','`);
96	uint64_t Min = `0`;
97	uint64_t Max = `0`;
98	bool Parsed = !Values.second.contains(C: `','`) &&
99	llvm::to_integer(S: Values.first.trim(), Num&: Min) &&
100	llvm::to_integer(S: Values.second.trim(), Num&: Max);
101	if (!Parsed) {
102	VS.CheckFailed(Message: "amdgpu-flat-work-group-size must be a pair of unsigned "
103	"integers",
104	V1: &F);
105	return;
106	}
107
108	if (Min != Product \|\| Max != Product) {
109	VS.CheckFailed(Message: "amdgpu-flat-work-group-size must equal the product of "
110	"reqd_work_group_size operands",
111	V1: &F, Vs: ReqdWorkGroupSize);
112	}
113	}
114
115	void llvm::verifyAMDGPUFunctionMetadata(VerifierSupport &VS,
116	const Function &F) {
117	verifyAMDGPUReqdWorkGroupSize(VS, F);
118	}
119
120	void llvm::verifyAMDGPUAlloca(VerifierSupport &VS, const AllocaInst &AI) {
121	// This is not required for other targets so we only check for AMDGPU.
122	if (!VS.TT.isAMDGPU())
123	return;
124
125	if (AI.getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
126	VS.CheckFailed(Message: "alloca on amdgpu must be in addrspace(5)", V1: &AI);
127	}
128
129	bool llvm::isAMDGPUCallBrIntrinsic(Intrinsic::ID ID) {
130	switch (ID) {
131	default:
132	return false;
133	case Intrinsic::amdgcn_kill:
134	return true;
135	}
136	}
137
138	void llvm::verifyAMDGPUIntrinsicCall(VerifierSupport &VS, Intrinsic::ID ID,
139	CallBase &Call) {
140	switch (ID) {
141	default:
142	return;
143	case Intrinsic::amdgcn_kill: {
144	if (auto *CBI = dyn_cast<CallBrInst>(Val: &Call)) {
145	Check(CBI->getNumIndirectDests() == `1`,
146	"callbr amdgcn_kill only supports one indirect dest");
147	// We assume that amdgcn_unreachable is only introduced by
148	// AMDGPUUnifyDivergentExitNodes, which replaces the block's original
149	// unreachable terminator by a call to amdgcn_unreachable + a return.
150	const Instruction *Term = CBI->getIndirectDest(i: `0`)->getTerminator();
151	const CallInst *CI =
152	Term ? dyn_cast_if_present<CallInst>(Val: Term->getPrevNode()) : nullptr;
153	Check(isa_and_nonnull<UnreachableInst>(Term) \|\|
154	(CI && CI->getIntrinsicID() == Intrinsic::amdgcn_unreachable),
155	"callbr amdgcn_kill indirect dest needs to be unreachable");
156	}
157	break;
158	}
159	case Intrinsic::amdgcn_cs_chain: {
160	CallingConv::ID CallerCC = Call.getCaller()->getCallingConv();
161	switch (CallerCC) {
162	case CallingConv::AMDGPU_CS:
163	case CallingConv::AMDGPU_CS_Chain:
164	case CallingConv::AMDGPU_CS_ChainPreserve:
165	case CallingConv::AMDGPU_ES:
166	case CallingConv::AMDGPU_GS:
167	case CallingConv::AMDGPU_HS:
168	case CallingConv::AMDGPU_LS:
169	case CallingConv::AMDGPU_VS:
170	break;
171	default:
172	VS.CheckFailed(Message: "Intrinsic cannot be called from functions with this "
173	"calling convention",
174	V1: &Call);
175	break;
176	}
177
178	Check(Call.paramHasAttr(`2`, Attribute::InReg),
179	"SGPR arguments must have the `inreg` attribute", &Call);
180	Check(!Call.paramHasAttr(`3`, Attribute::InReg),
181	"VGPR arguments must not have the `inreg` attribute", &Call);
182
183	ConstantInt *FlagsArg = cast<ConstantInt>(Val: Call.getArgOperand(i: `4`));
184	Check(FlagsArg->getValue().ult(`2`),
185	"flags must be 0 or 1 for llvm.amdgcn.cs.chain", &Call);
186
187	Instruction *Next = Call.getNextNode();
188	bool IsAMDUnreachable = isa_and_nonnull<IntrinsicInst>(Val: Next) &&
189	cast<IntrinsicInst>(Val: Next)->getIntrinsicID() ==
190	Intrinsic::amdgcn_unreachable;
191	Check(Next && (isa<UnreachableInst>(Next) \|\| IsAMDUnreachable),
192	"llvm.amdgcn.cs.chain must be followed by unreachable", &Call);
193	break;
194	}
195	case Intrinsic::amdgcn_init_exec_from_input: {
196	const Argument *Arg = dyn_cast<Argument>(Val: Call.getOperand(i_nocapture: `0`));
197	Check(Arg && Arg->hasInRegAttr(),
198	"only inreg arguments to the parent function are valid as inputs to "
199	"this intrinsic",
200	&Call);
201	break;
202	}
203	case Intrinsic::amdgcn_set_inactive_chain_arg: {
204	CallingConv::ID CallerCC = Call.getCaller()->getCallingConv();
205	switch (CallerCC) {
206	case CallingConv::AMDGPU_CS_Chain:
207	case CallingConv::AMDGPU_CS_ChainPreserve:
208	break;
209	default:
210	VS.CheckFailed(Message: "Intrinsic can only be used from functions with the "
211	"amdgpu_cs_chain or amdgpu_cs_chain_preserve "
212	"calling conventions",
213	V1: &Call);
214	break;
215	}
216
217	unsigned InactiveIdx = `1`;
218	Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg),
219	"Value for inactive lanes must not have the `inreg` attribute",
220	&Call);
221	Check(isa<Argument>(Call.getArgOperand(InactiveIdx)),
222	"Value for inactive lanes must be a function argument", &Call);
223	Check(!cast<Argument>(Call.getArgOperand(InactiveIdx))->hasInRegAttr(),
224	"Value for inactive lanes must be a VGPR function argument", &Call);
225	break;
226	}
227	case Intrinsic::amdgcn_call_whole_wave: {
228	Function *F = dyn_cast<Function>(Val: Call.getArgOperand(i: `0`));
229	Check(F, "Indirect whole wave calls are not allowed", &Call);
230
231	CallingConv::ID CC = F->getCallingConv();
232	Check(CC == CallingConv::AMDGPU_Gfx_WholeWave,
233	"Callee must have the amdgpu_gfx_whole_wave calling convention",
234	&Call);
235
236	Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call);
237
238	Check(Call.arg_size() == F->arg_size(),
239	"Call argument count must match callee argument count", &Call);
240
241	Check(F->arg_begin()->getType()->isIntegerTy(`1`),
242	"Callee must have i1 as its first argument", &Call);
243	for (auto [CallArg, FuncArg] :
244	drop_begin(RangeOrContainer: zip_equal(t: Call.args(), u: F->args()))) {
245	Check(CallArg ->getType() == FuncArg.getType(),
246	"Argument types must match", &Call);
247
248	Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) ==
249	FuncArg.hasInRegAttr(),
250	"Argument inreg attributes must match", &Call);
251	}
252	break;
253	}
254	case Intrinsic::amdgcn_s_prefetch_data: {
255	Check(
256	AMDGPU::isFlatGlobalAddrSpace(
257	Call.getArgOperand(`0`)->getType()->getPointerAddressSpace()),
258	"llvm.amdgcn.s.prefetch.data only supports global or constant memory");
259	break;
260	}
261	case Intrinsic::amdgcn_load_to_lds:
262	case Intrinsic::amdgcn_load_async_to_lds:
263	case Intrinsic::amdgcn_global_load_lds:
264	case Intrinsic::amdgcn_global_load_async_lds:
265	case Intrinsic::amdgcn_raw_buffer_load_lds:
266	case Intrinsic::amdgcn_raw_buffer_load_async_lds:
267	case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
268	case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
269	case Intrinsic::amdgcn_struct_buffer_load_lds:
270	case Intrinsic::amdgcn_struct_buffer_load_async_lds:
271	case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
272	case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds: {
273	uint64_t Size = cast<ConstantInt>(Val: Call.getArgOperand(i: `2`))->getZExtValue();
274	Check(Size == `1` \|\| Size == `2` \|\| Size == `4` \|\| Size == `12` \|\| Size == `16`,
275	"invalid data size for load-to-LDS intrinsic; must be 1, 2, 4, 12, "
276	"or 16",
277	&Call);
278	break;
279	}
280	case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
281	case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
282	Value *Src0 = Call.getArgOperand(i: `0`);
283	Value *Src1 = Call.getArgOperand(i: `1`);
284
285	uint64_t CBSZ = cast<ConstantInt>(Val: Call.getArgOperand(i: `3`))->getZExtValue();
286	uint64_t BLGP = cast<ConstantInt>(Val: Call.getArgOperand(i: `4`))->getZExtValue();
287	Check(CBSZ <= `4`, "invalid value for cbsz format", Call,
288	Call.getArgOperand(`3`));
289	Check(BLGP <= `4`, "invalid value for blgp format", Call,
290	Call.getArgOperand(`4`));
291
292	auto GetFormatNumRegs = [](unsigned FormatVal) {
293	switch (FormatVal) {
294	case `0`:
295	case `1`:
296	return `8u`;
297	case `2`:
298	case `3`:
299	return `6u`;
300	case `4`:
301	return `4u`;
302	default:
303	llvm_unreachable("invalid format value");
304	}
305	};
306
307	auto IsValidSrcASrcBVector = [](FixedVectorType *Ty) {
308	if (!Ty \|\| !Ty->getElementType()->isIntegerTy(BitWidth: `32`))
309	return false;
310	unsigned NumElts = Ty->getNumElements();
311	return NumElts == `4` \|\| NumElts == `6` \|\| NumElts == `8`;
312	};
313
314	FixedVectorType *Src0Ty = dyn_cast<FixedVectorType>(Val: Src0->getType());
315	FixedVectorType *Src1Ty = dyn_cast<FixedVectorType>(Val: Src1->getType());
316	Check(IsValidSrcASrcBVector(Src0Ty),
317	"operand 0 must be 4, 6 or 8 element i32 vector", &Call, Src0);
318	Check(IsValidSrcASrcBVector(Src1Ty),
319	"operand 1 must be 4, 6 or 8 element i32 vector", &Call, Src1);
320
321	Check(Src0Ty->getNumElements() >= GetFormatNumRegs(CBSZ),
322	"invalid vector type for format", &Call, Src0, Call.getArgOperand(`3`));
323	Check(Src1Ty->getNumElements() >= GetFormatNumRegs(BLGP),
324	"invalid vector type for format", &Call, Src1, Call.getArgOperand(`5`));
325	break;
326	}
327	case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
328	case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
329	case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
330	Value *Src0 = Call.getArgOperand(i: `1`);
331	Value *Src1 = Call.getArgOperand(i: `3`);
332
333	unsigned FmtA = cast<ConstantInt>(Val: Call.getArgOperand(i: `0`))->getZExtValue();
334	unsigned FmtB = cast<ConstantInt>(Val: Call.getArgOperand(i: `2`))->getZExtValue();
335	Check(FmtA <= `4`, "invalid value for matrix format", Call,
336	Call.getArgOperand(`0`));
337	Check(FmtB <= `4`, "invalid value for matrix format", Call,
338	Call.getArgOperand(`2`));
339
340	auto GetFormatNumRegs = [](unsigned FormatVal) {
341	switch (FormatVal) {
342	case `0`:
343	case `1`:
344	return `16u`;
345	case `2`:
346	case `3`:
347	return `12u`;
348	case `4`:
349	return `8u`;
350	default:
351	llvm_unreachable("invalid format value");
352	}
353	};
354
355	auto IsValidSrcASrcBVector = [](FixedVectorType *Ty) {
356	if (!Ty \|\| !Ty->getElementType()->isIntegerTy(BitWidth: `32`))
357	return false;
358	unsigned NumElts = Ty->getNumElements();
359	return NumElts == `16` \|\| NumElts == `12` \|\| NumElts == `8`;
360	};
361
362	FixedVectorType *Src0Ty = dyn_cast<FixedVectorType>(Val: Src0->getType());
363	FixedVectorType *Src1Ty = dyn_cast<FixedVectorType>(Val: Src1->getType());
364	Check(IsValidSrcASrcBVector(Src0Ty),
365	"operand 1 must be 8, 12 or 16 element i32 vector", &Call, Src0);
366	Check(IsValidSrcASrcBVector(Src1Ty),
367	"operand 3 must be 8, 12 or 16 element i32 vector", &Call, Src1);
368
369	Check(Src0Ty->getNumElements() >= GetFormatNumRegs(FmtA),
370	"invalid vector type for format", &Call, Src0, Call.getArgOperand(`0`));
371	Check(Src1Ty->getNumElements() >= GetFormatNumRegs(FmtB),
372	"invalid vector type for format", &Call, Src1, Call.getArgOperand(`2`));
373	break;
374	}
375	case Intrinsic::amdgcn_cooperative_atomic_load_32x4B:
376	case Intrinsic::amdgcn_cooperative_atomic_load_16x8B:
377	case Intrinsic::amdgcn_cooperative_atomic_load_8x16B:
378	case Intrinsic::amdgcn_cooperative_atomic_store_32x4B:
379	case Intrinsic::amdgcn_cooperative_atomic_store_16x8B:
380	case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: {
381	Value *PtrArg = Call.getArgOperand(i: `0`);
382	const unsigned AS = PtrArg->getType()->getPointerAddressSpace();
383	Check(AS == AMDGPUAS::FLAT_ADDRESS \|\| AS == AMDGPUAS::GLOBAL_ADDRESS,
384	"cooperative atomic intrinsics require a generic or global pointer",
385	&Call, PtrArg);
386
387	MetadataAsValue *Op =
388	cast<MetadataAsValue>(Val: Call.getArgOperand(i: Call.arg_size() - `1`));
389	MDNode *MD = cast<MDNode>(Val: Op->getMetadata());
390	Check((MD->getNumOperands() == `1`) && isa<MDString>(MD->getOperand(`0`)),
391	"cooperative atomic intrinsics require that the last argument is a "
392	"metadata string",
393	&Call, Op);
394	break;
395	}
396	case Intrinsic::amdgcn_av_load_b128:
397	case Intrinsic::amdgcn_av_store_b128: {
398	MetadataAsValue *Op =
399	cast<MetadataAsValue>(Val: Call.getArgOperand(i: Call.arg_size() - `1`));
400	MDNode *MD = dyn_cast<MDNode>(Val: Op->getMetadata());
401	Check(MD && (MD->getNumOperands() == `1`) && isa<MDString>(MD->getOperand(`0`)),
402	"the last argument to av load/store intrinsics must be a "
403	"metadata string",
404	&Call, Op);
405	break;
406	}
407	}
408	}
409
// The Check helper macro is only meant for the functions in this file.
#undef Check
411

Browse the source code of llvm_projects/llvm/lib/IR/VerifierAMDGPU.cpp