| 1 | //===-- VerifierAMDGPU.cpp - AMDGPU-specific IR verification ---------------==// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains AMDGPU-specific IR verification logic that was extracted |
| 10 | // from Verifier.cpp for code organization purposes only. These checks are |
| 11 | // always compiled and linked as part of LLVMCore — this is not a target- |
| 12 | // dependent IR verifier, which would require a different design. |
| 13 | // |
| 14 | // This file should only contain checks for AMDGPU-specific IR constructs |
| 15 | // (e.g. amdgcn intrinsics, AMDGPU address spaces). It must not contain |
| 16 | // checks for generic IR that might behave differently under AMDGPU. |
| 17 | // |
| 18 | //===----------------------------------------------------------------------===// |
| 19 | |
| 20 | #include "VerifierInternal.h" |
| 21 | #include "llvm/ADT/StringExtras.h" |
| 22 | #include "llvm/IR/CallingConv.h" |
| 23 | #include "llvm/IR/Constants.h" |
| 24 | #include "llvm/IR/DerivedTypes.h" |
| 25 | #include "llvm/IR/Function.h" |
| 26 | #include "llvm/IR/IntrinsicInst.h" |
| 27 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
| 28 | #include "llvm/Support/AMDGPUAddrSpace.h" |
| 29 | |
| 30 | using namespace llvm; |
| 31 | |
// Verifier assertion helper. When the condition holds, nothing happens.
// Otherwise the remaining arguments are forwarded to VS.CheckFailed and the
// enclosing function returns immediately. An object named `VS` that provides
// CheckFailed must be in scope at the point of use, and the enclosing function
// must return void because the macro expands to a bare `return;`.
#define Check(Cond, ...)                                                       \
  do {                                                                         \
    if (Cond)                                                                  \
      break;                                                                   \
    VS.CheckFailed(__VA_ARGS__);                                               \
    return;                                                                    \
  } while (false)
| 39 | |
| 40 | void llvm::verifyAMDGPUModuleFlag(VerifierSupport &VS, const MDString *ID, |
| 41 | Module::ModFlagBehavior MFB, |
| 42 | const MDNode *Op) { |
| 43 | if (ID->getString() != "amdgpu.buffer.oob.mode" && |
| 44 | ID->getString() != "amdgpu.tbuffer.oob.mode" ) |
| 45 | return; |
| 46 | |
| 47 | Check(MFB == Module::Max, |
| 48 | "'" + ID->getString() + "' module flag must use 'max' merge behaviour" ); |
| 49 | ConstantInt *Value = |
| 50 | mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 2)); |
| 51 | Check(Value, "'" + ID->getString() + |
| 52 | "' module flag must have a constant integer value" ); |
| 53 | Check(Value->getZExtValue() <= 2, |
| 54 | "'" + ID->getString() + "' module flag must be 0, 1, or 2" ); |
| 55 | } |
| 56 | |
| 57 | // Verify that when a function has !reqd_work_group_size metadata, it also has |
| 58 | // an amdgpu-flat-work-group-size attribute that matches the product of the |
| 59 | // reqd_work_group_size operands. |
| 60 | static void verifyAMDGPUReqdWorkGroupSize(VerifierSupport &VS, |
| 61 | const Function &F) { |
| 62 | // This is not required for other targets so we only check for AMDGPU. |
| 63 | if (!VS.TT.isAMDGPU()) |
| 64 | return; |
| 65 | |
| 66 | MDNode *ReqdWorkGroupSize = F.getMetadata(Kind: "reqd_work_group_size" ); |
| 67 | if (!ReqdWorkGroupSize || ReqdWorkGroupSize->getNumOperands() != 3) |
| 68 | return; |
| 69 | |
| 70 | uint64_t Product = 1; |
| 71 | for (const MDOperand &Op : ReqdWorkGroupSize->operands()) { |
| 72 | ConstantInt *C = mdconst::dyn_extract<ConstantInt>(MD: Op); |
| 73 | if (!C || C->getValue().getActiveBits() > 64) |
| 74 | return; |
| 75 | uint64_t Dim = C->getZExtValue(); |
| 76 | if (Dim != 0 && Product > std::numeric_limits<uint64_t>::max() / Dim) |
| 77 | return; |
| 78 | Product *= Dim; |
| 79 | } |
| 80 | |
| 81 | Attribute FlatWorkGroupSize = F.getFnAttribute(Kind: "amdgpu-flat-work-group-size" ); |
| 82 | if (!FlatWorkGroupSize.isValid()) { |
| 83 | VS.CheckFailed(Message: "reqd_work_group_size requires amdgpu-flat-work-group-size" , |
| 84 | V1: &F, Vs: ReqdWorkGroupSize); |
| 85 | return; |
| 86 | } |
| 87 | |
| 88 | if (!FlatWorkGroupSize.isStringAttribute()) { |
| 89 | VS.CheckFailed(Message: "amdgpu-flat-work-group-size must be a string attribute" , |
| 90 | V1: &F); |
| 91 | return; |
| 92 | } |
| 93 | |
| 94 | StringRef AttrValue = FlatWorkGroupSize.getValueAsString(); |
| 95 | std::pair<StringRef, StringRef> Values = AttrValue.split(Separator: ','); |
| 96 | uint64_t Min = 0; |
| 97 | uint64_t Max = 0; |
| 98 | bool Parsed = !Values.second.contains(C: ',') && |
| 99 | llvm::to_integer(S: Values.first.trim(), Num&: Min) && |
| 100 | llvm::to_integer(S: Values.second.trim(), Num&: Max); |
| 101 | if (!Parsed) { |
| 102 | VS.CheckFailed(Message: "amdgpu-flat-work-group-size must be a pair of unsigned " |
| 103 | "integers" , |
| 104 | V1: &F); |
| 105 | return; |
| 106 | } |
| 107 | |
| 108 | if (Min != Product || Max != Product) { |
| 109 | VS.CheckFailed(Message: "amdgpu-flat-work-group-size must equal the product of " |
| 110 | "reqd_work_group_size operands" , |
| 111 | V1: &F, Vs: ReqdWorkGroupSize); |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | void llvm::verifyAMDGPUFunctionMetadata(VerifierSupport &VS, |
| 116 | const Function &F) { |
| 117 | verifyAMDGPUReqdWorkGroupSize(VS, F); |
| 118 | } |
| 119 | |
| 120 | void llvm::verifyAMDGPUAlloca(VerifierSupport &VS, const AllocaInst &AI) { |
| 121 | // This is not required for other targets so we only check for AMDGPU. |
| 122 | if (!VS.TT.isAMDGPU()) |
| 123 | return; |
| 124 | |
| 125 | if (AI.getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) |
| 126 | VS.CheckFailed(Message: "alloca on amdgpu must be in addrspace(5)" , V1: &AI); |
| 127 | } |
| 128 | |
| 129 | bool llvm::isAMDGPUCallBrIntrinsic(Intrinsic::ID ID) { |
| 130 | switch (ID) { |
| 131 | default: |
| 132 | return false; |
| 133 | case Intrinsic::amdgcn_kill: |
| 134 | return true; |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | void llvm::verifyAMDGPUIntrinsicCall(VerifierSupport &VS, Intrinsic::ID ID, |
| 139 | CallBase &Call) { |
| 140 | switch (ID) { |
| 141 | default: |
| 142 | return; |
| 143 | case Intrinsic::amdgcn_kill: { |
| 144 | if (auto *CBI = dyn_cast<CallBrInst>(Val: &Call)) { |
| 145 | Check(CBI->getNumIndirectDests() == 1, |
| 146 | "callbr amdgcn_kill only supports one indirect dest" ); |
| 147 | bool Unreachable = isa<UnreachableInst>(Val: CBI->getIndirectDest(i: 0)->begin()); |
| 148 | CallInst *CI = dyn_cast<CallInst>(Val: CBI->getIndirectDest(i: 0)->begin()); |
| 149 | Check(Unreachable || |
| 150 | (CI && CI->getIntrinsicID() == Intrinsic::amdgcn_unreachable), |
| 151 | "callbr amdgcn_kill indirect dest needs to be unreachable" ); |
| 152 | } |
| 153 | break; |
| 154 | } |
| 155 | case Intrinsic::amdgcn_cs_chain: { |
| 156 | CallingConv::ID CallerCC = Call.getCaller()->getCallingConv(); |
| 157 | switch (CallerCC) { |
| 158 | case CallingConv::AMDGPU_CS: |
| 159 | case CallingConv::AMDGPU_CS_Chain: |
| 160 | case CallingConv::AMDGPU_CS_ChainPreserve: |
| 161 | case CallingConv::AMDGPU_ES: |
| 162 | case CallingConv::AMDGPU_GS: |
| 163 | case CallingConv::AMDGPU_HS: |
| 164 | case CallingConv::AMDGPU_LS: |
| 165 | case CallingConv::AMDGPU_VS: |
| 166 | break; |
| 167 | default: |
| 168 | VS.CheckFailed(Message: "Intrinsic cannot be called from functions with this " |
| 169 | "calling convention" , |
| 170 | V1: &Call); |
| 171 | break; |
| 172 | } |
| 173 | |
| 174 | Check(Call.paramHasAttr(2, Attribute::InReg), |
| 175 | "SGPR arguments must have the `inreg` attribute" , &Call); |
| 176 | Check(!Call.paramHasAttr(3, Attribute::InReg), |
| 177 | "VGPR arguments must not have the `inreg` attribute" , &Call); |
| 178 | |
| 179 | ConstantInt *FlagsArg = cast<ConstantInt>(Val: Call.getArgOperand(i: 4)); |
| 180 | Check(FlagsArg->getValue().ult(2), |
| 181 | "flags must be 0 or 1 for llvm.amdgcn.cs.chain" , &Call); |
| 182 | |
| 183 | Instruction *Next = Call.getNextNode(); |
| 184 | bool IsAMDUnreachable = isa_and_nonnull<IntrinsicInst>(Val: Next) && |
| 185 | cast<IntrinsicInst>(Val: Next)->getIntrinsicID() == |
| 186 | Intrinsic::amdgcn_unreachable; |
| 187 | Check(Next && (isa<UnreachableInst>(Next) || IsAMDUnreachable), |
| 188 | "llvm.amdgcn.cs.chain must be followed by unreachable" , &Call); |
| 189 | break; |
| 190 | } |
| 191 | case Intrinsic::amdgcn_init_exec_from_input: { |
| 192 | const Argument *Arg = dyn_cast<Argument>(Val: Call.getOperand(i_nocapture: 0)); |
| 193 | Check(Arg && Arg->hasInRegAttr(), |
| 194 | "only inreg arguments to the parent function are valid as inputs to " |
| 195 | "this intrinsic" , |
| 196 | &Call); |
| 197 | break; |
| 198 | } |
| 199 | case Intrinsic::amdgcn_set_inactive_chain_arg: { |
| 200 | CallingConv::ID CallerCC = Call.getCaller()->getCallingConv(); |
| 201 | switch (CallerCC) { |
| 202 | case CallingConv::AMDGPU_CS_Chain: |
| 203 | case CallingConv::AMDGPU_CS_ChainPreserve: |
| 204 | break; |
| 205 | default: |
| 206 | VS.CheckFailed(Message: "Intrinsic can only be used from functions with the " |
| 207 | "amdgpu_cs_chain or amdgpu_cs_chain_preserve " |
| 208 | "calling conventions" , |
| 209 | V1: &Call); |
| 210 | break; |
| 211 | } |
| 212 | |
| 213 | unsigned InactiveIdx = 1; |
| 214 | Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg), |
| 215 | "Value for inactive lanes must not have the `inreg` attribute" , |
| 216 | &Call); |
| 217 | Check(isa<Argument>(Call.getArgOperand(InactiveIdx)), |
| 218 | "Value for inactive lanes must be a function argument" , &Call); |
| 219 | Check(!cast<Argument>(Call.getArgOperand(InactiveIdx))->hasInRegAttr(), |
| 220 | "Value for inactive lanes must be a VGPR function argument" , &Call); |
| 221 | break; |
| 222 | } |
| 223 | case Intrinsic::amdgcn_call_whole_wave: { |
| 224 | Function *F = dyn_cast<Function>(Val: Call.getArgOperand(i: 0)); |
| 225 | Check(F, "Indirect whole wave calls are not allowed" , &Call); |
| 226 | |
| 227 | CallingConv::ID CC = F->getCallingConv(); |
| 228 | Check(CC == CallingConv::AMDGPU_Gfx_WholeWave, |
| 229 | "Callee must have the amdgpu_gfx_whole_wave calling convention" , |
| 230 | &Call); |
| 231 | |
| 232 | Check(!F->isVarArg(), "Variadic whole wave calls are not allowed" , &Call); |
| 233 | |
| 234 | Check(Call.arg_size() == F->arg_size(), |
| 235 | "Call argument count must match callee argument count" , &Call); |
| 236 | |
| 237 | Check(F->arg_begin()->getType()->isIntegerTy(1), |
| 238 | "Callee must have i1 as its first argument" , &Call); |
| 239 | for (auto [CallArg, FuncArg] : |
| 240 | drop_begin(RangeOrContainer: zip_equal(t: Call.args(), u: F->args()))) { |
| 241 | Check(CallArg->getType() == FuncArg.getType(), |
| 242 | "Argument types must match" , &Call); |
| 243 | |
| 244 | Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) == |
| 245 | FuncArg.hasInRegAttr(), |
| 246 | "Argument inreg attributes must match" , &Call); |
| 247 | } |
| 248 | break; |
| 249 | } |
| 250 | case Intrinsic::amdgcn_s_prefetch_data: { |
| 251 | Check( |
| 252 | AMDGPU::isFlatGlobalAddrSpace( |
| 253 | Call.getArgOperand(0)->getType()->getPointerAddressSpace()), |
| 254 | "llvm.amdgcn.s.prefetch.data only supports global or constant memory" ); |
| 255 | break; |
| 256 | } |
| 257 | case Intrinsic::amdgcn_load_to_lds: |
| 258 | case Intrinsic::amdgcn_load_async_to_lds: |
| 259 | case Intrinsic::amdgcn_global_load_lds: |
| 260 | case Intrinsic::amdgcn_global_load_async_lds: |
| 261 | case Intrinsic::amdgcn_raw_buffer_load_lds: |
| 262 | case Intrinsic::amdgcn_raw_buffer_load_async_lds: |
| 263 | case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: |
| 264 | case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds: |
| 265 | case Intrinsic::amdgcn_struct_buffer_load_lds: |
| 266 | case Intrinsic::amdgcn_struct_buffer_load_async_lds: |
| 267 | case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: |
| 268 | case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds: { |
| 269 | uint64_t Size = cast<ConstantInt>(Val: Call.getArgOperand(i: 2))->getZExtValue(); |
| 270 | Check(Size == 1 || Size == 2 || Size == 4 || Size == 12 || Size == 16, |
| 271 | "invalid data size for load-to-LDS intrinsic; must be 1, 2, 4, 12, " |
| 272 | "or 16" , |
| 273 | &Call); |
| 274 | break; |
| 275 | } |
| 276 | case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4: |
| 277 | case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { |
| 278 | Value *Src0 = Call.getArgOperand(i: 0); |
| 279 | Value *Src1 = Call.getArgOperand(i: 1); |
| 280 | |
| 281 | uint64_t CBSZ = cast<ConstantInt>(Val: Call.getArgOperand(i: 3))->getZExtValue(); |
| 282 | uint64_t BLGP = cast<ConstantInt>(Val: Call.getArgOperand(i: 4))->getZExtValue(); |
| 283 | Check(CBSZ <= 4, "invalid value for cbsz format" , Call, |
| 284 | Call.getArgOperand(3)); |
| 285 | Check(BLGP <= 4, "invalid value for blgp format" , Call, |
| 286 | Call.getArgOperand(4)); |
| 287 | |
| 288 | auto GetFormatNumRegs = [](unsigned FormatVal) { |
| 289 | switch (FormatVal) { |
| 290 | case 0: |
| 291 | case 1: |
| 292 | return 8u; |
| 293 | case 2: |
| 294 | case 3: |
| 295 | return 6u; |
| 296 | case 4: |
| 297 | return 4u; |
| 298 | default: |
| 299 | llvm_unreachable("invalid format value" ); |
| 300 | } |
| 301 | }; |
| 302 | |
| 303 | auto IsValidSrcASrcBVector = [](FixedVectorType *Ty) { |
| 304 | if (!Ty || !Ty->getElementType()->isIntegerTy(BitWidth: 32)) |
| 305 | return false; |
| 306 | unsigned NumElts = Ty->getNumElements(); |
| 307 | return NumElts == 4 || NumElts == 6 || NumElts == 8; |
| 308 | }; |
| 309 | |
| 310 | FixedVectorType *Src0Ty = dyn_cast<FixedVectorType>(Val: Src0->getType()); |
| 311 | FixedVectorType *Src1Ty = dyn_cast<FixedVectorType>(Val: Src1->getType()); |
| 312 | Check(IsValidSrcASrcBVector(Src0Ty), |
| 313 | "operand 0 must be 4, 6 or 8 element i32 vector" , &Call, Src0); |
| 314 | Check(IsValidSrcASrcBVector(Src1Ty), |
| 315 | "operand 1 must be 4, 6 or 8 element i32 vector" , &Call, Src1); |
| 316 | |
| 317 | Check(Src0Ty->getNumElements() >= GetFormatNumRegs(CBSZ), |
| 318 | "invalid vector type for format" , &Call, Src0, Call.getArgOperand(3)); |
| 319 | Check(Src1Ty->getNumElements() >= GetFormatNumRegs(BLGP), |
| 320 | "invalid vector type for format" , &Call, Src1, Call.getArgOperand(5)); |
| 321 | break; |
| 322 | } |
| 323 | case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4: |
| 324 | case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4: |
| 325 | case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: { |
| 326 | Value *Src0 = Call.getArgOperand(i: 1); |
| 327 | Value *Src1 = Call.getArgOperand(i: 3); |
| 328 | |
| 329 | unsigned FmtA = cast<ConstantInt>(Val: Call.getArgOperand(i: 0))->getZExtValue(); |
| 330 | unsigned FmtB = cast<ConstantInt>(Val: Call.getArgOperand(i: 2))->getZExtValue(); |
| 331 | Check(FmtA <= 4, "invalid value for matrix format" , Call, |
| 332 | Call.getArgOperand(0)); |
| 333 | Check(FmtB <= 4, "invalid value for matrix format" , Call, |
| 334 | Call.getArgOperand(2)); |
| 335 | |
| 336 | auto GetFormatNumRegs = [](unsigned FormatVal) { |
| 337 | switch (FormatVal) { |
| 338 | case 0: |
| 339 | case 1: |
| 340 | return 16u; |
| 341 | case 2: |
| 342 | case 3: |
| 343 | return 12u; |
| 344 | case 4: |
| 345 | return 8u; |
| 346 | default: |
| 347 | llvm_unreachable("invalid format value" ); |
| 348 | } |
| 349 | }; |
| 350 | |
| 351 | auto IsValidSrcASrcBVector = [](FixedVectorType *Ty) { |
| 352 | if (!Ty || !Ty->getElementType()->isIntegerTy(BitWidth: 32)) |
| 353 | return false; |
| 354 | unsigned NumElts = Ty->getNumElements(); |
| 355 | return NumElts == 16 || NumElts == 12 || NumElts == 8; |
| 356 | }; |
| 357 | |
| 358 | FixedVectorType *Src0Ty = dyn_cast<FixedVectorType>(Val: Src0->getType()); |
| 359 | FixedVectorType *Src1Ty = dyn_cast<FixedVectorType>(Val: Src1->getType()); |
| 360 | Check(IsValidSrcASrcBVector(Src0Ty), |
| 361 | "operand 1 must be 8, 12 or 16 element i32 vector" , &Call, Src0); |
| 362 | Check(IsValidSrcASrcBVector(Src1Ty), |
| 363 | "operand 3 must be 8, 12 or 16 element i32 vector" , &Call, Src1); |
| 364 | |
| 365 | Check(Src0Ty->getNumElements() >= GetFormatNumRegs(FmtA), |
| 366 | "invalid vector type for format" , &Call, Src0, Call.getArgOperand(0)); |
| 367 | Check(Src1Ty->getNumElements() >= GetFormatNumRegs(FmtB), |
| 368 | "invalid vector type for format" , &Call, Src1, Call.getArgOperand(2)); |
| 369 | break; |
| 370 | } |
| 371 | case Intrinsic::amdgcn_cooperative_atomic_load_32x4B: |
| 372 | case Intrinsic::amdgcn_cooperative_atomic_load_16x8B: |
| 373 | case Intrinsic::amdgcn_cooperative_atomic_load_8x16B: |
| 374 | case Intrinsic::amdgcn_cooperative_atomic_store_32x4B: |
| 375 | case Intrinsic::amdgcn_cooperative_atomic_store_16x8B: |
| 376 | case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: { |
| 377 | Value *PtrArg = Call.getArgOperand(i: 0); |
| 378 | const unsigned AS = PtrArg->getType()->getPointerAddressSpace(); |
| 379 | Check(AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS, |
| 380 | "cooperative atomic intrinsics require a generic or global pointer" , |
| 381 | &Call, PtrArg); |
| 382 | |
| 383 | MetadataAsValue *Op = |
| 384 | cast<MetadataAsValue>(Val: Call.getArgOperand(i: Call.arg_size() - 1)); |
| 385 | MDNode *MD = cast<MDNode>(Val: Op->getMetadata()); |
| 386 | Check((MD->getNumOperands() == 1) && isa<MDString>(MD->getOperand(0)), |
| 387 | "cooperative atomic intrinsics require that the last argument is a " |
| 388 | "metadata string" , |
| 389 | &Call, Op); |
| 390 | break; |
| 391 | } |
| 392 | case Intrinsic::amdgcn_av_load_b128: |
| 393 | case Intrinsic::amdgcn_av_store_b128: { |
| 394 | MetadataAsValue *Op = |
| 395 | cast<MetadataAsValue>(Val: Call.getArgOperand(i: Call.arg_size() - 1)); |
| 396 | MDNode *MD = dyn_cast<MDNode>(Val: Op->getMetadata()); |
| 397 | Check(MD && (MD->getNumOperands() == 1) && isa<MDString>(MD->getOperand(0)), |
| 398 | "the last argument to av load/store intrinsics must be a " |
| 399 | "metadata string" , |
| 400 | &Call, Op); |
| 401 | break; |
| 402 | } |
| 403 | } |
| 404 | } |
| 405 | |
| 406 | #undef Check |
| 407 | |