| 1 | //===-- AMDGPULowerKernelAttributes.cpp------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
/// \file This pass attempts to make use of reqd_work_group_size metadata
/// to eliminate loads from the dispatch packet and to constant fold OpenCL
/// get_local_size-like functions.
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "AMDGPU.h" |
| 16 | #include "Utils/AMDGPUBaseInfo.h" |
| 17 | #include "llvm/Analysis/ConstantFolding.h" |
| 18 | #include "llvm/Analysis/ValueTracking.h" |
| 19 | #include "llvm/CodeGen/Passes.h" |
| 20 | #include "llvm/IR/Constants.h" |
| 21 | #include "llvm/IR/Function.h" |
| 22 | #include "llvm/IR/IRBuilder.h" |
| 23 | #include "llvm/IR/InstIterator.h" |
| 24 | #include "llvm/IR/Instructions.h" |
| 25 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
| 26 | #include "llvm/IR/MDBuilder.h" |
| 27 | #include "llvm/IR/PatternMatch.h" |
| 28 | #include "llvm/Pass.h" |
| 29 | |
| 30 | #define DEBUG_TYPE "amdgpu-lower-kernel-attributes" |
| 31 | |
| 32 | using namespace llvm; |
| 33 | |
| 34 | namespace { |
| 35 | |
| 36 | // Field offsets in hsa_kernel_dispatch_packet_t. |
| 37 | enum DispatchPackedOffsets { |
| 38 | WORKGROUP_SIZE_X = 4, |
| 39 | WORKGROUP_SIZE_Y = 6, |
| 40 | WORKGROUP_SIZE_Z = 8, |
| 41 | |
| 42 | GRID_SIZE_X = 12, |
| 43 | GRID_SIZE_Y = 16, |
| 44 | GRID_SIZE_Z = 20 |
| 45 | }; |
| 46 | |
| 47 | // Field offsets to implicit kernel argument pointer. |
| 48 | enum ImplicitArgOffsets { |
| 49 | HIDDEN_BLOCK_COUNT_X = 0, |
| 50 | HIDDEN_BLOCK_COUNT_Y = 4, |
| 51 | HIDDEN_BLOCK_COUNT_Z = 8, |
| 52 | |
| 53 | HIDDEN_GROUP_SIZE_X = 12, |
| 54 | HIDDEN_GROUP_SIZE_Y = 14, |
| 55 | HIDDEN_GROUP_SIZE_Z = 16, |
| 56 | |
| 57 | HIDDEN_REMAINDER_X = 18, |
| 58 | HIDDEN_REMAINDER_Y = 20, |
| 59 | HIDDEN_REMAINDER_Z = 22, |
| 60 | |
| 61 | GRID_DIMS = 64 |
| 62 | }; |
| 63 | |
| 64 | class AMDGPULowerKernelAttributes : public ModulePass { |
| 65 | public: |
| 66 | static char ID; |
| 67 | |
| 68 | AMDGPULowerKernelAttributes() : ModulePass(ID) {} |
| 69 | |
| 70 | bool runOnModule(Module &M) override; |
| 71 | |
| 72 | StringRef getPassName() const override { return "AMDGPU Kernel Attributes" ; } |
| 73 | |
| 74 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 75 | AU.setPreservesAll(); |
| 76 | } |
| 77 | }; |
| 78 | |
| 79 | Function *getBasePtrIntrinsic(Module &M, bool IsV5OrAbove) { |
| 80 | auto IntrinsicId = IsV5OrAbove ? Intrinsic::amdgcn_implicitarg_ptr |
| 81 | : Intrinsic::amdgcn_dispatch_ptr; |
| 82 | return Intrinsic::getDeclarationIfExists(M: &M, id: IntrinsicId); |
| 83 | } |
| 84 | |
| 85 | } // end anonymous namespace |
| 86 | |
| 87 | static bool annotateGridSizeLoadWithRangeMD(LoadInst *Load, |
| 88 | uint32_t MaxNumGroups) { |
| 89 | if (MaxNumGroups == 0 || MaxNumGroups == std::numeric_limits<uint32_t>::max()) |
| 90 | return false; |
| 91 | |
| 92 | if (!Load->getType()->isIntegerTy(Bitwidth: 32)) |
| 93 | return false; |
| 94 | |
| 95 | // TODO: If there is existing range metadata, preserve it if it is stricter. |
| 96 | if (Load->hasMetadata(KindID: LLVMContext::MD_range)) |
| 97 | return false; |
| 98 | |
| 99 | MDBuilder MDB(Load->getContext()); |
| 100 | MDNode *Range = MDB.createRange(Lo: APInt(32, 1), Hi: APInt(32, MaxNumGroups + 1)); |
| 101 | Load->setMetadata(KindID: LLVMContext::MD_range, Node: Range); |
| 102 | return true; |
| 103 | } |
| 104 | |
| 105 | static bool annotateGroupSizeLoadWithRangeMD(LoadInst *Load, bool IsRemainder) { |
| 106 | if (!Load->getType()->isIntegerTy(Bitwidth: 16)) |
| 107 | return false; |
| 108 | |
| 109 | // TODO: If there is existing range metadata, preserve it if it is stricter. |
| 110 | if (Load->hasMetadata(KindID: LLVMContext::MD_range)) |
| 111 | return false; |
| 112 | |
| 113 | MDBuilder MDB(Load->getContext()); |
| 114 | MDNode *Range = MDB.createRange( |
| 115 | Lo: APInt(16, !IsRemainder), |
| 116 | Hi: APInt(16, AMDGPU::IsaInfo::getMaxFlatWorkGroupSize() + 1 - IsRemainder)); |
| 117 | Load->setMetadata(KindID: LLVMContext::MD_range, Node: Range); |
| 118 | return true; |
| 119 | } |
| 120 | |
| 121 | static bool annotateGridDimsLoadWithRangeMD(LoadInst *Load, |
| 122 | unsigned KnownNumGridDims) { |
| 123 | IntegerType *Ty = dyn_cast<IntegerType>(Val: Load->getType()); |
| 124 | if (!Ty || Ty->getBitWidth() < 3) |
| 125 | return false; |
| 126 | |
| 127 | if (KnownNumGridDims != 0) { |
| 128 | Load->replaceAllUsesWith( |
| 129 | V: ConstantInt::get(Ty: Load->getType(), V: KnownNumGridDims)); |
| 130 | return true; |
| 131 | } |
| 132 | |
| 133 | // TODO: If there is existing range metadata, preserve it if it is stricter. |
| 134 | if (Load->hasMetadata(KindID: LLVMContext::MD_range)) |
| 135 | return false; |
| 136 | |
| 137 | MDBuilder MDB(Load->getContext()); |
| 138 | MDNode *Range = |
| 139 | MDB.createRange(Lo: APInt(Ty->getBitWidth(), 1), Hi: APInt(Ty->getBitWidth(), 4)); |
| 140 | Load->setMetadata(KindID: LLVMContext::MD_range, Node: Range); |
| 141 | return true; |
| 142 | } |
| 143 | |
| 144 | /// Compute the number of grid dimensions based on !reqd_work_group_size |
| 145 | /// metadata |
| 146 | static unsigned computeNumGridDims(const MDNode *ReqdWorkGroupSize) { |
| 147 | ConstantInt *KnownZ = |
| 148 | mdconst::extract<ConstantInt>(MD: ReqdWorkGroupSize->getOperand(I: 2)); |
| 149 | if (KnownZ->getZExtValue() != 1) |
| 150 | return 3; |
| 151 | |
| 152 | ConstantInt *KnownY = |
| 153 | mdconst::extract<ConstantInt>(MD: ReqdWorkGroupSize->getOperand(I: 1)); |
| 154 | if (KnownY->getZExtValue() != 1) |
| 155 | return 2; |
| 156 | |
| 157 | return 1; |
| 158 | } |
| 159 | |
| 160 | static bool processUse(CallInst *CI, bool IsV5OrAbove) { |
| 161 | Function *F = CI->getFunction(); |
| 162 | |
| 163 | auto *MD = F->getMetadata(Kind: "reqd_work_group_size" ); |
| 164 | const bool HasReqdWorkGroupSize = MD && MD->getNumOperands() == 3; |
| 165 | |
| 166 | const bool HasUniformWorkGroupSize = |
| 167 | F->hasFnAttribute(Kind: "uniform-work-group-size" ); |
| 168 | |
| 169 | SmallVector<unsigned> MaxNumWorkgroups = |
| 170 | AMDGPU::getIntegerVecAttribute(F: *F, Name: "amdgpu-max-num-workgroups" , |
| 171 | /*Size=*/3, /*DefaultVal=*/0); |
| 172 | |
| 173 | Value *BlockCounts[3] = {nullptr, nullptr, nullptr}; |
| 174 | Value *GroupSizes[3] = {nullptr, nullptr, nullptr}; |
| 175 | Value *Remainders[3] = {nullptr, nullptr, nullptr}; |
| 176 | Value *GridSizes[3] = {nullptr, nullptr, nullptr}; |
| 177 | |
| 178 | const DataLayout &DL = F->getDataLayout(); |
| 179 | bool MadeChange = false; |
| 180 | |
| 181 | unsigned KnownNumGridDims = HasReqdWorkGroupSize ? computeNumGridDims(ReqdWorkGroupSize: MD) : 0; |
| 182 | |
| 183 | // We expect to see several GEP users, casted to the appropriate type and |
| 184 | // loaded. |
| 185 | for (User *U : CI->users()) { |
| 186 | if (!U->hasOneUse()) |
| 187 | continue; |
| 188 | |
| 189 | int64_t Offset = 0; |
| 190 | auto *Load = dyn_cast<LoadInst>(Val: U); // Load from ImplicitArgPtr/DispatchPtr? |
| 191 | auto *BCI = dyn_cast<BitCastInst>(Val: U); |
| 192 | if (!Load && !BCI) { |
| 193 | if (GetPointerBaseWithConstantOffset(Ptr: U, Offset, DL) != CI) |
| 194 | continue; |
| 195 | Load = dyn_cast<LoadInst>(Val: *U->user_begin()); // Load from GEP? |
| 196 | BCI = dyn_cast<BitCastInst>(Val: *U->user_begin()); |
| 197 | } |
| 198 | |
| 199 | if (BCI) { |
| 200 | if (!BCI->hasOneUse()) |
| 201 | continue; |
| 202 | Load = dyn_cast<LoadInst>(Val: *BCI->user_begin()); // Load from BCI? |
| 203 | } |
| 204 | |
| 205 | if (!Load || !Load->isSimple()) |
| 206 | continue; |
| 207 | |
| 208 | unsigned LoadSize = DL.getTypeStoreSize(Ty: Load->getType()); |
| 209 | |
| 210 | // TODO: Handle merged loads. |
| 211 | if (IsV5OrAbove) { // Base is ImplicitArgPtr. |
| 212 | switch (Offset) { |
| 213 | case HIDDEN_BLOCK_COUNT_X: |
| 214 | if (LoadSize == 4) { |
| 215 | BlockCounts[0] = Load; |
| 216 | MadeChange |= |
| 217 | annotateGridSizeLoadWithRangeMD(Load, MaxNumGroups: MaxNumWorkgroups[0]); |
| 218 | } |
| 219 | break; |
| 220 | case HIDDEN_BLOCK_COUNT_Y: |
| 221 | if (LoadSize == 4) { |
| 222 | BlockCounts[1] = Load; |
| 223 | MadeChange |= |
| 224 | annotateGridSizeLoadWithRangeMD(Load, MaxNumGroups: MaxNumWorkgroups[1]); |
| 225 | } |
| 226 | break; |
| 227 | case HIDDEN_BLOCK_COUNT_Z: |
| 228 | if (LoadSize == 4) { |
| 229 | BlockCounts[2] = Load; |
| 230 | MadeChange |= |
| 231 | annotateGridSizeLoadWithRangeMD(Load, MaxNumGroups: MaxNumWorkgroups[2]); |
| 232 | } |
| 233 | break; |
| 234 | case HIDDEN_GROUP_SIZE_X: |
| 235 | if (LoadSize == 2) { |
| 236 | GroupSizes[0] = Load; |
| 237 | MadeChange |= annotateGroupSizeLoadWithRangeMD(Load, IsRemainder: false); |
| 238 | } |
| 239 | break; |
| 240 | case HIDDEN_GROUP_SIZE_Y: |
| 241 | if (LoadSize == 2) { |
| 242 | GroupSizes[1] = Load; |
| 243 | MadeChange |= annotateGroupSizeLoadWithRangeMD(Load, IsRemainder: false); |
| 244 | } |
| 245 | break; |
| 246 | case HIDDEN_GROUP_SIZE_Z: |
| 247 | if (LoadSize == 2) { |
| 248 | GroupSizes[2] = Load; |
| 249 | MadeChange |= annotateGroupSizeLoadWithRangeMD(Load, IsRemainder: false); |
| 250 | } |
| 251 | break; |
| 252 | case HIDDEN_REMAINDER_X: |
| 253 | if (LoadSize == 2) { |
| 254 | Remainders[0] = Load; |
| 255 | MadeChange |= annotateGroupSizeLoadWithRangeMD(Load, IsRemainder: true); |
| 256 | } |
| 257 | break; |
| 258 | case HIDDEN_REMAINDER_Y: |
| 259 | if (LoadSize == 2) { |
| 260 | Remainders[1] = Load; |
| 261 | MadeChange |= annotateGroupSizeLoadWithRangeMD(Load, IsRemainder: true); |
| 262 | } |
| 263 | break; |
| 264 | case HIDDEN_REMAINDER_Z: |
| 265 | if (LoadSize == 2) { |
| 266 | Remainders[2] = Load; |
| 267 | MadeChange |= annotateGroupSizeLoadWithRangeMD(Load, IsRemainder: true); |
| 268 | } |
| 269 | break; |
| 270 | |
| 271 | case GRID_DIMS: |
| 272 | if (LoadSize <= 2) |
| 273 | MadeChange |= annotateGridDimsLoadWithRangeMD(Load, KnownNumGridDims); |
| 274 | break; |
| 275 | default: |
| 276 | break; |
| 277 | } |
| 278 | } else { // Base is DispatchPtr. |
| 279 | switch (Offset) { |
| 280 | case WORKGROUP_SIZE_X: |
| 281 | if (LoadSize == 2) |
| 282 | GroupSizes[0] = Load; |
| 283 | break; |
| 284 | case WORKGROUP_SIZE_Y: |
| 285 | if (LoadSize == 2) |
| 286 | GroupSizes[1] = Load; |
| 287 | break; |
| 288 | case WORKGROUP_SIZE_Z: |
| 289 | if (LoadSize == 2) |
| 290 | GroupSizes[2] = Load; |
| 291 | break; |
| 292 | case GRID_SIZE_X: |
| 293 | if (LoadSize == 4) |
| 294 | GridSizes[0] = Load; |
| 295 | break; |
| 296 | case GRID_SIZE_Y: |
| 297 | if (LoadSize == 4) |
| 298 | GridSizes[1] = Load; |
| 299 | break; |
| 300 | case GRID_SIZE_Z: |
| 301 | if (LoadSize == 4) |
| 302 | GridSizes[2] = Load; |
| 303 | break; |
| 304 | default: |
| 305 | break; |
| 306 | } |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | if (IsV5OrAbove && HasUniformWorkGroupSize) { |
| 311 | // Under v5 __ockl_get_local_size returns the value computed by the |
| 312 | // expression: |
| 313 | // |
| 314 | // workgroup_id < hidden_block_count ? hidden_group_size : |
| 315 | // hidden_remainder |
| 316 | // |
| 317 | // For functions with the attribute uniform-work-group-size=true. we can |
| 318 | // evaluate workgroup_id < hidden_block_count as true, and thus |
| 319 | // hidden_group_size is returned for __ockl_get_local_size. |
| 320 | for (int I = 0; I < 3; ++I) { |
| 321 | Value *BlockCount = BlockCounts[I]; |
| 322 | if (!BlockCount) |
| 323 | continue; |
| 324 | |
| 325 | using namespace llvm::PatternMatch; |
| 326 | auto GroupIDIntrin = |
| 327 | I == 0 ? m_Intrinsic<Intrinsic::amdgcn_workgroup_id_x>() |
| 328 | : (I == 1 ? m_Intrinsic<Intrinsic::amdgcn_workgroup_id_y>() |
| 329 | : m_Intrinsic<Intrinsic::amdgcn_workgroup_id_z>()); |
| 330 | |
| 331 | for (User *ICmp : BlockCount->users()) { |
| 332 | if (match(V: ICmp, P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_ULT, L: GroupIDIntrin, |
| 333 | R: m_Specific(V: BlockCount)))) { |
| 334 | ICmp->replaceAllUsesWith(V: llvm::ConstantInt::getTrue(Ty: ICmp->getType())); |
| 335 | MadeChange = true; |
| 336 | } |
| 337 | } |
| 338 | } |
| 339 | |
| 340 | // All remainders should be 0 with uniform work group size. |
| 341 | for (Value *Remainder : Remainders) { |
| 342 | if (!Remainder) |
| 343 | continue; |
| 344 | Remainder->replaceAllUsesWith( |
| 345 | V: Constant::getNullValue(Ty: Remainder->getType())); |
| 346 | MadeChange = true; |
| 347 | } |
| 348 | } else if (HasUniformWorkGroupSize) { // Pre-V5. |
| 349 | // Pattern match the code used to handle partial workgroup dispatches in the |
| 350 | // library implementation of get_local_size, so the entire function can be |
| 351 | // constant folded with a known group size. |
| 352 | // |
| 353 | // uint r = grid_size - group_id * group_size; |
| 354 | // get_local_size = (r < group_size) ? r : group_size; |
| 355 | // |
| 356 | // If we have uniform-work-group-size (which is the default in OpenCL 1.2), |
| 357 | // the grid_size is required to be a multiple of group_size). In this case: |
| 358 | // |
| 359 | // grid_size - (group_id * group_size) < group_size |
| 360 | // -> |
| 361 | // grid_size < group_size + (group_id * group_size) |
| 362 | // |
| 363 | // (grid_size / group_size) < 1 + group_id |
| 364 | // |
| 365 | // grid_size / group_size is at least 1, so we can conclude the select |
| 366 | // condition is false (except for group_id == 0, where the select result is |
| 367 | // the same). |
| 368 | for (int I = 0; I < 3; ++I) { |
| 369 | Value *GroupSize = GroupSizes[I]; |
| 370 | Value *GridSize = GridSizes[I]; |
| 371 | if (!GroupSize || !GridSize) |
| 372 | continue; |
| 373 | |
| 374 | using namespace llvm::PatternMatch; |
| 375 | auto GroupIDIntrin = |
| 376 | I == 0 ? m_Intrinsic<Intrinsic::amdgcn_workgroup_id_x>() |
| 377 | : (I == 1 ? m_Intrinsic<Intrinsic::amdgcn_workgroup_id_y>() |
| 378 | : m_Intrinsic<Intrinsic::amdgcn_workgroup_id_z>()); |
| 379 | |
| 380 | for (User *U : GroupSize->users()) { |
| 381 | auto *ZextGroupSize = dyn_cast<ZExtInst>(Val: U); |
| 382 | if (!ZextGroupSize) |
| 383 | continue; |
| 384 | |
| 385 | for (User *UMin : ZextGroupSize->users()) { |
| 386 | if (match(V: UMin, P: m_UMin(L: m_Sub(L: m_Specific(V: GridSize), |
| 387 | R: m_Mul(L: GroupIDIntrin, |
| 388 | R: m_Specific(V: ZextGroupSize))), |
| 389 | R: m_Specific(V: ZextGroupSize)))) { |
| 390 | if (HasReqdWorkGroupSize) { |
| 391 | ConstantInt *KnownSize = |
| 392 | mdconst::extract<ConstantInt>(MD: MD->getOperand(I)); |
| 393 | UMin->replaceAllUsesWith(V: ConstantFoldIntegerCast( |
| 394 | C: KnownSize, DestTy: UMin->getType(), IsSigned: false, DL)); |
| 395 | } else { |
| 396 | UMin->replaceAllUsesWith(V: ZextGroupSize); |
| 397 | } |
| 398 | |
| 399 | MadeChange = true; |
| 400 | } |
| 401 | } |
| 402 | } |
| 403 | } |
| 404 | } |
| 405 | |
| 406 | // Upgrade the old method of calculating the block size using the grid size. |
| 407 | // We pattern match any case where the implicit argument group size is the |
| 408 | // divisor to a dispatch packet grid size read of the same dimension. |
| 409 | if (IsV5OrAbove) { |
| 410 | for (int I = 0; I < 3; I++) { |
| 411 | Value *GroupSize = GroupSizes[I]; |
| 412 | if (!GroupSize || !GroupSize->getType()->isIntegerTy(Bitwidth: 16)) |
| 413 | continue; |
| 414 | |
| 415 | for (User *U : GroupSize->users()) { |
| 416 | Instruction *Inst = cast<Instruction>(Val: U); |
| 417 | if (isa<ZExtInst>(Val: Inst) && !Inst->use_empty()) |
| 418 | Inst = cast<Instruction>(Val: *Inst->user_begin()); |
| 419 | |
| 420 | using namespace llvm::PatternMatch; |
| 421 | if (!match( |
| 422 | V: Inst, |
| 423 | P: m_UDiv(L: m_ZExtOrSelf(Op: m_Load(Op: m_GEP( |
| 424 | Ops: m_Intrinsic<Intrinsic::amdgcn_dispatch_ptr>(), |
| 425 | Ops: m_SpecificInt(V: GRID_SIZE_X + I * sizeof(uint32_t))))), |
| 426 | R: m_Value()))) |
| 427 | continue; |
| 428 | |
| 429 | IRBuilder<> Builder(Inst); |
| 430 | |
| 431 | Value *GEP = Builder.CreateInBoundsGEP( |
| 432 | Ty: Builder.getInt8Ty(), Ptr: CI, |
| 433 | IdxList: {ConstantInt::get(Ty: Type::getInt64Ty(C&: CI->getContext()), |
| 434 | V: HIDDEN_BLOCK_COUNT_X + I * sizeof(uint32_t))}); |
| 435 | Instruction *BlockCount = Builder.CreateLoad(Ty: Builder.getInt32Ty(), Ptr: GEP); |
| 436 | BlockCount->setMetadata(KindID: LLVMContext::MD_invariant_load, |
| 437 | Node: MDNode::get(Context&: CI->getContext(), MDs: {})); |
| 438 | BlockCount->setMetadata(KindID: LLVMContext::MD_noundef, |
| 439 | Node: MDNode::get(Context&: CI->getContext(), MDs: {})); |
| 440 | |
| 441 | Value *BlockCountExt = Builder.CreateZExt(V: BlockCount, DestTy: Inst->getType()); |
| 442 | Inst->replaceAllUsesWith(V: BlockCountExt); |
| 443 | Inst->eraseFromParent(); |
| 444 | MadeChange = true; |
| 445 | } |
| 446 | } |
| 447 | } |
| 448 | |
| 449 | // If reqd_work_group_size is set, we can replace work group size with it. |
| 450 | if (!HasReqdWorkGroupSize) |
| 451 | return MadeChange; |
| 452 | |
| 453 | for (int I = 0; I < 3; I++) { |
| 454 | Value *GroupSize = GroupSizes[I]; |
| 455 | if (!GroupSize) |
| 456 | continue; |
| 457 | |
| 458 | ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD: MD->getOperand(I)); |
| 459 | GroupSize->replaceAllUsesWith( |
| 460 | V: ConstantFoldIntegerCast(C: KnownSize, DestTy: GroupSize->getType(), IsSigned: false, DL)); |
| 461 | MadeChange = true; |
| 462 | } |
| 463 | |
| 464 | return MadeChange; |
| 465 | } |
| 466 | |
| 467 | // TODO: Move makeLIDRangeMetadata usage into here. Seem to not get |
| 468 | // TargetPassConfig for subtarget. |
| 469 | bool AMDGPULowerKernelAttributes::runOnModule(Module &M) { |
| 470 | bool MadeChange = false; |
| 471 | bool IsV5OrAbove = |
| 472 | AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5; |
| 473 | Function *BasePtr = getBasePtrIntrinsic(M, IsV5OrAbove); |
| 474 | |
| 475 | if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used. |
| 476 | return false; |
| 477 | |
| 478 | SmallPtrSet<Instruction *, 4> HandledUses; |
| 479 | for (auto *U : BasePtr->users()) { |
| 480 | CallInst *CI = cast<CallInst>(Val: U); |
| 481 | if (HandledUses.insert(Ptr: CI).second) { |
| 482 | if (processUse(CI, IsV5OrAbove)) |
| 483 | MadeChange = true; |
| 484 | } |
| 485 | } |
| 486 | |
| 487 | return MadeChange; |
| 488 | } |
| 489 | |
| 490 | INITIALIZE_PASS_BEGIN(AMDGPULowerKernelAttributes, DEBUG_TYPE, |
| 491 | "AMDGPU Kernel Attributes" , false, false) |
| 492 | INITIALIZE_PASS_END(AMDGPULowerKernelAttributes, DEBUG_TYPE, |
| 493 | "AMDGPU Kernel Attributes" , false, false) |
| 494 | |
| 495 | char AMDGPULowerKernelAttributes::ID = 0; |
| 496 | |
| 497 | ModulePass *llvm::createAMDGPULowerKernelAttributesPass() { |
| 498 | return new AMDGPULowerKernelAttributes(); |
| 499 | } |
| 500 | |
| 501 | PreservedAnalyses |
| 502 | AMDGPULowerKernelAttributesPass::run(Function &F, FunctionAnalysisManager &AM) { |
| 503 | bool IsV5OrAbove = |
| 504 | AMDGPU::getAMDHSACodeObjectVersion(M: *F.getParent()) >= AMDGPU::AMDHSA_COV5; |
| 505 | Function *BasePtr = getBasePtrIntrinsic(M&: *F.getParent(), IsV5OrAbove); |
| 506 | |
| 507 | if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used. |
| 508 | return PreservedAnalyses::all(); |
| 509 | |
| 510 | bool Changed = false; |
| 511 | for (Instruction &I : instructions(F)) { |
| 512 | if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) { |
| 513 | if (CI->getCalledFunction() == BasePtr) |
| 514 | Changed |= processUse(CI, IsV5OrAbove); |
| 515 | } |
| 516 | } |
| 517 | |
| 518 | return !Changed ? PreservedAnalyses::all() |
| 519 | : PreservedAnalyses::none().preserveSet<CFGAnalyses>(); |
| 520 | } |
| 521 | |