1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "AMDGPUTargetMachine.h"
15#include "GCNSubtarget.h"
16#include "Utils/AMDGPUBaseInfo.h"
17#include "llvm/IR/IntrinsicsAMDGPU.h"
18#include "llvm/IR/IntrinsicsR600.h"
19#include "llvm/Target/TargetMachine.h"
20#include "llvm/Transforms/IPO/Attributor.h"
21
22#define DEBUG_TYPE "amdgpu-attributor"
23
24using namespace llvm;
25
// Command-line knob for indirect-call specialization.
// NOTE(review): the comparison site is not visible in this chunk — presumably
// the number of potential callees is checked against this threshold before an
// indirect call site is specialized; confirm at the use site.
static cl::opt<unsigned> IndirectCallSpecializationThreshold(
    "amdgpu-indirect-call-specialization-threshold",
    cl::desc(
        "A threshold controls whether an indirect call will be specialized"),
    cl::init(3));
31
// X-macro expansion: generate one enumerator <Name>_POS per implicit-argument
// attribute listed in AMDGPUAttributes.def. The enumerator value is the bit
// position used by ImplicitArgumentMask below; LAST_ARG_POS is the count.
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
#include "AMDGPUAttributes.def"
  LAST_ARG_POS
};
38
// X-macro expansion: generate a one-hot mask <Name> = 1 << <Name>_POS for each
// implicit-argument attribute.
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  UNKNOWN_INTRINSIC = 0, // Sentinel: intrinsic not handled by the mapping.
#include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
  // Sentinel (first value past ALL_ARGUMENT_MASK): intrinsic is known but
  // requires no implicit input.
  NOT_IMPLICIT_INPUT
};
47
// Table pairing each implicit-argument mask bit with its IR attribute string,
// generated from AMDGPUAttributes.def.
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
#include "AMDGPUAttributes.def"
};
53
// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
//
/// Map intrinsic \p ID to the implicit kernel argument it requires.
///
/// \param NonKernelOnly [out] set to true for inputs that are always available
///        in kernels and therefore only need an attribute on non-kernels.
/// \param NeedsImplicit [out] set to true when implicitarg_ptr is additionally
///        required (COV5 queue_ptr access, COV5 trap lowering).
/// \param HasApertureRegs subtarget has aperture registers, so is_shared /
///        is_private need no implicit input.
/// \param SupportsGetDoorBellID subtarget can fetch the doorbell ID, so traps
///        do not need queue_ptr from code object V4 onwards.
/// \param CodeObjectVersion the AMDHSA code object version in effect.
/// \returns the required mask bit, NOT_IMPLICIT_INPUT when the intrinsic needs
///          no implicit input, or UNKNOWN_INTRINSIC when unhandled.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_cluster_id_x:
    NonKernelOnly = true;
    return CLUSTER_ID_X;
  case Intrinsic::amdgcn_cluster_id_y:
    return CLUSTER_ID_Y;
  case Intrinsic::amdgcn_cluster_id_z:
    return CLUSTER_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
  // queue_ptr.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, need to access them through queue_ptr +
    // offsets.
    return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
                                                    : QUEUE_PTR;
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
  case Intrinsic::ubsantrap:
    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
      return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
                                                      : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  default:
    return UNKNOWN_INTRINSIC;
  }
}
123
124static bool castRequiresQueuePtr(unsigned SrcAS) {
125 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
126}
127
128static bool isDSAddress(const Constant *C) {
129 const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C);
130 if (!GV)
131 return false;
132 unsigned AS = GV->getAddressSpace();
133 return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
134}
135
136/// Returns true if sanitizer attributes are present on a function.
137static bool hasSanitizerAttributes(const Function &F) {
138 return F.hasFnAttribute(Kind: Attribute::SanitizeAddress) ||
139 F.hasFnAttribute(Kind: Attribute::SanitizeThread) ||
140 F.hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
141 F.hasFnAttribute(Kind: Attribute::SanitizeHWAddress) ||
142 F.hasFnAttribute(Kind: Attribute::SanitizeMemTag);
143}
144
145namespace {
/// AMDGPU-specific information cache shared by the abstract attributes below:
/// per-function subtarget queries, the module's code object version, and a
/// memoized access bitmap for constants (DS globals / addrspacecasts).
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
        CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}

  TargetMachine &TM;

  /// Bit flags describing what a (transitively visited) constant contains.
  enum ConstantStatus : uint8_t {
    NONE = 0,
    DS_GLOBAL = 1 << 0,
    ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
    ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
    ADDR_SPACE_CAST_BOTH_TO_FLAT =
        ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
  };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  /// Parse \p F's "amdgpu-flat-work-group-size" attribute, or std::nullopt if
  /// it is absent or unparsable.
  std::optional<std::pair<unsigned, unsigned>>
  getFlatWorkGroupSizeAttr(const Function &F) const {
    auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
    if (!R)
      return std::nullopt;
    return std::make_pair(R->first, *(R->second));
  }

  /// Subtarget default flat work group size for \p F's calling convention.
  std::pair<unsigned, unsigned>
  getDefaultFlatWorkGroupSize(const Function &F) const {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
  }

  /// Subtarget {minimum, maximum} supported flat work group size.
  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

  /// Maximum number of work groups for \p F (one entry per dimension).
  SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxNumWorkGroups(F);
  }

  /// Get code object version.
  unsigned getCodeObjectVersion() const { return CodeObjectVersion; }

  /// Parse \p F's "amdgpu-waves-per-eu" attribute; a missing upper bound is
  /// filled in with the subtarget maximum.
  std::optional<std::pair<unsigned, unsigned>>
  getWavesPerEUAttr(const Function &F) {
    auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
                                               /*OnlyFirstRequired=*/true);
    if (!Val)
      return std::nullopt;
    if (!Val->second) {
      const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
      Val->second = ST.getMaxWavesPerEU();
    }
    return std::make_pair(Val->first, *(Val->second));
  }

  /// Subtarget maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxWavesPerEU();
  }

  unsigned getMaxAddrSpace() const override {
    return AMDGPUAS::MAX_AMDGPU_ADDRESS;
  }

private:
  /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
  /// local to flat. These casts may require the queue pointer.
  static uint8_t visitConstExpr(const ConstantExpr *CE) {
    uint8_t Status = NONE;

    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
        Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
      else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
        Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
    }

    return Status;
  }

  /// Get the constant access bitmap for \p C, recursing through its operands.
  /// \p Visited guards against revisiting shared/cyclic operands.
  uint8_t getConstantAccess(const Constant *C,
                            SmallPtrSetImpl<const Constant *> &Visited) {
    // NOTE(review): ConstantStatus is consulted here but nothing visible in
    // this file chunk ever inserts into it, so this lookup appears to never
    // hit — possibly a missing memoization store; verify upstream.
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      Result |= visitConstExpr(CE);

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC || !Visited.insert(OpC).second)
        continue;

      Result |= getConstantAccess(OpC, Visited);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
  }

  /// Returns true if \p C (transitively) contains an addrspacecast from the
  /// private address space to flat.
  bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);
    return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
  // AMDHSA code object version of the module, fixed at construction.
  const unsigned CodeObjectVersion;
};
299
/// Abstract attribute deducing which implicit kernel arguments a function
/// needs; the state keeps one assumed/known bit per implicit argument (see
/// ImplicitArgumentMask).
struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;
328
/// Abstract attribute propagating the "uniform-work-group-size" attribute
/// from kernels down to the functions they (transitively) call.
struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAUniformWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;
354
355struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
356 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
357 : AAUniformWorkGroupSize(IRP, A) {}
358
359 void initialize(Attributor &A) override {
360 Function *F = getAssociatedFunction();
361 CallingConv::ID CC = F->getCallingConv();
362
363 if (CC != CallingConv::AMDGPU_KERNEL)
364 return;
365
366 bool InitialValue = F->hasFnAttribute(Kind: "uniform-work-group-size");
367
368 if (InitialValue)
369 indicateOptimisticFixpoint();
370 else
371 indicatePessimisticFixpoint();
372 }
373
374 ChangeStatus updateImpl(Attributor &A) override {
375 ChangeStatus Change = ChangeStatus::UNCHANGED;
376
377 auto CheckCallSite = [&](AbstractCallSite CS) {
378 Function *Caller = CS.getInstruction()->getFunction();
379 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
380 << "->" << getAssociatedFunction()->getName() << "\n");
381
382 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
383 QueryingAA: *this, IRP: IRPosition::function(F: *Caller), DepClass: DepClassTy::REQUIRED);
384 if (!CallerInfo || !CallerInfo->isValidState())
385 return false;
386
387 Change = Change | clampStateAndIndicateChange(S&: this->getState(),
388 R: CallerInfo->getState());
389
390 return true;
391 };
392
393 bool AllCallSitesKnown = true;
394 if (!A.checkForAllCallSites(Pred: CheckCallSite, QueryingAA: *this, RequireAllCallSites: true, UsedAssumedInformation&: AllCallSitesKnown))
395 return indicatePessimisticFixpoint();
396
397 return Change;
398 }
399
400 ChangeStatus manifest(Attributor &A) override {
401 if (!getAssumed())
402 return ChangeStatus::UNCHANGED;
403
404 LLVMContext &Ctx = getAssociatedFunction()->getContext();
405 return A.manifestAttrs(IRP: getIRPosition(),
406 DeducedAttrs: {Attribute::get(Context&: Ctx, Kind: "uniform-work-group-size")},
407 /*ForceReplace=*/true);
408 }
409
410 bool isValidState() const override {
411 // This state is always valid, even when the state is false.
412 return true;
413 }
414
415 const std::string getAsStr(Attributor *) const override {
416 return "AMDWorkGroupSize[" + std::to_string(val: getAssumed()) + "]";
417 }
418
419 /// See AbstractAttribute::trackStatistics()
420 void trackStatistics() const override {}
421};
422
423AAUniformWorkGroupSize &
424AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
425 Attributor &A) {
426 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
427 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
428 llvm_unreachable(
429 "AAUniformWorkGroupSize is only valid for function position");
430}
431
432struct AAAMDAttributesFunction : public AAAMDAttributes {
433 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
434 : AAAMDAttributes(IRP, A) {}
435
436 void initialize(Attributor &A) override {
437 Function *F = getAssociatedFunction();
438
439 // If the function requires the implicit arg pointer due to sanitizers,
440 // assume it's needed even if explicitly marked as not requiring it.
441 // Flat scratch initialization is needed because `asan_malloc_impl`
442 // calls introduced later in pipeline will have flat scratch accesses.
443 // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
444 // implementation for `asan_malloc_impl` is updated.
445 const bool HasSanitizerAttrs = hasSanitizerAttributes(F: *F);
446 if (HasSanitizerAttrs) {
447 removeAssumedBits(BitsEncoding: IMPLICIT_ARG_PTR);
448 removeAssumedBits(BitsEncoding: HOSTCALL_PTR);
449 removeAssumedBits(BitsEncoding: FLAT_SCRATCH_INIT);
450 }
451
452 for (auto Attr : ImplicitAttrs) {
453 if (HasSanitizerAttrs &&
454 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
455 Attr.first == FLAT_SCRATCH_INIT))
456 continue;
457
458 if (F->hasFnAttribute(Kind: Attr.second))
459 addKnownBits(Bits: Attr.first);
460 }
461
462 if (F->isDeclaration())
463 return;
464
465 // Ignore functions with graphics calling conventions, these are currently
466 // not allowed to have kernel arguments.
467 if (AMDGPU::isGraphics(CC: F->getCallingConv())) {
468 indicatePessimisticFixpoint();
469 return;
470 }
471 }
472
473 ChangeStatus updateImpl(Attributor &A) override {
474 Function *F = getAssociatedFunction();
475 // The current assumed state used to determine a change.
476 auto OrigAssumed = getAssumed();
477
478 // Check for Intrinsics and propagate attributes.
479 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
480 QueryingAA: *this, IRP: this->getIRPosition(), DepClass: DepClassTy::REQUIRED);
481 if (!AAEdges || !AAEdges->isValidState() ||
482 AAEdges->hasNonAsmUnknownCallee())
483 return indicatePessimisticFixpoint();
484
485 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(CC: F->getCallingConv());
486
487 bool NeedsImplicit = false;
488 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
489 bool HasApertureRegs = InfoCache.hasApertureRegs(F&: *F);
490 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(F&: *F);
491 unsigned COV = InfoCache.getCodeObjectVersion();
492
493 for (Function *Callee : AAEdges->getOptimisticEdges()) {
494 Intrinsic::ID IID = Callee->getIntrinsicID();
495 if (IID == Intrinsic::not_intrinsic) {
496 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
497 QueryingAA: *this, IRP: IRPosition::function(F: *Callee), DepClass: DepClassTy::REQUIRED);
498 if (!AAAMD || !AAAMD->isValidState())
499 return indicatePessimisticFixpoint();
500 *this &= *AAAMD;
501 continue;
502 }
503
504 bool NonKernelOnly = false;
505 ImplicitArgumentMask AttrMask =
506 intrinsicToAttrMask(ID: IID, NonKernelOnly, NeedsImplicit,
507 HasApertureRegs, SupportsGetDoorBellID: SupportsGetDoorbellID, CodeObjectVersion: COV);
508
509 if (AttrMask == UNKNOWN_INTRINSIC) {
510 // Assume not-nocallback intrinsics may invoke a function which accesses
511 // implicit arguments.
512 //
513 // FIXME: This isn't really the correct check. We want to ensure it
514 // isn't calling any function that may use implicit arguments regardless
515 // of whether it's internal to the module or not.
516 //
517 // TODO: Ignoring callsite attributes.
518 if (!Callee->hasFnAttribute(Kind: Attribute::NoCallback))
519 return indicatePessimisticFixpoint();
520 continue;
521 }
522
523 if (AttrMask != NOT_IMPLICIT_INPUT) {
524 if ((IsNonEntryFunc || !NonKernelOnly))
525 removeAssumedBits(BitsEncoding: AttrMask);
526 }
527 }
528
529 // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
530 if (NeedsImplicit)
531 removeAssumedBits(BitsEncoding: IMPLICIT_ARG_PTR);
532
533 if (isAssumed(BitsEncoding: QUEUE_PTR) && checkForQueuePtr(A)) {
534 // Under V5, we need implicitarg_ptr + offsets to access private_base or
535 // shared_base. We do not actually need queue_ptr.
536 if (COV >= 5)
537 removeAssumedBits(BitsEncoding: IMPLICIT_ARG_PTR);
538 else
539 removeAssumedBits(BitsEncoding: QUEUE_PTR);
540 }
541
542 if (funcRetrievesMultigridSyncArg(A, COV)) {
543 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
544 "multigrid_sync_arg needs implicitarg_ptr");
545 removeAssumedBits(BitsEncoding: MULTIGRID_SYNC_ARG);
546 }
547
548 if (funcRetrievesHostcallPtr(A, COV)) {
549 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
550 removeAssumedBits(BitsEncoding: HOSTCALL_PTR);
551 }
552
553 if (funcRetrievesHeapPtr(A, COV)) {
554 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
555 removeAssumedBits(BitsEncoding: HEAP_PTR);
556 }
557
558 if (isAssumed(BitsEncoding: QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
559 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
560 removeAssumedBits(BitsEncoding: QUEUE_PTR);
561 }
562
563 if (isAssumed(BitsEncoding: LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
564 removeAssumedBits(BitsEncoding: LDS_KERNEL_ID);
565 }
566
567 if (isAssumed(BitsEncoding: DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
568 removeAssumedBits(BitsEncoding: DEFAULT_QUEUE);
569
570 if (isAssumed(BitsEncoding: COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
571 removeAssumedBits(BitsEncoding: COMPLETION_ACTION);
572
573 if (isAssumed(BitsEncoding: FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
574 removeAssumedBits(BitsEncoding: FLAT_SCRATCH_INIT);
575
576 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
577 : ChangeStatus::UNCHANGED;
578 }
579
580 ChangeStatus manifest(Attributor &A) override {
581 SmallVector<Attribute, 8> AttrList;
582 LLVMContext &Ctx = getAssociatedFunction()->getContext();
583
584 for (auto Attr : ImplicitAttrs) {
585 if (isKnown(BitsEncoding: Attr.first))
586 AttrList.push_back(Elt: Attribute::get(Context&: Ctx, Kind: Attr.second));
587 }
588
589 return A.manifestAttrs(IRP: getIRPosition(), DeducedAttrs: AttrList,
590 /* ForceReplace */ true);
591 }
592
593 const std::string getAsStr(Attributor *) const override {
594 std::string Str;
595 raw_string_ostream OS(Str);
596 OS << "AMDInfo[";
597 for (auto Attr : ImplicitAttrs)
598 if (isAssumed(BitsEncoding: Attr.first))
599 OS << ' ' << Attr.second;
600 OS << " ]";
601 return OS.str();
602 }
603
604 /// See AbstractAttribute::trackStatistics()
605 void trackStatistics() const override {}
606
607private:
608 bool checkForQueuePtr(Attributor &A) {
609 Function *F = getAssociatedFunction();
610 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(CC: F->getCallingConv());
611
612 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
613
614 bool NeedsQueuePtr = false;
615
616 auto CheckAddrSpaceCasts = [&](Instruction &I) {
617 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
618 if (castRequiresQueuePtr(SrcAS)) {
619 NeedsQueuePtr = true;
620 return false;
621 }
622 return true;
623 };
624
625 bool HasApertureRegs = InfoCache.hasApertureRegs(F&: *F);
626
627 // `checkForAllInstructions` is much more cheaper than going through all
628 // instructions, try it first.
629
630 // The queue pointer is not needed if aperture regs is present.
631 if (!HasApertureRegs) {
632 bool UsedAssumedInformation = false;
633 A.checkForAllInstructions(Pred: CheckAddrSpaceCasts, QueryingAA: *this,
634 Opcodes: {Instruction::AddrSpaceCast},
635 UsedAssumedInformation);
636 }
637
638 // If we found that we need the queue pointer, nothing else to do.
639 if (NeedsQueuePtr)
640 return true;
641
642 if (!IsNonEntryFunc && HasApertureRegs)
643 return false;
644
645 for (BasicBlock &BB : *F) {
646 for (Instruction &I : BB) {
647 for (const Use &U : I.operands()) {
648 if (const auto *C = dyn_cast<Constant>(Val: U)) {
649 if (InfoCache.needsQueuePtr(C, Fn&: *F))
650 return true;
651 }
652 }
653 }
654 }
655
656 return false;
657 }
658
659 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
660 auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
661 AA::RangeTy Range(Pos, 8);
662 return funcRetrievesImplicitKernelArg(A, Range);
663 }
664
665 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
666 auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
667 AA::RangeTy Range(Pos, 8);
668 return funcRetrievesImplicitKernelArg(A, Range);
669 }
670
671 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
672 auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
673 AA::RangeTy Range(Pos, 8);
674 return funcRetrievesImplicitKernelArg(A, Range);
675 }
676
677 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
678 auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
679 AA::RangeTy Range(Pos, 8);
680 return funcRetrievesImplicitKernelArg(A, Range);
681 }
682
683 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
684 if (COV < 5)
685 return false;
686 AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
687 return funcRetrievesImplicitKernelArg(A, Range);
688 }
689
690 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
691 if (COV < 5)
692 return false;
693 AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
694 return funcRetrievesImplicitKernelArg(A, Range);
695 }
696
697 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
698 // Check if this is a call to the implicitarg_ptr builtin and it
699 // is used to retrieve the hostcall pointer. The implicit arg for
700 // hostcall is not used only if every use of the implicitarg_ptr
701 // is a load that clearly does not retrieve any byte of the
702 // hostcall pointer. We check this by tracing all the uses of the
703 // initial call to the implicitarg_ptr intrinsic.
704 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
705 auto &Call = cast<CallBase>(Val&: I);
706 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
707 return true;
708
709 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
710 QueryingAA: *this, IRP: IRPosition::callsite_returned(CB: Call), DepClass: DepClassTy::REQUIRED);
711 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
712 return false;
713
714 return PointerInfoAA->forallInterferingAccesses(
715 Range, CB: [](const AAPointerInfo::Access &Acc, bool IsExact) {
716 return Acc.getRemoteInst()->isDroppable();
717 });
718 };
719
720 bool UsedAssumedInformation = false;
721 return !A.checkForAllCallLikeInstructions(Pred: DoesNotLeadToKernelArgLoc, QueryingAA: *this,
722 UsedAssumedInformation);
723 }
724
725 bool funcRetrievesLDSKernelId(Attributor &A) {
726 auto DoesNotRetrieve = [&](Instruction &I) {
727 auto &Call = cast<CallBase>(Val&: I);
728 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
729 };
730 bool UsedAssumedInformation = false;
731 return !A.checkForAllCallLikeInstructions(Pred: DoesNotRetrieve, QueryingAA: *this,
732 UsedAssumedInformation);
733 }
734
735 // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
736 // not to be set.
737 bool needFlatScratchInit(Attributor &A) {
738 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
739
740 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
741 // there is a cast from PRIVATE_ADDRESS.
742 auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
743 return cast<AddrSpaceCastInst>(Val&: I).getSrcAddressSpace() !=
744 AMDGPUAS::PRIVATE_ADDRESS;
745 };
746
747 bool UsedAssumedInformation = false;
748 if (!A.checkForAllInstructions(Pred: AddrSpaceCastNotFromPrivate, QueryingAA: *this,
749 Opcodes: {Instruction::AddrSpaceCast},
750 UsedAssumedInformation))
751 return true;
752
753 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
754 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
755
756 Function *F = getAssociatedFunction();
757 for (Instruction &I : instructions(F)) {
758 for (const Use &U : I.operands()) {
759 if (const auto *C = dyn_cast<Constant>(Val: U)) {
760 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
761 return true;
762 }
763 }
764 }
765
766 // Finally check callees.
767
768 // This is called on each callee; false means callee shouldn't have
769 // no-flat-scratch-init.
770 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
771 const auto &CB = cast<CallBase>(Val&: I);
772 const Function *Callee = CB.getCalledFunction();
773
774 // Callee == 0 for inline asm or indirect call with known callees.
775 // In the latter case, updateImpl() already checked the callees and we
776 // know their FLAT_SCRATCH_INIT bit is set.
777 // If function has indirect call with unknown callees, the bit is
778 // already removed in updateImpl() and execution won't reach here.
779 if (!Callee)
780 return true;
781
782 return Callee->getIntrinsicID() !=
783 Intrinsic::amdgcn_addrspacecast_nonnull;
784 };
785
786 UsedAssumedInformation = false;
787 // If any callee is false (i.e. need FlatScratchInit),
788 // checkForAllCallLikeInstructions returns false, in which case this
789 // function returns true.
790 return !A.checkForAllCallLikeInstructions(Pred: CheckForNoFlatScratchInit, QueryingAA: *this,
791 UsedAssumedInformation);
792 }
793};
794
795AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
796 Attributor &A) {
797 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
798 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
799 llvm_unreachable("AAAMDAttributes is only valid for function position");
800}
801
/// Base class to derive different size ranges.
struct AAAMDSizeRangeAttribute
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;

  // Name of the string IR attribute ("min,max") this AA manifests.
  StringRef AttrName;

  AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
                          StringRef AttrName)
      : Base(IRP, 32), AttrName(AttrName) {} // 32-bit integer range state

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Shared update step: widen this function's range with the range of every
  /// caller; any unknown caller forces the pessimistic fixpoint.
  template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AttributeImpl>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo->getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Clamp the assumed range to the default value ([Min, Max]) and emit the
  /// attribute if it is not same as default.
  ChangeStatus
  emitAttributeIfNotDefaultAfterClamp(Attributor &A,
                                      std::pair<unsigned, unsigned> Default) {
    auto [Min, Max] = Default;
    // The IntegerRangeState upper bound is exclusive, hence the Max + 1 below.
    unsigned Lower = getAssumed().getLower().getZExtValue();
    unsigned Upper = getAssumed().getUpper().getZExtValue();

    // Clamp the range to the default value.
    if (Lower < Min)
      Lower = Min;
    if (Upper > Max + 1)
      Upper = Max + 1;

    // No manifest if the value is invalid or same as default after clamp.
    if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
      return ChangeStatus::UNCHANGED;

    // Attribute format is "<lower>,<upper-inclusive>".
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << Lower << ',' << Upper - 1;
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, AttrName, OS.str())},
                           /*ForceReplace=*/true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << getName() << '[';
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }
};
882
/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool HasAttr = false;
    auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
    auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);

    if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
      // We only consider an attribute that is not max range because the front
      // end always emits the attribute, unfortunately, and sometimes it emits
      // the max range.
      if (*Attr != MaxRange) {
        Range = *Attr;
        HasAttr = true;
      }
    }

    // We don't want to directly clamp the state if it's the max range because
    // that is basically the worst state.
    if (Range == MaxRange)
      return;

    // Seed the state with [Min, Max] (IntegerRangeState upper is exclusive).
    auto [Min, Max] = Range;
    ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
    IntegerRangeState IRS(CR);
    clampStateAndIndicateChange(this->getState(), IRS);

    // Explicit attributes and entry points are roots: nothing refines them.
    if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicateOptimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, InfoCache.getMaximumFlatWorkGroupRange(*F));
  }

  /// See AbstractAttribute::getName()
  StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;
952
953AAAMDFlatWorkGroupSize &
954AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
955 Attributor &A) {
956 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
957 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
958 llvm_unreachable(
959 "AAAMDFlatWorkGroupSize is only valid for function position");
960}
961
/// Product of three decreasing integer states, one per dimension (x, y, z).
/// Every query/transition simply applies component-wise to X, Y and Z.
struct TupleDecIntegerRangeState : public AbstractState {
  DecIntegerState<uint32_t> X, Y, Z;

  // Valid only if all three components are valid.
  bool isValidState() const override {
    return X.isValidState() && Y.isValidState() && Z.isValidState();
  }

  // At a fixpoint only when every component is.
  bool isAtFixpoint() const override {
    return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
  }

  ChangeStatus indicateOptimisticFixpoint() override {
    return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
           Z.indicateOptimisticFixpoint();
  }

  ChangeStatus indicatePessimisticFixpoint() override {
    return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
           Z.indicatePessimisticFixpoint();
  }

  // Combine with \p Other component-wise (the Attributor's state-merge
  // operator).
  TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
    X ^= Other.X;
    Y ^= Other.Y;
    Z ^= Other.Z;
    return *this;
  }

  bool operator==(const TupleDecIntegerRangeState &Other) const {
    return X == Other.X && Y == Other.Y && Z == Other.Z;
  }

  TupleDecIntegerRangeState &getAssumed() { return *this; }
  const TupleDecIntegerRangeState &getAssumed() const { return *this; }
};

// NOTE(review): this alias is not referenced by the definitions visible here
// (AAAMDMaxNumWorkgroups below derives from StateWrapper directly); it may be
// used elsewhere — confirm before removing.
using AAAMDMaxNumWorkgroupsState =
    StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
1000
/// Propagate amdgpu-max-num-workgroups attribute.
struct AAAMDMaxNumWorkgroups
    : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
  using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;

  AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    // Seed the known per-dimension maxima from the function's existing
    // information (attribute/defaults as resolved by the info cache).
    SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(F: *F);

    X.takeKnownMinimum(Value: MaxNumWorkgroups[0]);
    Y.takeKnownMinimum(Value: MaxNumWorkgroups[1]);
    Z.takeKnownMinimum(Value: MaxNumWorkgroups[2]);

    // Entry functions have no callers inside the module to learn from, so
    // fix the state at what is already known.
    if (AMDGPU::isEntryFunctionCC(CC: F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    // Merge the state of every caller into ours; returning false from the
    // callback aborts the traversal and forces a pessimistic fixpoint below.
    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
          QueryingAA: *this, IRP: IRPosition::function(F: *Caller), DepClass: DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change |=
          clampStateAndIndicateChange(S&: this->getState(), R: CallerInfo->getState());
      return true;
    };

    bool AllCallSitesKnown = true;
    // If any call site is unknown (e.g. external linkage), we cannot trust
    // any deduced bound.
    if (!A.checkForAllCallSites(Pred: CheckCallSite, QueryingAA: *this,
                                /*RequireAllCallSites=*/true,
                                UsedAssumedInformation&: AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
                                                  Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);
    // Attribute value is the comma-separated triple "X,Y,Z".
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();

    // TODO: Should annotate loads of the group size for this to do anything
    // useful.
    return A.manifestAttrs(
        IRP: getIRPosition(),
        DeducedAttrs: {Attribute::get(Context&: Ctx, Kind: "amdgpu-max-num-workgroups", Val: OS.str())},
        /* ForceReplace= */ true);
  }

  StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }

  const std::string getAsStr(Attributor *) const override {
    std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
    raw_string_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
       << ']';
    return OS.str();
  }

  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDMaxNumWorkgroups
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  void trackStatistics() const override {}

  /// Unique ID (due to the unique address)
  static const char ID;
};
1091
1092const char AAAMDMaxNumWorkgroups::ID = 0;
1093
1094AAAMDMaxNumWorkgroups &
1095AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1096 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1097 return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1098 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1099}
1100
/// Propagate amdgpu-waves-per-eu attribute.
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    // If the attribute exists, we will honor it if it is not the default.
    if (auto Attr = InfoCache.getWavesPerEUAttr(F: *F)) {
      // The default/"worst" range is [1, subtarget max waves per EU].
      std::pair<unsigned, unsigned> MaxWavesPerEURange{
          1U, InfoCache.getMaxWavesPerEU(F: *F)};
      if (*Attr != MaxWavesPerEURange) {
        // A meaningful explicit attribute is final; adopt it verbatim.
        auto [Min, Max] = *Attr;
        ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
        IntegerRangeState RangeState(Range);
        this->getState() = RangeState;
        indicateOptimisticFixpoint();
        return;
      }
    }

    // Entry functions have no in-module callers to learn from.
    if (AMDGPU::isEntryFunctionCC(CC: F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      Function *Func = getAssociatedFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << Func->getName() << '\n');
      (void)Func;

      const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
          QueryingAA: *this, IRP: IRPosition::function(F: *Caller), DepClass: DepClassTy::REQUIRED);
      if (!CallerAA || !CallerAA->isValidState())
        return false;

      // Merge with the caller by taking the larger lower bound AND the
      // larger upper bound of the two assumed ranges (both use std::max;
      // this is intentional, not a copy-paste slip).
      ConstantRange Assumed = getAssumed();
      unsigned Min = std::max(a: Assumed.getLower().getZExtValue(),
                              b: CallerAA->getAssumed().getLower().getZExtValue());
      unsigned Max = std::max(a: Assumed.getUpper().getZExtValue(),
                              b: CallerAA->getAssumed().getUpper().getZExtValue());
      ConstantRange Range(APInt(32, Min), APInt(32, Max));
      IntegerRangeState RangeState(Range);
      getState() = RangeState;
      Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
                                      : ChangeStatus::CHANGED;

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(Pred: CheckCallSite, QueryingAA: *this, RequireAllCallSites: true, UsedAssumedInformation&: AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    // Only emit when the deduced range is tighter than the default
    // [1, max waves per EU].
    return emitAttributeIfNotDefaultAfterClamp(
        A, Default: {1U, InfoCache.getMaxWavesPerEU(F: *F)});
  }

  /// See AbstractAttribute::getName()
  StringRef getName() const override { return "AAAMDWavesPerEU"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWavesPerEU
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
1190
1191const char AAAMDWavesPerEU::ID = 0;
1192
1193AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1194 Attributor &A) {
1195 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1196 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1197 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1198}
1199
/// Compute the minimum number of AGPRs required to allocate the inline asm.
///
/// Walks the constraint list of \p IA, counting AGPRs needed for virtual
/// register operands ("a" constraints, sized from the operand type) and
/// tracking the highest physical AGPR touched by explicit register
/// references/clobbers. Defs and uses are accumulated separately because
/// they can overlap; the result is capped at 256 (the architectural AGPR
/// limit).
static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
                                             const CallBase &Call) {
  unsigned ArgNo = 0;        // Next call argument (input operands).
  unsigned ResNo = 0;        // Next struct element of the call result (outputs).
  unsigned AGPRDefCount = 0; // Virtual AGPRs consumed by outputs.
  unsigned AGPRUseCount = 0; // Virtual AGPRs consumed by inputs/earlyclobbers.
  unsigned MaxPhysReg = 0;   // One past the highest physical AGPR referenced.
  const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();

  // TODO: Overestimates due to not accounting for tied operands
  for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    Type *Ty = nullptr;
    switch (CI.Type) {
    case InlineAsm::isOutput: {
      // Multiple outputs come back as a struct; pick this output's element.
      Ty = Call.getType();
      if (auto *STy = dyn_cast<StructType>(Val: Ty))
        Ty = STy->getElementType(N: ResNo);
      ++ResNo;
      break;
    }
    case InlineAsm::isInput: {
      Ty = Call.getArgOperand(i: ArgNo++)->getType();
      break;
    }
    case InlineAsm::isLabel:
      continue;
    case InlineAsm::isClobber:
      // Parse the physical register reference.
      break;
    }

    for (StringRef Code : CI.Codes) {
      unsigned RegCount = 0;
      if (Code.starts_with(Prefix: "a")) {
        // Virtual register, compute number of registers based on the type.
        //
        // We ought to be going through TargetLowering to get the number of
        // registers, but we should avoid the dependence on CodeGen here.
        RegCount = divideCeil(Numerator: DL.getTypeSizeInBits(Ty), Denominator: 32);
      } else {
        // Physical register reference
        auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Constraint: Code);
        if (Kind == 'a') {
          RegCount = NumRegs;
          // Physical registers pin the allocation up to their last index.
          MaxPhysReg = std::max(a: MaxPhysReg, b: std::min(a: RegIdx + NumRegs, b: 256u));
        }

        // Physical references are fully accounted in MaxPhysReg.
        continue;
      }

      if (CI.Type == InlineAsm::isOutput) {
        // Apply tuple alignment requirement
        //
        // TODO: This is more conservative than necessary.
        AGPRDefCount = alignTo(Value: AGPRDefCount, Align: RegCount);

        AGPRDefCount += RegCount;
        // An earlyclobber output also occupies a register for the duration
        // of the inputs, so count it on the use side too.
        if (CI.isEarlyClobber) {
          AGPRUseCount = alignTo(Value: AGPRUseCount, Align: RegCount);
          AGPRUseCount += RegCount;
        }
      } else {
        AGPRUseCount = alignTo(Value: AGPRUseCount, Align: RegCount);
        AGPRUseCount += RegCount;
      }
    }
  }

  // Uses and defs may share registers, so the virtual demand is their max,
  // not their sum.
  unsigned MaxVirtReg = std::max(a: AGPRUseCount, b: AGPRDefCount);

  // TODO: This is overly conservative. If there are any physical registers,
  // allocate any virtual registers after them so we don't have to solve optimal
  // packing.
  return std::min(a: MaxVirtReg + MaxPhysReg, b: 256u);
}
1276
/// Deduce the minimum number of AGPRs a function needs ("amdgpu-agpr-alloc").
/// The assumed value only ever decreases (DecIntegerState); call-like
/// instructions that may touch AGPRs raise the required maximum.
struct AAAMDGPUMinAGPRAlloc
    : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
  using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
  AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
                                                 Attributor &A) {
    if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
      return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
    llvm_unreachable(
        "AAAMDGPUMinAGPRAlloc is only valid for function position");
  }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // An explicit "amdgpu-agpr-alloc" minimum of 0 means no AGPRs are
    // required; that is already the best possible state, so fix it.
    auto [MinNumAGPR, MaxNumAGPR] =
        AMDGPU::getIntegerPairAttribute(F: *F, Name: "amdgpu-agpr-alloc", Default: {~0u, ~0u},
                                        /*OnlyFirstRequired=*/true);
    if (MinNumAGPR == 0)
      indicateOptimisticFixpoint();
  }

  const std::string getAsStr(Attributor *A) const override {
    std::string Str = "amdgpu-agpr-alloc=";
    raw_string_ostream OS(Str);
    OS << getAssumed();
    return OS.str();
  }

  void trackStatistics() const override {}

  ChangeStatus updateImpl(Attributor &A) override {
    // Running maximum of AGPRs demanded by any call-like instruction.
    DecIntegerState<> Maximum;

    // Check for cases which require allocation of AGPRs. The only cases where
    // AGPRs are required are if there are direct references to AGPRs, so inline
    // assembly and special intrinsics.
    auto CheckForMinAGPRAllocs = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(Val&: I);
      const Value *CalleeOp = CB.getCalledOperand();

      if (const InlineAsm *IA = dyn_cast<InlineAsm>(Val: CalleeOp)) {
        // Technically, the inline asm could be invoking a call to an unknown
        // external function that requires AGPRs, but ignore that.
        unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, Call: CB);
        Maximum.takeAssumedMaximum(Value: NumRegs);
        return true;
      }
      switch (CB.getIntrinsicID()) {
      case Intrinsic::not_intrinsic:
        // Ordinary call: inspect the possible callees below.
        break;
      case Intrinsic::write_register:
      case Intrinsic::read_register:
      case Intrinsic::read_volatile_register: {
        // A direct reference to a named physical register; if it is an AGPR
        // tuple, the allocation must cover its last register.
        const MDString *RegName = cast<MDString>(
            Val: cast<MDNode>(
                Val: cast<MetadataAsValue>(Val: CB.getArgOperand(i: 0))->getMetadata())
                ->getOperand(I: 0));
        auto [Kind, RegIdx, NumRegs] =
            AMDGPU::parseAsmPhysRegName(TupleString: RegName->getString());
        if (Kind == 'a')
          Maximum.takeAssumedMaximum(Value: std::min(a: RegIdx + NumRegs, b: 256u));

        return true;
      }
      // Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have
      // the nocallback attribute, so the AMDGPU attributor can conservatively
      // drop all implicitly-known inputs and AGPR allocation information. Make
      // sure we still infer that no implicit inputs are required and that the
      // AGPR allocation stays at zero. Trap-like intrinsics may invoke a
      // function which requires AGPRs, so we need to check if the called
      // function has the "trap-func-name" attribute.
      case Intrinsic::trap:
      case Intrinsic::debugtrap:
      case Intrinsic::ubsantrap:
        return CB.hasFnAttr(Kind: Attribute::NoCallback) ||
               !CB.hasFnAttr(Kind: "trap-func-name");
      default:
        // Some intrinsics may use AGPRs, but if we have a choice, we are not
        // required to use AGPRs.
        // Assume !nocallback intrinsics may call a function which requires
        // AGPRs.
        return CB.hasFnAttr(Kind: Attribute::NoCallback);
      }

      // TODO: Handle callsite attributes
      auto *CBEdges = A.getAAFor<AACallEdges>(
          QueryingAA: *this, IRP: IRPosition::callsite_function(CB), DepClass: DepClassTy::REQUIRED);
      if (!CBEdges || CBEdges->hasUnknownCallee()) {
        Maximum.indicatePessimisticFixpoint();
        return false;
      }

      // Take the worst requirement over all functions this call may reach.
      for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
        const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
            QueryingAA: *this, IRP: IRPosition::function(F: *PossibleCallee), DepClass: DepClassTy::REQUIRED);
        if (!CalleeInfo || !CalleeInfo->isValidState()) {
          Maximum.indicatePessimisticFixpoint();
          return false;
        }

        Maximum.takeAssumedMaximum(Value: CalleeInfo->getAssumed());
      }

      return true;
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallLikeInstructions(Pred: CheckForMinAGPRAllocs, QueryingAA: *this,
                                           UsedAssumedInformation))
      return indicatePessimisticFixpoint();

    return clampStateAndIndicateChange(S&: getState(), R: Maximum);
  }

  ChangeStatus manifest(Attributor &A) override {
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    SmallString<4> Buffer;
    raw_svector_ostream OS(Buffer);
    // Emit only the minimum; the attribute's max component is left unset.
    OS << getAssumed();

    return A.manifestAttrs(
        IRP: getIRPosition(), DeducedAttrs: {Attribute::get(Context&: Ctx, Kind: "amdgpu-agpr-alloc", Val: OS.str())});
  }

  StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDGPUMinAGPRAllocs
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  static const char ID;
};

const char AAAMDGPUMinAGPRAlloc::ID = 0;
1415
/// An abstract attribute to propagate the function attribute
/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
/// This is the interface type; the concrete implementation is
/// AAAMDGPUClusterDimsFunction below.
struct AAAMDGPUClusterDims
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
                                                Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAAMDGPUClusterDims"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDGPUClusterDims.
  static bool classof(const AbstractAttribute *AA) {
    return AA->getIdAddr() == &ID;
  }

  /// Accessor for the deduced cluster dimensions, implemented by the
  /// concrete subclass.
  virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDGPUClusterDims::ID = 0;
1446
/// Function-position implementation of AAAMDGPUClusterDims: seeds from the
/// function's own attribute and merges in every caller's deduction.
struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
  AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDGPUClusterDims(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    assert(F && "empty associated function");

    Attr = AMDGPU::ClusterDimsAttr::get(F: *F);

    // No matter what a kernel function has, it is final.
    if (AMDGPU::isEntryFunctionCC(CC: F->getCallingConv())) {
      if (Attr.isUnknown())
        indicatePessimisticFixpoint();
      else
        indicateOptimisticFixpoint();
    }
  }

  const std::string getAsStr(Attributor *A) const override {
    if (!getAssumed() || Attr.isUnknown())
      return "unknown";
    if (Attr.isNoCluster())
      return "no";
    if (Attr.isVariableDims())
      return "variable";
    return Attr.to_string();
  }

  void trackStatistics() const override {}

  ChangeStatus updateImpl(Attributor &A) override {
    // Snapshot so we can report whether any caller changed our value.
    auto OldState = Attr;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
          QueryingAA: *this, IRP: IRPosition::function(F: *CS.getInstruction()->getFunction()),
          DepClass: DepClassTy::REQUIRED);
      if (!CallerAA || !CallerAA->isValidState())
        return false;

      // merge returns false only when the combined state is final-unknown.
      return merge(Other: CallerAA->getClusterDims());
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallSites(Pred: CheckCallSite, QueryingAA: *this,
                                /*RequireAllCallSites=*/true,
                                UsedAssumedInformation))
      return indicatePessimisticFixpoint();

    return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    // Nothing useful to write if we never learned anything.
    if (Attr.isUnknown())
      return ChangeStatus::UNCHANGED;
    return A.manifestAttrs(
        IRP: getIRPosition(),
        DeducedAttrs: {Attribute::get(Context&: getAssociatedFunction()->getContext(), Kind: AttrName,
                        Val: Attr.to_string())},
        /*ForceReplace=*/true);
  }

  const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
    return Attr;
  }

private:
  /// Combine \p Other (a caller's deduction) into our own Attr.
  /// \returns false only when the result is unknown AND final (case 5),
  /// signalling the caller traversal to stop.
  bool merge(const AMDGPU::ClusterDimsAttr &Other) {
    // Case 1: Both of them are unknown yet, we do nothing and continue wait for
    // propagation.
    if (Attr.isUnknown() && Other.isUnknown())
      return true;

    // Case 2: The other is determined, but we are unknown yet, we simply take
    // the other's value.
    if (Attr.isUnknown()) {
      Attr = Other;
      return true;
    }

    // Case 3: We are determined but the other is unknown yet, we simply keep
    // everything unchanged.
    if (Other.isUnknown())
      return true;

    // After this point, both are determined.

    // Case 4: If they are same, we do nothing.
    if (Attr == Other)
      return true;

    // Now they are not same.

    // Case 5: If either of us uses cluster (but not both; otherwise case 4
    // would hold), then it is unknown whether cluster will be used, and the
    // state is final, unlike case 1.
    if (Attr.isNoCluster() || Other.isNoCluster()) {
      Attr.setUnknown();
      return false;
    }

    // Case 6: Both of us use cluster, but the dims are different, so the result
    // is, cluster is used, but we just don't have a fixed dims.
    Attr.setVariableDims();
    return true;
  }

  // Current deduced cluster-dims value for the associated function.
  AMDGPU::ClusterDimsAttr Attr;

  static constexpr char AttrName[] = "amdgpu-cluster-dims";
};
1559
1560AAAMDGPUClusterDims &
1561AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
1562 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1563 return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
1564 llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
1565}
1566
/// Shared driver for both the module and CGSCC pass entry points: configures
/// an Attributor restricted to the AMDGPU-relevant abstract attributes, seeds
/// it for every function in \p Functions, and runs it to a fixpoint.
///
/// \returns true if the Attributor changed the module.
static bool runImpl(SetVector<Function *> &Functions, bool IsModulePass,
                    bool DeleteFns, Module &M, AnalysisGetter &AG,
                    TargetMachine &TM, AMDGPUAttributorOptions Options,
                    ThinOrFullLTOPhase LTOPhase) {

  CallGraphUpdater CGUpdater;
  BumpPtrAllocator Allocator;
  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
  // Restrict the Attributor to this allow-list; all other AAs are disabled.
  DenseSet<const char *> Allowed(
      {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
       &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
       &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
       &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
       &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
       &AAAMDGPUClusterDims::ID, &AAAlign::ID});

  AttributorConfig AC(CGUpdater);
  AC.IsClosedWorldModule = Options.IsClosedWorld;
  AC.Allowed = &Allowed;
  AC.IsModulePass = IsModulePass;
  AC.DeleteFns = DeleteFns;
  AC.DefaultInitializeLiveInternals = false;
  // Specialize indirect call sites only up to a threshold of assumed
  // callees, and never to a kernel entry point.
  AC.IndirectCalleeSpecializationCallback =
      [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
         Function &Callee, unsigned NumAssumedCallees) {
        return !AMDGPU::isEntryFunctionCC(CC: Callee.getCallingConv()) &&
               (NumAssumedCallees <= IndirectCallSpecializationThreshold);
      };
  AC.IPOAmendableCB = [](const Function &F) {
    return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
  };

  Attributor A(Functions, InfoCache, AC);

  LLVM_DEBUG({
    StringRef LTOPhaseStr = to_string(LTOPhase);
    dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
           << "[AMDGPUAttributor] Module " << M.getName() << " is "
           << (AC.IsClosedWorldModule ? "" : "not ")
           << "assumed to be a closed world.\n";
  });

  // Seed the abstract attributes for every function and interesting value.
  for (auto *F : Functions) {
    A.getOrCreateAAFor<AAAMDAttributes>(IRP: IRPosition::function(F: *F));
    A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRP: IRPosition::function(F: *F));
    A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRP: IRPosition::function(F: *F));
    CallingConv::ID CC = F->getCallingConv();
    // Entry functions already carry authoritative size attributes; the
    // range AAs only need to be deduced for device functions.
    if (!AMDGPU::isEntryFunctionCC(CC)) {
      A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRP: IRPosition::function(F: *F));
      A.getOrCreateAAFor<AAAMDWavesPerEU>(IRP: IRPosition::function(F: *F));
    }

    // Subtarget-gated attributes: clusters and AGPR allocation.
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F: *F);
    if (!F->isDeclaration() && ST.hasClusters())
      A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRP: IRPosition::function(F: *F));

    if (ST.hasGFX90AInsts())
      A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRP: IRPosition::function(F: *F));

    // Seed address-space deduction on every memory-access pointer operand.
    for (auto &I : instructions(F)) {
      Value *Ptr = nullptr;
      if (auto *LI = dyn_cast<LoadInst>(Val: &I))
        Ptr = LI->getPointerOperand();
      else if (auto *SI = dyn_cast<StoreInst>(Val: &I))
        Ptr = SI->getPointerOperand();
      else if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: &I))
        Ptr = RMW->getPointerOperand();
      else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: &I))
        Ptr = CmpX->getPointerOperand();

      if (Ptr) {
        A.getOrCreateAAFor<AAAddressSpace>(IRP: IRPosition::value(V: *Ptr));
        A.getOrCreateAAFor<AANoAliasAddrSpace>(IRP: IRPosition::value(V: *Ptr));
        // Buffer resource intrinsic results also get alignment deduction.
        if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Ptr)) {
          if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
            A.getOrCreateAAFor<AAAlign>(IRP: IRPosition::value(V: *Ptr));
        }
      }
    }
  }

  return A.run() == ChangeStatus::CHANGED;
}
1651} // namespace
1652
1653PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
1654 ModuleAnalysisManager &AM) {
1655
1656 FunctionAnalysisManager &FAM =
1657 AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
1658 AnalysisGetter AG(FAM);
1659
1660 SetVector<Function *> Functions;
1661 for (Function &F : M) {
1662 if (!F.isIntrinsic())
1663 Functions.insert(X: &F);
1664 }
1665
1666 // TODO: Probably preserves CFG
1667 return runImpl(Functions, /*IsModulePass=*/true, /*DeleteFns=*/true, M, AG,
1668 TM, Options, LTOPhase)
1669 ? PreservedAnalyses::none()
1670 : PreservedAnalyses::all();
1671}
1672
1673PreservedAnalyses llvm::AMDGPUAttributorCGSCCPass::run(LazyCallGraph::SCC &C,
1674 CGSCCAnalysisManager &AM,
1675 LazyCallGraph &CG,
1676 CGSCCUpdateResult &UR) {
1677
1678 FunctionAnalysisManager &FAM =
1679 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(IR&: C, ExtraArgs&: CG).getManager();
1680 AnalysisGetter AG(FAM);
1681
1682 SetVector<Function *> Functions;
1683 for (LazyCallGraph::Node &N : C) {
1684 Function *F = &N.getFunction();
1685 if (!F->isIntrinsic())
1686 Functions.insert(X: F);
1687 }
1688
1689 AMDGPUAttributorOptions Options;
1690 Module *M = C.begin()->getFunction().getParent();
1691 // In the CGSCC pipeline, avoid untracked call graph modifications by
1692 // disabling function deletion, mirroring the generic AttributorCGSCCPass.
1693 return runImpl(Functions, /*IsModulePass=*/false, /*DeleteFns=*/false, M&: *M, AG,
1694 TM, Options, LTOPhase: ThinOrFullLTOPhase::None)
1695 ? PreservedAnalyses::none()
1696 : PreservedAnalyses::all();
1697}