1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "GCNSubtarget.h"
15#include "Utils/AMDGPUBaseInfo.h"
16#include "llvm/IR/IntrinsicsAMDGPU.h"
17#include "llvm/IR/IntrinsicsR600.h"
18#include "llvm/Target/TargetMachine.h"
19#include "llvm/Transforms/IPO/Attributor.h"
20
21#define DEBUG_TYPE "amdgpu-attributor"
22
23using namespace llvm;
24
25static cl::opt<unsigned> IndirectCallSpecializationThreshold(
26 "amdgpu-indirect-call-specialization-threshold",
27 cl::desc(
28 "A threshold controls whether an indirect call will be specialized"),
29 cl::init(Val: 3));
30
// The three definitions below expand the AMDGPU_ATTRIBUTE(Name, Str) entries
// of AMDGPUAttributes.def in three different ways (X-macro pattern).
// NOTE(review): AMDGPU_ATTRIBUTE is redefined without an #undef in this file;
// presumably AMDGPUAttributes.def #undefs it at its end -- confirm.

// Pass 1: every entry becomes an enumerator Name_POS, i.e. a bit index.
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
#include "AMDGPUAttributes.def"
  // One past the last bit index, i.e. the number of entries.
  LAST_ARG_POS
};

// Pass 2: every entry becomes a single-bit mask Name = 1 << Name_POS.
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  // No bit set; returned by intrinsicToAttrMask for unhandled intrinsics.
  UNKNOWN_INTRINSIC = 0,
#include "AMDGPUAttributes.def"
  // All entry bits set.
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
  // Implicitly ALL_ARGUMENT_MASK + 1, so it cannot collide with any
  // single-entry mask above.
  NOT_IMPLICIT_INPUT
};

// Pass 3: table pairing each mask bit with the string given in the .def entry.
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
#include "AMDGPUAttributes.def"
};
52
53// We do not need to note the x workitem or workgroup id because they are always
54// initialized.
55//
56// TODO: We should not add the attributes if the known compile time workgroup
57// size is 1 for y/z.
58static ImplicitArgumentMask
59intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
60 bool HasApertureRegs, bool SupportsGetDoorBellID,
61 unsigned CodeObjectVersion) {
62 switch (ID) {
63 case Intrinsic::amdgcn_workitem_id_x:
64 NonKernelOnly = true;
65 return WORKITEM_ID_X;
66 case Intrinsic::amdgcn_workgroup_id_x:
67 NonKernelOnly = true;
68 return WORKGROUP_ID_X;
69 case Intrinsic::amdgcn_workitem_id_y:
70 case Intrinsic::r600_read_tidig_y:
71 return WORKITEM_ID_Y;
72 case Intrinsic::amdgcn_workitem_id_z:
73 case Intrinsic::r600_read_tidig_z:
74 return WORKITEM_ID_Z;
75 case Intrinsic::amdgcn_workgroup_id_y:
76 case Intrinsic::r600_read_tgid_y:
77 return WORKGROUP_ID_Y;
78 case Intrinsic::amdgcn_workgroup_id_z:
79 case Intrinsic::r600_read_tgid_z:
80 return WORKGROUP_ID_Z;
81 case Intrinsic::amdgcn_cluster_id_x:
82 NonKernelOnly = true;
83 return CLUSTER_ID_X;
84 case Intrinsic::amdgcn_cluster_id_y:
85 return CLUSTER_ID_Y;
86 case Intrinsic::amdgcn_cluster_id_z:
87 return CLUSTER_ID_Z;
88 case Intrinsic::amdgcn_lds_kernel_id:
89 return LDS_KERNEL_ID;
90 case Intrinsic::amdgcn_dispatch_ptr:
91 return DISPATCH_PTR;
92 case Intrinsic::amdgcn_dispatch_id:
93 return DISPATCH_ID;
94 case Intrinsic::amdgcn_implicitarg_ptr:
95 return IMPLICIT_ARG_PTR;
96 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
97 // queue_ptr.
98 case Intrinsic::amdgcn_queue_ptr:
99 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
100 return QUEUE_PTR;
101 case Intrinsic::amdgcn_is_shared:
102 case Intrinsic::amdgcn_is_private:
103 if (HasApertureRegs)
104 return NOT_IMPLICIT_INPUT;
105 // Under V5, we need implicitarg_ptr + offsets to access private_base or
106 // shared_base. For pre-V5, however, need to access them through queue_ptr +
107 // offsets.
108 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
109 : QUEUE_PTR;
110 case Intrinsic::trap:
111 case Intrinsic::debugtrap:
112 case Intrinsic::ubsantrap:
113 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
114 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
115 : QUEUE_PTR;
116 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
117 return QUEUE_PTR;
118 default:
119 return UNKNOWN_INTRINSIC;
120 }
121}
122
123static bool castRequiresQueuePtr(unsigned SrcAS) {
124 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
125}
126
127static bool isDSAddress(const Constant *C) {
128 const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C);
129 if (!GV)
130 return false;
131 unsigned AS = GV->getAddressSpace();
132 return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
133}
134
135/// Returns true if sanitizer attributes are present on a function.
136static bool hasSanitizerAttributes(const Function &F) {
137 return F.hasFnAttribute(Kind: Attribute::SanitizeAddress) ||
138 F.hasFnAttribute(Kind: Attribute::SanitizeThread) ||
139 F.hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
140 F.hasFnAttribute(Kind: Attribute::SanitizeHWAddress) ||
141 F.hasFnAttribute(Kind: Attribute::SanitizeMemTag);
142}
143
144namespace {
145class AMDGPUInformationCache : public InformationCache {
146public:
147 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
148 BumpPtrAllocator &Allocator,
149 SetVector<Function *> *CGSCC, TargetMachine &TM)
150 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
151 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
152
153 TargetMachine &TM;
154
155 enum ConstantStatus : uint8_t {
156 NONE = 0,
157 DS_GLOBAL = 1 << 0,
158 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
159 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
160 ADDR_SPACE_CAST_BOTH_TO_FLAT =
161 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
162 };
163
164 /// Check if the subtarget has aperture regs.
165 bool hasApertureRegs(Function &F) {
166 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
167 return ST.hasApertureRegs();
168 }
169
170 /// Check if the subtarget supports GetDoorbellID.
171 bool supportsGetDoorbellID(Function &F) {
172 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
173 return ST.supportsGetDoorbellID();
174 }
175
176 std::optional<std::pair<unsigned, unsigned>>
177 getFlatWorkGroupSizeAttr(const Function &F) const {
178 auto R = AMDGPU::getIntegerPairAttribute(F, Name: "amdgpu-flat-work-group-size");
179 if (!R)
180 return std::nullopt;
181 return std::make_pair(x&: R->first, y&: *(R->second));
182 }
183
184 std::pair<unsigned, unsigned>
185 getDefaultFlatWorkGroupSize(const Function &F) const {
186 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
187 return ST.getDefaultFlatWorkGroupSize(CC: F.getCallingConv());
188 }
189
190 std::pair<unsigned, unsigned>
191 getMaximumFlatWorkGroupRange(const Function &F) {
192 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
193 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
194 }
195
196 SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
197 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
198 return ST.getMaxNumWorkGroups(F);
199 }
200
201 /// Get code object version.
202 unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
203
204 std::optional<std::pair<unsigned, unsigned>>
205 getWavesPerEUAttr(const Function &F) {
206 auto Val = AMDGPU::getIntegerPairAttribute(F, Name: "amdgpu-waves-per-eu",
207 /*OnlyFirstRequired=*/true);
208 if (!Val)
209 return std::nullopt;
210 if (!Val->second) {
211 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
212 Val->second = ST.getMaxWavesPerEU();
213 }
214 return std::make_pair(x&: Val->first, y&: *(Val->second));
215 }
216
217 unsigned getMaxWavesPerEU(const Function &F) {
218 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
219 return ST.getMaxWavesPerEU();
220 }
221
222 unsigned getMaxAddrSpace() const override {
223 return AMDGPUAS::MAX_AMDGPU_ADDRESS;
224 }
225
226private:
227 /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
228 /// local to flat. These casts may require the queue pointer.
229 static uint8_t visitConstExpr(const ConstantExpr *CE) {
230 uint8_t Status = NONE;
231
232 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
233 unsigned SrcAS = CE->getOperand(i_nocapture: 0)->getType()->getPointerAddressSpace();
234 if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
235 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
236 else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
237 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
238 }
239
240 return Status;
241 }
242
243 /// Get the constant access bitmap for \p C.
244 uint8_t getConstantAccess(const Constant *C,
245 SmallPtrSetImpl<const Constant *> &Visited) {
246 auto It = ConstantStatus.find(Val: C);
247 if (It != ConstantStatus.end())
248 return It->second;
249
250 uint8_t Result = 0;
251 if (isDSAddress(C))
252 Result = DS_GLOBAL;
253
254 if (const auto *CE = dyn_cast<ConstantExpr>(Val: C))
255 Result |= visitConstExpr(CE);
256
257 for (const Use &U : C->operands()) {
258 const auto *OpC = dyn_cast<Constant>(Val: U);
259 if (!OpC || !Visited.insert(Ptr: OpC).second)
260 continue;
261
262 Result |= getConstantAccess(C: OpC, Visited);
263 }
264 return Result;
265 }
266
267public:
268 /// Returns true if \p Fn needs the queue pointer because of \p C.
269 bool needsQueuePtr(const Constant *C, Function &Fn) {
270 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(CC: Fn.getCallingConv());
271 bool HasAperture = hasApertureRegs(F&: Fn);
272
273 // No need to explore the constants.
274 if (!IsNonEntryFunc && HasAperture)
275 return false;
276
277 SmallPtrSet<const Constant *, 8> Visited;
278 uint8_t Access = getConstantAccess(C, Visited);
279
280 // We need to trap on DS globals in non-entry functions.
281 if (IsNonEntryFunc && (Access & DS_GLOBAL))
282 return true;
283
284 return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
285 }
286
287 bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
288 SmallPtrSet<const Constant *, 8> Visited;
289 uint8_t Access = getConstantAccess(C, Visited);
290 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
291 }
292
293private:
294 /// Used to determine if the Constant needs the queue pointer.
295 DenseMap<const Constant *, uint8_t> ConstantStatus;
296 const unsigned CodeObjectVersion;
297};
298
299struct AAAMDAttributes
300 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
301 AbstractAttribute> {
302 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
303 AbstractAttribute>;
304
305 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
306
307 /// Create an abstract attribute view for the position \p IRP.
308 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
309 Attributor &A);
310
311 /// See AbstractAttribute::getName().
312 StringRef getName() const override { return "AAAMDAttributes"; }
313
314 /// See AbstractAttribute::getIdAddr().
315 const char *getIdAddr() const override { return &ID; }
316
317 /// This function should return true if the type of the \p AA is
318 /// AAAMDAttributes.
319 static bool classof(const AbstractAttribute *AA) {
320 return (AA->getIdAddr() == &ID);
321 }
322
323 /// Unique ID (due to the unique address)
324 static const char ID;
325};
326const char AAAMDAttributes::ID = 0;
327
328struct AAUniformWorkGroupSize
329 : public StateWrapper<BooleanState, AbstractAttribute> {
330 using Base = StateWrapper<BooleanState, AbstractAttribute>;
331 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
332
333 /// Create an abstract attribute view for the position \p IRP.
334 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
335 Attributor &A);
336
337 /// See AbstractAttribute::getName().
338 StringRef getName() const override { return "AAUniformWorkGroupSize"; }
339
340 /// See AbstractAttribute::getIdAddr().
341 const char *getIdAddr() const override { return &ID; }
342
343 /// This function should return true if the type of the \p AA is
344 /// AAAMDAttributes.
345 static bool classof(const AbstractAttribute *AA) {
346 return (AA->getIdAddr() == &ID);
347 }
348
349 /// Unique ID (due to the unique address)
350 static const char ID;
351};
352const char AAUniformWorkGroupSize::ID = 0;
353
354struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
355 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
356 : AAUniformWorkGroupSize(IRP, A) {}
357
358 void initialize(Attributor &A) override {
359 Function *F = getAssociatedFunction();
360 CallingConv::ID CC = F->getCallingConv();
361
362 if (CC != CallingConv::AMDGPU_KERNEL)
363 return;
364
365 bool InitialValue = false;
366 if (F->hasFnAttribute(Kind: "uniform-work-group-size"))
367 InitialValue =
368 F->getFnAttribute(Kind: "uniform-work-group-size").getValueAsString() ==
369 "true";
370
371 if (InitialValue)
372 indicateOptimisticFixpoint();
373 else
374 indicatePessimisticFixpoint();
375 }
376
377 ChangeStatus updateImpl(Attributor &A) override {
378 ChangeStatus Change = ChangeStatus::UNCHANGED;
379
380 auto CheckCallSite = [&](AbstractCallSite CS) {
381 Function *Caller = CS.getInstruction()->getFunction();
382 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
383 << "->" << getAssociatedFunction()->getName() << "\n");
384
385 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
386 QueryingAA: *this, IRP: IRPosition::function(F: *Caller), DepClass: DepClassTy::REQUIRED);
387 if (!CallerInfo || !CallerInfo->isValidState())
388 return false;
389
390 Change = Change | clampStateAndIndicateChange(S&: this->getState(),
391 R: CallerInfo->getState());
392
393 return true;
394 };
395
396 bool AllCallSitesKnown = true;
397 if (!A.checkForAllCallSites(Pred: CheckCallSite, QueryingAA: *this, RequireAllCallSites: true, UsedAssumedInformation&: AllCallSitesKnown))
398 return indicatePessimisticFixpoint();
399
400 return Change;
401 }
402
403 ChangeStatus manifest(Attributor &A) override {
404 SmallVector<Attribute, 8> AttrList;
405 LLVMContext &Ctx = getAssociatedFunction()->getContext();
406
407 AttrList.push_back(Elt: Attribute::get(Context&: Ctx, Kind: "uniform-work-group-size",
408 Val: getAssumed() ? "true" : "false"));
409 return A.manifestAttrs(IRP: getIRPosition(), DeducedAttrs: AttrList,
410 /* ForceReplace */ true);
411 }
412
413 bool isValidState() const override {
414 // This state is always valid, even when the state is false.
415 return true;
416 }
417
418 const std::string getAsStr(Attributor *) const override {
419 return "AMDWorkGroupSize[" + std::to_string(val: getAssumed()) + "]";
420 }
421
422 /// See AbstractAttribute::trackStatistics()
423 void trackStatistics() const override {}
424};
425
426AAUniformWorkGroupSize &
427AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
428 Attributor &A) {
429 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
430 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
431 llvm_unreachable(
432 "AAUniformWorkGroupSize is only valid for function position");
433}
434
435struct AAAMDAttributesFunction : public AAAMDAttributes {
436 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
437 : AAAMDAttributes(IRP, A) {}
438
439 void initialize(Attributor &A) override {
440 Function *F = getAssociatedFunction();
441
442 // If the function requires the implicit arg pointer due to sanitizers,
443 // assume it's needed even if explicitly marked as not requiring it.
444 // Flat scratch initialization is needed because `asan_malloc_impl`
445 // calls introduced later in pipeline will have flat scratch accesses.
446 // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
447 // implementation for `asan_malloc_impl` is updated.
448 const bool HasSanitizerAttrs = hasSanitizerAttributes(F: *F);
449 if (HasSanitizerAttrs) {
450 removeAssumedBits(BitsEncoding: IMPLICIT_ARG_PTR);
451 removeAssumedBits(BitsEncoding: HOSTCALL_PTR);
452 removeAssumedBits(BitsEncoding: FLAT_SCRATCH_INIT);
453 }
454
455 for (auto Attr : ImplicitAttrs) {
456 if (HasSanitizerAttrs &&
457 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
458 Attr.first == FLAT_SCRATCH_INIT))
459 continue;
460
461 if (F->hasFnAttribute(Kind: Attr.second))
462 addKnownBits(Bits: Attr.first);
463 }
464
465 if (F->isDeclaration())
466 return;
467
468 // Ignore functions with graphics calling conventions, these are currently
469 // not allowed to have kernel arguments.
470 if (AMDGPU::isGraphics(CC: F->getCallingConv())) {
471 indicatePessimisticFixpoint();
472 return;
473 }
474 }
475
476 ChangeStatus updateImpl(Attributor &A) override {
477 Function *F = getAssociatedFunction();
478 // The current assumed state used to determine a change.
479 auto OrigAssumed = getAssumed();
480
481 // Check for Intrinsics and propagate attributes.
482 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
483 QueryingAA: *this, IRP: this->getIRPosition(), DepClass: DepClassTy::REQUIRED);
484 if (!AAEdges || !AAEdges->isValidState() ||
485 AAEdges->hasNonAsmUnknownCallee())
486 return indicatePessimisticFixpoint();
487
488 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(CC: F->getCallingConv());
489
490 bool NeedsImplicit = false;
491 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
492 bool HasApertureRegs = InfoCache.hasApertureRegs(F&: *F);
493 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(F&: *F);
494 unsigned COV = InfoCache.getCodeObjectVersion();
495
496 for (Function *Callee : AAEdges->getOptimisticEdges()) {
497 Intrinsic::ID IID = Callee->getIntrinsicID();
498 if (IID == Intrinsic::not_intrinsic) {
499 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
500 QueryingAA: *this, IRP: IRPosition::function(F: *Callee), DepClass: DepClassTy::REQUIRED);
501 if (!AAAMD || !AAAMD->isValidState())
502 return indicatePessimisticFixpoint();
503 *this &= *AAAMD;
504 continue;
505 }
506
507 bool NonKernelOnly = false;
508 ImplicitArgumentMask AttrMask =
509 intrinsicToAttrMask(ID: IID, NonKernelOnly, NeedsImplicit,
510 HasApertureRegs, SupportsGetDoorBellID: SupportsGetDoorbellID, CodeObjectVersion: COV);
511
512 if (AttrMask == UNKNOWN_INTRINSIC) {
513 // Assume not-nocallback intrinsics may invoke a function which accesses
514 // implicit arguments.
515 //
516 // FIXME: This isn't really the correct check. We want to ensure it
517 // isn't calling any function that may use implicit arguments regardless
518 // of whether it's internal to the module or not.
519 //
520 // TODO: Ignoring callsite attributes.
521 if (!Callee->hasFnAttribute(Kind: Attribute::NoCallback))
522 return indicatePessimisticFixpoint();
523 continue;
524 }
525
526 if (AttrMask != NOT_IMPLICIT_INPUT) {
527 if ((IsNonEntryFunc || !NonKernelOnly))
528 removeAssumedBits(BitsEncoding: AttrMask);
529 }
530 }
531
532 // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
533 if (NeedsImplicit)
534 removeAssumedBits(BitsEncoding: IMPLICIT_ARG_PTR);
535
536 if (isAssumed(BitsEncoding: QUEUE_PTR) && checkForQueuePtr(A)) {
537 // Under V5, we need implicitarg_ptr + offsets to access private_base or
538 // shared_base. We do not actually need queue_ptr.
539 if (COV >= 5)
540 removeAssumedBits(BitsEncoding: IMPLICIT_ARG_PTR);
541 else
542 removeAssumedBits(BitsEncoding: QUEUE_PTR);
543 }
544
545 if (funcRetrievesMultigridSyncArg(A, COV)) {
546 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
547 "multigrid_sync_arg needs implicitarg_ptr");
548 removeAssumedBits(BitsEncoding: MULTIGRID_SYNC_ARG);
549 }
550
551 if (funcRetrievesHostcallPtr(A, COV)) {
552 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
553 removeAssumedBits(BitsEncoding: HOSTCALL_PTR);
554 }
555
556 if (funcRetrievesHeapPtr(A, COV)) {
557 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
558 removeAssumedBits(BitsEncoding: HEAP_PTR);
559 }
560
561 if (isAssumed(BitsEncoding: QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
562 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
563 removeAssumedBits(BitsEncoding: QUEUE_PTR);
564 }
565
566 if (isAssumed(BitsEncoding: LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
567 removeAssumedBits(BitsEncoding: LDS_KERNEL_ID);
568 }
569
570 if (isAssumed(BitsEncoding: DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
571 removeAssumedBits(BitsEncoding: DEFAULT_QUEUE);
572
573 if (isAssumed(BitsEncoding: COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
574 removeAssumedBits(BitsEncoding: COMPLETION_ACTION);
575
576 if (isAssumed(BitsEncoding: FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
577 removeAssumedBits(BitsEncoding: FLAT_SCRATCH_INIT);
578
579 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
580 : ChangeStatus::UNCHANGED;
581 }
582
583 ChangeStatus manifest(Attributor &A) override {
584 SmallVector<Attribute, 8> AttrList;
585 LLVMContext &Ctx = getAssociatedFunction()->getContext();
586
587 for (auto Attr : ImplicitAttrs) {
588 if (isKnown(BitsEncoding: Attr.first))
589 AttrList.push_back(Elt: Attribute::get(Context&: Ctx, Kind: Attr.second));
590 }
591
592 return A.manifestAttrs(IRP: getIRPosition(), DeducedAttrs: AttrList,
593 /* ForceReplace */ true);
594 }
595
596 const std::string getAsStr(Attributor *) const override {
597 std::string Str;
598 raw_string_ostream OS(Str);
599 OS << "AMDInfo[";
600 for (auto Attr : ImplicitAttrs)
601 if (isAssumed(BitsEncoding: Attr.first))
602 OS << ' ' << Attr.second;
603 OS << " ]";
604 return OS.str();
605 }
606
607 /// See AbstractAttribute::trackStatistics()
608 void trackStatistics() const override {}
609
610private:
611 bool checkForQueuePtr(Attributor &A) {
612 Function *F = getAssociatedFunction();
613 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(CC: F->getCallingConv());
614
615 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
616
617 bool NeedsQueuePtr = false;
618
619 auto CheckAddrSpaceCasts = [&](Instruction &I) {
620 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
621 if (castRequiresQueuePtr(SrcAS)) {
622 NeedsQueuePtr = true;
623 return false;
624 }
625 return true;
626 };
627
628 bool HasApertureRegs = InfoCache.hasApertureRegs(F&: *F);
629
630 // `checkForAllInstructions` is much more cheaper than going through all
631 // instructions, try it first.
632
633 // The queue pointer is not needed if aperture regs is present.
634 if (!HasApertureRegs) {
635 bool UsedAssumedInformation = false;
636 A.checkForAllInstructions(Pred: CheckAddrSpaceCasts, QueryingAA: *this,
637 Opcodes: {Instruction::AddrSpaceCast},
638 UsedAssumedInformation);
639 }
640
641 // If we found that we need the queue pointer, nothing else to do.
642 if (NeedsQueuePtr)
643 return true;
644
645 if (!IsNonEntryFunc && HasApertureRegs)
646 return false;
647
648 for (BasicBlock &BB : *F) {
649 for (Instruction &I : BB) {
650 for (const Use &U : I.operands()) {
651 if (const auto *C = dyn_cast<Constant>(Val: U)) {
652 if (InfoCache.needsQueuePtr(C, Fn&: *F))
653 return true;
654 }
655 }
656 }
657 }
658
659 return false;
660 }
661
662 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
663 auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
664 AA::RangeTy Range(Pos, 8);
665 return funcRetrievesImplicitKernelArg(A, Range);
666 }
667
668 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
669 auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
670 AA::RangeTy Range(Pos, 8);
671 return funcRetrievesImplicitKernelArg(A, Range);
672 }
673
674 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
675 auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
676 AA::RangeTy Range(Pos, 8);
677 return funcRetrievesImplicitKernelArg(A, Range);
678 }
679
680 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
681 auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
682 AA::RangeTy Range(Pos, 8);
683 return funcRetrievesImplicitKernelArg(A, Range);
684 }
685
686 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
687 if (COV < 5)
688 return false;
689 AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
690 return funcRetrievesImplicitKernelArg(A, Range);
691 }
692
693 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
694 if (COV < 5)
695 return false;
696 AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
697 return funcRetrievesImplicitKernelArg(A, Range);
698 }
699
700 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
701 // Check if this is a call to the implicitarg_ptr builtin and it
702 // is used to retrieve the hostcall pointer. The implicit arg for
703 // hostcall is not used only if every use of the implicitarg_ptr
704 // is a load that clearly does not retrieve any byte of the
705 // hostcall pointer. We check this by tracing all the uses of the
706 // initial call to the implicitarg_ptr intrinsic.
707 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
708 auto &Call = cast<CallBase>(Val&: I);
709 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
710 return true;
711
712 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
713 QueryingAA: *this, IRP: IRPosition::callsite_returned(CB: Call), DepClass: DepClassTy::REQUIRED);
714 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
715 return false;
716
717 return PointerInfoAA->forallInterferingAccesses(
718 Range, CB: [](const AAPointerInfo::Access &Acc, bool IsExact) {
719 return Acc.getRemoteInst()->isDroppable();
720 });
721 };
722
723 bool UsedAssumedInformation = false;
724 return !A.checkForAllCallLikeInstructions(Pred: DoesNotLeadToKernelArgLoc, QueryingAA: *this,
725 UsedAssumedInformation);
726 }
727
728 bool funcRetrievesLDSKernelId(Attributor &A) {
729 auto DoesNotRetrieve = [&](Instruction &I) {
730 auto &Call = cast<CallBase>(Val&: I);
731 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
732 };
733 bool UsedAssumedInformation = false;
734 return !A.checkForAllCallLikeInstructions(Pred: DoesNotRetrieve, QueryingAA: *this,
735 UsedAssumedInformation);
736 }
737
738 // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
739 // not to be set.
740 bool needFlatScratchInit(Attributor &A) {
741 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
742
743 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
744 // there is a cast from PRIVATE_ADDRESS.
745 auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
746 return cast<AddrSpaceCastInst>(Val&: I).getSrcAddressSpace() !=
747 AMDGPUAS::PRIVATE_ADDRESS;
748 };
749
750 bool UsedAssumedInformation = false;
751 if (!A.checkForAllInstructions(Pred: AddrSpaceCastNotFromPrivate, QueryingAA: *this,
752 Opcodes: {Instruction::AddrSpaceCast},
753 UsedAssumedInformation))
754 return true;
755
756 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
757 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
758
759 Function *F = getAssociatedFunction();
760 for (Instruction &I : instructions(F)) {
761 for (const Use &U : I.operands()) {
762 if (const auto *C = dyn_cast<Constant>(Val: U)) {
763 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
764 return true;
765 }
766 }
767 }
768
769 // Finally check callees.
770
771 // This is called on each callee; false means callee shouldn't have
772 // no-flat-scratch-init.
773 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
774 const auto &CB = cast<CallBase>(Val&: I);
775 const Function *Callee = CB.getCalledFunction();
776
777 // Callee == 0 for inline asm or indirect call with known callees.
778 // In the latter case, updateImpl() already checked the callees and we
779 // know their FLAT_SCRATCH_INIT bit is set.
780 // If function has indirect call with unknown callees, the bit is
781 // already removed in updateImpl() and execution won't reach here.
782 if (!Callee)
783 return true;
784
785 return Callee->getIntrinsicID() !=
786 Intrinsic::amdgcn_addrspacecast_nonnull;
787 };
788
789 UsedAssumedInformation = false;
790 // If any callee is false (i.e. need FlatScratchInit),
791 // checkForAllCallLikeInstructions returns false, in which case this
792 // function returns true.
793 return !A.checkForAllCallLikeInstructions(Pred: CheckForNoFlatScratchInit, QueryingAA: *this,
794 UsedAssumedInformation);
795 }
796};
797
798AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
799 Attributor &A) {
800 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
801 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
802 llvm_unreachable("AAAMDAttributes is only valid for function position");
803}
804
805/// Base class to derive different size ranges.
806struct AAAMDSizeRangeAttribute
807 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
808 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
809
810 StringRef AttrName;
811
812 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
813 StringRef AttrName)
814 : Base(IRP, 32), AttrName(AttrName) {}
815
816 /// See AbstractAttribute::trackStatistics()
817 void trackStatistics() const override {}
818
819 template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
820 ChangeStatus Change = ChangeStatus::UNCHANGED;
821
822 auto CheckCallSite = [&](AbstractCallSite CS) {
823 Function *Caller = CS.getInstruction()->getFunction();
824 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
825 << "->" << getAssociatedFunction()->getName() << '\n');
826
827 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
828 *this, IRPosition::function(F: *Caller), DepClassTy::REQUIRED);
829 if (!CallerInfo || !CallerInfo->isValidState())
830 return false;
831
832 Change |=
833 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
834
835 return true;
836 };
837
838 bool AllCallSitesKnown = true;
839 if (!A.checkForAllCallSites(CheckCallSite, *this,
840 /*RequireAllCallSites=*/true,
841 AllCallSitesKnown))
842 return indicatePessimisticFixpoint();
843
844 return Change;
845 }
846
847 /// Clamp the assumed range to the default value ([Min, Max]) and emit the
848 /// attribute if it is not same as default.
849 ChangeStatus
850 emitAttributeIfNotDefaultAfterClamp(Attributor &A,
851 std::pair<unsigned, unsigned> Default) {
852 auto [Min, Max] = Default;
853 unsigned Lower = getAssumed().getLower().getZExtValue();
854 unsigned Upper = getAssumed().getUpper().getZExtValue();
855
856 // Clamp the range to the default value.
857 if (Lower < Min)
858 Lower = Min;
859 if (Upper > Max + 1)
860 Upper = Max + 1;
861
862 // No manifest if the value is invalid or same as default after clamp.
863 if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
864 return ChangeStatus::UNCHANGED;
865
866 Function *F = getAssociatedFunction();
867 LLVMContext &Ctx = F->getContext();
868 SmallString<10> Buffer;
869 raw_svector_ostream OS(Buffer);
870 OS << Lower << ',' << Upper - 1;
871 return A.manifestAttrs(IRP: getIRPosition(),
872 DeducedAttrs: {Attribute::get(Context&: Ctx, Kind: AttrName, Val: OS.str())},
873 /*ForceReplace=*/true);
874 }
875
876 const std::string getAsStr(Attributor *) const override {
877 std::string Str;
878 raw_string_ostream OS(Str);
879 OS << getName() << '[';
880 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
881 OS << ']';
882 return OS.str();
883 }
884};
885
886/// Propagate amdgpu-flat-work-group-size attribute.
887struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
888 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
889 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
890
891 void initialize(Attributor &A) override {
892 Function *F = getAssociatedFunction();
893 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
894
895 bool HasAttr = false;
896 auto Range = InfoCache.getDefaultFlatWorkGroupSize(F: *F);
897 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(F: *F);
898
899 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(F: *F)) {
900 // We only consider an attribute that is not max range because the front
901 // end always emits the attribute, unfortunately, and sometimes it emits
902 // the max range.
903 if (*Attr != MaxRange) {
904 Range = *Attr;
905 HasAttr = true;
906 }
907 }
908
909 // We don't want to directly clamp the state if it's the max range because
910 // that is basically the worst state.
911 if (Range == MaxRange)
912 return;
913
914 auto [Min, Max] = Range;
915 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
916 IntegerRangeState IRS(CR);
917 clampStateAndIndicateChange(S&: this->getState(), R: IRS);
918
919 if (HasAttr || AMDGPU::isEntryFunctionCC(CC: F->getCallingConv()))
920 indicateOptimisticFixpoint();
921 }
922
923 ChangeStatus updateImpl(Attributor &A) override {
924 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
925 }
926
927 /// Create an abstract attribute view for the position \p IRP.
928 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
929 Attributor &A);
930
931 ChangeStatus manifest(Attributor &A) override {
932 Function *F = getAssociatedFunction();
933 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
934 return emitAttributeIfNotDefaultAfterClamp(
935 A, Default: InfoCache.getMaximumFlatWorkGroupRange(F: *F));
936 }
937
938 /// See AbstractAttribute::getName()
939 StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }
940
941 /// See AbstractAttribute::getIdAddr()
942 const char *getIdAddr() const override { return &ID; }
943
944 /// This function should return true if the type of the \p AA is
945 /// AAAMDFlatWorkGroupSize
946 static bool classof(const AbstractAttribute *AA) {
947 return (AA->getIdAddr() == &ID);
948 }
949
950 /// Unique ID (due to the unique address)
951 static const char ID;
952};
953
954const char AAAMDFlatWorkGroupSize::ID = 0;
955
956AAAMDFlatWorkGroupSize &
957AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
958 Attributor &A) {
959 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
960 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
961 llvm_unreachable(
962 "AAAMDFlatWorkGroupSize is only valid for function position");
963}
964
965struct TupleDecIntegerRangeState : public AbstractState {
966 DecIntegerState<uint32_t> X, Y, Z;
967
968 bool isValidState() const override {
969 return X.isValidState() && Y.isValidState() && Z.isValidState();
970 }
971
972 bool isAtFixpoint() const override {
973 return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
974 }
975
976 ChangeStatus indicateOptimisticFixpoint() override {
977 return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
978 Z.indicateOptimisticFixpoint();
979 }
980
981 ChangeStatus indicatePessimisticFixpoint() override {
982 return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
983 Z.indicatePessimisticFixpoint();
984 }
985
986 TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
987 X ^= Other.X;
988 Y ^= Other.Y;
989 Z ^= Other.Z;
990 return *this;
991 }
992
993 bool operator==(const TupleDecIntegerRangeState &Other) const {
994 return X == Other.X && Y == Other.Y && Z == Other.Z;
995 }
996
997 TupleDecIntegerRangeState &getAssumed() { return *this; }
998 const TupleDecIntegerRangeState &getAssumed() const { return *this; }
999};
1000
1001using AAAMDMaxNumWorkgroupsState =
1002 StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
1003
1004/// Propagate amdgpu-max-num-workgroups attribute.
1005struct AAAMDMaxNumWorkgroups
1006 : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
1007 using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1008
1009 AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1010
1011 void initialize(Attributor &A) override {
1012 Function *F = getAssociatedFunction();
1013 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1014
1015 SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(F: *F);
1016
1017 X.takeKnownMinimum(Value: MaxNumWorkgroups[0]);
1018 Y.takeKnownMinimum(Value: MaxNumWorkgroups[1]);
1019 Z.takeKnownMinimum(Value: MaxNumWorkgroups[2]);
1020
1021 if (AMDGPU::isEntryFunctionCC(CC: F->getCallingConv()))
1022 indicatePessimisticFixpoint();
1023 }
1024
1025 ChangeStatus updateImpl(Attributor &A) override {
1026 ChangeStatus Change = ChangeStatus::UNCHANGED;
1027
1028 auto CheckCallSite = [&](AbstractCallSite CS) {
1029 Function *Caller = CS.getInstruction()->getFunction();
1030 LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
1031 << "->" << getAssociatedFunction()->getName() << '\n');
1032
1033 const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
1034 QueryingAA: *this, IRP: IRPosition::function(F: *Caller), DepClass: DepClassTy::REQUIRED);
1035 if (!CallerInfo || !CallerInfo->isValidState())
1036 return false;
1037
1038 Change |=
1039 clampStateAndIndicateChange(S&: this->getState(), R: CallerInfo->getState());
1040 return true;
1041 };
1042
1043 bool AllCallSitesKnown = true;
1044 if (!A.checkForAllCallSites(Pred: CheckCallSite, QueryingAA: *this,
1045 /*RequireAllCallSites=*/true,
1046 UsedAssumedInformation&: AllCallSitesKnown))
1047 return indicatePessimisticFixpoint();
1048
1049 return Change;
1050 }
1051
1052 /// Create an abstract attribute view for the position \p IRP.
1053 static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
1054 Attributor &A);
1055
1056 ChangeStatus manifest(Attributor &A) override {
1057 Function *F = getAssociatedFunction();
1058 LLVMContext &Ctx = F->getContext();
1059 SmallString<32> Buffer;
1060 raw_svector_ostream OS(Buffer);
1061 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
1062
1063 // TODO: Should annotate loads of the group size for this to do anything
1064 // useful.
1065 return A.manifestAttrs(
1066 IRP: getIRPosition(),
1067 DeducedAttrs: {Attribute::get(Context&: Ctx, Kind: "amdgpu-max-num-workgroups", Val: OS.str())},
1068 /* ForceReplace= */ true);
1069 }
1070
1071 StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }
1072
1073 const std::string getAsStr(Attributor *) const override {
1074 std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
1075 raw_string_ostream OS(Buffer);
1076 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
1077 << ']';
1078 return OS.str();
1079 }
1080
1081 const char *getIdAddr() const override { return &ID; }
1082
1083 /// This function should return true if the type of the \p AA is
1084 /// AAAMDMaxNumWorkgroups
1085 static bool classof(const AbstractAttribute *AA) {
1086 return (AA->getIdAddr() == &ID);
1087 }
1088
1089 void trackStatistics() const override {}
1090
1091 /// Unique ID (due to the unique address)
1092 static const char ID;
1093};
1094
1095const char AAAMDMaxNumWorkgroups::ID = 0;
1096
1097AAAMDMaxNumWorkgroups &
1098AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1099 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1100 return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1101 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1102}
1103
1104/// Propagate amdgpu-waves-per-eu attribute.
1105struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1106 AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
1107 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
1108
1109 void initialize(Attributor &A) override {
1110 Function *F = getAssociatedFunction();
1111 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1112
1113 // If the attribute exists, we will honor it if it is not the default.
1114 if (auto Attr = InfoCache.getWavesPerEUAttr(F: *F)) {
1115 std::pair<unsigned, unsigned> MaxWavesPerEURange{
1116 1U, InfoCache.getMaxWavesPerEU(F: *F)};
1117 if (*Attr != MaxWavesPerEURange) {
1118 auto [Min, Max] = *Attr;
1119 ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1120 IntegerRangeState RangeState(Range);
1121 this->getState() = RangeState;
1122 indicateOptimisticFixpoint();
1123 return;
1124 }
1125 }
1126
1127 if (AMDGPU::isEntryFunctionCC(CC: F->getCallingConv()))
1128 indicatePessimisticFixpoint();
1129 }
1130
1131 ChangeStatus updateImpl(Attributor &A) override {
1132 ChangeStatus Change = ChangeStatus::UNCHANGED;
1133
1134 auto CheckCallSite = [&](AbstractCallSite CS) {
1135 Function *Caller = CS.getInstruction()->getFunction();
1136 Function *Func = getAssociatedFunction();
1137 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
1138 << "->" << Func->getName() << '\n');
1139 (void)Func;
1140
1141 const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
1142 QueryingAA: *this, IRP: IRPosition::function(F: *Caller), DepClass: DepClassTy::REQUIRED);
1143 if (!CallerAA || !CallerAA->isValidState())
1144 return false;
1145
1146 ConstantRange Assumed = getAssumed();
1147 unsigned Min = std::max(a: Assumed.getLower().getZExtValue(),
1148 b: CallerAA->getAssumed().getLower().getZExtValue());
1149 unsigned Max = std::max(a: Assumed.getUpper().getZExtValue(),
1150 b: CallerAA->getAssumed().getUpper().getZExtValue());
1151 ConstantRange Range(APInt(32, Min), APInt(32, Max));
1152 IntegerRangeState RangeState(Range);
1153 getState() = RangeState;
1154 Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1155 : ChangeStatus::CHANGED;
1156
1157 return true;
1158 };
1159
1160 bool AllCallSitesKnown = true;
1161 if (!A.checkForAllCallSites(Pred: CheckCallSite, QueryingAA: *this, RequireAllCallSites: true, UsedAssumedInformation&: AllCallSitesKnown))
1162 return indicatePessimisticFixpoint();
1163
1164 return Change;
1165 }
1166
1167 /// Create an abstract attribute view for the position \p IRP.
1168 static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
1169 Attributor &A);
1170
1171 ChangeStatus manifest(Attributor &A) override {
1172 Function *F = getAssociatedFunction();
1173 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1174 return emitAttributeIfNotDefaultAfterClamp(
1175 A, Default: {1U, InfoCache.getMaxWavesPerEU(F: *F)});
1176 }
1177
1178 /// See AbstractAttribute::getName()
1179 StringRef getName() const override { return "AAAMDWavesPerEU"; }
1180
1181 /// See AbstractAttribute::getIdAddr()
1182 const char *getIdAddr() const override { return &ID; }
1183
1184 /// This function should return true if the type of the \p AA is
1185 /// AAAMDWavesPerEU
1186 static bool classof(const AbstractAttribute *AA) {
1187 return (AA->getIdAddr() == &ID);
1188 }
1189
1190 /// Unique ID (due to the unique address)
1191 static const char ID;
1192};
1193
1194const char AAAMDWavesPerEU::ID = 0;
1195
1196AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1197 Attributor &A) {
1198 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1199 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1200 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1201}
1202
1203/// Compute the minimum number of AGPRs required to allocate the inline asm.
1204static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1205 const CallBase &Call) {
1206 unsigned ArgNo = 0;
1207 unsigned ResNo = 0;
1208 unsigned AGPRDefCount = 0;
1209 unsigned AGPRUseCount = 0;
1210 unsigned MaxPhysReg = 0;
1211 const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1212
1213 // TODO: Overestimates due to not accounting for tied operands
1214 for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1215 Type *Ty = nullptr;
1216 switch (CI.Type) {
1217 case InlineAsm::isOutput: {
1218 Ty = Call.getType();
1219 if (auto *STy = dyn_cast<StructType>(Val: Ty))
1220 Ty = STy->getElementType(N: ResNo);
1221 ++ResNo;
1222 break;
1223 }
1224 case InlineAsm::isInput: {
1225 Ty = Call.getArgOperand(i: ArgNo++)->getType();
1226 break;
1227 }
1228 case InlineAsm::isLabel:
1229 continue;
1230 case InlineAsm::isClobber:
1231 // Parse the physical register reference.
1232 break;
1233 }
1234
1235 for (StringRef Code : CI.Codes) {
1236 unsigned RegCount = 0;
1237 if (Code.starts_with(Prefix: "a")) {
1238 // Virtual register, compute number of registers based on the type.
1239 //
1240 // We ought to be going through TargetLowering to get the number of
1241 // registers, but we should avoid the dependence on CodeGen here.
1242 RegCount = divideCeil(Numerator: DL.getTypeSizeInBits(Ty), Denominator: 32);
1243 } else {
1244 // Physical register reference
1245 auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Constraint: Code);
1246 if (Kind == 'a') {
1247 RegCount = NumRegs;
1248 MaxPhysReg = std::max(a: MaxPhysReg, b: std::min(a: RegIdx + NumRegs, b: 256u));
1249 }
1250
1251 continue;
1252 }
1253
1254 if (CI.Type == InlineAsm::isOutput) {
1255 // Apply tuple alignment requirement
1256 //
1257 // TODO: This is more conservative than necessary.
1258 AGPRDefCount = alignTo(Value: AGPRDefCount, Align: RegCount);
1259
1260 AGPRDefCount += RegCount;
1261 if (CI.isEarlyClobber) {
1262 AGPRUseCount = alignTo(Value: AGPRUseCount, Align: RegCount);
1263 AGPRUseCount += RegCount;
1264 }
1265 } else {
1266 AGPRUseCount = alignTo(Value: AGPRUseCount, Align: RegCount);
1267 AGPRUseCount += RegCount;
1268 }
1269 }
1270 }
1271
1272 unsigned MaxVirtReg = std::max(a: AGPRUseCount, b: AGPRDefCount);
1273
1274 // TODO: This is overly conservative. If there are any physical registers,
1275 // allocate any virtual registers after them so we don't have to solve optimal
1276 // packing.
1277 return std::min(a: MaxVirtReg + MaxPhysReg, b: 256u);
1278}
1279
1280struct AAAMDGPUMinAGPRAlloc
1281 : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1282 using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1283 AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1284
1285 static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1286 Attributor &A) {
1287 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1288 return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1289 llvm_unreachable(
1290 "AAAMDGPUMinAGPRAlloc is only valid for function position");
1291 }
1292
1293 void initialize(Attributor &A) override {
1294 Function *F = getAssociatedFunction();
1295 auto [MinNumAGPR, MaxNumAGPR] =
1296 AMDGPU::getIntegerPairAttribute(F: *F, Name: "amdgpu-agpr-alloc", Default: {~0u, ~0u},
1297 /*OnlyFirstRequired=*/true);
1298 if (MinNumAGPR == 0)
1299 indicateOptimisticFixpoint();
1300 }
1301
1302 const std::string getAsStr(Attributor *A) const override {
1303 std::string Str = "amdgpu-agpr-alloc=";
1304 raw_string_ostream OS(Str);
1305 OS << getAssumed();
1306 return OS.str();
1307 }
1308
1309 void trackStatistics() const override {}
1310
1311 ChangeStatus updateImpl(Attributor &A) override {
1312 DecIntegerState<> Maximum;
1313
1314 // Check for cases which require allocation of AGPRs. The only cases where
1315 // AGPRs are required are if there are direct references to AGPRs, so inline
1316 // assembly and special intrinsics.
1317 auto CheckForMinAGPRAllocs = [&](Instruction &I) {
1318 const auto &CB = cast<CallBase>(Val&: I);
1319 const Value *CalleeOp = CB.getCalledOperand();
1320
1321 if (const InlineAsm *IA = dyn_cast<InlineAsm>(Val: CalleeOp)) {
1322 // Technically, the inline asm could be invoking a call to an unknown
1323 // external function that requires AGPRs, but ignore that.
1324 unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, Call: CB);
1325 Maximum.takeAssumedMaximum(Value: NumRegs);
1326 return true;
1327 }
1328 switch (CB.getIntrinsicID()) {
1329 case Intrinsic::not_intrinsic:
1330 break;
1331 case Intrinsic::write_register:
1332 case Intrinsic::read_register:
1333 case Intrinsic::read_volatile_register: {
1334 const MDString *RegName = cast<MDString>(
1335 Val: cast<MDNode>(
1336 Val: cast<MetadataAsValue>(Val: CB.getArgOperand(i: 0))->getMetadata())
1337 ->getOperand(I: 0));
1338 auto [Kind, RegIdx, NumRegs] =
1339 AMDGPU::parseAsmPhysRegName(TupleString: RegName->getString());
1340 if (Kind == 'a')
1341 Maximum.takeAssumedMaximum(Value: std::min(a: RegIdx + NumRegs, b: 256u));
1342
1343 return true;
1344 }
1345 // Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have
1346 // the nocallback attribute, so the AMDGPU attributor can conservatively
1347 // drop all implicitly-known inputs and AGPR allocation information. Make
1348 // sure we still infer that no implicit inputs are required and that the
1349 // AGPR allocation stays at zero. Trap-like intrinsics may invoke a
1350 // function which requires AGPRs, so we need to check if the called
1351 // function has the "trap-func-name" attribute.
1352 case Intrinsic::trap:
1353 case Intrinsic::debugtrap:
1354 case Intrinsic::ubsantrap:
1355 return CB.hasFnAttr(Kind: Attribute::NoCallback) ||
1356 !CB.hasFnAttr(Kind: "trap-func-name");
1357 default:
1358 // Some intrinsics may use AGPRs, but if we have a choice, we are not
1359 // required to use AGPRs.
1360 // Assume !nocallback intrinsics may call a function which requires
1361 // AGPRs.
1362 return CB.hasFnAttr(Kind: Attribute::NoCallback);
1363 }
1364
1365 // TODO: Handle callsite attributes
1366 auto *CBEdges = A.getAAFor<AACallEdges>(
1367 QueryingAA: *this, IRP: IRPosition::callsite_function(CB), DepClass: DepClassTy::REQUIRED);
1368 if (!CBEdges || CBEdges->hasUnknownCallee()) {
1369 Maximum.indicatePessimisticFixpoint();
1370 return false;
1371 }
1372
1373 for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1374 const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1375 QueryingAA: *this, IRP: IRPosition::function(F: *PossibleCallee), DepClass: DepClassTy::REQUIRED);
1376 if (!CalleeInfo || !CalleeInfo->isValidState()) {
1377 Maximum.indicatePessimisticFixpoint();
1378 return false;
1379 }
1380
1381 Maximum.takeAssumedMaximum(Value: CalleeInfo->getAssumed());
1382 }
1383
1384 return true;
1385 };
1386
1387 bool UsedAssumedInformation = false;
1388 if (!A.checkForAllCallLikeInstructions(Pred: CheckForMinAGPRAllocs, QueryingAA: *this,
1389 UsedAssumedInformation))
1390 return indicatePessimisticFixpoint();
1391
1392 return clampStateAndIndicateChange(S&: getState(), R: Maximum);
1393 }
1394
1395 ChangeStatus manifest(Attributor &A) override {
1396 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1397 SmallString<4> Buffer;
1398 raw_svector_ostream OS(Buffer);
1399 OS << getAssumed();
1400
1401 return A.manifestAttrs(
1402 IRP: getIRPosition(), DeducedAttrs: {Attribute::get(Context&: Ctx, Kind: "amdgpu-agpr-alloc", Val: OS.str())});
1403 }
1404
1405 StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
1406 const char *getIdAddr() const override { return &ID; }
1407
1408 /// This function should return true if the type of the \p AA is
1409 /// AAAMDGPUMinAGPRAllocs
1410 static bool classof(const AbstractAttribute *AA) {
1411 return (AA->getIdAddr() == &ID);
1412 }
1413
1414 static const char ID;
1415};
1416
1417const char AAAMDGPUMinAGPRAlloc::ID = 0;
1418
1419/// An abstract attribute to propagate the function attribute
1420/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
1421struct AAAMDGPUClusterDims
1422 : public StateWrapper<BooleanState, AbstractAttribute> {
1423 using Base = StateWrapper<BooleanState, AbstractAttribute>;
1424 AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1425
1426 /// Create an abstract attribute view for the position \p IRP.
1427 static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
1428 Attributor &A);
1429
1430 /// See AbstractAttribute::getName().
1431 StringRef getName() const override { return "AAAMDGPUClusterDims"; }
1432
1433 /// See AbstractAttribute::getIdAddr().
1434 const char *getIdAddr() const override { return &ID; }
1435
1436 /// This function should return true if the type of the \p AA is
1437 /// AAAMDGPUClusterDims.
1438 static bool classof(const AbstractAttribute *AA) {
1439 return AA->getIdAddr() == &ID;
1440 }
1441
1442 virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;
1443
1444 /// Unique ID (due to the unique address)
1445 static const char ID;
1446};
1447
1448const char AAAMDGPUClusterDims::ID = 0;
1449
1450struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
1451 AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
1452 : AAAMDGPUClusterDims(IRP, A) {}
1453
1454 void initialize(Attributor &A) override {
1455 Function *F = getAssociatedFunction();
1456 assert(F && "empty associated function");
1457
1458 Attr = AMDGPU::ClusterDimsAttr::get(F: *F);
1459
1460 // No matter what a kernel function has, it is final.
1461 if (AMDGPU::isEntryFunctionCC(CC: F->getCallingConv())) {
1462 if (Attr.isUnknown())
1463 indicatePessimisticFixpoint();
1464 else
1465 indicateOptimisticFixpoint();
1466 }
1467 }
1468
1469 const std::string getAsStr(Attributor *A) const override {
1470 if (!getAssumed() || Attr.isUnknown())
1471 return "unknown";
1472 if (Attr.isNoCluster())
1473 return "no";
1474 if (Attr.isVariableDims())
1475 return "variable";
1476 return Attr.to_string();
1477 }
1478
1479 void trackStatistics() const override {}
1480
1481 ChangeStatus updateImpl(Attributor &A) override {
1482 auto OldState = Attr;
1483
1484 auto CheckCallSite = [&](AbstractCallSite CS) {
1485 const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
1486 QueryingAA: *this, IRP: IRPosition::function(F: *CS.getInstruction()->getFunction()),
1487 DepClass: DepClassTy::REQUIRED);
1488 if (!CallerAA || !CallerAA->isValidState())
1489 return false;
1490
1491 return merge(Other: CallerAA->getClusterDims());
1492 };
1493
1494 bool UsedAssumedInformation = false;
1495 if (!A.checkForAllCallSites(Pred: CheckCallSite, QueryingAA: *this,
1496 /*RequireAllCallSites=*/true,
1497 UsedAssumedInformation))
1498 return indicatePessimisticFixpoint();
1499
1500 return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1501 }
1502
1503 ChangeStatus manifest(Attributor &A) override {
1504 if (Attr.isUnknown())
1505 return ChangeStatus::UNCHANGED;
1506 return A.manifestAttrs(
1507 IRP: getIRPosition(),
1508 DeducedAttrs: {Attribute::get(Context&: getAssociatedFunction()->getContext(), Kind: AttrName,
1509 Val: Attr.to_string())},
1510 /*ForceReplace=*/true);
1511 }
1512
1513 const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
1514 return Attr;
1515 }
1516
1517private:
1518 bool merge(const AMDGPU::ClusterDimsAttr &Other) {
1519 // Case 1: Both of them are unknown yet, we do nothing and continue wait for
1520 // propagation.
1521 if (Attr.isUnknown() && Other.isUnknown())
1522 return true;
1523
1524 // Case 2: The other is determined, but we are unknown yet, we simply take
1525 // the other's value.
1526 if (Attr.isUnknown()) {
1527 Attr = Other;
1528 return true;
1529 }
1530
1531 // Case 3: We are determined but the other is unknown yet, we simply keep
1532 // everything unchanged.
1533 if (Other.isUnknown())
1534 return true;
1535
1536 // After this point, both are determined.
1537
1538 // Case 4: If they are same, we do nothing.
1539 if (Attr == Other)
1540 return true;
1541
1542 // Now they are not same.
1543
1544 // Case 5: If either of us uses cluster (but not both; otherwise case 4
1545 // would hold), then it is unknown whether cluster will be used, and the
1546 // state is final, unlike case 1.
1547 if (Attr.isNoCluster() || Other.isNoCluster()) {
1548 Attr.setUnknown();
1549 return false;
1550 }
1551
1552 // Case 6: Both of us use cluster, but the dims are different, so the result
1553 // is, cluster is used, but we just don't have a fixed dims.
1554 Attr.setVariableDims();
1555 return true;
1556 }
1557
1558 AMDGPU::ClusterDimsAttr Attr;
1559
1560 static constexpr char AttrName[] = "amdgpu-cluster-dims";
1561};
1562
1563AAAMDGPUClusterDims &
1564AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
1565 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1566 return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
1567 llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
1568}
1569
1570static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1571 AMDGPUAttributorOptions Options,
1572 ThinOrFullLTOPhase LTOPhase) {
1573 SetVector<Function *> Functions;
1574 for (Function &F : M) {
1575 if (!F.isIntrinsic())
1576 Functions.insert(X: &F);
1577 }
1578
1579 CallGraphUpdater CGUpdater;
1580 BumpPtrAllocator Allocator;
1581 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1582 DenseSet<const char *> Allowed(
1583 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1584 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1585 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1586 &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1587 &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
1588 &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1589 &AAAMDGPUClusterDims::ID, &AAAlign::ID});
1590
1591 AttributorConfig AC(CGUpdater);
1592 AC.IsClosedWorldModule = Options.IsClosedWorld;
1593 AC.Allowed = &Allowed;
1594 AC.IsModulePass = true;
1595 AC.DefaultInitializeLiveInternals = false;
1596 AC.IndirectCalleeSpecializationCallback =
1597 [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1598 Function &Callee, unsigned NumAssumedCallees) {
1599 return !AMDGPU::isEntryFunctionCC(CC: Callee.getCallingConv()) &&
1600 (NumAssumedCallees <= IndirectCallSpecializationThreshold);
1601 };
1602 AC.IPOAmendableCB = [](const Function &F) {
1603 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1604 };
1605
1606 Attributor A(Functions, InfoCache, AC);
1607
1608 LLVM_DEBUG({
1609 StringRef LTOPhaseStr = to_string(LTOPhase);
1610 dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
1611 << "[AMDGPUAttributor] Module " << M.getName() << " is "
1612 << (AC.IsClosedWorldModule ? "" : "not ")
1613 << "assumed to be a closed world.\n";
1614 });
1615
1616 for (auto *F : Functions) {
1617 A.getOrCreateAAFor<AAAMDAttributes>(IRP: IRPosition::function(F: *F));
1618 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRP: IRPosition::function(F: *F));
1619 A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRP: IRPosition::function(F: *F));
1620 CallingConv::ID CC = F->getCallingConv();
1621 if (!AMDGPU::isEntryFunctionCC(CC)) {
1622 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRP: IRPosition::function(F: *F));
1623 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRP: IRPosition::function(F: *F));
1624 }
1625
1626 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F: *F);
1627 if (!F->isDeclaration() && ST.hasClusters())
1628 A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRP: IRPosition::function(F: *F));
1629
1630 if (ST.hasGFX90AInsts())
1631 A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRP: IRPosition::function(F: *F));
1632
1633 for (auto &I : instructions(F)) {
1634 Value *Ptr = nullptr;
1635 if (auto *LI = dyn_cast<LoadInst>(Val: &I))
1636 Ptr = LI->getPointerOperand();
1637 else if (auto *SI = dyn_cast<StoreInst>(Val: &I))
1638 Ptr = SI->getPointerOperand();
1639 else if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: &I))
1640 Ptr = RMW->getPointerOperand();
1641 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: &I))
1642 Ptr = CmpX->getPointerOperand();
1643
1644 if (Ptr) {
1645 A.getOrCreateAAFor<AAAddressSpace>(IRP: IRPosition::value(V: *Ptr));
1646 A.getOrCreateAAFor<AANoAliasAddrSpace>(IRP: IRPosition::value(V: *Ptr));
1647 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Ptr)) {
1648 if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
1649 A.getOrCreateAAFor<AAAlign>(IRP: IRPosition::value(V: *Ptr));
1650 }
1651 }
1652 }
1653 }
1654
1655 return A.run() == ChangeStatus::CHANGED;
1656}
1657} // namespace
1658
1659PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
1660 ModuleAnalysisManager &AM) {
1661
1662 FunctionAnalysisManager &FAM =
1663 AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
1664 AnalysisGetter AG(FAM);
1665
1666 // TODO: Probably preserves CFG
1667 return runImpl(M, AG, TM, Options, LTOPhase) ? PreservedAnalyses::none()
1668 : PreservedAnalyses::all();
1669}
1670