1//===- AMDGPUWaitcntUtils.cpp ---------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUWaitcntUtils.h"
10#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11#include "Utils/AMDGPUBaseInfo.h"
12
13namespace llvm::AMDGPU {
14
15iota_range<InstCounterType> inst_counter_types(InstCounterType MaxCounter) {
16 return enum_seq(Begin: LOAD_CNT, End: MaxCounter);
17}
18
19StringLiteral getInstCounterName(InstCounterType T) {
20 switch (T) {
21 case LOAD_CNT:
22 return "LOAD_CNT";
23 case DS_CNT:
24 return "DS_CNT";
25 case EXP_CNT:
26 return "EXP_CNT";
27 case STORE_CNT:
28 return "STORE_CNT";
29 case SAMPLE_CNT:
30 return "SAMPLE_CNT";
31 case BVH_CNT:
32 return "BVH_CNT";
33 case KM_CNT:
34 return "KM_CNT";
35 case X_CNT:
36 return "X_CNT";
37 case ASYNC_CNT:
38 return "ASYNC_CNT";
39 case TENSOR_CNT:
40 return "TENSOR_CNT";
41 case VA_VDST:
42 return "VA_VDST";
43 case VM_VSRC:
44 return "VM_VSRC";
45 case NUM_INST_CNTS:
46 return "NUM_INST_CNTS";
47 }
48 llvm_unreachable("Unhandled InstCounterType");
49}
50
51HardwareLimits::HardwareLimits(const IsaVersion &IV) {
52 bool HasExtendedWaitCounts = IV.Major >= 12;
53 if (HasExtendedWaitCounts) {
54 LoadcntMax = getLoadcntBitMask(Version: IV);
55 DscntMax = getDscntBitMask(Version: IV);
56 } else {
57 LoadcntMax = getVmcntBitMask(Version: IV);
58 DscntMax = getLgkmcntBitMask(Version: IV);
59 }
60 ExpcntMax = getExpcntBitMask(Version: IV);
61 StorecntMax = getStorecntBitMask(Version: IV);
62 SamplecntMax = getSamplecntBitMask(Version: IV);
63 BvhcntMax = getBvhcntBitMask(Version: IV);
64 KmcntMax = getKmcntBitMask(Version: IV);
65 XcntMax = getXcntBitMask(Version: IV);
66 AsyncMax = getAsynccntBitMask(Version: IV);
67 VaVdstMax = DepCtr::getVaVdstBitMask();
68 VmVsrcMax = DepCtr::getVmVsrcBitMask();
69}
70
71unsigned HardwareLimits::get(InstCounterType T) const {
72 switch (T) {
73 case AMDGPU::LOAD_CNT:
74 return LoadcntMax;
75 case AMDGPU::DS_CNT:
76 return DscntMax;
77 case AMDGPU::EXP_CNT:
78 return ExpcntMax;
79 case AMDGPU::STORE_CNT:
80 return StorecntMax;
81 case AMDGPU::SAMPLE_CNT:
82 return SamplecntMax;
83 case AMDGPU::BVH_CNT:
84 return BvhcntMax;
85 case AMDGPU::KM_CNT:
86 return KmcntMax;
87 case AMDGPU::X_CNT:
88 return XcntMax;
89 case AMDGPU::VA_VDST:
90 return VaVdstMax;
91 case AMDGPU::VM_VSRC:
92 return VmVsrcMax;
93 default:
94 return 0;
95 }
96}
97
98#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
99void Waitcnt::dump() const { dbgs() << *this << '\n'; }
100#endif
101
102Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
103 Waitcnt Decoded;
104 Decoded.set(T: LOAD_CNT, Val: decodeVmcnt(Version, Waitcnt: Encoded));
105 Decoded.set(T: EXP_CNT, Val: decodeExpcnt(Version, Waitcnt: Encoded));
106 Decoded.set(T: DS_CNT, Val: decodeLgkmcnt(Version, Waitcnt: Encoded));
107 return Decoded;
108}
109
110unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
111 return encodeWaitcnt(Version, Vmcnt: Decoded.get(T: LOAD_CNT), Expcnt: Decoded.get(T: EXP_CNT),
112 Lgkmcnt: Decoded.get(T: DS_CNT));
113}
114
115Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
116 Waitcnt Decoded;
117 Decoded.set(T: LOAD_CNT, Val: decodeLoadcnt(Version, Waitcnt: LoadcntDscnt));
118 Decoded.set(T: DS_CNT, Val: decodeDscnt(Version, Waitcnt: LoadcntDscnt));
119 return Decoded;
120}
121
122Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
123 Waitcnt Decoded;
124 Decoded.set(T: STORE_CNT, Val: decodeStorecnt(Version, Waitcnt: StorecntDscnt));
125 Decoded.set(T: DS_CNT, Val: decodeDscnt(Version, Waitcnt: StorecntDscnt));
126 return Decoded;
127}
128
129unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
130 return encodeLoadcntDscnt(Version, Loadcnt: Decoded.get(T: LOAD_CNT),
131 Dscnt: Decoded.get(T: DS_CNT));
132}
133
134unsigned encodeStorecntDscnt(const IsaVersion &Version,
135 const Waitcnt &Decoded) {
136 return encodeStorecntDscnt(Version, Storecnt: Decoded.get(T: STORE_CNT),
137 Dscnt: Decoded.get(T: DS_CNT));
138}
139
140std::optional<AMDGPU::InstCounterType> counterTypeForInstr(unsigned Opcode) {
141 switch (Opcode) {
142 case AMDGPU::S_WAIT_LOADCNT:
143 return AMDGPU::LOAD_CNT;
144 case AMDGPU::S_WAIT_EXPCNT:
145 return AMDGPU::EXP_CNT;
146 case AMDGPU::S_WAIT_STORECNT:
147 return AMDGPU::STORE_CNT;
148 case AMDGPU::S_WAIT_SAMPLECNT:
149 return AMDGPU::SAMPLE_CNT;
150 case AMDGPU::S_WAIT_BVHCNT:
151 return AMDGPU::BVH_CNT;
152 case AMDGPU::S_WAIT_DSCNT:
153 return AMDGPU::DS_CNT;
154 case AMDGPU::S_WAIT_KMCNT:
155 return AMDGPU::KM_CNT;
156 case AMDGPU::S_WAIT_XCNT:
157 return AMDGPU::X_CNT;
158 case AMDGPU::S_WAIT_ASYNCCNT:
159 return AMDGPU::ASYNC_CNT;
160 case AMDGPU::S_WAIT_TENSORCNT:
161 return AMDGPU::TENSOR_CNT;
162 default:
163 return {};
164 }
165}
166
167} // namespace llvm::AMDGPU
168