| 1 | //===- AMDGPUWaitcntUtils.cpp ---------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "AMDGPUWaitcntUtils.h" |
| 10 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| 11 | #include "Utils/AMDGPUBaseInfo.h" |
| 12 | |
| 13 | namespace llvm::AMDGPU { |
| 14 | |
| 15 | iota_range<InstCounterType> inst_counter_types(InstCounterType MaxCounter) { |
| 16 | return enum_seq(Begin: LOAD_CNT, End: MaxCounter); |
| 17 | } |
| 18 | |
| 19 | StringLiteral getInstCounterName(InstCounterType T) { |
| 20 | switch (T) { |
| 21 | case LOAD_CNT: |
| 22 | return "LOAD_CNT" ; |
| 23 | case DS_CNT: |
| 24 | return "DS_CNT" ; |
| 25 | case EXP_CNT: |
| 26 | return "EXP_CNT" ; |
| 27 | case STORE_CNT: |
| 28 | return "STORE_CNT" ; |
| 29 | case SAMPLE_CNT: |
| 30 | return "SAMPLE_CNT" ; |
| 31 | case BVH_CNT: |
| 32 | return "BVH_CNT" ; |
| 33 | case KM_CNT: |
| 34 | return "KM_CNT" ; |
| 35 | case X_CNT: |
| 36 | return "X_CNT" ; |
| 37 | case ASYNC_CNT: |
| 38 | return "ASYNC_CNT" ; |
| 39 | case TENSOR_CNT: |
| 40 | return "TENSOR_CNT" ; |
| 41 | case VA_VDST: |
| 42 | return "VA_VDST" ; |
| 43 | case VM_VSRC: |
| 44 | return "VM_VSRC" ; |
| 45 | case NUM_INST_CNTS: |
| 46 | return "NUM_INST_CNTS" ; |
| 47 | } |
| 48 | llvm_unreachable("Unhandled InstCounterType" ); |
| 49 | } |
| 50 | |
| 51 | HardwareLimits::HardwareLimits(const IsaVersion &IV) { |
| 52 | bool HasExtendedWaitCounts = IV.Major >= 12; |
| 53 | if (HasExtendedWaitCounts) { |
| 54 | LoadcntMax = getLoadcntBitMask(Version: IV); |
| 55 | DscntMax = getDscntBitMask(Version: IV); |
| 56 | } else { |
| 57 | LoadcntMax = getVmcntBitMask(Version: IV); |
| 58 | DscntMax = getLgkmcntBitMask(Version: IV); |
| 59 | } |
| 60 | ExpcntMax = getExpcntBitMask(Version: IV); |
| 61 | StorecntMax = getStorecntBitMask(Version: IV); |
| 62 | SamplecntMax = getSamplecntBitMask(Version: IV); |
| 63 | BvhcntMax = getBvhcntBitMask(Version: IV); |
| 64 | KmcntMax = getKmcntBitMask(Version: IV); |
| 65 | XcntMax = getXcntBitMask(Version: IV); |
| 66 | AsyncMax = getAsynccntBitMask(Version: IV); |
| 67 | VaVdstMax = DepCtr::getVaVdstBitMask(); |
| 68 | VmVsrcMax = DepCtr::getVmVsrcBitMask(); |
| 69 | } |
| 70 | |
| 71 | unsigned HardwareLimits::get(InstCounterType T) const { |
| 72 | switch (T) { |
| 73 | case AMDGPU::LOAD_CNT: |
| 74 | return LoadcntMax; |
| 75 | case AMDGPU::DS_CNT: |
| 76 | return DscntMax; |
| 77 | case AMDGPU::EXP_CNT: |
| 78 | return ExpcntMax; |
| 79 | case AMDGPU::STORE_CNT: |
| 80 | return StorecntMax; |
| 81 | case AMDGPU::SAMPLE_CNT: |
| 82 | return SamplecntMax; |
| 83 | case AMDGPU::BVH_CNT: |
| 84 | return BvhcntMax; |
| 85 | case AMDGPU::KM_CNT: |
| 86 | return KmcntMax; |
| 87 | case AMDGPU::X_CNT: |
| 88 | return XcntMax; |
| 89 | case AMDGPU::VA_VDST: |
| 90 | return VaVdstMax; |
| 91 | case AMDGPU::VM_VSRC: |
| 92 | return VmVsrcMax; |
| 93 | default: |
| 94 | return 0; |
| 95 | } |
| 96 | } |
| 97 | |
| 98 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 99 | void Waitcnt::dump() const { dbgs() << *this << '\n'; } |
| 100 | #endif |
| 101 | |
| 102 | Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) { |
| 103 | Waitcnt Decoded; |
| 104 | Decoded.set(T: LOAD_CNT, Val: decodeVmcnt(Version, Waitcnt: Encoded)); |
| 105 | Decoded.set(T: EXP_CNT, Val: decodeExpcnt(Version, Waitcnt: Encoded)); |
| 106 | Decoded.set(T: DS_CNT, Val: decodeLgkmcnt(Version, Waitcnt: Encoded)); |
| 107 | return Decoded; |
| 108 | } |
| 109 | |
| 110 | unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) { |
| 111 | return encodeWaitcnt(Version, Vmcnt: Decoded.get(T: LOAD_CNT), Expcnt: Decoded.get(T: EXP_CNT), |
| 112 | Lgkmcnt: Decoded.get(T: DS_CNT)); |
| 113 | } |
| 114 | |
| 115 | Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) { |
| 116 | Waitcnt Decoded; |
| 117 | Decoded.set(T: LOAD_CNT, Val: decodeLoadcnt(Version, Waitcnt: LoadcntDscnt)); |
| 118 | Decoded.set(T: DS_CNT, Val: decodeDscnt(Version, Waitcnt: LoadcntDscnt)); |
| 119 | return Decoded; |
| 120 | } |
| 121 | |
| 122 | Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) { |
| 123 | Waitcnt Decoded; |
| 124 | Decoded.set(T: STORE_CNT, Val: decodeStorecnt(Version, Waitcnt: StorecntDscnt)); |
| 125 | Decoded.set(T: DS_CNT, Val: decodeDscnt(Version, Waitcnt: StorecntDscnt)); |
| 126 | return Decoded; |
| 127 | } |
| 128 | |
| 129 | unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) { |
| 130 | return encodeLoadcntDscnt(Version, Loadcnt: Decoded.get(T: LOAD_CNT), |
| 131 | Dscnt: Decoded.get(T: DS_CNT)); |
| 132 | } |
| 133 | |
| 134 | unsigned encodeStorecntDscnt(const IsaVersion &Version, |
| 135 | const Waitcnt &Decoded) { |
| 136 | return encodeStorecntDscnt(Version, Storecnt: Decoded.get(T: STORE_CNT), |
| 137 | Dscnt: Decoded.get(T: DS_CNT)); |
| 138 | } |
| 139 | |
| 140 | std::optional<AMDGPU::InstCounterType> counterTypeForInstr(unsigned Opcode) { |
| 141 | switch (Opcode) { |
| 142 | case AMDGPU::S_WAIT_LOADCNT: |
| 143 | return AMDGPU::LOAD_CNT; |
| 144 | case AMDGPU::S_WAIT_EXPCNT: |
| 145 | return AMDGPU::EXP_CNT; |
| 146 | case AMDGPU::S_WAIT_STORECNT: |
| 147 | return AMDGPU::STORE_CNT; |
| 148 | case AMDGPU::S_WAIT_SAMPLECNT: |
| 149 | return AMDGPU::SAMPLE_CNT; |
| 150 | case AMDGPU::S_WAIT_BVHCNT: |
| 151 | return AMDGPU::BVH_CNT; |
| 152 | case AMDGPU::S_WAIT_DSCNT: |
| 153 | return AMDGPU::DS_CNT; |
| 154 | case AMDGPU::S_WAIT_KMCNT: |
| 155 | return AMDGPU::KM_CNT; |
| 156 | case AMDGPU::S_WAIT_XCNT: |
| 157 | return AMDGPU::X_CNT; |
| 158 | case AMDGPU::S_WAIT_ASYNCCNT: |
| 159 | return AMDGPU::ASYNC_CNT; |
| 160 | case AMDGPU::S_WAIT_TENSORCNT: |
| 161 | return AMDGPU::TENSOR_CNT; |
| 162 | default: |
| 163 | return {}; |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | } // namespace llvm::AMDGPU |
| 168 | |