1//===- AMDGPUHWEvents.cpp ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUHWEvents.h"
10#include "GCNSubtarget.h"
11#include "SIInstrInfo.h"
12#include "llvm/ADT/StringExtras.h"
13#include "llvm/Support/Debug.h"
14#include "llvm/Support/raw_ostream.h"
15
16namespace llvm {
17namespace AMDGPU {
18
19#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
20LLVM_DUMP_METHOD void HWEvents::dump() const { dbgs() << *this << "\n"; }
21#endif
22
23static HWEvents getExpertSchedulingEventType(const MachineInstr &Inst,
24 const SIInstrInfo &TII) {
25 if (TII.isVALU(MI: Inst, /*AllowLDSDMA=*/true) && !SIInstrInfo::isLDSDMA(MI: Inst)) {
26 // Core/Side-, DP-, XDL- and TRANS-MACC VALU instructions complete
27 // out-of-order with respect to each other, so each of these classes
28 // has its own event.
29
30 if (TII.isXDL(MI: Inst))
31 return HWEvents::VGPR_XDL_WRITE;
32
33 if (TII.isTRANS(MI: Inst))
34 return HWEvents::VGPR_TRANS_WRITE;
35
36 if (AMDGPU::isDPMACCInstruction(Opc: Inst.getOpcode()))
37 return HWEvents::VGPR_DPMACC_WRITE;
38
39 return HWEvents::VGPR_CSMACC_WRITE;
40 }
41
42 // FLAT and LDS instructions may read their VGPR sources out-of-order
43 // with respect to each other and all other VMEM instructions, so
44 // each of these also has a separate event.
45
46 if (TII.isFLAT(MI: Inst))
47 return HWEvents::VGPR_FLAT_READ;
48
49 if (TII.isDS(MI: Inst))
50 return HWEvents::VGPR_LDS_READ;
51
52 if (TII.isVMEM(MI: Inst) || TII.isVIMAGE(MI: Inst) || TII.isVSAMPLE(MI: Inst))
53 return HWEvents::VGPR_VMEM_READ;
54
55 // Otherwise, no hazard.
56 return HWEvents::NONE;
57}
58
59static HWEvents getVmemHWEvent(const MachineInstr &Inst, const GCNSubtarget &ST,
60 const SIInstrInfo &TII) {
61 switch (Inst.getOpcode()) {
62 // FIXME: GLOBAL_INV needs to be tracked with xcnt too.
63 case AMDGPU::GLOBAL_INV:
64 return HWEvents::GLOBAL_INV_ACCESS; // tracked using loadcnt, but doesn't
65 // write VGPRs
66 case AMDGPU::GLOBAL_WB:
67 case AMDGPU::GLOBAL_WBINV:
68 return HWEvents::VMEM_WRITE_ACCESS; // tracked using storecnt
69 default:
70 break;
71 }
72
73 assert(SIInstrInfo::isVMEM(Inst));
74 // LDS DMA loads are also stores, but on the LDS side. On the VMEM side
75 // these should use VM_CNT.
76 if (SIInstrInfo::mayWriteLDSThroughDMA(MI: Inst))
77 return HWEvents::VMEM_READ_ACCESS;
78
79 if (Inst.mayStore() &&
80 (!Inst.mayLoad() || SIInstrInfo::isAtomicNoRet(MI: Inst))) {
81 if (TII.mayAccessScratch(MI: Inst))
82 return HWEvents::SCRATCH_WRITE_ACCESS;
83 return HWEvents::VMEM_WRITE_ACCESS;
84 }
85 if (!ST.hasExtendedWaitCounts() || SIInstrInfo::isFLAT(MI: Inst))
86 return HWEvents::VMEM_READ_ACCESS;
87
88 if (SIInstrInfo::isImage(MI: Inst)) {
89 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: Inst.getOpcode());
90 const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
91 AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
92
93 if (BaseInfo->BVH)
94 return HWEvents::VMEM_BVH_READ_ACCESS;
95
96 // We have to make an additional check for isVSAMPLE here since some
97 // instructions don't have a sampler, but are still classified as sampler
98 // instructions for the purposes of e.g. waitcnt.
99 if (BaseInfo->Sampler || BaseInfo->MSAA || SIInstrInfo::isVSAMPLE(MI: Inst))
100 return HWEvents::VMEM_SAMPLER_READ_ACCESS;
101 }
102
103 return HWEvents::VMEM_READ_ACCESS;
104}
105
106static HWEvents getEventsForImpl(const MachineInstr &Inst,
107 const GCNSubtarget &ST,
108 const SIInstrInfo &TII) {
109 if (TII.isDS(MI: Inst) && TII.usesLGKM_CNT(MI: Inst)) {
110 if (TII.isAlwaysGDS(Opcode: Inst.getOpcode()) ||
111 TII.hasModifiersSet(MI: Inst, OpName: AMDGPU::OpName::gds))
112 return HWEvents::GDS_ACCESS | HWEvents::GDS_GPR_LOCK;
113
114 return HWEvents::LDS_ACCESS;
115 }
116
117 if (TII.isFLAT(MI: Inst)) {
118 if (SIInstrInfo::isGFX12CacheInvOrWBInst(Opc: Inst.getOpcode()))
119 return getVmemHWEvent(Inst, ST, TII);
120
121 assert(Inst.mayLoadOrStore());
122 HWEvents E = HWEvents::NONE;
123 if (TII.mayAccessVMEMThroughFlat(MI: Inst)) {
124 if (ST.hasWaitXcnt())
125 E |= HWEvents::VMEM_GROUP;
126 E |= getVmemHWEvent(Inst, ST, TII);
127 }
128
129 if (TII.mayAccessLDSThroughFlat(MI: Inst))
130 E |= HWEvents::LDS_ACCESS;
131
132 if (SIInstrInfo::usesASYNC_CNT(MI: Inst))
133 E |= HWEvents::ASYNC_ACCESS;
134
135 return E;
136 }
137
138 if (SIInstrInfo::usesTENSOR_CNT(MI: Inst))
139 return HWEvents::TENSOR_ACCESS;
140
141 if (SIInstrInfo::isVMEM(MI: Inst) &&
142 (!AMDGPU::getMUBUFIsBufferInv(Opc: Inst.getOpcode()) ||
143 Inst.getOpcode() == AMDGPU::BUFFER_WBL2)) {
144 // BUFFER_WBL2 is included here because unlike invalidates, has to be
145 // followed "S_WAITCNT vmcnt(0)" is needed after to ensure the writeback has
146 // completed.
147 HWEvents E = getVmemHWEvent(Inst, ST, TII);
148 if (ST.hasWaitXcnt())
149 E |= HWEvents::VMEM_GROUP;
150 if (ST.vmemWriteNeedsExpWaitcnt() &&
151 (Inst.mayStore() || SIInstrInfo::isAtomicRet(MI: Inst)))
152 E |= HWEvents::VMW_GPR_LOCK;
153
154 return E;
155 }
156
157 if (TII.isSMRD(MI: Inst)) {
158 if (ST.hasWaitXcnt())
159 return HWEvents::SMEM_GROUP | HWEvents::SMEM_ACCESS;
160 return HWEvents::SMEM_ACCESS;
161 }
162
163 if (SIInstrInfo::isLDSDIR(MI: Inst)) {
164 return HWEvents::EXP_LDS_ACCESS;
165 }
166
167 if (SIInstrInfo::isEXP(MI: Inst)) {
168 unsigned Imm = TII.getNamedOperand(MI: Inst, OperandName: AMDGPU::OpName::tgt)->getImm();
169 if (Imm >= AMDGPU::Exp::ET_PARAM0 && Imm <= AMDGPU::Exp::ET_PARAM31)
170 return HWEvents::EXP_PARAM_ACCESS;
171 if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST)
172 return HWEvents::EXP_POS_ACCESS;
173 return HWEvents::EXP_GPR_LOCK;
174 }
175
176 if (SIInstrInfo::isSBarrierSCCWrite(Opcode: Inst.getOpcode())) {
177 return HWEvents::SCC_WRITE;
178 }
179
180 switch (Inst.getOpcode()) {
181 case AMDGPU::S_SENDMSG:
182 case AMDGPU::S_SENDMSG_RTN_B32:
183 case AMDGPU::S_SENDMSG_RTN_B64:
184 case AMDGPU::S_SENDMSGHALT:
185 return HWEvents::SQ_MESSAGE;
186 case AMDGPU::S_MEMTIME:
187 case AMDGPU::S_MEMREALTIME:
188 case AMDGPU::S_GET_BARRIER_STATE_M0:
189 case AMDGPU::S_GET_BARRIER_STATE_IMM:
190 return HWEvents::SMEM_ACCESS;
191 }
192
193 return HWEvents::NONE;
194}
195
196HWEvents getEventsFor(const MachineInstr &Inst, const GCNSubtarget &ST,
197 bool IsExpertMode) {
198 const SIInstrInfo &TII = *ST.getInstrInfo();
199
200 if (IsExpertMode)
201 return getEventsForImpl(Inst, ST, TII) |
202 getExpertSchedulingEventType(Inst, TII);
203 return getEventsForImpl(Inst, ST, TII);
204}
205} // namespace AMDGPU
206
207raw_ostream &operator<<(raw_ostream &OS, AMDGPU::HWEvents Events) {
208 ListSeparator LS(" | ");
209#define AMDGPU_HW_EVENT(E, V) \
210 if (Events & AMDGPU::HWEvents::E) \
211 OS << LS << #E << " ";
212#include "AMDGPUHWEvents.def"
213 return OS;
214}
215
216} // namespace llvm
217