AMDGPUHWEvents.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/AMDGPUHWEvents.cpp]

1	//===- AMDGPUHWEvents.cpp ---------------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "AMDGPUHWEvents.h"
10	#include "GCNSubtarget.h"
11	#include "SIInstrInfo.h"
12	#include "llvm/ADT/StringExtras.h"
13	#include "llvm/Support/Debug.h"
14	#include "llvm/Support/raw_ostream.h"
15
16	namespace llvm {
17	namespace AMDGPU {
18
19	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
20	LLVM_DUMP_METHOD void HWEvents::dump() const { dbgs() << *this << "\n"; }
21	#endif
22
23	static HWEvents getExpertSchedulingEventType(const MachineInstr &Inst,
24	const SIInstrInfo &TII) {
25	if (TII.isVALU(MI: Inst, /AllowLDSDMA=/false)) {
26	// Core/Side-, DP-, XDL- and TRANS-MACC VALU instructions complete
27	// out-of-order with respect to each other, so each of these classes
28	// has its own event.
29
30	if (TII.isXDL(MI: Inst))
31	return HWEvents::VGPR_XDL_READ \| HWEvents::VGPR_XDL_WRITE;
32
33	if (TII.isTRANS(MI: Inst))
34	return HWEvents::VGPR_TRANS_READ \| HWEvents::VGPR_TRANS_WRITE;
35
36	if (AMDGPU::isDPMACCInstruction(Opc: Inst.getOpcode()))
37	return HWEvents::VGPR_DPMACC_READ \| HWEvents::VGPR_DPMACC_WRITE;
38
39	return HWEvents::VGPR_CSMACC_READ \| HWEvents::VGPR_CSMACC_WRITE;
40	}
41
42	// FLAT and LDS instructions may read their VGPR sources out-of-order
43	// with respect to each other and all other VMEM instructions, so
44	// each of these also has a separate event.
45
46	if (TII.isFLAT(MI: Inst))
47	return HWEvents::VGPR_FLAT_READ;
48
49	if (TII.isDS(MI: Inst))
50	return HWEvents::VGPR_LDS_READ;
51
52	if (TII.isVMEM(MI: Inst) \|\| TII.isVIMAGE(MI: Inst) \|\| TII.isVSAMPLE(MI: Inst))
53	return HWEvents::VGPR_VMEM_READ;
54
55	// Otherwise, no hazard.
56	return HWEvents::NONE;
57	}
58
59	HWEvents getSimplifiedVMEMEventsFor(const MachineInstr &Inst,
60	const SIInstrInfo &TII) {
61	switch (Inst.getOpcode()) {
62	// FIXME: GLOBAL_INV needs to be tracked with xcnt too.
63	case AMDGPU::GLOBAL_INV:
64	return HWEvents::GLOBAL_INV_ACCESS; // tracked using loadcnt, but doesn't
65	// write VGPRs
66	case AMDGPU::GLOBAL_WB:
67	case AMDGPU::GLOBAL_WBINV:
68	return HWEvents::VMEM_WRITE_ACCESS; // tracked using storecnt
69	default:
70	break;
71	}
72
73	assert(SIInstrInfo::isVMEM(Inst));
74	// LDS DMA loads are also stores, but on the LDS side. On the VMEM side
75	// these should use VM_CNT.
76	if (SIInstrInfo::mayWriteLDSThroughDMA(MI: Inst))
77	return HWEvents::VMEM_READ_ACCESS;
78
79	if (Inst.mayStore() &&
80	(!Inst.mayLoad() \|\| SIInstrInfo::isAtomicNoRet(MI: Inst))) {
81	if (TII.mayAccessScratch(MI: Inst))
82	return HWEvents::SCRATCH_WRITE_ACCESS;
83	return HWEvents::VMEM_WRITE_ACCESS;
84	}
85
86	if (SIInstrInfo::isFLAT(MI: Inst))
87	return HWEvents::VMEM_READ_ACCESS;
88
89	if (SIInstrInfo::isImage(MI: Inst)) {
90	const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: Inst.getOpcode());
91	const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
92	AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
93
94	if (BaseInfo->BVH)
95	return HWEvents::VMEM_BVH_READ_ACCESS;
96
97	// We have to make an additional check for isVSAMPLE here since some
98	// instructions don't have a sampler, but are still classified as sampler
99	// instructions for the purposes of e.g. waitcnt.
100	if (BaseInfo->Sampler \|\| BaseInfo->MSAA \|\| SIInstrInfo::isVSAMPLE(MI: Inst))
101	return HWEvents::VMEM_SAMPLER_READ_ACCESS;
102	}
103
104	return HWEvents::VMEM_READ_ACCESS;
105	}
106
107	static HWEvents getEventsForImpl(const MachineInstr &Inst,
108	const GCNSubtarget &ST, const SIInstrInfo &TII,
109	bool TgSplit) {
110	if (TII.isDS(MI: Inst) && TII.usesLGKM_CNT(MI: Inst)) {
111	if (TII.isAlwaysGDS(Opcode: Inst.getOpcode()) \|\|
112	TII.hasModifiersSet(MI: Inst, OpName: AMDGPU::OpName::gds))
113	return HWEvents::GDS_ACCESS \| HWEvents::GDS_GPR_LOCK;
114
115	return HWEvents::LDS_ACCESS;
116	}
117
118	if (TII.isFLAT(MI: Inst)) {
119	if (SIInstrInfo::isGFX12CacheInvOrWBInst(Opc: Inst.getOpcode()))
120	return getSimplifiedVMEMEventsFor(Inst, TII);
121
122	assert(Inst.mayLoadOrStore());
123	HWEvents E = HWEvents::NONE;
124	if (TII.mayAccessVMEMThroughFlat(MI: Inst)) {
125	if (ST.hasWaitXcnt())
126	E \|= HWEvents::VMEM_GROUP;
127	E \|= getSimplifiedVMEMEventsFor(Inst, TII);
128	}
129
130	if (TII.mayAccessLDSThroughFlat(MI: Inst, TgSplit))
131	E \|= HWEvents::LDS_ACCESS;
132
133	if (SIInstrInfo::usesASYNC_CNT(MI: Inst))
134	E \|= HWEvents::ASYNC_ACCESS;
135
136	return E;
137	}
138
139	if (SIInstrInfo::usesTENSOR_CNT(MI: Inst))
140	return HWEvents::TENSOR_ACCESS;
141
142	if (SIInstrInfo::isVMEM(MI: Inst) &&
143	(!AMDGPU::getMUBUFIsBufferInv(Opc: Inst.getOpcode()) \|\|
144	Inst.getOpcode() == AMDGPU::BUFFER_WBL2)) {
145	// BUFFER_WBL2 is included here because unlike invalidates, has to be
146	// followed "S_WAITCNT vmcnt(0)" is needed after to ensure the writeback has
147	// completed.
148	HWEvents E = getSimplifiedVMEMEventsFor(Inst, TII);
149	if (ST.hasWaitXcnt())
150	E \|= HWEvents::VMEM_GROUP;
151	if (ST.vmemWriteNeedsExpWaitcnt() &&
152	(Inst.mayStore() \|\| SIInstrInfo::isAtomicRet(MI: Inst)))
153	E \|= HWEvents::VMW_GPR_LOCK;
154
155	return E;
156	}
157
158	if (TII.isSMRD(MI: Inst)) {
159	if (ST.hasWaitXcnt())
160	return HWEvents::SMEM_GROUP \| HWEvents::SMEM_ACCESS;
161	return HWEvents::SMEM_ACCESS;
162	}
163
164	if (SIInstrInfo::isLDSDIR(MI: Inst)) {
165	return HWEvents::EXP_LDS_ACCESS;
166	}
167
168	if (SIInstrInfo::isEXP(MI: Inst)) {
169	unsigned Imm = TII.getNamedOperand(MI: Inst, OperandName: AMDGPU::OpName::tgt)->getImm();
170	if (Imm >= AMDGPU::Exp::ET_PARAM0 && Imm <= AMDGPU::Exp::ET_PARAM31)
171	return HWEvents::EXP_PARAM_ACCESS;
172	if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST)
173	return HWEvents::EXP_POS_ACCESS;
174	return HWEvents::EXP_GPR_LOCK;
175	}
176
177	if (SIInstrInfo::isSBarrierSCCWrite(Opcode: Inst.getOpcode())) {
178	return HWEvents::SCC_WRITE;
179	}
180
181	switch (Inst.getOpcode()) {
182	case AMDGPU::S_SENDMSG:
183	case AMDGPU::S_SENDMSG_RTN_B32:
184	case AMDGPU::S_SENDMSG_RTN_B64:
185	case AMDGPU::S_SENDMSGHALT:
186	return HWEvents::SQ_MESSAGE;
187	case AMDGPU::S_MEMTIME:
188	case AMDGPU::S_MEMREALTIME:
189	case AMDGPU::S_GET_BARRIER_STATE_M0:
190	case AMDGPU::S_GET_BARRIER_STATE_IMM:
191	return HWEvents::SMEM_ACCESS;
192	}
193
194	return HWEvents::NONE;
195	}
196
197	HWEvents getEventsFor(const MachineInstr &Inst, const GCNSubtarget &ST,
198	bool IsExpertMode, bool TgSplit) {
199	const SIInstrInfo &TII = *ST.getInstrInfo();
200
201	if (IsExpertMode)
202	return getEventsForImpl(Inst, ST, TII, TgSplit) \|
203	getExpertSchedulingEventType(Inst, TII);
204	return getEventsForImpl(Inst, ST, TII, TgSplit);
205	}
206	} // namespace AMDGPU
207
208	raw_ostream &operator<<(raw_ostream &OS, AMDGPU::HWEvents Events) {
209	ListSeparator LS(" \| ");
210	#define AMDGPU_HW_EVENT(E, V) \
211	if (Events & AMDGPU::HWEvents::E) \
212	OS << LS << #E << " ";
213	#include "AMDGPUHWEvents.def"
214	return OS;
215	}
216
217	} // namespace llvm
218

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/AMDGPUHWEvents.cpp