AMDGPUWaitcntUtils.h source code [llvm_projects/llvm/lib/Target/AMDGPU/AMDGPUWaitcntUtils.h]

1	//===- AMDGPUWaitcntUtils.h ------------------------------------*- C++ -*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
10	#define LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
11
12	#include "llvm/ADT/Sequence.h"
13	#include "llvm/ADT/StringExtras.h"
14	#include "llvm/Support/Debug.h"
15	#include "llvm/Support/raw_ostream.h"
16	#include "llvm/TargetParser/AMDGPUTargetParser.h"
17
18	namespace llvm {
19
20	namespace AMDGPU {
21
/// Identifies one wait counter.
///
/// The enumerators are laid out in three consecutive groups, each closed by a
/// NUM_* marker that doubles as the first value of the next group, so that a
/// marker can be used as an exclusive upper bound when enumerating counters.
/// The values are also used directly as array indices.
enum InstCounterType {
  LOAD_CNT = 0, // VMcnt prior to gfx12.
  DS_CNT,       // LGKMcnt prior to gfx12.
  EXP_CNT,      //
  STORE_CNT,    // VScnt in gfx10/gfx11.
  NUM_NORMAL_INST_CNTS,
  SAMPLE_CNT = NUM_NORMAL_INST_CNTS, // gfx12+ only.
  BVH_CNT,                           // gfx12+ only.
  KM_CNT,                            // gfx12+ only.
  X_CNT,                             // gfx1250.
  ASYNC_CNT,                         // gfx1250.
  TENSOR_CNT,                        // gfx1250.
  NUM_EXTENDED_INST_CNTS,
  VA_VDST_RD = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
  VA_VDST_WR,                          // gfx12+ expert mode only.
  VM_VSRC,                             // gfx12+ expert mode only.
  NUM_EXPERT_INST_CNTS,
  NUM_INST_CNTS = NUM_EXPERT_INST_CNTS
};
41
42	StringLiteral getInstCounterName(InstCounterType T);
43
44	// Return an iterator over all counters between LOAD_CNT (the first counter)
45	// and \c MaxCounter (exclusive, default value yields an enumeration over
46	// all counters).
47	iota_range<InstCounterType>
48	inst_counter_types(InstCounterType MaxCounter = NUM_INST_CNTS);
49
50	/// Represents the hardware counter limits for different wait count types.
51	struct HardwareLimits {
52	unsigned LoadcntMax; // Corresponds to Vmcnt prior to gfx12.
53	unsigned ExpcntMax;
54	unsigned DscntMax; // Corresponds to LGKMcnt prior to gfx12.
55	unsigned StorecntMax; // Corresponds to VScnt in gfx10/gfx11.
56	unsigned SamplecntMax; // gfx12+ only.
57	unsigned BvhcntMax; // gfx12+ only.
58	unsigned KmcntMax; // gfx12+ only.
59	unsigned XcntMax; // gfx1250.
60	unsigned AsyncMax; // gfx1250.
61	unsigned VaVdstMax; // gfx12+ expert mode only.
62	unsigned VmVsrcMax; // gfx12+ expert mode only.
63
64	HardwareLimits() = default;
65
66	/// Initializes hardware limits from ISA version.
67	HardwareLimits(const IsaVersion &IV);
68
69	unsigned get(InstCounterType T) const;
70	};
71
72	} // namespace AMDGPU
73
74	template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
75	static constexpr bool is_iterable = true;
76	};
77
78	namespace AMDGPU {
79
80	/// Represents the counter values to wait for in an s_waitcnt instruction.
81	///
82	/// Large values (including the maximum possible integer) can be used to
83	/// represent "don't care" waits.
84	class Waitcnt {
85	std::array<unsigned, NUM_INST_CNTS> Cnt;
86
87	public:
88	unsigned get(InstCounterType T) const { return Cnt [T]; }
89	void set(InstCounterType T, unsigned Val) { Cnt [T] = Val; }
90
91	Waitcnt() { fill(Range&: Cnt, Value: ~`0u`); }
92	// Pre-gfx12 constructor.
93	Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
94	: Waitcnt () {
95	Cnt [LOAD_CNT] = VmCnt;
96	Cnt [EXP_CNT] = ExpCnt;
97	Cnt [DS_CNT] = LgkmCnt;
98	Cnt [STORE_CNT] = VsCnt;
99	}
100
101	// gfx12+ constructor.
102	Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
103	unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
104	unsigned AsyncCnt, unsigned TensorCnt, unsigned VaVdstRd,
105	unsigned VaVdstWr, unsigned VmVsrc)
106	: Waitcnt () {
107	Cnt [LOAD_CNT] = LoadCnt;
108	Cnt [DS_CNT] = DsCnt;
109	Cnt [EXP_CNT] = ExpCnt;
110	Cnt [STORE_CNT] = StoreCnt;
111	Cnt [SAMPLE_CNT] = SampleCnt;
112	Cnt [BVH_CNT] = BvhCnt;
113	Cnt [KM_CNT] = KmCnt;
114	Cnt [X_CNT] = XCnt;
115	Cnt [ASYNC_CNT] = AsyncCnt;
116	Cnt [TENSOR_CNT] = TensorCnt;
117	Cnt [VA_VDST_RD] = VaVdstRd;
118	Cnt [VA_VDST_WR] = VaVdstWr;
119	Cnt [VM_VSRC] = VmVsrc;
120	}
121
122	bool hasWait() const {
123	return any_of(Range: Cnt, P: [](unsigned Val) { return Val != ~`0u`; });
124	}
125
126	bool hasWaitExceptStoreCnt() const {
127	for (InstCounterType T : inst_counter_types()) {
128	if (T == STORE_CNT)
129	continue;
130	if (Cnt [T] != ~`0u`)
131	return true;
132	}
133	return false;
134	}
135
136	void add(AMDGPU::InstCounterType T, unsigned Count) {
137	set(T, Val: std::min(a: get(T), b: Count));
138	}
139
140	void clear(AMDGPU::InstCounterType T) { set(T, Val: ~`0u`); }
141
142	bool hasWaitStoreCnt() const { return Cnt [STORE_CNT] != ~`0u`; }
143
144	bool hasWaitDepctr() const {
145	return Cnt [VA_VDST_RD] != ~`0u` \|\| Cnt [VA_VDST_WR] != ~`0u` \|\|
146	Cnt [VM_VSRC] != ~`0u`;
147	}
148
149	Waitcnt combined(const Waitcnt &Other) const {
150	// Does the right thing provided self and Other are either both pre-gfx12
151	// or both gfx12+.
152	Waitcnt Wait;
153	for (InstCounterType T : inst_counter_types())
154	Wait.Cnt [T] = std::min(a: Cnt [T], b: Other.Cnt [T]);
155	return Wait;
156	}
157
158	void print(raw_ostream &OS) const {
159	ListSeparator LS;
160	for (InstCounterType T : inst_counter_types())
161	OS << LS << getInstCounterName(T) << ": " << Cnt [T];
162	if (LS.unused())
163	OS << "none";
164	OS << `'\n'`;
165	}
166
167	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
168	LLVM_DUMP_METHOD void dump() const;
169	#endif
170
171	friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
172	Wait.print(OS);
173	return OS;
174	}
175	};
176
177	Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
178
179	unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
180
181	// The following are only meaningful on targets that support
182	// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
183
184	/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
185	/// isa \p Version.
186	Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
187
188	/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
189	/// isa \p Version.
190	Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
191
192	/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
193	/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
194	/// \p Version.
195	unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
196
197	/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
198	/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
199	/// \p Version.
200	unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
201
202	/// Determine if \p MI is a gfx12+ single-counter S_WAIT_CNT instruction,*
203	/// and if so, which counter it is waiting on.
204	std::optional<AMDGPU::InstCounterType> counterTypeForInstr(unsigned Opcode);
205
206	} // namespace AMDGPU
207
208	} // namespace llvm
209
210	#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
211

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/AMDGPUWaitcntUtils.h