//===-- SIProgramInfo.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// The SIProgramInfo tracks resource usage and hardware flags for kernels and
/// entry functions.
//
//===----------------------------------------------------------------------===//
//

#include "SIProgramInfo.h"
#include "GCNSubtarget.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCExpr.h"

using namespace llvm;

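// Reset all tracked values to their defaults. Fields carried as MCExpr are
// reinitialized to a constant-zero expression in the function's MCContext,
// since their final values may only be resolvable at MC emission time.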
void SIProgramInfo::reset(const MachineFunction &MF) {
  MCContext &Ctx = MF.getContext();

  const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);

  CodeSizeInBytes.reset();

  VGPRBlocks = ZeroExpr;
  SGPRBlocks = ZeroExpr;
  Priority = 0;
  FloatMode = 0;
  Priv = 0;
  DX10Clamp = 0;
  DebugMode = 0;
  IEEEMode = 0;
  WgpMode = 0;
  MemOrdered = 0;
  FwdProgress = 0;
  RrWgMode = 0;
  ScratchSize = ZeroExpr;

  LDSBlocks = 0;
  ScratchBlocks = ZeroExpr;

  ScratchEnable = ZeroExpr;
  UserSGPR = 0;
  TrapHandlerEnable = 0;
  TGIdXEnable = 0;
  TGIdYEnable = 0;
  TGIdZEnable = 0;
  TGSizeEnable = 0;
  TIdIGCompCount = 0;
  EXCPEnMSB = 0;
  LdsSize = 0;
  EXCPEnable = 0;

  ComputePGMRSrc3 = ZeroExpr;

  NumVGPR = ZeroExpr;
  NumArchVGPR = ZeroExpr;
  NumAccVGPR = ZeroExpr;
  AccumOffset = ZeroExpr;
  TgSplit = 0;
  NumSGPR = ZeroExpr;
  SGPRSpill = 0;
  VGPRSpill = 0;
  LDSSize = 0;
  FlatUsed = ZeroExpr;

  NumSGPRsForWavesPerEU = ZeroExpr;
  NumVGPRsForWavesPerEU = ZeroExpr;
  Occupancy = ZeroExpr;
  DynamicCallStack = ZeroExpr;
  VCCUsed = ZeroExpr;
}

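// Assemble the statically known fields of COMPUTE_PGM_RSRC1. Conditional
// bits (DX10_CLAMP, IEEE_MODE, RR_WG_MODE) are set only on subtargets that
// support them; FWD_PROGRESS is currently limited to AMDHSA.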
static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
                                      const GCNSubtarget &ST) {
  uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
                 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
                 S_00B848_PRIV(ProgInfo.Priv) |
                 S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
                 S_00B848_WGP_MODE(ProgInfo.WgpMode) |
                 S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);

  if (ST.hasDX10ClampMode())
    Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);

  if (ST.hasIEEEMode())
    Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);

  // TODO: in the long run we will want to enable this unconditionally.
  if (ST.getTargetTriple().getOS() == Triple::OSType::AMDHSA)
    Reg |= S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress);

  if (ST.hasRrWGMode())
    Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);

  return Reg;
}

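// Assemble the statically known fields of the graphics PGM_RSRC1 register.
// The WGP_MODE and MEM_ORDERED bits live at different register offsets per
// shader stage, so they are selected by calling convention.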
static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
                               CallingConv::ID CC, const GCNSubtarget &ST) {
  uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
                 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
                 S_00B848_PRIV(ProgInfo.Priv) |
                 S_00B848_DEBUG_MODE(ProgInfo.DebugMode);

  if (ST.hasDX10ClampMode())
    Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);

  if (ST.hasIEEEMode())
    Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);

  if (ST.hasRrWGMode())
    Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);

  switch (CC) {
  case CallingConv::AMDGPU_PS:
    Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  case CallingConv::AMDGPU_VS:
    Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  case CallingConv::AMDGPU_GS:
    Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
           S_00B228_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  case CallingConv::AMDGPU_HS:
    Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
           S_00B428_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  default:
    break;
  }
  return Reg;
}

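// Assemble the statically known fields of COMPUTE_PGM_RSRC2. SCRATCH_EN is
// handled separately in getComputePGMRSrc2() because it is an MCExpr.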
static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo) {
  uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
                 S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable) |
                 S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) |
                 S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) |
                 S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) |
                 S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) |
                 S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) |
                 S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) |
                 S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
                 S_00B84C_EXCP_EN(ProgInfo.EXCPEnable);

  return Reg;
}

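// Build the MCExpr (Val & Mask) << Shift. A zero Mask means "no mask" and a
// zero Shift means "no shift"; either operation is then omitted. E.g.
// MaskShift(E, 0xF, 6, Ctx) produces the expression (E & 0xF) << 6.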
static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
                               MCContext &Ctx) {
  if (Mask) {
    const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
    Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
  }
  if (Shift) {
    const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
    Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
  }
  return Val;
}

const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
                                                MCContext &Ctx) const {
  uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
  const MCExpr *Res = MCBinaryExpr::createOr(
      MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
      MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
  return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
}

const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
                                         const GCNSubtarget &ST,
                                         MCContext &Ctx) const {
  if (AMDGPU::isCompute(CC))
    return getComputePGMRSrc1(ST, Ctx);

  uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST);
  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
  const MCExpr *Res = MCBinaryExpr::createOr(
      MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
      MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
  return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
}

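// ScratchEnable stays an MCExpr (scratch usage may not be known until MC
// emission), so it is OR'd in on top of the statically known RSRC2 fields.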
const MCExpr *SIProgramInfo::getComputePGMRSrc2(MCContext &Ctx) const {
  uint64_t Reg = getComputePGMRSrc2Reg(*this);
  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
  return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx);
}

const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
                                         MCContext &Ctx) const {
  if (AMDGPU::isCompute(CC))
    return getComputePGMRSrc2(Ctx);

  return MCConstantExpr::create(0, Ctx);
}

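// Compute the function's code size in bytes and cache the result; the cache
// is consulted only for full-size queries. With IsLowerBound set, alignment
// padding and inline asm (whose exact sizes are unknowable here) are skipped
// to yield a conservative lower bound.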
uint64_t SIProgramInfo::getFunctionCodeSize(const MachineFunction &MF,
                                            bool IsLowerBound) {
  if (!IsLowerBound && CodeSizeInBytes.has_value())
    return *CodeSizeInBytes;

  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = STM.getInstrInfo();

  uint64_t CodeSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    // The amount of padding needed to align code can be both underestimated
    // and overestimated: for inline asm, getInstSizeInBytes() returns the
    // maximum possible size of a single instruction, while the real size may
    // differ, so CodeSize may already be off at this point.
    if (!IsLowerBound)
      CodeSize = alignTo(CodeSize, MBB.getAlignment());

    for (const MachineInstr &MI : MBB) {
      // TODO: CodeSize should account for multiple functions.

      if (MI.isMetaInstruction())
        continue;

      // We cannot properly estimate inline asm size. It can be as small as
      // zero if it is just a comment.
      if (IsLowerBound && MI.isInlineAsm())
        continue;

      CodeSize += TII->getInstSizeInBytes(MI);
    }
  }

  CodeSizeInBytes = CodeSize;
  return CodeSize;
}