1//===-- SIProgramInfo.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10///
11/// The SIProgramInfo tracks resource usage and hardware flags for kernels and
12/// entry functions.
13//
14//===----------------------------------------------------------------------===//
15//
16
17#include "SIProgramInfo.h"
18#include "GCNSubtarget.h"
19#include "SIDefines.h"
20#include "Utils/AMDGPUBaseInfo.h"
21#include "llvm/MC/MCExpr.h"
22
23using namespace llvm;
24
25void SIProgramInfo::reset(const MachineFunction &MF) {
26 MCContext &Ctx = MF.getContext();
27
28 const MCExpr *ZeroExpr = MCConstantExpr::create(Value: 0, Ctx);
29
30 CodeSizeInBytes.reset();
31
32 VGPRBlocks = ZeroExpr;
33 SGPRBlocks = ZeroExpr;
34 Priority = 0;
35 FloatMode = 0;
36 Priv = 0;
37 DX10Clamp = 0;
38 DebugMode = 0;
39 IEEEMode = 0;
40 WgpMode = 0;
41 MemOrdered = 0;
42 FwdProgress = 0;
43 RrWgMode = 0;
44 ScratchSize = ZeroExpr;
45
46 LDSBlocks = 0;
47 ScratchBlocks = ZeroExpr;
48
49 ScratchEnable = ZeroExpr;
50 UserSGPR = 0;
51 TrapHandlerEnable = 0;
52 TGIdXEnable = 0;
53 TGIdYEnable = 0;
54 TGIdZEnable = 0;
55 TGSizeEnable = 0;
56 TIdIGCompCount = 0;
57 EXCPEnMSB = 0;
58 LdsSize = 0;
59 EXCPEnable = 0;
60
61 ComputePGMRSrc3 = ZeroExpr;
62
63 NumVGPR = ZeroExpr;
64 NumArchVGPR = ZeroExpr;
65 NumAccVGPR = ZeroExpr;
66 AccumOffset = ZeroExpr;
67 TgSplit = 0;
68 NumSGPR = ZeroExpr;
69 SGPRSpill = 0;
70 VGPRSpill = 0;
71 LDSSize = 0;
72 FlatUsed = ZeroExpr;
73
74 NumSGPRsForWavesPerEU = ZeroExpr;
75 NumVGPRsForWavesPerEU = ZeroExpr;
76 NamedBarCnt = ZeroExpr;
77 Occupancy = ZeroExpr;
78 DynamicCallStack = ZeroExpr;
79 VCCUsed = ZeroExpr;
80}
81
82static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
83 const GCNSubtarget &ST) {
84 uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
85 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
86 S_00B848_PRIV(ProgInfo.Priv) |
87 S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
88 S_00B848_WGP_MODE(ProgInfo.WgpMode) |
89 S_00B848_MEM_ORDERED(ProgInfo.MemOrdered) |
90 S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress);
91
92 if (ST.hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode)) {
93 Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
94 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
95 }
96
97 if (ST.hasRrWGMode())
98 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
99
100 return Reg;
101}
102
103static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
104 CallingConv::ID CC, const GCNSubtarget &ST) {
105 uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
106 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
107 S_00B848_PRIV(ProgInfo.Priv) |
108 S_00B848_DEBUG_MODE(ProgInfo.DebugMode);
109
110 if (ST.hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode)) {
111 Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
112 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
113 }
114
115 if (ST.hasRrWGMode())
116 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
117
118 switch (CC) {
119 case CallingConv::AMDGPU_PS:
120 Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered);
121 break;
122 case CallingConv::AMDGPU_VS:
123 Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered);
124 break;
125 case CallingConv::AMDGPU_GS:
126 Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
127 S_00B228_MEM_ORDERED(ProgInfo.MemOrdered);
128 break;
129 case CallingConv::AMDGPU_HS:
130 Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
131 S_00B428_MEM_ORDERED(ProgInfo.MemOrdered);
132 break;
133 default:
134 break;
135 }
136 return Reg;
137}
138
139static uint64_t getComputePGMRSrc2Reg(const GCNSubtarget &ST,
140 const SIProgramInfo &ProgInfo) {
141 uint64_t MaxNumUserSGRPs = AMDGPU::getMaxNumUserSGPRs(STI: ST);
142 uint64_t Reg = 0;
143 if (MaxNumUserSGRPs == 32) {
144 Reg = S_00B84C_USER_SGPR_GFX1250(ProgInfo.UserSGPR);
145 } else if (MaxNumUserSGRPs == 16) {
146 Reg = (S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
147 S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable));
148 } else {
149 llvm_unreachable("max Number of User SGPRs are either 32 or 16");
150 }
151
152 Reg |= S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) |
153 S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) |
154 S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) |
155 S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) |
156 S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) |
157 S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) |
158 S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
159 S_00B84C_EXCP_EN(ProgInfo.EXCPEnable);
160
161 return Reg;
162}
163
164static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
165 MCContext &Ctx) {
166 if (Mask) {
167 const MCExpr *MaskExpr = MCConstantExpr::create(Value: Mask, Ctx);
168 Val = MCBinaryExpr::createAnd(LHS: Val, RHS: MaskExpr, Ctx);
169 }
170 if (Shift) {
171 const MCExpr *ShiftExpr = MCConstantExpr::create(Value: Shift, Ctx);
172 Val = MCBinaryExpr::createShl(LHS: Val, RHS: ShiftExpr, Ctx);
173 }
174 return Val;
175}
176
177const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
178 MCContext &Ctx) const {
179 uint64_t Reg = getComputePGMRSrc1Reg(ProgInfo: *this, ST);
180 const MCExpr *RegExpr = MCConstantExpr::create(Value: Reg, Ctx);
181 const MCExpr *Res = MCBinaryExpr::createOr(
182 LHS: MaskShift(Val: VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
183 RHS: MaskShift(Val: SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
184 return MCBinaryExpr::createOr(LHS: RegExpr, RHS: Res, Ctx);
185}
186
187const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
188 const GCNSubtarget &ST,
189 MCContext &Ctx) const {
190 if (AMDGPU::isCompute(CC)) {
191 return getComputePGMRSrc1(ST, Ctx);
192 }
193
194 uint64_t Reg = getPGMRSrc1Reg(ProgInfo: *this, CC, ST);
195 const MCExpr *RegExpr = MCConstantExpr::create(Value: Reg, Ctx);
196 const MCExpr *Res = MCBinaryExpr::createOr(
197 LHS: MaskShift(Val: VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
198 RHS: MaskShift(Val: SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
199 return MCBinaryExpr::createOr(LHS: RegExpr, RHS: Res, Ctx);
200}
201
202const MCExpr *SIProgramInfo::getComputePGMRSrc2(const GCNSubtarget &ST,
203 MCContext &Ctx) const {
204 uint64_t Reg = getComputePGMRSrc2Reg(ST, ProgInfo: *this);
205 const MCExpr *RegExpr = MCConstantExpr::create(Value: Reg, Ctx);
206 return MCBinaryExpr::createOr(LHS: ScratchEnable, RHS: RegExpr, Ctx);
207}
208
209const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
210 const GCNSubtarget &ST,
211 MCContext &Ctx) const {
212 if (AMDGPU::isCompute(CC))
213 return getComputePGMRSrc2(ST, Ctx);
214
215 return MCConstantExpr::create(Value: 0, Ctx);
216}
217
218uint64_t SIProgramInfo::getFunctionCodeSize(const MachineFunction &MF) {
219 if (CodeSizeInBytes.has_value())
220 return *CodeSizeInBytes;
221
222 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
223 const SIInstrInfo *TII = STM.getInstrInfo();
224
225 uint64_t CodeSize = 0;
226
227 for (const MachineBasicBlock &MBB : MF) {
228 CodeSize = alignTo(Size: CodeSize, A: MBB.getAlignment());
229
230 for (const MachineInstr &MI : MBB) {
231 // TODO: CodeSize should account for multiple functions.
232
233 if (MI.isMetaInstruction())
234 continue;
235
236 CodeSize += TII->getInstSizeInBytes(MI);
237 }
238 }
239
240 CodeSizeInBytes = CodeSize;
241 return CodeSize;
242}
243