1 | //===-- SIProgramInfo.cpp ----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// |
11 | /// The SIProgramInfo tracks resource usage and hardware flags for kernels and |
12 | /// entry functions. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | // |
16 | |
17 | #include "SIProgramInfo.h" |
18 | #include "GCNSubtarget.h" |
19 | #include "SIDefines.h" |
20 | #include "Utils/AMDGPUBaseInfo.h" |
21 | #include "llvm/MC/MCExpr.h" |
22 | |
23 | using namespace llvm; |
24 | |
25 | void SIProgramInfo::reset(const MachineFunction &MF) { |
26 | MCContext &Ctx = MF.getContext(); |
27 | |
28 | const MCExpr *ZeroExpr = MCConstantExpr::create(Value: 0, Ctx); |
29 | |
30 | VGPRBlocks = ZeroExpr; |
31 | SGPRBlocks = ZeroExpr; |
32 | Priority = 0; |
33 | FloatMode = 0; |
34 | Priv = 0; |
35 | DX10Clamp = 0; |
36 | DebugMode = 0; |
37 | IEEEMode = 0; |
38 | WgpMode = 0; |
39 | MemOrdered = 0; |
40 | RrWgMode = 0; |
41 | ScratchSize = ZeroExpr; |
42 | |
43 | LDSBlocks = 0; |
44 | ScratchBlocks = ZeroExpr; |
45 | |
46 | ScratchEnable = ZeroExpr; |
47 | UserSGPR = 0; |
48 | TrapHandlerEnable = 0; |
49 | TGIdXEnable = 0; |
50 | TGIdYEnable = 0; |
51 | TGIdZEnable = 0; |
52 | TGSizeEnable = 0; |
53 | TIdIGCompCount = 0; |
54 | EXCPEnMSB = 0; |
55 | LdsSize = 0; |
56 | EXCPEnable = 0; |
57 | |
58 | ComputePGMRSrc3GFX90A = ZeroExpr; |
59 | |
60 | NumVGPR = ZeroExpr; |
61 | NumArchVGPR = ZeroExpr; |
62 | NumAccVGPR = ZeroExpr; |
63 | AccumOffset = ZeroExpr; |
64 | TgSplit = 0; |
65 | NumSGPR = ZeroExpr; |
66 | SGPRSpill = 0; |
67 | VGPRSpill = 0; |
68 | LDSSize = 0; |
69 | FlatUsed = ZeroExpr; |
70 | |
71 | NumSGPRsForWavesPerEU = ZeroExpr; |
72 | NumVGPRsForWavesPerEU = ZeroExpr; |
73 | Occupancy = ZeroExpr; |
74 | DynamicCallStack = ZeroExpr; |
75 | VCCUsed = ZeroExpr; |
76 | } |
77 | |
78 | static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo, |
79 | const GCNSubtarget &ST) { |
80 | uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) | |
81 | S_00B848_FLOAT_MODE(ProgInfo.FloatMode) | |
82 | S_00B848_PRIV(ProgInfo.Priv) | |
83 | S_00B848_DEBUG_MODE(ProgInfo.DebugMode) | |
84 | S_00B848_WGP_MODE(ProgInfo.WgpMode) | |
85 | S_00B848_MEM_ORDERED(ProgInfo.MemOrdered); |
86 | |
87 | if (ST.hasDX10ClampMode()) |
88 | Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp); |
89 | |
90 | if (ST.hasIEEEMode()) |
91 | Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode); |
92 | |
93 | if (ST.hasRrWGMode()) |
94 | Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode); |
95 | |
96 | return Reg; |
97 | } |
98 | |
99 | static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo, |
100 | CallingConv::ID CC, const GCNSubtarget &ST) { |
101 | uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) | |
102 | S_00B848_FLOAT_MODE(ProgInfo.FloatMode) | |
103 | S_00B848_PRIV(ProgInfo.Priv) | |
104 | S_00B848_DEBUG_MODE(ProgInfo.DebugMode); |
105 | |
106 | if (ST.hasDX10ClampMode()) |
107 | Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp); |
108 | |
109 | if (ST.hasIEEEMode()) |
110 | Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode); |
111 | |
112 | if (ST.hasRrWGMode()) |
113 | Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode); |
114 | |
115 | switch (CC) { |
116 | case CallingConv::AMDGPU_PS: |
117 | Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered); |
118 | break; |
119 | case CallingConv::AMDGPU_VS: |
120 | Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered); |
121 | break; |
122 | case CallingConv::AMDGPU_GS: |
123 | Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) | |
124 | S_00B228_MEM_ORDERED(ProgInfo.MemOrdered); |
125 | break; |
126 | case CallingConv::AMDGPU_HS: |
127 | Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) | |
128 | S_00B428_MEM_ORDERED(ProgInfo.MemOrdered); |
129 | break; |
130 | default: |
131 | break; |
132 | } |
133 | return Reg; |
134 | } |
135 | |
136 | static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo) { |
137 | uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) | |
138 | S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable) | |
139 | S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) | |
140 | S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) | |
141 | S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) | |
142 | S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) | |
143 | S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) | |
144 | S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) | |
145 | S_00B84C_LDS_SIZE(ProgInfo.LdsSize) | |
146 | S_00B84C_EXCP_EN(ProgInfo.EXCPEnable); |
147 | |
148 | return Reg; |
149 | } |
150 | |
151 | static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, |
152 | MCContext &Ctx) { |
153 | if (Mask) { |
154 | const MCExpr *MaskExpr = MCConstantExpr::create(Value: Mask, Ctx); |
155 | Val = MCBinaryExpr::createAnd(LHS: Val, RHS: MaskExpr, Ctx); |
156 | } |
157 | if (Shift) { |
158 | const MCExpr *ShiftExpr = MCConstantExpr::create(Value: Shift, Ctx); |
159 | Val = MCBinaryExpr::createShl(LHS: Val, RHS: ShiftExpr, Ctx); |
160 | } |
161 | return Val; |
162 | } |
163 | |
164 | const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST, |
165 | MCContext &Ctx) const { |
166 | uint64_t Reg = getComputePGMRSrc1Reg(ProgInfo: *this, ST); |
167 | const MCExpr *RegExpr = MCConstantExpr::create(Value: Reg, Ctx); |
168 | const MCExpr *Res = MCBinaryExpr::createOr( |
169 | LHS: MaskShift(Val: VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx), |
170 | RHS: MaskShift(Val: SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx); |
171 | return MCBinaryExpr::createOr(LHS: RegExpr, RHS: Res, Ctx); |
172 | } |
173 | |
174 | const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC, |
175 | const GCNSubtarget &ST, |
176 | MCContext &Ctx) const { |
177 | if (AMDGPU::isCompute(CC)) { |
178 | return getComputePGMRSrc1(ST, Ctx); |
179 | } |
180 | |
181 | uint64_t Reg = getPGMRSrc1Reg(ProgInfo: *this, CC, ST); |
182 | const MCExpr *RegExpr = MCConstantExpr::create(Value: Reg, Ctx); |
183 | const MCExpr *Res = MCBinaryExpr::createOr( |
184 | LHS: MaskShift(Val: VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx), |
185 | RHS: MaskShift(Val: SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx); |
186 | return MCBinaryExpr::createOr(LHS: RegExpr, RHS: Res, Ctx); |
187 | } |
188 | |
189 | const MCExpr *SIProgramInfo::getComputePGMRSrc2(MCContext &Ctx) const { |
190 | uint64_t Reg = getComputePGMRSrc2Reg(ProgInfo: *this); |
191 | const MCExpr *RegExpr = MCConstantExpr::create(Value: Reg, Ctx); |
192 | return MCBinaryExpr::createOr(LHS: ScratchEnable, RHS: RegExpr, Ctx); |
193 | } |
194 | |
195 | const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC, |
196 | MCContext &Ctx) const { |
197 | if (AMDGPU::isCompute(CC)) |
198 | return getComputePGMRSrc2(Ctx); |
199 | |
200 | return MCConstantExpr::create(Value: 0, Ctx); |
201 | } |
202 | |