1 | //===- AMDGPUMCExpr.cpp - AMDGPU specific MC expression classes -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "AMDGPUMCExpr.h" |
10 | #include "GCNSubtarget.h" |
11 | #include "Utils/AMDGPUBaseInfo.h" |
12 | #include "llvm/IR/Function.h" |
13 | #include "llvm/MC/MCAssembler.h" |
14 | #include "llvm/MC/MCContext.h" |
15 | #include "llvm/MC/MCStreamer.h" |
16 | #include "llvm/MC/MCSymbol.h" |
17 | #include "llvm/MC/MCValue.h" |
18 | #include "llvm/Support/Allocator.h" |
19 | #include "llvm/Support/raw_ostream.h" |
20 | #include <optional> |
21 | |
22 | using namespace llvm; |
23 | using namespace llvm::AMDGPU; |
24 | |
25 | AMDGPUMCExpr::AMDGPUMCExpr(VariantKind Kind, ArrayRef<const MCExpr *> Args, |
26 | MCContext &Ctx) |
27 | : Kind(Kind), Ctx(Ctx) { |
28 | assert(Args.size() >= 1 && "Needs a minimum of one expression." ); |
29 | assert(Kind != AGVK_None && "Cannot construct AMDGPUMCExpr of kind none." ); |
30 | |
31 | // Allocating the variadic arguments through the same allocation mechanism |
32 | // that the object itself is allocated with so they end up in the same memory. |
33 | // |
34 | // Will result in an asan failure if allocated on the heap through standard |
35 | // allocation (e.g., through SmallVector's grow). |
36 | RawArgs = static_cast<const MCExpr **>( |
37 | Ctx.allocate(Size: sizeof(const MCExpr *) * Args.size())); |
38 | std::uninitialized_copy(first: Args.begin(), last: Args.end(), result: RawArgs); |
39 | this->Args = ArrayRef<const MCExpr *>(RawArgs, Args.size()); |
40 | } |
41 | |
42 | AMDGPUMCExpr::~AMDGPUMCExpr() { Ctx.deallocate(Ptr: RawArgs); } |
43 | |
44 | const AMDGPUMCExpr *AMDGPUMCExpr::create(VariantKind Kind, |
45 | ArrayRef<const MCExpr *> Args, |
46 | MCContext &Ctx) { |
47 | return new (Ctx) AMDGPUMCExpr(Kind, Args, Ctx); |
48 | } |
49 | |
50 | const MCExpr *AMDGPUMCExpr::getSubExpr(size_t Index) const { |
51 | assert(Index < Args.size() && "Indexing out of bounds AMDGPUMCExpr sub-expr" ); |
52 | return Args[Index]; |
53 | } |
54 | |
55 | void AMDGPUMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { |
56 | switch (Kind) { |
57 | default: |
58 | llvm_unreachable("Unknown AMDGPUMCExpr kind." ); |
59 | case AGVK_Or: |
60 | OS << "or(" ; |
61 | break; |
62 | case AGVK_Max: |
63 | OS << "max(" ; |
64 | break; |
65 | case AGVK_ExtraSGPRs: |
66 | OS << "extrasgprs(" ; |
67 | break; |
68 | case AGVK_TotalNumVGPRs: |
69 | OS << "totalnumvgprs(" ; |
70 | break; |
71 | case AGVK_AlignTo: |
72 | OS << "alignto(" ; |
73 | break; |
74 | case AGVK_Occupancy: |
75 | OS << "occupancy(" ; |
76 | break; |
77 | } |
78 | for (auto It = Args.begin(); It != Args.end(); ++It) { |
79 | (*It)->print(OS, MAI, /*InParens=*/false); |
80 | if ((It + 1) != Args.end()) |
81 | OS << ", " ; |
82 | } |
83 | OS << ')'; |
84 | } |
85 | |
86 | static int64_t op(AMDGPUMCExpr::VariantKind Kind, int64_t Arg1, int64_t Arg2) { |
87 | switch (Kind) { |
88 | default: |
89 | llvm_unreachable("Unknown AMDGPUMCExpr kind." ); |
90 | case AMDGPUMCExpr::AGVK_Max: |
91 | return std::max(a: Arg1, b: Arg2); |
92 | case AMDGPUMCExpr::AGVK_Or: |
93 | return Arg1 | Arg2; |
94 | } |
95 | } |
96 | |
97 | bool AMDGPUMCExpr::(MCValue &Res, const MCAssembler *Asm, |
98 | const MCFixup *Fixup) const { |
99 | auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) { |
100 | MCValue MCVal; |
101 | if (!Arg->evaluateAsRelocatable(Res&: MCVal, Asm, Fixup) || !MCVal.isAbsolute()) |
102 | return false; |
103 | |
104 | ConstantValue = MCVal.getConstant(); |
105 | return true; |
106 | }; |
107 | |
108 | assert(Args.size() == 3 && |
109 | "AMDGPUMCExpr Argument count incorrect for ExtraSGPRs" ); |
110 | const MCSubtargetInfo *STI = Ctx.getSubtargetInfo(); |
111 | uint64_t VCCUsed = 0, FlatScrUsed = 0, XNACKUsed = 0; |
112 | |
113 | bool Success = TryGetMCExprValue(Args[2], XNACKUsed); |
114 | |
115 | assert(Success && "Arguments 3 for ExtraSGPRs should be a known constant" ); |
116 | if (!Success || !TryGetMCExprValue(Args[0], VCCUsed) || |
117 | !TryGetMCExprValue(Args[1], FlatScrUsed)) |
118 | return false; |
119 | |
120 | uint64_t = IsaInfo::getNumExtraSGPRs( |
121 | STI, VCCUsed: (bool)VCCUsed, FlatScrUsed: (bool)FlatScrUsed, XNACKUsed: (bool)XNACKUsed); |
122 | Res = MCValue::get(Val: ExtraSGPRs); |
123 | return true; |
124 | } |
125 | |
126 | bool AMDGPUMCExpr::evaluateTotalNumVGPR(MCValue &Res, const MCAssembler *Asm, |
127 | const MCFixup *Fixup) const { |
128 | auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) { |
129 | MCValue MCVal; |
130 | if (!Arg->evaluateAsRelocatable(Res&: MCVal, Asm, Fixup) || !MCVal.isAbsolute()) |
131 | return false; |
132 | |
133 | ConstantValue = MCVal.getConstant(); |
134 | return true; |
135 | }; |
136 | assert(Args.size() == 2 && |
137 | "AMDGPUMCExpr Argument count incorrect for TotalNumVGPRs" ); |
138 | const MCSubtargetInfo *STI = Ctx.getSubtargetInfo(); |
139 | uint64_t NumAGPR = 0, NumVGPR = 0; |
140 | |
141 | bool Has90AInsts = AMDGPU::isGFX90A(STI: *STI); |
142 | |
143 | if (!TryGetMCExprValue(Args[0], NumAGPR) || |
144 | !TryGetMCExprValue(Args[1], NumVGPR)) |
145 | return false; |
146 | |
147 | uint64_t TotalNum = Has90AInsts && NumAGPR ? alignTo(Value: NumVGPR, Align: 4) + NumAGPR |
148 | : std::max(a: NumVGPR, b: NumAGPR); |
149 | Res = MCValue::get(Val: TotalNum); |
150 | return true; |
151 | } |
152 | |
153 | bool AMDGPUMCExpr::evaluateAlignTo(MCValue &Res, const MCAssembler *Asm, |
154 | const MCFixup *Fixup) const { |
155 | auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) { |
156 | MCValue MCVal; |
157 | if (!Arg->evaluateAsRelocatable(Res&: MCVal, Asm, Fixup) || !MCVal.isAbsolute()) |
158 | return false; |
159 | |
160 | ConstantValue = MCVal.getConstant(); |
161 | return true; |
162 | }; |
163 | |
164 | assert(Args.size() == 2 && |
165 | "AMDGPUMCExpr Argument count incorrect for AlignTo" ); |
166 | uint64_t Value = 0, Align = 0; |
167 | if (!TryGetMCExprValue(Args[0], Value) || !TryGetMCExprValue(Args[1], Align)) |
168 | return false; |
169 | |
170 | Res = MCValue::get(Val: alignTo(Value, Align)); |
171 | return true; |
172 | } |
173 | |
174 | bool AMDGPUMCExpr::evaluateOccupancy(MCValue &Res, const MCAssembler *Asm, |
175 | const MCFixup *Fixup) const { |
176 | auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) { |
177 | MCValue MCVal; |
178 | if (!Arg->evaluateAsRelocatable(Res&: MCVal, Asm, Fixup) || !MCVal.isAbsolute()) |
179 | return false; |
180 | |
181 | ConstantValue = MCVal.getConstant(); |
182 | return true; |
183 | }; |
184 | assert(Args.size() == 7 && |
185 | "AMDGPUMCExpr Argument count incorrect for Occupancy" ); |
186 | uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation, |
187 | NumSGPRs, NumVGPRs; |
188 | |
189 | bool Success = true; |
190 | Success &= TryGetMCExprValue(Args[0], MaxWaves); |
191 | Success &= TryGetMCExprValue(Args[1], Granule); |
192 | Success &= TryGetMCExprValue(Args[2], TargetTotalNumVGPRs); |
193 | Success &= TryGetMCExprValue(Args[3], Generation); |
194 | Success &= TryGetMCExprValue(Args[4], InitOccupancy); |
195 | |
196 | assert(Success && "Arguments 1 to 5 for Occupancy should be known constants" ); |
197 | |
198 | if (!Success || !TryGetMCExprValue(Args[5], NumSGPRs) || |
199 | !TryGetMCExprValue(Args[6], NumVGPRs)) |
200 | return false; |
201 | |
202 | unsigned Occupancy = InitOccupancy; |
203 | if (NumSGPRs) |
204 | Occupancy = std::min( |
205 | a: Occupancy, b: IsaInfo::getOccupancyWithNumSGPRs( |
206 | SGPRs: NumSGPRs, MaxWaves, |
207 | Gen: static_cast<AMDGPUSubtarget::Generation>(Generation))); |
208 | if (NumVGPRs) |
209 | Occupancy = std::min(a: Occupancy, |
210 | b: IsaInfo::getNumWavesPerEUWithNumVGPRs( |
211 | NumVGPRs, Granule, MaxWaves, TotalNumVGPRs: TargetTotalNumVGPRs)); |
212 | |
213 | Res = MCValue::get(Val: Occupancy); |
214 | return true; |
215 | } |
216 | |
217 | bool AMDGPUMCExpr::evaluateAsRelocatableImpl(MCValue &Res, |
218 | const MCAssembler *Asm, |
219 | const MCFixup *Fixup) const { |
220 | std::optional<int64_t> Total; |
221 | switch (Kind) { |
222 | default: |
223 | break; |
224 | case AGVK_ExtraSGPRs: |
225 | return evaluateExtraSGPRs(Res, Asm, Fixup); |
226 | case AGVK_AlignTo: |
227 | return evaluateAlignTo(Res, Asm, Fixup); |
228 | case AGVK_TotalNumVGPRs: |
229 | return evaluateTotalNumVGPR(Res, Asm, Fixup); |
230 | case AGVK_Occupancy: |
231 | return evaluateOccupancy(Res, Asm, Fixup); |
232 | } |
233 | |
234 | for (const MCExpr *Arg : Args) { |
235 | MCValue ArgRes; |
236 | if (!Arg->evaluateAsRelocatable(Res&: ArgRes, Asm, Fixup) || !ArgRes.isAbsolute()) |
237 | return false; |
238 | |
239 | if (!Total.has_value()) |
240 | Total = ArgRes.getConstant(); |
241 | Total = op(Kind, Arg1: *Total, Arg2: ArgRes.getConstant()); |
242 | } |
243 | |
244 | Res = MCValue::get(Val: *Total); |
245 | return true; |
246 | } |
247 | |
248 | void AMDGPUMCExpr::visitUsedExpr(MCStreamer &Streamer) const { |
249 | for (const MCExpr *Arg : Args) |
250 | Streamer.visitUsedExpr(Expr: *Arg); |
251 | } |
252 | |
253 | MCFragment *AMDGPUMCExpr::findAssociatedFragment() const { |
254 | for (const MCExpr *Arg : Args) { |
255 | if (Arg->findAssociatedFragment()) |
256 | return Arg->findAssociatedFragment(); |
257 | } |
258 | return nullptr; |
259 | } |
260 | |
261 | /// Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed |
262 | /// are unresolvable but needed for further MCExprs). Derived from |
263 | /// implementation of IsaInfo::getNumExtraSGPRs in AMDGPUBaseInfo.cpp. |
264 | /// |
265 | const AMDGPUMCExpr *AMDGPUMCExpr::(const MCExpr *VCCUsed, |
266 | const MCExpr *FlatScrUsed, |
267 | bool XNACKUsed, |
268 | MCContext &Ctx) { |
269 | |
270 | return create(Kind: AGVK_ExtraSGPRs, |
271 | Args: {VCCUsed, FlatScrUsed, MCConstantExpr::create(Value: XNACKUsed, Ctx)}, |
272 | Ctx); |
273 | } |
274 | |
275 | const AMDGPUMCExpr *AMDGPUMCExpr::createTotalNumVGPR(const MCExpr *NumAGPR, |
276 | const MCExpr *NumVGPR, |
277 | MCContext &Ctx) { |
278 | return create(Kind: AGVK_TotalNumVGPRs, Args: {NumAGPR, NumVGPR}, Ctx); |
279 | } |
280 | |
281 | /// Mimics GCNSubtarget::computeOccupancy for MCExpr. |
282 | /// |
283 | /// Remove dependency on GCNSubtarget and depend only only the necessary values |
284 | /// for said occupancy computation. Should match computeOccupancy implementation |
285 | /// without passing \p STM on. |
286 | const AMDGPUMCExpr *AMDGPUMCExpr::createOccupancy(unsigned InitOcc, |
287 | const MCExpr *NumSGPRs, |
288 | const MCExpr *NumVGPRs, |
289 | const GCNSubtarget &STM, |
290 | MCContext &Ctx) { |
291 | unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(STI: &STM); |
292 | unsigned Granule = IsaInfo::getVGPRAllocGranule(STI: &STM); |
293 | unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(STI: &STM); |
294 | unsigned Generation = STM.getGeneration(); |
295 | |
296 | auto CreateExpr = [&Ctx](unsigned Value) { |
297 | return MCConstantExpr::create(Value, Ctx); |
298 | }; |
299 | |
300 | return create(Kind: AGVK_Occupancy, |
301 | Args: {CreateExpr(MaxWaves), CreateExpr(Granule), |
302 | CreateExpr(TargetTotalNumVGPRs), CreateExpr(Generation), |
303 | CreateExpr(InitOcc), NumSGPRs, NumVGPRs}, |
304 | Ctx); |
305 | } |
306 | |