1//===- AMDGPUMCResourceInfo.cpp --- MC Resource Info ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// \brief MC infrastructure to propagate the function level resource usage
11/// info.
12///
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUMCResourceInfo.h"
16#include "Utils/AMDGPUBaseInfo.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/MC/MCAsmInfo.h"
19#include "llvm/MC/MCContext.h"
20#include "llvm/MC/MCSymbol.h"
21#include "llvm/Target/TargetMachine.h"
22
23#define DEBUG_TYPE "amdgpu-mc-resource-usage"
24
25using namespace llvm;
26
27MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
28 MCContext &OutContext, bool IsLocal) {
29 auto GOCS = [FuncName, &OutContext, IsLocal](StringRef Suffix) {
30 StringRef Prefix =
31 IsLocal ? OutContext.getAsmInfo()->getPrivateGlobalPrefix() : "";
32 return OutContext.getOrCreateSymbol(Name: Twine(Prefix) + FuncName +
33 Twine(Suffix));
34 };
35 switch (RIK) {
36 case RIK_NumVGPR:
37 return GOCS(".num_vgpr");
38 case RIK_NumAGPR:
39 return GOCS(".num_agpr");
40 case RIK_NumSGPR:
41 return GOCS(".numbered_sgpr");
42 case RIK_PrivateSegSize:
43 return GOCS(".private_seg_size");
44 case RIK_UsesVCC:
45 return GOCS(".uses_vcc");
46 case RIK_UsesFlatScratch:
47 return GOCS(".uses_flat_scratch");
48 case RIK_HasDynSizedStack:
49 return GOCS(".has_dyn_sized_stack");
50 case RIK_HasRecursion:
51 return GOCS(".has_recursion");
52 case RIK_HasIndirectCall:
53 return GOCS(".has_indirect_call");
54 }
55 llvm_unreachable("Unexpected ResourceInfoKind.");
56}
57
58const MCExpr *MCResourceInfo::getSymRefExpr(StringRef FuncName,
59 ResourceInfoKind RIK,
60 MCContext &Ctx, bool IsLocal) {
61 return MCSymbolRefExpr::create(Symbol: getSymbol(FuncName, RIK, OutContext&: Ctx, IsLocal), Ctx);
62}
63
64void MCResourceInfo::assignMaxRegs(MCContext &OutContext) {
65 // Assign expression to get the max register use to the max_num_Xgpr symbol.
66 MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext);
67 MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext);
68 MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext);
69
70 auto assignMaxRegSym = [&OutContext](MCSymbol *Sym, int32_t RegCount) {
71 const MCExpr *MaxExpr = MCConstantExpr::create(Value: RegCount, Ctx&: OutContext);
72 Sym->setVariableValue(MaxExpr);
73 };
74
75 assignMaxRegSym(MaxVGPRSym, MaxVGPR);
76 assignMaxRegSym(MaxAGPRSym, MaxAGPR);
77 assignMaxRegSym(MaxSGPRSym, MaxSGPR);
78}
79
80void MCResourceInfo::reset() { *this = MCResourceInfo(); }
81
82void MCResourceInfo::finalize(MCContext &OutContext) {
83 assert(!Finalized && "Cannot finalize ResourceInfo again.");
84 Finalized = true;
85 assignMaxRegs(OutContext);
86}
87
88MCSymbol *MCResourceInfo::getMaxVGPRSymbol(MCContext &OutContext) {
89 return OutContext.getOrCreateSymbol(Name: "amdgpu.max_num_vgpr");
90}
91
92MCSymbol *MCResourceInfo::getMaxAGPRSymbol(MCContext &OutContext) {
93 return OutContext.getOrCreateSymbol(Name: "amdgpu.max_num_agpr");
94}
95
96MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
97 return OutContext.getOrCreateSymbol(Name: "amdgpu.max_num_sgpr");
98}
99
100// Tries to flatten recursive call register resource gathering. Simple cycle
101// avoiding dfs to find the constants in the propagated symbols.
102// Assumes:
103// - RecSym has been confirmed to recurse (this means the callee symbols should
104// all be populated, started at RecSym).
105// - Shape of the resource symbol's MCExpr (`max` args are order agnostic):
106// RecSym.MCExpr := max(<constant>+, <callee_symbol>*)
107const MCExpr *MCResourceInfo::flattenedCycleMax(MCSymbol *RecSym,
108 ResourceInfoKind RIK,
109 MCContext &OutContext) {
110 SmallPtrSet<const MCExpr *, 8> Seen;
111 SmallVector<const MCExpr *, 8> WorkList;
112 int64_t Maximum = 0;
113
114 const MCExpr *RecExpr = RecSym->getVariableValue();
115 WorkList.push_back(Elt: RecExpr);
116
117 while (!WorkList.empty()) {
118 const MCExpr *CurExpr = WorkList.pop_back_val();
119 switch (CurExpr->getKind()) {
120 default: {
121 // Assuming the recursion is of shape `max(<constant>, <callee_symbol>)`
122 // where <callee_symbol> will eventually recurse. If this condition holds,
123 // the recursion occurs within some other (possibly unresolvable) MCExpr,
124 // thus using the worst case value then.
125 if (!AMDGPUMCExpr::isSymbolUsedInExpression(Sym: RecSym, E: CurExpr)) {
126 LLVM_DEBUG(dbgs() << "MCResUse: " << RecSym->getName()
127 << ": Recursion in unexpected sub-expression, using "
128 "module maximum\n");
129 switch (RIK) {
130 default:
131 break;
132 case RIK_NumVGPR:
133 return MCSymbolRefExpr::create(Symbol: getMaxVGPRSymbol(OutContext),
134 Ctx&: OutContext);
135 break;
136 case RIK_NumSGPR:
137 return MCSymbolRefExpr::create(Symbol: getMaxSGPRSymbol(OutContext),
138 Ctx&: OutContext);
139 break;
140 case RIK_NumAGPR:
141 return MCSymbolRefExpr::create(Symbol: getMaxAGPRSymbol(OutContext),
142 Ctx&: OutContext);
143 break;
144 }
145 }
146 break;
147 }
148 case MCExpr::ExprKind::Constant: {
149 int64_t Val = cast<MCConstantExpr>(Val: CurExpr)->getValue();
150 Maximum = std::max(a: Maximum, b: Val);
151 break;
152 }
153 case MCExpr::ExprKind::SymbolRef: {
154 const MCSymbolRefExpr *SymExpr = cast<MCSymbolRefExpr>(Val: CurExpr);
155 const MCSymbol &SymRef = SymExpr->getSymbol();
156 if (SymRef.isVariable()) {
157 const MCExpr *SymVal = SymRef.getVariableValue();
158 if (Seen.insert(Ptr: SymVal).second)
159 WorkList.push_back(Elt: SymVal);
160 }
161 break;
162 }
163 case MCExpr::ExprKind::Target: {
164 const AMDGPUMCExpr *TargetExpr = cast<AMDGPUMCExpr>(Val: CurExpr);
165 if (TargetExpr->getKind() == AMDGPUMCExpr::VariantKind::AGVK_Max) {
166 for (auto &Arg : TargetExpr->getArgs())
167 WorkList.push_back(Elt: Arg);
168 }
169 break;
170 }
171 }
172 }
173
174 LLVM_DEBUG(dbgs() << "MCResUse: " << RecSym->getName()
175 << ": Using flattened max: << " << Maximum << '\n');
176
177 return MCConstantExpr::create(Value: Maximum, Ctx&: OutContext);
178}
179
180void MCResourceInfo::assignResourceInfoExpr(
181 int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind,
182 const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
183 MCContext &OutContext) {
184 const TargetMachine &TM = MF.getTarget();
185 bool IsLocal = MF.getFunction().hasLocalLinkage();
186 MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction());
187 const MCConstantExpr *LocalConstExpr =
188 MCConstantExpr::create(Value: LocalValue, Ctx&: OutContext);
189 const MCExpr *SymVal = LocalConstExpr;
190 MCSymbol *Sym = getSymbol(FuncName: FnSym->getName(), RIK, OutContext, IsLocal);
191 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
192 << LocalValue << " as function local usage\n");
193 if (!Callees.empty()) {
194 SmallVector<const MCExpr *, 8> ArgExprs;
195 SmallPtrSet<const Function *, 8> Seen;
196 ArgExprs.push_back(Elt: LocalConstExpr);
197
198 for (const Function *Callee : Callees) {
199 if (!Seen.insert(Ptr: Callee).second)
200 continue;
201
202 bool IsCalleeLocal = Callee->hasLocalLinkage();
203 MCSymbol *CalleeFnSym = TM.getSymbol(GV: &Callee->getFunction());
204 MCSymbol *CalleeValSym =
205 getSymbol(FuncName: CalleeFnSym->getName(), RIK, OutContext, IsLocal: IsCalleeLocal);
206
207 // Avoid constructing recursive definitions by detecting whether `Sym` is
208 // found transitively within any of its `CalleeValSym`.
209 if (!CalleeValSym->isVariable() ||
210 !AMDGPUMCExpr::isSymbolUsedInExpression(
211 Sym, E: CalleeValSym->getVariableValue())) {
212 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
213 << CalleeValSym->getName() << " as callee\n");
214 ArgExprs.push_back(Elt: MCSymbolRefExpr::create(Symbol: CalleeValSym, Ctx&: OutContext));
215 } else {
216 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName()
217 << ": Recursion found, attempt flattening of cycle "
218 "for resource usage\n");
219 // In case of recursion for vgpr/sgpr/agpr resource usage: try to
220 // flatten and use the max of the call cycle. May still end up emitting
221 // module max if not fully resolvable.
222 switch (RIK) {
223 default:
224 break;
225 case RIK_NumVGPR:
226 case RIK_NumSGPR:
227 case RIK_NumAGPR:
228 ArgExprs.push_back(Elt: flattenedCycleMax(RecSym: CalleeValSym, RIK, OutContext));
229 break;
230 }
231 }
232 }
233 if (ArgExprs.size() > 1)
234 SymVal = AMDGPUMCExpr::create(Kind, Args: ArgExprs, Ctx&: OutContext);
235 }
236 Sym->setVariableValue(SymVal);
237}
238
239void MCResourceInfo::gatherResourceInfo(
240 const MachineFunction &MF,
241 const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &FRI,
242 MCContext &OutContext) {
243 // Worst case VGPR use for non-hardware-entrypoints.
244 MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext);
245 MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext);
246 MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext);
247 bool IsLocal = MF.getFunction().hasLocalLinkage();
248
249 if (!AMDGPU::isEntryFunctionCC(CC: MF.getFunction().getCallingConv())) {
250 addMaxVGPRCandidate(candidate: FRI.NumVGPR);
251 addMaxAGPRCandidate(candidate: FRI.NumAGPR);
252 addMaxSGPRCandidate(candidate: FRI.NumExplicitSGPR);
253 }
254
255 const TargetMachine &TM = MF.getTarget();
256 MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction());
257
258 LLVM_DEBUG(dbgs() << "MCResUse: Gathering resource information for "
259 << FnSym->getName() << '\n');
260 LLVM_DEBUG({
261 if (!FRI.Callees.empty()) {
262 dbgs() << "MCResUse: Callees:\n";
263 for (const Function *Callee : FRI.Callees) {
264 MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
265 dbgs() << "MCResUse: " << CalleeFnSym->getName() << '\n';
266 }
267 }
268 });
269
270 auto SetMaxReg = [&](MCSymbol *MaxSym, int32_t numRegs,
271 ResourceInfoKind RIK) {
272 if (!FRI.HasIndirectCall) {
273 assignResourceInfoExpr(LocalValue: numRegs, RIK, Kind: AMDGPUMCExpr::AGVK_Max, MF,
274 Callees: FRI.Callees, OutContext);
275 } else {
276 const MCExpr *SymRef = MCSymbolRefExpr::create(Symbol: MaxSym, Ctx&: OutContext);
277 MCSymbol *LocalNumSym =
278 getSymbol(FuncName: FnSym->getName(), RIK, OutContext, IsLocal);
279 const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax(
280 Args: {MCConstantExpr::create(Value: numRegs, Ctx&: OutContext), SymRef}, Ctx&: OutContext);
281 LocalNumSym->setVariableValue(MaxWithLocal);
282 LLVM_DEBUG(dbgs() << "MCResUse: " << LocalNumSym->getName()
283 << ": Indirect callee within, using module maximum\n");
284 }
285 };
286
287 LLVM_DEBUG(dbgs() << "MCResUse: " << FnSym->getName() << '\n');
288 SetMaxReg(MaxVGPRSym, FRI.NumVGPR, RIK_NumVGPR);
289 SetMaxReg(MaxAGPRSym, FRI.NumAGPR, RIK_NumAGPR);
290 SetMaxReg(MaxSGPRSym, FRI.NumExplicitSGPR, RIK_NumSGPR);
291
292 {
293 // The expression for private segment size should be: FRI.PrivateSegmentSize
294 // + max(FRI.Callees, FRI.CalleeSegmentSize)
295 SmallVector<const MCExpr *, 8> ArgExprs;
296 MCSymbol *Sym =
297 getSymbol(FuncName: FnSym->getName(), RIK: RIK_PrivateSegSize, OutContext, IsLocal);
298 if (FRI.CalleeSegmentSize) {
299 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
300 << FRI.CalleeSegmentSize
301 << " for indirect/recursive callees within\n");
302 ArgExprs.push_back(
303 Elt: MCConstantExpr::create(Value: FRI.CalleeSegmentSize, Ctx&: OutContext));
304 }
305
306 SmallPtrSet<const Function *, 8> Seen;
307 Seen.insert(Ptr: &MF.getFunction());
308 for (const Function *Callee : FRI.Callees) {
309 if (!Seen.insert(Ptr: Callee).second)
310 continue;
311 if (!Callee->isDeclaration()) {
312 bool IsCalleeLocal = Callee->hasLocalLinkage();
313 MCSymbol *CalleeFnSym = TM.getSymbol(GV: &Callee->getFunction());
314 MCSymbol *CalleeValSym =
315 getSymbol(FuncName: CalleeFnSym->getName(), RIK: RIK_PrivateSegSize, OutContext,
316 IsLocal: IsCalleeLocal);
317
318 // Avoid constructing recursive definitions by detecting whether `Sym`
319 // is found transitively within any of its `CalleeValSym`.
320 if (!CalleeValSym->isVariable() ||
321 !AMDGPUMCExpr::isSymbolUsedInExpression(
322 Sym, E: CalleeValSym->getVariableValue())) {
323 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
324 << CalleeValSym->getName() << " as callee\n");
325 ArgExprs.push_back(Elt: MCSymbolRefExpr::create(Symbol: CalleeValSym, Ctx&: OutContext));
326 }
327 }
328 }
329 const MCExpr *localConstExpr =
330 MCConstantExpr::create(Value: FRI.PrivateSegmentSize, Ctx&: OutContext);
331 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
332 << FRI.PrivateSegmentSize
333 << " as function local usage\n");
334 if (!ArgExprs.empty()) {
335 const AMDGPUMCExpr *transitiveExpr =
336 AMDGPUMCExpr::createMax(Args: ArgExprs, Ctx&: OutContext);
337 localConstExpr =
338 MCBinaryExpr::createAdd(LHS: localConstExpr, RHS: transitiveExpr, Ctx&: OutContext);
339 }
340 Sym->setVariableValue(localConstExpr);
341 }
342
343 auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
344 MCSymbol *Sym = getSymbol(FuncName: FnSym->getName(), RIK, OutContext, IsLocal);
345 LLVM_DEBUG(
346 dbgs() << "MCResUse: " << Sym->getName() << ": Adding " << LocalValue
347 << ", no further propagation as indirect callee found within\n");
348 Sym->setVariableValue(MCConstantExpr::create(Value: LocalValue, Ctx&: OutContext));
349 };
350
351 if (!FRI.HasIndirectCall) {
352 assignResourceInfoExpr(LocalValue: FRI.UsesVCC, RIK: ResourceInfoKind::RIK_UsesVCC,
353 Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext);
354 assignResourceInfoExpr(LocalValue: FRI.UsesFlatScratch,
355 RIK: ResourceInfoKind::RIK_UsesFlatScratch,
356 Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext);
357 assignResourceInfoExpr(LocalValue: FRI.HasDynamicallySizedStack,
358 RIK: ResourceInfoKind::RIK_HasDynSizedStack,
359 Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext);
360 assignResourceInfoExpr(LocalValue: FRI.HasRecursion, RIK: ResourceInfoKind::RIK_HasRecursion,
361 Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext);
362 assignResourceInfoExpr(LocalValue: FRI.HasIndirectCall,
363 RIK: ResourceInfoKind::RIK_HasIndirectCall,
364 Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext);
365 } else {
366 SetToLocal(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC);
367 SetToLocal(FRI.UsesFlatScratch, ResourceInfoKind::RIK_UsesFlatScratch);
368 SetToLocal(FRI.HasDynamicallySizedStack,
369 ResourceInfoKind::RIK_HasDynSizedStack);
370 SetToLocal(FRI.HasRecursion, ResourceInfoKind::RIK_HasRecursion);
371 SetToLocal(FRI.HasIndirectCall, ResourceInfoKind::RIK_HasIndirectCall);
372 }
373}
374
375const MCExpr *MCResourceInfo::createTotalNumVGPRs(const MachineFunction &MF,
376 MCContext &Ctx) {
377 const TargetMachine &TM = MF.getTarget();
378 MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction());
379 bool IsLocal = MF.getFunction().hasLocalLinkage();
380 return AMDGPUMCExpr::createTotalNumVGPR(
381 NumAGPR: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_NumAGPR, Ctx, IsLocal),
382 NumVGPR: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_NumVGPR, Ctx, IsLocal), Ctx);
383}
384
385const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF,
386 bool hasXnack,
387 MCContext &Ctx) {
388 const TargetMachine &TM = MF.getTarget();
389 MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction());
390 bool IsLocal = MF.getFunction().hasLocalLinkage();
391 return MCBinaryExpr::createAdd(
392 LHS: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_NumSGPR, Ctx, IsLocal),
393 RHS: AMDGPUMCExpr::createExtraSGPRs(
394 VCCUsed: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_UsesVCC, Ctx, IsLocal),
395 FlatScrUsed: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_UsesFlatScratch, Ctx, IsLocal),
396 XNACKUsed: hasXnack, Ctx),
397 Ctx);
398}
399