1//===- AMDGPUMCResourceInfo.cpp --- MC Resource Info ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// \brief MC infrastructure to propagate the function level resource usage
11/// info.
12///
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUMCResourceInfo.h"
16#include "Utils/AMDGPUBaseInfo.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/MC/MCAsmInfo.h"
19#include "llvm/MC/MCContext.h"
20#include "llvm/MC/MCSymbol.h"
21#include "llvm/Target/TargetMachine.h"
22
23#define DEBUG_TYPE "amdgpu-mc-resource-usage"
24
25using namespace llvm;
26
27MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
28 MCContext &OutContext, bool IsLocal) {
29 auto GOCS = [FuncName, &OutContext, IsLocal](StringRef Suffix) {
30 StringRef Prefix =
31 IsLocal ? OutContext.getAsmInfo()->getPrivateGlobalPrefix() : "";
32 return OutContext.getOrCreateSymbol(Name: Twine(Prefix) + FuncName +
33 Twine(Suffix));
34 };
35 switch (RIK) {
36 case RIK_NumVGPR:
37 return GOCS(".num_vgpr");
38 case RIK_NumAGPR:
39 return GOCS(".num_agpr");
40 case RIK_NumSGPR:
41 return GOCS(".numbered_sgpr");
42 case RIK_NumNamedBarrier:
43 return GOCS(".num_named_barrier");
44 case RIK_PrivateSegSize:
45 return GOCS(".private_seg_size");
46 case RIK_UsesVCC:
47 return GOCS(".uses_vcc");
48 case RIK_UsesFlatScratch:
49 return GOCS(".uses_flat_scratch");
50 case RIK_HasDynSizedStack:
51 return GOCS(".has_dyn_sized_stack");
52 case RIK_HasRecursion:
53 return GOCS(".has_recursion");
54 case RIK_HasIndirectCall:
55 return GOCS(".has_indirect_call");
56 }
57 llvm_unreachable("Unexpected ResourceInfoKind.");
58}
59
60const MCExpr *MCResourceInfo::getSymRefExpr(StringRef FuncName,
61 ResourceInfoKind RIK,
62 MCContext &Ctx, bool IsLocal) {
63 return MCSymbolRefExpr::create(Symbol: getSymbol(FuncName, RIK, OutContext&: Ctx, IsLocal), Ctx);
64}
65
66void MCResourceInfo::assignMaxRegs(MCContext &OutContext) {
67 // Assign expression to get the max register use to the max_num_Xgpr symbol.
68 MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext);
69 MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext);
70 MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext);
71 MCSymbol *MaxNamedBarrierSym = getMaxNamedBarrierSymbol(OutContext);
72
73 auto assignMaxRegSym = [&OutContext](MCSymbol *Sym, int32_t RegCount) {
74 const MCExpr *MaxExpr = MCConstantExpr::create(Value: RegCount, Ctx&: OutContext);
75 Sym->setVariableValue(MaxExpr);
76 };
77
78 assignMaxRegSym(MaxVGPRSym, MaxVGPR);
79 assignMaxRegSym(MaxAGPRSym, MaxAGPR);
80 assignMaxRegSym(MaxSGPRSym, MaxSGPR);
81 assignMaxRegSym(MaxNamedBarrierSym, MaxNamedBarrier);
82}
83
84void MCResourceInfo::reset() { *this = MCResourceInfo(); }
85
86void MCResourceInfo::finalize(MCContext &OutContext) {
87 assert(!Finalized && "Cannot finalize ResourceInfo again.");
88 Finalized = true;
89 assignMaxRegs(OutContext);
90}
91
92MCSymbol *MCResourceInfo::getMaxVGPRSymbol(MCContext &OutContext) {
93 return OutContext.getOrCreateSymbol(Name: "amdgpu.max_num_vgpr");
94}
95
96MCSymbol *MCResourceInfo::getMaxAGPRSymbol(MCContext &OutContext) {
97 return OutContext.getOrCreateSymbol(Name: "amdgpu.max_num_agpr");
98}
99
100MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
101 return OutContext.getOrCreateSymbol(Name: "amdgpu.max_num_sgpr");
102}
103
104MCSymbol *MCResourceInfo::getMaxNamedBarrierSymbol(MCContext &OutContext) {
105 return OutContext.getOrCreateSymbol(Name: "amdgpu.max_num_named_barrier");
106}
107
108// Tries to flatten recursive call register resource gathering. Simple cycle
109// avoiding dfs to find the constants in the propagated symbols.
110// Assumes:
111// - RecSym has been confirmed to recurse (this means the callee symbols should
112// all be populated, started at RecSym).
113// - Shape of the resource symbol's MCExpr (`max` args are order agnostic):
114// RecSym.MCExpr := max(<constant>+, <callee_symbol>*)
115const MCExpr *MCResourceInfo::flattenedCycleMax(MCSymbol *RecSym,
116 ResourceInfoKind RIK,
117 MCContext &OutContext) {
118 SmallPtrSet<const MCExpr *, 8> Seen;
119 SmallVector<const MCExpr *, 8> WorkList;
120 int64_t Maximum = 0;
121
122 const MCExpr *RecExpr = RecSym->getVariableValue();
123 WorkList.push_back(Elt: RecExpr);
124
125 while (!WorkList.empty()) {
126 const MCExpr *CurExpr = WorkList.pop_back_val();
127 switch (CurExpr->getKind()) {
128 default: {
129 // Assuming the recursion is of shape `max(<constant>, <callee_symbol>)`
130 // where <callee_symbol> will eventually recurse. If this condition holds,
131 // the recursion occurs within some other (possibly unresolvable) MCExpr,
132 // thus using the worst case value then.
133 if (!AMDGPUMCExpr::isSymbolUsedInExpression(Sym: RecSym, E: CurExpr)) {
134 LLVM_DEBUG(dbgs() << "MCResUse: " << RecSym->getName()
135 << ": Recursion in unexpected sub-expression, using "
136 "module maximum\n");
137 switch (RIK) {
138 default:
139 break;
140 case RIK_NumVGPR:
141 return MCSymbolRefExpr::create(Symbol: getMaxVGPRSymbol(OutContext),
142 Ctx&: OutContext);
143 break;
144 case RIK_NumSGPR:
145 return MCSymbolRefExpr::create(Symbol: getMaxSGPRSymbol(OutContext),
146 Ctx&: OutContext);
147 break;
148 case RIK_NumAGPR:
149 return MCSymbolRefExpr::create(Symbol: getMaxAGPRSymbol(OutContext),
150 Ctx&: OutContext);
151 break;
152 }
153 }
154 break;
155 }
156 case MCExpr::ExprKind::Constant: {
157 int64_t Val = cast<MCConstantExpr>(Val: CurExpr)->getValue();
158 Maximum = std::max(a: Maximum, b: Val);
159 break;
160 }
161 case MCExpr::ExprKind::SymbolRef: {
162 const MCSymbolRefExpr *SymExpr = cast<MCSymbolRefExpr>(Val: CurExpr);
163 const MCSymbol &SymRef = SymExpr->getSymbol();
164 if (SymRef.isVariable()) {
165 const MCExpr *SymVal = SymRef.getVariableValue();
166 if (Seen.insert(Ptr: SymVal).second)
167 WorkList.push_back(Elt: SymVal);
168 }
169 break;
170 }
171 case MCExpr::ExprKind::Target: {
172 const AMDGPUMCExpr *TargetExpr = cast<AMDGPUMCExpr>(Val: CurExpr);
173 if (TargetExpr->getKind() == AMDGPUMCExpr::VariantKind::AGVK_Max) {
174 for (auto &Arg : TargetExpr->getArgs())
175 WorkList.push_back(Elt: Arg);
176 }
177 break;
178 }
179 }
180 }
181
182 LLVM_DEBUG(dbgs() << "MCResUse: " << RecSym->getName()
183 << ": Using flattened max: << " << Maximum << '\n');
184
185 return MCConstantExpr::create(Value: Maximum, Ctx&: OutContext);
186}
187
188void MCResourceInfo::assignResourceInfoExpr(
189 int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind,
190 const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
191 MCContext &OutContext) {
192 const TargetMachine &TM = MF.getTarget();
193 bool IsLocal = MF.getFunction().hasLocalLinkage();
194 MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction());
195 const MCConstantExpr *LocalConstExpr =
196 MCConstantExpr::create(Value: LocalValue, Ctx&: OutContext);
197 const MCExpr *SymVal = LocalConstExpr;
198 MCSymbol *Sym = getSymbol(FuncName: FnSym->getName(), RIK, OutContext, IsLocal);
199 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
200 << LocalValue << " as function local usage\n");
201 if (!Callees.empty()) {
202 SmallVector<const MCExpr *, 8> ArgExprs;
203 SmallPtrSet<const Function *, 8> Seen;
204 ArgExprs.push_back(Elt: LocalConstExpr);
205
206 for (const Function *Callee : Callees) {
207 if (!Seen.insert(Ptr: Callee).second)
208 continue;
209
210 bool IsCalleeLocal = Callee->hasLocalLinkage();
211 MCSymbol *CalleeFnSym = TM.getSymbol(GV: &Callee->getFunction());
212 MCSymbol *CalleeValSym =
213 getSymbol(FuncName: CalleeFnSym->getName(), RIK, OutContext, IsLocal: IsCalleeLocal);
214
215 // Avoid constructing recursive definitions by detecting whether `Sym` is
216 // found transitively within any of its `CalleeValSym`.
217 if (!CalleeValSym->isVariable() ||
218 !AMDGPUMCExpr::isSymbolUsedInExpression(
219 Sym, E: CalleeValSym->getVariableValue())) {
220 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
221 << CalleeValSym->getName() << " as callee\n");
222 ArgExprs.push_back(Elt: MCSymbolRefExpr::create(Symbol: CalleeValSym, Ctx&: OutContext));
223 } else {
224 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName()
225 << ": Recursion found, attempt flattening of cycle "
226 "for resource usage\n");
227 // In case of recursion for vgpr/sgpr/agpr resource usage: try to
228 // flatten and use the max of the call cycle. May still end up emitting
229 // module max if not fully resolvable.
230 switch (RIK) {
231 default:
232 break;
233 case RIK_NumVGPR:
234 case RIK_NumSGPR:
235 case RIK_NumAGPR:
236 ArgExprs.push_back(Elt: flattenedCycleMax(RecSym: CalleeValSym, RIK, OutContext));
237 break;
238 case RIK_NumNamedBarrier:
239 ArgExprs.push_back(Elt: MCSymbolRefExpr::create(
240 Symbol: getMaxNamedBarrierSymbol(OutContext), Ctx&: OutContext));
241 break;
242 }
243 }
244 }
245 if (ArgExprs.size() > 1)
246 SymVal = AMDGPUMCExpr::create(Kind, Args: ArgExprs, Ctx&: OutContext);
247 }
248 Sym->setVariableValue(SymVal);
249}
250
251void MCResourceInfo::gatherResourceInfo(
252 const MachineFunction &MF,
253 const AMDGPUResourceUsageAnalysisWrapperPass::FunctionResourceInfo &FRI,
254 MCContext &OutContext) {
255 // Worst case VGPR use for non-hardware-entrypoints.
256 MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext);
257 MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext);
258 MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext);
259 bool IsLocal = MF.getFunction().hasLocalLinkage();
260 MCSymbol *MaxNamedBarrierSym = getMaxNamedBarrierSymbol(OutContext);
261
262 if (!AMDGPU::isEntryFunctionCC(CC: MF.getFunction().getCallingConv())) {
263 addMaxVGPRCandidate(candidate: FRI.NumVGPR);
264 addMaxAGPRCandidate(candidate: FRI.NumAGPR);
265 addMaxSGPRCandidate(candidate: FRI.NumExplicitSGPR);
266 addMaxNamedBarrierCandidate(candidate: FRI.NumNamedBarrier);
267 }
268
269 const TargetMachine &TM = MF.getTarget();
270 MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction());
271
272 LLVM_DEBUG(dbgs() << "MCResUse: Gathering resource information for "
273 << FnSym->getName() << '\n');
274 LLVM_DEBUG({
275 if (!FRI.Callees.empty()) {
276 dbgs() << "MCResUse: Callees:\n";
277 for (const Function *Callee : FRI.Callees) {
278 MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
279 dbgs() << "MCResUse: " << CalleeFnSym->getName() << '\n';
280 }
281 }
282 });
283
284 auto SetMaxReg = [&](MCSymbol *MaxSym, int32_t numRegs,
285 ResourceInfoKind RIK) {
286 if (!FRI.HasIndirectCall) {
287 assignResourceInfoExpr(LocalValue: numRegs, RIK, Kind: AMDGPUMCExpr::AGVK_Max, MF,
288 Callees: FRI.Callees, OutContext);
289 } else {
290 const MCExpr *SymRef = MCSymbolRefExpr::create(Symbol: MaxSym, Ctx&: OutContext);
291 MCSymbol *LocalNumSym =
292 getSymbol(FuncName: FnSym->getName(), RIK, OutContext, IsLocal);
293 const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax(
294 Args: {MCConstantExpr::create(Value: numRegs, Ctx&: OutContext), SymRef}, Ctx&: OutContext);
295 LocalNumSym->setVariableValue(MaxWithLocal);
296 LLVM_DEBUG(dbgs() << "MCResUse: " << LocalNumSym->getName()
297 << ": Indirect callee within, using module maximum\n");
298 }
299 };
300
301 LLVM_DEBUG(dbgs() << "MCResUse: " << FnSym->getName() << '\n');
302 SetMaxReg(MaxVGPRSym, FRI.NumVGPR, RIK_NumVGPR);
303 SetMaxReg(MaxAGPRSym, FRI.NumAGPR, RIK_NumAGPR);
304 SetMaxReg(MaxSGPRSym, FRI.NumExplicitSGPR, RIK_NumSGPR);
305 SetMaxReg(MaxNamedBarrierSym, FRI.NumNamedBarrier, RIK_NumNamedBarrier);
306
307 {
308 // The expression for private segment size should be: FRI.PrivateSegmentSize
309 // + max(FRI.Callees, FRI.CalleeSegmentSize)
310 SmallVector<const MCExpr *, 8> ArgExprs;
311 MCSymbol *Sym =
312 getSymbol(FuncName: FnSym->getName(), RIK: RIK_PrivateSegSize, OutContext, IsLocal);
313 if (FRI.CalleeSegmentSize) {
314 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
315 << FRI.CalleeSegmentSize
316 << " for indirect/recursive callees within\n");
317 ArgExprs.push_back(
318 Elt: MCConstantExpr::create(Value: FRI.CalleeSegmentSize, Ctx&: OutContext));
319 }
320
321 SmallPtrSet<const Function *, 8> Seen;
322 Seen.insert(Ptr: &MF.getFunction());
323 for (const Function *Callee : FRI.Callees) {
324 if (!Seen.insert(Ptr: Callee).second)
325 continue;
326 if (!Callee->isDeclaration()) {
327 bool IsCalleeLocal = Callee->hasLocalLinkage();
328 MCSymbol *CalleeFnSym = TM.getSymbol(GV: &Callee->getFunction());
329 MCSymbol *CalleeValSym =
330 getSymbol(FuncName: CalleeFnSym->getName(), RIK: RIK_PrivateSegSize, OutContext,
331 IsLocal: IsCalleeLocal);
332
333 // Avoid constructing recursive definitions by detecting whether `Sym`
334 // is found transitively within any of its `CalleeValSym`.
335 if (!CalleeValSym->isVariable() ||
336 !AMDGPUMCExpr::isSymbolUsedInExpression(
337 Sym, E: CalleeValSym->getVariableValue())) {
338 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
339 << CalleeValSym->getName() << " as callee\n");
340 ArgExprs.push_back(Elt: MCSymbolRefExpr::create(Symbol: CalleeValSym, Ctx&: OutContext));
341 }
342 }
343 }
344 const MCExpr *localConstExpr =
345 MCConstantExpr::create(Value: FRI.PrivateSegmentSize, Ctx&: OutContext);
346 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
347 << FRI.PrivateSegmentSize
348 << " as function local usage\n");
349 if (!ArgExprs.empty()) {
350 const AMDGPUMCExpr *transitiveExpr =
351 AMDGPUMCExpr::createMax(Args: ArgExprs, Ctx&: OutContext);
352 localConstExpr =
353 MCBinaryExpr::createAdd(LHS: localConstExpr, RHS: transitiveExpr, Ctx&: OutContext);
354 }
355 Sym->setVariableValue(localConstExpr);
356 }
357
358 auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
359 MCSymbol *Sym = getSymbol(FuncName: FnSym->getName(), RIK, OutContext, IsLocal);
360 LLVM_DEBUG(
361 dbgs() << "MCResUse: " << Sym->getName() << ": Adding " << LocalValue
362 << ", no further propagation as indirect callee found within\n");
363 Sym->setVariableValue(MCConstantExpr::create(Value: LocalValue, Ctx&: OutContext));
364 };
365
366 if (!FRI.HasIndirectCall) {
367 assignResourceInfoExpr(LocalValue: FRI.UsesVCC, RIK: ResourceInfoKind::RIK_UsesVCC,
368 Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext);
369 assignResourceInfoExpr(LocalValue: FRI.UsesFlatScratch,
370 RIK: ResourceInfoKind::RIK_UsesFlatScratch,
371 Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext);
372 assignResourceInfoExpr(LocalValue: FRI.HasDynamicallySizedStack,
373 RIK: ResourceInfoKind::RIK_HasDynSizedStack,
374 Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext);
375 assignResourceInfoExpr(LocalValue: FRI.HasRecursion, RIK: ResourceInfoKind::RIK_HasRecursion,
376 Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext);
377 assignResourceInfoExpr(LocalValue: FRI.HasIndirectCall,
378 RIK: ResourceInfoKind::RIK_HasIndirectCall,
379 Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext);
380 } else {
381 SetToLocal(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC);
382 SetToLocal(FRI.UsesFlatScratch, ResourceInfoKind::RIK_UsesFlatScratch);
383 SetToLocal(FRI.HasDynamicallySizedStack,
384 ResourceInfoKind::RIK_HasDynSizedStack);
385 SetToLocal(FRI.HasRecursion, ResourceInfoKind::RIK_HasRecursion);
386 SetToLocal(FRI.HasIndirectCall, ResourceInfoKind::RIK_HasIndirectCall);
387 }
388}
389
390const MCExpr *MCResourceInfo::createTotalNumVGPRs(const MachineFunction &MF,
391 MCContext &Ctx) {
392 const TargetMachine &TM = MF.getTarget();
393 MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction());
394 bool IsLocal = MF.getFunction().hasLocalLinkage();
395 return AMDGPUMCExpr::createTotalNumVGPR(
396 NumAGPR: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_NumAGPR, Ctx, IsLocal),
397 NumVGPR: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_NumVGPR, Ctx, IsLocal), Ctx);
398}
399
400const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF,
401 bool hasXnack,
402 MCContext &Ctx) {
403 const TargetMachine &TM = MF.getTarget();
404 MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction());
405 bool IsLocal = MF.getFunction().hasLocalLinkage();
406 return MCBinaryExpr::createAdd(
407 LHS: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_NumSGPR, Ctx, IsLocal),
408 RHS: AMDGPUMCExpr::createExtraSGPRs(
409 VCCUsed: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_UsesVCC, Ctx, IsLocal),
410 FlatScrUsed: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_UsesFlatScratch, Ctx, IsLocal),
411 XNACKUsed: hasXnack, Ctx),
412 Ctx);
413}
414