1 | //===- AMDGPUMCResourceInfo.cpp --- MC Resource Info ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// \brief MC infrastructure to propagate the function level resource usage |
11 | /// info. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AMDGPUMCResourceInfo.h" |
16 | #include "Utils/AMDGPUBaseInfo.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/MC/MCAsmInfo.h" |
19 | #include "llvm/MC/MCContext.h" |
20 | #include "llvm/MC/MCSymbol.h" |
21 | #include "llvm/Target/TargetMachine.h" |
22 | |
23 | #define DEBUG_TYPE "amdgpu-mc-resource-usage" |
24 | |
25 | using namespace llvm; |
26 | |
27 | MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK, |
28 | MCContext &OutContext, bool IsLocal) { |
29 | auto GOCS = [FuncName, &OutContext, IsLocal](StringRef Suffix) { |
30 | StringRef Prefix = |
31 | IsLocal ? OutContext.getAsmInfo()->getPrivateGlobalPrefix() : "" ; |
32 | return OutContext.getOrCreateSymbol(Name: Twine(Prefix) + FuncName + |
33 | Twine(Suffix)); |
34 | }; |
35 | switch (RIK) { |
36 | case RIK_NumVGPR: |
37 | return GOCS(".num_vgpr" ); |
38 | case RIK_NumAGPR: |
39 | return GOCS(".num_agpr" ); |
40 | case RIK_NumSGPR: |
41 | return GOCS(".numbered_sgpr" ); |
42 | case RIK_PrivateSegSize: |
43 | return GOCS(".private_seg_size" ); |
44 | case RIK_UsesVCC: |
45 | return GOCS(".uses_vcc" ); |
46 | case RIK_UsesFlatScratch: |
47 | return GOCS(".uses_flat_scratch" ); |
48 | case RIK_HasDynSizedStack: |
49 | return GOCS(".has_dyn_sized_stack" ); |
50 | case RIK_HasRecursion: |
51 | return GOCS(".has_recursion" ); |
52 | case RIK_HasIndirectCall: |
53 | return GOCS(".has_indirect_call" ); |
54 | } |
55 | llvm_unreachable("Unexpected ResourceInfoKind." ); |
56 | } |
57 | |
58 | const MCExpr *MCResourceInfo::getSymRefExpr(StringRef FuncName, |
59 | ResourceInfoKind RIK, |
60 | MCContext &Ctx, bool IsLocal) { |
61 | return MCSymbolRefExpr::create(Symbol: getSymbol(FuncName, RIK, OutContext&: Ctx, IsLocal), Ctx); |
62 | } |
63 | |
64 | void MCResourceInfo::assignMaxRegs(MCContext &OutContext) { |
65 | // Assign expression to get the max register use to the max_num_Xgpr symbol. |
66 | MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext); |
67 | MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext); |
68 | MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext); |
69 | |
70 | auto assignMaxRegSym = [&OutContext](MCSymbol *Sym, int32_t RegCount) { |
71 | const MCExpr *MaxExpr = MCConstantExpr::create(Value: RegCount, Ctx&: OutContext); |
72 | Sym->setVariableValue(MaxExpr); |
73 | }; |
74 | |
75 | assignMaxRegSym(MaxVGPRSym, MaxVGPR); |
76 | assignMaxRegSym(MaxAGPRSym, MaxAGPR); |
77 | assignMaxRegSym(MaxSGPRSym, MaxSGPR); |
78 | } |
79 | |
80 | void MCResourceInfo::reset() { *this = MCResourceInfo(); } |
81 | |
82 | void MCResourceInfo::finalize(MCContext &OutContext) { |
83 | assert(!Finalized && "Cannot finalize ResourceInfo again." ); |
84 | Finalized = true; |
85 | assignMaxRegs(OutContext); |
86 | } |
87 | |
88 | MCSymbol *MCResourceInfo::getMaxVGPRSymbol(MCContext &OutContext) { |
89 | return OutContext.getOrCreateSymbol(Name: "amdgpu.max_num_vgpr" ); |
90 | } |
91 | |
92 | MCSymbol *MCResourceInfo::getMaxAGPRSymbol(MCContext &OutContext) { |
93 | return OutContext.getOrCreateSymbol(Name: "amdgpu.max_num_agpr" ); |
94 | } |
95 | |
96 | MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) { |
97 | return OutContext.getOrCreateSymbol(Name: "amdgpu.max_num_sgpr" ); |
98 | } |
99 | |
100 | // Tries to flatten recursive call register resource gathering. Simple cycle |
101 | // avoiding dfs to find the constants in the propagated symbols. |
102 | // Assumes: |
103 | // - RecSym has been confirmed to recurse (this means the callee symbols should |
104 | // all be populated, started at RecSym). |
105 | // - Shape of the resource symbol's MCExpr (`max` args are order agnostic): |
106 | // RecSym.MCExpr := max(<constant>+, <callee_symbol>*) |
107 | const MCExpr *MCResourceInfo::flattenedCycleMax(MCSymbol *RecSym, |
108 | ResourceInfoKind RIK, |
109 | MCContext &OutContext) { |
110 | SmallPtrSet<const MCExpr *, 8> Seen; |
111 | SmallVector<const MCExpr *, 8> WorkList; |
112 | int64_t Maximum = 0; |
113 | |
114 | const MCExpr *RecExpr = RecSym->getVariableValue(); |
115 | WorkList.push_back(Elt: RecExpr); |
116 | |
117 | while (!WorkList.empty()) { |
118 | const MCExpr *CurExpr = WorkList.pop_back_val(); |
119 | switch (CurExpr->getKind()) { |
120 | default: { |
121 | // Assuming the recursion is of shape `max(<constant>, <callee_symbol>)` |
122 | // where <callee_symbol> will eventually recurse. If this condition holds, |
123 | // the recursion occurs within some other (possibly unresolvable) MCExpr, |
124 | // thus using the worst case value then. |
125 | if (!AMDGPUMCExpr::isSymbolUsedInExpression(Sym: RecSym, E: CurExpr)) { |
126 | LLVM_DEBUG(dbgs() << "MCResUse: " << RecSym->getName() |
127 | << ": Recursion in unexpected sub-expression, using " |
128 | "module maximum\n" ); |
129 | switch (RIK) { |
130 | default: |
131 | break; |
132 | case RIK_NumVGPR: |
133 | return MCSymbolRefExpr::create(Symbol: getMaxVGPRSymbol(OutContext), |
134 | Ctx&: OutContext); |
135 | break; |
136 | case RIK_NumSGPR: |
137 | return MCSymbolRefExpr::create(Symbol: getMaxSGPRSymbol(OutContext), |
138 | Ctx&: OutContext); |
139 | break; |
140 | case RIK_NumAGPR: |
141 | return MCSymbolRefExpr::create(Symbol: getMaxAGPRSymbol(OutContext), |
142 | Ctx&: OutContext); |
143 | break; |
144 | } |
145 | } |
146 | break; |
147 | } |
148 | case MCExpr::ExprKind::Constant: { |
149 | int64_t Val = cast<MCConstantExpr>(Val: CurExpr)->getValue(); |
150 | Maximum = std::max(a: Maximum, b: Val); |
151 | break; |
152 | } |
153 | case MCExpr::ExprKind::SymbolRef: { |
154 | const MCSymbolRefExpr *SymExpr = cast<MCSymbolRefExpr>(Val: CurExpr); |
155 | const MCSymbol &SymRef = SymExpr->getSymbol(); |
156 | if (SymRef.isVariable()) { |
157 | const MCExpr *SymVal = SymRef.getVariableValue(); |
158 | if (Seen.insert(Ptr: SymVal).second) |
159 | WorkList.push_back(Elt: SymVal); |
160 | } |
161 | break; |
162 | } |
163 | case MCExpr::ExprKind::Target: { |
164 | const AMDGPUMCExpr *TargetExpr = cast<AMDGPUMCExpr>(Val: CurExpr); |
165 | if (TargetExpr->getKind() == AMDGPUMCExpr::VariantKind::AGVK_Max) { |
166 | for (auto &Arg : TargetExpr->getArgs()) |
167 | WorkList.push_back(Elt: Arg); |
168 | } |
169 | break; |
170 | } |
171 | } |
172 | } |
173 | |
174 | LLVM_DEBUG(dbgs() << "MCResUse: " << RecSym->getName() |
175 | << ": Using flattened max: << " << Maximum << '\n'); |
176 | |
177 | return MCConstantExpr::create(Value: Maximum, Ctx&: OutContext); |
178 | } |
179 | |
180 | void MCResourceInfo::assignResourceInfoExpr( |
181 | int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind, |
182 | const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees, |
183 | MCContext &OutContext) { |
184 | const TargetMachine &TM = MF.getTarget(); |
185 | bool IsLocal = MF.getFunction().hasLocalLinkage(); |
186 | MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction()); |
187 | const MCConstantExpr *LocalConstExpr = |
188 | MCConstantExpr::create(Value: LocalValue, Ctx&: OutContext); |
189 | const MCExpr *SymVal = LocalConstExpr; |
190 | MCSymbol *Sym = getSymbol(FuncName: FnSym->getName(), RIK, OutContext, IsLocal); |
191 | LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding " |
192 | << LocalValue << " as function local usage\n" ); |
193 | if (!Callees.empty()) { |
194 | SmallVector<const MCExpr *, 8> ArgExprs; |
195 | SmallPtrSet<const Function *, 8> Seen; |
196 | ArgExprs.push_back(Elt: LocalConstExpr); |
197 | |
198 | for (const Function *Callee : Callees) { |
199 | if (!Seen.insert(Ptr: Callee).second) |
200 | continue; |
201 | |
202 | bool IsCalleeLocal = Callee->hasLocalLinkage(); |
203 | MCSymbol *CalleeFnSym = TM.getSymbol(GV: &Callee->getFunction()); |
204 | MCSymbol *CalleeValSym = |
205 | getSymbol(FuncName: CalleeFnSym->getName(), RIK, OutContext, IsLocal: IsCalleeLocal); |
206 | |
207 | // Avoid constructing recursive definitions by detecting whether `Sym` is |
208 | // found transitively within any of its `CalleeValSym`. |
209 | if (!CalleeValSym->isVariable() || |
210 | !AMDGPUMCExpr::isSymbolUsedInExpression( |
211 | Sym, E: CalleeValSym->getVariableValue())) { |
212 | LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding " |
213 | << CalleeValSym->getName() << " as callee\n" ); |
214 | ArgExprs.push_back(Elt: MCSymbolRefExpr::create(Symbol: CalleeValSym, Ctx&: OutContext)); |
215 | } else { |
216 | LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() |
217 | << ": Recursion found, attempt flattening of cycle " |
218 | "for resource usage\n" ); |
219 | // In case of recursion for vgpr/sgpr/agpr resource usage: try to |
220 | // flatten and use the max of the call cycle. May still end up emitting |
221 | // module max if not fully resolvable. |
222 | switch (RIK) { |
223 | default: |
224 | break; |
225 | case RIK_NumVGPR: |
226 | case RIK_NumSGPR: |
227 | case RIK_NumAGPR: |
228 | ArgExprs.push_back(Elt: flattenedCycleMax(RecSym: CalleeValSym, RIK, OutContext)); |
229 | break; |
230 | } |
231 | } |
232 | } |
233 | if (ArgExprs.size() > 1) |
234 | SymVal = AMDGPUMCExpr::create(Kind, Args: ArgExprs, Ctx&: OutContext); |
235 | } |
236 | Sym->setVariableValue(SymVal); |
237 | } |
238 | |
239 | void MCResourceInfo::gatherResourceInfo( |
240 | const MachineFunction &MF, |
241 | const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &FRI, |
242 | MCContext &OutContext) { |
243 | // Worst case VGPR use for non-hardware-entrypoints. |
244 | MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext); |
245 | MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext); |
246 | MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext); |
247 | bool IsLocal = MF.getFunction().hasLocalLinkage(); |
248 | |
249 | if (!AMDGPU::isEntryFunctionCC(CC: MF.getFunction().getCallingConv())) { |
250 | addMaxVGPRCandidate(candidate: FRI.NumVGPR); |
251 | addMaxAGPRCandidate(candidate: FRI.NumAGPR); |
252 | addMaxSGPRCandidate(candidate: FRI.NumExplicitSGPR); |
253 | } |
254 | |
255 | const TargetMachine &TM = MF.getTarget(); |
256 | MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction()); |
257 | |
258 | LLVM_DEBUG(dbgs() << "MCResUse: Gathering resource information for " |
259 | << FnSym->getName() << '\n'); |
260 | LLVM_DEBUG({ |
261 | if (!FRI.Callees.empty()) { |
262 | dbgs() << "MCResUse: Callees:\n" ; |
263 | for (const Function *Callee : FRI.Callees) { |
264 | MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction()); |
265 | dbgs() << "MCResUse: " << CalleeFnSym->getName() << '\n'; |
266 | } |
267 | } |
268 | }); |
269 | |
270 | auto SetMaxReg = [&](MCSymbol *MaxSym, int32_t numRegs, |
271 | ResourceInfoKind RIK) { |
272 | if (!FRI.HasIndirectCall) { |
273 | assignResourceInfoExpr(LocalValue: numRegs, RIK, Kind: AMDGPUMCExpr::AGVK_Max, MF, |
274 | Callees: FRI.Callees, OutContext); |
275 | } else { |
276 | const MCExpr *SymRef = MCSymbolRefExpr::create(Symbol: MaxSym, Ctx&: OutContext); |
277 | MCSymbol *LocalNumSym = |
278 | getSymbol(FuncName: FnSym->getName(), RIK, OutContext, IsLocal); |
279 | const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax( |
280 | Args: {MCConstantExpr::create(Value: numRegs, Ctx&: OutContext), SymRef}, Ctx&: OutContext); |
281 | LocalNumSym->setVariableValue(MaxWithLocal); |
282 | LLVM_DEBUG(dbgs() << "MCResUse: " << LocalNumSym->getName() |
283 | << ": Indirect callee within, using module maximum\n" ); |
284 | } |
285 | }; |
286 | |
287 | LLVM_DEBUG(dbgs() << "MCResUse: " << FnSym->getName() << '\n'); |
288 | SetMaxReg(MaxVGPRSym, FRI.NumVGPR, RIK_NumVGPR); |
289 | SetMaxReg(MaxAGPRSym, FRI.NumAGPR, RIK_NumAGPR); |
290 | SetMaxReg(MaxSGPRSym, FRI.NumExplicitSGPR, RIK_NumSGPR); |
291 | |
292 | { |
293 | // The expression for private segment size should be: FRI.PrivateSegmentSize |
294 | // + max(FRI.Callees, FRI.CalleeSegmentSize) |
295 | SmallVector<const MCExpr *, 8> ArgExprs; |
296 | MCSymbol *Sym = |
297 | getSymbol(FuncName: FnSym->getName(), RIK: RIK_PrivateSegSize, OutContext, IsLocal); |
298 | if (FRI.CalleeSegmentSize) { |
299 | LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding " |
300 | << FRI.CalleeSegmentSize |
301 | << " for indirect/recursive callees within\n" ); |
302 | ArgExprs.push_back( |
303 | Elt: MCConstantExpr::create(Value: FRI.CalleeSegmentSize, Ctx&: OutContext)); |
304 | } |
305 | |
306 | SmallPtrSet<const Function *, 8> Seen; |
307 | Seen.insert(Ptr: &MF.getFunction()); |
308 | for (const Function *Callee : FRI.Callees) { |
309 | if (!Seen.insert(Ptr: Callee).second) |
310 | continue; |
311 | if (!Callee->isDeclaration()) { |
312 | bool IsCalleeLocal = Callee->hasLocalLinkage(); |
313 | MCSymbol *CalleeFnSym = TM.getSymbol(GV: &Callee->getFunction()); |
314 | MCSymbol *CalleeValSym = |
315 | getSymbol(FuncName: CalleeFnSym->getName(), RIK: RIK_PrivateSegSize, OutContext, |
316 | IsLocal: IsCalleeLocal); |
317 | |
318 | // Avoid constructing recursive definitions by detecting whether `Sym` |
319 | // is found transitively within any of its `CalleeValSym`. |
320 | if (!CalleeValSym->isVariable() || |
321 | !AMDGPUMCExpr::isSymbolUsedInExpression( |
322 | Sym, E: CalleeValSym->getVariableValue())) { |
323 | LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding " |
324 | << CalleeValSym->getName() << " as callee\n" ); |
325 | ArgExprs.push_back(Elt: MCSymbolRefExpr::create(Symbol: CalleeValSym, Ctx&: OutContext)); |
326 | } |
327 | } |
328 | } |
329 | const MCExpr *localConstExpr = |
330 | MCConstantExpr::create(Value: FRI.PrivateSegmentSize, Ctx&: OutContext); |
331 | LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding " |
332 | << FRI.PrivateSegmentSize |
333 | << " as function local usage\n" ); |
334 | if (!ArgExprs.empty()) { |
335 | const AMDGPUMCExpr *transitiveExpr = |
336 | AMDGPUMCExpr::createMax(Args: ArgExprs, Ctx&: OutContext); |
337 | localConstExpr = |
338 | MCBinaryExpr::createAdd(LHS: localConstExpr, RHS: transitiveExpr, Ctx&: OutContext); |
339 | } |
340 | Sym->setVariableValue(localConstExpr); |
341 | } |
342 | |
343 | auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) { |
344 | MCSymbol *Sym = getSymbol(FuncName: FnSym->getName(), RIK, OutContext, IsLocal); |
345 | LLVM_DEBUG( |
346 | dbgs() << "MCResUse: " << Sym->getName() << ": Adding " << LocalValue |
347 | << ", no further propagation as indirect callee found within\n" ); |
348 | Sym->setVariableValue(MCConstantExpr::create(Value: LocalValue, Ctx&: OutContext)); |
349 | }; |
350 | |
351 | if (!FRI.HasIndirectCall) { |
352 | assignResourceInfoExpr(LocalValue: FRI.UsesVCC, RIK: ResourceInfoKind::RIK_UsesVCC, |
353 | Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext); |
354 | assignResourceInfoExpr(LocalValue: FRI.UsesFlatScratch, |
355 | RIK: ResourceInfoKind::RIK_UsesFlatScratch, |
356 | Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext); |
357 | assignResourceInfoExpr(LocalValue: FRI.HasDynamicallySizedStack, |
358 | RIK: ResourceInfoKind::RIK_HasDynSizedStack, |
359 | Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext); |
360 | assignResourceInfoExpr(LocalValue: FRI.HasRecursion, RIK: ResourceInfoKind::RIK_HasRecursion, |
361 | Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext); |
362 | assignResourceInfoExpr(LocalValue: FRI.HasIndirectCall, |
363 | RIK: ResourceInfoKind::RIK_HasIndirectCall, |
364 | Kind: AMDGPUMCExpr::AGVK_Or, MF, Callees: FRI.Callees, OutContext); |
365 | } else { |
366 | SetToLocal(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC); |
367 | SetToLocal(FRI.UsesFlatScratch, ResourceInfoKind::RIK_UsesFlatScratch); |
368 | SetToLocal(FRI.HasDynamicallySizedStack, |
369 | ResourceInfoKind::RIK_HasDynSizedStack); |
370 | SetToLocal(FRI.HasRecursion, ResourceInfoKind::RIK_HasRecursion); |
371 | SetToLocal(FRI.HasIndirectCall, ResourceInfoKind::RIK_HasIndirectCall); |
372 | } |
373 | } |
374 | |
375 | const MCExpr *MCResourceInfo::createTotalNumVGPRs(const MachineFunction &MF, |
376 | MCContext &Ctx) { |
377 | const TargetMachine &TM = MF.getTarget(); |
378 | MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction()); |
379 | bool IsLocal = MF.getFunction().hasLocalLinkage(); |
380 | return AMDGPUMCExpr::createTotalNumVGPR( |
381 | NumAGPR: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_NumAGPR, Ctx, IsLocal), |
382 | NumVGPR: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_NumVGPR, Ctx, IsLocal), Ctx); |
383 | } |
384 | |
385 | const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF, |
386 | bool hasXnack, |
387 | MCContext &Ctx) { |
388 | const TargetMachine &TM = MF.getTarget(); |
389 | MCSymbol *FnSym = TM.getSymbol(GV: &MF.getFunction()); |
390 | bool IsLocal = MF.getFunction().hasLocalLinkage(); |
391 | return MCBinaryExpr::createAdd( |
392 | LHS: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_NumSGPR, Ctx, IsLocal), |
393 | RHS: AMDGPUMCExpr::createExtraSGPRs( |
394 | VCCUsed: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_UsesVCC, Ctx, IsLocal), |
395 | FlatScrUsed: getSymRefExpr(FuncName: FnSym->getName(), RIK: RIK_UsesFlatScratch, Ctx, IsLocal), |
396 | XNACKUsed: hasXnack, Ctx), |
397 | Ctx); |
398 | } |
399 | |