1//===- KernelInfo.cpp - Kernel Analysis -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the KernelInfoPrinter class used to emit remarks about
10// function properties from a GPU kernel.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/KernelInfo.h"
15#include "llvm/ADT/SmallString.h"
16#include "llvm/ADT/StringExtras.h"
17#include "llvm/Analysis/OptimizationRemarkEmitter.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/IR/DebugInfo.h"
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/Instructions.h"
22#include "llvm/IR/Metadata.h"
23#include "llvm/IR/Module.h"
24#include "llvm/IR/PassManager.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "kernel-info"
29
30namespace {
31
32/// Data structure holding function info for kernels.
33class KernelInfo {
34 void updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE);
35
36public:
37 static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
38 TargetMachine *TM);
39
40 /// Whether the function has external linkage and is not a kernel function.
41 bool ExternalNotKernel = false;
42
43 /// Launch bounds.
44 SmallVector<std::pair<StringRef, int64_t>> LaunchBounds;
45
46 /// The number of alloca instructions inside the function, the number of those
47 /// with allocation sizes that cannot be determined at compile time, and the
48 /// sum of the sizes that can be.
49 ///
50 /// With the current implementation for at least some GPU archs,
51 /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in
52 /// case the implementation changes.
53 int64_t Allocas = 0;
54 int64_t AllocasDyn = 0;
55 int64_t AllocasStaticSizeSum = 0;
56
57 /// Number of direct/indirect calls (anything derived from CallBase).
58 int64_t DirectCalls = 0;
59 int64_t IndirectCalls = 0;
60
61 /// Number of direct calls made from this function to other functions
62 /// defined in this module.
63 int64_t DirectCallsToDefinedFunctions = 0;
64
65 /// Number of direct calls to inline assembly.
66 int64_t InlineAssemblyCalls = 0;
67
68 /// Number of calls of type InvokeInst.
69 int64_t Invokes = 0;
70
71 /// Target-specific flat address space.
72 unsigned FlatAddrspace;
73
74 /// Number of flat address space memory accesses (via load, store, etc.).
75 int64_t FlatAddrspaceAccesses = 0;
76};
77
78} // end anonymous namespace
79
80static void identifyCallee(OptimizationRemark &R, const Module *M,
81 const Value *V, StringRef Kind = "") {
82 SmallString<100> Name; // might be function name or asm expression
83 if (const Function *F = dyn_cast<Function>(Val: V)) {
84 if (auto *SubProgram = F->getSubprogram()) {
85 if (SubProgram->isArtificial())
86 R << "artificial ";
87 Name = SubProgram->getName();
88 }
89 }
90 if (Name.empty()) {
91 raw_svector_ostream OS(Name);
92 V->printAsOperand(O&: OS, /*PrintType=*/false, M);
93 }
94 if (!Kind.empty())
95 R << Kind << " ";
96 R << "'" << Name << "'";
97}
98
99static void identifyFunction(OptimizationRemark &R, const Function &F) {
100 identifyCallee(R, M: F.getParent(), V: &F, Kind: "function");
101}
102
103static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
104 const AllocaInst &Alloca,
105 TypeSize::ScalarTy StaticSize) {
106 ORE.emit(RemarkBuilder: [&] {
107 StringRef DbgName;
108 DebugLoc Loc;
109 bool Artificial = false;
110 auto DVRs = findDVRDeclares(V: &const_cast<AllocaInst &>(Alloca));
111 if (!DVRs.empty()) {
112 const DbgVariableRecord &DVR = **DVRs.begin();
113 DbgName = DVR.getVariable()->getName();
114 Loc = DVR.getDebugLoc();
115 Artificial = DVR.Variable->isArtificial();
116 }
117 OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
118 Alloca.getParent());
119 R << "in ";
120 identifyFunction(R, F: Caller);
121 R << ", ";
122 if (Artificial)
123 R << "artificial ";
124 SmallString<20> ValName;
125 raw_svector_ostream OS(ValName);
126 Alloca.printAsOperand(O&: OS, /*PrintType=*/false, M: Caller.getParent());
127 R << "alloca ('" << ValName << "') ";
128 if (!DbgName.empty())
129 R << "for '" << DbgName << "' ";
130 else
131 R << "without debug info ";
132 R << "with ";
133 if (StaticSize)
134 R << "static size of " << itostr(X: StaticSize) << " bytes";
135 else
136 R << "dynamic size";
137 return R;
138 });
139}
140
141static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
142 const CallBase &Call, StringRef CallKind,
143 StringRef RemarkKind) {
144 ORE.emit(RemarkBuilder: [&] {
145 OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
146 R << "in ";
147 identifyFunction(R, F: Caller);
148 R << ", " << CallKind << ", callee is ";
149 identifyCallee(R, M: Caller.getParent(), V: Call.getCalledOperand());
150 return R;
151 });
152}
153
154static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
155 const Function &Caller,
156 const Instruction &Inst) {
157 ORE.emit(RemarkBuilder: [&] {
158 OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
159 R << "in ";
160 identifyFunction(R, F: Caller);
161 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &Inst)) {
162 R << ", '" << II->getCalledFunction()->getName() << "' call";
163 } else {
164 R << ", '" << Inst.getOpcodeName() << "' instruction";
165 }
166 if (!Inst.getType()->isVoidTy()) {
167 SmallString<20> Name;
168 raw_svector_ostream OS(Name);
169 Inst.printAsOperand(O&: OS, /*PrintType=*/false, M: Caller.getParent());
170 R << " ('" << Name << "')";
171 }
172 R << " accesses memory in flat address space";
173 return R;
174 });
175}
176
177void KernelInfo::updateForBB(const BasicBlock &BB,
178 OptimizationRemarkEmitter &ORE) {
179 const Function &F = *BB.getParent();
180 const Module &M = *F.getParent();
181 const DataLayout &DL = M.getDataLayout();
182 for (const Instruction &I : BB) {
183 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: &I)) {
184 ++Allocas;
185 TypeSize::ScalarTy StaticSize = 0;
186 if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) {
187 StaticSize = Size->getFixedValue();
188 assert(StaticSize <=
189 (TypeSize::ScalarTy)std::numeric_limits<int64_t>::max());
190 AllocasStaticSizeSum += StaticSize;
191 } else {
192 ++AllocasDyn;
193 }
194 remarkAlloca(ORE, Caller: F, Alloca: *Alloca, StaticSize);
195 } else if (const CallBase *Call = dyn_cast<CallBase>(Val: &I)) {
196 if (isa<PseudoProbeInst>(Val: Call))
197 continue;
198 SmallString<40> CallKind;
199 SmallString<40> RemarkKind;
200 if (Call->isIndirectCall()) {
201 ++IndirectCalls;
202 CallKind += "indirect";
203 RemarkKind += "Indirect";
204 } else {
205 ++DirectCalls;
206 CallKind += "direct";
207 RemarkKind += "Direct";
208 }
209 if (isa<InvokeInst>(Val: Call)) {
210 ++Invokes;
211 CallKind += " invoke";
212 RemarkKind += "Invoke";
213 } else {
214 CallKind += " call";
215 RemarkKind += "Call";
216 }
217 if (!Call->isIndirectCall()) {
218 if (const Function *Callee = Call->getCalledFunction()) {
219 if (!Callee->isIntrinsic() && !Callee->isDeclaration()) {
220 ++DirectCallsToDefinedFunctions;
221 CallKind += " to defined function";
222 RemarkKind += "ToDefinedFunction";
223 }
224 } else if (Call->isInlineAsm()) {
225 ++InlineAssemblyCalls;
226 CallKind += " to inline assembly";
227 RemarkKind += "ToInlineAssembly";
228 }
229 }
230 remarkCall(ORE, Caller: F, Call: *Call, CallKind, RemarkKind);
231 if (const AnyMemIntrinsic *MI = dyn_cast<AnyMemIntrinsic>(Val: Call)) {
232 if (MI->getDestAddressSpace() == FlatAddrspace) {
233 ++FlatAddrspaceAccesses;
234 remarkFlatAddrspaceAccess(ORE, Caller: F, Inst: I);
235 } else if (const AnyMemTransferInst *MT =
236 dyn_cast<AnyMemTransferInst>(Val: MI)) {
237 if (MT->getSourceAddressSpace() == FlatAddrspace) {
238 ++FlatAddrspaceAccesses;
239 remarkFlatAddrspaceAccess(ORE, Caller: F, Inst: I);
240 }
241 }
242 }
243 } else if (const LoadInst *Load = dyn_cast<LoadInst>(Val: &I)) {
244 if (Load->getPointerAddressSpace() == FlatAddrspace) {
245 ++FlatAddrspaceAccesses;
246 remarkFlatAddrspaceAccess(ORE, Caller: F, Inst: I);
247 }
248 } else if (const StoreInst *Store = dyn_cast<StoreInst>(Val: &I)) {
249 if (Store->getPointerAddressSpace() == FlatAddrspace) {
250 ++FlatAddrspaceAccesses;
251 remarkFlatAddrspaceAccess(ORE, Caller: F, Inst: I);
252 }
253 } else if (const AtomicRMWInst *At = dyn_cast<AtomicRMWInst>(Val: &I)) {
254 if (At->getPointerAddressSpace() == FlatAddrspace) {
255 ++FlatAddrspaceAccesses;
256 remarkFlatAddrspaceAccess(ORE, Caller: F, Inst: I);
257 }
258 } else if (const AtomicCmpXchgInst *At = dyn_cast<AtomicCmpXchgInst>(Val: &I)) {
259 if (At->getPointerAddressSpace() == FlatAddrspace) {
260 ++FlatAddrspaceAccesses;
261 remarkFlatAddrspaceAccess(ORE, Caller: F, Inst: I);
262 }
263 }
264 }
265}
266
267static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
268 StringRef Name, int64_t Value) {
269 ORE.emit(RemarkBuilder: [&] {
270 OptimizationRemark R(DEBUG_TYPE, Name, &F);
271 R << "in ";
272 identifyFunction(R, F);
273 R << ", " << Name << " = " << itostr(X: Value);
274 return R;
275 });
276}
277
278static std::optional<int64_t> parseFnAttrAsInteger(Function &F,
279 StringRef Name) {
280 if (!F.hasFnAttribute(Kind: Name))
281 return std::nullopt;
282 return F.getFnAttributeAsParsedInteger(Kind: Name);
283}
284
285void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
286 TargetMachine *TM) {
287 KernelInfo KI;
288 TargetTransformInfo &TheTTI = FAM.getResult<TargetIRAnalysis>(IR&: F);
289 KI.FlatAddrspace = TheTTI.getFlatAddressSpace();
290
291 // Record function properties.
292 KI.ExternalNotKernel = F.hasExternalLinkage() && !F.hasKernelCallingConv();
293 for (StringRef Name : {"omp_target_num_teams", "omp_target_thread_limit"}) {
294 if (auto Val = parseFnAttrAsInteger(F, Name))
295 KI.LaunchBounds.push_back(Elt: {Name, *Val});
296 }
297 TheTTI.collectKernelLaunchBounds(F, LB&: KI.LaunchBounds);
298
299 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
300 for (const auto &BB : F)
301 KI.updateForBB(BB, ORE);
302
303#define REMARK_PROPERTY(PROP_NAME) \
304 remarkProperty(ORE, F, #PROP_NAME, KI.PROP_NAME)
305 REMARK_PROPERTY(ExternalNotKernel);
306 for (auto LB : KI.LaunchBounds)
307 remarkProperty(ORE, F, Name: LB.first, Value: LB.second);
308 REMARK_PROPERTY(Allocas);
309 REMARK_PROPERTY(AllocasStaticSizeSum);
310 REMARK_PROPERTY(AllocasDyn);
311 REMARK_PROPERTY(DirectCalls);
312 REMARK_PROPERTY(IndirectCalls);
313 REMARK_PROPERTY(DirectCallsToDefinedFunctions);
314 REMARK_PROPERTY(InlineAssemblyCalls);
315 REMARK_PROPERTY(Invokes);
316 REMARK_PROPERTY(FlatAddrspaceAccesses);
317#undef REMARK_PROPERTY
318}
319
320PreservedAnalyses KernelInfoPrinter::run(Function &F,
321 FunctionAnalysisManager &AM) {
322 // Skip it if remarks are not enabled as it will do nothing useful.
323 if (F.getContext().getDiagHandlerPtr()->isPassedOptRemarkEnabled(DEBUG_TYPE))
324 KernelInfo::emitKernelInfo(F, FAM&: AM, TM);
325 return PreservedAnalyses::all();
326}
327