1 | //===- ModuleSummaryAnalysis.cpp - Module summary index builder -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass builds a ModuleSummaryIndex object for the module, to be written |
10 | // to bitcode or LLVM assembly. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
15 | #include "llvm/ADT/ArrayRef.h" |
16 | #include "llvm/ADT/DenseSet.h" |
17 | #include "llvm/ADT/MapVector.h" |
18 | #include "llvm/ADT/STLExtras.h" |
19 | #include "llvm/ADT/SetVector.h" |
20 | #include "llvm/ADT/SmallPtrSet.h" |
21 | #include "llvm/ADT/SmallVector.h" |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
24 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
25 | #include "llvm/Analysis/ConstantFolding.h" |
26 | #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" |
27 | #include "llvm/Analysis/LoopInfo.h" |
28 | #include "llvm/Analysis/MemoryProfileInfo.h" |
29 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
30 | #include "llvm/Analysis/StackSafetyAnalysis.h" |
31 | #include "llvm/Analysis/TypeMetadataUtils.h" |
32 | #include "llvm/IR/Attributes.h" |
33 | #include "llvm/IR/BasicBlock.h" |
34 | #include "llvm/IR/Constant.h" |
35 | #include "llvm/IR/Constants.h" |
36 | #include "llvm/IR/Dominators.h" |
37 | #include "llvm/IR/Function.h" |
38 | #include "llvm/IR/GlobalAlias.h" |
39 | #include "llvm/IR/GlobalValue.h" |
40 | #include "llvm/IR/GlobalVariable.h" |
41 | #include "llvm/IR/Instructions.h" |
42 | #include "llvm/IR/IntrinsicInst.h" |
43 | #include "llvm/IR/Metadata.h" |
44 | #include "llvm/IR/Module.h" |
45 | #include "llvm/IR/ModuleSummaryIndex.h" |
46 | #include "llvm/IR/Use.h" |
47 | #include "llvm/IR/User.h" |
48 | #include "llvm/InitializePasses.h" |
49 | #include "llvm/Object/ModuleSymbolTable.h" |
50 | #include "llvm/Object/SymbolicFile.h" |
51 | #include "llvm/Pass.h" |
52 | #include "llvm/Support/Casting.h" |
53 | #include "llvm/Support/CommandLine.h" |
54 | #include "llvm/Support/Compiler.h" |
55 | #include "llvm/Support/FileSystem.h" |
56 | #include <cassert> |
57 | #include <cstdint> |
58 | #include <vector> |
59 | |
60 | using namespace llvm; |
61 | using namespace llvm::memprof; |
62 | |
63 | #define DEBUG_TYPE "module-summary-analysis" |
64 | |
// Option to force edges cold, which will block importing when the
// -import-cold-multiplier is set to 0. Useful for debugging.
67 | namespace llvm { |
68 | FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold = |
69 | FunctionSummary::FSHT_None; |
70 | } // namespace llvm |
71 | |
static cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC(
    "force-summary-edges-cold", cl::Hidden,
    cl::location(ForceSummaryEdgesCold),
    cl::desc("Force all edges in the function summary to cold"),
    cl::values(clEnumValN(FunctionSummary::FSHT_None, "none", "None."),
               clEnumValN(FunctionSummary::FSHT_AllNonCritical,
                          "all-non-critical", "All non-critical edges."),
               clEnumValN(FunctionSummary::FSHT_All, "all", "All edges.")));

static cl::opt<std::string> ModuleSummaryDotFile(
    "module-summary-dot-file", cl::Hidden, cl::value_desc("filename"),
    cl::desc("File to emit dot graph of new summary into"));

static cl::opt<bool> EnableMemProfIndirectCallSupport(
    "enable-memprof-indirect-call-support", cl::init(true), cl::Hidden,
    cl::desc(
        "Enable MemProf support for summarizing and cloning indirect calls"));
88 | |
89 | LLVM_ABI extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize; |
90 | |
91 | extern cl::opt<unsigned> MaxNumVTableAnnotations; |
92 | |
93 | extern cl::opt<bool> MemProfReportHintedSizes; |
94 | |
// Walk through the operands of a given User via worklist iteration and
// populate the set of GlobalValue references encountered. Invoked either on
// an Instruction or a GlobalVariable (which walks its initializer).
// Return true if any of the operands contains a blockaddress. This is
// important to know when computing the summary for a global variable: if a
// global variable references a basic block address, we can't import it
// separately from the function containing that basic block. For simplicity
// we currently don't import such global variables at all. When importing a
// function we don't care whether any instruction in it takes the address of
// a basic block, because an instruction can only take the address of a basic
// block located in the same function.
// Set `RefLocalLinkageIFunc` to true if the analyzed value references a
// local-linkage ifunc.
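// For example (a hypothetical IR sketch), given a global such as
//   @gv = global ptr blockaddress(@f, %bb)
// walking @gv's initializer returns true, and the variable's summary is
// later marked not eligible for import.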
static bool
findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
             SetVector<ValueInfo, SmallVector<ValueInfo, 0>> &RefEdges,
             SmallPtrSet<const User *, 8> &Visited,
             bool &RefLocalLinkageIFunc) {
  bool HasBlockAddress = false;
  SmallVector<const User *, 32> Worklist;
  if (Visited.insert(CurUser).second)
    Worklist.push_back(CurUser);

  while (!Worklist.empty()) {
    const User *U = Worklist.pop_back_val();
    const auto *CB = dyn_cast<CallBase>(U);

    for (const auto &OI : U->operands()) {
      const User *Operand = dyn_cast<User>(OI);
      if (!Operand)
        continue;
      if (isa<BlockAddress>(Operand)) {
        HasBlockAddress = true;
        continue;
      }
      if (auto *GV = dyn_cast<GlobalValue>(Operand)) {
        // We have a reference to a global value. This should be added to
        // the reference set unless it is a callee. Callees are handled
        // specially by WriteFunction and are added to a separate list.
        if (!(CB && CB->isCallee(&OI))) {
          // If an ifunc has local linkage, do not add it to the ref edges,
          // and set `RefLocalLinkageIFunc` to true. The referencer is not
          // eligible for import: an ifunc doesn't have a summary and ThinLTO
          // cannot promote it, so importing the referencer may cause linkage
          // errors.
          if (auto *GI = dyn_cast_if_present<GlobalIFunc>(GV);
              GI && GI->hasLocalLinkage()) {
            RefLocalLinkageIFunc = true;
            continue;
          }
          RefEdges.insert(Index.getOrInsertValueInfo(GV));
        }
        continue;
      }
      if (Visited.insert(Operand).second)
        Worklist.push_back(Operand);
    }
  }

  const Instruction *I = dyn_cast<Instruction>(CurUser);
  if (I) {
    uint64_t TotalCount = 0;
    // MaxNumVTableAnnotations is the maximum number of vtables annotated on
    // the instruction.
    auto ValueDataArray = getValueProfDataFromInst(
        *I, IPVK_VTableTarget, MaxNumVTableAnnotations, TotalCount);

    for (const auto &V : ValueDataArray)
      RefEdges.insert(Index.getOrInsertValueInfo(/* VTableGUID = */ V.Value));
  }
  return HasBlockAddress;
}
166 | |
167 | static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount, |
168 | ProfileSummaryInfo *PSI) { |
169 | if (!PSI) |
170 | return CalleeInfo::HotnessType::Unknown; |
  if (PSI->isHotCount(ProfileCount))
    return CalleeInfo::HotnessType::Hot;
  if (PSI->isColdCount(ProfileCount))
174 | return CalleeInfo::HotnessType::Cold; |
175 | return CalleeInfo::HotnessType::None; |
176 | } |
177 | |
178 | static bool isNonRenamableLocal(const GlobalValue &GV) { |
179 | return GV.hasSection() && GV.hasLocalLinkage(); |
180 | } |
181 | |
182 | /// Determine whether this call has all constant integer arguments (excluding |
183 | /// "this") and summarize it to VCalls or ConstVCalls as appropriate. |
static void addVCallToSet(
    DevirtCallSite Call, GlobalValue::GUID Guid,
    SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
        &VCalls,
    SetVector<FunctionSummary::ConstVCall,
              std::vector<FunctionSummary::ConstVCall>> &ConstVCalls) {
  std::vector<uint64_t> Args;
  // Start from the second argument to skip the "this" pointer.
  for (auto &Arg : drop_begin(Call.CB.args())) {
    auto *CI = dyn_cast<ConstantInt>(Arg);
    if (!CI || CI->getBitWidth() > 64) {
      VCalls.insert({Guid, Call.Offset});
      return;
    }
    Args.push_back(CI->getZExtValue());
  }
  ConstVCalls.insert({{Guid, Call.Offset}, std::move(Args)});
}
202 | |
203 | /// If this intrinsic call requires that we add information to the function |
204 | /// summary, do so via the non-constant reference arguments. |
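/// For example (a hypothetical IR sketch), a devirtualization guard such as
///   %p = call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS1A")
///   call void @llvm.assume(i1 %p)
/// feeds TypeTestAssume(Const)VCalls, while a type test with other uses is
/// also recorded in TypeTests for the lowering pass.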
205 | static void addIntrinsicToSummary( |
206 | const CallInst *CI, |
207 | SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> &TypeTests, |
208 | SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>> |
209 | &TypeTestAssumeVCalls, |
210 | SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>> |
211 | &TypeCheckedLoadVCalls, |
212 | SetVector<FunctionSummary::ConstVCall, |
213 | std::vector<FunctionSummary::ConstVCall>> |
214 | &TypeTestAssumeConstVCalls, |
215 | SetVector<FunctionSummary::ConstVCall, |
216 | std::vector<FunctionSummary::ConstVCall>> |
217 | &TypeCheckedLoadConstVCalls, |
218 | DominatorTree &DT) { |
219 | switch (CI->getCalledFunction()->getIntrinsicID()) { |
220 | case Intrinsic::type_test: |
221 | case Intrinsic::public_type_test: { |
    auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
    auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
    if (!TypeId)
      break;
    GlobalValue::GUID Guid =
        GlobalValue::getGUIDAssumingExternalLinkage(TypeId->getString());
228 | |
229 | // Produce a summary from type.test intrinsics. We only summarize type.test |
230 | // intrinsics that are used other than by an llvm.assume intrinsic. |
231 | // Intrinsics that are assumed are relevant only to the devirtualization |
232 | // pass, not the type test lowering pass. |
    bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
      return !isa<AssumeInst>(CIU.getUser());
    });
    if (HasNonAssumeUses)
      TypeTests.insert(Guid);
238 | |
239 | SmallVector<DevirtCallSite, 4> DevirtCalls; |
240 | SmallVector<CallInst *, 4> Assumes; |
241 | findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); |
242 | for (auto &Call : DevirtCalls) |
      addVCallToSet(Call, Guid, TypeTestAssumeVCalls,
                    TypeTestAssumeConstVCalls);
245 | |
246 | break; |
247 | } |
248 | |
249 | case Intrinsic::type_checked_load_relative: |
250 | case Intrinsic::type_checked_load: { |
    auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(2));
    auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
    if (!TypeId)
      break;
    GlobalValue::GUID Guid =
        GlobalValue::getGUIDAssumingExternalLinkage(TypeId->getString());
257 | |
258 | SmallVector<DevirtCallSite, 4> DevirtCalls; |
259 | SmallVector<Instruction *, 4> LoadedPtrs; |
260 | SmallVector<Instruction *, 4> Preds; |
261 | bool HasNonCallUses = false; |
262 | findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds, |
263 | HasNonCallUses, CI, DT); |
264 | // Any non-call uses of the result of llvm.type.checked.load will |
265 | // prevent us from optimizing away the llvm.type.test. |
    if (HasNonCallUses)
      TypeTests.insert(Guid);
    for (auto &Call : DevirtCalls)
      addVCallToSet(Call, Guid, TypeCheckedLoadVCalls,
                    TypeCheckedLoadConstVCalls);
271 | |
272 | break; |
273 | } |
274 | default: |
275 | break; |
276 | } |
277 | } |
278 | |
279 | static bool isNonVolatileLoad(const Instruction *I) { |
  if (const auto *LI = dyn_cast<LoadInst>(I))
281 | return !LI->isVolatile(); |
282 | |
283 | return false; |
284 | } |
285 | |
286 | static bool isNonVolatileStore(const Instruction *I) { |
  if (const auto *SI = dyn_cast<StoreInst>(I))
288 | return !SI->isVolatile(); |
289 | |
290 | return false; |
291 | } |
292 | |
293 | // Returns true if the function definition must be unreachable. |
294 | // |
295 | // Note if this helper function returns true, `F` is guaranteed |
296 | // to be unreachable; if it returns false, `F` might still |
297 | // be unreachable but not covered by this helper function. |
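// For example, a function whose entry block is just
//   entry:
//     unreachable
// is detected here, while a function that only reaches an 'unreachable' in a
// later block is not.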
298 | static bool mustBeUnreachableFunction(const Function &F) { |
299 | // A function must be unreachable if its entry block ends with an |
300 | // 'unreachable'. |
301 | assert(!F.isDeclaration()); |
  return isa<UnreachableInst>(F.getEntryBlock().getTerminator());
303 | } |
304 | |
305 | static void computeFunctionSummary( |
306 | ModuleSummaryIndex &Index, const Module &M, const Function &F, |
307 | BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT, |
308 | bool HasLocalsInUsedOrAsm, DenseSet<GlobalValue::GUID> &CantBePromoted, |
309 | bool IsThinLTO, |
310 | std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) { |
  // Summary is not currently supported for anonymous functions; they should
  // have been named by now.
313 | assert(F.hasName()); |
314 | |
315 | unsigned NumInsts = 0; |
316 | // Map from callee ValueId to profile count. Used to accumulate profile |
317 | // counts for all static calls to a given callee. |
318 | MapVector<ValueInfo, CalleeInfo, DenseMap<ValueInfo, unsigned>, |
319 | SmallVector<FunctionSummary::EdgeTy, 0>> |
320 | CallGraphEdges; |
321 | SetVector<ValueInfo, SmallVector<ValueInfo, 0>> RefEdges, LoadRefEdges, |
322 | StoreRefEdges; |
323 | SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> TypeTests; |
324 | SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>> |
325 | TypeTestAssumeVCalls, TypeCheckedLoadVCalls; |
326 | SetVector<FunctionSummary::ConstVCall, |
327 | std::vector<FunctionSummary::ConstVCall>> |
328 | TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls; |
329 | ICallPromotionAnalysis ICallAnalysis; |
330 | SmallPtrSet<const User *, 8> Visited; |
331 | |
332 | // Add personality function, prefix data and prologue data to function's ref |
333 | // list. |
334 | bool HasLocalIFuncCallOrRef = false; |
  findRefEdges(Index, &F, RefEdges, Visited, HasLocalIFuncCallOrRef);
336 | std::vector<const Instruction *> NonVolatileLoads; |
337 | std::vector<const Instruction *> NonVolatileStores; |
338 | |
339 | std::vector<CallsiteInfo> Callsites; |
340 | std::vector<AllocInfo> Allocs; |
341 | |
342 | #ifndef NDEBUG |
343 | DenseSet<const CallBase *> CallsThatMayHaveMemprofSummary; |
344 | #endif |
345 | |
346 | bool HasInlineAsmMaybeReferencingInternal = false; |
347 | bool HasIndirBranchToBlockAddress = false; |
348 | bool HasUnknownCall = false; |
349 | bool MayThrow = false; |
350 | for (const BasicBlock &BB : F) { |
    // We don't allow inlining of a function with an indirect branch to a
    // blockaddress. If the blockaddress escapes the function, e.g., via a
    // global variable, inlining may lead to an invalid cross-function
    // reference, so we shouldn't import such a function either.
    if (BB.hasAddressTaken()) {
      for (User *U : BlockAddress::get(const_cast<BasicBlock *>(&BB))->users())
        if (!isa<CallBrInst>(*U)) {
358 | HasIndirBranchToBlockAddress = true; |
359 | break; |
360 | } |
361 | } |
362 | |
363 | for (const Instruction &I : BB) { |
364 | if (I.isDebugOrPseudoInst()) |
365 | continue; |
366 | ++NumInsts; |
367 | |
      // A regular LTO module doesn't participate in ThinLTO import, so no
      // reference from it can be read- or write-only, since that would
      // require importing the variable as a local copy.
      if (IsThinLTO) {
        if (isNonVolatileLoad(&I)) {
          // Postpone processing of non-volatile load instructions.
          // See comments below.
          Visited.insert(&I);
          NonVolatileLoads.push_back(&I);
          continue;
        } else if (isNonVolatileStore(&I)) {
          Visited.insert(&I);
          NonVolatileStores.push_back(&I);
          // All references from the second operand of a store (the
          // destination address) can be considered write-only if they're not
          // referenced by any non-store instruction. References from the
          // first operand of a store (the stored value) can't be treated as
          // either read- or write-only, so we add them to RefEdges as we do
          // with all other instructions except non-volatile loads.
          Value *Stored = I.getOperand(0);
          if (auto *GV = dyn_cast<GlobalValue>(Stored))
            // findRefEdges will try to examine GV operands, so instead
            // of calling it we should add GV to RefEdges directly.
            RefEdges.insert(Index.getOrInsertValueInfo(GV));
          else if (auto *U = dyn_cast<User>(Stored))
            findRefEdges(Index, U, RefEdges, Visited, HasLocalIFuncCallOrRef);
          continue;
        }
      }
      findRefEdges(Index, &I, RefEdges, Visited, HasLocalIFuncCallOrRef);
      const auto *CB = dyn_cast<CallBase>(&I);
399 | if (!CB) { |
400 | if (I.mayThrow()) |
401 | MayThrow = true; |
402 | continue; |
403 | } |
404 | |
      const auto *CI = dyn_cast<CallInst>(&I);
406 | // Since we don't know exactly which local values are referenced in inline |
407 | // assembly, conservatively mark the function as possibly referencing |
408 | // a local value from inline assembly to ensure we don't export a |
409 | // reference (which would require renaming and promotion of the |
410 | // referenced value). |
411 | if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm()) |
412 | HasInlineAsmMaybeReferencingInternal = true; |
413 | |
414 | // Compute this once per indirect call. |
415 | uint32_t NumCandidates = 0; |
416 | uint64_t TotalCount = 0; |
417 | MutableArrayRef<InstrProfValueData> CandidateProfileData; |
418 | |
      auto *CalledValue = CB->getCalledOperand();
      auto *CalledFunction = CB->getCalledFunction();
      if (CalledValue && !CalledFunction) {
        CalledValue = CalledValue->stripPointerCasts();
        // Stripping pointer casts can reveal a called function.
        CalledFunction = dyn_cast<Function>(CalledValue);
      }
      // Check if this is an alias to a function. If so, get the
      // called aliasee for the checks below.
      if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
        assert(!CalledFunction &&
               "Expected null called function in callsite for alias");
        CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
      }
432 | // Check if this is a direct call to a known function or a known |
433 | // intrinsic, or an indirect call with profile data. |
434 | if (CalledFunction) { |
435 | if (CI && CalledFunction->isIntrinsic()) { |
436 | addIntrinsicToSummary( |
437 | CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls, |
438 | TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT); |
439 | continue; |
440 | } |
441 | // We should have named any anonymous globals |
442 | assert(CalledFunction->hasName()); |
        auto ScaledCount = PSI->getProfileCount(*CB, BFI);
        auto Hotness = ScaledCount ? getHotness(*ScaledCount, PSI)
445 | : CalleeInfo::HotnessType::Unknown; |
446 | if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None) |
447 | Hotness = CalleeInfo::HotnessType::Cold; |
448 | |
449 | // Use the original CalledValue, in case it was an alias. We want |
450 | // to record the call edge to the alias in that case. Eventually |
451 | // an alias summary will be created to associate the alias and |
452 | // aliasee. |
        auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo(
            cast<GlobalValue>(CalledValue))];
        ValueInfo.updateHotness(Hotness);
456 | if (CB->isTailCall()) |
457 | ValueInfo.setHasTailCall(true); |
458 | // Add the relative block frequency to CalleeInfo if there is no profile |
459 | // information. |
460 | if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) { |
          uint64_t BBFreq = BFI->getBlockFreq(&BB).getFrequency();
          uint64_t EntryFreq = BFI->getEntryFreq().getFrequency();
          ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq);
464 | } |
465 | } else { |
466 | HasUnknownCall = true; |
        // If F is imported, a local linkage ifunc (e.g. target_clones on a
        // static function) called by F will be cloned. Since summaries don't
        // track ifuncs, we do not know that the implementation functions
        // referenced by the ifunc resolver need to be promoted in the
        // exporting module, so we will get linker errors due to cloned
        // declarations of those implementation functions. As a simple fix,
        // just mark F as not eligible for import. A non-local ifunc is not
        // cloned and does not have this issue.
        if (auto *GI = dyn_cast_if_present<GlobalIFunc>(CalledValue))
          if (GI->hasLocalLinkage())
            HasLocalIFuncCallOrRef = true;
        // Skip inline assembly calls.
        if (CI && CI->isInlineAsm())
          continue;
        // Skip direct calls.
        if (!CalledValue || isa<Constant>(CalledValue))
          continue;
482 | continue; |
483 | |
        // Check if the instruction has callees metadata. If so, add the
        // callees to CallGraphEdges to reflect the references from the
        // metadata, and to enable importing for subsequent indirect call
        // promotion and inlining.
        if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) {
          for (const auto &Op : MD->operands()) {
            Function *Callee = mdconst::extract_or_null<Function>(Op);
            if (Callee)
              CallGraphEdges[Index.getOrInsertValueInfo(Callee)];
          }
        }

        CandidateProfileData =
            ICallAnalysis.getPromotionCandidatesForInstruction(&I, TotalCount,
                                                               NumCandidates);
        for (const auto &Candidate : CandidateProfileData)
          CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
              .updateHotness(getHotness(Candidate.Count, PSI));
502 | } |
503 | |
504 | // Summarize memprof related metadata. This is only needed for ThinLTO. |
505 | if (!IsThinLTO) |
506 | continue; |
507 | |
508 | // Skip indirect calls if we haven't enabled memprof ICP. |
509 | if (!CalledFunction && !EnableMemProfIndirectCallSupport) |
510 | continue; |
511 | |
512 | // Ensure we keep this analysis in sync with the handling in the ThinLTO |
513 | // backend (see MemProfContextDisambiguation::applyImport). Save this call |
514 | // so that we can skip it in checking the reverse case later. |
515 | assert(mayHaveMemprofSummary(CB)); |
516 | #ifndef NDEBUG |
517 | CallsThatMayHaveMemprofSummary.insert(CB); |
518 | #endif |
519 | |
520 | // Compute the list of stack ids first (so we can trim them from the stack |
521 | // ids on any MIBs). |
      CallStack<MDNode, MDNode::op_iterator> InstCallsite(
          I.getMetadata(LLVMContext::MD_callsite));
      auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
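      // For example (a hypothetical sketch of the metadata shape):
      //   %call = call ptr @malloc(...), !memprof !0, !callsite !3
      //   !0 = !{!1}               ; list of MIB nodes
      //   !1 = !{!2, !"cold"}      ; call stack node + allocation type
      //   !2 = !{i64 123, i64 456} ; call stack ids
      //   !3 = !{i64 123}          ; this call's own stack id(s)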
525 | if (MemProfMD) { |
526 | std::vector<MIBInfo> MIBs; |
527 | std::vector<std::vector<ContextTotalSize>> ContextSizeInfos; |
528 | bool HasNonZeroContextSizeInfos = false; |
        for (auto &MDOp : MemProfMD->operands()) {
          auto *MIBMD = cast<const MDNode>(MDOp);
          MDNode *StackNode = getMIBStackNode(MIBMD);
          assert(StackNode);
          SmallVector<unsigned> StackIdIndices;
          CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
          // Collapse out any stack ids on the allocation call itself, which
          // are shared with the callsite metadata (due to inlining).
          for (auto ContextIter =
                   StackContext.beginAfterSharedPrefix(InstCallsite);
               ContextIter != StackContext.end(); ++ContextIter) {
            unsigned StackIdIdx = Index.addOrGetStackIdIndex(*ContextIter);
            // If this is a direct recursion, simply skip the duplicate
            // entries. If this is mutual recursion, handling is left to
            // the LTO link analysis client.
            if (StackIdIndices.empty() || StackIdIndices.back() != StackIdIdx)
              StackIdIndices.push_back(StackIdIdx);
          }
546 | // If we have context size information, collect it for inclusion in |
547 | // the summary. |
548 | assert(MIBMD->getNumOperands() > 2 || |
549 | !metadataIncludesAllContextSizeInfo()); |
          if (MIBMD->getNumOperands() > 2) {
            std::vector<ContextTotalSize> ContextSizes;
            for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
              MDNode *ContextSizePair = dyn_cast<MDNode>(MIBMD->getOperand(I));
              assert(ContextSizePair->getNumOperands() == 2);
              uint64_t FullStackId =
                  mdconst::dyn_extract<ConstantInt>(
                      ContextSizePair->getOperand(0))
                      ->getZExtValue();
              uint64_t TS = mdconst::dyn_extract<ConstantInt>(
                                ContextSizePair->getOperand(1))
                                ->getZExtValue();
              ContextSizes.push_back({FullStackId, TS});
            }
            // Flag that we need to keep the ContextSizeInfos array for this
            // alloc as it now contains non-zero context info sizes.
            HasNonZeroContextSizeInfos = true;
            ContextSizeInfos.push_back(std::move(ContextSizes));
567 | } else { |
568 | // The ContextSizeInfos must be in the same relative position as the |
569 | // associated MIB. In some cases we only include a ContextSizeInfo |
570 | // for a subset of MIBs in an allocation. To handle that, eagerly |
571 | // fill any MIB entries that don't have context size info metadata |
572 | // with a pair of 0s. Later on we will only use this array if it |
573 | // ends up containing any non-zero entries (see where we set |
574 | // HasNonZeroContextSizeInfos above). |
            ContextSizeInfos.push_back({{0, 0}});
576 | } |
          MIBs.push_back(
              MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices)));
        }
        Allocs.push_back(AllocInfo(std::move(MIBs)));
581 | assert(HasNonZeroContextSizeInfos || |
582 | !metadataIncludesAllContextSizeInfo()); |
583 | // We eagerly build the ContextSizeInfos array, but it will be filled |
584 | // with sub arrays of pairs of 0s if no MIBs on this alloc actually |
585 | // contained context size info metadata. Only save it if any MIBs had |
586 | // any such metadata. |
587 | if (HasNonZeroContextSizeInfos) { |
588 | assert(Allocs.back().MIBs.size() == ContextSizeInfos.size()); |
589 | Allocs.back().ContextSizeInfos = std::move(ContextSizeInfos); |
590 | } |
591 | } else if (!InstCallsite.empty()) { |
592 | SmallVector<unsigned> StackIdIndices; |
593 | for (auto StackId : InstCallsite) |
          StackIdIndices.push_back(Index.addOrGetStackIdIndex(StackId));
595 | if (CalledFunction) { |
596 | // Use the original CalledValue, in case it was an alias. We want |
597 | // to record the call edge to the alias in that case. Eventually |
598 | // an alias summary will be created to associate the alias and |
599 | // aliasee. |
          auto CalleeValueInfo =
              Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
          Callsites.push_back({CalleeValueInfo, StackIdIndices});
603 | } else { |
604 | assert(EnableMemProfIndirectCallSupport); |
605 | // For indirect callsites, create multiple Callsites, one per target. |
606 | // This enables having a different set of clone versions per target, |
607 | // and we will apply the cloning decisions while speculatively |
608 | // devirtualizing in the ThinLTO backends. |
609 | for (const auto &Candidate : CandidateProfileData) { |
            auto CalleeValueInfo = Index.getOrInsertValueInfo(Candidate.Value);
            Callsites.push_back({CalleeValueInfo, StackIdIndices});
612 | } |
613 | } |
614 | } |
615 | } |
616 | } |
617 | |
618 | if (PSI->hasPartialSampleProfile() && ScalePartialSampleProfileWorkingSetSize) |
    Index.addBlockCount(F.size());
620 | |
621 | SmallVector<ValueInfo, 0> Refs; |
622 | if (IsThinLTO) { |
    auto AddRefEdges =
        [&](const std::vector<const Instruction *> &Instrs,
            SetVector<ValueInfo, SmallVector<ValueInfo, 0>> &Edges,
            SmallPtrSet<const User *, 8> &Cache) {
          for (const auto *I : Instrs) {
            Cache.erase(I);
            findRefEdges(Index, I, Edges, Cache, HasLocalIFuncCallOrRef);
          }
        };

    // By now we have processed all instructions in the function except
    // non-volatile loads and non-volatile value stores. Find the ref edges
    // for both instruction sets.
636 | AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited); |
    // We can add some values to the Visited set when processing load
    // instructions which are also used by stores in NonVolatileStores.
    // For example, this can happen with the following code:
    //
    //   store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**)
    //   %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**)
    //
    // After processing loads we'll add the bitcast to the Visited set, and if
    // we use the same set while processing stores, we'll never see the store
    // to @bar, and @bar will be mistakenly treated as readonly.
647 | SmallPtrSet<const llvm::User *, 8> StoreCache; |
648 | AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache); |
649 | |
    // If both a load and a store instruction reference the same variable
    // we won't be able to optimize it. Add all such reference edges
    // to the RefEdges set.
    for (const auto &VI : StoreRefEdges)
      if (LoadRefEdges.remove(VI))
        RefEdges.insert(VI);
656 | |
    unsigned RefCnt = RefEdges.size();
    // All new reference edges inserted in the two ranges below are either
    // read- or write-only. They are grouped at the end of the RefEdges
    // vector, so we can use a single integer value to identify them.
    RefEdges.insert_range(LoadRefEdges);

    unsigned FirstWORef = RefEdges.size();
    RefEdges.insert_range(StoreRefEdges);
665 | |
666 | Refs = RefEdges.takeVector(); |
667 | for (; RefCnt < FirstWORef; ++RefCnt) |
668 | Refs[RefCnt].setReadOnly(); |
669 | |
670 | for (; RefCnt < Refs.size(); ++RefCnt) |
671 | Refs[RefCnt].setWriteOnly(); |
672 | } else { |
673 | Refs = RefEdges.takeVector(); |
674 | } |
  // Explicitly add hot edges to enforce importing for designated GUIDs for
  // sample PGO, to enable the same inlines as the profiled optimized binary.
  for (auto &I : F.getImportGUIDs())
    CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness(
        ForceSummaryEdgesCold == FunctionSummary::FSHT_All
            ? CalleeInfo::HotnessType::Cold
            : CalleeInfo::HotnessType::Critical);
682 | |
683 | #ifndef NDEBUG |
684 | // Make sure that all calls we decided could not have memprof summaries get a |
685 | // false value for mayHaveMemprofSummary, to ensure that this handling remains |
686 | // in sync with the ThinLTO backend handling. |
687 | if (IsThinLTO) { |
688 | for (const BasicBlock &BB : F) { |
689 | for (const Instruction &I : BB) { |
690 | const auto *CB = dyn_cast<CallBase>(&I); |
691 | if (!CB) |
692 | continue; |
693 | // We already checked these above. |
694 | if (CallsThatMayHaveMemprofSummary.count(CB)) |
695 | continue; |
696 | assert(!mayHaveMemprofSummary(CB)); |
697 | } |
698 | } |
699 | } |
700 | #endif |
701 | |
  bool NonRenamableLocal = isNonRenamableLocal(F);
703 | bool NotEligibleForImport = |
704 | NonRenamableLocal || HasInlineAsmMaybeReferencingInternal || |
705 | HasIndirBranchToBlockAddress || HasLocalIFuncCallOrRef; |
706 | GlobalValueSummary::GVFlags Flags( |
707 | F.getLinkage(), F.getVisibility(), NotEligibleForImport, |
708 | /* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable(), |
709 | GlobalValueSummary::ImportKind::Definition); |
  FunctionSummary::FFlags FunFlags{
      F.doesNotAccessMemory(), F.onlyReadsMemory() && !F.doesNotAccessMemory(),
      F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(),
      // FIXME: refactor this to use the same code that inliner is using.
      // Don't try to import functions with noinline attribute.
      F.getAttributes().hasFnAttr(Attribute::NoInline),
      F.hasFnAttribute(Attribute::AlwaysInline),
      F.hasFnAttribute(Attribute::NoUnwind), MayThrow, HasUnknownCall,
      mustBeUnreachableFunction(F)};
719 | std::vector<FunctionSummary::ParamAccess> ParamAccesses; |
720 | if (auto *SSI = GetSSICallback(F)) |
721 | ParamAccesses = SSI->getParamAccesses(Index); |
  auto FuncSummary = std::make_unique<FunctionSummary>(
      Flags, NumInsts, FunFlags, std::move(Refs), CallGraphEdges.takeVector(),
      TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
      TypeCheckedLoadVCalls.takeVector(),
      TypeTestAssumeConstVCalls.takeVector(),
      TypeCheckedLoadConstVCalls.takeVector(), std::move(ParamAccesses),
      std::move(Callsites), std::move(Allocs));
729 | if (NonRenamableLocal) |
    CantBePromoted.insert(F.getGUID());
  Index.addGlobalValueSummary(F, std::move(FuncSummary));
732 | } |
733 | |
734 | /// Find function pointers referenced within the given vtable initializer |
735 | /// (or subset of an initializer) \p I. The starting offset of \p I within |
736 | /// the vtable initializer is \p StartingOffset. Any discovered function |
737 | /// pointers are added to \p VTableFuncs along with their cumulative offset |
738 | /// within the initializer. |
739 | static void findFuncPointers(const Constant *I, uint64_t StartingOffset, |
740 | const Module &M, ModuleSummaryIndex &Index, |
741 | VTableFuncList &VTableFuncs, |
742 | const GlobalVariable &OrigGV) { |
743 | // First check if this is a function pointer. |
744 | if (I->getType()->isPointerTy()) { |
745 | auto C = I->stripPointerCasts(); |
    auto A = dyn_cast<GlobalAlias>(C);
    if (isa<Function>(C) || (A && isa<Function>(A->getAliasee()))) {
      auto GV = dyn_cast<GlobalValue>(C);
      assert(GV);
      // We can disregard __cxa_pure_virtual as a possible call target, as
      // calls to pure virtuals are UB.
      if (GV && GV->getName() != "__cxa_pure_virtual")
        VTableFuncs.push_back({Index.getOrInsertValueInfo(GV), StartingOffset});
754 | return; |
755 | } |
756 | } |
757 | |
758 | // Walk through the elements in the constant struct or array and recursively |
759 | // look for virtual function pointers. |
760 | const DataLayout &DL = M.getDataLayout(); |
  if (auto *C = dyn_cast<ConstantStruct>(I)) {
    StructType *STy = dyn_cast<StructType>(C->getType());
    assert(STy);
    const StructLayout *SL = DL.getStructLayout(C->getType());

    for (auto EI : llvm::enumerate(STy->elements())) {
      auto Offset = SL->getElementOffset(EI.index());
      unsigned Op = SL->getElementContainingOffset(Offset);
      findFuncPointers(cast<Constant>(I->getOperand(Op)),
                       StartingOffset + Offset, M, Index, VTableFuncs, OrigGV);
771 | } |
  } else if (auto *C = dyn_cast<ConstantArray>(I)) {
    ArrayType *ATy = C->getType();
    Type *EltTy = ATy->getElementType();
    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
      findFuncPointers(cast<Constant>(I->getOperand(i)),
                       StartingOffset + i * EltSize, M, Index, VTableFuncs,
                       OrigGV);
    }
  } else if (const auto *CE = dyn_cast<ConstantExpr>(I)) {
782 | // For relative vtables, the next sub-component should be a trunc. |
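    // For example (a hypothetical IR sketch), a relative vtable slot looks
    // like:
    //   i32 trunc (i64 sub (i64 ptrtoint (ptr @vfunc to i64),
    //                       i64 ptrtoint (ptr @vtable to i64)) to i32)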
    if (CE->getOpcode() != Instruction::Trunc ||
        !(CE = dyn_cast<ConstantExpr>(CE->getOperand(0))))
785 | return; |
786 | |
787 | // If this constant can be reduced to the offset between a function and a |
788 | // global, then we know this is a valid virtual function if the RHS is the |
789 | // original vtable we're scanning through. |
790 | if (CE->getOpcode() == Instruction::Sub) { |
791 | GlobalValue *LHS, *RHS; |
792 | APSInt LHSOffset, RHSOffset; |
      if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHS, LHSOffset, DL) &&
          IsConstantOffsetFromGlobal(CE->getOperand(1), RHS, RHSOffset, DL) &&
          RHS == &OrigGV &&

          // For relative vtables, this component should point to the callable
          // function without any offsets.
          LHSOffset == 0 &&

          // Also, the RHS should always point to somewhere within the vtable.
          RHSOffset <= static_cast<uint64_t>(DL.getTypeAllocSize(
                           OrigGV.getInitializer()->getType()))) {
        findFuncPointers(LHS, StartingOffset, M, Index, VTableFuncs, OrigGV);
      }
806 | } |
807 | } |
808 | } |
809 | |
810 | // Identify the function pointers referenced by vtable definition \p V. |
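// For example (a hypothetical layout on a 64-bit target), given
//   @vt = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @foo] }
// the traversal records @foo in VTableFuncs at byte offset 16 within the
// initializer.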
811 | static void computeVTableFuncs(ModuleSummaryIndex &Index, |
812 | const GlobalVariable &V, const Module &M, |
813 | VTableFuncList &VTableFuncs) { |
814 | if (!V.isConstant()) |
815 | return; |
816 | |
  findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index,
                   VTableFuncs, V);
819 | |
820 | #ifndef NDEBUG |
821 | // Validate that the VTableFuncs list is ordered by offset. |
822 | uint64_t PrevOffset = 0; |
823 | for (auto &P : VTableFuncs) { |
    // The findFuncPointers traversal should have encountered the
    // functions in offset order. We need to use ">=" since PrevOffset
    // starts at 0.
827 | assert(P.VTableOffset >= PrevOffset); |
828 | PrevOffset = P.VTableOffset; |
829 | } |
830 | #endif |
831 | } |
832 | |
833 | /// Record vtable definition \p V for each type metadata it references. |
834 | static void |
835 | recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index, |
836 | const GlobalVariable &V, |
837 | SmallVectorImpl<MDNode *> &Types) { |
838 | for (MDNode *Type : Types) { |
    auto TypeID = Type->getOperand(1).get();

    uint64_t Offset =
        cast<ConstantInt>(
            cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
            ->getZExtValue();

    if (auto *TypeId = dyn_cast<MDString>(TypeID))
      Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId->getString())
          .push_back({Offset, Index.getOrInsertValueInfo(&V)});
849 | } |
850 | } |
851 | |
852 | static void computeVariableSummary(ModuleSummaryIndex &Index, |
853 | const GlobalVariable &V, |
854 | DenseSet<GlobalValue::GUID> &CantBePromoted, |
855 | const Module &M, |
856 | SmallVectorImpl<MDNode *> &Types) { |
857 | SetVector<ValueInfo, SmallVector<ValueInfo, 0>> RefEdges; |
858 | SmallPtrSet<const User *, 8> Visited; |
859 | bool RefLocalIFunc = false; |
  bool HasBlockAddress =
      findRefEdges(Index, &V, RefEdges, Visited, RefLocalIFunc);
  const bool NotEligibleForImport = (HasBlockAddress || RefLocalIFunc);
  bool NonRenamableLocal = isNonRenamableLocal(V);
864 | GlobalValueSummary::GVFlags Flags( |
865 | V.getLinkage(), V.getVisibility(), NonRenamableLocal, |
866 | /* Live = */ false, V.isDSOLocal(), V.canBeOmittedFromSymbolTable(), |
867 | GlobalValueSummary::Definition); |
868 | |
869 | VTableFuncList VTableFuncs; |
870 | // If splitting is not enabled, then we compute the summary information |
871 | // necessary for index-based whole program devirtualization. |
872 | if (!Index.enableSplitLTOUnit()) { |
873 | Types.clear(); |
    V.getMetadata(LLVMContext::MD_type, Types);
875 | if (!Types.empty()) { |
876 | // Identify the function pointers referenced by this vtable definition. |
877 | computeVTableFuncs(Index, V, M, VTableFuncs); |
878 | |
879 | // Record this vtable definition for each type metadata it references. |
880 | recordTypeIdCompatibleVtableReferences(Index, V, Types); |
881 | } |
882 | } |
883 | |
884 | // Don't mark variables we won't be able to internalize as read/write-only. |
885 | bool CanBeInternalized = |
886 | !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() && |
887 | !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass(); |
888 | bool Constant = V.isConstant(); |
889 | GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, |
890 | Constant ? false : CanBeInternalized, |
891 | Constant, V.getVCallVisibility()); |
  auto GVarSummary = std::make_unique<GlobalVarSummary>(Flags, VarFlags,
                                                        RefEdges.takeVector());
  if (NonRenamableLocal)
    CantBePromoted.insert(V.getGUID());
  if (NotEligibleForImport)
    GVarSummary->setNotEligibleToImport();
  if (!VTableFuncs.empty())
    GVarSummary->setVTableFuncs(VTableFuncs);
  Index.addGlobalValueSummary(V, std::move(GVarSummary));
901 | } |
902 | |
903 | static void computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, |
904 | DenseSet<GlobalValue::GUID> &CantBePromoted) { |
905 | // Skip summary for indirect function aliases as summary for aliasee will not |
906 | // be emitted. |
907 | const GlobalObject *Aliasee = A.getAliaseeObject(); |
  if (isa<GlobalIFunc>(Aliasee))
    return;
  bool NonRenamableLocal = isNonRenamableLocal(A);
  GlobalValueSummary::GVFlags Flags(
      A.getLinkage(), A.getVisibility(), NonRenamableLocal,
      /* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable(),
      GlobalValueSummary::Definition);
  auto AS = std::make_unique<AliasSummary>(Flags);
  auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
  assert(AliaseeVI && "Alias expects aliasee summary to be available");
  assert(AliaseeVI.getSummaryList().size() == 1 &&
         "Expected a single entry per aliasee in per-module index");
  AS->setAliasee(AliaseeVI, AliaseeVI.getSummaryList()[0].get());
  if (NonRenamableLocal)
    CantBePromoted.insert(A.getGUID());
  Index.addGlobalValueSummary(A, std::move(AS));
924 | } |
925 | |
926 | // Set LiveRoot flag on entries matching the given value name. |
927 | static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) { |
  if (ValueInfo VI = Index.getValueInfo(
          GlobalValue::getGUIDAssumingExternalLinkage(Name)))
930 | for (const auto &Summary : VI.getSummaryList()) |
931 | Summary->setLive(true); |
932 | } |
933 | |
934 | ModuleSummaryIndex llvm::buildModuleSummaryIndex( |
935 | const Module &M, |
936 | std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback, |
937 | ProfileSummaryInfo *PSI, |
938 | std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) { |
939 | assert(PSI); |
940 | bool EnableSplitLTOUnit = false; |
941 | bool UnifiedLTO = false; |
  if (auto *MD = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("EnableSplitLTOUnit")))
    EnableSplitLTOUnit = MD->getZExtValue();
  if (auto *MD =
          mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("UnifiedLTO")))
    UnifiedLTO = MD->getZExtValue();
948 | ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit, UnifiedLTO); |
949 | |
950 | // Identify the local values in the llvm.used and llvm.compiler.used sets, |
951 | // which should not be exported as they would then require renaming and |
952 | // promotion, but we may have opaque uses e.g. in inline asm. We collect them |
953 | // here because we use this information to mark functions containing inline |
954 | // assembly calls as not importable. |
955 | SmallPtrSet<GlobalValue *, 4> LocalsUsed; |
956 | SmallVector<GlobalValue *, 4> Used; |
957 | // First collect those in the llvm.used set. |
  collectUsedGlobalVariables(M, Used, /*CompilerUsed=*/false);
  // Next collect those in the llvm.compiler.used set.
  collectUsedGlobalVariables(M, Used, /*CompilerUsed=*/true);
961 | DenseSet<GlobalValue::GUID> CantBePromoted; |
962 | for (auto *V : Used) { |
963 | if (V->hasLocalLinkage()) { |
      LocalsUsed.insert(V);
      CantBePromoted.insert(V->getGUID());
966 | } |
967 | } |
968 | |
969 | bool HasLocalInlineAsmSymbol = false; |
970 | if (!M.getModuleInlineAsm().empty()) { |
971 | // Collect the local values defined by module level asm, and set up |
972 | // summaries for these symbols so that they can be marked as NoRename, |
973 | // to prevent export of any use of them in regular IR that would require |
974 | // renaming within the module level asm. Note we don't need to create a |
975 | // summary for weak or global defs, as they don't need to be flagged as |
976 | // NoRename, and defs in module level asm can't be imported anyway. |
977 | // Also, any values used but not defined within module level asm should |
978 | // be listed on the llvm.used or llvm.compiler.used global and marked as |
979 | // referenced from there. |
    ModuleSymbolTable::CollectAsmSymbols(
        M, [&](StringRef Name, object::BasicSymbolRef::Flags Flags) {
982 | // Symbols not marked as Weak or Global are local definitions. |
983 | if (Flags & (object::BasicSymbolRef::SF_Weak | |
984 | object::BasicSymbolRef::SF_Global)) |
985 | return; |
986 | HasLocalInlineAsmSymbol = true; |
987 | GlobalValue *GV = M.getNamedValue(Name); |
988 | if (!GV) |
989 | return; |
          assert(GV->isDeclaration() &&
                 "Def in module asm already has definition");
991 | GlobalValueSummary::GVFlags GVFlags( |
992 | GlobalValue::InternalLinkage, GlobalValue::DefaultVisibility, |
993 | /* NotEligibleToImport = */ true, |
994 | /* Live = */ true, |
995 | /* Local */ GV->isDSOLocal(), GV->canBeOmittedFromSymbolTable(), |
996 | GlobalValueSummary::Definition); |
          CantBePromoted.insert(GV->getGUID());
          // Create the appropriate summary type.
          if (Function *F = dyn_cast<Function>(GV)) {
            std::unique_ptr<FunctionSummary> Summary =
                std::make_unique<FunctionSummary>(
                    GVFlags, /*InstCount=*/0,
                    FunctionSummary::FFlags{
                        F->hasFnAttribute(Attribute::ReadNone),
                        F->hasFnAttribute(Attribute::ReadOnly),
                        F->hasFnAttribute(Attribute::NoRecurse),
                        F->returnDoesNotAlias(),
                        /* NoInline = */ false,
                        F->hasFnAttribute(Attribute::AlwaysInline),
                        F->hasFnAttribute(Attribute::NoUnwind),
                        /* MayThrow */ true,
                        /* HasUnknownCall */ true,
                        /* MustBeUnreachable */ false},
                    SmallVector<ValueInfo, 0>{},
                    SmallVector<FunctionSummary::EdgeTy, 0>{},
                    ArrayRef<GlobalValue::GUID>{},
                    ArrayRef<FunctionSummary::VFuncId>{},
                    ArrayRef<FunctionSummary::VFuncId>{},
                    ArrayRef<FunctionSummary::ConstVCall>{},
                    ArrayRef<FunctionSummary::ConstVCall>{},
                    ArrayRef<FunctionSummary::ParamAccess>{},
                    ArrayRef<CallsiteInfo>{}, ArrayRef<AllocInfo>{});
            Index.addGlobalValueSummary(*GV, std::move(Summary));
1024 | } else { |
            std::unique_ptr<GlobalVarSummary> Summary =
                std::make_unique<GlobalVarSummary>(
                    GVFlags,
                    GlobalVarSummary::GVarFlags(
                        false, false, cast<GlobalVariable>(GV)->isConstant(),
                        GlobalObject::VCallVisibilityPublic),
                    SmallVector<ValueInfo, 0>{});
            Index.addGlobalValueSummary(*GV, std::move(Summary));
1033 | } |
1034 | }); |
1035 | } |
1036 | |
1037 | bool IsThinLTO = true; |
  if (auto *MD =
          mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
    IsThinLTO = MD->getZExtValue();
1041 | |
1042 | // Compute summaries for all functions defined in module, and save in the |
1043 | // index. |
1044 | for (const auto &F : M) { |
1045 | if (F.isDeclaration()) |
1046 | continue; |
1047 | |
1048 | DominatorTree DT(const_cast<Function &>(F)); |
1049 | BlockFrequencyInfo *BFI = nullptr; |
1050 | std::unique_ptr<BlockFrequencyInfo> BFIPtr; |
1051 | if (GetBFICallback) |
1052 | BFI = GetBFICallback(F); |
1053 | else if (F.hasProfileData()) { |
1054 | LoopInfo LI{DT}; |
1055 | BranchProbabilityInfo BPI{F, LI}; |
      BFIPtr = std::make_unique<BlockFrequencyInfo>(F, BPI, LI);
1057 | BFI = BFIPtr.get(); |
1058 | } |
1059 | |
    computeFunctionSummary(Index, M, F, BFI, PSI, DT,
                           !LocalsUsed.empty() || HasLocalInlineAsmSymbol,
                           CantBePromoted, IsThinLTO, GetSSICallback);
1063 | } |
1064 | |
1065 | // Compute summaries for all variables defined in module, and save in the |
1066 | // index. |
1067 | SmallVector<MDNode *, 2> Types; |
1068 | for (const GlobalVariable &G : M.globals()) { |
1069 | if (G.isDeclaration()) |
1070 | continue; |
    computeVariableSummary(Index, G, CantBePromoted, M, Types);
1072 | } |
1073 | |
1074 | // Compute summaries for all aliases defined in module, and save in the |
1075 | // index. |
1076 | for (const GlobalAlias &A : M.aliases()) |
1077 | computeAliasSummary(Index, A, CantBePromoted); |
1078 | |
  // Iterate through ifuncs and mark everything along their resolver paths
  // as live.
  for (const GlobalIFunc &I : M.ifuncs()) {
    I.applyAlongResolverPath([&Index](const GlobalValue &GV) {
      Index.getGlobalValueSummary(GV)->setLive(true);
    });
1084 | } |
1085 | |
1086 | for (auto *V : LocalsUsed) { |
    auto *Summary = Index.getGlobalValueSummary(*V);
    assert(Summary && "Missing summary for global value");
1089 | Summary->setNotEligibleToImport(); |
1090 | } |
1091 | |
1092 | // The linker doesn't know about these LLVM produced values, so we need |
1093 | // to flag them as live in the index to ensure index-based dead value |
1094 | // analysis treats them as live roots of the analysis. |
  setLiveRoot(Index, "llvm.used");
  setLiveRoot(Index, "llvm.compiler.used");
  setLiveRoot(Index, "llvm.global_ctors");
  setLiveRoot(Index, "llvm.global_dtors");
  setLiveRoot(Index, "llvm.global.annotations");
1100 | |
1101 | for (auto &GlobalList : Index) { |
1102 | // Ignore entries for references that are undefined in the current module. |
1103 | if (GlobalList.second.SummaryList.empty()) |
1104 | continue; |
1105 | |
    assert(GlobalList.second.SummaryList.size() == 1 &&
           "Expected module's index to have one summary per GUID");
1108 | auto &Summary = GlobalList.second.SummaryList[0]; |
1109 | if (!IsThinLTO) { |
1110 | Summary->setNotEligibleToImport(); |
1111 | continue; |
1112 | } |
1113 | |
    bool AllRefsCanBeExternallyReferenced =
        llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
          return !CantBePromoted.count(VI.getGUID());
        });
1118 | if (!AllRefsCanBeExternallyReferenced) { |
1119 | Summary->setNotEligibleToImport(); |
1120 | continue; |
1121 | } |
1122 | |
    if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
      bool AllCallsCanBeExternallyReferenced = llvm::all_of(
          FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
            return !CantBePromoted.count(Edge.first.getGUID());
          });
1128 | if (!AllCallsCanBeExternallyReferenced) |
1129 | Summary->setNotEligibleToImport(); |
1130 | } |
1131 | } |
1132 | |
1133 | if (!ModuleSummaryDotFile.empty()) { |
1134 | std::error_code EC; |
1135 | raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_Text); |
1136 | if (EC) |
      report_fatal_error(Twine("Failed to open dot file ") +
                         ModuleSummaryDotFile + ": " + EC.message() + "\n");
    Index.exportToDot(OSDot, {});
1140 | } |
1141 | |
1142 | return Index; |
1143 | } |
1144 | |
1145 | AnalysisKey ModuleSummaryIndexAnalysis::Key; |
1146 | |
1147 | ModuleSummaryIndex |
1148 | ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) { |
  ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  bool NeedSSI = needsParamAccessSummary(M);
  return buildModuleSummaryIndex(
      M,
      [&FAM](const Function &F) {
        return &FAM.getResult<BlockFrequencyAnalysis>(
            *const_cast<Function *>(&F));
      },
      &PSI,
      [&FAM, NeedSSI](const Function &F) -> const StackSafetyInfo * {
        return NeedSSI ? &FAM.getResult<StackSafetyAnalysis>(
                             const_cast<Function &>(F))
                       : nullptr;
      });
1164 | } |
1165 | |
1166 | char ModuleSummaryIndexWrapperPass::ID = 0; |
1167 | |
INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
                      "Module Summary Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass)
INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
                    "Module Summary Analysis", false, true)
1175 | |
1176 | ModulePass *llvm::createModuleSummaryIndexWrapperPass() { |
1177 | return new ModuleSummaryIndexWrapperPass(); |
1178 | } |
1179 | |
1180 | ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass() |
1181 | : ModulePass(ID) {} |
1182 | |
1183 | bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) { |
1184 | auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
1185 | bool NeedSSI = needsParamAccessSummary(M); |
  Index.emplace(buildModuleSummaryIndex(
      M,
      [this](const Function &F) {
        return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>(
                         *const_cast<Function *>(&F))
                     .getBFI());
      },
      PSI,
      [&](const Function &F) -> const StackSafetyInfo * {
        return NeedSSI ? &getAnalysis<StackSafetyInfoWrapperPass>(
                              const_cast<Function &>(F))
                              .getResult()
                       : nullptr;
      }));
1200 | return false; |
1201 | } |
1202 | |
1203 | bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) { |
1204 | Index.reset(); |
1205 | return false; |
1206 | } |
1207 | |
1208 | void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { |
1209 | AU.setPreservesAll(); |
1210 | AU.addRequired<BlockFrequencyInfoWrapperPass>(); |
1211 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
1212 | AU.addRequired<StackSafetyInfoWrapperPass>(); |
1213 | } |
1214 | |
1215 | char ImmutableModuleSummaryIndexWrapperPass::ID = 0; |
1216 | |
1217 | ImmutableModuleSummaryIndexWrapperPass::ImmutableModuleSummaryIndexWrapperPass( |
1218 | const ModuleSummaryIndex *Index) |
1219 | : ImmutablePass(ID), Index(Index) {} |
1220 | |
1221 | void ImmutableModuleSummaryIndexWrapperPass::getAnalysisUsage( |
1222 | AnalysisUsage &AU) const { |
1223 | AU.setPreservesAll(); |
1224 | } |
1225 | |
1226 | ImmutablePass *llvm::createImmutableModuleSummaryIndexWrapperPass( |
1227 | const ModuleSummaryIndex *Index) { |
1228 | return new ImmutableModuleSummaryIndexWrapperPass(Index); |
1229 | } |
1230 | |
INITIALIZE_PASS(ImmutableModuleSummaryIndexWrapperPass, "module-summary-info",
                "Module summary info", false, true)
1233 | |
1234 | bool llvm::mayHaveMemprofSummary(const CallBase *CB) { |
1235 | if (!CB) |
1236 | return false; |
1237 | if (CB->isDebugOrPseudoInst()) |
1238 | return false; |
  auto *CI = dyn_cast<CallInst>(CB);
  auto *CalledValue = CB->getCalledOperand();
  auto *CalledFunction = CB->getCalledFunction();
  if (CalledValue && !CalledFunction) {
    CalledValue = CalledValue->stripPointerCasts();
    // Stripping pointer casts can reveal a called function.
    CalledFunction = dyn_cast<Function>(CalledValue);
1246 | } |
1247 | // Check if this is an alias to a function. If so, get the |
1248 | // called aliasee for the checks below. |
  if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
    assert(!CalledFunction &&
           "Expected null called function in callsite for alias");
    CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
1253 | } |
1254 | // Check if this is a direct call to a known function or a known |
1255 | // intrinsic, or an indirect call with profile data. |
1256 | if (CalledFunction) { |
1257 | if (CI && CalledFunction->isIntrinsic()) |
1258 | return false; |
1259 | } else { |
1260 | // Skip indirect calls if we haven't enabled memprof ICP. |
1261 | if (!EnableMemProfIndirectCallSupport) |
1262 | return false; |
1263 | // Skip inline assembly calls. |
1264 | if (CI && CI->isInlineAsm()) |
1265 | return false; |
1266 | // Skip direct calls via Constant. |
    if (!CalledValue || isa<Constant>(CalledValue))
1268 | return false; |
1269 | return true; |
1270 | } |
1271 | return true; |
1272 | } |
1273 | |