//===- ModuleSummaryAnalysis.cpp - Module summary index builder ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass builds a ModuleSummaryIndex object for the module, to be written
// to bitcode or LLVM assembly.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "module-summary-analysis"

// Option to force edges cold, which will block importing when the
// -import-cold-multiplier is set to 0. Useful for debugging.
namespace llvm {
FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold =
    FunctionSummary::FSHT_None;
} // namespace llvm

static cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC(
    "force-summary-edges-cold", cl::Hidden, cl::location(ForceSummaryEdgesCold),
    cl::desc("Force all edges in the function summary to cold"),
    cl::values(clEnumValN(FunctionSummary::FSHT_None, "none", "None."),
               clEnumValN(FunctionSummary::FSHT_AllNonCritical,
                          "all-non-critical", "All non-critical edges."),
               clEnumValN(FunctionSummary::FSHT_All, "all", "All edges.")));

static cl::opt<std::string> ModuleSummaryDotFile(
    "module-summary-dot-file", cl::Hidden, cl::value_desc("filename"),
    cl::desc("File to emit dot graph of new summary into"));
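// Usage sketch (assumed invocation): passing
//   -module-summary-dot-file=summary.dot
// to a tool that builds the per-module summary (e.g. opt) dumps the new
// summary as a dot graph for offline inspection.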

extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;

extern cl::opt<unsigned> MaxNumVTableAnnotations;

extern cl::opt<bool> MemProfReportHintedSizes;

// Walk through the operands of a given User via worklist iteration and
// populate the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
// Returns true if any of the operands contains a blockaddress. This is
// important to know when computing the summary for a global variable, because
// if a global variable references a basic block address we can't import it
// separately from the function containing that basic block. For simplicity we
// currently don't import such global variables at all. When importing a
// function we don't care whether any instruction in it takes the address of a
// basic block, because an instruction can only take the address of a basic
// block located in the same function.
// Sets `RefLocalLinkageIFunc` to true if the analyzed value references a
// local-linkage ifunc.
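// For example (illustrative IR, not from this file):
//   @g = global ptr blockaddress(@f, %bb)
// Here @g cannot be imported separately from @f, so findRefEdges reports
// the blockaddress through its return value.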
static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
                         SetVector<ValueInfo, std::vector<ValueInfo>> &RefEdges,
                         SmallPtrSet<const User *, 8> &Visited,
                         bool &RefLocalLinkageIFunc) {
  bool HasBlockAddress = false;
  SmallVector<const User *, 32> Worklist;
  if (Visited.insert(CurUser).second)
    Worklist.push_back(CurUser);

  while (!Worklist.empty()) {
    const User *U = Worklist.pop_back_val();
    const auto *CB = dyn_cast<CallBase>(U);

    for (const auto &OI : U->operands()) {
      const User *Operand = dyn_cast<User>(OI);
      if (!Operand)
        continue;
      if (isa<BlockAddress>(Operand)) {
        HasBlockAddress = true;
        continue;
      }
      if (auto *GV = dyn_cast<GlobalValue>(Operand)) {
        // We have a reference to a global value. This should be added to
        // the reference set unless it is a callee. Callees are handled
        // specially by WriteFunction and are added to a separate list.
        if (!(CB && CB->isCallee(&OI))) {
          // If an ifunc has local linkage, do not add it to the ref edges,
          // and set `RefLocalLinkageIFunc` to true. The referencer is not
          // eligible for import. An ifunc doesn't have a summary and ThinLTO
          // cannot promote it; importing the referencer may cause linkage
          // errors.
          if (auto *GI = dyn_cast_if_present<GlobalIFunc>(GV);
              GI && GI->hasLocalLinkage()) {
            RefLocalLinkageIFunc = true;
            continue;
          }
          RefEdges.insert(Index.getOrInsertValueInfo(GV));
        }
        continue;
      }
      if (Visited.insert(Operand).second)
        Worklist.push_back(Operand);
    }
  }

  const Instruction *I = dyn_cast<Instruction>(CurUser);
  if (I) {
    uint64_t TotalCount = 0;
    // MaxNumVTableAnnotations is the maximum number of vtables annotated on
    // the instruction.
    auto ValueDataArray = getValueProfDataFromInst(
        *I, IPVK_VTableTarget, MaxNumVTableAnnotations, TotalCount);

    for (const auto &V : ValueDataArray)
      RefEdges.insert(Index.getOrInsertValueInfo(/* VTableGUID = */ V.Value));
  }
  return HasBlockAddress;
}

static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount,
                                          ProfileSummaryInfo *PSI) {
  if (!PSI)
    return CalleeInfo::HotnessType::Unknown;
  if (PSI->isHotCount(ProfileCount))
    return CalleeInfo::HotnessType::Hot;
  if (PSI->isColdCount(ProfileCount))
    return CalleeInfo::HotnessType::Cold;
  return CalleeInfo::HotnessType::None;
}

static bool isNonRenamableLocal(const GlobalValue &GV) {
  return GV.hasSection() && GV.hasLocalLinkage();
}

/// Determine whether this call has all constant integer arguments (excluding
/// "this") and summarize it to VCalls or ConstVCalls as appropriate.
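/// For example (hypothetical virtual calls):
///   p->foo(42, 7)  // all-constant args: summarized as a ConstVCall
///   p->foo(n)      // non-constant arg:  summarized as a plain VCall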
static void addVCallToSet(
    DevirtCallSite Call, GlobalValue::GUID Guid,
    SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
        &VCalls,
    SetVector<FunctionSummary::ConstVCall,
              std::vector<FunctionSummary::ConstVCall>> &ConstVCalls) {
  std::vector<uint64_t> Args;
  // Start from the second argument to skip the "this" pointer.
  for (auto &Arg : drop_begin(Call.CB.args())) {
    auto *CI = dyn_cast<ConstantInt>(Arg);
    if (!CI || CI->getBitWidth() > 64) {
      VCalls.insert({Guid, Call.Offset});
      return;
    }
    Args.push_back(CI->getZExtValue());
  }
  ConstVCalls.insert({{Guid, Call.Offset}, std::move(Args)});
}

/// If this intrinsic call requires that we add information to the function
/// summary, do so via the non-constant reference arguments.
static void addIntrinsicToSummary(
    const CallInst *CI,
    SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> &TypeTests,
    SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
        &TypeTestAssumeVCalls,
    SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
        &TypeCheckedLoadVCalls,
    SetVector<FunctionSummary::ConstVCall,
              std::vector<FunctionSummary::ConstVCall>>
        &TypeTestAssumeConstVCalls,
    SetVector<FunctionSummary::ConstVCall,
              std::vector<FunctionSummary::ConstVCall>>
        &TypeCheckedLoadConstVCalls,
    DominatorTree &DT) {
  switch (CI->getCalledFunction()->getIntrinsicID()) {
  case Intrinsic::type_test:
  case Intrinsic::public_type_test: {
    auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
    auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
    if (!TypeId)
      break;
    GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());

    // Produce a summary from type.test intrinsics. We only summarize type.test
    // intrinsics that are used other than by an llvm.assume intrinsic.
    // Intrinsics that are assumed are relevant only to the devirtualization
    // pass, not the type test lowering pass.
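    // Illustrative IR (assumed shape) of the assume-only pattern that is
    // not added to TypeTests here:
    //   %t = call i1 @llvm.type.test(ptr %vptr, metadata !"_ZTS1A")
    //   call void @llvm.assume(i1 %t)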
    bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
      return !isa<AssumeInst>(CIU.getUser());
    });
    if (HasNonAssumeUses)
      TypeTests.insert(Guid);

    SmallVector<DevirtCallSite, 4> DevirtCalls;
    SmallVector<CallInst *, 4> Assumes;
    findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
    for (auto &Call : DevirtCalls)
      addVCallToSet(Call, Guid, TypeTestAssumeVCalls,
                    TypeTestAssumeConstVCalls);

    break;
  }

  case Intrinsic::type_checked_load_relative:
  case Intrinsic::type_checked_load: {
    auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(2));
    auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
    if (!TypeId)
      break;
    GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());

    SmallVector<DevirtCallSite, 4> DevirtCalls;
    SmallVector<Instruction *, 4> LoadedPtrs;
    SmallVector<Instruction *, 4> Preds;
    bool HasNonCallUses = false;
    findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
                                               HasNonCallUses, CI, DT);
    // Any non-call uses of the result of llvm.type.checked.load will
    // prevent us from optimizing away the llvm.type.test.
    if (HasNonCallUses)
      TypeTests.insert(Guid);
    for (auto &Call : DevirtCalls)
      addVCallToSet(Call, Guid, TypeCheckedLoadVCalls,
                    TypeCheckedLoadConstVCalls);

    break;
  }
  default:
    break;
  }
}

static bool isNonVolatileLoad(const Instruction *I) {
  if (const auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isVolatile();

  return false;
}

static bool isNonVolatileStore(const Instruction *I) {
  if (const auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isVolatile();

  return false;
}

// Returns true if the function definition must be unreachable.
//
// Note that if this helper function returns true, `F` is guaranteed to be
// unreachable; if it returns false, `F` might still be unreachable but not
// covered by this helper function.
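// For example (illustrative IR), `define void @f() { unreachable }` is
// detected, while a function whose entry block merely branches into dead
// code is not.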
static bool mustBeUnreachableFunction(const Function &F) {
  // A function must be unreachable if its entry block ends with an
  // 'unreachable'.
  assert(!F.isDeclaration());
  return isa<UnreachableInst>(F.getEntryBlock().getTerminator());
}

static void computeFunctionSummary(
    ModuleSummaryIndex &Index, const Module &M, const Function &F,
    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT,
    bool HasLocalsInUsedOrAsm, DenseSet<GlobalValue::GUID> &CantBePromoted,
    bool IsThinLTO,
    std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) {
  // Summaries are not currently supported for anonymous functions; they
  // should have been named.
  assert(F.hasName());

  unsigned NumInsts = 0;
  // Map from callee ValueId to profile count. Used to accumulate profile
  // counts for all static calls to a given callee.
  MapVector<ValueInfo, CalleeInfo, DenseMap<ValueInfo, unsigned>,
            std::vector<std::pair<ValueInfo, CalleeInfo>>>
      CallGraphEdges;
  SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges, LoadRefEdges,
      StoreRefEdges;
  SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> TypeTests;
  SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
      TypeTestAssumeVCalls, TypeCheckedLoadVCalls;
  SetVector<FunctionSummary::ConstVCall,
            std::vector<FunctionSummary::ConstVCall>>
      TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls;
  ICallPromotionAnalysis ICallAnalysis;
  SmallPtrSet<const User *, 8> Visited;

  // Add the personality function, prefix data and prologue data to the
  // function's ref list.
  bool HasLocalIFuncCallOrRef = false;
  findRefEdges(Index, &F, RefEdges, Visited, HasLocalIFuncCallOrRef);
  std::vector<const Instruction *> NonVolatileLoads;
  std::vector<const Instruction *> NonVolatileStores;

  std::vector<CallsiteInfo> Callsites;
  std::vector<AllocInfo> Allocs;

#ifndef NDEBUG
  DenseSet<const CallBase *> CallsThatMayHaveMemprofSummary;
#endif

  bool HasInlineAsmMaybeReferencingInternal = false;
  bool HasIndirBranchToBlockAddress = false;
  bool HasUnknownCall = false;
  bool MayThrow = false;
  for (const BasicBlock &BB : F) {
    // We don't allow inlining of functions with an indirect branch to a
    // blockaddress. If the blockaddress escapes the function, e.g., via a
    // global variable, inlining may lead to an invalid cross-function
    // reference. So we shouldn't import such functions either.
    if (BB.hasAddressTaken()) {
      for (User *U : BlockAddress::get(const_cast<BasicBlock *>(&BB))->users())
        if (!isa<CallBrInst>(*U)) {
          HasIndirBranchToBlockAddress = true;
          break;
        }
    }

    for (const Instruction &I : BB) {
      if (I.isDebugOrPseudoInst())
        continue;
      ++NumInsts;

      // A regular LTO module doesn't participate in ThinLTO importing, so no
      // reference from it can be read- or write-only, since that would
      // require importing the variable as a local copy.
      if (IsThinLTO) {
        if (isNonVolatileLoad(&I)) {
          // Postpone processing of non-volatile load instructions.
          // See the comments below.
          Visited.insert(&I);
          NonVolatileLoads.push_back(&I);
          continue;
        } else if (isNonVolatileStore(&I)) {
          Visited.insert(&I);
          NonVolatileStores.push_back(&I);
          // All references from the second operand of a store (the
          // destination address) can be considered write-only if they're not
          // referenced by any non-store instruction. References from the
          // first operand of a store (the stored value) can't be treated as
          // either read- or write-only, so we add them to RefEdges as we do
          // for all other instructions except non-volatile loads.
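          // E.g. (illustrative IR): in `store ptr @a, ptr @b`, @a gets a
          // regular ref edge here, while @b may later be classified as
          // write-only.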
          Value *Stored = I.getOperand(0);
          if (auto *GV = dyn_cast<GlobalValue>(Stored))
            // findRefEdges will try to examine GV operands, so instead
            // of calling it we should add GV to RefEdges directly.
            RefEdges.insert(Index.getOrInsertValueInfo(GV));
          else if (auto *U = dyn_cast<User>(Stored))
            findRefEdges(Index, U, RefEdges, Visited, HasLocalIFuncCallOrRef);
          continue;
        }
      }
      findRefEdges(Index, &I, RefEdges, Visited, HasLocalIFuncCallOrRef);
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB) {
        if (I.mayThrow())
          MayThrow = true;
        continue;
      }

      const auto *CI = dyn_cast<CallInst>(&I);
      // Since we don't know exactly which local values are referenced in
      // inline assembly, conservatively mark the function as possibly
      // referencing a local value from inline assembly to ensure we don't
      // export a reference (which would require renaming and promotion of the
      // referenced value).
      if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm())
        HasInlineAsmMaybeReferencingInternal = true;

      auto *CalledValue = CB->getCalledOperand();
      auto *CalledFunction = CB->getCalledFunction();
      if (CalledValue && !CalledFunction) {
        CalledValue = CalledValue->stripPointerCasts();
        // Stripping pointer casts can reveal a called function.
        CalledFunction = dyn_cast<Function>(CalledValue);
      }
      // Check if this is an alias to a function. If so, get the
      // called aliasee for the checks below.
      if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
        assert(!CalledFunction &&
               "Expected null called function in callsite for alias");
        CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
      }
      // Check if this is a direct call to a known function or a known
      // intrinsic, or an indirect call with profile data.
      if (CalledFunction) {
        if (CI && CalledFunction->isIntrinsic()) {
          addIntrinsicToSummary(
              CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls,
              TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT);
          continue;
        }
        // We should have named any anonymous globals.
        assert(CalledFunction->hasName());
        auto ScaledCount = PSI->getProfileCount(*CB, BFI);
        auto Hotness = ScaledCount ? getHotness(*ScaledCount, PSI)
                                   : CalleeInfo::HotnessType::Unknown;
        if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None)
          Hotness = CalleeInfo::HotnessType::Cold;

        // Use the original CalledValue, in case it was an alias. We want
        // to record the call edge to the alias in that case. Eventually
        // an alias summary will be created to associate the alias and
        // aliasee.
        auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo(
            cast<GlobalValue>(CalledValue))];
        ValueInfo.updateHotness(Hotness);
        if (CB->isTailCall())
          ValueInfo.setHasTailCall(true);
        // Add the relative block frequency to CalleeInfo if there is no
        // profile information.
        if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) {
          uint64_t BBFreq = BFI->getBlockFreq(&BB).getFrequency();
          uint64_t EntryFreq = BFI->getEntryFreq().getFrequency();
          ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq);
        }
      } else {
        HasUnknownCall = true;
        // If F is imported, a local-linkage ifunc (e.g. one produced by
        // target_clones on a static function) called by F will be cloned.
        // Since summaries don't track ifuncs, we do not know that the
        // implementation functions referenced by the ifunc resolver need to
        // be promoted in the exporting module, and we will get linker errors
        // due to cloned declarations for the implementation functions. As a
        // simple fix, just mark F as not eligible for import. A non-local
        // ifunc is not cloned and does not have this issue.
        if (auto *GI = dyn_cast_if_present<GlobalIFunc>(CalledValue))
          if (GI->hasLocalLinkage())
            HasLocalIFuncCallOrRef = true;
        // Skip inline assembly calls.
        if (CI && CI->isInlineAsm())
          continue;
        // Skip direct calls.
        if (!CalledValue || isa<Constant>(CalledValue))
          continue;

        // Check if the instruction has callees metadata. If so, add the
        // callees to CallGraphEdges to reflect the references from the
        // metadata, and to enable importing for subsequent indirect call
        // promotion and inlining.
        if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) {
          for (const auto &Op : MD->operands()) {
            Function *Callee = mdconst::extract_or_null<Function>(Op);
            if (Callee)
              CallGraphEdges[Index.getOrInsertValueInfo(Callee)];
          }
        }

        uint32_t NumCandidates;
        uint64_t TotalCount;
        auto CandidateProfileData =
            ICallAnalysis.getPromotionCandidatesForInstruction(&I, TotalCount,
                                                               NumCandidates);
        for (const auto &Candidate : CandidateProfileData)
          CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
              .updateHotness(getHotness(Candidate.Count, PSI));
      }

      // Summarize memprof related metadata. This is only needed for ThinLTO.
      if (!IsThinLTO)
        continue;

      // TODO: Skip indirect calls for now. Need to handle these better, likely
      // by creating multiple Callsites, one per target, then speculatively
      // devirtualize while applying clone info in the ThinLTO backends. This
      // will also be important because we will have a different set of clone
      // versions per target. This handling needs to match that in the ThinLTO
      // backend so we handle things consistently for matching of callsite
      // summaries to instructions.
      if (!CalledFunction)
        continue;

      // Ensure we keep this analysis in sync with the handling in the ThinLTO
      // backend (see MemProfContextDisambiguation::applyImport). Save this
      // call so that we can skip it in checking the reverse case later.
      assert(mayHaveMemprofSummary(CB));
#ifndef NDEBUG
      CallsThatMayHaveMemprofSummary.insert(CB);
#endif

      // Compute the list of stack ids first (so we can trim them from the
      // stack ids on any MIBs).
      CallStack<MDNode, MDNode::op_iterator> InstCallsite(
          I.getMetadata(LLVMContext::MD_callsite));
      auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
      if (MemProfMD) {
        std::vector<MIBInfo> MIBs;
        std::vector<uint64_t> TotalSizes;
        for (auto &MDOp : MemProfMD->operands()) {
          auto *MIBMD = cast<const MDNode>(MDOp);
          MDNode *StackNode = getMIBStackNode(MIBMD);
          assert(StackNode);
          SmallVector<unsigned> StackIdIndices;
          CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
          // Collapse out any stack ids that are already on the allocation
          // call itself (due to inlining).
          for (auto ContextIter =
                   StackContext.beginAfterSharedPrefix(InstCallsite);
               ContextIter != StackContext.end(); ++ContextIter) {
            unsigned StackIdIdx = Index.addOrGetStackIdIndex(*ContextIter);
            // If this is a direct recursion, simply skip the duplicate
            // entries. If this is mutual recursion, handling is left to
            // the LTO link analysis client.
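            // E.g. a context [A, B, B, C] produced by direct recursion in B
            // is recorded as the stack id indices for [A, B, C].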
            if (StackIdIndices.empty() || StackIdIndices.back() != StackIdIdx)
              StackIdIndices.push_back(StackIdIdx);
          }
          MIBs.push_back(
              MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices)));
          if (MemProfReportHintedSizes) {
            auto TotalSize = getMIBTotalSize(MIBMD);
            assert(TotalSize);
            TotalSizes.push_back(TotalSize);
          }
        }
        Allocs.push_back(AllocInfo(std::move(MIBs)));
        if (MemProfReportHintedSizes) {
          assert(Allocs.back().MIBs.size() == TotalSizes.size());
          Allocs.back().TotalSizes = std::move(TotalSizes);
        }
      } else if (!InstCallsite.empty()) {
        SmallVector<unsigned> StackIdIndices;
        for (auto StackId : InstCallsite)
          StackIdIndices.push_back(Index.addOrGetStackIdIndex(StackId));
        // Use the original CalledValue, in case it was an alias. We want
        // to record the call edge to the alias in that case. Eventually
        // an alias summary will be created to associate the alias and
        // aliasee.
        auto CalleeValueInfo =
            Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
        Callsites.push_back({CalleeValueInfo, StackIdIndices});
      }
    }
  }

  if (PSI->hasPartialSampleProfile() && ScalePartialSampleProfileWorkingSetSize)
    Index.addBlockCount(F.size());

  std::vector<ValueInfo> Refs;
  if (IsThinLTO) {
    auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs,
                           SetVector<ValueInfo, std::vector<ValueInfo>> &Edges,
                           SmallPtrSet<const User *, 8> &Cache) {
      for (const auto *I : Instrs) {
        Cache.erase(I);
        findRefEdges(Index, I, Edges, Cache, HasLocalIFuncCallOrRef);
      }
    };

    // By now we have processed all instructions in the function except
    // non-volatile loads and non-volatile stores. Find the ref edges for
    // both instruction sets.
    AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited);
    // We can add some values to the Visited set when processing load
    // instructions which are also used by stores in NonVolatileStores.
    // For example this can happen if we have the following code:
    //
    //   store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**)
    //   %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**)
    //
    // After processing loads we'll add the bitcast to the Visited set, and if
    // we use the same set while processing stores, we'll never see the store
    // to @bar and @bar will be mistakenly treated as readonly.
    SmallPtrSet<const llvm::User *, 8> StoreCache;
    AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache);

    // If both a load and a store instruction reference the same variable
    // we won't be able to optimize it. Add all such reference edges
    // to the RefEdges set.
    for (const auto &VI : StoreRefEdges)
      if (LoadRefEdges.remove(VI))
        RefEdges.insert(VI);

    unsigned RefCnt = RefEdges.size();
    // All new reference edges inserted in the two loops below are either
    // read- or write-only. They will be grouped at the end of the RefEdges
    // vector, so we can use a single integer value to identify them.
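    // Resulting Refs layout (sketch, using the initial RefCnt value):
    //   [0, RefCnt)          - regular references
    //   [RefCnt, FirstWORef) - read-only references
    //   [FirstWORef, end)    - write-only references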
    for (const auto &VI : LoadRefEdges)
      RefEdges.insert(VI);

    unsigned FirstWORef = RefEdges.size();
    for (const auto &VI : StoreRefEdges)
      RefEdges.insert(VI);

    Refs = RefEdges.takeVector();
    for (; RefCnt < FirstWORef; ++RefCnt)
      Refs[RefCnt].setReadOnly();

    for (; RefCnt < Refs.size(); ++RefCnt)
      Refs[RefCnt].setWriteOnly();
  } else {
    Refs = RefEdges.takeVector();
  }
  // Explicitly add hot edges to enforce importing for designated GUIDs for
  // sample PGO, to enable the same inlines as in the profiled optimized
  // binary.
  for (auto &I : F.getImportGUIDs())
    CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness(
        ForceSummaryEdgesCold == FunctionSummary::FSHT_All
            ? CalleeInfo::HotnessType::Cold
            : CalleeInfo::HotnessType::Critical);

#ifndef NDEBUG
  // Make sure that all calls we decided could not have memprof summaries get
  // a false value for mayHaveMemprofSummary, to ensure that this handling
  // remains in sync with the ThinLTO backend handling.
  if (IsThinLTO) {
    for (const BasicBlock &BB : F) {
      for (const Instruction &I : BB) {
        const auto *CB = dyn_cast<CallBase>(&I);
        if (!CB)
          continue;
        // We already checked these above.
        if (CallsThatMayHaveMemprofSummary.count(CB))
          continue;
        assert(!mayHaveMemprofSummary(CB));
      }
    }
  }
#endif

  bool NonRenamableLocal = isNonRenamableLocal(F);
  bool NotEligibleForImport =
      NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
      HasIndirBranchToBlockAddress || HasLocalIFuncCallOrRef;
  GlobalValueSummary::GVFlags Flags(
      F.getLinkage(), F.getVisibility(), NotEligibleForImport,
      /* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable(),
      GlobalValueSummary::ImportKind::Definition);
  FunctionSummary::FFlags FunFlags{
      F.doesNotAccessMemory(), F.onlyReadsMemory() && !F.doesNotAccessMemory(),
      F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(),
      // FIXME: refactor this to use the same code that the inliner is using.
      // Don't try to import functions with the noinline attribute.
      F.getAttributes().hasFnAttr(Attribute::NoInline),
      F.hasFnAttribute(Attribute::AlwaysInline),
      F.hasFnAttribute(Attribute::NoUnwind), MayThrow, HasUnknownCall,
      mustBeUnreachableFunction(F)};
  std::vector<FunctionSummary::ParamAccess> ParamAccesses;
  if (auto *SSI = GetSSICallback(F))
    ParamAccesses = SSI->getParamAccesses(Index);
  auto FuncSummary = std::make_unique<FunctionSummary>(
      Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
      CallGraphEdges.takeVector(), TypeTests.takeVector(),
      TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
      TypeTestAssumeConstVCalls.takeVector(),
      TypeCheckedLoadConstVCalls.takeVector(), std::move(ParamAccesses),
      std::move(Callsites), std::move(Allocs));
  if (NonRenamableLocal)
    CantBePromoted.insert(F.getGUID());
  Index.addGlobalValueSummary(F, std::move(FuncSummary));
}

/// Find function pointers referenced within the given vtable initializer
/// (or subset of an initializer) \p I. The starting offset of \p I within
/// the vtable initializer is \p StartingOffset. Any discovered function
/// pointers are added to \p VTableFuncs along with their cumulative offset
/// within the initializer.
static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
                             const Module &M, ModuleSummaryIndex &Index,
                             VTableFuncList &VTableFuncs,
                             const GlobalVariable &OrigGV) {
  // First check if this is a function pointer.
  if (I->getType()->isPointerTy()) {
    auto C = I->stripPointerCasts();
    auto A = dyn_cast<GlobalAlias>(C);
    if (isa<Function>(C) || (A && isa<Function>(A->getAliasee()))) {
      auto GV = dyn_cast<GlobalValue>(C);
      assert(GV);
      // We can disregard __cxa_pure_virtual as a possible call target, as
      // calls to pure virtuals are UB.
      if (GV && GV->getName() != "__cxa_pure_virtual")
        VTableFuncs.push_back({Index.getOrInsertValueInfo(GV), StartingOffset});
      return;
    }
  }

  // Walk through the elements in the constant struct or array and recursively
  // look for virtual function pointers.
  const DataLayout &DL = M.getDataLayout();
  if (auto *C = dyn_cast<ConstantStruct>(I)) {
    StructType *STy = dyn_cast<StructType>(C->getType());
    assert(STy);
    const StructLayout *SL = DL.getStructLayout(C->getType());

    for (auto EI : llvm::enumerate(STy->elements())) {
      auto Offset = SL->getElementOffset(EI.index());
      unsigned Op = SL->getElementContainingOffset(Offset);
      findFuncPointers(cast<Constant>(I->getOperand(Op)),
                       StartingOffset + Offset, M, Index, VTableFuncs, OrigGV);
    }
  } else if (auto *C = dyn_cast<ConstantArray>(I)) {
    ArrayType *ATy = C->getType();
    Type *EltTy = ATy->getElementType();
    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
      findFuncPointers(cast<Constant>(I->getOperand(i)),
                       StartingOffset + i * EltSize, M, Index, VTableFuncs,
                       OrigGV);
    }
  } else if (const auto *CE = dyn_cast<ConstantExpr>(I)) {
    // For relative vtables, the next sub-component should be a trunc.
    if (CE->getOpcode() != Instruction::Trunc ||
        !(CE = dyn_cast<ConstantExpr>(CE->getOperand(0))))
      return;

    // If this constant can be reduced to the offset between a function and a
    // global, then we know this is a valid virtual function if the RHS is the
    // original vtable we're scanning through.
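    // A relative vtable slot typically looks like (illustrative IR):
    //   i32 trunc (i64 sub (i64 ptrtoint (ptr @vfunc to i64),
    //                       i64 ptrtoint (ptr @vtable to i64)) to i32)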
    if (CE->getOpcode() == Instruction::Sub) {
      GlobalValue *LHS, *RHS;
      APSInt LHSOffset, RHSOffset;
      if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHS, LHSOffset, DL) &&
          IsConstantOffsetFromGlobal(CE->getOperand(1), RHS, RHSOffset, DL) &&
          RHS == &OrigGV &&

          // For relative vtables, this component should point to the callable
          // function without any offsets.
          LHSOffset == 0 &&

          // Also, the RHS should always point to somewhere within the vtable.
          RHSOffset <= static_cast<uint64_t>(DL.getTypeAllocSize(
                           OrigGV.getInitializer()->getType()))) {
        findFuncPointers(LHS, StartingOffset, M, Index, VTableFuncs, OrigGV);
      }
    }
  }
}

// Identify the function pointers referenced by vtable definition \p V.
static void computeVTableFuncs(ModuleSummaryIndex &Index,
                               const GlobalVariable &V, const Module &M,
                               VTableFuncList &VTableFuncs) {
  if (!V.isConstant())
    return;

  findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index,
                   VTableFuncs, V);

#ifndef NDEBUG
  // Validate that the VTableFuncs list is ordered by offset.
  uint64_t PrevOffset = 0;
  for (auto &P : VTableFuncs) {
    // The findFuncPointers traversal should have encountered the
    // functions in offset order. We need to use ">=" since PrevOffset
    // starts at 0.
    assert(P.VTableOffset >= PrevOffset);
    PrevOffset = P.VTableOffset;
  }
#endif
}

/// Record vtable definition \p V for each type metadata it references.
static void
recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index,
                                       const GlobalVariable &V,
                                       SmallVectorImpl<MDNode *> &Types) {
  for (MDNode *Type : Types) {
    auto TypeID = Type->getOperand(1).get();

    uint64_t Offset =
        cast<ConstantInt>(
            cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
            ->getZExtValue();

    if (auto *TypeId = dyn_cast<MDString>(TypeID))
      Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId->getString())
          .push_back({Offset, Index.getOrInsertValueInfo(&V)});
  }
}

static void computeVariableSummary(ModuleSummaryIndex &Index,
                                   const GlobalVariable &V,
                                   DenseSet<GlobalValue::GUID> &CantBePromoted,
                                   const Module &M,
                                   SmallVectorImpl<MDNode *> &Types) {
  SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges;
  SmallPtrSet<const User *, 8> Visited;
  bool RefLocalIFunc = false;
  bool HasBlockAddress =
      findRefEdges(Index, &V, RefEdges, Visited, RefLocalIFunc);
  const bool NotEligibleForImport = (HasBlockAddress || RefLocalIFunc);
  bool NonRenamableLocal = isNonRenamableLocal(V);
  GlobalValueSummary::GVFlags Flags(
      V.getLinkage(), V.getVisibility(), NonRenamableLocal,
      /* Live = */ false, V.isDSOLocal(), V.canBeOmittedFromSymbolTable(),
      GlobalValueSummary::Definition);

  VTableFuncList VTableFuncs;
  // If splitting is not enabled, then we compute the summary information
  // necessary for index-based whole program devirtualization.
  if (!Index.enableSplitLTOUnit()) {
    Types.clear();
    V.getMetadata(LLVMContext::MD_type, Types);
    if (!Types.empty()) {
      // Identify the function pointers referenced by this vtable definition.
      computeVTableFuncs(Index, V, M, VTableFuncs);

      // Record this vtable definition for each type metadata it references.
      recordTypeIdCompatibleVtableReferences(Index, V, Types);
    }
  }

  // Don't mark variables we won't be able to internalize as read/write-only.
  bool CanBeInternalized =
      !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
      !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass();
  bool Constant = V.isConstant();
  GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized,
                                       Constant ? false : CanBeInternalized,
                                       Constant, V.getVCallVisibility());
  auto GVarSummary = std::make_unique<GlobalVarSummary>(Flags, VarFlags,
                                                        RefEdges.takeVector());
  if (NonRenamableLocal)
    CantBePromoted.insert(V.getGUID());
  if (NotEligibleForImport)
    GVarSummary->setNotEligibleToImport();
  if (!VTableFuncs.empty())
    GVarSummary->setVTableFuncs(VTableFuncs);
  Index.addGlobalValueSummary(V, std::move(GVarSummary));
}

static void computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
                                DenseSet<GlobalValue::GUID> &CantBePromoted) {
  // Skip the summary for indirect function aliases, as the summary for the
  // aliasee will not be emitted.
  const GlobalObject *Aliasee = A.getAliaseeObject();
  if (isa<GlobalIFunc>(Aliasee))
    return;
  bool NonRenamableLocal = isNonRenamableLocal(A);
  GlobalValueSummary::GVFlags Flags(
      A.getLinkage(), A.getVisibility(), NonRenamableLocal,
      /* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable(),
      GlobalValueSummary::Definition);
  auto AS = std::make_unique<AliasSummary>(Flags);
  auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
  assert(AliaseeVI && "Alias expects aliasee summary to be available");
  assert(AliaseeVI.getSummaryList().size() == 1 &&
         "Expected a single entry per aliasee in per-module index");
  AS->setAliasee(AliaseeVI, AliaseeVI.getSummaryList()[0].get());
  if (NonRenamableLocal)
    CantBePromoted.insert(A.getGUID());
  Index.addGlobalValueSummary(A, std::move(AS));
}

// Set the LiveRoot flag on entries matching the given value name.
static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
  if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name)))
    for (const auto &Summary : VI.getSummaryList())
      Summary->setLive(true);
}

ModuleSummaryIndex llvm::buildModuleSummaryIndex(
    const Module &M,
    std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
    ProfileSummaryInfo *PSI,
    std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) {
  assert(PSI);
  bool EnableSplitLTOUnit = false;
  bool UnifiedLTO = false;
  if (auto *MD = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("EnableSplitLTOUnit")))
    EnableSplitLTOUnit = MD->getZExtValue();
  if (auto *MD =
          mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("UnifiedLTO")))
    UnifiedLTO = MD->getZExtValue();
  ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit, UnifiedLTO);

  // Identify the local values in the llvm.used and llvm.compiler.used sets,
  // which should not be exported as they would then require renaming and
  // promotion, but we may have opaque uses e.g. in inline asm. We collect them
  // here because we use this information to mark functions containing inline
  // assembly calls as not importable.
  SmallPtrSet<GlobalValue *, 4> LocalsUsed;
  SmallVector<GlobalValue *, 4> Used;
  // First collect those in the llvm.used set.
  collectUsedGlobalVariables(M, Used, /*CompilerUsed=*/false);
  // Next collect those in the llvm.compiler.used set.
  collectUsedGlobalVariables(M, Used, /*CompilerUsed=*/true);
  DenseSet<GlobalValue::GUID> CantBePromoted;
  for (auto *V : Used) {
    if (V->hasLocalLinkage()) {
      LocalsUsed.insert(V);
      CantBePromoted.insert(V->getGUID());
    }
  }

  bool HasLocalInlineAsmSymbol = false;
  if (!M.getModuleInlineAsm().empty()) {
    // Collect the local values defined by module level asm, and set up
    // summaries for these symbols so that they can be marked as NoRename,
    // to prevent export of any use of them in regular IR that would require
    // renaming within the module level asm. Note we don't need to create a
    // summary for weak or global defs, as they don't need to be flagged as
    // NoRename, and defs in module level asm can't be imported anyway.
    // Also, any values used but not defined within module level asm should
    // be listed on the llvm.used or llvm.compiler.used global and marked as
    // referenced from there.
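    // For example (illustrative, ELF-style module asm): a symbol defined by
    //   module asm "local_func: ret"
    // without a .globl directive is a local definition; if a matching
    // declaration exists in the IR it gets a stub summary below so it is
    // never renamed or exported.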
    ModuleSymbolTable::CollectAsmSymbols(
        M, [&](StringRef Name, object::BasicSymbolRef::Flags Flags) {
          // Symbols not marked as Weak or Global are local definitions.
          if (Flags & (object::BasicSymbolRef::SF_Weak |
                       object::BasicSymbolRef::SF_Global))
            return;
          HasLocalInlineAsmSymbol = true;
          GlobalValue *GV = M.getNamedValue(Name);
          if (!GV)
            return;
          assert(GV->isDeclaration() &&
                 "Def in module asm already has definition");
          GlobalValueSummary::GVFlags GVFlags(
              GlobalValue::InternalLinkage, GlobalValue::DefaultVisibility,
              /* NotEligibleToImport = */ true,
              /* Live = */ true,
              /* Local */ GV->isDSOLocal(), GV->canBeOmittedFromSymbolTable(),
              GlobalValueSummary::Definition);
          CantBePromoted.insert(GV->getGUID());
          // Create the appropriate summary type.
          if (Function *F = dyn_cast<Function>(GV)) {
            std::unique_ptr<FunctionSummary> Summary =
                std::make_unique<FunctionSummary>(
                    GVFlags, /*InstCount=*/0,
                    FunctionSummary::FFlags{
                        F->hasFnAttribute(Attribute::ReadNone),
                        F->hasFnAttribute(Attribute::ReadOnly),
                        F->hasFnAttribute(Attribute::NoRecurse),
                        F->returnDoesNotAlias(),
                        /* NoInline = */ false,
                        F->hasFnAttribute(Attribute::AlwaysInline),
                        F->hasFnAttribute(Attribute::NoUnwind),
                        /* MayThrow */ true,
                        /* HasUnknownCall */ true,
                        /* MustBeUnreachable */ false},
                    /*EntryCount=*/0, ArrayRef<ValueInfo>{},
                    ArrayRef<FunctionSummary::EdgeTy>{},
                    ArrayRef<GlobalValue::GUID>{},
                    ArrayRef<FunctionSummary::VFuncId>{},
                    ArrayRef<FunctionSummary::VFuncId>{},
                    ArrayRef<FunctionSummary::ConstVCall>{},
                    ArrayRef<FunctionSummary::ConstVCall>{},
                    ArrayRef<FunctionSummary::ParamAccess>{},
                    ArrayRef<CallsiteInfo>{}, ArrayRef<AllocInfo>{});
            Index.addGlobalValueSummary(*GV, std::move(Summary));
          } else {
            std::unique_ptr<GlobalVarSummary> Summary =
                std::make_unique<GlobalVarSummary>(
                    GVFlags,
                    GlobalVarSummary::GVarFlags(
                        false, false, cast<GlobalVariable>(GV)->isConstant(),
                        GlobalObject::VCallVisibilityPublic),
                    ArrayRef<ValueInfo>{});
            Index.addGlobalValueSummary(*GV, std::move(Summary));
          }
        });
  }

  bool IsThinLTO = true;
  if (auto *MD =
          mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
    IsThinLTO = MD->getZExtValue();

  // Compute summaries for all functions defined in the module, and save them
  // in the index.
  for (const auto &F : M) {
    if (F.isDeclaration())
      continue;

    DominatorTree DT(const_cast<Function &>(F));
    BlockFrequencyInfo *BFI = nullptr;
    std::unique_ptr<BlockFrequencyInfo> BFIPtr;
    if (GetBFICallback)
      BFI = GetBFICallback(F);
    else if (F.hasProfileData()) {
      LoopInfo LI{DT};
      BranchProbabilityInfo BPI{F, LI};
      BFIPtr = std::make_unique<BlockFrequencyInfo>(F, BPI, LI);
      BFI = BFIPtr.get();
    }

    computeFunctionSummary(Index, M, F, BFI, PSI, DT,
                           !LocalsUsed.empty() || HasLocalInlineAsmSymbol,
                           CantBePromoted, IsThinLTO, GetSSICallback);
  }

  // Compute summaries for all variables defined in the module, and save them
  // in the index.
  SmallVector<MDNode *, 2> Types;
  for (const GlobalVariable &G : M.globals()) {
    if (G.isDeclaration())
      continue;
    computeVariableSummary(Index, G, CantBePromoted, M, Types);
  }

  // Compute summaries for all aliases defined in the module, and save them
  // in the index.
  for (const GlobalAlias &A : M.aliases())
    computeAliasSummary(Index, A, CantBePromoted);

  // Iterate through the ifuncs and mark everything along their resolver
  // paths as live.
  for (const GlobalIFunc &I : M.ifuncs()) {
    I.applyAlongResolverPath([&Index](const GlobalValue &GV) {
      Index.getGlobalValueSummary(GV)->setLive(true);
    });
  }

  for (auto *V : LocalsUsed) {
    auto *Summary = Index.getGlobalValueSummary(*V);
    assert(Summary && "Missing summary for global value");
    Summary->setNotEligibleToImport();
  }

  // The linker doesn't know about these LLVM produced values, so we need
  // to flag them as live in the index to ensure index-based dead value
  // analysis treats them as live roots of the analysis.
  setLiveRoot(Index, "llvm.used");
  setLiveRoot(Index, "llvm.compiler.used");
  setLiveRoot(Index, "llvm.global_ctors");
  setLiveRoot(Index, "llvm.global_dtors");
  setLiveRoot(Index, "llvm.global.annotations");

  for (auto &GlobalList : Index) {
    // Ignore entries for references that are undefined in the current module.
    if (GlobalList.second.SummaryList.empty())
      continue;

    assert(GlobalList.second.SummaryList.size() == 1 &&
           "Expected module's index to have one summary per GUID");
    auto &Summary = GlobalList.second.SummaryList[0];
    if (!IsThinLTO) {
      Summary->setNotEligibleToImport();
      continue;
    }

    bool AllRefsCanBeExternallyReferenced =
        llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
          return !CantBePromoted.count(VI.getGUID());
        });
    if (!AllRefsCanBeExternallyReferenced) {
      Summary->setNotEligibleToImport();
      continue;
    }

    if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
      bool AllCallsCanBeExternallyReferenced = llvm::all_of(
          FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
            return !CantBePromoted.count(Edge.first.getGUID());
          });
      if (!AllCallsCanBeExternallyReferenced)
        Summary->setNotEligibleToImport();
    }
  }

  if (!ModuleSummaryDotFile.empty()) {
    std::error_code EC;
    raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_Text);
    if (EC)
      report_fatal_error(Twine("Failed to open dot file ") +
                         ModuleSummaryDotFile + ": " + EC.message() + "\n");
    Index.exportToDot(OSDot, {});
  }

  return Index;
}

AnalysisKey ModuleSummaryIndexAnalysis::Key;

ModuleSummaryIndex
ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
  ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  bool NeedSSI = needsParamAccessSummary(M);
  return buildModuleSummaryIndex(
      M,
      [&FAM](const Function &F) {
        return &FAM.getResult<BlockFrequencyAnalysis>(
            *const_cast<Function *>(&F));
      },
      &PSI,
      [&FAM, NeedSSI](const Function &F) -> const StackSafetyInfo * {
        return NeedSSI ? &FAM.getResult<StackSafetyAnalysis>(
                             const_cast<Function &>(F))
                       : nullptr;
      });
}

char ModuleSummaryIndexWrapperPass::ID = 0;

INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
                      "Module Summary Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass)
INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
                    "Module Summary Analysis", false, true)

ModulePass *llvm::createModuleSummaryIndexWrapperPass() {
  return new ModuleSummaryIndexWrapperPass();
}

ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass()
    : ModulePass(ID) {
  initializeModuleSummaryIndexWrapperPassPass(*PassRegistry::getPassRegistry());
}

bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  bool NeedSSI = needsParamAccessSummary(M);
  Index.emplace(buildModuleSummaryIndex(
      M,
      [this](const Function &F) {
        return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>(
                         *const_cast<Function *>(&F))
                     .getBFI());
      },
      PSI,
      [&](const Function &F) -> const StackSafetyInfo * {
        return NeedSSI ? &getAnalysis<StackSafetyInfoWrapperPass>(
                              const_cast<Function &>(F))
                              .getResult()
                       : nullptr;
      }));
  return false;
}

bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) {
  Index.reset();
  return false;
}

void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<BlockFrequencyInfoWrapperPass>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  AU.addRequired<StackSafetyInfoWrapperPass>();
}

char ImmutableModuleSummaryIndexWrapperPass::ID = 0;

ImmutableModuleSummaryIndexWrapperPass::ImmutableModuleSummaryIndexWrapperPass(
    const ModuleSummaryIndex *Index)
    : ImmutablePass(ID), Index(Index) {
  initializeImmutableModuleSummaryIndexWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

void ImmutableModuleSummaryIndexWrapperPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.setPreservesAll();
}

ImmutablePass *llvm::createImmutableModuleSummaryIndexWrapperPass(
    const ModuleSummaryIndex *Index) {
  return new ImmutableModuleSummaryIndexWrapperPass(Index);
}

INITIALIZE_PASS(ImmutableModuleSummaryIndexWrapperPass, "module-summary-info",
                "Module summary info", false, true)

bool llvm::mayHaveMemprofSummary(const CallBase *CB) {
  if (!CB)
    return false;
  if (CB->isDebugOrPseudoInst())
    return false;
  auto *CI = dyn_cast<CallInst>(CB);
  auto *CalledValue = CB->getCalledOperand();
  auto *CalledFunction = CB->getCalledFunction();
  if (CalledValue && !CalledFunction) {
    CalledValue = CalledValue->stripPointerCasts();
    // Stripping pointer casts can reveal a called function.
    CalledFunction = dyn_cast<Function>(CalledValue);
  }
  // Check if this is an alias to a function. If so, get the
  // called aliasee for the checks below.
  if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
    assert(!CalledFunction &&
           "Expected null called function in callsite for alias");
    CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
  }
  // Check if this is a direct call to a known function or a known
  // intrinsic, or an indirect call with profile data.
  if (CalledFunction) {
    if (CI && CalledFunction->isIntrinsic())
      return false;
  } else {
    // TODO: For now skip indirect calls. See comments in
    // computeFunctionSummary for what is needed to handle this.
    return false;
  }
  return true;
}