1 | //===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This file implements interprocedural passes which walk the |
11 | /// call-graph deducing and/or propagating function attributes. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "llvm/Transforms/IPO/FunctionAttrs.h" |
16 | #include "llvm/ADT/ArrayRef.h" |
17 | #include "llvm/ADT/DenseMap.h" |
18 | #include "llvm/ADT/SCCIterator.h" |
19 | #include "llvm/ADT/STLExtras.h" |
20 | #include "llvm/ADT/SetVector.h" |
21 | #include "llvm/ADT/SmallPtrSet.h" |
22 | #include "llvm/ADT/SmallSet.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/ADT/Statistic.h" |
25 | #include "llvm/Analysis/AssumptionCache.h" |
26 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
27 | #include "llvm/Analysis/CFG.h" |
28 | #include "llvm/Analysis/CGSCCPassManager.h" |
29 | #include "llvm/Analysis/CallGraph.h" |
30 | #include "llvm/Analysis/CallGraphSCCPass.h" |
31 | #include "llvm/Analysis/CaptureTracking.h" |
32 | #include "llvm/Analysis/LazyCallGraph.h" |
33 | #include "llvm/Analysis/MemoryLocation.h" |
34 | #include "llvm/Analysis/ValueTracking.h" |
35 | #include "llvm/IR/Argument.h" |
36 | #include "llvm/IR/Attributes.h" |
37 | #include "llvm/IR/BasicBlock.h" |
38 | #include "llvm/IR/Constant.h" |
39 | #include "llvm/IR/Constants.h" |
40 | #include "llvm/IR/Function.h" |
41 | #include "llvm/IR/InstIterator.h" |
42 | #include "llvm/IR/InstrTypes.h" |
43 | #include "llvm/IR/Instruction.h" |
44 | #include "llvm/IR/Instructions.h" |
45 | #include "llvm/IR/IntrinsicInst.h" |
46 | #include "llvm/IR/Metadata.h" |
47 | #include "llvm/IR/ModuleSummaryIndex.h" |
48 | #include "llvm/IR/PassManager.h" |
49 | #include "llvm/IR/Type.h" |
50 | #include "llvm/IR/Use.h" |
51 | #include "llvm/IR/User.h" |
52 | #include "llvm/IR/Value.h" |
53 | #include "llvm/Support/Casting.h" |
54 | #include "llvm/Support/CommandLine.h" |
55 | #include "llvm/Support/Compiler.h" |
56 | #include "llvm/Support/Debug.h" |
57 | #include "llvm/Support/ErrorHandling.h" |
58 | #include "llvm/Support/raw_ostream.h" |
59 | #include "llvm/Transforms/IPO.h" |
60 | #include "llvm/Transforms/Utils/Local.h" |
61 | #include <cassert> |
62 | #include <iterator> |
63 | #include <map> |
64 | #include <optional> |
65 | #include <vector> |
66 | |
using namespace llvm;

#define DEBUG_TYPE "function-attrs"

// Counters for each attribute this file can infer; each one is bumped at the
// point where the corresponding attribute is actually added.
STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");

// Separate counters for attributes propagated during the thin-link phase.
STATISTIC(NumThinLinkNoRecurse,
          "Number of functions marked as norecurse during thinlink");
STATISTIC(NumThinLinkNoUnwind,
          "Number of functions marked as nounwind during thinlink");

// Command-line knobs for selectively disabling individual inferences,
// primarily useful for debugging and triage.
static cl::opt<bool> EnableNonnullArgPropagation(
    "enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
    cl::desc("Try to propagate nonnull argument attributes from callsites to "
             "caller functions."));

static cl::opt<bool> DisableNoUnwindInference(
    "disable-nounwind-inference", cl::Hidden,
    cl::desc("Stop inferring nounwind attribute during function-attrs pass"));

static cl::opt<bool> DisableNoFreeInference(
    "disable-nofree-inference", cl::Hidden,
    cl::desc("Stop inferring nofree attribute during function-attrs pass"));

static cl::opt<bool> DisableThinLTOPropagation(
    "disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
    cl::desc("Don't propagate function-attrs in thinLTO"));

namespace {

// The set of functions in the SCC currently being processed.
using SCCNodeSet = SmallSetVector<Function *, 8>;

} // end anonymous namespace
113 | |
114 | static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc, |
115 | ModRefInfo MR, AAResults &AAR) { |
116 | // Ignore accesses to known-invariant or local memory. |
117 | MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocal=*/IgnoreLocals: true); |
118 | if (isNoModRef(MRI: MR)) |
119 | return; |
120 | |
121 | const Value *UO = getUnderlyingObject(V: Loc.Ptr); |
122 | assert(!isa<AllocaInst>(UO) && |
123 | "Should have been handled by getModRefInfoMask()" ); |
124 | if (isa<Argument>(Val: UO)) { |
125 | ME |= MemoryEffects::argMemOnly(MR); |
126 | return; |
127 | } |
128 | |
129 | // If it's not an identified object, it might be an argument. |
130 | if (!isIdentifiedObject(V: UO)) |
131 | ME |= MemoryEffects::argMemOnly(MR); |
132 | ME |= MemoryEffects(IRMemLocation::Other, MR); |
133 | } |
134 | |
135 | static void addArgLocs(MemoryEffects &ME, const CallBase *Call, |
136 | ModRefInfo ArgMR, AAResults &AAR) { |
137 | for (const Value *Arg : Call->args()) { |
138 | if (!Arg->getType()->isPtrOrPtrVectorTy()) |
139 | continue; |
140 | |
141 | addLocAccess(ME, |
142 | Loc: MemoryLocation::getBeforeOrAfter(Ptr: Arg, AATags: Call->getAAMetadata()), |
143 | MR: ArgMR, AAR); |
144 | } |
145 | } |
146 | |
/// Returns the memory access attribute for function F using AAR for AA results,
/// where SCCNodes is the current SCC.
///
/// If ThisBody is true, this function may examine the function body and will
/// return a result pertaining to this copy of the function. If it is false, the
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
///
/// The return value is split into two parts: Memory effects that always apply,
/// and additional memory effects that apply if any of the functions in the SCC
/// can access argmem.
static std::pair<MemoryEffects, MemoryEffects>
checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
                          const SCCNodeSet &SCCNodes) {
  MemoryEffects OrigME = AAR.getMemoryEffects(&F);
  if (OrigME.doesNotAccessMemory())
    // Already perfect!
    return {OrigME, MemoryEffects::none()};

  if (!ThisBody)
    return {OrigME, MemoryEffects::none()};

  // Effects deduced from scanning the body; intersected with OrigME at the
  // end so we never report more than AA already knows.
  MemoryEffects ME = MemoryEffects::none();
  // Additional locations accessed if the SCC accesses argmem.
  MemoryEffects RecursiveArgME = MemoryEffects::none();

  // Inalloca and preallocated arguments are always clobbered by the call.
  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      F.getAttributes().hasAttrSomewhere(Attribute::Preallocated))
    ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef);

  // Scan the function body for instructions that may read or write memory.
  for (Instruction &I : instructions(F)) {
    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      // We can optimistically ignore calls to functions in the same SCC, with
      // two caveats:
      //   * Calls with operand bundles may have additional effects.
      //   * Argument memory accesses may imply additional effects depending on
      //     what the argument location is.
      if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
          SCCNodes.count(Call->getCalledFunction())) {
        // Keep track of which additional locations are accessed if the SCC
        // turns out to access argmem.
        addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR);
        continue;
      }

      MemoryEffects CallME = AAR.getMemoryEffects(Call);

      // If the call doesn't access memory, we're done.
      if (CallME.doesNotAccessMemory())
        continue;

      // A pseudo probe call shouldn't change any function attribute since it
      // doesn't translate to a real instruction. It comes with a memory access
      // tag to prevent itself being removed by optimizations and not block
      // other instructions being optimized.
      if (isa<PseudoProbeInst>(I))
        continue;

      // Merge everything except the callee's argmem effects; those are
      // translated into effects on our own locations below.
      ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem);

      // If the call accesses captured memory (currently part of "other") and
      // an argument is captured (currently not tracked), then it may also
      // access argument memory.
      ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other);
      ME |= MemoryEffects::argMemOnly(OtherMR);

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem);
      if (ArgMR != ModRefInfo::NoModRef)
        addArgLocs(ME, Call, ArgMR, AAR);
      continue;
    }

    // Non-call instruction: classify by its may-read / may-write bits.
    ModRefInfo MR = ModRefInfo::NoModRef;
    if (I.mayWriteToMemory())
      MR |= ModRefInfo::Mod;
    if (I.mayReadFromMemory())
      MR |= ModRefInfo::Ref;
    if (MR == ModRefInfo::NoModRef)
      continue;

    std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I);
    if (!Loc) {
      // If no location is known, conservatively assume anything can be
      // accessed.
      ME |= MemoryEffects(MR);
      continue;
    }

    // Volatile operations may access inaccessible memory.
    if (I.isVolatile())
      ME |= MemoryEffects::inaccessibleMemOnly(MR);

    addLocAccess(ME, *Loc, MR, AAR);
  }

  // The declared effects are an upper bound on what the body can do.
  return {OrigME & ME, RecursiveArgME};
}
251 | |
252 | MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F, |
253 | AAResults &AAR) { |
254 | return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, SCCNodes: {}).first; |
255 | } |
256 | |
/// Deduce readonly/readnone/writeonly attributes for the SCC.
///
/// Computes the union of memory effects over all functions in the SCC, then
/// tightens each function's memory attribute to that union (intersected with
/// whatever it already declared). Functions whose attributes change are
/// recorded in \p Changed.
template <typename AARGetterT>
static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
                           SmallSet<Function *, 8> &Changed) {
  // Union of the effects of every function in the SCC.
  MemoryEffects ME = MemoryEffects::none();
  // Extra effects that only materialize if the SCC accesses argmem.
  MemoryEffects RecursiveArgME = MemoryEffects::none();
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);
    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected. See the
    // comment on GlobalValue::isDefinitionExact for more details.
    auto [FnME, FnRecursiveArgME] =
        checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
    ME |= FnME;
    RecursiveArgME |= FnRecursiveArgME;
    // Reached bottom of the lattice, we will not be able to improve the result.
    if (ME == MemoryEffects::unknown())
      return;
  }

  // If the SCC accesses argmem, add recursive accesses resulting from that.
  ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
  if (ArgMR != ModRefInfo::NoModRef)
    ME |= RecursiveArgME & MemoryEffects(ArgMR);

  for (Function *F : SCCNodes) {
    MemoryEffects OldME = F->getMemoryEffects();
    MemoryEffects NewME = ME & OldME;
    if (NewME != OldME) {
      ++NumMemoryAttr;
      F->setMemoryEffects(NewME);
      // Remove conflicting writable attributes.
      if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem)))
        for (Argument &A : F->args())
          A.removeAttr(Attribute::Writable);
      Changed.insert(F);
    }
  }
}
297 | |
298 | // Compute definitive function attributes for a function taking into account |
299 | // prevailing definitions and linkage types |
300 | static FunctionSummary *calculatePrevailingSummary( |
301 | ValueInfo VI, |
302 | DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary, |
303 | function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> |
304 | IsPrevailing) { |
305 | |
306 | if (CachedPrevailingSummary.count(Val: VI)) |
307 | return CachedPrevailingSummary[VI]; |
308 | |
309 | /// At this point, prevailing symbols have been resolved. The following leads |
310 | /// to returning a conservative result: |
311 | /// - Multiple instances with local linkage. Normally local linkage would be |
312 | /// unique per module |
313 | /// as the GUID includes the module path. We could have a guid alias if |
314 | /// there wasn't any distinguishing path when each file was compiled, but |
315 | /// that should be rare so we'll punt on those. |
316 | |
317 | /// These next 2 cases should not happen and will assert: |
318 | /// - Multiple instances with external linkage. This should be caught in |
319 | /// symbol resolution |
320 | /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our |
321 | /// knowledge meaning we have to go conservative. |
322 | |
323 | /// Otherwise, we calculate attributes for a function as: |
324 | /// 1. If we have a local linkage, take its attributes. If there's somehow |
325 | /// multiple, bail and go conservative. |
326 | /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is |
327 | /// prevailing, take its attributes. |
328 | /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic |
329 | /// differences. However, if the prevailing copy is known it will be used |
330 | /// so take its attributes. If the prevailing copy is in a native file |
331 | /// all IR copies will be dead and propagation will go conservative. |
332 | /// 4. AvailableExternally summaries without a prevailing copy are known to |
333 | /// occur in a couple of circumstances: |
334 | /// a. An internal function gets imported due to its caller getting |
335 | /// imported, it becomes AvailableExternally but no prevailing |
336 | /// definition exists. Because it has to get imported along with its |
337 | /// caller the attributes will be captured by propagating on its |
338 | /// caller. |
339 | /// b. C++11 [temp.explicit]p10 can generate AvailableExternally |
340 | /// definitions of explicitly instanced template declarations |
341 | /// for inlining which are ultimately dropped from the TU. Since this |
342 | /// is localized to the TU the attributes will have already made it to |
343 | /// the callers. |
344 | /// These are edge cases and already captured by their callers so we |
345 | /// ignore these for now. If they become relevant to optimize in the |
346 | /// future this can be revisited. |
347 | /// 5. Otherwise, go conservative. |
348 | |
349 | CachedPrevailingSummary[VI] = nullptr; |
350 | FunctionSummary *Local = nullptr; |
351 | FunctionSummary *Prevailing = nullptr; |
352 | |
353 | for (const auto &GVS : VI.getSummaryList()) { |
354 | if (!GVS->isLive()) |
355 | continue; |
356 | |
357 | FunctionSummary *FS = dyn_cast<FunctionSummary>(Val: GVS->getBaseObject()); |
358 | // Virtual and Unknown (e.g. indirect) calls require going conservative |
359 | if (!FS || FS->fflags().HasUnknownCall) |
360 | return nullptr; |
361 | |
362 | const auto &Linkage = GVS->linkage(); |
363 | if (GlobalValue::isLocalLinkage(Linkage)) { |
364 | if (Local) { |
365 | LLVM_DEBUG( |
366 | dbgs() |
367 | << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on " |
368 | "function " |
369 | << VI.name() << " from " << FS->modulePath() << ". Previous module " |
370 | << Local->modulePath() << "\n" ); |
371 | return nullptr; |
372 | } |
373 | Local = FS; |
374 | } else if (GlobalValue::isExternalLinkage(Linkage)) { |
375 | assert(IsPrevailing(VI.getGUID(), GVS.get())); |
376 | Prevailing = FS; |
377 | break; |
378 | } else if (GlobalValue::isWeakODRLinkage(Linkage) || |
379 | GlobalValue::isLinkOnceODRLinkage(Linkage) || |
380 | GlobalValue::isWeakAnyLinkage(Linkage) || |
381 | GlobalValue::isLinkOnceAnyLinkage(Linkage)) { |
382 | if (IsPrevailing(VI.getGUID(), GVS.get())) { |
383 | Prevailing = FS; |
384 | break; |
385 | } |
386 | } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) { |
387 | // TODO: Handle these cases if they become meaningful |
388 | continue; |
389 | } |
390 | } |
391 | |
392 | if (Local) { |
393 | assert(!Prevailing); |
394 | CachedPrevailingSummary[VI] = Local; |
395 | } else if (Prevailing) { |
396 | assert(!Local); |
397 | CachedPrevailingSummary[VI] = Prevailing; |
398 | } |
399 | |
400 | return CachedPrevailingSummary[VI]; |
401 | } |
402 | |
403 | bool llvm::thinLTOPropagateFunctionAttrs( |
404 | ModuleSummaryIndex &Index, |
405 | function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> |
406 | IsPrevailing) { |
407 | // TODO: implement addNoAliasAttrs once |
408 | // there's more information about the return type in the summary |
409 | if (DisableThinLTOPropagation) |
410 | return false; |
411 | |
412 | DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary; |
413 | bool Changed = false; |
414 | |
415 | auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) { |
416 | // Assume we can propagate unless we discover otherwise |
417 | FunctionSummary::FFlags InferredFlags; |
418 | InferredFlags.NoRecurse = (SCCNodes.size() == 1); |
419 | InferredFlags.NoUnwind = true; |
420 | |
421 | for (auto &V : SCCNodes) { |
422 | FunctionSummary *CallerSummary = |
423 | calculatePrevailingSummary(VI: V, CachedPrevailingSummary, IsPrevailing); |
424 | |
425 | // Function summaries can fail to contain information such as declarations |
426 | if (!CallerSummary) |
427 | return; |
428 | |
429 | if (CallerSummary->fflags().MayThrow) |
430 | InferredFlags.NoUnwind = false; |
431 | |
432 | for (const auto &Callee : CallerSummary->calls()) { |
433 | FunctionSummary *CalleeSummary = calculatePrevailingSummary( |
434 | VI: Callee.first, CachedPrevailingSummary, IsPrevailing); |
435 | |
436 | if (!CalleeSummary) |
437 | return; |
438 | |
439 | if (!CalleeSummary->fflags().NoRecurse) |
440 | InferredFlags.NoRecurse = false; |
441 | |
442 | if (!CalleeSummary->fflags().NoUnwind) |
443 | InferredFlags.NoUnwind = false; |
444 | |
445 | if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse) |
446 | break; |
447 | } |
448 | } |
449 | |
450 | if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) { |
451 | Changed = true; |
452 | for (auto &V : SCCNodes) { |
453 | if (InferredFlags.NoRecurse) { |
454 | LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to " |
455 | << V.name() << "\n" ); |
456 | ++NumThinLinkNoRecurse; |
457 | } |
458 | |
459 | if (InferredFlags.NoUnwind) { |
460 | LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to " |
461 | << V.name() << "\n" ); |
462 | ++NumThinLinkNoUnwind; |
463 | } |
464 | |
465 | for (const auto &S : V.getSummaryList()) { |
466 | if (auto *FS = dyn_cast<FunctionSummary>(Val: S.get())) { |
467 | if (InferredFlags.NoRecurse) |
468 | FS->setNoRecurse(); |
469 | |
470 | if (InferredFlags.NoUnwind) |
471 | FS->setNoUnwind(); |
472 | } |
473 | } |
474 | } |
475 | } |
476 | }; |
477 | |
478 | // Call propagation functions on each SCC in the Index |
479 | for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(G: &Index); !I.isAtEnd(); |
480 | ++I) { |
481 | std::vector<ValueInfo> Nodes(*I); |
482 | PropagateAttributes(Nodes); |
483 | } |
484 | return Changed; |
485 | } |
486 | |
487 | namespace { |
488 | |
/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  // The argument this node stands for (nullptr only for the synthetic root).
  Argument *Definition;
  // Nodes for the arguments this argument's value flows into.
  SmallVector<ArgumentGraphNode *, 4> Uses;
};
496 | |
/// Owns the ArgumentGraphNodes and provides the entry point and iteration
/// needed to run scc_iterator over the argument flow graph.
class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  // Iteration covers the nodes recorded in the synthetic root's use list
  // (operator[] appends there on each lookup).
  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  // Get-or-create the node for argument A. std::map keeps the node's address
  // stable across later insertions, which the stored pointers rely on.
  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};
528 | |
/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  // Analysis budget exceeded -- assume the worst.
  void tooManyUses() override { Captured = true; }

  bool captured(const Use *U) override {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    // Captured by something other than a call: genuinely captured.
    if (!CB) {
      Captured = true;
      return true;
    }

    Function *F = CB->getCalledFunction();
    // Indirect call, non-exact definition, or callee outside the SCC --
    // we cannot track where the pointer goes.
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      Captured = true;
      return true;
    }

    assert(!CB->isCallee(U) && "callee operand reported captured?");
    const unsigned UseIndex = CB->getDataOperandNo(U);
    if (UseIndex >= CB->arg_size()) {
      // Data operand, but not an argument operand -- must be a bundle operand
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand bundle
      // use. In this case it does not matter if the callee is within our SCC
      // or not -- we've been captured in some unknown way, and we have to be
      // conservative.
      Captured = true;
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      Captured = true;
      return true;
    }

    // Record the formal argument of the in-SCC callee that receives the
    // pointer; the argument-SCC analysis processes it later.
    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // True only if certainly captured (used outside our SCC).
  bool Captured = false;

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};
582 | |
583 | } // end anonymous namespace |
584 | |
namespace llvm {

// GraphTraits adapters so scc_iterator can walk the argument graph: a node's
// successors are the arguments its value flows into.
template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  // The synthetic root reaches every node, giving the iterator one entry.
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm
608 | |
/// Returns Attribute::None, Attribute::ReadOnly, Attribute::WriteOnly or
/// Attribute::ReadNone for pointer argument \p A, by walking its transitive
/// uses. \p SCCNodes is the set of arguments speculated to share this
/// attribute; uses flowing into those arguments are deferred to the
/// argument-SCC fixpoint rather than treated conservatively.
static Attribute::AttrKind
determinePointerAccessAttrs(Argument *A,
                            const SmallPtrSet<Argument *, 8> &SCCNodes) {
  // Worklist of uses still to classify; Visited prevents reprocessing.
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  bool IsWrite = false;

  // Seed the worklist with the argument's direct uses.
  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    if (IsWrite && IsRead)
      // No point in searching further..
      return Attribute::None;

    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*I);
      if (CB.isCallee(U)) {
        IsRead = true;
        // Note that indirect calls do not capture, see comment in
        // CaptureTracking for context
        continue;
      }

      // Given we've explicitly handled the callee operand above, what's left
      // must be a data operand (e.g. argument or operand bundle)
      const unsigned UseIndex = CB.getDataOperandNo(U);

      // Some intrinsics (for instance ptrmask) do not capture their results,
      // but return results that alias their pointer argument, and thus should
      // be handled like GEP or addrspacecast above.
      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
              &CB, /*MustPreserveNullness=*/false)) {
        for (Use &UU : CB.uses())
          if (Visited.insert(&UU).second)
            Worklist.push_back(&UU);
      } else if (!CB.doesNotCapture(UseIndex)) {
        if (!CB.onlyReadsMemory())
          // If the callee can save a copy into other memory, then simply
          // scanning uses of the call is insufficient. We have no way
          // of tracking copies of the pointer through memory to see
          // if a reloaded copy is written to, thus we must give up.
          return Attribute::None;
        // Push users for processing once we finish this one
        if (!I->getType()->isVoidTy())
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      }

      ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem);
      if (isNoModRef(ArgMR))
        continue;

      if (Function *F = CB.getCalledFunction())
        if (CB.isArgOperand(U) && UseIndex < F->arg_size() &&
            SCCNodes.count(F->getArg(UseIndex)))
          // This is an argument which is part of the speculative SCC. Note
          // that only operands corresponding to formal arguments of the callee
          // can participate in the speculation.
          break;

      // The accessors used on call site here do the right thing for calls and
      // invokes with operand bundles.
      if (CB.doesNotAccessMemory(UseIndex)) {
        /* nop */
      } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) {
        IsRead = true;
      } else if (!isRefSet(ArgMR) ||
                 CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) {
        IsWrite = true;
      } else {
        return Attribute::None;
      }
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::Store:
      if (cast<StoreInst>(I)->getValueOperand() == *U)
        // untrackable capture
        return Attribute::None;

      // A volatile store has side effects beyond what writeonly can be relied
      // upon.
      if (cast<StoreInst>(I)->isVolatile())
        return Attribute::None;

      IsWrite = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      // Comparing or returning the pointer neither reads nor writes memory
      // through it.
      break;

    default:
      // Any other user is treated conservatively.
      return Attribute::None;
    }
  }

  if (IsWrite && IsRead)
    return Attribute::None;
  else if (IsRead)
    return Attribute::ReadOnly;
  else if (IsWrite)
    return Attribute::WriteOnly;
  else
    return Attribute::ReadNone;
}
751 | |
752 | /// Deduce returned attributes for the SCC. |
753 | static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes, |
754 | SmallSet<Function *, 8> &Changed) { |
755 | // Check each function in turn, determining if an argument is always returned. |
756 | for (Function *F : SCCNodes) { |
757 | // We can infer and propagate function attributes only when we know that the |
758 | // definition we'll get at link time is *exactly* the definition we see now. |
759 | // For more details, see GlobalValue::mayBeDerefined. |
760 | if (!F->hasExactDefinition()) |
761 | continue; |
762 | |
763 | if (F->getReturnType()->isVoidTy()) |
764 | continue; |
765 | |
766 | // There is nothing to do if an argument is already marked as 'returned'. |
767 | if (F->getAttributes().hasAttrSomewhere(Kind: Attribute::Returned)) |
768 | continue; |
769 | |
770 | auto FindRetArg = [&]() -> Argument * { |
771 | Argument *RetArg = nullptr; |
772 | for (BasicBlock &BB : *F) |
773 | if (auto *Ret = dyn_cast<ReturnInst>(Val: BB.getTerminator())) { |
774 | // Note that stripPointerCasts should look through functions with |
775 | // returned arguments. |
776 | auto *RetVal = |
777 | dyn_cast<Argument>(Val: Ret->getReturnValue()->stripPointerCasts()); |
778 | if (!RetVal || RetVal->getType() != F->getReturnType()) |
779 | return nullptr; |
780 | |
781 | if (!RetArg) |
782 | RetArg = RetVal; |
783 | else if (RetArg != RetVal) |
784 | return nullptr; |
785 | } |
786 | |
787 | return RetArg; |
788 | }; |
789 | |
790 | if (Argument *RetArg = FindRetArg()) { |
791 | RetArg->addAttr(Kind: Attribute::Returned); |
792 | ++NumReturned; |
793 | Changed.insert(Ptr: F); |
794 | } |
795 | } |
796 | } |
797 | |
798 | /// If a callsite has arguments that are also arguments to the parent function, |
799 | /// try to propagate attributes from the callsite's arguments to the parent's |
800 | /// arguments. This may be important because inlining can cause information loss |
801 | /// when attribute knowledge disappears with the inlined call. |
802 | static bool addArgumentAttrsFromCallsites(Function &F) { |
803 | if (!EnableNonnullArgPropagation) |
804 | return false; |
805 | |
806 | bool Changed = false; |
807 | |
808 | // For an argument attribute to transfer from a callsite to the parent, the |
809 | // call must be guaranteed to execute every time the parent is called. |
810 | // Conservatively, just check for calls in the entry block that are guaranteed |
811 | // to execute. |
812 | // TODO: This could be enhanced by testing if the callsite post-dominates the |
813 | // entry block or by doing simple forward walks or backward walks to the |
814 | // callsite. |
815 | BasicBlock &Entry = F.getEntryBlock(); |
816 | for (Instruction &I : Entry) { |
817 | if (auto *CB = dyn_cast<CallBase>(Val: &I)) { |
818 | if (auto *CalledFunc = CB->getCalledFunction()) { |
819 | for (auto &CSArg : CalledFunc->args()) { |
820 | if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false)) |
821 | continue; |
822 | |
823 | // If the non-null callsite argument operand is an argument to 'F' |
824 | // (the caller) and the call is guaranteed to execute, then the value |
825 | // must be non-null throughout 'F'. |
826 | auto *FArg = dyn_cast<Argument>(Val: CB->getArgOperand(i: CSArg.getArgNo())); |
827 | if (FArg && !FArg->hasNonNullAttr()) { |
828 | FArg->addAttr(Kind: Attribute::NonNull); |
829 | Changed = true; |
830 | } |
831 | } |
832 | } |
833 | } |
834 | if (!isGuaranteedToTransferExecutionToSuccessor(I: &I)) |
835 | break; |
836 | } |
837 | |
838 | return Changed; |
839 | } |
840 | |
841 | static bool addAccessAttr(Argument *A, Attribute::AttrKind R) { |
842 | assert((R == Attribute::ReadOnly || R == Attribute::ReadNone || |
843 | R == Attribute::WriteOnly) |
844 | && "Must be an access attribute." ); |
845 | assert(A && "Argument must not be null." ); |
846 | |
847 | // If the argument already has the attribute, nothing needs to be done. |
848 | if (A->hasAttribute(Kind: R)) |
849 | return false; |
850 | |
851 | // Otherwise, remove potentially conflicting attribute, add the new one, |
852 | // and update statistics. |
853 | A->removeAttr(Kind: Attribute::WriteOnly); |
854 | A->removeAttr(Kind: Attribute::ReadOnly); |
855 | A->removeAttr(Kind: Attribute::ReadNone); |
856 | // Remove conflicting writable attribute. |
857 | if (R == Attribute::ReadNone || R == Attribute::ReadOnly) |
858 | A->removeAttr(Kind: Attribute::Writable); |
859 | A->addAttr(Kind: R); |
860 | if (R == Attribute::ReadOnly) |
861 | ++NumReadOnlyArg; |
862 | else if (R == Attribute::WriteOnly) |
863 | ++NumWriteOnlyArg; |
864 | else |
865 | ++NumReadNoneArg; |
866 | return true; |
867 | } |
868 | |
/// Deduce nocapture (and, where possible, readonly/readnone/writeonly)
/// attributes for pointer arguments across the SCC.
///
/// Works in two phases: first, each function's arguments are analyzed in
/// isolation, resolving the trivial cases immediately and recording the
/// ambiguous ones (those flowing into other SCC functions) in an
/// ArgumentGraph; second, the strongly connected components of that
/// argument graph are solved as units.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed) {
  ArgumentGraph AG;

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (addArgumentAttrsFromCallsites(F&: *F))
      Changed.insert(Ptr: F);

    // Functions that are readonly (or readnone) and nounwind and don't return
    // a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() &&
        F->getReturnType()->isVoidTy()) {
      for (Argument &A : F->args()) {
        if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) {
          A.addAttr(Kind: Attribute::NoCapture);
          ++NumNoCapture;
          Changed.insert(Ptr: F);
        }
      }
      continue;
    }

    // Per-argument analysis: resolve each pointer argument locally if
    // possible, otherwise record it in the argument graph for phase two.
    for (Argument &A : F->args()) {
      if (!A.getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      if (!A.hasNoCaptureAttr()) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(V: &A, Tracker: &Tracker);
        if (!Tracker.Captured) {
          if (Tracker.Uses.empty()) {
            // If it's trivially not captured, mark it nocapture now.
            A.addAttr(Kind: Attribute::NoCapture);
            ++NumNoCapture;
            Changed.insert(Ptr: F);
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
            ArgumentGraphNode *Node = AG[&A];
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(Elt: AG[Use]);
              if (Use != &A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A.onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone/writeonly without doing
        // an SCC? Note that we don't allow any calls at all here, or else our
        // result will be dependent on the iteration order through the
        // functions in the SCC.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(Ptr: &A);
        Attribute::AttrKind R = determinePointerAccessAttrs(A: &A, SCCNodes: Self);
        if (R != Attribute::None)
          if (addAccessAttr(A: &A, R))
            Changed.insert(Ptr: F);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already been
  // made. If the definition doesn't have a 'nocapture' attribute by now, it
  // captures.

  // Phase two: solve each strongly connected component of the argument graph.
  for (scc_iterator<ArgumentGraph *> I = scc_begin(G: &AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // eg. "void f(int* x) { if (...) f(x); }"
      // A singleton SCC whose only use is itself: the argument only flows
      // back into the same parameter of a recursive call, so it's nocapture.
      if (ArgumentSCC[0]->Uses.size() == 1 &&
          ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
        Argument *A = ArgumentSCC[0]->Definition;
        A->addAttr(Kind: Attribute::NoCapture);
        ++NumNoCapture;
        Changed.insert(Ptr: A->getParent());

        // Infer the access attributes given the new nocapture one
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(Ptr: &*A);
        Attribute::AttrKind R = determinePointerAccessAttrs(A: &*A, SCCNodes: Self);
        if (R != Attribute::None)
          addAccessAttr(A, R);
      }
      continue;
    }

    // A node with no uses is one whose capture decision was already made in
    // phase one; if it didn't end up nocapture, the whole SCC is captured.
    bool SCCCaptured = false;
    for (ArgumentGraphNode *Node : ArgumentSCC) {
      if (Node->Uses.empty() && !Node->Definition->hasNoCaptureAttr()) {
        SCCCaptured = true;
        break;
      }
    }
    if (SCCCaptured)
      continue;

    SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
    // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
    // quickly looking up whether a given Argument is in this ArgumentSCC.
    for (ArgumentGraphNode *I : ArgumentSCC) {
      ArgumentSCCNodes.insert(Ptr: I->Definition);
    }

    // Any use that escapes to an argument outside this SCC which is not
    // already nocapture means the whole SCC may be captured.
    for (ArgumentGraphNode *N : ArgumentSCC) {
      for (ArgumentGraphNode *Use : N->Uses) {
        Argument *A = Use->Definition;
        if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(Ptr: A))
          continue;
        SCCCaptured = true;
        break;
      }
      if (SCCCaptured)
        break;
    }
    if (SCCCaptured)
      continue;

    // No escape was found: every argument in this SCC is nocapture.
    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      A->addAttr(Kind: Attribute::NoCapture);
      ++NumNoCapture;
      Changed.insert(Ptr: A->getParent());
    }

    // We also want to compute readonly/readnone/writeonly. With a small number
    // of false negatives, we can assume that any pointer which is captured
    // isn't going to be provably readonly or readnone, since by definition
    // we can't analyze all uses of a captured pointer.
    //
    // The false negatives happen when the pointer is captured by a function
    // that promises readonly/readnone behaviour on the pointer, then the
    // pointer's lifetime ends before anything that writes to arbitrary memory.
    // Also, a readonly/readnone pointer may be returned, but returning a
    // pointer is capturing it.

    // Meet over the access lattice: ReadNone is the identity; any
    // disagreement between ReadOnly and WriteOnly collapses to None.
    auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) {
      if (A == B)
        return A;
      if (A == Attribute::ReadNone)
        return B;
      if (B == Attribute::ReadNone)
        return A;
      return Attribute::None;
    };

    Attribute::AttrKind AccessAttr = Attribute::ReadNone;
    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      Attribute::AttrKind K = determinePointerAccessAttrs(A, SCCNodes: ArgumentSCCNodes);
      AccessAttr = meetAccessAttr(AccessAttr, K);
      if (AccessAttr == Attribute::None)
        break;
    }

    if (AccessAttr != Attribute::None) {
      for (ArgumentGraphNode *N : ArgumentSCC) {
        Argument *A = N->Definition;
        if (addAccessAttr(A, R: AccessAttr))
          Changed.insert(Ptr: A->getParent());
      }
    }
  }
}
1050 | |
/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
///
/// Implemented as a worklist walk backwards from every return value: each
/// value must bottom out in null/undef, a fresh allocation (alloca, or a
/// call already known/assumed noalias), and must not be captured.
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  // Worklist of values that flow into a return; grows as we look through
  // casts, GEPs, selects and phis.
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(Val: BB.getTerminator()))
      FlowsToReturn.insert(X: Ret->getReturnValue());

  // Indexed loop because the set may grow while we iterate.
  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(Val: RetVal)) {
      // Only null and undef constants are compatible with malloc-like
      // behaviour; any other constant is a visible pointer.
      if (!C->isNullValue() && !isa<UndefValue>(Val: C))
        return false;

      continue;
    }

    // Returning an argument means returning a pointer the caller already
    // sees, which can alias.
    if (isa<Argument>(Val: RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(Val: RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(X: RVI->getOperand(i: 0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(Val: RVI);
        FlowsToReturn.insert(X: SI->getTrueValue());
        FlowsToReturn.insert(X: SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(Val: RVI);
        for (Value *IncValue : PN->incoming_values())
          FlowsToReturn.insert(X: IncValue);
        continue;
      }

      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallBase &CB = cast<CallBase>(Val&: *RVI);
        // A noalias return is a fresh pointer by definition; a call into the
        // SCC is speculatively assumed malloc-like (we're proving the SCC as
        // a whole).
        if (CB.hasRetAttr(Kind: Attribute::NoAlias))
          break;
        if (CB.getCalledFunction() && SCCNodes.count(key: CB.getCalledFunction()))
          break;
        [[fallthrough]];
      }
      default:
        return false; // Did not come from an allocation.
      }

    // Even a fresh allocation may escape; if the pointer is captured it can
    // alias something the caller sees.
    if (PointerMayBeCaptured(V: RetVal, ReturnCaptures: false, /*StoreCaptures=*/false))
      return false;
  }

  return true;
}
1117 | |
1118 | /// Deduce noalias attributes for the SCC. |
1119 | static void addNoAliasAttrs(const SCCNodeSet &SCCNodes, |
1120 | SmallSet<Function *, 8> &Changed) { |
1121 | // Check each function in turn, determining which functions return noalias |
1122 | // pointers. |
1123 | for (Function *F : SCCNodes) { |
1124 | // Already noalias. |
1125 | if (F->returnDoesNotAlias()) |
1126 | continue; |
1127 | |
1128 | // We can infer and propagate function attributes only when we know that the |
1129 | // definition we'll get at link time is *exactly* the definition we see now. |
1130 | // For more details, see GlobalValue::mayBeDerefined. |
1131 | if (!F->hasExactDefinition()) |
1132 | return; |
1133 | |
1134 | // We annotate noalias return values, which are only applicable to |
1135 | // pointer types. |
1136 | if (!F->getReturnType()->isPointerTy()) |
1137 | continue; |
1138 | |
1139 | if (!isFunctionMallocLike(F, SCCNodes)) |
1140 | return; |
1141 | } |
1142 | |
1143 | for (Function *F : SCCNodes) { |
1144 | if (F->returnDoesNotAlias() || |
1145 | !F->getReturnType()->isPointerTy()) |
1146 | continue; |
1147 | |
1148 | F->setReturnDoesNotAlias(); |
1149 | ++NumNoAlias; |
1150 | Changed.insert(Ptr: F); |
1151 | } |
1152 | } |
1153 | |
/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return a null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types" );
  Speculative = false;

  // Worklist of values that flow into a return; grows as we look through
  // casts, inbounds GEPs, selects and phis.
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(Val: BB.getTerminator()))
      FlowsToReturn.insert(X: Ret->getReturnValue());

  auto &DL = F->getDataLayout();

  // Indexed loop because the set may grow while we iterate.
  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good
    if (isKnownNonZero(V: RetVal, Q: DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(Val: RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      FlowsToReturn.insert(X: RVI->getOperand(i: 0));
      continue;
    case Instruction::GetElementPtr:
      // Only an inbounds GEP preserves non-nullness of its base (an
      // out-of-bounds GEP could wrap to null).
      if (cast<GEPOperator>(Val: RVI)->isInBounds()) {
        FlowsToReturn.insert(X: RVI->getOperand(i: 0));
        continue;
      }
      return false;
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(Val: RVI);
      FlowsToReturn.insert(X: SI->getTrueValue());
      FlowsToReturn.insert(X: SI->getFalseValue());
      continue;
    }
    case Instruction::PHI: {
      PHINode *PN = cast<PHINode>(Val: RVI);
      for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        FlowsToReturn.insert(X: PN->getIncomingValue(i));
      continue;
    }
    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(Val&: *RVI);
      Function *Callee = CB.getCalledFunction();
      // A call to a node within the SCC is assumed to return null until
      // proven otherwise
      if (Callee && SCCNodes.count(key: Callee)) {
        Speculative = true;
        continue;
      }
      return false;
    }
    default:
      return false; // Unknown source, may be null
    };
    llvm_unreachable("should have either continued or returned" );
  }

  return true;
}
1230 | |
1231 | /// Deduce nonnull attributes for the SCC. |
1232 | static void addNonNullAttrs(const SCCNodeSet &SCCNodes, |
1233 | SmallSet<Function *, 8> &Changed) { |
1234 | // Speculative that all functions in the SCC return only nonnull |
1235 | // pointers. We may refute this as we analyze functions. |
1236 | bool SCCReturnsNonNull = true; |
1237 | |
1238 | // Check each function in turn, determining which functions return nonnull |
1239 | // pointers. |
1240 | for (Function *F : SCCNodes) { |
1241 | // Already nonnull. |
1242 | if (F->getAttributes().hasRetAttr(Kind: Attribute::NonNull)) |
1243 | continue; |
1244 | |
1245 | // We can infer and propagate function attributes only when we know that the |
1246 | // definition we'll get at link time is *exactly* the definition we see now. |
1247 | // For more details, see GlobalValue::mayBeDerefined. |
1248 | if (!F->hasExactDefinition()) |
1249 | return; |
1250 | |
1251 | // We annotate nonnull return values, which are only applicable to |
1252 | // pointer types. |
1253 | if (!F->getReturnType()->isPointerTy()) |
1254 | continue; |
1255 | |
1256 | bool Speculative = false; |
1257 | if (isReturnNonNull(F, SCCNodes, Speculative)) { |
1258 | if (!Speculative) { |
1259 | // Mark the function eagerly since we may discover a function |
1260 | // which prevents us from speculating about the entire SCC |
1261 | LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName() |
1262 | << " as nonnull\n" ); |
1263 | F->addRetAttr(Kind: Attribute::NonNull); |
1264 | ++NumNonNullReturn; |
1265 | Changed.insert(Ptr: F); |
1266 | } |
1267 | continue; |
1268 | } |
1269 | // At least one function returns something which could be null, can't |
1270 | // speculate any more. |
1271 | SCCReturnsNonNull = false; |
1272 | } |
1273 | |
1274 | if (SCCReturnsNonNull) { |
1275 | for (Function *F : SCCNodes) { |
1276 | if (F->getAttributes().hasRetAttr(Kind: Attribute::NonNull) || |
1277 | !F->getReturnType()->isPointerTy()) |
1278 | continue; |
1279 | |
1280 | LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n" ); |
1281 | F->addRetAttr(Kind: Attribute::NonNull); |
1282 | ++NumNonNullReturn; |
1283 | Changed.insert(Ptr: F); |
1284 | } |
1285 | } |
1286 | } |
1287 | |
/// Deduce noundef attributes for the SCC.
///
/// A function's return gets noundef when every return value is provably
/// neither undef nor poison, and the existing return attributes (nonnull,
/// align, range) can be independently re-proven for that value.
static void addNoUndefAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noundef
  // values.
  for (Function *F : SCCNodes) {
    // Already noundef.
    AttributeList Attrs = F->getAttributes();
    if (Attrs.hasRetAttr(Kind: Attribute::NoUndef))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // MemorySanitizer assumes that the definition and declaration of a
    // function will be consistent. A function with sanitize_memory attribute
    // should be skipped from inference.
    if (F->hasFnAttribute(Kind: Attribute::SanitizeMemory))
      continue;

    // noundef on a void return carries no information.
    if (F->getReturnType()->isVoidTy())
      continue;

    const DataLayout &DL = F->getDataLayout();
    // Every basic block's return (if it has one) must satisfy all checks.
    if (all_of(Range&: *F, P: [&](BasicBlock &BB) {
          if (auto *Ret = dyn_cast<ReturnInst>(Val: BB.getTerminator())) {
            // TODO: perform context-sensitive analysis?
            Value *RetVal = Ret->getReturnValue();
            if (!isGuaranteedNotToBeUndefOrPoison(V: RetVal))
              return false;

            // We know the original return value is not poison now, but it
            // could still be converted to poison by another return attribute.
            // Try to explicitly re-prove the relevant attributes.
            if (Attrs.hasRetAttr(Kind: Attribute::NonNull) &&
                !isKnownNonZero(V: RetVal, Q: DL))
              return false;

            if (MaybeAlign Align = Attrs.getRetAlignment())
              if (RetVal->getPointerAlignment(DL) < *Align)
                return false;

            // A 'range' return attribute poisons values outside the range;
            // the computed range of the value must fit inside it.
            Attribute Attr = Attrs.getRetAttr(Kind: Attribute::Range);
            if (Attr.isValid() &&
                !Attr.getRange().contains(
                    CR: computeConstantRange(V: RetVal, /*ForSigned=*/false)))
              return false;
          }
          return true;
        })) {
      F->addRetAttr(Kind: Attribute::NoUndef);
      ++NumNoUndefReturn;
      Changed.insert(Ptr: F);
    }
  }
}
1347 | |
1348 | namespace { |
1349 | |
/// Collects a set of attribute inference requests and performs them all in one
/// go on a single SCC Node. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// As soon as all the bodies are fine we are free to set the attribute.
/// Customization of inference for individual attributes is performed by
/// providing a handful of predicates for each attribute.
class AttributeInferer {
public:
  /// Describes a request for inference of a single attribute.
  struct InferenceDescriptor {

    /// Returns true if this function does not have to be handled.
    /// General intent for this predicate is to provide an optimization
    /// for functions that do not need this attribute inference at all
    /// (say, for functions that already have the attribute).
    std::function<bool(const Function &)> SkipFunction;

    /// Returns true if this instruction violates attribute assumptions.
    std::function<bool(Instruction &)> InstrBreaksAttribute;

    /// Sets the inferred attribute for this function.
    std::function<void(Function &)> SetAttribute;

    /// Attribute we derive.
    Attribute::AttrKind AKind;

    /// If true, only "exact" definitions can be used to infer this attribute.
    /// See GlobalValue::isDefinitionExact.
    bool RequiresExactDefinition;

    InferenceDescriptor(Attribute::AttrKind AK,
                        std::function<bool(const Function &)> SkipFunc,
                        std::function<bool(Instruction &)> InstrScan,
                        std::function<void(Function &)> SetAttr,
                        bool ReqExactDef)
        : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
          SetAttribute(SetAttr), AKind(AK),
          RequiresExactDefinition(ReqExactDef) {}
  };

private:
  /// All inference requests registered so far; run() processes them together.
  SmallVector<InferenceDescriptor, 4> InferenceDescriptors;

public:
  /// Queue a request; the actual scan happens in run().
  /// Descriptors are copied, so they may be built from temporaries.
  void registerAttrInference(InferenceDescriptor AttrInference) {
    InferenceDescriptors.push_back(Elt: AttrInference);
  }

  /// Scan the SCC once and apply every still-valid registered inference.
  /// Functions whose attributes changed are added to \p Changed.
  void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed);
};
1400 | |
/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
///
/// Strategy: assume every registered attribute holds for the whole SCC, then
/// scan all function bodies; any violating instruction invalidates that
/// attribute for the entire SCC. Attributes surviving the scan are applied
/// to every non-skipped function.
void AttributeInferer::run(const SCCNodeSet &SCCNodes,
                           SmallSet<Function *, 8> &Changed) {
  // Work on a copy: descriptors are removed as their assumptions fail.
  SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
  // Go through all the functions in SCC and check corresponding attribute
  // assumptions for each of them. Attributes that are invalid for this SCC
  // will be removed from InferInSCC.
  for (Function *F : SCCNodes) {

    // No attributes whose assumptions are still valid - done.
    if (InferInSCC.empty())
      return;

    // Check if our attributes ever need scanning/can be scanned.
    llvm::erase_if(C&: InferInSCC, P: [F](const InferenceDescriptor &ID) {
      if (ID.SkipFunction(*F))
        return false;

      // Remove from further inference (invalidate) when visiting a function
      // that has no instructions to scan/has an unsuitable definition.
      return F->isDeclaration() ||
             (ID.RequiresExactDefinition && !F->hasExactDefinition());
    });

    // For each attribute still in InferInSCC that doesn't explicitly skip F,
    // set up the F instructions scan to verify assumptions of the attribute.
    SmallVector<InferenceDescriptor, 4> InferInThisFunc;
    llvm::copy_if(
        Range&: InferInSCC, Out: std::back_inserter(x&: InferInThisFunc),
        P: [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });

    if (InferInThisFunc.empty())
      continue;

    // Start instruction scan.
    for (Instruction &I : instructions(F&: *F)) {
      llvm::erase_if(C&: InferInThisFunc, P: [&](const InferenceDescriptor &ID) {
        if (!ID.InstrBreaksAttribute(I))
          return false;
        // Remove attribute from further inference on any other functions
        // because attribute assumptions have just been violated.
        llvm::erase_if(C&: InferInSCC, P: [&ID](const InferenceDescriptor &D) {
          return D.AKind == ID.AKind;
        });
        // Remove attribute from the rest of current instruction scan.
        return true;
      });

      // All attributes invalidated for this function: stop scanning early.
      if (InferInThisFunc.empty())
        break;
    }
  }

  if (InferInSCC.empty())
    return;

  for (Function *F : SCCNodes)
    // At this point InferInSCC contains only functions that were either:
    //   - explicitly skipped from scan/inference, or
    //   - verified to have no instructions that break attribute assumptions.
    // Hence we just go and force the attribute for all non-skipped functions.
    for (auto &ID : InferInSCC) {
      if (ID.SkipFunction(*F))
        continue;
      Changed.insert(Ptr: F);
      ID.SetAttribute(*F);
    }
}
1470 | |
/// Result of collecting the analyzable functions of a call-graph SCC.
struct SCCNodesResult {
  // The functions of the SCC selected for attribute inference.
  SCCNodeSet SCCNodes;
  // Whether a call to an unknown callee was encountered while collecting
  // the SCC — presumably used to block SCC-wide assumptions; confirm against
  // the (not visible here) code that populates this struct.
  bool HasUnknownCall;
};
1475 | |
1476 | } // end anonymous namespace |
1477 | |
1478 | /// Helper for non-Convergent inference predicate InstrBreaksAttribute. |
1479 | static bool InstrBreaksNonConvergent(Instruction &I, |
1480 | const SCCNodeSet &SCCNodes) { |
1481 | const CallBase *CB = dyn_cast<CallBase>(Val: &I); |
1482 | // Breaks non-convergent assumption if CS is a convergent call to a function |
1483 | // not in the SCC. |
1484 | return CB && CB->isConvergent() && |
1485 | !SCCNodes.contains(key: CB->getCalledFunction()); |
1486 | } |
1487 | |
1488 | /// Helper for NoUnwind inference predicate InstrBreaksAttribute. |
1489 | static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) { |
1490 | if (!I.mayThrow(/* IncludePhaseOneUnwind */ true)) |
1491 | return false; |
1492 | if (const auto *CI = dyn_cast<CallInst>(Val: &I)) { |
1493 | if (Function *Callee = CI->getCalledFunction()) { |
1494 | // I is a may-throw call to a function inside our SCC. This doesn't |
1495 | // invalidate our current working assumption that the SCC is no-throw; we |
1496 | // just have to scan that other function. |
1497 | if (SCCNodes.contains(key: Callee)) |
1498 | return false; |
1499 | } |
1500 | } |
1501 | return true; |
1502 | } |
1503 | |
1504 | /// Helper for NoFree inference predicate InstrBreaksAttribute. |
1505 | static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) { |
1506 | CallBase *CB = dyn_cast<CallBase>(Val: &I); |
1507 | if (!CB) |
1508 | return false; |
1509 | |
1510 | if (CB->hasFnAttr(Kind: Attribute::NoFree)) |
1511 | return false; |
1512 | |
1513 | // Speculatively assume in SCC. |
1514 | if (Function *Callee = CB->getCalledFunction()) |
1515 | if (SCCNodes.contains(key: Callee)) |
1516 | return false; |
1517 | |
1518 | return true; |
1519 | } |
1520 | |
1521 | // Return true if this is an atomic which has an ordering stronger than |
1522 | // unordered. Note that this is different than the predicate we use in |
1523 | // Attributor. Here we chose to be conservative and consider monotonic |
1524 | // operations potentially synchronizing. We generally don't do much with |
1525 | // monotonic operations, so this is simply risk reduction. |
1526 | static bool isOrderedAtomic(Instruction *I) { |
1527 | if (!I->isAtomic()) |
1528 | return false; |
1529 | |
1530 | if (auto *FI = dyn_cast<FenceInst>(Val: I)) |
1531 | // All legal orderings for fence are stronger than monotonic. |
1532 | return FI->getSyncScopeID() != SyncScope::SingleThread; |
1533 | else if (isa<AtomicCmpXchgInst>(Val: I) || isa<AtomicRMWInst>(Val: I)) |
1534 | return true; |
1535 | else if (auto *SI = dyn_cast<StoreInst>(Val: I)) |
1536 | return !SI->isUnordered(); |
1537 | else if (auto *LI = dyn_cast<LoadInst>(Val: I)) |
1538 | return !LI->isUnordered(); |
1539 | else { |
1540 | llvm_unreachable("unknown atomic instruction?" ); |
1541 | } |
1542 | } |
1543 | |
1544 | static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) { |
1545 | // Volatile may synchronize |
1546 | if (I.isVolatile()) |
1547 | return true; |
1548 | |
1549 | // An ordered atomic may synchronize. (See comment about on monotonic.) |
1550 | if (isOrderedAtomic(I: &I)) |
1551 | return true; |
1552 | |
1553 | auto *CB = dyn_cast<CallBase>(Val: &I); |
1554 | if (!CB) |
1555 | // Non call site cases covered by the two checks above |
1556 | return false; |
1557 | |
1558 | if (CB->hasFnAttr(Kind: Attribute::NoSync)) |
1559 | return false; |
1560 | |
1561 | // Non volatile memset/memcpy/memmoves are nosync |
1562 | // NOTE: Only intrinsics with volatile flags should be handled here. All |
1563 | // others should be marked in Intrinsics.td. |
1564 | if (auto *MI = dyn_cast<MemIntrinsic>(Val: &I)) |
1565 | if (!MI->isVolatile()) |
1566 | return false; |
1567 | |
1568 | // Speculatively assume in SCC. |
1569 | if (Function *Callee = CB->getCalledFunction()) |
1570 | if (SCCNodes.contains(key: Callee)) |
1571 | return false; |
1572 | |
1573 | return true; |
1574 | } |
1575 | |
1576 | /// Attempt to remove convergent function attribute when possible. |
1577 | /// |
1578 | /// Returns true if any changes to function attributes were made. |
1579 | static void inferConvergent(const SCCNodeSet &SCCNodes, |
1580 | SmallSet<Function *, 8> &Changed) { |
1581 | AttributeInferer AI; |
1582 | |
1583 | // Request to remove the convergent attribute from all functions in the SCC |
1584 | // if every callsite within the SCC is not convergent (except for calls |
1585 | // to functions within the SCC). |
1586 | // Note: Removal of the attr from the callsites will happen in |
1587 | // InstCombineCalls separately. |
1588 | AI.registerAttrInference(AttrInference: AttributeInferer::InferenceDescriptor{ |
1589 | Attribute::Convergent, |
1590 | // Skip non-convergent functions. |
1591 | [](const Function &F) { return !F.isConvergent(); }, |
1592 | // Instructions that break non-convergent assumption. |
1593 | [SCCNodes](Instruction &I) { |
1594 | return InstrBreaksNonConvergent(I, SCCNodes); |
1595 | }, |
1596 | [](Function &F) { |
1597 | LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName() |
1598 | << "\n" ); |
1599 | F.setNotConvergent(); |
1600 | }, |
1601 | /* RequiresExactDefinition= */ false}); |
1602 | // Perform all the requested attribute inference actions. |
1603 | AI.run(SCCNodes, Changed); |
1604 | } |
1605 | |
/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions.
///
/// Registers nounwind, nofree and nosync inference (the first two can be
/// disabled via flags) and runs them; functions whose attributes changed are
/// added to \p Changed.
static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
                                         SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer nounwind attribute for all the functions in the SCC if
    // every callsite within the SCC is not throwing (except for calls to
    // functions within the SCC). Note that nounwind attribute suffers from
    // derefinement - results may change depending on how functions are
    // optimized. Thus it can be inferred only from exact definitions.
    AI.registerAttrInference(AttrInference: AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n" );
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request to infer nofree attribute for all the functions in the SCC if
    // every callsite within the SCC does not directly or indirectly free
    // memory (except for calls to functions within the SCC). Note that nofree
    // attribute suffers from derefinement - results may change depending on
    // how functions are optimized. Thus it can be inferred only from exact
    // definitions.
    AI.registerAttrInference(AttrInference: AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break non-deallocating assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n" );
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /* RequiresExactDefinition= */ true});

  // Request to infer nosync attribute for all the functions in the SCC if
  // every instruction within the SCC does not synchronize (calls to functions
  // within the SCC are speculatively assumed nosync). Like the attributes
  // above, nosync suffers from derefinement, so it is inferred only from
  // exact definitions.
  AI.registerAttrInference(AttrInference: AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n" );
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});

  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}
1678 | |
1679 | static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, |
1680 | SmallSet<Function *, 8> &Changed) { |
1681 | // Try and identify functions that do not recurse. |
1682 | |
1683 | // If the SCC contains multiple nodes we know for sure there is recursion. |
1684 | if (SCCNodes.size() != 1) |
1685 | return; |
1686 | |
1687 | Function *F = *SCCNodes.begin(); |
1688 | if (!F || !F->hasExactDefinition() || F->doesNotRecurse()) |
1689 | return; |
1690 | |
1691 | // If all of the calls in F are identifiable and are to norecurse functions, F |
1692 | // is norecurse. This check also detects self-recursion as F is not currently |
1693 | // marked norecurse, so any called from F to F will not be marked norecurse. |
1694 | for (auto &BB : *F) |
1695 | for (auto &I : BB.instructionsWithoutDebug()) |
1696 | if (auto *CB = dyn_cast<CallBase>(Val: &I)) { |
1697 | Function *Callee = CB->getCalledFunction(); |
1698 | if (!Callee || Callee == F || |
1699 | (!Callee->doesNotRecurse() && |
1700 | !(Callee->isDeclaration() && |
1701 | Callee->hasFnAttribute(Kind: Attribute::NoCallback)))) |
1702 | // Function calls a potentially recursive function. |
1703 | return; |
1704 | } |
1705 | |
1706 | // Every call was to a non-recursive function other than this function, and |
1707 | // we have no indirect recursion as the SCC size is one. This function cannot |
1708 | // recurse. |
1709 | F->setDoesNotRecurse(); |
1710 | ++NumNoRecurse; |
1711 | Changed.insert(Ptr: F); |
1712 | } |
1713 | |
1714 | static bool instructionDoesNotReturn(Instruction &I) { |
1715 | if (auto *CB = dyn_cast<CallBase>(Val: &I)) |
1716 | return CB->hasFnAttr(Kind: Attribute::NoReturn); |
1717 | return false; |
1718 | } |
1719 | |
1720 | // A basic block can only return if it terminates with a ReturnInst and does not |
1721 | // contain calls to noreturn functions. |
1722 | static bool basicBlockCanReturn(BasicBlock &BB) { |
1723 | if (!isa<ReturnInst>(Val: BB.getTerminator())) |
1724 | return false; |
1725 | return none_of(Range&: BB, P: instructionDoesNotReturn); |
1726 | } |
1727 | |
1728 | // FIXME: this doesn't handle recursion. |
1729 | static bool canReturn(Function &F) { |
1730 | SmallVector<BasicBlock *, 16> Worklist; |
1731 | SmallPtrSet<BasicBlock *, 16> Visited; |
1732 | |
1733 | Visited.insert(Ptr: &F.front()); |
1734 | Worklist.push_back(Elt: &F.front()); |
1735 | |
1736 | do { |
1737 | BasicBlock *BB = Worklist.pop_back_val(); |
1738 | if (basicBlockCanReturn(BB&: *BB)) |
1739 | return true; |
1740 | for (BasicBlock *Succ : successors(BB)) |
1741 | if (Visited.insert(Ptr: Succ).second) |
1742 | Worklist.push_back(Elt: Succ); |
1743 | } while (!Worklist.empty()); |
1744 | |
1745 | return false; |
1746 | } |
1747 | |
1748 | // Set the noreturn function attribute if possible. |
1749 | static void addNoReturnAttrs(const SCCNodeSet &SCCNodes, |
1750 | SmallSet<Function *, 8> &Changed) { |
1751 | for (Function *F : SCCNodes) { |
1752 | if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Kind: Attribute::Naked) || |
1753 | F->doesNotReturn()) |
1754 | continue; |
1755 | |
1756 | if (!canReturn(F&: *F)) { |
1757 | F->setDoesNotReturn(); |
1758 | Changed.insert(Ptr: F); |
1759 | } |
1760 | } |
1761 | } |
1762 | |
1763 | static bool functionWillReturn(const Function &F) { |
1764 | // We can infer and propagate function attributes only when we know that the |
1765 | // definition we'll get at link time is *exactly* the definition we see now. |
1766 | // For more details, see GlobalValue::mayBeDerefined. |
1767 | if (!F.hasExactDefinition()) |
1768 | return false; |
1769 | |
1770 | // Must-progress function without side-effects must return. |
1771 | if (F.mustProgress() && F.onlyReadsMemory()) |
1772 | return true; |
1773 | |
1774 | // Can only analyze functions with a definition. |
1775 | if (F.isDeclaration()) |
1776 | return false; |
1777 | |
1778 | // Functions with loops require more sophisticated analysis, as the loop |
1779 | // may be infinite. For now, don't try to handle them. |
1780 | SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges; |
1781 | FindFunctionBackedges(F, Result&: Backedges); |
1782 | if (!Backedges.empty()) |
1783 | return false; |
1784 | |
1785 | // If there are no loops, then the function is willreturn if all calls in |
1786 | // it are willreturn. |
1787 | return all_of(Range: instructions(F), P: [](const Instruction &I) { |
1788 | return I.willReturn(); |
1789 | }); |
1790 | } |
1791 | |
1792 | // Set the willreturn function attribute if possible. |
1793 | static void addWillReturn(const SCCNodeSet &SCCNodes, |
1794 | SmallSet<Function *, 8> &Changed) { |
1795 | for (Function *F : SCCNodes) { |
1796 | if (!F || F->willReturn() || !functionWillReturn(F: *F)) |
1797 | continue; |
1798 | |
1799 | F->setWillReturn(); |
1800 | NumWillReturn++; |
1801 | Changed.insert(Ptr: F); |
1802 | } |
1803 | } |
1804 | |
1805 | static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) { |
1806 | SCCNodesResult Res; |
1807 | Res.HasUnknownCall = false; |
1808 | for (Function *F : Functions) { |
1809 | if (!F || F->hasOptNone() || F->hasFnAttribute(Kind: Attribute::Naked) || |
1810 | F->isPresplitCoroutine()) { |
1811 | // Treat any function we're trying not to optimize as if it were an |
1812 | // indirect call and omit it from the node set used below. |
1813 | Res.HasUnknownCall = true; |
1814 | continue; |
1815 | } |
1816 | // Track whether any functions in this SCC have an unknown call edge. |
1817 | // Note: if this is ever a performance hit, we can common it with |
1818 | // subsequent routines which also do scans over the instructions of the |
1819 | // function. |
1820 | if (!Res.HasUnknownCall) { |
1821 | for (Instruction &I : instructions(F&: *F)) { |
1822 | if (auto *CB = dyn_cast<CallBase>(Val: &I)) { |
1823 | if (!CB->getCalledFunction()) { |
1824 | Res.HasUnknownCall = true; |
1825 | break; |
1826 | } |
1827 | } |
1828 | } |
1829 | } |
1830 | Res.SCCNodes.insert(X: F); |
1831 | } |
1832 | return Res; |
1833 | } |
1834 | |
/// Run the full attribute-inference pipeline over \p Functions (one SCC) and
/// return the set of functions whose attributes changed. When \p ArgAttrsOnly
/// is set, only argument attributes are inferred and the rest of the pipeline
/// is skipped.
template <typename AARGetterT>
static SmallSet<Function *, 8>
deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
                       bool ArgAttrsOnly) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);

  // Bail if the SCC only contains optnone functions.
  if (Nodes.SCCNodes.empty())
    return {};

  SmallSet<Function *, 8> Changed;
  if (ArgAttrsOnly) {
    addArgumentAttrs(SCCNodes: Nodes.SCCNodes, Changed);
    return Changed;
  }

  // Inferences that are valid even in the presence of unknown call edges.
  addArgumentReturnedAttrs(SCCNodes: Nodes.SCCNodes, Changed);
  addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
  addArgumentAttrs(SCCNodes: Nodes.SCCNodes, Changed);
  inferConvergent(SCCNodes: Nodes.SCCNodes, Changed);
  addNoReturnAttrs(SCCNodes: Nodes.SCCNodes, Changed);
  addWillReturn(SCCNodes: Nodes.SCCNodes, Changed);
  addNoUndefAttrs(SCCNodes: Nodes.SCCNodes, Changed);

  // If we have no external nodes participating in the SCC, we can deduce some
  // more precise attributes as well.
  if (!Nodes.HasUnknownCall) {
    addNoAliasAttrs(SCCNodes: Nodes.SCCNodes, Changed);
    addNonNullAttrs(SCCNodes: Nodes.SCCNodes, Changed);
    inferAttrsFromFunctionBodies(SCCNodes: Nodes.SCCNodes, Changed);
    addNoRecurseAttrs(SCCNodes: Nodes.SCCNodes, Changed);
  }

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This is handling the cases where one attribute on a signature
  // implies another, but for implementation reasons the inference rule for
  // the later is missing (or simply less sophisticated).
  for (Function *F : Nodes.SCCNodes)
    if (F)
      if (inferAttributesFromOthers(F&: *F))
        Changed.insert(Ptr: F);

  return Changed;
}
1879 | |
/// CGSCC entry point: derive attributes for the functions of SCC \p C and
/// invalidate exactly the analyses of the functions that changed (plus their
/// direct callers).
PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  // Skip non-recursive functions if requested.
  // Only infer argument attributes for non-recursive functions, because
  // it can affect optimization behavior in conjunction with noalias.
  bool ArgAttrsOnly = false;
  if (C.size() == 1 && SkipNonRecursive) {
    LazyCallGraph::Node &N = *C.begin();
    // A singleton SCC with no edge from the node to itself is not
    // self-recursive.
    if (!N->lookup(N))
      ArgAttrsOnly = true;
  }

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(IR&: C, ExtraArgs&: CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(IR&: F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(Elt: &N.getFunction());
  }

  auto ChangedFunctions =
      deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
  if (ChangedFunctions.empty())
    return PreservedAnalyses::all();

  // Invalidate analyses for modified functions so that we don't have to
  // invalidate all analyses for all functions in this SCC.
  PreservedAnalyses FuncPA;
  // We haven't changed the CFG for modified functions.
  FuncPA.preserveSet<CFGAnalyses>();
  for (Function *Changed : ChangedFunctions) {
    FAM.invalidate(IR&: *Changed, PA: FuncPA);
    // Also invalidate any direct callers of changed functions since analyses
    // may care about attributes of direct callees. For example, MemorySSA cares
    // about whether or not a call's callee modifies memory and queries that
    // through function attributes.
    for (auto *U : Changed->users()) {
      if (auto *Call = dyn_cast<CallBase>(Val: U)) {
        if (Call->getCalledFunction() == Changed)
          FAM.invalidate(IR&: *Call->getFunction(), PA: FuncPA);
      }
    }
  }

  PreservedAnalyses PA;
  // We have not added or removed functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We already invalidated all relevant function analyses above.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}
1939 | |
1940 | void PostOrderFunctionAttrsPass::printPipeline( |
1941 | raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
1942 | static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline( |
1943 | OS, MapClassName2PassName); |
1944 | if (SkipNonRecursive) |
1945 | OS << "<skip-non-recursive-function-attrs>" ; |
1946 | } |
1947 | |
1948 | template <typename AARGetterT> |
1949 | static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) { |
1950 | SmallVector<Function *, 8> Functions; |
1951 | for (CallGraphNode *I : SCC) { |
1952 | Functions.push_back(Elt: I->getFunction()); |
1953 | } |
1954 | |
1955 | return !deriveAttrsInPostOrder(Functions, AARGetter).empty(); |
1956 | } |
1957 | |
1958 | static bool addNoRecurseAttrsTopDown(Function &F) { |
1959 | // We check the preconditions for the function prior to calling this to avoid |
1960 | // the cost of building up a reversible post-order list. We assert them here |
1961 | // to make sure none of the invariants this relies on were violated. |
1962 | assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!" ); |
1963 | assert(!F.doesNotRecurse() && |
1964 | "This function has already been deduced as norecurs!" ); |
1965 | assert(F.hasInternalLinkage() && |
1966 | "Can only do top-down deduction for internal linkage functions!" ); |
1967 | |
1968 | // If F is internal and all of its uses are calls from a non-recursive |
1969 | // functions, then none of its calls could in fact recurse without going |
1970 | // through a function marked norecurse, and so we can mark this function too |
1971 | // as norecurse. Note that the uses must actually be calls -- otherwise |
1972 | // a pointer to this function could be returned from a norecurse function but |
1973 | // this function could be recursively (indirectly) called. Note that this |
1974 | // also detects if F is directly recursive as F is not yet marked as |
1975 | // a norecurse function. |
1976 | for (auto &U : F.uses()) { |
1977 | auto *I = dyn_cast<Instruction>(Val: U.getUser()); |
1978 | if (!I) |
1979 | return false; |
1980 | CallBase *CB = dyn_cast<CallBase>(Val: I); |
1981 | if (!CB || !CB->isCallee(U: &U) || |
1982 | !CB->getParent()->getParent()->doesNotRecurse()) |
1983 | return false; |
1984 | } |
1985 | F.setDoesNotRecurse(); |
1986 | ++NumNoRecurse; |
1987 | return true; |
1988 | } |
1989 | |
1990 | static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) { |
1991 | // We only have a post-order SCC traversal (because SCCs are inherently |
1992 | // discovered in post-order), so we accumulate them in a vector and then walk |
1993 | // it in reverse. This is simpler than using the RPO iterator infrastructure |
1994 | // because we need to combine SCC detection and the PO walk of the call |
1995 | // graph. We can also cheat egregiously because we're primarily interested in |
1996 | // synthesizing norecurse and so we can only save the singular SCCs as SCCs |
1997 | // with multiple functions in them will clearly be recursive. |
1998 | |
1999 | SmallVector<Function *, 16> Worklist; |
2000 | CG.buildRefSCCs(); |
2001 | for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { |
2002 | for (LazyCallGraph::SCC &SCC : RC) { |
2003 | if (SCC.size() != 1) |
2004 | continue; |
2005 | Function &F = SCC.begin()->getFunction(); |
2006 | if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) |
2007 | Worklist.push_back(Elt: &F); |
2008 | } |
2009 | } |
2010 | bool Changed = false; |
2011 | for (auto *F : llvm::reverse(C&: Worklist)) |
2012 | Changed |= addNoRecurseAttrsTopDown(F&: *F); |
2013 | |
2014 | return Changed; |
2015 | } |
2016 | |
2017 | PreservedAnalyses |
2018 | ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { |
2019 | auto &CG = AM.getResult<LazyCallGraphAnalysis>(IR&: M); |
2020 | |
2021 | if (!deduceFunctionAttributeInRPO(M, CG)) |
2022 | return PreservedAnalyses::all(); |
2023 | |
2024 | PreservedAnalyses PA; |
2025 | PA.preserve<LazyCallGraphAnalysis>(); |
2026 | return PA; |
2027 | } |
2028 | |