//===- FunctionAttrs.cpp - Pass which marks function attributes ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph deducing and/or propagating function attributes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <optional>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "function-attrs"

STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute");
STATISTIC(NumCapturesNone, "Number of arguments marked captures(none)");
STATISTIC(NumCapturesPartial, "Number of arguments marked with captures "
                              "attribute other than captures(none)");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");
STATISTIC(NumCold, "Number of functions marked as cold");

STATISTIC(NumThinLinkNoRecurse,
          "Number of functions marked as norecurse during thinlink");
STATISTIC(NumThinLinkNoUnwind,
          "Number of functions marked as nounwind during thinlink");

static cl::opt<bool> EnableNonnullArgPropagation(
    "enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
    cl::desc("Try to propagate nonnull argument attributes from callsites to "
             "caller functions."));

static cl::opt<bool> DisableNoUnwindInference(
    "disable-nounwind-inference", cl::Hidden,
    cl::desc("Stop inferring nounwind attribute during function-attrs pass"));

static cl::opt<bool> DisableNoFreeInference(
    "disable-nofree-inference", cl::Hidden,
    cl::desc("Stop inferring nofree attribute during function-attrs pass"));

static cl::opt<bool> DisableThinLTOPropagation(
    "disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
    cl::desc("Don't propagate function-attrs in thinLTO"));

static void addCapturesStat(CaptureInfo CI) {
  if (capturesNothing(CI))
    ++NumCapturesNone;
  else
    ++NumCapturesPartial;
}

namespace {

using SCCNodeSet = SmallSetVector<Function *, 8>;

} // end anonymous namespace

static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc,
                         ModRefInfo MR, AAResults &AAR) {
  // Ignore accesses to known-invariant or local memory.
  MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocals=*/true);
  if (isNoModRef(MR))
    return;

  const Value *UO = getUnderlyingObjectAggressive(Loc.Ptr);
  if (isa<AllocaInst>(UO))
    return;
  if (isa<Argument>(UO)) {
    ME |= MemoryEffects::argMemOnly(MR);
    return;
  }

  // If it's not an identified object, it might be an argument.
  if (!isIdentifiedObject(UO))
    ME |= MemoryEffects::argMemOnly(MR);
  ME |= MemoryEffects(IRMemLocation::ErrnoMem, MR);
  ME |= MemoryEffects(IRMemLocation::Other, MR);
}

static void addArgLocs(MemoryEffects &ME, const CallBase *Call,
                       ModRefInfo ArgMR, AAResults &AAR) {
  for (const Value *Arg : Call->args()) {
    if (!Arg->getType()->isPtrOrPtrVectorTy())
      continue;

    addLocAccess(ME,
                 MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()),
                 ArgMR, AAR);
  }
}

/// Returns the memory access attribute for function F using AAR for AA
/// results, where SCCNodes is the current SCC.
///
/// If ThisBody is true, this function may examine the function body and will
/// return a result pertaining to this copy of the function. If it is false,
/// the result will be based only on AA results for the function declaration;
/// it will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
///
/// The return value is split into two parts: Memory effects that always apply,
/// and additional memory effects that apply if any of the functions in the SCC
/// can access argmem.
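///
/// For example (illustrative only): if an SCC member only stores through its
/// own pointer argument but also calls a sibling SCC member, passing it a
/// global, then the first part records the argmem write, and the second part
/// records that accessing argmem anywhere in the SCC implies a possible
/// access to the global ("other") location as well.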
static std::pair<MemoryEffects, MemoryEffects>
checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
                          const SCCNodeSet &SCCNodes) {
  MemoryEffects OrigME = AAR.getMemoryEffects(&F);
  if (OrigME.doesNotAccessMemory())
    // Already perfect!
    return {OrigME, MemoryEffects::none()};

  if (!ThisBody)
    return {OrigME, MemoryEffects::none()};

  MemoryEffects ME = MemoryEffects::none();
  // Additional locations accessed if the SCC accesses argmem.
  MemoryEffects RecursiveArgME = MemoryEffects::none();

  // Inalloca and preallocated arguments are always clobbered by the call.
  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      F.getAttributes().hasAttrSomewhere(Attribute::Preallocated))
    ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef);

  // Scan the function body for instructions that may read or write memory.
  for (Instruction &I : instructions(F)) {
    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      // We can optimistically ignore calls to functions in the same SCC, with
      // two caveats:
      // * Calls with operand bundles may have additional effects.
      // * Argument memory accesses may imply additional effects depending on
      //   what the argument location is.
      if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
          SCCNodes.count(Call->getCalledFunction())) {
        // Keep track of which additional locations are accessed if the SCC
        // turns out to access argmem.
        addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR);
        continue;
      }

      MemoryEffects CallME = AAR.getMemoryEffects(Call);

      // If the call doesn't access memory, we're done.
      if (CallME.doesNotAccessMemory())
        continue;

      // A pseudo probe call shouldn't change any function attribute since it
      // doesn't translate to a real instruction. It comes with a memory
      // access tag to prevent itself from being removed by optimizations
      // while not blocking other instructions from being optimized.
      if (isa<PseudoProbeInst>(I))
        continue;

      // Merge the callee's memory effects into the caller's, including
      // inaccessible and errno memory, but excluding argument memory, which
      // is handled separately.
      ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem);

      // If the call accesses captured memory (currently part of "other") and
      // an argument is captured (currently not tracked), then it may also
      // access argument memory.
      ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other);
      ME |= MemoryEffects::argMemOnly(OtherMR);

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem);
      if (ArgMR != ModRefInfo::NoModRef)
        addArgLocs(ME, Call, ArgMR, AAR);
      continue;
    }

    ModRefInfo MR = ModRefInfo::NoModRef;
    if (I.mayWriteToMemory())
      MR |= ModRefInfo::Mod;
    if (I.mayReadFromMemory())
      MR |= ModRefInfo::Ref;
    if (MR == ModRefInfo::NoModRef)
      continue;

    std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I);
    if (!Loc) {
      // If no location is known, conservatively assume anything can be
      // accessed.
      ME |= MemoryEffects(MR);
      continue;
    }

    // Volatile operations may access inaccessible memory.
    if (I.isVolatile())
      ME |= MemoryEffects::inaccessibleMemOnly(MR);

    addLocAccess(ME, *Loc, MR, AAR);
  }

  return {OrigME & ME, RecursiveArgME};
}

MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F,
                                                    AAResults &AAR) {
  return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first;
}

/// Deduce readonly/readnone/writeonly attributes for the SCC.
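///
/// Roughly: the SCC-wide effects are the union of each member's effects, and
/// are then intersected back into each member. For example, an SCC whose
/// members only load through their pointer arguments ends up with
/// memory(argmem: read).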
template <typename AARGetterT>
static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
                           SmallPtrSet<Function *, 8> &Changed) {
  MemoryEffects ME = MemoryEffects::none();
  MemoryEffects RecursiveArgME = MemoryEffects::none();
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);
    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected. See the
    // comment on GlobalValue::isDefinitionExact for more details.
    auto [FnME, FnRecursiveArgME] =
        checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
    ME |= FnME;
    RecursiveArgME |= FnRecursiveArgME;
    // Reached the bottom of the lattice, we will not be able to improve the
    // result.
    if (ME == MemoryEffects::unknown())
      return;
  }

  // If the SCC accesses argmem, add recursive accesses resulting from that.
  ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
  if (ArgMR != ModRefInfo::NoModRef)
    ME |= RecursiveArgME & MemoryEffects(ArgMR);

  for (Function *F : SCCNodes) {
    MemoryEffects OldME = F->getMemoryEffects();
    MemoryEffects NewME = ME & OldME;
    if (NewME != OldME) {
      ++NumMemoryAttr;
      F->setMemoryEffects(NewME);
      // Remove conflicting writable attributes.
      if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem)))
        for (Argument &A : F->args())
          A.removeAttr(Attribute::Writable);
      Changed.insert(F);
    }
  }
}

// Compute definitive function attributes for a function taking into account
// prevailing definitions and linkage types.
static FunctionSummary *calculatePrevailingSummary(
    ValueInfo VI,
    DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {

  auto [It, Inserted] = CachedPrevailingSummary.try_emplace(VI);
  if (!Inserted)
    return It->second;

  /// At this point, prevailing symbols have been resolved. The following leads
  /// to returning a conservative result:
  /// - Multiple instances with local linkage. Normally local linkage would be
  ///   unique per module as the GUID includes the module path. We could have
  ///   a GUID alias if there wasn't any distinguishing path when each file
  ///   was compiled, but that should be rare so we'll punt on those.

  /// These next 2 cases should not happen and will assert:
  /// - Multiple instances with external linkage. This should be caught in
  ///   symbol resolution.
  /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
  ///   knowledge meaning we have to go conservative.

  /// Otherwise, we calculate attributes for a function as:
  /// 1. If we have a local linkage, take its attributes. If there's somehow
  ///    multiple, bail and go conservative.
  /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is
  ///    prevailing, take its attributes.
  /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
  ///    differences. However, if the prevailing copy is known it will be used
  ///    so take its attributes. If the prevailing copy is in a native file
  ///    all IR copies will be dead and propagation will go conservative.
  /// 4. AvailableExternally summaries without a prevailing copy are known to
  ///    occur in a couple of circumstances:
  ///    a. An internal function gets imported due to its caller getting
  ///       imported, it becomes AvailableExternally but no prevailing
  ///       definition exists. Because it has to get imported along with its
  ///       caller the attributes will be captured by propagating on its
  ///       caller.
  ///    b. C++11 [temp.explicit]p10 can generate AvailableExternally
  ///       definitions of explicitly instantiated template declarations
  ///       for inlining which are ultimately dropped from the TU. Since this
  ///       is localized to the TU the attributes will have already made it to
  ///       the callers.
  ///    These are edge cases and already captured by their callers so we
  ///    ignore these for now. If they become relevant to optimize in the
  ///    future this can be revisited.
  /// 5. Otherwise, go conservative.

  FunctionSummary *Local = nullptr;
  FunctionSummary *Prevailing = nullptr;

  for (const auto &GVS : VI.getSummaryList()) {
    if (!GVS->isLive())
      continue;

    FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject());
    // Virtual and Unknown (e.g. indirect) calls require going conservative.
    if (!FS || FS->fflags().HasUnknownCall)
      return nullptr;

    const auto &Linkage = GVS->linkage();
    if (GlobalValue::isLocalLinkage(Linkage)) {
      if (Local) {
        LLVM_DEBUG(
            dbgs()
            << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
               "function "
            << VI.name() << " from " << FS->modulePath()
            << ". Previous module " << Local->modulePath() << "\n");
        return nullptr;
      }
      Local = FS;
    } else if (GlobalValue::isExternalLinkage(Linkage)) {
      assert(IsPrevailing(VI.getGUID(), GVS.get()));
      Prevailing = FS;
      break;
    } else if (GlobalValue::isWeakODRLinkage(Linkage) ||
               GlobalValue::isLinkOnceODRLinkage(Linkage) ||
               GlobalValue::isWeakAnyLinkage(Linkage) ||
               GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
      if (IsPrevailing(VI.getGUID(), GVS.get())) {
        Prevailing = FS;
        break;
      }
    } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
      // TODO: Handle these cases if they become meaningful.
      continue;
    }
  }

  auto &CPS = CachedPrevailingSummary[VI];
  if (Local) {
    assert(!Prevailing);
    CPS = Local;
  } else if (Prevailing) {
    assert(!Local);
    CPS = Prevailing;
  }

  return CPS;
}

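/// Propagate norecurse/nounwind through the summary index during the thin
/// link. Roughly: an SCC is inferred norecurse only if it is a single node
/// whose callees are all norecurse, and nounwind only if no member may throw
/// and every callee is nounwind. A missing or unknown summary anywhere in the
/// SCC makes the result conservative (nothing is inferred).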
bool llvm::thinLTOPropagateFunctionAttrs(
    ModuleSummaryIndex &Index,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {
  // TODO: implement addNoAliasAttrs once
  // there's more information about the return type in the summary.
  if (DisableThinLTOPropagation)
    return false;

  DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
  bool Changed = false;

  auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
    // Assume we can propagate unless we discover otherwise.
    FunctionSummary::FFlags InferredFlags;
    InferredFlags.NoRecurse = (SCCNodes.size() == 1);
    InferredFlags.NoUnwind = true;

    for (auto &V : SCCNodes) {
      FunctionSummary *CallerSummary =
          calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing);

      // Function summaries can lack information, e.g. for declarations; in
      // that case we must stay conservative.
      if (!CallerSummary)
        return;

      if (CallerSummary->fflags().MayThrow)
        InferredFlags.NoUnwind = false;

      for (const auto &Callee : CallerSummary->calls()) {
        FunctionSummary *CalleeSummary = calculatePrevailingSummary(
            Callee.first, CachedPrevailingSummary, IsPrevailing);

        if (!CalleeSummary)
          return;

        if (!CalleeSummary->fflags().NoRecurse)
          InferredFlags.NoRecurse = false;

        if (!CalleeSummary->fflags().NoUnwind)
          InferredFlags.NoUnwind = false;

        if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
          break;
      }
    }

    if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
      Changed = true;
      for (auto &V : SCCNodes) {
        if (InferredFlags.NoRecurse) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
                            << V.name() << "\n");
          ++NumThinLinkNoRecurse;
        }

        if (InferredFlags.NoUnwind) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
                            << V.name() << "\n");
          ++NumThinLinkNoUnwind;
        }

        for (const auto &S : V.getSummaryList()) {
          if (auto *FS = dyn_cast<FunctionSummary>(S.get())) {
            if (InferredFlags.NoRecurse)
              FS->setNoRecurse();

            if (InferredFlags.NoUnwind)
              FS->setNoUnwind();
          }
        }
      }
    }
  };

  // Call propagation functions on each SCC in the Index.
  for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
       ++I) {
    std::vector<ValueInfo> Nodes(*I);
    PropagateAttributes(Nodes);
  }
  return Changed;
}

namespace {

/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  /// CaptureComponents for this argument, excluding captures via Uses.
  /// We don't distinguish between other/return captures here.
  CaptureComponents CC = CaptureComponents::None;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { CI = CaptureInfo::all(); }

  Action captured(const Use *U, UseCaptureInfo UseCI) override {
    if (updateCaptureInfo(U, UseCI.UseCC)) {
      // Don't bother continuing if we already capture everything.
      if (capturesAll(CI.getOtherComponents()))
        return Stop;
      return Continue;
    }

    // For SCC argument tracking, we're not going to analyze other/ret
    // components separately, so don't follow the return value.
    return ContinueIgnoringReturn;
  }

  bool updateCaptureInfo(const Use *U, CaptureComponents CC) {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      if (isa<ReturnInst>(U->getUser()))
        CI |= CaptureInfo::retOnly(CC);
      else
        // Conservatively assume that the captured value might make its way
        // into the return value as well. This could be made more precise.
        CI |= CaptureInfo(CC);
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      CI |= CaptureInfo(CC);
      return true;
    }

    assert(!CB->isCallee(U) && "callee operand reported captured?");
    const unsigned UseIndex = CB->getDataOperandNo(U);
    if (UseIndex >= CB->arg_size()) {
      // Data operand, but not an argument operand -- must be a bundle
      // operand.
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand
      // bundle use. In this case it does not matter if the callee is within
      // our SCC or not -- we've been captured in some unknown way, and we
      // have to be conservative.
      CI |= CaptureInfo(CC);
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      CI |= CaptureInfo(CC);
      return true;
    }

    // TODO(captures): Could improve precision by remembering maximum
    // capture components for the argument.
    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // Does not include potential captures via Uses in the SCC.
  CaptureInfo CI = CaptureInfo::none();

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};

/// A struct of argument use: a Use and the offset it accesses. This struct
/// is used to track uses inside the function via GEPs. If a GEP has a
/// non-constant index, the Offset field is nullopt.
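///
/// For example (illustrative only): given "%p = getelementptr i8, ptr %arg,
/// i64 4" and a store through %p, the Use is the store's pointer operand and
/// the Offset is 4; with a non-constant GEP index, the Offset is nullopt.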
struct ArgumentUse {
  Use *U;
  std::optional<int64_t> Offset;
};

/// A struct of argument access info. "Unknown" accesses are cases like
/// unrecognized instructions, instructions that have more than one use of
/// the argument, or volatile memory accesses. "WriteWithSideEffect" accesses
/// are call instructions that not only write an argument but also capture it.
struct ArgumentAccessInfo {
  enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown };
  AccessType ArgAccessType;
  ConstantRangeList AccessRanges;
};

/// A struct to wrap the argument use info per block.
struct UsesPerBlockInfo {
  SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts;
  bool HasWrites = false;
  bool HasUnknownAccess = false;
};

/// A struct to summarize the argument use info in a function.
struct ArgumentUsesSummary {
  bool HasAnyWrite = false;
  bool HasWriteOutsideEntryBB = false;
  SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock;
};

ArgumentAccessInfo getArgumentAccessInfo(const Instruction *I,
                                         const ArgumentUse &ArgUse,
                                         const DataLayout &DL) {
  auto GetTypeAccessRange =
      [&DL](Type *Ty,
            std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto TypeSize = DL.getTypeStoreSize(Ty);
    if (!TypeSize.isScalable() && Offset) {
      int64_t Size = TypeSize.getFixedValue();
      APInt Low(64, *Offset, true);
      bool Overflow;
      APInt High = Low.sadd_ov(APInt(64, Size, true), Overflow);
      // Bail if the range overflows signed 64-bit int.
      if (Overflow)
        return std::nullopt;
      return ConstantRange(Low, High);
    }
    return std::nullopt;
  };
  auto GetConstantIntRange =
      [](Value *Length,
         std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto *ConstantLength = dyn_cast<ConstantInt>(Length);
    if (ConstantLength && Offset) {
      int64_t Len = ConstantLength->getSExtValue();

      // Reject zero or negative lengths.
      if (Len <= 0)
        return std::nullopt;

      APInt Low(64, *Offset, true);
      bool Overflow;
      APInt High = Low.sadd_ov(APInt(64, Len, true), Overflow);
      if (Overflow)
        return std::nullopt;

      return ConstantRange(Low, High);
    }
    return std::nullopt;
  };

  if (auto *SI = dyn_cast<StoreInst>(I)) {
    if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) {
      // Get the fixed type size of "SI". Since the access range of a write
      // will be unioned, if "SI" doesn't have a fixed type size, we just set
      // the access range to empty.
      ConstantRangeList AccessRanges;
      if (auto TypeAccessRange =
              GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset))
        AccessRanges.insert(*TypeAccessRange);
      return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)};
    }
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    if (LI->isSimple()) {
      assert(&LI->getOperandUse(0) == ArgUse.U);
      // Get the fixed type size of "LI". Different from Write, if "LI"
      // doesn't have a fixed type size, we conservatively treat it as a
      // clobber with an empty access range.
      if (auto TypeAccessRange =
              GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset))
        return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}};
    }
  } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) {
    if (!MemSet->isVolatile()) {
      ConstantRangeList AccessRanges;
      if (auto AccessRange =
              GetConstantIntRange(MemSet->getLength(), ArgUse.Offset))
        AccessRanges.insert(*AccessRange);
      return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
    }
  } else if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
    if (!MTI->isVolatile()) {
      if (&MTI->getOperandUse(0) == ArgUse.U) {
        ConstantRangeList AccessRanges;
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          AccessRanges.insert(*AccessRange);
        return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
      } else if (&MTI->getOperandUse(1) == ArgUse.U) {
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}};
      }
    }
  } else if (auto *CB = dyn_cast<CallBase>(I)) {
    if (CB->isArgOperand(ArgUse.U) &&
        !CB->isByValArgument(CB->getArgOperandNo(ArgUse.U))) {
      unsigned ArgNo = CB->getArgOperandNo(ArgUse.U);
      bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes);
      if (IsInitialize && ArgUse.Offset) {
        // The argument is a Write when the parameter is writeonly/readnone
        // and nocapture. Otherwise, it's a WriteWithSideEffect.
        auto Access = CB->onlyWritesMemory(ArgNo) && CB->doesNotCapture(ArgNo)
                          ? ArgumentAccessInfo::AccessType::Write
                          : ArgumentAccessInfo::AccessType::WriteWithSideEffect;
        ConstantRangeList AccessRanges;
        Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes);
        ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList();
        for (ConstantRange &CR : CBCRL)
          AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset,
                                            CR.getUpper() + *ArgUse.Offset));
        return {Access, AccessRanges};
      }
    }
  }
  // Other unrecognized instructions are considered as unknown.
  return {ArgumentAccessInfo::AccessType::Unknown, {}};
}

// Collect the uses of argument "A" in "F".
ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) {
  auto &DL = F.getParent()->getDataLayout();
  unsigned PointerSize =
      DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace());
  ArgumentUsesSummary Result;

  BasicBlock &EntryBB = F.getEntryBlock();
  SmallVector<ArgumentUse, 4> Worklist;
  for (Use &U : A.uses())
    Worklist.push_back({&U, 0});

  // Update "UsesPerBlock" with the block of "I" as key and "Info" as value.
  // Return true if the block of "I" has write accesses after updating.
  auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) {
    auto *BB = I->getParent();
    auto &BBInfo = Result.UsesPerBlock[BB];
    auto [It, Inserted] = BBInfo.Insts.try_emplace(I);
    auto &IInfo = It->second;

    // Instructions that have more than one use of the argument are
    // considered as clobbers.
    if (!Inserted) {
      IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}};
      BBInfo.HasUnknownAccess = true;
      return false;
    }

    IInfo = std::move(Info);
    BBInfo.HasUnknownAccess |=
        IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown;
    bool InfoHasWrites =
        (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
         IInfo.ArgAccessType ==
             ArgumentAccessInfo::AccessType::WriteWithSideEffect) &&
        !IInfo.AccessRanges.empty();
    BBInfo.HasWrites |= InfoHasWrites;
    return InfoHasWrites;
  };

  // No need for a visited set because we don't look through phis, so there
  // are no cycles.
  while (!Worklist.empty()) {
    ArgumentUse ArgUse = Worklist.pop_back_val();
    User *U = ArgUse.U->getUser();
    // Add GEP uses to the worklist.
    // If the GEP is not a constant GEP, set ArgumentUse::Offset to nullopt.
    if (auto *GEP = dyn_cast<GEPOperator>(U)) {
      std::optional<int64_t> NewOffset = std::nullopt;
      if (ArgUse.Offset) {
        APInt Offset(PointerSize, 0);
        if (GEP->accumulateConstantOffset(DL, Offset))
          NewOffset = *ArgUse.Offset + Offset.getSExtValue();
      }
      for (Use &U : GEP->uses())
        Worklist.push_back({&U, NewOffset});
      continue;
    }

    auto *I = cast<Instruction>(U);
    bool HasWrite = UpdateUseInfo(I, getArgumentAccessInfo(I, ArgUse, DL));

    Result.HasAnyWrite |= HasWrite;

    if (HasWrite && I->getParent() != &EntryBB)
      Result.HasWriteOutsideEntryBB = true;
  }
  return Result;
}

} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm

/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
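///
/// For example, an argument used only by non-volatile loads can be ReadOnly,
/// one only written through by non-volatile stores can be WriteOnly, and one
/// used only in icmp or ret instructions can be ReadNone.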
static Attribute::AttrKind
determinePointerAccessAttrs(Argument *A,
                            const SmallPtrSet<Argument *, 8> &SCCNodes) {
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  bool IsWrite = false;

  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    if (IsWrite && IsRead)
      // No point in searching further.
      return Attribute::None;

    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value
      // isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*I);
      if (CB.isCallee(U)) {
        IsRead = true;
        // Note that indirect calls do not capture, see comment in
        // CaptureTracking for context.
        continue;
      }

      // Given we've explicitly handled the callee operand above, what's left
      // must be a data operand (e.g. argument or operand bundle).
      const unsigned UseIndex = CB.getDataOperandNo(U);

      // Some intrinsics (for instance ptrmask) do not capture their results,
      // but return results that alias their pointer argument, and thus
      // should be handled like GEP or addrspacecast above.
      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
              &CB, /*MustPreserveNullness=*/false)) {
        for (Use &UU : CB.uses())
          if (Visited.insert(&UU).second)
            Worklist.push_back(&UU);
      } else if (capturesAnyProvenance(CB.getCaptureInfo(UseIndex))) {
        if (!CB.onlyReadsMemory())
          // If the callee can save a copy into other memory, then simply
          // scanning uses of the call is insufficient. We have no way
          // of tracking copies of the pointer through memory to see
          // if a reloaded copy is written to, thus we must give up.
          return Attribute::None;
        // Push users for processing once we finish this one.
        if (!I->getType()->isVoidTy())
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      }

      ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem);
      if (isNoModRef(ArgMR))
        continue;

      if (Function *F = CB.getCalledFunction())
        if (CB.isArgOperand(U) && UseIndex < F->arg_size() &&
            SCCNodes.count(F->getArg(UseIndex)))
          // This is an argument which is part of the speculative SCC. Note
          // that only operands corresponding to formal arguments of the
          // callee can participate in the speculation.
          break;

      // The accessors used on call site here do the right thing for calls and
      // invokes with operand bundles.
      if (CB.doesNotAccessMemory(UseIndex)) {
        /* nop */
      } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) {
        IsRead = true;
      } else if (!isRefSet(ArgMR) ||
                 CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) {
        IsWrite = true;
      } else {
        return Attribute::None;
      }
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::Store:
      if (cast<StoreInst>(I)->getValueOperand() == *U)
        // untrackable capture
        return Attribute::None;

      // A volatile store has side effects beyond what writeonly can be
      // relied upon.
      if (cast<StoreInst>(I)->isVolatile())
        return Attribute::None;

      IsWrite = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      break;

    default:
      return Attribute::None;
    }
  }

  if (IsWrite && IsRead)
    return Attribute::None;
  else if (IsRead)
    return Attribute::ReadOnly;
  else if (IsWrite)
    return Attribute::WriteOnly;
  else
    return Attribute::ReadNone;
}

/// Deduce returned attributes for the SCC.
static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
                                     SmallPtrSet<Function *, 8> &Changed) {
  // Check each function in turn, determining if an argument is always
  // returned.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    // There is nothing to do if an argument is already marked as 'returned'.
    if (F->getAttributes().hasAttrSomewhere(Attribute::Returned))
      continue;

    auto FindRetArg = [&]() -> Argument * {
      Argument *RetArg = nullptr;
      for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
          // Note that stripPointerCasts should look through functions with
          // returned arguments.
          auto *RetVal =
              dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts());
          if (!RetVal || RetVal->getType() != F->getReturnType())
            return nullptr;

          if (!RetArg)
            RetArg = RetVal;
          else if (RetArg != RetVal)
            return nullptr;
        }

      return RetArg;
    };

    if (Argument *RetArg = FindRetArg()) {
      RetArg->addAttr(Attribute::Returned);
      ++NumReturned;
      Changed.insert(F);
    }
  }
}

/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information
/// loss when attribute knowledge disappears with the inlined call.
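///
/// For example, if the caller's entry block is guaranteed to reach a call to
/// @g, whose corresponding parameter is declared nonnull, and the value
/// passed there is one of the caller's own arguments, then that caller
/// argument must be nonnull as well.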
static bool addArgumentAttrsFromCallsites(Function &F) {
  if (!EnableNonnullArgPropagation)
    return false;

  bool Changed = false;

  // For an argument attribute to transfer from a callsite to the parent, the
  // call must be guaranteed to execute every time the parent is called.
  // Conservatively, just check for calls in the entry block that are
  // guaranteed to execute.
  // TODO: This could be enhanced by testing if the callsite post-dominates the
  // entry block or by doing simple forward walks or backward walks to the
  // callsite.
  BasicBlock &Entry = F.getEntryBlock();
  for (Instruction &I : Entry) {
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (auto *CalledFunc = CB->getCalledFunction()) {
        for (auto &CSArg : CalledFunc->args()) {
          if (!CSArg.hasNonNullAttr(/*AllowUndefOrPoison=*/false))
            continue;

          // If the non-null callsite argument operand is an argument to 'F'
          // (the caller) and the call is guaranteed to execute, then the
          // value must be non-null throughout 'F'.
          auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
          if (FArg && !FArg->hasNonNullAttr()) {
            FArg->addAttr(Attribute::NonNull);
            Changed = true;
          }
        }
      }
    }
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  return Changed;
}

static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
  assert((R == Attribute::ReadOnly || R == Attribute::ReadNone ||
          R == Attribute::WriteOnly) &&
         "Must be an access attribute.");
  assert(A && "Argument must not be null.");

  // If the argument already has the attribute, nothing needs to be done.
  if (A->hasAttribute(R))
    return false;

  // Otherwise, remove potentially conflicting attributes, add the new one,
  // and update statistics.
  A->removeAttr(Attribute::WriteOnly);
  A->removeAttr(Attribute::ReadOnly);
  A->removeAttr(Attribute::ReadNone);
  // Remove conflicting writable attribute.
  if (R == Attribute::ReadNone || R == Attribute::ReadOnly)
    A->removeAttr(Attribute::Writable);
  A->addAttr(R);
  if (R == Attribute::ReadOnly)
    ++NumReadOnlyArg;
  else if (R == Attribute::WriteOnly)
    ++NumWriteOnlyArg;
  else
    ++NumReadNoneArg;
  return true;
}

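/// Infer the "initializes" attribute for pointer argument A of F: the
/// constant offset ranges of the pointee that F is guaranteed to write
/// before reading them. For example (illustrative only), a function whose
/// entry block stores an i32 at offset 0 and another i32 at offset 4 of its
/// argument initializes the byte range [0, 8).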
static bool inferInitializes(Argument &A, Function &F) {
  auto ArgumentUses = collectArgumentUsesPerBlock(A, F);
  // No write anywhere in the function, bail.
  if (!ArgumentUses.HasAnyWrite)
    return false;

  auto &UsesPerBlock = ArgumentUses.UsesPerBlock;
  BasicBlock &EntryBB = F.getEntryBlock();
  // A map to store the argument ranges initialized by a BasicBlock (including
  // its successors).
  DenseMap<const BasicBlock *, ConstantRangeList> Initialized;
  // Visit the successors of "BB" and the instructions in "BB" (post-order)
  // to get the argument ranges initialized by "BB" (including its
  // successors). The result will be cached in "Initialized".
  auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList {
    auto UPB = UsesPerBlock.find(BB);
    ConstantRangeList CRL;

    // Start with the intersection of successors.
    // If this block has any clobbering use, we're going to clear out the
    // ranges at some point in this block anyway, so don't bother looking at
    // successors.
    if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) {
      bool HasAddedSuccessor = false;
      for (auto *Succ : successors(BB)) {
        if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) {
          if (HasAddedSuccessor) {
            CRL = CRL.intersectWith(SuccI->second);
          } else {
            CRL = SuccI->second;
            HasAddedSuccessor = true;
          }
        } else {
          CRL = ConstantRangeList();
          break;
        }
      }
    }

    if (UPB != UsesPerBlock.end()) {
      // Sort uses in this block by instruction order.
      SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts;
      append_range(Insts, UPB->second.Insts);
      sort(Insts, [](std::pair<Instruction *, ArgumentAccessInfo> &LHS,
                     std::pair<Instruction *, ArgumentAccessInfo> &RHS) {
        return LHS.first->comesBefore(RHS.first);
      });

      // From the end of the block to the beginning of the block, set
      // initializes ranges.
      for (auto &[_, Info] : reverse(Insts)) {
        if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown ||
            Info.ArgAccessType ==
                ArgumentAccessInfo::AccessType::WriteWithSideEffect)
          CRL = ConstantRangeList();
        if (!Info.AccessRanges.empty()) {
          if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
              Info.ArgAccessType ==
                  ArgumentAccessInfo::AccessType::WriteWithSideEffect) {
            CRL = CRL.unionWith(Info.AccessRanges);
          } else {
            assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read);
            for (const auto &ReadRange : Info.AccessRanges)
              CRL.subtract(ReadRange);
          }
        }
      }
    }
    return CRL;
  };

  ConstantRangeList EntryCRL;
  // If all write instructions are in the EntryBB, or if the EntryBB has
  // a clobbering use, we only need to look at EntryBB.
  bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB;
  if (!OnlyScanEntryBlock)
    if (auto EntryUPB = UsesPerBlock.find(&EntryBB);
        EntryUPB != UsesPerBlock.end())
      OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess;
  if (OnlyScanEntryBlock) {
    EntryCRL = VisitBlock(&EntryBB);
    if (EntryCRL.empty())
      return false;
  } else {
    // Now we have to go through the CFG to get the initialized argument
    // ranges across blocks. With dominance and post-dominance, the ranges
    // initialized by a block include both accesses inside this block and
    // accesses in its (transitive) successors. So visit successors before
    // predecessors with a post-order walk of the blocks and memorize the
    // results in "Initialized".
    for (const BasicBlock *BB : post_order(&F)) {
      ConstantRangeList CRL = VisitBlock(BB);
      if (!CRL.empty())
        Initialized[BB] = CRL;
    }

    auto EntryCRLI = Initialized.find(&EntryBB);
    if (EntryCRLI == Initialized.end())
      return false;

    EntryCRL = EntryCRLI->second;
  }

  assert(!EntryCRL.empty() &&
         "should have bailed already if EntryCRL is empty");

  if (A.hasAttribute(Attribute::Initializes)) {
    ConstantRangeList PreviousCRL =
        A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList();
    if (PreviousCRL == EntryCRL)
      return false;
    EntryCRL = EntryCRL.unionWith(PreviousCRL);
  }

  A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes,
                           EntryCRL.rangesRef()));

  return true;
}

/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
                             SmallPtrSet<Function *, 8> &Changed,
                             bool SkipInitializes) {
  ArgumentGraph AG;

  auto DetermineAccessAttrsForSingleton = [](Argument *A) {
    SmallPtrSet<Argument *, 8> Self;
    Self.insert(A);
    Attribute::AttrKind R = determinePointerAccessAttrs(A, Self);
    if (R != Attribute::None)
      return addAccessAttr(A, R);
    return false;
  };

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (addArgumentAttrsFromCallsites(*F))
      Changed.insert(F);

    // Functions that are readonly (or readnone) and nounwind and don't return
    // a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() && F->willReturn() &&
        F->getReturnType()->isVoidTy()) {
      for (Argument &A : F->args()) {
        if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) {
          A.addAttr(Attribute::getWithCaptureInfo(A.getContext(),
                                                  CaptureInfo::none()));
          ++NumCapturesNone;
          Changed.insert(F);
        }
      }
      continue;
    }

    for (Argument &A : F->args()) {
      if (!A.getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      CaptureInfo OrigCI = A.getAttributes().getCaptureInfo();
      if (!capturesNothing(OrigCI)) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(&A, &Tracker);
        CaptureInfo NewCI = Tracker.CI & OrigCI;
        if (NewCI != OrigCI) {
          if (Tracker.Uses.empty()) {
            // If the information is complete, add the attribute now.
            A.addAttr(Attribute::getWithCaptureInfo(A.getContext(), NewCI));
            addCapturesStat(NewCI);
            Changed.insert(F);
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
            ArgumentGraphNode *Node = AG[&A];
            Node->CC = CaptureComponents(NewCI);
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(AG[Use]);
              if (Use != &A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A.onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone/writeonly without
        // doing an SCC? Note that we don't allow any calls at all here, or
        // else our result will be dependent on the iteration order through
        // the functions in the SCC.
        if (DetermineAccessAttrsForSingleton(&A))
          Changed.insert(F);
      }
      if (!SkipInitializes && !A.onlyReadsMemory()) {
        if (inferInitializes(A, *F))
          Changed.insert(F);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already
  // been made. If the definition doesn't have a 'nocapture' attribute by
  // now, it captures.

  for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // eg. "void f(int* x) { if (...) f(x); }"
      if (ArgumentSCC[0]->Uses.size() == 1 &&
          ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
        Argument *A = ArgumentSCC[0]->Definition;
        CaptureInfo OrigCI = A->getAttributes().getCaptureInfo();
        CaptureInfo NewCI = CaptureInfo(ArgumentSCC[0]->CC) & OrigCI;
        if (NewCI != OrigCI) {
          A->addAttr(Attribute::getWithCaptureInfo(A->getContext(), NewCI));
          addCapturesStat(NewCI);
          Changed.insert(A->getParent());
        }

        // Infer the access attributes given the new capture info.
        if (DetermineAccessAttrsForSingleton(A))
          Changed.insert(A->getParent());
      }
      continue;
    }

    SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
    // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
    // quickly looking up whether a given Argument is in this ArgumentSCC.
    for (ArgumentGraphNode *I : ArgumentSCC) {
      ArgumentSCCNodes.insert(I->Definition);
    }

    // At the SCC level, only track merged CaptureComponents. We're not
    // currently prepared to handle propagation of return-only captures
    // across the SCC.
    CaptureComponents CC = CaptureComponents::None;
    for (ArgumentGraphNode *N : ArgumentSCC) {
      for (ArgumentGraphNode *Use : N->Uses) {
        Argument *A = Use->Definition;
        if (ArgumentSCCNodes.count(A))
          CC |= Use->CC;
        else
          CC |= CaptureComponents(A->getAttributes().getCaptureInfo());
        break;
      }
      if (capturesAll(CC))
        break;
    }

    if (!capturesAll(CC)) {
      for (ArgumentGraphNode *N : ArgumentSCC) {
        Argument *A = N->Definition;
        CaptureInfo OrigCI = A->getAttributes().getCaptureInfo();
        CaptureInfo NewCI = CaptureInfo(N->CC | CC) & OrigCI;
        if (NewCI != OrigCI) {
          A->addAttr(Attribute::getWithCaptureInfo(A->getContext(), NewCI));
          addCapturesStat(NewCI);
          Changed.insert(A->getParent());
        }
      }
    }

    if (capturesAnyProvenance(CC)) {
      // As the pointer provenance may be captured, determine the pointer
      // attributes looking at each argument individually.
      for (ArgumentGraphNode *N : ArgumentSCC) {
        if (DetermineAccessAttrsForSingleton(N->Definition))
          Changed.insert(N->Definition->getParent());
      }
      continue;
    }

    // We also want to compute readonly/readnone/writeonly. With a small
    // number of false negatives, we can assume that any pointer which is
    // captured isn't going to be provably readonly or readnone, since by
    // definition we can't analyze all uses of a captured pointer.
    //
    // The false negatives happen when the pointer is captured by a function
    // that promises readonly/readnone behaviour on the pointer, then the
    // pointer's lifetime ends before anything that writes to arbitrary
    // memory. Also, a readonly/readnone pointer may be returned, but
    // returning a pointer is capturing it.

    auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) {
      if (A == B)
        return A;
      if (A == Attribute::ReadNone)
        return B;
      if (B == Attribute::ReadNone)
        return A;
      return Attribute::None;
    };

    Attribute::AttrKind AccessAttr = Attribute::ReadNone;
    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes);
      AccessAttr = meetAccessAttr(AccessAttr, K);
      if (AccessAttr == Attribute::None)
        break;
    }

    if (AccessAttr != Attribute::None) {
      for (ArgumentGraphNode *N : ArgumentSCC) {
        Argument *A = N->Definition;
        if (addAccessAttr(A, AccessAttr))
          Changed.insert(A->getParent());
      }
    }
  }
}

/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
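///
/// For example, a function that returns either null or the result of a call
/// to a noalias function (e.g. malloc) can be malloc-like, while a function
/// that can return one of its own arguments cannot.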
1448static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
1449 SmallSetVector<Value *, 8> FlowsToReturn;
1450 for (BasicBlock &BB : *F)
1451 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Val: BB.getTerminator()))
1452 FlowsToReturn.insert(X: Ret->getReturnValue());
1453
1454 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
1455 Value *RetVal = FlowsToReturn[i];
1456
1457 if (Constant *C = dyn_cast<Constant>(Val: RetVal)) {
1458 if (!C->isNullValue() && !isa<UndefValue>(Val: C))
1459 return false;
1460
1461 continue;
1462 }
1463
1464 if (isa<Argument>(Val: RetVal))
1465 return false;
1466
1467 if (Instruction *RVI = dyn_cast<Instruction>(Val: RetVal))
1468 switch (RVI->getOpcode()) {
1469 // Extend the analysis by looking upwards.
1470 case Instruction::BitCast:
1471 case Instruction::GetElementPtr:
1472 case Instruction::AddrSpaceCast:
1473 FlowsToReturn.insert(X: RVI->getOperand(i: 0));
1474 continue;
1475 case Instruction::Select: {
1476 SelectInst *SI = cast<SelectInst>(Val: RVI);
1477 FlowsToReturn.insert(X: SI->getTrueValue());
1478 FlowsToReturn.insert(X: SI->getFalseValue());
1479 continue;
1480 }
1481 case Instruction::PHI: {
1482 PHINode *PN = cast<PHINode>(Val: RVI);
1483 FlowsToReturn.insert_range(R: PN->incoming_values());
1484 continue;
1485 }
1486
1487 // Check whether the pointer came from an allocation.
1488 case Instruction::Alloca:
1489 break;
1490 case Instruction::Call:
1491 case Instruction::Invoke: {
1492 CallBase &CB = cast<CallBase>(Val&: *RVI);
1493 if (CB.hasRetAttr(Kind: Attribute::NoAlias))
1494 break;
1495 if (CB.getCalledFunction() && SCCNodes.count(key: CB.getCalledFunction()))
1496 break;
1497 [[fallthrough]];
1498 }
1499 default:
1500 return false; // Did not come from an allocation.
1501 }
1502
1503 if (PointerMayBeCaptured(V: RetVal, /*ReturnCaptures=*/false))
1504 return false;
1505 }
1506
1507 return true;
1508}
1509
/// Deduce noalias attributes for the SCC.
static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
                            SmallPtrSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noalias
  // pointers.
  for (Function *F : SCCNodes) {
    // Already noalias.
    if (F->returnDoesNotAlias())
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate noalias return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    if (!isFunctionMallocLike(F, SCCNodes))
      return;
  }

  for (Function *F : SCCNodes) {
    if (F->returnDoesNotAlias() ||
        !F->getReturnType()->isPointerTy())
      continue;

    F->setReturnDoesNotAlias();
    ++NumNoAlias;
    Changed.insert(F);
  }
}

/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
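///
/// An illustrative example (hypothetical IR): both arms of the returned
/// select below are provably non-null (a nonnull argument and an inbounds
/// GEP of it), so the function qualifies:
///
///   define ptr @pick(i1 %c, ptr nonnull %a) {
///     %g = getelementptr inbounds i8, ptr %a, i64 4
///     %r = select i1 %c, ptr %a, ptr %g
///     ret ptr %r
///   }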
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types");
  Speculative = false;

  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  auto &DL = F->getDataLayout();

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good.
    if (isKnownNonZero(RetVal, DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      FlowsToReturn.insert(RVI->getOperand(0));
      continue;
    case Instruction::GetElementPtr:
      if (cast<GEPOperator>(RVI)->isInBounds()) {
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      }
      return false;
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(RVI);
      FlowsToReturn.insert(SI->getTrueValue());
      FlowsToReturn.insert(SI->getFalseValue());
      continue;
    }
    case Instruction::PHI: {
      PHINode *PN = cast<PHINode>(RVI);
      for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        FlowsToReturn.insert(PN->getIncomingValue(i));
      continue;
    }
    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*RVI);
      Function *Callee = CB.getCalledFunction();
      // A call to a node within the SCC is assumed to return null until
      // proven otherwise.
      if (Callee && SCCNodes.count(Callee)) {
        Speculative = true;
        continue;
      }
      return false;
    }
    default:
      return false; // Unknown source, may be null.
    }
    llvm_unreachable("should have either continued or returned");
  }

  return true;
}

/// Deduce nonnull attributes for the SCC.
static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
                            SmallPtrSet<Function *, 8> &Changed) {
  // Speculate that all functions in the SCC return only nonnull
  // pointers. We may refute this as we analyze functions.
  bool SCCReturnsNonNull = true;

  // Check each function in turn, determining which functions return nonnull
  // pointers.
  for (Function *F : SCCNodes) {
    // Already nonnull.
    if (F->getAttributes().hasRetAttr(Attribute::NonNull))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate nonnull return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    bool Speculative = false;
    if (isReturnNonNull(F, SCCNodes, Speculative)) {
      if (!Speculative) {
        // Mark the function eagerly since we may discover a function
        // which prevents us from speculating about the entire SCC.
        LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
                          << " as nonnull\n");
        F->addRetAttr(Attribute::NonNull);
        ++NumNonNullReturn;
        Changed.insert(F);
      }
      continue;
    }
    // At least one function returns something which could be null; we can't
    // speculate any more.
    SCCReturnsNonNull = false;
  }

  if (SCCReturnsNonNull) {
    for (Function *F : SCCNodes) {
      if (F->getAttributes().hasRetAttr(Attribute::NonNull) ||
          !F->getReturnType()->isPointerTy())
        continue;

      LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
      F->addRetAttr(Attribute::NonNull);
      ++NumNonNullReturn;
      Changed.insert(F);
    }
  }
}

/// Deduce noundef attributes for the SCC.
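/// An illustrative example (hypothetical IR): a function whose every return
/// is a well-defined value, e.g. "ret i32 42", can have its return marked
/// noundef, while a function returning a load of uninitialized memory cannot
/// be proven noundef.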
static void addNoUndefAttrs(const SCCNodeSet &SCCNodes,
                            SmallPtrSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noundef
  // values.
  for (Function *F : SCCNodes) {
    // Already noundef.
    AttributeList Attrs = F->getAttributes();
    if (Attrs.hasRetAttr(Attribute::NoUndef))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // MemorySanitizer assumes that the definition and declaration of a
    // function will be consistent. A function with the sanitize_memory
    // attribute should be skipped during inference.
    if (F->hasFnAttribute(Attribute::SanitizeMemory))
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    const DataLayout &DL = F->getDataLayout();
    if (all_of(*F, [&](BasicBlock &BB) {
          if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
            // TODO: perform context-sensitive analysis?
            Value *RetVal = Ret->getReturnValue();
            if (!isGuaranteedNotToBeUndefOrPoison(RetVal))
              return false;

            // We know the original return value is not poison now, but it
            // could still be converted to poison by another return attribute.
            // Try to explicitly re-prove the relevant attributes.
            if (Attrs.hasRetAttr(Attribute::NonNull) &&
                !isKnownNonZero(RetVal, DL))
              return false;

            if (MaybeAlign Align = Attrs.getRetAlignment())
              if (RetVal->getPointerAlignment(DL) < *Align)
                return false;

            Attribute Attr = Attrs.getRetAttr(Attribute::Range);
            if (Attr.isValid() &&
                !Attr.getRange().contains(
                    computeConstantRange(RetVal, /*ForSigned=*/false)))
              return false;
          }
          return true;
        })) {
      F->addRetAttr(Attribute::NoUndef);
      ++NumNoUndefReturn;
      Changed.insert(F);
    }
  }
}

namespace {

/// Collects a set of attribute inference requests and performs them all in one
/// go on a single SCC Node. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// As soon as all the bodies are fine we are free to set the attribute.
/// Customization of inference for individual attributes is performed by
/// providing a handful of predicates for each attribute.
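///
/// A minimal registration sketch (mirroring the real uses below, such as
/// inferConvergent()):
///
///   AttributeInferer AI;
///   AI.registerAttrInference(
///       {Attribute::NoUnwind,
///        [](const Function &F) { return F.doesNotThrow(); },
///        [](Instruction &I) { return I.mayThrow(); },
///        [](Function &F) { F.setDoesNotThrow(); },
///        /*RequiresExactDefinition=*/true});
///   AI.run(SCCNodes, Changed);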
class AttributeInferer {
public:
  /// Describes a request for inference of a single attribute.
  struct InferenceDescriptor {

    /// Returns true if this function does not have to be handled.
    /// General intent for this predicate is to provide an optimization
    /// for functions that do not need this attribute inference at all
    /// (say, for functions that already have the attribute).
    std::function<bool(const Function &)> SkipFunction;

    /// Returns true if this instruction violates attribute assumptions.
    std::function<bool(Instruction &)> InstrBreaksAttribute;

    /// Sets the inferred attribute for this function.
    std::function<void(Function &)> SetAttribute;

    /// Attribute we derive.
    Attribute::AttrKind AKind;

    /// If true, only "exact" definitions can be used to infer this attribute.
    /// See GlobalValue::isDefinitionExact.
    bool RequiresExactDefinition;

    InferenceDescriptor(Attribute::AttrKind AK,
                        std::function<bool(const Function &)> SkipFunc,
                        std::function<bool(Instruction &)> InstrScan,
                        std::function<void(Function &)> SetAttr,
                        bool ReqExactDef)
        : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
          SetAttribute(SetAttr), AKind(AK),
          RequiresExactDefinition(ReqExactDef) {}
  };

private:
  SmallVector<InferenceDescriptor, 4> InferenceDescriptors;

public:
  void registerAttrInference(InferenceDescriptor AttrInference) {
    InferenceDescriptors.push_back(AttrInference);
  }

  void run(const SCCNodeSet &SCCNodes, SmallPtrSet<Function *, 8> &Changed);
};

/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
void AttributeInferer::run(const SCCNodeSet &SCCNodes,
                           SmallPtrSet<Function *, 8> &Changed) {
  SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
  // Go through all the functions in SCC and check corresponding attribute
  // assumptions for each of them. Attributes that are invalid for this SCC
  // will be removed from InferInSCC.
  for (Function *F : SCCNodes) {

    // No attributes whose assumptions are still valid - done.
    if (InferInSCC.empty())
      return;

    // Check if our attributes ever need scanning/can be scanned.
    llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
      if (ID.SkipFunction(*F))
        return false;

      // Remove from further inference (invalidate) when visiting a function
      // that has no instructions to scan/has an unsuitable definition.
      return F->isDeclaration() ||
             (ID.RequiresExactDefinition && !F->hasExactDefinition());
    });

    // For each attribute still in InferInSCC that doesn't explicitly skip F,
    // set up the F instructions scan to verify assumptions of the attribute.
    SmallVector<InferenceDescriptor, 4> InferInThisFunc;
    llvm::copy_if(
        InferInSCC, std::back_inserter(InferInThisFunc),
        [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });

    if (InferInThisFunc.empty())
      continue;

    // Start instruction scan.
    for (Instruction &I : instructions(*F)) {
      llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) {
        if (!ID.InstrBreaksAttribute(I))
          return false;
        // Remove attribute from further inference on any other functions
        // because attribute assumptions have just been violated.
        llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) {
          return D.AKind == ID.AKind;
        });
        // Remove attribute from the rest of current instruction scan.
        return true;
      });

      if (InferInThisFunc.empty())
        break;
    }
  }

  if (InferInSCC.empty())
    return;

  for (Function *F : SCCNodes)
    // At this point InferInSCC contains only attributes for which every
    // function in the SCC was either:
    //   - explicitly skipped from scan/inference, or
    //   - verified to have no instructions that break attribute assumptions.
    // Hence we just go and force the attribute for all non-skipped functions.
    for (auto &ID : InferInSCC) {
      if (ID.SkipFunction(*F))
        continue;
      Changed.insert(F);
      ID.SetAttribute(*F);
    }
}

struct SCCNodesResult {
  SCCNodeSet SCCNodes;
};

} // end anonymous namespace

/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
                                     const SCCNodeSet &SCCNodes) {
  const CallBase *CB = dyn_cast<CallBase>(&I);
  // Breaks the non-convergent assumption if CB is a convergent call to a
  // function not in the SCC.
  return CB && CB->isConvergent() &&
         !SCCNodes.contains(CB->getCalledFunction());
}

/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
  if (!I.mayThrow(/*IncludePhaseOneUnwind=*/true))
    return false;
  if (const auto *CI = dyn_cast<CallInst>(&I)) {
    if (Function *Callee = CI->getCalledFunction()) {
      // I is a may-throw call to a function inside our SCC. This doesn't
      // invalidate our current working assumption that the SCC is no-throw;
      // we just have to scan that other function.
      if (SCCNodes.contains(Callee))
        return false;
    }
  }
  return true;
}

/// Helper for NoFree inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
  CallBase *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    return false;

  if (CB->hasFnAttr(Attribute::NoFree))
    return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different from the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
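//
// For example, "load atomic i32, ptr %p monotonic, align 4" is ordered in
// this sense (and thus treated as potentially synchronizing), while
// "load atomic i32, ptr %p unordered, align 4" is not.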
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}

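/// Helper for NoSync inference predicate InstrBreaksAttribute.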
static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile may synchronize.
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize. (See the comment above on monotonic
  // operations.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non-call-site cases are covered by the two checks above.
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non-volatile memset/memcpy/memmoves are nosync.
  // NOTE: Only intrinsics with volatile flags should be handled here. All
  // others should be marked in Intrinsics.td.
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

/// Attempt to remove the convergent function attribute when possible.
///
/// Any functions whose attributes changed are added to \p Changed.
static void inferConvergent(const SCCNodeSet &SCCNodes,
                            SmallPtrSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  // Request to remove the convergent attribute from all functions in the SCC
  // if every callsite within the SCC is not convergent (except for calls
  // to functions within the SCC).
  // Note: Removal of the attr from the callsites will happen in
  // InstCombineCalls separately.
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::Convergent,
      // Skip non-convergent functions.
      [](const Function &F) { return !F.isConvergent(); },
      // Instructions that break the non-convergent assumption.
      [SCCNodes](Instruction &I) {
        return InstrBreaksNonConvergent(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
                          << "\n");
        F.setNotConvergent();
      },
      /*RequiresExactDefinition=*/false});
  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

/// Infer attributes for all functions in the SCC by scanning every
/// instruction for compliance with the attribute assumptions.
///
/// Any functions whose attributes changed are added to \p Changed.
static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
                                         SmallPtrSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer the nounwind attribute for all the functions in the
    // SCC if every callsite within the SCC is not throwing (except for calls
    // to functions within the SCC). Note that the nounwind attribute suffers
    // from derefinement - results may change depending on how functions are
    // optimized. Thus it can be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break the non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /*RequiresExactDefinition=*/true});

  if (!DisableNoFreeInference)
    // Request to infer the nofree attribute for all the functions in the SCC
    // if every callsite within the SCC does not directly or indirectly free
    // memory (except for calls to functions within the SCC). Note that the
    // nofree attribute suffers from derefinement - results may change
    // depending on how functions are optimized. Thus it can be inferred only
    // from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break the non-deallocating assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n");
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /*RequiresExactDefinition=*/true});

  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already-marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break the nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /*RequiresExactDefinition=*/true});

  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

// Determines whether any call within 'F' could lead to a recursive call
// back to 'F'. Returns true if such recursion is possible and false
// otherwise; callers use a false result to mark 'F' as 'norecurse'.
// The 'AnyFunctionsAddressIsTaken' parameter is a module-wide flag
// that is true if any function's address is taken, or if any function
// has external linkage. This is used to determine the safety of
// external/library calls.
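//
// For example, when AnyFunctionsAddressIsTaken is false (whole-program
// knowledge that no user function escapes), a direct call to an external
// declaration is treated as safe: the library code has no way to call back
// into a user function.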
static bool mayHaveRecursiveCallee(Function &F,
                                   bool AnyFunctionsAddressIsTaken = true) {
  for (const auto &BB : F) {
    for (const auto &I : BB.instructionsWithoutDebug()) {
      if (const auto *CB = dyn_cast<CallBase>(&I)) {
        const Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == &F)
          return true;

        if (Callee->doesNotRecurse())
          continue;

        if (!AnyFunctionsAddressIsTaken ||
            (Callee->isDeclaration() &&
             Callee->hasFnAttribute(Attribute::NoCallback)))
          continue;
        return true;
      }
    }
  }
  return false;
}

static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
                              SmallPtrSet<Function *, 8> &Changed) {
  // Try to identify functions that do not recurse.

  // If the SCC contains multiple nodes we know for sure there is recursion.
  if (SCCNodes.size() != 1)
    return;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return;
  if (!mayHaveRecursiveCallee(*F)) {
    // Every call was to a non-recursive function other than this function, and
    // we have no indirect recursion as the SCC size is one. This function
    // cannot recurse.
    F->setDoesNotRecurse();
    ++NumNoRecurse;
    Changed.insert(F);
  }
}

// Set the noreturn function attribute if possible.
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
                             SmallPtrSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() ||
        F->hasFnAttribute(Attribute::Naked) || F->doesNotReturn())
      continue;

    if (!canReturn(*F)) {
      F->setDoesNotReturn();
      Changed.insert(F);
    }
  }
}

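// Returns true if every path from the entry block to a function exit passes
// through a block containing a cold callsite. Illustrative CFG (hypothetical):
// if "entry" branches to "warm" and "cold", "warm" also falls through to
// "cold", and only "cold" (which contains a call marked cold) returns, then
// every path is cold; any return reachable without crossing a cold callsite
// makes the function ineligible.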
static bool allPathsGoThroughCold(Function &F) {
  SmallDenseMap<BasicBlock *, bool, 16> ColdPaths;
  ColdPaths[&F.front()] = false;
  SmallVector<BasicBlock *> Jobs;
  Jobs.push_back(&F.front());

  while (!Jobs.empty()) {
    BasicBlock *BB = Jobs.pop_back_val();

    // If the block contains a cold callsite, this path through the CFG is
    // cold. Ignore whether the instructions actually are guaranteed to
    // transfer execution. Divergent behavior is considered unlikely.
    if (any_of(*BB, [](Instruction &I) {
          if (auto *CB = dyn_cast<CallBase>(&I))
            return CB->hasFnAttr(Attribute::Cold);
          return false;
        })) {
      ColdPaths[BB] = true;
      continue;
    }

    auto Succs = successors(BB);
    // We found a path that doesn't go through any cold callsite.
    if (Succs.empty())
      return false;

    // We didn't find a cold callsite in this BB, so check that all successors
    // contain a cold callsite (or that their successors do).
    // Potential TODO: We could use static branch hints to assume certain
    // successor paths are inherently cold, irrespective of whether they
    // contain a cold callsite.
    for (BasicBlock *Succ : Succs) {
      // Start with false; this is necessary to ensure we don't turn loops
      // into cold.
      auto [Iter, Inserted] = ColdPaths.try_emplace(Succ, false);
      if (!Inserted) {
        if (Iter->second)
          continue;
        return false;
      }
      Jobs.push_back(Succ);
    }
  }
  return true;
}

// Set the cold function attribute if possible.
static void addColdAttrs(const SCCNodeSet &SCCNodes,
                         SmallPtrSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() ||
        F->hasFnAttribute(Attribute::Naked) ||
        F->hasFnAttribute(Attribute::Cold) || F->hasFnAttribute(Attribute::Hot))
      continue;

    // Potential TODO: We could add attribute `cold` on functions with
    // `coldcc`.
    if (allPathsGoThroughCold(*F)) {
      F->addFnAttr(Attribute::Cold);
      ++NumCold;
      Changed.insert(F);
      continue;
    }
  }
}

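// Conservatively determine whether F is guaranteed to return: either it is
// must-progress and only reads memory, or it is loop-free (no backedges) and
// every call it contains is itself willreturn.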
static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that the
  // definition we'll get at link time is *exactly* the definition we see now.
  // For more details, see GlobalValue::mayBeDerefined.
  if (!F.hasExactDefinition())
    return false;

  // A must-progress function without side effects must return.
  if (F.mustProgress() && F.onlyReadsMemory())
    return true;

  // Can only analyze functions with a definition.
  if (F.isDeclaration())
    return false;

  // Functions with loops require more sophisticated analysis, as the loop
  // may be infinite. For now, don't try to handle them.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
  FindFunctionBackedges(F, Backedges);
  if (!Backedges.empty())
    return false;

  // If there are no loops, then the function is willreturn if all calls in
  // it are willreturn.
  return all_of(instructions(F), [](const Instruction &I) {
    return I.willReturn();
  });
}

// Set the willreturn function attribute if possible.
static void addWillReturn(const SCCNodeSet &SCCNodes,
                          SmallPtrSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || F->willReturn() || !functionWillReturn(*F))
      continue;

    F->setWillReturn();
    NumWillReturn++;
    Changed.insert(F);
  }
}

static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
  SCCNodesResult Res;
  for (Function *F : Functions) {
    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) ||
        F->isPresplitCoroutine()) {
      // Omit any functions we're trying not to optimize from the set.
      continue;
    }

    Res.SCCNodes.insert(F);
  }
  return Res;
}

template <typename AARGetterT>
static SmallPtrSet<Function *, 8>
deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
                       bool ArgAttrsOnly) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);

  // Bail if the SCC only contains optnone functions.
  if (Nodes.SCCNodes.empty())
    return {};

  SmallPtrSet<Function *, 8> Changed;
  if (ArgAttrsOnly) {
    // ArgAttrsOnly means to only infer attributes that may aid optimizations
    // on the *current* function. The "initializes" attribute aids
    // optimizations (like DSE) in the callers, so skip "initializes" here.
    addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true);
    return Changed;
  }

  addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
  addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
  addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false);
  inferConvergent(Nodes.SCCNodes, Changed);
  addNoReturnAttrs(Nodes.SCCNodes, Changed);
  addColdAttrs(Nodes.SCCNodes, Changed);
  addWillReturn(Nodes.SCCNodes, Changed);
  addNoUndefAttrs(Nodes.SCCNodes, Changed);
  addNoAliasAttrs(Nodes.SCCNodes, Changed);
  addNonNullAttrs(Nodes.SCCNodes, Changed);
  inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
  addNoRecurseAttrs(Nodes.SCCNodes, Changed);

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This is handling the cases where one attribute on a signature
  // implies another, but for implementation reasons the inference rule for
  // the latter is missing (or simply less sophisticated).
  for (Function *F : Nodes.SCCNodes)
    if (F)
      if (inferAttributesFromOthers(*F))
        Changed.insert(F);

  return Changed;
}

PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  // Skip non-recursive functions if requested.
  // Only infer argument attributes for non-recursive functions, because
  // it can affect optimization behavior in conjunction with noalias.
  bool ArgAttrsOnly = false;
  if (C.size() == 1 && SkipNonRecursive) {
    LazyCallGraph::Node &N = *C.begin();
    if (!N->lookup(N))
      ArgAttrsOnly = true;
  }

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  auto ChangedFunctions =
      deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
  if (ChangedFunctions.empty())
    return PreservedAnalyses::all();

  // Invalidate analyses for modified functions so that we don't have to
  // invalidate all analyses for all functions in this SCC.
  PreservedAnalyses FuncPA;
  // We haven't changed the CFG for modified functions.
  FuncPA.preserveSet<CFGAnalyses>();
  for (Function *Changed : ChangedFunctions) {
    FAM.invalidate(*Changed, FuncPA);
    // Also invalidate any direct callers of changed functions since analyses
    // may care about attributes of direct callees. For example, MemorySSA
    // cares about whether or not a call's callee modifies memory and queries
    // that through function attributes.
    for (auto *U : Changed->users()) {
      if (auto *Call = dyn_cast<CallBase>(U)) {
        if (Call->getCalledOperand() == Changed)
          FAM.invalidate(*Call->getFunction(), FuncPA);
      }
    }
  }

  PreservedAnalyses PA;
  // We have not added or removed functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We already invalidated all relevant function analyses above.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}

void PostOrderFunctionAttrsPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  if (SkipNonRecursive)
    OS << "<skip-non-recursive-function-attrs>";
}

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    Functions.push_back(I->getFunction());
  }

  return !deriveAttrsInPostOrder(Functions, AARGetter, /*ArgAttrsOnly=*/false)
              .empty();
}

static bool addNoRecurseAttrsTopDown(Function &F) {
  // We check the preconditions for the function prior to calling this to avoid
  // the cost of building up a reversible post-order list. We assert them here
  // to make sure none of the invariants this relies on were violated.
  assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
  assert(!F.doesNotRecurse() &&
         "This function has already been deduced as norecurse!");
  assert(F.hasInternalLinkage() &&
         "Can only do top-down deduction for internal linkage functions!");

  // If F is internal and all of its uses are calls from non-recursive
  // functions, then none of its calls could in fact recurse without going
  // through a function marked norecurse, and so we can mark this function too
  // as norecurse. Note that the uses must actually be calls -- otherwise
  // a pointer to this function could be returned from a norecurse function but
  // this function could be recursively (indirectly) called. Note that this
  // also detects if F is directly recursive as F is not yet marked as
  // a norecurse function.
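  //
  // Illustrative example (hypothetical IR): for
  //   define internal void @f() { ... }
  // where every user of @f is a direct call made from a function already
  // marked norecurse, @f cannot take part in any call cycle, so it can be
  // marked norecurse as well.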
  for (auto &U : F.uses()) {
    auto *I = dyn_cast<Instruction>(U.getUser());
    if (!I)
      return false;
    CallBase *CB = dyn_cast<CallBase>(I);
    if (!CB || !CB->isCallee(&U) ||
        !CB->getParent()->getParent()->doesNotRecurse())
      return false;
  }
  F.setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) {
  // We only have a post-order SCC traversal (because SCCs are inherently
  // discovered in post-order), so we accumulate them in a vector and then walk
  // it in reverse. This is simpler than using the RPO iterator infrastructure
  // because we need to combine SCC detection and the PO walk of the call
  // graph. We can also cheat egregiously because we're primarily interested in
  // synthesizing norecurse, so we need only save the singleton SCCs; SCCs with
  // multiple functions in them will clearly be recursive.

  SmallVector<Function *, 16> Worklist;
  CG.buildRefSCCs();
  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
    for (LazyCallGraph::SCC &SCC : RC) {
      if (SCC.size() != 1)
        continue;
      Function &F = SCC.begin()->getFunction();
      if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage())
        Worklist.push_back(&F);
    }
  }
  bool Changed = false;
  for (auto *F : llvm::reverse(Worklist))
    Changed |= addNoRecurseAttrsTopDown(*F);

  return Changed;
}

PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &CG = AM.getResult<LazyCallGraphAnalysis>(M);

  if (!deduceFunctionAttributeInRPO(M, CG))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<LazyCallGraphAnalysis>();
  return PA;
}

PreservedAnalyses NoRecurseLTOInferencePass::run(Module &M,
                                                 ModuleAnalysisManager &MAM) {
  // Check if any function in the whole program has its address taken or has
  // potentially external linkage.
  // We use this information when inferring the norecurse attribute: if no
  // function's address is taken and all functions have internal linkage,
  // there is no path for a callback to reach any user function.
  bool AnyFunctionsAddressIsTaken = false;
  for (Function &F : M) {
    if (F.isDeclaration() || F.doesNotRecurse())
      continue;
    if (!F.hasLocalLinkage() || F.hasAddressTaken()) {
      AnyFunctionsAddressIsTaken = true;
      break;
    }
  }

  // Run norecurse inference on all RefSCCs in the LazyCallGraph for this
  // module.
  bool Changed = false;
  LazyCallGraph &CG = MAM.getResult<LazyCallGraphAnalysis>(M);
  CG.buildRefSCCs();

  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
    // Skip any RefSCC that is part of a call cycle. A RefSCC containing more
    // than one SCC indicates a recursive relationship involving indirect
    // calls.
    if (RC.size() > 1)
      continue;

    // The RefSCC contains a single SCC. An SCC of size > 1 indicates mutually
    // recursive functions, e.g. foo1 -> foo2 -> foo3 -> foo1.
    LazyCallGraph::SCC &S = *RC.begin();
    if (S.size() > 1)
      continue;

    // Get the single function from this SCC.
    Function &F = S.begin()->getFunction();
    if (!F.hasExactDefinition() || F.doesNotRecurse())
      continue;

    // If the analysis confirms that this function has no recursive calls
    // (whether direct, indirect, or through external linkage),
    // we can safely apply the norecurse attribute.
    if (!mayHaveRecursiveCallee(F, AnyFunctionsAddressIsTaken)) {
      F.setDoesNotRecurse();
      ++NumNoRecurse;
      Changed = true;
    }
  }

  PreservedAnalyses PA;
  if (Changed)
    PA.preserve<LazyCallGraphAnalysis>();
  else
    PA = PreservedAnalyses::all();
  return PA;
}