//===- FunctionAttrs.cpp - Pass which marks function attributes -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph deducing and/or propagating function attributes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <optional>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "function-attrs"

STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute");
STATISTIC(NumCapturesNone, "Number of arguments marked captures(none)");
STATISTIC(NumCapturesPartial, "Number of arguments marked with captures "
                              "attribute other than captures(none)");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");
STATISTIC(NumCold, "Number of functions marked as cold");

STATISTIC(NumThinLinkNoRecurse,
          "Number of functions marked as norecurse during thinlink");
STATISTIC(NumThinLinkNoUnwind,
          "Number of functions marked as nounwind during thinlink");

static cl::opt<bool> EnableNonnullArgPropagation(
    "enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
    cl::desc("Try to propagate nonnull argument attributes from callsites to "
             "caller functions."));

static cl::opt<bool> DisableNoUnwindInference(
    "disable-nounwind-inference", cl::Hidden,
    cl::desc("Stop inferring nounwind attribute during function-attrs pass"));

static cl::opt<bool> DisableNoFreeInference(
    "disable-nofree-inference", cl::Hidden,
    cl::desc("Stop inferring nofree attribute during function-attrs pass"));

static cl::opt<bool> DisableThinLTOPropagation(
    "disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
    cl::desc("Don't propagate function-attrs in thinLTO"));
static void addCapturesStat(CaptureInfo CI) {
  if (capturesNothing(CI))
    ++NumCapturesNone;
  else
    ++NumCapturesPartial;
}

namespace {

using SCCNodeSet = SmallSetVector<Function *, 8>;

} // end anonymous namespace

static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc,
                         ModRefInfo MR, AAResults &AAR) {
  // Ignore accesses to known-invariant or local memory.
  MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocals=*/true);
  if (isNoModRef(MR))
    return;

  const Value *UO = getUnderlyingObjectAggressive(Loc.Ptr);
  if (isa<AllocaInst>(UO))
    return;
  if (isa<Argument>(UO)) {
    ME |= MemoryEffects::argMemOnly(MR);
    return;
  }

  // If it's not an identified object, it might be an argument.
  if (!isIdentifiedObject(UO))
    ME |= MemoryEffects::argMemOnly(MR);
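  // The access may be to errno or to any other non-argument location ("other"
  // memory), so record it against both locations as well.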
  ME |= MemoryEffects(IRMemLocation::ErrnoMem, MR);
  ME |= MemoryEffects(IRMemLocation::Other, MR);
}

static void addArgLocs(MemoryEffects &ME, const CallBase *Call,
                       ModRefInfo ArgMR, AAResults &AAR) {
  for (const Value *Arg : Call->args()) {
    if (!Arg->getType()->isPtrOrPtrVectorTy())
      continue;

    addLocAccess(ME,
                 MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()),
                 ArgMR, AAR);
  }
}

/// Returns the memory access attribute for function F using AAR for AA
/// results, where SCCNodes is the current SCC.
///
/// If ThisBody is true, this function may examine the function body and will
/// return a result pertaining to this copy of the function. If it is false,
/// the result will be based only on AA results for the function declaration;
/// it will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
///
/// The return value is split into two parts: memory effects that always apply,
/// and additional memory effects that apply if any of the functions in the SCC
/// can access argmem.
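///
/// For example, if a function in the SCC stores through a pointer argument and
/// a recursive call passes a global in that position, then the SCC as a whole
/// also writes that global; the second part of the pair records such accesses
/// so they can be added back once the SCC's argmem access is known.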
static std::pair<MemoryEffects, MemoryEffects>
checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
                          const SCCNodeSet &SCCNodes) {
  MemoryEffects OrigME = AAR.getMemoryEffects(&F);
  if (OrigME.doesNotAccessMemory())
    // Already perfect!
    return {OrigME, MemoryEffects::none()};

  if (!ThisBody)
    return {OrigME, MemoryEffects::none()};

  MemoryEffects ME = MemoryEffects::none();
  // Additional locations accessed if the SCC accesses argmem.
  MemoryEffects RecursiveArgME = MemoryEffects::none();

  // Inalloca and preallocated arguments are always clobbered by the call.
  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      F.getAttributes().hasAttrSomewhere(Attribute::Preallocated))
    ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef);

  // Scan the function body for instructions that may read or write memory.
  for (Instruction &I : instructions(F)) {
    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      // We can optimistically ignore calls to functions in the same SCC, with
      // two caveats:
      // * Calls with operand bundles may have additional effects.
      // * Argument memory accesses may imply additional effects depending on
      //   what the argument location is.
      if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
          SCCNodes.count(Call->getCalledFunction())) {
        // Keep track of which additional locations are accessed if the SCC
        // turns out to access argmem.
        addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR);
        continue;
      }

      MemoryEffects CallME = AAR.getMemoryEffects(Call);

      // If the call doesn't access memory, we're done.
      if (CallME.doesNotAccessMemory())
        continue;

      // A pseudo probe call shouldn't change any function attribute since it
      // doesn't translate to a real instruction. It carries a memory access
      // tag so that it is not removed by optimizations, while still not
      // blocking other instructions from being optimized.
      if (isa<PseudoProbeInst>(I))
        continue;

      // Merge the callee's memory effects into the caller's, including
      // inaccessible and errno memory, but excluding argument memory, which is
      // handled separately.
      ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem);

      // If the call accesses captured memory (currently part of "other") and
      // an argument is captured (currently not tracked), then it may also
      // access argument memory.
      ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other);
      ME |= MemoryEffects::argMemOnly(OtherMR);

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem);
      if (ArgMR != ModRefInfo::NoModRef)
        addArgLocs(ME, Call, ArgMR, AAR);
      continue;
    }

    ModRefInfo MR = ModRefInfo::NoModRef;
    if (I.mayWriteToMemory())
      MR |= ModRefInfo::Mod;
    if (I.mayReadFromMemory())
      MR |= ModRefInfo::Ref;
    if (MR == ModRefInfo::NoModRef)
      continue;

    std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I);
    if (!Loc) {
      // If no location is known, conservatively assume anything can be
      // accessed.
      ME |= MemoryEffects(MR);
      continue;
    }

    // Volatile operations may access inaccessible memory.
    if (I.isVolatile())
      ME |= MemoryEffects::inaccessibleMemOnly(MR);

    addLocAccess(ME, *Loc, MR, AAR);
  }

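  // Intersect with the effects already known for F so the body scan never
  // yields a weaker result than what AA previously reported.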
  return {OrigME & ME, RecursiveArgME};
}

MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F,
                                                    AAResults &AAR) {
  return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first;
}

/// Deduce readonly/readnone/writeonly attributes for the SCC.
template <typename AARGetterT>
static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
                           SmallSet<Function *, 8> &Changed) {
  MemoryEffects ME = MemoryEffects::none();
  MemoryEffects RecursiveArgME = MemoryEffects::none();
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);
    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected. See the
    // comment on GlobalValue::isDefinitionExact for more details.
    auto [FnME, FnRecursiveArgME] =
        checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
    ME |= FnME;
    RecursiveArgME |= FnRecursiveArgME;
    // We have reached the bottom of the lattice; we will not be able to
    // improve the result.
    if (ME == MemoryEffects::unknown())
      return;
  }

  // If the SCC accesses argmem, add recursive accesses resulting from that.
  ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
  if (ArgMR != ModRefInfo::NoModRef)
    ME |= RecursiveArgME & MemoryEffects(ArgMR);

  for (Function *F : SCCNodes) {
    MemoryEffects OldME = F->getMemoryEffects();
    MemoryEffects NewME = ME & OldME;
    if (NewME != OldME) {
      ++NumMemoryAttr;
      F->setMemoryEffects(NewME);
      // Remove conflicting writable attributes.
      if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem)))
        for (Argument &A : F->args())
          A.removeAttr(Attribute::Writable);
      Changed.insert(F);
    }
  }
}

// Compute definitive function attributes for a function taking into account
// prevailing definitions and linkage types.
static FunctionSummary *calculatePrevailingSummary(
    ValueInfo VI,
    DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {

  auto [It, Inserted] = CachedPrevailingSummary.try_emplace(VI);
  if (!Inserted)
    return It->second;

  /// At this point, prevailing symbols have been resolved. The following leads
  /// to returning a conservative result:
  /// - Multiple instances with local linkage. Normally local linkage would be
  ///   unique per module as the GUID includes the module path. We could have a
  ///   GUID alias if there wasn't any distinguishing path when each file was
  ///   compiled, but that should be rare so we'll punt on those.

  /// These next 2 cases should not happen and will assert:
  /// - Multiple instances with external linkage. This should be caught in
  ///   symbol resolution.
  /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
  ///   knowledge meaning we have to go conservative.

  /// Otherwise, we calculate attributes for a function as:
  /// 1. If we have a local linkage, take its attributes. If there's somehow
  ///    multiple, bail and go conservative.
  /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is
  ///    prevailing, take its attributes.
  /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
  ///    differences. However, if the prevailing copy is known it will be used
  ///    so take its attributes. If the prevailing copy is in a native file
  ///    all IR copies will be dead and propagation will go conservative.
  /// 4. AvailableExternally summaries without a prevailing copy are known to
  ///    occur in a couple of circumstances:
  ///    a. An internal function gets imported due to its caller getting
  ///       imported, it becomes AvailableExternally but no prevailing
  ///       definition exists. Because it has to get imported along with its
  ///       caller the attributes will be captured by propagating on its
  ///       caller.
  ///    b. C++11 [temp.explicit]p10 can generate AvailableExternally
  ///       definitions of explicitly instantiated template declarations
  ///       for inlining which are ultimately dropped from the TU. Since this
  ///       is localized to the TU the attributes will have already made it to
  ///       the callers.
  ///    These are edge cases and already captured by their callers so we
  ///    ignore them for now. If they become relevant to optimize in the
  ///    future this can be revisited.
  /// 5. Otherwise, go conservative.

  FunctionSummary *Local = nullptr;
  FunctionSummary *Prevailing = nullptr;

  for (const auto &GVS : VI.getSummaryList()) {
    if (!GVS->isLive())
      continue;

    FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject());
    // Virtual and unknown (e.g. indirect) calls require going conservative.
    if (!FS || FS->fflags().HasUnknownCall)
      return nullptr;

    const auto &Linkage = GVS->linkage();
    if (GlobalValue::isLocalLinkage(Linkage)) {
      if (Local) {
        LLVM_DEBUG(
            dbgs()
            << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
               "function "
            << VI.name() << " from " << FS->modulePath() << ". Previous module "
            << Local->modulePath() << "\n");
        return nullptr;
      }
      Local = FS;
    } else if (GlobalValue::isExternalLinkage(Linkage)) {
      assert(IsPrevailing(VI.getGUID(), GVS.get()));
      Prevailing = FS;
      break;
    } else if (GlobalValue::isWeakODRLinkage(Linkage) ||
               GlobalValue::isLinkOnceODRLinkage(Linkage) ||
               GlobalValue::isWeakAnyLinkage(Linkage) ||
               GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
      if (IsPrevailing(VI.getGUID(), GVS.get())) {
        Prevailing = FS;
        break;
      }
    } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
      // TODO: Handle these cases if they become meaningful.
      continue;
    }
  }

  auto &CPS = CachedPrevailingSummary[VI];
  if (Local) {
    assert(!Prevailing);
    CPS = Local;
  } else if (Prevailing) {
    assert(!Local);
    CPS = Prevailing;
  }

  return CPS;
}

bool llvm::thinLTOPropagateFunctionAttrs(
    ModuleSummaryIndex &Index,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {
  // TODO: Implement addNoAliasAttrs once there's more information about the
  // return type in the summary.
  if (DisableThinLTOPropagation)
    return false;

  DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
  bool Changed = false;

  auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
    // Assume we can propagate unless we discover otherwise.
    FunctionSummary::FFlags InferredFlags;
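    // An SCC with more than one node is mutually recursive by construction,
    // so norecurse can hold only for singleton SCCs; self-recursion within a
    // singleton is caught by the callee scan below.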
    InferredFlags.NoRecurse = (SCCNodes.size() == 1);
    InferredFlags.NoUnwind = true;

    for (auto &V : SCCNodes) {
      FunctionSummary *CallerSummary =
          calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing);

      // Function summaries can fail to contain information, e.g. for
      // declarations; in that case we must stay conservative.
      if (!CallerSummary)
        return;

      if (CallerSummary->fflags().MayThrow)
        InferredFlags.NoUnwind = false;

      for (const auto &Callee : CallerSummary->calls()) {
        FunctionSummary *CalleeSummary = calculatePrevailingSummary(
            Callee.first, CachedPrevailingSummary, IsPrevailing);

        if (!CalleeSummary)
          return;

        if (!CalleeSummary->fflags().NoRecurse)
          InferredFlags.NoRecurse = false;

        if (!CalleeSummary->fflags().NoUnwind)
          InferredFlags.NoUnwind = false;

        if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
          break;
      }
    }

    if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
      Changed = true;
      for (auto &V : SCCNodes) {
        if (InferredFlags.NoRecurse) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
                            << V.name() << "\n");
          ++NumThinLinkNoRecurse;
        }

        if (InferredFlags.NoUnwind) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
                            << V.name() << "\n");
          ++NumThinLinkNoUnwind;
        }

        for (const auto &S : V.getSummaryList()) {
          if (auto *FS = dyn_cast<FunctionSummary>(S.get())) {
            if (InferredFlags.NoRecurse)
              FS->setNoRecurse();

            if (InferredFlags.NoUnwind)
              FS->setNoUnwind();
          }
        }
      }
    }
  };

  // Call the propagation function on each SCC in the index.
  for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
       ++I) {
    std::vector<ValueInfo> Nodes(*I);
    PropagateAttributes(Nodes);
  }
  return Changed;
}

namespace {

/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  /// CaptureComponents for this argument, excluding captures via Uses.
  /// We don't distinguish between other/return captures here.
  CaptureComponents CC = CaptureComponents::None;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph; in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { CI = CaptureInfo::all(); }

  Action captured(const Use *U, UseCaptureInfo UseCI) override {
    if (updateCaptureInfo(U, UseCI.UseCC)) {
      // Don't bother continuing if we already capture everything.
      if (capturesAll(CI.getOtherComponents()))
        return Stop;
      return Continue;
    }

    // For SCC argument tracking, we're not going to analyze other/ret
    // components separately, so don't follow the return value.
    return ContinueIgnoringReturn;
  }

  bool updateCaptureInfo(const Use *U, CaptureComponents CC) {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      if (isa<ReturnInst>(U->getUser()))
        CI |= CaptureInfo::retOnly(CC);
      else
        // Conservatively assume that the captured value might make its way
        // into the return value as well. This could be made more precise.
        CI |= CaptureInfo(CC);
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      CI |= CaptureInfo(CC);
      return true;
    }

    assert(!CB->isCallee(U) && "callee operand reported captured?");
    const unsigned UseIndex = CB->getDataOperandNo(U);
    if (UseIndex >= CB->arg_size()) {
      // Data operand, but not an argument operand -- must be a bundle operand.
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand bundle
      // use. In this case it does not matter if the callee is within our SCC
      // or not -- we've been captured in some unknown way, and we have to be
      // conservative.
      CI |= CaptureInfo(CC);
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      CI |= CaptureInfo(CC);
      return true;
    }

    // TODO(captures): Could improve precision by remembering maximum
    // capture components for the argument.
    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // Does not include potential captures via Uses in the SCC.
  CaptureInfo CI = CaptureInfo::none();

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};

/// A struct of argument use: a Use and the offset it accesses. This struct
/// is used to track uses of an argument inside the function via GEPs. If a
/// GEP has a non-constant index, the Offset field is nullopt.
struct ArgumentUse {
  Use *U;
  std::optional<int64_t> Offset;
};

/// A struct of argument access info. "Unknown" accesses are cases like
/// unrecognized instructions, instructions that have more than one use of
/// the argument, or volatile memory accesses. "WriteWithSideEffect" accesses
/// are call instructions that not only write to an argument but also capture
/// it.
struct ArgumentAccessInfo {
  enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown };
  AccessType ArgAccessType;
  ConstantRangeList AccessRanges;
};

/// A struct to wrap the argument use info per block.
struct UsesPerBlockInfo {
  SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts;
  bool HasWrites = false;
  bool HasUnknownAccess = false;
};

/// A struct to summarize the argument use info in a function.
struct ArgumentUsesSummary {
  bool HasAnyWrite = false;
  bool HasWriteOutsideEntryBB = false;
  SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock;
};

ArgumentAccessInfo getArgumentAccessInfo(const Instruction *I,
                                         const ArgumentUse &ArgUse,
                                         const DataLayout &DL) {
  auto GetTypeAccessRange =
      [&DL](Type *Ty,
            std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto TypeSize = DL.getTypeStoreSize(Ty);
    if (!TypeSize.isScalable() && Offset) {
      int64_t Size = TypeSize.getFixedValue();
      APInt Low(64, *Offset, true);
      bool Overflow;
      APInt High = Low.sadd_ov(APInt(64, Size, true), Overflow);
      // Bail if the range overflows a signed 64-bit int.
      if (Overflow)
        return std::nullopt;
      return ConstantRange(Low, High);
    }
    return std::nullopt;
  };
  auto GetConstantIntRange =
      [](Value *Length,
         std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto *ConstantLength = dyn_cast<ConstantInt>(Length);
    if (ConstantLength && Offset) {
      int64_t Len = ConstantLength->getSExtValue();

      // Reject zero or negative lengths.
      if (Len <= 0)
        return std::nullopt;

      APInt Low(64, *Offset, true);
      bool Overflow;
      APInt High = Low.sadd_ov(APInt(64, Len, true), Overflow);
      if (Overflow)
        return std::nullopt;

      return ConstantRange(Low, High);
    }
    return std::nullopt;
  };

  if (auto *SI = dyn_cast<StoreInst>(I)) {
    if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) {
      // Get the fixed type size of "SI". Since the access range of a write
      // will be unioned, if "SI" doesn't have a fixed type size, we just set
      // the access range to empty.
      ConstantRangeList AccessRanges;
      if (auto TypeAccessRange =
              GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset))
        AccessRanges.insert(*TypeAccessRange);
      return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)};
    }
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    if (LI->isSimple()) {
      assert(&LI->getOperandUse(0) == ArgUse.U);
      // Get the fixed type size of "LI". Unlike a write, if "LI" doesn't have
      // a fixed type size, we conservatively treat it as a clobber with an
      // empty access range.
      if (auto TypeAccessRange =
              GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset))
        return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}};
    }
  } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) {
    if (!MemSet->isVolatile()) {
      ConstantRangeList AccessRanges;
      if (auto AccessRange =
              GetConstantIntRange(MemSet->getLength(), ArgUse.Offset))
        AccessRanges.insert(*AccessRange);
      return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
    }
  } else if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
    if (!MTI->isVolatile()) {
      if (&MTI->getOperandUse(0) == ArgUse.U) {
        ConstantRangeList AccessRanges;
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          AccessRanges.insert(*AccessRange);
        return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
      } else if (&MTI->getOperandUse(1) == ArgUse.U) {
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}};
      }
    }
  } else if (auto *CB = dyn_cast<CallBase>(I)) {
    if (CB->isArgOperand(ArgUse.U) &&
        !CB->isByValArgument(CB->getArgOperandNo(ArgUse.U))) {
      unsigned ArgNo = CB->getArgOperandNo(ArgUse.U);
      bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes);
      if (IsInitialize && ArgUse.Offset) {
        // The access is a Write when the parameter is writeonly/readnone
        // and nocapture. Otherwise, it's a WriteWithSideEffect.
        auto Access = CB->onlyWritesMemory(ArgNo) && CB->doesNotCapture(ArgNo)
                          ? ArgumentAccessInfo::AccessType::Write
                          : ArgumentAccessInfo::AccessType::WriteWithSideEffect;
        ConstantRangeList AccessRanges;
        Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes);
        ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList();
        for (ConstantRange &CR : CBCRL)
          AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset,
                                            CR.getUpper() + *ArgUse.Offset));
        return {Access, AccessRanges};
      }
    }
  }
  // Other unrecognized instructions are considered unknown.
  return {ArgumentAccessInfo::AccessType::Unknown, {}};
}

// Collect the uses of argument "A" in "F".
ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) {
  auto &DL = F.getParent()->getDataLayout();
  unsigned PointerSize =
      DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace());
  ArgumentUsesSummary Result;

  BasicBlock &EntryBB = F.getEntryBlock();
  SmallVector<ArgumentUse, 4> Worklist;
  for (Use &U : A.uses())
    Worklist.push_back({&U, 0});

  // Update "UsesPerBlock" with the block of "I" as key and "Info" as value.
  // Return true if the block of "I" has write accesses after updating.
  auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) {
    auto *BB = I->getParent();
    auto &BBInfo = Result.UsesPerBlock[BB];
    auto [It, Inserted] = BBInfo.Insts.try_emplace(I);
    auto &IInfo = It->second;

    // Instructions that have more than one use of the argument are considered
    // clobbers.
    if (!Inserted) {
      IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}};
      BBInfo.HasUnknownAccess = true;
      return false;
    }

    IInfo = std::move(Info);
    BBInfo.HasUnknownAccess |=
        IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown;
    bool InfoHasWrites =
        (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
         IInfo.ArgAccessType ==
             ArgumentAccessInfo::AccessType::WriteWithSideEffect) &&
        !IInfo.AccessRanges.empty();
    BBInfo.HasWrites |= InfoHasWrites;
    return InfoHasWrites;
  };

  // No need for a visited set because we don't look through phis, so there are
  // no cycles.
  while (!Worklist.empty()) {
    ArgumentUse ArgUse = Worklist.pop_back_val();
    User *U = ArgUse.U->getUser();
    // Add GEP uses to the worklist.
    // If the GEP is not a constant GEP, set ArgumentUse::Offset to nullopt.
    if (auto *GEP = dyn_cast<GEPOperator>(U)) {
      std::optional<int64_t> NewOffset = std::nullopt;
      if (ArgUse.Offset) {
        APInt Offset(PointerSize, 0);
        if (GEP->accumulateConstantOffset(DL, Offset))
          NewOffset = *ArgUse.Offset + Offset.getSExtValue();
      }
      for (Use &U : GEP->uses())
        Worklist.push_back({&U, NewOffset});
      continue;
    }

    auto *I = cast<Instruction>(U);
    bool HasWrite = UpdateUseInfo(I, getArgumentAccessInfo(I, ArgUse, DL));

    Result.HasAnyWrite |= HasWrite;

    if (HasWrite && I->getParent() != &EntryBB)
      Result.HasWriteOutsideEntryBB = true;
  }
  return Result;
}

} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm

/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
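/// SCCNodes contains the formal arguments that form the current speculative
/// argument-SCC; uses that flow into one of those arguments are optimistically
/// skipped here and resolved later at the argument-SCC level.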
static Attribute::AttrKind
determinePointerAccessAttrs(Argument *A,
                            const SmallPtrSet<Argument *, 8> &SCCNodes) {
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  bool IsWrite = false;

  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    if (IsWrite && IsRead)
      // No point in searching further.
      return Attribute::None;

    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value
      // isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*I);
      if (CB.isCallee(U)) {
        IsRead = true;
        // Note that indirect calls do not capture; see the comment in
        // CaptureTracking for context.
        continue;
      }

      // Given we've explicitly handled the callee operand above, what's left
      // must be a data operand (e.g. argument or operand bundle).
      const unsigned UseIndex = CB.getDataOperandNo(U);

      // Some intrinsics (for instance ptrmask) do not capture their results,
      // but return results that alias their pointer argument, and thus should
      // be handled like GEP or addrspacecast above.
      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
              &CB, /*MustPreserveNullness=*/false)) {
        for (Use &UU : CB.uses())
          if (Visited.insert(&UU).second)
            Worklist.push_back(&UU);
      } else if (capturesAnyProvenance(CB.getCaptureInfo(UseIndex))) {
        if (!CB.onlyReadsMemory())
          // If the callee can save a copy into other memory, then simply
          // scanning uses of the call is insufficient. We have no way
          // of tracking copies of the pointer through memory to see
          // if a reloaded copy is written to, thus we must give up.
          return Attribute::None;
        // Push users for processing once we finish this one.
        if (!I->getType()->isVoidTy())
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      }

      ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem);
      if (isNoModRef(ArgMR))
        continue;

      if (Function *F = CB.getCalledFunction())
        if (CB.isArgOperand(U) && UseIndex < F->arg_size() &&
            SCCNodes.count(F->getArg(UseIndex)))
          // This is an argument which is part of the speculative SCC. Note
          // that only operands corresponding to formal arguments of the callee
          // can participate in the speculation.
          break;

      // The accessors used on the call site here do the right thing for calls
      // and invokes with operand bundles.
      if (CB.doesNotAccessMemory(UseIndex)) {
        /* nop */
      } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) {
        IsRead = true;
      } else if (!isRefSet(ArgMR) ||
                 CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) {
        IsWrite = true;
      } else {
        return Attribute::None;
      }
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::Store:
      if (cast<StoreInst>(I)->getValueOperand() == *U)
        // Untrackable capture.
        return Attribute::None;

      // A volatile store has side effects beyond what writeonly can be relied
      // upon.
      if (cast<StoreInst>(I)->isVolatile())
        return Attribute::None;

      IsWrite = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      break;

    default:
      return Attribute::None;
    }
  }

  if (IsWrite && IsRead)
    return Attribute::None;
  else if (IsRead)
    return Attribute::ReadOnly;
  else if (IsWrite)
    return Attribute::WriteOnly;
  else
    return Attribute::ReadNone;
}

/// Deduce returned attributes for the SCC.
static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
                                     SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining if an argument is always
  // returned.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    // There is nothing to do if an argument is already marked as 'returned'.
    if (F->getAttributes().hasAttrSomewhere(Attribute::Returned))
      continue;

    auto FindRetArg = [&]() -> Argument * {
      Argument *RetArg = nullptr;
      for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
          // Note that stripPointerCasts should look through functions with
          // returned arguments.
          auto *RetVal =
              dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts());
          if (!RetVal || RetVal->getType() != F->getReturnType())
            return nullptr;

          if (!RetArg)
            RetArg = RetVal;
          else if (RetArg != RetVal)
            return nullptr;
        }

      return RetArg;
    };

    if (Argument *RetArg = FindRetArg()) {
      RetArg->addAttr(Attribute::Returned);
      ++NumReturned;
      Changed.insert(F);
    }
  }
}

/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information
/// loss when attribute knowledge disappears with the inlined call.
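///
/// For example, if the entry block unconditionally reaches a call
/// g(nonnull %p) that passes the caller's own argument %p, then %p must be
/// nonnull throughout the caller as well.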
static bool addArgumentAttrsFromCallsites(Function &F) {
  if (!EnableNonnullArgPropagation)
    return false;

  bool Changed = false;

  // For an argument attribute to transfer from a callsite to the parent, the
  // call must be guaranteed to execute every time the parent is called.
  // Conservatively, just check for calls in the entry block that are
  // guaranteed to execute.
  // TODO: This could be enhanced by testing if the callsite post-dominates the
  // entry block or by doing simple forward walks or backward walks to the
  // callsite.
  BasicBlock &Entry = F.getEntryBlock();
  for (Instruction &I : Entry) {
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (auto *CalledFunc = CB->getCalledFunction()) {
        for (auto &CSArg : CalledFunc->args()) {
          if (!CSArg.hasNonNullAttr(/*AllowUndefOrPoison=*/false))
            continue;

          // If the non-null callsite argument operand is an argument to 'F'
          // (the caller) and the call is guaranteed to execute, then the value
          // must be non-null throughout 'F'.
          auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
          if (FArg && !FArg->hasNonNullAttr()) {
            FArg->addAttr(Attribute::NonNull);
            Changed = true;
          }
        }
      }
    }
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  return Changed;
}

static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
  assert((R == Attribute::ReadOnly || R == Attribute::ReadNone ||
          R == Attribute::WriteOnly) &&
         "Must be an access attribute.");
  assert(A && "Argument must not be null.");

  // If the argument already has the attribute, nothing needs to be done.
  if (A->hasAttribute(R))
    return false;

  // Otherwise, remove potentially conflicting attributes, add the new one,
  // and update statistics.
  A->removeAttr(Attribute::WriteOnly);
  A->removeAttr(Attribute::ReadOnly);
  A->removeAttr(Attribute::ReadNone);
  // Remove the conflicting writable attribute.
  if (R == Attribute::ReadNone || R == Attribute::ReadOnly)
    A->removeAttr(Attribute::Writable);
  A->addAttr(R);
  if (R == Attribute::ReadOnly)
    ++NumReadOnlyArg;
  else if (R == Attribute::WriteOnly)
    ++NumWriteOnlyArg;
  else
    ++NumReadNoneArg;
  return true;
}

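/// Infer the 'initializes' attribute for pointer argument A of F, i.e. the
/// constant byte ranges of A that are written before being read or clobbered
/// along every path from the entry block.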
static bool inferInitializes(Argument &A, Function &F) {
  auto ArgumentUses = collectArgumentUsesPerBlock(A, F);
  // No write anywhere in the function, bail.
  if (!ArgumentUses.HasAnyWrite)
    return false;

  auto &UsesPerBlock = ArgumentUses.UsesPerBlock;
  BasicBlock &EntryBB = F.getEntryBlock();
  // A map to store the argument ranges initialized by a BasicBlock (including
  // its successors).
  DenseMap<const BasicBlock *, ConstantRangeList> Initialized;
  // Visit the successors of "BB" block and the instructions in BB (post-order)
  // to get the argument ranges initialized by "BB" (including its successors).
  // The result will be cached in "Initialized".
  auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList {
    auto UPB = UsesPerBlock.find(BB);
    ConstantRangeList CRL;

    // Start with the intersection of the successors.
    // If this block has any clobbering use, we're going to clear out the
    // ranges at some point in this block anyway, so don't bother looking at
    // successors.
    if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) {
      bool HasAddedSuccessor = false;
      for (auto *Succ : successors(BB)) {
        if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) {
          if (HasAddedSuccessor) {
            CRL = CRL.intersectWith(SuccI->second);
          } else {
            CRL = SuccI->second;
            HasAddedSuccessor = true;
          }
        } else {
          CRL = ConstantRangeList();
          break;
        }
      }
    }

    if (UPB != UsesPerBlock.end()) {
      // Sort uses in this block by instruction order.
      SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts;
      append_range(Insts, UPB->second.Insts);
      sort(Insts, [](std::pair<Instruction *, ArgumentAccessInfo> &LHS,
                     std::pair<Instruction *, ArgumentAccessInfo> &RHS) {
        return LHS.first->comesBefore(RHS.first);
      });

      // From the end of the block to the beginning of the block, set the
      // initializes ranges.
      for (auto &[_, Info] : reverse(Insts)) {
        if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown ||
            Info.ArgAccessType ==
                ArgumentAccessInfo::AccessType::WriteWithSideEffect)
          CRL = ConstantRangeList();
        if (!Info.AccessRanges.empty()) {
          if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
              Info.ArgAccessType ==
                  ArgumentAccessInfo::AccessType::WriteWithSideEffect) {
            CRL = CRL.unionWith(Info.AccessRanges);
          } else {
            assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read);
            for (const auto &ReadRange : Info.AccessRanges)
              CRL.subtract(ReadRange);
          }
        }
      }
    }
    return CRL;
  };

  ConstantRangeList EntryCRL;
  // If all write instructions are in the EntryBB, or if the EntryBB has
  // a clobbering use, we only need to look at EntryBB.
  bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB;
  if (!OnlyScanEntryBlock)
    if (auto EntryUPB = UsesPerBlock.find(&EntryBB);
        EntryUPB != UsesPerBlock.end())
      OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess;
  if (OnlyScanEntryBlock) {
    EntryCRL = VisitBlock(&EntryBB);
    if (EntryCRL.empty())
      return false;
  } else {
    // Now we have to go through the CFG to get the initialized argument ranges
    // across blocks. With dominance and post-dominance, the initialized ranges
    // by a block include both accesses inside this block and accesses in its
    // (transitive) successors. So visit successors before predecessors with a
    // post-order walk of the blocks and memorize the results in "Initialized".
    for (const BasicBlock *BB : post_order(&F)) {
      ConstantRangeList CRL = VisitBlock(BB);
      if (!CRL.empty())
        Initialized[BB] = CRL;
    }

    auto EntryCRLI = Initialized.find(&EntryBB);
    if (EntryCRLI == Initialized.end())
      return false;

    EntryCRL = EntryCRLI->second;
  }

  assert(!EntryCRL.empty() &&
         "should have bailed already if EntryCRL is empty");

  if (A.hasAttribute(Attribute::Initializes)) {
    ConstantRangeList PreviousCRL =
        A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList();
    if (PreviousCRL == EntryCRL)
      return false;
    EntryCRL = EntryCRL.unionWith(PreviousCRL);
  }

  A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes,
                           EntryCRL.rangesRef()));

  return true;
}

/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed,
                             bool SkipInitializes) {
  ArgumentGraph AG;

  auto DetermineAccessAttrsForSingleton = [](Argument *A) {
    SmallPtrSet<Argument *, 8> Self;
    Self.insert(A);
    Attribute::AttrKind R = determinePointerAccessAttrs(A, Self);
    if (R != Attribute::None)
      return addAccessAttr(A, R);
    return false;
  };

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (addArgumentAttrsFromCallsites(*F))
      Changed.insert(F);

    // Functions that are readonly (or readnone) and nounwind and don't return
    // a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() && F->willReturn() &&
        F->getReturnType()->isVoidTy()) {
      for (Argument &A : F->args()) {
        if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) {
          A.addAttr(Attribute::getWithCaptureInfo(A.getContext(),
                                                  CaptureInfo::none()));
          ++NumCapturesNone;
          Changed.insert(F);
        }
      }
      continue;
    }

    for (Argument &A : F->args()) {
      if (!A.getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      CaptureInfo OrigCI = A.getAttributes().getCaptureInfo();
      if (!capturesNothing(OrigCI)) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(&A, &Tracker);
        CaptureInfo NewCI = Tracker.CI & OrigCI;
        if (NewCI != OrigCI) {
          if (Tracker.Uses.empty()) {
            // If the information is complete, add the attribute now.
            A.addAttr(Attribute::getWithCaptureInfo(A.getContext(), NewCI));
            addCapturesStat(NewCI);
            Changed.insert(F);
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
            ArgumentGraphNode *Node = AG[&A];
            Node->CC = CaptureComponents(NewCI);
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(AG[Use]);
              if (Use != &A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A.onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone/writeonly without doing
        // an SCC? Note that we don't allow any calls at all here, or else our
        // result will be dependent on the iteration order through the
        // functions in the SCC.
        if (DetermineAccessAttrsForSingleton(&A))
          Changed.insert(F);
      }
      if (!SkipInitializes && !A.onlyReadsMemory()) {
        if (inferInitializes(A, *F))
          Changed.insert(F);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already been
  // made. If the definition doesn't have a 'nocapture' attribute by now, it
  // captures.

  for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // e.g. "void f(int* x) { if (...) f(x); }"
      if (ArgumentSCC[0]->Uses.size() == 1 &&
          ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
        Argument *A = ArgumentSCC[0]->Definition;
        CaptureInfo OrigCI = A->getAttributes().getCaptureInfo();
        CaptureInfo NewCI = CaptureInfo(ArgumentSCC[0]->CC) & OrigCI;
        if (NewCI != OrigCI) {
          A->addAttr(Attribute::getWithCaptureInfo(A->getContext(), NewCI));
          addCapturesStat(NewCI);
          Changed.insert(A->getParent());
        }

        // Infer the access attributes given the new captures attribute.
        if (DetermineAccessAttrsForSingleton(A))
          Changed.insert(A->getParent());
      }
      continue;
    }

    SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
    // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
    // quickly looking up whether a given Argument is in this ArgumentSCC.
    for (ArgumentGraphNode *I : ArgumentSCC) {
      ArgumentSCCNodes.insert(I->Definition);
    }

    // At the SCC level, only track merged CaptureComponents. We're not
    // currently prepared to handle propagation of return-only captures across
    // the SCC.
    CaptureComponents CC = CaptureComponents::None;
    for (ArgumentGraphNode *N : ArgumentSCC) {
      for (ArgumentGraphNode *Use : N->Uses) {
        Argument *A = Use->Definition;
        if (ArgumentSCCNodes.count(A))
          CC |= Use->CC;
        else
          CC |= CaptureComponents(A->getAttributes().getCaptureInfo());
        break;
      }
      if (capturesAll(CC))
        break;
    }

    if (!capturesAll(CC)) {
      for (ArgumentGraphNode *N : ArgumentSCC) {
        Argument *A = N->Definition;
        CaptureInfo OrigCI = A->getAttributes().getCaptureInfo();
        CaptureInfo NewCI = CaptureInfo(N->CC | CC) & OrigCI;
        if (NewCI != OrigCI) {
          A->addAttr(Attribute::getWithCaptureInfo(A->getContext(), NewCI));
          addCapturesStat(NewCI);
          Changed.insert(A->getParent());
        }
      }
    }

    if (capturesAnyProvenance(CC)) {
      // As the pointer provenance may be captured, determine the pointer
      // attributes looking at each argument individually.
      for (ArgumentGraphNode *N : ArgumentSCC) {
        if (DetermineAccessAttrsForSingleton(N->Definition))
          Changed.insert(N->Definition->getParent());
      }
      continue;
    }

    // We also want to compute readonly/readnone/writeonly. With a small number
    // of false negatives, we can assume that any pointer which is captured
    // isn't going to be provably readonly or readnone, since by definition
    // we can't analyze all uses of a captured pointer.
    //
    // The false negatives happen when the pointer is captured by a function
    // that promises readonly/readnone behaviour on the pointer, then the
    // pointer's lifetime ends before anything that writes to arbitrary memory.
    // Also, a readonly/readnone pointer may be returned, but returning a
    // pointer is capturing it.

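    // Meet over the access lattice: ReadNone is the top element, ReadOnly and
    // WriteOnly are incomparable, and None is the bottom.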
    auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) {
      if (A == B)
        return A;
      if (A == Attribute::ReadNone)
        return B;
      if (B == Attribute::ReadNone)
        return A;
      return Attribute::None;
    };

    Attribute::AttrKind AccessAttr = Attribute::ReadNone;
    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes);
      AccessAttr = meetAccessAttr(AccessAttr, K);
      if (AccessAttr == Attribute::None)
        break;
    }

    if (AccessAttr != Attribute::None) {
      for (ArgumentGraphNode *N : ArgumentSCC) {
        Argument *A = N->Definition;
        if (addAccessAttr(A, AccessAttr))
          Changed.insert(A->getParent());
      }
    }
  }
}

/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
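///
/// For example, the return here is malloc-like:
///   define ptr @f() {
///     %p = call noalias ptr @malloc(i64 8)
///     ret ptr %p
///   }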
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(RetVal)) {
      if (!C->isNullValue() && !isa<UndefValue>(C))
        return false;

      continue;
    }

    if (isa<Argument>(RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(RVI);
        FlowsToReturn.insert(SI->getTrueValue());
        FlowsToReturn.insert(SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(RVI);
        FlowsToReturn.insert_range(PN->incoming_values());
        continue;
      }

      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallBase &CB = cast<CallBase>(*RVI);
        if (CB.hasRetAttr(Attribute::NoAlias))
          break;
        if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
          break;
        [[fallthrough]];
      }
      default:
        return false; // Did not come from an allocation.
      }

    if (PointerMayBeCaptured(RetVal, /*ReturnCaptures=*/false))
      return false;
  }

  return true;
}

/// Deduce noalias attributes for the SCC.
static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noalias
  // pointers.
  for (Function *F : SCCNodes) {
    // Already noalias.
    if (F->returnDoesNotAlias())
      continue;

    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate noalias return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    if (!isFunctionMallocLike(F, SCCNodes))
      return;
  }

1536 for (Function *F : SCCNodes) {
1537 if (F->returnDoesNotAlias() ||
1538 !F->getReturnType()->isPointerTy())
1539 continue;
1540
1541 F->setReturnDoesNotAlias();
1542 ++NumNoAlias;
1543 Changed.insert(Ptr: F);
1544 }
1545}
1546
1547/// Tests whether this function is known to not return null.
1548///
1549/// Requires that the function returns a pointer.
1550///
1551/// Returns true if it believes the function will not return a null, and sets
1552/// \p Speculative based on whether the returned conclusion is a speculative
1553/// conclusion due to SCC calls.
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types");
  Speculative = false;

  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  auto &DL = F->getDataLayout();

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good.
    if (isKnownNonZero(RetVal, DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      FlowsToReturn.insert(RVI->getOperand(0));
      continue;
    case Instruction::GetElementPtr:
      if (cast<GEPOperator>(RVI)->isInBounds()) {
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      }
      return false;
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(RVI);
      FlowsToReturn.insert(SI->getTrueValue());
      FlowsToReturn.insert(SI->getFalseValue());
      continue;
    }
    case Instruction::PHI: {
      PHINode *PN = cast<PHINode>(RVI);
      for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        FlowsToReturn.insert(PN->getIncomingValue(i));
      continue;
    }
    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*RVI);
      Function *Callee = CB.getCalledFunction();
      // A call to a node within the SCC is assumed to return null until
      // proven otherwise.
      if (Callee && SCCNodes.count(Callee)) {
        Speculative = true;
        continue;
      }
      return false;
    }
    default:
      return false; // Unknown source, may be null.
    }
    llvm_unreachable("should have either continued or returned");
  }

  return true;
}

/// Deduce nonnull attributes for the SCC.
static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Speculatively assume that all functions in the SCC return only nonnull
  // pointers. We may refute this as we analyze functions.
  bool SCCReturnsNonNull = true;

  // Check each function in turn, determining which functions return nonnull
  // pointers.
  for (Function *F : SCCNodes) {
    // Already nonnull.
    if (F->getAttributes().hasRetAttr(Attribute::NonNull))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate nonnull return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    bool Speculative = false;
    if (isReturnNonNull(F, SCCNodes, Speculative)) {
      if (!Speculative) {
        // Mark the function eagerly since we may discover a function
        // which prevents us from speculating about the entire SCC.
        LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
                          << " as nonnull\n");
        F->addRetAttr(Attribute::NonNull);
        ++NumNonNullReturn;
        Changed.insert(F);
      }
      continue;
    }
    // At least one function returns something which could be null; we can't
    // speculate any more.
    SCCReturnsNonNull = false;
  }

  if (SCCReturnsNonNull) {
    for (Function *F : SCCNodes) {
      if (F->getAttributes().hasRetAttr(Attribute::NonNull) ||
          !F->getReturnType()->isPointerTy())
        continue;

      LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
      F->addRetAttr(Attribute::NonNull);
      ++NumNonNullReturn;
      Changed.insert(F);
    }
  }
}

/// Deduce noundef attributes for the SCC.
static void addNoUndefAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noundef
  // values.
  for (Function *F : SCCNodes) {
    // Already noundef.
    AttributeList Attrs = F->getAttributes();
    if (Attrs.hasRetAttr(Attribute::NoUndef))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // MemorySanitizer assumes that the definition and declaration of a
    // function will be consistent. A function with the sanitize_memory
    // attribute should be skipped during inference.
    if (F->hasFnAttribute(Attribute::SanitizeMemory))
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    const DataLayout &DL = F->getDataLayout();
    if (all_of(*F, [&](BasicBlock &BB) {
          if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
            // TODO: perform context-sensitive analysis?
            Value *RetVal = Ret->getReturnValue();
            if (!isGuaranteedNotToBeUndefOrPoison(RetVal))
              return false;

            // We know the original return value is not poison now, but it
            // could still be converted to poison by another return attribute.
            // Try to explicitly re-prove the relevant attributes.
            if (Attrs.hasRetAttr(Attribute::NonNull) &&
                !isKnownNonZero(RetVal, DL))
              return false;

            if (MaybeAlign Align = Attrs.getRetAlignment())
              if (RetVal->getPointerAlignment(DL) < *Align)
                return false;

            Attribute Attr = Attrs.getRetAttr(Attribute::Range);
            if (Attr.isValid() &&
                !Attr.getRange().contains(
                    computeConstantRange(RetVal, /*ForSigned=*/false)))
              return false;
          }
          return true;
        })) {
      F->addRetAttr(Attribute::NoUndef);
      ++NumNoUndefReturn;
      Changed.insert(F);
    }
  }
}

namespace {

/// Collects a set of attribute inference requests and performs them all in one
/// go on a single SCC Node. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// Once all the bodies have been verified, we are free to set the attribute.
/// Customization of inference for individual attributes is performed by
/// providing a handful of predicates for each attribute.
class AttributeInferer {
public:
  /// Describes a request for inference of a single attribute.
  struct InferenceDescriptor {

    /// Returns true if this function does not have to be handled.
    /// General intent for this predicate is to provide an optimization
    /// for functions that do not need this attribute inference at all
    /// (say, for functions that already have the attribute).
    std::function<bool(const Function &)> SkipFunction;

    /// Returns true if this instruction violates attribute assumptions.
    std::function<bool(Instruction &)> InstrBreaksAttribute;

    /// Sets the inferred attribute for this function.
    std::function<void(Function &)> SetAttribute;

    /// Attribute we derive.
    Attribute::AttrKind AKind;

    /// If true, only "exact" definitions can be used to infer this attribute.
    /// See GlobalValue::isDefinitionExact.
    bool RequiresExactDefinition;

    InferenceDescriptor(Attribute::AttrKind AK,
                        std::function<bool(const Function &)> SkipFunc,
                        std::function<bool(Instruction &)> InstrScan,
                        std::function<void(Function &)> SetAttr,
                        bool ReqExactDef)
        : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
          SetAttribute(SetAttr), AKind(AK),
          RequiresExactDefinition(ReqExactDef) {}
  };

private:
  SmallVector<InferenceDescriptor, 4> InferenceDescriptors;

public:
  void registerAttrInference(InferenceDescriptor AttrInference) {
    InferenceDescriptors.push_back(AttrInference);
  }

  void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed);
};

/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
void AttributeInferer::run(const SCCNodeSet &SCCNodes,
                           SmallSet<Function *, 8> &Changed) {
  SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
  // Go through all the functions in SCC and check corresponding attribute
  // assumptions for each of them. Attributes that are invalid for this SCC
  // will be removed from InferInSCC.
  for (Function *F : SCCNodes) {

    // No attributes whose assumptions are still valid - done.
    if (InferInSCC.empty())
      return;

    // Check if our attributes ever need scanning/can be scanned.
    llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
      if (ID.SkipFunction(*F))
        return false;

      // Remove from further inference (invalidate) when visiting a function
      // that has no instructions to scan/has an unsuitable definition.
      return F->isDeclaration() ||
             (ID.RequiresExactDefinition && !F->hasExactDefinition());
    });

    // For each attribute still in InferInSCC that doesn't explicitly skip F,
    // set up the F instructions scan to verify assumptions of the attribute.
    SmallVector<InferenceDescriptor, 4> InferInThisFunc;
    llvm::copy_if(
        InferInSCC, std::back_inserter(InferInThisFunc),
        [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });

    if (InferInThisFunc.empty())
      continue;

    // Start instruction scan.
    for (Instruction &I : instructions(*F)) {
      llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) {
        if (!ID.InstrBreaksAttribute(I))
          return false;
        // Remove attribute from further inference on any other functions
        // because attribute assumptions have just been violated.
        llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) {
          return D.AKind == ID.AKind;
        });
        // Remove attribute from the rest of current instruction scan.
        return true;
      });

      if (InferInThisFunc.empty())
        break;
    }
  }

  if (InferInSCC.empty())
    return;

  for (Function *F : SCCNodes)
    // At this point InferInSCC contains only attributes whose assumptions held
    // for the whole SCC: every function was either explicitly skipped or
    // verified to have no instructions that break the attribute's assumptions.
    // Hence we just go and force the attribute for all non-skipped functions.
    for (auto &ID : InferInSCC) {
      if (ID.SkipFunction(*F))
        continue;
      Changed.insert(F);
      ID.SetAttribute(*F);
    }
}

struct SCCNodesResult {
  SCCNodeSet SCCNodes;
  bool HasUnknownCall;
};

} // end anonymous namespace

/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
                                     const SCCNodeSet &SCCNodes) {
  const CallBase *CB = dyn_cast<CallBase>(&I);
  // Breaks the non-convergent assumption if CB is a convergent call to a
  // function not in the SCC.
  return CB && CB->isConvergent() &&
         !SCCNodes.contains(CB->getCalledFunction());
}

/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I,
                                   const SCCNodeSet &SCCNodes) {
  if (!I.mayThrow(/* IncludePhaseOneUnwind */ true))
    return false;
  if (const auto *CI = dyn_cast<CallInst>(&I)) {
    if (Function *Callee = CI->getCalledFunction()) {
      // I is a may-throw call to a function inside our SCC. This doesn't
      // invalidate our current working assumption that the SCC is no-throw; we
      // just have to scan that other function.
      if (SCCNodes.contains(Callee))
        return false;
    }
  }
  return true;
}

/// Helper for NoFree inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
  CallBase *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    return false;

  if (CB->hasFnAttr(Attribute::NoFree))
    return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different from the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}

static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile may synchronize.
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize. (See the comment above about
  // monotonic operations.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non-call-site cases are covered by the two checks above.
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non-volatile memset/memcpy/memmove are nosync.
  // NOTE: Only intrinsics with volatile flags should be handled here. All
  // others should be marked in Intrinsics.td.
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

/// Attempt to remove the convergent function attribute when possible.
///
/// Any changed functions are inserted into \p Changed.
static void inferConvergent(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  // Request to remove the convergent attribute from all functions in the SCC
  // if every callsite within the SCC is not convergent (except for calls
  // to functions within the SCC).
  // Note: Removal of the attr from the callsites will happen in
  // InstCombineCalls separately.
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::Convergent,
      // Skip non-convergent functions.
      [](const Function &F) { return !F.isConvergent(); },
      // Instructions that break non-convergent assumption.
      [SCCNodes](Instruction &I) {
        return InstrBreaksNonConvergent(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
                          << "\n");
        F.setNotConvergent();
      },
      /* RequiresExactDefinition= */ false});
  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions.
///
/// Any changed functions are inserted into \p Changed.
static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
                                         SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer nounwind attribute for all the functions in the SCC if
    // every callsite within the SCC is not throwing (except for calls to
    // functions within the SCC). Note that the nounwind attribute suffers from
    // derefinement - results may change depending on how functions are
    // optimized. Thus it can be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request to infer nofree attribute for all the functions in the SCC if
    // every callsite within the SCC does not directly or indirectly free
    // memory (except for calls to functions within the SCC). Note that the
    // nofree attribute suffers from derefinement - results may change
    // depending on how functions are optimized. Thus it can be inferred only
    // from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break non-deallocating assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n");
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /* RequiresExactDefinition= */ true});

  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});

  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}
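/// Deduce norecurse for a single-node SCC whose calls all target norecurse
/// functions. For example (illustrative IR), given
/// \code
///   define i32 @leaf(i32 %x) norecurse {
///     ret i32 %x
///   }
///   define i32 @f(i32 %x) {
///     %r = call i32 @leaf(i32 %x)
///     ret i32 %r
///   }
/// \endcode
/// @f forms a singleton SCC and every call in it is to a norecurse function,
/// so @f itself can be marked norecurse.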
static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
                              SmallSet<Function *, 8> &Changed) {
  // Try to identify functions that do not recurse.

  // If the SCC contains multiple nodes we know for sure there is recursion.
  if (SCCNodes.size() != 1)
    return;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return;

  // If all of the calls in F are identifiable and are to norecurse functions,
  // F is norecurse. This check also detects self-recursion, since F is not
  // yet marked norecurse and so a call from F to F will fail the check.
  for (auto &BB : *F)
    for (auto &I : BB.instructionsWithoutDebug())
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == F ||
            (!Callee->doesNotRecurse() &&
             !(Callee->isDeclaration() &&
               Callee->hasFnAttribute(Attribute::NoCallback))))
          // Function calls a potentially recursive function.
          return;
      }

  // Every call was to a non-recursive function other than this function, and
  // we have no indirect recursion as the SCC size is one. This function cannot
  // recurse.
  F->setDoesNotRecurse();
  ++NumNoRecurse;
  Changed.insert(F);
}

// Set the noreturn function attribute if possible.
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->doesNotReturn())
      continue;

    if (!canReturn(*F)) {
      F->setDoesNotReturn();
      Changed.insert(F);
    }
  }
}

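// Returns true if every path from the entry block to a function exit passes
// through a block containing a cold callsite (loops are conservatively
// treated as never cold). For example (illustrative CFG):
//
//   entry -> cold_bb -> exit      ; cold_bb calls a `cold` function
//   entry -> warm_bb -> exit      ; no cold callsite
//
// Here the result is false: the path through warm_bb reaches an exit without
// passing a cold callsite.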
static bool allPathsGoThroughCold(Function &F) {
  SmallDenseMap<BasicBlock *, bool, 16> ColdPaths;
  ColdPaths[&F.front()] = false;
  SmallVector<BasicBlock *> Jobs;
  Jobs.push_back(&F.front());

  while (!Jobs.empty()) {
    BasicBlock *BB = Jobs.pop_back_val();

    // If the block contains a cold callsite this path through the CFG is
    // cold. Ignore whether the instructions actually are guaranteed to
    // transfer execution. Divergent behavior is considered unlikely.
    if (any_of(*BB, [](Instruction &I) {
          if (auto *CB = dyn_cast<CallBase>(&I))
            return CB->hasFnAttr(Attribute::Cold);
          return false;
        })) {
      ColdPaths[BB] = true;
      continue;
    }

    auto Succs = successors(BB);
    // We found a path that doesn't go through any cold callsite.
    if (Succs.empty())
      return false;

    // We didn't find a cold callsite in this BB, so check that all successors
    // contain a cold callsite (or that their successors do).
    // Potential TODO: We could use static branch hints to assume certain
    // successor paths are inherently cold, irrespective of if they contain a
    // cold callsite.
    for (BasicBlock *Succ : Succs) {
      // Start with false; this is necessary to ensure we don't mark loops as
      // cold.
      auto [Iter, Inserted] = ColdPaths.try_emplace(Succ, false);
      if (!Inserted) {
        if (Iter->second)
          continue;
        return false;
      }
      Jobs.push_back(Succ);
    }
  }
  return true;
}

// Set the cold function attribute if possible.
static void addColdAttrs(const SCCNodeSet &SCCNodes,
                         SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->hasFnAttribute(Attribute::Cold) || F->hasFnAttribute(Attribute::Hot))
      continue;

    // Potential TODO: We could add attribute `cold` on functions with `coldcc`.
    if (allPathsGoThroughCold(*F)) {
      F->addFnAttr(Attribute::Cold);
      ++NumCold;
      Changed.insert(F);
      continue;
    }
  }
}

static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that the
  // definition we'll get at link time is *exactly* the definition we see now.
  // For more details, see GlobalValue::mayBeDerefined.
  if (!F.hasExactDefinition())
    return false;

  // A mustprogress function without side effects must return.
  if (F.mustProgress() && F.onlyReadsMemory())
    return true;

  // Can only analyze functions with a definition.
  if (F.isDeclaration())
    return false;

  // Functions with loops require more sophisticated analysis, as the loop
  // may be infinite. For now, don't try to handle them.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
  FindFunctionBackedges(F, Backedges);
  if (!Backedges.empty())
    return false;

  // If there are no loops, then the function is willreturn if all calls in
  // it are willreturn.
  return all_of(instructions(F), [](const Instruction &I) {
    return I.willReturn();
  });
}

// Set the willreturn function attribute if possible.
static void addWillReturn(const SCCNodeSet &SCCNodes,
                          SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || F->willReturn() || !functionWillReturn(*F))
      continue;

    F->setWillReturn();
    NumWillReturn++;
    Changed.insert(F);
  }
}

static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
  SCCNodesResult Res;
  Res.HasUnknownCall = false;
  for (Function *F : Functions) {
    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) ||
        F->isPresplitCoroutine()) {
      // Treat any function we're trying not to optimize as if it were an
      // indirect call and omit it from the node set used below.
      Res.HasUnknownCall = true;
      continue;
    }
    // Track whether any functions in this SCC have an unknown call edge.
    // Note: if this is ever a performance hit, we can common it with
    // subsequent routines which also do scans over the instructions of the
    // function.
    if (!Res.HasUnknownCall) {
      for (Instruction &I : instructions(*F)) {
        if (auto *CB = dyn_cast<CallBase>(&I)) {
          if (!CB->getCalledFunction()) {
            Res.HasUnknownCall = true;
            break;
          }
        }
      }
    }
    Res.SCCNodes.insert(F);
  }
  return Res;
}

template <typename AARGetterT>
static SmallSet<Function *, 8>
deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
                       bool ArgAttrsOnly) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);

  // Bail if the SCC only contains optnone functions.
  if (Nodes.SCCNodes.empty())
    return {};

  SmallSet<Function *, 8> Changed;
  if (ArgAttrsOnly) {
    // ArgAttrsOnly means to only infer attributes that may aid optimizations
    // on the *current* function. The "initializes" attribute is to aid
    // optimizations (like DSE) on the callers, so skip "initializes" here.
    addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true);
    return Changed;
  }

  addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
  addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
  addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false);
  inferConvergent(Nodes.SCCNodes, Changed);
  addNoReturnAttrs(Nodes.SCCNodes, Changed);
  addColdAttrs(Nodes.SCCNodes, Changed);
  addWillReturn(Nodes.SCCNodes, Changed);
  addNoUndefAttrs(Nodes.SCCNodes, Changed);

  // If we have no external nodes participating in the SCC, we can deduce some
  // more precise attributes as well.
  if (!Nodes.HasUnknownCall) {
    addNoAliasAttrs(Nodes.SCCNodes, Changed);
    addNonNullAttrs(Nodes.SCCNodes, Changed);
    inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
    addNoRecurseAttrs(Nodes.SCCNodes, Changed);
  }

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This is handling the cases where one attribute on a signature
  // implies another, but for implementation reasons the inference rule for
  // the latter is missing (or simply less sophisticated).
  for (Function *F : Nodes.SCCNodes)
    if (F)
      if (inferAttributesFromOthers(*F))
        Changed.insert(F);

  return Changed;
}

PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  // Skip non-recursive functions if requested.
  // Only infer argument attributes for non-recursive functions, because
  // it can affect optimization behavior in conjunction with noalias.
  bool ArgAttrsOnly = false;
  if (C.size() == 1 && SkipNonRecursive) {
    LazyCallGraph::Node &N = *C.begin();
    if (!N->lookup(N))
      ArgAttrsOnly = true;
  }

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  auto ChangedFunctions =
      deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
  if (ChangedFunctions.empty())
    return PreservedAnalyses::all();

  // Invalidate analyses for modified functions so that we don't have to
  // invalidate all analyses for all functions in this SCC.
  PreservedAnalyses FuncPA;
  // We haven't changed the CFG for modified functions.
  FuncPA.preserveSet<CFGAnalyses>();
  for (Function *Changed : ChangedFunctions) {
    FAM.invalidate(*Changed, FuncPA);
    // Also invalidate any direct callers of changed functions since analyses
    // may care about attributes of direct callees. For example, MemorySSA cares
    // about whether or not a call's callee modifies memory and queries that
    // through function attributes.
    for (auto *U : Changed->users()) {
      if (auto *Call = dyn_cast<CallBase>(U)) {
        if (Call->getCalledFunction() == Changed)
          FAM.invalidate(*Call->getFunction(), FuncPA);
      }
    }
  }

  PreservedAnalyses PA;
  // We have not added or removed functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We already invalidated all relevant function analyses above.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}

void PostOrderFunctionAttrsPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  if (SkipNonRecursive)
    OS << "<skip-non-recursive-function-attrs>";
}

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    Functions.push_back(I->getFunction());
  }

  return !deriveAttrsInPostOrder(Functions, AARGetter, /*ArgAttrsOnly=*/false)
              .empty();
}

static bool addNoRecurseAttrsTopDown(Function &F) {
  // We check the preconditions for the function prior to calling this to avoid
  // the cost of building up a reversible post-order list. We assert them here
  // to make sure none of the invariants this relies on were violated.
  assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
  assert(!F.doesNotRecurse() &&
         "This function has already been deduced as norecurse!");
  assert(F.hasInternalLinkage() &&
         "Can only do top-down deduction for internal linkage functions!");

  // If F is internal and all of its uses are calls from non-recursive
  // functions, then none of its calls could in fact recurse without going
  // through a function marked norecurse, and so we can mark this function too
  // as norecurse. Note that the uses must actually be calls -- otherwise
  // a pointer to this function could be returned from a norecurse function but
  // this function could be recursively (indirectly) called. Note that this
  // also detects if F is directly recursive as F is not yet marked as
  // a norecurse function.
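  //
  // For example (illustrative IR): if internal @inner's only use is the
  // direct call in
  //
  //   define void @outer() norecurse {
  //     call void @inner()
  //     ret void
  //   }
  //
  // then no call to @inner can participate in recursion, and @inner can be
  // marked norecurse as well.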
  for (auto &U : F.uses()) {
    auto *I = dyn_cast<Instruction>(U.getUser());
    if (!I)
      return false;
    CallBase *CB = dyn_cast<CallBase>(I);
    if (!CB || !CB->isCallee(&U) ||
        !CB->getParent()->getParent()->doesNotRecurse())
      return false;
  }
  F.setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) {
  // We only have a post-order SCC traversal (because SCCs are inherently
  // discovered in post-order), so we accumulate them in a vector and then walk
  // it in reverse. This is simpler than using the RPO iterator infrastructure
  // because we need to combine SCC detection and the PO walk of the call
  // graph. We can also cheat egregiously because we're primarily interested in
  // synthesizing norecurse and so we need only save the singleton SCCs, as
  // SCCs with multiple functions in them will clearly be recursive.

  SmallVector<Function *, 16> Worklist;
  CG.buildRefSCCs();
  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
    for (LazyCallGraph::SCC &SCC : RC) {
      if (SCC.size() != 1)
        continue;
      Function &F = SCC.begin()->getFunction();
      if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage())
        Worklist.push_back(&F);
    }
  }
  bool Changed = false;
  for (auto *F : llvm::reverse(Worklist))
    Changed |= addNoRecurseAttrsTopDown(*F);

  return Changed;
}

PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &CG = AM.getResult<LazyCallGraphAnalysis>(M);

  if (!deduceFunctionAttributeInRPO(M, CG))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<LazyCallGraphAnalysis>();
  return PA;
}