//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph deducing and/or propagating function attributes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <optional>
#include <vector>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "function-attrs"

STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute");
STATISTIC(NumCapturesNone, "Number of arguments marked captures(none)");
STATISTIC(NumCapturesPartial, "Number of arguments marked with captures "
                              "attribute other than captures(none)");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");
STATISTIC(NumCold, "Number of functions marked as cold");

STATISTIC(NumThinLinkNoRecurse,
          "Number of functions marked as norecurse during thinlink");
STATISTIC(NumThinLinkNoUnwind,
          "Number of functions marked as nounwind during thinlink");

static cl::opt<bool> EnablePoisonArgAttrPropagation(
    "enable-poison-arg-attr-prop", cl::init(true), cl::Hidden,
    cl::desc("Try to propagate nonnull and nofpclass argument attributes from "
             "callsites to caller functions."));

static cl::opt<bool> DisableNoUnwindInference(
    "disable-nounwind-inference", cl::Hidden,
    cl::desc("Stop inferring nounwind attribute during function-attrs pass"));

static cl::opt<bool> DisableNoFreeInference(
    "disable-nofree-inference", cl::Hidden,
    cl::desc("Stop inferring nofree attribute during function-attrs pass"));

static cl::opt<bool> DisableThinLTOPropagation(
    "disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
    cl::desc("Don't propagate function-attrs in thinLTO"));

static void addCapturesStat(CaptureInfo CI) {
  if (capturesNothing(CI))
    ++NumCapturesNone;
  else
    ++NumCapturesPartial;
}

namespace {

using SCCNodeSet = SmallSetVector<Function *, 8>;

} // end anonymous namespace

static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc,
                         ModRefInfo MR, AAResults &AAR) {
  // Ignore accesses to known-invariant or local memory.
  MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocals=*/true);
  if (isNoModRef(MR))
    return;

  const Value *UO = getUnderlyingObjectAggressive(Loc.Ptr);
  if (isa<AllocaInst>(UO))
    return;
  if (isa<Argument>(UO)) {
    ME |= MemoryEffects::argMemOnly(MR);
    return;
  }

  // If it's not an identified object, it might be an argument.
  if (!isIdentifiedObject(UO))
    ME |= MemoryEffects::argMemOnly(MR);
  ME |= MemoryEffects(IRMemLocation::ErrnoMem, MR);
  ME |= MemoryEffects(IRMemLocation::Other, MR);
}

static void addArgLocs(MemoryEffects &ME, const CallBase *Call,
                       ModRefInfo ArgMR, AAResults &AAR) {
  for (const Value *Arg : Call->args()) {
    if (!Arg->getType()->isPtrOrPtrVectorTy())
      continue;

    addLocAccess(ME,
                 MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()),
                 ArgMR, AAR);
  }
}

/// Returns the memory access attribute for function F using AAR for AA results,
/// where SCCNodes is the current SCC.
///
/// If ThisBody is true, this function may examine the function body and will
/// return a result pertaining to this copy of the function. If it is false, the
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
///
/// The return value is split into two parts: Memory effects that always apply,
/// and additional memory effects that apply if any of the functions in the SCC
/// can access argmem.
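///
/// For example, a recursive call "f(p)" inside f contributes the locations of
/// its pointer arguments only to the second part; addMemoryAttrs() merges them
/// into the result afterwards if the SCC turns out to access argument memory.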
static std::pair<MemoryEffects, MemoryEffects>
checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
                          const SCCNodeSet &SCCNodes) {
  MemoryEffects OrigME = AAR.getMemoryEffects(&F);
  if (OrigME.doesNotAccessMemory())
    // Already perfect!
    return {OrigME, MemoryEffects::none()};

  if (!ThisBody)
    return {OrigME, MemoryEffects::none()};

  MemoryEffects ME = MemoryEffects::none();
  // Additional locations accessed if the SCC accesses argmem.
  MemoryEffects RecursiveArgME = MemoryEffects::none();

  // Inalloca and preallocated arguments are always clobbered by the call.
  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      F.getAttributes().hasAttrSomewhere(Attribute::Preallocated))
    ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef);

  // Scan the function body for instructions that may read or write memory.
  for (Instruction &I : instructions(F)) {
    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      // We can optimistically ignore calls to functions in the same SCC, with
      // two caveats:
      //   * Calls with operand bundles may have additional effects.
      //   * Argument memory accesses may imply additional effects depending on
      //     what the argument location is.
      if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
          SCCNodes.count(Call->getCalledFunction())) {
        // Keep track of which additional locations are accessed if the SCC
        // turns out to access argmem.
        addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR);
        continue;
      }

      MemoryEffects CallME = AAR.getMemoryEffects(Call);

      // If the call doesn't access memory, we're done.
      if (CallME.doesNotAccessMemory())
        continue;

      // A pseudo probe call shouldn't change any function attribute since it
      // doesn't translate to a real instruction. It comes with a memory access
      // tag to prevent itself being removed by optimizations and not block
      // other instructions being optimized.
      if (isa<PseudoProbeInst>(I))
        continue;

      // Merge callee's memory effects into caller's ones, including
      // inaccessible and errno memory, but excluding argument memory, which is
      // handled separately.
      ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem);

      // If the call accesses captured memory (currently part of "other") and
      // an argument is captured (currently not tracked), then it may also
      // access argument memory.
      ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other);
      ME |= MemoryEffects::argMemOnly(OtherMR);

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem);
      if (ArgMR != ModRefInfo::NoModRef)
        addArgLocs(ME, Call, ArgMR, AAR);
      continue;
    }

    ModRefInfo MR = ModRefInfo::NoModRef;
    if (I.mayWriteToMemory())
      MR |= ModRefInfo::Mod;
    if (I.mayReadFromMemory())
      MR |= ModRefInfo::Ref;
    if (MR == ModRefInfo::NoModRef)
      continue;

    std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I);
    if (!Loc) {
      // If no location is known, conservatively assume anything can be
      // accessed.
      ME |= MemoryEffects(MR);
      continue;
    }

    // Volatile operations may access inaccessible memory.
    if (I.isVolatile())
      ME |= MemoryEffects::inaccessibleMemOnly(MR);

    addLocAccess(ME, *Loc, MR, AAR);
  }

  return {OrigME & ME, RecursiveArgME};
}

MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F,
                                                    AAResults &AAR) {
  return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first;
}

/// Deduce readonly/readnone/writeonly attributes for the SCC.
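/// e.g. if every function in the SCC only loads through its pointer arguments,
/// each function ends up with the "memory(argmem: read)" attribute.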
template <typename AARGetterT>
static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
                           SmallPtrSet<Function *, 8> &Changed) {
  MemoryEffects ME = MemoryEffects::none();
  MemoryEffects RecursiveArgME = MemoryEffects::none();
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);
    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected. See the
    // comment on GlobalValue::isDefinitionExact for more details.
    auto [FnME, FnRecursiveArgME] =
        checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
    ME |= FnME;
    RecursiveArgME |= FnRecursiveArgME;
    // Reached bottom of the lattice, we will not be able to improve the result.
    if (ME == MemoryEffects::unknown())
      return;
  }

  // If the SCC accesses argmem, add recursive accesses resulting from that.
  ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
  if (ArgMR != ModRefInfo::NoModRef)
    ME |= RecursiveArgME & MemoryEffects(ArgMR);

  for (Function *F : SCCNodes) {
    MemoryEffects OldME = F->getMemoryEffects();
    MemoryEffects NewME = ME & OldME;
    if (NewME != OldME) {
      ++NumMemoryAttr;
      F->setMemoryEffects(NewME);
      // Remove conflicting writable attributes.
      if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem)))
        for (Argument &A : F->args())
          A.removeAttr(Attribute::Writable);
      Changed.insert(F);
    }
  }
}

// Compute definitive function attributes for a function taking into account
// prevailing definitions and linkage types.
static FunctionSummary *calculatePrevailingSummary(
    ValueInfo VI,
    DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {

  auto [It, Inserted] = CachedPrevailingSummary.try_emplace(VI);
  if (!Inserted)
    return It->second;

  /// At this point, prevailing symbols have been resolved. The following leads
  /// to returning a conservative result:
  /// - Multiple instances with local linkage. Normally local linkage would be
  ///   unique per module as the GUID includes the module path. We could have a
  ///   GUID alias if there wasn't any distinguishing path when each file was
  ///   compiled, but that should be rare so we'll punt on those.

  /// These next 2 cases should not happen and will assert:
  /// - Multiple instances with external linkage. This should be caught in
  ///   symbol resolution.
  /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
  ///   knowledge meaning we have to go conservative.

  /// Otherwise, we calculate attributes for a function as:
  /// 1. If we have a local linkage, take its attributes. If there's somehow
  ///    multiple, bail and go conservative.
  /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is
  ///    prevailing, take its attributes.
  /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
  ///    differences. However, if the prevailing copy is known it will be used
  ///    so take its attributes. If the prevailing copy is in a native file
  ///    all IR copies will be dead and propagation will go conservative.
  /// 4. AvailableExternally summaries without a prevailing copy are known to
  ///    occur in a couple of circumstances:
  ///    a. An internal function gets imported due to its caller getting
  ///       imported, it becomes AvailableExternally but no prevailing
  ///       definition exists. Because it has to get imported along with its
  ///       caller the attributes will be captured by propagating on its
  ///       caller.
  ///    b. C++11 [temp.explicit]p10 can generate AvailableExternally
358 /// definitions of explicitly instanced template declarations
359 /// for inlining which are ultimately dropped from the TU. Since this
360 /// is localized to the TU the attributes will have already made it to
361 /// the callers.
362 /// These are edge cases and already captured by their callers so we
363 /// ignore these for now. If they become relevant to optimize in the
364 /// future this can be revisited.
365 /// 5. Otherwise, go conservative.
366
367 FunctionSummary *Local = nullptr;
368 FunctionSummary *Prevailing = nullptr;
369
370 for (const auto &GVS : VI.getSummaryList()) {
371 if (!GVS->isLive())
372 continue;
373
374 FunctionSummary *FS = dyn_cast<FunctionSummary>(Val: GVS->getBaseObject());
375 // Virtual and Unknown (e.g. indirect) calls require going conservative
376 if (!FS || FS->fflags().HasUnknownCall)
377 return nullptr;
378
379 const auto &Linkage = GVS->linkage();
380 if (GlobalValue::isLocalLinkage(Linkage)) {
381 if (Local) {
382 LLVM_DEBUG(
383 dbgs()
384 << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
385 "function "
386 << VI.name() << " from " << FS->modulePath() << ". Previous module "
387 << Local->modulePath() << "\n");
388 return nullptr;
389 }
390 Local = FS;
391 } else if (GlobalValue::isExternalLinkage(Linkage)) {
392 assert(IsPrevailing(VI.getGUID(), GVS.get()) || GVS->wasPromoted());
393 Prevailing = FS;
394 break;
395 } else if (GlobalValue::isWeakODRLinkage(Linkage) ||
396 GlobalValue::isLinkOnceODRLinkage(Linkage) ||
397 GlobalValue::isWeakAnyLinkage(Linkage) ||
398 GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
399 if (IsPrevailing(VI.getGUID(), GVS.get())) {
400 Prevailing = FS;
401 break;
402 }
403 } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
404 // TODO: Handle these cases if they become meaningful
405 continue;
406 }
407 }
408
409 auto &CPS = CachedPrevailingSummary[VI];
410 if (Local) {
411 assert(!Prevailing);
412 CPS = Local;
413 } else if (Prevailing) {
414 assert(!Local);
415 CPS = Prevailing;
416 }
417
418 return CPS;
419}
420
421bool llvm::thinLTOPropagateFunctionAttrs(
422 ModuleSummaryIndex &Index,
423 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
424 IsPrevailing) {
425 // TODO: implement addNoAliasAttrs once
426 // there's more information about the return type in the summary
427 if (DisableThinLTOPropagation)
428 return false;
429
430 DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
431 bool Changed = false;
432
433 auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
434 // Assume we can propagate unless we discover otherwise
435 FunctionSummary::FFlags InferredFlags;
436 InferredFlags.NoRecurse = (SCCNodes.size() == 1);
437 InferredFlags.NoUnwind = true;
438
439 for (auto &V : SCCNodes) {
440 FunctionSummary *CallerSummary =
441 calculatePrevailingSummary(VI: V, CachedPrevailingSummary, IsPrevailing);
442
      // Function summaries can be unavailable, e.g. for declarations.
      if (!CallerSummary)
        return;

      if (CallerSummary->fflags().MayThrow)
        InferredFlags.NoUnwind = false;

      for (const auto &Callee : CallerSummary->calls()) {
        FunctionSummary *CalleeSummary = calculatePrevailingSummary(
            Callee.first, CachedPrevailingSummary, IsPrevailing);

        if (!CalleeSummary)
          return;

        if (!CalleeSummary->fflags().NoRecurse)
          InferredFlags.NoRecurse = false;

        if (!CalleeSummary->fflags().NoUnwind)
          InferredFlags.NoUnwind = false;

        if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
          break;
      }
    }

    if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
      Changed = true;
      for (auto &V : SCCNodes) {
        if (InferredFlags.NoRecurse) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
                            << V.name() << "\n");
          ++NumThinLinkNoRecurse;
        }

        if (InferredFlags.NoUnwind) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
                            << V.name() << "\n");
          ++NumThinLinkNoUnwind;
        }

        for (const auto &S : V.getSummaryList()) {
          if (auto *FS = dyn_cast<FunctionSummary>(S.get())) {
            if (InferredFlags.NoRecurse)
              FS->setNoRecurse();

            if (InferredFlags.NoUnwind)
              FS->setNoUnwind();
          }
        }
      }
    }
  };

  // Call propagation functions on each SCC in the Index.
  for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
       ++I) {
    std::vector<ValueInfo> Nodes(*I);
    PropagateAttributes(Nodes);
  }
  return Changed;
}

namespace {

/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  /// CaptureComponents for this argument, excluding captures via Uses.
  /// We don't distinguish between other/return captures here.
  CaptureComponents CC = CaptureComponents::None;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
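/// For example, in "void f(int *x) { g(x); }" where g is also in the SCC, the
/// flow of x into g's parameter is recorded in Uses instead of being treated
/// as a capture of x.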
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { CI = CaptureInfo::all(); }

  Action captured(const Use *U, UseCaptureInfo UseCI) override {
    if (updateCaptureInfo(U, UseCI.UseCC)) {
      // Don't bother continuing if we already capture everything.
      if (capturesAll(CI.getOtherComponents()))
        return Stop;
      return Continue;
    }

    // For SCC argument tracking, we're not going to analyze other/ret
    // components separately, so don't follow the return value.
    return ContinueIgnoringReturn;
  }

  bool updateCaptureInfo(const Use *U, CaptureComponents CC) {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      if (isa<ReturnInst>(U->getUser()))
        CI |= CaptureInfo::retOnly(CC);
      else
        // Conservatively assume that the captured value might make its way
        // into the return value as well. This could be made more precise.
        CI |= CaptureInfo(CC);
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      CI |= CaptureInfo(CC);
      return true;
    }

    assert(!CB->isCallee(U) && "callee operand reported captured?");
    const unsigned UseIndex = CB->getDataOperandNo(U);
    if (UseIndex >= CB->arg_size()) {
      // Data operand, but not an argument operand -- must be a bundle operand.
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand bundle
      // use. In this case it does not matter if the callee is within our SCC
      // or not -- we've been captured in some unknown way, and we have to be
      // conservative.
      CI |= CaptureInfo(CC);
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      CI |= CaptureInfo(CC);
      return true;
    }

    // TODO(captures): Could improve precision by remembering maximum
    // capture components for the argument.
    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // Does not include potential captures via Uses in the SCC.
  CaptureInfo CI = CaptureInfo::none();

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};

/// A struct of argument use: a Use and the offset it accesses. This struct
/// is used to track uses of an argument inside the function via GEPs. If a
/// GEP has a non-constant index, the Offset field is nullopt.
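/// For example, starting from the argument "%p" at offset 0, a use of the
/// result of "getelementptr i8, ptr %p, i64 4" is tracked with Offset 4.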
struct ArgumentUse {
  Use *U;
  std::optional<int64_t> Offset;
};

/// A struct of argument access info. "Unknown" accesses cover cases like
/// unrecognized instructions, instructions that have more than one use of
/// the argument, and volatile memory accesses. "WriteWithSideEffect" covers
/// call instructions that not only write an argument but also capture it.
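/// e.g. a call that writes through its "initializes((0, 4))" parameter but may
/// also capture the passed pointer is a WriteWithSideEffect; it is a plain
/// Write only if the parameter is also writeonly/readnone and captures(none).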
struct ArgumentAccessInfo {
  enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown };
  AccessType ArgAccessType;
  ConstantRangeList AccessRanges;
};

/// A struct to wrap the argument use info per block.
struct UsesPerBlockInfo {
  SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts;
  bool HasWrites = false;
  bool HasUnknownAccess = false;
};

/// A struct to summarize the argument use info in a function.
struct ArgumentUsesSummary {
  bool HasAnyWrite = false;
  bool HasWriteOutsideEntryBB = false;
  SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock;
};

ArgumentAccessInfo getArgumentAccessInfo(const Instruction *I,
                                         const ArgumentUse &ArgUse,
                                         const DataLayout &DL) {
  auto GetTypeAccessRange =
      [&DL](Type *Ty,
            std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto TypeSize = DL.getTypeStoreSize(Ty);
    if (!TypeSize.isScalable() && Offset) {
      int64_t Size = TypeSize.getFixedValue();
      APInt Low(64, *Offset, true);
      bool Overflow;
      APInt High = Low.sadd_ov(APInt(64, Size, true), Overflow);
      // Bail if the range overflows signed 64-bit int.
      if (Overflow)
        return std::nullopt;
      return ConstantRange(Low, High);
    }
    return std::nullopt;
  };
  auto GetConstantIntRange =
      [](Value *Length,
         std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto *ConstantLength = dyn_cast<ConstantInt>(Length);
    if (ConstantLength && Offset) {
      int64_t Len = ConstantLength->getSExtValue();

      // Reject zero or negative lengths.
      if (Len <= 0)
        return std::nullopt;

      APInt Low(64, *Offset, true);
      bool Overflow;
      APInt High = Low.sadd_ov(APInt(64, Len, true), Overflow);
      if (Overflow)
        return std::nullopt;

      return ConstantRange(Low, High);
    }
    return std::nullopt;
  };

  if (auto *SI = dyn_cast<StoreInst>(I)) {
    if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) {
      // Get the fixed type size of "SI". Since the access range of a write
      // will be unioned, if "SI" doesn't have a fixed type size, we just set
      // the access range to empty.
      ConstantRangeList AccessRanges;
      if (auto TypeAccessRange =
              GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset))
        AccessRanges.insert(*TypeAccessRange);
      return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)};
    }
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    if (LI->isSimple()) {
      assert(&LI->getOperandUse(0) == ArgUse.U);
      // Get the fixed type size of "LI". Different from Write, if "LI"
      // doesn't have a fixed type size, we conservatively set as a clobber
      // with an empty access range.
      if (auto TypeAccessRange =
              GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset))
        return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}};
    }
  } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) {
    if (!MemSet->isVolatile()) {
      ConstantRangeList AccessRanges;
      if (auto AccessRange =
              GetConstantIntRange(MemSet->getLength(), ArgUse.Offset))
        AccessRanges.insert(*AccessRange);
      return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
    }
  } else if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
    if (!MTI->isVolatile()) {
      if (&MTI->getOperandUse(0) == ArgUse.U) {
        ConstantRangeList AccessRanges;
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          AccessRanges.insert(*AccessRange);
        return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
      } else if (&MTI->getOperandUse(1) == ArgUse.U) {
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}};
      }
    }
  } else if (auto *CB = dyn_cast<CallBase>(I)) {
    if (CB->isArgOperand(ArgUse.U) &&
        !CB->isByValArgument(CB->getArgOperandNo(ArgUse.U))) {
      unsigned ArgNo = CB->getArgOperandNo(ArgUse.U);
      bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes);
      if (IsInitialize && ArgUse.Offset) {
        // Argument is a Write when parameter is writeonly/readnone
        // and nocapture. Otherwise, it's a WriteWithSideEffect.
        auto Access = CB->onlyWritesMemory(ArgNo) && CB->doesNotCapture(ArgNo)
                          ? ArgumentAccessInfo::AccessType::Write
                          : ArgumentAccessInfo::AccessType::WriteWithSideEffect;
        ConstantRangeList AccessRanges;
        Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes);
        ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList();
        for (ConstantRange &CR : CBCRL)
          AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset,
                                            CR.getUpper() + *ArgUse.Offset));
        return {Access, AccessRanges};
      }
    }
  }
  // Other unrecognized instructions are considered as unknown.
  return {ArgumentAccessInfo::AccessType::Unknown, {}};
}

// Collect the uses of argument "A" in "F".
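// e.g. for "define void @f(ptr %A) { store i32 0, ptr %A ... }" the summary
// has HasAnyWrite = true, HasWriteOutsideEntryBB = false, and records one
// Write access with range [0, 4) for the store in the entry block.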
ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) {
  auto &DL = F.getParent()->getDataLayout();
  unsigned PointerSize =
      DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace());
  ArgumentUsesSummary Result;

  BasicBlock &EntryBB = F.getEntryBlock();
  SmallVector<ArgumentUse, 4> Worklist;
  for (Use &U : A.uses())
    Worklist.push_back({&U, 0});

  // Update "UsesPerBlock" with the block of "I" as key and "Info" as value.
  // Return true if the block of "I" has write accesses after updating.
  auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) {
    auto *BB = I->getParent();
    auto &BBInfo = Result.UsesPerBlock[BB];
    auto [It, Inserted] = BBInfo.Insts.try_emplace(I);
    auto &IInfo = It->second;

    // Instructions that have more than one use of the argument are considered
    // as clobbers.
    if (!Inserted) {
      IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}};
      BBInfo.HasUnknownAccess = true;
      return false;
    }

    IInfo = std::move(Info);
    BBInfo.HasUnknownAccess |=
        IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown;
    bool InfoHasWrites =
        (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
         IInfo.ArgAccessType ==
             ArgumentAccessInfo::AccessType::WriteWithSideEffect) &&
        !IInfo.AccessRanges.empty();
    BBInfo.HasWrites |= InfoHasWrites;
    return InfoHasWrites;
  };

  // No need for a visited set because we don't look through phis, so there are
  // no cycles.
  while (!Worklist.empty()) {
    ArgumentUse ArgUse = Worklist.pop_back_val();
    User *U = ArgUse.U->getUser();
    // Add GEP uses to worklist.
    // If the GEP is not a constant GEP, set the ArgumentUse::Offset to nullopt.
    if (auto *GEP = dyn_cast<GEPOperator>(U)) {
      std::optional<int64_t> NewOffset = std::nullopt;
      if (ArgUse.Offset) {
        APInt Offset(PointerSize, 0);
        if (GEP->accumulateConstantOffset(DL, Offset))
          NewOffset = *ArgUse.Offset + Offset.getSExtValue();
      }
      for (Use &U : GEP->uses())
        Worklist.push_back({&U, NewOffset});
      continue;
    }

    auto *I = cast<Instruction>(U);
    bool HasWrite = UpdateUseInfo(I, getArgumentAccessInfo(I, ArgUse, DL));

    Result.HasAnyWrite |= HasWrite;

    if (HasWrite && I->getParent() != &EntryBB)
      Result.HasWriteOutsideEntryBB = true;
  }
  return Result;
}

} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm

/// Returns Attribute::None, Attribute::ReadOnly, Attribute::WriteOnly or
/// Attribute::ReadNone.
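/// e.g. an argument that is only loaded from yields ReadOnly, while one whose
/// value is stored to memory yields None, since that is an untrackable capture.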
static Attribute::AttrKind
determinePointerAccessAttrs(Argument *A,
                            const SmallPtrSet<Argument *, 8> &SCCNodes) {
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  bool IsWrite = false;

  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    if (IsWrite && IsRead)
      // No point in searching further.
      return Attribute::None;

    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*I);
      if (CB.isCallee(U)) {
        IsRead = true;
        // Note that indirect calls do not capture, see comment in
        // CaptureTracking for context.
        continue;
      }

      // Given we've explicitly handled the callee operand above, what's left
      // must be a data operand (e.g. argument or operand bundle).
      const unsigned UseIndex = CB.getDataOperandNo(U);

      // Some intrinsics (for instance ptrmask) do not capture their results,
      // but return results that alias their pointer argument, and thus should
      // be handled like GEP or addrspacecast above.
      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
              &CB, /*MustPreserveNullness=*/false)) {
        for (Use &UU : CB.uses())
          if (Visited.insert(&UU).second)
            Worklist.push_back(&UU);
      } else if (capturesAnyProvenance(CB.getCaptureInfo(UseIndex))) {
        if (!CB.onlyReadsMemory())
          // If the callee can save a copy into other memory, then simply
          // scanning uses of the call is insufficient. We have no way
          // of tracking copies of the pointer through memory to see
          // if a reloaded copy is written to, thus we must give up.
          return Attribute::None;
        // Push users for processing once we finish this one.
        if (!I->getType()->isVoidTy())
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      }

      ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem);
      if (isNoModRef(ArgMR))
        continue;

      if (Function *F = CB.getCalledFunction())
        if (CB.isArgOperand(U) && UseIndex < F->arg_size() &&
            SCCNodes.count(F->getArg(UseIndex)))
          // This is an argument which is part of the speculative SCC. Note
          // that only operands corresponding to formal arguments of the callee
          // can participate in the speculation.
          break;

      // The accessors used on call site here do the right thing for calls and
      // invokes with operand bundles.
      if (CB.doesNotAccessMemory(UseIndex)) {
        /* nop */
      } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) {
        IsRead = true;
      } else if (!isRefSet(ArgMR) ||
                 CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) {
        IsWrite = true;
      } else {
        return Attribute::None;
      }
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::Store:
      if (cast<StoreInst>(I)->getValueOperand() == *U)
        // untrackable capture
        return Attribute::None;

      // A volatile store has side effects beyond what writeonly can be relied
      // upon.
      if (cast<StoreInst>(I)->isVolatile())
        return Attribute::None;

      IsWrite = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      break;

    default:
      return Attribute::None;
    }
  }

  if (IsWrite && IsRead)
    return Attribute::None;
  else if (IsRead)
    return Attribute::ReadOnly;
  else if (IsWrite)
    return Attribute::WriteOnly;
  else
    return Attribute::ReadNone;
}

/// Deduce returned attributes for the SCC.
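/// e.g. in "ptr f(ptr p) { return p; }" the argument p is marked 'returned'.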
static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
                                     SmallPtrSet<Function *, 8> &Changed) {
  // Check each function in turn, determining if an argument is always returned.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    // There is nothing to do if an argument is already marked as 'returned'.
    if (F->getAttributes().hasAttrSomewhere(Attribute::Returned))
      continue;

    auto FindRetArg = [&]() -> Argument * {
      Argument *RetArg = nullptr;
      for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
          // Note that stripPointerCasts should look through functions with
          // returned arguments.
          auto *RetVal =
              dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts());
          if (!RetVal || RetVal->getType() != F->getReturnType())
            return nullptr;

          if (!RetArg)
            RetArg = RetVal;
          else if (RetArg != RetVal)
            return nullptr;
        }

      return RetArg;
    };

    if (Argument *RetArg = FindRetArg()) {
      RetArg->addAttr(Attribute::Returned);
      ++NumReturned;
      Changed.insert(F);
    }
  }
}

/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information loss
/// when attribute knowledge disappears with the inlined call.
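/// For example, if the entry block of f unconditionally executes "g(x)" where
/// x is an argument of f and g's corresponding parameter is nonnull in a way
/// that excludes undef/poison, then x can be marked nonnull in f as well.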
static bool addArgumentAttrsFromCallsites(Function &F) {
  if (!EnablePoisonArgAttrPropagation)
    return false;

  bool Changed = false;

  // For an argument attribute to transfer from a callsite to the parent, the
  // call must be guaranteed to execute every time the parent is called.
  // Conservatively, just check for calls in the entry block that are guaranteed
  // to execute.
  // TODO: This could be enhanced by testing if the callsite post-dominates the
  // entry block or by doing simple forward walks or backward walks to the
  // callsite.
  BasicBlock &Entry = F.getEntryBlock();
  for (Instruction &I : Entry) {
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (auto *CalledFunc = CB->getCalledFunction()) {
        for (auto &CSArg : CalledFunc->args()) {
          unsigned ArgNo = CSArg.getArgNo();
          auto *FArg = dyn_cast<Argument>(CB->getArgOperand(ArgNo));
          if (!FArg)
            continue;

          if (CSArg.hasNonNullAttr(/*AllowUndefOrPoison=*/false)) {
            // If the non-null callsite argument operand is an argument to 'F'
            // (the caller) and the call is guaranteed to execute, then the
            // value must be non-null throughout 'F'.
            if (!FArg->hasNonNullAttr()) {
              FArg->addAttr(Attribute::NonNull);
              Changed = true;
            }
          } else if (FPClassTest CSNoFPClass = CB->getParamNoFPClass(ArgNo);
                     CSNoFPClass != fcNone &&
                     CB->paramHasAttr(ArgNo, Attribute::NoUndef)) {
            FPClassTest ArgNoFPClass = FArg->getNoFPClass();

            if ((CSNoFPClass | ArgNoFPClass) != ArgNoFPClass) {
              FArg->addAttr(Attribute::getWithNoFPClass(
                  FArg->getContext(), CSNoFPClass | ArgNoFPClass));
              Changed = true;
            }
          }
        }
      }
    }
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  return Changed;
}

static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
  assert((R == Attribute::ReadOnly || R == Attribute::ReadNone ||
          R == Attribute::WriteOnly) &&
         "Must be an access attribute.");
  assert(A && "Argument must not be null.");

  // If the argument already has the attribute, nothing needs to be done.
  if (A->hasAttribute(R))
    return false;

  // Otherwise, remove potentially conflicting attributes, add the new one,
  // and update statistics.
  A->removeAttr(Attribute::WriteOnly);
  A->removeAttr(Attribute::ReadOnly);
  A->removeAttr(Attribute::ReadNone);
  // Remove conflicting writable attribute.
  if (R == Attribute::ReadNone || R == Attribute::ReadOnly)
    A->removeAttr(Attribute::Writable);
  A->addAttr(R);
  if (R == Attribute::ReadOnly)
    ++NumReadOnlyArg;
  else if (R == Attribute::WriteOnly)
    ++NumWriteOnlyArg;
  else
    ++NumReadNoneArg;
  return true;
}

static bool inferInitializes(Argument &A, Function &F) {
  auto ArgumentUses = collectArgumentUsesPerBlock(A, F);
  // No write anywhere in the function, bail.
  if (!ArgumentUses.HasAnyWrite)
    return false;

  auto &UsesPerBlock = ArgumentUses.UsesPerBlock;
  BasicBlock &EntryBB = F.getEntryBlock();
  // A map to store the argument ranges initialized by a BasicBlock (including
  // its successors).
  DenseMap<const BasicBlock *, ConstantRangeList> Initialized;
  // Visit the successors of "BB" block and the instructions in BB (post-order)
  // to get the argument ranges initialized by "BB" (including its successors).
  // The result will be cached in "Initialized".
  auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList {
    auto UPB = UsesPerBlock.find(BB);
    ConstantRangeList CRL;

    // Start with intersection of successors.
    // If this block has any clobbering use, we're going to clear out the
    // ranges at some point in this block anyway, so don't bother looking at
    // successors.
    if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) {
      bool HasAddedSuccessor = false;
      for (auto *Succ : successors(BB)) {
        if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) {
          if (HasAddedSuccessor) {
            CRL = CRL.intersectWith(SuccI->second);
          } else {
            CRL = SuccI->second;
            HasAddedSuccessor = true;
          }
        } else {
          CRL = ConstantRangeList();
          break;
        }
      }
    }

    if (UPB != UsesPerBlock.end()) {
      // Sort uses in this block by instruction order.
      SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts;
      append_range(Insts, UPB->second.Insts);
      sort(Insts, [](std::pair<Instruction *, ArgumentAccessInfo> &LHS,
                     std::pair<Instruction *, ArgumentAccessInfo> &RHS) {
        return LHS.first->comesBefore(RHS.first);
      });

      // From the end of the block to the beginning of the block, set
      // initializes ranges.
      for (auto &[_, Info] : reverse(Insts)) {
        if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown ||
            Info.ArgAccessType ==
                ArgumentAccessInfo::AccessType::WriteWithSideEffect)
          CRL = ConstantRangeList();
        if (!Info.AccessRanges.empty()) {
          if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
              Info.ArgAccessType ==
                  ArgumentAccessInfo::AccessType::WriteWithSideEffect) {
            CRL = CRL.unionWith(Info.AccessRanges);
          } else {
            assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read);
            for (const auto &ReadRange : Info.AccessRanges)
              CRL.subtract(ReadRange);
          }
        }
      }
    }
    return CRL;
  };

  ConstantRangeList EntryCRL;
  // If all write instructions are in the EntryBB, or if the EntryBB has
  // a clobbering use, we only need to look at EntryBB.
  bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB;
  if (!OnlyScanEntryBlock)
    if (auto EntryUPB = UsesPerBlock.find(&EntryBB);
        EntryUPB != UsesPerBlock.end())
      OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess;
  if (OnlyScanEntryBlock) {
    EntryCRL = VisitBlock(&EntryBB);
    if (EntryCRL.empty())
      return false;
  } else {
    // Now we have to go through CFG to get the initialized argument ranges
    // across blocks. With dominance and post-dominance, the initialized ranges
    // by a block include both accesses inside this block and accesses in its
    // (transitive) successors. So visit successors before predecessors with a
    // post-order walk of the blocks and memorize the results in "Initialized".
    for (const BasicBlock *BB : post_order(&F)) {
      ConstantRangeList CRL = VisitBlock(BB);
      if (!CRL.empty())
        Initialized[BB] = CRL;
    }

    auto EntryCRLI = Initialized.find(&EntryBB);
    if (EntryCRLI == Initialized.end())
      return false;

    EntryCRL = EntryCRLI->second;
  }

  assert(!EntryCRL.empty() &&
         "should have bailed already if EntryCRL is empty");

  if (A.hasAttribute(Attribute::Initializes)) {
    ConstantRangeList PreviousCRL =
        A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList();
    if (PreviousCRL == EntryCRL)
      return false;
    EntryCRL = EntryCRL.unionWith(PreviousCRL);
  }

  A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes,
                           EntryCRL.rangesRef()));

  return true;
}
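// For example, given "define void @f(ptr %p) { store i64 0, ptr %p ... }",
// inferInitializes() adds "initializes((0, 8))" to %p (an 8-byte store at
// offset 0 of the argument).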

/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
                             SmallPtrSet<Function *, 8> &Changed,
                             bool SkipInitializes) {
  ArgumentGraph AG;

  auto DetermineAccessAttrsForSingleton = [](Argument *A) {
    SmallPtrSet<Argument *, 8> Self;
    Self.insert(A);
    Attribute::AttrKind R = determinePointerAccessAttrs(A, Self);
    if (R != Attribute::None)
      return addAccessAttr(A, R);
    return false;
  };

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (addArgumentAttrsFromCallsites(*F))
      Changed.insert(F);

    // Functions that are readonly (or readnone), nounwind, willreturn, and
    // don't return a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() && F->willReturn() &&
        F->getReturnType()->isVoidTy()) {
      for (Argument &A : F->args()) {
        if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) {
          A.addAttr(Attribute::getWithCaptureInfo(A.getContext(),
                                                  CaptureInfo::none()));
          ++NumCapturesNone;
          Changed.insert(F);
        }
      }
      continue;
    }

    for (Argument &A : F->args()) {
      if (!A.getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      CaptureInfo OrigCI = A.getAttributes().getCaptureInfo();
      if (!capturesNothing(OrigCI)) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(&A, &Tracker);
        CaptureInfo NewCI = Tracker.CI & OrigCI;
        if (NewCI != OrigCI) {
          if (Tracker.Uses.empty()) {
            // If the information is complete, add the attribute now.
            A.addAttr(Attribute::getWithCaptureInfo(A.getContext(), NewCI));
            addCapturesStat(NewCI);
            Changed.insert(F);
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
            ArgumentGraphNode *Node = AG[&A];
            Node->CC = CaptureComponents(NewCI);
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(AG[Use]);
              if (Use != &A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A.onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone/writeonly without doing
        // an SCC? Note that we don't allow any calls at all here, or else our
        // result will be dependent on the iteration order through the
        // functions in the SCC.
        if (DetermineAccessAttrsForSingleton(&A))
          Changed.insert(F);
      }
      if (!SkipInitializes && !A.onlyReadsMemory()) {
        if (inferInitializes(A, *F))
          Changed.insert(F);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already been
  // made. If the definition doesn't have a 'nocapture' attribute by now, it
  // captures.

  for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // e.g. "void f(int* x) { if (...) f(x); }"
      if (ArgumentSCC[0]->Uses.size() == 1 &&
          ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
        Argument *A = ArgumentSCC[0]->Definition;
        CaptureInfo OrigCI = A->getAttributes().getCaptureInfo();
        CaptureInfo NewCI = CaptureInfo(ArgumentSCC[0]->CC) & OrigCI;
        if (NewCI != OrigCI) {
          A->addAttr(Attribute::getWithCaptureInfo(A->getContext(), NewCI));
          addCapturesStat(NewCI);
          Changed.insert(A->getParent());
        }

        // Infer the access attributes given the new capture information.
        if (DetermineAccessAttrsForSingleton(A))
          Changed.insert(A->getParent());
      }
      continue;
    }

    SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
    // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
    // quickly looking up whether a given Argument is in this ArgumentSCC.
    for (ArgumentGraphNode *I : ArgumentSCC) {
      ArgumentSCCNodes.insert(I->Definition);
    }

    // At the SCC level, only track merged CaptureComponents. We're not
    // currently prepared to handle propagation of return-only captures across
    // the SCC.
    CaptureComponents CC = CaptureComponents::None;
    for (ArgumentGraphNode *N : ArgumentSCC) {
      for (ArgumentGraphNode *Use : N->Uses) {
        Argument *A = Use->Definition;
        if (ArgumentSCCNodes.count(A))
          CC |= Use->CC;
        else
          CC |= CaptureComponents(A->getAttributes().getCaptureInfo());
        break;
      }
      if (capturesAll(CC))
        break;
    }

    if (!capturesAll(CC)) {
      for (ArgumentGraphNode *N : ArgumentSCC) {
        Argument *A = N->Definition;
        CaptureInfo OrigCI = A->getAttributes().getCaptureInfo();
        CaptureInfo NewCI = CaptureInfo(N->CC | CC) & OrigCI;
        if (NewCI != OrigCI) {
          A->addAttr(Attribute::getWithCaptureInfo(A->getContext(), NewCI));
          addCapturesStat(NewCI);
          Changed.insert(A->getParent());
        }
      }
    }

    if (capturesAnyProvenance(CC)) {
      // As the pointer provenance may be captured, determine the pointer
      // attributes looking at each argument individually.
      for (ArgumentGraphNode *N : ArgumentSCC) {
        if (DetermineAccessAttrsForSingleton(N->Definition))
          Changed.insert(N->Definition->getParent());
      }
      continue;
    }

    // We also want to compute readonly/readnone/writeonly. With a small number
    // of false negatives, we can assume that any pointer which is captured
    // isn't going to be provably readonly or readnone, since by definition
    // we can't analyze all uses of a captured pointer.
    //
    // The false negatives happen when the pointer is captured by a function
    // that promises readonly/readnone behaviour on the pointer, then the
    // pointer's lifetime ends before anything that writes to arbitrary memory.
    // Also, a readonly/readnone pointer may be returned, but returning a
    // pointer is capturing it.

    auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) {
      if (A == B)
        return A;
      if (A == Attribute::ReadNone)
        return B;
      if (B == Attribute::ReadNone)
        return A;
      return Attribute::None;
    };

    Attribute::AttrKind AccessAttr = Attribute::ReadNone;
    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes);
      AccessAttr = meetAccessAttr(AccessAttr, K);
      if (AccessAttr == Attribute::None)
        break;
    }

    if (AccessAttr != Attribute::None) {
      for (ArgumentGraphNode *N : ArgumentSCC) {
        Argument *A = N->Definition;
        if (addAccessAttr(A, AccessAttr))
          Changed.insert(A->getParent());
      }
    }
  }
}

/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
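/// e.g. "ptr f(bool b) { return b ? g() : 0; }" is malloc-like if g returns a
/// noalias pointer (or g is in the same SCC) and the result is not captured.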
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(RetVal)) {
      if (!C->isNullValue() && !isa<UndefValue>(C))
        return false;

      continue;
    }

    if (isa<Argument>(RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(RVI);
        FlowsToReturn.insert(SI->getTrueValue());
        FlowsToReturn.insert(SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(RVI);
        FlowsToReturn.insert_range(PN->incoming_values());
        continue;
      }

      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallBase &CB = cast<CallBase>(*RVI);
        if (CB.hasRetAttr(Attribute::NoAlias))
          break;
        if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
          break;
        [[fallthrough]];
      }
      default:
        return false; // Did not come from an allocation.
      }

    if (PointerMayBeCaptured(RetVal, /*ReturnCaptures=*/false))
      return false;
  }

  return true;
}

/// Deduce noalias attributes for the SCC.
static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
                            SmallPtrSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noalias
  // pointers.
  for (Function *F : SCCNodes) {
    // Already noalias.
    if (F->returnDoesNotAlias())
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate noalias return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    if (!isFunctionMallocLike(F, SCCNodes))
      return;
  }

  for (Function *F : SCCNodes) {
    if (F->returnDoesNotAlias() ||
        !F->getReturnType()->isPointerTy())
      continue;

    F->setReturnDoesNotAlias();
    ++NumNoAlias;
    Changed.insert(F);
  }
}

/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
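///
/// For instance (a sketch; the function name is hypothetical), in
///
///   int *f(bool c) { return c ? new int : f(!c); }
///
/// the local `new` result is known nonnull, and the self-call is assumed
/// nonnull until refuted, so f is concluded nonnull with \p Speculative set.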
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types");
  Speculative = false;

  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  auto &DL = F->getDataLayout();

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good.
    if (isKnownNonZero(RetVal, DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      FlowsToReturn.insert(RVI->getOperand(0));
      continue;
    case Instruction::GetElementPtr:
      if (cast<GEPOperator>(RVI)->isInBounds()) {
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      }
      return false;
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(RVI);
      FlowsToReturn.insert(SI->getTrueValue());
      FlowsToReturn.insert(SI->getFalseValue());
      continue;
    }
    case Instruction::PHI: {
      PHINode *PN = cast<PHINode>(RVI);
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        FlowsToReturn.insert(PN->getIncomingValue(i));
      continue;
    }
    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*RVI);
      Function *Callee = CB.getCalledFunction();
      // A call to a node within the SCC is assumed to return null until
      // proven otherwise.
      if (Callee && SCCNodes.count(Callee)) {
        Speculative = true;
        continue;
      }
      return false;
    }
    default:
      return false; // Unknown source, may be null.
    }
    llvm_unreachable("should have either continued or returned");
  }

  return true;
}

/// Deduce nonnull attributes for the SCC.
static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
                            SmallPtrSet<Function *, 8> &Changed) {
  // Speculatively assume that all functions in the SCC return only nonnull
  // pointers. We may refute this as we analyze functions.
  bool SCCReturnsNonNull = true;

  // Check each function in turn, determining which functions return nonnull
  // pointers.
  for (Function *F : SCCNodes) {
    // Already nonnull.
    if (F->getAttributes().hasRetAttr(Attribute::NonNull))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate nonnull return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    bool Speculative = false;
    if (isReturnNonNull(F, SCCNodes, Speculative)) {
      if (!Speculative) {
        // Mark the function eagerly since we may discover a function
        // which prevents us from speculating about the entire SCC.
        LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
                          << " as nonnull\n");
        F->addRetAttr(Attribute::NonNull);
        ++NumNonNullReturn;
        Changed.insert(F);
      }
      continue;
    }
    // At least one function returns something which could be null; we can't
    // speculate any more.
    SCCReturnsNonNull = false;
  }

  if (SCCReturnsNonNull) {
    for (Function *F : SCCNodes) {
      if (F->getAttributes().hasRetAttr(Attribute::NonNull) ||
          !F->getReturnType()->isPointerTy())
        continue;

      LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
      F->addRetAttr(Attribute::NonNull);
      ++NumNonNullReturn;
      Changed.insert(F);
    }
  }
}

/// Deduce noundef attributes for the SCC.
static void addNoUndefAttrs(const SCCNodeSet &SCCNodes,
                            SmallPtrSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noundef
  // values.
  for (Function *F : SCCNodes) {
    // Already noundef.
    AttributeList Attrs = F->getAttributes();
    if (Attrs.hasRetAttr(Attribute::NoUndef))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // MemorySanitizer assumes that the definition and declaration of a
    // function will be consistent, so skip inference for any function that
    // carries the sanitize_memory attribute.
    if (F->hasFnAttribute(Attribute::SanitizeMemory))
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    const DataLayout &DL = F->getDataLayout();
    if (all_of(*F, [&](BasicBlock &BB) {
          if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
            // TODO: perform context-sensitive analysis?
            Value *RetVal = Ret->getReturnValue();
            if (!isGuaranteedNotToBeUndefOrPoison(RetVal))
              return false;

            // We know the original return value is not undef or poison now,
            // but it could still be converted to poison by another return
            // attribute. Try to explicitly re-prove the relevant attributes.
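            //
            // For example (a sketch): with a nonnull return attribute already
            // present, a well-defined but possibly-null return value would
            // become poison at the call site, so adding noundef without
            // re-proving nonnull would be unsound.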
            if (Attrs.hasRetAttr(Attribute::NonNull) &&
                !isKnownNonZero(RetVal, DL))
              return false;

            if (MaybeAlign Align = Attrs.getRetAlignment())
              if (RetVal->getPointerAlignment(DL) < *Align)
                return false;

            Attribute Attr = Attrs.getRetAttr(Attribute::Range);
            if (Attr.isValid() &&
                !Attr.getRange().contains(
                    computeConstantRange(RetVal, /*ForSigned=*/false)))
              return false;
          }
          return true;
        })) {
      F->addRetAttr(Attribute::NoUndef);
      ++NumNoUndefReturn;
      Changed.insert(F);
    }
  }
}

namespace {

/// Collects a set of attribute inference requests and performs them all in
/// one go on a single SCC node. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// As soon as all the bodies have been verified, we are free to set the
/// attribute. Customization of inference for individual attributes is
/// performed by providing a handful of predicates for each attribute.
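///
/// A minimal usage sketch (the descriptors registered later in this file
/// follow this shape; the lambdas here are illustrative placeholders):
///
///   AttributeInferer AI;
///   AI.registerAttrInference(
///       {Attribute::NoUnwind,
///        [](const Function &F) { return F.doesNotThrow(); },
///        [](Instruction &I) { return I.mayThrow(); },
///        [](Function &F) { F.setDoesNotThrow(); },
///        /*RequiresExactDefinition=*/true});
///   AI.run(SCCNodes, Changed);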
class AttributeInferer {
public:
  /// Describes a request for inference of a single attribute.
  struct InferenceDescriptor {

    /// Returns true if this function does not have to be handled.
    /// General intent for this predicate is to provide an optimization
    /// for functions that do not need this attribute inference at all
    /// (say, for functions that already have the attribute).
    std::function<bool(const Function &)> SkipFunction;

    /// Returns true if this instruction violates attribute assumptions.
    std::function<bool(Instruction &)> InstrBreaksAttribute;

    /// Sets the inferred attribute for this function.
    std::function<void(Function &)> SetAttribute;

    /// Attribute we derive.
    Attribute::AttrKind AKind;

    /// If true, only "exact" definitions can be used to infer this attribute.
    /// See GlobalValue::isDefinitionExact.
    bool RequiresExactDefinition;

    InferenceDescriptor(Attribute::AttrKind AK,
                        std::function<bool(const Function &)> SkipFunc,
                        std::function<bool(Instruction &)> InstrScan,
                        std::function<void(Function &)> SetAttr,
                        bool ReqExactDef)
        : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
          SetAttribute(SetAttr), AKind(AK),
          RequiresExactDefinition(ReqExactDef) {}
  };

private:
  SmallVector<InferenceDescriptor, 4> InferenceDescriptors;

public:
  void registerAttrInference(InferenceDescriptor AttrInference) {
    InferenceDescriptors.push_back(AttrInference);
  }

  void run(const SCCNodeSet &SCCNodes, SmallPtrSet<Function *, 8> &Changed);
};

/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
void AttributeInferer::run(const SCCNodeSet &SCCNodes,
                           SmallPtrSet<Function *, 8> &Changed) {
  SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
  // Go through all the functions in SCC and check corresponding attribute
  // assumptions for each of them. Attributes that are invalid for this SCC
  // will be removed from InferInSCC.
  for (Function *F : SCCNodes) {

    // No attributes whose assumptions are still valid - done.
    if (InferInSCC.empty())
      return;

    // Check if our attributes ever need scanning/can be scanned.
    llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
      if (ID.SkipFunction(*F))
        return false;

      // Remove from further inference (invalidate) when visiting a function
      // that has no instructions to scan/has an unsuitable definition.
      return F->isDeclaration() ||
             (ID.RequiresExactDefinition && !F->hasExactDefinition());
    });

    // For each attribute still in InferInSCC that doesn't explicitly skip F,
    // set up the scan of F's instructions to verify the attribute's
    // assumptions.
    SmallVector<InferenceDescriptor, 4> InferInThisFunc;
    llvm::copy_if(
        InferInSCC, std::back_inserter(InferInThisFunc),
        [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });

    if (InferInThisFunc.empty())
      continue;

    // Start instruction scan.
    for (Instruction &I : instructions(*F)) {
      llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) {
        if (!ID.InstrBreaksAttribute(I))
          return false;
        // Remove the attribute from further inference on any other functions
        // because its assumptions have just been violated.
        llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) {
          return D.AKind == ID.AKind;
        });
        // Remove the attribute from the rest of the current instruction scan.
        return true;
      });

      if (InferInThisFunc.empty())
        break;
    }
  }

  if (InferInSCC.empty())
    return;

  for (Function *F : SCCNodes)
    // At this point InferInSCC contains only attributes whose functions were
    // either:
    //   - explicitly skipped from scan/inference, or
    //   - verified to have no instructions that break attribute assumptions.
    // Hence we just go and force the attribute for all non-skipped functions.
    for (auto &ID : InferInSCC) {
      if (ID.SkipFunction(*F))
        continue;
      Changed.insert(F);
      ID.SetAttribute(*F);
    }
}

struct SCCNodesResult {
  SCCNodeSet SCCNodes;
};

} // end anonymous namespace

/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
                                     const SCCNodeSet &SCCNodes) {
  const CallBase *CB = dyn_cast<CallBase>(&I);
  // Breaks the non-convergent assumption if CB is a convergent call to a
  // function not in the SCC.
  return CB && CB->isConvergent() &&
         !SCCNodes.contains(CB->getCalledFunction());
}

/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
  if (!I.mayThrow(/* IncludePhaseOneUnwind */ true))
    return false;
  if (const auto *CI = dyn_cast<CallInst>(&I)) {
    if (Function *Callee = CI->getCalledFunction()) {
      // I is a may-throw call to a function inside our SCC. This doesn't
      // invalidate our current working assumption that the SCC is no-throw;
      // we just have to scan that other function.
      if (SCCNodes.contains(Callee))
        return false;
    }
  }
  return true;
}

/// Helper for NoFree inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
  CallBase *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    return false;

  if (CB->hasFnAttr(Attribute::NoFree))
    return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different from the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
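//
// For example (a sketch in IR terms): `store atomic i32 0, ptr %p monotonic`
// counts as ordered here, while `load atomic i32, ptr %p unordered` does not.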
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}

static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile may synchronize.
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize. (See the comment above on monotonic.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non-call-site cases are covered by the two checks above.
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non-volatile memset/memcpy/memmove are nosync.
  // NOTE: Only intrinsics with volatile flags should be handled here. All
  // others should be marked in Intrinsics.td.
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

/// Attempt to remove the convergent function attribute when possible.
///
/// Any functions that are changed are recorded in \p Changed.
static void inferConvergent(const SCCNodeSet &SCCNodes,
                            SmallPtrSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  // Request to remove the convergent attribute from all functions in the SCC
  // if every callsite within the SCC is not convergent (except for calls
  // to functions within the SCC).
  // Note: Removal of the attr from the callsites will happen in
  // InstCombineCalls separately.
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::Convergent,
      // Skip non-convergent functions.
      [](const Function &F) { return !F.isConvergent(); },
      // Instructions that break non-convergent assumption.
      [SCCNodes](Instruction &I) {
        return InstrBreaksNonConvergent(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
                          << "\n");
        F.setNotConvergent();
      },
      /* RequiresExactDefinition= */ false});
  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions.
///
/// Any functions that are changed are recorded in \p Changed.
static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
                                         SmallPtrSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer nounwind attribute for all the functions in the SCC if
    // every callsite within the SCC is not throwing (except for calls to
    // functions within the SCC). Note that the nounwind attribute suffers
    // from derefinement - results may change depending on how functions are
    // optimized. Thus it can be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request to infer nofree attribute for all the functions in the SCC if
    // every callsite within the SCC does not directly or indirectly free
    // memory (except for calls to functions within the SCC). Note that the
    // nofree attribute suffers from derefinement - results may change
    // depending on how functions are optimized. Thus it can be inferred only
    // from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break non-deallocating assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n");
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /* RequiresExactDefinition= */ true});

  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});

  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

// Determines whether the function 'F' may have a callee that could lead to a
// recursive call back to 'F'. Returns true if any call within 'F' could lead
// to such recursion, and false otherwise (in which case 'F' can be marked
// 'norecurse').
// The 'AnyFunctionsAddressIsTaken' parameter is a module-wide flag
// that is true if any function's address is taken, or if any function
// has external linkage. This is used to determine the safety of
// external/library calls.
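//
// E.g. (a sketch; the names are hypothetical):
//
//   static void leaf(void) {}     // assume already marked norecurse
//   void f(void) { leaf(); }      // mayHaveRecursiveCallee(f) == false
//   void g(void) { g(); }         // direct recursion: returns true
//   void h(void) { extern_fn(); } // unknown external callee: returns true
//                                 // unless the module-wide flag permits it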
static bool mayHaveRecursiveCallee(Function &F,
                                   bool AnyFunctionsAddressIsTaken = true) {
  for (const auto &BB : F) {
    for (const auto &I : BB.instructionsWithoutDebug()) {
      if (const auto *CB = dyn_cast<CallBase>(&I)) {
        const Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == &F)
          return true;

        if (Callee->doesNotRecurse())
          continue;

        if (!AnyFunctionsAddressIsTaken ||
            (Callee->isDeclaration() &&
             Callee->hasFnAttribute(Attribute::NoCallback)))
          continue;
        return true;
      }
    }
  }
  return false;
}

static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
                              SmallPtrSet<Function *, 8> &Changed) {
  // Try to identify functions that do not recurse.

  // If the SCC contains multiple nodes we know for sure there is recursion.
  if (SCCNodes.size() != 1)
    return;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return;
  if (!mayHaveRecursiveCallee(*F)) {
    // Every call was to a non-recursive function other than this function,
    // and we have no indirect recursion as the SCC size is one. This function
    // cannot recurse.
    F->setDoesNotRecurse();
    ++NumNoRecurse;
    Changed.insert(F);
  }
}

// Set the noreturn function attribute if possible.
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
                             SmallPtrSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->doesNotReturn())
      continue;

    if (!canReturn(*F)) {
      F->setDoesNotReturn();
      Changed.insert(F);
    }
  }
}

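// Returns true if every path from the entry block to a function exit passes
// through a call to a `cold` function; addColdAttrs below uses this to
// propagate coldness to the caller. A sketch of a function for which this
// holds (the callee names are hypothetical):
//
//   void f(int c) {
//     if (c)
//       cold_abort();  // cold callsite
//     else
//       cold_log();    // cold callsite
//   }                  // every path is cold, so f itself can be marked cold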
static bool allPathsGoThroughCold(Function &F) {
  SmallDenseMap<BasicBlock *, bool, 16> ColdPaths;
  ColdPaths[&F.front()] = false;
  SmallVector<BasicBlock *> Jobs;
  Jobs.push_back(&F.front());

  while (!Jobs.empty()) {
    BasicBlock *BB = Jobs.pop_back_val();

    // If the block contains a cold callsite, this path through the CFG is
    // cold. Ignore whether the instructions actually are guaranteed to
    // transfer execution. Divergent behavior is considered unlikely.
    if (any_of(*BB, [](Instruction &I) {
          if (auto *CB = dyn_cast<CallBase>(&I))
            return CB->hasFnAttr(Attribute::Cold);
          return false;
        })) {
      ColdPaths[BB] = true;
      continue;
    }

    auto Succs = successors(BB);
    // We found a path that doesn't go through any cold callsite.
    if (Succs.empty())
      return false;

    // We didn't find a cold callsite in this BB, so check that all successors
    // contain a cold callsite (or that their successors do).
    // Potential TODO: We could use static branch hints to assume certain
    // successor paths are inherently cold, irrespective of whether they
    // contain a cold callsite.
    for (BasicBlock *Succ : Succs) {
      // Start with false; this is necessary to ensure we don't turn loops
      // cold.
      auto [Iter, Inserted] = ColdPaths.try_emplace(Succ, false);
      if (!Inserted) {
        if (Iter->second)
          continue;
        return false;
      }
      Jobs.push_back(Succ);
    }
  }
  return true;
}

// Set the cold function attribute if possible.
static void addColdAttrs(const SCCNodeSet &SCCNodes,
                         SmallPtrSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->hasFnAttribute(Attribute::Cold) || F->hasFnAttribute(Attribute::Hot))
      continue;

    // Potential TODO: We could add attribute `cold` on functions with
    // `coldcc`.
    if (allPathsGoThroughCold(*F)) {
      F->addFnAttr(Attribute::Cold);
      ++NumCold;
      Changed.insert(F);
      continue;
    }
  }
}

static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that the
  // definition we'll get at link time is *exactly* the definition we see now.
  // For more details, see GlobalValue::mayBeDerefined.
  if (!F.hasExactDefinition())
    return false;

  // A must-progress function without side effects must return.
  if (F.mustProgress() && F.onlyReadsMemory())
    return true;

  // Can only analyze functions with a definition.
  if (F.isDeclaration())
    return false;

  // Functions with loops require more sophisticated analysis, as the loop
  // may be infinite. For now, don't try to handle them.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
  FindFunctionBackedges(F, Backedges);
  if (!Backedges.empty())
    return false;

  // If there are no loops, then the function is willreturn if all calls in
  // it are willreturn.
  return all_of(instructions(F), [](const Instruction &I) {
    return I.willReturn();
  });
}
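
// E.g. (a sketch; assumes g is itself willreturn):
//
//   int f(int x) { return g(x) + 1; }       // loop-free: inferred willreturn
//   int h(int x) { while (x) {} return 0; } // backedge: not inferred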

// Set the willreturn function attribute if possible.
static void addWillReturn(const SCCNodeSet &SCCNodes,
                          SmallPtrSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || F->willReturn() || !functionWillReturn(*F))
      continue;

    F->setWillReturn();
    ++NumWillReturn;
    Changed.insert(F);
  }
}

static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
  SCCNodesResult Res;
  for (Function *F : Functions) {
    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) ||
        F->isPresplitCoroutine()) {
      // Omit any functions we're trying not to optimize from the set.
      continue;
    }

    Res.SCCNodes.insert(F);
  }
  return Res;
}

template <typename AARGetterT>
static SmallPtrSet<Function *, 8>
deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
                       bool ArgAttrsOnly = false) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);

  // Bail if the SCC only contains optnone functions.
  if (Nodes.SCCNodes.empty())
    return {};

  SmallPtrSet<Function *, 8> Changed;
  if (ArgAttrsOnly) {
    // ArgAttrsOnly means to only infer attributes that may aid optimizations
    // on the *current* function. The "initializes" attribute aids
    // optimizations (like DSE) in the callers, so skip "initializes" here.
    addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true);
    return Changed;
  }

  addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
  addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
  addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false);
  inferConvergent(Nodes.SCCNodes, Changed);
  addNoReturnAttrs(Nodes.SCCNodes, Changed);
  addColdAttrs(Nodes.SCCNodes, Changed);
  addWillReturn(Nodes.SCCNodes, Changed);
  addNoUndefAttrs(Nodes.SCCNodes, Changed);
  addNoAliasAttrs(Nodes.SCCNodes, Changed);
  addNonNullAttrs(Nodes.SCCNodes, Changed);
  inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
  addNoRecurseAttrs(Nodes.SCCNodes, Changed);

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This is handling the cases where one attribute on a signature
  // implies another, but for implementation reasons the inference rule for
  // the latter is missing (or simply less sophisticated).
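  // For instance, inferAttributesFromOthers applies implications such as:
  // readnone on a non-convergent function implies nosync, readonly implies
  // nofree, and willreturn implies mustprogress.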
  for (Function *F : Nodes.SCCNodes)
    if (F)
      if (inferAttributesFromOthers(*F))
        Changed.insert(F);

  return Changed;
}

PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  // Skip non-recursive functions if requested.
  // Only infer argument attributes for non-recursive functions, because
  // it can affect optimization behavior in conjunction with noalias.
  bool ArgAttrsOnly = false;
  if (C.size() == 1 && SkipNonRecursive) {
    LazyCallGraph::Node &N = *C.begin();
    if (!N->lookup(N))
      ArgAttrsOnly = true;
  }

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  auto ChangedFunctions =
      deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
  if (ChangedFunctions.empty())
    return PreservedAnalyses::all();

  // Invalidate analyses for modified functions so that we don't have to
  // invalidate all analyses for all functions in this SCC.
  PreservedAnalyses FuncPA;
  // We haven't changed the CFG for modified functions.
  FuncPA.preserveSet<CFGAnalyses>();
  for (Function *Changed : ChangedFunctions) {
    FAM.invalidate(*Changed, FuncPA);
    // Also invalidate any direct callers of changed functions since analyses
    // may care about attributes of direct callees. For example, MemorySSA
    // cares about whether or not a call's callee modifies memory and queries
    // that through function attributes.
    for (auto *U : Changed->users()) {
      if (auto *Call = dyn_cast<CallBase>(U)) {
        if (Call->getCalledOperand() == Changed)
          FAM.invalidate(*Call->getFunction(), FuncPA);
      }
    }
  }

  PreservedAnalyses PA;
  // We have not added or removed functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We already invalidated all relevant function analyses above.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}

void PostOrderFunctionAttrsPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  if (SkipNonRecursive)
    OS << "<skip-non-recursive-function-attrs>";
}

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    Functions.push_back(I->getFunction());
  }

  return !deriveAttrsInPostOrder(Functions, AARGetter).empty();
}

static bool addNoRecurseAttrsTopDown(Function &F) {
  if (F.doesNotRecurse())
    return false;

  // We check the preconditions for the function prior to calling this to
  // avoid the cost of building up a reversible post-order list. We assert
  // them here to make sure none of the invariants this relies on were
  // violated.
  assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
  assert(F.hasInternalLinkage() &&
         "Can only do top-down deduction for internal linkage functions!");

  // If F is internal and all of its uses are calls from non-recursive
  // functions, then none of its calls could in fact recurse without going
  // through a function marked norecurse, and so we can mark this function too
  // as norecurse. Note that the uses must actually be calls -- otherwise
  // a pointer to this function could be returned from a norecurse function but
  // this function could be recursively (indirectly) called. Note that this
  // also detects if F is directly recursive as F is not yet marked as
  // a norecurse function.
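  //
  // E.g. (a sketch; the names are hypothetical):
  //
  //   static void f(void) {}  // internal
  //   void g(void) { f(); }   // g assumed already marked norecurse
  //
  // With g norecurse and the direct call the only use of f, f can be marked
  // norecurse too.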
  for (auto &U : F.uses()) {
    const CallBase *CB = dyn_cast<CallBase>(U.getUser());
    if (!CB || !CB->isCallee(&U) ||
        !CB->getParent()->getParent()->doesNotRecurse())
      return false;
  }
  F.setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

static bool addNoFPClassAttrsTopDown(Function &F) {
  assert(!F.isDeclaration() && "Cannot deduce nofpclass without a definition!");
  unsigned NumArgs = F.arg_size();
  SmallVector<FPClassTest, 8> ArgsNoFPClass(NumArgs, fcAllFlags);
  FPClassTest RetNoFPClass = fcAllFlags;

  bool Changed = false;
  for (User *U : F.users()) {
    auto *CB = dyn_cast<CallBase>(U);
    if (!CB || CB->getCalledFunction() != &F)
      return false;

    RetNoFPClass &= CB->getRetNoFPClass();
    for (unsigned I = 0; I != NumArgs; ++I) {
      // TODO: Consider computeKnownFPClass, at least with a small search
      // depth. This will currently not catch non-splat vectors.
      const APFloat *Cst;
      if (match(CB->getArgOperand(I), m_APFloat(Cst)))
        ArgsNoFPClass[I] &= ~Cst->classify();
      else
        ArgsNoFPClass[I] &= CB->getParamNoFPClass(I);
    }
  }

  LLVMContext &Ctx = F.getContext();

  if (RetNoFPClass != fcNone) {
    FPClassTest OldAttr = F.getAttributes().getRetNoFPClass();
    if (OldAttr != RetNoFPClass) {
      F.addRetAttr(Attribute::getWithNoFPClass(Ctx, RetNoFPClass));
      Changed = true;
    }
  }

  for (unsigned I = 0; I != NumArgs; ++I) {
    FPClassTest ArgNoFPClass = ArgsNoFPClass[I];
    if (ArgNoFPClass == fcNone)
      continue;
    FPClassTest OldAttr = F.getParamNoFPClass(I);
    if (OldAttr == ArgNoFPClass)
      continue;

    F.addParamAttr(I, Attribute::getWithNoFPClass(Ctx, ArgNoFPClass));
    Changed = true;
  }

  return Changed;
}

static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) {
  // We only have a post-order SCC traversal (because SCCs are inherently
  // discovered in post-order), so we accumulate them in a vector and then walk
  // it in reverse. This is simpler than using the RPO iterator infrastructure
  // because we need to combine SCC detection and the PO walk of the call
  // graph. We can also cheat egregiously because we're primarily interested in
  // synthesizing norecurse, so we only save the singleton SCCs, as SCCs with
  // multiple functions in them will clearly be recursive.

  SmallVector<Function *, 16> Worklist;
  CG.buildRefSCCs();
  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
    for (LazyCallGraph::SCC &SCC : RC) {
      if (SCC.size() != 1)
        continue;
      Function &F = SCC.begin()->getFunction();
      if (!F.isDeclaration() && F.hasInternalLinkage() && !F.use_empty())
        Worklist.push_back(&F);
    }
  }
  bool Changed = false;
  for (auto *F : llvm::reverse(Worklist)) {
    Changed |= addNoRecurseAttrsTopDown(*F);
    Changed |= addNoFPClassAttrsTopDown(*F);
  }

  return Changed;
}

PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &CG = AM.getResult<LazyCallGraphAnalysis>(M);

  if (!deduceFunctionAttributeInRPO(M, CG))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<LazyCallGraphAnalysis>();
  return PA;
}

PreservedAnalyses NoRecurseLTOInferencePass::run(Module &M,
                                                 ModuleAnalysisManager &MAM) {

  // Check if any function in the whole program has its address taken or has
  // potentially external linkage.
  // We use this information when inferring the norecurse attribute: if there
  // is no function whose address is taken and all functions have internal
  // linkage, there is no path for a callback to any user function.
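  //
  // E.g. (a sketch): if every function in the program is internal and no
  // function's address is taken, a call to an external declaration such as
  // qsort cannot re-enter this module through a callback, so it does not
  // block norecurse inference.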
  bool AnyFunctionsAddressIsTaken = false;
  for (Function &F : M) {
    if (F.isDeclaration() || F.doesNotRecurse())
      continue;
    if (!F.hasLocalLinkage() || F.hasAddressTaken()) {
      AnyFunctionsAddressIsTaken = true;
      break;
    }
  }

  // Run norecurse inference on all RefSCCs in the LazyCallGraph for this
  // module.
  bool Changed = false;
  LazyCallGraph &CG = MAM.getResult<LazyCallGraphAnalysis>(M);
  CG.buildRefSCCs();

  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
    // Skip any RefSCC that is part of a call cycle. A RefSCC containing more
    // than one SCC indicates a recursive relationship involving indirect
    // calls.
    if (RC.size() > 1)
      continue;

    // The RefSCC contains a single SCC. An SCC of size > 1 indicates mutually
    // recursive functions, e.g. foo1 -> foo2 -> foo3 -> foo1.
    LazyCallGraph::SCC &S = *RC.begin();
    if (S.size() > 1)
      continue;

    // Get the single function from this SCC.
    Function &F = S.begin()->getFunction();
    if (!F.hasExactDefinition() || F.doesNotRecurse())
      continue;

    // If the analysis confirms that this function has no recursive calls
    // (whether direct, indirect, or through external linkage),
    // we can safely apply the norecurse attribute.
    if (!mayHaveRecursiveCallee(F, AnyFunctionsAddressIsTaken)) {
      F.setDoesNotRecurse();
      ++NumNoRecurse;
      Changed = true;
    }
  }

  PreservedAnalyses PA;
  if (Changed)
    PA.preserve<LazyCallGraphAnalysis>();
  else
    PA = PreservedAnalyses::all();
  return PA;
}