1//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements interprocedural passes which walk the
11/// call-graph deducing and/or propagating function attributes.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/IPO/FunctionAttrs.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/PostOrderIterator.h"
19#include "llvm/ADT/SCCIterator.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/Analysis/AssumptionCache.h"
26#include "llvm/Analysis/BasicAliasAnalysis.h"
27#include "llvm/Analysis/CFG.h"
28#include "llvm/Analysis/CGSCCPassManager.h"
29#include "llvm/Analysis/CallGraph.h"
30#include "llvm/Analysis/CallGraphSCCPass.h"
31#include "llvm/Analysis/CaptureTracking.h"
32#include "llvm/Analysis/LazyCallGraph.h"
33#include "llvm/Analysis/MemoryLocation.h"
34#include "llvm/Analysis/ValueTracking.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/ConstantRangeList.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/Function.h"
42#include "llvm/IR/InstIterator.h"
43#include "llvm/IR/InstrTypes.h"
44#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Instructions.h"
46#include "llvm/IR/IntrinsicInst.h"
47#include "llvm/IR/Metadata.h"
48#include "llvm/IR/ModuleSummaryIndex.h"
49#include "llvm/IR/PassManager.h"
50#include "llvm/IR/PatternMatch.h"
51#include "llvm/IR/Type.h"
52#include "llvm/IR/Use.h"
53#include "llvm/IR/User.h"
54#include "llvm/IR/Value.h"
55#include "llvm/Support/Casting.h"
56#include "llvm/Support/CommandLine.h"
57#include "llvm/Support/Compiler.h"
58#include "llvm/Support/Debug.h"
59#include "llvm/Support/ErrorHandling.h"
60#include "llvm/Support/KnownFPClass.h"
61#include "llvm/Support/raw_ostream.h"
62#include "llvm/Transforms/IPO.h"
63#include "llvm/Transforms/Utils/Local.h"
64#include <cassert>
65#include <iterator>
66#include <map>
67#include <optional>
68#include <vector>
69
70using namespace llvm;
71using namespace llvm::PatternMatch;
72
73#define DEBUG_TYPE "function-attrs"
74
75STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute");
76STATISTIC(NumCapturesNone, "Number of arguments marked captures(none)");
77STATISTIC(NumCapturesPartial, "Number of arguments marked with captures "
78 "attribute other than captures(none)");
79STATISTIC(NumReturned, "Number of arguments marked returned");
80STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
81STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
82STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
83STATISTIC(NumNoAlias, "Number of function returns marked noalias");
84STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
85STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef");
86STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
87STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
88STATISTIC(NumNoFree, "Number of functions marked as nofree");
89STATISTIC(NumNoFreeArg, "Number of arguments marked as nofree");
90STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
91STATISTIC(NumNoSync, "Number of functions marked as nosync");
92STATISTIC(NumCold, "Number of functions marked as cold");
93
94STATISTIC(NumThinLinkNoRecurse,
95 "Number of functions marked as norecurse during thinlink");
96STATISTIC(NumThinLinkNoUnwind,
97 "Number of functions marked as nounwind during thinlink");
98
99static cl::opt<bool> EnablePoisonArgAttrPropagation(
100 "enable-poison-arg-attr-prop", cl::init(Val: true), cl::Hidden,
101 cl::desc("Try to propagate nonnull and nofpclass argument attributes from "
102 "callsites to caller functions."));
103
104static cl::opt<bool> DisableNoUnwindInference(
105 "disable-nounwind-inference", cl::Hidden,
106 cl::desc("Stop inferring nounwind attribute during function-attrs pass"));
107
108static cl::opt<bool> DisableNoFreeInference(
109 "disable-nofree-inference", cl::Hidden,
110 cl::desc("Stop inferring nofree attribute during function-attrs pass"));
111
112static cl::opt<bool> DisableThinLTOPropagation(
113 "disable-thinlto-funcattrs", cl::init(Val: true), cl::Hidden,
114 cl::desc("Don't propagate function-attrs in thinLTO"));
115
116static void addCapturesStat(CaptureInfo CI) {
117 if (capturesNothing(CC: CI))
118 ++NumCapturesNone;
119 else
120 ++NumCapturesPartial;
121}
122
123namespace {
124
125using SCCNodeSet = SmallSetVector<Function *, 8>;
126
127} // end anonymous namespace
128
129static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc,
130 ModRefInfo MR, AAResults &AAR) {
131 // Ignore accesses to known-invariant or local memory.
132 MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocal=*/IgnoreLocals: true);
133 if (isNoModRef(MRI: MR))
134 return;
135
136 const Value *UO = getUnderlyingObjectAggressive(V: Loc.Ptr);
137 if (isa<AllocaInst>(Val: UO))
138 return;
139 if (isa<Argument>(Val: UO)) {
140 ME |= MemoryEffects::argMemOnly(MR);
141 return;
142 }
143
144 // If it's not an identified object, it might be an argument.
145 if (!isIdentifiedObject(V: UO))
146 ME |= MemoryEffects::argMemOnly(MR);
147 ME |= MemoryEffects(IRMemLocation::ErrnoMem, MR);
148 ME |= MemoryEffects(IRMemLocation::Other, MR);
149}
150
151static void addArgLocs(MemoryEffects &ME, const CallBase *Call,
152 ModRefInfo ArgMR, AAResults &AAR) {
153 for (const Value *Arg : Call->args()) {
154 if (!Arg->getType()->isPtrOrPtrVectorTy())
155 continue;
156
157 addLocAccess(ME,
158 Loc: MemoryLocation::getBeforeOrAfter(Ptr: Arg, AATags: Call->getAAMetadata()),
159 MR: ArgMR, AAR);
160 }
161}
162
163/// Returns the memory access attribute for function F using AAR for AA results,
164/// where SCCNodes is the current SCC.
165///
166/// If ThisBody is true, this function may examine the function body and will
167/// return a result pertaining to this copy of the function. If it is false, the
168/// result will be based only on AA results for the function declaration; it
169/// will be assumed that some other (perhaps less optimized) version of the
170/// function may be selected at link time.
171///
172/// The return value is split into two parts: Memory effects that always apply,
173/// and additional memory effects that apply if any of the functions in the SCC
174/// can access argmem.
175static std::pair<MemoryEffects, MemoryEffects>
176checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
177 const SCCNodeSet &SCCNodes) {
178 MemoryEffects OrigME = AAR.getMemoryEffects(F: &F);
179 if (OrigME.doesNotAccessMemory())
180 // Already perfect!
181 return {OrigME, MemoryEffects::none()};
182
183 if (!ThisBody)
184 return {OrigME, MemoryEffects::none()};
185
186 MemoryEffects ME = MemoryEffects::none();
187 // Additional locations accessed if the SCC accesses argmem.
188 MemoryEffects RecursiveArgME = MemoryEffects::none();
189
190 auto AddNonArgMemoryEffects = [&ME](MemoryEffects InstME) {
191 // Merge instruction memory effects, including inaccessible and errno
192 // memory, but excluding argument memory, which is handled separately.
193 ME |= InstME.getWithoutLoc(Loc: IRMemLocation::ArgMem);
194
195 // If the instruction accesses captured memory (currently part of "other")
196 // and an argument is captured (currently not tracked), then it may also
197 // access argument memory.
198 ModRefInfo OtherMR = InstME.getModRef(Loc: IRMemLocation::Other);
199 ME |= MemoryEffects::argMemOnly(MR: OtherMR);
200 };
201
202 // Inalloca and preallocated arguments are always clobbered by the call.
203 if (F.getAttributes().hasAttrSomewhere(Kind: Attribute::InAlloca) ||
204 F.getAttributes().hasAttrSomewhere(Kind: Attribute::Preallocated))
205 ME |= MemoryEffects::argMemOnly(MR: ModRefInfo::ModRef);
206
207 // Scan the function body for instructions that may read or write memory.
208 for (Instruction &I : instructions(F)) {
209 // Some instructions can be ignored even if they read or write memory.
210 // Detect these now, skipping to the next instruction if one is found.
211 if (auto *Call = dyn_cast<CallBase>(Val: &I)) {
212 // We can optimistically ignore calls to functions in the same SCC, with
213 // two caveats:
214 // * Calls with operand bundles may have additional effects.
215 // * Argument memory accesses may imply additional effects depending on
216 // what the argument location is.
217 if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
218 SCCNodes.count(key: Call->getCalledFunction())) {
219 // Keep track of which additional locations are accessed if the SCC
220 // turns out to access argmem.
221 addArgLocs(ME&: RecursiveArgME, Call, ArgMR: ModRefInfo::ModRef, AAR);
222 continue;
223 }
224
225 MemoryEffects CallME = AAR.getMemoryEffects(Call);
226
227 // If the call doesn't access memory, we're done.
228 if (CallME.doesNotAccessMemory())
229 continue;
230
231 // A pseudo probe call shouldn't change any function attribute since it
232 // doesn't translate to a real instruction. It comes with a memory access
233 // tag to prevent itself being removed by optimizations and not block
234 // other instructions being optimized.
235 if (isa<PseudoProbeInst>(Val: I))
236 continue;
237
238 AddNonArgMemoryEffects(CallME);
239
240 // Check whether all pointer arguments point to local memory, and
241 // ignore calls that only access local memory.
242 ModRefInfo ArgMR = CallME.getModRef(Loc: IRMemLocation::ArgMem);
243 if (ArgMR != ModRefInfo::NoModRef)
244 addArgLocs(ME, Call, ArgMR, AAR);
245 continue;
246 }
247
248 MemoryEffects InstME = I.getMemoryEffects();
249 if (InstME.doesNotAccessMemory())
250 continue;
251
252 std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(Inst: &I);
253 if (!Loc) {
254 // If no location is known, conservatively assume anything can be
255 // accessed.
256 ME |= MemoryEffects(InstME.getModRef());
257 continue;
258 }
259
260 AddNonArgMemoryEffects(InstME);
261 addLocAccess(ME, Loc: *Loc, MR: InstME.getModRef(Loc: IRMemLocation::ArgMem), AAR);
262 }
263
264 return {OrigME & ME, RecursiveArgME};
265}
266
267MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F,
268 AAResults &AAR) {
269 return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, SCCNodes: {}).first;
270}
271
272/// Deduce readonly/readnone/writeonly attributes for the SCC.
273template <typename AARGetterT>
274static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
275 SmallPtrSet<Function *, 8> &Changed) {
276 MemoryEffects ME = MemoryEffects::none();
277 MemoryEffects RecursiveArgME = MemoryEffects::none();
278 for (Function *F : SCCNodes) {
279 // Call the callable parameter to look up AA results for this function.
280 AAResults &AAR = AARGetter(*F);
281 // Non-exact function definitions may not be selected at link time, and an
282 // alternative version that writes to memory may be selected. See the
283 // comment on GlobalValue::isDefinitionExact for more details.
284 auto [FnME, FnRecursiveArgME] =
285 checkFunctionMemoryAccess(F&: *F, ThisBody: F->hasExactDefinition(), AAR, SCCNodes);
286 ME |= FnME;
287 RecursiveArgME |= FnRecursiveArgME;
288 // Reached bottom of the lattice, we will not be able to improve the result.
289 if (ME == MemoryEffects::unknown())
290 return;
291 }
292
293 // If the SCC accesses argmem, add recursive accesses resulting from that.
294 ModRefInfo ArgMR = ME.getModRef(Loc: IRMemLocation::ArgMem);
295 if (ArgMR != ModRefInfo::NoModRef)
296 ME |= RecursiveArgME & MemoryEffects(ArgMR);
297
298 for (Function *F : SCCNodes) {
299 MemoryEffects OldME = F->getMemoryEffects();
300 MemoryEffects NewME = ME & OldME;
301 if (NewME != OldME) {
302 ++NumMemoryAttr;
303 F->setMemoryEffects(NewME);
304 // Remove conflicting writable attributes.
305 if (!isModSet(MRI: NewME.getModRef(Loc: IRMemLocation::ArgMem)))
306 for (Argument &A : F->args())
307 A.removeAttr(Kind: Attribute::Writable);
308 Changed.insert(Ptr: F);
309 }
310 }
311}
312
313// Compute definitive function attributes for a function taking into account
314// prevailing definitions and linkage types
315static FunctionSummary *calculatePrevailingSummary(
316 ValueInfo VI,
317 DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
318 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
319 IsPrevailing) {
320
321 auto [It, Inserted] = CachedPrevailingSummary.try_emplace(Key: VI);
322 if (!Inserted)
323 return It->second;
324
325 /// At this point, prevailing symbols have been resolved. The following leads
326 /// to returning a conservative result:
327 /// - Multiple instances with local linkage. Normally local linkage would be
328 /// unique per module
329 /// as the GUID includes the module path. We could have a guid alias if
330 /// there wasn't any distinguishing path when each file was compiled, but
331 /// that should be rare so we'll punt on those.
332
333 /// These next 2 cases should not happen and will assert:
334 /// - Multiple instances with external linkage. This should be caught in
335 /// symbol resolution
336 /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
337 /// knowledge meaning we have to go conservative.
338
339 /// Otherwise, we calculate attributes for a function as:
340 /// 1. If we have a local linkage, take its attributes. If there's somehow
341 /// multiple, bail and go conservative.
342 /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is
343 /// prevailing, take its attributes.
344 /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
345 /// differences. However, if the prevailing copy is known it will be used
346 /// so take its attributes. If the prevailing copy is in a native file
347 /// all IR copies will be dead and propagation will go conservative.
348 /// 4. AvailableExternally summaries without a prevailing copy are known to
349 /// occur in a couple of circumstances:
350 /// a. An internal function gets imported due to its caller getting
351 /// imported, it becomes AvailableExternally but no prevailing
352 /// definition exists. Because it has to get imported along with its
353 /// caller the attributes will be captured by propagating on its
354 /// caller.
355 /// b. C++11 [temp.explicit]p10 can generate AvailableExternally
356 /// definitions of explicitly instanced template declarations
357 /// for inlining which are ultimately dropped from the TU. Since this
358 /// is localized to the TU the attributes will have already made it to
359 /// the callers.
360 /// These are edge cases and already captured by their callers so we
361 /// ignore these for now. If they become relevant to optimize in the
362 /// future this can be revisited.
363 /// 5. Otherwise, go conservative.
364
365 FunctionSummary *Local = nullptr;
366 FunctionSummary *Prevailing = nullptr;
367
368 for (const auto &GVS : VI.getSummaryList()) {
369 if (!GVS->isLive())
370 continue;
371
372 FunctionSummary *FS = dyn_cast<FunctionSummary>(Val: GVS->getBaseObject());
373 // Virtual and Unknown (e.g. indirect) calls require going conservative
374 if (!FS || FS->fflags().HasUnknownCall)
375 return nullptr;
376
377 const auto &Linkage = GVS->linkage();
378 if (GlobalValue::isLocalLinkage(Linkage)) {
379 if (Local) {
380 LLVM_DEBUG(
381 dbgs()
382 << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
383 "function "
384 << VI.name() << " from " << FS->modulePath() << ". Previous module "
385 << Local->modulePath() << "\n");
386 return nullptr;
387 }
388 Local = FS;
389 } else if (GlobalValue::isExternalLinkage(Linkage)) {
390 assert(IsPrevailing(VI.getGUID(), GVS.get()) || GVS->wasPromoted());
391 Prevailing = FS;
392 break;
393 } else if (GlobalValue::isWeakODRLinkage(Linkage) ||
394 GlobalValue::isLinkOnceODRLinkage(Linkage) ||
395 GlobalValue::isWeakAnyLinkage(Linkage) ||
396 GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
397 if (IsPrevailing(VI.getGUID(), GVS.get())) {
398 Prevailing = FS;
399 break;
400 }
401 } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
402 // TODO: Handle these cases if they become meaningful
403 continue;
404 }
405 }
406
407 auto &CPS = CachedPrevailingSummary[VI];
408 if (Local) {
409 assert(!Prevailing);
410 CPS = Local;
411 } else if (Prevailing) {
412 assert(!Local);
413 CPS = Prevailing;
414 }
415
416 return CPS;
417}
418
419bool llvm::thinLTOPropagateFunctionAttrs(
420 ModuleSummaryIndex &Index,
421 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
422 IsPrevailing) {
423 // TODO: implement addNoAliasAttrs once
424 // there's more information about the return type in the summary
425 if (DisableThinLTOPropagation)
426 return false;
427
428 DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
429 bool Changed = false;
430
431 auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
432 // Assume we can propagate unless we discover otherwise
433 FunctionSummary::FFlags InferredFlags;
434 InferredFlags.NoRecurse = (SCCNodes.size() == 1);
435 InferredFlags.NoUnwind = true;
436
437 for (auto &V : SCCNodes) {
438 FunctionSummary *CallerSummary =
439 calculatePrevailingSummary(VI: V, CachedPrevailingSummary, IsPrevailing);
440
441 // Function summaries can fail to contain information such as declarations
442 if (!CallerSummary)
443 return;
444
445 if (CallerSummary->fflags().MayThrow)
446 InferredFlags.NoUnwind = false;
447
448 for (const auto &Callee : CallerSummary->calls()) {
449 FunctionSummary *CalleeSummary = calculatePrevailingSummary(
450 VI: Callee.first, CachedPrevailingSummary, IsPrevailing);
451
452 if (!CalleeSummary)
453 return;
454
455 if (!CalleeSummary->fflags().NoRecurse)
456 InferredFlags.NoRecurse = false;
457
458 if (!CalleeSummary->fflags().NoUnwind)
459 InferredFlags.NoUnwind = false;
460
461 if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
462 break;
463 }
464 }
465
466 if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
467 Changed = true;
468 for (auto &V : SCCNodes) {
469 if (InferredFlags.NoRecurse) {
470 LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
471 << V.name() << "\n");
472 ++NumThinLinkNoRecurse;
473 }
474
475 if (InferredFlags.NoUnwind) {
476 LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
477 << V.name() << "\n");
478 ++NumThinLinkNoUnwind;
479 }
480
481 for (const auto &S : V.getSummaryList()) {
482 if (auto *FS = dyn_cast<FunctionSummary>(Val: S.get())) {
483 if (InferredFlags.NoRecurse)
484 FS->setNoRecurse();
485
486 if (InferredFlags.NoUnwind)
487 FS->setNoUnwind();
488 }
489 }
490 }
491 }
492 };
493
494 // Call propagation functions on each SCC in the Index
495 for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(G: &Index); !I.isAtEnd();
496 ++I) {
497 std::vector<ValueInfo> Nodes(*I);
498 PropagateAttributes(Nodes);
499 }
500 return Changed;
501}
502
503namespace {
504
505/// For a given pointer Argument, this retains a list of Arguments of functions
506/// in the same SCC that the pointer data flows into. We use this to build an
507/// SCC of the arguments.
508struct ArgumentGraphNode {
509 Argument *Definition;
510 /// CaptureComponents for this argument, excluding captures via Uses.
511 /// We don't distinguish between other/return captures here.
512 CaptureComponents CC = CaptureComponents::None;
513 SmallVector<ArgumentGraphNode *, 4> Uses;
514};
515
516class ArgumentGraph {
517 // We store pointers to ArgumentGraphNode objects, so it's important that
518 // that they not move around upon insert.
519 using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;
520
521 ArgumentMapTy ArgumentMap;
522
523 // There is no root node for the argument graph, in fact:
524 // void f(int *x, int *y) { if (...) f(x, y); }
525 // is an example where the graph is disconnected. The SCCIterator requires a
526 // single entry point, so we maintain a fake ("synthetic") root node that
527 // uses every node. Because the graph is directed and nothing points into
528 // the root, it will not participate in any SCCs (except for its own).
529 ArgumentGraphNode SyntheticRoot;
530
531public:
532 ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
533
534 using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;
535
536 iterator begin() { return SyntheticRoot.Uses.begin(); }
537 iterator end() { return SyntheticRoot.Uses.end(); }
538 ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
539
540 ArgumentGraphNode *operator[](Argument *A) {
541 ArgumentGraphNode &Node = ArgumentMap[A];
542 Node.Definition = A;
543 SyntheticRoot.Uses.push_back(Elt: &Node);
544 return &Node;
545 }
546};
547
548/// This tracker checks whether callees are in the SCC, and if so it does not
549/// consider that a capture, instead adding it to the "Uses" list and
550/// continuing with the analysis.
551struct ArgumentUsesTracker : public CaptureTracker {
552 ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}
553
554 void tooManyUses() override { CI = CaptureInfo::all(); }
555
556 Action captured(const Use *U, UseCaptureInfo UseCI) override {
557 if (updateCaptureInfo(U, CC: UseCI.UseCC)) {
558 // Don't bother continuing if we already capture everything.
559 if (capturesAll(CC: CI.getOtherComponents()))
560 return Stop;
561 return Continue;
562 }
563
564 // For SCC argument tracking, we're not going to analyze other/ret
565 // components separately, so don't follow the return value.
566 return ContinueIgnoringReturn;
567 }
568
569 bool updateCaptureInfo(const Use *U, CaptureComponents CC) {
570 CallBase *CB = dyn_cast<CallBase>(Val: U->getUser());
571 if (!CB) {
572 if (isa<ReturnInst>(Val: U->getUser()))
573 CI |= CaptureInfo::retOnly(RetComponents: CC);
574 else
575 // Conservatively assume that the captured value might make its way
576 // into the return value as well. This could be made more precise.
577 CI |= CaptureInfo(CC);
578 return true;
579 }
580
581 Function *F = CB->getCalledFunction();
582 if (!F || !F->hasExactDefinition() || !SCCNodes.count(key: F)) {
583 CI |= CaptureInfo(CC);
584 return true;
585 }
586
587 assert(!CB->isCallee(U) && "callee operand reported captured?");
588 const unsigned UseIndex = CB->getDataOperandNo(U);
589 if (UseIndex >= CB->arg_size()) {
590 // Data operand, but not a argument operand -- must be a bundle operand
591 assert(CB->hasOperandBundles() && "Must be!");
592
593 // CaptureTracking told us that we're being captured by an operand bundle
594 // use. In this case it does not matter if the callee is within our SCC
595 // or not -- we've been captured in some unknown way, and we have to be
596 // conservative.
597 CI |= CaptureInfo(CC);
598 return true;
599 }
600
601 if (UseIndex >= F->arg_size()) {
602 assert(F->isVarArg() && "More params than args in non-varargs call");
603 CI |= CaptureInfo(CC);
604 return true;
605 }
606
607 // TODO(captures): Could improve precision by remembering maximum
608 // capture components for the argument.
609 Uses.push_back(Elt: &*std::next(x: F->arg_begin(), n: UseIndex));
610 return false;
611 }
612
613 // Does not include potential captures via Uses in the SCC.
614 CaptureInfo CI = CaptureInfo::none();
615
616 // Uses within our SCC.
617 SmallVector<Argument *, 4> Uses;
618
619 const SCCNodeSet &SCCNodes;
620};
621
622/// A struct of argument use: a Use and the offset it accesses. This struct
623/// is to track uses inside function via GEP. If GEP has a non-constant index,
624/// the Offset field is nullopt.
625struct ArgumentUse {
626 Use *U;
627 std::optional<int64_t> Offset;
628};
629
630/// A struct of argument access info. "Unknown" accesses are the cases like
631/// unrecognized instructions, instructions that have more than one use of
632/// the argument, or volatile memory accesses. "WriteWithSideEffect" are call
633/// instructions that not only write an argument but also capture it.
634struct ArgumentAccessInfo {
635 enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown };
636 AccessType ArgAccessType;
637 ConstantRangeList AccessRanges;
638};
639
640/// A struct to wrap the argument use info per block.
641struct UsesPerBlockInfo {
642 SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts;
643 bool HasWrites = false;
644 bool HasUnknownAccess = false;
645};
646
647/// A struct to summarize the argument use info in a function.
648struct ArgumentUsesSummary {
649 bool HasAnyWrite = false;
650 bool HasWriteOutsideEntryBB = false;
651 SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock;
652};
653
654ArgumentAccessInfo getArgumentAccessInfo(const Instruction *I,
655 const ArgumentUse &ArgUse,
656 const DataLayout &DL) {
657 auto GetTypeAccessRange =
658 [&DL](Type *Ty,
659 std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
660 auto TypeSize = DL.getTypeStoreSize(Ty);
661 if (!TypeSize.isScalable() && Offset) {
662 int64_t Size = TypeSize.getFixedValue();
663 APInt Low(64, *Offset, true);
664 bool Overflow;
665 APInt High = Low.sadd_ov(RHS: APInt(64, Size, true), Overflow);
666 // Bail if the range overflows signed 64-bit int.
667 if (Overflow)
668 return std::nullopt;
669 return ConstantRange(Low, High);
670 }
671 return std::nullopt;
672 };
673 auto GetConstantIntRange =
674 [](Value *Length,
675 std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
676 auto *ConstantLength = dyn_cast<ConstantInt>(Val: Length);
677 if (ConstantLength && Offset) {
678 int64_t Len = ConstantLength->getSExtValue();
679
680 // Reject zero or negative lengths
681 if (Len <= 0)
682 return std::nullopt;
683
684 APInt Low(64, *Offset, true);
685 bool Overflow;
686 APInt High = Low.sadd_ov(RHS: APInt(64, Len, true), Overflow);
687 if (Overflow)
688 return std::nullopt;
689
690 return ConstantRange(Low, High);
691 }
692 return std::nullopt;
693 };
694
695 if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
696 if (SI->isSimple() && &SI->getOperandUse(i: 1) == ArgUse.U) {
697 // Get the fixed type size of "SI". Since the access range of a write
698 // will be unioned, if "SI" doesn't have a fixed type size, we just set
699 // the access range to empty.
700 ConstantRangeList AccessRanges;
701 if (auto TypeAccessRange =
702 GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset))
703 AccessRanges.insert(NewRange: *TypeAccessRange);
704 return {.ArgAccessType: ArgumentAccessInfo::AccessType::Write, .AccessRanges: std::move(AccessRanges)};
705 }
706 } else if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
707 if (LI->isSimple()) {
708 assert(&LI->getOperandUse(0) == ArgUse.U);
709 // Get the fixed type size of "LI". Different from Write, if "LI"
710 // doesn't have a fixed type size, we conservatively set as a clobber
711 // with an empty access range.
712 if (auto TypeAccessRange =
713 GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset))
714 return {.ArgAccessType: ArgumentAccessInfo::AccessType::Read, .AccessRanges: {*TypeAccessRange}};
715 }
716 } else if (auto *MemSet = dyn_cast<MemSetInst>(Val: I)) {
717 if (!MemSet->isVolatile()) {
718 ConstantRangeList AccessRanges;
719 if (auto AccessRange =
720 GetConstantIntRange(MemSet->getLength(), ArgUse.Offset))
721 AccessRanges.insert(NewRange: *AccessRange);
722 return {.ArgAccessType: ArgumentAccessInfo::AccessType::Write, .AccessRanges: AccessRanges};
723 }
724 } else if (auto *MTI = dyn_cast<MemTransferInst>(Val: I)) {
725 if (!MTI->isVolatile()) {
726 if (&MTI->getOperandUse(i: 0) == ArgUse.U) {
727 ConstantRangeList AccessRanges;
728 if (auto AccessRange =
729 GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
730 AccessRanges.insert(NewRange: *AccessRange);
731 return {.ArgAccessType: ArgumentAccessInfo::AccessType::Write, .AccessRanges: AccessRanges};
732 } else if (&MTI->getOperandUse(i: 1) == ArgUse.U) {
733 if (auto AccessRange =
734 GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
735 return {.ArgAccessType: ArgumentAccessInfo::AccessType::Read, .AccessRanges: {*AccessRange}};
736 }
737 }
738 } else if (auto *CB = dyn_cast<CallBase>(Val: I)) {
739 if (CB->isArgOperand(U: ArgUse.U) &&
740 !CB->isByValArgument(ArgNo: CB->getArgOperandNo(U: ArgUse.U))) {
741 unsigned ArgNo = CB->getArgOperandNo(U: ArgUse.U);
742 bool IsInitialize = CB->paramHasAttr(ArgNo, Kind: Attribute::Initializes);
743 if (IsInitialize && ArgUse.Offset) {
744 // Argument is a Write when parameter is writeonly/readnone
745 // and nocapture. Otherwise, it's a WriteWithSideEffect.
746 auto Access = CB->onlyWritesMemory(OpNo: ArgNo) && CB->doesNotCapture(OpNo: ArgNo)
747 ? ArgumentAccessInfo::AccessType::Write
748 : ArgumentAccessInfo::AccessType::WriteWithSideEffect;
749 ConstantRangeList AccessRanges;
750 Attribute Attr = CB->getParamAttr(ArgNo, Kind: Attribute::Initializes);
751 ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList();
752 for (ConstantRange &CR : CBCRL)
753 AccessRanges.insert(NewRange: ConstantRange(CR.getLower() + *ArgUse.Offset,
754 CR.getUpper() + *ArgUse.Offset));
755 return {.ArgAccessType: Access, .AccessRanges: AccessRanges};
756 }
757 }
758 }
759 // Other unrecognized instructions are considered as unknown.
760 return {.ArgAccessType: ArgumentAccessInfo::AccessType::Unknown, .AccessRanges: {}};
761}
762
763// Collect the uses of argument "A" in "F".
764ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) {
765 auto &DL = F.getParent()->getDataLayout();
766 unsigned PointerSize =
767 DL.getIndexSizeInBits(AS: A.getType()->getPointerAddressSpace());
768 ArgumentUsesSummary Result;
769
770 BasicBlock &EntryBB = F.getEntryBlock();
771 SmallVector<ArgumentUse, 4> Worklist;
772 for (Use &U : A.uses())
773 Worklist.push_back(Elt: {.U: &U, .Offset: 0});
774
775 // Update "UsesPerBlock" with the block of "I" as key and "Info" as value.
776 // Return true if the block of "I" has write accesses after updating.
777 auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) {
778 auto *BB = I->getParent();
779 auto &BBInfo = Result.UsesPerBlock[BB];
780 auto [It, Inserted] = BBInfo.Insts.try_emplace(Key: I);
781 auto &IInfo = It->second;
782
783 // Instructions that have more than one use of the argument are considered
784 // as clobbers.
785 if (!Inserted) {
786 IInfo = {.ArgAccessType: ArgumentAccessInfo::AccessType::Unknown, .AccessRanges: {}};
787 BBInfo.HasUnknownAccess = true;
788 return false;
789 }
790
791 IInfo = std::move(Info);
792 BBInfo.HasUnknownAccess |=
793 IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown;
794 bool InfoHasWrites =
795 (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
796 IInfo.ArgAccessType ==
797 ArgumentAccessInfo::AccessType::WriteWithSideEffect) &&
798 !IInfo.AccessRanges.empty();
799 BBInfo.HasWrites |= InfoHasWrites;
800 return InfoHasWrites;
801 };
802
803 // No need for a visited set because we don't look through phis, so there are
804 // no cycles.
805 while (!Worklist.empty()) {
806 ArgumentUse ArgUse = Worklist.pop_back_val();
807 User *U = ArgUse.U->getUser();
808 // Add GEP uses to worklist.
809 // If the GEP is not a constant GEP, set the ArgumentUse::Offset to nullopt.
810 if (auto *GEP = dyn_cast<GEPOperator>(Val: U)) {
811 std::optional<int64_t> NewOffset = std::nullopt;
812 if (ArgUse.Offset) {
813 APInt Offset(PointerSize, 0);
814 if (GEP->accumulateConstantOffset(DL, Offset))
815 NewOffset = *ArgUse.Offset + Offset.getSExtValue();
816 }
817 for (Use &U : GEP->uses())
818 Worklist.push_back(Elt: {.U: &U, .Offset: NewOffset});
819 continue;
820 }
821
822 auto *I = cast<Instruction>(Val: U);
823 bool HasWrite = UpdateUseInfo(I, getArgumentAccessInfo(I, ArgUse, DL));
824
825 Result.HasAnyWrite |= HasWrite;
826
827 if (HasWrite && I->getParent() != &EntryBB)
828 Result.HasWriteOutsideEntryBB = true;
829 }
830 return Result;
831}
832
833} // end anonymous namespace
834
835namespace llvm {
836
837template <> struct GraphTraits<ArgumentGraphNode *> {
838 using NodeRef = ArgumentGraphNode *;
839 using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;
840
841 static NodeRef getEntryNode(NodeRef A) { return A; }
842 static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
843 static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
844};
845
846template <>
847struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
848 static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }
849
850 static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
851 return AG->begin();
852 }
853
854 static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
855};
856
857struct ArgAccessProperties {
858 bool IsRead = false;
859 bool IsWrite = false;
860 bool IsFree = false;
861
862 static ArgAccessProperties all() { return {.IsRead: true, .IsWrite: true, .IsFree: true}; }
863
864 bool hasAll() const { return IsRead && IsWrite && IsFree; }
865
866 ArgAccessProperties &operator|=(const ArgAccessProperties &Other) {
867 IsRead |= Other.IsRead;
868 IsWrite |= Other.IsWrite;
869 IsFree |= Other.IsFree;
870 return *this;
871 }
872};
873
874} // end namespace llvm
875
876/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
877static ArgAccessProperties
878determinePointerAccessAttrs(Argument *A,
879 const SmallPtrSet<Argument *, 8> &SCCNodes) {
880 SmallVector<Use *, 32> Worklist;
881 SmallPtrSet<Use *, 32> Visited;
882
883 // inalloca arguments are always clobbered by the call.
884 if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
885 return ArgAccessProperties::all();
886
887 ArgAccessProperties Props;
888
889 for (Use &U : A->uses()) {
890 Visited.insert(Ptr: &U);
891 Worklist.push_back(Elt: &U);
892 }
893
894 while (!Worklist.empty()) {
895 if (Props.hasAll())
896 // No point in searching further..
897 return Props;
898
899 Use *U = Worklist.pop_back_val();
900 Instruction *I = cast<Instruction>(Val: U->getUser());
901 if (isa<ReturnInst>(Val: I))
902 continue;
903
904 UseCaptureInfo Info = DetermineUseCaptureKind(U: *U, Base: A);
905
906 // FIXME: This should really be part of CaptureTracking, but keep it here
907 // for now due to interference with isEscapeSource().
908 if (auto *CB = dyn_cast<CallBase>(Val: I))
909 if (CB->onlyReadsMemory())
910 Info.UseCC &= CaptureComponents::Address;
911
912 if (capturesAnyProvenance(CC: Info.UseCC)) {
913 // Handle indirect access via captured provenance.
914 if (!capturesReadProvenanceOnly(CC: Info.UseCC))
915 return ArgAccessProperties::all();
916 Props.IsRead = true;
917 }
918
919 if (capturesAnyProvenance(CC: Info.ResultCC)) {
920 for (Use &UU : I->uses())
921 if (Visited.insert(Ptr: &UU).second)
922 Worklist.push_back(Elt: &UU);
923 }
924
925 if (auto *CB = dyn_cast<CallBase>(Val: I)) {
926 if (CB->isCallee(U)) {
927 Props.IsRead = true;
928 continue;
929 }
930
931 // Given we've explicitly handled the callee operand above, what's left
932 // must be a data operand (e.g. argument or operand bundle)
933 const unsigned UseIndex = CB->getDataOperandNo(U);
934
935 ModRefInfo ArgMR =
936 CB->getMemoryEffects().getModRef(Loc: IRMemLocation::ArgMem);
937 if (isNoModRef(MRI: ArgMR))
938 continue;
939
940 if (Function *F = CB->getCalledFunction())
941 if (CB->isArgOperand(U) && UseIndex < F->arg_size() &&
942 SCCNodes.count(Ptr: F->getArg(i: UseIndex)))
943 // This is an argument which is part of the speculative SCC. Note
944 // that only operands corresponding to formal arguments of the callee
945 // can participate in the speculation.
946 continue;
947
948 // The accessors used on call site here do the right thing for calls and
949 // invokes with operand bundles.
950 if (isRefSet(MRI: ArgMR) && !CB->onlyWritesMemory(OpNo: UseIndex))
951 Props.IsRead = true;
952 if (isModSet(MRI: ArgMR) && !CB->onlyReadsMemory(OpNo: UseIndex)) {
953 Props.IsWrite = true;
954 if (CB->isArgOperand(U) && !CB->hasFnAttr(Kind: Attribute::NoFree) &&
955 !CB->paramHasAttr(ArgNo: UseIndex, Kind: Attribute::NoFree))
956 Props.IsFree = true;
957 }
958 } else {
959 // Ignore value operand for stores.
960 if (isa<StoreInst>(Val: I) &&
961 StoreInst::getPointerOperandIndex() != U->getOperandNo())
962 continue;
963
964 Props.IsRead |= I->mayReadFromMemory();
965 Props.IsWrite |= I->mayWriteToMemory();
966 }
967 }
968
969 return Props;
970}
971
972/// Deduce returned attributes for the SCC.
973static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
974 SmallPtrSet<Function *, 8> &Changed) {
975 // Check each function in turn, determining if an argument is always returned.
976 for (Function *F : SCCNodes) {
977 // We can infer and propagate function attributes only when we know that the
978 // definition we'll get at link time is *exactly* the definition we see now.
979 // For more details, see GlobalValue::mayBeDerefined.
980 if (!F->hasExactDefinition())
981 continue;
982
983 if (F->getReturnType()->isVoidTy())
984 continue;
985
986 // There is nothing to do if an argument is already marked as 'returned'.
987 if (F->getAttributes().hasAttrSomewhere(Kind: Attribute::Returned))
988 continue;
989
990 auto FindRetArg = [&]() -> Argument * {
991 Argument *RetArg = nullptr;
992 for (BasicBlock &BB : *F)
993 if (auto *Ret = dyn_cast<ReturnInst>(Val: BB.getTerminator())) {
994 // Note that stripPointerCasts should look through functions with
995 // returned arguments.
996 auto *RetVal =
997 dyn_cast<Argument>(Val: Ret->getReturnValue()->stripPointerCasts());
998 if (!RetVal || RetVal->getType() != F->getReturnType())
999 return nullptr;
1000
1001 if (!RetArg)
1002 RetArg = RetVal;
1003 else if (RetArg != RetVal)
1004 return nullptr;
1005 }
1006
1007 return RetArg;
1008 };
1009
1010 if (Argument *RetArg = FindRetArg()) {
1011 RetArg->addAttr(Kind: Attribute::Returned);
1012 ++NumReturned;
1013 Changed.insert(Ptr: F);
1014 }
1015 }
1016}
1017
1018/// If a callsite has arguments that are also arguments to the parent function,
1019/// try to propagate attributes from the callsite's arguments to the parent's
1020/// arguments. This may be important because inlining can cause information loss
1021/// when attribute knowledge disappears with the inlined call.
1022static bool addArgumentAttrsFromCallsites(Function &F) {
1023 if (!EnablePoisonArgAttrPropagation)
1024 return false;
1025
1026 bool Changed = false;
1027
1028 // For an argument attribute to transfer from a callsite to the parent, the
1029 // call must be guaranteed to execute every time the parent is called.
1030 // Conservatively, just check for calls in the entry block that are guaranteed
1031 // to execute.
1032 // TODO: This could be enhanced by testing if the callsite post-dominates the
1033 // entry block or by doing simple forward walks or backward walks to the
1034 // callsite.
1035 BasicBlock &Entry = F.getEntryBlock();
1036 for (Instruction &I : Entry) {
1037 if (auto *CB = dyn_cast<CallBase>(Val: &I)) {
1038 if (auto *CalledFunc = CB->getCalledFunction()) {
1039 for (auto &CSArg : CalledFunc->args()) {
1040 unsigned ArgNo = CSArg.getArgNo();
1041 auto *FArg = dyn_cast<Argument>(Val: CB->getArgOperand(i: ArgNo));
1042 if (!FArg)
1043 continue;
1044
1045 if (CSArg.hasNonNullAttr(/*AllowUndefOrPoison=*/false)) {
1046 // If the non-null callsite argument operand is an argument to 'F'
1047 // (the caller) and the call is guaranteed to execute, then the
1048 // value must be non-null throughout 'F'.
1049 if (!FArg->hasNonNullAttr()) {
1050 FArg->addAttr(Kind: Attribute::NonNull);
1051 Changed = true;
1052 }
1053 } else if (FPClassTest CSNoFPClass = CB->getParamNoFPClass(i: ArgNo);
1054 CSNoFPClass != fcNone &&
1055 CB->paramHasAttr(ArgNo, Kind: Attribute::NoUndef)) {
1056 FPClassTest ArgNoFPClass = FArg->getNoFPClass();
1057
1058 if ((CSNoFPClass | ArgNoFPClass) != ArgNoFPClass) {
1059 FArg->addAttr(Attr: Attribute::getWithNoFPClass(
1060 Context&: FArg->getContext(), Mask: CSNoFPClass | ArgNoFPClass));
1061 Changed = true;
1062 }
1063 }
1064 }
1065 }
1066 }
1067 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
1068 break;
1069 }
1070
1071 return Changed;
1072}
1073
1074static bool addAccessAttrs(Argument *A, ArgAccessProperties Props) {
1075 assert(A && "Argument must not be null.");
1076
1077 bool Changed = false;
1078 if (!Props.IsFree && !A->hasAttribute(Kind: Attribute::NoFree)) {
1079 ++NumNoFreeArg;
1080 A->addAttr(Kind: Attribute::NoFree);
1081 Changed = true;
1082 }
1083
1084 if (Props.IsRead && Props.IsWrite)
1085 return Changed;
1086
1087 Attribute::AttrKind Attr;
1088 if (Props.IsRead)
1089 Attr = Attribute::ReadOnly;
1090 else if (Props.IsWrite)
1091 Attr = Attribute::WriteOnly;
1092 else
1093 Attr = Attribute::ReadNone;
1094
1095 // If the argument already has the attribute, nothing needs to be done.
1096 if (A->hasAttribute(Kind: Attr))
1097 return false;
1098
1099 // Otherwise, remove potentially conflicting attribute, add the new one,
1100 // and update statistics.
1101 A->removeAttr(Kind: Attribute::WriteOnly);
1102 A->removeAttr(Kind: Attribute::ReadOnly);
1103 A->removeAttr(Kind: Attribute::ReadNone);
1104 // Remove conflicting writable attribute.
1105 if (Attr == Attribute::ReadNone || Attr == Attribute::ReadOnly)
1106 A->removeAttr(Kind: Attribute::Writable);
1107 A->addAttr(Kind: Attr);
1108 if (Attr == Attribute::ReadOnly)
1109 ++NumReadOnlyArg;
1110 else if (Attr == Attribute::WriteOnly)
1111 ++NumWriteOnlyArg;
1112 else
1113 ++NumReadNoneArg;
1114 return true;
1115}
1116
1117static bool inferInitializes(Argument &A, Function &F) {
1118 auto ArgumentUses = collectArgumentUsesPerBlock(A, F);
1119 // No write anywhere in the function, bail.
1120 if (!ArgumentUses.HasAnyWrite)
1121 return false;
1122
1123 auto &UsesPerBlock = ArgumentUses.UsesPerBlock;
1124 BasicBlock &EntryBB = F.getEntryBlock();
1125 // A map to store the argument ranges initialized by a BasicBlock (including
1126 // its successors).
1127 DenseMap<const BasicBlock *, ConstantRangeList> Initialized;
1128 // Visit the successors of "BB" block and the instructions in BB (post-order)
1129 // to get the argument ranges initialized by "BB" (including its successors).
1130 // The result will be cached in "Initialized".
1131 auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList {
1132 auto UPB = UsesPerBlock.find(Val: BB);
1133 ConstantRangeList CRL;
1134
1135 // Start with intersection of successors.
1136 // If this block has any clobbering use, we're going to clear out the
1137 // ranges at some point in this block anyway, so don't bother looking at
1138 // successors.
1139 if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) {
1140 bool HasAddedSuccessor = false;
1141 for (auto *Succ : successors(BB)) {
1142 if (auto SuccI = Initialized.find(Val: Succ); SuccI != Initialized.end()) {
1143 if (HasAddedSuccessor) {
1144 CRL = CRL.intersectWith(CRL: SuccI->second);
1145 } else {
1146 CRL = SuccI->second;
1147 HasAddedSuccessor = true;
1148 }
1149 } else {
1150 CRL = ConstantRangeList();
1151 break;
1152 }
1153 }
1154 }
1155
1156 if (UPB != UsesPerBlock.end()) {
1157 // Sort uses in this block by instruction order.
1158 SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts;
1159 append_range(C&: Insts, R&: UPB->second.Insts);
1160 sort(C&: Insts, Comp: [](std::pair<Instruction *, ArgumentAccessInfo> &LHS,
1161 std::pair<Instruction *, ArgumentAccessInfo> &RHS) {
1162 return LHS.first->comesBefore(Other: RHS.first);
1163 });
1164
1165 // From the end of the block to the beginning of the block, set
1166 // initializes ranges.
1167 for (auto &[_, Info] : reverse(C&: Insts)) {
1168 if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown ||
1169 Info.ArgAccessType ==
1170 ArgumentAccessInfo::AccessType::WriteWithSideEffect)
1171 CRL = ConstantRangeList();
1172 if (!Info.AccessRanges.empty()) {
1173 if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
1174 Info.ArgAccessType ==
1175 ArgumentAccessInfo::AccessType::WriteWithSideEffect) {
1176 CRL = CRL.unionWith(CRL: Info.AccessRanges);
1177 } else {
1178 assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read);
1179 for (const auto &ReadRange : Info.AccessRanges)
1180 CRL.subtract(SubRange: ReadRange);
1181 }
1182 }
1183 }
1184 }
1185 return CRL;
1186 };
1187
1188 ConstantRangeList EntryCRL;
1189 // If all write instructions are in the EntryBB, or if the EntryBB has
1190 // a clobbering use, we only need to look at EntryBB.
1191 bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB;
1192 if (!OnlyScanEntryBlock)
1193 if (auto EntryUPB = UsesPerBlock.find(Val: &EntryBB);
1194 EntryUPB != UsesPerBlock.end())
1195 OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess;
1196 if (OnlyScanEntryBlock) {
1197 EntryCRL = VisitBlock(&EntryBB);
1198 if (EntryCRL.empty())
1199 return false;
1200 } else {
1201 // Now we have to go through CFG to get the initialized argument ranges
1202 // across blocks. With dominance and post-dominance, the initialized ranges
1203 // by a block include both accesses inside this block and accesses in its
1204 // (transitive) successors. So visit successors before predecessors with a
1205 // post-order walk of the blocks and memorize the results in "Initialized".
1206 for (const BasicBlock *BB : post_order(G: &F)) {
1207 ConstantRangeList CRL = VisitBlock(BB);
1208 if (!CRL.empty())
1209 Initialized[BB] = CRL;
1210 }
1211
1212 auto EntryCRLI = Initialized.find(Val: &EntryBB);
1213 if (EntryCRLI == Initialized.end())
1214 return false;
1215
1216 EntryCRL = EntryCRLI->second;
1217 }
1218
1219 assert(!EntryCRL.empty() &&
1220 "should have bailed already if EntryCRL is empty");
1221
1222 if (A.hasAttribute(Kind: Attribute::Initializes)) {
1223 ConstantRangeList PreviousCRL =
1224 A.getAttribute(Kind: Attribute::Initializes).getValueAsConstantRangeList();
1225 if (PreviousCRL == EntryCRL)
1226 return false;
1227 EntryCRL = EntryCRL.unionWith(CRL: PreviousCRL);
1228 }
1229
1230 A.addAttr(Attr: Attribute::get(Context&: A.getContext(), Kind: Attribute::Initializes,
1231 Val: EntryCRL.rangesRef()));
1232
1233 return true;
1234}
1235
1236/// Deduce nocapture attributes for the SCC.
1237static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
1238 SmallPtrSet<Function *, 8> &Changed,
1239 bool SkipInitializes) {
1240 ArgumentGraph AG;
1241
1242 auto DetermineAccessAttrsForSingleton = [](Argument *A) {
1243 SmallPtrSet<Argument *, 8> Self;
1244 Self.insert(Ptr: A);
1245 return addAccessAttrs(A, Props: determinePointerAccessAttrs(A, SCCNodes: Self));
1246 };
1247
1248 // Check each function in turn, determining which pointer arguments are not
1249 // captured.
1250 for (Function *F : SCCNodes) {
1251 // We can infer and propagate function attributes only when we know that the
1252 // definition we'll get at link time is *exactly* the definition we see now.
1253 // For more details, see GlobalValue::mayBeDerefined.
1254 if (!F->hasExactDefinition())
1255 continue;
1256
1257 if (addArgumentAttrsFromCallsites(F&: *F))
1258 Changed.insert(Ptr: F);
1259
1260 // Functions that are readonly (or readnone) and nounwind and don't return
1261 // a value can't capture arguments. Don't analyze them.
1262 if (F->onlyReadsMemory() && F->doesNotThrow() && F->willReturn() &&
1263 F->getReturnType()->isVoidTy()) {
1264 for (Argument &A : F->args()) {
1265 if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) {
1266 A.addAttr(Attr: Attribute::getWithCaptureInfo(Context&: A.getContext(),
1267 CI: CaptureInfo::none()));
1268 ++NumCapturesNone;
1269 Changed.insert(Ptr: F);
1270 }
1271 }
1272 continue;
1273 }
1274
1275 for (Argument &A : F->args()) {
1276 if (!A.getType()->isPointerTy())
1277 continue;
1278 bool HasNonLocalUses = false;
1279 CaptureInfo OrigCI = A.getAttributes().getCaptureInfo();
1280 if (!capturesNothing(CC: OrigCI)) {
1281 ArgumentUsesTracker Tracker(SCCNodes);
1282 PointerMayBeCaptured(V: &A, Tracker: &Tracker);
1283 CaptureInfo NewCI = Tracker.CI & OrigCI;
1284 if (NewCI != OrigCI) {
1285 if (Tracker.Uses.empty()) {
1286 // If the information is complete, add the attribute now.
1287 A.addAttr(Attr: Attribute::getWithCaptureInfo(Context&: A.getContext(), CI: NewCI));
1288 addCapturesStat(CI: NewCI);
1289 Changed.insert(Ptr: F);
1290 } else {
1291 // If it's not trivially captured and not trivially not captured,
1292 // then it must be calling into another function in our SCC. Save
1293 // its particulars for Argument-SCC analysis later.
1294 ArgumentGraphNode *Node = AG[&A];
1295 Node->CC = CaptureComponents(NewCI);
1296 for (Argument *Use : Tracker.Uses) {
1297 Node->Uses.push_back(Elt: AG[Use]);
1298 if (Use != &A)
1299 HasNonLocalUses = true;
1300 }
1301 }
1302 }
1303 // Otherwise, it's captured. Don't bother doing SCC analysis on it.
1304 }
1305 if (!HasNonLocalUses && !A.onlyReadsMemory()) {
1306 // Can we determine that it's readonly/readnone/writeonly without doing
1307 // an SCC? Note that we don't allow any calls at all here, or else our
1308 // result will be dependent on the iteration order through the
1309 // functions in the SCC.
1310 if (DetermineAccessAttrsForSingleton(&A))
1311 Changed.insert(Ptr: F);
1312 }
1313 if (!SkipInitializes && !A.onlyReadsMemory()) {
1314 if (inferInitializes(A, F&: *F))
1315 Changed.insert(Ptr: F);
1316 }
1317 }
1318 }
1319
1320 // The graph we've collected is partial because we stopped scanning for
1321 // argument uses once we solved the argument trivially. These partial nodes
1322 // show up as ArgumentGraphNode objects with an empty Uses list, and for
1323 // these nodes the final decision about whether they capture has already been
1324 // made. If the definition doesn't have a 'nocapture' attribute by now, it
1325 // captures.
1326
1327 for (scc_iterator<ArgumentGraph *> I = scc_begin(G: &AG); !I.isAtEnd(); ++I) {
1328 const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
1329 if (ArgumentSCC.size() == 1) {
1330 if (!ArgumentSCC[0]->Definition)
1331 continue; // synthetic root node
1332
1333 // eg. "void f(int* x) { if (...) f(x); }"
1334 if (ArgumentSCC[0]->Uses.size() == 1 &&
1335 ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
1336 Argument *A = ArgumentSCC[0]->Definition;
1337 CaptureInfo OrigCI = A->getAttributes().getCaptureInfo();
1338 CaptureInfo NewCI = CaptureInfo(ArgumentSCC[0]->CC) & OrigCI;
1339 if (NewCI != OrigCI) {
1340 A->addAttr(Attr: Attribute::getWithCaptureInfo(Context&: A->getContext(), CI: NewCI));
1341 addCapturesStat(CI: NewCI);
1342 Changed.insert(Ptr: A->getParent());
1343 }
1344
1345 // Infer the access attributes given the new captures one
1346 if (DetermineAccessAttrsForSingleton(A))
1347 Changed.insert(Ptr: A->getParent());
1348 }
1349 continue;
1350 }
1351
1352 SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
1353 // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
1354 // quickly looking up whether a given Argument is in this ArgumentSCC.
1355 for (ArgumentGraphNode *I : ArgumentSCC) {
1356 ArgumentSCCNodes.insert(Ptr: I->Definition);
1357 }
1358
1359 // At the SCC level, only track merged CaptureComponents. We're not
1360 // currently prepared to handle propagation of return-only captures across
1361 // the SCC.
1362 CaptureComponents CC = CaptureComponents::None;
1363 for (ArgumentGraphNode *N : ArgumentSCC) {
1364 for (ArgumentGraphNode *Use : N->Uses) {
1365 Argument *A = Use->Definition;
1366 if (ArgumentSCCNodes.count(Ptr: A))
1367 CC |= Use->CC;
1368 else
1369 CC |= CaptureComponents(A->getAttributes().getCaptureInfo());
1370 break;
1371 }
1372 if (capturesAll(CC))
1373 break;
1374 }
1375
1376 if (!capturesAll(CC)) {
1377 for (ArgumentGraphNode *N : ArgumentSCC) {
1378 Argument *A = N->Definition;
1379 CaptureInfo OrigCI = A->getAttributes().getCaptureInfo();
1380 CaptureInfo NewCI = CaptureInfo(N->CC | CC) & OrigCI;
1381 if (NewCI != OrigCI) {
1382 A->addAttr(Attr: Attribute::getWithCaptureInfo(Context&: A->getContext(), CI: NewCI));
1383 addCapturesStat(CI: NewCI);
1384 Changed.insert(Ptr: A->getParent());
1385 }
1386 }
1387 }
1388
1389 if (capturesAnyProvenance(CC)) {
1390 // As the pointer provenance may be captured, determine the pointer
1391 // attributes looking at each argument individually.
1392 for (ArgumentGraphNode *N : ArgumentSCC) {
1393 if (DetermineAccessAttrsForSingleton(N->Definition))
1394 Changed.insert(Ptr: N->Definition->getParent());
1395 }
1396 continue;
1397 }
1398
1399 // We also want to compute readonly/readnone/writeonly. With a small number
1400 // of false negatives, we can assume that any pointer which is captured
1401 // isn't going to be provably readonly or readnone, since by definition
1402 // we can't analyze all uses of a captured pointer.
1403 //
1404 // The false negatives happen when the pointer is captured by a function
1405 // that promises readonly/readnone behaviour on the pointer, then the
1406 // pointer's lifetime ends before anything that writes to arbitrary memory.
1407 // Also, a readonly/readnone pointer may be returned, but returning a
1408 // pointer is capturing it.
1409
1410 ArgAccessProperties Props;
1411 for (ArgumentGraphNode *N : ArgumentSCC) {
1412 Argument *A = N->Definition;
1413 Props |= determinePointerAccessAttrs(A, SCCNodes: ArgumentSCCNodes);
1414 if (Props.hasAll())
1415 break;
1416 }
1417
1418 if (!Props.hasAll()) {
1419 for (ArgumentGraphNode *N : ArgumentSCC) {
1420 Argument *A = N->Definition;
1421 if (addAccessAttrs(A, Props))
1422 Changed.insert(Ptr: A->getParent());
1423 }
1424 }
1425 }
1426}
1427
1428/// Tests whether a function is "malloc-like".
1429///
1430/// A function is "malloc-like" if it returns either null or a pointer that
1431/// doesn't alias any other pointer visible to the caller.
1432static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
1433 SmallSetVector<Value *, 8> FlowsToReturn;
1434 for (BasicBlock &BB : *F)
1435 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Val: BB.getTerminator()))
1436 FlowsToReturn.insert(X: Ret->getReturnValue());
1437
1438 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
1439 Value *RetVal = FlowsToReturn[i];
1440
1441 if (Constant *C = dyn_cast<Constant>(Val: RetVal)) {
1442 if (!C->isNullValue() && !isa<UndefValue>(Val: C))
1443 return false;
1444
1445 continue;
1446 }
1447
1448 if (isa<Argument>(Val: RetVal))
1449 return false;
1450
1451 if (Instruction *RVI = dyn_cast<Instruction>(Val: RetVal))
1452 switch (RVI->getOpcode()) {
1453 // Extend the analysis by looking upwards.
1454 case Instruction::BitCast:
1455 case Instruction::GetElementPtr:
1456 case Instruction::AddrSpaceCast:
1457 FlowsToReturn.insert(X: RVI->getOperand(i: 0));
1458 continue;
1459 case Instruction::Select: {
1460 SelectInst *SI = cast<SelectInst>(Val: RVI);
1461 FlowsToReturn.insert(X: SI->getTrueValue());
1462 FlowsToReturn.insert(X: SI->getFalseValue());
1463 continue;
1464 }
1465 case Instruction::PHI: {
1466 PHINode *PN = cast<PHINode>(Val: RVI);
1467 FlowsToReturn.insert_range(R: PN->incoming_values());
1468 continue;
1469 }
1470
1471 // Check whether the pointer came from an allocation.
1472 case Instruction::Alloca:
1473 break;
1474 case Instruction::Call:
1475 case Instruction::Invoke: {
1476 CallBase &CB = cast<CallBase>(Val&: *RVI);
1477 if (CB.hasRetAttr(Kind: Attribute::NoAlias))
1478 break;
1479 if (CB.getCalledFunction() && SCCNodes.count(key: CB.getCalledFunction()))
1480 break;
1481 [[fallthrough]];
1482 }
1483 default:
1484 return false; // Did not come from an allocation.
1485 }
1486
1487 if (PointerMayBeCaptured(V: RetVal, /*ReturnCaptures=*/false))
1488 return false;
1489 }
1490
1491 return true;
1492}
1493
1494/// Deduce noalias attributes for the SCC.
1495static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
1496 SmallPtrSet<Function *, 8> &Changed) {
1497 // Check each function in turn, determining which functions return noalias
1498 // pointers.
1499 for (Function *F : SCCNodes) {
1500 // Already noalias.
1501 if (F->returnDoesNotAlias())
1502 continue;
1503
1504 // We can infer and propagate function attributes only when we know that the
1505 // definition we'll get at link time is *exactly* the definition we see now.
1506 // For more details, see GlobalValue::mayBeDerefined.
1507 if (!F->hasExactDefinition())
1508 return;
1509
1510 // We annotate noalias return values, which are only applicable to
1511 // pointer types.
1512 if (!F->getReturnType()->isPointerTy())
1513 continue;
1514
1515 if (!isFunctionMallocLike(F, SCCNodes))
1516 return;
1517 }
1518
1519 for (Function *F : SCCNodes) {
1520 if (F->returnDoesNotAlias() ||
1521 !F->getReturnType()->isPointerTy())
1522 continue;
1523
1524 F->setReturnDoesNotAlias();
1525 ++NumNoAlias;
1526 Changed.insert(Ptr: F);
1527 }
1528}
1529
1530/// Tests whether this function is known to not return null.
1531///
1532/// Requires that the function returns a pointer.
1533///
1534/// Returns true if it believes the function will not return a null, and sets
1535/// \p Speculative based on whether the returned conclusion is a speculative
1536/// conclusion due to SCC calls.
1537static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
1538 bool &Speculative) {
1539 assert(F->getReturnType()->isPointerTy() &&
1540 "nonnull only meaningful on pointer types");
1541 Speculative = false;
1542
1543 SmallSetVector<Value *, 8> FlowsToReturn;
1544 for (BasicBlock &BB : *F)
1545 if (auto *Ret = dyn_cast<ReturnInst>(Val: BB.getTerminator()))
1546 FlowsToReturn.insert(X: Ret->getReturnValue());
1547
1548 auto &DL = F->getDataLayout();
1549
1550 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
1551 Value *RetVal = FlowsToReturn[i];
1552
1553 // If this value is locally known to be non-null, we're good
1554 if (isKnownNonZero(V: RetVal, Q: DL))
1555 continue;
1556
1557 // Otherwise, we need to look upwards since we can't make any local
1558 // conclusions.
1559 Instruction *RVI = dyn_cast<Instruction>(Val: RetVal);
1560 if (!RVI)
1561 return false;
1562 switch (RVI->getOpcode()) {
1563 // Extend the analysis by looking upwards.
1564 case Instruction::BitCast:
1565 case Instruction::AddrSpaceCast:
1566 FlowsToReturn.insert(X: RVI->getOperand(i: 0));
1567 continue;
1568 case Instruction::GetElementPtr:
1569 if (cast<GEPOperator>(Val: RVI)->isInBounds()) {
1570 FlowsToReturn.insert(X: RVI->getOperand(i: 0));
1571 continue;
1572 }
1573 return false;
1574 case Instruction::Select: {
1575 SelectInst *SI = cast<SelectInst>(Val: RVI);
1576 FlowsToReturn.insert(X: SI->getTrueValue());
1577 FlowsToReturn.insert(X: SI->getFalseValue());
1578 continue;
1579 }
1580 case Instruction::PHI: {
1581 PHINode *PN = cast<PHINode>(Val: RVI);
1582 for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1583 FlowsToReturn.insert(X: PN->getIncomingValue(i));
1584 continue;
1585 }
1586 case Instruction::Call:
1587 case Instruction::Invoke: {
1588 CallBase &CB = cast<CallBase>(Val&: *RVI);
1589 Function *Callee = CB.getCalledFunction();
1590 // A call to a node within the SCC is assumed to return null until
1591 // proven otherwise
1592 if (Callee && SCCNodes.count(key: Callee)) {
1593 Speculative = true;
1594 continue;
1595 }
1596 return false;
1597 }
1598 default:
1599 return false; // Unknown source, may be null
1600 };
1601 llvm_unreachable("should have either continued or returned");
1602 }
1603
1604 return true;
1605}
1606
1607/// Deduce nonnull attributes for the SCC.
1608static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
1609 SmallPtrSet<Function *, 8> &Changed) {
1610 // Speculative that all functions in the SCC return only nonnull
1611 // pointers. We may refute this as we analyze functions.
1612 bool SCCReturnsNonNull = true;
1613
1614 // Check each function in turn, determining which functions return nonnull
1615 // pointers.
1616 for (Function *F : SCCNodes) {
1617 // Already nonnull.
1618 if (F->getAttributes().hasRetAttr(Kind: Attribute::NonNull))
1619 continue;
1620
1621 // We can infer and propagate function attributes only when we know that the
1622 // definition we'll get at link time is *exactly* the definition we see now.
1623 // For more details, see GlobalValue::mayBeDerefined.
1624 if (!F->hasExactDefinition())
1625 return;
1626
1627 // We annotate nonnull return values, which are only applicable to
1628 // pointer types.
1629 if (!F->getReturnType()->isPointerTy())
1630 continue;
1631
1632 bool Speculative = false;
1633 if (isReturnNonNull(F, SCCNodes, Speculative)) {
1634 if (!Speculative) {
1635 // Mark the function eagerly since we may discover a function
1636 // which prevents us from speculating about the entire SCC
1637 LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
1638 << " as nonnull\n");
1639 F->addRetAttr(Kind: Attribute::NonNull);
1640 ++NumNonNullReturn;
1641 Changed.insert(Ptr: F);
1642 }
1643 continue;
1644 }
1645 // At least one function returns something which could be null, can't
1646 // speculate any more.
1647 SCCReturnsNonNull = false;
1648 }
1649
1650 if (SCCReturnsNonNull) {
1651 for (Function *F : SCCNodes) {
1652 if (F->getAttributes().hasRetAttr(Kind: Attribute::NonNull) ||
1653 !F->getReturnType()->isPointerTy())
1654 continue;
1655
1656 LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
1657 F->addRetAttr(Kind: Attribute::NonNull);
1658 ++NumNonNullReturn;
1659 Changed.insert(Ptr: F);
1660 }
1661 }
1662}
1663
1664/// Deduce noundef attributes for the SCC.
1665static void addNoUndefAttrs(const SCCNodeSet &SCCNodes,
1666 SmallPtrSet<Function *, 8> &Changed) {
1667 // Check each function in turn, determining which functions return noundef
1668 // values.
1669 for (Function *F : SCCNodes) {
1670 // Already noundef.
1671 AttributeList Attrs = F->getAttributes();
1672 if (Attrs.hasRetAttr(Kind: Attribute::NoUndef))
1673 continue;
1674
1675 // We can infer and propagate function attributes only when we know that the
1676 // definition we'll get at link time is *exactly* the definition we see now.
1677 // For more details, see GlobalValue::mayBeDerefined.
1678 if (!F->hasExactDefinition())
1679 return;
1680
1681 // MemorySanitizer assumes that the definition and declaration of a
1682 // function will be consistent. A function with sanitize_memory attribute
1683 // should be skipped from inference.
1684 if (F->hasFnAttribute(Kind: Attribute::SanitizeMemory))
1685 continue;
1686
1687 if (F->getReturnType()->isVoidTy())
1688 continue;
1689
1690 const DataLayout &DL = F->getDataLayout();
1691 if (all_of(Range&: *F, P: [&](BasicBlock &BB) {
1692 if (auto *Ret = dyn_cast<ReturnInst>(Val: BB.getTerminator())) {
1693 // TODO: perform context-sensitive analysis?
1694 Value *RetVal = Ret->getReturnValue();
1695 if (!isGuaranteedNotToBeUndefOrPoison(V: RetVal))
1696 return false;
1697
1698 // We know the original return value is not poison now, but it
1699 // could still be converted to poison by another return attribute.
1700 // Try to explicitly re-prove the relevant attributes.
1701 if (Attrs.hasRetAttr(Kind: Attribute::NonNull) &&
1702 !isKnownNonZero(V: RetVal, Q: DL))
1703 return false;
1704
1705 if (MaybeAlign Align = Attrs.getRetAlignment())
1706 if (RetVal->getPointerAlignment(DL) < *Align)
1707 return false;
1708
1709 Attribute Attr = Attrs.getRetAttr(Kind: Attribute::Range);
1710 if (Attr.isValid() &&
1711 !Attr.getRange().contains(
1712 CR: computeConstantRange(V: RetVal, /*ForSigned=*/false,
1713 SQ: SimplifyQuery(F->getDataLayout()))))
1714 return false;
1715
1716 FPClassTest AttrFPClass = Attrs.getRetNoFPClass();
1717 if (AttrFPClass != fcNone) {
1718 KnownFPClass ComputedFPClass = computeKnownFPClass(V: RetVal, DL);
1719 if (!ComputedFPClass.isKnownNever(Mask: AttrFPClass))
1720 return false;
1721 }
1722 }
1723 return true;
1724 })) {
1725 F->addRetAttr(Kind: Attribute::NoUndef);
1726 ++NumNoUndefReturn;
1727 Changed.insert(Ptr: F);
1728 }
1729 }
1730}
1731
1732namespace {
1733
1734/// Collects a set of attribute inference requests and performs them all in one
1735/// go on a single SCC Node. Inference involves scanning function bodies
1736/// looking for instructions that violate attribute assumptions.
1737/// As soon as all the bodies are fine we are free to set the attribute.
1738/// Customization of inference for individual attributes is performed by
1739/// providing a handful of predicates for each attribute.
1740class AttributeInferer {
1741public:
1742 /// Describes a request for inference of a single attribute.
1743 struct InferenceDescriptor {
1744
1745 /// Returns true if this function does not have to be handled.
1746 /// General intent for this predicate is to provide an optimization
1747 /// for functions that do not need this attribute inference at all
1748 /// (say, for functions that already have the attribute).
1749 std::function<bool(const Function &)> SkipFunction;
1750
1751 /// Returns true if this instruction violates attribute assumptions.
1752 std::function<bool(Instruction &)> InstrBreaksAttribute;
1753
1754 /// Sets the inferred attribute for this function.
1755 std::function<void(Function &)> SetAttribute;
1756
1757 /// Attribute we derive.
1758 Attribute::AttrKind AKind;
1759
1760 /// If true, only "exact" definitions can be used to infer this attribute.
1761 /// See GlobalValue::isDefinitionExact.
1762 bool RequiresExactDefinition;
1763
1764 InferenceDescriptor(Attribute::AttrKind AK,
1765 std::function<bool(const Function &)> SkipFunc,
1766 std::function<bool(Instruction &)> InstrScan,
1767 std::function<void(Function &)> SetAttr,
1768 bool ReqExactDef)
1769 : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
1770 SetAttribute(SetAttr), AKind(AK),
1771 RequiresExactDefinition(ReqExactDef) {}
1772 };
1773
1774private:
1775 SmallVector<InferenceDescriptor, 4> InferenceDescriptors;
1776
1777public:
1778 void registerAttrInference(InferenceDescriptor AttrInference) {
1779 InferenceDescriptors.push_back(Elt: AttrInference);
1780 }
1781
1782 void run(const SCCNodeSet &SCCNodes, SmallPtrSet<Function *, 8> &Changed);
1783};
1784
1785/// Perform all the requested attribute inference actions according to the
1786/// attribute predicates stored before.
1787void AttributeInferer::run(const SCCNodeSet &SCCNodes,
1788 SmallPtrSet<Function *, 8> &Changed) {
1789 SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
1790 // Go through all the functions in SCC and check corresponding attribute
1791 // assumptions for each of them. Attributes that are invalid for this SCC
1792 // will be removed from InferInSCC.
1793 for (Function *F : SCCNodes) {
1794
1795 // No attributes whose assumptions are still valid - done.
1796 if (InferInSCC.empty())
1797 return;
1798
1799 // Check if our attributes ever need scanning/can be scanned.
1800 llvm::erase_if(C&: InferInSCC, P: [F](const InferenceDescriptor &ID) {
1801 if (ID.SkipFunction(*F))
1802 return false;
1803
1804 // Remove from further inference (invalidate) when visiting a function
1805 // that has no instructions to scan/has an unsuitable definition.
1806 return F->isDeclaration() ||
1807 (ID.RequiresExactDefinition && !F->hasExactDefinition());
1808 });
1809
1810 // For each attribute still in InferInSCC that doesn't explicitly skip F,
1811 // set up the F instructions scan to verify assumptions of the attribute.
1812 SmallVector<InferenceDescriptor, 4> InferInThisFunc;
1813 llvm::copy_if(
1814 Range&: InferInSCC, Out: std::back_inserter(x&: InferInThisFunc),
1815 P: [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });
1816
1817 if (InferInThisFunc.empty())
1818 continue;
1819
1820 // Start instruction scan.
1821 for (Instruction &I : instructions(F&: *F)) {
1822 llvm::erase_if(C&: InferInThisFunc, P: [&](const InferenceDescriptor &ID) {
1823 if (!ID.InstrBreaksAttribute(I))
1824 return false;
1825 // Remove attribute from further inference on any other functions
1826 // because attribute assumptions have just been violated.
1827 llvm::erase_if(C&: InferInSCC, P: [&ID](const InferenceDescriptor &D) {
1828 return D.AKind == ID.AKind;
1829 });
1830 // Remove attribute from the rest of current instruction scan.
1831 return true;
1832 });
1833
1834 if (InferInThisFunc.empty())
1835 break;
1836 }
1837 }
1838
1839 if (InferInSCC.empty())
1840 return;
1841
1842 for (Function *F : SCCNodes)
1843 // At this point InferInSCC contains only functions that were either:
1844 // - explicitly skipped from scan/inference, or
1845 // - verified to have no instructions that break attribute assumptions.
1846 // Hence we just go and force the attribute for all non-skipped functions.
1847 for (auto &ID : InferInSCC) {
1848 if (ID.SkipFunction(*F))
1849 continue;
1850 Changed.insert(Ptr: F);
1851 ID.SetAttribute(*F);
1852 }
1853}
1854
1855struct SCCNodesResult {
1856 SCCNodeSet SCCNodes;
1857};
1858
1859} // end anonymous namespace
1860
1861/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
1862static bool InstrBreaksNonConvergent(Instruction &I,
1863 const SCCNodeSet &SCCNodes) {
1864 const CallBase *CB = dyn_cast<CallBase>(Val: &I);
1865 // Breaks non-convergent assumption if CS is a convergent call to a function
1866 // not in the SCC.
1867 return CB && CB->isConvergent() &&
1868 !SCCNodes.contains(key: CB->getCalledFunction());
1869}
1870
1871/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
1872static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
1873 if (!I.mayThrow(/* IncludePhaseOneUnwind */ true))
1874 return false;
1875 if (const auto *CI = dyn_cast<CallInst>(Val: &I)) {
1876 if (Function *Callee = CI->getCalledFunction()) {
1877 // I is a may-throw call to a function inside our SCC. This doesn't
1878 // invalidate our current working assumption that the SCC is no-throw; we
1879 // just have to scan that other function.
1880 if (SCCNodes.contains(key: Callee))
1881 return false;
1882 }
1883 }
1884 return true;
1885}
1886
1887/// Helper for NoFree inference predicate InstrBreaksAttribute.
1888static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
1889 CallBase *CB = dyn_cast<CallBase>(Val: &I);
1890 if (!CB) {
1891 // Synchronization may establish happens-before with a free on another
1892 // thread.
1893 return I.maySynchronize();
1894 }
1895
1896 if (CB->hasFnAttr(Kind: Attribute::NoFree))
1897 return false;
1898
1899 // Speculatively assume in SCC.
1900 if (Function *Callee = CB->getCalledFunction())
1901 if (SCCNodes.contains(key: Callee))
1902 return false;
1903
1904 return true;
1905}
1906
1907static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
1908 if (!I.maySynchronize())
1909 return false;
1910
1911 // Optimistically assume calls within the SCC are nosync: if nothing else in
1912 // the SCC synchronizes, the assumption holds.
1913 if (auto *CB = dyn_cast<CallBase>(Val: &I))
1914 if (Function *Callee = CB->getCalledFunction())
1915 if (SCCNodes.contains(key: Callee))
1916 return false;
1917
1918 return true;
1919}
1920
1921/// Attempt to remove convergent function attribute when possible.
1922///
1923/// Returns true if any changes to function attributes were made.
1924static void inferConvergent(const SCCNodeSet &SCCNodes,
1925 SmallPtrSet<Function *, 8> &Changed) {
1926 AttributeInferer AI;
1927
1928 // Request to remove the convergent attribute from all functions in the SCC
1929 // if every callsite within the SCC is not convergent (except for calls
1930 // to functions within the SCC).
1931 // Note: Removal of the attr from the callsites will happen in
1932 // InstCombineCalls separately.
1933 AI.registerAttrInference(AttrInference: AttributeInferer::InferenceDescriptor{
1934 Attribute::Convergent,
1935 // Skip non-convergent functions.
1936 [](const Function &F) { return !F.isConvergent(); },
1937 // Instructions that break non-convergent assumption.
1938 [SCCNodes](Instruction &I) {
1939 return InstrBreaksNonConvergent(I, SCCNodes);
1940 },
1941 [](Function &F) {
1942 LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
1943 << "\n");
1944 F.setNotConvergent();
1945 },
1946 /* RequiresExactDefinition= */ false});
1947 // Perform all the requested attribute inference actions.
1948 AI.run(SCCNodes, Changed);
1949}
1950
1951/// Infer attributes from all functions in the SCC by scanning every
1952/// instruction for compliance to the attribute assumptions.
1953///
1954/// Returns true if any changes to function attributes were made.
1955static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
1956 SmallPtrSet<Function *, 8> &Changed) {
1957 AttributeInferer AI;
1958
1959 if (!DisableNoUnwindInference)
1960 // Request to infer nounwind attribute for all the functions in the SCC if
1961 // every callsite within the SCC is not throwing (except for calls to
1962 // functions within the SCC). Note that nounwind attribute suffers from
1963 // derefinement - results may change depending on how functions are
1964 // optimized. Thus it can be inferred only from exact definitions.
1965 AI.registerAttrInference(AttrInference: AttributeInferer::InferenceDescriptor{
1966 Attribute::NoUnwind,
1967 // Skip non-throwing functions.
1968 [](const Function &F) { return F.doesNotThrow(); },
1969 // Instructions that break non-throwing assumption.
1970 [&SCCNodes](Instruction &I) {
1971 return InstrBreaksNonThrowing(I, SCCNodes);
1972 },
1973 [](Function &F) {
1974 LLVM_DEBUG(dbgs()
1975 << "Adding nounwind attr to fn " << F.getName() << "\n");
1976 F.setDoesNotThrow();
1977 ++NumNoUnwind;
1978 },
1979 /* RequiresExactDefinition= */ true});
1980
1981 if (!DisableNoFreeInference)
1982 // Request to infer nofree attribute for all the functions in the SCC if
1983 // every callsite within the SCC does not directly or indirectly free
1984 // memory (except for calls to functions within the SCC). Note that nofree
1985 // attribute suffers from derefinement - results may change depending on
1986 // how functions are optimized. Thus it can be inferred only from exact
1987 // definitions.
1988 AI.registerAttrInference(AttrInference: AttributeInferer::InferenceDescriptor{
1989 Attribute::NoFree,
1990 // Skip functions known not to free memory.
1991 [](const Function &F) { return F.doesNotFreeMemory(); },
1992 // Instructions that break non-deallocating assumption.
1993 [&SCCNodes](Instruction &I) {
1994 return InstrBreaksNoFree(I, SCCNodes);
1995 },
1996 [](Function &F) {
1997 LLVM_DEBUG(dbgs()
1998 << "Adding nofree attr to fn " << F.getName() << "\n");
1999 F.setDoesNotFreeMemory();
2000 ++NumNoFree;
2001 },
2002 /* RequiresExactDefinition= */ true});
2003
2004 AI.registerAttrInference(AttrInference: AttributeInferer::InferenceDescriptor{
2005 Attribute::NoSync,
2006 // Skip already marked functions.
2007 [](const Function &F) { return F.hasNoSync(); },
2008 // Instructions that break nosync assumption.
2009 [&SCCNodes](Instruction &I) {
2010 return InstrBreaksNoSync(I, SCCNodes);
2011 },
2012 [](Function &F) {
2013 LLVM_DEBUG(dbgs()
2014 << "Adding nosync attr to fn " << F.getName() << "\n");
2015 F.setNoSync();
2016 ++NumNoSync;
2017 },
2018 /* RequiresExactDefinition= */ true});
2019
2020 // Perform all the requested attribute inference actions.
2021 AI.run(SCCNodes, Changed);
2022}
2023
2024// Determines if the function 'F' can be marked 'norecurse'.
2025// It returns true if any call within 'F' could lead to a recursive
2026// call back to 'F', and false otherwise.
2027// The 'AnyFunctionsAddressIsTaken' parameter is a module-wide flag
2028// that is true if any function's address is taken, or if any function
2029// has external linkage. This is used to determine the safety of
2030// external/library calls.
2031static bool mayHaveRecursiveCallee(Function &F,
2032 bool AnyFunctionsAddressIsTaken = true) {
2033 for (const auto &BB : F) {
2034 for (const auto &I : BB) {
2035 if (const auto *CB = dyn_cast<CallBase>(Val: &I)) {
2036 const Function *Callee = CB->getCalledFunction();
2037 if (!Callee || Callee == &F)
2038 return true;
2039
2040 if (Callee->doesNotRecurse())
2041 continue;
2042
2043 if (!AnyFunctionsAddressIsTaken ||
2044 (Callee->isDeclaration() &&
2045 Callee->hasFnAttribute(Kind: Attribute::NoCallback)))
2046 continue;
2047 return true;
2048 }
2049 }
2050 }
2051 return false;
2052}
2053
2054static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
2055 SmallPtrSet<Function *, 8> &Changed) {
2056 // Try and identify functions that do not recurse.
2057
2058 // If the SCC contains multiple nodes we know for sure there is recursion.
2059 if (SCCNodes.size() != 1)
2060 return;
2061
2062 Function *F = *SCCNodes.begin();
2063 if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
2064 return;
2065 if (!mayHaveRecursiveCallee(F&: *F)) {
2066 // Every call was to a non-recursive function other than this function, and
2067 // we have no indirect recursion as the SCC size is one. This function
2068 // cannot recurse.
2069 F->setDoesNotRecurse();
2070 ++NumNoRecurse;
2071 Changed.insert(Ptr: F);
2072 }
2073}
2074
2075// Set the noreturn function attribute if possible.
2076static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
2077 SmallPtrSet<Function *, 8> &Changed) {
2078 for (Function *F : SCCNodes) {
2079 if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Kind: Attribute::Naked) ||
2080 F->doesNotReturn())
2081 continue;
2082
2083 if (!canReturn(F: *F)) {
2084 F->setDoesNotReturn();
2085 Changed.insert(Ptr: F);
2086 }
2087 }
2088}
2089
2090static bool allPathsGoThroughCold(Function &F) {
2091 SmallDenseMap<BasicBlock *, bool, 16> ColdPaths;
2092 ColdPaths[&F.front()] = false;
2093 SmallVector<BasicBlock *> Jobs;
2094 Jobs.push_back(Elt: &F.front());
2095
2096 while (!Jobs.empty()) {
2097 BasicBlock *BB = Jobs.pop_back_val();
2098
2099 // If block contains a cold callsite this path through the CG is cold.
2100 // Ignore whether the instructions actually are guaranteed to transfer
2101 // execution. Divergent behavior is considered unlikely.
2102 if (any_of(Range&: *BB, P: [](Instruction &I) {
2103 if (auto *CB = dyn_cast<CallBase>(Val: &I))
2104 return CB->hasFnAttr(Kind: Attribute::Cold);
2105 return false;
2106 })) {
2107 ColdPaths[BB] = true;
2108 continue;
2109 }
2110
2111 auto Succs = successors(BB);
2112 // We found a path that doesn't go through any cold callsite.
2113 if (Succs.empty())
2114 return false;
2115
2116 // We didn't find a cold callsite in this BB, so check that all successors
2117 // contain a cold callsite (or that their successors do).
2118 // Potential TODO: We could use static branch hints to assume certain
2119 // successor paths are inherently cold, irrespective of if they contain a
2120 // cold callsite.
2121 for (BasicBlock *Succ : Succs) {
2122 // Start with false, this is necessary to ensure we don't turn loops into
2123 // cold.
2124 auto [Iter, Inserted] = ColdPaths.try_emplace(Key: Succ, Args: false);
2125 if (!Inserted) {
2126 if (Iter->second)
2127 continue;
2128 return false;
2129 }
2130 Jobs.push_back(Elt: Succ);
2131 }
2132 }
2133 return true;
2134}
2135
2136// Set the cold function attribute if possible.
2137static void addColdAttrs(const SCCNodeSet &SCCNodes,
2138 SmallPtrSet<Function *, 8> &Changed) {
2139 for (Function *F : SCCNodes) {
2140 if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Kind: Attribute::Naked) ||
2141 F->hasFnAttribute(Kind: Attribute::Cold) || F->hasFnAttribute(Kind: Attribute::Hot))
2142 continue;
2143
2144 // Potential TODO: We could add attribute `cold` on functions with `coldcc`.
2145 if (allPathsGoThroughCold(F&: *F)) {
2146 F->addFnAttr(Kind: Attribute::Cold);
2147 ++NumCold;
2148 Changed.insert(Ptr: F);
2149 continue;
2150 }
2151 }
2152}
2153
2154static bool functionWillReturn(const Function &F) {
2155 // We can infer and propagate function attributes only when we know that the
2156 // definition we'll get at link time is *exactly* the definition we see now.
2157 // For more details, see GlobalValue::mayBeDerefined.
2158 if (!F.hasExactDefinition())
2159 return false;
2160
2161 // Must-progress function without side-effects must return.
2162 if (F.mustProgress() && F.onlyReadsMemory())
2163 return true;
2164
2165 // Can only analyze functions with a definition.
2166 if (F.isDeclaration())
2167 return false;
2168
2169 // Functions with loops require more sophisticated analysis, as the loop
2170 // may be infinite. For now, don't try to handle them.
2171 SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
2172 FindFunctionBackedges(F, Result&: Backedges);
2173 if (!Backedges.empty())
2174 return false;
2175
2176 // If there are no loops, then the function is willreturn if all calls in
2177 // it are willreturn.
2178 return all_of(Range: instructions(F), P: [](const Instruction &I) {
2179 return I.willReturn();
2180 });
2181}
2182
2183// Set the willreturn function attribute if possible.
2184static void addWillReturn(const SCCNodeSet &SCCNodes,
2185 SmallPtrSet<Function *, 8> &Changed) {
2186 for (Function *F : SCCNodes) {
2187 if (!F || F->willReturn() || !functionWillReturn(F: *F))
2188 continue;
2189
2190 F->setWillReturn();
2191 NumWillReturn++;
2192 Changed.insert(Ptr: F);
2193 }
2194}
2195
2196static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
2197 SCCNodesResult Res;
2198 for (Function *F : Functions) {
2199 if (!F || F->hasOptNone() || F->hasFnAttribute(Kind: Attribute::Naked) ||
2200 F->isPresplitCoroutine()) {
2201 // Omit any functions we're trying not to optimize from the set.
2202 continue;
2203 }
2204
2205 Res.SCCNodes.insert(X: F);
2206 }
2207 return Res;
2208}
2209
2210template <typename AARGetterT>
2211static SmallPtrSet<Function *, 8>
2212deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
2213 bool ArgAttrsOnly) {
2214 SCCNodesResult Nodes = createSCCNodeSet(Functions);
2215
2216 // Bail if the SCC only contains optnone functions.
2217 if (Nodes.SCCNodes.empty())
2218 return {};
2219
2220 SmallPtrSet<Function *, 8> Changed;
2221 if (ArgAttrsOnly) {
2222 // ArgAttrsOnly means to only infer attributes that may aid optimizations
2223 // on the *current* function. "initializes" attribute is to aid
2224 // optimizations (like DSE) on the callers, so skip "initializes" here.
2225 addArgumentAttrs(SCCNodes: Nodes.SCCNodes, Changed, /*SkipInitializes=*/true);
2226 return Changed;
2227 }
2228
2229 addArgumentReturnedAttrs(SCCNodes: Nodes.SCCNodes, Changed);
2230 addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
2231 addArgumentAttrs(SCCNodes: Nodes.SCCNodes, Changed, /*SkipInitializes=*/false);
2232 inferConvergent(SCCNodes: Nodes.SCCNodes, Changed);
2233 addNoReturnAttrs(SCCNodes: Nodes.SCCNodes, Changed);
2234 addColdAttrs(SCCNodes: Nodes.SCCNodes, Changed);
2235 addWillReturn(SCCNodes: Nodes.SCCNodes, Changed);
2236 addNoUndefAttrs(SCCNodes: Nodes.SCCNodes, Changed);
2237 addNoAliasAttrs(SCCNodes: Nodes.SCCNodes, Changed);
2238 addNonNullAttrs(SCCNodes: Nodes.SCCNodes, Changed);
2239 inferAttrsFromFunctionBodies(SCCNodes: Nodes.SCCNodes, Changed);
2240 addNoRecurseAttrs(SCCNodes: Nodes.SCCNodes, Changed);
2241
2242 // Finally, infer the maximal set of attributes from the ones we've inferred
2243 // above. This is handling the cases where one attribute on a signature
2244 // implies another, but for implementation reasons the inference rule for
2245 // the later is missing (or simply less sophisticated).
2246 for (Function *F : Nodes.SCCNodes)
2247 if (F)
2248 if (inferAttributesFromOthers(F&: *F))
2249 Changed.insert(Ptr: F);
2250
2251 return Changed;
2252}
2253
2254PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
2255 CGSCCAnalysisManager &AM,
2256 LazyCallGraph &CG,
2257 CGSCCUpdateResult &) {
2258 // Skip non-recursive functions if requested.
2259 // Only infer argument attributes for non-recursive functions, because
2260 // it can affect optimization behavior in conjunction with noalias.
2261 bool ArgAttrsOnly = false;
2262 if (C.size() == 1 && SkipNonRecursive) {
2263 LazyCallGraph::Node &N = *C.begin();
2264 if (!N->lookup(N))
2265 ArgAttrsOnly = true;
2266 }
2267
2268 FunctionAnalysisManager &FAM =
2269 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(IR&: C, ExtraArgs&: CG).getManager();
2270
2271 // We pass a lambda into functions to wire them up to the analysis manager
2272 // for getting function analyses.
2273 auto AARGetter = [&](Function &F) -> AAResults & {
2274 return FAM.getResult<AAManager>(IR&: F);
2275 };
2276
2277 SmallVector<Function *, 8> Functions;
2278 for (LazyCallGraph::Node &N : C) {
2279 Functions.push_back(Elt: &N.getFunction());
2280 }
2281
2282 auto ChangedFunctions =
2283 deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
2284 if (ChangedFunctions.empty())
2285 return PreservedAnalyses::all();
2286
2287 // Invalidate analyses for modified functions so that we don't have to
2288 // invalidate all analyses for all functions in this SCC.
2289 PreservedAnalyses FuncPA;
2290 // We haven't changed the CFG for modified functions.
2291 FuncPA.preserveSet<CFGAnalyses>();
2292 for (Function *Changed : ChangedFunctions) {
2293 FAM.invalidate(IR&: *Changed, PA: FuncPA);
2294 // Also invalidate any direct callers of changed functions since analyses
2295 // may care about attributes of direct callees. For example, MemorySSA cares
2296 // about whether or not a call's callee modifies memory and queries that
2297 // through function attributes.
2298 for (auto *U : Changed->users()) {
2299 if (auto *Call = dyn_cast<CallBase>(Val: U)) {
2300 if (Call->getCalledOperand() == Changed)
2301 FAM.invalidate(IR&: *Call->getFunction(), PA: FuncPA);
2302 }
2303 }
2304 }
2305
2306 PreservedAnalyses PA;
2307 // We have not added or removed functions.
2308 PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
2309 // We already invalidated all relevant function analyses above.
2310 PA.preserveSet<AllAnalysesOn<Function>>();
2311 return PA;
2312}
2313
2314void PostOrderFunctionAttrsPass::printPipeline(
2315 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
2316 static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline(
2317 OS, MapClassName2PassName);
2318 if (SkipNonRecursive)
2319 OS << "<skip-non-recursive-function-attrs>";
2320}
2321
2322static bool addNoRecurseAttrsTopDown(Function &F) {
2323 if (F.doesNotRecurse())
2324 return false;
2325
2326 // We check the preconditions for the function prior to calling this to avoid
2327 // the cost of building up a reversible post-order list. We assert them here
2328 // to make sure none of the invariants this relies on were violated.
2329 assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
2330 assert(F.hasInternalLinkage() &&
2331 "Can only do top-down deduction for internal linkage functions!");
2332
2333 // If F is internal and all of its uses are calls from a non-recursive
2334 // functions, then none of its calls could in fact recurse without going
2335 // through a function marked norecurse, and so we can mark this function too
2336 // as norecurse. Note that the uses must actually be calls -- otherwise
2337 // a pointer to this function could be returned from a norecurse function but
2338 // this function could be recursively (indirectly) called. Note that this
2339 // also detects if F is directly recursive as F is not yet marked as
2340 // a norecurse function.
2341 for (auto &U : F.uses()) {
2342 const CallBase *CB = dyn_cast<CallBase>(Val: U.getUser());
2343 if (!CB || !CB->isCallee(U: &U) ||
2344 !CB->getParent()->getParent()->doesNotRecurse())
2345 return false;
2346 }
2347 F.setDoesNotRecurse();
2348 ++NumNoRecurse;
2349 return true;
2350}
2351
2352static bool addNoFPClassAttrsTopDown(Function &F) {
2353 assert(!F.isDeclaration() && "Cannot deduce nofpclass without a definition!");
2354 unsigned NumArgs = F.arg_size();
2355 SmallVector<FPClassTest, 8> ArgsNoFPClass(NumArgs, fcAllFlags);
2356 FPClassTest RetNoFPClass = fcAllFlags;
2357
2358 bool Changed = false;
2359 for (User *U : F.users()) {
2360 auto *CB = dyn_cast<CallBase>(Val: U);
2361 if (!CB || CB->getCalledFunction() != &F)
2362 return false;
2363
2364 RetNoFPClass &= CB->getRetNoFPClass();
2365 for (unsigned I = 0; I != NumArgs; ++I) {
2366 // TODO: Consider computeKnownFPClass, at least with a small search
2367 // depth. This will currently not catch non-splat vectors.
2368 const APFloat *Cst;
2369 if (match(V: CB->getArgOperand(i: I), P: m_APFloat(Res&: Cst)))
2370 ArgsNoFPClass[I] &= ~Cst->classify();
2371 else
2372 ArgsNoFPClass[I] &= CB->getParamNoFPClass(i: I);
2373 }
2374 }
2375
2376 LLVMContext &Ctx = F.getContext();
2377
2378 if (RetNoFPClass != fcNone) {
2379 FPClassTest OldAttr = F.getAttributes().getRetNoFPClass();
2380 if (OldAttr != RetNoFPClass) {
2381 F.addRetAttr(Attr: Attribute::getWithNoFPClass(Context&: Ctx, Mask: RetNoFPClass));
2382 Changed = true;
2383 }
2384 }
2385
2386 for (unsigned I = 0; I != NumArgs; ++I) {
2387 FPClassTest ArgNoFPClass = ArgsNoFPClass[I];
2388 if (ArgNoFPClass == fcNone)
2389 continue;
2390 FPClassTest OldAttr = F.getParamNoFPClass(ArgNo: I);
2391 if (OldAttr == ArgNoFPClass)
2392 continue;
2393
2394 F.addParamAttr(ArgNo: I, Attr: Attribute::getWithNoFPClass(Context&: Ctx, Mask: ArgNoFPClass));
2395 Changed = true;
2396 }
2397
2398 return Changed;
2399}
2400
2401static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) {
2402 // We only have a post-order SCC traversal (because SCCs are inherently
2403 // discovered in post-order), so we accumulate them in a vector and then walk
2404 // it in reverse. This is simpler than using the RPO iterator infrastructure
2405 // because we need to combine SCC detection and the PO walk of the call
2406 // graph. We can also cheat egregiously because we're primarily interested in
2407 // synthesizing norecurse and so we can only save the singular SCCs as SCCs
2408 // with multiple functions in them will clearly be recursive.
2409
2410 SmallVector<Function *, 16> Worklist;
2411 CG.buildRefSCCs();
2412 for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
2413 for (LazyCallGraph::SCC &SCC : RC) {
2414 if (SCC.size() != 1)
2415 continue;
2416 Function &F = SCC.begin()->getFunction();
2417 if (!F.isDeclaration() && F.hasInternalLinkage() && !F.use_empty())
2418 Worklist.push_back(Elt: &F);
2419 }
2420 }
2421 bool Changed = false;
2422 for (auto *F : llvm::reverse(C&: Worklist)) {
2423 Changed |= addNoRecurseAttrsTopDown(F&: *F);
2424 Changed |= addNoFPClassAttrsTopDown(F&: *F);
2425 }
2426
2427 return Changed;
2428}
2429
2430PreservedAnalyses
2431ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
2432 auto &CG = AM.getResult<LazyCallGraphAnalysis>(IR&: M);
2433
2434 if (!deduceFunctionAttributeInRPO(M, CG))
2435 return PreservedAnalyses::all();
2436
2437 PreservedAnalyses PA;
2438 PA.preserve<LazyCallGraphAnalysis>();
2439 return PA;
2440}
2441
2442PreservedAnalyses NoRecurseLTOInferencePass::run(Module &M,
2443 ModuleAnalysisManager &MAM) {
2444
2445 // Check if any function in the whole program has its address taken or has
2446 // potentially external linkage.
2447 // We use this information when inferring norecurse attribute: If there is
2448 // no function whose address is taken and all functions have internal
2449 // linkage, there is no path for a callback to any user function.
2450 bool AnyFunctionsAddressIsTaken = false;
2451 for (Function &F : M) {
2452 if (F.isDeclaration() || F.doesNotRecurse())
2453 continue;
2454 if (!F.hasLocalLinkage() || F.hasAddressTaken()) {
2455 AnyFunctionsAddressIsTaken = true;
2456 break;
2457 }
2458 }
2459
2460 // Run norecurse inference on all RefSCCs in the LazyCallGraph for this
2461 // module.
2462 bool Changed = false;
2463 LazyCallGraph &CG = MAM.getResult<LazyCallGraphAnalysis>(IR&: M);
2464 CG.buildRefSCCs();
2465
2466 for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
2467 // Skip any RefSCC that is part of a call cycle. A RefSCC containing more
2468 // than one SCC indicates a recursive relationship involving indirect calls.
2469 if (RC.size() > 1)
2470 continue;
2471
2472 // RefSCC contains a single-SCC. SCC size > 1 indicates mutually recursive
2473 // functions. Ex: foo1 -> foo2 -> foo3 -> foo1.
2474 LazyCallGraph::SCC &S = *RC.begin();
2475 if (S.size() > 1)
2476 continue;
2477
2478 // Get the single function from this SCC.
2479 Function &F = S.begin()->getFunction();
2480 if (!F.hasExactDefinition() || F.doesNotRecurse())
2481 continue;
2482
2483 // If the analysis confirms that this function has no recursive calls
2484 // (either direct, indirect, or through external linkages),
2485 // we can safely apply the norecurse attribute.
2486 if (!mayHaveRecursiveCallee(F, AnyFunctionsAddressIsTaken)) {
2487 F.setDoesNotRecurse();
2488 ++NumNoRecurse;
2489 Changed = true;
2490 }
2491 }
2492
2493 PreservedAnalyses PA;
2494 if (Changed)
2495 PA.preserve<LazyCallGraphAnalysis>();
2496 else
2497 PA = PreservedAnalyses::all();
2498 return PA;
2499}
2500