1//===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MemorySSA class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Analysis/MemorySSA.h"
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/DenseMapInfo.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/DepthFirstIterator.h"
18#include "llvm/ADT/Hashing.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallPtrSet.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/ADT/iterator.h"
24#include "llvm/ADT/iterator_range.h"
25#include "llvm/Analysis/AliasAnalysis.h"
26#include "llvm/Analysis/CFGPrinter.h"
27#include "llvm/Analysis/IteratedDominanceFrontier.h"
28#include "llvm/Analysis/LoopInfo.h"
29#include "llvm/Analysis/MemoryLocation.h"
30#include "llvm/Config/llvm-config.h"
31#include "llvm/IR/AssemblyAnnotationWriter.h"
32#include "llvm/IR/BasicBlock.h"
33#include "llvm/IR/Dominators.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/IntrinsicInst.h"
38#include "llvm/IR/LLVMContext.h"
39#include "llvm/IR/Operator.h"
40#include "llvm/IR/PassManager.h"
41#include "llvm/IR/Use.h"
42#include "llvm/InitializePasses.h"
43#include "llvm/Pass.h"
44#include "llvm/Support/AtomicOrdering.h"
45#include "llvm/Support/Casting.h"
46#include "llvm/Support/CommandLine.h"
47#include "llvm/Support/Compiler.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/FormattedStream.h"
51#include "llvm/Support/GraphWriter.h"
52#include "llvm/Support/raw_ostream.h"
53#include <algorithm>
54#include <cassert>
55#include <iterator>
56#include <memory>
57#include <utility>
58
59using namespace llvm;
60
61#define DEBUG_TYPE "memoryssa"
62
// -dot-cfg-mssa=<file name>: destination file for a dot graph of the CFG
// annotated with MemorySSA; empty (the default) disables dot emission.
// (The consumer of this option is not visible in this chunk.)
static cl::opt<std::string>
    DotCFGMSSA("dot-cfg-mssa",
               cl::value_desc("file name for generated dot file"),
               cl::desc("file name for generated dot file"), cl::init(Val: ""));
67
// Register MemorySSAWrapperPass with the legacy pass manager, declaring its
// dependencies on the dominator tree and alias analysis wrapper passes.
INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                      true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                    true)
74
75static cl::opt<unsigned> MaxCheckLimit(
76 "memssa-check-limit", cl::Hidden, cl::init(Val: 100),
77 cl::desc("The maximum number of stores/phis MemorySSA"
78 "will consider trying to walk past (default = 100)"));
79
// Always verify MemorySSA if expensive checking is enabled.
#ifdef EXPENSIVE_CHECKS
bool llvm::VerifyMemorySSA = true;
#else
bool llvm::VerifyMemorySSA = false;
#endif

// -verify-memoryssa: command-line override for the flag above. cl::location
// makes the option write directly into llvm::VerifyMemorySSA.
static cl::opt<bool, true>
    VerifyMemorySSAX("verify-memoryssa", cl::location(L&: VerifyMemorySSA),
                     cl::Hidden, cl::desc("Enable verification of MemorySSA."));
90
91const static char LiveOnEntryStr[] = "liveOnEntry";
92
93namespace {
94
95/// An assembly annotator class to print Memory SSA information in
96/// comments.
97class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
98 const MemorySSA *MSSA;
99
100public:
101 MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {}
102
103 void emitBasicBlockStartAnnot(const BasicBlock *BB,
104 formatted_raw_ostream &OS) override {
105 if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
106 OS << "; " << *MA << "\n";
107 }
108
109 void emitInstructionAnnot(const Instruction *I,
110 formatted_raw_ostream &OS) override {
111 if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
112 OS << "; " << *MA << "\n";
113 }
114};
115
116/// An assembly annotator class to print Memory SSA information in
117/// comments.
118class MemorySSAWalkerAnnotatedWriter : public AssemblyAnnotationWriter {
119 MemorySSA *MSSA;
120 MemorySSAWalker *Walker;
121 BatchAAResults BAA;
122
123public:
124 MemorySSAWalkerAnnotatedWriter(MemorySSA *M)
125 : MSSA(M), Walker(M->getWalker()), BAA(M->getAA()) {}
126
127 void emitBasicBlockStartAnnot(const BasicBlock *BB,
128 formatted_raw_ostream &OS) override {
129 if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
130 OS << "; " << *MA << "\n";
131 }
132
133 void emitInstructionAnnot(const Instruction *I,
134 formatted_raw_ostream &OS) override {
135 if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
136 MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(MA, AA&: BAA);
137 OS << "; " << *MA;
138 if (Clobber) {
139 OS << " - clobbered by ";
140 if (MSSA->isLiveOnEntryDef(MA: Clobber))
141 OS << LiveOnEntryStr;
142 else
143 OS << *Clobber;
144 }
145 OS << "\n";
146 }
147 }
148};
149
150} // namespace
151
152namespace {
153
154/// Our current alias analysis API differentiates heavily between calls and
155/// non-calls, and functions called on one usually assert on the other.
156/// This class encapsulates the distinction to simplify other code that wants
157/// "Memory affecting instructions and related data" to use as a key.
158/// For example, this class is used as a densemap key in the use optimizer.
/// Our current alias analysis API differentiates heavily between calls and
/// non-calls, and functions called on one usually assert on the other.
/// This class encapsulates the distinction to simplify other code that wants
/// "Memory affecting instructions and related data" to use as a key.
/// For example, this class is used as a densemap key in the use optimizer.
class MemoryLocOrCall {
public:
  // Discriminant for the union below: true selects Call, false selects Loc.
  bool IsCall = false;

  MemoryLocOrCall(MemoryUseOrDef *MUD)
      : MemoryLocOrCall(MUD->getMemoryInst()) {}
  MemoryLocOrCall(const MemoryUseOrDef *MUD)
      : MemoryLocOrCall(MUD->getMemoryInst()) {}

  MemoryLocOrCall(Instruction *Inst) {
    if (auto *C = dyn_cast<CallBase>(Val: Inst)) {
      IsCall = true;
      Call = C;
    } else {
      IsCall = false;
      // There is no such thing as a memorylocation for a fence inst, and it is
      // unique in that regard.
      if (!isa<FenceInst>(Val: Inst))
        Loc = MemoryLocation::get(Inst);
    }
  }

  explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {}

  // Only valid when IsCall is true (asserts otherwise).
  const CallBase *getCall() const {
    assert(IsCall);
    return Call;
  }

  // Only valid when IsCall is false (asserts otherwise).
  MemoryLocation getLoc() const {
    assert(!IsCall);
    return Loc;
  }

  // Non-calls compare by MemoryLocation; calls compare equal only when the
  // called operand and the (pointer-identical) argument lists match.
  bool operator==(const MemoryLocOrCall &Other) const {
    if (IsCall != Other.IsCall)
      return false;

    if (!IsCall)
      return Loc == Other.Loc;

    if (Call->getCalledOperand() != Other.Call->getCalledOperand())
      return false;

    return Call->arg_size() == Other.Call->arg_size() &&
           std::equal(first1: Call->arg_begin(), last1: Call->arg_end(),
                      first2: Other.Call->arg_begin());
  }

private:
  union {
    const CallBase *Call;
    MemoryLocation Loc;
  };
};
214
215} // end anonymous namespace
216
217namespace llvm {
218
template <> struct DenseMapInfo<MemoryLocOrCall> {
  // Empty/tombstone keys piggyback on MemoryLocation's sentinels; IsCall is
  // false for both, so they can never compare equal to a call key.
  static inline MemoryLocOrCall getEmptyKey() {
    return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey());
  }

  static inline MemoryLocOrCall getTombstoneKey() {
    return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey());
  }

  // Non-calls hash by MemoryLocation; calls hash the called operand plus each
  // argument value, mirroring MemoryLocOrCall::operator==.
  static unsigned getHashValue(const MemoryLocOrCall &MLOC) {
    if (!MLOC.IsCall)
      return hash_combine(
          args: MLOC.IsCall,
          args: DenseMapInfo<MemoryLocation>::getHashValue(Val: MLOC.getLoc()));

    hash_code hash =
        hash_combine(args: MLOC.IsCall, args: DenseMapInfo<const Value *>::getHashValue(
                                        PtrVal: MLOC.getCall()->getCalledOperand()));

    for (const Value *Arg : MLOC.getCall()->args())
      hash = hash_combine(args: hash, args: DenseMapInfo<const Value *>::getHashValue(PtrVal: Arg));
    return hash;
  }

  static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) {
    return LHS == RHS;
  }
};
247
248} // end namespace llvm
249
250/// This does one-way checks to see if Use could theoretically be hoisted above
251/// MayClobber. This will not check the other way around.
252///
253/// This assumes that, for the purposes of MemorySSA, Use comes directly after
254/// MayClobber, with no potentially clobbering operations in between them.
255/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.)
256static bool areLoadsReorderable(const LoadInst *Use,
257 const LoadInst *MayClobber) {
258 bool VolatileUse = Use->isVolatile();
259 bool VolatileClobber = MayClobber->isVolatile();
260 // Volatile operations may never be reordered with other volatile operations.
261 if (VolatileUse && VolatileClobber)
262 return false;
263 // Otherwise, volatile doesn't matter here. From the language reference:
264 // 'optimizers may change the order of volatile operations relative to
265 // non-volatile operations.'"
266
267 // If a load is seq_cst, it cannot be moved above other loads. If its ordering
268 // is weaker, it can be moved above other loads. We just need to be sure that
269 // MayClobber isn't an acquire load, because loads can't be moved above
270 // acquire loads.
271 //
272 // Note that this explicitly *does* allow the free reordering of monotonic (or
273 // weaker) loads of the same address.
274 bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent;
275 bool MayClobberIsAcquire = isAtLeastOrStrongerThan(AO: MayClobber->getOrdering(),
276 Other: AtomicOrdering::Acquire);
277 return !(SeqCstUse || MayClobberIsAcquire);
278}
279
/// Returns true if the access defined by \p MD clobbers the use described by
/// \p UseLoc / \p UseInst, according to alias analysis \p AA.
template <typename AliasAnalysisType>
static bool
instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
                         const Instruction *UseInst, AliasAnalysisType &AA) {
  Instruction *DefInst = MD->getMemoryInst();
  assert(DefInst && "Defining instruction not actually an instruction");

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: DefInst)) {
    // These intrinsics will show up as affecting memory, but they are just
    // markers, mostly.
    //
    // FIXME: We probably don't actually want MemorySSA to model these at all
    // (including creating MemoryAccesses for them): we just end up inventing
    // clobbers where they don't really exist at all. Please see D43269 for
    // context.
    switch (II->getIntrinsicID()) {
    case Intrinsic::allow_runtime_check:
    case Intrinsic::allow_ubsan_check:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::assume:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::pseudoprobe:
      return false;
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_label:
    case Intrinsic::dbg_value:
      llvm_unreachable("debuginfo shouldn't have associated defs!");
    default:
      break;
    }
  }

  // A call use is clobbered if the def may modify memory the call accesses.
  if (auto *CB = dyn_cast_or_null<CallBase>(Val: UseInst)) {
    ModRefInfo I = AA.getModRefInfo(DefInst, CB);
    return isModSet(MRI: I);
  }

  // Load-load ordering is governed by volatility/atomic ordering rather than
  // by aliasing; defer to areLoadsReorderable.
  if (auto *DefLoad = dyn_cast<LoadInst>(Val: DefInst))
    if (auto *UseLoad = dyn_cast_or_null<LoadInst>(Val: UseInst))
      return !areLoadsReorderable(Use: UseLoad, MayClobber: DefLoad);

  // General case: the def clobbers the use if it may modify UseLoc.
  ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc);
  return isModSet(MRI: I);
}
325
326template <typename AliasAnalysisType>
327static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU,
328 const MemoryLocOrCall &UseMLOC,
329 AliasAnalysisType &AA) {
330 // FIXME: This is a temporary hack to allow a single instructionClobbersQuery
331 // to exist while MemoryLocOrCall is pushed through places.
332 if (UseMLOC.IsCall)
333 return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(),
334 AA);
335 return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(),
336 AA);
337}
338
339// Return true when MD may alias MU, return false otherwise.
340bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
341 AliasAnalysis &AA) {
342 return instructionClobbersQuery(MD, MU, UseMLOC: MemoryLocOrCall(MU), AA);
343}
344
345namespace {
346
/// Bundle of state describing a single upward clobber query.
struct UpwardsMemoryQuery {
  // True if our original query started off as a call
  bool IsCall = false;
  // The pointer location we started the query with. This will be empty if
  // IsCall is true.
  MemoryLocation StartingLoc;
  // This is the instruction we were querying about.
  const Instruction *Inst = nullptr;
  // The MemoryAccess we actually got called with, used to test local domination
  const MemoryAccess *OriginalAccess = nullptr;
  // When set, walks may pass through OriginalAccess itself instead of stopping
  // at it (see ClobberWalker::getBlockingAccess).
  bool SkipSelfAccess = false;

  UpwardsMemoryQuery() = default;

  UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
      : IsCall(isa<CallBase>(Val: Inst)), Inst(Inst), OriginalAccess(Access) {
    if (!IsCall)
      StartingLoc = MemoryLocation::get(Inst);
  }
};
367
368} // end anonymous namespace
369
370template <typename AliasAnalysisType>
371static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
372 const Instruction *I) {
373 // If the memory can't be changed, then loads of the memory can't be
374 // clobbered.
375 if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
376 return I->hasMetadata(KindID: LLVMContext::MD_invariant_load) ||
377 !isModSet(AA.getModRefInfoMask(MemoryLocation::get(LI)));
378 }
379 return false;
380}
381
/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
/// inbetween `Start` and `ClobberAt` can clobber `Start`.
///
/// This is meant to be as simple and self-contained as possible. Because it
/// uses no cache, etc., it can be relatively expensive.
///
/// \param Start The MemoryAccess that we want to walk from.
/// \param ClobberAt A clobber for Start.
/// \param StartLoc The MemoryLocation for Start.
/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
/// \param Query The UpwardsMemoryQuery we used for our search.
/// \param AA The AliasAnalysis we used for our search.
/// \param AllowImpreciseClobber Always false, unless we do relaxed verify.

[[maybe_unused]] static void
checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
                   const MemoryLocation &StartLoc, const MemorySSA &MSSA,
                   const UpwardsMemoryQuery &Query, BatchAAResults &AA,
                   bool AllowImpreciseClobber = false) {
  assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");

  // liveOnEntry can only be clobbered by liveOnEntry itself.
  if (MSSA.isLiveOnEntryDef(MA: Start)) {
    assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
           "liveOnEntry must clobber itself");
    return;
  }

  bool FoundClobber = false;
  DenseSet<UpwardDefsElem> VisitedPhis;
  SmallVector<UpwardDefsElem, 8> Worklist;
  Worklist.push_back(Elt: {.MA: Start, .Loc: StartLoc, /*MayBeCrossIteration=*/false});
  // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
  // is found, complain.
  while (!Worklist.empty()) {
    auto MAP = Worklist.pop_back_val();
    // All we care about is that nothing from Start to ClobberAt clobbers Start.
    // We learn nothing from revisiting nodes.
    if (!VisitedPhis.insert(V: MAP).second)
      continue;

    for (auto *MA : def_chain(MA: MAP.MA)) {
      if (MA == ClobberAt) {
        if (const auto *MD = dyn_cast<MemoryDef>(Val: MA)) {
          // instructionClobbersQuery isn't essentially free, so don't use `|=`,
          // since it won't let us short-circuit.
          //
          // Also, note that this can't be hoisted out of the `Worklist` loop,
          // since MD may only act as a clobber for 1 of N MemoryLocations.
          FoundClobber = FoundClobber || MSSA.isLiveOnEntryDef(MA: MD);
          if (!FoundClobber) {
            BatchAACrossIterationScope _(AA, MAP.MayBeCrossIteration);
            if (instructionClobbersQuery(MD, UseLoc: MAP.Loc, UseInst: Query.Inst, AA))
              FoundClobber = true;
          }
        }
        break;
      }

      // We should never hit liveOnEntry, unless it's the clobber.
      assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?");

      if (const auto *MD = dyn_cast<MemoryDef>(Val: MA)) {
        // If Start is a Def, skip self.
        if (MD == Start)
          continue;

        BatchAACrossIterationScope _(AA, MAP.MayBeCrossIteration);
        assert(!instructionClobbersQuery(MD, MAP.Loc, Query.Inst, AA) &&
               "Found clobber before reaching ClobberAt!");
        continue;
      }

      if (const auto *MU = dyn_cast<MemoryUse>(Val: MA)) {
        (void)MU;
        assert (MU == Start &&
                "Can only find use in def chain if Start is a use");
        continue;
      }

      assert(isa<MemoryPhi>(MA));

      // Add reachable phi predecessors
      for (auto ItB = upward_defs_begin(Pair: {.MA: MA, .Loc: MAP.Loc, .MayBeCrossIteration: MAP.MayBeCrossIteration},
                                        DT&: MSSA.getDomTree()),
                ItE = upward_defs_end();
           ItB != ItE; ++ItB)
        if (MSSA.getDomTree().isReachableFromEntry(A: ItB.getPhiArgBlock()))
          Worklist.emplace_back(Args: *ItB);
    }
  }

  // If the verify is done following an optimization, it's possible that
  // ClobberAt was a conservative clobbering, that we can now infer is not a
  // true clobbering access. Don't fail the verify if that's the case.
  // We do have accesses that claim they're optimized, but could be optimized
  // further. Updating all these can be expensive, so allow it for now (FIXME).
  if (AllowImpreciseClobber)
    return;

  // If ClobberAt is a MemoryPhi, we can assume something above it acted as a
  // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point.
  assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) &&
         "ClobberAt never acted as a clobber");
}
486
487namespace {
488
489/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
490/// in one class.
491class ClobberWalker {
492 /// Save a few bytes by using unsigned instead of size_t.
493 using ListIndex = unsigned;
494
  /// Represents a span of contiguous MemoryDefs, potentially ending in a
  /// MemoryPhi.
  struct DefPath {
    // The memory location this path is tracking.
    MemoryLocation Loc;
    // Note that, because we always walk in reverse, Last will always dominate
    // First. Also note that First and Last are inclusive.
    MemoryAccess *First;
    MemoryAccess *Last;
    // Index (into ClobberWalker::Paths) of the path this one forked off from,
    // if any.
    std::optional<ListIndex> Previous;
    // Whether this path may have crossed a loop backedge; forwarded into
    // BatchAACrossIterationScope around alias queries made along the path.
    bool MayBeCrossIteration;

    DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last,
            bool MayBeCrossIteration, std::optional<ListIndex> Previous)
        : Loc(Loc), First(First), Last(Last), Previous(Previous),
          MayBeCrossIteration(MayBeCrossIteration) {}

    DefPath(const MemoryLocation &Loc, MemoryAccess *Init,
            bool MayBeCrossIteration, std::optional<ListIndex> Previous)
        : DefPath(Loc, Init, Init, MayBeCrossIteration, Previous) {}
  };

  const MemorySSA &MSSA;
  DominatorTree &DT;
  BatchAAResults *AA;
  UpwardsMemoryQuery *Query;
  // Budget for a single walk; decremented in walkToPhiOrClobber.
  unsigned *UpwardWalkLimit;

  // Phi optimization bookkeeping:
  // List of DefPath to process during the current phi optimization walk.
  SmallVector<DefPath, 32> Paths;
  // List of visited <Access, Location> pairs; we can skip paths already
  // visited with the same memory location.
  DenseSet<UpwardDefsElem> VisitedPhis;
528
529 /// Find the nearest def or phi that `From` can legally be optimized to.
530 const MemoryAccess *getWalkTarget(const MemoryPhi *From) const {
531 assert(From->getNumOperands() && "Phi with no operands?");
532
533 BasicBlock *BB = From->getBlock();
534 MemoryAccess *Result = MSSA.getLiveOnEntryDef();
535 DomTreeNode *Node = DT.getNode(BB);
536 while ((Node = Node->getIDom())) {
537 auto *Defs = MSSA.getBlockDefs(BB: Node->getBlock());
538 if (Defs)
539 return &*Defs->rbegin();
540 }
541 return Result;
542 }
543
  /// Result of calling walkToPhiOrClobber.
  struct UpwardsWalkResult {
    /// The "Result" of the walk. Either a clobber, the last thing we walked, or
    /// both. Include alias info when clobber found.
    MemoryAccess *Result;
    /// True when Result is known to clobber the queried location.
    bool IsKnownClobber;
  };

  /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last.
  /// This will update Desc.Last as it walks. It will (optionally) also stop at
  /// StopAt.
  ///
  /// This does not test for whether StopAt is a clobber
  UpwardsWalkResult
  walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr,
                     const MemoryAccess *SkipStopAt = nullptr) const {
    assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
    assert(UpwardWalkLimit && "Need a valid walk limit");
    bool LimitAlreadyReached = false;
    // (*UpwardWalkLimit) may be 0 here, due to the loop in tryOptimizePhi. Set
    // it to 1. This will not do any alias() calls. It either returns in the
    // first iteration in the loop below, or is set back to 0 if all def chains
    // are free of MemoryDefs.
    if (!*UpwardWalkLimit) {
      *UpwardWalkLimit = 1;
      LimitAlreadyReached = true;
    }

    for (MemoryAccess *Current : def_chain(MA: Desc.Last)) {
      Desc.Last = Current;
      if (Current == StopAt || Current == SkipStopAt)
        return {.Result: Current, .IsKnownClobber: false};

      if (auto *MD = dyn_cast<MemoryDef>(Val: Current)) {
        if (MSSA.isLiveOnEntryDef(MA: MD))
          return {.Result: MD, .IsKnownClobber: true};

        // Out of budget: conservatively report the current def as a clobber.
        if (!--*UpwardWalkLimit)
          return {.Result: Current, .IsKnownClobber: true};

        BatchAACrossIterationScope _(*AA, Desc.MayBeCrossIteration);
        if (instructionClobbersQuery(MD, UseLoc: Desc.Loc, UseInst: Query->Inst, AA&: *AA))
          return {.Result: MD, .IsKnownClobber: true};
      }
    }

    if (LimitAlreadyReached)
      *UpwardWalkLimit = 0;

    assert(isa<MemoryPhi>(Desc.Last) &&
           "Ended at a non-clobber that's not a phi?");
    return {.Result: Desc.Last, .IsKnownClobber: false};
  }
597
598 void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
599 ListIndex PriorNode, bool MayBeCrossIteration) {
600 auto UpwardDefsBegin =
601 upward_defs_begin(Pair: {.MA: Phi, .Loc: Paths[PriorNode].Loc, .MayBeCrossIteration: MayBeCrossIteration}, DT);
602 auto UpwardDefs = make_range(x: UpwardDefsBegin, y: upward_defs_end());
603 for (const UpwardDefsElem &E : UpwardDefs) {
604 PausedSearches.push_back(Elt: Paths.size());
605 Paths.emplace_back(Args: E.Loc, Args: E.MA, Args: E.MayBeCrossIteration, Args&: PriorNode);
606 }
607 }
608
  /// Represents a search that terminated after finding a clobber. This clobber
  /// may or may not be present in the path of defs from LastNode..SearchStart,
  /// since it may have been retrieved from cache.
  struct TerminatedPath {
    // The clobbering access that ended this search.
    MemoryAccess *Clobber;
    // Index into Paths of the node the search ended at.
    ListIndex LastNode;
  };

  /// Get an access that keeps us from optimizing to the given phi.
  ///
  /// PausedSearches is an array of indices into the Paths array. Its incoming
  /// value is the indices of searches that stopped at the last phi optimization
  /// target. It's left in an unspecified state.
  ///
  /// If this returns std::nullopt, NewPaused is a vector of searches that
  /// terminated at StopWhere. Otherwise, NewPaused is left in an unspecified
  /// state.
  std::optional<TerminatedPath>
  getBlockingAccess(const MemoryAccess *StopWhere,
                    SmallVectorImpl<ListIndex> &PausedSearches,
                    SmallVectorImpl<ListIndex> &NewPaused,
                    SmallVectorImpl<TerminatedPath> &Terminated) {
    assert(!PausedSearches.empty() && "No searches to continue?");

    // BFS vs DFS really doesn't make a difference here, so just do a DFS with
    // PausedSearches as our stack.
    while (!PausedSearches.empty()) {
      ListIndex PathIndex = PausedSearches.pop_back_val();
      DefPath &Node = Paths[PathIndex];

      // If we've already visited this path with this MemoryLocation, we don't
      // need to do so again.
      //
      // NOTE: That we just drop these paths on the ground makes caching
      // behavior sporadic. e.g. given a diamond:
      //  A
      // B C
      //  D
      //
      // ...If we walk D, B, A, C, we'll only cache the result of phi
      // optimization for A, B, and D; C will be skipped because it dies here.
      // This arguably isn't the worst thing ever, since:
      //   - We generally query things in a top-down order, so if we got below D
      //     without needing cache entries for {C, MemLoc}, then chances are
      //     that those cache entries would end up ultimately unused.
      //   - We still cache things for A, so C only needs to walk up a bit.
      // If this behavior becomes problematic, we can fix without a ton of extra
      // work.
      if (!VisitedPhis.insert(V: {.MA: Node.Last, .Loc: Node.Loc, .MayBeCrossIteration: Node.MayBeCrossIteration})
              .second)
        continue;

      // In skip-self mode, the original access must be walked through rather
      // than stopped at when the walk loops back around to it.
      const MemoryAccess *SkipStopWhere = nullptr;
      if (Query->SkipSelfAccess && Node.Loc == Query->StartingLoc) {
        assert(isa<MemoryDef>(Query->OriginalAccess));
        SkipStopWhere = Query->OriginalAccess;
      }

      UpwardsWalkResult Res = walkToPhiOrClobber(Desc&: Node,
                                                 /*StopAt=*/StopWhere,
                                                 /*SkipStopAt=*/SkipStopWhere);
      if (Res.IsKnownClobber) {
        assert(Res.Result != StopWhere && Res.Result != SkipStopWhere);

        // If this wasn't a cache hit, we hit a clobber when walking. That's a
        // failure.
        TerminatedPath Term{.Clobber: Res.Result, .LastNode: PathIndex};
        if (!MSSA.dominates(A: Res.Result, B: StopWhere))
          return Term;

        // Otherwise, it's a valid thing to potentially optimize to.
        Terminated.push_back(Elt: Term);
        continue;
      }

      if (Res.Result == StopWhere || Res.Result == SkipStopWhere) {
        // We've hit our target. Save this path off for if we want to continue
        // walking. If we are in the mode of skipping the OriginalAccess, and
        // we've reached back to the OriginalAccess, do not save path, we've
        // just looped back to self.
        if (Res.Result != SkipStopWhere)
          NewPaused.push_back(Elt: PathIndex);
        continue;
      }

      assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber");
      addSearches(Phi: cast<MemoryPhi>(Val: Res.Result), PausedSearches, PriorNode: PathIndex,
                  MayBeCrossIteration: Node.MayBeCrossIteration);
    }

    return std::nullopt;
  }
701
  /// Forward iterator that walks a chain of DefPaths backwards through their
  /// Previous links, starting from an index into Walker::Paths.
  template <typename T, typename Walker>
  struct generic_def_path_iterator
      : public iterator_facade_base<generic_def_path_iterator<T, Walker>,
                                    std::forward_iterator_tag, T *> {
    generic_def_path_iterator() = default;
    generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}

    T &operator*() const { return curNode(); }

    generic_def_path_iterator &operator++() {
      N = curNode().Previous;
      return *this;
    }

    // Equal when both are end iterators (no index) or both hold the same
    // index.
    bool operator==(const generic_def_path_iterator &O) const {
      if (N.has_value() != O.N.has_value())
        return false;
      return !N || *N == *O.N;
    }

  private:
    T &curNode() const { return W->Paths[*N]; }

    Walker *W = nullptr;
    // Current index into W->Paths; std::nullopt marks the end iterator.
    std::optional<ListIndex> N;
  };

  using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>;
  using const_def_path_iterator =
      generic_def_path_iterator<const DefPath, const ClobberWalker>;

  // Range of DefPaths reachable from From via Previous links.
  iterator_range<def_path_iterator> def_path(ListIndex From) {
    return make_range(x: def_path_iterator(this, From), y: def_path_iterator());
  }

  iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const {
    return make_range(x: const_def_path_iterator(this, From),
                      y: const_def_path_iterator());
  }

  struct OptznResult {
    /// The path that contains our result.
    TerminatedPath PrimaryClobber;
    /// The paths that we can legally cache back from, but that aren't
    /// necessarily the result of the Phi optimization.
    SmallVector<TerminatedPath, 4> OtherClobbers;
  };
749
750 ListIndex defPathIndex(const DefPath &N) const {
751 // The assert looks nicer if we don't need to do &N
752 const DefPath *NP = &N;
753 assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() &&
754 "Out of bounds DefPath!");
755 return NP - &Paths.front();
756 }
757
758 /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths
759 /// that act as legal clobbers. Note that this won't return *all* clobbers.
760 ///
761 /// Phi optimization algorithm tl;dr:
762 /// - Find the earliest def/phi, A, we can optimize to
763 /// - Find if all paths from the starting memory access ultimately reach A
764 /// - If not, optimization isn't possible.
765 /// - Otherwise, walk from A to another clobber or phi, A'.
766 /// - If A' is a def, we're done.
767 /// - If A' is a phi, try to optimize it.
768 ///
769 /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path
770 /// terminates when a MemoryAccess that clobbers said MemoryLocation is found.
771 OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start,
772 const MemoryLocation &Loc) {
773 assert(Paths.empty() && VisitedPhis.empty() &&
774 "Reset the optimization state.");
775
776 Paths.emplace_back(Args: Loc, Args&: Start, Args&: Phi, /*MayBeCrossIteration=*/Args: false,
777 Args: std::nullopt);
778 // Stores how many "valid" optimization nodes we had prior to calling
779 // addSearches/getBlockingAccess. Necessary for caching if we had a blocker.
780 auto PriorPathsSize = Paths.size();
781
782 SmallVector<ListIndex, 16> PausedSearches;
783 SmallVector<ListIndex, 8> NewPaused;
784 SmallVector<TerminatedPath, 4> TerminatedPaths;
785
786 addSearches(Phi, PausedSearches, PriorNode: 0, /*MayBeCrossIteration=*/false);
787
788 // Moves the TerminatedPath with the "most dominated" Clobber to the end of
789 // Paths.
790 auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) {
791 assert(!Paths.empty() && "Need a path to move");
792 auto Dom = Paths.begin();
793 for (auto I = std::next(x: Dom), E = Paths.end(); I != E; ++I)
794 if (!MSSA.dominates(A: I->Clobber, B: Dom->Clobber))
795 Dom = I;
796 auto Last = Paths.end() - 1;
797 if (Last != Dom)
798 std::iter_swap(a: Last, b: Dom);
799 };
800
801 MemoryPhi *Current = Phi;
802 while (true) {
803 assert(!MSSA.isLiveOnEntryDef(Current) &&
804 "liveOnEntry wasn't treated as a clobber?");
805
806 const auto *Target = getWalkTarget(From: Current);
807 // If a TerminatedPath doesn't dominate Target, then it wasn't a legal
808 // optimization for the prior phi.
809 assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) {
810 return MSSA.dominates(P.Clobber, Target);
811 }));
812
813 // FIXME: This is broken, because the Blocker may be reported to be
814 // liveOnEntry, and we'll happily wait for that to disappear (read: never)
815 // For the moment, this is fine, since we do nothing with blocker info.
816 if (std::optional<TerminatedPath> Blocker = getBlockingAccess(
817 StopWhere: Target, PausedSearches, NewPaused, Terminated&: TerminatedPaths)) {
818
819 // Find the node we started at. We can't search based on N->Last, since
820 // we may have gone around a loop with a different MemoryLocation.
821 auto Iter = find_if(Range: def_path(From: Blocker->LastNode), P: [&](const DefPath &N) {
822 return defPathIndex(N) < PriorPathsSize;
823 });
824 assert(Iter != def_path_iterator());
825
826 DefPath &CurNode = *Iter;
827 assert(CurNode.Last == Current);
828
829 // Two things:
830 // A. We can't reliably cache all of NewPaused back. Consider a case
831 // where we have two paths in NewPaused; one of which can't optimize
832 // above this phi, whereas the other can. If we cache the second path
833 // back, we'll end up with suboptimal cache entries. We can handle
834 // cases like this a bit better when we either try to find all
835 // clobbers that block phi optimization, or when our cache starts
836 // supporting unfinished searches.
837 // B. We can't reliably cache TerminatedPaths back here without doing
838 // extra checks; consider a case like:
839 // T
840 // / \
841 // D C
842 // \ /
843 // S
844 // Where T is our target, C is a node with a clobber on it, D is a
845 // diamond (with a clobber *only* on the left or right node, N), and
846 // S is our start. Say we walk to D, through the node opposite N
847 // (read: ignoring the clobber), and see a cache entry in the top
848 // node of D. That cache entry gets put into TerminatedPaths. We then
849 // walk up to C (N is later in our worklist), find the clobber, and
850 // quit. If we append TerminatedPaths to OtherClobbers, we'll cache
851 // the bottom part of D to the cached clobber, ignoring the clobber
852 // in N. Again, this problem goes away if we start tracking all
853 // blockers for a given phi optimization.
854 TerminatedPath Result{.Clobber: CurNode.Last, .LastNode: defPathIndex(N: CurNode)};
855 return {.PrimaryClobber: Result, .OtherClobbers: {}};
856 }
857
858 // If there's nothing left to search, then all paths led to valid clobbers
859 // that we got from our cache; pick the nearest to the start, and allow
860 // the rest to be cached back.
861 if (NewPaused.empty()) {
862 MoveDominatedPathToEnd(TerminatedPaths);
863 TerminatedPath Result = TerminatedPaths.pop_back_val();
864 return {.PrimaryClobber: Result, .OtherClobbers: std::move(TerminatedPaths)};
865 }
866
867 MemoryAccess *DefChainEnd = nullptr;
868 SmallVector<TerminatedPath, 4> Clobbers;
869 for (ListIndex Paused : NewPaused) {
870 UpwardsWalkResult WR = walkToPhiOrClobber(Desc&: Paths[Paused]);
871 if (WR.IsKnownClobber)
872 Clobbers.push_back(Elt: {.Clobber: WR.Result, .LastNode: Paused});
873 else
874 // Micro-opt: If we hit the end of the chain, save it.
875 DefChainEnd = WR.Result;
876 }
877
878 if (!TerminatedPaths.empty()) {
879 // If we couldn't find the dominating phi/liveOnEntry in the above loop,
880 // do it now.
881 if (!DefChainEnd)
882 for (auto *MA : def_chain(MA: const_cast<MemoryAccess *>(Target)))
883 DefChainEnd = MA;
884 assert(DefChainEnd && "Failed to find dominating phi/liveOnEntry");
885
886 // If any of the terminated paths don't dominate the phi we'll try to
887 // optimize, we need to figure out what they are and quit.
888 const BasicBlock *ChainBB = DefChainEnd->getBlock();
889 for (const TerminatedPath &TP : TerminatedPaths) {
890 // Because we know that DefChainEnd is as "high" as we can go, we
891 // don't need local dominance checks; BB dominance is sufficient.
892 if (DT.dominates(A: ChainBB, B: TP.Clobber->getBlock()))
893 Clobbers.push_back(Elt: TP);
894 }
895 }
896
897 // If we have clobbers in the def chain, find the one closest to Current
898 // and quit.
899 if (!Clobbers.empty()) {
900 MoveDominatedPathToEnd(Clobbers);
901 TerminatedPath Result = Clobbers.pop_back_val();
902 return {.PrimaryClobber: Result, .OtherClobbers: std::move(Clobbers)};
903 }
904
905 assert(all_of(NewPaused,
906 [&](ListIndex I) { return Paths[I].Last == DefChainEnd; }));
907
908 // Because liveOnEntry is a clobber, this must be a phi.
909 auto *DefChainPhi = cast<MemoryPhi>(Val: DefChainEnd);
910
911 PriorPathsSize = Paths.size();
912 PausedSearches.clear();
913 for (ListIndex I : NewPaused)
914 addSearches(Phi: DefChainPhi, PausedSearches, PriorNode: I,
915 MayBeCrossIteration: Paths[I].MayBeCrossIteration);
916 NewPaused.clear();
917
918 Current = DefChainPhi;
919 }
920 }
921
922 void verifyOptResult(const OptznResult &R) const {
923 assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) {
924 return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber);
925 }));
926 }
927
928 void resetPhiOptznState() {
929 Paths.clear();
930 VisitedPhis.clear();
931 }
932
933public:
  /// Construct a walker over \p MSSA, using \p DT for dominance queries.
  /// Both references must outlive this walker.
  ClobberWalker(const MemorySSA &MSSA, DominatorTree &DT)
      : MSSA(MSSA), DT(DT) {}
936
  /// Finds the nearest clobber for the given query, optimizing phis if
  /// possible. \p UpWalkLimit caps the number of upward steps and is
  /// decremented in place as the walk proceeds; Q supplies the starting
  /// location and query flags.
  MemoryAccess *findClobber(BatchAAResults &BAA, MemoryAccess *Start,
                            UpwardsMemoryQuery &Q, unsigned &UpWalkLimit) {
    // Stash per-query state in members for use by the helper routines.
    AA = &BAA;
    Query = &Q;
    UpwardWalkLimit = &UpWalkLimit;
    // Starting limit must be > 0.
    if (!UpWalkLimit)
      UpWalkLimit++;

    MemoryAccess *Current = Start;
    // This walker pretends uses don't exist. If we're handed one, silently grab
    // its def. (This has the nice side-effect of ensuring we never cache uses)
    if (auto *MU = dyn_cast<MemoryUse>(Val: Start))
      Current = MU->getDefiningAccess();

    DefPath FirstDesc(Q.StartingLoc, Current, Current,
                      /*MayBeCrossIteration=*/false, std::nullopt);
    // Fast path for the overly-common case (no crazy phi optimization
    // necessary)
    UpwardsWalkResult WalkResult = walkToPhiOrClobber(Desc&: FirstDesc);
    MemoryAccess *Result;
    if (WalkResult.IsKnownClobber) {
      Result = WalkResult.Result;
    } else {
      // We stopped at a MemoryPhi; try to find a clobber above it, then clear
      // the scratch state the optimization accumulated.
      OptznResult OptRes = tryOptimizePhi(Phi: cast<MemoryPhi>(Val: FirstDesc.Last),
                                          Start: Current, Loc: Q.StartingLoc);
      verifyOptResult(R: OptRes);
      resetPhiOptznState();
      Result = OptRes.PrimaryClobber.Clobber;
    }

#ifdef EXPENSIVE_CHECKS
    // Only meaningful when the walk was not cut short by the limit.
    if (!Q.SkipSelfAccess && *UpwardWalkLimit > 0)
      checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, BAA);
#endif
    return Result;
  }
976};
977
978struct RenamePassData {
979 DomTreeNode *DTN;
980 DomTreeNode::const_iterator ChildIt;
981 MemoryAccess *IncomingVal;
982
983 RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
984 MemoryAccess *M)
985 : DTN(D), ChildIt(It), IncomingVal(M) {}
986
987 void swap(RenamePassData &RHS) {
988 std::swap(a&: DTN, b&: RHS.DTN);
989 std::swap(a&: ChildIt, b&: RHS.ChildIt);
990 std::swap(a&: IncomingVal, b&: RHS.IncomingVal);
991 }
992};
993
994} // end anonymous namespace
995
996namespace llvm {
997
/// Shared implementation state for MemorySSA's walkers: owns the actual
/// ClobberWalker and is referenced by both CachingWalker and SkipSelfWalker.
class MemorySSA::ClobberWalkerBase {
  ClobberWalker Walker;
  MemorySSA *MSSA;

public:
  ClobberWalkerBase(MemorySSA *M, DominatorTree *D) : Walker(*M, *D), MSSA(M) {}

  /// Find the clobbering access for \p MA at an explicit MemoryLocation,
  /// honoring the walk-limit reference.
  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *,
                                              const MemoryLocation &,
                                              BatchAAResults &, unsigned &);
  // Third argument (bool), defines whether the clobber search should skip the
  // original queried access. If true, there will be a follow-up query searching
  // for a clobber access past "self". Note that the Optimized access is not
  // updated if a new clobber is found by this SkipSelf search. If this
  // additional query becomes heavily used we may decide to cache the result.
  // Walker instantiations will decide how to set the SkipSelf bool.
  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, BatchAAResults &,
                                              unsigned &, bool,
                                              bool UseInvariantGroup = true);
};
1018
1019/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
1020/// longer does caching on its own, but the name has been retained for the
1021/// moment.
1022class MemorySSA::CachingWalker final : public MemorySSAWalker {
1023 ClobberWalkerBase *Walker;
1024
1025public:
1026 CachingWalker(MemorySSA *M, ClobberWalkerBase *W)
1027 : MemorySSAWalker(M), Walker(W) {}
1028 ~CachingWalker() override = default;
1029
1030 using MemorySSAWalker::getClobberingMemoryAccess;
1031
1032 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, BatchAAResults &BAA,
1033 unsigned &UWL) {
1034 return Walker->getClobberingMemoryAccessBase(MA, BAA, UWL, false);
1035 }
1036 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1037 const MemoryLocation &Loc,
1038 BatchAAResults &BAA, unsigned &UWL) {
1039 return Walker->getClobberingMemoryAccessBase(MA, Loc, BAA, UWL);
1040 }
1041 // This method is not accessible outside of this file.
1042 MemoryAccess *getClobberingMemoryAccessWithoutInvariantGroup(
1043 MemoryAccess *MA, BatchAAResults &BAA, unsigned &UWL) {
1044 return Walker->getClobberingMemoryAccessBase(MA, BAA, UWL, false, UseInvariantGroup: false);
1045 }
1046
1047 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1048 BatchAAResults &BAA) override {
1049 unsigned UpwardWalkLimit = MaxCheckLimit;
1050 return getClobberingMemoryAccess(MA, BAA, UWL&: UpwardWalkLimit);
1051 }
1052 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1053 const MemoryLocation &Loc,
1054 BatchAAResults &BAA) override {
1055 unsigned UpwardWalkLimit = MaxCheckLimit;
1056 return getClobberingMemoryAccess(MA, Loc, BAA, UWL&: UpwardWalkLimit);
1057 }
1058
1059 void invalidateInfo(MemoryAccess *MA) override {
1060 if (auto *MUD = dyn_cast<MemoryUseOrDef>(Val: MA))
1061 MUD->resetOptimized();
1062 }
1063};
1064
1065class MemorySSA::SkipSelfWalker final : public MemorySSAWalker {
1066 ClobberWalkerBase *Walker;
1067
1068public:
1069 SkipSelfWalker(MemorySSA *M, ClobberWalkerBase *W)
1070 : MemorySSAWalker(M), Walker(W) {}
1071 ~SkipSelfWalker() override = default;
1072
1073 using MemorySSAWalker::getClobberingMemoryAccess;
1074
1075 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, BatchAAResults &BAA,
1076 unsigned &UWL) {
1077 return Walker->getClobberingMemoryAccessBase(MA, BAA, UWL, true);
1078 }
1079 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1080 const MemoryLocation &Loc,
1081 BatchAAResults &BAA, unsigned &UWL) {
1082 return Walker->getClobberingMemoryAccessBase(MA, Loc, BAA, UWL);
1083 }
1084
1085 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1086 BatchAAResults &BAA) override {
1087 unsigned UpwardWalkLimit = MaxCheckLimit;
1088 return getClobberingMemoryAccess(MA, BAA, UWL&: UpwardWalkLimit);
1089 }
1090 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1091 const MemoryLocation &Loc,
1092 BatchAAResults &BAA) override {
1093 unsigned UpwardWalkLimit = MaxCheckLimit;
1094 return getClobberingMemoryAccess(MA, Loc, BAA, UWL&: UpwardWalkLimit);
1095 }
1096
1097 void invalidateInfo(MemoryAccess *MA) override {
1098 if (auto *MUD = dyn_cast<MemoryUseOrDef>(Val: MA))
1099 MUD->resetOptimized();
1100 }
1101};
1102
1103} // end namespace llvm
1104
1105void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal,
1106 bool RenameAllUses) {
1107 // Pass through values to our successors
1108 for (const BasicBlock *S : successors(BB)) {
1109 auto It = PerBlockAccesses.find(Val: S);
1110 // Rename the phi nodes in our successor block
1111 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(Val: It->second->front()))
1112 continue;
1113 AccessList *Accesses = It->second.get();
1114 auto *Phi = cast<MemoryPhi>(Val: &Accesses->front());
1115 if (RenameAllUses) {
1116 bool ReplacementDone = false;
1117 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
1118 if (Phi->getIncomingBlock(I) == BB) {
1119 Phi->setIncomingValue(I, V: IncomingVal);
1120 ReplacementDone = true;
1121 }
1122 (void) ReplacementDone;
1123 assert(ReplacementDone && "Incomplete phi during partial rename");
1124 } else
1125 Phi->addIncoming(V: IncomingVal, BB);
1126 }
1127}
1128
1129/// Rename a single basic block into MemorySSA form.
1130/// Uses the standard SSA renaming algorithm.
1131/// \returns The new incoming value.
1132MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal,
1133 bool RenameAllUses) {
1134 auto It = PerBlockAccesses.find(Val: BB);
1135 // Skip most processing if the list is empty.
1136 if (It != PerBlockAccesses.end()) {
1137 AccessList *Accesses = It->second.get();
1138 for (MemoryAccess &L : *Accesses) {
1139 if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(Val: &L)) {
1140 if (MUD->getDefiningAccess() == nullptr || RenameAllUses)
1141 MUD->setDefiningAccess(DMA: IncomingVal);
1142 if (isa<MemoryDef>(Val: &L))
1143 IncomingVal = &L;
1144 } else {
1145 IncomingVal = &L;
1146 }
1147 }
1148 }
1149 return IncomingVal;
1150}
1151
/// This is the standard SSA renaming algorithm.
///
/// We walk the dominator tree in preorder, renaming accesses, and then filling
/// in phi nodes in our successors. \p Visited accumulates the blocks renamed;
/// when \p SkipVisited is set, blocks already in \p Visited are not renamed
/// again (used for incremental renaming of a sub-tree). \p RenameAllUses
/// forces every use/def to be rewired, not just ones with no defining access.
void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
                           SmallPtrSetImpl<BasicBlock *> &Visited,
                           bool SkipVisited, bool RenameAllUses) {
  assert(Root && "Trying to rename accesses in an unreachable block");

  // Explicit stack for an iterative preorder walk of the dominator tree.
  SmallVector<RenamePassData, 32> WorkStack;
  // Skip everything if we already renamed this block and we are skipping.
  // Note: You can't sink this into the if, because we need it to occur
  // regardless of whether we skip blocks or not.
  bool AlreadyVisited = !Visited.insert(Ptr: Root->getBlock()).second;
  if (SkipVisited && AlreadyVisited)
    return;

  // Process the root eagerly, then push it so its children get visited.
  IncomingVal = renameBlock(BB: Root->getBlock(), IncomingVal, RenameAllUses);
  renameSuccessorPhis(BB: Root->getBlock(), IncomingVal, RenameAllUses);
  WorkStack.push_back(Elt: {Root, Root->begin(), IncomingVal});

  while (!WorkStack.empty()) {
    DomTreeNode *Node = WorkStack.back().DTN;
    DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt;
    IncomingVal = WorkStack.back().IncomingVal;

    if (ChildIt == Node->end()) {
      // All children of this node are done; pop the frame.
      WorkStack.pop_back();
    } else {
      DomTreeNode *Child = *ChildIt;
      ++WorkStack.back().ChildIt;
      BasicBlock *BB = Child->getBlock();
      // Note: You can't sink this into the if, because we need it to occur
      // regardless of whether we skip blocks or not.
      AlreadyVisited = !Visited.insert(Ptr: BB).second;
      if (SkipVisited && AlreadyVisited) {
        // We already visited this during our renaming, which can happen when
        // being asked to rename multiple blocks. Figure out the incoming val,
        // which is the last def.
        // Incoming value can only change if there is a block def, and in that
        // case, it's the last block def in the list.
        if (auto *BlockDefs = getBlockDefs(BB))
          IncomingVal = &*BlockDefs->rbegin();
      } else
        IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses);
      renameSuccessorPhis(BB, IncomingVal, RenameAllUses);
      WorkStack.push_back(Elt: {Child, Child->begin(), IncomingVal});
    }
  }
}
1202
/// This handles unreachable block accesses by deleting phi nodes in
/// unreachable blocks, and marking all other unreachable MemoryAccess's as
/// being uses of the live on entry definition.
void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
  assert(!DT->isReachableFromEntry(BB) &&
         "Reachable block found while handling unreachable blocks");

  // Make sure phi nodes in our reachable successors end up with a
  // LiveOnEntryDef for our incoming edge, even though our block is forward
  // unreachable. We could just disconnect these blocks from the CFG fully,
  // but we do not right now.
  for (const BasicBlock *S : successors(BB)) {
    if (!DT->isReachableFromEntry(A: S))
      continue;
    auto It = PerBlockAccesses.find(Val: S);
    // Rename the phi nodes in our successor block
    if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(Val: It->second->front()))
      continue;
    AccessList *Accesses = It->second.get();
    auto *Phi = cast<MemoryPhi>(Val: &Accesses->front());
    Phi->addIncoming(V: LiveOnEntryDef.get(), BB);
  }

  // Now process the accesses of the unreachable block itself, if it has any.
  auto It = PerBlockAccesses.find(Val: BB);
  if (It == PerBlockAccesses.end())
    return;

  auto &Accesses = It->second;
  // Manual iteration: erase() invalidates the current iterator, so advance
  // before mutating the list.
  for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) {
    auto Next = std::next(x: AI);
    // Uses and defs are retargeted to liveOnEntry; a phi (the only other
    // possibility) is simply removed, since all its users are about to be
    // pointed at liveOnEntry anyway.
    if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Val&: AI))
      UseOrDef->setDefiningAccess(DMA: LiveOnEntryDef.get());
    else
      Accesses->erase(where: AI);
    AI = Next;
  }
}
1242
/// Build MemorySSA over an entire function. \p AA and \p DT must outlive
/// this object.
MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
    : DT(DT), F(&Func), LiveOnEntryDef(nullptr), Walker(nullptr),
      SkipWalker(nullptr) {
  // Build MemorySSA using a batch alias analysis. This reuses the internal
  // state that AA collects during an alias()/getModRefInfo() call. This is
  // safe because there are no CFG changes while building MemorySSA and can
  // significantly reduce the time spent by the compiler in AA, because we will
  // make queries about all the instructions in the Function.
  assert(AA && "No alias analysis?");
  BatchAAResults BatchAA(*AA);
  buildMemorySSA(BAA&: BatchAA, Blocks: iterator_range(F->begin(), F->end()));
  // Intentionally leave AA to nullptr while building so we don't accidentally
  // use non-batch AliasAnalysis.
  this->AA = AA;
  // Also create the walker here.
  getWalker();
}
1260
/// Build MemorySSA restricted to the blocks of a single loop. \p AA and
/// \p DT must outlive this object.
MemorySSA::MemorySSA(Loop &L, AliasAnalysis *AA, DominatorTree *DT)
    : DT(DT), L(&L), LiveOnEntryDef(nullptr), Walker(nullptr),
      SkipWalker(nullptr) {
  // Build MemorySSA using a batch alias analysis. This reuses the internal
  // state that AA collects during an alias()/getModRefInfo() call. This is
  // safe because there are no CFG changes while building MemorySSA and can
  // significantly reduce the time spent by the compiler in AA, because we will
  // make queries about all the instructions in the loop's blocks.
  assert(AA && "No alias analysis?");
  BatchAAResults BatchAA(*AA);
  // Loop::blocks() yields const pointers; buildMemorySSA needs mutable
  // references, hence the const_cast in the adaptor.
  buildMemorySSA(
      BAA&: BatchAA, Blocks: map_range(C: L.blocks(), F: [](const BasicBlock *BB) -> BasicBlock & {
        return *const_cast<BasicBlock *>(BB);
      }));
  // Intentionally leave AA to nullptr while building so we don't accidentally
  // use non-batch AliasAnalysis.
  this->AA = AA;
  // Also create the walker here.
  getWalker();
}
1281
1282MemorySSA::~MemorySSA() {
1283 // Drop all our references
1284 for (const auto &Pair : PerBlockAccesses)
1285 for (MemoryAccess &MA : *Pair.second)
1286 MA.dropAllReferences();
1287}
1288
1289MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
1290 auto Res = PerBlockAccesses.try_emplace(Key: BB);
1291
1292 if (Res.second)
1293 Res.first->second = std::make_unique<AccessList>();
1294 return Res.first->second.get();
1295}
1296
1297MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) {
1298 auto Res = PerBlockDefs.try_emplace(Key: BB);
1299
1300 if (Res.second)
1301 Res.first->second = std::make_unique<DefsList>();
1302 return Res.first->second.get();
1303}
1304
1305namespace llvm {
1306
/// This class is a batch walker of all MemoryUse's in the program, and points
/// their defining access at the thing that actually clobbers them. Because it
/// is a batch walker that touches everything, it does not operate like the
/// other walkers. This walker is basically performing a top-down SSA renaming
/// pass, where the version stack is used as the cache. This enables it to be
/// significantly more time and memory efficient than using the regular walker,
/// which is walking bottom-up.
class MemorySSA::OptimizeUses {
public:
  OptimizeUses(MemorySSA *MSSA, CachingWalker *Walker, BatchAAResults *BAA,
               DominatorTree *DT)
      : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT) {}

  /// Optimize every MemoryUse in the function in one top-down pass.
  void optimizeUses();

private:
  /// This represents where a given memorylocation is in the stack.
  struct MemlocStackInfo {
    // This essentially is keeping track of versions of the stack. Whenever
    // the stack changes due to pushes or pops, these versions increase.
    unsigned long StackEpoch;
    unsigned long PopEpoch;
    // This is the lower bound of places on the stack to check. It is equal to
    // the place the last stack walk ended.
    // Note: Correctness depends on this being initialized to 0, which densemap
    // does
    unsigned long LowerBound;
    // Block in which LowerBound was established; used to detect when the
    // lower bound no longer dominates the current block.
    const BasicBlock *LowerBoundBlock;
    // This is where the last walk for this memory location ended.
    unsigned long LastKill;
    // Whether LastKill still refers to a valid stack position.
    bool LastKillValid;
  };

  /// Optimize the uses of a single block given the shared version stack and
  /// per-location bookkeeping (see optimizeUses for the driver).
  void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
                           SmallVectorImpl<MemoryAccess *> &,
                           DenseMap<MemoryLocOrCall, MemlocStackInfo> &);

  MemorySSA *MSSA;
  CachingWalker *Walker;
  BatchAAResults *AA;
  DominatorTree *DT;
};
1349
1350} // end namespace llvm
1351
/// Optimize the uses in a given block This is basically the SSA renaming
/// algorithm, with one caveat: We are able to use a single stack for all
/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just
/// going to be some position in that stack of possible ones.
///
/// We track the stack positions that each MemoryLocation needs
/// to check, and last ended at. This is because we only want to check the
/// things that changed since last time. The same MemoryLocation should
/// get clobbered by the same store (getModRefInfo does not use invariantness or
/// things like this, and if they start, we can modify MemoryLocOrCall to
/// include relevant data)
void MemorySSA::OptimizeUses::optimizeUsesInBlock(
    const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
    SmallVectorImpl<MemoryAccess *> &VersionStack,
    DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {

  /// If no accesses, nothing to do.
  MemorySSA::AccessList *Accesses = MSSA->getBlockAccesses(BB);
  if (Accesses == nullptr)
    return;

  // Pop everything that doesn't dominate the current block off the stack,
  // increment the PopEpoch to account for this.
  while (true) {
    assert(
        !VersionStack.empty() &&
        "Version stack should have liveOnEntry sentinel dominating everything");
    BasicBlock *BackBlock = VersionStack.back()->getBlock();
    if (DT->dominates(A: BackBlock, B: BB))
      break;
    // Pop the whole run of entries from the non-dominating block at once.
    while (VersionStack.back()->getBlock() == BackBlock)
      VersionStack.pop_back();
    ++PopEpoch;
  }

  for (MemoryAccess &MA : *Accesses) {
    auto *MU = dyn_cast<MemoryUse>(Val: &MA);
    if (!MU) {
      // Defs and phis become new stack entries; bump the epoch so later
      // uses know there is fresh material to check.
      VersionStack.push_back(Elt: &MA);
      ++StackEpoch;
      continue;
    }

    if (MU->isOptimized())
      continue;

    MemoryLocOrCall UseMLOC(MU);
    auto &LocInfo = LocStackInfo[UseMLOC];
    // If the pop epoch changed, it means we've removed stuff from top of
    // stack due to changing blocks. We may have to reset the lower bound or
    // last kill info.
    if (LocInfo.PopEpoch != PopEpoch) {
      LocInfo.PopEpoch = PopEpoch;
      LocInfo.StackEpoch = StackEpoch;
      // If the lower bound was in something that no longer dominates us, we
      // have to reset it.
      // We can't simply track stack size, because the stack may have had
      // pushes/pops in the meantime.
      // XXX: This is non-optimal, but only is slower cases with heavily
      // branching dominator trees. To get the optimal number of queries would
      // be to make lowerbound and lastkill a per-loc stack, and pop it until
      // the top of that stack dominates us. This does not seem worth it ATM.
      // A much cheaper optimization would be to always explore the deepest
      // branch of the dominator tree first. This will guarantee this resets on
      // the smallest set of blocks.
      if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB &&
          !DT->dominates(A: LocInfo.LowerBoundBlock, B: BB)) {
        // Reset the lower bound of things to check.
        // TODO: Some day we should be able to reset to last kill, rather than
        // 0.
        LocInfo.LowerBound = 0;
        LocInfo.LowerBoundBlock = VersionStack[0]->getBlock();
        LocInfo.LastKillValid = false;
      }
    } else if (LocInfo.StackEpoch != StackEpoch) {
      // If all that has changed is the StackEpoch, we only have to check the
      // new things on the stack, because we've checked everything before. In
      // this case, the lower bound of things to check remains the same.
      LocInfo.PopEpoch = PopEpoch;
      LocInfo.StackEpoch = StackEpoch;
    }
    if (!LocInfo.LastKillValid) {
      // Default the last kill to the top of the stack.
      LocInfo.LastKill = VersionStack.size() - 1;
      LocInfo.LastKillValid = true;
    }

    // At this point, we should have corrected last kill and LowerBound to be
    // in bounds.
    assert(LocInfo.LowerBound < VersionStack.size() &&
           "Lower bound out of range");
    assert(LocInfo.LastKill < VersionStack.size() &&
           "Last kill info out of range");
    // In any case, the new upper bound is the top of the stack.
    unsigned long UpperBound = VersionStack.size() - 1;

    if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) {
      LLVM_DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " ("
                        << *(MU->getMemoryInst()) << ")"
                        << " because there are "
                        << UpperBound - LocInfo.LowerBound
                        << " stores to disambiguate\n");
      // Because we did not walk, LastKill is no longer valid, as this may
      // have been a kill.
      LocInfo.LastKillValid = false;
      continue;
    }
    // Walk down from the top of the stack looking for the clobber.
    bool FoundClobberResult = false;
    unsigned UpwardWalkLimit = MaxCheckLimit;
    while (UpperBound > LocInfo.LowerBound) {
      if (isa<MemoryPhi>(Val: VersionStack[UpperBound])) {
        // For phis, use the walker, see where we ended up, go there.
        // The invariant.group handling in MemorySSA is ad-hoc and doesn't
        // support updates, so don't use it to optimize uses.
        MemoryAccess *Result =
            Walker->getClobberingMemoryAccessWithoutInvariantGroup(
                MA: MU, BAA&: *AA, UWL&: UpwardWalkLimit);
        // We are guaranteed to find it or something is wrong.
        while (VersionStack[UpperBound] != Result) {
          assert(UpperBound != 0);
          --UpperBound;
        }
        FoundClobberResult = true;
        break;
      }

      MemoryDef *MD = cast<MemoryDef>(Val: VersionStack[UpperBound]);
      if (instructionClobbersQuery(MD, MU, UseMLOC, AA&: *AA)) {
        FoundClobberResult = true;
        break;
      }
      --UpperBound;
    }

    // At the end of this loop, UpperBound is either a clobber, or lower bound
    // PHI walking may cause it to be < LowerBound, and in fact, < LastKill.
    if (FoundClobberResult || UpperBound < LocInfo.LastKill) {
      MU->setDefiningAccess(DMA: VersionStack[UpperBound], Optimized: true);
      LocInfo.LastKill = UpperBound;
    } else {
      // Otherwise, we checked all the new ones, and now we know we can get to
      // LastKill.
      MU->setDefiningAccess(DMA: VersionStack[LocInfo.LastKill], Optimized: true);
    }
    // The next walk for this location can start where this one ended.
    LocInfo.LowerBound = VersionStack.size() - 1;
    LocInfo.LowerBoundBlock = BB;
  }
}
1500
1501/// Optimize uses to point to their actual clobbering definitions.
1502void MemorySSA::OptimizeUses::optimizeUses() {
1503 SmallVector<MemoryAccess *, 16> VersionStack;
1504 DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo;
1505 VersionStack.push_back(Elt: MSSA->getLiveOnEntryDef());
1506
1507 unsigned long StackEpoch = 1;
1508 unsigned long PopEpoch = 1;
1509 // We perform a non-recursive top-down dominator tree walk.
1510 for (const auto *DomNode : depth_first(G: DT->getRootNode()))
1511 optimizeUsesInBlock(BB: DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack,
1512 LocStackInfo);
1513}
1514
1515void MemorySSA::placePHINodes(
1516 const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks) {
1517 // Determine where our MemoryPhi's should go
1518 ForwardIDFCalculator IDFs(*DT);
1519 IDFs.setDefiningBlocks(DefiningBlocks);
1520 SmallVector<BasicBlock *, 32> IDFBlocks;
1521 IDFs.calculate(IDFBlocks);
1522
1523 // Now place MemoryPhi nodes.
1524 for (auto &BB : IDFBlocks)
1525 createMemoryPhi(BB);
1526}
1527
/// Construct the initial MemorySSA form over \p Blocks (either all blocks of
/// a function, or the blocks of a single loop): create uses/defs, place phis,
/// then run SSA renaming.
template <typename IterT>
void MemorySSA::buildMemorySSA(BatchAAResults &BAA, IterT Blocks) {
  // We create an access to represent "live on entry", for things like
  // arguments or users of globals, where the memory they use is defined before
  // the beginning of the function. We do not actually insert it into the IR.
  // We do not define a live on exit for the immediate uses, and thus our
  // semantics do *not* imply that something with no immediate uses can simply
  // be removed.
  BasicBlock &StartingPoint = *Blocks.begin();
  LiveOnEntryDef.reset(p: new MemoryDef(StartingPoint.getContext(), nullptr,
                                      nullptr, &StartingPoint, NextID++));

  // We maintain lists of memory accesses per-block, trading memory for time. We
  // could just look up the memory access for every possible instruction in the
  // stream.
  SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
  // Go through each block, figure out where defs occur, and chain together all
  // the accesses.
  for (BasicBlock &B : Blocks) {
    bool InsertIntoDef = false;
    AccessList *Accesses = nullptr;
    DefsList *Defs = nullptr;
    for (Instruction &I : B) {
      // createNewAccess returns null for instructions that don't touch memory.
      MemoryUseOrDef *MUD = createNewAccess(I: &I, AAP: &BAA);
      if (!MUD)
        continue;

      if (!Accesses)
        Accesses = getOrCreateAccessList(BB: &B);
      Accesses->push_back(val: MUD);
      if (isa<MemoryDef>(Val: MUD)) {
        InsertIntoDef = true;
        if (!Defs)
          Defs = getOrCreateDefsList(BB: &B);
        Defs->push_back(Node&: *MUD);
      }
    }
    if (InsertIntoDef)
      DefiningBlocks.insert(Ptr: &B);
  }
  placePHINodes(DefiningBlocks);

  // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
  // filled in with all blocks.
  SmallPtrSet<BasicBlock *, 16> Visited;
  if (L) {
    // Only building MemorySSA for a single loop. placePHINodes may have
    // inserted a MemoryPhi in the loop's preheader. As this is outside the
    // scope of the loop, set them to LiveOnEntry.
    if (auto *P = getMemoryAccess(BB: L->getLoopPreheader())) {
      for (Use &U : make_early_inc_range(Range: P->uses()))
        U.set(LiveOnEntryDef.get());
      removeFromLists(P);
    }
    // Now rename accesses in the loop. Populate Visited with the exit blocks of
    // the loop, to limit the scope of the renaming.
    SmallVector<BasicBlock *> ExitBlocks;
    L->getExitBlocks(ExitBlocks);
    Visited.insert_range(R&: ExitBlocks);
    renamePass(Root: DT->getNode(BB: L->getLoopPreheader()), IncomingVal: LiveOnEntryDef.get(),
               Visited);
  } else {
    renamePass(Root: DT->getRootNode(), IncomingVal: LiveOnEntryDef.get(), Visited);
  }

  // Mark the uses in unreachable blocks as live on entry, so that they go
  // somewhere.
  for (auto &BB : Blocks)
    if (!Visited.count(Ptr: &BB))
      markUnreachableAsLiveOnEntry(BB: &BB);
}
1599
/// Return the default (caching) walker, creating it on first use.
MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }
1601
1602MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
1603 if (Walker)
1604 return Walker.get();
1605
1606 if (!WalkerBase)
1607 WalkerBase = std::make_unique<ClobberWalkerBase>(args: this, args&: DT);
1608
1609 Walker = std::make_unique<CachingWalker>(args: this, args: WalkerBase.get());
1610 return Walker.get();
1611}
1612
1613MemorySSAWalker *MemorySSA::getSkipSelfWalker() {
1614 if (SkipWalker)
1615 return SkipWalker.get();
1616
1617 if (!WalkerBase)
1618 WalkerBase = std::make_unique<ClobberWalkerBase>(args: this, args&: DT);
1619
1620 SkipWalker = std::make_unique<SkipSelfWalker>(args: this, args: WalkerBase.get());
1621 return SkipWalker.get();
1622 }
1623
1624
// This is a helper function used by the creation routines. It places NewAccess
// into the access and defs lists for a given basic block, at the given
// insertion point.
void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess,
                                        const BasicBlock *BB,
                                        InsertionPlace Point) {
  auto *Accesses = getOrCreateAccessList(BB);
  if (Point == Beginning) {
    // If it's a phi node, it goes first, otherwise, it goes after any phi
    // nodes.
    if (isa<MemoryPhi>(Val: NewAccess)) {
      Accesses->push_front(val: NewAccess);
      // Phis are also defs, so they belong on the defs list too.
      auto *Defs = getOrCreateDefsList(BB);
      Defs->push_front(Node&: *NewAccess);
    } else {
      // Skip any leading phis to find the first non-phi insertion point.
      auto AI = find_if_not(
          Range&: *Accesses, P: [](const MemoryAccess &MA) { return isa<MemoryPhi>(Val: MA); });
      Accesses->insert(where: AI, New: NewAccess);
      // Everything except MemoryUses also lives on the parallel defs list;
      // mirror the "after the phis" insertion there.
      if (!isa<MemoryUse>(Val: NewAccess)) {
        auto *Defs = getOrCreateDefsList(BB);
        auto DI = find_if_not(
            Range&: *Defs, P: [](const MemoryAccess &MA) { return isa<MemoryPhi>(Val: MA); });
        Defs->insert(I: DI, Node&: *NewAccess);
      }
    }
  } else {
    // Point == End: append to the access list (and the defs list for defs).
    Accesses->push_back(val: NewAccess);
    if (!isa<MemoryUse>(Val: NewAccess)) {
      auto *Defs = getOrCreateDefsList(BB);
      Defs->push_back(Node&: *NewAccess);
    }
  }
  // Inserting changes the in-block order, so the cached local numbering for
  // this block is now stale.
  BlockNumberingValid.erase(Ptr: BB);
}
1659
/// Insert \p What into \p BB's access list directly before \p InsertPt, and
/// keep the parallel defs list in sync when \p What is a def or phi.
void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
                                      AccessList::iterator InsertPt) {
  auto *Accesses = getBlockAccesses(BB);
  // Remember whether we were asked to append; InsertPt is advanced below.
  bool WasEnd = InsertPt == Accesses->end();
  Accesses->insert(where: AccessList::iterator(InsertPt), New: What);
  if (!isa<MemoryUse>(Val: What)) {
    auto *Defs = getOrCreateDefsList(BB);
    // If we got asked to insert at the end, we have an easy job, just shove it
    // at the end. If we got asked to insert before an existing def, we also get
    // an iterator. If we got asked to insert before a use, we have to hunt for
    // the next def.
    if (WasEnd) {
      Defs->push_back(Node&: *What);
    } else if (isa<MemoryDef>(Val: InsertPt)) {
      Defs->insert(I: InsertPt->getDefsIterator(), Node&: *What);
    } else {
      // Walk forward in the access list until the next def (or the end).
      while (InsertPt != Accesses->end() && !isa<MemoryDef>(Val: InsertPt))
        ++InsertPt;
      // Either we found a def, or we are inserting at the end
      if (InsertPt == Accesses->end())
        Defs->push_back(Node&: *What);
      else
        Defs->insert(I: InsertPt->getDefsIterator(), Node&: *What);
    }
  }
  // The block's cached local numbering no longer reflects the access order.
  BlockNumberingValid.erase(Ptr: BB);
}
1687
1688void MemorySSA::prepareForMoveTo(MemoryAccess *What, BasicBlock *BB) {
1689 // Keep it in the lookup tables, remove from the lists
1690 removeFromLists(What, ShouldDelete: false);
1691
1692 // Note that moving should implicitly invalidate the optimized state of a
1693 // MemoryUse (and Phis can't be optimized). However, it doesn't do so for a
1694 // MemoryDef.
1695 if (auto *MD = dyn_cast<MemoryDef>(Val: What))
1696 MD->resetOptimized();
1697 What->setBlock(BB);
1698}
1699
1700// Move What before Where in the IR. The end result is that What will belong to
1701// the right lists and have the right Block set, but will not otherwise be
1702// correct. It will not have the right defining access, and if it is a def,
1703// things below it will not properly be updated.
1704void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
1705 AccessList::iterator Where) {
1706 prepareForMoveTo(What, BB);
1707 insertIntoListsBefore(What, BB, InsertPt: Where);
1708}
1709
/// Move \p What to the beginning or end of \p BB. Like the iterator overload
/// above, this fixes only list membership and the Block pointer, not the
/// defining access or downstream users.
void MemorySSA::moveTo(MemoryAccess *What, BasicBlock *BB,
                       InsertionPlace Point) {
  if (isa<MemoryPhi>(Val: What)) {
    assert(Point == Beginning &&
           "Can only move a Phi at the beginning of the block");
    // Update lookup table entry: phis are keyed by their block, so moving a
    // phi re-keys it from the old block to BB.
    ValueToMemoryAccess.erase(Val: What->getBlock());
    bool Inserted = ValueToMemoryAccess.insert(KV: {BB, What}).second;
    (void)Inserted;
    assert(Inserted && "Cannot move a Phi to a block that already has one");
  }

  prepareForMoveTo(What, BB);
  insertIntoListsForBlock(NewAccess: What, BB, Point);
}
1725
1726MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
1727 assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
1728 MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
1729 // Phi's always are placed at the front of the block.
1730 insertIntoListsForBlock(NewAccess: Phi, BB, Point: Beginning);
1731 ValueToMemoryAccess[BB] = Phi;
1732 return Phi;
1733}
1734
1735MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
1736 MemoryAccess *Definition,
1737 const MemoryUseOrDef *Template,
1738 bool CreationMustSucceed) {
1739 assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
1740 MemoryUseOrDef *NewAccess = createNewAccess(I, AAP: AA, Template);
1741 if (CreationMustSucceed)
1742 assert(NewAccess != nullptr && "Tried to create a memory access for a "
1743 "non-memory touching instruction");
1744 if (NewAccess) {
1745 assert((!Definition || !isa<MemoryUse>(Definition)) &&
1746 "A use cannot be a defining access");
1747 NewAccess->setDefiningAccess(DMA: Definition);
1748 }
1749 return NewAccess;
1750}
1751
1752// Return true if the instruction has ordering constraints.
1753// Note specifically that this only considers stores and loads
1754// because others are still considered ModRef by getModRefInfo.
1755static inline bool isOrdered(const Instruction *I) {
1756 if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
1757 if (!SI->isUnordered())
1758 return true;
1759 } else if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
1760 if (!LI->isUnordered())
1761 return true;
1762 }
1763 return false;
1764}
1765
/// Helper function to create new memory accesses
/// Returns a MemoryDef, a MemoryUse, or null if \p I does not touch memory.
/// \p Template, when given, dictates the access kind; otherwise alias
/// analysis (\p AAP) is queried.
template <typename AliasAnalysisType>
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
                                           AliasAnalysisType *AAP,
                                           const MemoryUseOrDef *Template) {
  // The assume intrinsic has a control dependency which we model by claiming
  // that it writes arbitrarily. Debuginfo intrinsics may be considered
  // clobbers when we have a nonstandard AA pipeline. Ignore these fake memory
  // dependencies here.
  // FIXME: Replace this special casing with a more accurate modelling of
  // assume's control dependency.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
    switch (II->getIntrinsicID()) {
    default:
      break;
    case Intrinsic::allow_runtime_check:
    case Intrinsic::allow_ubsan_check:
    case Intrinsic::assume:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::pseudoprobe:
      return nullptr;
    }
  }

  // Using a nonstandard AA pipelines might leave us with unexpected modref
  // results for I, so add a check to not model instructions that may not read
  // from or write to memory. This is necessary for correctness.
  if (!I->mayReadFromMemory() && !I->mayWriteToMemory())
    return nullptr;

  bool Def, Use;
  if (Template) {
    // Copy the access kind from the template rather than re-querying AA.
    Def = isa<MemoryDef>(Val: Template);
    Use = isa<MemoryUse>(Val: Template);
#if !defined(NDEBUG)
    // Sanity-check the template against a fresh AA query: AA may only have
    // gotten *better* (fewer accesses), never worse.
    ModRefInfo ModRef = AAP->getModRefInfo(I, std::nullopt);
    bool DefCheck, UseCheck;
    DefCheck = isModSet(ModRef) || isOrdered(I);
    UseCheck = isRefSet(ModRef);
    // Memory accesses should only be reduced and can not be increased since AA
    // just might return better results as a result of some transformations.
    assert((Def == DefCheck || !DefCheck) &&
           "Memory accesses should only be reduced");
    if (!Def && Use != UseCheck) {
      // New Access should not have more power than template access
      assert(!UseCheck && "Invalid template");
    }
#endif
  } else {
    // Find out what affect this instruction has on memory.
    ModRefInfo ModRef = AAP->getModRefInfo(I, std::nullopt);
    // The isOrdered check is used to ensure that volatiles end up as defs
    // (atomics end up as ModRef right now anyway). Until we separate the
    // ordering chain from the memory chain, this enables people to see at least
    // some relative ordering to volatiles. Note that getClobberingMemoryAccess
    // will still give an answer that bypasses other volatile loads. TODO:
    // Separate memory aliasing and ordering into two different chains so that
    // we can precisely represent both "what memory will this read/write/is
    // clobbered by" and "what instructions can I move this past".
    Def = isModSet(MRI: ModRef) || isOrdered(I);
    Use = isRefSet(MRI: ModRef);
  }

  // It's possible for an instruction to not modify memory at all. During
  // construction, we ignore them.
  if (!Def && !Use)
    return nullptr;

  MemoryUseOrDef *MUD;
  if (Def) {
    MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
  } else {
    MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
    // Uses that trivially can't be clobbered are optimized to live-on-entry
    // right away, saving the walker a query later.
    if (isUseTriviallyOptimizableToLiveOnEntry(*AAP, I)) {
      MemoryAccess *LiveOnEntry = getLiveOnEntryDef();
      MUD->setOptimized(LiveOnEntry);
    }
  }
  ValueToMemoryAccess[I] = MUD;
  return MUD;
}
1847
/// Properly remove \p MA from all of MemorySSA's lookup tables.
void MemorySSA::removeFromLookups(MemoryAccess *MA) {
  assert(MA->use_empty() &&
         "Trying to remove memory access that still has uses");
  BlockNumbering.erase(Val: MA);
  if (auto *MUD = dyn_cast<MemoryUseOrDef>(Val: MA))
    MUD->setDefiningAccess(DMA: nullptr);
  // Invalidate our walker's cache if necessary
  if (!isa<MemoryUse>(Val: MA))
    getWalker()->invalidateInfo(MA);

  // Uses/defs are keyed in ValueToMemoryAccess by their instruction; phis are
  // keyed by their block.
  Value *MemoryInst;
  if (const auto *MUD = dyn_cast<MemoryUseOrDef>(Val: MA))
    MemoryInst = MUD->getMemoryInst();
  else
    MemoryInst = MA->getBlock();

  // Only erase the table entry if it still maps to MA; an update may have
  // already re-pointed the key at a replacement access.
  auto VMA = ValueToMemoryAccess.find(Val: MemoryInst);
  if (VMA->second == MA)
    ValueToMemoryAccess.erase(I: VMA);
}
1869
/// Properly remove \p MA from all of MemorySSA's lists.
///
/// Because of the way the intrusive list and use lists work, it is important to
/// do removal in the right order.
/// ShouldDelete defaults to true, and will cause the memory access to also be
/// deleted, not just removed.
void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) {
  BasicBlock *BB = MA->getBlock();
  // The access list owns the reference, so we erase it from the non-owning list
  // first.
  if (!isa<MemoryUse>(Val: MA)) {
    auto DefsIt = PerBlockDefs.find(Val: BB);
    std::unique_ptr<DefsList> &Defs = DefsIt->second;
    Defs->remove(N&: *MA);
    // Drop the per-block defs entry entirely once it holds nothing.
    if (Defs->empty())
      PerBlockDefs.erase(I: DefsIt);
  }

  // The erase call here will delete it. If we don't want it deleted, we call
  // remove instead.
  auto AccessIt = PerBlockAccesses.find(Val: BB);
  std::unique_ptr<AccessList> &Accesses = AccessIt->second;
  if (ShouldDelete)
    Accesses->erase(IT: MA);
  else
    Accesses->remove(IT: MA);

  // An emptied block no longer needs an access list or a cached numbering.
  if (Accesses->empty()) {
    PerBlockAccesses.erase(I: AccessIt);
    BlockNumberingValid.erase(Ptr: BB);
  }
}
1902
1903void MemorySSA::print(raw_ostream &OS) const {
1904 MemorySSAAnnotatedWriter Writer(this);
1905 Function *F = this->F;
1906 if (L)
1907 F = L->getHeader()->getParent();
1908 F->print(OS, AAW: &Writer);
1909}
1910
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print the annotated function to the debug stream; only available in builds
// with dump support.
LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
#endif
1914
/// Run internal consistency checks over the whole analysis. At Full
/// verification level, the more expensive phi-incoming and def-use checks
/// also run. All checks are assert-based, so this is a no-op in NDEBUG
/// builds.
void MemorySSA::verifyMemorySSA(VerificationLevel VL) const {
#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
  // Expensive-checks builds always verify at the strictest level.
  VL = VerificationLevel::Full;
#endif

#ifndef NDEBUG
  if (F) {
    auto Blocks = iterator_range(F->begin(), F->end());
    verifyOrderingDominationAndDefUses(Blocks, VL);
    verifyDominationNumbers(Blocks);
    if (VL == VerificationLevel::Full)
      verifyPrevDefInPhis(Blocks);
  } else {
    // Loop-scoped MemorySSA: verify only the loop's blocks.
    assert(L && "must either have loop or function");
    auto Blocks =
        map_range(L->blocks(), [](const BasicBlock *BB) -> BasicBlock & {
          return *const_cast<BasicBlock *>(BB);
        });
    verifyOrderingDominationAndDefUses(Blocks, VL);
    verifyDominationNumbers(Blocks);
    if (VL == VerificationLevel::Full)
      verifyPrevDefInPhis(Blocks);
  }
#endif
  // Previously, the verification used to also verify that the clobberingAccess
  // cached by MemorySSA is the same as the clobberingAccess found at a later
  // query to AA. This does not hold true in general due to the current fragility
  // of BasicAA which has arbitrary caps on the things it analyzes before giving
  // up. As a result, transformations that are correct, will lead to BasicAA
  // returning different Alias answers before and after that transformation.
  // Invalidating MemorySSA is not an option, as the results in BasicAA can be so
  // random, in the worst case we'd need to rebuild MemorySSA from scratch after
  // every transformation, which defeats the purpose of using it. For such an
  // example, see test4 added in D51960.
}
1950
/// Verify that each MemoryPhi's incoming access for a predecessor matches the
/// last def found by walking that predecessor's IDom chain (where such a
/// unique answer exists).
template <typename IterT>
void MemorySSA::verifyPrevDefInPhis(IterT Blocks) const {
  for (const BasicBlock &BB : Blocks) {
    if (MemoryPhi *Phi = getMemoryAccess(BB: &BB)) {
      for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
        auto *Pred = Phi->getIncomingBlock(I);
        auto *IncAcc = Phi->getIncomingValue(I);
        // If Pred has no unreachable predecessors, get last def looking at
        // IDoms. If, while walkings IDoms, any of these has an unreachable
        // predecessor, then the incoming def can be any access.
        if (auto *DTNode = DT->getNode(BB: Pred)) {
          while (DTNode) {
            if (auto *DefList = getBlockDefs(BB: DTNode->getBlock())) {
              // The last def in the nearest dominating block with defs must be
              // exactly the phi's incoming access for this edge.
              auto *LastAcc = &*(--DefList->end());
              assert(LastAcc == IncAcc &&
                     "Incorrect incoming access into phi.");
              (void)IncAcc;
              (void)LastAcc;
              break;
            }
            DTNode = DTNode->getIDom();
          }
        } else {
          // If Pred has unreachable predecessors, but has at least a Def, the
          // incoming access can be the last Def in Pred, or it could have been
          // optimized to LoE. After an update, though, the LoE may have been
          // replaced by another access, so IncAcc may be any access.
          // If Pred has unreachable predecessors and no Defs, incoming access
          // should be LoE; However, after an update, it may be any access.
        }
      }
    }
  }
}
1985
/// Verify that all of the blocks we believe to have valid domination numbers
/// actually have valid domination numbers.
template <typename IterT>
void MemorySSA::verifyDominationNumbers(IterT Blocks) const {
  // Nothing has been numbered yet; vacuously valid.
  if (BlockNumberingValid.empty())
    return;

  // Work on a copy so we can tick blocks off as we visit them and detect
  // stale entries at the end.
  SmallPtrSet<const BasicBlock *, 16> ValidBlocks = BlockNumberingValid;
  for (const BasicBlock &BB : Blocks) {
    if (!ValidBlocks.count(Ptr: &BB))
      continue;

    ValidBlocks.erase(Ptr: &BB);

    const AccessList *Accesses = getBlockAccesses(BB: &BB);
    // It's correct to say an empty block has valid numbering.
    if (!Accesses)
      continue;

    // Block numbering starts at 1.
    unsigned long LastNumber = 0;
    for (const MemoryAccess &MA : *Accesses) {
      auto ThisNumberIter = BlockNumbering.find(Val: &MA);
      assert(ThisNumberIter != BlockNumbering.end() &&
             "MemoryAccess has no domination number in a valid block!");

      unsigned long ThisNumber = ThisNumberIter->second;
      assert(ThisNumber > LastNumber &&
             "Domination numbers should be strictly increasing!");
      (void)LastNumber;
      LastNumber = ThisNumber;
    }
  }

  // Anything left over was marked valid but never seen in the iteration.
  assert(ValidBlocks.empty() &&
         "All valid BasicBlocks should exist in F -- dangling pointers?");
}
2023
/// Verify ordering: the order and existence of MemoryAccesses matches the
/// order and existence of memory affecting instructions.
/// Verify domination: each definition dominates all of its uses.
/// Verify def-uses: the immediate use information - walk all the memory
/// accesses and verifying that, for each use, it appears in the appropriate
/// def's use list
template <typename IterT>
void MemorySSA::verifyOrderingDominationAndDefUses(IterT Blocks,
                                                   VerificationLevel VL) const {
  // Walk all the blocks, comparing what the lookups think and what the access
  // lists think, as well as the order in the blocks vs the order in the access
  // lists.
  SmallVector<MemoryAccess *, 32> ActualAccesses;
  SmallVector<MemoryAccess *, 32> ActualDefs;
  for (BasicBlock &B : Blocks) {
    const AccessList *AL = getBlockAccesses(BB: &B);
    const auto *DL = getBlockDefs(BB: &B);
    MemoryPhi *Phi = getMemoryAccess(BB: &B);
    if (Phi) {
      // Verify ordering.
      ActualAccesses.push_back(Elt: Phi);
      ActualDefs.push_back(Elt: Phi);
      // Verify domination
      for (const Use &U : Phi->uses()) {
        assert(dominates(Phi, U) && "Memory PHI does not dominate it's uses");
        (void)U;
      }
      // Verify def-uses for full verify.
      if (VL == VerificationLevel::Full) {
        assert(Phi->getNumOperands() == pred_size(&B) &&
               "Incomplete MemoryPhi Node");
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          verifyUseInDefs(Phi->getIncomingValue(I), Phi);
          assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) &&
                 "Incoming phi block not a block predecessor");
        }
      }
    }

    // Now walk the instructions, collecting each corresponding access in
    // program order and checking it against the lookup table and lists.
    for (Instruction &I : B) {
      MemoryUseOrDef *MA = getMemoryAccess(I: &I);
      assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) &&
             "We have memory affecting instructions "
             "in this block but they are not in the "
             "access list or defs list");
      if (MA) {
        // Verify ordering.
        ActualAccesses.push_back(Elt: MA);
        if (MemoryAccess *MD = dyn_cast<MemoryDef>(Val: MA)) {
          // Verify ordering.
          ActualDefs.push_back(Elt: MA);
          // Verify domination.
          for (const Use &U : MD->uses()) {
            assert(dominates(MD, U) &&
                   "Memory Def does not dominate it's uses");
            (void)U;
          }
        }
        // Verify def-uses for full verify.
        if (VL == VerificationLevel::Full)
          verifyUseInDefs(MA->getDefiningAccess(), MA);
      }
    }
    // Either we hit the assert, really have no accesses, or we have both
    // accesses and an access list. Same with defs.
    if (!AL && !DL)
      continue;
    // Verify ordering.
    assert(AL->size() == ActualAccesses.size() &&
           "We don't have the same number of accesses in the block as on the "
           "access list");
    assert((DL || ActualDefs.size() == 0) &&
           "Either we should have a defs list, or we should have no defs");
    assert((!DL || DL->size() == ActualDefs.size()) &&
           "We don't have the same number of defs in the block as on the "
           "def list");
    // Pairwise-compare the collected accesses against the access list.
    auto ALI = AL->begin();
    auto AAI = ActualAccesses.begin();
    while (ALI != AL->end() && AAI != ActualAccesses.end()) {
      assert(&*ALI == *AAI && "Not the same accesses in the same order");
      ++ALI;
      ++AAI;
    }
    ActualAccesses.clear();
    // Same pairwise comparison for the defs list.
    if (DL) {
      auto DLI = DL->begin();
      auto ADI = ActualDefs.begin();
      while (DLI != DL->end() && ADI != ActualDefs.end()) {
        assert(&*DLI == *ADI && "Not the same defs in the same order");
        ++DLI;
        ++ADI;
      }
    }
    ActualDefs.clear();
  }
}
2120
2121/// Verify the def-use lists in MemorySSA, by verifying that \p Use
2122/// appears in the use list of \p Def.
2123void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
2124 // The live on entry use may cause us to get a NULL def here
2125 if (!Def)
2126 assert(isLiveOnEntryDef(Use) &&
2127 "Null def but use not point to live on entry def");
2128 else
2129 assert(is_contained(Def->users(), Use) &&
2130 "Did not find use in def's use list");
2131}
2132
2133/// Perform a local numbering on blocks so that instruction ordering can be
2134/// determined in constant time.
2135/// TODO: We currently just number in order. If we numbered by N, we could
2136/// allow at least N-1 sequences of insertBefore or insertAfter (and at least
2137/// log2(N) sequences of mixed before and after) without needing to invalidate
2138/// the numbering.
2139void MemorySSA::renumberBlock(const BasicBlock *B) const {
2140 // The pre-increment ensures the numbers really start at 1.
2141 unsigned long CurrentNumber = 0;
2142 const AccessList *AL = getBlockAccesses(BB: B);
2143 assert(AL != nullptr && "Asking to renumber an empty block");
2144 for (const auto &I : *AL)
2145 BlockNumbering[&I] = ++CurrentNumber;
2146 BlockNumberingValid.insert(Ptr: B);
2147}
2148
2149/// Determine, for two memory accesses in the same block,
2150/// whether \p Dominator dominates \p Dominatee.
2151/// \returns True if \p Dominator dominates \p Dominatee.
2152bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
2153 const MemoryAccess *Dominatee) const {
2154 const BasicBlock *DominatorBlock = Dominator->getBlock();
2155
2156 assert((DominatorBlock == Dominatee->getBlock()) &&
2157 "Asking for local domination when accesses are in different blocks!");
2158 // A node dominates itself.
2159 if (Dominatee == Dominator)
2160 return true;
2161
2162 // When Dominatee is defined on function entry, it is not dominated by another
2163 // memory access.
2164 if (isLiveOnEntryDef(MA: Dominatee))
2165 return false;
2166
2167 // When Dominator is defined on function entry, it dominates the other memory
2168 // access.
2169 if (isLiveOnEntryDef(MA: Dominator))
2170 return true;
2171
2172 if (!BlockNumberingValid.count(Ptr: DominatorBlock))
2173 renumberBlock(B: DominatorBlock);
2174
2175 unsigned long DominatorNum = BlockNumbering.lookup(Val: Dominator);
2176 // All numbers start with 1
2177 assert(DominatorNum != 0 && "Block was not numbered properly");
2178 unsigned long DominateeNum = BlockNumbering.lookup(Val: Dominatee);
2179 assert(DominateeNum != 0 && "Block was not numbered properly");
2180 return DominatorNum < DominateeNum;
2181}
2182
2183bool MemorySSA::dominates(const MemoryAccess *Dominator,
2184 const MemoryAccess *Dominatee) const {
2185 if (Dominator == Dominatee)
2186 return true;
2187
2188 if (isLiveOnEntryDef(MA: Dominatee))
2189 return false;
2190
2191 if (Dominator->getBlock() != Dominatee->getBlock())
2192 return DT->dominates(A: Dominator->getBlock(), B: Dominatee->getBlock());
2193 return locallyDominates(Dominator, Dominatee);
2194}
2195
2196bool MemorySSA::dominates(const MemoryAccess *Dominator,
2197 const Use &Dominatee) const {
2198 if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Val: Dominatee.getUser())) {
2199 BasicBlock *UseBB = MP->getIncomingBlock(U: Dominatee);
2200 // The def must dominate the incoming block of the phi.
2201 if (UseBB != Dominator->getBlock())
2202 return DT->dominates(A: Dominator->getBlock(), B: UseBB);
2203 // If the UseBB and the DefBB are the same, compare locally.
2204 return locallyDominates(Dominator, Dominatee: cast<MemoryAccess>(Val: Dominatee));
2205 }
2206 // If it's not a PHI node use, the normal dominates can already handle it.
2207 return dominates(Dominator, Dominatee: cast<MemoryAccess>(Val: Dominatee.getUser()));
2208}
2209
2210void MemorySSA::ensureOptimizedUses() {
2211 if (IsOptimized)
2212 return;
2213
2214 BatchAAResults BatchAA(*AA);
2215 ClobberWalkerBase WalkerBase(this, DT);
2216 CachingWalker WalkerLocal(this, &WalkerBase);
2217 OptimizeUses(this, &WalkerLocal, &BatchAA, DT).optimizeUses();
2218 IsOptimized = true;
2219}
2220
2221void MemoryAccess::print(raw_ostream &OS) const {
2222 switch (getValueID()) {
2223 case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
2224 case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS);
2225 case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS);
2226 }
2227 llvm_unreachable("invalid value id");
2228}
2229
2230void MemoryDef::print(raw_ostream &OS) const {
2231 MemoryAccess *UO = getDefiningAccess();
2232
2233 auto printID = [&OS](MemoryAccess *A) {
2234 if (A && A->getID())
2235 OS << A->getID();
2236 else
2237 OS << LiveOnEntryStr;
2238 };
2239
2240 OS << getID() << " = MemoryDef(";
2241 printID(UO);
2242 OS << ")";
2243
2244 if (isOptimized()) {
2245 OS << "->";
2246 printID(getOptimized());
2247 }
2248}
2249
2250void MemoryPhi::print(raw_ostream &OS) const {
2251 ListSeparator LS(",");
2252 OS << getID() << " = MemoryPhi(";
2253 for (const auto &Op : operands()) {
2254 BasicBlock *BB = getIncomingBlock(U: Op);
2255 MemoryAccess *MA = cast<MemoryAccess>(Val: Op);
2256
2257 OS << LS << '{';
2258 if (BB->hasName())
2259 OS << BB->getName();
2260 else
2261 BB->printAsOperand(O&: OS, PrintType: false);
2262 OS << ',';
2263 if (unsigned ID = MA->getID())
2264 OS << ID;
2265 else
2266 OS << LiveOnEntryStr;
2267 OS << '}';
2268 }
2269 OS << ')';
2270}
2271
2272void MemoryUse::print(raw_ostream &OS) const {
2273 MemoryAccess *UO = getDefiningAccess();
2274 OS << "MemoryUse(";
2275 if (UO && UO->getID())
2276 OS << UO->getID();
2277 else
2278 OS << LiveOnEntryStr;
2279 OS << ')';
2280}
2281
/// Print this access plus a newline to the debug stream (debug builds only).
void MemoryAccess::dump() const {
// Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  print(dbgs());
  dbgs() << "\n";
#endif
}
2289
/// Bundles a function with a MemorySSA annotation writer so the DOT graph
/// traits below can label CFG nodes with MemorySSA information.
class DOTFuncMSSAInfo {
private:
  const Function &F;
  MemorySSAAnnotatedWriter MSSAWriter;

public:
  DOTFuncMSSAInfo(const Function &F, MemorySSA &MSSA)
      : F(F), MSSAWriter(&MSSA) {}

  const Function *getFunction() { return &F; }
  MemorySSAAnnotatedWriter &getWriter() { return MSSAWriter; }
};
2302
2303namespace llvm {
2304
/// Graph traits adapter: iterate a DOTFuncMSSAInfo's CFG exactly like the
/// underlying function's basic blocks.
template <>
struct GraphTraits<DOTFuncMSSAInfo *> : public GraphTraits<const BasicBlock *> {
  static NodeRef getEntryNode(DOTFuncMSSAInfo *CFGInfo) {
    return &(CFGInfo->getFunction()->getEntryBlock());
  }

  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
  using nodes_iterator = pointer_iterator<Function::const_iterator>;

  static nodes_iterator nodes_begin(DOTFuncMSSAInfo *CFGInfo) {
    return nodes_iterator(CFGInfo->getFunction()->begin());
  }

  static nodes_iterator nodes_end(DOTFuncMSSAInfo *CFGInfo) {
    return nodes_iterator(CFGInfo->getFunction()->end());
  }

  static size_t size(DOTFuncMSSAInfo *CFGInfo) {
    return CFGInfo->getFunction()->size();
  }
};
2326
/// DOT printing traits: render each basic block with its MemorySSA
/// annotations, keeping MemorySSA lines while stripping other comments.
template <>
struct DOTGraphTraits<DOTFuncMSSAInfo *> : public DefaultDOTGraphTraits {

  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}

  static std::string getGraphName(DOTFuncMSSAInfo *CFGInfo) {
    return "MSSA CFG for '" + CFGInfo->getFunction()->getName().str() +
           "' function";
  }

  std::string getNodeLabel(const BasicBlock *Node, DOTFuncMSSAInfo *CFGInfo) {
    return DOTGraphTraits<DOTFuncInfo *>::getCompleteNodeLabel(
        Node, nullptr,
        // Print the block through the MemorySSA annotated writer so each node
        // label carries the access annotations.
        HandleBasicBlock: [CFGInfo](raw_string_ostream &OS, const BasicBlock &BB) -> void {
          BB.print(OS, AAW: &CFGInfo->getWriter(), ShouldPreserveUseListOrder: true, IsForDebug: true);
        },
        // Erase every comment except the MemorySSA annotations themselves.
        HandleComment: [](std::string &S, unsigned &I, unsigned Idx) -> void {
          std::string Str = S.substr(pos: I, n: Idx - I);
          StringRef SR = Str;
          if (SR.count(Str: " = MemoryDef(") || SR.count(Str: " = MemoryPhi(") ||
              SR.count(Str: "MemoryUse("))
            return;
          DOTGraphTraits<DOTFuncInfo *>::eraseComment(OutStr&: S, I, Idx);
        });
  }

  static std::string getEdgeSourceLabel(const BasicBlock *Node,
                                        const_succ_iterator I) {
    return DOTGraphTraits<DOTFuncInfo *>::getEdgeSourceLabel(Node, I);
  }

  /// Display the raw branch weights from PGO.
  std::string getEdgeAttributes(const BasicBlock *Node, const_succ_iterator I,
                                DOTFuncMSSAInfo *CFGInfo) {
    return "";
  }

  std::string getNodeAttributes(const BasicBlock *Node,
                                DOTFuncMSSAInfo *CFGInfo) {
    // Highlight nodes whose label contains annotations (';' comments).
    return getNodeLabel(Node, CFGInfo).find(c: ';') != std::string::npos
               ? "style=filled, fillcolor=lightpink"
               : "";
  }
};
2371
2372} // namespace llvm
2373
2374AnalysisKey MemorySSAAnalysis::Key;
2375
2376MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
2377 FunctionAnalysisManager &AM) {
2378 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
2379 auto &AA = AM.getResult<AAManager>(IR&: F);
2380 return MemorySSAAnalysis::Result(std::make_unique<MemorySSA>(args&: F, args: &AA, args: &DT));
2381}
2382
/// Decide whether this MemorySSA result must be recomputed: it survives only
/// if it was explicitly preserved (or everything was) AND neither of its
/// dependencies (AA, dominator tree) was invalidated.
bool MemorySSAAnalysis::Result::invalidate(
    Function &F, const PreservedAnalyses &PA,
    FunctionAnalysisManager::Invalidator &Inv) {
  auto PAC = PA.getChecker<MemorySSAAnalysis>();
  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
         Inv.invalidate<AAManager>(IR&: F, PA) ||
         Inv.invalidate<DominatorTreeAnalysis>(IR&: F, PA);
}
2391
2392PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
2393 FunctionAnalysisManager &AM) {
2394 auto &MSSA = AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA();
2395 if (EnsureOptimizedUses)
2396 MSSA.ensureOptimizedUses();
2397 if (DotCFGMSSA != "") {
2398 DOTFuncMSSAInfo CFGInfo(F, MSSA);
2399 WriteGraph(G: &CFGInfo, Name: "", ShortNames: false, Title: "MSSA", Filename: DotCFGMSSA);
2400 } else {
2401 OS << "MemorySSA for function: " << F.getName() << "\n";
2402 MSSA.print(OS);
2403 }
2404
2405 return PreservedAnalyses::all();
2406}
2407
2408PreservedAnalyses MemorySSAWalkerPrinterPass::run(Function &F,
2409 FunctionAnalysisManager &AM) {
2410 auto &MSSA = AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA();
2411 OS << "MemorySSA (walker) for function: " << F.getName() << "\n";
2412 MemorySSAWalkerAnnotatedWriter Writer(&MSSA);
2413 F.print(OS, AAW: &Writer);
2414
2415 return PreservedAnalyses::all();
2416}
2417
2418PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
2419 FunctionAnalysisManager &AM) {
2420 AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA().verifyMemorySSA();
2421
2422 return PreservedAnalyses::all();
2423}
2424
2425char MemorySSAWrapperPass::ID = 0;
2426
// Legacy pass manager wrapper; MemorySSA itself is built in runOnFunction.
MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) {}
2428
// Drop the owned MemorySSA when the pass manager releases this pass's memory.
void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); }
2430
void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  // Transitive: clients that query MemorySSA may reach through it into the
  // dominator tree and AA results after this pass has run.
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<AAResultsWrapperPass>();
}
2436
2437bool MemorySSAWrapperPass::runOnFunction(Function &F) {
2438 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2439 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2440 MSSA.reset(p: new MemorySSA(F, &AA, &DT));
2441 return false;
2442}
2443
void MemorySSAWrapperPass::verifyAnalysis() const {
  // Only verify when the (command-line controlled) VerifyMemorySSA flag is on;
  // verification is too expensive to run unconditionally.
  if (VerifyMemorySSA)
    MSSA->verifyMemorySSA();
}
2448
// Forward printing to the owned MemorySSA; the Module argument is unused.
void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
  MSSA->print(OS);
}
2452
// The base walker only records the MemorySSA instance it walks over.
MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
2454
/// Walk the use-def chains starting at \p StartingAccess and find
/// the MemoryAccess that actually clobbers Loc.
///
/// \returns our clobbering memory access
MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
    MemoryAccess *StartingAccess, const MemoryLocation &Loc,
    BatchAAResults &BAA, unsigned &UpwardWalkLimit) {
  assert(!isa<MemoryUse>(StartingAccess) && "Use cannot be defining access");

  // If location is undefined, conservatively return starting access.
  if (Loc.Ptr == nullptr)
    return StartingAccess;

  Instruction *I = nullptr;
  if (auto *StartingUseOrDef = dyn_cast<MemoryUseOrDef>(Val: StartingAccess)) {
    // Nothing can clobber past the live-on-entry def.
    if (MSSA->isLiveOnEntryDef(MA: StartingUseOrDef))
      return StartingUseOrDef;

    I = StartingUseOrDef->getMemoryInst();

    // Conservatively, fences are always clobbers, so don't perform the walk if
    // we hit a fence.
    if (!isa<CallBase>(Val: I) && I->isFenceLike())
      return StartingUseOrDef;
  }

  UpwardsMemoryQuery Q;
  Q.OriginalAccess = StartingAccess;
  Q.StartingLoc = Loc;
  Q.Inst = nullptr;
  Q.IsCall = false;

  // Unlike the other function, do not walk to the def of a def, because we are
  // handed something we already believe is the clobbering access.
  // We never set SkipSelf to true in Q in this method.
  MemoryAccess *Clobber =
      Walker.findClobber(BAA, Start: StartingAccess, Q, UpWalkLimit&: UpwardWalkLimit);
  LLVM_DEBUG({
    dbgs() << "Clobber starting at access " << *StartingAccess << "\n";
    if (I)
      dbgs() << "  for instruction " << *I << "\n";
    dbgs() << "  is " << *Clobber << "\n";
  });
  return Clobber;
}
2500
/// For a load/store \p I tagged with !invariant.group, find the most
/// dominating instruction with the same metadata and pointer operand, or null
/// if none exists (or if walking users is unsafe).
static const Instruction *
getInvariantGroupClobberingInstruction(Instruction &I, DominatorTree &DT) {
  // Only non-volatile invariant.group accesses are eligible.
  if (!I.hasMetadata(KindID: LLVMContext::MD_invariant_group) || I.isVolatile())
    return nullptr;

  // We consider bitcasts and zero GEPs to be the same pointer value. Start by
  // stripping bitcasts and zero GEPs, then we will recursively look at loads
  // and stores through bitcasts and zero GEPs.
  Value *PointerOperand = getLoadStorePointerOperand(V: &I)->stripPointerCasts();

  // It's not safe to walk the use list of a global value because function
  // passes aren't allowed to look outside their functions.
  // FIXME: this could be fixed by filtering instructions from outside of
  // current function.
  if (isa<Constant>(Val: PointerOperand))
    return nullptr;

  const Instruction *MostDominatingInstruction = &I;

  // Scan all users of the pointer; keep the one that dominates the current
  // best candidate.
  for (const User *Us : PointerOperand->users()) {
    auto *U = dyn_cast<Instruction>(Val: Us);
    if (!U || U == &I || !DT.dominates(Def: U, User: MostDominatingInstruction))
      continue;

    // If we hit a load/store with an invariant.group metadata and the same
    // pointer operand, we can assume that value pointed to by the pointer
    // operand didn't change.
    if (U->hasMetadata(KindID: LLVMContext::MD_invariant_group) &&
        getLoadStorePointerOperand(V: U) == PointerOperand && !U->isVolatile()) {
      MostDominatingInstruction = U;
    }
  }

  // Returning &I itself would mean "no better candidate found".
  return MostDominatingInstruction == &I ? nullptr : MostDominatingInstruction;
}
2536
/// Walk upward from \p MA and return its clobbering MemoryAccess, caching the
/// result on \p MA.
///
/// \param MA access to optimize; MemoryPhis are returned unchanged.
/// \param BAA batched alias analysis used by the walk.
/// \param UpwardWalkLimit in/out budget of upward steps.
/// \param SkipSelf if true and \p MA is a MemoryDef whose clobber is a
///        MemoryPhi, do a second walk that skips \p MA itself.
/// \param UseInvariantGroup if true, try to shortcut the walk via
///        !invariant.group metadata first.
MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
    MemoryAccess *MA, BatchAAResults &BAA, unsigned &UpwardWalkLimit,
    bool SkipSelf, bool UseInvariantGroup) {
  auto *StartingAccess = dyn_cast<MemoryUseOrDef>(Val: MA);
  // If this is a MemoryPhi, we can't do anything.
  if (!StartingAccess)
    return MA;

  // !invariant.group shortcut: a dominating access with the same metadata and
  // pointer can stand in for the full upward walk.
  if (UseInvariantGroup) {
    if (auto *I = getInvariantGroupClobberingInstruction(
            I&: *StartingAccess->getMemoryInst(), DT&: MSSA->getDomTree())) {
      assert(isa<LoadInst>(I) || isa<StoreInst>(I));

      auto *ClobberMA = MSSA->getMemoryAccess(I);
      assert(ClobberMA);
      // A dominating load (MemoryUse) doesn't clobber; step past it to its
      // defining access. A dominating store (MemoryDef) is the clobber.
      if (isa<MemoryUse>(Val: ClobberMA))
        return ClobberMA->getDefiningAccess();
      return ClobberMA;
    }
  }

  bool IsOptimized = false;

  // If this is an already optimized use or def, return the optimized result.
  // Note: Currently, we store the optimized def result in a separate field,
  // since we can't use the defining access.
  if (StartingAccess->isOptimized()) {
    // A cached result is only insufficient when the caller wants the
    // SkipSelf treatment on a MemoryDef; remember that and fall through.
    if (!SkipSelf || !isa<MemoryDef>(Val: StartingAccess))
      return StartingAccess->getOptimized();
    IsOptimized = true;
  }

  const Instruction *I = StartingAccess->getMemoryInst();
  // We can't sanely do anything with a fence, since they conservatively clobber
  // all memory, and have no locations to get pointers from to try to
  // disambiguate.
  if (!isa<CallBase>(Val: I) && I->isFenceLike())
    return StartingAccess;

  UpwardsMemoryQuery Q(I, StartingAccess);

  // Trivially optimizable accesses are clobbered only by the entry def;
  // cache and return that directly.
  if (isUseTriviallyOptimizableToLiveOnEntry(AA&: BAA, I)) {
    MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
    StartingAccess->setOptimized(LiveOnEntry);
    return LiveOnEntry;
  }

  MemoryAccess *OptimizedAccess;
  if (!IsOptimized) {
    // Start with the thing we already think clobbers this location
    MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();

    // At this point, DefiningAccess may be the live on entry def.
    // If it is, we will not get a better result.
    if (MSSA->isLiveOnEntryDef(MA: DefiningAccess)) {
      StartingAccess->setOptimized(DefiningAccess);
      return DefiningAccess;
    }

    OptimizedAccess =
        Walker.findClobber(BAA, Start: DefiningAccess, Q, UpWalkLimit&: UpwardWalkLimit);
    StartingAccess->setOptimized(OptimizedAccess);
  } else
    OptimizedAccess = StartingAccess->getOptimized();

  LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
  LLVM_DEBUG(dbgs() << *StartingAccess << "\n");
  LLVM_DEBUG(dbgs() << "Optimized Memory SSA clobber for " << *I << " is ");
  LLVM_DEBUG(dbgs() << *OptimizedAccess << "\n");

  // Second pass for SkipSelf: when a MemoryDef's clobber is a MemoryPhi, walk
  // again past the phi while ignoring the starting def itself (only if we
  // still have walk budget left).
  MemoryAccess *Result;
  if (SkipSelf && isa<MemoryPhi>(Val: OptimizedAccess) &&
      isa<MemoryDef>(Val: StartingAccess) && UpwardWalkLimit) {
    assert(isa<MemoryDef>(Q.OriginalAccess));
    Q.SkipSelfAccess = true;
    Result = Walker.findClobber(BAA, Start: OptimizedAccess, Q, UpWalkLimit&: UpwardWalkLimit);
  } else
    Result = OptimizedAccess;

  LLVM_DEBUG(dbgs() << "Result Memory SSA clobber [SkipSelf = " << SkipSelf);
  LLVM_DEBUG(dbgs() << "] for " << *I << " is " << *Result << "\n");

  return Result;
}
2621
2622MemoryAccess *
2623DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA,
2624 BatchAAResults &) {
2625 if (auto *Use = dyn_cast<MemoryUseOrDef>(Val: MA))
2626 return Use->getDefiningAccess();
2627 return MA;
2628}
2629
2630MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
2631 MemoryAccess *StartingAccess, const MemoryLocation &, BatchAAResults &) {
2632 if (auto *Use = dyn_cast<MemoryUseOrDef>(Val: StartingAccess))
2633 return Use->getDefiningAccess();
2634 return StartingAccess;
2635}
2636
2637void MemoryPhi::deleteMe(DerivedUser *Self) {
2638 delete static_cast<MemoryPhi *>(Self);
2639}
2640
2641void MemoryDef::deleteMe(DerivedUser *Self) {
2642 delete static_cast<MemoryDef *>(Self);
2643}
2644
2645void MemoryUse::deleteMe(DerivedUser *Self) {
2646 delete static_cast<MemoryUse *>(Self);
2647}
2648
2649bool upward_defs_iterator::IsGuaranteedLoopInvariant(const Value *Ptr) const {
2650 auto IsGuaranteedLoopInvariantBase = [](const Value *Ptr) {
2651 Ptr = Ptr->stripPointerCasts();
2652 if (!isa<Instruction>(Val: Ptr))
2653 return true;
2654 return isa<AllocaInst>(Val: Ptr);
2655 };
2656
2657 Ptr = Ptr->stripPointerCasts();
2658 if (auto *I = dyn_cast<Instruction>(Val: Ptr)) {
2659 if (I->getParent()->isEntryBlock())
2660 return true;
2661 }
2662 if (auto *GEP = dyn_cast<GEPOperator>(Val: Ptr)) {
2663 return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) &&
2664 GEP->hasAllConstantIndices();
2665 }
2666 return IsGuaranteedLoopInvariantBase(Ptr);
2667}
2668