1//===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MemorySSA class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Analysis/MemorySSA.h"
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/DenseMapInfo.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/DepthFirstIterator.h"
18#include "llvm/ADT/Hashing.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallPtrSet.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/ADT/iterator.h"
24#include "llvm/ADT/iterator_range.h"
25#include "llvm/Analysis/AliasAnalysis.h"
26#include "llvm/Analysis/CFGPrinter.h"
27#include "llvm/Analysis/IteratedDominanceFrontier.h"
28#include "llvm/Analysis/LoopInfo.h"
29#include "llvm/Analysis/MemoryLocation.h"
30#include "llvm/Config/llvm-config.h"
31#include "llvm/IR/AssemblyAnnotationWriter.h"
32#include "llvm/IR/BasicBlock.h"
33#include "llvm/IR/Dominators.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/IntrinsicInst.h"
38#include "llvm/IR/LLVMContext.h"
39#include "llvm/IR/Operator.h"
40#include "llvm/IR/PassManager.h"
41#include "llvm/IR/Use.h"
42#include "llvm/InitializePasses.h"
43#include "llvm/Pass.h"
44#include "llvm/Support/AtomicOrdering.h"
45#include "llvm/Support/Casting.h"
46#include "llvm/Support/CommandLine.h"
47#include "llvm/Support/Compiler.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/FormattedStream.h"
51#include "llvm/Support/GraphWriter.h"
52#include "llvm/Support/raw_ostream.h"
53#include <algorithm>
54#include <cassert>
55#include <iterator>
56#include <memory>
57#include <utility>
58
59using namespace llvm;
60
61#define DEBUG_TYPE "memoryssa"
62
63static cl::opt<std::string>
64 DotCFGMSSA("dot-cfg-mssa",
65 cl::value_desc("file name for generated dot file"),
66 cl::desc("file name for generated dot file"), cl::init(Val: ""));
67
68INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
69 true)
70INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
71INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
72INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
73 true)
74
75static cl::opt<unsigned> MaxCheckLimit(
76 "memssa-check-limit", cl::Hidden, cl::init(Val: 100),
77 cl::desc("The maximum number of stores/phis MemorySSA"
78 "will consider trying to walk past (default = 100)"));
79
80// Always verify MemorySSA if expensive checking is enabled.
81#ifdef EXPENSIVE_CHECKS
82bool llvm::VerifyMemorySSA = true;
83#else
84bool llvm::VerifyMemorySSA = false;
85#endif
86
87static cl::opt<bool, true>
88 VerifyMemorySSAX("verify-memoryssa", cl::location(L&: VerifyMemorySSA),
89 cl::Hidden, cl::desc("Enable verification of MemorySSA."));
90
91const static char LiveOnEntryStr[] = "liveOnEntry";
92
93namespace {
94
95/// An assembly annotator class to print Memory SSA information in
96/// comments.
97class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
98 const MemorySSA *MSSA;
99
100public:
101 MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {}
102
103 void emitBasicBlockStartAnnot(const BasicBlock *BB,
104 formatted_raw_ostream &OS) override {
105 if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
106 OS << "; " << *MA << "\n";
107 }
108
109 void emitInstructionAnnot(const Instruction *I,
110 formatted_raw_ostream &OS) override {
111 if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
112 OS << "; " << *MA << "\n";
113 }
114};
115
116/// An assembly annotator class to print Memory SSA information in
117/// comments.
118class MemorySSAWalkerAnnotatedWriter : public AssemblyAnnotationWriter {
119 MemorySSA *MSSA;
120 MemorySSAWalker *Walker;
121 BatchAAResults BAA;
122
123public:
124 MemorySSAWalkerAnnotatedWriter(MemorySSA *M)
125 : MSSA(M), Walker(M->getWalker()), BAA(M->getAA()) {}
126
127 void emitBasicBlockStartAnnot(const BasicBlock *BB,
128 formatted_raw_ostream &OS) override {
129 if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
130 OS << "; " << *MA << "\n";
131 }
132
133 void emitInstructionAnnot(const Instruction *I,
134 formatted_raw_ostream &OS) override {
135 if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
136 MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(MA, AA&: BAA);
137 OS << "; " << *MA;
138 if (Clobber) {
139 OS << " - clobbered by ";
140 if (MSSA->isLiveOnEntryDef(MA: Clobber))
141 OS << LiveOnEntryStr;
142 else
143 OS << *Clobber;
144 }
145 OS << "\n";
146 }
147 }
148};
149
150} // namespace
151
152namespace {
153
154/// Our current alias analysis API differentiates heavily between calls and
155/// non-calls, and functions called on one usually assert on the other.
156/// This class encapsulates the distinction to simplify other code that wants
157/// "Memory affecting instructions and related data" to use as a key.
158/// For example, this class is used as a densemap key in the use optimizer.
159class MemoryLocOrCall {
160public:
161 bool IsCall = false;
162
163 MemoryLocOrCall(MemoryUseOrDef *MUD)
164 : MemoryLocOrCall(MUD->getMemoryInst()) {}
165 MemoryLocOrCall(const MemoryUseOrDef *MUD)
166 : MemoryLocOrCall(MUD->getMemoryInst()) {}
167
168 MemoryLocOrCall(Instruction *Inst) {
169 if (auto *C = dyn_cast<CallBase>(Val: Inst)) {
170 IsCall = true;
171 Call = C;
172 } else {
173 IsCall = false;
174 // There is no such thing as a memorylocation for a fence inst, and it is
175 // unique in that regard.
176 if (!isa<FenceInst>(Val: Inst))
177 Loc = MemoryLocation::get(Inst);
178 }
179 }
180
181 explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {}
182
183 const CallBase *getCall() const {
184 assert(IsCall);
185 return Call;
186 }
187
188 MemoryLocation getLoc() const {
189 assert(!IsCall);
190 return Loc;
191 }
192
193 bool operator==(const MemoryLocOrCall &Other) const {
194 if (IsCall != Other.IsCall)
195 return false;
196
197 if (!IsCall)
198 return Loc == Other.Loc;
199
200 if (Call->getCalledOperand() != Other.Call->getCalledOperand())
201 return false;
202
203 return Call->arg_size() == Other.Call->arg_size() &&
204 std::equal(first1: Call->arg_begin(), last1: Call->arg_end(),
205 first2: Other.Call->arg_begin());
206 }
207
208private:
209 union {
210 const CallBase *Call;
211 MemoryLocation Loc;
212 };
213};
214
215} // end anonymous namespace
216
217namespace llvm {
218
219template <> struct DenseMapInfo<MemoryLocOrCall> {
220 static unsigned getHashValue(const MemoryLocOrCall &MLOC) {
221 if (!MLOC.IsCall)
222 return hash_combine(
223 args: MLOC.IsCall,
224 args: DenseMapInfo<MemoryLocation>::getHashValue(Val: MLOC.getLoc()));
225
226 hash_code hash =
227 hash_combine(args: MLOC.IsCall, args: DenseMapInfo<const Value *>::getHashValue(
228 PtrVal: MLOC.getCall()->getCalledOperand()));
229
230 for (const Value *Arg : MLOC.getCall()->args())
231 hash = hash_combine(args: hash, args: DenseMapInfo<const Value *>::getHashValue(PtrVal: Arg));
232 return hash;
233 }
234
235 static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) {
236 return LHS == RHS;
237 }
238};
239
240} // end namespace llvm
241
242/// This does one-way checks to see if Use could theoretically be hoisted above
243/// MayClobber. This will not check the other way around.
244///
245/// This assumes that, for the purposes of MemorySSA, Use comes directly after
246/// MayClobber, with no potentially clobbering operations in between them.
247/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.)
248static bool areLoadsReorderable(const LoadInst *Use,
249 const LoadInst *MayClobber) {
250 bool VolatileUse = Use->isVolatile();
251 bool VolatileClobber = MayClobber->isVolatile();
252 // Volatile operations may never be reordered with other volatile operations.
253 if (VolatileUse && VolatileClobber)
254 return false;
255 // Otherwise, volatile doesn't matter here. From the language reference:
256 // 'optimizers may change the order of volatile operations relative to
257 // non-volatile operations.'"
258
259 // If a load is seq_cst, it cannot be moved above other loads. If its ordering
260 // is weaker, it can be moved above other loads. We just need to be sure that
261 // MayClobber isn't an acquire load, because loads can't be moved above
262 // acquire loads.
263 //
264 // Note that this explicitly *does* allow the free reordering of monotonic (or
265 // weaker) loads of the same address.
266 bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent;
267 bool MayClobberIsAcquire = isAtLeastOrStrongerThan(AO: MayClobber->getOrdering(),
268 Other: AtomicOrdering::Acquire);
269 return !(SeqCstUse || MayClobberIsAcquire);
270}
271
272template <typename AliasAnalysisType>
273static bool
274instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
275 const Instruction *UseInst, AliasAnalysisType &AA) {
276 Instruction *DefInst = MD->getMemoryInst();
277 assert(DefInst && "Defining instruction not actually an instruction");
278
279 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: DefInst)) {
280 // These intrinsics will show up as affecting memory, but they are just
281 // markers, mostly.
282 //
283 // FIXME: We probably don't actually want MemorySSA to model these at all
284 // (including creating MemoryAccesses for them): we just end up inventing
285 // clobbers where they don't really exist at all. Please see D43269 for
286 // context.
287 switch (II->getIntrinsicID()) {
288 case Intrinsic::allow_runtime_check:
289 case Intrinsic::allow_ubsan_check:
290 case Intrinsic::invariant_start:
291 case Intrinsic::invariant_end:
292 case Intrinsic::assume:
293 case Intrinsic::experimental_noalias_scope_decl:
294 case Intrinsic::pseudoprobe:
295 return false;
296 case Intrinsic::dbg_declare:
297 case Intrinsic::dbg_label:
298 case Intrinsic::dbg_value:
299 llvm_unreachable("debuginfo shouldn't have associated defs!");
300 default:
301 break;
302 }
303 }
304
305 if (auto *CB = dyn_cast_or_null<CallBase>(Val: UseInst)) {
306 ModRefInfo I = AA.getModRefInfo(DefInst, CB);
307 return isModSet(MRI: I);
308 }
309
310 if (auto *DefLoad = dyn_cast<LoadInst>(Val: DefInst))
311 if (auto *UseLoad = dyn_cast_or_null<LoadInst>(Val: UseInst))
312 return !areLoadsReorderable(Use: UseLoad, MayClobber: DefLoad);
313
314 ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc);
315 return isModSet(MRI: I);
316}
317
318template <typename AliasAnalysisType>
319static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU,
320 const MemoryLocOrCall &UseMLOC,
321 AliasAnalysisType &AA) {
322 // FIXME: This is a temporary hack to allow a single instructionClobbersQuery
323 // to exist while MemoryLocOrCall is pushed through places.
324 if (UseMLOC.IsCall)
325 return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(),
326 AA);
327 return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(),
328 AA);
329}
330
331// Return true when MD may alias MU, return false otherwise.
332bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
333 AliasAnalysis &AA) {
334 return instructionClobbersQuery(MD, MU, UseMLOC: MemoryLocOrCall(MU), AA);
335}
336
337namespace {
338
339struct UpwardsMemoryQuery {
340 // True if our original query started off as a call
341 bool IsCall = false;
342 // The pointer location we started the query with. This will be empty if
343 // IsCall is true.
344 MemoryLocation StartingLoc;
345 // This is the instruction we were querying about.
346 const Instruction *Inst = nullptr;
347 // The MemoryAccess we actually got called with, used to test local domination
348 const MemoryAccess *OriginalAccess = nullptr;
349 bool SkipSelfAccess = false;
350
351 UpwardsMemoryQuery() = default;
352
353 UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
354 : IsCall(isa<CallBase>(Val: Inst)), Inst(Inst), OriginalAccess(Access) {
355 if (!IsCall)
356 StartingLoc = MemoryLocation::get(Inst);
357 }
358};
359
360} // end anonymous namespace
361
362template <typename AliasAnalysisType>
363static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
364 const Instruction *I) {
365 // If the memory can't be changed, then loads of the memory can't be
366 // clobbered.
367 if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
368 return I->hasMetadata(KindID: LLVMContext::MD_invariant_load) ||
369 !isModSet(AA.getModRefInfoMask(MemoryLocation::get(LI)));
370 }
371 return false;
372}
373
374/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
375/// inbetween `Start` and `ClobberAt` can clobbers `Start`.
376///
377/// This is meant to be as simple and self-contained as possible. Because it
378/// uses no cache, etc., it can be relatively expensive.
379///
380/// \param Start The MemoryAccess that we want to walk from.
381/// \param ClobberAt A clobber for Start.
382/// \param StartLoc The MemoryLocation for Start.
383/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
384/// \param Query The UpwardsMemoryQuery we used for our search.
385/// \param AA The AliasAnalysis we used for our search.
386/// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
387
388[[maybe_unused]] static void
389checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
390 const MemoryLocation &StartLoc, const MemorySSA &MSSA,
391 const UpwardsMemoryQuery &Query, BatchAAResults &AA,
392 bool AllowImpreciseClobber = false) {
393 assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
394
395 if (MSSA.isLiveOnEntryDef(MA: Start)) {
396 assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
397 "liveOnEntry must clobber itself");
398 return;
399 }
400
401 bool FoundClobber = false;
402 DenseSet<UpwardDefsElem> VisitedPhis;
403 SmallVector<UpwardDefsElem, 8> Worklist;
404 Worklist.push_back(Elt: {.MA: Start, .Loc: StartLoc, /*MayBeCrossIteration=*/false});
405 // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
406 // is found, complain.
407 while (!Worklist.empty()) {
408 auto MAP = Worklist.pop_back_val();
409 // All we care about is that nothing from Start to ClobberAt clobbers Start.
410 // We learn nothing from revisiting nodes.
411 if (!VisitedPhis.insert(V: MAP).second)
412 continue;
413
414 for (auto *MA : def_chain(MA: MAP.MA)) {
415 if (MA == ClobberAt) {
416 if (const auto *MD = dyn_cast<MemoryDef>(Val: MA)) {
417 // instructionClobbersQuery isn't essentially free, so don't use `|=`,
418 // since it won't let us short-circuit.
419 //
420 // Also, note that this can't be hoisted out of the `Worklist` loop,
421 // since MD may only act as a clobber for 1 of N MemoryLocations.
422 FoundClobber = FoundClobber || MSSA.isLiveOnEntryDef(MA: MD);
423 if (!FoundClobber) {
424 BatchAACrossIterationScope _(AA, MAP.MayBeCrossIteration);
425 if (instructionClobbersQuery(MD, UseLoc: MAP.Loc, UseInst: Query.Inst, AA))
426 FoundClobber = true;
427 }
428 }
429 break;
430 }
431
432 // We should never hit liveOnEntry, unless it's the clobber.
433 assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?");
434
435 if (const auto *MD = dyn_cast<MemoryDef>(Val: MA)) {
436 // If Start is a Def, skip self.
437 if (MD == Start)
438 continue;
439
440 BatchAACrossIterationScope _(AA, MAP.MayBeCrossIteration);
441 assert(!instructionClobbersQuery(MD, MAP.Loc, Query.Inst, AA) &&
442 "Found clobber before reaching ClobberAt!");
443 continue;
444 }
445
446 if (const auto *MU = dyn_cast<MemoryUse>(Val: MA)) {
447 (void)MU;
448 assert (MU == Start &&
449 "Can only find use in def chain if Start is a use");
450 continue;
451 }
452
453 assert(isa<MemoryPhi>(MA));
454
455 // Add reachable phi predecessors
456 for (auto ItB = upward_defs_begin(Pair: {.MA: MA, .Loc: MAP.Loc, .MayBeCrossIteration: MAP.MayBeCrossIteration},
457 DT&: MSSA.getDomTree()),
458 ItE = upward_defs_end();
459 ItB != ItE; ++ItB)
460 if (MSSA.getDomTree().isReachableFromEntry(A: ItB.getPhiArgBlock()))
461 Worklist.emplace_back(Args: *ItB);
462 }
463 }
464
465 // If the verify is done following an optimization, it's possible that
466 // ClobberAt was a conservative clobbering, that we can now infer is not a
467 // true clobbering access. Don't fail the verify if that's the case.
468 // We do have accesses that claim they're optimized, but could be optimized
469 // further. Updating all these can be expensive, so allow it for now (FIXME).
470 if (AllowImpreciseClobber)
471 return;
472
473 // If ClobberAt is a MemoryPhi, we can assume something above it acted as a
474 // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point.
475 assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) &&
476 "ClobberAt never acted as a clobber");
477}
478
479namespace {
480
481/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
482/// in one class.
483class ClobberWalker {
484 /// Save a few bytes by using unsigned instead of size_t.
485 using ListIndex = unsigned;
486
487 /// Represents a span of contiguous MemoryDefs, potentially ending in a
488 /// MemoryPhi.
489 struct DefPath {
490 MemoryLocation Loc;
491 // Note that, because we always walk in reverse, Last will always dominate
492 // First. Also note that First and Last are inclusive.
493 MemoryAccess *First;
494 MemoryAccess *Last;
495 std::optional<ListIndex> Previous;
496 bool MayBeCrossIteration;
497
498 DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last,
499 bool MayBeCrossIteration, std::optional<ListIndex> Previous)
500 : Loc(Loc), First(First), Last(Last), Previous(Previous),
501 MayBeCrossIteration(MayBeCrossIteration) {}
502
503 DefPath(const MemoryLocation &Loc, MemoryAccess *Init,
504 bool MayBeCrossIteration, std::optional<ListIndex> Previous)
505 : DefPath(Loc, Init, Init, MayBeCrossIteration, Previous) {}
506 };
507
508 const MemorySSA &MSSA;
509 DominatorTree &DT;
510 BatchAAResults *AA;
511 UpwardsMemoryQuery *Query;
512 unsigned *UpwardWalkLimit;
513
514 // Phi optimization bookkeeping:
515 // List of DefPath to process during the current phi optimization walk.
516 SmallVector<DefPath, 32> Paths;
517 // List of visited <Access, Location> pairs; we can skip paths already
518 // visited with the same memory location.
519 DenseSet<UpwardDefsElem> VisitedPhis;
520
521 /// Find the nearest def or phi that `From` can legally be optimized to.
522 const MemoryAccess *getWalkTarget(const MemoryPhi *From) const {
523 assert(From->getNumOperands() && "Phi with no operands?");
524
525 BasicBlock *BB = From->getBlock();
526 MemoryAccess *Result = MSSA.getLiveOnEntryDef();
527 DomTreeNode *Node = DT.getNode(BB);
528 while ((Node = Node->getIDom())) {
529 auto *Defs = MSSA.getBlockDefs(BB: Node->getBlock());
530 if (Defs)
531 return &*Defs->rbegin();
532 }
533 return Result;
534 }
535
536 /// Result of calling walkToPhiOrClobber.
537 struct UpwardsWalkResult {
538 /// The "Result" of the walk. Either a clobber, the last thing we walked, or
539 /// both. Include alias info when clobber found.
540 MemoryAccess *Result;
541 bool IsKnownClobber;
542 };
543
544 /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last.
545 /// This will update Desc.Last as it walks. It will (optionally) also stop at
546 /// StopAt.
547 ///
548 /// This does not test for whether StopAt is a clobber
549 UpwardsWalkResult
550 walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr,
551 const MemoryAccess *SkipStopAt = nullptr) const {
552 assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
553 assert(UpwardWalkLimit && "Need a valid walk limit");
554 bool LimitAlreadyReached = false;
555 // (*UpwardWalkLimit) may be 0 here, due to the loop in tryOptimizePhi. Set
556 // it to 1. This will not do any alias() calls. It either returns in the
557 // first iteration in the loop below, or is set back to 0 if all def chains
558 // are free of MemoryDefs.
559 if (!*UpwardWalkLimit) {
560 *UpwardWalkLimit = 1;
561 LimitAlreadyReached = true;
562 }
563
564 for (MemoryAccess *Current : def_chain(MA: Desc.Last)) {
565 Desc.Last = Current;
566 if (Current == StopAt || Current == SkipStopAt)
567 return {.Result: Current, .IsKnownClobber: false};
568
569 if (auto *MD = dyn_cast<MemoryDef>(Val: Current)) {
570 if (MSSA.isLiveOnEntryDef(MA: MD))
571 return {.Result: MD, .IsKnownClobber: true};
572
573 if (!--*UpwardWalkLimit)
574 return {.Result: Current, .IsKnownClobber: true};
575
576 BatchAACrossIterationScope _(*AA, Desc.MayBeCrossIteration);
577 if (instructionClobbersQuery(MD, UseLoc: Desc.Loc, UseInst: Query->Inst, AA&: *AA))
578 return {.Result: MD, .IsKnownClobber: true};
579 }
580 }
581
582 if (LimitAlreadyReached)
583 *UpwardWalkLimit = 0;
584
585 assert(isa<MemoryPhi>(Desc.Last) &&
586 "Ended at a non-clobber that's not a phi?");
587 return {.Result: Desc.Last, .IsKnownClobber: false};
588 }
589
590 void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
591 ListIndex PriorNode, bool MayBeCrossIteration) {
592 auto UpwardDefsBegin =
593 upward_defs_begin(Pair: {.MA: Phi, .Loc: Paths[PriorNode].Loc, .MayBeCrossIteration: MayBeCrossIteration}, DT);
594 auto UpwardDefs = make_range(x: UpwardDefsBegin, y: upward_defs_end());
595 for (const UpwardDefsElem &E : UpwardDefs) {
596 PausedSearches.push_back(Elt: Paths.size());
597 Paths.emplace_back(Args: E.Loc, Args: E.MA, Args: E.MayBeCrossIteration, Args&: PriorNode);
598 }
599 }
600
601 /// Represents a search that terminated after finding a clobber. This clobber
602 /// may or may not be present in the path of defs from LastNode..SearchStart,
603 /// since it may have been retrieved from cache.
604 struct TerminatedPath {
605 MemoryAccess *Clobber;
606 ListIndex LastNode;
607 };
608
609 /// Get an access that keeps us from optimizing to the given phi.
610 ///
611 /// PausedSearches is an array of indices into the Paths array. Its incoming
612 /// value is the indices of searches that stopped at the last phi optimization
613 /// target. It's left in an unspecified state.
614 ///
615 /// If this returns std::nullopt, NewPaused is a vector of searches that
616 /// terminated at StopWhere. Otherwise, NewPaused is left in an unspecified
617 /// state.
618 std::optional<TerminatedPath>
619 getBlockingAccess(const MemoryAccess *StopWhere,
620 SmallVectorImpl<ListIndex> &PausedSearches,
621 SmallVectorImpl<ListIndex> &NewPaused,
622 SmallVectorImpl<TerminatedPath> &Terminated) {
623 assert(!PausedSearches.empty() && "No searches to continue?");
624
625 // BFS vs DFS really doesn't make a difference here, so just do a DFS with
626 // PausedSearches as our stack.
627 while (!PausedSearches.empty()) {
628 ListIndex PathIndex = PausedSearches.pop_back_val();
629 DefPath &Node = Paths[PathIndex];
630
631 // If we've already visited this path with this MemoryLocation, we don't
632 // need to do so again.
633 //
634 // NOTE: That we just drop these paths on the ground makes caching
635 // behavior sporadic. e.g. given a diamond:
636 // A
637 // B C
638 // D
639 //
640 // ...If we walk D, B, A, C, we'll only cache the result of phi
641 // optimization for A, B, and D; C will be skipped because it dies here.
642 // This arguably isn't the worst thing ever, since:
643 // - We generally query things in a top-down order, so if we got below D
644 // without needing cache entries for {C, MemLoc}, then chances are
645 // that those cache entries would end up ultimately unused.
646 // - We still cache things for A, so C only needs to walk up a bit.
647 // If this behavior becomes problematic, we can fix without a ton of extra
648 // work.
649 if (!VisitedPhis.insert(V: {.MA: Node.Last, .Loc: Node.Loc, .MayBeCrossIteration: Node.MayBeCrossIteration})
650 .second)
651 continue;
652
653 const MemoryAccess *SkipStopWhere = nullptr;
654 if (Query->SkipSelfAccess && Node.Loc == Query->StartingLoc) {
655 assert(isa<MemoryDef>(Query->OriginalAccess));
656 SkipStopWhere = Query->OriginalAccess;
657 }
658
659 UpwardsWalkResult Res = walkToPhiOrClobber(Desc&: Node,
660 /*StopAt=*/StopWhere,
661 /*SkipStopAt=*/SkipStopWhere);
662 if (Res.IsKnownClobber) {
663 assert(Res.Result != StopWhere && Res.Result != SkipStopWhere);
664
665 // If this wasn't a cache hit, we hit a clobber when walking. That's a
666 // failure.
667 TerminatedPath Term{.Clobber: Res.Result, .LastNode: PathIndex};
668 if (!MSSA.dominates(A: Res.Result, B: StopWhere))
669 return Term;
670
671 // Otherwise, it's a valid thing to potentially optimize to.
672 Terminated.push_back(Elt: Term);
673 continue;
674 }
675
676 if (Res.Result == StopWhere || Res.Result == SkipStopWhere) {
677 // We've hit our target. Save this path off for if we want to continue
678 // walking. If we are in the mode of skipping the OriginalAccess, and
679 // we've reached back to the OriginalAccess, do not save path, we've
680 // just looped back to self.
681 if (Res.Result != SkipStopWhere)
682 NewPaused.push_back(Elt: PathIndex);
683 continue;
684 }
685
686 assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber");
687 addSearches(Phi: cast<MemoryPhi>(Val: Res.Result), PausedSearches, PriorNode: PathIndex,
688 MayBeCrossIteration: Node.MayBeCrossIteration);
689 }
690
691 return std::nullopt;
692 }
693
694 template <typename T, typename Walker>
695 struct generic_def_path_iterator
696 : public iterator_facade_base<generic_def_path_iterator<T, Walker>,
697 std::forward_iterator_tag, T *> {
698 generic_def_path_iterator() = default;
699 generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}
700
701 T &operator*() const { return curNode(); }
702
703 generic_def_path_iterator &operator++() {
704 N = curNode().Previous;
705 return *this;
706 }
707
708 bool operator==(const generic_def_path_iterator &O) const {
709 if (N.has_value() != O.N.has_value())
710 return false;
711 return !N || *N == *O.N;
712 }
713
714 private:
715 T &curNode() const { return W->Paths[*N]; }
716
717 Walker *W = nullptr;
718 std::optional<ListIndex> N;
719 };
720
721 using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>;
722 using const_def_path_iterator =
723 generic_def_path_iterator<const DefPath, const ClobberWalker>;
724
725 iterator_range<def_path_iterator> def_path(ListIndex From) {
726 return make_range(x: def_path_iterator(this, From), y: def_path_iterator());
727 }
728
729 iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const {
730 return make_range(x: const_def_path_iterator(this, From),
731 y: const_def_path_iterator());
732 }
733
734 struct OptznResult {
735 /// The path that contains our result.
736 TerminatedPath PrimaryClobber;
737 /// The paths that we can legally cache back from, but that aren't
738 /// necessarily the result of the Phi optimization.
739 SmallVector<TerminatedPath, 4> OtherClobbers;
740 };
741
742 ListIndex defPathIndex(const DefPath &N) const {
743 // The assert looks nicer if we don't need to do &N
744 const DefPath *NP = &N;
745 assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() &&
746 "Out of bounds DefPath!");
747 return NP - &Paths.front();
748 }
749
750 /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths
751 /// that act as legal clobbers. Note that this won't return *all* clobbers.
752 ///
753 /// Phi optimization algorithm tl;dr:
754 /// - Find the earliest def/phi, A, we can optimize to
755 /// - Find if all paths from the starting memory access ultimately reach A
756 /// - If not, optimization isn't possible.
757 /// - Otherwise, walk from A to another clobber or phi, A'.
758 /// - If A' is a def, we're done.
759 /// - If A' is a phi, try to optimize it.
760 ///
761 /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path
762 /// terminates when a MemoryAccess that clobbers said MemoryLocation is found.
763 OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start,
764 const MemoryLocation &Loc) {
765 assert(Paths.empty() && VisitedPhis.empty() &&
766 "Reset the optimization state.");
767
768 Paths.emplace_back(Args: Loc, Args&: Start, Args&: Phi, /*MayBeCrossIteration=*/Args: false,
769 Args: std::nullopt);
770 // Stores how many "valid" optimization nodes we had prior to calling
771 // addSearches/getBlockingAccess. Necessary for caching if we had a blocker.
772 auto PriorPathsSize = Paths.size();
773
774 SmallVector<ListIndex, 16> PausedSearches;
775 SmallVector<ListIndex, 8> NewPaused;
776 SmallVector<TerminatedPath, 4> TerminatedPaths;
777
778 addSearches(Phi, PausedSearches, PriorNode: 0, /*MayBeCrossIteration=*/false);
779
780 // Moves the TerminatedPath with the "most dominated" Clobber to the end of
781 // Paths.
782 auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) {
783 assert(!Paths.empty() && "Need a path to move");
784 auto Dom = Paths.begin();
785 for (auto I = std::next(x: Dom), E = Paths.end(); I != E; ++I)
786 if (!MSSA.dominates(A: I->Clobber, B: Dom->Clobber))
787 Dom = I;
788 auto Last = Paths.end() - 1;
789 if (Last != Dom)
790 std::iter_swap(a: Last, b: Dom);
791 };
792
793 MemoryPhi *Current = Phi;
794 while (true) {
795 assert(!MSSA.isLiveOnEntryDef(Current) &&
796 "liveOnEntry wasn't treated as a clobber?");
797
798 const auto *Target = getWalkTarget(From: Current);
799 // If a TerminatedPath doesn't dominate Target, then it wasn't a legal
800 // optimization for the prior phi.
801 assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) {
802 return MSSA.dominates(P.Clobber, Target);
803 }));
804
805 // FIXME: This is broken, because the Blocker may be reported to be
806 // liveOnEntry, and we'll happily wait for that to disappear (read: never)
807 // For the moment, this is fine, since we do nothing with blocker info.
808 if (std::optional<TerminatedPath> Blocker = getBlockingAccess(
809 StopWhere: Target, PausedSearches, NewPaused, Terminated&: TerminatedPaths)) {
810
811 // Find the node we started at. We can't search based on N->Last, since
812 // we may have gone around a loop with a different MemoryLocation.
813 auto Iter = find_if(Range: def_path(From: Blocker->LastNode), P: [&](const DefPath &N) {
814 return defPathIndex(N) < PriorPathsSize;
815 });
816 assert(Iter != def_path_iterator());
817
818 DefPath &CurNode = *Iter;
819 assert(CurNode.Last == Current);
820
821 // Two things:
822 // A. We can't reliably cache all of NewPaused back. Consider a case
823 // where we have two paths in NewPaused; one of which can't optimize
824 // above this phi, whereas the other can. If we cache the second path
825 // back, we'll end up with suboptimal cache entries. We can handle
826 // cases like this a bit better when we either try to find all
827 // clobbers that block phi optimization, or when our cache starts
828 // supporting unfinished searches.
829 // B. We can't reliably cache TerminatedPaths back here without doing
830 // extra checks; consider a case like:
831 // T
832 // / \
833 // D C
834 // \ /
835 // S
836 // Where T is our target, C is a node with a clobber on it, D is a
837 // diamond (with a clobber *only* on the left or right node, N), and
838 // S is our start. Say we walk to D, through the node opposite N
839 // (read: ignoring the clobber), and see a cache entry in the top
840 // node of D. That cache entry gets put into TerminatedPaths. We then
841 // walk up to C (N is later in our worklist), find the clobber, and
842 // quit. If we append TerminatedPaths to OtherClobbers, we'll cache
843 // the bottom part of D to the cached clobber, ignoring the clobber
844 // in N. Again, this problem goes away if we start tracking all
845 // blockers for a given phi optimization.
846 TerminatedPath Result{.Clobber: CurNode.Last, .LastNode: defPathIndex(N: CurNode)};
847 return {.PrimaryClobber: Result, .OtherClobbers: {}};
848 }
849
850 // If there's nothing left to search, then all paths led to valid clobbers
851 // that we got from our cache; pick the nearest to the start, and allow
852 // the rest to be cached back.
853 if (NewPaused.empty()) {
854 MoveDominatedPathToEnd(TerminatedPaths);
855 TerminatedPath Result = TerminatedPaths.pop_back_val();
856 return {.PrimaryClobber: Result, .OtherClobbers: std::move(TerminatedPaths)};
857 }
858
859 MemoryAccess *DefChainEnd = nullptr;
860 SmallVector<TerminatedPath, 4> Clobbers;
861 for (ListIndex Paused : NewPaused) {
862 UpwardsWalkResult WR = walkToPhiOrClobber(Desc&: Paths[Paused]);
863 if (WR.IsKnownClobber)
864 Clobbers.push_back(Elt: {.Clobber: WR.Result, .LastNode: Paused});
865 else
866 // Micro-opt: If we hit the end of the chain, save it.
867 DefChainEnd = WR.Result;
868 }
869
870 if (!TerminatedPaths.empty()) {
871 // If we couldn't find the dominating phi/liveOnEntry in the above loop,
872 // do it now.
873 if (!DefChainEnd)
874 for (auto *MA : def_chain(MA: const_cast<MemoryAccess *>(Target)))
875 DefChainEnd = MA;
876 assert(DefChainEnd && "Failed to find dominating phi/liveOnEntry");
877
878 // If any of the terminated paths don't dominate the phi we'll try to
879 // optimize, we need to figure out what they are and quit.
880 const BasicBlock *ChainBB = DefChainEnd->getBlock();
881 for (const TerminatedPath &TP : TerminatedPaths) {
882 // Because we know that DefChainEnd is as "high" as we can go, we
883 // don't need local dominance checks; BB dominance is sufficient.
884 if (DT.dominates(A: ChainBB, B: TP.Clobber->getBlock()))
885 Clobbers.push_back(Elt: TP);
886 }
887 }
888
889 // If we have clobbers in the def chain, find the one closest to Current
890 // and quit.
891 if (!Clobbers.empty()) {
892 MoveDominatedPathToEnd(Clobbers);
893 TerminatedPath Result = Clobbers.pop_back_val();
894 return {.PrimaryClobber: Result, .OtherClobbers: std::move(Clobbers)};
895 }
896
897 assert(all_of(NewPaused,
898 [&](ListIndex I) { return Paths[I].Last == DefChainEnd; }));
899
900 // Because liveOnEntry is a clobber, this must be a phi.
901 auto *DefChainPhi = cast<MemoryPhi>(Val: DefChainEnd);
902
903 PriorPathsSize = Paths.size();
904 PausedSearches.clear();
905 for (ListIndex I : NewPaused)
906 addSearches(Phi: DefChainPhi, PausedSearches, PriorNode: I,
907 MayBeCrossIteration: Paths[I].MayBeCrossIteration);
908 NewPaused.clear();
909
910 Current = DefChainPhi;
911 }
912 }
913
914 void verifyOptResult(const OptznResult &R) const {
915 assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) {
916 return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber);
917 }));
918 }
919
920 void resetPhiOptznState() {
921 Paths.clear();
922 VisitedPhis.clear();
923 }
924
925public:
926 ClobberWalker(const MemorySSA &MSSA, DominatorTree &DT)
927 : MSSA(MSSA), DT(DT) {}
928
929 /// Finds the nearest clobber for the given query, optimizing phis if
930 /// possible.
931 MemoryAccess *findClobber(BatchAAResults &BAA, MemoryAccess *Start,
932 UpwardsMemoryQuery &Q, unsigned &UpWalkLimit) {
933 AA = &BAA;
934 Query = &Q;
935 UpwardWalkLimit = &UpWalkLimit;
936 // Starting limit must be > 0.
937 if (!UpWalkLimit)
938 UpWalkLimit++;
939
940 MemoryAccess *Current = Start;
941 // This walker pretends uses don't exist. If we're handed one, silently grab
942 // its def. (This has the nice side-effect of ensuring we never cache uses)
943 if (auto *MU = dyn_cast<MemoryUse>(Val: Start))
944 Current = MU->getDefiningAccess();
945
946 DefPath FirstDesc(Q.StartingLoc, Current, Current,
947 /*MayBeCrossIteration=*/false, std::nullopt);
948 // Fast path for the overly-common case (no crazy phi optimization
949 // necessary)
950 UpwardsWalkResult WalkResult = walkToPhiOrClobber(Desc&: FirstDesc);
951 MemoryAccess *Result;
952 if (WalkResult.IsKnownClobber) {
953 Result = WalkResult.Result;
954 } else {
955 OptznResult OptRes = tryOptimizePhi(Phi: cast<MemoryPhi>(Val: FirstDesc.Last),
956 Start: Current, Loc: Q.StartingLoc);
957 verifyOptResult(R: OptRes);
958 resetPhiOptznState();
959 Result = OptRes.PrimaryClobber.Clobber;
960 }
961
962#ifdef EXPENSIVE_CHECKS
963 if (!Q.SkipSelfAccess && *UpwardWalkLimit > 0)
964 checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, BAA);
965#endif
966 return Result;
967 }
968};
969
970struct RenamePassData {
971 DomTreeNode *DTN;
972 DomTreeNode::const_iterator ChildIt;
973 MemoryAccess *IncomingVal;
974
975 RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
976 MemoryAccess *M)
977 : DTN(D), ChildIt(It), IncomingVal(M) {}
978
979 void swap(RenamePassData &RHS) {
980 std::swap(a&: DTN, b&: RHS.DTN);
981 std::swap(a&: ChildIt, b&: RHS.ChildIt);
982 std::swap(a&: IncomingVal, b&: RHS.IncomingVal);
983 }
984};
985
986} // end anonymous namespace
987
988namespace llvm {
989
990class MemorySSA::ClobberWalkerBase {
991 ClobberWalker Walker;
992 MemorySSA *MSSA;
993
994public:
995 ClobberWalkerBase(MemorySSA *M, DominatorTree *D) : Walker(*M, *D), MSSA(M) {}
996
997 MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *,
998 const MemoryLocation &,
999 BatchAAResults &, unsigned &);
1000 // Third argument (bool), defines whether the clobber search should skip the
1001 // original queried access. If true, there will be a follow-up query searching
1002 // for a clobber access past "self". Note that the Optimized access is not
1003 // updated if a new clobber is found by this SkipSelf search. If this
1004 // additional query becomes heavily used we may decide to cache the result.
1005 // Walker instantiations will decide how to set the SkipSelf bool.
1006 MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, BatchAAResults &,
1007 unsigned &, bool,
1008 bool UseInvariantGroup = true);
1009};
1010
1011/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
1012/// longer does caching on its own, but the name has been retained for the
1013/// moment.
1014class MemorySSA::CachingWalker final : public MemorySSAWalker {
1015 ClobberWalkerBase *Walker;
1016
1017public:
1018 CachingWalker(MemorySSA *M, ClobberWalkerBase *W)
1019 : MemorySSAWalker(M), Walker(W) {}
1020 ~CachingWalker() override = default;
1021
1022 using MemorySSAWalker::getClobberingMemoryAccess;
1023
1024 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, BatchAAResults &BAA,
1025 unsigned &UWL) {
1026 return Walker->getClobberingMemoryAccessBase(MA, BAA, UWL, false);
1027 }
1028 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1029 const MemoryLocation &Loc,
1030 BatchAAResults &BAA, unsigned &UWL) {
1031 return Walker->getClobberingMemoryAccessBase(MA, Loc, BAA, UWL);
1032 }
1033 // This method is not accessible outside of this file.
1034 MemoryAccess *getClobberingMemoryAccessWithoutInvariantGroup(
1035 MemoryAccess *MA, BatchAAResults &BAA, unsigned &UWL) {
1036 return Walker->getClobberingMemoryAccessBase(MA, BAA, UWL, false, UseInvariantGroup: false);
1037 }
1038
1039 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1040 BatchAAResults &BAA) override {
1041 unsigned UpwardWalkLimit = MaxCheckLimit;
1042 return getClobberingMemoryAccess(MA, BAA, UWL&: UpwardWalkLimit);
1043 }
1044 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1045 const MemoryLocation &Loc,
1046 BatchAAResults &BAA) override {
1047 unsigned UpwardWalkLimit = MaxCheckLimit;
1048 return getClobberingMemoryAccess(MA, Loc, BAA, UWL&: UpwardWalkLimit);
1049 }
1050
1051 void invalidateInfo(MemoryAccess *MA) override {
1052 if (auto *MUD = dyn_cast<MemoryUseOrDef>(Val: MA))
1053 MUD->resetOptimized();
1054 }
1055};
1056
1057class MemorySSA::SkipSelfWalker final : public MemorySSAWalker {
1058 ClobberWalkerBase *Walker;
1059
1060public:
1061 SkipSelfWalker(MemorySSA *M, ClobberWalkerBase *W)
1062 : MemorySSAWalker(M), Walker(W) {}
1063 ~SkipSelfWalker() override = default;
1064
1065 using MemorySSAWalker::getClobberingMemoryAccess;
1066
1067 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, BatchAAResults &BAA,
1068 unsigned &UWL) {
1069 return Walker->getClobberingMemoryAccessBase(MA, BAA, UWL, true);
1070 }
1071 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1072 const MemoryLocation &Loc,
1073 BatchAAResults &BAA, unsigned &UWL) {
1074 return Walker->getClobberingMemoryAccessBase(MA, Loc, BAA, UWL);
1075 }
1076
1077 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1078 BatchAAResults &BAA) override {
1079 unsigned UpwardWalkLimit = MaxCheckLimit;
1080 return getClobberingMemoryAccess(MA, BAA, UWL&: UpwardWalkLimit);
1081 }
1082 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
1083 const MemoryLocation &Loc,
1084 BatchAAResults &BAA) override {
1085 unsigned UpwardWalkLimit = MaxCheckLimit;
1086 return getClobberingMemoryAccess(MA, Loc, BAA, UWL&: UpwardWalkLimit);
1087 }
1088
1089 void invalidateInfo(MemoryAccess *MA) override {
1090 if (auto *MUD = dyn_cast<MemoryUseOrDef>(Val: MA))
1091 MUD->resetOptimized();
1092 }
1093};
1094
1095} // end namespace llvm
1096
1097void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal,
1098 bool RenameAllUses) {
1099 // Pass through values to our successors
1100 for (const BasicBlock *S : successors(BB)) {
1101 auto It = PerBlockAccesses.find(Val: S);
1102 // Rename the phi nodes in our successor block
1103 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(Val: It->second->front()))
1104 continue;
1105 AccessList *Accesses = It->second.get();
1106 auto *Phi = cast<MemoryPhi>(Val: &Accesses->front());
1107 if (RenameAllUses) {
1108 bool ReplacementDone = false;
1109 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
1110 if (Phi->getIncomingBlock(I) == BB) {
1111 Phi->setIncomingValue(I, V: IncomingVal);
1112 ReplacementDone = true;
1113 }
1114 (void) ReplacementDone;
1115 assert(ReplacementDone && "Incomplete phi during partial rename");
1116 } else
1117 Phi->addIncoming(V: IncomingVal, BB);
1118 }
1119}
1120
1121/// Rename a single basic block into MemorySSA form.
1122/// Uses the standard SSA renaming algorithm.
1123/// \returns The new incoming value.
1124MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal,
1125 bool RenameAllUses) {
1126 auto It = PerBlockAccesses.find(Val: BB);
1127 // Skip most processing if the list is empty.
1128 if (It != PerBlockAccesses.end()) {
1129 AccessList *Accesses = It->second.get();
1130 for (MemoryAccess &L : *Accesses) {
1131 if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(Val: &L)) {
1132 if (MUD->getDefiningAccess() == nullptr || RenameAllUses)
1133 MUD->setDefiningAccess(DMA: IncomingVal);
1134 if (isa<MemoryDef>(Val: &L))
1135 IncomingVal = &L;
1136 } else {
1137 IncomingVal = &L;
1138 }
1139 }
1140 }
1141 return IncomingVal;
1142}
1143
1144/// This is the standard SSA renaming algorithm.
1145///
1146/// We walk the dominator tree in preorder, renaming accesses, and then filling
1147/// in phi nodes in our successors.
1148void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
1149 SmallPtrSetImpl<BasicBlock *> &Visited,
1150 bool SkipVisited, bool RenameAllUses) {
1151 assert(Root && "Trying to rename accesses in an unreachable block");
1152
1153 SmallVector<RenamePassData, 32> WorkStack;
1154 // Skip everything if we already renamed this block and we are skipping.
1155 // Note: You can't sink this into the if, because we need it to occur
1156 // regardless of whether we skip blocks or not.
1157 bool AlreadyVisited = !Visited.insert(Ptr: Root->getBlock()).second;
1158 if (SkipVisited && AlreadyVisited)
1159 return;
1160
1161 IncomingVal = renameBlock(BB: Root->getBlock(), IncomingVal, RenameAllUses);
1162 renameSuccessorPhis(BB: Root->getBlock(), IncomingVal, RenameAllUses);
1163 WorkStack.push_back(Elt: {Root, Root->begin(), IncomingVal});
1164
1165 while (!WorkStack.empty()) {
1166 DomTreeNode *Node = WorkStack.back().DTN;
1167 DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt;
1168 IncomingVal = WorkStack.back().IncomingVal;
1169
1170 if (ChildIt == Node->end()) {
1171 WorkStack.pop_back();
1172 } else {
1173 DomTreeNode *Child = *ChildIt;
1174 ++WorkStack.back().ChildIt;
1175 BasicBlock *BB = Child->getBlock();
1176 // Note: You can't sink this into the if, because we need it to occur
1177 // regardless of whether we skip blocks or not.
1178 AlreadyVisited = !Visited.insert(Ptr: BB).second;
1179 if (SkipVisited && AlreadyVisited) {
1180 // We already visited this during our renaming, which can happen when
1181 // being asked to rename multiple blocks. Figure out the incoming val,
1182 // which is the last def.
1183 // Incoming value can only change if there is a block def, and in that
1184 // case, it's the last block def in the list.
1185 if (auto *BlockDefs = getBlockDefs(BB))
1186 IncomingVal = &*BlockDefs->rbegin();
1187 } else
1188 IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses);
1189 renameSuccessorPhis(BB, IncomingVal, RenameAllUses);
1190 WorkStack.push_back(Elt: {Child, Child->begin(), IncomingVal});
1191 }
1192 }
1193}
1194
1195/// This handles unreachable block accesses by deleting phi nodes in
1196/// unreachable blocks, and marking all other unreachable MemoryAccess's as
1197/// being uses of the live on entry definition.
1198void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
1199 assert(!DT->isReachableFromEntry(BB) &&
1200 "Reachable block found while handling unreachable blocks");
1201
1202 // Make sure phi nodes in our reachable successors end up with a
1203 // LiveOnEntryDef for our incoming edge, even though our block is forward
1204 // unreachable. We could just disconnect these blocks from the CFG fully,
1205 // but we do not right now.
1206 for (const BasicBlock *S : successors(BB)) {
1207 if (!DT->isReachableFromEntry(A: S))
1208 continue;
1209 auto It = PerBlockAccesses.find(Val: S);
1210 // Rename the phi nodes in our successor block
1211 if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(Val: It->second->front()))
1212 continue;
1213 AccessList *Accesses = It->second.get();
1214 auto *Phi = cast<MemoryPhi>(Val: &Accesses->front());
1215 Phi->addIncoming(V: LiveOnEntryDef.get(), BB);
1216 }
1217
1218 auto It = PerBlockAccesses.find(Val: BB);
1219 if (It == PerBlockAccesses.end())
1220 return;
1221
1222 auto &Accesses = It->second;
1223 for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) {
1224 auto Next = std::next(x: AI);
1225 // If we have a phi, just remove it. We are going to replace all
1226 // users with live on entry.
1227 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Val&: AI))
1228 UseOrDef->setDefiningAccess(DMA: LiveOnEntryDef.get());
1229 else
1230 Accesses->erase(where: AI);
1231 AI = Next;
1232 }
1233}
1234
1235MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
1236 : DT(DT), F(&Func), LiveOnEntryDef(nullptr), Walker(nullptr),
1237 SkipWalker(nullptr) {
1238 // Build MemorySSA using a batch alias analysis. This reuses the internal
1239 // state that AA collects during an alias()/getModRefInfo() call. This is
1240 // safe because there are no CFG changes while building MemorySSA and can
1241 // significantly reduce the time spent by the compiler in AA, because we will
1242 // make queries about all the instructions in the Function.
1243 assert(AA && "No alias analysis?");
1244 BatchAAResults BatchAA(*AA);
1245 buildMemorySSA(BAA&: BatchAA, Blocks: iterator_range(F->begin(), F->end()));
1246 // Intentionally leave AA to nullptr while building so we don't accidentally
1247 // use non-batch AliasAnalysis.
1248 this->AA = AA;
1249 // Also create the walker here.
1250 getWalker();
1251}
1252
1253MemorySSA::MemorySSA(Loop &L, AliasAnalysis *AA, DominatorTree *DT)
1254 : DT(DT), L(&L), LiveOnEntryDef(nullptr), Walker(nullptr),
1255 SkipWalker(nullptr) {
1256 // Build MemorySSA using a batch alias analysis. This reuses the internal
1257 // state that AA collects during an alias()/getModRefInfo() call. This is
1258 // safe because there are no CFG changes while building MemorySSA and can
1259 // significantly reduce the time spent by the compiler in AA, because we will
1260 // make queries about all the instructions in the Function.
1261 assert(AA && "No alias analysis?");
1262 BatchAAResults BatchAA(*AA);
1263 buildMemorySSA(
1264 BAA&: BatchAA, Blocks: map_range(C: L.blocks(), F: [](const BasicBlock *BB) -> BasicBlock & {
1265 return *const_cast<BasicBlock *>(BB);
1266 }));
1267 // Intentionally leave AA to nullptr while building so we don't accidentally
1268 // use non-batch AliasAnalysis.
1269 this->AA = AA;
1270 // Also create the walker here.
1271 getWalker();
1272}
1273
1274MemorySSA::~MemorySSA() {
1275 // Drop all our references
1276 for (const auto &Pair : PerBlockAccesses)
1277 for (MemoryAccess &MA : *Pair.second)
1278 MA.dropAllReferences();
1279}
1280
1281MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
1282 auto Res = PerBlockAccesses.try_emplace(Key: BB);
1283
1284 if (Res.second)
1285 Res.first->second = std::make_unique<AccessList>();
1286 return Res.first->second.get();
1287}
1288
1289MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) {
1290 auto Res = PerBlockDefs.try_emplace(Key: BB);
1291
1292 if (Res.second)
1293 Res.first->second = std::make_unique<DefsList>();
1294 return Res.first->second.get();
1295}
1296
1297namespace llvm {
1298
1299/// This class is a batch walker of all MemoryUse's in the program, and points
1300/// their defining access at the thing that actually clobbers them. Because it
1301/// is a batch walker that touches everything, it does not operate like the
1302/// other walkers. This walker is basically performing a top-down SSA renaming
1303/// pass, where the version stack is used as the cache. This enables it to be
1304/// significantly more time and memory efficient than using the regular walker,
1305/// which is walking bottom-up.
1306class MemorySSA::OptimizeUses {
1307public:
1308 OptimizeUses(MemorySSA *MSSA, CachingWalker *Walker, BatchAAResults *BAA,
1309 DominatorTree *DT)
1310 : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT) {}
1311
1312 void optimizeUses();
1313
1314private:
1315 /// This represents where a given memorylocation is in the stack.
1316 struct MemlocStackInfo {
1317 // This essentially is keeping track of versions of the stack. Whenever
1318 // the stack changes due to pushes or pops, these versions increase.
1319 unsigned long StackEpoch;
1320 unsigned long PopEpoch;
1321 // This is the lower bound of places on the stack to check. It is equal to
1322 // the place the last stack walk ended.
1323 // Note: Correctness depends on this being initialized to 0, which densemap
1324 // does
1325 unsigned long LowerBound;
1326 const BasicBlock *LowerBoundBlock;
1327 // This is where the last walk for this memory location ended.
1328 unsigned long LastKill;
1329 bool LastKillValid;
1330 };
1331
1332 void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
1333 SmallVectorImpl<MemoryAccess *> &,
1334 DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
1335
1336 MemorySSA *MSSA;
1337 CachingWalker *Walker;
1338 BatchAAResults *AA;
1339 DominatorTree *DT;
1340};
1341
1342} // end namespace llvm
1343
1344/// Optimize the uses in a given block This is basically the SSA renaming
1345/// algorithm, with one caveat: We are able to use a single stack for all
1346/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
1347/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just
1348/// going to be some position in that stack of possible ones.
1349///
1350/// We track the stack positions that each MemoryLocation needs
1351/// to check, and last ended at. This is because we only want to check the
1352/// things that changed since last time. The same MemoryLocation should
1353/// get clobbered by the same store (getModRefInfo does not use invariantness or
1354/// things like this, and if they start, we can modify MemoryLocOrCall to
1355/// include relevant data)
1356void MemorySSA::OptimizeUses::optimizeUsesInBlock(
1357 const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
1358 SmallVectorImpl<MemoryAccess *> &VersionStack,
1359 DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {
1360
1361 /// If no accesses, nothing to do.
1362 MemorySSA::AccessList *Accesses = MSSA->getBlockAccesses(BB);
1363 if (Accesses == nullptr)
1364 return;
1365
1366 // Pop everything that doesn't dominate the current block off the stack,
1367 // increment the PopEpoch to account for this.
1368 while (true) {
1369 assert(
1370 !VersionStack.empty() &&
1371 "Version stack should have liveOnEntry sentinel dominating everything");
1372 BasicBlock *BackBlock = VersionStack.back()->getBlock();
1373 if (DT->dominates(A: BackBlock, B: BB))
1374 break;
1375 while (VersionStack.back()->getBlock() == BackBlock)
1376 VersionStack.pop_back();
1377 ++PopEpoch;
1378 }
1379
1380 for (MemoryAccess &MA : *Accesses) {
1381 auto *MU = dyn_cast<MemoryUse>(Val: &MA);
1382 if (!MU) {
1383 VersionStack.push_back(Elt: &MA);
1384 ++StackEpoch;
1385 continue;
1386 }
1387
1388 if (MU->isOptimized())
1389 continue;
1390
1391 MemoryLocOrCall UseMLOC(MU);
1392 auto &LocInfo = LocStackInfo[UseMLOC];
1393 // If the pop epoch changed, it means we've removed stuff from top of
1394 // stack due to changing blocks. We may have to reset the lower bound or
1395 // last kill info.
1396 if (LocInfo.PopEpoch != PopEpoch) {
1397 LocInfo.PopEpoch = PopEpoch;
1398 LocInfo.StackEpoch = StackEpoch;
1399 // If the lower bound was in something that no longer dominates us, we
1400 // have to reset it.
1401 // We can't simply track stack size, because the stack may have had
1402 // pushes/pops in the meantime.
1403 // XXX: This is non-optimal, but only is slower cases with heavily
1404 // branching dominator trees. To get the optimal number of queries would
1405 // be to make lowerbound and lastkill a per-loc stack, and pop it until
1406 // the top of that stack dominates us. This does not seem worth it ATM.
1407 // A much cheaper optimization would be to always explore the deepest
1408 // branch of the dominator tree first. This will guarantee this resets on
1409 // the smallest set of blocks.
1410 if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB &&
1411 !DT->dominates(A: LocInfo.LowerBoundBlock, B: BB)) {
1412 // Reset the lower bound of things to check.
1413 // TODO: Some day we should be able to reset to last kill, rather than
1414 // 0.
1415 LocInfo.LowerBound = 0;
1416 LocInfo.LowerBoundBlock = VersionStack[0]->getBlock();
1417 LocInfo.LastKillValid = false;
1418 }
1419 } else if (LocInfo.StackEpoch != StackEpoch) {
1420 // If all that has changed is the StackEpoch, we only have to check the
1421 // new things on the stack, because we've checked everything before. In
1422 // this case, the lower bound of things to check remains the same.
1423 LocInfo.PopEpoch = PopEpoch;
1424 LocInfo.StackEpoch = StackEpoch;
1425 }
1426 if (!LocInfo.LastKillValid) {
1427 LocInfo.LastKill = VersionStack.size() - 1;
1428 LocInfo.LastKillValid = true;
1429 }
1430
1431 // At this point, we should have corrected last kill and LowerBound to be
1432 // in bounds.
1433 assert(LocInfo.LowerBound < VersionStack.size() &&
1434 "Lower bound out of range");
1435 assert(LocInfo.LastKill < VersionStack.size() &&
1436 "Last kill info out of range");
1437 // In any case, the new upper bound is the top of the stack.
1438 unsigned long UpperBound = VersionStack.size() - 1;
1439
1440 if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) {
1441 LLVM_DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " ("
1442 << *(MU->getMemoryInst()) << ")"
1443 << " because there are "
1444 << UpperBound - LocInfo.LowerBound
1445 << " stores to disambiguate\n");
1446 // Because we did not walk, LastKill is no longer valid, as this may
1447 // have been a kill.
1448 LocInfo.LastKillValid = false;
1449 continue;
1450 }
1451 bool FoundClobberResult = false;
1452 unsigned UpwardWalkLimit = MaxCheckLimit;
1453 while (UpperBound > LocInfo.LowerBound) {
1454 if (isa<MemoryPhi>(Val: VersionStack[UpperBound])) {
1455 // For phis, use the walker, see where we ended up, go there.
1456 // The invariant.group handling in MemorySSA is ad-hoc and doesn't
1457 // support updates, so don't use it to optimize uses.
1458 MemoryAccess *Result =
1459 Walker->getClobberingMemoryAccessWithoutInvariantGroup(
1460 MA: MU, BAA&: *AA, UWL&: UpwardWalkLimit);
1461 // We are guaranteed to find it or something is wrong.
1462 while (VersionStack[UpperBound] != Result) {
1463 assert(UpperBound != 0);
1464 --UpperBound;
1465 }
1466 FoundClobberResult = true;
1467 break;
1468 }
1469
1470 MemoryDef *MD = cast<MemoryDef>(Val: VersionStack[UpperBound]);
1471 if (instructionClobbersQuery(MD, MU, UseMLOC, AA&: *AA)) {
1472 FoundClobberResult = true;
1473 break;
1474 }
1475 --UpperBound;
1476 }
1477
1478 // At the end of this loop, UpperBound is either a clobber, or lower bound
1479 // PHI walking may cause it to be < LowerBound, and in fact, < LastKill.
1480 if (FoundClobberResult || UpperBound < LocInfo.LastKill) {
1481 MU->setDefiningAccess(DMA: VersionStack[UpperBound], Optimized: true);
1482 LocInfo.LastKill = UpperBound;
1483 } else {
1484 // Otherwise, we checked all the new ones, and now we know we can get to
1485 // LastKill.
1486 MU->setDefiningAccess(DMA: VersionStack[LocInfo.LastKill], Optimized: true);
1487 }
1488 LocInfo.LowerBound = VersionStack.size() - 1;
1489 LocInfo.LowerBoundBlock = BB;
1490 }
1491}
1492
1493/// Optimize uses to point to their actual clobbering definitions.
1494void MemorySSA::OptimizeUses::optimizeUses() {
1495 SmallVector<MemoryAccess *, 16> VersionStack;
1496 DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo;
1497 VersionStack.push_back(Elt: MSSA->getLiveOnEntryDef());
1498
1499 unsigned long StackEpoch = 1;
1500 unsigned long PopEpoch = 1;
1501 // We perform a non-recursive top-down dominator tree walk.
1502 for (const auto *DomNode : depth_first(G: DT->getRootNode()))
1503 optimizeUsesInBlock(BB: DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack,
1504 LocStackInfo);
1505}
1506
1507void MemorySSA::placePHINodes(
1508 const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks) {
1509 // Determine where our MemoryPhi's should go
1510 ForwardIDFCalculator IDFs(*DT);
1511 IDFs.setDefiningBlocks(DefiningBlocks);
1512 SmallVector<BasicBlock *, 32> IDFBlocks;
1513 IDFs.calculate(IDFBlocks);
1514
1515 // Now place MemoryPhi nodes.
1516 for (auto &BB : IDFBlocks)
1517 createMemoryPhi(BB);
1518}
1519
1520template <typename IterT>
1521void MemorySSA::buildMemorySSA(BatchAAResults &BAA, IterT Blocks) {
1522 // We create an access to represent "live on entry", for things like
1523 // arguments or users of globals, where the memory they use is defined before
1524 // the beginning of the function. We do not actually insert it into the IR.
1525 // We do not define a live on exit for the immediate uses, and thus our
1526 // semantics do *not* imply that something with no immediate uses can simply
1527 // be removed.
1528 BasicBlock &StartingPoint = *Blocks.begin();
1529 LiveOnEntryDef.reset(p: new MemoryDef(StartingPoint.getContext(), nullptr,
1530 nullptr, &StartingPoint, NextID++));
1531
1532 // We maintain lists of memory accesses per-block, trading memory for time. We
1533 // could just look up the memory access for every possible instruction in the
1534 // stream.
1535 SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
1536 // Go through each block, figure out where defs occur, and chain together all
1537 // the accesses.
1538 for (BasicBlock &B : Blocks) {
1539 bool InsertIntoDef = false;
1540 AccessList *Accesses = nullptr;
1541 DefsList *Defs = nullptr;
1542 for (Instruction &I : B) {
1543 MemoryUseOrDef *MUD = createNewAccess(I: &I, AAP: &BAA);
1544 if (!MUD)
1545 continue;
1546
1547 if (!Accesses)
1548 Accesses = getOrCreateAccessList(BB: &B);
1549 Accesses->push_back(val: MUD);
1550 if (isa<MemoryDef>(Val: MUD)) {
1551 InsertIntoDef = true;
1552 if (!Defs)
1553 Defs = getOrCreateDefsList(BB: &B);
1554 Defs->push_back(Node&: *MUD);
1555 }
1556 }
1557 if (InsertIntoDef)
1558 DefiningBlocks.insert(Ptr: &B);
1559 }
1560 placePHINodes(DefiningBlocks);
1561
1562 // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
1563 // filled in with all blocks.
1564 SmallPtrSet<BasicBlock *, 16> Visited;
1565 if (L) {
1566 // Only building MemorySSA for a single loop. placePHINodes may have
1567 // inserted a MemoryPhi in the loop's preheader. As this is outside the
1568 // scope of the loop, set them to LiveOnEntry.
1569 if (auto *P = getMemoryAccess(BB: L->getLoopPreheader())) {
1570 for (Use &U : make_early_inc_range(Range: P->uses()))
1571 U.set(LiveOnEntryDef.get());
1572 removeFromLists(P);
1573 }
1574 // Now rename accesses in the loop. Populate Visited with the exit blocks of
1575 // the loop, to limit the scope of the renaming.
1576 SmallVector<BasicBlock *> ExitBlocks;
1577 L->getExitBlocks(ExitBlocks);
1578 Visited.insert_range(R&: ExitBlocks);
1579 renamePass(Root: DT->getNode(BB: L->getLoopPreheader()), IncomingVal: LiveOnEntryDef.get(),
1580 Visited);
1581 } else {
1582 renamePass(Root: DT->getRootNode(), IncomingVal: LiveOnEntryDef.get(), Visited);
1583 }
1584
1585 // Mark the uses in unreachable blocks as live on entry, so that they go
1586 // somewhere.
1587 for (auto &BB : Blocks)
1588 if (!Visited.count(Ptr: &BB))
1589 markUnreachableAsLiveOnEntry(BB: &BB);
1590}
1591
1592MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }
1593
1594MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
1595 if (Walker)
1596 return Walker.get();
1597
1598 if (!WalkerBase)
1599 WalkerBase = std::make_unique<ClobberWalkerBase>(args: this, args&: DT);
1600
1601 Walker = std::make_unique<CachingWalker>(args: this, args: WalkerBase.get());
1602 return Walker.get();
1603}
1604
1605MemorySSAWalker *MemorySSA::getSkipSelfWalker() {
1606 if (SkipWalker)
1607 return SkipWalker.get();
1608
1609 if (!WalkerBase)
1610 WalkerBase = std::make_unique<ClobberWalkerBase>(args: this, args&: DT);
1611
1612 SkipWalker = std::make_unique<SkipSelfWalker>(args: this, args: WalkerBase.get());
1613 return SkipWalker.get();
1614 }
1615
1616
1617// This is a helper function used by the creation routines. It places NewAccess
1618// into the access and defs lists for a given basic block, at the given
1619// insertion point.
1620void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess,
1621 const BasicBlock *BB,
1622 InsertionPlace Point) {
1623 auto *Accesses = getOrCreateAccessList(BB);
1624 if (Point == Beginning) {
1625 // If it's a phi node, it goes first, otherwise, it goes after any phi
1626 // nodes.
1627 if (isa<MemoryPhi>(Val: NewAccess)) {
1628 Accesses->push_front(val: NewAccess);
1629 auto *Defs = getOrCreateDefsList(BB);
1630 Defs->push_front(Node&: *NewAccess);
1631 } else {
1632 auto AI = find_if_not(
1633 Range&: *Accesses, P: [](const MemoryAccess &MA) { return isa<MemoryPhi>(Val: MA); });
1634 Accesses->insert(where: AI, New: NewAccess);
1635 if (!isa<MemoryUse>(Val: NewAccess)) {
1636 auto *Defs = getOrCreateDefsList(BB);
1637 auto DI = find_if_not(
1638 Range&: *Defs, P: [](const MemoryAccess &MA) { return isa<MemoryPhi>(Val: MA); });
1639 Defs->insert(I: DI, Node&: *NewAccess);
1640 }
1641 }
1642 } else {
1643 Accesses->push_back(val: NewAccess);
1644 if (!isa<MemoryUse>(Val: NewAccess)) {
1645 auto *Defs = getOrCreateDefsList(BB);
1646 Defs->push_back(Node&: *NewAccess);
1647 }
1648 }
1649 BlockNumberingValid.erase(Ptr: BB);
1650}
1651
1652void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
1653 AccessList::iterator InsertPt) {
1654 auto *Accesses = getBlockAccesses(BB);
1655 bool WasEnd = InsertPt == Accesses->end();
1656 Accesses->insert(where: AccessList::iterator(InsertPt), New: What);
1657 if (!isa<MemoryUse>(Val: What)) {
1658 auto *Defs = getOrCreateDefsList(BB);
1659 // If we got asked to insert at the end, we have an easy job, just shove it
1660 // at the end. If we got asked to insert before an existing def, we also get
1661 // an iterator. If we got asked to insert before a use, we have to hunt for
1662 // the next def.
1663 if (WasEnd) {
1664 Defs->push_back(Node&: *What);
1665 } else if (isa<MemoryDef>(Val: InsertPt)) {
1666 Defs->insert(I: InsertPt->getDefsIterator(), Node&: *What);
1667 } else {
1668 while (InsertPt != Accesses->end() && !isa<MemoryDef>(Val: InsertPt))
1669 ++InsertPt;
1670 // Either we found a def, or we are inserting at the end
1671 if (InsertPt == Accesses->end())
1672 Defs->push_back(Node&: *What);
1673 else
1674 Defs->insert(I: InsertPt->getDefsIterator(), Node&: *What);
1675 }
1676 }
1677 BlockNumberingValid.erase(Ptr: BB);
1678}
1679
1680void MemorySSA::prepareForMoveTo(MemoryAccess *What, BasicBlock *BB) {
1681 // Keep it in the lookup tables, remove from the lists
1682 removeFromLists(What, ShouldDelete: false);
1683
1684 // Note that moving should implicitly invalidate the optimized state of a
1685 // MemoryUse (and Phis can't be optimized). However, it doesn't do so for a
1686 // MemoryDef.
1687 if (auto *MD = dyn_cast<MemoryDef>(Val: What))
1688 MD->resetOptimized();
1689 What->setBlock(BB);
1690}
1691
1692// Move What before Where in the IR. The end result is that What will belong to
1693// the right lists and have the right Block set, but will not otherwise be
1694// correct. It will not have the right defining access, and if it is a def,
1695// things below it will not properly be updated.
1696void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
1697 AccessList::iterator Where) {
1698 prepareForMoveTo(What, BB);
1699 insertIntoListsBefore(What, BB, InsertPt: Where);
1700}
1701
1702void MemorySSA::moveTo(MemoryAccess *What, BasicBlock *BB,
1703 InsertionPlace Point) {
1704 if (isa<MemoryPhi>(Val: What)) {
1705 assert(Point == Beginning &&
1706 "Can only move a Phi at the beginning of the block");
1707 // Update lookup table entry
1708 ValueToMemoryAccess.erase(Val: What->getBlock());
1709 bool Inserted = ValueToMemoryAccess.insert(KV: {BB, What}).second;
1710 (void)Inserted;
1711 assert(Inserted && "Cannot move a Phi to a block that already has one");
1712 }
1713
1714 prepareForMoveTo(What, BB);
1715 insertIntoListsForBlock(NewAccess: What, BB, Point);
1716}
1717
1718MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
1719 assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
1720 MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
1721 // Phi's always are placed at the front of the block.
1722 insertIntoListsForBlock(NewAccess: Phi, BB, Point: Beginning);
1723 ValueToMemoryAccess[BB] = Phi;
1724 return Phi;
1725}
1726
1727MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
1728 MemoryAccess *Definition,
1729 const MemoryUseOrDef *Template,
1730 bool CreationMustSucceed) {
1731 assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
1732 MemoryUseOrDef *NewAccess = createNewAccess(I, AAP: AA, Template);
1733 if (CreationMustSucceed)
1734 assert(NewAccess != nullptr && "Tried to create a memory access for a "
1735 "non-memory touching instruction");
1736 if (NewAccess) {
1737 assert((!Definition || !isa<MemoryUse>(Definition)) &&
1738 "A use cannot be a defining access");
1739 NewAccess->setDefiningAccess(DMA: Definition);
1740 }
1741 return NewAccess;
1742}
1743
1744// Return true if the instruction has ordering constraints.
1745// Note specifically that this only considers stores and loads
1746// because others are still considered ModRef by getModRefInfo.
1747static inline bool isOrdered(const Instruction *I) {
1748 if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
1749 if (!SI->isUnordered())
1750 return true;
1751 } else if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
1752 if (!LI->isUnordered())
1753 return true;
1754 }
1755 return false;
1756}
1757
1758/// Helper function to create new memory accesses
1759template <typename AliasAnalysisType>
1760MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
1761 AliasAnalysisType *AAP,
1762 const MemoryUseOrDef *Template) {
1763 // The assume intrinsic has a control dependency which we model by claiming
1764 // that it writes arbitrarily. Debuginfo intrinsics may be considered
1765 // clobbers when we have a nonstandard AA pipeline. Ignore these fake memory
1766 // dependencies here.
1767 // FIXME: Replace this special casing with a more accurate modelling of
1768 // assume's control dependency.
1769 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
1770 switch (II->getIntrinsicID()) {
1771 default:
1772 break;
1773 case Intrinsic::allow_runtime_check:
1774 case Intrinsic::allow_ubsan_check:
1775 case Intrinsic::assume:
1776 case Intrinsic::experimental_noalias_scope_decl:
1777 case Intrinsic::pseudoprobe:
1778 return nullptr;
1779 }
1780 }
1781
1782 // Using a nonstandard AA pipelines might leave us with unexpected modref
1783 // results for I, so add a check to not model instructions that may not read
1784 // from or write to memory. This is necessary for correctness.
1785 if (!I->mayReadFromMemory() && !I->mayWriteToMemory())
1786 return nullptr;
1787
1788 bool Def, Use;
1789 if (Template) {
1790 Def = isa<MemoryDef>(Val: Template);
1791 Use = isa<MemoryUse>(Val: Template);
1792#if !defined(NDEBUG)
1793 ModRefInfo ModRef = AAP->getModRefInfo(I, std::nullopt);
1794 bool DefCheck, UseCheck;
1795 DefCheck = isModSet(ModRef) || isOrdered(I);
1796 UseCheck = isRefSet(ModRef);
1797 // Memory accesses should only be reduced and can not be increased since AA
1798 // just might return better results as a result of some transformations.
1799 assert((Def == DefCheck || !DefCheck) &&
1800 "Memory accesses should only be reduced");
1801 if (!Def && Use != UseCheck) {
1802 // New Access should not have more power than template access
1803 assert(!UseCheck && "Invalid template");
1804 }
1805#endif
1806 } else {
1807 // Find out what affect this instruction has on memory.
1808 ModRefInfo ModRef = AAP->getModRefInfo(I, std::nullopt);
1809 // The isOrdered check is used to ensure that volatiles end up as defs
1810 // (atomics end up as ModRef right now anyway). Until we separate the
1811 // ordering chain from the memory chain, this enables people to see at least
1812 // some relative ordering to volatiles. Note that getClobberingMemoryAccess
1813 // will still give an answer that bypasses other volatile loads. TODO:
1814 // Separate memory aliasing and ordering into two different chains so that
1815 // we can precisely represent both "what memory will this read/write/is
1816 // clobbered by" and "what instructions can I move this past".
1817 Def = isModSet(MRI: ModRef) || isOrdered(I);
1818 Use = isRefSet(MRI: ModRef);
1819 }
1820
1821 // It's possible for an instruction to not modify memory at all. During
1822 // construction, we ignore them.
1823 if (!Def && !Use)
1824 return nullptr;
1825
1826 MemoryUseOrDef *MUD;
1827 if (Def) {
1828 MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
1829 } else {
1830 MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
1831 if (isUseTriviallyOptimizableToLiveOnEntry(*AAP, I)) {
1832 MemoryAccess *LiveOnEntry = getLiveOnEntryDef();
1833 MUD->setOptimized(LiveOnEntry);
1834 }
1835 }
1836 ValueToMemoryAccess[I] = MUD;
1837 return MUD;
1838}
1839
1840/// Properly remove \p MA from all of MemorySSA's lookup tables.
1841void MemorySSA::removeFromLookups(MemoryAccess *MA) {
1842 assert(MA->use_empty() &&
1843 "Trying to remove memory access that still has uses");
1844 BlockNumbering.erase(Val: MA);
1845 if (auto *MUD = dyn_cast<MemoryUseOrDef>(Val: MA))
1846 MUD->setDefiningAccess(DMA: nullptr);
1847 // Invalidate our walker's cache if necessary
1848 if (!isa<MemoryUse>(Val: MA))
1849 getWalker()->invalidateInfo(MA);
1850
1851 Value *MemoryInst;
1852 if (const auto *MUD = dyn_cast<MemoryUseOrDef>(Val: MA))
1853 MemoryInst = MUD->getMemoryInst();
1854 else
1855 MemoryInst = MA->getBlock();
1856
1857 auto VMA = ValueToMemoryAccess.find(Val: MemoryInst);
1858 if (VMA->second == MA)
1859 ValueToMemoryAccess.erase(I: VMA);
1860}
1861
1862/// Properly remove \p MA from all of MemorySSA's lists.
1863///
1864/// Because of the way the intrusive list and use lists work, it is important to
1865/// do removal in the right order.
1866/// ShouldDelete defaults to true, and will cause the memory access to also be
1867/// deleted, not just removed.
1868void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) {
1869 BasicBlock *BB = MA->getBlock();
1870 // The access list owns the reference, so we erase it from the non-owning list
1871 // first.
1872 if (!isa<MemoryUse>(Val: MA)) {
1873 auto DefsIt = PerBlockDefs.find(Val: BB);
1874 std::unique_ptr<DefsList> &Defs = DefsIt->second;
1875 Defs->remove(N&: *MA);
1876 if (Defs->empty())
1877 PerBlockDefs.erase(I: DefsIt);
1878 }
1879
1880 // The erase call here will delete it. If we don't want it deleted, we call
1881 // remove instead.
1882 auto AccessIt = PerBlockAccesses.find(Val: BB);
1883 std::unique_ptr<AccessList> &Accesses = AccessIt->second;
1884 if (ShouldDelete)
1885 Accesses->erase(IT: MA);
1886 else
1887 Accesses->remove(IT: MA);
1888
1889 if (Accesses->empty()) {
1890 PerBlockAccesses.erase(I: AccessIt);
1891 BlockNumberingValid.erase(Ptr: BB);
1892 }
1893}
1894
1895void MemorySSA::print(raw_ostream &OS) const {
1896 MemorySSAAnnotatedWriter Writer(this);
1897 Function *F = this->F;
1898 if (L)
1899 F = L->getHeader()->getParent();
1900 F->print(OS, AAW: &Writer);
1901}
1902
1903#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1904LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
1905#endif
1906
1907void MemorySSA::verifyMemorySSA(VerificationLevel VL) const {
1908#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
1909 VL = VerificationLevel::Full;
1910#endif
1911
1912#ifndef NDEBUG
1913 if (F) {
1914 auto Blocks = iterator_range(F->begin(), F->end());
1915 verifyOrderingDominationAndDefUses(Blocks, VL);
1916 verifyDominationNumbers(Blocks);
1917 if (VL == VerificationLevel::Full)
1918 verifyPrevDefInPhis(Blocks);
1919 } else {
1920 assert(L && "must either have loop or function");
1921 auto Blocks =
1922 map_range(L->blocks(), [](const BasicBlock *BB) -> BasicBlock & {
1923 return *const_cast<BasicBlock *>(BB);
1924 });
1925 verifyOrderingDominationAndDefUses(Blocks, VL);
1926 verifyDominationNumbers(Blocks);
1927 if (VL == VerificationLevel::Full)
1928 verifyPrevDefInPhis(Blocks);
1929 }
1930#endif
1931 // Previously, the verification used to also verify that the clobberingAccess
1932 // cached by MemorySSA is the same as the clobberingAccess found at a later
1933 // query to AA. This does not hold true in general due to the current fragility
1934 // of BasicAA which has arbitrary caps on the things it analyzes before giving
1935 // up. As a result, transformations that are correct, will lead to BasicAA
1936 // returning different Alias answers before and after that transformation.
1937 // Invalidating MemorySSA is not an option, as the results in BasicAA can be so
1938 // random, in the worst case we'd need to rebuild MemorySSA from scratch after
1939 // every transformation, which defeats the purpose of using it. For such an
1940 // example, see test4 added in D51960.
1941}
1942
1943template <typename IterT>
1944void MemorySSA::verifyPrevDefInPhis(IterT Blocks) const {
1945 for (const BasicBlock &BB : Blocks) {
1946 if (MemoryPhi *Phi = getMemoryAccess(BB: &BB)) {
1947 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
1948 auto *Pred = Phi->getIncomingBlock(I);
1949 auto *IncAcc = Phi->getIncomingValue(I);
1950 // If Pred has no unreachable predecessors, get last def looking at
1951 // IDoms. If, while walkings IDoms, any of these has an unreachable
1952 // predecessor, then the incoming def can be any access.
1953 if (auto *DTNode = DT->getNode(BB: Pred)) {
1954 while (DTNode) {
1955 if (auto *DefList = getBlockDefs(BB: DTNode->getBlock())) {
1956 auto *LastAcc = &*(--DefList->end());
1957 assert(LastAcc == IncAcc &&
1958 "Incorrect incoming access into phi.");
1959 (void)IncAcc;
1960 (void)LastAcc;
1961 break;
1962 }
1963 DTNode = DTNode->getIDom();
1964 }
1965 } else {
1966 // If Pred has unreachable predecessors, but has at least a Def, the
1967 // incoming access can be the last Def in Pred, or it could have been
1968 // optimized to LoE. After an update, though, the LoE may have been
1969 // replaced by another access, so IncAcc may be any access.
1970 // If Pred has unreachable predecessors and no Defs, incoming access
1971 // should be LoE; However, after an update, it may be any access.
1972 }
1973 }
1974 }
1975 }
1976}
1977
1978/// Verify that all of the blocks we believe to have valid domination numbers
1979/// actually have valid domination numbers.
1980template <typename IterT>
1981void MemorySSA::verifyDominationNumbers(IterT Blocks) const {
1982 if (BlockNumberingValid.empty())
1983 return;
1984
1985 SmallPtrSet<const BasicBlock *, 16> ValidBlocks = BlockNumberingValid;
1986 for (const BasicBlock &BB : Blocks) {
1987 if (!ValidBlocks.count(Ptr: &BB))
1988 continue;
1989
1990 ValidBlocks.erase(Ptr: &BB);
1991
1992 const AccessList *Accesses = getBlockAccesses(BB: &BB);
1993 // It's correct to say an empty block has valid numbering.
1994 if (!Accesses)
1995 continue;
1996
1997 // Block numbering starts at 1.
1998 unsigned long LastNumber = 0;
1999 for (const MemoryAccess &MA : *Accesses) {
2000 auto ThisNumberIter = BlockNumbering.find(Val: &MA);
2001 assert(ThisNumberIter != BlockNumbering.end() &&
2002 "MemoryAccess has no domination number in a valid block!");
2003
2004 unsigned long ThisNumber = ThisNumberIter->second;
2005 assert(ThisNumber > LastNumber &&
2006 "Domination numbers should be strictly increasing!");
2007 (void)LastNumber;
2008 LastNumber = ThisNumber;
2009 }
2010 }
2011
2012 assert(ValidBlocks.empty() &&
2013 "All valid BasicBlocks should exist in F -- dangling pointers?");
2014}
2015
2016/// Verify ordering: the order and existence of MemoryAccesses matches the
2017/// order and existence of memory affecting instructions.
2018/// Verify domination: each definition dominates all of its uses.
2019/// Verify def-uses: the immediate use information - walk all the memory
2020/// accesses and verifying that, for each use, it appears in the appropriate
2021/// def's use list
2022template <typename IterT>
2023void MemorySSA::verifyOrderingDominationAndDefUses(IterT Blocks,
2024 VerificationLevel VL) const {
2025 // Walk all the blocks, comparing what the lookups think and what the access
2026 // lists think, as well as the order in the blocks vs the order in the access
2027 // lists.
2028 SmallVector<MemoryAccess *, 32> ActualAccesses;
2029 SmallVector<MemoryAccess *, 32> ActualDefs;
2030 for (BasicBlock &B : Blocks) {
2031 const AccessList *AL = getBlockAccesses(BB: &B);
2032 const auto *DL = getBlockDefs(BB: &B);
2033 MemoryPhi *Phi = getMemoryAccess(BB: &B);
2034 if (Phi) {
2035 // Verify ordering.
2036 ActualAccesses.push_back(Elt: Phi);
2037 ActualDefs.push_back(Elt: Phi);
2038 // Verify domination
2039 for (const Use &U : Phi->uses()) {
2040 assert(dominates(Phi, U) && "Memory PHI does not dominate it's uses");
2041 (void)U;
2042 }
2043 // Verify def-uses for full verify.
2044 if (VL == VerificationLevel::Full) {
2045 assert(Phi->getNumOperands() == pred_size(&B) &&
2046 "Incomplete MemoryPhi Node");
2047 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
2048 verifyUseInDefs(Phi->getIncomingValue(I), Phi);
2049 assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) &&
2050 "Incoming phi block not a block predecessor");
2051 }
2052 }
2053 }
2054
2055 for (Instruction &I : B) {
2056 MemoryUseOrDef *MA = getMemoryAccess(I: &I);
2057 assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) &&
2058 "We have memory affecting instructions "
2059 "in this block but they are not in the "
2060 "access list or defs list");
2061 if (MA) {
2062 // Verify ordering.
2063 ActualAccesses.push_back(Elt: MA);
2064 if (MemoryAccess *MD = dyn_cast<MemoryDef>(Val: MA)) {
2065 // Verify ordering.
2066 ActualDefs.push_back(Elt: MA);
2067 // Verify domination.
2068 for (const Use &U : MD->uses()) {
2069 assert(dominates(MD, U) &&
2070 "Memory Def does not dominate it's uses");
2071 (void)U;
2072 }
2073 }
2074 // Verify def-uses for full verify.
2075 if (VL == VerificationLevel::Full)
2076 verifyUseInDefs(MA->getDefiningAccess(), MA);
2077 }
2078 }
2079 // Either we hit the assert, really have no accesses, or we have both
2080 // accesses and an access list. Same with defs.
2081 if (!AL && !DL)
2082 continue;
2083 // Verify ordering.
2084 assert(AL->size() == ActualAccesses.size() &&
2085 "We don't have the same number of accesses in the block as on the "
2086 "access list");
2087 assert((DL || ActualDefs.size() == 0) &&
2088 "Either we should have a defs list, or we should have no defs");
2089 assert((!DL || DL->size() == ActualDefs.size()) &&
2090 "We don't have the same number of defs in the block as on the "
2091 "def list");
2092 auto ALI = AL->begin();
2093 auto AAI = ActualAccesses.begin();
2094 while (ALI != AL->end() && AAI != ActualAccesses.end()) {
2095 assert(&*ALI == *AAI && "Not the same accesses in the same order");
2096 ++ALI;
2097 ++AAI;
2098 }
2099 ActualAccesses.clear();
2100 if (DL) {
2101 auto DLI = DL->begin();
2102 auto ADI = ActualDefs.begin();
2103 while (DLI != DL->end() && ADI != ActualDefs.end()) {
2104 assert(&*DLI == *ADI && "Not the same defs in the same order");
2105 ++DLI;
2106 ++ADI;
2107 }
2108 }
2109 ActualDefs.clear();
2110 }
2111}
2112
2113/// Verify the def-use lists in MemorySSA, by verifying that \p Use
2114/// appears in the use list of \p Def.
2115void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
2116 // The live on entry use may cause us to get a NULL def here
2117 if (!Def)
2118 assert(isLiveOnEntryDef(Use) &&
2119 "Null def but use not point to live on entry def");
2120 else
2121 assert(is_contained(Def->users(), Use) &&
2122 "Did not find use in def's use list");
2123}
2124
2125/// Perform a local numbering on blocks so that instruction ordering can be
2126/// determined in constant time.
2127/// TODO: We currently just number in order. If we numbered by N, we could
2128/// allow at least N-1 sequences of insertBefore or insertAfter (and at least
2129/// log2(N) sequences of mixed before and after) without needing to invalidate
2130/// the numbering.
2131void MemorySSA::renumberBlock(const BasicBlock *B) const {
2132 // The pre-increment ensures the numbers really start at 1.
2133 unsigned long CurrentNumber = 0;
2134 const AccessList *AL = getBlockAccesses(BB: B);
2135 assert(AL != nullptr && "Asking to renumber an empty block");
2136 for (const auto &I : *AL)
2137 BlockNumbering[&I] = ++CurrentNumber;
2138 BlockNumberingValid.insert(Ptr: B);
2139}
2140
2141/// Determine, for two memory accesses in the same block,
2142/// whether \p Dominator dominates \p Dominatee.
2143/// \returns True if \p Dominator dominates \p Dominatee.
2144bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
2145 const MemoryAccess *Dominatee) const {
2146 const BasicBlock *DominatorBlock = Dominator->getBlock();
2147
2148 assert((DominatorBlock == Dominatee->getBlock()) &&
2149 "Asking for local domination when accesses are in different blocks!");
2150 // A node dominates itself.
2151 if (Dominatee == Dominator)
2152 return true;
2153
2154 // When Dominatee is defined on function entry, it is not dominated by another
2155 // memory access.
2156 if (isLiveOnEntryDef(MA: Dominatee))
2157 return false;
2158
2159 // When Dominator is defined on function entry, it dominates the other memory
2160 // access.
2161 if (isLiveOnEntryDef(MA: Dominator))
2162 return true;
2163
2164 if (!BlockNumberingValid.count(Ptr: DominatorBlock))
2165 renumberBlock(B: DominatorBlock);
2166
2167 unsigned long DominatorNum = BlockNumbering.lookup(Val: Dominator);
2168 // All numbers start with 1
2169 assert(DominatorNum != 0 && "Block was not numbered properly");
2170 unsigned long DominateeNum = BlockNumbering.lookup(Val: Dominatee);
2171 assert(DominateeNum != 0 && "Block was not numbered properly");
2172 return DominatorNum < DominateeNum;
2173}
2174
2175bool MemorySSA::dominates(const MemoryAccess *Dominator,
2176 const MemoryAccess *Dominatee) const {
2177 if (Dominator == Dominatee)
2178 return true;
2179
2180 if (isLiveOnEntryDef(MA: Dominatee))
2181 return false;
2182
2183 if (Dominator->getBlock() != Dominatee->getBlock())
2184 return DT->dominates(A: Dominator->getBlock(), B: Dominatee->getBlock());
2185 return locallyDominates(Dominator, Dominatee);
2186}
2187
2188bool MemorySSA::dominates(const MemoryAccess *Dominator,
2189 const Use &Dominatee) const {
2190 if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Val: Dominatee.getUser())) {
2191 BasicBlock *UseBB = MP->getIncomingBlock(U: Dominatee);
2192 // The def must dominate the incoming block of the phi.
2193 if (UseBB != Dominator->getBlock())
2194 return DT->dominates(A: Dominator->getBlock(), B: UseBB);
2195 // If the UseBB and the DefBB are the same, compare locally.
2196 return locallyDominates(Dominator, Dominatee: cast<MemoryAccess>(Val: Dominatee));
2197 }
2198 // If it's not a PHI node use, the normal dominates can already handle it.
2199 return dominates(Dominator, Dominatee: cast<MemoryAccess>(Val: Dominatee.getUser()));
2200}
2201
2202void MemorySSA::ensureOptimizedUses() {
2203 if (IsOptimized)
2204 return;
2205
2206 BatchAAResults BatchAA(*AA);
2207 ClobberWalkerBase WalkerBase(this, DT);
2208 CachingWalker WalkerLocal(this, &WalkerBase);
2209 OptimizeUses(this, &WalkerLocal, &BatchAA, DT).optimizeUses();
2210 IsOptimized = true;
2211}
2212
2213void MemoryAccess::print(raw_ostream &OS) const {
2214 switch (getValueID()) {
2215 case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
2216 case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS);
2217 case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS);
2218 }
2219 llvm_unreachable("invalid value id");
2220}
2221
2222void MemoryDef::print(raw_ostream &OS) const {
2223 MemoryAccess *UO = getDefiningAccess();
2224
2225 auto printID = [&OS](MemoryAccess *A) {
2226 if (A && A->getID())
2227 OS << A->getID();
2228 else
2229 OS << LiveOnEntryStr;
2230 };
2231
2232 OS << getID() << " = MemoryDef(";
2233 printID(UO);
2234 OS << ")";
2235
2236 if (isOptimized()) {
2237 OS << "->";
2238 printID(getOptimized());
2239 }
2240}
2241
2242void MemoryPhi::print(raw_ostream &OS) const {
2243 ListSeparator LS(",");
2244 OS << getID() << " = MemoryPhi(";
2245 for (const auto &Op : operands()) {
2246 BasicBlock *BB = getIncomingBlock(U: Op);
2247 MemoryAccess *MA = cast<MemoryAccess>(Val: Op);
2248
2249 OS << LS << '{';
2250 if (BB->hasName())
2251 OS << BB->getName();
2252 else
2253 BB->printAsOperand(O&: OS, PrintType: false);
2254 OS << ',';
2255 if (unsigned ID = MA->getID())
2256 OS << ID;
2257 else
2258 OS << LiveOnEntryStr;
2259 OS << '}';
2260 }
2261 OS << ')';
2262}
2263
2264void MemoryUse::print(raw_ostream &OS) const {
2265 MemoryAccess *UO = getDefiningAccess();
2266 OS << "MemoryUse(";
2267 if (UO && UO->getID())
2268 OS << UO->getID();
2269 else
2270 OS << LiveOnEntryStr;
2271 OS << ')';
2272}
2273
2274void MemoryAccess::dump() const {
2275// Cannot completely remove virtual function even in release mode.
2276#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2277 print(dbgs());
2278 dbgs() << "\n";
2279#endif
2280}
2281
2282class DOTFuncMSSAInfo {
2283private:
2284 const Function &F;
2285 MemorySSAAnnotatedWriter MSSAWriter;
2286
2287public:
2288 DOTFuncMSSAInfo(const Function &F, MemorySSA &MSSA)
2289 : F(F), MSSAWriter(&MSSA) {}
2290
2291 const Function *getFunction() { return &F; }
2292 MemorySSAAnnotatedWriter &getWriter() { return MSSAWriter; }
2293};
2294
2295namespace llvm {
2296
2297template <>
2298struct GraphTraits<DOTFuncMSSAInfo *> : public GraphTraits<const BasicBlock *> {
2299 static NodeRef getEntryNode(DOTFuncMSSAInfo *CFGInfo) {
2300 return &(CFGInfo->getFunction()->getEntryBlock());
2301 }
2302
2303 // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
2304 using nodes_iterator = pointer_iterator<Function::const_iterator>;
2305
2306 static nodes_iterator nodes_begin(DOTFuncMSSAInfo *CFGInfo) {
2307 return nodes_iterator(CFGInfo->getFunction()->begin());
2308 }
2309
2310 static nodes_iterator nodes_end(DOTFuncMSSAInfo *CFGInfo) {
2311 return nodes_iterator(CFGInfo->getFunction()->end());
2312 }
2313
2314 static size_t size(DOTFuncMSSAInfo *CFGInfo) {
2315 return CFGInfo->getFunction()->size();
2316 }
2317};
2318
2319template <>
2320struct DOTGraphTraits<DOTFuncMSSAInfo *> : public DefaultDOTGraphTraits {
2321
2322 DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
2323
2324 static std::string getGraphName(DOTFuncMSSAInfo *CFGInfo) {
2325 return "MSSA CFG for '" + CFGInfo->getFunction()->getName().str() +
2326 "' function";
2327 }
2328
2329 std::string getNodeLabel(const BasicBlock *Node, DOTFuncMSSAInfo *CFGInfo) {
2330 return DOTGraphTraits<DOTFuncInfo *>::getCompleteNodeLabel(
2331 Node, nullptr,
2332 HandleBasicBlock: [CFGInfo](raw_string_ostream &OS, const BasicBlock &BB) -> void {
2333 BB.print(OS, AAW: &CFGInfo->getWriter(), ShouldPreserveUseListOrder: true, IsForDebug: true);
2334 },
2335 HandleComment: [](std::string &S, unsigned &I, unsigned Idx) -> void {
2336 std::string Str = S.substr(pos: I, n: Idx - I);
2337 StringRef SR = Str;
2338 if (SR.count(Str: " = MemoryDef(") || SR.count(Str: " = MemoryPhi(") ||
2339 SR.count(Str: "MemoryUse("))
2340 return;
2341 DOTGraphTraits<DOTFuncInfo *>::eraseComment(OutStr&: S, I, Idx);
2342 });
2343 }
2344
2345 static std::string getEdgeSourceLabel(const BasicBlock *Node,
2346 const_succ_iterator I) {
2347 return DOTGraphTraits<DOTFuncInfo *>::getEdgeSourceLabel(Node, I);
2348 }
2349
2350 /// Display the raw branch weights from PGO.
2351 std::string getEdgeAttributes(const BasicBlock *Node, const_succ_iterator I,
2352 DOTFuncMSSAInfo *CFGInfo) {
2353 return "";
2354 }
2355
2356 std::string getNodeAttributes(const BasicBlock *Node,
2357 DOTFuncMSSAInfo *CFGInfo) {
2358 return getNodeLabel(Node, CFGInfo).find(c: ';') != std::string::npos
2359 ? "style=filled, fillcolor=lightpink"
2360 : "";
2361 }
2362};
2363
2364} // namespace llvm
2365
2366AnalysisKey MemorySSAAnalysis::Key;
2367
2368MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
2369 FunctionAnalysisManager &AM) {
2370 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
2371 auto &AA = AM.getResult<AAManager>(IR&: F);
2372 return MemorySSAAnalysis::Result(std::make_unique<MemorySSA>(args&: F, args: &AA, args: &DT));
2373}
2374
2375bool MemorySSAAnalysis::Result::invalidate(
2376 Function &F, const PreservedAnalyses &PA,
2377 FunctionAnalysisManager::Invalidator &Inv) {
2378 auto PAC = PA.getChecker<MemorySSAAnalysis>();
2379 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
2380 Inv.invalidate<AAManager>(IR&: F, PA) ||
2381 Inv.invalidate<DominatorTreeAnalysis>(IR&: F, PA);
2382}
2383
2384PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
2385 FunctionAnalysisManager &AM) {
2386 auto &MSSA = AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA();
2387 if (EnsureOptimizedUses)
2388 MSSA.ensureOptimizedUses();
2389 if (DotCFGMSSA != "") {
2390 DOTFuncMSSAInfo CFGInfo(F, MSSA);
2391 WriteGraph(G: &CFGInfo, Name: "", ShortNames: false, Title: "MSSA", Filename: DotCFGMSSA);
2392 } else {
2393 OS << "MemorySSA for function: " << F.getName() << "\n";
2394 MSSA.print(OS);
2395 }
2396
2397 return PreservedAnalyses::all();
2398}
2399
2400PreservedAnalyses MemorySSAWalkerPrinterPass::run(Function &F,
2401 FunctionAnalysisManager &AM) {
2402 auto &MSSA = AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA();
2403 OS << "MemorySSA (walker) for function: " << F.getName() << "\n";
2404 MemorySSAWalkerAnnotatedWriter Writer(&MSSA);
2405 F.print(OS, AAW: &Writer);
2406
2407 return PreservedAnalyses::all();
2408}
2409
2410PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
2411 FunctionAnalysisManager &AM) {
2412 AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA().verifyMemorySSA();
2413
2414 return PreservedAnalyses::all();
2415}
2416
2417char MemorySSAWrapperPass::ID = 0;
2418
2419MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) {}
2420
2421void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); }
2422
2423void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
2424 AU.setPreservesAll();
2425 AU.addRequiredTransitive<DominatorTreeWrapperPass>();
2426 AU.addRequiredTransitive<AAResultsWrapperPass>();
2427}
2428
2429bool MemorySSAWrapperPass::runOnFunction(Function &F) {
2430 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2431 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2432 MSSA.reset(p: new MemorySSA(F, &AA, &DT));
2433 return false;
2434}
2435
2436void MemorySSAWrapperPass::verifyAnalysis() const {
2437 if (VerifyMemorySSA)
2438 MSSA->verifyMemorySSA();
2439}
2440
2441void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
2442 MSSA->print(OS);
2443}
2444
2445MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
2446
2447/// Walk the use-def chains starting at \p StartingAccess and find
2448/// the MemoryAccess that actually clobbers Loc.
2449///
2450/// \returns our clobbering memory access
2451MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
2452 MemoryAccess *StartingAccess, const MemoryLocation &Loc,
2453 BatchAAResults &BAA, unsigned &UpwardWalkLimit) {
2454 assert(!isa<MemoryUse>(StartingAccess) && "Use cannot be defining access");
2455
2456 // If location is undefined, conservatively return starting access.
2457 if (Loc.Ptr == nullptr)
2458 return StartingAccess;
2459
2460 Instruction *I = nullptr;
2461 if (auto *StartingUseOrDef = dyn_cast<MemoryUseOrDef>(Val: StartingAccess)) {
2462 if (MSSA->isLiveOnEntryDef(MA: StartingUseOrDef))
2463 return StartingUseOrDef;
2464
2465 I = StartingUseOrDef->getMemoryInst();
2466
2467 // Conservatively, fences are always clobbers, so don't perform the walk if
2468 // we hit a fence.
2469 if (!isa<CallBase>(Val: I) && I->isFenceLike())
2470 return StartingUseOrDef;
2471 }
2472
2473 UpwardsMemoryQuery Q;
2474 Q.OriginalAccess = StartingAccess;
2475 Q.StartingLoc = Loc;
2476 Q.Inst = nullptr;
2477 Q.IsCall = false;
2478
2479 // Unlike the other function, do not walk to the def of a def, because we are
2480 // handed something we already believe is the clobbering access.
2481 // We never set SkipSelf to true in Q in this method.
2482 MemoryAccess *Clobber =
2483 Walker.findClobber(BAA, Start: StartingAccess, Q, UpWalkLimit&: UpwardWalkLimit);
2484 LLVM_DEBUG({
2485 dbgs() << "Clobber starting at access " << *StartingAccess << "\n";
2486 if (I)
2487 dbgs() << " for instruction " << *I << "\n";
2488 dbgs() << " is " << *Clobber << "\n";
2489 });
2490 return Clobber;
2491}
2492
2493static const Instruction *
2494getInvariantGroupClobberingInstruction(Instruction &I, DominatorTree &DT) {
2495 if (!I.hasMetadata(KindID: LLVMContext::MD_invariant_group) || I.isVolatile())
2496 return nullptr;
2497
2498 // We consider bitcasts and zero GEPs to be the same pointer value. Start by
2499 // stripping bitcasts and zero GEPs, then we will recursively look at loads
2500 // and stores through bitcasts and zero GEPs.
2501 Value *PointerOperand = getLoadStorePointerOperand(V: &I)->stripPointerCasts();
2502
2503 // It's not safe to walk the use list of a global value because function
2504 // passes aren't allowed to look outside their functions.
2505 // FIXME: this could be fixed by filtering instructions from outside of
2506 // current function.
2507 if (isa<Constant>(Val: PointerOperand))
2508 return nullptr;
2509
2510 const Instruction *MostDominatingInstruction = &I;
2511
2512 for (const User *Us : PointerOperand->users()) {
2513 auto *U = dyn_cast<Instruction>(Val: Us);
2514 if (!U || U == &I || !DT.dominates(Def: U, User: MostDominatingInstruction))
2515 continue;
2516
2517 // If we hit a load/store with an invariant.group metadata and the same
2518 // pointer operand, we can assume that value pointed to by the pointer
2519 // operand didn't change.
2520 if (U->hasMetadata(KindID: LLVMContext::MD_invariant_group) &&
2521 getLoadStorePointerOperand(V: U) == PointerOperand && !U->isVolatile()) {
2522 MostDominatingInstruction = U;
2523 }
2524 }
2525
2526 return MostDominatingInstruction == &I ? nullptr : MostDominatingInstruction;
2527}
2528
2529MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
2530 MemoryAccess *MA, BatchAAResults &BAA, unsigned &UpwardWalkLimit,
2531 bool SkipSelf, bool UseInvariantGroup) {
2532 auto *StartingAccess = dyn_cast<MemoryUseOrDef>(Val: MA);
2533 // If this is a MemoryPhi, we can't do anything.
2534 if (!StartingAccess)
2535 return MA;
2536
2537 if (UseInvariantGroup) {
2538 if (auto *I = getInvariantGroupClobberingInstruction(
2539 I&: *StartingAccess->getMemoryInst(), DT&: MSSA->getDomTree())) {
2540 assert(isa<LoadInst>(I) || isa<StoreInst>(I));
2541
2542 auto *ClobberMA = MSSA->getMemoryAccess(I);
2543 assert(ClobberMA);
2544 if (isa<MemoryUse>(Val: ClobberMA))
2545 return ClobberMA->getDefiningAccess();
2546 return ClobberMA;
2547 }
2548 }
2549
2550 bool IsOptimized = false;
2551
2552 // If this is an already optimized use or def, return the optimized result.
2553 // Note: Currently, we store the optimized def result in a separate field,
2554 // since we can't use the defining access.
2555 if (StartingAccess->isOptimized()) {
2556 if (!SkipSelf || !isa<MemoryDef>(Val: StartingAccess))
2557 return StartingAccess->getOptimized();
2558 IsOptimized = true;
2559 }
2560
2561 const Instruction *I = StartingAccess->getMemoryInst();
2562 // We can't sanely do anything with a fence, since they conservatively clobber
2563 // all memory, and have no locations to get pointers from to try to
2564 // disambiguate.
2565 if (!isa<CallBase>(Val: I) && I->isFenceLike())
2566 return StartingAccess;
2567
2568 UpwardsMemoryQuery Q(I, StartingAccess);
2569
2570 if (isUseTriviallyOptimizableToLiveOnEntry(AA&: BAA, I)) {
2571 MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
2572 StartingAccess->setOptimized(LiveOnEntry);
2573 return LiveOnEntry;
2574 }
2575
2576 MemoryAccess *OptimizedAccess;
2577 if (!IsOptimized) {
2578 // Start with the thing we already think clobbers this location
2579 MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
2580
2581 // At this point, DefiningAccess may be the live on entry def.
2582 // If it is, we will not get a better result.
2583 if (MSSA->isLiveOnEntryDef(MA: DefiningAccess)) {
2584 StartingAccess->setOptimized(DefiningAccess);
2585 return DefiningAccess;
2586 }
2587
2588 OptimizedAccess =
2589 Walker.findClobber(BAA, Start: DefiningAccess, Q, UpWalkLimit&: UpwardWalkLimit);
2590 StartingAccess->setOptimized(OptimizedAccess);
2591 } else
2592 OptimizedAccess = StartingAccess->getOptimized();
2593
2594 LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
2595 LLVM_DEBUG(dbgs() << *StartingAccess << "\n");
2596 LLVM_DEBUG(dbgs() << "Optimized Memory SSA clobber for " << *I << " is ");
2597 LLVM_DEBUG(dbgs() << *OptimizedAccess << "\n");
2598
2599 MemoryAccess *Result;
2600 if (SkipSelf && isa<MemoryPhi>(Val: OptimizedAccess) &&
2601 isa<MemoryDef>(Val: StartingAccess) && UpwardWalkLimit) {
2602 assert(isa<MemoryDef>(Q.OriginalAccess));
2603 Q.SkipSelfAccess = true;
2604 Result = Walker.findClobber(BAA, Start: OptimizedAccess, Q, UpWalkLimit&: UpwardWalkLimit);
2605 } else
2606 Result = OptimizedAccess;
2607
2608 LLVM_DEBUG(dbgs() << "Result Memory SSA clobber [SkipSelf = " << SkipSelf);
2609 LLVM_DEBUG(dbgs() << "] for " << *I << " is " << *Result << "\n");
2610
2611 return Result;
2612}
2613
2614MemoryAccess *
2615DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA,
2616 BatchAAResults &) {
2617 if (auto *Use = dyn_cast<MemoryUseOrDef>(Val: MA))
2618 return Use->getDefiningAccess();
2619 return MA;
2620}
2621
2622MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
2623 MemoryAccess *StartingAccess, const MemoryLocation &, BatchAAResults &) {
2624 if (auto *Use = dyn_cast<MemoryUseOrDef>(Val: StartingAccess))
2625 return Use->getDefiningAccess();
2626 return StartingAccess;
2627}
2628
2629void MemoryPhi::deleteMe(DerivedUser *Self) {
2630 delete static_cast<MemoryPhi *>(Self);
2631}
2632
2633void MemoryDef::deleteMe(DerivedUser *Self) {
2634 delete static_cast<MemoryDef *>(Self);
2635}
2636
2637void MemoryUse::deleteMe(DerivedUser *Self) {
2638 delete static_cast<MemoryUse *>(Self);
2639}
2640
2641bool upward_defs_iterator::IsGuaranteedLoopInvariant(const Value *Ptr) const {
2642 auto IsGuaranteedLoopInvariantBase = [](const Value *Ptr) {
2643 Ptr = Ptr->stripPointerCasts();
2644 if (!isa<Instruction>(Val: Ptr))
2645 return true;
2646 return isa<AllocaInst>(Val: Ptr);
2647 };
2648
2649 Ptr = Ptr->stripPointerCasts();
2650 if (auto *I = dyn_cast<Instruction>(Val: Ptr)) {
2651 if (I->getParent()->isEntryBlock())
2652 return true;
2653 }
2654 if (auto *GEP = dyn_cast<GEPOperator>(Val: Ptr)) {
2655 return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) &&
2656 GEP->hasAllConstantIndices();
2657 }
2658 return IsGuaranteedLoopInvariantBase(Ptr);
2659}
2660