MemoryDependenceAnalysis.cpp source code [llvm_projects/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp]

1	//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements an analysis that determines, for a given memory
10	// operation, what preceding memory operations it depends on. It builds on
11	// alias analysis information, and tries to provide a lazy, caching interface to
12	// a common kind of alias information query.
13	//
14	//===----------------------------------------------------------------------===//
15
16	#include "llvm/Analysis/MemoryDependenceAnalysis.h"
17	#include "llvm/ADT/DenseMap.h"
18	#include "llvm/ADT/STLExtras.h"
19	#include "llvm/ADT/SmallPtrSet.h"
20	#include "llvm/ADT/SmallVector.h"
21	#include "llvm/ADT/Statistic.h"
22	#include "llvm/Analysis/AliasAnalysis.h"
23	#include "llvm/Analysis/AssumptionCache.h"
24	#include "llvm/Analysis/Loads.h"
25	#include "llvm/Analysis/MemoryBuiltins.h"
26	#include "llvm/Analysis/MemoryLocation.h"
27	#include "llvm/Analysis/PHITransAddr.h"
28	#include "llvm/Analysis/TargetLibraryInfo.h"
29	#include "llvm/Analysis/ValueTracking.h"
30	#include "llvm/IR/BasicBlock.h"
31	#include "llvm/IR/Dominators.h"
32	#include "llvm/IR/Function.h"
33	#include "llvm/IR/InstrTypes.h"
34	#include "llvm/IR/Instruction.h"
35	#include "llvm/IR/Instructions.h"
36	#include "llvm/IR/IntrinsicInst.h"
37	#include "llvm/IR/LLVMContext.h"
38	#include "llvm/IR/Metadata.h"
39	#include "llvm/IR/Module.h"
40	#include "llvm/IR/PredIteratorCache.h"
41	#include "llvm/IR/Type.h"
42	#include "llvm/IR/Use.h"
43	#include "llvm/IR/Value.h"
44	#include "llvm/InitializePasses.h"
45	#include "llvm/Pass.h"
46	#include "llvm/Support/AtomicOrdering.h"
47	#include "llvm/Support/Casting.h"
48	#include "llvm/Support/CommandLine.h"
49	#include "llvm/Support/Compiler.h"
50	#include "llvm/Support/Debug.h"
51	#include <algorithm>
52	#include <cassert>
53	#include <iterator>
54	#include <utility>
55
56	using namespace llvm;
57
58	#define DEBUG_TYPE "memdep"
59
60	STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
61	STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
62	STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
63
64	STATISTIC(NumCacheNonLocalPtr,
65	"Number of fully cached non-local ptr responses");
66	STATISTIC(NumCacheDirtyNonLocalPtr,
67	"Number of cached, but dirty, non-local ptr responses");
68	STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
69	STATISTIC(NumCacheCompleteNonLocalPtr,
70	"Number of block queries that were completely cached");
71
72	// Limit for the number of instructions to scan in a block.
73
74	static cl::opt<unsigned> BlockScanLimit(
75	"memdep-block-scan-limit", cl::Hidden, cl::init(Val: `100`),
76	cl::desc("The number of instructions to scan in a block in memory "
77	"dependency analysis (default = 100)"));
78
79	static cl::opt<unsigned>
80	BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(Val: `200`),
81	cl::desc("The number of blocks to scan during memory "
82	"dependency analysis (default = 200)"));
83
84	static cl::opt<unsigned> CacheGlobalLimit(
85	"memdep-cache-global-limit", cl::Hidden, cl::init(Val: `10000`),
86	cl::desc("The max number of entries allowed in a cache (default = 10000)"));
87
88	// Limit on the number of memdep results to process.
89	static const unsigned int NumResultsLimit = `100`;
90
91	/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
92	///
93	/// If the set becomes empty, remove Inst's entry.
94	template <typename KeyTy>
95	static void
96	RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, `4`>> &ReverseMap,
97	Instruction *Inst, KeyTy Val) {
98	typename DenseMap<Instruction *, SmallPtrSet<KeyTy, `4`>>::iterator InstIt =
99	ReverseMap.find(Inst);
100	assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
101	bool Found = InstIt->second.erase(Val);
102	assert(Found && "Invalid reverse map!");
103	(void)Found;
104	if (InstIt->second.empty())
105	ReverseMap.erase(InstIt);
106	}
107
108	/// If the given instruction references a specific memory location, fill in Loc
109	/// with the details, otherwise set Loc.Ptr to null.
110	///
111	/// Returns a ModRefInfo value describing the general behavior of the
112	/// instruction.
113	static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
114	const TargetLibraryInfo &TLI) {
115	if (const LoadInst *LI = dyn_cast<LoadInst>(Val: Inst)) {
116	if (LI->isUnordered()) {
117	Loc = MemoryLocation::get(LI);
118	return ModRefInfo::Ref;
119	}
120	if (LI->getOrdering() == AtomicOrdering::Monotonic) {
121	Loc = MemoryLocation::get(LI);
122	return ModRefInfo::ModRef;
123	}
124	Loc = MemoryLocation ();
125	return ModRefInfo::ModRef;
126	}
127
128	if (const StoreInst *SI = dyn_cast<StoreInst>(Val: Inst)) {
129	if (SI->isUnordered()) {
130	Loc = MemoryLocation::get(SI);
131	return ModRefInfo::Mod;
132	}
133	if (SI->getOrdering() == AtomicOrdering::Monotonic) {
134	Loc = MemoryLocation::get(SI);
135	return ModRefInfo::ModRef;
136	}
137	Loc = MemoryLocation ();
138	return ModRefInfo::ModRef;
139	}
140
141	if (const VAArgInst *V = dyn_cast<VAArgInst>(Val: Inst)) {
142	Loc = MemoryLocation::get(VI: V);
143	return ModRefInfo::ModRef;
144	}
145
146	if (const CallBase *CB = dyn_cast<CallBase>(Val: Inst)) {
147	if (Value *FreedOp = getFreedOperand(CB, TLI: &TLI)) {
148	// calls to free() deallocate the entire structure
149	Loc = MemoryLocation::getAfter(Ptr: FreedOp);
150	return ModRefInfo::Mod;
151	}
152	}
153
154	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) {
155	switch (II->getIntrinsicID()) {
156	case Intrinsic::lifetime_start:
157	case Intrinsic::lifetime_end:
158	Loc = MemoryLocation::getForArgument(Call: II, ArgIdx: `0`, TLI);
159	// These intrinsics don't really modify the memory, but returning Mod
160	// will allow them to be handled conservatively.
161	return ModRefInfo::Mod;
162	case Intrinsic::invariant_start:
163	Loc = MemoryLocation::getForArgument(Call: II, ArgIdx: `1`, TLI);
164	// These intrinsics don't really modify the memory, but returning Mod
165	// will allow them to be handled conservatively.
166	return ModRefInfo::Mod;
167	case Intrinsic::invariant_end:
168	Loc = MemoryLocation::getForArgument(Call: II, ArgIdx: `2`, TLI);
169	// These intrinsics don't really modify the memory, but returning Mod
170	// will allow them to be handled conservatively.
171	return ModRefInfo::Mod;
172	case Intrinsic::masked_load:
173	Loc = MemoryLocation::getForArgument(Call: II, ArgIdx: `0`, TLI);
174	return ModRefInfo::Ref;
175	case Intrinsic::masked_store:
176	Loc = MemoryLocation::getForArgument(Call: II, ArgIdx: `1`, TLI);
177	return ModRefInfo::Mod;
178	default:
179	break;
180	}
181	}
182
183	// Otherwise, just do the coarse-grained thing that always works.
184	if (Inst->mayWriteToMemory())
185	return ModRefInfo::ModRef;
186	if (Inst->mayReadFromMemory())
187	return ModRefInfo::Ref;
188	return ModRefInfo::NoModRef;
189	}
190
191	/// Private helper for finding the local dependencies of a call site.
192	MemDepResult MemoryDependenceResults::getCallDependencyFrom(
193	CallBase Call, bool* isReadOnlyCall, BasicBlock::iterator ScanIt,
194	BasicBlock *BB) {
195	unsigned Limit = getDefaultBlockScanLimit();
196	bool IsInvariantLoad = Call->hasMetadata(KindID: LLVMContext::MD_invariant_load);
197
198	// Walk backwards through the block, looking for dependencies.
199	while (ScanIt != BB->begin()) {
200	Instruction Inst = &--ScanIt;
201
202	// Limit the amount of scanning we do so we don't end up with quadratic
203	// running time on extreme testcases.
204	--Limit;
205	if (!Limit)
206	return MemDepResult::getUnknown();
207
208	// If this inst is a memory op, get the pointer it accessed
209	MemoryLocation Loc;
210	ModRefInfo MR = GetLocation(Inst, Loc, TLI);
211	if (Loc.Ptr) {
212	// A simple instruction.
213	if (isModOrRefSet(MRI: AA.getModRefInfo(I: Call, OptLoc: Loc))) {
214	if (IsInvariantLoad)
215	continue;
216	return MemDepResult::getClobber(Inst);
217	}
218	continue;
219	}
220
221	if (auto *CallB = dyn_cast<CallBase>(Val: Inst)) {
222	bool IsIdenticalReadOnlyCall = isReadOnlyCall && !isModSet(MRI: MR) &&
223	Call->isIdenticalToWhenDefined(I: CallB);
224
225	// An identical earlier invariant load-like call is an available value
226	// even if AA sees both calls as reading the same memory.
227	if (IsInvariantLoad && IsIdenticalReadOnlyCall)
228	return MemDepResult::getDef(Inst);
229
230	// If these two calls do not interfere, look past it.
231	if (isNoModRef(MRI: AA.getModRefInfo(I: Call, Call: CallB))) {
232	// If the two calls are the same, return Inst as a Def, so that
233	// Call can be found redundant and eliminated.
234	if (IsIdenticalReadOnlyCall)
235	return MemDepResult::getDef(Inst);
236
237	// Otherwise if the two calls don't interact (e.g. CallB is readnone)
238	// keep scanning.
239	continue;
240	} else if (IsInvariantLoad) {
241	continue;
242	} else {
243	return MemDepResult::getClobber(Inst);
244	}
245	}
246
247	// If we could not obtain a pointer for the instruction and the instruction
248	// touches memory then assume that this is a dependency.
249	if (isModOrRefSet(MRI: MR))
250	return MemDepResult::getClobber(Inst);
251	}
252
253	// No dependence found. If this is the entry block of the function, it is
254	// unknown, otherwise it is non-local.
255	if (BB != &BB->getParent()->getEntryBlock())
256	return MemDepResult::getNonLocal();
257	return MemDepResult::getNonFuncLocal();
258	}
259
260	MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
261	const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
262	BasicBlock BB, Instruction QueryInst, unsigned *Limit,
263	BatchAAResults &BatchAA) {
264	MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
265	if (QueryInst != nullptr) {
266	if (auto *LI = dyn_cast<LoadInst>(Val: QueryInst)) {
267	InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
268
269	if (InvariantGroupDependency.isDef())
270	return InvariantGroupDependency;
271	}
272	}
273	MemDepResult SimpleDep = getSimplePointerDependencyFrom(
274	MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
275	if (SimpleDep.isDef())
276	return SimpleDep;
277	// Non-local invariant group dependency indicates there is non local Def
278	// (it only returns nonLocal if it finds nonLocal def), which is better than
279	// local clobber and everything else.
280	if (InvariantGroupDependency.isNonLocal())
281	return InvariantGroupDependency;
282
283	assert(InvariantGroupDependency.isUnknown() &&
284	"InvariantGroupDependency should be only unknown at this point");
285	return SimpleDep;
286	}
287
288	MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
289	const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
290	BasicBlock BB, Instruction QueryInst, unsigned *Limit) {
291	BatchAAResults BatchAA(AA, &EEA);
292	return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
293	BatchAA);
294	}
295
296	MemDepResult
297	MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
298	BasicBlock *BB) {
299
300	if (!LI->hasMetadata(KindID: LLVMContext::MD_invariant_group))
301	return MemDepResult::getUnknown();
302
303	// Take the ptr operand after all casts and geps 0. This way we can search
304	// cast graph down only.
305	Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
306
307	// It's is not safe to walk the use list of global value, because function
308	// passes aren't allowed to look outside their functions.
309	// FIXME: this could be fixed by filtering instructions from outside
310	// of current function.
311	if (isa<GlobalValue>(Val: LoadOperand))
312	return MemDepResult::getUnknown();
313
314	Instruction ClosestDependency = nullptr*;
315	// Order of instructions in uses list is unpredictible. In order to always
316	// get the same result, we will look for the closest dominance.
317	auto GetClosestDependency = [this](Instruction Best, Instruction Other) {
318	assert(Other && "Must call it with not null instruction");
319	if (Best == nullptr \|\| DT.dominates(Def: Best, User: Other))
320	return Other;
321	return Best;
322	};
323
324	for (const Use &Us : LoadOperand->uses()) {
325	auto *U = dyn_cast<Instruction>(Val: Us.getUser());
326	if (!U \|\| U == LI \|\| !DT.dominates(Def: U, User: LI))
327	continue;
328
329	// If we hit load/store with the same invariant.group metadata (and the
330	// same pointer operand) we can assume that value pointed by pointer
331	// operand didn't change.
332	if ((isa<LoadInst>(Val: U) \|\|
333	(isa<StoreInst>(Val: U) &&
334	cast<StoreInst>(Val: U)->getPointerOperand() == LoadOperand)) &&
335	U->hasMetadata(KindID: LLVMContext::MD_invariant_group))
336	ClosestDependency = GetClosestDependency (ClosestDependency, U);
337	}
338
339	if (!ClosestDependency)
340	return MemDepResult::getUnknown();
341	if (ClosestDependency->getParent() == BB)
342	return MemDepResult::getDef(Inst: ClosestDependency);
343	// Def(U) can't be returned here because it is non-local. If local
344	// dependency won't be found then return nonLocal counting that the
345	// user will call getNonLocalPointerDependency, which will return cached
346	// result.
347	NonLocalDefsCache.try_emplace(
348	Key: LI, Args: NonLocalDepResult (ClosestDependency->getParent(),
349	MemDepResult::getDef(Inst: ClosestDependency), nullptr));
350	ReverseNonLocalDefsCache [ClosestDependency].insert(Ptr: LI);
351	return MemDepResult::getNonLocal();
352	}
353
354	// Check if SI that may alias with MemLoc can be safely skipped. This is
355	// possible in case if SI can only must alias or no alias with MemLoc (no
356	// partial overlapping possible) and it writes the same value that MemLoc
357	// contains now (it was loaded before this store and was not modified in
358	// between).
359	static bool canSkipClobberingStore(const StoreInst *SI,
360	const MemoryLocation &MemLoc,
361	Align MemLocAlign, BatchAAResults &BatchAA,
362	unsigned ScanLimit) {
363	if (!MemLoc.Size.hasValue())
364	return false;
365	if (MemoryLocation::get(SI).Size != MemLoc.Size)
366	return false;
367	if (MemLoc.Size.isScalable())
368	return false;
369	if (std::min(a: MemLocAlign, b: SI->getAlign()).value() <
370	MemLoc.Size.getValue().getKnownMinValue())
371	return false;
372
373	auto *LI = dyn_cast<LoadInst>(Val: SI->getValueOperand());
374	if (!LI \|\| LI->getParent() != SI->getParent())
375	return false;
376	if (BatchAA.alias(LocA: MemoryLocation::get(LI), LocB: MemLoc) != AliasResult::MustAlias)
377	return false;
378	unsigned NumVisitedInsts = `0`;
379	for (const Instruction *I = LI; I != SI; I = I->getNextNode())
380	if (++NumVisitedInsts > ScanLimit \|\|
381	isModSet(MRI: BatchAA.getModRefInfo(I, OptLoc: MemLoc)))
382	return false;
383
384	return true;
385	}
386
387	MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
388	const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
389	BasicBlock BB, Instruction QueryInst, unsigned *Limit,
390	BatchAAResults &BatchAA) {
391	bool isInvariantLoad = false;
392	Align MemLocAlign =
393	MemLoc.Ptr->getPointerAlignment(DL: BB->getDataLayout());
394
395	unsigned DefaultLimit = getDefaultBlockScanLimit();
396	if (!Limit)
397	Limit = &DefaultLimit;
398
399	// We must be careful with atomic accesses, as they may allow another thread
400	// to touch this location, clobbering it. We are conservative: if the
401	// QueryInst is not a simple (non-atomic) memory access, we automatically
402	// return getClobber.
403	// If it is simple, we know based on the results of
404	// "Compiler testing via a theory of sound optimisations in the C11/C++11
405	// memory model" in PLDI 2013, that a non-atomic location can only be
406	// clobbered between a pair of a release and an acquire action, with no
407	// access to the location in between.
408	// Here is an example for giving the general intuition behind this rule.
409	// In the following code:
410	// store x 0;
411	// release action; [1]
412	// acquire action; [4]
413	// %val = load x;
414	// It is unsafe to replace %val by 0 because another thread may be running:
415	// acquire action; [2]
416	// store x 42;
417	// release action; [3]
418	// with synchronization from 1 to 2 and from 3 to 4, resulting in %val
419	// being 42. A key property of this program however is that if either
420	// 1 or 4 were missing, there would be a race between the store of 42
421	// either the store of 0 or the load (making the whole program racy).
422	// The paper mentioned above shows that the same property is respected
423	// by every program that can detect any optimization of that kind: either
424	// it is racy (undefined) or there is a release followed by an acquire
425	// between the pair of accesses under consideration.
426
427	// If the load is invariant, we "know" that it doesn't alias any* write. We*
428	// do want to respect mustalias results since defs are useful for value
429	// forwarding, but any mayalias write can be assumed to be noalias.
430	// Arguably, this logic should be pushed inside AliasAnalysis itself.
431	if (isLoad && QueryInst) {
432	isInvariantLoad = QueryInst->hasMetadata(KindID: LLVMContext::MD_invariant_load);
433	if (LoadInst *LI = dyn_cast<LoadInst>(Val: QueryInst))
434	MemLocAlign = LI->getAlign();
435	}
436
437	// True for volatile instruction.
438	// For Load/Store return true if atomic ordering is stronger than AO,
439	// for other instruction just true if it can read or write to memory.
440	auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
441	if (I->isVolatile())
442	return true;
443	if (auto *LI = dyn_cast<LoadInst>(Val: I))
444	return isStrongerThan(AO: LI->getOrdering(), Other: AO);
445	if (auto *SI = dyn_cast<StoreInst>(Val: I))
446	return isStrongerThan(AO: SI->getOrdering(), Other: AO);
447	return I->mayReadOrWriteMemory();
448	};
449
450	// Walk backwards through the basic block, looking for dependencies.
451	while (ScanIt != BB->begin()) {
452	Instruction Inst = &--ScanIt;
453
454	// Limit the amount of scanning we do so we don't end up with quadratic
455	// running time on extreme testcases.
456	--*Limit;
457	if (!*Limit)
458	return MemDepResult::getUnknown();
459
460	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) {
461	// If we reach a lifetime begin or end marker, then the query ends here
462	// because the value is undefined.
463	Intrinsic::ID ID = II->getIntrinsicID();
464	switch (ID) {
465	case Intrinsic::lifetime_start: {
466	MemoryLocation ArgLoc = MemoryLocation::getAfter(Ptr: II->getArgOperand(i: `0`));
467	if (BatchAA.isMustAlias(LocA: ArgLoc, LocB: MemLoc))
468	return MemDepResult::getDef(Inst: II);
469	continue;
470	}
471	case Intrinsic::masked_load:
472	case Intrinsic::masked_store: {
473	MemoryLocation Loc;
474	/ModRefInfo MR =/ GetLocation(Inst: II, Loc, TLI);
475	AliasResult R = BatchAA.alias(LocA: Loc, LocB: MemLoc);
476	if (R == AliasResult::NoAlias)
477	continue;
478	if (R == AliasResult::MustAlias)
479	return MemDepResult::getDef(Inst: II);
480	if (ID == Intrinsic::masked_load)
481	continue;
482	return MemDepResult::getClobber(Inst: II);
483	}
484	}
485	}
486
487	// Values depend on loads if the pointers are must aliased. This means
488	// that a load depends on another must aliased load from the same value.
489	// One exception is atomic loads: a value can depend on an atomic load that
490	// it does not alias with when this atomic load indicates that another
491	// thread may be accessing the location.
492	if (LoadInst *LI = dyn_cast<LoadInst>(Val: Inst)) {
493	// While volatile access cannot be eliminated, they do not have to clobber
494	// non-aliasing locations, as normal accesses, for example, can be safely
495	// reordered with volatile accesses.
496	if (LI->isVolatile()) {
497	if (!QueryInst)
498	// Original QueryInst may* be volatile*
499	return MemDepResult::getClobber(Inst: LI);
500	if (QueryInst->isVolatile())
501	// Ordering required if QueryInst is itself volatile
502	return MemDepResult::getClobber(Inst: LI);
503	// Otherwise, volatile doesn't imply any special ordering
504	}
505
506	// Atomic loads have complications involved.
507	// A Monotonic (or higher) load is OK if the query inst is itself not
508	// atomic.
509	// FIXME: This is overly conservative.
510	if (LI->isAtomic() && isStrongerThanUnordered(AO: LI->getOrdering())) {
511	if (!QueryInst \|\|
512	isComplexForReordering (QueryInst, AtomicOrdering::NotAtomic))
513	return MemDepResult::getClobber(Inst: LI);
514	if (LI->getOrdering() != AtomicOrdering::Monotonic)
515	return MemDepResult::getClobber(Inst: LI);
516	}
517
518	MemoryLocation LoadLoc = MemoryLocation::get(LI);
519
520	// If we found a pointer, check if it could be the same as our pointer.
521	AliasResult R = BatchAA.alias(LocA: LoadLoc, LocB: MemLoc);
522
523	if (R == AliasResult::NoAlias)
524	continue;
525
526	if (isLoad) {
527	// Must aliased loads are defs of each other.
528	if (R == AliasResult::MustAlias)
529	return MemDepResult::getDef(Inst);
530
531	// If we have a partial alias, then return this as a clobber for the
532	// client to handle.
533	if (R == AliasResult::PartialAlias && R.hasOffset()) {
534	ClobberOffsets [LI] = R.getOffset();
535	return MemDepResult::getClobber(Inst);
536	}
537
538	// Random may-alias loads don't depend on each other without a
539	// dependence.
540	continue;
541	}
542
543	// Stores don't alias loads from read-only memory.
544	if (!isModSet(MRI: BatchAA.getModRefInfoMask(Loc: LoadLoc)))
545	continue;
546
547	// Stores depend on may/must aliased loads.
548	return MemDepResult::getDef(Inst);
549	}
550
551	if (StoreInst *SI = dyn_cast<StoreInst>(Val: Inst)) {
552	// Atomic stores have complications involved.
553	// A Monotonic store is OK if the query inst is itself not atomic.
554	// FIXME: This is overly conservative.
555	if (!SI->isUnordered() && SI->isAtomic()) {
556	if (!QueryInst \|\|
557	isComplexForReordering (QueryInst, AtomicOrdering::Unordered))
558	return MemDepResult::getClobber(Inst: SI);
559	// Ok, if we are here the guard above guarantee us that
560	// QueryInst is a non-atomic or unordered load/store.
561	// SI is atomic with monotonic or release semantic (seq_cst for store
562	// is actually a release semantic plus total order over other seq_cst
563	// instructions, as soon as QueryInst is not seq_cst we can consider it
564	// as simple release semantic).
565	// Monotonic and Release semantic allows re-ordering before store
566	// so we are safe to go further and check the aliasing. It will prohibit
567	// re-ordering in case locations are may or must alias.
568	}
569
570	// While volatile access cannot be eliminated, they do not have to clobber
571	// non-aliasing locations, as normal accesses can for example be reordered
572	// with volatile accesses.
573	if (SI->isVolatile())
574	if (!QueryInst \|\| QueryInst->isVolatile())
575	return MemDepResult::getClobber(Inst: SI);
576
577	// If alias analysis can tell that this store is guaranteed to not modify
578	// the query pointer, ignore it. Use getModRefInfo to handle cases where
579	// the query pointer points to constant memory etc.
580	if (!isModOrRefSet(MRI: BatchAA.getModRefInfo(I: SI, OptLoc: MemLoc)))
581	continue;
582
583	// Ok, this store might clobber the query pointer. Check to see if it is
584	// a must alias: in this case, we want to return this as a def.
585	// FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
586	MemoryLocation StoreLoc = MemoryLocation::get(SI);
587
588	// If we found a pointer, check if it could be the same as our pointer.
589	AliasResult R = BatchAA.alias(LocA: StoreLoc, LocB: MemLoc);
590
591	if (R == AliasResult::NoAlias)
592	continue;
593	if (R == AliasResult::MustAlias)
594	return MemDepResult::getDef(Inst);
595	if (isInvariantLoad)
596	continue;
597	if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA, ScanLimit: *Limit))
598	continue;
599	return MemDepResult::getClobber(Inst);
600	}
601
602	// If this is an allocation, and if we know that the accessed pointer is to
603	// the allocation, return Def. This means that there is no dependence and
604	// the access can be optimized based on that. For example, a load could
605	// turn into undef. Note that we can bypass the allocation itself when
606	// looking for a clobber in many cases; that's an alias property and is
607	// handled by BasicAA.
608	if (isa<AllocaInst>(Val: Inst) \|\| isNoAliasCall(V: Inst)) {
609	const Value *AccessPtr = getUnderlyingObject(V: MemLoc.Ptr);
610	if (AccessPtr == Inst \|\| BatchAA.isMustAlias(V1: Inst, V2: AccessPtr))
611	return MemDepResult::getDef(Inst);
612	}
613
614	// If we found a select instruction for MemLoc pointer, return it as Def
615	// dependency.
616	if (isa<SelectInst>(Val: Inst) && MemLoc.Ptr == Inst)
617	return MemDepResult::getDef(Inst);
618
619	if (isInvariantLoad)
620	continue;
621
622	// A release fence requires that all stores complete before it, but does
623	// not prevent the reordering of following loads or stores 'before' the
624	// fence. As a result, we look past it when finding a dependency for
625	// loads. DSE uses this to find preceding stores to delete and thus we
626	// can't bypass the fence if the query instruction is a store.
627	if (FenceInst *FI = dyn_cast<FenceInst>(Val: Inst))
628	if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
629	continue;
630
631	// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
632	switch (BatchAA.getModRefInfo(I: Inst, OptLoc: MemLoc)) {
633	case ModRefInfo::NoModRef:
634	// If the call has no effect on the queried pointer, just ignore it.
635	continue;
636	case ModRefInfo::Mod:
637	return MemDepResult::getClobber(Inst);
638	case ModRefInfo::Ref:
639	// If the call is known to never store to the pointer, and if this is a
640	// load query, we can safely ignore it (scan past it).
641	if (isLoad)
642	continue;
643	[[fallthrough]];
644	default:
645	// Otherwise, there is a potential dependence. Return a clobber.
646	return MemDepResult::getClobber(Inst);
647	}
648	}
649
650	// No dependence found. If this is the entry block of the function, it is
651	// unknown, otherwise it is non-local.
652	if (BB != &BB->getParent()->getEntryBlock())
653	return MemDepResult::getNonLocal();
654	return MemDepResult::getNonFuncLocal();
655	}
656
657	MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
658	ClobberOffsets.clear();
659	Instruction *ScanPos = QueryInst;
660
661	// Check for a cached result
662	MemDepResult &LocalCache = LocalDeps [QueryInst];
663
664	// If the cached entry is non-dirty, just return it. Note that this depends
665	// on MemDepResult's default constructing to 'dirty'.
666	if (!LocalCache.isDirty())
667	return LocalCache;
668
669	// Otherwise, if we have a dirty entry, we know we can start the scan at that
670	// instruction, which may save us some work.
671	if (Instruction *Inst = LocalCache.getInst()) {
672	ScanPos = Inst;
673
674	RemoveFromReverseMap(ReverseMap&: ReverseLocalDeps, Inst, Val: QueryInst);
675	}
676
677	BasicBlock *QueryParent = QueryInst->getParent();
678
679	// Do the scan.
680	if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
681	// No dependence found. If this is the entry block of the function, it is
682	// unknown, otherwise it is non-local.
683	if (QueryParent != &QueryParent->getParent()->getEntryBlock())
684	LocalCache = MemDepResult::getNonLocal();
685	else
686	LocalCache = MemDepResult::getNonFuncLocal();
687	} else {
688	MemoryLocation MemLoc;
689	ModRefInfo MR = GetLocation(Inst: QueryInst, Loc&: MemLoc, TLI);
690	if (MemLoc.Ptr) {
691	// If we can do a pointer scan, make it happen.
692	bool isLoad = !isModSet(MRI: MR);
693	if (auto *II = dyn_cast<IntrinsicInst>(Val: QueryInst))
694	isLoad \|= II->getIntrinsicID() == Intrinsic::lifetime_start;
695
696	LocalCache =
697	getPointerDependencyFrom(MemLoc, isLoad, ScanIt: ScanPos->getIterator(),
698	BB: QueryParent, QueryInst, Limit: nullptr);
699	} else if (auto *QueryCall = dyn_cast<CallBase>(Val: QueryInst)) {
700	bool isReadOnly = AA.onlyReadsMemory(Call: QueryCall);
701	LocalCache = getCallDependencyFrom(Call: QueryCall, isReadOnlyCall: isReadOnly,
702	ScanIt: ScanPos->getIterator(), BB: QueryParent);
703	} else
704	// Non-memory instruction.
705	LocalCache = MemDepResult::getUnknown();
706	}
707
708	// Remember the result!
709	if (Instruction *I = LocalCache.getInst())
710	ReverseLocalDeps [I].insert(Ptr: QueryInst);
711
712	return LocalCache;
713	}
714
715	#ifndef NDEBUG
716	/// This method is used when -debug is specified to verify that cache arrays
717	/// are properly kept sorted.
718	static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
719	int Count = -`1`) {
720	if (Count == -`1`)
721	Count = Cache.size();
722	assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
723	"Cache isn't sorted!");
724	}
725	#endif
726
727	const MemoryDependenceResults::NonLocalDepInfo &
728	MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
729	assert(getDependency(QueryCall).isNonLocal() &&
730	"getNonLocalCallDependency should only be used on calls with "
731	"non-local deps!");
732	PerInstNLInfo &CacheP = NonLocalDepsMap [QueryCall];
733	NonLocalDepInfo &Cache = CacheP.first;
734
735	// This is the set of blocks that need to be recomputed. In the cached case,
736	// this can happen due to instructions being deleted etc. In the uncached
737	// case, this starts out as the set of predecessors we care about.
738	SmallVector<BasicBlock *, `32`> DirtyBlocks;
739
740	if (!Cache.empty()) {
741	// Okay, we have a cache entry. If we know it is not dirty, just return it
742	// with no computation.
743	if (!CacheP.second) {
744	++NumCacheNonLocal;
745	return Cache;
746	}
747
748	// If we already have a partially computed set of results, scan them to
749	// determine what is dirty, seeding our initial DirtyBlocks worklist.
750	for (auto &Entry : Cache)
751	if (Entry.getResult().isDirty())
752	DirtyBlocks.push_back(Elt: Entry.getBB());
753
754	// Sort the cache so that we can do fast binary search lookups below.
755	llvm::sort(C&: Cache);
756
757	++NumCacheDirtyNonLocal;
758	} else {
759	// Seed DirtyBlocks with each of the preds of QueryInst's block.
760	BasicBlock *QueryBB = QueryCall->getParent();
761	append_range(C&: DirtyBlocks, R: PredCache.get(BB: QueryBB));
762	++NumUncacheNonLocal;
763	}
764
765	// isReadonlyCall - If this is a read-only call, we can be more aggressive.
766	bool isReadonlyCall = AA.onlyReadsMemory(Call: QueryCall);
767
768	SmallPtrSet<BasicBlock *, `32`> Visited;
769
770	unsigned NumSortedEntries = Cache.size();
771	LLVM_DEBUG(AssertSorted(Cache));
772
773	// Iterate while we still have blocks to update.
774	while (!DirtyBlocks.empty()) {
775	BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
776
777	// Already processed this block?
778	if (!Visited.insert(Ptr: DirtyBB).second)
779	continue;
780
781	// Do a binary search to see if we already have an entry for this block in
782	// the cache set. If so, find it.
783	LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
784	NonLocalDepInfo::iterator Entry =
785	std::upper_bound(first: Cache.begin(), last: Cache.begin() + NumSortedEntries,
786	val: NonLocalDepEntry (DirtyBB));
787	if (Entry != Cache.begin() && std::prev(x: Entry)->getBB() == DirtyBB)
788	--Entry;
789
790	NonLocalDepEntry ExistingResult = nullptr*;
791	if (Entry != Cache.begin() + NumSortedEntries &&
792	Entry ->getBB() == DirtyBB) {
793	// If we already have an entry, and if it isn't already dirty, the block
794	// is done.
795	if (!Entry ->getResult().isDirty())
796	continue;
797
798	// Otherwise, remember this slot so we can update the value.
799	ExistingResult = &*Entry;
800	}
801
802	// If the dirty entry has a pointer, start scanning from it so we don't have
803	// to rescan the entire block.
804	BasicBlock::iterator ScanPos = DirtyBB->end();
805	if (ExistingResult) {
806	if (Instruction *Inst = ExistingResult->getResult().getInst()) {
807	ScanPos = Inst->getIterator();
808	// We're removing QueryInst's use of Inst.
809	RemoveFromReverseMap<Instruction *>(ReverseMap&: ReverseNonLocalDeps, Inst,
810	Val: QueryCall);
811	}
812	}
813
814	// Find out if this block has a local dependency for QueryInst.
815	MemDepResult Dep;
816
817	if (ScanPos != DirtyBB->begin()) {
818	Dep = getCallDependencyFrom(Call: QueryCall, isReadOnlyCall: isReadonlyCall, ScanIt: ScanPos, BB: DirtyBB);
819	} else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
820	// No dependence found. If this is the entry block of the function, it is
821	// a clobber, otherwise it is unknown.
822	Dep = MemDepResult::getNonLocal();
823	} else {
824	Dep = MemDepResult::getNonFuncLocal();
825	}
826
827	// If we had a dirty entry for the block, update it. Otherwise, just add
828	// a new entry.
829	if (ExistingResult)
830	ExistingResult->setResult(Dep);
831	else
832	Cache.push_back(x: NonLocalDepEntry (DirtyBB, Dep));
833
834	// If the block has a dependency (i.e. it isn't completely transparent to
835	// the value), remember the association!
836	if (!Dep.isNonLocal()) {
837	// Keep the ReverseNonLocalDeps map up to date so we can efficiently
838	// update this when we remove instructions.
839	if (Instruction *Inst = Dep.getInst())
840	ReverseNonLocalDeps [Inst].insert(Ptr: QueryCall);
841	} else {
842
843	// If the block is* completely transparent to the load, we need to check*
844	// the predecessors of this block. Add them to our worklist.
845	append_range(C&: DirtyBlocks, R: PredCache.get(BB: DirtyBB));
846	}
847	}
848
849	return Cache;
850	}
851
852	void MemoryDependenceResults::getNonLocalPointerDependency(
853	Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
854	const MemoryLocation Loc = MemoryLocation::get(Inst: QueryInst);
855	bool isLoad = isa<LoadInst>(Val: QueryInst);
856	BasicBlock *FromBB = QueryInst->getParent();
857	assert(FromBB);
858
859	assert(Loc.Ptr->getType()->isPointerTy() &&
860	"Can't get pointer deps of a non-pointer!");
861	Result.clear();
862	{
863	// Check if there is cached Def with invariant.group.
864	auto NonLocalDefIt = NonLocalDefsCache.find(Val: QueryInst);
865	if (NonLocalDefIt != NonLocalDefsCache.end()) {
866	Result.push_back(Elt: NonLocalDefIt ->second);
867	ReverseNonLocalDefsCache [NonLocalDefIt ->second.getResult().getInst()]
868	.erase(Ptr: QueryInst);
869	NonLocalDefsCache.erase(I: NonLocalDefIt);
870	return;
871	}
872	}
873	// This routine does not expect to deal with volatile instructions.
874	// Doing so would require piping through the QueryInst all the way through.
875	// TODO: volatiles can't be elided, but they can be reordered with other
876	// non-volatile accesses.
877
878	// We currently give up on any instruction which is ordered, but we do handle
879	// atomic instructions which are unordered.
880	// TODO: Handle ordered instructions
881	auto isOrdered = [](Instruction *Inst) {
882	if (LoadInst *LI = dyn_cast<LoadInst>(Val: Inst)) {
883	return !LI->isUnordered();
884	} else if (StoreInst *SI = dyn_cast<StoreInst>(Val: Inst)) {
885	return !SI->isUnordered();
886	}
887	return false;
888	};
889	if (QueryInst->isVolatile() \|\| isOrdered (QueryInst)) {
890	Result.push_back(Elt: NonLocalDepResult (FromBB, MemDepResult::getUnknown(),
891	const_cast<Value *>(Loc.Ptr)));
892	return;
893	}
894	const DataLayout &DL = FromBB->getDataLayout();
895	PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
896
897	// NonLocalPointerDepVisited is the set of blocks we've inspected, and the
898	// pointer we consider in each block. Because of critical edges, we currently
899	// bail out if querying a block with multiple different pointers. This can
900	// happen during PHI translation.
901	++NonLocalPointerDepEpoch;
902	assert(NonLocalPointerDepEpoch > `0` &&
903	"NonLocalPointerDepVisitedEpoch overflow");
904	NonLocalPointerDepVisited.resize(N: FromBB->getParent()->getMaxBlockNumber());
905	if (getNonLocalPointerDepFromBB(QueryInst, Pointer: Address, Loc, isLoad, BB: FromBB,
906	Result, SkipFirstBlock: true))
907	return;
908	Result.clear();
909	Result.push_back(Elt: NonLocalDepResult (FromBB, MemDepResult::getUnknown(),
910	const_cast<Value *>(Loc.Ptr)));
911	}
912
913	/// Compute the memdep value for BB with Pointer/PointeeSize using either
914	/// cached information in Cache or by doing a lookup (which may use dirty cache
915	/// info if available).
916	///
917	/// If we do a lookup, add the result to the cache.
918	MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
919	Instruction QueryInst, const* MemoryLocation &Loc, bool isLoad,
920	BasicBlock BB, NonLocalDepInfo Cache, unsigned NumSortedEntries,
921	BatchAAResults &BatchAA) {
922
923	bool isInvariantLoad = false;
924
925	if (QueryInst)
926	isInvariantLoad = QueryInst->hasMetadata(KindID: LLVMContext::MD_invariant_load);
927
928	// Do a binary search to see if we already have an entry for this block in
929	// the cache set. If so, find it.
930	NonLocalDepInfo::iterator Entry = std::upper_bound(
931	first: Cache->begin(), last: Cache->begin() + NumSortedEntries, val: NonLocalDepEntry (BB));
932	if (Entry != Cache->begin() && (Entry - `1`)->getBB() == BB)
933	--Entry;
934
935	NonLocalDepEntry ExistingResult = nullptr*;
936	if (Entry != Cache->begin() + NumSortedEntries && Entry ->getBB() == BB)
937	ExistingResult = &*Entry;
938
939	// Use cached result for invariant load only if there is no dependency for non
940	// invariant load. In this case invariant load can not have any dependency as
941	// well.
942	if (ExistingResult && isInvariantLoad &&
943	!ExistingResult->getResult().isNonFuncLocal())
944	ExistingResult = nullptr;
945
946	// If we have a cached entry, and it is non-dirty, use it as the value for
947	// this dependency.
948	if (ExistingResult && !ExistingResult->getResult().isDirty()) {
949	++NumCacheNonLocalPtr;
950	return ExistingResult->getResult();
951	}
952
953	// Otherwise, we have to scan for the value. If we have a dirty cache
954	// entry, start scanning from its position, otherwise we scan from the end
955	// of the block.
956	BasicBlock::iterator ScanPos = BB->end();
957	if (ExistingResult && ExistingResult->getResult().getInst()) {
958	assert(ExistingResult->getResult().getInst()->getParent() == BB &&
959	"Instruction invalidated?");
960	++NumCacheDirtyNonLocalPtr;
961	ScanPos = ExistingResult->getResult().getInst()->getIterator();
962
963	// Eliminating the dirty entry from 'Cache', so update the reverse info.
964	ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
965	RemoveFromReverseMap(ReverseMap&: ReverseNonLocalPtrDeps, Inst: &*ScanPos, Val: CacheKey);
966	} else {
967	++NumUncacheNonLocalPtr;
968	}
969
970	// Scan the block for the dependency.
971	MemDepResult Dep = getPointerDependencyFrom(MemLoc: Loc, isLoad, ScanIt: ScanPos, BB,
972	QueryInst, Limit: nullptr, BatchAA);
973
974	// Don't cache results for invariant load.
975	if (isInvariantLoad)
976	return Dep;
977
978	// If we had a dirty entry for the block, update it. Otherwise, just add
979	// a new entry.
980	if (ExistingResult)
981	ExistingResult->setResult(Dep);
982	else
983	Cache->push_back(x: NonLocalDepEntry (BB, Dep));
984
985	// If the block has a dependency (i.e. it isn't completely transparent to
986	// the value), remember the reverse association because we just added it
987	// to Cache!
988	if (!Dep.isLocal())
989	return Dep;
990
991	// Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
992	// update MemDep when we remove instructions.
993	Instruction *Inst = Dep.getInst();
994	assert(Inst && "Didn't depend on anything?");
995	ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
996	ReverseNonLocalPtrDeps [Inst].insert(Ptr: CacheKey);
997	return Dep;
998	}
999
1000	/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
1001	/// array that are already properly ordered.
1002	///
1003	/// This is optimized for the case when only a few entries are added.
1004	static void
1005	SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
1006	unsigned NumSortedEntries) {
1007
1008	// If only one entry, don't sort.
1009	if (Cache.size() < `2`)
1010	return;
1011
1012	unsigned s = Cache.size() - NumSortedEntries;
1013
1014	// If the cache is already sorted, don't sort it again.
1015	if (s == `0`)
1016	return;
1017
1018	// If no entry is sorted, sort the whole cache.
1019	if (NumSortedEntries == `0`) {
1020	llvm::sort(C&: Cache);
1021	return;
1022	}
1023
1024	// If the number of unsorted entires is small and the cache size is big, using
1025	// insertion sort is faster. Here use Log2_32 to quickly choose the sort
1026	// method.
1027	if (s < Log2_32(Value: Cache.size())) {
1028	while (s > `0`) {
1029	NonLocalDepEntry Val = Cache.back();
1030	Cache.pop_back();
1031	MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1032	std::upper_bound(first: Cache.begin(), last: Cache.end() - s + `1`, val: Val);
1033	Cache.insert(position: Entry, x: Val);
1034	s--;
1035	}
1036	} else {
1037	llvm::sort(C&: Cache);
1038	}
1039	}
1040
1041	void MemoryDependenceResults::setNonLocalPointerDepVisited(BasicBlock *BB,
1042	Value *V) {
1043	NonLocalPointerDepVisited [BB->getNumber()] = {V, NonLocalPointerDepEpoch};
1044	}
1045
1046	bool MemoryDependenceResults::isNonLocalPointerDepVisited(
1047	BasicBlock BB) const* {
1048	return NonLocalPointerDepVisited [BB->getNumber()].second ==
1049	NonLocalPointerDepEpoch;
1050	}
1051
1052	Value *
1053	MemoryDependenceResults::lookupNonLocalPointerDepVisited(BasicBlock BB) const* {
1054	assert(isNonLocalPointerDepVisited(BB) &&
1055	"Visited value requested for unseen block");
1056	return NonLocalPointerDepVisited [BB->getNumber()].first;
1057	}
1058
1059	/// Perform a dependency query based on pointer/pointeesize starting at the end
1060	/// of StartBB.
1061	///
1062	/// Add any clobber/def results to the results vector and keep track of which
1063	/// blocks are visited in 'NonLocalPointerDepVisited'.
1064	///
1065	/// This has special behavior for the first block queries (when SkipFirstBlock
1066	/// is true). In this special case, it ignores the contents of the specified
1067	/// block and starts returning dependence info for its predecessors.
1068	///
1069	/// This function returns true on success, or false to indicate that it could
1070	/// not compute dependence information for some reason. This should be treated
1071	/// as a clobber dependence on the first instruction in the predecessor block.
1072	bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1073	Instruction QueryInst, const* PHITransAddr &Pointer,
1074	const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1075	SmallVectorImpl<NonLocalDepResult> &Result, bool SkipFirstBlock,
1076	bool IsIncomplete) {
1077	// Look up the cached info for Pointer.
1078	ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1079
1080	// Set up a temporary NLPI value. If the map doesn't yet have an entry for
1081	// CacheKey, this value will be inserted as the associated value. Otherwise,
1082	// it'll be ignored, and we'll have to check to see if the cached size and
1083	// aa tags are consistent with the current query.
1084	NonLocalPointerInfo InitialNLPI;
1085	InitialNLPI.Size = Loc.Size;
1086	InitialNLPI.AATags = Loc.AATags;
1087
1088	bool isInvariantLoad = false;
1089	if (QueryInst)
1090	isInvariantLoad = QueryInst->hasMetadata(KindID: LLVMContext::MD_invariant_load);
1091
1092	// Get the NLPI for CacheKey, inserting one into the map if it doesn't
1093	// already have one.
1094	std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1095	NonLocalPointerDeps.insert(KV: std::make_pair(x&: CacheKey, y&: InitialNLPI));
1096	NonLocalPointerInfo *CacheInfo = &Pair.first ->second;
1097
1098	// If we already have a cache entry for this CacheKey, we may need to do some
1099	// work to reconcile the cache entry and the current query.
1100	// Invariant loads don't participate in caching. Thus no need to reconcile.
1101	if (!isInvariantLoad && !Pair.second) {
1102	if (CacheInfo->Size != Loc.Size) {
1103	// The query's Size is not equal to the cached one. Throw out the cached
1104	// data and proceed with the query with the new size.
1105	CacheInfo->Pair = BBSkipFirstBlockPair ();
1106	CacheInfo->Size = Loc.Size;
1107	for (auto &Entry : CacheInfo->NonLocalDeps)
1108	if (Instruction *Inst = Entry.getResult().getInst())
1109	RemoveFromReverseMap(ReverseMap&: ReverseNonLocalPtrDeps, Inst, Val: CacheKey);
1110	CacheInfo->NonLocalDeps.clear();
1111	// The cache is cleared (in the above line) so we will have lost
1112	// information about blocks we have already visited. We therefore must
1113	// assume that the cache information is incomplete.
1114	IsIncomplete = true;
1115	}
1116
1117	// If the query's AATags are inconsistent with the cached one,
1118	// conservatively throw out the cached data and restart the query with
1119	// no tag if needed.
1120	if (CacheInfo->AATags != Loc.AATags) {
1121	if (CacheInfo->AATags) {
1122	CacheInfo->Pair = BBSkipFirstBlockPair ();
1123	CacheInfo->AATags = AAMDNodes ();
1124	for (auto &Entry : CacheInfo->NonLocalDeps)
1125	if (Instruction *Inst = Entry.getResult().getInst())
1126	RemoveFromReverseMap(ReverseMap&: ReverseNonLocalPtrDeps, Inst, Val: CacheKey);
1127	CacheInfo->NonLocalDeps.clear();
1128	// The cache is cleared (in the above line) so we will have lost
1129	// information about blocks we have already visited. We therefore must
1130	// assume that the cache information is incomplete.
1131	IsIncomplete = true;
1132	}
1133	if (Loc.AATags)
1134	return getNonLocalPointerDepFromBB(
1135	QueryInst, Pointer, Loc: Loc.getWithoutAATags(), isLoad, StartBB, Result,
1136	SkipFirstBlock, IsIncomplete);
1137	}
1138	}
1139
1140	NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1141
1142	// If we have valid cached information for exactly the block we are
1143	// investigating, just return it with no recomputation.
1144	// Don't use cached information for invariant loads since it is valid for
1145	// non-invariant loads only.
1146	if (!IsIncomplete && !isInvariantLoad &&
1147	CacheInfo->Pair == BBSkipFirstBlockPair (StartBB, SkipFirstBlock)) {
1148	// We have a fully cached result for this query then we can just return the
1149	// cached results and populate the visited set. However, we have to verify
1150	// that we don't already have conflicting results for these blocks. Check
1151	// to ensure that if a block in the results set is in the visited set that
1152	// it was for the same pointer query.
1153	for (auto &Entry : *Cache) {
1154	if (!isNonLocalPointerDepVisited(BB: Entry.getBB()))
1155	continue;
1156	Value *Prev = lookupNonLocalPointerDepVisited(BB: Entry.getBB());
1157	if (Prev == Pointer.getAddr())
1158	continue;
1159
1160	// We have a pointer mismatch in a block. Just return false, saying
1161	// that something was clobbered in this result. We could also do a
1162	// non-fully cached query, but there is little point in doing this.
1163	return false;
1164	}
1165
1166	Value *Addr = Pointer.getAddr();
1167	for (auto &Entry : *Cache) {
1168	setNonLocalPointerDepVisited(BB: Entry.getBB(), V: Addr);
1169	if (Entry.getResult().isNonLocal()) {
1170	continue;
1171	}
1172
1173	if (DT.isReachableFromEntry(A: Entry.getBB())) {
1174	Result.push_back(
1175	Elt: NonLocalDepResult (Entry.getBB(), Entry.getResult(), Addr));
1176	}
1177	}
1178	++NumCacheCompleteNonLocalPtr;
1179	return true;
1180	}
1181
1182	// If the size of this cache has surpassed the global limit, stop here.
1183	if (Cache->size() > CacheGlobalLimit)
1184	return false;
1185
1186	// Otherwise, either this is a new block, a block with an invalid cache
1187	// pointer or one that we're about to invalidate by putting more info into
1188	// it than its valid cache info. If empty and not explicitly indicated as
1189	// incomplete, the result will be valid cache info, otherwise it isn't.
1190	//
1191	// Invariant loads don't affect cache in any way thus no need to update
1192	// CacheInfo as well.
1193	if (!isInvariantLoad) {
1194	if (!IsIncomplete && Cache->empty())
1195	CacheInfo->Pair = BBSkipFirstBlockPair (StartBB, SkipFirstBlock);
1196	else
1197	CacheInfo->Pair = BBSkipFirstBlockPair ();
1198	}
1199
1200	SmallVector<BasicBlock *, `32`> Worklist;
1201	Worklist.push_back(Elt: StartBB);
1202
1203	// PredList used inside loop.
1204	SmallVector<std::pair<BasicBlock *, PHITransAddr>, `16`> PredList;
1205
1206	// Keep track of the entries that we know are sorted. Previously cached
1207	// entries will all be sorted. The entries we add we only sort on demand (we
1208	// don't insert every element into its sorted position). We know that we
1209	// won't get any reuse from currently inserted values, because we don't
1210	// revisit blocks after we insert info for them.
1211	unsigned NumSortedEntries = Cache->size();
1212	unsigned WorklistEntries = BlockNumberLimit;
1213	bool GotWorklistLimit = false;
1214	LLVM_DEBUG(AssertSorted(*Cache));
1215
1216	BatchAAResults BatchAA(AA, &EEA);
1217	while (!Worklist.empty()) {
1218	BasicBlock *BB = Worklist.pop_back_val();
1219
1220	// If we do process a large number of blocks it becomes very expensive and
1221	// likely it isn't worth worrying about
1222	if (Result.size() > NumResultsLimit) {
1223	// Sort it now (if needed) so that recursive invocations of
1224	// getNonLocalPointerDepFromBB and other routines that could reuse the
1225	// cache value will only see properly sorted cache arrays.
1226	if (Cache && NumSortedEntries != Cache->size()) {
1227	SortNonLocalDepInfoCache(Cache&: *Cache, NumSortedEntries);
1228	}
1229	// Since we bail out, the "Cache" set won't contain all of the
1230	// results for the query. This is ok (we can still use it to accelerate
1231	// specific block queries) but we can't do the fastpath "return all
1232	// results from the set". Clear out the indicator for this.
1233	CacheInfo->Pair = BBSkipFirstBlockPair ();
1234	return false;
1235	}
1236
1237	// Skip the first block if we have it.
1238	if (!SkipFirstBlock) {
1239	// Analyze the dependency of Pointer in FromBB. See if we already have*
1240	// been here.
1241	assert(isNonLocalPointerDepVisited(BB) &&
1242	"Should check 'visited' before adding to WL");
1243
1244	// Get the dependency info for Pointer in BB. If we have cached
1245	// information, we will use it, otherwise we compute it.
1246	LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1247	MemDepResult Dep = getNonLocalInfoForBlock(
1248	QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);
1249
1250	// If we got a Def or Clobber, add this to the list of results.
1251	if (!Dep.isNonLocal()) {
1252	if (DT.isReachableFromEntry(A: BB)) {
1253	Result.push_back(Elt: NonLocalDepResult (BB, Dep, Pointer.getAddr()));
1254	continue;
1255	}
1256	}
1257	}
1258
1259	// If 'Pointer' is an instruction defined in this block, then we need to do
1260	// phi translation to change it into a value live in the predecessor block.
1261	// If not, we just add the predecessors to the worklist and scan them with
1262	// the same Pointer.
1263	if (!Pointer.needsPHITranslationFromBlock(BB)) {
1264	SkipFirstBlock = false;
1265	SmallVector<BasicBlock *, `16`> NewBlocks;
1266	for (BasicBlock *Pred : PredCache.get(BB)) {
1267	// Verify that we haven't looked at this block yet.
1268	if (!isNonLocalPointerDepVisited(BB: Pred)) {
1269	setNonLocalPointerDepVisited(BB: Pred, V: Pointer.getAddr());
1270	// First time we've looked at PI.*
1271	NewBlocks.push_back(Elt: Pred);
1272	continue;
1273	}
1274	Value *Prev = lookupNonLocalPointerDepVisited(BB: Pred);
1275	// If we have seen this block before, but it was with a different
1276	// pointer then we have a phi translation failure and we have to treat
1277	// this as a clobber.
1278	if (Prev != Pointer.getAddr()) {
1279	// Make sure to clean up the Visited map before continuing on to
1280	// PredTranslationFailure.
1281	for (auto *NewBlock : NewBlocks)
1282	setNonLocalPointerDepVisited(BB: NewBlock, V: nullptr);
1283	goto PredTranslationFailure;
1284	}
1285	}
1286	if (NewBlocks.size() > WorklistEntries) {
1287	// Make sure to clean up the Visited map before continuing on to
1288	// PredTranslationFailure.
1289	for (auto *NewBlock : NewBlocks)
1290	setNonLocalPointerDepVisited(BB: NewBlock, V: nullptr);
1291	GotWorklistLimit = true;
1292	goto PredTranslationFailure;
1293	}
1294	WorklistEntries -= NewBlocks.size();
1295	Worklist.append(in_start: NewBlocks.begin(), in_end: NewBlocks.end());
1296	continue;
1297	}
1298
1299	// We do need to do phi translation, if we know ahead of time we can't phi
1300	// translate this value, don't even try.
1301	if (!Pointer.isPotentiallyPHITranslatable())
1302	goto PredTranslationFailure;
1303
1304	// We may have added values to the cache list before this PHI translation.
1305	// If so, we haven't done anything to ensure that the cache remains sorted.
1306	// Sort it now (if needed) so that recursive invocations of
1307	// getNonLocalPointerDepFromBB and other routines that could reuse the cache
1308	// value will only see properly sorted cache arrays.
1309	if (Cache && NumSortedEntries != Cache->size()) {
1310	SortNonLocalDepInfoCache(Cache&: *Cache, NumSortedEntries);
1311	NumSortedEntries = Cache->size();
1312	}
1313	Cache = nullptr;
1314
1315	PredList.clear();
1316	for (BasicBlock *Pred : PredCache.get(BB)) {
1317	PredList.push_back(Elt: std::make_pair(x&: Pred, y: Pointer));
1318
1319	// Get the PHI translated pointer in this predecessor. This can fail if
1320	// not translatable, in which case the getAddr() returns null.
1321	PHITransAddr &PredPointer = PredList.back().second;
1322	Value *PredPtrVal =
1323	PredPointer.translateValue(CurBB: BB, PredBB: Pred, DT: &DT, /MustDominate=/false);
1324
1325	// Check to see if we have already visited this pred block with another
1326	// pointer. If so, we can't do this lookup. This failure can occur
1327	// with PHI translation when a critical edge exists and the PHI node in
1328	// the successor translates to a pointer value different than the
1329	// pointer the block was first analyzed with.
1330	if (!isNonLocalPointerDepVisited(BB: Pred)) {
1331	setNonLocalPointerDepVisited(BB: Pred, V: PredPtrVal);
1332	continue;
1333	}
1334	Value *PrevVal = lookupNonLocalPointerDepVisited(BB: Pred);
1335
1336	// We found the pred; take it off the list of preds to visit.
1337	PredList.pop_back();
1338
1339	// If the predecessor was visited with PredPtr, then we already did
1340	// the analysis and can ignore it.
1341	if (PrevVal == PredPtrVal)
1342	continue;
1343
1344	// Otherwise, the block was previously analyzed with a different
1345	// pointer. We can't represent the result of this case, so we just
1346	// treat this as a phi translation failure.
1347
1348	// Make sure to clean up the Visited map before continuing on to
1349	// PredTranslationFailure.
1350	for (const auto &Pred : PredList)
1351	setNonLocalPointerDepVisited(BB: Pred.first, V: nullptr);
1352
1353	goto PredTranslationFailure;
1354	}
1355
1356	// Actually process results here; this need to be a separate loop to avoid
1357	// calling getNonLocalPointerDepFromBB for blocks we don't want to return
1358	// any results for. (getNonLocalPointerDepFromBB will modify our
1359	// datastructures in ways the code after the PredTranslationFailure label
1360	// doesn't expect.)
1361	for (auto &I : PredList) {
1362	BasicBlock *Pred = I.first;
1363	PHITransAddr &PredPointer = I.second;
1364	Value *PredPtrVal = PredPointer.getAddr();
1365
1366	bool CanTranslate = true;
1367	// If PHI translation was unable to find an available pointer in this
1368	// predecessor, then we have to assume that the pointer is clobbered in
1369	// that predecessor. We can still do PRE of the load, which would insert
1370	// a computation of the pointer in this predecessor.
1371	if (!PredPtrVal) {
1372	// If translation failed but the (partially) translated address
1373	// expression depends on a select instruction, try to translate both
1374	// sides of that select. The select condition is recovered from the
1375	// failed `PredPointer` (the phi has already been resolved to the
1376	// select there), but the two sides must be translated from the
1377	// original, untranslated `Pointer`.
1378	if (Value *Cond = PredPointer.getSelectCondition()) {
1379	SelectAddr::SelectAddrs SelAddrs =
1380	PHITransAddr (Pointer).translateValue(CurBB: BB, PredBB: Pred, DT: &DT, Cond);
1381	if (SelAddrs.first && SelAddrs.second) {
1382	Result.push_back(Elt: NonLocalDepResult (Pred, MemDepResult::getSelect(),
1383	SelectAddr (Cond, SelAddrs)));
1384	NonLocalPointerInfo &NLPI = NonLocalPointerDeps [CacheKey];
1385	NLPI.Pair = BBSkipFirstBlockPair ();
1386	continue;
1387	}
1388	}
1389	CanTranslate = false;
1390	}
1391
1392	// FIXME: it is entirely possible that PHI translating will end up with
1393	// the same value. Consider PHI translating something like:
1394	// X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't need
1395	// to recurse here, pedantically speaking.
1396
1397	// If getNonLocalPointerDepFromBB fails here, that means the cached
1398	// result conflicted with the Visited list; we have to conservatively
1399	// assume it is unknown, but this also does not block PRE of the load.
1400	if (!CanTranslate \|\|
1401	!getNonLocalPointerDepFromBB(QueryInst, Pointer: PredPointer,
1402	Loc: Loc.getWithNewPtr(NewPtr: PredPtrVal), isLoad,
1403	StartBB: Pred, Result)) {
1404	// Add the entry to the Result list.
1405	NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1406	Result.push_back(Elt: Entry);
1407
1408	// Since we had a phi translation failure, the cache for CacheKey won't
1409	// include all of the entries that we need to immediately satisfy future
1410	// queries. Mark this in NonLocalPointerDeps by setting the
1411	// BBSkipFirstBlockPair pointer to null. This requires reuse of the
1412	// cached value to do more work but not miss the phi trans failure.
1413	NonLocalPointerInfo &NLPI = NonLocalPointerDeps [CacheKey];
1414	NLPI.Pair = BBSkipFirstBlockPair ();
1415	continue;
1416	}
1417	}
1418
1419	// Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1420	CacheInfo = &NonLocalPointerDeps [CacheKey];
1421	Cache = &CacheInfo->NonLocalDeps;
1422	NumSortedEntries = Cache->size();
1423
1424	// Since we did phi translation, the "Cache" set won't contain all of the
1425	// results for the query. This is ok (we can still use it to accelerate
1426	// specific block queries) but we can't do the fastpath "return all
1427	// results from the set" Clear out the indicator for this.
1428	CacheInfo->Pair = BBSkipFirstBlockPair ();
1429	SkipFirstBlock = false;
1430	continue;
1431
1432	PredTranslationFailure:
1433	// The following code is "failure"; we can't produce a sane translation
1434	// for the given block. It assumes that we haven't modified any of
1435	// our datastructures while processing the current block.
1436
1437	if (!Cache) {
1438	// Refresh the CacheInfo/Cache pointer if it got invalidated.
1439	CacheInfo = &NonLocalPointerDeps [CacheKey];
1440	Cache = &CacheInfo->NonLocalDeps;
1441	NumSortedEntries = Cache->size();
1442	}
1443
1444	// Since we failed phi translation, the "Cache" set won't contain all of the
1445	// results for the query. This is ok (we can still use it to accelerate
1446	// specific block queries) but we can't do the fastpath "return all
1447	// results from the set". Clear out the indicator for this.
1448	CacheInfo->Pair = BBSkipFirstBlockPair ();
1449
1450	// If nothing* works, mark the pointer as unknown.*
1451	//
1452	// If this is the magic first block, return this as a clobber of the whole
1453	// incoming value. Since we can't phi translate to one of the predecessors,
1454	// we have to bail out.
1455	if (SkipFirstBlock)
1456	return false;
1457
1458	// Results of invariant loads are not cached thus no need to update cached
1459	// information.
1460	if (!isInvariantLoad) {
1461	for (NonLocalDepEntry &I : llvm::reverse(C&: *Cache)) {
1462	if (I.getBB() != BB)
1463	continue;
1464
1465	assert((GotWorklistLimit \|\| I.getResult().isNonLocal() \|\|
1466	!DT.isReachableFromEntry(BB)) &&
1467	"Should only be here with transparent block");
1468
1469	I.setResult(MemDepResult::getUnknown());
1470
1471
1472	break;
1473	}
1474	}
1475	(void)GotWorklistLimit;
1476	// Go ahead and report unknown dependence.
1477	Result.push_back(
1478	Elt: NonLocalDepResult (BB, MemDepResult::getUnknown(), Pointer.getAddr()));
1479	}
1480
1481	// Okay, we're done now. If we added new values to the cache, re-sort it.
1482	SortNonLocalDepInfoCache(Cache&: *Cache, NumSortedEntries);
1483	LLVM_DEBUG(AssertSorted(*Cache));
1484	return true;
1485	}
1486
1487	/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1488	void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
1489	ValueIsLoadPair P) {
1490
1491	// Most of the time this cache is empty.
1492	if (!NonLocalDefsCache.empty()) {
1493	auto it = NonLocalDefsCache.find(Val: P.getPointer());
1494	if (it != NonLocalDefsCache.end()) {
1495	RemoveFromReverseMap(ReverseMap&: ReverseNonLocalDefsCache,
1496	Inst: it ->second.getResult().getInst(), Val: P.getPointer());
1497	NonLocalDefsCache.erase(I: it);
1498	}
1499
1500	if (auto *I = dyn_cast<Instruction>(Val: P.getPointer())) {
1501	auto toRemoveIt = ReverseNonLocalDefsCache.find(Val: I);
1502	if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1503	for (const auto *entry : toRemoveIt ->second)
1504	NonLocalDefsCache.erase(Val: entry);
1505	ReverseNonLocalDefsCache.erase(I: toRemoveIt);
1506	}
1507	}
1508	}
1509
1510	CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(Val: P);
1511	if (It == NonLocalPointerDeps.end())
1512	return;
1513
1514	// Remove all of the entries in the BB->val map. This involves removing
1515	// instructions from the reverse map.
1516	NonLocalDepInfo &PInfo = It ->second.NonLocalDeps;
1517
1518	for (const NonLocalDepEntry &DE : PInfo) {
1519	Instruction *Target = DE.getResult().getInst();
1520	if (!Target)
1521	continue; // Ignore non-local dep results.
1522	assert(Target->getParent() == DE.getBB());
1523
1524	// Eliminating the dirty entry from 'Cache', so update the reverse info.
1525	RemoveFromReverseMap(ReverseMap&: ReverseNonLocalPtrDeps, Inst: Target, Val: P);
1526	}
1527
1528	// Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1529	NonLocalPointerDeps.erase(I: It);
1530	}
1531
1532	void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1533	// If Ptr isn't really a pointer, just ignore it.
1534	if (!Ptr->getType()->isPointerTy())
1535	return;
1536	// Flush store info for the pointer.
1537	removeCachedNonLocalPointerDependencies(P: ValueIsLoadPair (Ptr, false));
1538	// Flush load info for the pointer.
1539	removeCachedNonLocalPointerDependencies(P: ValueIsLoadPair (Ptr, true));
1540	}
1541
1542	void MemoryDependenceResults::invalidateCachedPredecessors() {
1543	PredCache.clear();
1544	}
1545
1546	void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1547	EEA.removeInstruction(I: RemInst);
1548
1549	// Walk through the Non-local dependencies, removing this one as the value
1550	// for any cached queries.
1551	NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(Val: RemInst);
1552	if (NLDI != NonLocalDepsMap.end()) {
1553	NonLocalDepInfo &BlockMap = NLDI ->second.first;
1554	for (auto &Entry : BlockMap)
1555	if (Instruction *Inst = Entry.getResult().getInst())
1556	RemoveFromReverseMap(ReverseMap&: ReverseNonLocalDeps, Inst, Val: RemInst);
1557	NonLocalDepsMap.erase(I: NLDI);
1558	}
1559
1560	// If we have a cached local dependence query for this instruction, remove it.
1561	LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(Val: RemInst);
1562	if (LocalDepEntry != LocalDeps.end()) {
1563	// Remove us from DepInst's reverse set now that the local dep info is gone.
1564	if (Instruction *Inst = LocalDepEntry ->second.getInst())
1565	RemoveFromReverseMap(ReverseMap&: ReverseLocalDeps, Inst, Val: RemInst);
1566
1567	// Remove this local dependency info.
1568	LocalDeps.erase(I: LocalDepEntry);
1569	}
1570
1571	// If we have any cached dependencies on this instruction, remove
1572	// them.
1573
1574	// If the instruction is a pointer, remove it from both the load info and the
1575	// store info.
1576	if (RemInst->getType()->isPointerTy()) {
1577	removeCachedNonLocalPointerDependencies(P: ValueIsLoadPair (RemInst, false));
1578	removeCachedNonLocalPointerDependencies(P: ValueIsLoadPair (RemInst, true));
1579	} else {
1580	// Otherwise, if the instructions is in the map directly, it must be a load.
1581	// Remove it.
1582	auto toRemoveIt = NonLocalDefsCache.find(Val: RemInst);
1583	if (toRemoveIt != NonLocalDefsCache.end()) {
1584	assert(isa<LoadInst>(RemInst) &&
1585	"only load instructions should be added directly");
1586	const Instruction *DepV = toRemoveIt ->second.getResult().getInst();
1587	ReverseNonLocalDefsCache.find(Val: DepV)->second.erase(Ptr: RemInst);
1588	NonLocalDefsCache.erase(I: toRemoveIt);
1589	}
1590	}
1591
1592	// Loop over all of the things that depend on the instruction we're removing.
1593	SmallVector<std::pair<Instruction , Instruction >, `8`> ReverseDepsToAdd;
1594
1595	// If we find RemInst as a clobber or Def in any of the maps for other values,
1596	// we need to replace its entry with a dirty version of the instruction after
1597	// it. If RemInst is a terminator, we use a null dirty value.
1598	//
1599	// Using a dirty version of the instruction after RemInst saves having to scan
1600	// the entire block to get to this point.
1601	MemDepResult NewDirtyVal;
1602	if (!RemInst->isTerminator())
1603	NewDirtyVal = MemDepResult::getDirty(Inst: &*++RemInst->getIterator());
1604
1605	ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(Val: RemInst);
1606	if (ReverseDepIt != ReverseLocalDeps.end()) {
1607	// RemInst can't be the terminator if it has local stuff depending on it.
1608	assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1609	"Nothing can locally depend on a terminator");
1610
1611	for (Instruction *InstDependingOnRemInst : ReverseDepIt ->second) {
1612	assert(InstDependingOnRemInst != RemInst &&
1613	"Already removed our local dep info");
1614
1615	LocalDeps [InstDependingOnRemInst] = NewDirtyVal;
1616
1617	// Make sure to remember that new things depend on NewDepInst.
1618	assert(NewDirtyVal.getInst() &&
1619	"There is no way something else can have "
1620	"a local dep on this if it is a terminator!");
1621	ReverseDepsToAdd.push_back(
1622	Elt: std::make_pair(x: NewDirtyVal.getInst(), y&: InstDependingOnRemInst));
1623	}
1624
1625	ReverseLocalDeps.erase(I: ReverseDepIt);
1626
1627	// Add new reverse deps after scanning the set, to avoid invalidating the
1628	// 'ReverseDeps' reference.
1629	while (!ReverseDepsToAdd.empty()) {
1630	ReverseLocalDeps [ReverseDepsToAdd.back().first].insert(
1631	Ptr: ReverseDepsToAdd.back().second);
1632	ReverseDepsToAdd.pop_back();
1633	}
1634	}
1635
1636	ReverseDepIt = ReverseNonLocalDeps.find(Val: RemInst);
1637	if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1638	for (Instruction *I : ReverseDepIt ->second) {
1639	assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1640
1641	PerInstNLInfo &INLD = NonLocalDepsMap [I];
1642	// The information is now dirty!
1643	INLD.second = true;
1644
1645	for (auto &Entry : INLD.first) {
1646	if (Entry.getResult().getInst() != RemInst)
1647	continue;
1648
1649	// Convert to a dirty entry for the subsequent instruction.
1650	Entry.setResult(NewDirtyVal);
1651
1652	if (Instruction *NextI = NewDirtyVal.getInst())
1653	ReverseDepsToAdd.push_back(Elt: std::make_pair(x&: NextI, y&: I));
1654	}
1655	}
1656
1657	ReverseNonLocalDeps.erase(I: ReverseDepIt);
1658
1659	// Add new reverse deps after scanning the set, to avoid invalidating 'Set'
1660	while (!ReverseDepsToAdd.empty()) {
1661	ReverseNonLocalDeps [ReverseDepsToAdd.back().first].insert(
1662	Ptr: ReverseDepsToAdd.back().second);
1663	ReverseDepsToAdd.pop_back();
1664	}
1665	}
1666
1667	// If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1668	// value in the NonLocalPointerDeps info.
1669	ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1670	ReverseNonLocalPtrDeps.find(Val: RemInst);
1671	if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1672	SmallVector<std::pair<Instruction *, ValueIsLoadPair>, `8`>
1673	ReversePtrDepsToAdd;
1674
1675	for (ValueIsLoadPair P : ReversePtrDepIt ->second) {
1676	assert(P.getPointer() != RemInst &&
1677	"Already removed NonLocalPointerDeps info for RemInst");
1678
1679	auto &NLPD = NonLocalPointerDeps [P];
1680
1681	NonLocalDepInfo &NLPDI = NLPD.NonLocalDeps;
1682
1683	// The cache is not valid for any specific block anymore.
1684	NLPD.Pair = BBSkipFirstBlockPair ();
1685
1686	// Update any entries for RemInst to use the instruction after it.
1687	for (auto &Entry : NLPDI) {
1688	if (Entry.getResult().getInst() != RemInst)
1689	continue;
1690
1691	// Convert to a dirty entry for the subsequent instruction.
1692	Entry.setResult(NewDirtyVal);
1693
1694	if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1695	ReversePtrDepsToAdd.push_back(Elt: std::make_pair(x&: NewDirtyInst, y&: P));
1696	}
1697
1698	// Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1699	// subsequent value may invalidate the sortedness.
1700	llvm::sort(C&: NLPDI);
1701	}
1702
1703	ReverseNonLocalPtrDeps.erase(I: ReversePtrDepIt);
1704
1705	while (!ReversePtrDepsToAdd.empty()) {
1706	ReverseNonLocalPtrDeps [ReversePtrDepsToAdd.back().first].insert(
1707	Ptr: ReversePtrDepsToAdd.back().second);
1708	ReversePtrDepsToAdd.pop_back();
1709	}
1710	}
1711
1712	assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
1713	LLVM_DEBUG(verifyRemoved(RemInst));
1714	}
1715
1716	/// Verify that the specified instruction does not occur in our internal data
1717	/// structures.
1718	///
1719	/// This function verifies by asserting in debug builds.
1720	void MemoryDependenceResults::verifyRemoved(Instruction D) const* {
1721	#ifndef NDEBUG
1722	for (const auto &DepKV : LocalDeps) {
1723	assert(DepKV.first != D && "Inst occurs in data structures");
1724	assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1725	}
1726
1727	for (const auto &DepKV : NonLocalPointerDeps) {
1728	assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1729	for (const auto &Entry : DepKV.second.NonLocalDeps)
1730	assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1731	}
1732
1733	for (const auto &DepKV : NonLocalDepsMap) {
1734	assert(DepKV.first != D && "Inst occurs in data structures");
1735	const PerInstNLInfo &INLD = DepKV.second;
1736	for (const auto &Entry : INLD.first)
1737	assert(Entry.getResult().getInst() != D &&
1738	"Inst occurs in data structures");
1739	}
1740
1741	for (const auto &DepKV : ReverseLocalDeps) {
1742	assert(DepKV.first != D && "Inst occurs in data structures");
1743	for (Instruction *Inst : DepKV.second)
1744	assert(Inst != D && "Inst occurs in data structures");
1745	}
1746
1747	for (const auto &DepKV : ReverseNonLocalDeps) {
1748	assert(DepKV.first != D && "Inst occurs in data structures");
1749	for (Instruction *Inst : DepKV.second)
1750	assert(Inst != D && "Inst occurs in data structures");
1751	}
1752
1753	for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1754	assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1755
1756	for (ValueIsLoadPair P : DepKV.second)
1757	assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1758	"Inst occurs in ReverseNonLocalPtrDeps map");
1759	}
1760	#endif
1761	}
1762
1763	AnalysisKey MemoryDependenceAnalysis::Key;
1764
1765	MemoryDependenceAnalysis::MemoryDependenceAnalysis()
1766	: DefaultBlockScanLimit(BlockScanLimit) {}
1767
1768	MemoryDependenceResults
1769	MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
1770	auto &AA = AM.getResult<AAManager>(IR&: F);
1771	auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
1772	auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
1773	auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
1774	return MemoryDependenceResults (AA, AC, TLI, DT, DefaultBlockScanLimit);
1775	}
1776
1777	char MemoryDependenceWrapperPass::ID = `0`;
1778
1779	INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1780	"Memory Dependence Analysis", false, true)
1781	INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1782	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1783	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1784	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1785	INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1786	"Memory Dependence Analysis", false, true)
1787
1788	MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass (ID) {}
1789
1790	MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1791
1792	void MemoryDependenceWrapperPass::releaseMemory() {
1793	MemDep.reset();
1794	}
1795
1796	void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1797	AU.setPreservesAll();
1798	AU.addRequired<AssumptionCacheTracker>();
1799	AU.addRequired<DominatorTreeWrapperPass>();
1800	AU.addRequiredTransitive<AAResultsWrapperPass>();
1801	AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1802	}
1803
1804	bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1805	FunctionAnalysisManager::Invalidator &Inv) {
1806	// Check whether our analysis is preserved.
1807	auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1808	if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1809	// If not, give up now.
1810	return true;
1811
1812	// Check whether the analyses we depend on became invalid for any reason.
1813	if (Inv.invalidate<AAManager>(IR&: F, PA) \|\|
1814	Inv.invalidate<AssumptionAnalysis>(IR&: F, PA) \|\|
1815	Inv.invalidate<DominatorTreeAnalysis>(IR&: F, PA))
1816	return true;
1817
1818	// Otherwise this analysis result remains valid.
1819	return false;
1820	}
1821
1822	unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1823	return DefaultBlockScanLimit;
1824	}
1825
1826	bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1827	auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1828	auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1829	auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1830	auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1831	MemDep.emplace(args&: AA, args&: AC, args&: TLI, args&: DT, args&: BlockScanLimit);
1832	return false;
1833	}
1834

Browse the source code of llvm_projects/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp