1 | //===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The code below implements dead store elimination using MemorySSA. It uses |
10 | // the following general approach: given a MemoryDef, walk upwards to find |
11 | // clobbering MemoryDefs that may be killed by the starting def. Then check |
12 | // that there are no uses that may read the location of the original MemoryDef |
13 | // in between both MemoryDefs. A bit more concretely: |
14 | // |
15 | // For all MemoryDefs StartDef: |
16 | // 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking |
17 | // upwards. |
18 | // 2. Check that there are no reads between MaybeDeadAccess and the StartDef by |
19 | // checking all uses starting at MaybeDeadAccess and walking until we see |
20 | // StartDef. |
// 3. For each found dead candidate (MaybeDeadAccess), check that:
//   1. There are no barrier instructions between MaybeDeadAccess and StartDef
//      (like throws or stores with ordering constraints).
//   2. StartDef is executed whenever MaybeDeadAccess is executed.
//   3. StartDef completely overwrites MaybeDeadAccess.
// 4. Erase MaybeDeadAccess from the function and MemorySSA.
27 | // |
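// For example, in the following IR the first store is dead, because it is
// completely overwritten by the second store before any intervening read:
//
//   store i32 0, ptr %p        ; MaybeDeadAccess, removed by DSE
//   store i32 1, ptr %p        ; StartDef (the killing store)
//   %v = load i32, ptr %p      ; only observes the killing store
//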
28 | //===----------------------------------------------------------------------===// |
29 | |
30 | #include "llvm/Transforms/Scalar/DeadStoreElimination.h" |
31 | #include "llvm/ADT/APInt.h" |
32 | #include "llvm/ADT/DenseMap.h" |
33 | #include "llvm/ADT/MapVector.h" |
34 | #include "llvm/ADT/PostOrderIterator.h" |
35 | #include "llvm/ADT/SetVector.h" |
36 | #include "llvm/ADT/SmallPtrSet.h" |
37 | #include "llvm/ADT/SmallVector.h" |
38 | #include "llvm/ADT/Statistic.h" |
39 | #include "llvm/ADT/StringRef.h" |
40 | #include "llvm/Analysis/AliasAnalysis.h" |
41 | #include "llvm/Analysis/CaptureTracking.h" |
42 | #include "llvm/Analysis/GlobalsModRef.h" |
43 | #include "llvm/Analysis/LoopInfo.h" |
44 | #include "llvm/Analysis/MemoryBuiltins.h" |
45 | #include "llvm/Analysis/MemoryLocation.h" |
46 | #include "llvm/Analysis/MemorySSA.h" |
47 | #include "llvm/Analysis/MemorySSAUpdater.h" |
48 | #include "llvm/Analysis/MustExecute.h" |
49 | #include "llvm/Analysis/PostDominators.h" |
50 | #include "llvm/Analysis/TargetLibraryInfo.h" |
51 | #include "llvm/Analysis/ValueTracking.h" |
52 | #include "llvm/IR/Argument.h" |
53 | #include "llvm/IR/BasicBlock.h" |
54 | #include "llvm/IR/Constant.h" |
55 | #include "llvm/IR/Constants.h" |
56 | #include "llvm/IR/DataLayout.h" |
57 | #include "llvm/IR/DebugInfo.h" |
58 | #include "llvm/IR/Dominators.h" |
59 | #include "llvm/IR/Function.h" |
60 | #include "llvm/IR/IRBuilder.h" |
61 | #include "llvm/IR/InstIterator.h" |
62 | #include "llvm/IR/InstrTypes.h" |
63 | #include "llvm/IR/Instruction.h" |
64 | #include "llvm/IR/Instructions.h" |
65 | #include "llvm/IR/IntrinsicInst.h" |
66 | #include "llvm/IR/Module.h" |
67 | #include "llvm/IR/PassManager.h" |
68 | #include "llvm/IR/PatternMatch.h" |
69 | #include "llvm/IR/Value.h" |
70 | #include "llvm/Support/Casting.h" |
71 | #include "llvm/Support/CommandLine.h" |
72 | #include "llvm/Support/Debug.h" |
73 | #include "llvm/Support/DebugCounter.h" |
74 | #include "llvm/Support/ErrorHandling.h" |
75 | #include "llvm/Support/raw_ostream.h" |
76 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
77 | #include "llvm/Transforms/Utils/BuildLibCalls.h" |
78 | #include "llvm/Transforms/Utils/Local.h" |
79 | #include <algorithm> |
80 | #include <cassert> |
81 | #include <cstdint> |
82 | #include <iterator> |
83 | #include <map> |
84 | #include <optional> |
85 | #include <utility> |
86 | |
87 | using namespace llvm; |
88 | using namespace PatternMatch; |
89 | |
90 | #define DEBUG_TYPE "dse" |
91 | |
92 | STATISTIC(NumRemainingStores, "Number of stores remaining after DSE" ); |
93 | STATISTIC(NumRedundantStores, "Number of redundant stores deleted" ); |
94 | STATISTIC(NumFastStores, "Number of stores deleted" ); |
95 | STATISTIC(NumFastOther, "Number of other instrs removed" ); |
96 | STATISTIC(NumCompletePartials, "Number of stores dead by later partials" ); |
97 | STATISTIC(NumModifiedStores, "Number of stores modified" ); |
STATISTIC(NumCFGChecks, "Number of blocks checked in CFG path analysis");
STATISTIC(NumCFGTries, "Number of CFG path checks attempted");
STATISTIC(NumCFGSuccess, "Number of successful CFG path checks");
101 | STATISTIC(NumGetDomMemoryDefPassed, |
102 | "Number of times a valid candidate is returned from getDomMemoryDef" ); |
STATISTIC(NumDomMemDefChecks,
          "Number of iterations checking for reads in getDomMemoryDef");
105 | |
106 | DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa" , |
107 | "Controls which MemoryDefs are eliminated." ); |
108 | |
109 | static cl::opt<bool> |
110 | EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking" , |
111 | cl::init(Val: true), cl::Hidden, |
112 | cl::desc("Enable partial-overwrite tracking in DSE" )); |
113 | |
114 | static cl::opt<bool> |
115 | EnablePartialStoreMerging("enable-dse-partial-store-merging" , |
116 | cl::init(Val: true), cl::Hidden, |
117 | cl::desc("Enable partial store merging in DSE" )); |
118 | |
119 | static cl::opt<unsigned> |
120 | MemorySSAScanLimit("dse-memoryssa-scanlimit" , cl::init(Val: 150), cl::Hidden, |
121 | cl::desc("The number of memory instructions to scan for " |
122 | "dead store elimination (default = 150)" )); |
123 | static cl::opt<unsigned> MemorySSAUpwardsStepLimit( |
124 | "dse-memoryssa-walklimit" , cl::init(Val: 90), cl::Hidden, |
125 | cl::desc("The maximum number of steps while walking upwards to find " |
126 | "MemoryDefs that may be killed (default = 90)" )); |
127 | |
128 | static cl::opt<unsigned> MemorySSAPartialStoreLimit( |
129 | "dse-memoryssa-partial-store-limit" , cl::init(Val: 5), cl::Hidden, |
    cl::desc("The maximum number of candidates that only partially overwrite "
             "the killing MemoryDef to consider (default = 5)"));
133 | |
134 | static cl::opt<unsigned> MemorySSADefsPerBlockLimit( |
135 | "dse-memoryssa-defs-per-block-limit" , cl::init(Val: 5000), cl::Hidden, |
    cl::desc("The number of MemoryDefs we consider as candidates to eliminate "
             "other stores per basic block (default = 5000)"));
138 | |
139 | static cl::opt<unsigned> MemorySSASameBBStepCost( |
140 | "dse-memoryssa-samebb-cost" , cl::init(Val: 1), cl::Hidden, |
    cl::desc(
        "The cost of a step in the same basic block as the killing MemoryDef "
        "(default = 1)"));
144 | |
145 | static cl::opt<unsigned> |
146 | MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost" , cl::init(Val: 5), |
147 | cl::Hidden, |
                             cl::desc("The cost of a step in a different basic "
                                      "block than the killing MemoryDef "
                                      "(default = 5)"));
151 | |
152 | static cl::opt<unsigned> MemorySSAPathCheckLimit( |
153 | "dse-memoryssa-path-check-limit" , cl::init(Val: 50), cl::Hidden, |
154 | cl::desc("The maximum number of blocks to check when trying to prove that " |
155 | "all paths to an exit go through a killing block (default = 50)" )); |
156 | |
// This flag allows or disallows DSE to optimize MemorySSA during its
158 | // traversal. Note that DSE optimizing MemorySSA may impact other passes |
159 | // downstream of the DSE invocation and can lead to issues not being |
160 | // reproducible in isolation (i.e. when MemorySSA is built from scratch). In |
161 | // those cases, the flag can be used to check if DSE's MemorySSA optimizations |
162 | // impact follow-up passes. |
163 | static cl::opt<bool> |
164 | OptimizeMemorySSA("dse-optimize-memoryssa" , cl::init(Val: true), cl::Hidden, |
165 | cl::desc("Allow DSE to optimize memory accesses." )); |
166 | |
167 | //===----------------------------------------------------------------------===// |
168 | // Helper functions |
169 | //===----------------------------------------------------------------------===// |
170 | using OverlapIntervalsTy = std::map<int64_t, int64_t>; |
171 | using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>; |
172 | |
173 | /// Returns true if the end of this instruction can be safely shortened in |
174 | /// length. |
175 | static bool isShortenableAtTheEnd(Instruction *I) { |
176 | // Don't shorten stores for now |
177 | if (isa<StoreInst>(Val: I)) |
178 | return false; |
179 | |
180 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
181 | switch (II->getIntrinsicID()) { |
182 | default: return false; |
183 | case Intrinsic::memset: |
184 | case Intrinsic::memcpy: |
185 | case Intrinsic::memcpy_element_unordered_atomic: |
186 | case Intrinsic::memset_element_unordered_atomic: |
187 | // Do shorten memory intrinsics. |
188 | // FIXME: Add memmove if it's also safe to transform. |
189 | return true; |
190 | } |
191 | } |
192 | |
  // Don't shorten libcalls for now.
194 | |
195 | return false; |
196 | } |
197 | |
198 | /// Returns true if the beginning of this instruction can be safely shortened |
199 | /// in length. |
200 | static bool isShortenableAtTheBeginning(Instruction *I) { |
201 | // FIXME: Handle only memset for now. Supporting memcpy/memmove should be |
202 | // easily done by offsetting the source address. |
203 | return isa<AnyMemSetInst>(Val: I); |
204 | } |
205 | |
206 | static std::optional<TypeSize> getPointerSize(const Value *V, |
207 | const DataLayout &DL, |
208 | const TargetLibraryInfo &TLI, |
209 | const Function *F) { |
210 | uint64_t Size; |
211 | ObjectSizeOpts Opts; |
212 | Opts.NullIsUnknownSize = NullPointerIsDefined(F); |
213 | |
214 | if (getObjectSize(Ptr: V, Size, DL, TLI: &TLI, Opts)) |
215 | return TypeSize::getFixed(ExactSize: Size); |
216 | return std::nullopt; |
217 | } |
218 | |
219 | namespace { |
220 | |
221 | enum OverwriteResult { |
222 | OW_Begin, |
223 | OW_Complete, |
224 | OW_End, |
225 | OW_PartialEarlierWithFullLater, |
226 | OW_MaybePartial, |
227 | OW_None, |
228 | OW_Unknown |
229 | }; |
230 | |
231 | } // end anonymous namespace |
232 | |
/// Check if two instructions are masked stores that completely
234 | /// overwrite one another. More specifically, \p KillingI has to |
235 | /// overwrite \p DeadI. |
236 | static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, |
237 | const Instruction *DeadI, |
238 | BatchAAResults &AA) { |
239 | const auto *KillingII = dyn_cast<IntrinsicInst>(Val: KillingI); |
240 | const auto *DeadII = dyn_cast<IntrinsicInst>(Val: DeadI); |
241 | if (KillingII == nullptr || DeadII == nullptr) |
242 | return OW_Unknown; |
243 | if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID()) |
244 | return OW_Unknown; |
245 | if (KillingII->getIntrinsicID() == Intrinsic::masked_store) { |
246 | // Type size. |
247 | VectorType *KillingTy = |
248 | cast<VectorType>(Val: KillingII->getArgOperand(i: 0)->getType()); |
249 | VectorType *DeadTy = cast<VectorType>(Val: DeadII->getArgOperand(i: 0)->getType()); |
250 | if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits()) |
251 | return OW_Unknown; |
252 | // Element count. |
253 | if (KillingTy->getElementCount() != DeadTy->getElementCount()) |
254 | return OW_Unknown; |
255 | // Pointers. |
256 | Value *KillingPtr = KillingII->getArgOperand(i: 1)->stripPointerCasts(); |
257 | Value *DeadPtr = DeadII->getArgOperand(i: 1)->stripPointerCasts(); |
258 | if (KillingPtr != DeadPtr && !AA.isMustAlias(V1: KillingPtr, V2: DeadPtr)) |
259 | return OW_Unknown; |
260 | // Masks. |
261 | // TODO: check that KillingII's mask is a superset of the DeadII's mask. |
262 | if (KillingII->getArgOperand(i: 3) != DeadII->getArgOperand(i: 3)) |
263 | return OW_Unknown; |
264 | return OW_Complete; |
265 | } |
266 | return OW_Unknown; |
267 | } |
268 | |
269 | /// Return 'OW_Complete' if a store to the 'KillingLoc' location completely |
270 | /// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the |
271 | /// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin' |
272 | /// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'. |
273 | /// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was |
274 | /// overwritten by a killing (smaller) store which doesn't write outside the big |
275 | /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. |
276 | /// NOTE: This function must only be called if both \p KillingLoc and \p |
277 | /// DeadLoc belong to the same underlying object with valid \p KillingOff and |
278 | /// \p DeadOff. |
279 | static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, |
280 | const MemoryLocation &DeadLoc, |
281 | int64_t KillingOff, int64_t DeadOff, |
282 | Instruction *DeadI, |
283 | InstOverlapIntervalsTy &IOL) { |
284 | const uint64_t KillingSize = KillingLoc.Size.getValue(); |
285 | const uint64_t DeadSize = DeadLoc.Size.getValue(); |
286 | // We may now overlap, although the overlap is not complete. There might also |
287 | // be other incomplete overlaps, and together, they might cover the complete |
288 | // dead store. |
  // Note: The correctness of this logic depends on the fact that this function
  // is not called at all when there are any intervening reads of the dead
  // store.
291 | if (EnablePartialOverwriteTracking && |
292 | KillingOff < int64_t(DeadOff + DeadSize) && |
293 | int64_t(KillingOff + KillingSize) >= DeadOff) { |
294 | |
295 | // Insert our part of the overlap into the map. |
296 | auto &IM = IOL[DeadI]; |
297 | LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", " |
298 | << int64_t(DeadOff + DeadSize) << ") KillingLoc [" |
299 | << KillingOff << ", " << int64_t(KillingOff + KillingSize) |
300 | << ")\n" ); |
301 | |
302 | // Make sure that we only insert non-overlapping intervals and combine |
303 | // adjacent intervals. The intervals are stored in the map with the ending |
304 | // offset as the key (in the half-open sense) and the starting offset as |
305 | // the value. |
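    // For example, a killing write covering bytes [4, 12) is recorded as
    // IM[12] = 4; a later killing write covering [12, 16) then extends that
    // entry to IM[16] = 4.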
306 | int64_t KillingIntStart = KillingOff; |
307 | int64_t KillingIntEnd = KillingOff + KillingSize; |
308 | |
309 | // Find any intervals ending at, or after, KillingIntStart which start |
310 | // before KillingIntEnd. |
311 | auto ILI = IM.lower_bound(x: KillingIntStart); |
312 | if (ILI != IM.end() && ILI->second <= KillingIntEnd) { |
313 | // This existing interval is overlapped with the current store somewhere |
314 | // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing |
315 | // intervals and adjusting our start and end. |
316 | KillingIntStart = std::min(a: KillingIntStart, b: ILI->second); |
317 | KillingIntEnd = std::max(a: KillingIntEnd, b: ILI->first); |
318 | ILI = IM.erase(position: ILI); |
319 | |
320 | // Continue erasing and adjusting our end in case other previous |
321 | // intervals are also overlapped with the current store. |
322 | // |
323 | // |--- dead 1 ---| |--- dead 2 ---| |
324 | // |------- killing---------| |
325 | // |
326 | while (ILI != IM.end() && ILI->second <= KillingIntEnd) { |
327 | assert(ILI->second > KillingIntStart && "Unexpected interval" ); |
328 | KillingIntEnd = std::max(a: KillingIntEnd, b: ILI->first); |
329 | ILI = IM.erase(position: ILI); |
330 | } |
331 | } |
332 | |
333 | IM[KillingIntEnd] = KillingIntStart; |
334 | |
335 | ILI = IM.begin(); |
336 | if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) { |
337 | LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc [" |
338 | << DeadOff << ", " << int64_t(DeadOff + DeadSize) |
339 | << ") Composite KillingLoc [" << ILI->second << ", " |
340 | << ILI->first << ")\n" ); |
341 | ++NumCompletePartials; |
342 | return OW_Complete; |
343 | } |
344 | } |
345 | |
346 | // Check for a dead store which writes to all the memory locations that |
347 | // the killing store writes to. |
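  // That is, the killing store's range is fully contained in the dead one:
  //
  //      |-- killing --|
  //  |--------- dead ---------|
  //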
348 | if (EnablePartialStoreMerging && KillingOff >= DeadOff && |
349 | int64_t(DeadOff + DeadSize) > KillingOff && |
350 | uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) { |
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of a dead store [" << DeadOff
352 | << ", " << int64_t(DeadOff + DeadSize) |
353 | << ") by a killing store [" << KillingOff << ", " |
354 | << int64_t(KillingOff + KillingSize) << ")\n" ); |
355 | // TODO: Maybe come up with a better name? |
356 | return OW_PartialEarlierWithFullLater; |
357 | } |
358 | |
359 | // Another interesting case is if the killing store overwrites the end of the |
360 | // dead store. |
361 | // |
362 | // |--dead--| |
363 | // |-- killing --| |
364 | // |
  // In this case we may want to trim the size of the dead store to avoid
  // generating stores to addresses which will definitely be overwritten by the
  // killing store.
368 | if (!EnablePartialOverwriteTracking && |
369 | (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) && |
370 | int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize))) |
371 | return OW_End; |
372 | |
373 | // Finally, we also need to check if the killing store overwrites the |
374 | // beginning of the dead store. |
375 | // |
376 | // |--dead--| |
377 | // |-- killing --| |
378 | // |
  // In this case we may want to move the destination address and trim the size
  // of the dead store to avoid generating stores to addresses which will
  // definitely be overwritten by the killing store.
382 | if (!EnablePartialOverwriteTracking && |
383 | (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) { |
384 | assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) && |
385 | "Expect to be handled as OW_Complete" ); |
386 | return OW_Begin; |
387 | } |
388 | // Otherwise, they don't completely overlap. |
389 | return OW_Unknown; |
390 | } |
391 | |
392 | /// Returns true if the memory which is accessed by the second instruction is not |
393 | /// modified between the first and the second instruction. |
394 | /// Precondition: Second instruction must be dominated by the first |
395 | /// instruction. |
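/// For example (illustrative), given
///   store i32 0, ptr %p      ; FirstI
///   call void @f()
///   store i16 1, ptr %p      ; SecondI
/// this returns true only if the call (and any other instruction on a path
/// between the two stores) cannot modify the memory accessed by SecondI.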
396 | static bool |
397 | memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, |
398 | BatchAAResults &AA, const DataLayout &DL, |
399 | DominatorTree *DT) { |
400 | // Do a backwards scan through the CFG from SecondI to FirstI. Look for |
401 | // instructions which can modify the memory location accessed by SecondI. |
402 | // |
403 | // While doing the walk keep track of the address to check. It might be |
404 | // different in different basic blocks due to PHI translation. |
405 | using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>; |
406 | SmallVector<BlockAddressPair, 16> WorkList; |
407 | // Keep track of the address we visited each block with. Bail out if we |
408 | // visit a block with different addresses. |
409 | DenseMap<BasicBlock *, Value *> Visited; |
410 | |
411 | BasicBlock::iterator FirstBBI(FirstI); |
412 | ++FirstBBI; |
413 | BasicBlock::iterator SecondBBI(SecondI); |
414 | BasicBlock *FirstBB = FirstI->getParent(); |
415 | BasicBlock *SecondBB = SecondI->getParent(); |
416 | MemoryLocation MemLoc; |
417 | if (auto *MemSet = dyn_cast<MemSetInst>(Val: SecondI)) |
418 | MemLoc = MemoryLocation::getForDest(MI: MemSet); |
419 | else |
420 | MemLoc = MemoryLocation::get(Inst: SecondI); |
421 | |
422 | auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr); |
423 | |
424 | // Start checking the SecondBB. |
425 | WorkList.push_back( |
426 | Elt: std::make_pair(x&: SecondBB, y: PHITransAddr(MemLocPtr, DL, nullptr))); |
427 | bool isFirstBlock = true; |
428 | |
429 | // Check all blocks going backward until we reach the FirstBB. |
430 | while (!WorkList.empty()) { |
431 | BlockAddressPair Current = WorkList.pop_back_val(); |
432 | BasicBlock *B = Current.first; |
433 | PHITransAddr &Addr = Current.second; |
434 | Value *Ptr = Addr.getAddr(); |
435 | |
436 | // Ignore instructions before FirstI if this is the FirstBB. |
437 | BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin()); |
438 | |
439 | BasicBlock::iterator EI; |
440 | if (isFirstBlock) { |
441 | // Ignore instructions after SecondI if this is the first visit of SecondBB. |
442 | assert(B == SecondBB && "first block is not the store block" ); |
443 | EI = SecondBBI; |
444 | isFirstBlock = false; |
445 | } else { |
446 | // It's not SecondBB or (in case of a loop) the second visit of SecondBB. |
447 | // In this case we also have to look at instructions after SecondI. |
448 | EI = B->end(); |
449 | } |
450 | for (; BI != EI; ++BI) { |
451 | Instruction *I = &*BI; |
452 | if (I->mayWriteToMemory() && I != SecondI) |
453 | if (isModSet(MRI: AA.getModRefInfo(I, OptLoc: MemLoc.getWithNewPtr(NewPtr: Ptr)))) |
454 | return false; |
455 | } |
456 | if (B != FirstBB) { |
      assert(B != &FirstBB->getParent()->getEntryBlock() &&
             "Should not hit the entry block because SecondI must be "
             "dominated by FirstI");
459 | for (BasicBlock *Pred : predecessors(BB: B)) { |
460 | PHITransAddr PredAddr = Addr; |
461 | if (PredAddr.needsPHITranslationFromBlock(BB: B)) { |
462 | if (!PredAddr.isPotentiallyPHITranslatable()) |
463 | return false; |
464 | if (!PredAddr.translateValue(CurBB: B, PredBB: Pred, DT, MustDominate: false)) |
465 | return false; |
466 | } |
467 | Value *TranslatedPtr = PredAddr.getAddr(); |
468 | auto Inserted = Visited.insert(KV: std::make_pair(x&: Pred, y&: TranslatedPtr)); |
469 | if (!Inserted.second) { |
470 | // We already visited this block before. If it was with a different |
471 | // address - bail out! |
472 | if (TranslatedPtr != Inserted.first->second) |
473 | return false; |
474 | // ... otherwise just skip it. |
475 | continue; |
476 | } |
477 | WorkList.push_back(Elt: std::make_pair(x&: Pred, y&: PredAddr)); |
478 | } |
479 | } |
480 | } |
481 | return true; |
482 | } |
483 | |
484 | static void shortenAssignment(Instruction *Inst, Value *OriginalDest, |
485 | uint64_t OldOffsetInBits, uint64_t OldSizeInBits, |
486 | uint64_t NewSizeInBits, bool IsOverwriteEnd) { |
487 | const DataLayout &DL = Inst->getDataLayout(); |
488 | uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits; |
489 | uint64_t DeadSliceOffsetInBits = |
490 | OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0); |
491 | auto SetDeadFragExpr = [](auto *Assign, |
492 | DIExpression::FragmentInfo DeadFragment) { |
493 | // createFragmentExpression expects an offset relative to the existing |
494 | // fragment offset if there is one. |
495 | uint64_t RelativeOffset = DeadFragment.OffsetInBits - |
496 | Assign->getExpression() |
497 | ->getFragmentInfo() |
498 | .value_or(DIExpression::FragmentInfo(0, 0)) |
499 | .OffsetInBits; |
500 | if (auto NewExpr = DIExpression::createFragmentExpression( |
501 | Expr: Assign->getExpression(), OffsetInBits: RelativeOffset, SizeInBits: DeadFragment.SizeInBits)) { |
502 | Assign->setExpression(*NewExpr); |
503 | return; |
504 | } |
505 | // Failed to create a fragment expression for this so discard the value, |
506 | // making this a kill location. |
507 | auto *Expr = *DIExpression::createFragmentExpression( |
508 | Expr: DIExpression::get(Context&: Assign->getContext(), Elements: std::nullopt), |
509 | OffsetInBits: DeadFragment.OffsetInBits, SizeInBits: DeadFragment.SizeInBits); |
510 | Assign->setExpression(Expr); |
511 | Assign->setKillLocation(); |
512 | }; |
513 | |
514 | // A DIAssignID to use so that the inserted dbg.assign intrinsics do not |
515 | // link to any instructions. Created in the loop below (once). |
516 | DIAssignID *LinkToNothing = nullptr; |
517 | LLVMContext &Ctx = Inst->getContext(); |
518 | auto GetDeadLink = [&Ctx, &LinkToNothing]() { |
519 | if (!LinkToNothing) |
520 | LinkToNothing = DIAssignID::getDistinct(Context&: Ctx); |
521 | return LinkToNothing; |
522 | }; |
523 | |
524 | // Insert an unlinked dbg.assign intrinsic for the dead fragment after each |
525 | // overlapping dbg.assign intrinsic. The loop invalidates the iterators |
526 | // returned by getAssignmentMarkers so save a copy of the markers to iterate |
527 | // over. |
528 | auto LinkedRange = at::getAssignmentMarkers(Inst); |
529 | SmallVector<DbgVariableRecord *> LinkedDVRAssigns = |
530 | at::getDVRAssignmentMarkers(Inst); |
531 | SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(), |
532 | LinkedRange.end()); |
533 | auto InsertAssignForOverlap = [&](auto *Assign) { |
534 | std::optional<DIExpression::FragmentInfo> NewFragment; |
535 | if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits, |
536 | DeadSliceSizeInBits, Assign, |
537 | NewFragment) || |
538 | !NewFragment) { |
539 | // We couldn't calculate the intersecting fragment for some reason. Be |
540 | // cautious and unlink the whole assignment from the store. |
541 | Assign->setKillAddress(); |
542 | Assign->setAssignId(GetDeadLink()); |
543 | return; |
544 | } |
545 | // No intersect. |
546 | if (NewFragment->SizeInBits == 0) |
547 | return; |
548 | |
549 | // Fragments overlap: insert a new dbg.assign for this dead part. |
550 | auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone()); |
551 | NewAssign->insertAfter(Assign); |
552 | NewAssign->setAssignId(GetDeadLink()); |
553 | if (NewFragment) |
554 | SetDeadFragExpr(NewAssign, *NewFragment); |
555 | NewAssign->setKillAddress(); |
556 | }; |
557 | for_each(Range&: Linked, F: InsertAssignForOverlap); |
558 | for_each(Range&: LinkedDVRAssigns, F: InsertAssignForOverlap); |
559 | } |
560 | |
561 | static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, |
562 | uint64_t &DeadSize, int64_t KillingStart, |
563 | uint64_t KillingSize, bool IsOverwriteEnd) { |
564 | auto *DeadIntrinsic = cast<AnyMemIntrinsic>(Val: DeadI); |
565 | Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne(); |
566 | |
  // We assume that memset/memcpy operates in chunks of the "largest" native
  // type size, aligned on that same value. That means the optimal start and
  // size of a memset/memcpy should be a multiple of the preferred alignment of
  // that type, i.e. there is no sense in trying to reduce the store size any
  // further, since any "extra" stores come for free anyway.
  // On the other hand, the maximum alignment we can achieve is limited by the
  // alignment of the initial store.
574 | |
575 | // TODO: Limit maximum alignment by preferred (or abi?) alignment of the |
576 | // "largest" native type. |
577 | // Note: What is the proper way to get that value? |
578 | // Should TargetTransformInfo::getRegisterBitWidth be used or anything else? |
579 | // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign); |
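  // For example (illustrative): when shortening the end of a 32-byte memset
  // with PrefAlign = 8 that is overwritten from byte offset 20 onwards, only
  // [24, 32) is removed, leaving a 24-byte memset so that the remaining length
  // stays a multiple of the preferred alignment.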
580 | |
581 | int64_t ToRemoveStart = 0; |
582 | uint64_t ToRemoveSize = 0; |
583 | // Compute start and size of the region to remove. Make sure 'PrefAlign' is |
584 | // maintained on the remaining store. |
585 | if (IsOverwriteEnd) { |
586 | // Calculate required adjustment for 'KillingStart' in order to keep |
    // remaining store size aligned on 'PrefAlign'.
588 | uint64_t Off = |
589 | offsetToAlignment(Value: uint64_t(KillingStart - DeadStart), Alignment: PrefAlign); |
590 | ToRemoveStart = KillingStart + Off; |
591 | if (DeadSize <= uint64_t(ToRemoveStart - DeadStart)) |
592 | return false; |
593 | ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart); |
594 | } else { |
595 | ToRemoveStart = DeadStart; |
596 | assert(KillingSize >= uint64_t(DeadStart - KillingStart) && |
597 | "Not overlapping accesses?" ); |
598 | ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart); |
    // Calculate the required adjustment for 'ToRemoveSize' in order to keep
    // the start of the remaining store aligned on 'PrefAlign'.
601 | uint64_t Off = offsetToAlignment(Value: ToRemoveSize, Alignment: PrefAlign); |
602 | if (Off != 0) { |
603 | if (ToRemoveSize <= (PrefAlign.value() - Off)) |
604 | return false; |
605 | ToRemoveSize -= PrefAlign.value() - Off; |
606 | } |
607 | assert(isAligned(PrefAlign, ToRemoveSize) && |
608 | "Should preserve selected alignment" ); |
609 | } |
610 | |
611 | assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove" ); |
612 | assert(DeadSize > ToRemoveSize && "Can't remove more than original size" ); |
613 | |
614 | uint64_t NewSize = DeadSize - ToRemoveSize; |
615 | if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(Val: DeadI)) { |
616 | // When shortening an atomic memory intrinsic, the newly shortened |
617 | // length must remain an integer multiple of the element size. |
618 | const uint32_t ElementSize = AMI->getElementSizeInBytes(); |
619 | if (0 != NewSize % ElementSize) |
620 | return false; |
621 | } |
622 | |
623 | LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " |
624 | << (IsOverwriteEnd ? "END" : "BEGIN" ) << ": " << *DeadI |
625 | << "\n KILLER [" << ToRemoveStart << ", " |
626 | << int64_t(ToRemoveStart + ToRemoveSize) << ")\n" ); |
627 | |
628 | Value *DeadWriteLength = DeadIntrinsic->getLength(); |
629 | Value *TrimmedLength = ConstantInt::get(Ty: DeadWriteLength->getType(), V: NewSize); |
630 | DeadIntrinsic->setLength(TrimmedLength); |
631 | DeadIntrinsic->setDestAlignment(PrefAlign); |
632 | |
633 | Value *OrigDest = DeadIntrinsic->getRawDest(); |
634 | if (!IsOverwriteEnd) { |
635 | Value *Indices[1] = { |
636 | ConstantInt::get(Ty: DeadWriteLength->getType(), V: ToRemoveSize)}; |
637 | Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds( |
638 | PointeeType: Type::getInt8Ty(C&: DeadIntrinsic->getContext()), Ptr: OrigDest, IdxList: Indices, NameStr: "" , |
639 | InsertBefore: DeadI->getIterator()); |
640 | NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc()); |
641 | DeadIntrinsic->setDest(NewDestGEP); |
642 | } |
643 | |
644 | // Update attached dbg.assign intrinsics. Assume 8-bit byte. |
645 | shortenAssignment(Inst: DeadI, OriginalDest: OrigDest, OldOffsetInBits: DeadStart * 8, OldSizeInBits: DeadSize * 8, NewSizeInBits: NewSize * 8, |
646 | IsOverwriteEnd); |
647 | |
648 | // Finally update start and size of dead access. |
649 | if (!IsOverwriteEnd) |
650 | DeadStart += ToRemoveSize; |
651 | DeadSize = NewSize; |
652 | |
653 | return true; |
654 | } |
655 | |
656 | static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, |
657 | int64_t &DeadStart, uint64_t &DeadSize) { |
658 | if (IntervalMap.empty() || !isShortenableAtTheEnd(I: DeadI)) |
659 | return false; |
660 | |
661 | OverlapIntervalsTy::iterator OII = --IntervalMap.end(); |
662 | int64_t KillingStart = OII->second; |
663 | uint64_t KillingSize = OII->first - KillingStart; |
664 | |
665 | assert(OII->first - KillingStart >= 0 && "Size expected to be positive" ); |
666 | |
667 | if (KillingStart > DeadStart && |
      // Note: "KillingStart - DeadStart" is known to be positive due to
      // the preceding check.
      (uint64_t)(KillingStart - DeadStart) < DeadSize &&
      // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
      // be non-negative due to the preceding checks.
673 | KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) { |
674 | if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize, |
675 | IsOverwriteEnd: true)) { |
676 | IntervalMap.erase(position: OII); |
677 | return true; |
678 | } |
679 | } |
680 | return false; |
681 | } |
682 | |
683 | static bool tryToShortenBegin(Instruction *DeadI, |
684 | OverlapIntervalsTy &IntervalMap, |
685 | int64_t &DeadStart, uint64_t &DeadSize) { |
686 | if (IntervalMap.empty() || !isShortenableAtTheBeginning(I: DeadI)) |
687 | return false; |
688 | |
689 | OverlapIntervalsTy::iterator OII = IntervalMap.begin(); |
690 | int64_t KillingStart = OII->second; |
691 | uint64_t KillingSize = OII->first - KillingStart; |
692 | |
693 | assert(OII->first - KillingStart >= 0 && "Size expected to be positive" ); |
694 | |
695 | if (KillingStart <= DeadStart && |
696 | // Note: "DeadStart - KillingStart" is known to be non negative due to |
697 | // preceding check. |
698 | KillingSize > (uint64_t)(DeadStart - KillingStart)) { |
    // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
    // be positive due to the preceding checks.
701 | assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize && |
702 | "Should have been handled as OW_Complete" ); |
703 | if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize, |
704 | IsOverwriteEnd: false)) { |
705 | IntervalMap.erase(position: OII); |
706 | return true; |
707 | } |
708 | } |
709 | return false; |
710 | } |
711 | |
712 | static Constant * |
713 | tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, |
714 | int64_t KillingOffset, int64_t DeadOffset, |
715 | const DataLayout &DL, BatchAAResults &AA, |
716 | DominatorTree *DT) { |
717 | |
718 | if (DeadI && isa<ConstantInt>(Val: DeadI->getValueOperand()) && |
719 | DL.typeSizeEqualsStoreSize(Ty: DeadI->getValueOperand()->getType()) && |
720 | KillingI && isa<ConstantInt>(Val: KillingI->getValueOperand()) && |
721 | DL.typeSizeEqualsStoreSize(Ty: KillingI->getValueOperand()->getType()) && |
722 | memoryIsNotModifiedBetween(FirstI: DeadI, SecondI: KillingI, AA, DL, DT)) { |
    // If the store we find is:
    //   a) partially overwritten by the store to 'Loc',
    //   b) the killing store is fully contained in the dead one,
    //   c) they both have a constant value, and
    //   d) neither of the two stores needs padding,
    // then merge the two stores, replacing the dead store's value with a
    // merge of both values.
730 | // TODO: Deal with other constant types (vectors, etc), and probably |
731 | // some mem intrinsics (if needed) |
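    // For example (illustrative, little-endian): a dead i32 store of
    // 0x11223344 at offset 0 partially overwritten by a killing i16 store of
    // 0xAABB at offset 2 gives BitOffsetDiff = 16, LShiftAmount = 16 and a
    // merged value of 0xAABB3344.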
732 | |
733 | APInt DeadValue = cast<ConstantInt>(Val: DeadI->getValueOperand())->getValue(); |
734 | APInt KillingValue = |
735 | cast<ConstantInt>(Val: KillingI->getValueOperand())->getValue(); |
736 | unsigned KillingBits = KillingValue.getBitWidth(); |
737 | assert(DeadValue.getBitWidth() > KillingValue.getBitWidth()); |
738 | KillingValue = KillingValue.zext(width: DeadValue.getBitWidth()); |
739 | |
740 | // Offset of the smaller store inside the larger store |
741 | unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8; |
742 | unsigned LShiftAmount = |
743 | DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits |
744 | : BitOffsetDiff; |
745 | APInt Mask = APInt::getBitsSet(numBits: DeadValue.getBitWidth(), loBit: LShiftAmount, |
746 | hiBit: LShiftAmount + KillingBits); |
747 | // Clear the bits we'll be replacing, then OR with the smaller |
748 | // store, shifted appropriately. |
749 | APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount); |
750 | LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI |
751 | << "\n Killing: " << *KillingI |
752 | << "\n Merged Value: " << Merged << '\n'); |
753 | return ConstantInt::get(Ty: DeadI->getValueOperand()->getType(), V: Merged); |
754 | } |
755 | return nullptr; |
756 | } |
757 | |
758 | namespace { |
759 | // Returns true if \p I is an intrinsic that does not read or write memory. |
760 | bool isNoopIntrinsic(Instruction *I) { |
761 | if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
762 | switch (II->getIntrinsicID()) { |
763 | case Intrinsic::lifetime_start: |
764 | case Intrinsic::lifetime_end: |
765 | case Intrinsic::invariant_end: |
766 | case Intrinsic::launder_invariant_group: |
767 | case Intrinsic::assume: |
768 | return true; |
769 | case Intrinsic::dbg_declare: |
770 | case Intrinsic::dbg_label: |
771 | case Intrinsic::dbg_value: |
772 | llvm_unreachable("Intrinsic should not be modeled in MemorySSA" ); |
773 | default: |
774 | return false; |
775 | } |
776 | } |
777 | return false; |
778 | } |
779 | |
780 | // Check if we can ignore \p D for DSE. |
781 | bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { |
782 | Instruction *DI = D->getMemoryInst(); |
783 | // Calls that only access inaccessible memory cannot read or write any memory |
784 | // locations we consider for elimination. |
785 | if (auto *CB = dyn_cast<CallBase>(Val: DI)) |
786 | if (CB->onlyAccessesInaccessibleMemory()) |
787 | return true; |
788 | |
789 | // We can eliminate stores to locations not visible to the caller across |
790 | // throwing instructions. |
791 | if (DI->mayThrow() && !DefVisibleToCaller) |
792 | return true; |
793 | |
794 | // We can remove the dead stores, irrespective of the fence and its ordering |
  // (release/acquire/seq_cst). Fences only constrain the ordering of
  // already visible stores; they do not make a store visible to other
  // threads. So, skipping over a fence does not change a store from being
798 | // dead. |
799 | if (isa<FenceInst>(Val: DI)) |
800 | return true; |
801 | |
802 | // Skip intrinsics that do not really read or modify memory. |
803 | if (isNoopIntrinsic(I: DI)) |
804 | return true; |
805 | |
806 | return false; |
807 | } |
808 | |
809 | struct DSEState { |
810 | Function &F; |
811 | AliasAnalysis &AA; |
812 | EarliestEscapeInfo EI; |
813 | |
814 | /// The single BatchAA instance that is used to cache AA queries. It will |
815 | /// not be invalidated over the whole run. This is safe, because: |
816 | /// 1. Only memory writes are removed, so the alias cache for memory |
817 | /// locations remains valid. |
818 | /// 2. No new instructions are added (only instructions removed), so cached |
819 | /// information for a deleted value cannot be accessed by a re-used new |
820 | /// value pointer. |
821 | BatchAAResults BatchAA; |
822 | |
823 | MemorySSA &MSSA; |
824 | DominatorTree &DT; |
825 | PostDominatorTree &PDT; |
826 | const TargetLibraryInfo &TLI; |
827 | const DataLayout &DL; |
828 | const LoopInfo &LI; |
829 | |
830 | // Whether the function contains any irreducible control flow, useful for |
831 | // being accurately able to detect loops. |
832 | bool ContainsIrreducibleLoops; |
833 | |
834 | // All MemoryDefs that potentially could kill other MemDefs. |
835 | SmallVector<MemoryDef *, 64> MemDefs; |
836 | // Any that should be skipped as they are already deleted |
837 | SmallPtrSet<MemoryAccess *, 4> SkipStores; |
838 | // Keep track whether a given object is captured before return or not. |
839 | DenseMap<const Value *, bool> CapturedBeforeReturn; |
840 | // Keep track of all of the objects that are invisible to the caller after |
841 | // the function returns. |
842 | DenseMap<const Value *, bool> InvisibleToCallerAfterRet; |
843 | // Keep track of blocks with throwing instructions not modeled in MemorySSA. |
844 | SmallPtrSet<BasicBlock *, 16> ThrowingBlocks; |
845 | // Post-order numbers for each basic block. Used to figure out if memory |
846 | // accesses are executed before another access. |
847 | DenseMap<BasicBlock *, unsigned> PostOrderNumbers; |
848 | |
849 | /// Keep track of instructions (partly) overlapping with killing MemoryDefs per |
850 | /// basic block. |
851 | MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs; |
852 | // Check if there are root nodes that are terminated by UnreachableInst. |
853 | // Those roots pessimize post-dominance queries. If there are such roots, |
854 | // fall back to CFG scan starting from all non-unreachable roots. |
855 | bool AnyUnreachableExit; |
856 | |
857 | // Whether or not we should iterate on removing dead stores at the end of the |
858 | // function due to removing a store causing a previously captured pointer to |
859 | // no longer be captured. |
860 | bool ShouldIterateEndOfFunctionDSE; |
861 | |
862 | /// Dead instructions to be removed at the end of DSE. |
863 | SmallVector<Instruction *> ToRemove; |
864 | |
865 | // Class contains self-reference, make sure it's not copied/moved. |
866 | DSEState(const DSEState &) = delete; |
867 | DSEState &operator=(const DSEState &) = delete; |
868 | |
869 | DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, |
870 | PostDominatorTree &PDT, const TargetLibraryInfo &TLI, |
871 | const LoopInfo &LI) |
872 | : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT), |
873 | PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) { |
874 | // Collect blocks with throwing instructions not modeled in MemorySSA and |
875 | // alloc-like objects. |
876 | unsigned PO = 0; |
877 | for (BasicBlock *BB : post_order(G: &F)) { |
878 | PostOrderNumbers[BB] = PO++; |
879 | for (Instruction &I : *BB) { |
880 | MemoryAccess *MA = MSSA.getMemoryAccess(I: &I); |
881 | if (I.mayThrow() && !MA) |
882 | ThrowingBlocks.insert(Ptr: I.getParent()); |
883 | |
884 | auto *MD = dyn_cast_or_null<MemoryDef>(Val: MA); |
885 | if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit && |
886 | (getLocForWrite(I: &I) || isMemTerminatorInst(I: &I))) |
887 | MemDefs.push_back(Elt: MD); |
888 | } |
889 | } |
890 | |
    // Treat byval or inalloca arguments the same as Allocas: stores to them are
892 | // dead at the end of the function. |
893 | for (Argument &AI : F.args()) |
894 | if (AI.hasPassPointeeByValueCopyAttr()) |
895 | InvisibleToCallerAfterRet.insert(KV: {&AI, true}); |
896 | |
897 | // Collect whether there is any irreducible control flow in the function. |
898 | ContainsIrreducibleLoops = mayContainIrreducibleControl(F, LI: &LI); |
899 | |
900 | AnyUnreachableExit = any_of(Range: PDT.roots(), P: [](const BasicBlock *E) { |
901 | return isa<UnreachableInst>(Val: E->getTerminator()); |
902 | }); |
903 | } |
904 | |
905 | static void pushMemUses(MemoryAccess *Acc, |
906 | SmallVectorImpl<MemoryAccess *> &WorkList, |
907 | SmallPtrSetImpl<MemoryAccess *> &Visited) { |
908 | for (Use &U : Acc->uses()) { |
909 | auto *MA = cast<MemoryAccess>(Val: U.getUser()); |
910 | if (Visited.insert(Ptr: MA).second) |
911 | WorkList.push_back(Elt: MA); |
912 | } |
  }
914 | |
915 | LocationSize strengthenLocationSize(const Instruction *I, |
916 | LocationSize Size) const { |
917 | if (auto *CB = dyn_cast<CallBase>(Val: I)) { |
918 | LibFunc F; |
919 | if (TLI.getLibFunc(CB: *CB, F) && TLI.has(F) && |
920 | (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) { |
        // Use the precise location size specified by the 3rd argument
        // for determining whether KillingI overwrites DeadLoc if it is a
        // memset_chk call. memset_chk will either write the amount specified
        // by the 3rd argument or immediately abort and exit the program.
925 | // NOTE: AA may determine NoAlias if it can prove that the access size |
926 | // is larger than the allocation size due to that being UB. To avoid |
927 | // returning potentially invalid NoAlias results by AA, limit the use of |
928 | // the precise location size to isOverwrite. |
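        // For example (illustrative), for "__memset_chk(p, 0, 16, objsize)"
        // the third argument gives the exact number of bytes written (or the
        // call aborts), so the killing location size is treated as precisely
        // 16 bytes here.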
929 | if (const auto *Len = dyn_cast<ConstantInt>(Val: CB->getArgOperand(i: 2))) |
930 | return LocationSize::precise(Value: Len->getZExtValue()); |
931 | } |
932 | } |
933 | return Size; |
934 | } |
935 | |
936 | /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p |
937 | /// KillingI instruction) completely overwrites a store to the 'DeadLoc' |
938 | /// location (by \p DeadI instruction). |
939 | /// Return OW_MaybePartial if \p KillingI does not completely overwrite |
940 | /// \p DeadI, but they both write to the same underlying object. In that |
941 | /// case, use isPartialOverwrite to check if \p KillingI partially overwrites |
  /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
943 | /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined. |
944 | OverwriteResult isOverwrite(const Instruction *KillingI, |
945 | const Instruction *DeadI, |
946 | const MemoryLocation &KillingLoc, |
947 | const MemoryLocation &DeadLoc, |
948 | int64_t &KillingOff, int64_t &DeadOff) { |
949 | // AliasAnalysis does not always account for loops. Limit overwrite checks |
950 | // to dependencies for which we can guarantee they are independent of any |
951 | // loops they are in. |
952 | if (!isGuaranteedLoopIndependent(Current: DeadI, KillingDef: KillingI, CurrentLoc: DeadLoc)) |
953 | return OW_Unknown; |
954 | |
955 | LocationSize KillingLocSize = |
956 | strengthenLocationSize(I: KillingI, Size: KillingLoc.Size); |
957 | const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts(); |
958 | const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts(); |
959 | const Value *DeadUndObj = getUnderlyingObject(V: DeadPtr); |
960 | const Value *KillingUndObj = getUnderlyingObject(V: KillingPtr); |
961 | |
962 | // Check whether the killing store overwrites the whole object, in which |
963 | // case the size/offset of the dead store does not matter. |
964 | if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() && |
965 | isIdentifiedObject(V: KillingUndObj)) { |
966 | std::optional<TypeSize> KillingUndObjSize = |
967 | getPointerSize(V: KillingUndObj, DL, TLI, F: &F); |
968 | if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue()) |
969 | return OW_Complete; |
970 | } |
971 | |
972 | // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll |
973 | // get imprecise values here, though (except for unknown sizes). |
974 | if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) { |
      // In case no constant size is known, try to use the IR values for the
      // number of bytes written and check if they match.
977 | const auto *KillingMemI = dyn_cast<MemIntrinsic>(Val: KillingI); |
978 | const auto *DeadMemI = dyn_cast<MemIntrinsic>(Val: DeadI); |
979 | if (KillingMemI && DeadMemI) { |
980 | const Value *KillingV = KillingMemI->getLength(); |
981 | const Value *DeadV = DeadMemI->getLength(); |
982 | if (KillingV == DeadV && BatchAA.isMustAlias(LocA: DeadLoc, LocB: KillingLoc)) |
983 | return OW_Complete; |
984 | } |
985 | |
986 | // Masked stores have imprecise locations, but we can reason about them |
987 | // to some extent. |
988 | return isMaskedStoreOverwrite(KillingI, DeadI, AA&: BatchAA); |
989 | } |
990 | |
991 | const TypeSize KillingSize = KillingLocSize.getValue(); |
992 | const TypeSize DeadSize = DeadLoc.Size.getValue(); |
    // Bail on doing a size comparison which depends on AA for now.
    // TODO: Remove AnyScalable once Alias Analysis deals with scalable
    // vectors.
995 | const bool AnyScalable = |
996 | DeadSize.isScalable() || KillingLocSize.isScalable(); |
997 | |
998 | if (AnyScalable) |
999 | return OW_Unknown; |
1000 | // Query the alias information |
1001 | AliasResult AAR = BatchAA.alias(LocA: KillingLoc, LocB: DeadLoc); |
1002 | |
1003 | // If the start pointers are the same, we just have to compare sizes to see if |
1004 | // the killing store was larger than the dead store. |
1005 | if (AAR == AliasResult::MustAlias) { |
1006 | // Make sure that the KillingSize size is >= the DeadSize size. |
1007 | if (KillingSize >= DeadSize) |
1008 | return OW_Complete; |
1009 | } |
1010 | |
1011 | // If we hit a partial alias we may have a full overwrite |
1012 | if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) { |
1013 | int32_t Off = AAR.getOffset(); |
1014 | if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize) |
1015 | return OW_Complete; |
1016 | } |
1017 | |
1018 | // If we can't resolve the same pointers to the same object, then we can't |
1019 | // analyze them at all. |
1020 | if (DeadUndObj != KillingUndObj) { |
      // Non-aliasing stores to different objects don't overlap. Note that
1022 | // if the killing store is known to overwrite whole object (out of |
1023 | // bounds access overwrites whole object as well) then it is assumed to |
1024 | // completely overwrite any store to the same object even if they don't |
1025 | // actually alias (see next check). |
1026 | if (AAR == AliasResult::NoAlias) |
1027 | return OW_None; |
1028 | return OW_Unknown; |
1029 | } |
1030 | |
1031 | // Okay, we have stores to two completely different pointers. Try to |
1032 | // decompose the pointer into a "base + constant_offset" form. If the base |
1033 | // pointers are equal, then we can reason about the two stores. |
1034 | DeadOff = 0; |
1035 | KillingOff = 0; |
1036 | const Value *DeadBasePtr = |
1037 | GetPointerBaseWithConstantOffset(Ptr: DeadPtr, Offset&: DeadOff, DL); |
1038 | const Value *KillingBasePtr = |
1039 | GetPointerBaseWithConstantOffset(Ptr: KillingPtr, Offset&: KillingOff, DL); |
1040 | |
1041 | // If the base pointers still differ, we have two completely different |
1042 | // stores. |
1043 | if (DeadBasePtr != KillingBasePtr) |
1044 | return OW_Unknown; |
1045 | |
1046 | // The killing access completely overlaps the dead store if and only if |
1047 | // both start and end of the dead one is "inside" the killing one: |
1048 | // |<->|--dead--|<->| |
1049 | // |-----killing------| |
1050 | // Accesses may overlap if and only if start of one of them is "inside" |
1051 | // another one: |
1052 | // |<->|--dead--|<-------->| |
1053 | // |-------killing--------| |
1054 | // OR |
1055 | // |-------dead-------| |
1056 | // |<->|---killing---|<----->| |
1057 | // |
1058 | // We have to be careful here as *Off is signed while *.Size is unsigned. |
1059 | |
1060 | // Check if the dead access starts "not before" the killing one. |
1061 | if (DeadOff >= KillingOff) { |
1062 | // If the dead access ends "not after" the killing access then the |
1063 | // dead one is completely overwritten by the killing one. |
1064 | if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize) |
1065 | return OW_Complete; |
1066 | // If start of the dead access is "before" end of the killing access |
1067 | // then accesses overlap. |
1068 | else if ((uint64_t)(DeadOff - KillingOff) < KillingSize) |
1069 | return OW_MaybePartial; |
1070 | } |
1071 | // If start of the killing access is "before" end of the dead access then |
1072 | // accesses overlap. |
1073 | else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) { |
1074 | return OW_MaybePartial; |
1075 | } |
1076 | |
1077 | // Can reach here only if accesses are known not to overlap. |
1078 | return OW_None; |
1079 | } |
1080 | |
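  /// Returns true if \p V is guaranteed not to be visible to the caller after
  /// the function returns, e.g. an alloca or the result of a noalias call
  /// (such as malloc) that does not escape. Stores to such objects that are
  /// never read before returning are dead.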
1081 | bool isInvisibleToCallerAfterRet(const Value *V) { |
1082 | if (isa<AllocaInst>(Val: V)) |
1083 | return true; |
1084 | auto I = InvisibleToCallerAfterRet.insert(KV: {V, false}); |
1085 | if (I.second) { |
1086 | if (!isInvisibleToCallerOnUnwind(V)) { |
1087 | I.first->second = false; |
1088 | } else if (isNoAliasCall(V)) { |
1089 | I.first->second = !PointerMayBeCaptured(V, ReturnCaptures: true, StoreCaptures: false); |
1090 | } |
1091 | } |
1092 | return I.first->second; |
1093 | } |
1094 | |
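  /// Returns true if the caller cannot observe the memory of \p V when the
  /// function unwinds, e.g. a local object that is not captured before a
  /// potentially throwing instruction.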
1095 | bool isInvisibleToCallerOnUnwind(const Value *V) { |
1096 | bool RequiresNoCaptureBeforeUnwind; |
1097 | if (!isNotVisibleOnUnwind(Object: V, RequiresNoCaptureBeforeUnwind)) |
1098 | return false; |
1099 | if (!RequiresNoCaptureBeforeUnwind) |
1100 | return true; |
1101 | |
1102 | auto I = CapturedBeforeReturn.insert(KV: {V, true}); |
1103 | if (I.second) |
1104 | // NOTE: This could be made more precise by PointerMayBeCapturedBefore |
1105 | // with the killing MemoryDef. But we refrain from doing so for now to |
1106 | // limit compile-time and this does not cause any changes to the number |
1107 | // of stores removed on a large test set in practice. |
1108 | I.first->second = PointerMayBeCaptured(V, ReturnCaptures: false, StoreCaptures: true); |
1109 | return !I.first->second; |
1110 | } |
1111 | |
1112 | std::optional<MemoryLocation> getLocForWrite(Instruction *I) const { |
1113 | if (!I->mayWriteToMemory()) |
1114 | return std::nullopt; |
1115 | |
1116 | if (auto *CB = dyn_cast<CallBase>(Val: I)) |
1117 | return MemoryLocation::getForDest(CI: CB, TLI); |
1118 | |
1119 | return MemoryLocation::getOrNone(Inst: I); |
1120 | } |
1121 | |
1122 | /// Assuming this instruction has a dead analyzable write, can we delete |
1123 | /// this instruction? |
1124 | bool isRemovable(Instruction *I) { |
1125 | assert(getLocForWrite(I) && "Must have analyzable write" ); |
1126 | |
1127 | // Don't remove volatile/atomic stores. |
1128 | if (StoreInst *SI = dyn_cast<StoreInst>(Val: I)) |
1129 | return SI->isUnordered(); |
1130 | |
1131 | if (auto *CB = dyn_cast<CallBase>(Val: I)) { |
1132 | // Don't remove volatile memory intrinsics. |
1133 | if (auto *MI = dyn_cast<MemIntrinsic>(Val: CB)) |
1134 | return !MI->isVolatile(); |
1135 | |
1136 | // Never remove dead lifetime intrinsics, e.g. because they are followed |
1137 | // by a free. |
1138 | if (CB->isLifetimeStartOrEnd()) |
1139 | return false; |
1140 | |
1141 | return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() && |
1142 | !CB->isTerminator(); |
1143 | } |
1144 | |
1145 | return false; |
1146 | } |
1147 | |
1148 | /// Returns true if \p UseInst completely overwrites \p DefLoc |
1149 | /// (stored by \p DefInst). |
1150 | bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst, |
1151 | Instruction *UseInst) { |
1152 | // UseInst has a MemoryDef associated in MemorySSA. It's possible for a |
1153 | // MemoryDef to not write to memory, e.g. a volatile load is modeled as a |
1154 | // MemoryDef. |
1155 | if (!UseInst->mayWriteToMemory()) |
1156 | return false; |
1157 | |
1158 | if (auto *CB = dyn_cast<CallBase>(Val: UseInst)) |
1159 | if (CB->onlyAccessesInaccessibleMemory()) |
1160 | return false; |
1161 | |
1162 | int64_t InstWriteOffset, DepWriteOffset; |
1163 | if (auto CC = getLocForWrite(I: UseInst)) |
1164 | return isOverwrite(KillingI: UseInst, DeadI: DefInst, KillingLoc: *CC, DeadLoc: DefLoc, KillingOff&: InstWriteOffset, |
1165 | DeadOff&: DepWriteOffset) == OW_Complete; |
1166 | return false; |
1167 | } |
1168 | |
1169 | /// Returns true if \p Def is not read before returning from the function. |
1170 | bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc) { |
1171 | LLVM_DEBUG(dbgs() << " Check if def " << *Def << " (" |
1172 | << *Def->getMemoryInst() |
                      << ") is at the end of the function\n");
1174 | SmallVector<MemoryAccess *, 4> WorkList; |
1175 | SmallPtrSet<MemoryAccess *, 8> Visited; |
1176 | |
1177 | pushMemUses(Acc: Def, WorkList, Visited); |
1178 | for (unsigned I = 0; I < WorkList.size(); I++) { |
1179 | if (WorkList.size() >= MemorySSAScanLimit) { |
1180 | LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n" ); |
1181 | return false; |
1182 | } |
1183 | |
1184 | MemoryAccess *UseAccess = WorkList[I]; |
1185 | if (isa<MemoryPhi>(Val: UseAccess)) { |
1186 | // AliasAnalysis does not account for loops. Limit elimination to |
1187 | // candidates for which we can guarantee they always store to the same |
1188 | // memory location. |
1189 | if (!isGuaranteedLoopInvariant(Ptr: DefLoc.Ptr)) |
1190 | return false; |
1191 | |
1192 | pushMemUses(Acc: cast<MemoryPhi>(Val: UseAccess), WorkList, Visited); |
1193 | continue; |
1194 | } |
1195 | // TODO: Checking for aliasing is expensive. Consider reducing the amount |
1196 | // of times this is called and/or caching it. |
1197 | Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst(); |
1198 | if (isReadClobber(DefLoc, UseInst)) { |
1199 | LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n" ); |
1200 | return false; |
1201 | } |
1202 | |
1203 | if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess)) |
1204 | pushMemUses(Acc: UseDef, WorkList, Visited); |
1205 | } |
1206 | return true; |
1207 | } |
1208 | |
1209 | /// If \p I is a memory terminator like llvm.lifetime.end or free, return a |
1210 | /// pair with the MemoryLocation terminated by \p I and a boolean flag |
1211 | /// indicating whether \p I is a free-like call. |
1212 | std::optional<std::pair<MemoryLocation, bool>> |
1213 | getLocForTerminator(Instruction *I) const { |
1214 | uint64_t Len; |
1215 | Value *Ptr; |
1216 | if (match(V: I, P: m_Intrinsic<Intrinsic::lifetime_end>(Op0: m_ConstantInt(V&: Len), |
1217 | Op1: m_Value(V&: Ptr)))) |
1218 | return {std::make_pair(x: MemoryLocation(Ptr, Len), y: false)}; |
1219 | |
1220 | if (auto *CB = dyn_cast<CallBase>(Val: I)) { |
1221 | if (Value *FreedOp = getFreedOperand(CB, TLI: &TLI)) |
1222 | return {std::make_pair(x: MemoryLocation::getAfter(Ptr: FreedOp), y: true)}; |
1223 | } |
1224 | |
1225 | return std::nullopt; |
1226 | } |
1227 | |
1228 | /// Returns true if \p I is a memory terminator instruction like |
1229 | /// llvm.lifetime.end or free. |
1230 | bool isMemTerminatorInst(Instruction *I) const { |
1231 | auto *CB = dyn_cast<CallBase>(Val: I); |
1232 | return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end || |
1233 | getFreedOperand(CB, TLI: &TLI) != nullptr); |
1234 | } |
1235 | |
1236 | /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from |
1237 | /// instruction \p AccessI. |
1238 | bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI, |
1239 | Instruction *MaybeTerm) { |
1240 | std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc = |
1241 | getLocForTerminator(I: MaybeTerm); |
1242 | |
1243 | if (!MaybeTermLoc) |
1244 | return false; |
1245 | |
1246 | // If the terminator is a free-like call, all accesses to the underlying |
1247 | // object can be considered terminated. |
1248 | if (getUnderlyingObject(V: Loc.Ptr) != |
1249 | getUnderlyingObject(V: MaybeTermLoc->first.Ptr)) |
1250 | return false; |
1251 | |
1252 | auto TermLoc = MaybeTermLoc->first; |
1253 | if (MaybeTermLoc->second) { |
1254 | const Value *LocUO = getUnderlyingObject(V: Loc.Ptr); |
1255 | return BatchAA.isMustAlias(V1: TermLoc.Ptr, V2: LocUO); |
1256 | } |
1257 | int64_t InstWriteOffset = 0; |
1258 | int64_t DepWriteOffset = 0; |
1259 | return isOverwrite(KillingI: MaybeTerm, DeadI: AccessI, KillingLoc: TermLoc, DeadLoc: Loc, KillingOff&: InstWriteOffset, |
1260 | DeadOff&: DepWriteOffset) == OW_Complete; |
1261 | } |
1262 | |
1263 | // Returns true if \p Use may read from \p DefLoc. |
1264 | bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) { |
1265 | if (isNoopIntrinsic(I: UseInst)) |
1266 | return false; |
1267 | |
1268 | // Monotonic or weaker atomic stores can be re-ordered and do not need to be |
1269 | // treated as read clobber. |
1270 | if (auto SI = dyn_cast<StoreInst>(Val: UseInst)) |
1271 | return isStrongerThan(AO: SI->getOrdering(), Other: AtomicOrdering::Monotonic); |
1272 | |
1273 | if (!UseInst->mayReadFromMemory()) |
1274 | return false; |
1275 | |
1276 | if (auto *CB = dyn_cast<CallBase>(Val: UseInst)) |
1277 | if (CB->onlyAccessesInaccessibleMemory()) |
1278 | return false; |
1279 | |
1280 | return isRefSet(MRI: BatchAA.getModRefInfo(I: UseInst, OptLoc: DefLoc)); |
1281 | } |
1282 | |
  /// Returns true if a dependency between \p Current and \p KillingDef is
  /// guaranteed to be loop invariant for the loops that they are in. This is
  /// the case either because they are known to be in the same block or at the
  /// same loop level, or because \p CurrentLoc is guaranteed to reference a
  /// single MemoryLocation during execution of the containing function.
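  // For example (illustrative), a store through a pointer PHI that advances
  // on every loop iteration writes a different address each time around the
  // loop, so a single AA query against an access outside the loop is not
  // sufficient; such candidates are only accepted if the location is provably
  // loop invariant.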
  bool isGuaranteedLoopIndependent(const Instruction *Current,
                                   const Instruction *KillingDef,
                                   const MemoryLocation &CurrentLoc) {
    // If the dependency is within the same block or loop level (being careful
    // of irreducible loops), we know that AA will return a valid result for
    // the memory dependency. (Both at the function level, outside of any loop,
    // would also be valid but we currently disable that to limit compile
    // time).
    if (Current->getParent() == KillingDef->getParent())
      return true;
    const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
    if (!ContainsIrreducibleLoops && CurrentLI &&
        CurrentLI == LI.getLoopFor(KillingDef->getParent()))
      return true;
    // Otherwise check the memory location is invariant to any loops.
    return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
  }
1304 | |
1305 | /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible |
1306 | /// loop. In particular, this guarantees that it only references a single |
1307 | /// MemoryLocation during execution of the containing function. |
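  // For illustration: an alloca in the entry block, or a GEP with all-constant
  // indices based on it, always names the same location. A GEP whose index is
  // a loop induction variable names a different location on every iteration
  // and is therefore not considered loop invariant here.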
  bool isGuaranteedLoopInvariant(const Value *Ptr) {
    Ptr = Ptr->stripPointerCasts();
    if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
      if (GEP->hasAllConstantIndices())
        Ptr = GEP->getPointerOperand()->stripPointerCasts();

    if (auto *I = dyn_cast<Instruction>(Ptr)) {
      return I->getParent()->isEntryBlock() ||
             (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
    }
    return true;
  }
1320 | |
1321 | // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess, |
1322 | // with no read access between them or on any other path to a function exit |
1323 | // block if \p KillingLoc is not accessible after the function returns. If |
1324 | // there is no such MemoryDef, return std::nullopt. The returned value may not |
1325 | // (completely) overwrite \p KillingLoc. Currently we bail out when we |
1326 | // encounter an aliasing MemoryUse (read). |
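  //
  // Illustrative sketch in the notation used elsewhere in this file:
  //   1 = MemoryDef(liveOnEntry)  ; store i32 0, ptr %p   <- MaybeDeadAccess
  //   2 = MemoryDef(1)            ; store i32 1, ptr %p   <- KillingDef
  // Walking up from KillingDef finds 1; if no access in between (or on any
  // other path to a function exit, when %p is visible after the return) may
  // read %p, the first store is a candidate for elimination.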
1327 | std::optional<MemoryAccess *> |
1328 | getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, |
1329 | const MemoryLocation &KillingLoc, const Value *KillingUndObj, |
1330 | unsigned &ScanLimit, unsigned &WalkerStepLimit, |
1331 | bool IsMemTerm, unsigned &PartialLimit) { |
1332 | if (ScanLimit == 0 || WalkerStepLimit == 0) { |
1333 | LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n" ); |
1334 | return std::nullopt; |
1335 | } |
1336 | |
1337 | MemoryAccess *Current = StartAccess; |
1338 | Instruction *KillingI = KillingDef->getMemoryInst(); |
1339 | LLVM_DEBUG(dbgs() << " trying to get dominating access\n" ); |
1340 | |
1341 | // Only optimize defining access of KillingDef when directly starting at its |
1342 | // defining access. The defining access also must only access KillingLoc. At |
1343 | // the moment we only support instructions with a single write location, so |
1344 | // it should be sufficient to disable optimizations for instructions that |
1345 | // also read from memory. |
1346 | bool CanOptimize = OptimizeMemorySSA && |
1347 | KillingDef->getDefiningAccess() == StartAccess && |
1348 | !KillingI->mayReadFromMemory(); |
1349 | |
1350 | // Find the next clobbering Mod access for DefLoc, starting at StartAccess. |
1351 | std::optional<MemoryLocation> CurrentLoc; |
1352 | for (;; Current = cast<MemoryDef>(Val: Current)->getDefiningAccess()) { |
1353 | LLVM_DEBUG({ |
1354 | dbgs() << " visiting " << *Current; |
1355 | if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current)) |
1356 | dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst() |
1357 | << ")" ; |
1358 | dbgs() << "\n" ; |
1359 | }); |
1360 | |
1361 | // Reached TOP. |
1362 | if (MSSA.isLiveOnEntryDef(MA: Current)) { |
1363 | LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n" ); |
1364 | if (CanOptimize && Current != KillingDef->getDefiningAccess()) |
1365 | // The first clobbering def is... none. |
1366 | KillingDef->setOptimized(Current); |
1367 | return std::nullopt; |
1368 | } |
1369 | |
1370 | // Cost of a step. Accesses in the same block are more likely to be valid |
1371 | // candidates for elimination, hence consider them cheaper. |
1372 | unsigned StepCost = KillingDef->getBlock() == Current->getBlock() |
1373 | ? MemorySSASameBBStepCost |
1374 | : MemorySSAOtherBBStepCost; |
1375 | if (WalkerStepLimit <= StepCost) { |
1376 | LLVM_DEBUG(dbgs() << " ... hit walker step limit\n" ); |
1377 | return std::nullopt; |
1378 | } |
1379 | WalkerStepLimit -= StepCost; |
1380 | |
1381 | // Return for MemoryPhis. They cannot be eliminated directly and the |
1382 | // caller is responsible for traversing them. |
1383 | if (isa<MemoryPhi>(Val: Current)) { |
1384 | LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n" ); |
1385 | return Current; |
1386 | } |
1387 | |
1388 | // Below, check if CurrentDef is a valid candidate to be eliminated by |
1389 | // KillingDef. If it is not, check the next candidate. |
1390 | MemoryDef *CurrentDef = cast<MemoryDef>(Val: Current); |
1391 | Instruction *CurrentI = CurrentDef->getMemoryInst(); |
1392 | |
1393 | if (canSkipDef(D: CurrentDef, DefVisibleToCaller: !isInvisibleToCallerOnUnwind(V: KillingUndObj))) { |
1394 | CanOptimize = false; |
1395 | continue; |
1396 | } |
1397 | |
      // Before we try to remove anything, check for any extra throwing
      // instructions that block us from DSEing.
1400 | if (mayThrowBetween(KillingI, DeadI: CurrentI, KillingUndObj)) { |
1401 | LLVM_DEBUG(dbgs() << " ... skip, may throw!\n" ); |
1402 | return std::nullopt; |
1403 | } |
1404 | |
1405 | // Check for anything that looks like it will be a barrier to further |
1406 | // removal |
1407 | if (isDSEBarrier(KillingUndObj, DeadI: CurrentI)) { |
1408 | LLVM_DEBUG(dbgs() << " ... skip, barrier\n" ); |
1409 | return std::nullopt; |
1410 | } |
1411 | |
      // If Current is known to be on a path that reads DefLoc or is a read
      // clobber, bail out, as the path is not profitable. We skip this check
      // for intrinsic calls, because the code knows how to handle memcpy
      // intrinsics.
1416 | if (!isa<IntrinsicInst>(Val: CurrentI) && isReadClobber(DefLoc: KillingLoc, UseInst: CurrentI)) |
1417 | return std::nullopt; |
1418 | |
1419 | // Quick check if there are direct uses that are read-clobbers. |
1420 | if (any_of(Range: Current->uses(), P: [this, &KillingLoc, StartAccess](Use &U) { |
1421 | if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Val: U.getUser())) |
1422 | return !MSSA.dominates(A: StartAccess, B: UseOrDef) && |
1423 | isReadClobber(DefLoc: KillingLoc, UseInst: UseOrDef->getMemoryInst()); |
1424 | return false; |
1425 | })) { |
1426 | LLVM_DEBUG(dbgs() << " ... found a read clobber\n" ); |
1427 | return std::nullopt; |
1428 | } |
1429 | |
1430 | // If Current does not have an analyzable write location or is not |
1431 | // removable, skip it. |
1432 | CurrentLoc = getLocForWrite(I: CurrentI); |
1433 | if (!CurrentLoc || !isRemovable(I: CurrentI)) { |
1434 | CanOptimize = false; |
1435 | continue; |
1436 | } |
1437 | |
1438 | // AliasAnalysis does not account for loops. Limit elimination to |
1439 | // candidates for which we can guarantee they always store to the same |
1440 | // memory location and not located in different loops. |
1441 | if (!isGuaranteedLoopIndependent(Current: CurrentI, KillingDef: KillingI, CurrentLoc: *CurrentLoc)) { |
1442 | LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n" ); |
1443 | CanOptimize = false; |
1444 | continue; |
1445 | } |
1446 | |
      if (IsMemTerm) {
        // If the killing def is a memory terminator (e.g. lifetime.end), check
        // the next candidate if Current does not write to the same underlying
        // object as the terminator.
        if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
          CanOptimize = false;
          continue;
        }
1455 | } else { |
1456 | int64_t KillingOffset = 0; |
1457 | int64_t DeadOffset = 0; |
1458 | auto OR = isOverwrite(KillingI, DeadI: CurrentI, KillingLoc, DeadLoc: *CurrentLoc, |
1459 | KillingOff&: KillingOffset, DeadOff&: DeadOffset); |
1460 | if (CanOptimize) { |
1461 | // CurrentDef is the earliest write clobber of KillingDef. Use it as |
1462 | // optimized access. Do not optimize if CurrentDef is already the |
1463 | // defining access of KillingDef. |
1464 | if (CurrentDef != KillingDef->getDefiningAccess() && |
1465 | (OR == OW_Complete || OR == OW_MaybePartial)) |
1466 | KillingDef->setOptimized(CurrentDef); |
1467 | |
1468 | // Once a may-aliasing def is encountered do not set an optimized |
1469 | // access. |
1470 | if (OR != OW_None) |
1471 | CanOptimize = false; |
1472 | } |
1473 | |
1474 | // If Current does not write to the same object as KillingDef, check |
1475 | // the next candidate. |
1476 | if (OR == OW_Unknown || OR == OW_None) |
1477 | continue; |
1478 | else if (OR == OW_MaybePartial) { |
1479 | // If KillingDef only partially overwrites Current, check the next |
1480 | // candidate if the partial step limit is exceeded. This aggressively |
1481 | // limits the number of candidates for partial store elimination, |
1482 | // which are less likely to be removable in the end. |
1483 | if (PartialLimit <= 1) { |
1484 | WalkerStepLimit -= 1; |
1485 | LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n" ); |
1486 | continue; |
1487 | } |
1488 | PartialLimit -= 1; |
1489 | } |
1490 | } |
1491 | break; |
1492 | }; |
1493 | |
    // Accesses to objects accessible after the function returns can only be
    // eliminated if the access is dead along all paths to the exit. Collect
    // the blocks with killing (i.e. completely overwriting) MemoryDefs and
    // check if they cover all paths from MaybeDeadAccess to any function exit.
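    // Illustrative CFG sketch: if MaybeDeadAccess writes a global in block A,
    // and A branches to B and C where only B contains a killing store, then
    // the path A -> C -> exit leaves the old value observable after the
    // return, so the candidate must not be removed. The post-dominance check
    // and the CFG walk below detect exactly this situation.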
1498 | SmallPtrSet<Instruction *, 16> KillingDefs; |
1499 | KillingDefs.insert(Ptr: KillingDef->getMemoryInst()); |
1500 | MemoryAccess *MaybeDeadAccess = Current; |
1501 | MemoryLocation MaybeDeadLoc = *CurrentLoc; |
1502 | Instruction *MaybeDeadI = cast<MemoryDef>(Val: MaybeDeadAccess)->getMemoryInst(); |
1503 | LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " (" |
1504 | << *MaybeDeadI << ")\n" ); |
1505 | |
1506 | SmallVector<MemoryAccess *, 32> WorkList; |
1507 | SmallPtrSet<MemoryAccess *, 32> Visited; |
1508 | pushMemUses(Acc: MaybeDeadAccess, WorkList, Visited); |
1509 | |
1510 | // Check if DeadDef may be read. |
1511 | for (unsigned I = 0; I < WorkList.size(); I++) { |
1512 | MemoryAccess *UseAccess = WorkList[I]; |
1513 | |
1514 | LLVM_DEBUG(dbgs() << " " << *UseAccess); |
1515 | // Bail out if the number of accesses to check exceeds the scan limit. |
1516 | if (ScanLimit < (WorkList.size() - I)) { |
1517 | LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n" ); |
1518 | return std::nullopt; |
1519 | } |
1520 | --ScanLimit; |
1521 | NumDomMemDefChecks++; |
1522 | |
1523 | if (isa<MemoryPhi>(Val: UseAccess)) { |
1524 | if (any_of(Range&: KillingDefs, P: [this, UseAccess](Instruction *KI) { |
1525 | return DT.properlyDominates(A: KI->getParent(), |
1526 | B: UseAccess->getBlock()); |
1527 | })) { |
1528 | LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n" ); |
1529 | continue; |
1530 | } |
1531 | LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n" ); |
1532 | pushMemUses(Acc: UseAccess, WorkList, Visited); |
1533 | continue; |
1534 | } |
1535 | |
1536 | Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst(); |
1537 | LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n" ); |
1538 | |
1539 | if (any_of(Range&: KillingDefs, P: [this, UseInst](Instruction *KI) { |
1540 | return DT.dominates(Def: KI, User: UseInst); |
1541 | })) { |
1542 | LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n" ); |
1543 | continue; |
1544 | } |
1545 | |
      // A memory terminator kills all preceding MemoryDefs and all succeeding
      // MemoryAccesses. We do not have to check its users.
1548 | if (isMemTerminator(Loc: MaybeDeadLoc, AccessI: MaybeDeadI, MaybeTerm: UseInst)) { |
1549 | LLVM_DEBUG( |
1550 | dbgs() |
1551 | << " ... skipping, memterminator invalidates following accesses\n" ); |
1552 | continue; |
1553 | } |
1554 | |
1555 | if (isNoopIntrinsic(I: cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst())) { |
1556 | LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n" ); |
1557 | pushMemUses(Acc: UseAccess, WorkList, Visited); |
1558 | continue; |
1559 | } |
1560 | |
1561 | if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj)) { |
1562 | LLVM_DEBUG(dbgs() << " ... found throwing instruction\n" ); |
1563 | return std::nullopt; |
1564 | } |
1565 | |
1566 | // Uses which may read the original MemoryDef mean we cannot eliminate the |
1567 | // original MD. Stop walk. |
1568 | if (isReadClobber(DefLoc: MaybeDeadLoc, UseInst)) { |
1569 | LLVM_DEBUG(dbgs() << " ... found read clobber\n" ); |
1570 | return std::nullopt; |
1571 | } |
1572 | |
      // If this worklist walks back to the original memory access (and the
      // pointer is not guaranteed loop invariant) then we cannot assume that a
      // store kills itself.
1576 | if (MaybeDeadAccess == UseAccess && |
1577 | !isGuaranteedLoopInvariant(Ptr: MaybeDeadLoc.Ptr)) { |
1578 | LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n" ); |
1579 | return std::nullopt; |
1580 | } |
1581 | // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check |
1582 | // if it reads the memory location. |
1583 | // TODO: It would probably be better to check for self-reads before |
1584 | // calling the function. |
1585 | if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) { |
1586 | LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n" ); |
1587 | continue; |
1588 | } |
1589 | |
      // Check all uses for MemoryDefs, except for defs completely overwriting
      // the original location. We have to check the uses of *all* MemoryDefs
      // we discover, including non-aliasing ones; otherwise we might miss
      // cases like the following
1594 | // 1 = Def(LoE) ; <----- DeadDef stores [0,1] |
1595 | // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] |
1596 | // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. |
1597 | // (The Use points to the *first* Def it may alias) |
1598 | // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, |
1599 | // stores [0,1] |
1600 | if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess)) { |
1601 | if (isCompleteOverwrite(DefLoc: MaybeDeadLoc, DefInst: MaybeDeadI, UseInst)) { |
1602 | BasicBlock *MaybeKillingBlock = UseInst->getParent(); |
1603 | if (PostOrderNumbers.find(Val: MaybeKillingBlock)->second < |
1604 | PostOrderNumbers.find(Val: MaybeDeadAccess->getBlock())->second) { |
1605 | if (!isInvisibleToCallerAfterRet(V: KillingUndObj)) { |
1606 | LLVM_DEBUG(dbgs() |
1607 | << " ... found killing def " << *UseInst << "\n" ); |
1608 | KillingDefs.insert(Ptr: UseInst); |
1609 | } |
1610 | } else { |
          LLVM_DEBUG(dbgs()
                     << "  ... found preceding def " << *UseInst << "\n");
1613 | return std::nullopt; |
1614 | } |
1615 | } else |
1616 | pushMemUses(Acc: UseDef, WorkList, Visited); |
1617 | } |
1618 | } |
1619 | |
1620 | // For accesses to locations visible after the function returns, make sure |
1621 | // that the location is dead (=overwritten) along all paths from |
1622 | // MaybeDeadAccess to the exit. |
1623 | if (!isInvisibleToCallerAfterRet(V: KillingUndObj)) { |
1624 | SmallPtrSet<BasicBlock *, 16> KillingBlocks; |
1625 | for (Instruction *KD : KillingDefs) |
1626 | KillingBlocks.insert(Ptr: KD->getParent()); |
1627 | assert(!KillingBlocks.empty() && |
1628 | "Expected at least a single killing block" ); |
1629 | |
1630 | // Find the common post-dominator of all killing blocks. |
1631 | BasicBlock *CommonPred = *KillingBlocks.begin(); |
1632 | for (BasicBlock *BB : llvm::drop_begin(RangeOrContainer&: KillingBlocks)) { |
1633 | if (!CommonPred) |
1634 | break; |
1635 | CommonPred = PDT.findNearestCommonDominator(A: CommonPred, B: BB); |
1636 | } |
1637 | |
1638 | // If the common post-dominator does not post-dominate MaybeDeadAccess, |
1639 | // there is a path from MaybeDeadAccess to an exit not going through a |
1640 | // killing block. |
1641 | if (!PDT.dominates(A: CommonPred, B: MaybeDeadAccess->getBlock())) { |
1642 | if (!AnyUnreachableExit) |
1643 | return std::nullopt; |
1644 | |
1645 | // Fall back to CFG scan starting at all non-unreachable roots if not |
1646 | // all paths to the exit go through CommonPred. |
1647 | CommonPred = nullptr; |
1648 | } |
1649 | |
1650 | // If CommonPred itself is in the set of killing blocks, we're done. |
1651 | if (KillingBlocks.count(Ptr: CommonPred)) |
1652 | return {MaybeDeadAccess}; |
1653 | |
1654 | SetVector<BasicBlock *> WorkList; |
1655 | // If CommonPred is null, there are multiple exits from the function. |
1656 | // They all have to be added to the worklist. |
1657 | if (CommonPred) |
1658 | WorkList.insert(X: CommonPred); |
1659 | else |
1660 | for (BasicBlock *R : PDT.roots()) { |
1661 | if (!isa<UnreachableInst>(Val: R->getTerminator())) |
1662 | WorkList.insert(X: R); |
1663 | } |
1664 | |
1665 | NumCFGTries++; |
1666 | // Check if all paths starting from an exit node go through one of the |
1667 | // killing blocks before reaching MaybeDeadAccess. |
1668 | for (unsigned I = 0; I < WorkList.size(); I++) { |
1669 | NumCFGChecks++; |
1670 | BasicBlock *Current = WorkList[I]; |
1671 | if (KillingBlocks.count(Ptr: Current)) |
1672 | continue; |
1673 | if (Current == MaybeDeadAccess->getBlock()) |
1674 | return std::nullopt; |
1675 | |
1676 | // MaybeDeadAccess is reachable from the entry, so we don't have to |
1677 | // explore unreachable blocks further. |
1678 | if (!DT.isReachableFromEntry(A: Current)) |
1679 | continue; |
1680 | |
1681 | for (BasicBlock *Pred : predecessors(BB: Current)) |
1682 | WorkList.insert(X: Pred); |
1683 | |
1684 | if (WorkList.size() >= MemorySSAPathCheckLimit) |
1685 | return std::nullopt; |
1686 | } |
1687 | NumCFGSuccess++; |
1688 | } |
1689 | |
1690 | // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is |
1691 | // potentially dead. |
1692 | return {MaybeDeadAccess}; |
1693 | } |
1694 | |
1695 | /// Delete dead memory defs and recursively add their operands to ToRemove if |
1696 | /// they became dead. |
1697 | void |
1698 | deleteDeadInstruction(Instruction *SI, |
1699 | SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr) { |
1700 | MemorySSAUpdater Updater(&MSSA); |
1701 | SmallVector<Instruction *, 32> NowDeadInsts; |
1702 | NowDeadInsts.push_back(Elt: SI); |
1703 | --NumFastOther; |
1704 | |
1705 | while (!NowDeadInsts.empty()) { |
1706 | Instruction *DeadInst = NowDeadInsts.pop_back_val(); |
1707 | ++NumFastOther; |
1708 | |
1709 | // Try to preserve debug information attached to the dead instruction. |
1710 | salvageDebugInfo(I&: *DeadInst); |
1711 | salvageKnowledge(I: DeadInst); |
1712 | |
1713 | // Remove the Instruction from MSSA. |
1714 | MemoryAccess *MA = MSSA.getMemoryAccess(I: DeadInst); |
1715 | bool IsMemDef = MA && isa<MemoryDef>(Val: MA); |
1716 | if (MA) { |
1717 | if (IsMemDef) { |
1718 | auto *MD = cast<MemoryDef>(Val: MA); |
1719 | SkipStores.insert(Ptr: MD); |
1720 | if (Deleted) |
1721 | Deleted->insert(Ptr: MD); |
1722 | if (auto *SI = dyn_cast<StoreInst>(Val: MD->getMemoryInst())) { |
1723 | if (SI->getValueOperand()->getType()->isPointerTy()) { |
1724 | const Value *UO = getUnderlyingObject(V: SI->getValueOperand()); |
1725 | if (CapturedBeforeReturn.erase(Val: UO)) |
1726 | ShouldIterateEndOfFunctionDSE = true; |
1727 | InvisibleToCallerAfterRet.erase(Val: UO); |
1728 | } |
1729 | } |
1730 | } |
1731 | |
1732 | Updater.removeMemoryAccess(MA); |
1733 | } |
1734 | |
1735 | auto I = IOLs.find(Key: DeadInst->getParent()); |
1736 | if (I != IOLs.end()) |
1737 | I->second.erase(Val: DeadInst); |
1738 | // Remove its operands |
1739 | for (Use &O : DeadInst->operands()) |
1740 | if (Instruction *OpI = dyn_cast<Instruction>(Val&: O)) { |
1741 | O.set(PoisonValue::get(T: O->getType())); |
1742 | if (isInstructionTriviallyDead(I: OpI, TLI: &TLI)) |
1743 | NowDeadInsts.push_back(Elt: OpI); |
1744 | } |
1745 | |
1746 | EI.removeInstruction(I: DeadInst); |
      // Remove memory defs directly if they don't produce results, but only
      // queue other dead instructions for later removal. They may have been
      // used as memory locations that have been cached by BatchAA. Removing
      // them here may lead to newly created instructions being allocated at
      // the same address, yielding stale cache entries.
1752 | if (IsMemDef && DeadInst->getType()->isVoidTy()) |
1753 | DeadInst->eraseFromParent(); |
1754 | else |
1755 | ToRemove.push_back(Elt: DeadInst); |
1756 | } |
1757 | } |
1758 | |
  // Check for any extra throws between \p KillingI and \p DeadI that block
  // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
  // MemoryDefs that may throw are handled during the walk from one def to the
  // next.
  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
                       const Value *KillingUndObj) {
    // First see if we can ignore it by using the fact that the killing object
    // is an alloca/alloca-like object that is not visible to the caller during
    // execution of the function.
    if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
      return false;

    if (KillingI->getParent() == DeadI->getParent())
      return ThrowingBlocks.count(KillingI->getParent());
    return !ThrowingBlocks.empty();
  }
1775 | |
  // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
  // instructions act as barriers:
  //  * A memory instruction that may throw and \p KillingI accesses a
  //    non-stack object.
  //  * Atomic stores stronger than monotonic.
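  // For example, an atomic store with release ordering cannot be eliminated
  // or reordered over, and an instruction that may throw is a barrier
  // whenever the killed object is visible to the caller on unwind.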
  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
    // If DeadI may throw it acts as a barrier, unless we are accessing an
    // alloca/alloca-like object that does not escape.
    if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
      return true;

    // If DeadI is an atomic load/store stronger than monotonic, do not try to
    // eliminate/reorder it.
    if (DeadI->isAtomic()) {
      if (auto *LI = dyn_cast<LoadInst>(DeadI))
        return isStrongerThanMonotonic(LI->getOrdering());
      if (auto *SI = dyn_cast<StoreInst>(DeadI))
        return isStrongerThanMonotonic(SI->getOrdering());
      if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
        return isStrongerThanMonotonic(ARMW->getOrdering());
      if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
        return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
               isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
      llvm_unreachable("other instructions should be skipped in MemorySSA");
    }
    return false;
  }
1803 | |
1804 | /// Eliminate writes to objects that are not visible in the caller and are not |
1805 | /// accessed before returning from the function. |
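  // Illustrative sketch (IR-like, names are placeholders):
  //   %a = alloca i32
  //   store i32 1, ptr %a
  //   ret void
  // The store is never read before returning and %a is not visible to the
  // caller, so the store can be removed.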
1806 | bool eliminateDeadWritesAtEndOfFunction() { |
1807 | bool MadeChange = false; |
1808 | LLVM_DEBUG( |
1809 | dbgs() |
1810 | << "Trying to eliminate MemoryDefs at the end of the function\n" ); |
1811 | do { |
1812 | ShouldIterateEndOfFunctionDSE = false; |
1813 | for (MemoryDef *Def : llvm::reverse(C&: MemDefs)) { |
1814 | if (SkipStores.contains(Ptr: Def)) |
1815 | continue; |
1816 | |
1817 | Instruction *DefI = Def->getMemoryInst(); |
1818 | auto DefLoc = getLocForWrite(I: DefI); |
1819 | if (!DefLoc || !isRemovable(I: DefI)) { |
1820 | LLVM_DEBUG(dbgs() << " ... could not get location for write or " |
1821 | "instruction not removable.\n" ); |
1822 | continue; |
1823 | } |
1824 | |
1825 | // NOTE: Currently eliminating writes at the end of a function is |
1826 | // limited to MemoryDefs with a single underlying object, to save |
1827 | // compile-time. In practice it appears the case with multiple |
1828 | // underlying objects is very uncommon. If it turns out to be important, |
1829 | // we can use getUnderlyingObjects here instead. |
1830 | const Value *UO = getUnderlyingObject(V: DefLoc->Ptr); |
1831 | if (!isInvisibleToCallerAfterRet(V: UO)) |
1832 | continue; |
1833 | |
1834 | if (isWriteAtEndOfFunction(Def, DefLoc: *DefLoc)) { |
1835 | // See through pointer-to-pointer bitcasts |
1836 | LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end " |
1837 | "of the function\n" ); |
1838 | deleteDeadInstruction(SI: DefI); |
1839 | ++NumFastStores; |
1840 | MadeChange = true; |
1841 | } |
1842 | } |
1843 | } while (ShouldIterateEndOfFunctionDSE); |
1844 | return MadeChange; |
1845 | } |
1846 | |
1847 | /// If we have a zero initializing memset following a call to malloc, |
1848 | /// try folding it into a call to calloc. |
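  // For illustration (sketch, names are placeholders):
  //   %p = call ptr @malloc(i64 %n)
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
  // becomes
  //   %p = call ptr @calloc(i64 1, i64 %n)
  // provided nothing may modify the memory between the two calls; the
  // now-redundant memset is then removed by the caller.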
1849 | bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) { |
1850 | Instruction *DefI = Def->getMemoryInst(); |
1851 | MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI); |
1852 | if (!MemSet) |
1853 | // TODO: Could handle zero store to small allocation as well. |
1854 | return false; |
1855 | Constant *StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue()); |
1856 | if (!StoredConstant || !StoredConstant->isNullValue()) |
1857 | return false; |
1858 | |
    if (!isRemovable(DefI))
      // The memset might be volatile.
      return false;
1862 | |
1863 | if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) || |
1864 | F.hasFnAttribute(Kind: Attribute::SanitizeAddress) || |
1865 | F.hasFnAttribute(Kind: Attribute::SanitizeHWAddress) || |
1866 | F.getName() == "calloc" ) |
1867 | return false; |
1868 | auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(Val: DefUO)); |
1869 | if (!Malloc) |
1870 | return false; |
1871 | auto *InnerCallee = Malloc->getCalledFunction(); |
1872 | if (!InnerCallee) |
1873 | return false; |
1874 | LibFunc Func; |
1875 | if (!TLI.getLibFunc(FDecl: *InnerCallee, F&: Func) || !TLI.has(F: Func) || |
1876 | Func != LibFunc_malloc) |
1877 | return false; |
1878 | // Gracefully handle malloc with unexpected memory attributes. |
1879 | auto *MallocDef = dyn_cast_or_null<MemoryDef>(Val: MSSA.getMemoryAccess(I: Malloc)); |
1880 | if (!MallocDef) |
1881 | return false; |
1882 | |
    auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
      // Check for a `br(icmp(ptr, null), truebb, falsebb)` pattern at the end
      // of the malloc block.
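      // Illustrative shape (labels are placeholders):
      //   mallocbb:
      //     %p   = call ptr @malloc(i64 %n)
      //     %cmp = icmp eq ptr %p, null
      //     br i1 %cmp, label %errbb, label %memsetbb
      //   memsetbb:                          ; must be the false successor
      //     call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)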
1886 | auto *MallocBB = Malloc->getParent(), |
1887 | *MemsetBB = Memset->getParent(); |
1888 | if (MallocBB == MemsetBB) |
1889 | return true; |
1890 | auto *Ptr = Memset->getArgOperand(i: 0); |
1891 | auto *TI = MallocBB->getTerminator(); |
1892 | ICmpInst::Predicate Pred; |
1893 | BasicBlock *TrueBB, *FalseBB; |
1894 | if (!match(V: TI, P: m_Br(C: m_ICmp(Pred, L: m_Specific(V: Ptr), R: m_Zero()), T&: TrueBB, |
1895 | F&: FalseBB))) |
1896 | return false; |
1897 | if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB) |
1898 | return false; |
1899 | return true; |
1900 | }; |
1901 | |
1902 | if (Malloc->getOperand(i_nocapture: 0) != MemSet->getLength()) |
1903 | return false; |
1904 | if (!shouldCreateCalloc(Malloc, MemSet) || |
1905 | !DT.dominates(Def: Malloc, User: MemSet) || |
1906 | !memoryIsNotModifiedBetween(FirstI: Malloc, SecondI: MemSet, AA&: BatchAA, DL, DT: &DT)) |
1907 | return false; |
1908 | IRBuilder<> IRB(Malloc); |
1909 | Type *SizeTTy = Malloc->getArgOperand(i: 0)->getType(); |
1910 | auto *Calloc = emitCalloc(Num: ConstantInt::get(Ty: SizeTTy, V: 1), |
1911 | Size: Malloc->getArgOperand(i: 0), B&: IRB, TLI); |
1912 | if (!Calloc) |
1913 | return false; |
1914 | |
1915 | MemorySSAUpdater Updater(&MSSA); |
1916 | auto *NewAccess = |
1917 | Updater.createMemoryAccessAfter(I: cast<Instruction>(Val: Calloc), Definition: nullptr, |
1918 | InsertPt: MallocDef); |
1919 | auto *NewAccessMD = cast<MemoryDef>(Val: NewAccess); |
1920 | Updater.insertDef(Def: NewAccessMD, /*RenameUses=*/true); |
1921 | Malloc->replaceAllUsesWith(V: Calloc); |
1922 | deleteDeadInstruction(SI: Malloc); |
1923 | return true; |
1924 | } |
1925 | |
  // Check if there is a dominating condition that implies that the value
  // being stored to a ptr is already present in the ptr.
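  // Illustrative sketch (C-like, names are placeholders):
  //   if (*p == v) {
  //     ...        // nothing clobbers *p in between
  //     *p = v;    // value already present, store is a no-op
  //   }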
1928 | bool dominatingConditionImpliesValue(MemoryDef *Def) { |
1929 | auto *StoreI = cast<StoreInst>(Val: Def->getMemoryInst()); |
1930 | BasicBlock *StoreBB = StoreI->getParent(); |
1931 | Value *StorePtr = StoreI->getPointerOperand(); |
1932 | Value *StoreVal = StoreI->getValueOperand(); |
1933 | |
1934 | DomTreeNode *IDom = DT.getNode(BB: StoreBB)->getIDom(); |
1935 | if (!IDom) |
1936 | return false; |
1937 | |
1938 | auto *BI = dyn_cast<BranchInst>(Val: IDom->getBlock()->getTerminator()); |
1939 | if (!BI || !BI->isConditional()) |
1940 | return false; |
1941 | |
    // If both successors are the same block, it is not possible to determine
    // which branch is taken, so the optimization is not possible. (We would
    // not want to optimize a store in the FalseBB if the condition is true,
    // and vice versa.)
1945 | if (BI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) |
1946 | return false; |
1947 | |
1948 | Instruction *ICmpL; |
1949 | ICmpInst::Predicate Pred; |
1950 | if (!match(V: BI->getCondition(), |
1951 | P: m_c_ICmp(Pred, |
1952 | L: m_CombineAnd(L: m_Load(Op: m_Specific(V: StorePtr)), |
1953 | R: m_Instruction(I&: ICmpL)), |
1954 | R: m_Specific(V: StoreVal))) || |
1955 | !ICmpInst::isEquality(P: Pred)) |
1956 | return false; |
1957 | |
    // If the else block also branches to the if block, or the other way
    // around, it is not possible to determine whether the optimization is
    // safe.
1960 | if (Pred == ICmpInst::ICMP_EQ && |
1961 | !DT.dominates(BBE: BasicBlockEdge(BI->getParent(), BI->getSuccessor(i: 0)), |
1962 | BB: StoreBB)) |
1963 | return false; |
1964 | |
1965 | if (Pred == ICmpInst::ICMP_NE && |
1966 | !DT.dominates(BBE: BasicBlockEdge(BI->getParent(), BI->getSuccessor(i: 1)), |
1967 | BB: StoreBB)) |
1968 | return false; |
1969 | |
1970 | MemoryAccess *LoadAcc = MSSA.getMemoryAccess(I: ICmpL); |
1971 | MemoryAccess *ClobAcc = |
1972 | MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA); |
1973 | |
1974 | return MSSA.dominates(A: ClobAcc, B: LoadAcc); |
1975 | } |
1976 | |
1977 | /// \returns true if \p Def is a no-op store, either because it |
1978 | /// directly stores back a loaded value or stores zero to a calloced object. |
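  // For illustration: `%v = load i32, ptr %p` followed, with no intervening
  // clobber of %p, by `store i32 %v, ptr %p` writes back the value that is
  // already there. Similarly, storing zero into memory that calloc has just
  // zero-initialized (clobbering access still LiveOnEntry) is a no-op.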
1979 | bool storeIsNoop(MemoryDef *Def, const Value *DefUO) { |
1980 | Instruction *DefI = Def->getMemoryInst(); |
1981 | StoreInst *Store = dyn_cast<StoreInst>(Val: DefI); |
1982 | MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI); |
1983 | Constant *StoredConstant = nullptr; |
1984 | if (Store) |
1985 | StoredConstant = dyn_cast<Constant>(Val: Store->getOperand(i_nocapture: 0)); |
1986 | else if (MemSet) |
1987 | StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue()); |
1988 | else |
1989 | return false; |
1990 | |
1991 | if (!isRemovable(I: DefI)) |
1992 | return false; |
1993 | |
1994 | if (StoredConstant) { |
1995 | Constant *InitC = |
1996 | getInitialValueOfAllocation(V: DefUO, TLI: &TLI, Ty: StoredConstant->getType()); |
1997 | // If the clobbering access is LiveOnEntry, no instructions between them |
1998 | // can modify the memory location. |
1999 | if (InitC && InitC == StoredConstant) |
2000 | return MSSA.isLiveOnEntryDef( |
2001 | MA: MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA)); |
2002 | } |
2003 | |
2004 | if (!Store) |
2005 | return false; |
2006 | |
2007 | if (dominatingConditionImpliesValue(Def)) |
2008 | return true; |
2009 | |
2010 | if (auto *LoadI = dyn_cast<LoadInst>(Val: Store->getOperand(i_nocapture: 0))) { |
2011 | if (LoadI->getPointerOperand() == Store->getOperand(i_nocapture: 1)) { |
2012 | // Get the defining access for the load. |
2013 | auto *LoadAccess = MSSA.getMemoryAccess(I: LoadI)->getDefiningAccess(); |
2014 | // Fast path: the defining accesses are the same. |
2015 | if (LoadAccess == Def->getDefiningAccess()) |
2016 | return true; |
2017 | |
2018 | // Look through phi accesses. Recursively scan all phi accesses by |
2019 | // adding them to a worklist. Bail when we run into a memory def that |
2020 | // does not match LoadAccess. |
2021 | SetVector<MemoryAccess *> ToCheck; |
2022 | MemoryAccess *Current = |
2023 | MSSA.getWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA); |
        // We don't want to bail when we run into the store memory def. But,
        // the phi access may point to it. So, pretend like we've already
        // checked it.
        ToCheck.insert(Def);
        ToCheck.insert(Current);
        // Start at index 1 (Current) to simulate already having checked Def.
2030 | for (unsigned I = 1; I < ToCheck.size(); ++I) { |
2031 | Current = ToCheck[I]; |
2032 | if (auto PhiAccess = dyn_cast<MemoryPhi>(Val: Current)) { |
2033 | // Check all the operands. |
2034 | for (auto &Use : PhiAccess->incoming_values()) |
2035 | ToCheck.insert(X: cast<MemoryAccess>(Val: &Use)); |
2036 | continue; |
2037 | } |
2038 | |
2039 | // If we found a memory def, bail. This happens when we have an |
2040 | // unrelated write in between an otherwise noop store. |
2041 | assert(isa<MemoryDef>(Current) && |
2042 | "Only MemoryDefs should reach here." ); |
2043 | // TODO: Skip no alias MemoryDefs that have no aliasing reads. |
2044 | // We are searching for the definition of the store's destination. |
2045 | // So, if that is the same definition as the load, then this is a |
2046 | // noop. Otherwise, fail. |
2047 | if (LoadAccess != Current) |
2048 | return false; |
2049 | } |
2050 | return true; |
2051 | } |
2052 | } |
2053 | |
2054 | return false; |
2055 | } |
2056 | |
2057 | bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) { |
2058 | bool Changed = false; |
2059 | for (auto OI : IOL) { |
2060 | Instruction *DeadI = OI.first; |
2061 | MemoryLocation Loc = *getLocForWrite(I: DeadI); |
2062 | assert(isRemovable(DeadI) && "Expect only removable instruction" ); |
2063 | |
2064 | const Value *Ptr = Loc.Ptr->stripPointerCasts(); |
2065 | int64_t DeadStart = 0; |
2066 | uint64_t DeadSize = Loc.Size.getValue(); |
2067 | GetPointerBaseWithConstantOffset(Ptr, Offset&: DeadStart, DL); |
2068 | OverlapIntervalsTy &IntervalMap = OI.second; |
2069 | Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize); |
2070 | if (IntervalMap.empty()) |
2071 | continue; |
2072 | Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize); |
2073 | } |
2074 | return Changed; |
2075 | } |
2076 | |
2077 | /// Eliminates writes to locations where the value that is being written |
2078 | /// is already stored at the same location. |
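  // For illustration:
  //   store i32 0, ptr %p
  //   ...                     ; no intervening clobber of %p
  //   store i32 0, ptr %p     ; redundant, the same value is already stored
  // The second store is removed. A store whose value is a repetition of the
  // byte an earlier covering memset wrote is handled the same way.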
2079 | bool eliminateRedundantStoresOfExistingValues() { |
2080 | bool MadeChange = false; |
2081 | LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the " |
2082 | "already existing value\n" ); |
2083 | for (auto *Def : MemDefs) { |
2084 | if (SkipStores.contains(Ptr: Def) || MSSA.isLiveOnEntryDef(MA: Def)) |
2085 | continue; |
2086 | |
2087 | Instruction *DefInst = Def->getMemoryInst(); |
2088 | auto MaybeDefLoc = getLocForWrite(I: DefInst); |
2089 | if (!MaybeDefLoc || !isRemovable(I: DefInst)) |
2090 | continue; |
2091 | |
2092 | MemoryDef *UpperDef; |
2093 | // To conserve compile-time, we avoid walking to the next clobbering def. |
2094 | // Instead, we just try to get the optimized access, if it exists. DSE |
2095 | // will try to optimize defs during the earlier traversal. |
2096 | if (Def->isOptimized()) |
2097 | UpperDef = dyn_cast<MemoryDef>(Val: Def->getOptimized()); |
2098 | else |
2099 | UpperDef = dyn_cast<MemoryDef>(Val: Def->getDefiningAccess()); |
2100 | if (!UpperDef || MSSA.isLiveOnEntryDef(MA: UpperDef)) |
2101 | continue; |
2102 | |
2103 | Instruction *UpperInst = UpperDef->getMemoryInst(); |
2104 | auto IsRedundantStore = [&]() { |
2105 | if (DefInst->isIdenticalTo(I: UpperInst)) |
2106 | return true; |
2107 | if (auto *MemSetI = dyn_cast<MemSetInst>(Val: UpperInst)) { |
2108 | if (auto *SI = dyn_cast<StoreInst>(Val: DefInst)) { |
2109 | // MemSetInst must have a write location. |
2110 | auto UpperLoc = getLocForWrite(I: UpperInst); |
2111 | if (!UpperLoc) |
2112 | return false; |
2113 | int64_t InstWriteOffset = 0; |
2114 | int64_t DepWriteOffset = 0; |
2115 | auto OR = isOverwrite(KillingI: UpperInst, DeadI: DefInst, KillingLoc: *UpperLoc, DeadLoc: *MaybeDefLoc, |
2116 | KillingOff&: InstWriteOffset, DeadOff&: DepWriteOffset); |
2117 | Value *StoredByte = isBytewiseValue(V: SI->getValueOperand(), DL); |
2118 | return StoredByte && StoredByte == MemSetI->getOperand(i_nocapture: 1) && |
2119 | OR == OW_Complete; |
2120 | } |
2121 | } |
2122 | return false; |
2123 | }; |
2124 | |
2125 | if (!IsRedundantStore() || isReadClobber(DefLoc: *MaybeDefLoc, UseInst: DefInst)) |
2126 | continue; |
2127 | LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst |
2128 | << '\n'); |
2129 | deleteDeadInstruction(SI: DefInst); |
2130 | NumRedundantStores++; |
2131 | MadeChange = true; |
2132 | } |
2133 | return MadeChange; |
2134 | } |
2135 | }; |
2136 | |
2137 | static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, |
2138 | DominatorTree &DT, PostDominatorTree &PDT, |
2139 | const TargetLibraryInfo &TLI, |
2140 | const LoopInfo &LI) { |
2141 | bool MadeChange = false; |
2142 | |
2143 | DSEState State(F, AA, MSSA, DT, PDT, TLI, LI); |
2144 | // For each store: |
2145 | for (unsigned I = 0; I < State.MemDefs.size(); I++) { |
2146 | MemoryDef *KillingDef = State.MemDefs[I]; |
2147 | if (State.SkipStores.count(Ptr: KillingDef)) |
2148 | continue; |
2149 | Instruction *KillingI = KillingDef->getMemoryInst(); |
2150 | |
2151 | std::optional<MemoryLocation> MaybeKillingLoc; |
2152 | if (State.isMemTerminatorInst(I: KillingI)) { |
2153 | if (auto KillingLoc = State.getLocForTerminator(I: KillingI)) |
2154 | MaybeKillingLoc = KillingLoc->first; |
2155 | } else { |
2156 | MaybeKillingLoc = State.getLocForWrite(I: KillingI); |
2157 | } |
2158 | |
2159 | if (!MaybeKillingLoc) { |
2160 | LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " |
2161 | << *KillingI << "\n" ); |
2162 | continue; |
2163 | } |
2164 | MemoryLocation KillingLoc = *MaybeKillingLoc; |
2165 | assert(KillingLoc.Ptr && "KillingLoc should not be null" ); |
2166 | const Value *KillingUndObj = getUnderlyingObject(V: KillingLoc.Ptr); |
2167 | LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " |
2168 | << *KillingDef << " (" << *KillingI << ")\n" ); |
2169 | |
2170 | unsigned ScanLimit = MemorySSAScanLimit; |
2171 | unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit; |
2172 | unsigned PartialLimit = MemorySSAPartialStoreLimit; |
2173 | // Worklist of MemoryAccesses that may be killed by KillingDef. |
2174 | SmallSetVector<MemoryAccess *, 8> ToCheck; |
2175 | // Track MemoryAccesses that have been deleted in the loop below, so we can |
2176 | // skip them. Don't use SkipStores for this, which may contain reused |
2177 | // MemoryAccess addresses. |
2178 | SmallPtrSet<MemoryAccess *, 8> Deleted; |
2179 | [[maybe_unused]] unsigned OrigNumSkipStores = State.SkipStores.size(); |
2180 | ToCheck.insert(X: KillingDef->getDefiningAccess()); |
2181 | |
2182 | bool Shortend = false; |
2183 | bool IsMemTerm = State.isMemTerminatorInst(I: KillingI); |
2184 | // Check if MemoryAccesses in the worklist are killed by KillingDef. |
2185 | for (unsigned I = 0; I < ToCheck.size(); I++) { |
2186 | MemoryAccess *Current = ToCheck[I]; |
2187 | if (Deleted.contains(Ptr: Current)) |
2188 | continue; |
2189 | |
2190 | std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef( |
2191 | KillingDef, StartAccess: Current, KillingLoc, KillingUndObj, ScanLimit, |
2192 | WalkerStepLimit, IsMemTerm, PartialLimit); |
2193 | |
2194 | if (!MaybeDeadAccess) { |
2195 | LLVM_DEBUG(dbgs() << " finished walk\n" ); |
2196 | continue; |
2197 | } |
2198 | |
2199 | MemoryAccess *DeadAccess = *MaybeDeadAccess; |
2200 | LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess); |
2201 | if (isa<MemoryPhi>(Val: DeadAccess)) { |
2202 | LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n" ); |
2203 | for (Value *V : cast<MemoryPhi>(Val: DeadAccess)->incoming_values()) { |
2204 | MemoryAccess *IncomingAccess = cast<MemoryAccess>(Val: V); |
2205 | BasicBlock *IncomingBlock = IncomingAccess->getBlock(); |
2206 | BasicBlock *PhiBlock = DeadAccess->getBlock(); |
2207 | |
2208 | // We only consider incoming MemoryAccesses that come before the |
2209 | // MemoryPhi. Otherwise we could discover candidates that do not |
2210 | // strictly dominate our starting def. |
2211 | if (State.PostOrderNumbers[IncomingBlock] > |
2212 | State.PostOrderNumbers[PhiBlock]) |
2213 | ToCheck.insert(X: IncomingAccess); |
2214 | } |
2215 | continue; |
2216 | } |
2217 | auto *DeadDefAccess = cast<MemoryDef>(Val: DeadAccess); |
2218 | Instruction *DeadI = DeadDefAccess->getMemoryInst(); |
2219 | LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n" ); |
2220 | ToCheck.insert(X: DeadDefAccess->getDefiningAccess()); |
2221 | NumGetDomMemoryDefPassed++; |
2222 | |
2223 | if (!DebugCounter::shouldExecute(CounterName: MemorySSACounter)) |
2224 | continue; |
2225 | |
2226 | MemoryLocation DeadLoc = *State.getLocForWrite(I: DeadI); |
2227 | |
2228 | if (IsMemTerm) { |
2229 | const Value *DeadUndObj = getUnderlyingObject(V: DeadLoc.Ptr); |
2230 | if (KillingUndObj != DeadUndObj) |
2231 | continue; |
2232 | LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI |
2233 | << "\n KILLER: " << *KillingI << '\n'); |
2234 | State.deleteDeadInstruction(SI: DeadI, Deleted: &Deleted); |
2235 | ++NumFastStores; |
2236 | MadeChange = true; |
2237 | } else { |
2238 | // Check if DeadI overwrites KillingI. |
2239 | int64_t KillingOffset = 0; |
2240 | int64_t DeadOffset = 0; |
2241 | OverwriteResult OR = State.isOverwrite( |
2242 | KillingI, DeadI, KillingLoc, DeadLoc, KillingOff&: KillingOffset, DeadOff&: DeadOffset); |
2243 | if (OR == OW_MaybePartial) { |
2244 | auto Iter = State.IOLs.insert( |
2245 | KV: std::make_pair<BasicBlock *, InstOverlapIntervalsTy>( |
2246 | x: DeadI->getParent(), y: InstOverlapIntervalsTy())); |
2247 | auto &IOL = Iter.first->second; |
2248 | OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOff: KillingOffset, |
2249 | DeadOff: DeadOffset, DeadI, IOL); |
2250 | } |
2251 | |
2252 | if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) { |
2253 | auto *DeadSI = dyn_cast<StoreInst>(Val: DeadI); |
2254 | auto *KillingSI = dyn_cast<StoreInst>(Val: KillingI); |
        // We are re-using tryToMergePartialOverlappingStores, which requires
        // DeadSI to dominate KillingSI.
        // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2258 | if (DeadSI && KillingSI && DT.dominates(Def: DeadSI, User: KillingSI)) { |
2259 | if (Constant *Merged = tryToMergePartialOverlappingStores( |
2260 | KillingI: KillingSI, DeadI: DeadSI, KillingOffset, DeadOffset, DL: State.DL, |
2261 | AA&: State.BatchAA, DT: &DT)) { |
2262 | |
2263 | // Update stored value of earlier store to merged constant. |
2264 | DeadSI->setOperand(i_nocapture: 0, Val_nocapture: Merged); |
2265 | ++NumModifiedStores; |
2266 | MadeChange = true; |
2267 | |
2268 | Shortend = true; |
2269 | // Remove killing store and remove any outstanding overlap |
2270 | // intervals for the updated store. |
2271 | State.deleteDeadInstruction(SI: KillingSI, Deleted: &Deleted); |
2272 | auto I = State.IOLs.find(Key: DeadSI->getParent()); |
2273 | if (I != State.IOLs.end()) |
2274 | I->second.erase(Val: DeadSI); |
2275 | break; |
2276 | } |
2277 | } |
2278 | } |
2279 | |
2280 | if (OR == OW_Complete) { |
2281 | LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI |
2282 | << "\n KILLER: " << *KillingI << '\n'); |
2283 | State.deleteDeadInstruction(SI: DeadI, Deleted: &Deleted); |
2284 | ++NumFastStores; |
2285 | MadeChange = true; |
2286 | } |
2287 | } |
2288 | } |
2289 | |
2290 | assert(State.SkipStores.size() - OrigNumSkipStores == Deleted.size() && |
2291 | "SkipStores and Deleted out of sync?" ); |
2292 | |
2293 | // Check if the store is a no-op. |
2294 | if (!Shortend && State.storeIsNoop(Def: KillingDef, DefUO: KillingUndObj)) { |
2295 | LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI |
2296 | << '\n'); |
2297 | State.deleteDeadInstruction(SI: KillingI); |
2298 | NumRedundantStores++; |
2299 | MadeChange = true; |
2300 | continue; |
2301 | } |
2302 | |
2303 | // Can we form a calloc from a memset/malloc pair? |
2304 | if (!Shortend && State.tryFoldIntoCalloc(Def: KillingDef, DefUO: KillingUndObj)) { |
2305 | LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n" |
2306 | << " DEAD: " << *KillingI << '\n'); |
2307 | State.deleteDeadInstruction(SI: KillingI); |
2308 | MadeChange = true; |
2309 | continue; |
2310 | } |
2311 | } |
2312 | |
2313 | if (EnablePartialOverwriteTracking) |
2314 | for (auto &KV : State.IOLs) |
2315 | MadeChange |= State.removePartiallyOverlappedStores(IOL&: KV.second); |
2316 | |
2317 | MadeChange |= State.eliminateRedundantStoresOfExistingValues(); |
2318 | MadeChange |= State.eliminateDeadWritesAtEndOfFunction(); |
2319 | |
2320 | while (!State.ToRemove.empty()) { |
2321 | Instruction *DeadInst = State.ToRemove.pop_back_val(); |
2322 | DeadInst->eraseFromParent(); |
2323 | } |
2324 | |
2325 | return MadeChange; |
2326 | } |
2327 | } // end anonymous namespace |
2328 | |
2329 | //===----------------------------------------------------------------------===// |
2330 | // DSE Pass |
2331 | //===----------------------------------------------------------------------===// |
2332 | PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { |
2333 | AliasAnalysis &AA = AM.getResult<AAManager>(IR&: F); |
2334 | const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
2335 | DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
2336 | MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA(); |
2337 | PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F); |
2338 | LoopInfo &LI = AM.getResult<LoopAnalysis>(IR&: F); |
2339 | |
2340 | bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI); |
2341 | |
2342 | #ifdef LLVM_ENABLE_STATS |
2343 | if (AreStatisticsEnabled()) |
2344 | for (auto &I : instructions(F)) |
2345 | NumRemainingStores += isa<StoreInst>(Val: &I); |
2346 | #endif |
2347 | |
2348 | if (!Changed) |
2349 | return PreservedAnalyses::all(); |
2350 | |
2351 | PreservedAnalyses PA; |
2352 | PA.preserveSet<CFGAnalyses>(); |
2353 | PA.preserve<MemorySSAAnalysis>(); |
2354 | PA.preserve<LoopAnalysis>(); |
2355 | return PA; |
2356 | } |
2357 | |