DeadStoreElimination.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp]

1	//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// The code below implements dead store elimination using MemorySSA. It uses
10	// the following general approach: given a MemoryDef, walk upwards to find
11	// clobbering MemoryDefs that may be killed by the starting def. Then check
12	// that there are no uses that may read the location of the original MemoryDef
13	// in between both MemoryDefs. A bit more concretely:
14	//
15	// For all MemoryDefs StartDef:
16	// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17	// upwards.
18	// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19	// checking all uses starting at MaybeDeadAccess and walking until we see
20	// StartDef.
21	// 3. For each found CurrentDef, check that:
22	// 1. There are no barrier instructions between CurrentDef and StartDef (like
23	// throws or stores with ordering constraints).
24	// 2. StartDef is executed whenever CurrentDef is executed.
25	// 3. StartDef completely overwrites CurrentDef.
26	// 4. Erase CurrentDef from the function and MemorySSA.
27	//
28	//===----------------------------------------------------------------------===//
29
30	#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
31	#include "llvm/ADT/APInt.h"
32	#include "llvm/ADT/DenseMap.h"
33	#include "llvm/ADT/MapVector.h"
34	#include "llvm/ADT/PostOrderIterator.h"
35	#include "llvm/ADT/SetVector.h"
36	#include "llvm/ADT/SmallPtrSet.h"
37	#include "llvm/ADT/SmallVector.h"
38	#include "llvm/ADT/Statistic.h"
39	#include "llvm/ADT/StringRef.h"
40	#include "llvm/Analysis/AliasAnalysis.h"
41	#include "llvm/Analysis/AssumptionCache.h"
42	#include "llvm/Analysis/CaptureTracking.h"
43	#include "llvm/Analysis/GlobalsModRef.h"
44	#include "llvm/Analysis/Loads.h"
45	#include "llvm/Analysis/LoopInfo.h"
46	#include "llvm/Analysis/MemoryBuiltins.h"
47	#include "llvm/Analysis/MemoryLocation.h"
48	#include "llvm/Analysis/MemorySSA.h"
49	#include "llvm/Analysis/MemorySSAUpdater.h"
50	#include "llvm/Analysis/MustExecute.h"
51	#include "llvm/Analysis/PostDominators.h"
52	#include "llvm/Analysis/TargetLibraryInfo.h"
53	#include "llvm/Analysis/ValueTracking.h"
54	#include "llvm/IR/Argument.h"
55	#include "llvm/IR/AttributeMask.h"
56	#include "llvm/IR/BasicBlock.h"
57	#include "llvm/IR/Constant.h"
58	#include "llvm/IR/ConstantRangeList.h"
59	#include "llvm/IR/Constants.h"
60	#include "llvm/IR/DataLayout.h"
61	#include "llvm/IR/DebugInfo.h"
62	#include "llvm/IR/Dominators.h"
63	#include "llvm/IR/Function.h"
64	#include "llvm/IR/IRBuilder.h"
65	#include "llvm/IR/InstIterator.h"
66	#include "llvm/IR/InstrTypes.h"
67	#include "llvm/IR/Instruction.h"
68	#include "llvm/IR/Instructions.h"
69	#include "llvm/IR/IntrinsicInst.h"
70	#include "llvm/IR/Module.h"
71	#include "llvm/IR/PassManager.h"
72	#include "llvm/IR/PatternMatch.h"
73	#include "llvm/IR/Value.h"
74	#include "llvm/InitializePasses.h"
75	#include "llvm/Support/Casting.h"
76	#include "llvm/Support/CommandLine.h"
77	#include "llvm/Support/Debug.h"
78	#include "llvm/Support/DebugCounter.h"
79	#include "llvm/Support/ErrorHandling.h"
80	#include "llvm/Support/raw_ostream.h"
81	#include "llvm/Transforms/Scalar.h"
82	#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
83	#include "llvm/Transforms/Utils/BuildLibCalls.h"
84	#include "llvm/Transforms/Utils/Local.h"
85	#include <algorithm>
86	#include <cassert>
87	#include <cstdint>
88	#include <map>
89	#include <optional>
90	#include <utility>
91
92	using namespace llvm;
93	using namespace PatternMatch;
94
95	#define DEBUG_TYPE "dse"
96
97	STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
98	STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
99	STATISTIC(NumFastStores, "Number of stores deleted");
100	STATISTIC(NumFastOther, "Number of other instrs removed");
101	STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
102	STATISTIC(NumModifiedStores, "Number of stores modified");
103	STATISTIC(NumCFGChecks, "Number of stores modified");
104	STATISTIC(NumCFGTries, "Number of stores modified");
105	STATISTIC(NumCFGSuccess, "Number of stores modified");
106	STATISTIC(NumGetDomMemoryDefPassed,
107	"Number of times a valid candidate is returned from getDomMemoryDef");
108	STATISTIC(NumDomMemDefChecks,
109	"Number iterations check for reads in getDomMemoryDef");
110
111	DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
112	"Controls which MemoryDefs are eliminated.");
113
114	static cl::opt<bool>
115	EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
116	cl::init(Val: true), cl::Hidden,
117	cl::desc ("Enable partial-overwrite tracking in DSE"));
118
119	static cl::opt<bool>
120	EnablePartialStoreMerging("enable-dse-partial-store-merging",
121	cl::init(Val: true), cl::Hidden,
122	cl::desc ("Enable partial store merging in DSE"));
123
124	static cl::opt<unsigned>
125	MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(Val: `150`), cl::Hidden,
126	cl::desc ("The number of memory instructions to scan for "
127	"dead store elimination (default = 150)"));
128	static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
129	"dse-memoryssa-walklimit", cl::init(Val: `90`), cl::Hidden,
130	cl::desc ("The maximum number of steps while walking upwards to find "
131	"MemoryDefs that may be killed (default = 90)"));
132
133	static cl::opt<unsigned> MemorySSAPartialStoreLimit(
134	"dse-memoryssa-partial-store-limit", cl::init(Val: `5`), cl::Hidden,
135	cl::desc ("The maximum number candidates that only partially overwrite the "
136	"killing MemoryDef to consider"
137	" (default = 5)"));
138
139	static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
140	"dse-memoryssa-defs-per-block-limit", cl::init(Val: `5000`), cl::Hidden,
141	cl::desc ("The number of MemoryDefs we consider as candidates to eliminated "
142	"other stores per basic block (default = 5000)"));
143
144	static cl::opt<unsigned> MemorySSASameBBStepCost(
145	"dse-memoryssa-samebb-cost", cl::init(Val: `1`), cl::Hidden,
146	cl::desc (
147	"The cost of a step in the same basic block as the killing MemoryDef"
148	"(default = 1)"));
149
150	static cl::opt<unsigned>
151	MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(Val: `5`),
152	cl::Hidden,
153	cl::desc ("The cost of a step in a different basic "
154	"block than the killing MemoryDef"
155	"(default = 5)"));
156
157	static cl::opt<unsigned> MemorySSAPathCheckLimit(
158	"dse-memoryssa-path-check-limit", cl::init(Val: `50`), cl::Hidden,
159	cl::desc ("The maximum number of blocks to check when trying to prove that "
160	"all paths to an exit go through a killing block (default = 50)"));
161
162	// This flags allows or disallows DSE to optimize MemorySSA during its
163	// traversal. Note that DSE optimizing MemorySSA may impact other passes
164	// downstream of the DSE invocation and can lead to issues not being
165	// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
166	// those cases, the flag can be used to check if DSE's MemorySSA optimizations
167	// impact follow-up passes.
168	static cl::opt<bool>
169	OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(Val: true), cl::Hidden,
170	cl::desc ("Allow DSE to optimize memory accesses."));
171
172	// TODO: remove this flag.
173	static cl::opt<bool> EnableInitializesImprovement(
174	"enable-dse-initializes-attr-improvement", cl::init(Val: true), cl::Hidden,
175	cl::desc ("Enable the initializes attr improvement in DSE"));
176
177	//===----------------------------------------------------------------------===//
178	// Helper functions
179	//===----------------------------------------------------------------------===//
180	using OverlapIntervalsTy = std::map<int64_t, int64_t>;
181	using InstOverlapIntervalsTy = MapVector<Instruction *, OverlapIntervalsTy>;
182
183	/// Returns true if the end of this instruction can be safely shortened in
184	/// length.
185	static bool isShortenableAtTheEnd(Instruction *I) {
186	// Don't shorten stores for now
187	if (isa<StoreInst>(Val: I))
188	return false;
189
190	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
191	switch (II->getIntrinsicID()) {
192	default: return false;
193	case Intrinsic::memset:
194	case Intrinsic::memcpy:
195	case Intrinsic::memcpy_element_unordered_atomic:
196	case Intrinsic::memset_element_unordered_atomic:
197	// Do shorten memory intrinsics.
198	// FIXME: Add memmove if it's also safe to transform.
199	return true;
200	}
201	}
202
203	// Don't shorten libcalls calls for now.
204
205	return false;
206	}
207
208	/// Returns true if the beginning of this instruction can be safely shortened
209	/// in length.
210	static bool isShortenableAtTheBeginning(Instruction *I) {
211	// FIXME: Handle only memset for now. Supporting memcpy/memmove should be
212	// easily done by offsetting the source address.
213	return isa<AnyMemSetInst>(Val: I);
214	}
215
216	static std::optional<TypeSize> getPointerSize(const Value *V,
217	const DataLayout &DL,
218	const TargetLibraryInfo &TLI,
219	const Function *F) {
220	uint64_t Size;
221	ObjectSizeOpts Opts;
222	Opts.NullIsUnknownSize = NullPointerIsDefined(F);
223
224	if (getObjectSize(Ptr: V, Size, DL, TLI: &TLI, Opts))
225	return TypeSize::getFixed(ExactSize: Size);
226	return std::nullopt;
227	}
228
229	namespace {
230
231	enum OverwriteResult {
232	OW_Begin,
233	OW_Complete,
234	OW_End,
235	OW_PartialEarlierWithFullLater,
236	OW_MaybePartial,
237	OW_None,
238	OW_Unknown
239	};
240
241	} // end anonymous namespace
242
243	/// Check if two instruction are masked stores that completely
244	/// overwrite one another. More specifically, \p KillingI has to
245	/// overwrite \p DeadI.
246	static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
247	const Instruction *DeadI,
248	BatchAAResults &AA) {
249	const auto *KillingII = dyn_cast<IntrinsicInst>(Val: KillingI);
250	const auto *DeadII = dyn_cast<IntrinsicInst>(Val: DeadI);
251	if (KillingII == nullptr \|\| DeadII == nullptr)
252	return OW_Unknown;
253	if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
254	return OW_Unknown;
255
256	switch (KillingII->getIntrinsicID()) {
257	case Intrinsic::masked_store:
258	case Intrinsic::vp_store: {
259	const DataLayout &DL = KillingII->getDataLayout();
260	auto *KillingTy = KillingII->getArgOperand(i: `0`)->getType();
261	auto *DeadTy = DeadII->getArgOperand(i: `0`)->getType();
262	if (DL.getTypeSizeInBits(Ty: KillingTy) != DL.getTypeSizeInBits(Ty: DeadTy))
263	return OW_Unknown;
264	// Element count.
265	if (cast<VectorType>(Val: KillingTy)->getElementCount() !=
266	cast<VectorType>(Val: DeadTy)->getElementCount())
267	return OW_Unknown;
268	// Pointers.
269	Value *KillingPtr = KillingII->getArgOperand(i: `1`);
270	Value *DeadPtr = DeadII->getArgOperand(i: `1`);
271	if (KillingPtr != DeadPtr && !AA.isMustAlias(V1: KillingPtr, V2: DeadPtr))
272	return OW_Unknown;
273	if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
274	// Masks.
275	// TODO: check that KillingII's mask is a superset of the DeadII's mask.
276	if (KillingII->getArgOperand(i: `2`) != DeadII->getArgOperand(i: `2`))
277	return OW_Unknown;
278	} else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
279	// Masks.
280	// TODO: check that KillingII's mask is a superset of the DeadII's mask.
281	if (KillingII->getArgOperand(i: `2`) != DeadII->getArgOperand(i: `2`))
282	return OW_Unknown;
283	// Lengths.
284	if (KillingII->getArgOperand(i: `3`) != DeadII->getArgOperand(i: `3`))
285	return OW_Unknown;
286	}
287	return OW_Complete;
288	}
289	default:
290	return OW_Unknown;
291	}
292	}
293
294	/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
295	/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
296	/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
297	/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
298	/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
299	/// overwritten by a killing (smaller) store which doesn't write outside the big
300	/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
301	/// NOTE: This function must only be called if both \p KillingLoc and \p
302	/// DeadLoc belong to the same underlying object with valid \p KillingOff and
303	/// \p DeadOff.
304	static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
305	const MemoryLocation &DeadLoc,
306	int64_t KillingOff, int64_t DeadOff,
307	Instruction *DeadI,
308	InstOverlapIntervalsTy &IOL) {
309	const uint64_t KillingSize = KillingLoc.Size.getValue();
310	const uint64_t DeadSize = DeadLoc.Size.getValue();
311	// We may now overlap, although the overlap is not complete. There might also
312	// be other incomplete overlaps, and together, they might cover the complete
313	// dead store.
314	// Note: The correctness of this logic depends on the fact that this function
315	// is not even called providing DepWrite when there are any intervening reads.
316	if (EnablePartialOverwriteTracking &&
317	KillingOff < int64_t(DeadOff + DeadSize) &&
318	int64_t(KillingOff + KillingSize) >= DeadOff) {
319
320	// Insert our part of the overlap into the map.
321	auto &IM = IOL [DeadI];
322	LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
323	<< int64_t(DeadOff + DeadSize) << ") KillingLoc ["
324	<< KillingOff << ", " << int64_t(KillingOff + KillingSize)
325	<< ")\n");
326
327	// Make sure that we only insert non-overlapping intervals and combine
328	// adjacent intervals. The intervals are stored in the map with the ending
329	// offset as the key (in the half-open sense) and the starting offset as
330	// the value.
331	int64_t KillingIntStart = KillingOff;
332	int64_t KillingIntEnd = KillingOff + KillingSize;
333
334	// Find any intervals ending at, or after, KillingIntStart which start
335	// before KillingIntEnd.
336	auto ILI = IM.lower_bound(x: KillingIntStart);
337	if (ILI != IM.end() && ILI ->second <= KillingIntEnd) {
338	// This existing interval is overlapped with the current store somewhere
339	// in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
340	// intervals and adjusting our start and end.
341	KillingIntStart = std::min(a: KillingIntStart, b: ILI ->second);
342	KillingIntEnd = std::max(a: KillingIntEnd, b: ILI ->first);
343	ILI = IM.erase(position: ILI);
344
345	// Continue erasing and adjusting our end in case other previous
346	// intervals are also overlapped with the current store.
347	//
348	// \|--- dead 1 ---\| \|--- dead 2 ---\|
349	// \|------- killing---------\|
350	//
351	while (ILI != IM.end() && ILI ->second <= KillingIntEnd) {
352	assert(ILI->second > KillingIntStart && "Unexpected interval");
353	KillingIntEnd = std::max(a: KillingIntEnd, b: ILI ->first);
354	ILI = IM.erase(position: ILI);
355	}
356	}
357
358	IM [KillingIntEnd] = KillingIntStart;
359
360	ILI = IM.begin();
361	if (ILI ->second <= DeadOff && ILI ->first >= int64_t(DeadOff + DeadSize)) {
362	LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
363	<< DeadOff << ", " << int64_t(DeadOff + DeadSize)
364	<< ") Composite KillingLoc [" << ILI->second << ", "
365	<< ILI->first << ")\n");
366	++NumCompletePartials;
367	return OW_Complete;
368	}
369	}
370
371	// Check for a dead store which writes to all the memory locations that
372	// the killing store writes to.
373	if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
374	int64_t(DeadOff + DeadSize) > KillingOff &&
375	uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
376	LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
377	<< ", " << int64_t(DeadOff + DeadSize)
378	<< ") by a killing store [" << KillingOff << ", "
379	<< int64_t(KillingOff + KillingSize) << ")\n");
380	// TODO: Maybe come up with a better name?
381	return OW_PartialEarlierWithFullLater;
382	}
383
384	// Another interesting case is if the killing store overwrites the end of the
385	// dead store.
386	//
387	// \|--dead--\|
388	// \|-- killing --\|
389	//
390	// In this case we may want to trim the size of dead store to avoid
391	// generating stores to addresses which will definitely be overwritten killing
392	// store.
393	if (!EnablePartialOverwriteTracking &&
394	(KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
395	int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
396	return OW_End;
397
398	// Finally, we also need to check if the killing store overwrites the
399	// beginning of the dead store.
400	//
401	// \|--dead--\|
402	// \|-- killing --\|
403	//
404	// In this case we may want to move the destination address and trim the size
405	// of dead store to avoid generating stores to addresses which will definitely
406	// be overwritten killing store.
407	if (!EnablePartialOverwriteTracking &&
408	(KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
409	assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
410	"Expect to be handled as OW_Complete");
411	return OW_Begin;
412	}
413	// Otherwise, they don't completely overlap.
414	return OW_Unknown;
415	}
416
417	/// Returns true if the memory which is accessed by the second instruction is not
418	/// modified between the first and the second instruction.
419	/// Precondition: Second instruction must be dominated by the first
420	/// instruction.
421	static bool
422	memoryIsNotModifiedBetween(Instruction FirstI, Instruction SecondI,
423	BatchAAResults &AA, const DataLayout &DL,
424	DominatorTree *DT) {
425	// Do a backwards scan through the CFG from SecondI to FirstI. Look for
426	// instructions which can modify the memory location accessed by SecondI.
427	//
428	// While doing the walk keep track of the address to check. It might be
429	// different in different basic blocks due to PHI translation.
430	using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
431	SmallVector<BlockAddressPair, `16`> WorkList;
432	// Keep track of the address we visited each block with. Bail out if we
433	// visit a block with different addresses.
434	DenseMap<BasicBlock , Value > Visited;
435
436	BasicBlock::iterator FirstBBI(FirstI);
437	++FirstBBI;
438	BasicBlock::iterator SecondBBI(SecondI);
439	BasicBlock *FirstBB = FirstI->getParent();
440	BasicBlock *SecondBB = SecondI->getParent();
441	MemoryLocation MemLoc;
442	if (auto *MemSet = dyn_cast<MemSetInst>(Val: SecondI))
443	MemLoc = MemoryLocation::getForDest(MI: MemSet);
444	else
445	MemLoc = MemoryLocation::get(Inst: SecondI);
446
447	auto MemLocPtr = const_cast<Value >(MemLoc.Ptr);
448
449	// Start checking the SecondBB.
450	WorkList.push_back(
451	Elt: std::make_pair(x&: SecondBB, y: PHITransAddr (MemLocPtr, DL, nullptr)));
452	bool isFirstBlock = true;
453
454	// Check all blocks going backward until we reach the FirstBB.
455	while (!WorkList.empty()) {
456	BlockAddressPair Current = WorkList.pop_back_val();
457	BasicBlock *B = Current.first;
458	PHITransAddr &Addr = Current.second;
459	Value *Ptr = Addr.getAddr();
460
461	// Ignore instructions before FirstI if this is the FirstBB.
462	BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
463
464	BasicBlock::iterator EI;
465	if (isFirstBlock) {
466	// Ignore instructions after SecondI if this is the first visit of SecondBB.
467	assert(B == SecondBB && "first block is not the store block");
468	EI = SecondBBI;
469	isFirstBlock = false;
470	} else {
471	// It's not SecondBB or (in case of a loop) the second visit of SecondBB.
472	// In this case we also have to look at instructions after SecondI.
473	EI = B->end();
474	}
475	for (; BI != EI; ++BI) {
476	Instruction I = &BI;
477	if (I->mayWriteToMemory() && I != SecondI)
478	if (isModSet(MRI: AA.getModRefInfo(I, OptLoc: MemLoc.getWithNewPtr(NewPtr: Ptr))))
479	return false;
480	}
481	if (B != FirstBB) {
482	assert(B != &FirstBB->getParent()->getEntryBlock() &&
483	"Should not hit the entry block because SI must be dominated by LI");
484	for (BasicBlock *Pred : predecessors(BB: B)) {
485	PHITransAddr PredAddr = Addr;
486	if (PredAddr.needsPHITranslationFromBlock(BB: B)) {
487	if (!PredAddr.isPotentiallyPHITranslatable())
488	return false;
489	if (!PredAddr.translateValue(CurBB: B, PredBB: Pred, DT, MustDominate: false))
490	return false;
491	}
492	Value *TranslatedPtr = PredAddr.getAddr();
493	auto Inserted = Visited.insert(KV: std::make_pair(x&: Pred, y&: TranslatedPtr));
494	if (!Inserted.second) {
495	// We already visited this block before. If it was with a different
496	// address - bail out!
497	if (TranslatedPtr != Inserted.first ->second)
498	return false;
499	// ... otherwise just skip it.
500	continue;
501	}
502	WorkList.push_back(Elt: std::make_pair(x&: Pred, y&: PredAddr));
503	}
504	}
505	}
506	return true;
507	}
508
509	static void shortenAssignment(Instruction Inst, Value OriginalDest,
510	uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
511	uint64_t NewSizeInBits, bool IsOverwriteEnd) {
512	const DataLayout &DL = Inst->getDataLayout();
513	uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
514	uint64_t DeadSliceOffsetInBits =
515	OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : `0`);
516	auto SetDeadFragExpr = [](auto *Assign,
517	DIExpression::FragmentInfo DeadFragment) {
518	// createFragmentExpression expects an offset relative to the existing
519	// fragment offset if there is one.
520	uint64_t RelativeOffset = DeadFragment.OffsetInBits -
521	Assign->getExpression()
522	->getFragmentInfo()
523	.value_or(DIExpression::FragmentInfo (`0`, `0`))
524	.OffsetInBits;
525	if (auto NewExpr = DIExpression::createFragmentExpression(
526	Expr: Assign->getExpression(), OffsetInBits: RelativeOffset, SizeInBits: DeadFragment.SizeInBits)) {
527	Assign->setExpression(*NewExpr);
528	return;
529	}
530	// Failed to create a fragment expression for this so discard the value,
531	// making this a kill location.
532	auto Expr = DIExpression::createFragmentExpression(
533	Expr: DIExpression::get(Context&: Assign->getContext(), Elements: {}), OffsetInBits: DeadFragment.OffsetInBits,
534	SizeInBits: DeadFragment.SizeInBits);
535	Assign->setExpression(Expr);
536	Assign->setKillLocation();
537	};
538
539	// A DIAssignID to use so that the inserted dbg.assign intrinsics do not
540	// link to any instructions. Created in the loop below (once).
541	DIAssignID LinkToNothing = nullptr*;
542	LLVMContext &Ctx = Inst->getContext();
543	auto GetDeadLink = [&Ctx, &LinkToNothing]() {
544	if (!LinkToNothing)
545	LinkToNothing = DIAssignID::getDistinct(Context&: Ctx);
546	return LinkToNothing;
547	};
548
549	// Insert an unlinked dbg.assign intrinsic for the dead fragment after each
550	// overlapping dbg.assign intrinsic.
551	for (DbgVariableRecord *Assign : at::getDVRAssignmentMarkers(Inst)) {
552	std::optional<DIExpression::FragmentInfo> NewFragment;
553	if (!at::calculateFragmentIntersect(DL, Dest: OriginalDest, SliceOffsetInBits: DeadSliceOffsetInBits,
554	SliceSizeInBits: DeadSliceSizeInBits, DVRAssign: Assign,
555	Result&: NewFragment) \|\|
556	!NewFragment) {
557	// We couldn't calculate the intersecting fragment for some reason. Be
558	// cautious and unlink the whole assignment from the store.
559	Assign->setKillAddress();
560	Assign->setAssignId(GetDeadLink ());
561	continue;
562	}
563	// No intersect.
564	if (NewFragment ->SizeInBits == `0`)
565	continue;
566
567	// Fragments overlap: insert a new dbg.assign for this dead part.
568	auto NewAssign = static_cast<decltype*(Assign)>(Assign->clone());
569	NewAssign->insertAfter(InsertAfter: Assign->getIterator());
570	NewAssign->setAssignId(GetDeadLink ());
571	if (NewFragment)
572	SetDeadFragExpr (NewAssign, *NewFragment);
573	NewAssign->setKillAddress();
574	}
575	}
576
577	/// Update the attributes given that a memory access is updated (the
578	/// dereferenced pointer could be moved forward when shortening a
579	/// mem intrinsic).
580	static void adjustArgAttributes(AnyMemIntrinsic Intrinsic, unsigned* ArgNo,
581	uint64_t PtrOffset) {
582	// Remember old attributes.
583	AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);
584
585	// Find attributes that should be kept, and remove the rest.
586	AttributeMask AttrsToRemove;
587	for (auto &Attr : OldAttrs) {
588	if (Attr.hasKindAsEnum()) {
589	switch (Attr.getKindAsEnum()) {
590	default:
591	break;
592	case Attribute::Alignment:
593	// Only keep alignment if PtrOffset satisfy the alignment.
594	if (isAligned(Lhs: Attr.getAlignment().valueOrOne(), SizeInBytes: PtrOffset))
595	continue;
596	break;
597	case Attribute::Dereferenceable:
598	case Attribute::DereferenceableOrNull:
599	// We could reduce the size of these attributes according to
600	// PtrOffset. But we simply drop these for now.
601	break;
602	case Attribute::NonNull:
603	case Attribute::NoUndef:
604	continue;
605	}
606	}
607	AttrsToRemove.addAttribute(A: Attr);
608	}
609
610	// Remove the attributes that should be dropped.
611	Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
612	}
613
614	static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
615	uint64_t &DeadSize, int64_t KillingStart,
616	uint64_t KillingSize, bool IsOverwriteEnd) {
617	auto *DeadIntrinsic = cast<AnyMemIntrinsic>(Val: DeadI);
618	Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
619
620	// We assume that memet/memcpy operates in chunks of the "largest" native
621	// type size and aligned on the same value. That means optimal start and size
622	// of memset/memcpy should be modulo of preferred alignment of that type. That
623	// is it there is no any sense in trying to reduce store size any further
624	// since any "extra" stores comes for free anyway.
625	// On the other hand, maximum alignment we can achieve is limited by alignment
626	// of initial store.
627
628	// TODO: Limit maximum alignment by preferred (or abi?) alignment of the
629	// "largest" native type.
630	// Note: What is the proper way to get that value?
631	// Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
632	// PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
633
634	int64_t ToRemoveStart = `0`;
635	uint64_t ToRemoveSize = `0`;
636	// Compute start and size of the region to remove. Make sure 'PrefAlign' is
637	// maintained on the remaining store.
638	if (IsOverwriteEnd) {
639	// Calculate required adjustment for 'KillingStart' in order to keep
640	// remaining store size aligned on 'PerfAlign'.
641	uint64_t Off =
642	offsetToAlignment(Value: uint64_t(KillingStart - DeadStart), Alignment: PrefAlign);
643	ToRemoveStart = KillingStart + Off;
644	if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
645	return false;
646	ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
647	} else {
648	ToRemoveStart = DeadStart;
649	assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
650	"Not overlapping accesses?");
651	ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
652	// Calculate required adjustment for 'ToRemoveSize'in order to keep
653	// start of the remaining store aligned on 'PerfAlign'.
654	uint64_t Off = offsetToAlignment(Value: ToRemoveSize, Alignment: PrefAlign);
655	if (Off != `0`) {
656	if (ToRemoveSize <= (PrefAlign.value() - Off))
657	return false;
658	ToRemoveSize -= PrefAlign.value() - Off;
659	}
660	assert(isAligned(PrefAlign, ToRemoveSize) &&
661	"Should preserve selected alignment");
662	}
663
664	assert(ToRemoveSize > `0` && "Shouldn't reach here if nothing to remove");
665	assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
666
667	uint64_t NewSize = DeadSize - ToRemoveSize;
668	if (DeadIntrinsic->isAtomic()) {
669	// When shortening an atomic memory intrinsic, the newly shortened
670	// length must remain an integer multiple of the element size.
671	const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes();
672	if (`0` != NewSize % ElementSize)
673	return false;
674	}
675
676	LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
677	<< (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
678	<< "\n KILLER [" << ToRemoveStart << ", "
679	<< int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
680
681	DeadIntrinsic->setLength(NewSize);
682	DeadIntrinsic->setDestAlignment(PrefAlign);
683
684	Value *OrigDest = DeadIntrinsic->getRawDest();
685	if (!IsOverwriteEnd) {
686	Value *Indices[`1`] = {
687	ConstantInt::get(Ty: DeadIntrinsic->getLength()->getType(), V: ToRemoveSize)};
688	Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
689	PointeeType: Type::getInt8Ty(C&: DeadIntrinsic->getContext()), Ptr: OrigDest, IdxList: Indices, NameStr: "",
690	InsertBefore: DeadI->getIterator());
691	NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
692	DeadIntrinsic->setDest(NewDestGEP);
693	adjustArgAttributes(Intrinsic: DeadIntrinsic, ArgNo: `0`, PtrOffset: ToRemoveSize);
694	}
695
696	// Update attached dbg.assign intrinsics. Assume 8-bit byte.
697	shortenAssignment(Inst: DeadI, OriginalDest: OrigDest, OldOffsetInBits: DeadStart * `8`, OldSizeInBits: DeadSize * `8`, NewSizeInBits: NewSize * `8`,
698	IsOverwriteEnd);
699
700	// Finally update start and size of dead access.
701	if (!IsOverwriteEnd)
702	DeadStart += ToRemoveSize;
703	DeadSize = NewSize;
704
705	return true;
706	}
707
708	static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
709	int64_t &DeadStart, uint64_t &DeadSize) {
710	if (IntervalMap.empty() \|\| !isShortenableAtTheEnd(I: DeadI))
711	return false;
712
713	OverlapIntervalsTy::iterator OII = --IntervalMap.end();
714	int64_t KillingStart = OII ->second;
715	uint64_t KillingSize = OII ->first - KillingStart;
716
717	assert(OII->first - KillingStart >= `0` && "Size expected to be positive");
718
719	if (KillingStart > DeadStart &&
720	// Note: "KillingStart - KillingStart" is known to be positive due to
721	// preceding check.
722	(uint64_t)(KillingStart - DeadStart) < DeadSize &&
723	// Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
724	// be non negative due to preceding checks.
725	KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
726	if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
727	IsOverwriteEnd: true)) {
728	IntervalMap.erase(position: OII);
729	return true;
730	}
731	}
732	return false;
733	}
734
735	static bool tryToShortenBegin(Instruction *DeadI,
736	OverlapIntervalsTy &IntervalMap,
737	int64_t &DeadStart, uint64_t &DeadSize) {
738	if (IntervalMap.empty() \|\| !isShortenableAtTheBeginning(I: DeadI))
739	return false;
740
741	OverlapIntervalsTy::iterator OII = IntervalMap.begin();
742	int64_t KillingStart = OII ->second;
743	uint64_t KillingSize = OII ->first - KillingStart;
744
745	assert(OII->first - KillingStart >= `0` && "Size expected to be positive");
746
747	if (KillingStart <= DeadStart &&
748	// Note: "DeadStart - KillingStart" is known to be non negative due to
749	// preceding check.
750	KillingSize > (uint64_t)(DeadStart - KillingStart)) {
751	// Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
752	// be positive due to preceding checks.
753	assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
754	"Should have been handled as OW_Complete");
755	if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
756	IsOverwriteEnd: false)) {
757	IntervalMap.erase(position: OII);
758	return true;
759	}
760	}
761	return false;
762	}
763
764	static Constant *
765	tryToMergePartialOverlappingStores(StoreInst KillingI, StoreInst DeadI,
766	int64_t KillingOffset, int64_t DeadOffset,
767	const DataLayout &DL, BatchAAResults &AA,
768	DominatorTree *DT) {
769
770	if (DeadI && isa<ConstantInt>(Val: DeadI->getValueOperand()) &&
771	DL.typeSizeEqualsStoreSize(Ty: DeadI->getValueOperand()->getType()) &&
772	KillingI && isa<ConstantInt>(Val: KillingI->getValueOperand()) &&
773	DL.typeSizeEqualsStoreSize(Ty: KillingI->getValueOperand()->getType()) &&
774	memoryIsNotModifiedBetween(FirstI: DeadI, SecondI: KillingI, AA, DL, DT)) {
775	// If the store we find is:
776	// a) partially overwritten by the store to 'Loc'
777	// b) the killing store is fully contained in the dead one and
778	// c) they both have a constant value
779	// d) none of the two stores need padding
780	// Merge the two stores, replacing the dead store's value with a
781	// merge of both values.
782	// TODO: Deal with other constant types (vectors, etc), and probably
783	// some mem intrinsics (if needed)
784
785	APInt DeadValue = cast<ConstantInt>(Val: DeadI->getValueOperand())->getValue();
786	APInt KillingValue =
787	cast<ConstantInt>(Val: KillingI->getValueOperand())->getValue();
788	unsigned KillingBits = KillingValue.getBitWidth();
789	assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
790	KillingValue = KillingValue.zext(width: DeadValue.getBitWidth());
791
792	// Offset of the smaller store inside the larger store
793	unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * `8`;
794	unsigned LShiftAmount =
795	DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
796	: BitOffsetDiff;
797	APInt Mask = APInt::getBitsSet(numBits: DeadValue.getBitWidth(), loBit: LShiftAmount,
798	hiBit: LShiftAmount + KillingBits);
799	// Clear the bits we'll be replacing, then OR with the smaller
800	// store, shifted appropriately.
801	APInt Merged = (DeadValue & ~Mask) \| (KillingValue << LShiftAmount);
802	LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
803	<< "\n Killing: " << *KillingI
804	<< "\n Merged Value: " << Merged << `'\n'`);
805	return ConstantInt::get(Ty: DeadI->getValueOperand()->getType(), V: Merged);
806	}
807	return nullptr;
808	}
809
810	// Returns true if \p I is an intrinsic that does not read or write memory.
811	static bool isNoopIntrinsic(Instruction *I) {
812	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
813	switch (II->getIntrinsicID()) {
814	case Intrinsic::lifetime_start:
815	case Intrinsic::lifetime_end:
816	case Intrinsic::invariant_end:
817	case Intrinsic::launder_invariant_group:
818	case Intrinsic::assume:
819	return true;
820	case Intrinsic::dbg_declare:
821	case Intrinsic::dbg_label:
822	case Intrinsic::dbg_value:
823	llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
824	default:
825	return false;
826	}
827	}
828	return false;
829	}
830
831	// Check if we can ignore \p D for DSE.
832	static bool canSkipDef(MemoryDef D, bool* DefVisibleToCaller) {
833	Instruction *DI = D->getMemoryInst();
834	// Calls that only access inaccessible memory cannot read or write any memory
835	// locations we consider for elimination.
836	if (auto *CB = dyn_cast<CallBase>(Val: DI))
837	if (CB->onlyAccessesInaccessibleMemory())
838	return true;
839
840	// We can eliminate stores to locations not visible to the caller across
841	// throwing instructions.
842	if (DI->mayThrow() && !DefVisibleToCaller)
843	return true;
844
845	// We can remove the dead stores, irrespective of the fence and its ordering
846	// (release/acquire/seq_cst). Fences only constraints the ordering of
847	// already visible stores, it does not make a store visible to other
848	// threads. So, skipping over a fence does not change a store from being
849	// dead.
850	if (isa<FenceInst>(Val: DI))
851	return true;
852
853	// Skip intrinsics that do not really read or modify memory.
854	if (isNoopIntrinsic(I: DI))
855	return true;
856
857	return false;
858	}
859
860	namespace {
861
862	// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
863	// defined by `MemDef`.
864	struct MemoryLocationWrapper {
865	MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
866	bool DefByInitializesAttr)
867	: MemLoc (MemLoc), MemDef(MemDef),
868	DefByInitializesAttr(DefByInitializesAttr) {
869	assert(MemLoc.Ptr && "MemLoc should be not null");
870	UnderlyingObject = getUnderlyingObject(V: MemLoc.Ptr);
871	DefInst = MemDef->getMemoryInst();
872	}
873
874	MemoryLocation MemLoc;
875	const Value *UnderlyingObject;
876	MemoryDef *MemDef;
877	Instruction *DefInst;
878	bool DefByInitializesAttr = false;
879	};
880
881	// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
882	// defined by this MemoryDef.
883	struct MemoryDefWrapper {
884	MemoryDefWrapper(MemoryDef *MemDef,
885	ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
886	DefInst = MemDef->getMemoryInst();
887	for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
888	DefinedLocations.push_back(
889	Elt: MemoryLocationWrapper (MemLoc, MemDef, DefByInitializesAttr));
890	}
891	Instruction *DefInst;
892	SmallVector<MemoryLocationWrapper, `1`> DefinedLocations;
893	};
894
895	struct ArgumentInitInfo {
896	unsigned Idx;
897	bool IsDeadOrInvisibleOnUnwind;
898	ConstantRangeList Inits;
899	};
900	} // namespace
901
902	static bool hasInitializesAttr(Instruction *I) {
903	CallBase *CB = dyn_cast<CallBase>(Val: I);
904	return CB && CB->getArgOperandWithAttribute(Kind: Attribute::Initializes);
905	}
906
907	// Return the intersected range list of the initializes attributes of "Args".
908	// "Args" are call arguments that alias to each other.
909	// If any argument in "Args" doesn't have dead_on_unwind attr and
910	// "CallHasNoUnwindAttr" is false, return empty.
911	static ConstantRangeList
912	getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
913	bool CallHasNoUnwindAttr) {
914	if (Args.empty())
915	return {};
916
917	// To address unwind, the function should have nounwind attribute or the
918	// arguments have dead or invisible on unwind. Otherwise, return empty.
919	for (const auto &Arg : Args) {
920	if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
921	return {};
922	if (Arg.Inits.empty())
923	return {};
924	}
925
926	ConstantRangeList IntersectedIntervals = Args.front().Inits;
927	for (auto &Arg : Args.drop_front())
928	IntersectedIntervals = IntersectedIntervals.intersectWith(CRL: Arg.Inits);
929
930	return IntersectedIntervals;
931	}
932
933	namespace {
934
935	struct DSEState {
936	Function &F;
937	AliasAnalysis &AA;
938	EarliestEscapeAnalysis EA;
939
940	/// The single BatchAA instance that is used to cache AA queries. It will
941	/// not be invalidated over the whole run. This is safe, because:
942	/// 1. Only memory writes are removed, so the alias cache for memory
943	/// locations remains valid.
944	/// 2. No new instructions are added (only instructions removed), so cached
945	/// information for a deleted value cannot be accessed by a re-used new
946	/// value pointer.
947	BatchAAResults BatchAA;
948
949	MemorySSA &MSSA;
950	DominatorTree &DT;
951	PostDominatorTree &PDT;
952	const TargetLibraryInfo &TLI;
953	const DataLayout &DL;
954	const LoopInfo &LI;
955
956	// Whether the function contains any irreducible control flow, useful for
957	// being accurately able to detect loops.
958	bool ContainsIrreducibleLoops;
959
960	// All MemoryDefs that potentially could kill other MemDefs.
961	SmallVector<MemoryDef *, `64`> MemDefs;
962	// Any that should be skipped as they are already deleted
963	SmallPtrSet<MemoryAccess *, `4`> SkipStores;
964	// Keep track whether a given object is captured before return or not.
965	DenseMap<const Value , bool*> CapturedBeforeReturn;
966	// Keep track of all of the objects that are invisible to the caller after
967	// the function returns.
968	DenseMap<const Value , bool*> InvisibleToCallerAfterRet;
969	DenseMap<const Value *, uint64_t> InvisibleToCallerAfterRetBounded;
970	// Keep track of blocks with throwing instructions not modeled in MemorySSA.
971	SmallPtrSet<BasicBlock *, `16`> ThrowingBlocks;
972	// Post-order numbers for each basic block. Used to figure out if memory
973	// accesses are executed before another access.
974	DenseMap<BasicBlock , unsigned*> PostOrderNumbers;
975
976	/// Keep track of instructions (partly) overlapping with killing MemoryDefs per
977	/// basic block.
978	MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
979	// Check if there are root nodes that are terminated by UnreachableInst.
980	// Those roots pessimize post-dominance queries. If there are such roots,
981	// fall back to CFG scan starting from all non-unreachable roots.
982	bool AnyUnreachableExit;
983
984	// Whether or not we should iterate on removing dead stores at the end of the
985	// function due to removing a store causing a previously captured pointer to
986	// no longer be captured.
987	bool ShouldIterateEndOfFunctionDSE;
988
989	/// Dead instructions to be removed at the end of DSE.
990	SmallVector<Instruction *> ToRemove;
991
992	// Class contains self-reference, make sure it's not copied/moved.
993	DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
994	PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
995	const LoopInfo &LI);
996	DSEState(const DSEState &) = delete;
997	DSEState &operator=(const DSEState &) = delete;
998
999	LocationSize strengthenLocationSize(const Instruction *I,
1000	LocationSize Size) const;
1001
1002	/// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
1003	/// KillingI instruction) completely overwrites a store to the 'DeadLoc'
1004	/// location (by \p DeadI instruction).
1005	/// Return OW_MaybePartial if \p KillingI does not completely overwrite
1006	/// \p DeadI, but they both write to the same underlying object. In that
1007	/// case, use isPartialOverwrite to check if \p KillingI partially overwrites
1008	/// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
1009	/// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
1010	OverwriteResult isOverwrite(const Instruction *KillingI,
1011	const Instruction *DeadI,
1012	const MemoryLocation &KillingLoc,
1013	const MemoryLocation &DeadLoc,
1014	int64_t &KillingOff, int64_t &DeadOff);
1015
1016	bool isInvisibleToCallerAfterRet(const Value V, const* Value *Ptr,
1017	const LocationSize StoreSize);
1018
1019	bool isInvisibleToCallerOnUnwind(const Value *V);
1020
1021	std::optional<MemoryLocation> getLocForWrite(Instruction I) const*;
1022
1023	// Returns a list of <MemoryLocation, bool> pairs written by I.
1024	// The bool means whether the write is from Initializes attr.
1025	SmallVector<std::pair<MemoryLocation, bool>, `1`>
1026	getLocForInst(Instruction I, bool* ConsiderInitializesAttr);
1027
1028	/// Assuming this instruction has a dead analyzable write, can we delete
1029	/// this instruction?
1030	bool isRemovable(Instruction *I);
1031
1032	/// Returns true if \p UseInst completely overwrites \p DefLoc
1033	/// (stored by \p DefInst).
1034	bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1035	Instruction *UseInst);
1036
1037	/// Returns true if \p Def is not read before returning from the function.
1038	bool isWriteAtEndOfFunction(MemoryDef Def, const* MemoryLocation &DefLoc);
1039
1040	/// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1041	/// pair with the MemoryLocation terminated by \p I and a boolean flag
1042	/// indicating whether \p I is a free-like call.
1043	std::optional<std::pair<MemoryLocation, bool>>
1044	getLocForTerminator(Instruction I) const*;
1045
1046	/// Returns true if \p I is a memory terminator instruction like
1047	/// llvm.lifetime.end or free.
1048	bool isMemTerminatorInst(Instruction I) const*;
1049
1050	/// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1051	/// instruction \p AccessI.
1052	bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1053	Instruction *MaybeTerm);
1054
1055	// Returns true if \p Use may read from \p DefLoc.
1056	bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst);
1057
1058	/// Returns true if a dependency between \p Current and \p KillingDef is
1059	/// guaranteed to be loop invariant for the loops that they are in. Either
1060	/// because they are known to be in the same block, in the same loop level or
1061	/// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1062	/// during execution of the containing function.
1063	bool isGuaranteedLoopIndependent(const Instruction *Current,
1064	const Instruction *KillingDef,
1065	const MemoryLocation &CurrentLoc);
1066
1067	/// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1068	/// loop. In particular, this guarantees that it only references a single
1069	/// MemoryLocation during execution of the containing function.
1070	bool isGuaranteedLoopInvariant(const Value *Ptr);
1071
1072	// Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1073	// with no read access between them or on any other path to a function exit
1074	// block if \p KillingLoc is not accessible after the function returns. If
1075	// there is no such MemoryDef, return std::nullopt. The returned value may not
1076	// (completely) overwrite \p KillingLoc. Currently we bail out when we
1077	// encounter an aliasing MemoryUse (read).
1078	std::optional<MemoryAccess *>
1079	getDomMemoryDef(MemoryDef KillingDef, MemoryAccess StartAccess,
1080	const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1081	unsigned &ScanLimit, unsigned &WalkerStepLimit,
1082	bool IsMemTerm, unsigned &PartialLimit,
1083	bool IsInitializesAttrMemLoc);
1084
1085	/// Delete dead memory defs and recursively add their operands to ToRemove if
1086	/// they became dead.
1087	void
1088	deleteDeadInstruction(Instruction *SI,
1089	SmallPtrSetImpl<MemoryAccess > Deleted = nullptr);
1090
1091	// Check for any extra throws between \p KillingI and \p DeadI that block
1092	// DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1093	// MemoryDef that may throw are handled during the walk from one def to the
1094	// next.
1095	bool mayThrowBetween(Instruction KillingI, Instruction DeadI,
1096	const Value *KillingUndObj);
1097
1098	// Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1099	// instructions act as barriers:
1100	// A memory instruction that may throw and \p KillingI accesses a non-stack*
1101	// object.
1102	// Atomic stores stronger that monotonic.*
1103	bool isDSEBarrier(const Value KillingUndObj, Instruction DeadI);
1104
1105	/// Eliminate writes to objects that are not visible in the caller and are not
1106	/// accessed before returning from the function.
1107	bool eliminateDeadWritesAtEndOfFunction();
1108
1109	/// If we have a zero initializing memset following a call to malloc,
1110	/// try folding it into a call to calloc.
1111	bool tryFoldIntoCalloc(MemoryDef Def, const* Value *DefUO);
1112
1113	// Check if there is a dominating condition, that implies that the value
1114	// being stored in a ptr is already present in the ptr.
1115	bool dominatingConditionImpliesValue(MemoryDef *Def);
1116
1117	/// \returns true if \p Def is a no-op store, either because it
1118	/// directly stores back a loaded value or stores zero to a calloced object.
1119	bool storeIsNoop(MemoryDef Def, const* Value *DefUO);
1120
1121	bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL);
1122
1123	/// Eliminates writes to locations where the value that is being written
1124	/// is already stored at the same location.
1125	bool eliminateRedundantStoresOfExistingValues();
1126
1127	// Return the locations written by the initializes attribute.
1128	// Note that this function considers:
1129	// 1. Unwind edge: use "initializes" attribute only if the callee has
1130	// "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
1131	// or the argument is invisible to caller on unwind. That is, we don't
1132	// perform incorrect DSE on unwind edges in the current function.
1133	// 2. Argument alias: for aliasing arguments, the "initializes" attribute is
1134	// the intersected range list of their "initializes" attributes.
1135	SmallVector<MemoryLocation, `1`> getInitializesArgMemLoc(const Instruction *I);
1136
1137	// Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
1138	// killed by `KillingLocWrapper.MemDef`. Return whether
1139	// any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
1140	std::pair<bool, bool>
1141	eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
1142
1143	// Try to eliminate dead defs killed by `KillingDefWrapper` and return the
1144	// change state: whether make any change.
1145	bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
1146	};
1147
1148	} // end anonymous namespace
1149
1150	static void pushMemUses(MemoryAccess *Acc,
1151	SmallVectorImpl<MemoryAccess *> &WorkList,
1152	SmallPtrSetImpl<MemoryAccess *> &Visited) {
1153	for (Use &U : Acc->uses()) {
1154	auto *MA = cast<MemoryAccess>(Val: U.getUser());
1155	if (Visited.insert(Ptr: MA).second)
1156	WorkList.push_back(Elt: MA);
1157	}
1158	}
1159
1160	// Return true if "Arg" is function local and isn't captured before "CB".
1161	static bool isFuncLocalAndNotCaptured(Value Arg, const* CallBase *CB,
1162	EarliestEscapeAnalysis &EA) {
1163	const Value *UnderlyingObj = getUnderlyingObject(V: Arg);
1164	return isIdentifiedFunctionLocal(V: UnderlyingObj) &&
1165	capturesNothing(
1166	CC: EA.getCapturesBefore(Object: UnderlyingObj, I: CB, /OrAt/ true));
1167	}
1168
1169	DSEState::DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1170	DominatorTree &DT, PostDominatorTree &PDT,
1171	const TargetLibraryInfo &TLI, const LoopInfo &LI)
1172	: F(F), AA(AA), EA (DT, &LI), BatchAA (AA, &EA), MSSA(MSSA), DT(DT), PDT(PDT),
1173	TLI(TLI), DL(F.getDataLayout()), LI(LI) {
1174	// Collect blocks with throwing instructions not modeled in MemorySSA and
1175	// alloc-like objects.
1176	unsigned PO = `0`;
1177	for (BasicBlock *BB : post_order(G: &F)) {
1178	PostOrderNumbers [BB] = PO++;
1179	for (Instruction &I : *BB) {
1180	MemoryAccess *MA = MSSA.getMemoryAccess(I: &I);
1181	if (I.mayThrow() && !MA)
1182	ThrowingBlocks.insert(Ptr: I.getParent());
1183
1184	auto *MD = dyn_cast_or_null<MemoryDef>(Val: MA);
1185	if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
1186	(getLocForWrite(I: &I) \|\| isMemTerminatorInst(I: &I) \|\|
1187	(EnableInitializesImprovement && hasInitializesAttr(I: &I))))
1188	MemDefs.push_back(Elt: MD);
1189	}
1190	}
1191
1192	// Treat byval, inalloca or dead on return arguments the same as Allocas,
1193	// stores to them are dead at the end of the function.
1194	for (Argument &AI : F.args()) {
1195	if (AI.hasPassPointeeByValueCopyAttr()) {
1196	InvisibleToCallerAfterRet.insert(KV: {&AI, true});
1197	continue;
1198	}
1199
1200	if (!AI.getType()->isPointerTy())
1201	continue;
1202
1203	const DeadOnReturnInfo &Info = AI.getDeadOnReturnInfo();
1204	if (Info.coversAllReachableMemory())
1205	InvisibleToCallerAfterRet.insert(KV: {&AI, true});
1206	else if (uint64_t DeadBytes = Info.getNumberOfDeadBytes())
1207	InvisibleToCallerAfterRetBounded.insert(KV: {&AI, DeadBytes});
1208	}
1209
1210	// Collect whether there is any irreducible control flow in the function.
1211	ContainsIrreducibleLoops = mayContainIrreducibleControl(F, LI: &LI);
1212
1213	AnyUnreachableExit = any_of(Range: PDT.roots(), P: [](const BasicBlock *E) {
1214	return isa<UnreachableInst>(Val: E->getTerminator());
1215	});
1216	}
1217
1218	LocationSize DSEState::strengthenLocationSize(const Instruction *I,
1219	LocationSize Size) const {
1220	if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1221	LibFunc F;
1222	if (TLI.getLibFunc(CB: *CB, F) && TLI.has(F) &&
1223	(F == LibFunc_memset_chk \|\| F == LibFunc_memcpy_chk)) {
1224	// Use the precise location size specified by the 3rd argument
1225	// for determining KillingI overwrites DeadLoc if it is a memset_chk
1226	// instruction. memset_chk will write either the amount specified as 3rd
1227	// argument or the function will immediately abort and exit the program.
1228	// NOTE: AA may determine NoAlias if it can prove that the access size
1229	// is larger than the allocation size due to that being UB. To avoid
1230	// returning potentially invalid NoAlias results by AA, limit the use of
1231	// the precise location size to isOverwrite.
1232	if (const auto *Len = dyn_cast<ConstantInt>(Val: CB->getArgOperand(i: `2`)))
1233	return LocationSize::precise(Value: Len->getZExtValue());
1234	}
1235	}
1236	return Size;
1237	}
1238
1239	OverwriteResult DSEState::isOverwrite(const Instruction *KillingI,
1240	const Instruction *DeadI,
1241	const MemoryLocation &KillingLoc,
1242	const MemoryLocation &DeadLoc,
1243	int64_t &KillingOff, int64_t &DeadOff) {
1244	// AliasAnalysis does not always account for loops. Limit overwrite checks
1245	// to dependencies for which we can guarantee they are independent of any
1246	// loops they are in.
1247	if (!isGuaranteedLoopIndependent(Current: DeadI, KillingDef: KillingI, CurrentLoc: DeadLoc))
1248	return OW_Unknown;
1249
1250	LocationSize KillingLocSize =
1251	strengthenLocationSize(I: KillingI, Size: KillingLoc.Size);
1252	const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
1253	const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
1254	const Value *DeadUndObj = getUnderlyingObject(V: DeadPtr);
1255	const Value *KillingUndObj = getUnderlyingObject(V: KillingPtr);
1256
1257	// Check whether the killing store overwrites the whole object, in which
1258	// case the size/offset of the dead store does not matter.
1259	if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
1260	isIdentifiedObject(V: KillingUndObj)) {
1261	std::optional<TypeSize> KillingUndObjSize =
1262	getPointerSize(V: KillingUndObj, DL, TLI, F: &F);
1263	if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
1264	return OW_Complete;
1265	}
1266
1267	// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
1268	// get imprecise values here, though (except for unknown sizes).
1269	if (!KillingLocSize.isPrecise() \|\| !DeadLoc.Size.isPrecise()) {
1270	// In case no constant size is known, try to an IR values for the number
1271	// of bytes written and check if they match.
1272	const auto *KillingMemI = dyn_cast<MemIntrinsic>(Val: KillingI);
1273	const auto *DeadMemI = dyn_cast<MemIntrinsic>(Val: DeadI);
1274	if (KillingMemI && DeadMemI) {
1275	const Value *KillingV = KillingMemI->getLength();
1276	const Value *DeadV = DeadMemI->getLength();
1277	if (KillingV == DeadV && BatchAA.isMustAlias(LocA: DeadLoc, LocB: KillingLoc))
1278	return OW_Complete;
1279	}
1280
1281	// Masked stores have imprecise locations, but we can reason about them
1282	// to some extent.
1283	return isMaskedStoreOverwrite(KillingI, DeadI, AA&: BatchAA);
1284	}
1285
1286	const TypeSize KillingSize = KillingLocSize.getValue();
1287	const TypeSize DeadSize = DeadLoc.Size.getValue();
1288	// Bail on doing Size comparison which depends on AA for now
1289	// TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
1290	const bool AnyScalable = DeadSize.isScalable() \|\| KillingLocSize.isScalable();
1291
1292	if (AnyScalable)
1293	return OW_Unknown;
1294	// Query the alias information
1295	AliasResult AAR = BatchAA.alias(LocA: KillingLoc, LocB: DeadLoc);
1296
1297	// If the start pointers are the same, we just have to compare sizes to see if
1298	// the killing store was larger than the dead store.
1299	if (AAR == AliasResult::MustAlias) {
1300	// Make sure that the KillingSize size is >= the DeadSize size.
1301	if (KillingSize >= DeadSize)
1302	return OW_Complete;
1303	}
1304
1305	// If we hit a partial alias we may have a full overwrite
1306	if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1307	int32_t Off = AAR.getOffset();
1308	if (Off >= `0` && (uint64_t)Off + DeadSize <= KillingSize)
1309	return OW_Complete;
1310	}
1311
1312	// If we can't resolve the same pointers to the same object, then we can't
1313	// analyze them at all.
1314	if (DeadUndObj != KillingUndObj) {
1315	// Non aliasing stores to different objects don't overlap. Note that
1316	// if the killing store is known to overwrite whole object (out of
1317	// bounds access overwrites whole object as well) then it is assumed to
1318	// completely overwrite any store to the same object even if they don't
1319	// actually alias (see next check).
1320	if (AAR == AliasResult::NoAlias)
1321	return OW_None;
1322	return OW_Unknown;
1323	}
1324
1325	// Okay, we have stores to two completely different pointers. Try to
1326	// decompose the pointer into a "base + constant_offset" form. If the base
1327	// pointers are equal, then we can reason about the two stores.
1328	DeadOff = `0`;
1329	KillingOff = `0`;
1330	const Value *DeadBasePtr =
1331	GetPointerBaseWithConstantOffset(Ptr: DeadPtr, Offset&: DeadOff, DL);
1332	const Value *KillingBasePtr =
1333	GetPointerBaseWithConstantOffset(Ptr: KillingPtr, Offset&: KillingOff, DL);
1334
1335	// If the base pointers still differ, we have two completely different
1336	// stores.
1337	if (DeadBasePtr != KillingBasePtr)
1338	return OW_Unknown;
1339
1340	// The killing access completely overlaps the dead store if and only if
1341	// both start and end of the dead one is "inside" the killing one:
1342	// \|<->\|--dead--\|<->\|
1343	// \|-----killing------\|
1344	// Accesses may overlap if and only if start of one of them is "inside"
1345	// another one:
1346	// \|<->\|--dead--\|<-------->\|
1347	// \|-------killing--------\|
1348	// OR
1349	// \|-------dead-------\|
1350	// \|<->\|---killing---\|<----->\|
1351	//
1352	// We have to be careful here as Off is signed while .Size is unsigned.
1353
1354	// Check if the dead access starts "not before" the killing one.
1355	if (DeadOff >= KillingOff) {
1356	// If the dead access ends "not after" the killing access then the
1357	// dead one is completely overwritten by the killing one.
1358	if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1359	return OW_Complete;
1360	// If start of the dead access is "before" end of the killing access
1361	// then accesses overlap.
1362	else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1363	return OW_MaybePartial;
1364	}
1365	// If start of the killing access is "before" end of the dead access then
1366	// accesses overlap.
1367	else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1368	return OW_MaybePartial;
1369	}
1370
1371	// Can reach here only if accesses are known not to overlap.
1372	return OW_None;
1373	}
1374
1375	bool DSEState::isInvisibleToCallerAfterRet(const Value V, const* Value *Ptr,
1376	const LocationSize StoreSize) {
1377	if (isa<AllocaInst>(Val: V))
1378	return true;
1379
1380	auto IBounded = InvisibleToCallerAfterRetBounded.find(Val: V);
1381	if (IBounded != InvisibleToCallerAfterRetBounded.end()) {
1382	int64_t ValueOffset;
1383	[[maybe_unused]] const Value *BaseValue =
1384	GetPointerBaseWithConstantOffset(Ptr, Offset&: ValueOffset, DL);
1385	// If we are not able to find a constant offset from the UO, we have to
1386	// pessimistically assume that the store writes to memory out of the
1387	// dead_on_return bounds.
1388	if (BaseValue != V)
1389	return false;
1390	// This store is only invisible after return if we are in bounds of the
1391	// range marked dead.
1392	if (StoreSize.hasValue() &&
1393	ValueOffset + StoreSize.getValue() <= IBounded ->second &&
1394	ValueOffset >= `0`)
1395	return true;
1396	}
1397	auto I = InvisibleToCallerAfterRet.insert(KV: {V, false});
1398	if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
1399	I.first ->second = capturesNothing(CC: PointerMayBeCaptured(
1400	V, /ReturnCaptures=/true, Mask: CaptureComponents::Provenance));
1401	return I.first ->second;
1402	}
1403
1404	bool DSEState::isInvisibleToCallerOnUnwind(const Value *V) {
1405	bool RequiresNoCaptureBeforeUnwind;
1406	if (!isNotVisibleOnUnwind(Object: V, RequiresNoCaptureBeforeUnwind))
1407	return false;
1408	if (!RequiresNoCaptureBeforeUnwind)
1409	return true;
1410
1411	auto I = CapturedBeforeReturn.insert(KV: {V, true});
1412	if (I.second)
1413	// NOTE: This could be made more precise by PointerMayBeCapturedBefore
1414	// with the killing MemoryDef. But we refrain from doing so for now to
1415	// limit compile-time and this does not cause any changes to the number
1416	// of stores removed on a large test set in practice.
1417	I.first ->second = capturesAnything(CC: PointerMayBeCaptured(
1418	V, /ReturnCaptures=/false, Mask: CaptureComponents::Provenance));
1419	return !I.first ->second;
1420	}
1421
1422	std::optional<MemoryLocation> DSEState::getLocForWrite(Instruction I) const* {
1423	if (!I->mayWriteToMemory())
1424	return std::nullopt;
1425
1426	if (auto *CB = dyn_cast<CallBase>(Val: I))
1427	return MemoryLocation::getForDest(CI: CB, TLI);
1428
1429	return MemoryLocation::getOrNone(Inst: I);
1430	}
1431
1432	SmallVector<std::pair<MemoryLocation, bool>, `1`>
1433	DSEState::getLocForInst(Instruction I, bool* ConsiderInitializesAttr) {
1434	SmallVector<std::pair<MemoryLocation, bool>, `1`> Locations;
1435	if (isMemTerminatorInst(I)) {
1436	if (auto Loc = getLocForTerminator(I))
1437	Locations.push_back(Elt: std::make_pair(x&: Loc ->first, y: false));
1438	return Locations;
1439	}
1440
1441	if (auto Loc = getLocForWrite(I))
1442	Locations.push_back(Elt: std::make_pair(x&: Loc, y: false*));
1443
1444	if (ConsiderInitializesAttr) {
1445	for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1446	Locations.push_back(Elt: std::make_pair(x&: MemLoc, y: true));
1447	}
1448	}
1449	return Locations;
1450	}
1451
1452	bool DSEState::isRemovable(Instruction *I) {
1453	assert(getLocForWrite(I) && "Must have analyzable write");
1454
1455	// Don't remove volatile/atomic stores.
1456	if (StoreInst *SI = dyn_cast<StoreInst>(Val: I))
1457	return SI->isUnordered();
1458
1459	if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1460	// Don't remove volatile memory intrinsics.
1461	if (auto *MI = dyn_cast<MemIntrinsic>(Val: CB))
1462	return !MI->isVolatile();
1463
1464	// Never remove dead lifetime intrinsics, e.g. because they are followed
1465	// by a free.
1466	if (CB->isLifetimeStartOrEnd())
1467	return false;
1468
1469	return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1470	!CB->isTerminator();
1471	}
1472
1473	return false;
1474	}
1475
1476	bool DSEState::isCompleteOverwrite(const MemoryLocation &DefLoc,
1477	Instruction DefInst, Instruction UseInst) {
1478	// UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1479	// MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1480	// MemoryDef.
1481	if (!UseInst->mayWriteToMemory())
1482	return false;
1483
1484	if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1485	if (CB->onlyAccessesInaccessibleMemory())
1486	return false;
1487
1488	int64_t InstWriteOffset, DepWriteOffset;
1489	if (auto CC = getLocForWrite(I: UseInst))
1490	return isOverwrite(KillingI: UseInst, DeadI: DefInst, KillingLoc: *CC, DeadLoc: DefLoc, KillingOff&: InstWriteOffset,
1491	DeadOff&: DepWriteOffset) == OW_Complete;
1492	return false;
1493	}
1494
1495	bool DSEState::isWriteAtEndOfFunction(MemoryDef *Def,
1496	const MemoryLocation &DefLoc) {
1497	LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1498	<< *Def->getMemoryInst()
1499	<< ") is at the end the function \n");
1500	SmallVector<MemoryAccess *, `4`> WorkList;
1501	SmallPtrSet<MemoryAccess *, `8`> Visited;
1502
1503	pushMemUses(Acc: Def, WorkList, Visited);
1504	for (unsigned I = `0`; I < WorkList.size(); I++) {
1505	if (WorkList.size() >= MemorySSAScanLimit) {
1506	LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1507	return false;
1508	}
1509
1510	MemoryAccess *UseAccess = WorkList [I];
1511	if (isa<MemoryPhi>(Val: UseAccess)) {
1512	// AliasAnalysis does not account for loops. Limit elimination to
1513	// candidates for which we can guarantee they always store to the same
1514	// memory location.
1515	if (!isGuaranteedLoopInvariant(Ptr: DefLoc.Ptr))
1516	return false;
1517
1518	pushMemUses(Acc: cast<MemoryPhi>(Val: UseAccess), WorkList, Visited);
1519	continue;
1520	}
1521	// TODO: Checking for aliasing is expensive. Consider reducing the amount
1522	// of times this is called and/or caching it.
1523	Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
1524	if (isReadClobber(DefLoc, UseInst)) {
1525	LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1526	return false;
1527	}
1528
1529	if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess))
1530	pushMemUses(Acc: UseDef, WorkList, Visited);
1531	}
1532	return true;
1533	}
1534
1535	std::optional<std::pair<MemoryLocation, bool>>
1536	DSEState::getLocForTerminator(Instruction I) const* {
1537	if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1538	if (CB->getIntrinsicID() == Intrinsic::lifetime_end)
1539	return {
1540	std::make_pair(x: MemoryLocation::getForArgument(Call: CB, ArgIdx: `0`, TLI: &TLI), y: false)};
1541	if (Value *FreedOp = getFreedOperand(CB, TLI: &TLI))
1542	return {std::make_pair(x: MemoryLocation::getAfter(Ptr: FreedOp), y: true)};
1543	}
1544
1545	return std::nullopt;
1546	}
1547
1548	bool DSEState::isMemTerminatorInst(Instruction I) const* {
1549	auto *CB = dyn_cast<CallBase>(Val: I);
1550	return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end \|\|
1551	getFreedOperand(CB, TLI: &TLI) != nullptr);
1552	}
1553
1554	bool DSEState::isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1555	Instruction *MaybeTerm) {
1556	std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1557	getLocForTerminator(I: MaybeTerm);
1558
1559	if (!MaybeTermLoc)
1560	return false;
1561
1562	// If the terminator is a free-like call, all accesses to the underlying
1563	// object can be considered terminated.
1564	if (getUnderlyingObject(V: Loc.Ptr) !=
1565	getUnderlyingObject(V: MaybeTermLoc ->first.Ptr))
1566	return false;
1567
1568	auto TermLoc = MaybeTermLoc ->first;
1569	if (MaybeTermLoc ->second) {
1570	const Value *LocUO = getUnderlyingObject(V: Loc.Ptr);
1571	return BatchAA.isMustAlias(V1: TermLoc.Ptr, V2: LocUO);
1572	}
1573	int64_t InstWriteOffset = `0`;
1574	int64_t DepWriteOffset = `0`;
1575	return isOverwrite(KillingI: MaybeTerm, DeadI: AccessI, KillingLoc: TermLoc, DeadLoc: Loc, KillingOff&: InstWriteOffset,
1576	DeadOff&: DepWriteOffset) == OW_Complete;
1577	}
1578
1579	bool DSEState::isReadClobber(const MemoryLocation &DefLoc,
1580	Instruction *UseInst) {
1581	if (isNoopIntrinsic(I: UseInst))
1582	return false;
1583
1584	// Monotonic or weaker atomic stores can be re-ordered and do not need to be
1585	// treated as read clobber.
1586	if (auto SI = dyn_cast<StoreInst>(Val: UseInst))
1587	return isStrongerThan(AO: SI->getOrdering(), Other: AtomicOrdering::Monotonic);
1588
1589	if (!UseInst->mayReadFromMemory())
1590	return false;
1591
1592	if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1593	if (CB->onlyAccessesInaccessibleMemory())
1594	return false;
1595
1596	return isRefSet(MRI: BatchAA.getModRefInfo(I: UseInst, OptLoc: DefLoc));
1597	}
1598
1599	bool DSEState::isGuaranteedLoopIndependent(const Instruction *Current,
1600	const Instruction *KillingDef,
1601	const MemoryLocation &CurrentLoc) {
1602	// If the dependency is within the same block or loop level (being careful
1603	// of irreducible loops), we know that AA will return a valid result for the
1604	// memory dependency. (Both at the function level, outside of any loop,
1605	// would also be valid but we currently disable that to limit compile time).
1606	if (Current->getParent() == KillingDef->getParent())
1607	return true;
1608	const Loop *CurrentLI = LI.getLoopFor(BB: Current->getParent());
1609	if (!ContainsIrreducibleLoops && CurrentLI &&
1610	CurrentLI == LI.getLoopFor(BB: KillingDef->getParent()))
1611	return true;
1612	// Otherwise check the memory location is invariant to any loops.
1613	return isGuaranteedLoopInvariant(Ptr: CurrentLoc.Ptr);
1614	}
1615
1616	bool DSEState::isGuaranteedLoopInvariant(const Value *Ptr) {
1617	Ptr = Ptr->stripPointerCasts();
1618	if (auto *GEP = dyn_cast<GEPOperator>(Val: Ptr))
1619	if (GEP->hasAllConstantIndices())
1620	Ptr = GEP->getPointerOperand()->stripPointerCasts();
1621
1622	if (auto *I = dyn_cast<Instruction>(Val: Ptr)) {
1623	return I->getParent()->isEntryBlock() \|\|
1624	(!ContainsIrreducibleLoops && !LI.getLoopFor(BB: I->getParent()));
1625	}
1626	return true;
1627	}
1628
1629	std::optional<MemoryAccess *> DSEState::getDomMemoryDef(
1630	MemoryDef KillingDef, MemoryAccess StartAccess,
1631	const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1632	unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm,
1633	unsigned &PartialLimit, bool IsInitializesAttrMemLoc) {
1634	if (ScanLimit == `0` \|\| WalkerStepLimit == `0`) {
1635	LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1636	return std::nullopt;
1637	}
1638
1639	MemoryAccess *Current = StartAccess;
1640	Instruction *KillingI = KillingDef->getMemoryInst();
1641	LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1642
1643	// Only optimize defining access of KillingDef when directly starting at its
1644	// defining access. The defining access also must only access KillingLoc. At
1645	// the moment we only support instructions with a single write location, so
1646	// it should be sufficient to disable optimizations for instructions that
1647	// also read from memory.
1648	bool CanOptimize = OptimizeMemorySSA &&
1649	KillingDef->getDefiningAccess() == StartAccess &&
1650	!KillingI->mayReadFromMemory();
1651
1652	// Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1653	std::optional<MemoryLocation> CurrentLoc;
1654	for (;; Current = cast<MemoryDef>(Val: Current)->getDefiningAccess()) {
1655	LLVM_DEBUG({
1656	dbgs() << " visiting " << *Current;
1657	if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1658	dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1659	<< ")";
1660	dbgs() << "\n";
1661	});
1662
1663	// Reached TOP.
1664	if (MSSA.isLiveOnEntryDef(MA: Current)) {
1665	LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1666	if (CanOptimize && Current != KillingDef->getDefiningAccess())
1667	// The first clobbering def is... none.
1668	KillingDef->setOptimized(Current);
1669	return std::nullopt;
1670	}
1671
1672	// Cost of a step. Accesses in the same block are more likely to be valid
1673	// candidates for elimination, hence consider them cheaper.
1674	unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1675	? MemorySSASameBBStepCost
1676	: MemorySSAOtherBBStepCost;
1677	if (WalkerStepLimit <= StepCost) {
1678	LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1679	return std::nullopt;
1680	}
1681	WalkerStepLimit -= StepCost;
1682
1683	// Return for MemoryPhis. They cannot be eliminated directly and the
1684	// caller is responsible for traversing them.
1685	if (isa<MemoryPhi>(Val: Current)) {
1686	LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1687	return Current;
1688	}
1689
1690	// Below, check if CurrentDef is a valid candidate to be eliminated by
1691	// KillingDef. If it is not, check the next candidate.
1692	MemoryDef *CurrentDef = cast<MemoryDef>(Val: Current);
1693	Instruction *CurrentI = CurrentDef->getMemoryInst();
1694
1695	if (canSkipDef(D: CurrentDef, DefVisibleToCaller: !isInvisibleToCallerOnUnwind(V: KillingUndObj))) {
1696	CanOptimize = false;
1697	continue;
1698	}
1699
1700	// Before we try to remove anything, check for any extra throwing
1701	// instructions that block us from DSEing
1702	if (mayThrowBetween(KillingI, DeadI: CurrentI, KillingUndObj)) {
1703	LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1704	return std::nullopt;
1705	}
1706
1707	// Check for anything that looks like it will be a barrier to further
1708	// removal
1709	if (isDSEBarrier(KillingUndObj, DeadI: CurrentI)) {
1710	LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1711	return std::nullopt;
1712	}
1713
1714	// If Current is known to be on path that reads DefLoc or is a read
1715	// clobber, bail out, as the path is not profitable. We skip this check
1716	// for intrinsic calls, because the code knows how to handle memcpy
1717	// intrinsics.
1718	if (!isa<IntrinsicInst>(Val: CurrentI) && isReadClobber(DefLoc: KillingLoc, UseInst: CurrentI))
1719	return std::nullopt;
1720
1721	// Quick check if there are direct uses that are read-clobbers.
1722	if (any_of(Range: Current->uses(), P: [this, &KillingLoc, StartAccess](Use &U) {
1723	if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Val: U.getUser()))
1724	return !MSSA.dominates(A: StartAccess, B: UseOrDef) &&
1725	isReadClobber(DefLoc: KillingLoc, UseInst: UseOrDef->getMemoryInst());
1726	return false;
1727	})) {
1728	LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1729	return std::nullopt;
1730	}
1731
1732	// If Current does not have an analyzable write location or is not
1733	// removable, skip it.
1734	CurrentLoc = getLocForWrite(I: CurrentI);
1735	if (!CurrentLoc \|\| !isRemovable(I: CurrentI)) {
1736	CanOptimize = false;
1737	continue;
1738	}
1739
1740	// AliasAnalysis does not account for loops. Limit elimination to
1741	// candidates for which we can guarantee they always store to the same
1742	// memory location and not located in different loops.
1743	if (!isGuaranteedLoopIndependent(Current: CurrentI, KillingDef: KillingI, CurrentLoc: *CurrentLoc)) {
1744	LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1745	CanOptimize = false;
1746	continue;
1747	}
1748
1749	if (IsMemTerm) {
1750	// If the killing def is a memory terminator (e.g. lifetime.end), check
1751	// the next candidate if the current Current does not write the same
1752	// underlying object as the terminator.
1753	if (!isMemTerminator(Loc: *CurrentLoc, AccessI: CurrentI, MaybeTerm: KillingI)) {
1754	CanOptimize = false;
1755	continue;
1756	}
1757	} else {
1758	int64_t KillingOffset = `0`;
1759	int64_t DeadOffset = `0`;
1760	auto OR = isOverwrite(KillingI, DeadI: CurrentI, KillingLoc, DeadLoc: *CurrentLoc,
1761	KillingOff&: KillingOffset, DeadOff&: DeadOffset);
1762	if (CanOptimize) {
1763	// CurrentDef is the earliest write clobber of KillingDef. Use it as
1764	// optimized access. Do not optimize if CurrentDef is already the
1765	// defining access of KillingDef.
1766	if (CurrentDef != KillingDef->getDefiningAccess() &&
1767	(OR == OW_Complete \|\| OR == OW_MaybePartial))
1768	KillingDef->setOptimized(CurrentDef);
1769
1770	// Once a may-aliasing def is encountered do not set an optimized
1771	// access.
1772	if (OR != OW_None)
1773	CanOptimize = false;
1774	}
1775
1776	// If Current does not write to the same object as KillingDef, check
1777	// the next candidate.
1778	if (OR == OW_Unknown \|\| OR == OW_None)
1779	continue;
1780	else if (OR == OW_MaybePartial) {
1781	// If KillingDef only partially overwrites Current, check the next
1782	// candidate if the partial step limit is exceeded. This aggressively
1783	// limits the number of candidates for partial store elimination,
1784	// which are less likely to be removable in the end.
1785	if (PartialLimit <= `1`) {
1786	WalkerStepLimit -= `1`;
1787	LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with "
1788	"next access\n");
1789	continue;
1790	}
1791	PartialLimit -= `1`;
1792	}
1793	}
1794	break;
1795	};
1796
1797	// Accesses to objects accessible after the function returns can only be
1798	// eliminated if the access is dead along all paths to the exit. Collect
1799	// the blocks with killing (=completely overwriting MemoryDefs) and check if
1800	// they cover all paths from MaybeDeadAccess to any function exit.
1801	SmallPtrSet<Instruction *, `16`> KillingDefs;
1802	KillingDefs.insert(Ptr: KillingDef->getMemoryInst());
1803	MemoryAccess *MaybeDeadAccess = Current;
1804	MemoryLocation MaybeDeadLoc = *CurrentLoc;
1805	Instruction *MaybeDeadI = cast<MemoryDef>(Val: MaybeDeadAccess)->getMemoryInst();
1806	LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1807	<< *MaybeDeadI << ")\n");
1808
1809	SmallVector<MemoryAccess *, `32`> WorkList;
1810	SmallPtrSet<MemoryAccess *, `32`> Visited;
1811	pushMemUses(Acc: MaybeDeadAccess, WorkList, Visited);
1812
1813	// Check if DeadDef may be read.
1814	for (unsigned I = `0`; I < WorkList.size(); I++) {
1815	MemoryAccess *UseAccess = WorkList [I];
1816
1817	LLVM_DEBUG(dbgs() << " " << *UseAccess);
1818	// Bail out if the number of accesses to check exceeds the scan limit.
1819	if (ScanLimit < (WorkList.size() - I)) {
1820	LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1821	return std::nullopt;
1822	}
1823	--ScanLimit;
1824	NumDomMemDefChecks ++;
1825
1826	if (isa<MemoryPhi>(Val: UseAccess)) {
1827	if (any_of(Range&: KillingDefs, P: [this, UseAccess](Instruction *KI) {
1828	return DT.properlyDominates(A: KI->getParent(), B: UseAccess->getBlock());
1829	})) {
1830	LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1831	continue;
1832	}
1833	LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1834	pushMemUses(Acc: UseAccess, WorkList, Visited);
1835	continue;
1836	}
1837
1838	Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
1839	LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1840
1841	if (any_of(Range&: KillingDefs, P: [this, UseInst](Instruction *KI) {
1842	return DT.dominates(Def: KI, User: UseInst);
1843	})) {
1844	LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1845	continue;
1846	}
1847
1848	// A memory terminator kills all preceeding MemoryDefs and all succeeding
1849	// MemoryAccesses. We do not have to check it's users.
1850	if (isMemTerminator(Loc: MaybeDeadLoc, AccessI: MaybeDeadI, MaybeTerm: UseInst)) {
1851	LLVM_DEBUG(
1852	dbgs()
1853	<< " ... skipping, memterminator invalidates following accesses\n");
1854	continue;
1855	}
1856
1857	if (isNoopIntrinsic(I: cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst())) {
1858	LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1859	pushMemUses(Acc: UseAccess, WorkList, Visited);
1860	continue;
1861	}
1862
1863	if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj)) {
1864	LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1865	return std::nullopt;
1866	}
1867
1868	// Uses which may read the original MemoryDef mean we cannot eliminate the
1869	// original MD. Stop walk.
1870	// If KillingDef is a CallInst with "initializes" attribute, the reads in
1871	// the callee would be dominated by initializations, so it should be safe.
1872	bool IsKillingDefFromInitAttr = false;
1873	if (IsInitializesAttrMemLoc) {
1874	if (KillingI == UseInst &&
1875	KillingUndObj == getUnderlyingObject(V: MaybeDeadLoc.Ptr))
1876	IsKillingDefFromInitAttr = true;
1877	}
1878
1879	if (isReadClobber(DefLoc: MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
1880	LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1881	return std::nullopt;
1882	}
1883
1884	// If this worklist walks back to the original memory access (and the
1885	// pointer is not guarenteed loop invariant) then we cannot assume that a
1886	// store kills itself.
1887	if (MaybeDeadAccess == UseAccess &&
1888	!isGuaranteedLoopInvariant(Ptr: MaybeDeadLoc.Ptr)) {
1889	LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1890	return std::nullopt;
1891	}
1892	// Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1893	// if it reads the memory location.
1894	// TODO: It would probably be better to check for self-reads before
1895	// calling the function.
1896	if (KillingDef == UseAccess \|\| MaybeDeadAccess == UseAccess) {
1897	LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1898	continue;
1899	}
1900
1901	// Check all uses for MemoryDefs, except for defs completely overwriting
1902	// the original location. Otherwise we have to check uses of all
1903	// MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1904	// miss cases like the following
1905	// 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1906	// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1907	// Use(2) ; MayAlias 2 and* 1, loads [0, 3].*
1908	// (The Use points to the first* Def it may alias)*
1909	// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1910	// stores [0,1]
1911	if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess)) {
1912	if (isCompleteOverwrite(DefLoc: MaybeDeadLoc, DefInst: MaybeDeadI, UseInst)) {
1913	BasicBlock *MaybeKillingBlock = UseInst->getParent();
1914	if (PostOrderNumbers.find(Val: MaybeKillingBlock)->second <
1915	PostOrderNumbers.find(Val: MaybeDeadAccess->getBlock())->second) {
1916	if (!isInvisibleToCallerAfterRet(V: KillingUndObj, Ptr: KillingLoc.Ptr,
1917	StoreSize: KillingLoc.Size)) {
1918	LLVM_DEBUG(dbgs()
1919	<< " ... found killing def " << *UseInst << "\n");
1920	KillingDefs.insert(Ptr: UseInst);
1921	}
1922	} else {
1923	LLVM_DEBUG(dbgs()
1924	<< " ... found preceeding def " << *UseInst << "\n");
1925	return std::nullopt;
1926	}
1927	} else
1928	pushMemUses(Acc: UseDef, WorkList, Visited);
1929	}
1930	}
1931
1932	// For accesses to locations visible after the function returns, make sure
1933	// that the location is dead (=overwritten) along all paths from
1934	// MaybeDeadAccess to the exit.
1935	if (!isInvisibleToCallerAfterRet(V: KillingUndObj, Ptr: KillingLoc.Ptr,
1936	StoreSize: KillingLoc.Size)) {
1937	SmallPtrSet<BasicBlock *, `16`> KillingBlocks;
1938	for (Instruction *KD : KillingDefs)
1939	KillingBlocks.insert(Ptr: KD->getParent());
1940	assert(!KillingBlocks.empty() &&
1941	"Expected at least a single killing block");
1942
1943	// Find the common post-dominator of all killing blocks.
1944	BasicBlock CommonPred = KillingBlocks.begin();
1945	for (BasicBlock *BB : llvm::drop_begin(RangeOrContainer&: KillingBlocks)) {
1946	if (!CommonPred)
1947	break;
1948	CommonPred = PDT.findNearestCommonDominator(A: CommonPred, B: BB);
1949	}
1950
1951	// If the common post-dominator does not post-dominate MaybeDeadAccess,
1952	// there is a path from MaybeDeadAccess to an exit not going through a
1953	// killing block.
1954	if (!PDT.dominates(A: CommonPred, B: MaybeDeadAccess->getBlock())) {
1955	if (!AnyUnreachableExit)
1956	return std::nullopt;
1957
1958	// Fall back to CFG scan starting at all non-unreachable roots if not
1959	// all paths to the exit go through CommonPred.
1960	CommonPred = nullptr;
1961	}
1962
1963	// If CommonPred itself is in the set of killing blocks, we're done.
1964	if (KillingBlocks.count(Ptr: CommonPred))
1965	return {MaybeDeadAccess};
1966
1967	SetVector<BasicBlock *> WorkList;
1968	// If CommonPred is null, there are multiple exits from the function.
1969	// They all have to be added to the worklist.
1970	if (CommonPred)
1971	WorkList.insert(X: CommonPred);
1972	else
1973	for (BasicBlock *R : PDT.roots()) {
1974	if (!isa<UnreachableInst>(Val: R->getTerminator()))
1975	WorkList.insert(X: R);
1976	}
1977
1978	NumCFGTries ++;
1979	// Check if all paths starting from an exit node go through one of the
1980	// killing blocks before reaching MaybeDeadAccess.
1981	for (unsigned I = `0`; I < WorkList.size(); I++) {
1982	NumCFGChecks ++;
1983	BasicBlock *Current = WorkList [I];
1984	if (KillingBlocks.count(Ptr: Current))
1985	continue;
1986	if (Current == MaybeDeadAccess->getBlock())
1987	return std::nullopt;
1988
1989	// MaybeDeadAccess is reachable from the entry, so we don't have to
1990	// explore unreachable blocks further.
1991	if (!DT.isReachableFromEntry(A: Current))
1992	continue;
1993
1994	WorkList.insert_range(R: predecessors(BB: Current));
1995
1996	if (WorkList.size() >= MemorySSAPathCheckLimit)
1997	return std::nullopt;
1998	}
1999	NumCFGSuccess ++;
2000	}
2001
2002	// No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
2003	// potentially dead.
2004	return {MaybeDeadAccess};
2005	}
2006
2007	void DSEState::deleteDeadInstruction(Instruction *SI,
2008	SmallPtrSetImpl<MemoryAccess > Deleted) {
2009	MemorySSAUpdater Updater(&MSSA);
2010	SmallVector<Instruction *, `32`> NowDeadInsts;
2011	NowDeadInsts.push_back(Elt: SI);
2012	--NumFastOther;
2013
2014	while (!NowDeadInsts.empty()) {
2015	Instruction *DeadInst = NowDeadInsts.pop_back_val();
2016	++NumFastOther;
2017
2018	// Try to preserve debug information attached to the dead instruction.
2019	salvageDebugInfo(I&: *DeadInst);
2020	salvageKnowledge(I: DeadInst);
2021
2022	// Remove the Instruction from MSSA.
2023	MemoryAccess *MA = MSSA.getMemoryAccess(I: DeadInst);
2024	bool IsMemDef = MA && isa<MemoryDef>(Val: MA);
2025	if (MA) {
2026	if (IsMemDef) {
2027	auto *MD = cast<MemoryDef>(Val: MA);
2028	SkipStores.insert(Ptr: MD);
2029	if (Deleted)
2030	Deleted->insert(Ptr: MD);
2031	if (auto *SI = dyn_cast<StoreInst>(Val: MD->getMemoryInst())) {
2032	if (SI->getValueOperand()->getType()->isPointerTy()) {
2033	const Value *UO = getUnderlyingObject(V: SI->getValueOperand());
2034	if (CapturedBeforeReturn.erase(Val: UO))
2035	ShouldIterateEndOfFunctionDSE = true;
2036	InvisibleToCallerAfterRet.erase(Val: UO);
2037	InvisibleToCallerAfterRetBounded.erase(Val: UO);
2038	}
2039	}
2040	}
2041
2042	Updater.removeMemoryAccess(MA);
2043	}
2044
2045	auto I = IOLs.find(Key: DeadInst->getParent());
2046	if (I != IOLs.end())
2047	I->second.erase(Key: DeadInst);
2048	// Remove its operands
2049	for (Use &O : DeadInst->operands())
2050	if (Instruction *OpI = dyn_cast<Instruction>(Val&: O)) {
2051	O.set(PoisonValue::get(T: O ->getType()));
2052	if (isInstructionTriviallyDead(I: OpI, TLI: &TLI))
2053	NowDeadInsts.push_back(Elt: OpI);
2054	}
2055
2056	EA.removeInstruction(I: DeadInst);
2057	// Remove memory defs directly if they don't produce results, but only
2058	// queue other dead instructions for later removal. They may have been
2059	// used as memory locations that have been cached by BatchAA. Removing
2060	// them here may lead to newly created instructions to be allocated at the
2061	// same address, yielding stale cache entries.
2062	if (IsMemDef && DeadInst->getType()->isVoidTy())
2063	DeadInst->eraseFromParent();
2064	else
2065	ToRemove.push_back(Elt: DeadInst);
2066	}
2067	}
2068
2069	bool DSEState::mayThrowBetween(Instruction KillingI, Instruction DeadI,
2070	const Value *KillingUndObj) {
2071	// First see if we can ignore it by using the fact that KillingI is an
2072	// alloca/alloca like object that is not visible to the caller during
2073	// execution of the function.
2074	if (KillingUndObj && isInvisibleToCallerOnUnwind(V: KillingUndObj))
2075	return false;
2076
2077	if (KillingI->getParent() == DeadI->getParent())
2078	return ThrowingBlocks.count(Ptr: KillingI->getParent());
2079	return !ThrowingBlocks.empty();
2080	}
2081
2082	bool DSEState::isDSEBarrier(const Value KillingUndObj, Instruction DeadI) {
2083	// If DeadI may throw it acts as a barrier, unless we are to an
2084	// alloca/alloca like object that does not escape.
2085	if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj))
2086	return true;
2087
2088	// If DeadI is an atomic load/store stronger than monotonic, do not try to
2089	// eliminate/reorder it.
2090	if (DeadI->isAtomic()) {
2091	if (auto *LI = dyn_cast<LoadInst>(Val: DeadI))
2092	return isStrongerThanMonotonic(AO: LI->getOrdering());
2093	if (auto *SI = dyn_cast<StoreInst>(Val: DeadI))
2094	return isStrongerThanMonotonic(AO: SI->getOrdering());
2095	if (auto *ARMW = dyn_cast<AtomicRMWInst>(Val: DeadI))
2096	return isStrongerThanMonotonic(AO: ARMW->getOrdering());
2097	if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(Val: DeadI))
2098	return isStrongerThanMonotonic(AO: CmpXchg->getSuccessOrdering()) \|\|
2099	isStrongerThanMonotonic(AO: CmpXchg->getFailureOrdering());
2100	llvm_unreachable("other instructions should be skipped in MemorySSA");
2101	}
2102	return false;
2103	}
2104
2105	bool DSEState::eliminateDeadWritesAtEndOfFunction() {
2106	bool MadeChange = false;
2107	LLVM_DEBUG(
2108	dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n");
2109	do {
2110	ShouldIterateEndOfFunctionDSE = false;
2111	for (MemoryDef *Def : llvm::reverse(C&: MemDefs)) {
2112	if (SkipStores.contains(Ptr: Def))
2113	continue;
2114
2115	Instruction *DefI = Def->getMemoryInst();
2116	auto DefLoc = getLocForWrite(I: DefI);
2117	if (!DefLoc \|\| !isRemovable(I: DefI)) {
2118	LLVM_DEBUG(dbgs() << " ... could not get location for write or "
2119	"instruction not removable.\n");
2120	continue;
2121	}
2122
2123	// NOTE: Currently eliminating writes at the end of a function is
2124	// limited to MemoryDefs with a single underlying object, to save
2125	// compile-time. In practice it appears the case with multiple
2126	// underlying objects is very uncommon. If it turns out to be important,
2127	// we can use getUnderlyingObjects here instead.
2128	const Value *UO = getUnderlyingObject(V: DefLoc ->Ptr);
2129	if (!isInvisibleToCallerAfterRet(V: UO, Ptr: DefLoc ->Ptr, StoreSize: DefLoc ->Size))
2130	continue;
2131
2132	if (isWriteAtEndOfFunction(Def, DefLoc: *DefLoc)) {
2133	// See through pointer-to-pointer bitcasts
2134	LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
2135	"of the function\n");
2136	deleteDeadInstruction(SI: DefI);
2137	++NumFastStores;
2138	MadeChange = true;
2139	}
2140	}
2141	} while (ShouldIterateEndOfFunctionDSE);
2142	return MadeChange;
2143	}
2144
2145	bool DSEState::tryFoldIntoCalloc(MemoryDef Def, const* Value *DefUO) {
2146	Instruction *DefI = Def->getMemoryInst();
2147	MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
2148	if (!MemSet)
2149	// TODO: Could handle zero store to small allocation as well.
2150	return false;
2151	Constant *StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
2152	if (!StoredConstant \|\| !StoredConstant->isNullValue())
2153	return false;
2154
2155	if (!isRemovable(I: DefI))
2156	// The memset might be volatile..
2157	return false;
2158
2159	if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) \|\|
2160	F.hasFnAttribute(Kind: Attribute::SanitizeAddress) \|\|
2161	F.hasFnAttribute(Kind: Attribute::SanitizeHWAddress) \|\| F.getName() == "calloc")
2162	return false;
2163	auto Malloc = const_cast<CallInst >(dyn_cast<CallInst>(Val: DefUO));
2164	if (!Malloc)
2165	return false;
2166	auto *InnerCallee = Malloc->getCalledFunction();
2167	if (!InnerCallee)
2168	return false;
2169	LibFunc Func = NotLibFunc;
2170	StringRef ZeroedVariantName;
2171	if (!TLI.getLibFunc(FDecl: *InnerCallee, F&: Func) \|\| !TLI.has(F: Func) \|\|
2172	Func != LibFunc_malloc) {
2173	Attribute Attr = Malloc->getFnAttr(Kind: "alloc-variant-zeroed");
2174	if (!Attr.isValid())
2175	return false;
2176	ZeroedVariantName = Attr.getValueAsString();
2177	if (ZeroedVariantName.empty())
2178	return false;
2179	}
2180
2181	// Gracefully handle malloc with unexpected memory attributes.
2182	auto *MallocDef = dyn_cast_or_null<MemoryDef>(Val: MSSA.getMemoryAccess(I: Malloc));
2183	if (!MallocDef)
2184	return false;
2185
2186	auto shouldCreateCalloc = [](CallInst Malloc, CallInst Memset) {
2187	// Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
2188	// of malloc block
2189	auto MallocBB = Malloc->getParent(), MemsetBB = Memset->getParent();
2190	if (MallocBB == MemsetBB)
2191	return true;
2192	auto *Ptr = Memset->getArgOperand(i: `0`);
2193	auto *TI = MallocBB->getTerminator();
2194	BasicBlock TrueBB, FalseBB;
2195	if (!match(V: TI, P: m_Br(C: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: Ptr),
2196	R: m_Zero()),
2197	T&: TrueBB, F&: FalseBB)))
2198	return false;
2199	if (MemsetBB != FalseBB)
2200	return false;
2201	return true;
2202	};
2203
2204	if (Malloc->getOperand(i_nocapture: `0`) != MemSet->getLength())
2205	return false;
2206	if (!shouldCreateCalloc (Malloc, MemSet) \|\| !DT.dominates(Def: Malloc, User: MemSet) \|\|
2207	!memoryIsNotModifiedBetween(FirstI: Malloc, SecondI: MemSet, AA&: BatchAA, DL, DT: &DT))
2208	return false;
2209	IRBuilder<> IRB(Malloc);
2210	assert(Func == LibFunc_malloc \|\| !ZeroedVariantName.empty());
2211	Value Calloc = nullptr*;
2212	if (!ZeroedVariantName.empty()) {
2213	LLVMContext &Ctx = Malloc->getContext();
2214	AttributeList Attrs = InnerCallee->getAttributes();
2215	AllocFnKind AllocKind =
2216	Attrs.getFnAttr(Kind: Attribute::AllocKind).getAllocKind() \|
2217	AllocFnKind::Zeroed;
2218	AllocKind &= ~AllocFnKind::Uninitialized;
2219	Attrs =
2220	Attrs.addFnAttribute(C&: Ctx, Attr: Attribute::getWithAllocKind(Context&: Ctx, Kind: AllocKind))
2221	.removeFnAttribute(C&: Ctx, Kind: "alloc-variant-zeroed");
2222	FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction(
2223	Name: ZeroedVariantName, T: InnerCallee->getFunctionType(), AttributeList: Attrs);
2224	cast<Function>(Val: ZeroedVariant.getCallee())
2225	->setCallingConv(Malloc->getCallingConv());
2226	SmallVector<Value *, `3`> Args;
2227	Args.append(in_start: Malloc->arg_begin(), in_end: Malloc->arg_end());
2228	CallInst *CI = IRB.CreateCall(Callee: ZeroedVariant, Args, Name: ZeroedVariantName);
2229	CI->setCallingConv(Malloc->getCallingConv());
2230	Calloc = CI;
2231	} else {
2232	Type *SizeTTy = Malloc->getArgOperand(i: `0`)->getType();
2233	Calloc = emitCalloc(Num: ConstantInt::get(Ty: SizeTTy, V: `1`), Size: Malloc->getArgOperand(i: `0`),
2234	B&: IRB, TLI, AddrSpace: Malloc->getType()->getPointerAddressSpace());
2235	}
2236	if (!Calloc)
2237	return false;
2238
2239	MemorySSAUpdater Updater(&MSSA);
2240	auto *NewAccess = Updater.createMemoryAccessAfter(I: cast<Instruction>(Val: Calloc),
2241	Definition: nullptr, InsertPt: MallocDef);
2242	auto *NewAccessMD = cast<MemoryDef>(Val: NewAccess);
2243	Updater.insertDef(Def: NewAccessMD, /RenameUses=/true);
2244	Malloc->replaceAllUsesWith(V: Calloc);
2245	deleteDeadInstruction(SI: Malloc);
2246	return true;
2247	}
2248
2249	bool DSEState::dominatingConditionImpliesValue(MemoryDef *Def) {
2250	auto *StoreI = cast<StoreInst>(Val: Def->getMemoryInst());
2251	BasicBlock *StoreBB = StoreI->getParent();
2252	Value *StorePtr = StoreI->getPointerOperand();
2253	Value *StoreVal = StoreI->getValueOperand();
2254
2255	DomTreeNode *IDom = DT.getNode(BB: StoreBB)->getIDom();
2256	if (!IDom)
2257	return false;
2258
2259	auto *BI = dyn_cast<CondBrInst>(Val: IDom->getBlock()->getTerminator());
2260	if (!BI)
2261	return false;
2262
2263	// In case both blocks are the same, it is not possible to determine
2264	// if optimization is possible. (We would not want to optimize a store
2265	// in the FalseBB if condition is true and vice versa.)
2266	if (BI->getSuccessor(i: `0`) == BI->getSuccessor(i: `1`))
2267	return false;
2268
2269	Instruction *ICmpL;
2270	CmpPredicate Pred;
2271	if (!match(V: BI->getCondition(),
2272	P: m_c_ICmp(Pred,
2273	L: m_CombineAnd(L: m_Load(Op: m_Specific(V: StorePtr)),
2274	R: m_Instruction(I&: ICmpL)),
2275	R: m_Specific(V: StoreVal))) \|\|
2276	!ICmpInst::isEquality(P: Pred))
2277	return false;
2278
2279	// Ensure the replacement is allowed when comparing pointers, as
2280	// the equality compares addresses only, not pointers' provenance.
2281	if (StoreVal->getType()->isPointerTy() &&
2282	!canReplacePointersIfEqual(From: StoreVal, To: ICmpL, DL))
2283	return false;
2284
2285	// In case the else blocks also branches to the if block or the other way
2286	// around it is not possible to determine if the optimization is possible.
2287	if (Pred == ICmpInst::ICMP_EQ &&
2288	!DT.dominates(BBE: BasicBlockEdge (BI->getParent(), BI->getSuccessor(i: `0`)),
2289	BB: StoreBB))
2290	return false;
2291
2292	if (Pred == ICmpInst::ICMP_NE &&
2293	!DT.dominates(BBE: BasicBlockEdge (BI->getParent(), BI->getSuccessor(i: `1`)),
2294	BB: StoreBB))
2295	return false;
2296
2297	MemoryAccess *LoadAcc = MSSA.getMemoryAccess(I: ICmpL);
2298	MemoryAccess *ClobAcc =
2299	MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
2300
2301	return MSSA.dominates(A: ClobAcc, B: LoadAcc);
2302	}
2303
2304	bool DSEState::storeIsNoop(MemoryDef Def, const* Value *DefUO) {
2305	Instruction *DefI = Def->getMemoryInst();
2306	StoreInst *Store = dyn_cast<StoreInst>(Val: DefI);
2307	MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
2308	Constant StoredConstant = nullptr*;
2309	if (Store)
2310	StoredConstant = dyn_cast<Constant>(Val: Store->getOperand(i_nocapture: `0`));
2311	else if (MemSet)
2312	StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
2313	else
2314	return false;
2315
2316	if (!isRemovable(I: DefI))
2317	return false;
2318
2319	if (StoredConstant) {
2320	Constant *InitC =
2321	getInitialValueOfAllocation(V: DefUO, TLI: &TLI, Ty: StoredConstant->getType());
2322	// If the clobbering access is LiveOnEntry, no instructions between them
2323	// can modify the memory location.
2324	if (InitC && InitC == StoredConstant)
2325	return MSSA.isLiveOnEntryDef(
2326	MA: MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA));
2327	}
2328
2329	if (!Store)
2330	return false;
2331
2332	if (dominatingConditionImpliesValue(Def))
2333	return true;
2334
2335	if (auto *LoadI = dyn_cast<LoadInst>(Val: Store->getOperand(i_nocapture: `0`))) {
2336	if (LoadI->getPointerOperand() == Store->getOperand(i_nocapture: `1`)) {
2337	// Get the defining access for the load.
2338	auto *LoadAccess = MSSA.getMemoryAccess(I: LoadI)->getDefiningAccess();
2339	// Fast path: the defining accesses are the same.
2340	if (LoadAccess == Def->getDefiningAccess())
2341	return true;
2342
2343	// Look through phi accesses. Recursively scan all phi accesses by
2344	// adding them to a worklist. Bail when we run into a memory def that
2345	// does not match LoadAccess.
2346	SetVector<MemoryAccess *> ToCheck;
2347	MemoryAccess *Current =
2348	MSSA.getWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
2349	// We don't want to bail when we run into the store memory def. But,
2350	// the phi access may point to it. So, pretend like we've already
2351	// checked it.
2352	ToCheck.insert(X: Def);
2353	ToCheck.insert(X: Current);
2354	// Start at current (1) to simulate already having checked Def.
2355	for (unsigned I = `1`; I < ToCheck.size(); ++I) {
2356	Current = ToCheck [I];
2357	if (auto PhiAccess = dyn_cast<MemoryPhi>(Val: Current)) {
2358	// Check all the operands.
2359	for (auto &Use : PhiAccess->incoming_values())
2360	ToCheck.insert(X: cast<MemoryAccess>(Val: &Use));
2361	continue;
2362	}
2363
2364	// If we found a memory def, bail. This happens when we have an
2365	// unrelated write in between an otherwise noop store.
2366	assert(isa<MemoryDef>(Current) && "Only MemoryDefs should reach here.");
2367	// TODO: Skip no alias MemoryDefs that have no aliasing reads.
2368	// We are searching for the definition of the store's destination.
2369	// So, if that is the same definition as the load, then this is a
2370	// noop. Otherwise, fail.
2371	if (LoadAccess != Current)
2372	return false;
2373	}
2374	return true;
2375	}
2376	}
2377
2378	return false;
2379	}
2380
2381	bool DSEState::removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2382	bool Changed = false;
2383	for (auto OI : IOL) {
2384	Instruction *DeadI = OI.first;
2385	MemoryLocation Loc = *getLocForWrite(I: DeadI);
2386	assert(isRemovable(DeadI) && "Expect only removable instruction");
2387
2388	const Value *Ptr = Loc.Ptr->stripPointerCasts();
2389	int64_t DeadStart = `0`;
2390	uint64_t DeadSize = Loc.Size.getValue();
2391	GetPointerBaseWithConstantOffset(Ptr, Offset&: DeadStart, DL);
2392	OverlapIntervalsTy &IntervalMap = OI.second;
2393	Changed \|= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2394	if (IntervalMap.empty())
2395	continue;
2396	Changed \|= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2397	}
2398	return Changed;
2399	}
2400
2401	bool DSEState::eliminateRedundantStoresOfExistingValues() {
2402	bool MadeChange = false;
2403	LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2404	"already existing value\n");
2405	for (auto *Def : MemDefs) {
2406	if (SkipStores.contains(Ptr: Def) \|\| MSSA.isLiveOnEntryDef(MA: Def))
2407	continue;
2408
2409	Instruction *DefInst = Def->getMemoryInst();
2410	auto MaybeDefLoc = getLocForWrite(I: DefInst);
2411	if (!MaybeDefLoc \|\| !isRemovable(I: DefInst))
2412	continue;
2413
2414	MemoryDef *UpperDef;
2415	// To conserve compile-time, we avoid walking to the next clobbering def.
2416	// Instead, we just try to get the optimized access, if it exists. DSE
2417	// will try to optimize defs during the earlier traversal.
2418	if (Def->isOptimized())
2419	UpperDef = dyn_cast<MemoryDef>(Val: Def->getOptimized());
2420	else
2421	UpperDef = dyn_cast<MemoryDef>(Val: Def->getDefiningAccess());
2422	if (!UpperDef \|\| MSSA.isLiveOnEntryDef(MA: UpperDef))
2423	continue;
2424
2425	Instruction *UpperInst = UpperDef->getMemoryInst();
2426	auto IsRedundantStore = [&]() {
2427	// We don't care about differences in call attributes here.
2428	if (DefInst->isIdenticalToWhenDefined(I: UpperInst,
2429	/IntersectAttrs=/true))
2430	return true;
2431	if (auto *MemSetI = dyn_cast<MemSetInst>(Val: UpperInst)) {
2432	if (auto *SI = dyn_cast<StoreInst>(Val: DefInst)) {
2433	// MemSetInst must have a write location.
2434	auto UpperLoc = getLocForWrite(I: UpperInst);
2435	if (!UpperLoc)
2436	return false;
2437	int64_t InstWriteOffset = `0`;
2438	int64_t DepWriteOffset = `0`;
2439	auto OR = isOverwrite(KillingI: UpperInst, DeadI: DefInst, KillingLoc: UpperLoc, DeadLoc: MaybeDefLoc,
2440	KillingOff&: InstWriteOffset, DeadOff&: DepWriteOffset);
2441	Value *StoredByte = isBytewiseValue(V: SI->getValueOperand(), DL);
2442	return StoredByte && StoredByte == MemSetI->getOperand(i_nocapture: `1`) &&
2443	OR == OW_Complete;
2444	}
2445	}
2446	return false;
2447	};
2448
2449	if (!IsRedundantStore () \|\| isReadClobber(DefLoc: *MaybeDefLoc, UseInst: DefInst))
2450	continue;
2451	LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2452	<< `'\n'`);
2453	deleteDeadInstruction(SI: DefInst);
2454	NumRedundantStores ++;
2455	MadeChange = true;
2456	}
2457	return MadeChange;
2458	}
2459
2460	SmallVector<MemoryLocation, `1`>
2461	DSEState::getInitializesArgMemLoc(const Instruction *I) {
2462	const CallBase *CB = dyn_cast<CallBase>(Val: I);
2463	if (!CB)
2464	return {};
2465
2466	// Collect aliasing arguments and their initializes ranges.
2467	SmallMapVector<Value *, SmallVector<ArgumentInitInfo, `2`>, `2`> Arguments;
2468	for (unsigned Idx = `0`, Count = CB->arg_size(); Idx < Count; ++Idx) {
2469	Value *CurArg = CB->getArgOperand(i: Idx);
2470	if (!CurArg->getType()->isPointerTy())
2471	continue;
2472
2473	ConstantRangeList Inits;
2474	Attribute InitializesAttr = CB->getParamAttr(ArgNo: Idx, Kind: Attribute::Initializes);
2475	// initializes on byval arguments refers to the callee copy, not the
2476	// original memory the caller passed in.
2477	if (InitializesAttr.isValid() && !CB->isByValArgument(ArgNo: Idx))
2478	Inits = InitializesAttr.getValueAsConstantRangeList();
2479
2480	// Check whether "CurArg" could alias with global variables. We require
2481	// either it's function local and isn't captured before or the "CB" only
2482	// accesses arg or inaccessible mem.
2483	if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() &&
2484	!isFuncLocalAndNotCaptured(Arg: CurArg, CB, EA))
2485	Inits = ConstantRangeList ();
2486
2487	// We don't perform incorrect DSE on unwind edges in the current function,
2488	// and use the "initializes" attribute to kill dead stores if:
2489	// - The call does not throw exceptions, "CB->doesNotThrow()".
2490	// - Or the callee parameter has "dead_on_unwind" attribute.
2491	// - Or the argument is invisible to caller on unwind, and there are no
2492	// unwind edges from this call in the current function (e.g. `CallInst`).
2493	bool IsDeadOrInvisibleOnUnwind =
2494	CB->paramHasAttr(ArgNo: Idx, Kind: Attribute::DeadOnUnwind) \|\|
2495	(isa<CallInst>(Val: CB) && isInvisibleToCallerOnUnwind(V: CurArg));
2496	ArgumentInitInfo InitInfo{.Idx: Idx, .IsDeadOrInvisibleOnUnwind: IsDeadOrInvisibleOnUnwind, .Inits: Inits};
2497	bool FoundAliasing = false;
2498	for (auto &[Arg, AliasList] : Arguments) {
2499	auto AAR = BatchAA.alias(LocA: MemoryLocation::getBeforeOrAfter(Ptr: Arg),
2500	LocB: MemoryLocation::getBeforeOrAfter(Ptr: CurArg));
2501	if (AAR == AliasResult::NoAlias) {
2502	continue;
2503	} else if (AAR == AliasResult::MustAlias) {
2504	FoundAliasing = true;
2505	AliasList.push_back(Elt: InitInfo);
2506	} else {
2507	// For PartialAlias and MayAlias, there is an offset or may be an
2508	// unknown offset between the arguments and we insert an empty init
2509	// range to discard the entire initializes info while intersecting.
2510	FoundAliasing = true;
2511	AliasList.push_back(Elt: ArgumentInitInfo{.Idx: Idx, .IsDeadOrInvisibleOnUnwind: IsDeadOrInvisibleOnUnwind,
2512	.Inits: ConstantRangeList ()});
2513	}
2514	}
2515	if (!FoundAliasing)
2516	Arguments [CurArg] = {InitInfo};
2517	}
2518
2519	SmallVector<MemoryLocation, `1`> Locations;
2520	for (const auto &[_, Args] : Arguments) {
2521	auto IntersectedRanges =
2522	getIntersectedInitRangeList(Args, CallHasNoUnwindAttr: CB->doesNotThrow());
2523	if (IntersectedRanges.empty())
2524	continue;
2525
2526	for (const auto &Arg : Args) {
2527	for (const auto &Range : IntersectedRanges) {
2528	int64_t Start = Range.getLower().getSExtValue();
2529	int64_t End = Range.getUpper().getSExtValue();
2530	// For now, we only handle locations starting at offset 0.
2531	if (Start == `0`)
2532	Locations.push_back(Elt: MemoryLocation (CB->getArgOperand(i: Arg.Idx),
2533	LocationSize::precise(Value: End - Start),
2534	CB->getAAMetadata()));
2535	}
2536	}
2537	}
2538	return Locations;
2539	}
2540
2541	std::pair<bool, bool>
2542	DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
2543	bool Changed = false;
2544	bool DeletedKillingLoc = false;
2545	unsigned ScanLimit = MemorySSAScanLimit;
2546	unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2547	unsigned PartialLimit = MemorySSAPartialStoreLimit;
2548	// Worklist of MemoryAccesses that may be killed by
2549	// "KillingLocWrapper.MemDef".
2550	SmallSetVector<MemoryAccess *, `8`> ToCheck;
2551	// Track MemoryAccesses that have been deleted in the loop below, so we can
2552	// skip them. Don't use SkipStores for this, which may contain reused
2553	// MemoryAccess addresses.
2554	SmallPtrSet<MemoryAccess *, `8`> Deleted;
2555	[[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
2556	ToCheck.insert(X: KillingLocWrapper.MemDef->getDefiningAccess());
2557
2558	// Check if MemoryAccesses in the worklist are killed by
2559	// "KillingLocWrapper.MemDef".
2560	for (unsigned I = `0`; I < ToCheck.size(); I++) {
2561	MemoryAccess *Current = ToCheck [I];
2562	if (Deleted.contains(Ptr: Current))
2563	continue;
2564	std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
2565	KillingDef: KillingLocWrapper.MemDef, StartAccess: Current, KillingLoc: KillingLocWrapper.MemLoc,
2566	KillingUndObj: KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
2567	IsMemTerm: isMemTerminatorInst(I: KillingLocWrapper.DefInst), PartialLimit,
2568	IsInitializesAttrMemLoc: KillingLocWrapper.DefByInitializesAttr);
2569
2570	if (!MaybeDeadAccess) {
2571	LLVM_DEBUG(dbgs() << " finished walk\n");
2572	continue;
2573	}
2574	MemoryAccess DeadAccess = MaybeDeadAccess;
2575	LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2576	if (isa<MemoryPhi>(Val: DeadAccess)) {
2577	LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2578	for (Value *V : cast<MemoryPhi>(Val: DeadAccess)->incoming_values()) {
2579	MemoryAccess *IncomingAccess = cast<MemoryAccess>(Val: V);
2580	BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2581	BasicBlock *PhiBlock = DeadAccess->getBlock();
2582
2583	// We only consider incoming MemoryAccesses that come before the
2584	// MemoryPhi. Otherwise we could discover candidates that do not
2585	// strictly dominate our starting def.
2586	if (PostOrderNumbers [IncomingBlock] > PostOrderNumbers [PhiBlock])
2587	ToCheck.insert(X: IncomingAccess);
2588	}
2589	continue;
2590	}
2591	// We cannot apply the initializes attribute to DeadAccess/DeadDef.
2592	// It would incorrectly consider a call instruction as redundant store
2593	// and remove this call instruction.
2594	// TODO: this conflates the existence of a MemoryLocation with being able
2595	// to delete the instruction. Fix isRemovable() to consider calls with
2596	// side effects that cannot be removed, e.g. calls with the initializes
2597	// attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
2598	MemoryDefWrapper DeadDefWrapper(
2599	cast<MemoryDef>(Val: DeadAccess),
2600	getLocForInst(I: cast<MemoryDef>(Val: DeadAccess)->getMemoryInst(),
2601	/ConsiderInitializesAttr=/false));
2602	assert(DeadDefWrapper.DefinedLocations.size() == `1`);
2603	MemoryLocationWrapper &DeadLocWrapper =
2604	DeadDefWrapper.DefinedLocations.front();
2605	LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
2606	ToCheck.insert(X: DeadLocWrapper.MemDef->getDefiningAccess());
2607	NumGetDomMemoryDefPassed ++;
2608
2609	if (!DebugCounter::shouldExecute(Counter&: MemorySSACounter))
2610	continue;
2611	if (isMemTerminatorInst(I: KillingLocWrapper.DefInst)) {
2612	if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
2613	continue;
2614	LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2615	<< *DeadLocWrapper.DefInst << "\n KILLER: "
2616	<< *KillingLocWrapper.DefInst << `'\n'`);
2617	deleteDeadInstruction(SI: DeadLocWrapper.DefInst, Deleted: &Deleted);
2618	++NumFastStores;
2619	Changed = true;
2620	} else {
2621	// Check if DeadI overwrites KillingI.
2622	int64_t KillingOffset = `0`;
2623	int64_t DeadOffset = `0`;
2624	OverwriteResult OR =
2625	isOverwrite(KillingI: KillingLocWrapper.DefInst, DeadI: DeadLocWrapper.DefInst,
2626	KillingLoc: KillingLocWrapper.MemLoc, DeadLoc: DeadLocWrapper.MemLoc,
2627	KillingOff&: KillingOffset, DeadOff&: DeadOffset);
2628	if (OR == OW_MaybePartial) {
2629	auto &IOL = IOLs [DeadLocWrapper.DefInst->getParent()];
2630	OR = isPartialOverwrite(KillingLoc: KillingLocWrapper.MemLoc, DeadLoc: DeadLocWrapper.MemLoc,
2631	KillingOff: KillingOffset, DeadOff: DeadOffset,
2632	DeadI: DeadLocWrapper.DefInst, IOL);
2633	}
2634	if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2635	auto *DeadSI = dyn_cast<StoreInst>(Val: DeadLocWrapper.DefInst);
2636	auto *KillingSI = dyn_cast<StoreInst>(Val: KillingLocWrapper.DefInst);
2637	// We are re-using tryToMergePartialOverlappingStores, which requires
2638	// DeadSI to dominate KillingSI.
2639	// TODO: implement tryToMergeParialOverlappingStores using MemorySSA.
2640	if (DeadSI && KillingSI && DT.dominates(Def: DeadSI, User: KillingSI)) {
2641	if (Constant *Merged = tryToMergePartialOverlappingStores(
2642	KillingI: KillingSI, DeadI: DeadSI, KillingOffset, DeadOffset, DL, AA&: BatchAA,
2643	DT: &DT)) {
2644
2645	// Update stored value of earlier store to merged constant.
2646	DeadSI->setOperand(i_nocapture: `0`, Val_nocapture: Merged);
2647	++NumModifiedStores;
2648	Changed = true;
2649	DeletedKillingLoc = true;
2650
2651	// Remove killing store and remove any outstanding overlap
2652	// intervals for the updated store.
2653	deleteDeadInstruction(SI: KillingSI, Deleted: &Deleted);
2654	auto I = IOLs.find(Key: DeadSI->getParent());
2655	if (I != IOLs.end())
2656	I->second.erase(Key: DeadSI);
2657	break;
2658	}
2659	}
2660	}
2661	if (OR == OW_Complete) {
2662	LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2663	<< *DeadLocWrapper.DefInst << "\n KILLER: "
2664	<< *KillingLocWrapper.DefInst << `'\n'`);
2665	deleteDeadInstruction(SI: DeadLocWrapper.DefInst, Deleted: &Deleted);
2666	++NumFastStores;
2667	Changed = true;
2668	}
2669	}
2670	}
2671
2672	assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2673	"SkipStores and Deleted out of sync?");
2674
2675	return {Changed, DeletedKillingLoc};
2676	}
2677
2678	bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2679	if (KillingDefWrapper.DefinedLocations.empty()) {
2680	LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2681	<< *KillingDefWrapper.DefInst << "\n");
2682	return false;
2683	}
2684
2685	bool MadeChange = false;
2686	for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2687	LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2688	<< *KillingLocWrapper.MemDef << " ("
2689	<< *KillingLocWrapper.DefInst << ")\n");
2690	auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2691	MadeChange \|= Changed;
2692
2693	// Check if the store is a no-op.
2694	if (!DeletedKillingLoc && storeIsNoop(Def: KillingLocWrapper.MemDef,
2695	DefUO: KillingLocWrapper.UnderlyingObject)) {
2696	LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2697	<< *KillingLocWrapper.DefInst << `'\n'`);
2698	deleteDeadInstruction(SI: KillingLocWrapper.DefInst);
2699	NumRedundantStores ++;
2700	MadeChange = true;
2701	continue;
2702	}
2703	// Can we form a calloc from a memset/malloc pair?
2704	if (!DeletedKillingLoc &&
2705	tryFoldIntoCalloc(Def: KillingLocWrapper.MemDef,
2706	DefUO: KillingLocWrapper.UnderlyingObject)) {
2707	LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2708	<< " DEAD: " << *KillingLocWrapper.DefInst << `'\n'`);
2709	deleteDeadInstruction(SI: KillingLocWrapper.DefInst);
2710	MadeChange = true;
2711	continue;
2712	}
2713	}
2714	return MadeChange;
2715	}
2716
2717	static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2718	DominatorTree &DT, PostDominatorTree &PDT,
2719	const TargetLibraryInfo &TLI,
2720	const LoopInfo &LI) {
2721	bool MadeChange = false;
2722	DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2723	// For each store:
2724	for (unsigned I = `0`; I < State.MemDefs.size(); I++) {
2725	MemoryDef *KillingDef = State.MemDefs [I];
2726	if (State.SkipStores.count(Ptr: KillingDef))
2727	continue;
2728
2729	MemoryDefWrapper KillingDefWrapper(
2730	KillingDef, State.getLocForInst(I: KillingDef->getMemoryInst(),
2731	ConsiderInitializesAttr: EnableInitializesImprovement));
2732	MadeChange \|= State.eliminateDeadDefs(KillingDefWrapper);
2733	}
2734
2735	if (EnablePartialOverwriteTracking)
2736	for (auto &KV : State.IOLs)
2737	MadeChange \|= State.removePartiallyOverlappedStores(IOL&: KV.second);
2738
2739	MadeChange \|= State.eliminateRedundantStoresOfExistingValues();
2740	MadeChange \|= State.eliminateDeadWritesAtEndOfFunction();
2741
2742	while (!State.ToRemove.empty()) {
2743	Instruction *DeadInst = State.ToRemove.pop_back_val();
2744	DeadInst->eraseFromParent();
2745	}
2746
2747	return MadeChange;
2748	}
2749
2750	//===----------------------------------------------------------------------===//
2751	// DSE Pass
2752	//===----------------------------------------------------------------------===//
2753	PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2754	AliasAnalysis &AA = AM.getResult<AAManager>(IR&: F);
2755	const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
2756	DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
2757	MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA();
2758	PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F);
2759	LoopInfo &LI = AM.getResult<LoopAnalysis>(IR&: F);
2760
2761	bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2762
2763	#ifdef LLVM_ENABLE_STATS
2764	if (AreStatisticsEnabled())
2765	for (auto &I : instructions(F))
2766	NumRemainingStores += isa<StoreInst>(Val: &I);
2767	#endif
2768
2769	if (!Changed)
2770	return PreservedAnalyses::all();
2771
2772	PreservedAnalyses PA;
2773	PA.preserveSet<CFGAnalyses>();
2774	PA.preserve<MemorySSAAnalysis>();
2775	PA.preserve<LoopAnalysis>();
2776	return PA;
2777	}
2778
2779	namespace {
2780
2781	/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2782	class DSELegacyPass : public FunctionPass {
2783	public:
2784	static char ID; // Pass identification, replacement for typeid
2785
2786	DSELegacyPass() : FunctionPass (ID) {
2787	initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
2788	}
2789
2790	bool runOnFunction(Function &F) override {
2791	if (skipFunction(F))
2792	return false;
2793
2794	AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2795	DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2796	const TargetLibraryInfo &TLI =
2797	getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2798	MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2799	PostDominatorTree &PDT =
2800	getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2801	LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2802
2803	bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2804
2805	#ifdef LLVM_ENABLE_STATS
2806	if (AreStatisticsEnabled())
2807	for (auto &I : instructions(F))
2808	NumRemainingStores += isa<StoreInst>(Val: &I);
2809	#endif
2810
2811	return Changed;
2812	}
2813
2814	void getAnalysisUsage(AnalysisUsage &AU) const override {
2815	AU.setPreservesCFG();
2816	AU.addRequired<AAResultsWrapperPass>();
2817	AU.addRequired<TargetLibraryInfoWrapperPass>();
2818	AU.addPreserved<GlobalsAAWrapperPass>();
2819	AU.addRequired<DominatorTreeWrapperPass>();
2820	AU.addPreserved<DominatorTreeWrapperPass>();
2821	AU.addRequired<PostDominatorTreeWrapperPass>();
2822	AU.addRequired<MemorySSAWrapperPass>();
2823	AU.addPreserved<PostDominatorTreeWrapperPass>();
2824	AU.addPreserved<MemorySSAWrapperPass>();
2825	AU.addRequired<LoopInfoWrapperPass>();
2826	AU.addPreserved<LoopInfoWrapperPass>();
2827	AU.addRequired<AssumptionCacheTracker>();
2828	}
2829	};
2830
2831	} // end anonymous namespace
2832
2833	char DSELegacyPass::ID = `0`;
2834
2835	INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2836	false)
2837	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2838	INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
2839	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2840	INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
2841	INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
2842	INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
2843	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2844	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2845	INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
2846	INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2847	false)
2848
2849	LLVM_ABI FunctionPass *llvm::createDeadStoreEliminationPass() {
2850	return new DSELegacyPass ();
2851	}
2852

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp