1 | //===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The code below implements dead store elimination using MemorySSA. It uses |
10 | // the following general approach: given a MemoryDef, walk upwards to find |
11 | // clobbering MemoryDefs that may be killed by the starting def. Then check |
12 | // that there are no uses that may read the location of the original MemoryDef |
13 | // in between both MemoryDefs. A bit more concretely: |
14 | // |
15 | // For all MemoryDefs StartDef: |
16 | // 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking |
17 | // upwards. |
18 | // 2. Check that there are no reads between MaybeDeadAccess and the StartDef by |
19 | // checking all uses starting at MaybeDeadAccess and walking until we see |
20 | // StartDef. |
21 | // 3. For each found CurrentDef, check that: |
22 | // 1. There are no barrier instructions between CurrentDef and StartDef (like |
23 | // throws or stores with ordering constraints). |
24 | // 2. StartDef is executed whenever CurrentDef is executed. |
25 | // 3. StartDef completely overwrites CurrentDef. |
26 | // 4. Erase CurrentDef from the function and MemorySSA. |
27 | // |
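// For example (illustrative IR, simplified):
//
//   store i32 1, ptr %p       ; MaybeDeadAccess, killed by the store below
//   ...                       ; no reads of %p in between
//   store i32 2, ptr %p       ; StartDef
//
// The first store is dead because the second store completely overwrites the
// same location and no intervening access reads %p.
//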
28 | //===----------------------------------------------------------------------===// |
29 | |
30 | #include "llvm/Transforms/Scalar/DeadStoreElimination.h" |
31 | #include "llvm/ADT/APInt.h" |
32 | #include "llvm/ADT/DenseMap.h" |
33 | #include "llvm/ADT/MapVector.h" |
34 | #include "llvm/ADT/PostOrderIterator.h" |
35 | #include "llvm/ADT/SetVector.h" |
36 | #include "llvm/ADT/SmallPtrSet.h" |
37 | #include "llvm/ADT/SmallVector.h" |
38 | #include "llvm/ADT/Statistic.h" |
39 | #include "llvm/ADT/StringRef.h" |
40 | #include "llvm/Analysis/AliasAnalysis.h" |
41 | #include "llvm/Analysis/CaptureTracking.h" |
42 | #include "llvm/Analysis/GlobalsModRef.h" |
43 | #include "llvm/Analysis/LoopInfo.h" |
44 | #include "llvm/Analysis/MemoryBuiltins.h" |
45 | #include "llvm/Analysis/MemoryLocation.h" |
46 | #include "llvm/Analysis/MemorySSA.h" |
47 | #include "llvm/Analysis/MemorySSAUpdater.h" |
48 | #include "llvm/Analysis/MustExecute.h" |
49 | #include "llvm/Analysis/PostDominators.h" |
50 | #include "llvm/Analysis/TargetLibraryInfo.h" |
51 | #include "llvm/Analysis/ValueTracking.h" |
52 | #include "llvm/IR/Argument.h" |
53 | #include "llvm/IR/AttributeMask.h" |
54 | #include "llvm/IR/BasicBlock.h" |
55 | #include "llvm/IR/Constant.h" |
56 | #include "llvm/IR/ConstantRangeList.h" |
57 | #include "llvm/IR/Constants.h" |
58 | #include "llvm/IR/DataLayout.h" |
59 | #include "llvm/IR/DebugInfo.h" |
60 | #include "llvm/IR/Dominators.h" |
61 | #include "llvm/IR/Function.h" |
62 | #include "llvm/IR/IRBuilder.h" |
63 | #include "llvm/IR/InstIterator.h" |
64 | #include "llvm/IR/InstrTypes.h" |
65 | #include "llvm/IR/Instruction.h" |
66 | #include "llvm/IR/Instructions.h" |
67 | #include "llvm/IR/IntrinsicInst.h" |
68 | #include "llvm/IR/Module.h" |
69 | #include "llvm/IR/PassManager.h" |
70 | #include "llvm/IR/PatternMatch.h" |
71 | #include "llvm/IR/Value.h" |
72 | #include "llvm/Support/Casting.h" |
73 | #include "llvm/Support/CommandLine.h" |
74 | #include "llvm/Support/Debug.h" |
75 | #include "llvm/Support/DebugCounter.h" |
76 | #include "llvm/Support/ErrorHandling.h" |
77 | #include "llvm/Support/raw_ostream.h" |
78 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
79 | #include "llvm/Transforms/Utils/BuildLibCalls.h" |
80 | #include "llvm/Transforms/Utils/Local.h" |
81 | #include <algorithm> |
82 | #include <cassert> |
83 | #include <cstdint> |
84 | #include <map> |
85 | #include <optional> |
86 | #include <utility> |
87 | |
88 | using namespace llvm; |
89 | using namespace PatternMatch; |
90 | |
91 | #define DEBUG_TYPE "dse" |
92 | |
93 | STATISTIC(NumRemainingStores, "Number of stores remaining after DSE" ); |
94 | STATISTIC(NumRedundantStores, "Number of redundant stores deleted" ); |
95 | STATISTIC(NumFastStores, "Number of stores deleted" ); |
96 | STATISTIC(NumFastOther, "Number of other instrs removed" ); |
97 | STATISTIC(NumCompletePartials, "Number of stores dead by later partials" ); |
98 | STATISTIC(NumModifiedStores, "Number of stores modified" ); |
STATISTIC(NumCFGChecks, "Number of blocks checked when trying to prove that "
                        "all paths go through a killing block");
STATISTIC(NumCFGTries, "Number of attempts to prove that all paths from a "
                       "dead store go through a killing block");
STATISTIC(NumCFGSuccess, "Number of times all paths from a dead store were "
                         "proven to go through a killing block");
102 | STATISTIC(NumGetDomMemoryDefPassed, |
103 | "Number of times a valid candidate is returned from getDomMemoryDef" ); |
STATISTIC(NumDomMemDefChecks,
          "Number of iterations checking for reads in getDomMemoryDef");
106 | |
107 | DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa" , |
108 | "Controls which MemoryDefs are eliminated." ); |
109 | |
110 | static cl::opt<bool> |
111 | EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking" , |
112 | cl::init(Val: true), cl::Hidden, |
113 | cl::desc("Enable partial-overwrite tracking in DSE" )); |
114 | |
115 | static cl::opt<bool> |
116 | EnablePartialStoreMerging("enable-dse-partial-store-merging" , |
117 | cl::init(Val: true), cl::Hidden, |
118 | cl::desc("Enable partial store merging in DSE" )); |
119 | |
120 | static cl::opt<unsigned> |
121 | MemorySSAScanLimit("dse-memoryssa-scanlimit" , cl::init(Val: 150), cl::Hidden, |
122 | cl::desc("The number of memory instructions to scan for " |
123 | "dead store elimination (default = 150)" )); |
124 | static cl::opt<unsigned> MemorySSAUpwardsStepLimit( |
125 | "dse-memoryssa-walklimit" , cl::init(Val: 90), cl::Hidden, |
126 | cl::desc("The maximum number of steps while walking upwards to find " |
127 | "MemoryDefs that may be killed (default = 90)" )); |
128 | |
129 | static cl::opt<unsigned> MemorySSAPartialStoreLimit( |
130 | "dse-memoryssa-partial-store-limit" , cl::init(Val: 5), cl::Hidden, |
131 | cl::desc("The maximum number candidates that only partially overwrite the " |
132 | "killing MemoryDef to consider" |
133 | " (default = 5)" )); |
134 | |
135 | static cl::opt<unsigned> MemorySSADefsPerBlockLimit( |
136 | "dse-memoryssa-defs-per-block-limit" , cl::init(Val: 5000), cl::Hidden, |
137 | cl::desc("The number of MemoryDefs we consider as candidates to eliminated " |
138 | "other stores per basic block (default = 5000)" )); |
139 | |
140 | static cl::opt<unsigned> MemorySSASameBBStepCost( |
141 | "dse-memoryssa-samebb-cost" , cl::init(Val: 1), cl::Hidden, |
142 | cl::desc( |
143 | "The cost of a step in the same basic block as the killing MemoryDef" |
144 | "(default = 1)" )); |
145 | |
146 | static cl::opt<unsigned> |
147 | MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost" , cl::init(Val: 5), |
148 | cl::Hidden, |
149 | cl::desc("The cost of a step in a different basic " |
150 | "block than the killing MemoryDef" |
151 | "(default = 5)" )); |
152 | |
153 | static cl::opt<unsigned> MemorySSAPathCheckLimit( |
154 | "dse-memoryssa-path-check-limit" , cl::init(Val: 50), cl::Hidden, |
155 | cl::desc("The maximum number of blocks to check when trying to prove that " |
156 | "all paths to an exit go through a killing block (default = 50)" )); |
157 | |
// This flag allows or disallows DSE to optimize MemorySSA during its
159 | // traversal. Note that DSE optimizing MemorySSA may impact other passes |
160 | // downstream of the DSE invocation and can lead to issues not being |
161 | // reproducible in isolation (i.e. when MemorySSA is built from scratch). In |
162 | // those cases, the flag can be used to check if DSE's MemorySSA optimizations |
163 | // impact follow-up passes. |
164 | static cl::opt<bool> |
165 | OptimizeMemorySSA("dse-optimize-memoryssa" , cl::init(Val: true), cl::Hidden, |
166 | cl::desc("Allow DSE to optimize memory accesses." )); |
167 | |
168 | // TODO: remove this flag. |
169 | static cl::opt<bool> EnableInitializesImprovement( |
170 | "enable-dse-initializes-attr-improvement" , cl::init(Val: true), cl::Hidden, |
171 | cl::desc("Enable the initializes attr improvement in DSE" )); |
172 | |
173 | //===----------------------------------------------------------------------===// |
174 | // Helper functions |
175 | //===----------------------------------------------------------------------===// |
176 | using OverlapIntervalsTy = std::map<int64_t, int64_t>; |
177 | using InstOverlapIntervalsTy = MapVector<Instruction *, OverlapIntervalsTy>; |
178 | |
179 | /// Returns true if the end of this instruction can be safely shortened in |
180 | /// length. |
181 | static bool isShortenableAtTheEnd(Instruction *I) { |
182 | // Don't shorten stores for now |
183 | if (isa<StoreInst>(Val: I)) |
184 | return false; |
185 | |
186 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
187 | switch (II->getIntrinsicID()) { |
188 | default: return false; |
189 | case Intrinsic::memset: |
190 | case Intrinsic::memcpy: |
191 | case Intrinsic::memcpy_element_unordered_atomic: |
192 | case Intrinsic::memset_element_unordered_atomic: |
193 | // Do shorten memory intrinsics. |
194 | // FIXME: Add memmove if it's also safe to transform. |
195 | return true; |
196 | } |
197 | } |
198 | |
  // Don't shorten libcalls for now.
200 | |
201 | return false; |
202 | } |
203 | |
204 | /// Returns true if the beginning of this instruction can be safely shortened |
205 | /// in length. |
206 | static bool isShortenableAtTheBeginning(Instruction *I) { |
207 | // FIXME: Handle only memset for now. Supporting memcpy/memmove should be |
208 | // easily done by offsetting the source address. |
209 | return isa<AnyMemSetInst>(Val: I); |
210 | } |
211 | |
212 | static std::optional<TypeSize> getPointerSize(const Value *V, |
213 | const DataLayout &DL, |
214 | const TargetLibraryInfo &TLI, |
215 | const Function *F) { |
216 | uint64_t Size; |
217 | ObjectSizeOpts Opts; |
218 | Opts.NullIsUnknownSize = NullPointerIsDefined(F); |
219 | |
220 | if (getObjectSize(Ptr: V, Size, DL, TLI: &TLI, Opts)) |
221 | return TypeSize::getFixed(ExactSize: Size); |
222 | return std::nullopt; |
223 | } |
224 | |
225 | namespace { |
226 | |
enum OverwriteResult {
  OW_Begin,        // The killing store overwrites the start of the dead store.
  OW_Complete,     // The killing store completely overwrites the dead store.
  OW_End,          // The killing store overwrites the end of the dead store.
  OW_PartialEarlierWithFullLater, // The killing store is fully contained
                                  // within the dead store.
  OW_MaybePartial, // The killing store may partially overwrite the dead store.
  OW_None,         // The killing store is known not to overwrite the dead one.
  OW_Unknown       // Nothing could be determined.
};
236 | |
237 | } // end anonymous namespace |
238 | |
/// Check if two instructions are masked stores that completely
240 | /// overwrite one another. More specifically, \p KillingI has to |
241 | /// overwrite \p DeadI. |
242 | static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, |
243 | const Instruction *DeadI, |
244 | BatchAAResults &AA) { |
245 | const auto *KillingII = dyn_cast<IntrinsicInst>(Val: KillingI); |
246 | const auto *DeadII = dyn_cast<IntrinsicInst>(Val: DeadI); |
247 | if (KillingII == nullptr || DeadII == nullptr) |
248 | return OW_Unknown; |
249 | if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID()) |
250 | return OW_Unknown; |
251 | |
252 | switch (KillingII->getIntrinsicID()) { |
253 | case Intrinsic::masked_store: |
254 | case Intrinsic::vp_store: { |
255 | const DataLayout &DL = KillingII->getDataLayout(); |
256 | auto *KillingTy = KillingII->getArgOperand(i: 0)->getType(); |
257 | auto *DeadTy = DeadII->getArgOperand(i: 0)->getType(); |
258 | if (DL.getTypeSizeInBits(Ty: KillingTy) != DL.getTypeSizeInBits(Ty: DeadTy)) |
259 | return OW_Unknown; |
260 | // Element count. |
261 | if (cast<VectorType>(Val: KillingTy)->getElementCount() != |
262 | cast<VectorType>(Val: DeadTy)->getElementCount()) |
263 | return OW_Unknown; |
264 | // Pointers. |
265 | Value *KillingPtr = KillingII->getArgOperand(i: 1); |
266 | Value *DeadPtr = DeadII->getArgOperand(i: 1); |
267 | if (KillingPtr != DeadPtr && !AA.isMustAlias(V1: KillingPtr, V2: DeadPtr)) |
268 | return OW_Unknown; |
269 | if (KillingII->getIntrinsicID() == Intrinsic::masked_store) { |
270 | // Masks. |
271 | // TODO: check that KillingII's mask is a superset of the DeadII's mask. |
272 | if (KillingII->getArgOperand(i: 3) != DeadII->getArgOperand(i: 3)) |
273 | return OW_Unknown; |
274 | } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) { |
275 | // Masks. |
276 | // TODO: check that KillingII's mask is a superset of the DeadII's mask. |
277 | if (KillingII->getArgOperand(i: 2) != DeadII->getArgOperand(i: 2)) |
278 | return OW_Unknown; |
279 | // Lengths. |
280 | if (KillingII->getArgOperand(i: 3) != DeadII->getArgOperand(i: 3)) |
281 | return OW_Unknown; |
282 | } |
283 | return OW_Complete; |
284 | } |
285 | default: |
286 | return OW_Unknown; |
287 | } |
288 | } |
289 | |
290 | /// Return 'OW_Complete' if a store to the 'KillingLoc' location completely |
291 | /// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the |
292 | /// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin' |
293 | /// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'. |
294 | /// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was |
295 | /// overwritten by a killing (smaller) store which doesn't write outside the big |
296 | /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. |
297 | /// NOTE: This function must only be called if both \p KillingLoc and \p |
298 | /// DeadLoc belong to the same underlying object with valid \p KillingOff and |
299 | /// \p DeadOff. |
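/// For example (illustrative): for a dead store covering [0, 8), killing
/// stores covering [0, 4) and [4, 8) together cover the dead store, so the
/// call for the second killing store returns OW_Complete.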
300 | static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, |
301 | const MemoryLocation &DeadLoc, |
302 | int64_t KillingOff, int64_t DeadOff, |
303 | Instruction *DeadI, |
304 | InstOverlapIntervalsTy &IOL) { |
305 | const uint64_t KillingSize = KillingLoc.Size.getValue(); |
306 | const uint64_t DeadSize = DeadLoc.Size.getValue(); |
  // The stores may overlap, although the overlap is not complete. There might
  // also be other incomplete overlaps, and together, they might cover the
  // complete dead store.
  // Note: The correctness of this logic depends on the fact that this function
  // is never called with DepWrite when there are any intervening reads.
312 | if (EnablePartialOverwriteTracking && |
313 | KillingOff < int64_t(DeadOff + DeadSize) && |
314 | int64_t(KillingOff + KillingSize) >= DeadOff) { |
315 | |
316 | // Insert our part of the overlap into the map. |
317 | auto &IM = IOL[DeadI]; |
318 | LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", " |
319 | << int64_t(DeadOff + DeadSize) << ") KillingLoc [" |
320 | << KillingOff << ", " << int64_t(KillingOff + KillingSize) |
321 | << ")\n" ); |
322 | |
323 | // Make sure that we only insert non-overlapping intervals and combine |
324 | // adjacent intervals. The intervals are stored in the map with the ending |
325 | // offset as the key (in the half-open sense) and the starting offset as |
326 | // the value. |
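    // For example (illustrative), a killing write covering [4, 12) is
    // recorded as IM[12] = 4.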
327 | int64_t KillingIntStart = KillingOff; |
328 | int64_t KillingIntEnd = KillingOff + KillingSize; |
329 | |
330 | // Find any intervals ending at, or after, KillingIntStart which start |
331 | // before KillingIntEnd. |
332 | auto ILI = IM.lower_bound(x: KillingIntStart); |
333 | if (ILI != IM.end() && ILI->second <= KillingIntEnd) { |
334 | // This existing interval is overlapped with the current store somewhere |
335 | // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing |
336 | // intervals and adjusting our start and end. |
337 | KillingIntStart = std::min(a: KillingIntStart, b: ILI->second); |
338 | KillingIntEnd = std::max(a: KillingIntEnd, b: ILI->first); |
339 | ILI = IM.erase(position: ILI); |
340 | |
341 | // Continue erasing and adjusting our end in case other previous |
342 | // intervals are also overlapped with the current store. |
343 | // |
344 | // |--- dead 1 ---| |--- dead 2 ---| |
345 | // |------- killing---------| |
346 | // |
347 | while (ILI != IM.end() && ILI->second <= KillingIntEnd) { |
348 | assert(ILI->second > KillingIntStart && "Unexpected interval" ); |
349 | KillingIntEnd = std::max(a: KillingIntEnd, b: ILI->first); |
350 | ILI = IM.erase(position: ILI); |
351 | } |
352 | } |
353 | |
354 | IM[KillingIntEnd] = KillingIntStart; |
355 | |
356 | ILI = IM.begin(); |
357 | if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) { |
358 | LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc [" |
359 | << DeadOff << ", " << int64_t(DeadOff + DeadSize) |
360 | << ") Composite KillingLoc [" << ILI->second << ", " |
361 | << ILI->first << ")\n" ); |
362 | ++NumCompletePartials; |
363 | return OW_Complete; |
364 | } |
365 | } |
366 | |
367 | // Check for a dead store which writes to all the memory locations that |
368 | // the killing store writes to. |
369 | if (EnablePartialStoreMerging && KillingOff >= DeadOff && |
370 | int64_t(DeadOff + DeadSize) > KillingOff && |
371 | uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) { |
372 | LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff |
373 | << ", " << int64_t(DeadOff + DeadSize) |
374 | << ") by a killing store [" << KillingOff << ", " |
375 | << int64_t(KillingOff + KillingSize) << ")\n" ); |
376 | // TODO: Maybe come up with a better name? |
377 | return OW_PartialEarlierWithFullLater; |
378 | } |
379 | |
380 | // Another interesting case is if the killing store overwrites the end of the |
381 | // dead store. |
382 | // |
383 | // |--dead--| |
384 | // |-- killing --| |
385 | // |
  // In this case we may want to trim the size of the dead store to avoid
  // generating stores to addresses which will definitely be overwritten by the
  // killing store.
389 | if (!EnablePartialOverwriteTracking && |
390 | (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) && |
391 | int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize))) |
392 | return OW_End; |
393 | |
394 | // Finally, we also need to check if the killing store overwrites the |
395 | // beginning of the dead store. |
396 | // |
397 | // |--dead--| |
398 | // |-- killing --| |
399 | // |
  // In this case we may want to move the destination address and trim the size
  // of the dead store to avoid generating stores to addresses which will
  // definitely be overwritten by the killing store.
403 | if (!EnablePartialOverwriteTracking && |
404 | (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) { |
405 | assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) && |
406 | "Expect to be handled as OW_Complete" ); |
407 | return OW_Begin; |
408 | } |
409 | // Otherwise, they don't completely overlap. |
410 | return OW_Unknown; |
411 | } |
412 | |
413 | /// Returns true if the memory which is accessed by the second instruction is not |
414 | /// modified between the first and the second instruction. |
415 | /// Precondition: Second instruction must be dominated by the first |
416 | /// instruction. |
417 | static bool |
418 | memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, |
419 | BatchAAResults &AA, const DataLayout &DL, |
420 | DominatorTree *DT) { |
421 | // Do a backwards scan through the CFG from SecondI to FirstI. Look for |
422 | // instructions which can modify the memory location accessed by SecondI. |
423 | // |
424 | // While doing the walk keep track of the address to check. It might be |
425 | // different in different basic blocks due to PHI translation. |
426 | using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>; |
427 | SmallVector<BlockAddressPair, 16> WorkList; |
428 | // Keep track of the address we visited each block with. Bail out if we |
429 | // visit a block with different addresses. |
430 | DenseMap<BasicBlock *, Value *> Visited; |
431 | |
432 | BasicBlock::iterator FirstBBI(FirstI); |
433 | ++FirstBBI; |
434 | BasicBlock::iterator SecondBBI(SecondI); |
435 | BasicBlock *FirstBB = FirstI->getParent(); |
436 | BasicBlock *SecondBB = SecondI->getParent(); |
437 | MemoryLocation MemLoc; |
438 | if (auto *MemSet = dyn_cast<MemSetInst>(Val: SecondI)) |
439 | MemLoc = MemoryLocation::getForDest(MI: MemSet); |
440 | else |
441 | MemLoc = MemoryLocation::get(Inst: SecondI); |
442 | |
443 | auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr); |
444 | |
445 | // Start checking the SecondBB. |
446 | WorkList.push_back( |
447 | Elt: std::make_pair(x&: SecondBB, y: PHITransAddr(MemLocPtr, DL, nullptr))); |
448 | bool isFirstBlock = true; |
449 | |
450 | // Check all blocks going backward until we reach the FirstBB. |
451 | while (!WorkList.empty()) { |
452 | BlockAddressPair Current = WorkList.pop_back_val(); |
453 | BasicBlock *B = Current.first; |
454 | PHITransAddr &Addr = Current.second; |
455 | Value *Ptr = Addr.getAddr(); |
456 | |
457 | // Ignore instructions before FirstI if this is the FirstBB. |
458 | BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin()); |
459 | |
460 | BasicBlock::iterator EI; |
461 | if (isFirstBlock) { |
462 | // Ignore instructions after SecondI if this is the first visit of SecondBB. |
463 | assert(B == SecondBB && "first block is not the store block" ); |
464 | EI = SecondBBI; |
465 | isFirstBlock = false; |
466 | } else { |
467 | // It's not SecondBB or (in case of a loop) the second visit of SecondBB. |
468 | // In this case we also have to look at instructions after SecondI. |
469 | EI = B->end(); |
470 | } |
471 | for (; BI != EI; ++BI) { |
472 | Instruction *I = &*BI; |
473 | if (I->mayWriteToMemory() && I != SecondI) |
474 | if (isModSet(MRI: AA.getModRefInfo(I, OptLoc: MemLoc.getWithNewPtr(NewPtr: Ptr)))) |
475 | return false; |
476 | } |
477 | if (B != FirstBB) { |
478 | assert(B != &FirstBB->getParent()->getEntryBlock() && |
479 | "Should not hit the entry block because SI must be dominated by LI" ); |
480 | for (BasicBlock *Pred : predecessors(BB: B)) { |
481 | PHITransAddr PredAddr = Addr; |
482 | if (PredAddr.needsPHITranslationFromBlock(BB: B)) { |
483 | if (!PredAddr.isPotentiallyPHITranslatable()) |
484 | return false; |
485 | if (!PredAddr.translateValue(CurBB: B, PredBB: Pred, DT, MustDominate: false)) |
486 | return false; |
487 | } |
488 | Value *TranslatedPtr = PredAddr.getAddr(); |
489 | auto Inserted = Visited.insert(KV: std::make_pair(x&: Pred, y&: TranslatedPtr)); |
490 | if (!Inserted.second) { |
491 | // We already visited this block before. If it was with a different |
492 | // address - bail out! |
493 | if (TranslatedPtr != Inserted.first->second) |
494 | return false; |
495 | // ... otherwise just skip it. |
496 | continue; |
497 | } |
498 | WorkList.push_back(Elt: std::make_pair(x&: Pred, y&: PredAddr)); |
499 | } |
500 | } |
501 | } |
502 | return true; |
503 | } |
504 | |
505 | static void shortenAssignment(Instruction *Inst, Value *OriginalDest, |
506 | uint64_t OldOffsetInBits, uint64_t OldSizeInBits, |
507 | uint64_t NewSizeInBits, bool IsOverwriteEnd) { |
508 | const DataLayout &DL = Inst->getDataLayout(); |
509 | uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits; |
510 | uint64_t DeadSliceOffsetInBits = |
511 | OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0); |
512 | auto SetDeadFragExpr = [](auto *Assign, |
513 | DIExpression::FragmentInfo DeadFragment) { |
514 | // createFragmentExpression expects an offset relative to the existing |
515 | // fragment offset if there is one. |
516 | uint64_t RelativeOffset = DeadFragment.OffsetInBits - |
517 | Assign->getExpression() |
518 | ->getFragmentInfo() |
519 | .value_or(DIExpression::FragmentInfo(0, 0)) |
520 | .OffsetInBits; |
521 | if (auto NewExpr = DIExpression::createFragmentExpression( |
522 | Expr: Assign->getExpression(), OffsetInBits: RelativeOffset, SizeInBits: DeadFragment.SizeInBits)) { |
523 | Assign->setExpression(*NewExpr); |
524 | return; |
525 | } |
526 | // Failed to create a fragment expression for this so discard the value, |
527 | // making this a kill location. |
528 | auto *Expr = *DIExpression::createFragmentExpression( |
529 | Expr: DIExpression::get(Context&: Assign->getContext(), Elements: {}), OffsetInBits: DeadFragment.OffsetInBits, |
530 | SizeInBits: DeadFragment.SizeInBits); |
531 | Assign->setExpression(Expr); |
532 | Assign->setKillLocation(); |
533 | }; |
534 | |
535 | // A DIAssignID to use so that the inserted dbg.assign intrinsics do not |
536 | // link to any instructions. Created in the loop below (once). |
537 | DIAssignID *LinkToNothing = nullptr; |
538 | LLVMContext &Ctx = Inst->getContext(); |
539 | auto GetDeadLink = [&Ctx, &LinkToNothing]() { |
540 | if (!LinkToNothing) |
541 | LinkToNothing = DIAssignID::getDistinct(Context&: Ctx); |
542 | return LinkToNothing; |
543 | }; |
544 | |
545 | // Insert an unlinked dbg.assign intrinsic for the dead fragment after each |
546 | // overlapping dbg.assign intrinsic. The loop invalidates the iterators |
547 | // returned by getAssignmentMarkers so save a copy of the markers to iterate |
548 | // over. |
549 | auto LinkedRange = at::getAssignmentMarkers(Inst); |
550 | SmallVector<DbgVariableRecord *> LinkedDVRAssigns = |
551 | at::getDVRAssignmentMarkers(Inst); |
552 | SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(), |
553 | LinkedRange.end()); |
554 | auto InsertAssignForOverlap = [&](auto *Assign) { |
555 | std::optional<DIExpression::FragmentInfo> NewFragment; |
556 | if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits, |
557 | DeadSliceSizeInBits, Assign, |
558 | NewFragment) || |
559 | !NewFragment) { |
560 | // We couldn't calculate the intersecting fragment for some reason. Be |
561 | // cautious and unlink the whole assignment from the store. |
562 | Assign->setKillAddress(); |
563 | Assign->setAssignId(GetDeadLink()); |
564 | return; |
565 | } |
566 | // No intersect. |
567 | if (NewFragment->SizeInBits == 0) |
568 | return; |
569 | |
570 | // Fragments overlap: insert a new dbg.assign for this dead part. |
571 | auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone()); |
572 | NewAssign->insertAfter(Assign->getIterator()); |
573 | NewAssign->setAssignId(GetDeadLink()); |
574 | if (NewFragment) |
575 | SetDeadFragExpr(NewAssign, *NewFragment); |
576 | NewAssign->setKillAddress(); |
577 | }; |
578 | for_each(Range&: Linked, F: InsertAssignForOverlap); |
579 | for_each(Range&: LinkedDVRAssigns, F: InsertAssignForOverlap); |
580 | } |
581 | |
582 | /// Update the attributes given that a memory access is updated (the |
583 | /// dereferenced pointer could be moved forward when shortening a |
584 | /// mem intrinsic). |
585 | static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo, |
586 | uint64_t PtrOffset) { |
587 | // Remember old attributes. |
588 | AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo); |
589 | |
590 | // Find attributes that should be kept, and remove the rest. |
591 | AttributeMask AttrsToRemove; |
592 | for (auto &Attr : OldAttrs) { |
593 | if (Attr.hasKindAsEnum()) { |
594 | switch (Attr.getKindAsEnum()) { |
595 | default: |
596 | break; |
597 | case Attribute::Alignment: |
      // Only keep the alignment if PtrOffset satisfies the alignment.
599 | if (isAligned(Lhs: Attr.getAlignment().valueOrOne(), SizeInBytes: PtrOffset)) |
600 | continue; |
601 | break; |
602 | case Attribute::Dereferenceable: |
603 | case Attribute::DereferenceableOrNull: |
604 | // We could reduce the size of these attributes according to |
605 | // PtrOffset. But we simply drop these for now. |
606 | break; |
607 | case Attribute::NonNull: |
608 | case Attribute::NoUndef: |
609 | continue; |
610 | } |
611 | } |
612 | AttrsToRemove.addAttribute(A: Attr); |
613 | } |
614 | |
615 | // Remove the attributes that should be dropped. |
616 | Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove); |
617 | } |
618 | |
619 | static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, |
620 | uint64_t &DeadSize, int64_t KillingStart, |
621 | uint64_t KillingSize, bool IsOverwriteEnd) { |
622 | auto *DeadIntrinsic = cast<AnyMemIntrinsic>(Val: DeadI); |
623 | Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne(); |
624 | |
  // We assume that memset/memcpy operates in chunks of the "largest" native
  // type size, aligned on the same value. That means the optimal start and
  // size of memset/memcpy should be a multiple of the preferred alignment of
  // that type, so there is no sense in trying to reduce the store size any
  // further, since any "extra" stores come for free anyway.
  // On the other hand, the maximum alignment we can achieve is limited by the
  // alignment of the initial store.
632 | |
633 | // TODO: Limit maximum alignment by preferred (or abi?) alignment of the |
634 | // "largest" native type. |
635 | // Note: What is the proper way to get that value? |
636 | // Should TargetTransformInfo::getRegisterBitWidth be used or anything else? |
637 | // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign); |
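  // For example (illustrative): with PrefAlign = 8, a 32-byte memset whose
  // tail is overwritten from offset 20 onwards is only shortened to 24 bytes
  // (removing [24, 32)), so the remaining store size stays a multiple of the
  // alignment.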
638 | |
639 | int64_t ToRemoveStart = 0; |
640 | uint64_t ToRemoveSize = 0; |
641 | // Compute start and size of the region to remove. Make sure 'PrefAlign' is |
642 | // maintained on the remaining store. |
643 | if (IsOverwriteEnd) { |
    // Calculate the required adjustment for 'KillingStart' in order to keep
    // the remaining store size aligned on 'PrefAlign'.
646 | uint64_t Off = |
647 | offsetToAlignment(Value: uint64_t(KillingStart - DeadStart), Alignment: PrefAlign); |
648 | ToRemoveStart = KillingStart + Off; |
649 | if (DeadSize <= uint64_t(ToRemoveStart - DeadStart)) |
650 | return false; |
651 | ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart); |
652 | } else { |
653 | ToRemoveStart = DeadStart; |
654 | assert(KillingSize >= uint64_t(DeadStart - KillingStart) && |
655 | "Not overlapping accesses?" ); |
656 | ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart); |
    // Calculate the required adjustment for 'ToRemoveSize' in order to keep
    // the start of the remaining store aligned on 'PrefAlign'.
659 | uint64_t Off = offsetToAlignment(Value: ToRemoveSize, Alignment: PrefAlign); |
660 | if (Off != 0) { |
661 | if (ToRemoveSize <= (PrefAlign.value() - Off)) |
662 | return false; |
663 | ToRemoveSize -= PrefAlign.value() - Off; |
664 | } |
665 | assert(isAligned(PrefAlign, ToRemoveSize) && |
666 | "Should preserve selected alignment" ); |
667 | } |
668 | |
669 | assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove" ); |
670 | assert(DeadSize > ToRemoveSize && "Can't remove more than original size" ); |
671 | |
672 | uint64_t NewSize = DeadSize - ToRemoveSize; |
673 | if (DeadIntrinsic->isAtomic()) { |
674 | // When shortening an atomic memory intrinsic, the newly shortened |
675 | // length must remain an integer multiple of the element size. |
676 | const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes(); |
677 | if (0 != NewSize % ElementSize) |
678 | return false; |
679 | } |
680 | |
681 | LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " |
682 | << (IsOverwriteEnd ? "END" : "BEGIN" ) << ": " << *DeadI |
683 | << "\n KILLER [" << ToRemoveStart << ", " |
684 | << int64_t(ToRemoveStart + ToRemoveSize) << ")\n" ); |
685 | |
686 | Value *DeadWriteLength = DeadIntrinsic->getLength(); |
687 | Value *TrimmedLength = ConstantInt::get(Ty: DeadWriteLength->getType(), V: NewSize); |
688 | DeadIntrinsic->setLength(TrimmedLength); |
689 | DeadIntrinsic->setDestAlignment(PrefAlign); |
690 | |
691 | Value *OrigDest = DeadIntrinsic->getRawDest(); |
692 | if (!IsOverwriteEnd) { |
693 | Value *Indices[1] = { |
694 | ConstantInt::get(Ty: DeadWriteLength->getType(), V: ToRemoveSize)}; |
695 | Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds( |
696 | PointeeType: Type::getInt8Ty(C&: DeadIntrinsic->getContext()), Ptr: OrigDest, IdxList: Indices, NameStr: "" , |
697 | InsertBefore: DeadI->getIterator()); |
698 | NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc()); |
699 | DeadIntrinsic->setDest(NewDestGEP); |
700 | adjustArgAttributes(Intrinsic: DeadIntrinsic, ArgNo: 0, PtrOffset: ToRemoveSize); |
701 | } |
702 | |
703 | // Update attached dbg.assign intrinsics. Assume 8-bit byte. |
704 | shortenAssignment(Inst: DeadI, OriginalDest: OrigDest, OldOffsetInBits: DeadStart * 8, OldSizeInBits: DeadSize * 8, NewSizeInBits: NewSize * 8, |
705 | IsOverwriteEnd); |
706 | |
707 | // Finally update start and size of dead access. |
708 | if (!IsOverwriteEnd) |
709 | DeadStart += ToRemoveSize; |
710 | DeadSize = NewSize; |
711 | |
712 | return true; |
713 | } |
714 | |
715 | static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, |
716 | int64_t &DeadStart, uint64_t &DeadSize) { |
717 | if (IntervalMap.empty() || !isShortenableAtTheEnd(I: DeadI)) |
718 | return false; |
719 | |
720 | OverlapIntervalsTy::iterator OII = --IntervalMap.end(); |
721 | int64_t KillingStart = OII->second; |
722 | uint64_t KillingSize = OII->first - KillingStart; |
723 | |
724 | assert(OII->first - KillingStart >= 0 && "Size expected to be positive" ); |
725 | |
726 | if (KillingStart > DeadStart && |
727 | // Note: "KillingStart - KillingStart" is known to be positive due to |
728 | // preceding check. |
729 | (uint64_t)(KillingStart - DeadStart) < DeadSize && |
730 | // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to |
731 | // be non negative due to preceding checks. |
732 | KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) { |
733 | if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize, |
734 | IsOverwriteEnd: true)) { |
735 | IntervalMap.erase(position: OII); |
736 | return true; |
737 | } |
738 | } |
739 | return false; |
740 | } |
741 | |
742 | static bool tryToShortenBegin(Instruction *DeadI, |
743 | OverlapIntervalsTy &IntervalMap, |
744 | int64_t &DeadStart, uint64_t &DeadSize) { |
745 | if (IntervalMap.empty() || !isShortenableAtTheBeginning(I: DeadI)) |
746 | return false; |
747 | |
748 | OverlapIntervalsTy::iterator OII = IntervalMap.begin(); |
749 | int64_t KillingStart = OII->second; |
750 | uint64_t KillingSize = OII->first - KillingStart; |
751 | |
752 | assert(OII->first - KillingStart >= 0 && "Size expected to be positive" ); |
753 | |
754 | if (KillingStart <= DeadStart && |
755 | // Note: "DeadStart - KillingStart" is known to be non negative due to |
756 | // preceding check. |
757 | KillingSize > (uint64_t)(DeadStart - KillingStart)) { |
758 | // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to |
759 | // be positive due to preceding checks. |
760 | assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize && |
761 | "Should have been handled as OW_Complete" ); |
762 | if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize, |
763 | IsOverwriteEnd: false)) { |
764 | IntervalMap.erase(position: OII); |
765 | return true; |
766 | } |
767 | } |
768 | return false; |
769 | } |
770 | |
771 | static Constant * |
772 | tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, |
773 | int64_t KillingOffset, int64_t DeadOffset, |
774 | const DataLayout &DL, BatchAAResults &AA, |
775 | DominatorTree *DT) { |
776 | |
777 | if (DeadI && isa<ConstantInt>(Val: DeadI->getValueOperand()) && |
778 | DL.typeSizeEqualsStoreSize(Ty: DeadI->getValueOperand()->getType()) && |
779 | KillingI && isa<ConstantInt>(Val: KillingI->getValueOperand()) && |
780 | DL.typeSizeEqualsStoreSize(Ty: KillingI->getValueOperand()->getType()) && |
781 | memoryIsNotModifiedBetween(FirstI: DeadI, SecondI: KillingI, AA, DL, DT)) { |
    // If the dead store is:
    //   a) partially overwritten by the killing store,
    //   b) the killing store is fully contained in the dead one,
    //   c) both stores have a constant value, and
    //   d) neither of the two stores needs padding,
    // then merge the two stores, replacing the dead store's value with a
    // merge of both values.
789 | // TODO: Deal with other constant types (vectors, etc), and probably |
790 | // some mem intrinsics (if needed) |
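    // For example (illustrative, little-endian): a dead 'store i32 0x11223344'
    // followed by a killing 'store i8 0xAA' to the same address merges into
    // 'store i32 0x112233AA'.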
791 | |
792 | APInt DeadValue = cast<ConstantInt>(Val: DeadI->getValueOperand())->getValue(); |
793 | APInt KillingValue = |
794 | cast<ConstantInt>(Val: KillingI->getValueOperand())->getValue(); |
795 | unsigned KillingBits = KillingValue.getBitWidth(); |
796 | assert(DeadValue.getBitWidth() > KillingValue.getBitWidth()); |
797 | KillingValue = KillingValue.zext(width: DeadValue.getBitWidth()); |
798 | |
799 | // Offset of the smaller store inside the larger store |
800 | unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8; |
801 | unsigned LShiftAmount = |
802 | DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits |
803 | : BitOffsetDiff; |
804 | APInt Mask = APInt::getBitsSet(numBits: DeadValue.getBitWidth(), loBit: LShiftAmount, |
805 | hiBit: LShiftAmount + KillingBits); |
806 | // Clear the bits we'll be replacing, then OR with the smaller |
807 | // store, shifted appropriately. |
808 | APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount); |
809 | LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI |
810 | << "\n Killing: " << *KillingI |
811 | << "\n Merged Value: " << Merged << '\n'); |
812 | return ConstantInt::get(Ty: DeadI->getValueOperand()->getType(), V: Merged); |
813 | } |
814 | return nullptr; |
815 | } |
816 | |
817 | namespace { |
818 | // Returns true if \p I is an intrinsic that does not read or write memory. |
819 | bool isNoopIntrinsic(Instruction *I) { |
820 | if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
821 | switch (II->getIntrinsicID()) { |
822 | case Intrinsic::lifetime_start: |
823 | case Intrinsic::lifetime_end: |
824 | case Intrinsic::invariant_end: |
825 | case Intrinsic::launder_invariant_group: |
826 | case Intrinsic::assume: |
827 | return true; |
828 | case Intrinsic::dbg_declare: |
829 | case Intrinsic::dbg_label: |
830 | case Intrinsic::dbg_value: |
831 | llvm_unreachable("Intrinsic should not be modeled in MemorySSA" ); |
832 | default: |
833 | return false; |
834 | } |
835 | } |
836 | return false; |
837 | } |
838 | |
839 | // Check if we can ignore \p D for DSE. |
840 | bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { |
841 | Instruction *DI = D->getMemoryInst(); |
842 | // Calls that only access inaccessible memory cannot read or write any memory |
843 | // locations we consider for elimination. |
844 | if (auto *CB = dyn_cast<CallBase>(Val: DI)) |
845 | if (CB->onlyAccessesInaccessibleMemory()) |
846 | return true; |
847 | |
848 | // We can eliminate stores to locations not visible to the caller across |
849 | // throwing instructions. |
850 | if (DI->mayThrow() && !DefVisibleToCaller) |
851 | return true; |
852 | |
  // We can remove dead stores irrespective of the fence and its ordering
  // (release/acquire/seq_cst). Fences only constrain the ordering of already
  // visible stores; they do not make a store visible to other threads. So,
  // skipping over a fence does not change a store from being dead.
858 | if (isa<FenceInst>(Val: DI)) |
859 | return true; |
860 | |
861 | // Skip intrinsics that do not really read or modify memory. |
862 | if (isNoopIntrinsic(I: DI)) |
863 | return true; |
864 | |
865 | return false; |
866 | } |
867 | |
868 | // A memory location wrapper that represents a MemoryLocation, `MemLoc`, |
869 | // defined by `MemDef`. |
870 | struct MemoryLocationWrapper { |
871 | MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef, |
872 | bool DefByInitializesAttr) |
873 | : MemLoc(MemLoc), MemDef(MemDef), |
874 | DefByInitializesAttr(DefByInitializesAttr) { |
875 | assert(MemLoc.Ptr && "MemLoc should be not null" ); |
876 | UnderlyingObject = getUnderlyingObject(V: MemLoc.Ptr); |
877 | DefInst = MemDef->getMemoryInst(); |
878 | } |
879 | |
880 | MemoryLocation MemLoc; |
881 | const Value *UnderlyingObject; |
882 | MemoryDef *MemDef; |
883 | Instruction *DefInst; |
884 | bool DefByInitializesAttr = false; |
885 | }; |
886 | |
887 | // A memory def wrapper that represents a MemoryDef and the MemoryLocation(s) |
888 | // defined by this MemoryDef. |
889 | struct MemoryDefWrapper { |
890 | MemoryDefWrapper(MemoryDef *MemDef, |
891 | ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) { |
892 | DefInst = MemDef->getMemoryInst(); |
893 | for (auto &[MemLoc, DefByInitializesAttr] : MemLocations) |
894 | DefinedLocations.push_back( |
895 | Elt: MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr)); |
896 | } |
897 | Instruction *DefInst; |
898 | SmallVector<MemoryLocationWrapper, 1> DefinedLocations; |
899 | }; |
900 | |
901 | bool hasInitializesAttr(Instruction *I) { |
902 | CallBase *CB = dyn_cast<CallBase>(Val: I); |
903 | return CB && CB->getArgOperandWithAttribute(Kind: Attribute::Initializes); |
904 | } |
905 | |
906 | struct ArgumentInitInfo { |
907 | unsigned Idx; |
908 | bool IsDeadOrInvisibleOnUnwind; |
909 | ConstantRangeList Inits; |
910 | }; |
911 | |
// Return the intersected range list of the initializes attributes of "Args".
// "Args" are call arguments that alias each other.
// If any argument in "Args" is not dead or invisible on unwind and
// "CallHasNoUnwindAttr" is false, return an empty list.
916 | ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args, |
917 | bool CallHasNoUnwindAttr) { |
918 | if (Args.empty()) |
919 | return {}; |
920 | |
  // To handle unwinding, the call must have the nounwind attribute or the
  // arguments must be dead or invisible on unwind. Otherwise, return an empty
  // list.
923 | for (const auto &Arg : Args) { |
924 | if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind) |
925 | return {}; |
926 | if (Arg.Inits.empty()) |
927 | return {}; |
928 | } |
929 | |
930 | ConstantRangeList IntersectedIntervals = Args.front().Inits; |
931 | for (auto &Arg : Args.drop_front()) |
932 | IntersectedIntervals = IntersectedIntervals.intersectWith(CRL: Arg.Inits); |
933 | |
934 | return IntersectedIntervals; |
935 | } |
936 | |
937 | struct DSEState { |
938 | Function &F; |
939 | AliasAnalysis &AA; |
940 | EarliestEscapeAnalysis EA; |
941 | |
942 | /// The single BatchAA instance that is used to cache AA queries. It will |
943 | /// not be invalidated over the whole run. This is safe, because: |
944 | /// 1. Only memory writes are removed, so the alias cache for memory |
945 | /// locations remains valid. |
946 | /// 2. No new instructions are added (only instructions removed), so cached |
947 | /// information for a deleted value cannot be accessed by a re-used new |
948 | /// value pointer. |
949 | BatchAAResults BatchAA; |
950 | |
951 | MemorySSA &MSSA; |
952 | DominatorTree &DT; |
953 | PostDominatorTree &PDT; |
954 | const TargetLibraryInfo &TLI; |
955 | const DataLayout &DL; |
956 | const LoopInfo &LI; |
957 | |
958 | // Whether the function contains any irreducible control flow, useful for |
959 | // being accurately able to detect loops. |
960 | bool ContainsIrreducibleLoops; |
961 | |
962 | // All MemoryDefs that potentially could kill other MemDefs. |
963 | SmallVector<MemoryDef *, 64> MemDefs; |
964 | // Any that should be skipped as they are already deleted |
965 | SmallPtrSet<MemoryAccess *, 4> SkipStores; |
  // Keep track of whether a given object is captured before return or not.
967 | DenseMap<const Value *, bool> CapturedBeforeReturn; |
968 | // Keep track of all of the objects that are invisible to the caller after |
969 | // the function returns. |
970 | DenseMap<const Value *, bool> InvisibleToCallerAfterRet; |
971 | // Keep track of blocks with throwing instructions not modeled in MemorySSA. |
972 | SmallPtrSet<BasicBlock *, 16> ThrowingBlocks; |
973 | // Post-order numbers for each basic block. Used to figure out if memory |
974 | // accesses are executed before another access. |
975 | DenseMap<BasicBlock *, unsigned> PostOrderNumbers; |
976 | |
977 | /// Keep track of instructions (partly) overlapping with killing MemoryDefs per |
978 | /// basic block. |
979 | MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs; |
980 | // Check if there are root nodes that are terminated by UnreachableInst. |
981 | // Those roots pessimize post-dominance queries. If there are such roots, |
982 | // fall back to CFG scan starting from all non-unreachable roots. |
983 | bool AnyUnreachableExit; |
984 | |
985 | // Whether or not we should iterate on removing dead stores at the end of the |
986 | // function due to removing a store causing a previously captured pointer to |
987 | // no longer be captured. |
988 | bool ShouldIterateEndOfFunctionDSE; |
989 | |
990 | /// Dead instructions to be removed at the end of DSE. |
991 | SmallVector<Instruction *> ToRemove; |
992 | |
  // The class contains a self-reference, so make sure it's not copied/moved.
994 | DSEState(const DSEState &) = delete; |
995 | DSEState &operator=(const DSEState &) = delete; |
996 | |
997 | DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, |
998 | PostDominatorTree &PDT, const TargetLibraryInfo &TLI, |
999 | const LoopInfo &LI) |
1000 | : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT), |
1001 | PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) { |
1002 | // Collect blocks with throwing instructions not modeled in MemorySSA and |
1003 | // alloc-like objects. |
1004 | unsigned PO = 0; |
1005 | for (BasicBlock *BB : post_order(G: &F)) { |
1006 | PostOrderNumbers[BB] = PO++; |
1007 | for (Instruction &I : *BB) { |
1008 | MemoryAccess *MA = MSSA.getMemoryAccess(I: &I); |
1009 | if (I.mayThrow() && !MA) |
1010 | ThrowingBlocks.insert(Ptr: I.getParent()); |
1011 | |
1012 | auto *MD = dyn_cast_or_null<MemoryDef>(Val: MA); |
1013 | if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit && |
1014 | (getLocForWrite(I: &I) || isMemTerminatorInst(I: &I) || |
1015 | (EnableInitializesImprovement && hasInitializesAttr(I: &I)))) |
1016 | MemDefs.push_back(Elt: MD); |
1017 | } |
1018 | } |
1019 | |
    // Treat byval, inalloca, and dead-on-return arguments the same as allocas:
    // stores to them are dead at the end of the function.
1022 | for (Argument &AI : F.args()) |
1023 | if (AI.hasPassPointeeByValueCopyAttr() || AI.hasDeadOnReturnAttr()) |
1024 | InvisibleToCallerAfterRet.insert(KV: {&AI, true}); |
1025 | |
1026 | // Collect whether there is any irreducible control flow in the function. |
1027 | ContainsIrreducibleLoops = mayContainIrreducibleControl(F, LI: &LI); |
1028 | |
1029 | AnyUnreachableExit = any_of(Range: PDT.roots(), P: [](const BasicBlock *E) { |
1030 | return isa<UnreachableInst>(Val: E->getTerminator()); |
1031 | }); |
1032 | } |
1033 | |
1034 | static void pushMemUses(MemoryAccess *Acc, |
1035 | SmallVectorImpl<MemoryAccess *> &WorkList, |
1036 | SmallPtrSetImpl<MemoryAccess *> &Visited) { |
1037 | for (Use &U : Acc->uses()) { |
1038 | auto *MA = cast<MemoryAccess>(Val: U.getUser()); |
1039 | if (Visited.insert(Ptr: MA).second) |
1040 | WorkList.push_back(Elt: MA); |
1041 | } |
  }
1043 | |
1044 | LocationSize strengthenLocationSize(const Instruction *I, |
1045 | LocationSize Size) const { |
1046 | if (auto *CB = dyn_cast<CallBase>(Val: I)) { |
1047 | LibFunc F; |
1048 | if (TLI.getLibFunc(CB: *CB, F) && TLI.has(F) && |
1049 | (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) { |
        // Use the precise location size specified by the 3rd argument for
        // determining whether KillingI overwrites DeadLoc, if the call is a
        // memset_chk instruction. memset_chk will either write the amount
        // specified by the 3rd argument or immediately abort and exit the
        // program.
1054 | // NOTE: AA may determine NoAlias if it can prove that the access size |
1055 | // is larger than the allocation size due to that being UB. To avoid |
1056 | // returning potentially invalid NoAlias results by AA, limit the use of |
1057 | // the precise location size to isOverwrite. |
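        // For example (illustrative): '__memset_chk(p, 0, 16, n)' is treated
        // as a precise 16-byte write to p.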
1058 | if (const auto *Len = dyn_cast<ConstantInt>(Val: CB->getArgOperand(i: 2))) |
1059 | return LocationSize::precise(Value: Len->getZExtValue()); |
1060 | } |
1061 | } |
1062 | return Size; |
1063 | } |
1064 | |
1065 | /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p |
1066 | /// KillingI instruction) completely overwrites a store to the 'DeadLoc' |
1067 | /// location (by \p DeadI instruction). |
1068 | /// Return OW_MaybePartial if \p KillingI does not completely overwrite |
1069 | /// \p DeadI, but they both write to the same underlying object. In that |
1070 | /// case, use isPartialOverwrite to check if \p KillingI partially overwrites |
  /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
1072 | /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined. |
1073 | OverwriteResult isOverwrite(const Instruction *KillingI, |
1074 | const Instruction *DeadI, |
1075 | const MemoryLocation &KillingLoc, |
1076 | const MemoryLocation &DeadLoc, |
1077 | int64_t &KillingOff, int64_t &DeadOff) { |
1078 | // AliasAnalysis does not always account for loops. Limit overwrite checks |
1079 | // to dependencies for which we can guarantee they are independent of any |
1080 | // loops they are in. |
1081 | if (!isGuaranteedLoopIndependent(Current: DeadI, KillingDef: KillingI, CurrentLoc: DeadLoc)) |
1082 | return OW_Unknown; |
1083 | |
1084 | LocationSize KillingLocSize = |
1085 | strengthenLocationSize(I: KillingI, Size: KillingLoc.Size); |
1086 | const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts(); |
1087 | const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts(); |
1088 | const Value *DeadUndObj = getUnderlyingObject(V: DeadPtr); |
1089 | const Value *KillingUndObj = getUnderlyingObject(V: KillingPtr); |
1090 | |
1091 | // Check whether the killing store overwrites the whole object, in which |
1092 | // case the size/offset of the dead store does not matter. |
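    // For example (illustrative): a memset covering all 16 bytes of a 16-byte
    // alloca kills any earlier store into that alloca, regardless of offsets.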
1093 | if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() && |
1094 | isIdentifiedObject(V: KillingUndObj)) { |
1095 | std::optional<TypeSize> KillingUndObjSize = |
1096 | getPointerSize(V: KillingUndObj, DL, TLI, F: &F); |
1097 | if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue()) |
1098 | return OW_Complete; |
1099 | } |
1100 | |
1101 | // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll |
1102 | // get imprecise values here, though (except for unknown sizes). |
1103 | if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) { |
      // In case no constant size is known, try to use the IR values for the
      // number of bytes written and check if they match.
1106 | const auto *KillingMemI = dyn_cast<MemIntrinsic>(Val: KillingI); |
1107 | const auto *DeadMemI = dyn_cast<MemIntrinsic>(Val: DeadI); |
1108 | if (KillingMemI && DeadMemI) { |
1109 | const Value *KillingV = KillingMemI->getLength(); |
1110 | const Value *DeadV = DeadMemI->getLength(); |
1111 | if (KillingV == DeadV && BatchAA.isMustAlias(LocA: DeadLoc, LocB: KillingLoc)) |
1112 | return OW_Complete; |
1113 | } |
1114 | |
1115 | // Masked stores have imprecise locations, but we can reason about them |
1116 | // to some extent. |
1117 | return isMaskedStoreOverwrite(KillingI, DeadI, AA&: BatchAA); |
1118 | } |
1119 | |
1120 | const TypeSize KillingSize = KillingLocSize.getValue(); |
1121 | const TypeSize DeadSize = DeadLoc.Size.getValue(); |
    // For now, bail out on size comparisons that depend on AA.
    // TODO: Remove AnyScalable once Alias Analysis can deal with scalable
    // vectors.
1124 | const bool AnyScalable = |
1125 | DeadSize.isScalable() || KillingLocSize.isScalable(); |
1126 | |
1127 | if (AnyScalable) |
1128 | return OW_Unknown; |
1129 | // Query the alias information |
1130 | AliasResult AAR = BatchAA.alias(LocA: KillingLoc, LocB: DeadLoc); |
1131 | |
1132 | // If the start pointers are the same, we just have to compare sizes to see if |
1133 | // the killing store was larger than the dead store. |
1134 | if (AAR == AliasResult::MustAlias) { |
1135 | // Make sure that the KillingSize size is >= the DeadSize size. |
1136 | if (KillingSize >= DeadSize) |
1137 | return OW_Complete; |
1138 | } |
1139 | |
1140 | // If we hit a partial alias we may have a full overwrite |
1141 | if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) { |
1142 | int32_t Off = AAR.getOffset(); |
1143 | if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize) |
1144 | return OW_Complete; |
1145 | } |
1146 | |
1147 | // If we can't resolve the same pointers to the same object, then we can't |
1148 | // analyze them at all. |
1149 | if (DeadUndObj != KillingUndObj) { |
      // Non-aliasing stores to different objects don't overlap. Note that
1151 | // if the killing store is known to overwrite whole object (out of |
1152 | // bounds access overwrites whole object as well) then it is assumed to |
1153 | // completely overwrite any store to the same object even if they don't |
1154 | // actually alias (see next check). |
1155 | if (AAR == AliasResult::NoAlias) |
1156 | return OW_None; |
1157 | return OW_Unknown; |
1158 | } |
1159 | |
    // Okay, we have two different pointers into the same underlying object.
    // Try to decompose each pointer into a "base + constant_offset" form. If
    // the base pointers are equal, then we can reason about the two stores.
1163 | DeadOff = 0; |
1164 | KillingOff = 0; |
1165 | const Value *DeadBasePtr = |
1166 | GetPointerBaseWithConstantOffset(Ptr: DeadPtr, Offset&: DeadOff, DL); |
1167 | const Value *KillingBasePtr = |
1168 | GetPointerBaseWithConstantOffset(Ptr: KillingPtr, Offset&: KillingOff, DL); |
1169 | |
1170 | // If the base pointers still differ, we have two completely different |
1171 | // stores. |
1172 | if (DeadBasePtr != KillingBasePtr) |
1173 | return OW_Unknown; |
1174 | |
1175 | // The killing access completely overlaps the dead store if and only if |
1176 | // both start and end of the dead one is "inside" the killing one: |
1177 | // |<->|--dead--|<->| |
1178 | // |-----killing------| |
1179 | // Accesses may overlap if and only if start of one of them is "inside" |
1180 | // another one: |
1181 | // |<->|--dead--|<-------->| |
1182 | // |-------killing--------| |
1183 | // OR |
1184 | // |-------dead-------| |
1185 | // |<->|---killing---|<----->| |
1186 | // |
1187 | // We have to be careful here as *Off is signed while *.Size is unsigned. |
1188 | |
1189 | // Check if the dead access starts "not before" the killing one. |
1190 | if (DeadOff >= KillingOff) { |
1191 | // If the dead access ends "not after" the killing access then the |
1192 | // dead one is completely overwritten by the killing one. |
1193 | if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize) |
1194 | return OW_Complete; |
1195 | // If start of the dead access is "before" end of the killing access |
1196 | // then accesses overlap. |
1197 | else if ((uint64_t)(DeadOff - KillingOff) < KillingSize) |
1198 | return OW_MaybePartial; |
1199 | } |
1200 | // If start of the killing access is "before" end of the dead access then |
1201 | // accesses overlap. |
1202 | else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) { |
1203 | return OW_MaybePartial; |
1204 | } |
1205 | |
1206 | // Can reach here only if accesses are known not to overlap. |
1207 | return OW_None; |
1208 | } |
1209 | |
1210 | bool isInvisibleToCallerAfterRet(const Value *V) { |
1211 | if (isa<AllocaInst>(Val: V)) |
1212 | return true; |
1213 | |
1214 | auto I = InvisibleToCallerAfterRet.insert(KV: {V, false}); |
1215 | if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V)) |
1216 | I.first->second = capturesNothing(CC: PointerMayBeCaptured( |
1217 | V, /*ReturnCaptures=*/true, Mask: CaptureComponents::Provenance)); |
1218 | return I.first->second; |
1219 | } |
1220 | |
1221 | bool isInvisibleToCallerOnUnwind(const Value *V) { |
1222 | bool RequiresNoCaptureBeforeUnwind; |
1223 | if (!isNotVisibleOnUnwind(Object: V, RequiresNoCaptureBeforeUnwind)) |
1224 | return false; |
1225 | if (!RequiresNoCaptureBeforeUnwind) |
1226 | return true; |
1227 | |
1228 | auto I = CapturedBeforeReturn.insert(KV: {V, true}); |
1229 | if (I.second) |
1230 | // NOTE: This could be made more precise by PointerMayBeCapturedBefore |
1231 | // with the killing MemoryDef. But we refrain from doing so for now to |
1232 | // limit compile-time and this does not cause any changes to the number |
1233 | // of stores removed on a large test set in practice. |
1234 | I.first->second = capturesAnything(CC: PointerMayBeCaptured( |
1235 | V, /*ReturnCaptures=*/false, Mask: CaptureComponents::Provenance)); |
1236 | return !I.first->second; |
1237 | } |
1238 | |
1239 | std::optional<MemoryLocation> getLocForWrite(Instruction *I) const { |
1240 | if (!I->mayWriteToMemory()) |
1241 | return std::nullopt; |
1242 | |
1243 | if (auto *CB = dyn_cast<CallBase>(Val: I)) |
1244 | return MemoryLocation::getForDest(CI: CB, TLI); |
1245 | |
1246 | return MemoryLocation::getOrNone(Inst: I); |
1247 | } |
1248 | |
1249 | // Returns a list of <MemoryLocation, bool> pairs written by I. |
1250 | // The bool indicates whether the write comes from the "initializes" attribute. |
1251 | SmallVector<std::pair<MemoryLocation, bool>, 1> |
1252 | getLocForInst(Instruction *I, bool ConsiderInitializesAttr) { |
1253 | SmallVector<std::pair<MemoryLocation, bool>, 1> Locations; |
1254 | if (isMemTerminatorInst(I)) { |
1255 | if (auto Loc = getLocForTerminator(I)) |
1256 | Locations.push_back(Elt: std::make_pair(x&: Loc->first, y: false)); |
1257 | return Locations; |
1258 | } |
1259 | |
1260 | if (auto Loc = getLocForWrite(I)) |
1261 | Locations.push_back(Elt: std::make_pair(x&: *Loc, y: false)); |
1262 | |
1263 | if (ConsiderInitializesAttr) { |
1264 | for (auto &MemLoc : getInitializesArgMemLoc(I)) { |
1265 | Locations.push_back(Elt: std::make_pair(x&: MemLoc, y: true)); |
1266 | } |
1267 | } |
1268 | return Locations; |
1269 | } |
1270 | |
1271 | /// Assuming this instruction has a dead analyzable write, can we delete |
1272 | /// this instruction? |
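/// For example (illustrative), an unordered `store i32 0, ptr %p` is
/// removable, while volatile or ordered atomic stores, volatile memory
/// intrinsics and lifetime intrinsics are not.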
1273 | bool isRemovable(Instruction *I) { |
1274 | assert(getLocForWrite(I) && "Must have analyzable write" ); |
1275 | |
1276 | // Don't remove volatile/atomic stores. |
1277 | if (StoreInst *SI = dyn_cast<StoreInst>(Val: I)) |
1278 | return SI->isUnordered(); |
1279 | |
1280 | if (auto *CB = dyn_cast<CallBase>(Val: I)) { |
1281 | // Don't remove volatile memory intrinsics. |
1282 | if (auto *MI = dyn_cast<MemIntrinsic>(Val: CB)) |
1283 | return !MI->isVolatile(); |
1284 | |
1285 | // Never remove dead lifetime intrinsics, e.g. because they are followed |
1286 | // by a free. |
1287 | if (CB->isLifetimeStartOrEnd()) |
1288 | return false; |
1289 | |
1290 | return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() && |
1291 | !CB->isTerminator(); |
1292 | } |
1293 | |
1294 | return false; |
1295 | } |
1296 | |
1297 | /// Returns true if \p UseInst completely overwrites \p DefLoc |
1298 | /// (stored by \p DefInst). |
1299 | bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst, |
1300 | Instruction *UseInst) { |
1301 | // UseInst has a MemoryDef associated in MemorySSA. It's possible for a |
1302 | // MemoryDef to not write to memory, e.g. a volatile load is modeled as a |
1303 | // MemoryDef. |
1304 | if (!UseInst->mayWriteToMemory()) |
1305 | return false; |
1306 | |
1307 | if (auto *CB = dyn_cast<CallBase>(Val: UseInst)) |
1308 | if (CB->onlyAccessesInaccessibleMemory()) |
1309 | return false; |
1310 | |
1311 | int64_t InstWriteOffset, DepWriteOffset; |
1312 | if (auto CC = getLocForWrite(I: UseInst)) |
1313 | return isOverwrite(KillingI: UseInst, DeadI: DefInst, KillingLoc: *CC, DeadLoc: DefLoc, KillingOff&: InstWriteOffset, |
1314 | DeadOff&: DepWriteOffset) == OW_Complete; |
1315 | return false; |
1316 | } |
1317 | |
1318 | /// Returns true if \p Def is not read before returning from the function. |
1319 | bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc) { |
1320 | LLVM_DEBUG(dbgs() << " Check if def " << *Def << " (" |
1321 | << *Def->getMemoryInst() |
1322 | << ") is at the end of the function\n" ); |
1323 | SmallVector<MemoryAccess *, 4> WorkList; |
1324 | SmallPtrSet<MemoryAccess *, 8> Visited; |
1325 | |
1326 | pushMemUses(Acc: Def, WorkList, Visited); |
1327 | for (unsigned I = 0; I < WorkList.size(); I++) { |
1328 | if (WorkList.size() >= MemorySSAScanLimit) { |
1329 | LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n" ); |
1330 | return false; |
1331 | } |
1332 | |
1333 | MemoryAccess *UseAccess = WorkList[I]; |
1334 | if (isa<MemoryPhi>(Val: UseAccess)) { |
1335 | // AliasAnalysis does not account for loops. Limit elimination to |
1336 | // candidates for which we can guarantee they always store to the same |
1337 | // memory location. |
1338 | if (!isGuaranteedLoopInvariant(Ptr: DefLoc.Ptr)) |
1339 | return false; |
1340 | |
1341 | pushMemUses(Acc: cast<MemoryPhi>(Val: UseAccess), WorkList, Visited); |
1342 | continue; |
1343 | } |
1344 | // TODO: Checking for aliasing is expensive. Consider reducing the amount |
1345 | // of times this is called and/or caching it. |
1346 | Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst(); |
1347 | if (isReadClobber(DefLoc, UseInst)) { |
1348 | LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n" ); |
1349 | return false; |
1350 | } |
1351 | |
1352 | if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess)) |
1353 | pushMemUses(Acc: UseDef, WorkList, Visited); |
1354 | } |
1355 | return true; |
1356 | } |
1357 | |
1358 | /// If \p I is a memory terminator like llvm.lifetime.end or free, return a |
1359 | /// pair with the MemoryLocation terminated by \p I and a boolean flag |
1360 | /// indicating whether \p I is a free-like call. |
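/// For example (illustrative), for `call void @free(ptr %p)` this returns
/// the imprecise location of everything from %p onwards together with true;
/// for a constant-size `llvm.lifetime.end` it returns the precise location
/// together with false.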
1361 | std::optional<std::pair<MemoryLocation, bool>> |
1362 | getLocForTerminator(Instruction *I) const { |
1363 | uint64_t Len; |
1364 | Value *Ptr; |
1365 | if (match(V: I, P: m_Intrinsic<Intrinsic::lifetime_end>(Op0: m_ConstantInt(V&: Len), |
1366 | Op1: m_Value(V&: Ptr)))) |
1367 | return {std::make_pair(x: MemoryLocation(Ptr, Len), y: false)}; |
1368 | |
1369 | if (auto *CB = dyn_cast<CallBase>(Val: I)) { |
1370 | if (Value *FreedOp = getFreedOperand(CB, TLI: &TLI)) |
1371 | return {std::make_pair(x: MemoryLocation::getAfter(Ptr: FreedOp), y: true)}; |
1372 | } |
1373 | |
1374 | return std::nullopt; |
1375 | } |
1376 | |
1377 | /// Returns true if \p I is a memory terminator instruction like |
1378 | /// llvm.lifetime.end or free. |
1379 | bool isMemTerminatorInst(Instruction *I) const { |
1380 | auto *CB = dyn_cast<CallBase>(Val: I); |
1381 | return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end || |
1382 | getFreedOperand(CB, TLI: &TLI) != nullptr); |
1383 | } |
1384 | |
1385 | /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from |
1386 | /// instruction \p AccessI. |
1387 | bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI, |
1388 | Instruction *MaybeTerm) { |
1389 | std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc = |
1390 | getLocForTerminator(I: MaybeTerm); |
1391 | |
1392 | if (!MaybeTermLoc) |
1393 | return false; |
1394 | |
1395 | // If the terminator is a free-like call, all accesses to the underlying |
1396 | // object can be considered terminated. |
1397 | if (getUnderlyingObject(V: Loc.Ptr) != |
1398 | getUnderlyingObject(V: MaybeTermLoc->first.Ptr)) |
1399 | return false; |
1400 | |
1401 | auto TermLoc = MaybeTermLoc->first; |
1402 | if (MaybeTermLoc->second) { |
1403 | const Value *LocUO = getUnderlyingObject(V: Loc.Ptr); |
1404 | return BatchAA.isMustAlias(V1: TermLoc.Ptr, V2: LocUO); |
1405 | } |
1406 | int64_t InstWriteOffset = 0; |
1407 | int64_t DepWriteOffset = 0; |
1408 | return isOverwrite(KillingI: MaybeTerm, DeadI: AccessI, KillingLoc: TermLoc, DeadLoc: Loc, KillingOff&: InstWriteOffset, |
1409 | DeadOff&: DepWriteOffset) == OW_Complete; |
1410 | } |
1411 | |
1412 | /// Returns true if \p UseInst may read from \p DefLoc. |
1413 | bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) { |
1414 | if (isNoopIntrinsic(I: UseInst)) |
1415 | return false; |
1416 | |
1417 | // Monotonic or weaker atomic stores can be re-ordered and do not need to be |
1418 | // treated as read clobbers. |
1419 | if (auto SI = dyn_cast<StoreInst>(Val: UseInst)) |
1420 | return isStrongerThan(AO: SI->getOrdering(), Other: AtomicOrdering::Monotonic); |
1421 | |
1422 | if (!UseInst->mayReadFromMemory()) |
1423 | return false; |
1424 | |
1425 | if (auto *CB = dyn_cast<CallBase>(Val: UseInst)) |
1426 | if (CB->onlyAccessesInaccessibleMemory()) |
1427 | return false; |
1428 | |
1429 | return isRefSet(MRI: BatchAA.getModRefInfo(I: UseInst, OptLoc: DefLoc)); |
1430 | } |
1431 | |
1432 | /// Returns true if a dependency between \p Current and \p KillingDef is |
1433 | /// guaranteed to be loop invariant for the loops that they are in, either |
1434 | /// because they are known to be in the same block or at the same loop level, |
1435 | /// or because \p CurrentLoc is guaranteed to only reference a single |
1436 | /// MemoryLocation during execution of the containing function. |
1437 | bool isGuaranteedLoopIndependent(const Instruction *Current, |
1438 | const Instruction *KillingDef, |
1439 | const MemoryLocation &CurrentLoc) { |
1440 | // If the dependency is within the same block or loop level (being careful |
1441 | // of irreducible loops), we know that AA will return a valid result for the |
1442 | // memory dependency. (Both at the function level, outside of any loop, |
1443 | // would also be valid but we currently disable that to limit compile time). |
1444 | if (Current->getParent() == KillingDef->getParent()) |
1445 | return true; |
1446 | const Loop *CurrentLI = LI.getLoopFor(BB: Current->getParent()); |
1447 | if (!ContainsIrreducibleLoops && CurrentLI && |
1448 | CurrentLI == LI.getLoopFor(BB: KillingDef->getParent())) |
1449 | return true; |
1450 | // Otherwise check that the memory location is invariant to any loops. |
1451 | return isGuaranteedLoopInvariant(Ptr: CurrentLoc.Ptr); |
1452 | } |
1453 | |
1454 | /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible |
1455 | /// loop. In particular, this guarantees that it only references a single |
1456 | /// MemoryLocation during execution of the containing function. |
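/// For example (illustrative), an alloca in the entry block, or a GEP with
/// all-constant indices based on it, qualifies; a pointer produced by an
/// instruction inside a loop does not.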
1457 | bool isGuaranteedLoopInvariant(const Value *Ptr) { |
1458 | Ptr = Ptr->stripPointerCasts(); |
1459 | if (auto *GEP = dyn_cast<GEPOperator>(Val: Ptr)) |
1460 | if (GEP->hasAllConstantIndices()) |
1461 | Ptr = GEP->getPointerOperand()->stripPointerCasts(); |
1462 | |
1463 | if (auto *I = dyn_cast<Instruction>(Val: Ptr)) { |
1464 | return I->getParent()->isEntryBlock() || |
1465 | (!ContainsIrreducibleLoops && !LI.getLoopFor(BB: I->getParent())); |
1466 | } |
1467 | return true; |
1468 | } |
1469 | |
1470 | // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess, |
1471 | // with no read access between them or on any other path to a function exit |
1472 | // block if \p KillingLoc is not accessible after the function returns. If |
1473 | // there is no such MemoryDef, return std::nullopt. The returned value might |
1474 | // not (completely) overwrite \p KillingLoc. Currently we bail out when we |
1475 | // encounter an aliasing MemoryUse (read). |
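// For example (illustrative), for two stores to the same non-escaping
// alloca in one basic block with no intervening load, walking up from the
// defining access of the later store returns the earlier store as the
// candidate dead access.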
1476 | std::optional<MemoryAccess *> |
1477 | getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, |
1478 | const MemoryLocation &KillingLoc, const Value *KillingUndObj, |
1479 | unsigned &ScanLimit, unsigned &WalkerStepLimit, |
1480 | bool IsMemTerm, unsigned &PartialLimit, |
1481 | bool IsInitializesAttrMemLoc) { |
1482 | if (ScanLimit == 0 || WalkerStepLimit == 0) { |
1483 | LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n" ); |
1484 | return std::nullopt; |
1485 | } |
1486 | |
1487 | MemoryAccess *Current = StartAccess; |
1488 | Instruction *KillingI = KillingDef->getMemoryInst(); |
1489 | LLVM_DEBUG(dbgs() << " trying to get dominating access\n" ); |
1490 | |
1491 | // Only optimize defining access of KillingDef when directly starting at its |
1492 | // defining access. The defining access also must only access KillingLoc. At |
1493 | // the moment we only support instructions with a single write location, so |
1494 | // it should be sufficient to disable optimizations for instructions that |
1495 | // also read from memory. |
1496 | bool CanOptimize = OptimizeMemorySSA && |
1497 | KillingDef->getDefiningAccess() == StartAccess && |
1498 | !KillingI->mayReadFromMemory(); |
1499 | |
1500 | // Find the next clobbering Mod access for DefLoc, starting at StartAccess. |
1501 | std::optional<MemoryLocation> CurrentLoc; |
1502 | for (;; Current = cast<MemoryDef>(Val: Current)->getDefiningAccess()) { |
1503 | LLVM_DEBUG({ |
1504 | dbgs() << " visiting " << *Current; |
1505 | if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current)) |
1506 | dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst() |
1507 | << ")" ; |
1508 | dbgs() << "\n" ; |
1509 | }); |
1510 | |
1511 | // Reached TOP. |
1512 | if (MSSA.isLiveOnEntryDef(MA: Current)) { |
1513 | LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n" ); |
1514 | if (CanOptimize && Current != KillingDef->getDefiningAccess()) |
1515 | // The first clobbering def is... none. |
1516 | KillingDef->setOptimized(Current); |
1517 | return std::nullopt; |
1518 | } |
1519 | |
1520 | // Cost of a step. Accesses in the same block are more likely to be valid |
1521 | // candidates for elimination, hence consider them cheaper. |
1522 | unsigned StepCost = KillingDef->getBlock() == Current->getBlock() |
1523 | ? MemorySSASameBBStepCost |
1524 | : MemorySSAOtherBBStepCost; |
1525 | if (WalkerStepLimit <= StepCost) { |
1526 | LLVM_DEBUG(dbgs() << " ... hit walker step limit\n" ); |
1527 | return std::nullopt; |
1528 | } |
1529 | WalkerStepLimit -= StepCost; |
1530 | |
1531 | // Return for MemoryPhis. They cannot be eliminated directly and the |
1532 | // caller is responsible for traversing them. |
1533 | if (isa<MemoryPhi>(Val: Current)) { |
1534 | LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n" ); |
1535 | return Current; |
1536 | } |
1537 | |
1538 | // Below, check if CurrentDef is a valid candidate to be eliminated by |
1539 | // KillingDef. If it is not, check the next candidate. |
1540 | MemoryDef *CurrentDef = cast<MemoryDef>(Val: Current); |
1541 | Instruction *CurrentI = CurrentDef->getMemoryInst(); |
1542 | |
1543 | if (canSkipDef(D: CurrentDef, DefVisibleToCaller: !isInvisibleToCallerOnUnwind(V: KillingUndObj))) { |
1544 | CanOptimize = false; |
1545 | continue; |
1546 | } |
1547 | |
1548 | // Before we try to remove anything, check for any extra throwing |
1549 | // instructions that block us from DSEing. |
1550 | if (mayThrowBetween(KillingI, DeadI: CurrentI, KillingUndObj)) { |
1551 | LLVM_DEBUG(dbgs() << " ... skip, may throw!\n" ); |
1552 | return std::nullopt; |
1553 | } |
1554 | |
1555 | // Check for anything that looks like it will be a barrier to further |
1556 | // removal. |
1557 | if (isDSEBarrier(KillingUndObj, DeadI: CurrentI)) { |
1558 | LLVM_DEBUG(dbgs() << " ... skip, barrier\n" ); |
1559 | return std::nullopt; |
1560 | } |
1561 | |
1562 | // If Current is known to be on a path that reads DefLoc or is a read |
1563 | // clobber, bail out, as the path is not profitable. We skip this check |
1564 | // for intrinsic calls, because the code knows how to handle memcpy |
1565 | // intrinsics. |
1566 | if (!isa<IntrinsicInst>(Val: CurrentI) && isReadClobber(DefLoc: KillingLoc, UseInst: CurrentI)) |
1567 | return std::nullopt; |
1568 | |
1569 | // Quick check if there are direct uses that are read-clobbers. |
1570 | if (any_of(Range: Current->uses(), P: [this, &KillingLoc, StartAccess](Use &U) { |
1571 | if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Val: U.getUser())) |
1572 | return !MSSA.dominates(A: StartAccess, B: UseOrDef) && |
1573 | isReadClobber(DefLoc: KillingLoc, UseInst: UseOrDef->getMemoryInst()); |
1574 | return false; |
1575 | })) { |
1576 | LLVM_DEBUG(dbgs() << " ... found a read clobber\n" ); |
1577 | return std::nullopt; |
1578 | } |
1579 | |
1580 | // If Current does not have an analyzable write location or is not |
1581 | // removable, skip it. |
1582 | CurrentLoc = getLocForWrite(I: CurrentI); |
1583 | if (!CurrentLoc || !isRemovable(I: CurrentI)) { |
1584 | CanOptimize = false; |
1585 | continue; |
1586 | } |
1587 | |
1588 | // AliasAnalysis does not account for loops. Limit elimination to |
1589 | // candidates for which we can guarantee they always store to the same |
1590 | // memory location and not located in different loops. |
1591 | if (!isGuaranteedLoopIndependent(Current: CurrentI, KillingDef: KillingI, CurrentLoc: *CurrentLoc)) { |
1592 | LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n" ); |
1593 | CanOptimize = false; |
1594 | continue; |
1595 | } |
1596 | |
1597 | if (IsMemTerm) { |
1598 | // If the killing def is a memory terminator (e.g. lifetime.end), check |
1599 | // the next candidate if Current does not write the same |
1600 | // underlying object as the terminator. |
1601 | if (!isMemTerminator(Loc: *CurrentLoc, AccessI: CurrentI, MaybeTerm: KillingI)) { |
1602 | CanOptimize = false; |
1603 | continue; |
1604 | } |
1605 | } else { |
1606 | int64_t KillingOffset = 0; |
1607 | int64_t DeadOffset = 0; |
1608 | auto OR = isOverwrite(KillingI, DeadI: CurrentI, KillingLoc, DeadLoc: *CurrentLoc, |
1609 | KillingOff&: KillingOffset, DeadOff&: DeadOffset); |
1610 | if (CanOptimize) { |
1611 | // CurrentDef is the earliest write clobber of KillingDef. Use it as |
1612 | // optimized access. Do not optimize if CurrentDef is already the |
1613 | // defining access of KillingDef. |
1614 | if (CurrentDef != KillingDef->getDefiningAccess() && |
1615 | (OR == OW_Complete || OR == OW_MaybePartial)) |
1616 | KillingDef->setOptimized(CurrentDef); |
1617 | |
1618 | // Once a may-aliasing def is encountered do not set an optimized |
1619 | // access. |
1620 | if (OR != OW_None) |
1621 | CanOptimize = false; |
1622 | } |
1623 | |
1624 | // If Current does not write to the same object as KillingDef, check |
1625 | // the next candidate. |
1626 | if (OR == OW_Unknown || OR == OW_None) |
1627 | continue; |
1628 | else if (OR == OW_MaybePartial) { |
1629 | // If KillingDef only partially overwrites Current, check the next |
1630 | // candidate if the partial step limit is exceeded. This aggressively |
1631 | // limits the number of candidates for partial store elimination, |
1632 | // which are less likely to be removable in the end. |
1633 | if (PartialLimit <= 1) { |
1634 | WalkerStepLimit -= 1; |
1635 | LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n" ); |
1636 | continue; |
1637 | } |
1638 | PartialLimit -= 1; |
1639 | } |
1640 | } |
1641 | break; |
1642 | }; |
1643 | |
1644 | // Accesses to objects accessible after the function returns can only be |
1645 | // eliminated if the access is dead along all paths to the exit. Collect |
1646 | // the blocks with killing (i.e. completely overwriting) MemoryDefs and check if |
1647 | // they cover all paths from MaybeDeadAccess to any function exit. |
1648 | SmallPtrSet<Instruction *, 16> KillingDefs; |
1649 | KillingDefs.insert(Ptr: KillingDef->getMemoryInst()); |
1650 | MemoryAccess *MaybeDeadAccess = Current; |
1651 | MemoryLocation MaybeDeadLoc = *CurrentLoc; |
1652 | Instruction *MaybeDeadI = cast<MemoryDef>(Val: MaybeDeadAccess)->getMemoryInst(); |
1653 | LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " (" |
1654 | << *MaybeDeadI << ")\n" ); |
1655 | |
1656 | SmallVector<MemoryAccess *, 32> WorkList; |
1657 | SmallPtrSet<MemoryAccess *, 32> Visited; |
1658 | pushMemUses(Acc: MaybeDeadAccess, WorkList, Visited); |
1659 | |
1660 | // Check if DeadDef may be read. |
1661 | for (unsigned I = 0; I < WorkList.size(); I++) { |
1662 | MemoryAccess *UseAccess = WorkList[I]; |
1663 | |
1664 | LLVM_DEBUG(dbgs() << " " << *UseAccess); |
1665 | // Bail out if the number of accesses to check exceeds the scan limit. |
1666 | if (ScanLimit < (WorkList.size() - I)) { |
1667 | LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n" ); |
1668 | return std::nullopt; |
1669 | } |
1670 | --ScanLimit; |
1671 | NumDomMemDefChecks++; |
1672 | |
1673 | if (isa<MemoryPhi>(Val: UseAccess)) { |
1674 | if (any_of(Range&: KillingDefs, P: [this, UseAccess](Instruction *KI) { |
1675 | return DT.properlyDominates(A: KI->getParent(), |
1676 | B: UseAccess->getBlock()); |
1677 | })) { |
1678 | LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n" ); |
1679 | continue; |
1680 | } |
1681 | LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n" ); |
1682 | pushMemUses(Acc: UseAccess, WorkList, Visited); |
1683 | continue; |
1684 | } |
1685 | |
1686 | Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst(); |
1687 | LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n" ); |
1688 | |
1689 | if (any_of(Range&: KillingDefs, P: [this, UseInst](Instruction *KI) { |
1690 | return DT.dominates(Def: KI, User: UseInst); |
1691 | })) { |
1692 | LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n" ); |
1693 | continue; |
1694 | } |
1695 | |
1696 | // A memory terminator kills all preceding MemoryDefs and all succeeding |
1697 | // MemoryAccesses. We do not have to check its users. |
1698 | if (isMemTerminator(Loc: MaybeDeadLoc, AccessI: MaybeDeadI, MaybeTerm: UseInst)) { |
1699 | LLVM_DEBUG( |
1700 | dbgs() |
1701 | << " ... skipping, memterminator invalidates following accesses\n" ); |
1702 | continue; |
1703 | } |
1704 | |
1705 | if (isNoopIntrinsic(I: cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst())) { |
1706 | LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n" ); |
1707 | pushMemUses(Acc: UseAccess, WorkList, Visited); |
1708 | continue; |
1709 | } |
1710 | |
1711 | if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj)) { |
1712 | LLVM_DEBUG(dbgs() << " ... found throwing instruction\n" ); |
1713 | return std::nullopt; |
1714 | } |
1715 | |
1716 | // Uses which may read the original MemoryDef mean we cannot eliminate the |
1717 | // original MD. Stop walk. |
1718 | // If KillingDef is a CallInst with "initializes" attribute, the reads in |
1719 | // the callee would be dominated by initializations, so it should be safe. |
1720 | bool IsKillingDefFromInitAttr = false; |
1721 | if (IsInitializesAttrMemLoc) { |
1722 | if (KillingI == UseInst && |
1723 | KillingUndObj == getUnderlyingObject(V: MaybeDeadLoc.Ptr)) |
1724 | IsKillingDefFromInitAttr = true; |
1725 | } |
1726 | |
1727 | if (isReadClobber(DefLoc: MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) { |
1728 | LLVM_DEBUG(dbgs() << " ... found read clobber\n" ); |
1729 | return std::nullopt; |
1730 | } |
1731 | |
1732 | // If this worklist walks back to the original memory access (and the |
1733 | // pointer is not guaranteed loop invariant) then we cannot assume that a |
1734 | // store kills itself. |
1735 | if (MaybeDeadAccess == UseAccess && |
1736 | !isGuaranteedLoopInvariant(Ptr: MaybeDeadLoc.Ptr)) { |
1737 | LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n" ); |
1738 | return std::nullopt; |
1739 | } |
1740 | // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check |
1741 | // if it reads the memory location. |
1742 | // TODO: It would probably be better to check for self-reads before |
1743 | // calling the function. |
1744 | if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) { |
1745 | LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n" ); |
1746 | continue; |
1747 | } |
1748 | |
1749 | // Check all uses for MemoryDefs, except for defs completely overwriting |
1750 | // the original location. Otherwise we have to check uses of *all* |
1751 | // MemoryDefs we discover, including non-aliasing ones, or we might |
1752 | // miss cases like the following |
1753 | // 1 = Def(LoE) ; <----- DeadDef stores [0,1] |
1754 | // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] |
1755 | // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. |
1756 | // (The Use points to the *first* Def it may alias) |
1757 | // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, |
1758 | // stores [0,1] |
1759 | if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess)) { |
1760 | if (isCompleteOverwrite(DefLoc: MaybeDeadLoc, DefInst: MaybeDeadI, UseInst)) { |
1761 | BasicBlock *MaybeKillingBlock = UseInst->getParent(); |
1762 | if (PostOrderNumbers.find(Val: MaybeKillingBlock)->second < |
1763 | PostOrderNumbers.find(Val: MaybeDeadAccess->getBlock())->second) { |
1764 | if (!isInvisibleToCallerAfterRet(V: KillingUndObj)) { |
1765 | LLVM_DEBUG(dbgs() |
1766 | << " ... found killing def " << *UseInst << "\n" ); |
1767 | KillingDefs.insert(Ptr: UseInst); |
1768 | } |
1769 | } else { |
1770 | LLVM_DEBUG(dbgs() |
1771 | << " ... found preceding def " << *UseInst << "\n" ); |
1772 | return std::nullopt; |
1773 | } |
1774 | } else |
1775 | pushMemUses(Acc: UseDef, WorkList, Visited); |
1776 | } |
1777 | } |
1778 | |
1779 | // For accesses to locations visible after the function returns, make sure |
1780 | // that the location is dead (=overwritten) along all paths from |
1781 | // MaybeDeadAccess to the exit. |
1782 | if (!isInvisibleToCallerAfterRet(V: KillingUndObj)) { |
1783 | SmallPtrSet<BasicBlock *, 16> KillingBlocks; |
1784 | for (Instruction *KD : KillingDefs) |
1785 | KillingBlocks.insert(Ptr: KD->getParent()); |
1786 | assert(!KillingBlocks.empty() && |
1787 | "Expected at least a single killing block" ); |
1788 | |
1789 | // Find the common post-dominator of all killing blocks. |
1790 | BasicBlock *CommonPred = *KillingBlocks.begin(); |
1791 | for (BasicBlock *BB : llvm::drop_begin(RangeOrContainer&: KillingBlocks)) { |
1792 | if (!CommonPred) |
1793 | break; |
1794 | CommonPred = PDT.findNearestCommonDominator(A: CommonPred, B: BB); |
1795 | } |
1796 | |
1797 | // If the common post-dominator does not post-dominate MaybeDeadAccess, |
1798 | // there is a path from MaybeDeadAccess to an exit not going through a |
1799 | // killing block. |
1800 | if (!PDT.dominates(A: CommonPred, B: MaybeDeadAccess->getBlock())) { |
1801 | if (!AnyUnreachableExit) |
1802 | return std::nullopt; |
1803 | |
1804 | // Fall back to CFG scan starting at all non-unreachable roots if not |
1805 | // all paths to the exit go through CommonPred. |
1806 | CommonPred = nullptr; |
1807 | } |
1808 | |
1809 | // If CommonPred itself is in the set of killing blocks, we're done. |
1810 | if (KillingBlocks.count(Ptr: CommonPred)) |
1811 | return {MaybeDeadAccess}; |
1812 | |
1813 | SetVector<BasicBlock *> WorkList; |
1814 | // If CommonPred is null, there are multiple exits from the function. |
1815 | // They all have to be added to the worklist. |
1816 | if (CommonPred) |
1817 | WorkList.insert(X: CommonPred); |
1818 | else |
1819 | for (BasicBlock *R : PDT.roots()) { |
1820 | if (!isa<UnreachableInst>(Val: R->getTerminator())) |
1821 | WorkList.insert(X: R); |
1822 | } |
1823 | |
1824 | NumCFGTries++; |
1825 | // Check if all paths starting from an exit node go through one of the |
1826 | // killing blocks before reaching MaybeDeadAccess. |
1827 | for (unsigned I = 0; I < WorkList.size(); I++) { |
1828 | NumCFGChecks++; |
1829 | BasicBlock *Current = WorkList[I]; |
1830 | if (KillingBlocks.count(Ptr: Current)) |
1831 | continue; |
1832 | if (Current == MaybeDeadAccess->getBlock()) |
1833 | return std::nullopt; |
1834 | |
1835 | // MaybeDeadAccess is reachable from the entry, so we don't have to |
1836 | // explore unreachable blocks further. |
1837 | if (!DT.isReachableFromEntry(A: Current)) |
1838 | continue; |
1839 | |
1840 | WorkList.insert_range(R: predecessors(BB: Current)); |
1841 | |
1842 | if (WorkList.size() >= MemorySSAPathCheckLimit) |
1843 | return std::nullopt; |
1844 | } |
1845 | NumCFGSuccess++; |
1846 | } |
1847 | |
1848 | // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is |
1849 | // potentially dead. |
1850 | return {MaybeDeadAccess}; |
1851 | } |
1852 | |
1853 | /// Delete dead memory defs and recursively add their operands to ToRemove if |
1854 | /// they became dead. |
1855 | void |
1856 | deleteDeadInstruction(Instruction *SI, |
1857 | SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr) { |
1858 | MemorySSAUpdater Updater(&MSSA); |
1859 | SmallVector<Instruction *, 32> NowDeadInsts; |
1860 | NowDeadInsts.push_back(Elt: SI); |
1861 | --NumFastOther; |
1862 | |
1863 | while (!NowDeadInsts.empty()) { |
1864 | Instruction *DeadInst = NowDeadInsts.pop_back_val(); |
1865 | ++NumFastOther; |
1866 | |
1867 | // Try to preserve debug information attached to the dead instruction. |
1868 | salvageDebugInfo(I&: *DeadInst); |
1869 | salvageKnowledge(I: DeadInst); |
1870 | |
1871 | // Remove the Instruction from MSSA. |
1872 | MemoryAccess *MA = MSSA.getMemoryAccess(I: DeadInst); |
1873 | bool IsMemDef = MA && isa<MemoryDef>(Val: MA); |
1874 | if (MA) { |
1875 | if (IsMemDef) { |
1876 | auto *MD = cast<MemoryDef>(Val: MA); |
1877 | SkipStores.insert(Ptr: MD); |
1878 | if (Deleted) |
1879 | Deleted->insert(Ptr: MD); |
1880 | if (auto *SI = dyn_cast<StoreInst>(Val: MD->getMemoryInst())) { |
1881 | if (SI->getValueOperand()->getType()->isPointerTy()) { |
1882 | const Value *UO = getUnderlyingObject(V: SI->getValueOperand()); |
1883 | if (CapturedBeforeReturn.erase(Val: UO)) |
1884 | ShouldIterateEndOfFunctionDSE = true; |
1885 | InvisibleToCallerAfterRet.erase(Val: UO); |
1886 | } |
1887 | } |
1888 | } |
1889 | |
1890 | Updater.removeMemoryAccess(MA); |
1891 | } |
1892 | |
1893 | auto I = IOLs.find(Key: DeadInst->getParent()); |
1894 | if (I != IOLs.end()) |
1895 | I->second.erase(Key: DeadInst); |
1896 | // Remove its operands |
1897 | for (Use &O : DeadInst->operands()) |
1898 | if (Instruction *OpI = dyn_cast<Instruction>(Val&: O)) { |
1899 | O.set(PoisonValue::get(T: O->getType())); |
1900 | if (isInstructionTriviallyDead(I: OpI, TLI: &TLI)) |
1901 | NowDeadInsts.push_back(Elt: OpI); |
1902 | } |
1903 | |
1904 | EA.removeInstruction(I: DeadInst); |
1905 | // Remove memory defs directly if they don't produce results, but only |
1906 | // queue other dead instructions for later removal. They may have been |
1907 | // used as memory locations that have been cached by BatchAA. Removing |
1908 | // them here may lead to newly created instructions being allocated at the |
1909 | // same address, yielding stale cache entries. |
1910 | if (IsMemDef && DeadInst->getType()->isVoidTy()) |
1911 | DeadInst->eraseFromParent(); |
1912 | else |
1913 | ToRemove.push_back(Elt: DeadInst); |
1914 | } |
1915 | } |
1916 | |
1917 | // Check for any extra throws between \p KillingI and \p DeadI that block |
1918 | // DSE. This only checks extra maythrows (those that aren't MemoryDefs). |
1919 | // MemoryDefs that may throw are handled during the walk from one def to the |
1920 | // next. |
1921 | bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI, |
1922 | const Value *KillingUndObj) { |
1923 | // First see if we can ignore it by using the fact that KillingUndObj is an |
1924 | // alloca or alloca-like object that is not visible to the caller during |
1925 | // execution of the function. |
1926 | if (KillingUndObj && isInvisibleToCallerOnUnwind(V: KillingUndObj)) |
1927 | return false; |
1928 | |
1929 | if (KillingI->getParent() == DeadI->getParent()) |
1930 | return ThrowingBlocks.count(Ptr: KillingI->getParent()); |
1931 | return !ThrowingBlocks.empty(); |
1932 | } |
1933 | |
1934 | // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following |
1935 | // instructions act as barriers: |
1936 | // * A memory instruction that may throw and \p KillingI accesses a non-stack |
1937 | // object. |
1938 | // * Atomic stores stronger than monotonic. |
1939 | bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) { |
1940 | // If DeadI may throw it acts as a barrier, unless the killing access is to |
1941 | // an alloca or alloca-like object that does not escape. |
1942 | if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj)) |
1943 | return true; |
1944 | |
1945 | // If DeadI is an atomic load/store stronger than monotonic, do not try to |
1946 | // eliminate/reorder it. |
1947 | if (DeadI->isAtomic()) { |
1948 | if (auto *LI = dyn_cast<LoadInst>(Val: DeadI)) |
1949 | return isStrongerThanMonotonic(AO: LI->getOrdering()); |
1950 | if (auto *SI = dyn_cast<StoreInst>(Val: DeadI)) |
1951 | return isStrongerThanMonotonic(AO: SI->getOrdering()); |
1952 | if (auto *ARMW = dyn_cast<AtomicRMWInst>(Val: DeadI)) |
1953 | return isStrongerThanMonotonic(AO: ARMW->getOrdering()); |
1954 | if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(Val: DeadI)) |
1955 | return isStrongerThanMonotonic(AO: CmpXchg->getSuccessOrdering()) || |
1956 | isStrongerThanMonotonic(AO: CmpXchg->getFailureOrdering()); |
1957 | llvm_unreachable("other instructions should be skipped in MemorySSA" ); |
1958 | } |
1959 | return false; |
1960 | } |
1961 | |
1962 | /// Eliminate writes to objects that are not visible in the caller and are not |
1963 | /// accessed before returning from the function. |
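/// For example (illustrative), a store to a non-escaping alloca that is not
/// read again on any path to a `ret` can be deleted.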
1964 | bool eliminateDeadWritesAtEndOfFunction() { |
1965 | bool MadeChange = false; |
1966 | LLVM_DEBUG( |
1967 | dbgs() |
1968 | << "Trying to eliminate MemoryDefs at the end of the function\n" ); |
1969 | do { |
1970 | ShouldIterateEndOfFunctionDSE = false; |
1971 | for (MemoryDef *Def : llvm::reverse(C&: MemDefs)) { |
1972 | if (SkipStores.contains(Ptr: Def)) |
1973 | continue; |
1974 | |
1975 | Instruction *DefI = Def->getMemoryInst(); |
1976 | auto DefLoc = getLocForWrite(I: DefI); |
1977 | if (!DefLoc || !isRemovable(I: DefI)) { |
1978 | LLVM_DEBUG(dbgs() << " ... could not get location for write or " |
1979 | "instruction not removable.\n" ); |
1980 | continue; |
1981 | } |
1982 | |
1983 | // NOTE: Currently eliminating writes at the end of a function is |
1984 | // limited to MemoryDefs with a single underlying object, to save |
1985 | // compile-time. In practice it appears the case with multiple |
1986 | // underlying objects is very uncommon. If it turns out to be important, |
1987 | // we can use getUnderlyingObjects here instead. |
1988 | const Value *UO = getUnderlyingObject(V: DefLoc->Ptr); |
1989 | if (!isInvisibleToCallerAfterRet(V: UO)) |
1990 | continue; |
1991 | |
1992 | if (isWriteAtEndOfFunction(Def, DefLoc: *DefLoc)) { |
1993 | // See through pointer-to-pointer bitcasts |
1994 | LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end " |
1995 | "of the function\n" ); |
1996 | deleteDeadInstruction(SI: DefI); |
1997 | ++NumFastStores; |
1998 | MadeChange = true; |
1999 | } |
2000 | } |
2001 | } while (ShouldIterateEndOfFunctionDSE); |
2002 | return MadeChange; |
2003 | } |
2004 | |
2005 | /// If we have a zero initializing memset following a call to malloc, |
2006 | /// try folding it into a call to calloc. |
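/// For example (illustrative):
///   %p = call ptr @malloc(i64 %n)
///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
/// may become
///   %p = call ptr @calloc(i64 1, i64 %n)
/// provided the checks below (matching length, domination, no intervening
/// modification) succeed.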
2007 | bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) { |
2008 | Instruction *DefI = Def->getMemoryInst(); |
2009 | MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI); |
2010 | if (!MemSet) |
2011 | // TODO: Could handle zero store to small allocation as well. |
2012 | return false; |
2013 | Constant *StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue()); |
2014 | if (!StoredConstant || !StoredConstant->isNullValue()) |
2015 | return false; |
2016 | |
2017 | if (!isRemovable(I: DefI)) |
2018 | // The memset might be volatile. |
2019 | return false; |
2020 | |
2021 | if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) || |
2022 | F.hasFnAttribute(Kind: Attribute::SanitizeAddress) || |
2023 | F.hasFnAttribute(Kind: Attribute::SanitizeHWAddress) || |
2024 | F.getName() == "calloc" ) |
2025 | return false; |
2026 | auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(Val: DefUO)); |
2027 | if (!Malloc) |
2028 | return false; |
2029 | auto *InnerCallee = Malloc->getCalledFunction(); |
2030 | if (!InnerCallee) |
2031 | return false; |
2032 | LibFunc Func = NotLibFunc; |
2033 | StringRef ZeroedVariantName; |
2034 | if (!TLI.getLibFunc(FDecl: *InnerCallee, F&: Func) || !TLI.has(F: Func) || |
2035 | Func != LibFunc_malloc) { |
2036 | Attribute Attr = Malloc->getFnAttr(Kind: "alloc-variant-zeroed" ); |
2037 | if (!Attr.isValid()) |
2038 | return false; |
2039 | ZeroedVariantName = Attr.getValueAsString(); |
2040 | if (ZeroedVariantName.empty()) |
2041 | return false; |
2042 | } |
2043 | |
2044 | // Gracefully handle malloc with unexpected memory attributes. |
2045 | auto *MallocDef = dyn_cast_or_null<MemoryDef>(Val: MSSA.getMemoryAccess(I: Malloc)); |
2046 | if (!MallocDef) |
2047 | return false; |
2048 | |
2049 | auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) { |
2050 | // Check for a "br (icmp eq ptr, null), truebb, falsebb" pattern at the |
2051 | // end of the malloc block. |
2052 | auto *MallocBB = Malloc->getParent(), |
2053 | *MemsetBB = Memset->getParent(); |
2054 | if (MallocBB == MemsetBB) |
2055 | return true; |
2056 | auto *Ptr = Memset->getArgOperand(i: 0); |
2057 | auto *TI = MallocBB->getTerminator(); |
2058 | BasicBlock *TrueBB, *FalseBB; |
2059 | if (!match(V: TI, P: m_Br(C: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: Ptr), |
2060 | R: m_Zero()), |
2061 | T&: TrueBB, F&: FalseBB))) |
2062 | return false; |
2063 | if (MemsetBB != FalseBB) |
2064 | return false; |
2065 | return true; |
2066 | }; |
2067 | |
2068 | if (Malloc->getOperand(i_nocapture: 0) != MemSet->getLength()) |
2069 | return false; |
2070 | if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Def: Malloc, User: MemSet) || |
2071 | !memoryIsNotModifiedBetween(FirstI: Malloc, SecondI: MemSet, AA&: BatchAA, DL, DT: &DT)) |
2072 | return false; |
2073 | IRBuilder<> IRB(Malloc); |
2074 | assert(Func == LibFunc_malloc || !ZeroedVariantName.empty()); |
2075 | Value *Calloc = nullptr; |
2076 | if (!ZeroedVariantName.empty()) { |
2077 | LLVMContext &Ctx = Malloc->getContext(); |
2078 | AttributeList Attrs = InnerCallee->getAttributes(); |
2079 | AllocFnKind AllocKind = |
2080 | Attrs.getFnAttr(Kind: Attribute::AllocKind).getAllocKind() | |
2081 | AllocFnKind::Zeroed; |
2082 | Attrs = |
2083 | Attrs.addFnAttribute(C&: Ctx, Attr: Attribute::getWithAllocKind(Context&: Ctx, Kind: AllocKind)) |
2084 | .removeFnAttribute(C&: Ctx, Kind: "alloc-variant-zeroed" ); |
2085 | FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction( |
2086 | Name: ZeroedVariantName, T: InnerCallee->getFunctionType(), AttributeList: Attrs); |
2087 | SmallVector<Value *, 3> Args; |
2088 | Args.append(in_start: Malloc->arg_begin(), in_end: Malloc->arg_end()); |
2089 | Calloc = IRB.CreateCall(Callee: ZeroedVariant, Args, Name: ZeroedVariantName); |
2090 | } else { |
2091 | Type *SizeTTy = Malloc->getArgOperand(i: 0)->getType(); |
2092 | Calloc = |
2093 | emitCalloc(Num: ConstantInt::get(Ty: SizeTTy, V: 1), Size: Malloc->getArgOperand(i: 0), |
2094 | B&: IRB, TLI, AddrSpace: Malloc->getType()->getPointerAddressSpace()); |
2095 | } |
2096 | if (!Calloc) |
2097 | return false; |
2098 | |
2099 | MemorySSAUpdater Updater(&MSSA); |
2100 | auto *NewAccess = |
2101 | Updater.createMemoryAccessAfter(I: cast<Instruction>(Val: Calloc), Definition: nullptr, |
2102 | InsertPt: MallocDef); |
2103 | auto *NewAccessMD = cast<MemoryDef>(Val: NewAccess); |
2104 | Updater.insertDef(Def: NewAccessMD, /*RenameUses=*/true); |
2105 | Malloc->replaceAllUsesWith(V: Calloc); |
2106 | deleteDeadInstruction(SI: Malloc); |
2107 | return true; |
2108 | } |
2109 | |
2110 | // Check if there is a dominating condition that implies that the value |
2111 | // being stored to a pointer is already present at that location. |
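// For example (illustrative):
//   %v = load i32, ptr %p
//   %c = icmp eq i32 %v, %val
//   br i1 %c, label %then, label %end
// then:
//   store i32 %val, ptr %p
// Here the store only executes when %p already contains %val.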
2112 | bool dominatingConditionImpliesValue(MemoryDef *Def) { |
2113 | auto *StoreI = cast<StoreInst>(Val: Def->getMemoryInst()); |
2114 | BasicBlock *StoreBB = StoreI->getParent(); |
2115 | Value *StorePtr = StoreI->getPointerOperand(); |
2116 | Value *StoreVal = StoreI->getValueOperand(); |
2117 | |
2118 | DomTreeNode *IDom = DT.getNode(BB: StoreBB)->getIDom(); |
2119 | if (!IDom) |
2120 | return false; |
2121 | |
2122 | auto *BI = dyn_cast<BranchInst>(Val: IDom->getBlock()->getTerminator()); |
2123 | if (!BI || !BI->isConditional()) |
2124 | return false; |
2125 | |
2126 | // If both successors are the same block, it is not possible to determine |
2127 | // whether the optimization is possible. (We would not want to optimize a |
2128 | // store in the FalseBB if the condition is true and vice versa.) |
2129 | if (BI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) |
2130 | return false; |
2131 | |
2132 | Instruction *ICmpL; |
2133 | CmpPredicate Pred; |
2134 | if (!match(V: BI->getCondition(), |
2135 | P: m_c_ICmp(Pred, |
2136 | L: m_CombineAnd(L: m_Load(Op: m_Specific(V: StorePtr)), |
2137 | R: m_Instruction(I&: ICmpL)), |
2138 | R: m_Specific(V: StoreVal))) || |
2139 | !ICmpInst::isEquality(P: Pred)) |
2140 | return false; |
2141 | |
2142 | // If the else block also branches to the if block, or the other way |
2143 | // around, it is not possible to determine if the optimization is possible. |
2144 | if (Pred == ICmpInst::ICMP_EQ && |
2145 | !DT.dominates(BBE: BasicBlockEdge(BI->getParent(), BI->getSuccessor(i: 0)), |
2146 | BB: StoreBB)) |
2147 | return false; |
2148 | |
2149 | if (Pred == ICmpInst::ICMP_NE && |
2150 | !DT.dominates(BBE: BasicBlockEdge(BI->getParent(), BI->getSuccessor(i: 1)), |
2151 | BB: StoreBB)) |
2152 | return false; |
2153 | |
2154 | MemoryAccess *LoadAcc = MSSA.getMemoryAccess(I: ICmpL); |
2155 | MemoryAccess *ClobAcc = |
2156 | MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA); |
2157 | |
2158 | return MSSA.dominates(A: ClobAcc, B: LoadAcc); |
2159 | } |
2160 | |
2161 | /// \returns true if \p Def is a no-op store, either because it |
2162 | /// directly stores back a loaded value or stores zero to a calloced object. |
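/// For example (illustrative), in
///   %v = load i32, ptr %p
///   store i32 %v, ptr %p
/// the store is a no-op if nothing may write to %p in between, and a memset
/// of zero into freshly calloc'ed memory is likewise a no-op.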
2163 | bool storeIsNoop(MemoryDef *Def, const Value *DefUO) { |
2164 | Instruction *DefI = Def->getMemoryInst(); |
2165 | StoreInst *Store = dyn_cast<StoreInst>(Val: DefI); |
2166 | MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI); |
2167 | Constant *StoredConstant = nullptr; |
2168 | if (Store) |
2169 | StoredConstant = dyn_cast<Constant>(Val: Store->getOperand(i_nocapture: 0)); |
2170 | else if (MemSet) |
2171 | StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue()); |
2172 | else |
2173 | return false; |
2174 | |
2175 | if (!isRemovable(I: DefI)) |
2176 | return false; |
2177 | |
2178 | if (StoredConstant) { |
2179 | Constant *InitC = |
2180 | getInitialValueOfAllocation(V: DefUO, TLI: &TLI, Ty: StoredConstant->getType()); |
2181 | // If the clobbering access is LiveOnEntry, no instructions between them |
2182 | // can modify the memory location. |
2183 | if (InitC && InitC == StoredConstant) |
2184 | return MSSA.isLiveOnEntryDef( |
2185 | MA: MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA)); |
2186 | } |
2187 | |
2188 | if (!Store) |
2189 | return false; |
2190 | |
2191 | if (dominatingConditionImpliesValue(Def)) |
2192 | return true; |
2193 | |
2194 | if (auto *LoadI = dyn_cast<LoadInst>(Val: Store->getOperand(i_nocapture: 0))) { |
2195 | if (LoadI->getPointerOperand() == Store->getOperand(i_nocapture: 1)) { |
2196 | // Get the defining access for the load. |
2197 | auto *LoadAccess = MSSA.getMemoryAccess(I: LoadI)->getDefiningAccess(); |
2198 | // Fast path: the defining accesses are the same. |
2199 | if (LoadAccess == Def->getDefiningAccess()) |
2200 | return true; |
2201 | |
2202 | // Look through phi accesses. Recursively scan all phi accesses by |
2203 | // adding them to a worklist. Bail when we run into a memory def that |
2204 | // does not match LoadAccess. |
2205 | SetVector<MemoryAccess *> ToCheck; |
2206 | MemoryAccess *Current = |
2207 | MSSA.getWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA); |
2208 | // We don't want to bail when we run into the store memory def. But, |
2209 | // the phi access may point to it. So, pretend like we've already |
2210 | // checked it. |
2211 | ToCheck.insert(X: Def); |
2212 | ToCheck.insert(X: Current); |
2213 | // Start at current (1) to simulate already having checked Def. |
2214 | for (unsigned I = 1; I < ToCheck.size(); ++I) { |
2215 | Current = ToCheck[I]; |
2216 | if (auto PhiAccess = dyn_cast<MemoryPhi>(Val: Current)) { |
2217 | // Check all the operands. |
2218 | for (auto &Use : PhiAccess->incoming_values()) |
2219 | ToCheck.insert(X: cast<MemoryAccess>(Val: &Use)); |
2220 | continue; |
2221 | } |
2222 | |
2223 | // If we found a memory def, bail. This happens when we have an |
2224 | // unrelated write in between an otherwise noop store. |
2225 | assert(isa<MemoryDef>(Current) && |
2226 | "Only MemoryDefs should reach here." ); |
2227 | // TODO: Skip no alias MemoryDefs that have no aliasing reads. |
2228 | // We are searching for the definition of the store's destination. |
2229 | // So, if that is the same definition as the load, then this is a |
2230 | // noop. Otherwise, fail. |
2231 | if (LoadAccess != Current) |
2232 | return false; |
2233 | } |
2234 | return true; |
2235 | } |
2236 | } |
2237 | |
2238 | return false; |
2239 | } |
2240 | |
2241 | bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) { |
2242 | bool Changed = false; |
2243 | for (auto OI : IOL) { |
2244 | Instruction *DeadI = OI.first; |
2245 | MemoryLocation Loc = *getLocForWrite(I: DeadI); |
2246 | assert(isRemovable(DeadI) && "Expect only removable instruction" ); |
2247 | |
2248 | const Value *Ptr = Loc.Ptr->stripPointerCasts(); |
2249 | int64_t DeadStart = 0; |
2250 | uint64_t DeadSize = Loc.Size.getValue(); |
2251 | GetPointerBaseWithConstantOffset(Ptr, Offset&: DeadStart, DL); |
2252 | OverlapIntervalsTy &IntervalMap = OI.second; |
2253 | Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize); |
2254 | if (IntervalMap.empty()) |
2255 | continue; |
2256 | Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize); |
2257 | } |
2258 | return Changed; |
2259 | } |
2260 | |
2261 | /// Eliminates writes to locations where the value that is being written |
2262 | /// is already stored at the same location. |
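/// For example (illustrative), the later of two identical
/// `store i32 0, ptr %p` instructions is removed, as is a store whose byte
/// pattern matches an earlier memset that completely covers it.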
2263 | bool eliminateRedundantStoresOfExistingValues() { |
2264 | bool MadeChange = false; |
2265 | LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the " |
2266 | "already existing value\n" ); |
2267 | for (auto *Def : MemDefs) { |
2268 | if (SkipStores.contains(Ptr: Def) || MSSA.isLiveOnEntryDef(MA: Def)) |
2269 | continue; |
2270 | |
2271 | Instruction *DefInst = Def->getMemoryInst(); |
2272 | auto MaybeDefLoc = getLocForWrite(I: DefInst); |
2273 | if (!MaybeDefLoc || !isRemovable(I: DefInst)) |
2274 | continue; |
2275 | |
2276 | MemoryDef *UpperDef; |
2277 | // To conserve compile-time, we avoid walking to the next clobbering def. |
2278 | // Instead, we just try to get the optimized access, if it exists. DSE |
2279 | // will try to optimize defs during the earlier traversal. |
2280 | if (Def->isOptimized()) |
2281 | UpperDef = dyn_cast<MemoryDef>(Val: Def->getOptimized()); |
2282 | else |
2283 | UpperDef = dyn_cast<MemoryDef>(Val: Def->getDefiningAccess()); |
2284 | if (!UpperDef || MSSA.isLiveOnEntryDef(MA: UpperDef)) |
2285 | continue; |
2286 | |
2287 | Instruction *UpperInst = UpperDef->getMemoryInst(); |
2288 | auto IsRedundantStore = [&]() { |
2289 | // We don't care about differences in call attributes here. |
2290 | if (DefInst->isIdenticalToWhenDefined(I: UpperInst, |
2291 | /*IntersectAttrs=*/true)) |
2292 | return true; |
2293 | if (auto *MemSetI = dyn_cast<MemSetInst>(Val: UpperInst)) { |
2294 | if (auto *SI = dyn_cast<StoreInst>(Val: DefInst)) { |
2295 | // MemSetInst must have a write location. |
2296 | auto UpperLoc = getLocForWrite(I: UpperInst); |
2297 | if (!UpperLoc) |
2298 | return false; |
2299 | int64_t InstWriteOffset = 0; |
2300 | int64_t DepWriteOffset = 0; |
2301 | auto OR = isOverwrite(KillingI: UpperInst, DeadI: DefInst, KillingLoc: *UpperLoc, DeadLoc: *MaybeDefLoc, |
2302 | KillingOff&: InstWriteOffset, DeadOff&: DepWriteOffset); |
2303 | Value *StoredByte = isBytewiseValue(V: SI->getValueOperand(), DL); |
2304 | return StoredByte && StoredByte == MemSetI->getOperand(i_nocapture: 1) && |
2305 | OR == OW_Complete; |
2306 | } |
2307 | } |
2308 | return false; |
2309 | }; |
2310 | |
2311 | if (!IsRedundantStore() || isReadClobber(DefLoc: *MaybeDefLoc, UseInst: DefInst)) |
2312 | continue; |
2313 | LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst |
2314 | << '\n'); |
2315 | deleteDeadInstruction(SI: DefInst); |
2316 | NumRedundantStores++; |
2317 | MadeChange = true; |
2318 | } |
2319 | return MadeChange; |
2320 | } |
2321 | |
2322 | // Return the locations written by the initializes attribute. |
2323 | // Note that this function considers: |
2324 | // 1. Unwind edge: use "initializes" attribute only if the callee has |
2325 | // "nounwind" attribute, or the argument has "dead_on_unwind" attribute, |
2326 | // or the argument is invisible to caller on unwind. That is, we don't |
2327 | // perform incorrect DSE on unwind edges in the current function. |
2328 | // 2. Argument alias: for aliasing arguments, the "initializes" attribute is |
2329 | // the intersected range list of their "initializes" attributes. |
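// For example (illustrative), given a call such as
//   call void @init(ptr initializes((0, 8)) %p)
// this returns the 8-byte location starting at %p, provided the unwind and
// aliasing conditions above hold.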
2330 | SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I); |
2331 | |
2332 | // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are |
2333 | // killed by `KillingLocWrapper.MemDef`. Return whether |
2334 | // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted. |
2335 | std::pair<bool, bool> |
2336 | eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper); |
2337 | |
2338 | // Try to eliminate dead defs killed by `KillingDefWrapper` and return |
2339 | // whether any change was made. |
2340 | bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper); |
2341 | }; |
2342 | |
2343 | // Return true if "Arg" is function local and isn't captured before "CB". |
2344 | bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, |
2345 | EarliestEscapeAnalysis &EA) { |
2346 | const Value *UnderlyingObj = getUnderlyingObject(V: Arg); |
2347 | return isIdentifiedFunctionLocal(V: UnderlyingObj) && |
2348 | capturesNothing( |
2349 | CC: EA.getCapturesBefore(Object: UnderlyingObj, I: CB, /*OrAt*/ true)); |
2350 | } |
2351 | |
2352 | SmallVector<MemoryLocation, 1> |
2353 | DSEState::getInitializesArgMemLoc(const Instruction *I) { |
2354 | const CallBase *CB = dyn_cast<CallBase>(Val: I); |
2355 | if (!CB) |
2356 | return {}; |
2357 | |
2358 | // Collect aliasing arguments and their initializes ranges. |
2359 | SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments; |
2360 | for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) { |
2361 | Value *CurArg = CB->getArgOperand(i: Idx); |
2362 | if (!CurArg->getType()->isPointerTy()) |
2363 | continue; |
2364 | |
2365 | ConstantRangeList Inits; |
2366 | Attribute InitializesAttr = CB->getParamAttr(ArgNo: Idx, Kind: Attribute::Initializes); |
2367 | // initializes on byval arguments refers to the callee copy, not the |
2368 | // original memory the caller passed in. |
2369 | if (InitializesAttr.isValid() && !CB->isByValArgument(ArgNo: Idx)) |
2370 | Inits = InitializesAttr.getValueAsConstantRangeList(); |
2371 | |
2372 | // Check whether "CurArg" could alias with global variables. We require |
2373 | // that either it is function-local and not captured before the call, or |
2374 | // that "CB" only accesses argument or inaccessible memory. |
2375 | if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() && |
2376 | !isFuncLocalAndNotCaptured(Arg: CurArg, CB, EA)) |
2377 | Inits = ConstantRangeList(); |
2378 | |
2379 | // We don't perform incorrect DSE on unwind edges in the current function, |
2380 | // and use the "initializes" attribute to kill dead stores if: |
2381 | // - The call does not throw exceptions, "CB->doesNotThrow()". |
2382 | // - Or the callee parameter has "dead_on_unwind" attribute. |
2383 | // - Or the argument is invisible to caller on unwind, and there are no |
2384 | // unwind edges from this call in the current function (e.g. `CallInst`). |
2385 | bool IsDeadOrInvisibleOnUnwind = |
2386 | CB->paramHasAttr(ArgNo: Idx, Kind: Attribute::DeadOnUnwind) || |
2387 | (isa<CallInst>(Val: CB) && isInvisibleToCallerOnUnwind(V: CurArg)); |
2388 | ArgumentInitInfo InitInfo{.Idx: Idx, .IsDeadOrInvisibleOnUnwind: IsDeadOrInvisibleOnUnwind, .Inits: Inits}; |
2389 | bool FoundAliasing = false; |
2390 | for (auto &[Arg, AliasList] : Arguments) { |
2391 | auto AAR = BatchAA.alias(LocA: MemoryLocation::getBeforeOrAfter(Ptr: Arg), |
2392 | LocB: MemoryLocation::getBeforeOrAfter(Ptr: CurArg)); |
2393 | if (AAR == AliasResult::NoAlias) { |
2394 | continue; |
2395 | } else if (AAR == AliasResult::MustAlias) { |
2396 | FoundAliasing = true; |
2397 | AliasList.push_back(Elt: InitInfo); |
2398 | } else { |
2399 | // For PartialAlias and MayAlias, there is an offset or may be an |
2400 | // unknown offset between the arguments, so we insert an empty init |
2401 | // range to discard the entire initializes info while intersecting. |
2402 | FoundAliasing = true; |
2403 | AliasList.push_back(Elt: ArgumentInitInfo{.Idx: Idx, .IsDeadOrInvisibleOnUnwind: IsDeadOrInvisibleOnUnwind, |
2404 | .Inits: ConstantRangeList()}); |
2405 | } |
2406 | } |
2407 | if (!FoundAliasing) |
2408 | Arguments[CurArg] = {InitInfo}; |
2409 | } |
2410 | |
2411 | SmallVector<MemoryLocation, 1> Locations; |
2412 | for (const auto &[_, Args] : Arguments) { |
2413 | auto IntersectedRanges = |
2414 | getIntersectedInitRangeList(Args, CallHasNoUnwindAttr: CB->doesNotThrow()); |
2415 | if (IntersectedRanges.empty()) |
2416 | continue; |
2417 | |
2418 | for (const auto &Arg : Args) { |
2419 | for (const auto &Range : IntersectedRanges) { |
2420 | int64_t Start = Range.getLower().getSExtValue(); |
2421 | int64_t End = Range.getUpper().getSExtValue(); |
2422 | // For now, we only handle locations starting at offset 0. |
2423 | if (Start == 0) |
2424 | Locations.push_back(Elt: MemoryLocation(CB->getArgOperand(i: Arg.Idx), |
2425 | LocationSize::precise(Value: End - Start), |
2426 | CB->getAAMetadata())); |
2427 | } |
2428 | } |
2429 | } |
2430 | return Locations; |
2431 | } |
2432 | |
2433 | std::pair<bool, bool> |
2434 | DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) { |
2435 | bool Changed = false; |
2436 | bool DeletedKillingLoc = false; |
2437 | unsigned ScanLimit = MemorySSAScanLimit; |
2438 | unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit; |
2439 | unsigned PartialLimit = MemorySSAPartialStoreLimit; |
2440 | // Worklist of MemoryAccesses that may be killed by |
2441 | // "KillingLocWrapper.MemDef". |
2442 | SmallSetVector<MemoryAccess *, 8> ToCheck; |
2443 | // Track MemoryAccesses that have been deleted in the loop below, so we can |
2444 | // skip them. Don't use SkipStores for this, which may contain reused |
2445 | // MemoryAccess addresses. |
2446 | SmallPtrSet<MemoryAccess *, 8> Deleted; |
2447 | [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size(); |
2448 | ToCheck.insert(X: KillingLocWrapper.MemDef->getDefiningAccess()); |
2449 | |
2450 | // Check if MemoryAccesses in the worklist are killed by |
2451 | // "KillingLocWrapper.MemDef". |
2452 | for (unsigned I = 0; I < ToCheck.size(); I++) { |
2453 | MemoryAccess *Current = ToCheck[I]; |
2454 | if (Deleted.contains(Ptr: Current)) |
2455 | continue; |
2456 | std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef( |
2457 | KillingDef: KillingLocWrapper.MemDef, StartAccess: Current, KillingLoc: KillingLocWrapper.MemLoc, |
2458 | KillingUndObj: KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit, |
2459 | IsMemTerm: isMemTerminatorInst(I: KillingLocWrapper.DefInst), PartialLimit, |
2460 | IsInitializesAttrMemLoc: KillingLocWrapper.DefByInitializesAttr); |
2461 | |
2462 | if (!MaybeDeadAccess) { |
2463 | LLVM_DEBUG(dbgs() << " finished walk\n" ); |
2464 | continue; |
2465 | } |
2466 | MemoryAccess *DeadAccess = *MaybeDeadAccess; |
2467 | LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess); |
2468 | if (isa<MemoryPhi>(Val: DeadAccess)) { |
2469 | LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n" ); |
2470 | for (Value *V : cast<MemoryPhi>(Val: DeadAccess)->incoming_values()) { |
2471 | MemoryAccess *IncomingAccess = cast<MemoryAccess>(Val: V); |
2472 | BasicBlock *IncomingBlock = IncomingAccess->getBlock(); |
2473 | BasicBlock *PhiBlock = DeadAccess->getBlock(); |
2474 | |
2475 | // We only consider incoming MemoryAccesses that come before the |
2476 | // MemoryPhi. Otherwise we could discover candidates that do not |
2477 | // strictly dominate our starting def. |
2478 | if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock]) |
2479 | ToCheck.insert(X: IncomingAccess); |
2480 | } |
2481 | continue; |
2482 | } |
2483 | // We cannot apply the initializes attribute to DeadAccess/DeadDef. |
2484 | // It would incorrectly consider a call instruction as a redundant store |
2485 | // and remove this call instruction. |
2486 | // TODO: this conflates the existence of a MemoryLocation with being able |
2487 | // to delete the instruction. Fix isRemovable() to consider calls with |
2488 | // side effects that cannot be removed, e.g. calls with the initializes |
2489 | // attribute, and remove getLocForInst(ConsiderInitializesAttr = false). |
2490 | MemoryDefWrapper DeadDefWrapper( |
2491 | cast<MemoryDef>(Val: DeadAccess), |
2492 | getLocForInst(I: cast<MemoryDef>(Val: DeadAccess)->getMemoryInst(), |
2493 | /*ConsiderInitializesAttr=*/false)); |
2494 | assert(DeadDefWrapper.DefinedLocations.size() == 1); |
2495 | MemoryLocationWrapper &DeadLocWrapper = |
2496 | DeadDefWrapper.DefinedLocations.front(); |
2497 | LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n" ); |
2498 | ToCheck.insert(X: DeadLocWrapper.MemDef->getDefiningAccess()); |
2499 | NumGetDomMemoryDefPassed++; |
2500 | |
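    // Respect the debug counter, so individual eliminations can be bisected
    // with -debug-counter when tracking down miscompiles.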
    if (!DebugCounter::shouldExecute(MemorySSACounter))
      continue;
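    // If the killing def is a memory terminator (e.g. lifetime.end or a call
    // that frees the underlying object), any earlier write to the same
    // underlying object is dead; no overwrite check is needed beyond making
    // sure both accesses refer to the same underlying object.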
    if (isMemTerminatorInst(KillingLocWrapper.DefInst)) {
      if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
        continue;
      LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
                        << *DeadLocWrapper.DefInst << "\n KILLER: "
                        << *KillingLocWrapper.DefInst << '\n');
      deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
      ++NumFastStores;
      Changed = true;
    } else {
      // Check if KillingI overwrites DeadI.
      int64_t KillingOffset = 0;
      int64_t DeadOffset = 0;
      OverwriteResult OR =
          isOverwrite(KillingLocWrapper.DefInst, DeadLocWrapper.DefInst,
                      KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
                      KillingOffset, DeadOffset);
      if (OR == OW_MaybePartial) {
        auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
        OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
                                KillingOffset, DeadOffset,
                                DeadLocWrapper.DefInst, IOL);
      }
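      // Example of the case handled below: DeadSI stores the constant i32 0
      // to %p, and KillingSI later stores i8 1 into one of those four bytes.
      // The two constants can be merged, DeadSI is updated to store the
      // merged value, and KillingSI becomes removable.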
      if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
        auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);
        auto *KillingSI = dyn_cast<StoreInst>(KillingLocWrapper.DefInst);
        // We are re-using tryToMergePartialOverlappingStores, which requires
        // DeadSI to dominate KillingSI.
        // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
        if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
          if (Constant *Merged = tryToMergePartialOverlappingStores(
                  KillingSI, DeadSI, KillingOffset, DeadOffset, DL, BatchAA,
                  &DT)) {

            // Update stored value of earlier store to merged constant.
            DeadSI->setOperand(0, Merged);
            ++NumModifiedStores;
            Changed = true;
            DeletedKillingLoc = true;

            // Remove killing store and remove any outstanding overlap
            // intervals for the updated store.
            deleteDeadInstruction(KillingSI, &Deleted);
            auto I = IOLs.find(DeadSI->getParent());
            if (I != IOLs.end())
              I->second.erase(DeadSI);
            break;
          }
        }
      }
      if (OR == OW_Complete) {
        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
                          << *DeadLocWrapper.DefInst << "\n KILLER: "
                          << *KillingLocWrapper.DefInst << '\n');
        deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
        ++NumFastStores;
        Changed = true;
      }
    }
  }

  assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
         "SkipStores and Deleted out of sync?");

  return {Changed, DeletedKillingLoc};
}

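// Try to eliminate dead defs for every location written by
// "KillingDefWrapper". For each killing location, this also checks whether
// the store is a no-op and whether a memset/malloc pair can be folded into
// calloc.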
bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
  if (KillingDefWrapper.DefinedLocations.empty()) {
    LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
                      << *KillingDefWrapper.DefInst << "\n");
    return false;
  }

  bool MadeChange = false;
  for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
                      << *KillingLocWrapper.MemDef << " ("
                      << *KillingLocWrapper.DefInst << ")\n");
    auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
    MadeChange |= Changed;

    // Check if the store is a no-op.
    if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
                                          KillingLocWrapper.UnderlyingObject)) {
      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
                        << *KillingLocWrapper.DefInst << '\n');
      deleteDeadInstruction(KillingLocWrapper.DefInst);
      NumRedundantStores++;
      MadeChange = true;
      continue;
    }
    // Can we form a calloc from a memset/malloc pair?
    if (!DeletedKillingLoc &&
        tryFoldIntoCalloc(KillingLocWrapper.MemDef,
                          KillingLocWrapper.UnderlyingObject)) {
      LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
                        << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
      deleteDeadInstruction(KillingLocWrapper.DefInst);
      MadeChange = true;
      continue;
    }
  }
  return MadeChange;
}

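// Driver for the MemorySSA-backed elimination: visit all MemoryDefs collected
// by DSEState, then run the remaining clean-ups (partially overlapped stores,
// redundant stores of existing values, dead writes at the end of the
// function) and finally erase the instructions queued for removal.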
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                                DominatorTree &DT, PostDominatorTree &PDT,
                                const TargetLibraryInfo &TLI,
                                const LoopInfo &LI) {
  bool MadeChange = false;
  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
  // For each store:
  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
    MemoryDef *KillingDef = State.MemDefs[I];
    if (State.SkipStores.count(KillingDef))
      continue;

    MemoryDefWrapper KillingDefWrapper(
        KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
                                        EnableInitializesImprovement));
    MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
  }

  if (EnablePartialOverwriteTracking)
    for (auto &KV : State.IOLs)
      MadeChange |= State.removePartiallyOverlappedStores(KV.second);

  MadeChange |= State.eliminateRedundantStoresOfExistingValues();
  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();

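  // Instructions queued in ToRemove have already been removed from MemorySSA;
  // erase them from the IR now that no more queries will be made.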
  while (!State.ToRemove.empty()) {
    Instruction *DeadInst = State.ToRemove.pop_back_val();
    DeadInst->eraseFromParent();
  }

  return MadeChange;
}
} // end anonymous namespace

//===----------------------------------------------------------------------===//
// DSE Pass
//===----------------------------------------------------------------------===//
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);

  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);

#ifdef LLVM_ENABLE_STATS
  if (AreStatisticsEnabled())
    for (auto &I : instructions(F))
      NumRemainingStores += isa<StoreInst>(&I);
#endif

  if (!Changed)
    return PreservedAnalyses::all();

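  // Only memory instructions were removed or rewritten; the CFG is unchanged
  // and MemorySSA and LoopInfo have been kept up to date, so those analyses
  // remain valid.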
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  PA.preserve<LoopAnalysis>();
  return PA;
}