//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs various transformations related to eliminating memcpy
// calls, or transforming sets of stores into memset's.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "memcpyopt"

static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
    "enable-memcpyopt-without-libcalls", cl::Hidden,
    cl::desc("Enable memcpyopt even when libcalls are disabled"));

STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
STATISTIC(NumStackMove, "Number of stack-move optimizations performed");

namespace {

/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
///   store 0 -> P+1
///   store 0 -> P+0
///   store 0 -> P+3
///   store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc. When we see
/// the first store, we make a range [1, 2). The second store extends the range
/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
/// two ranges into [0, 3) which is memset'able.
struct MemsetRange {
  // Start/End - A half-open range describing the span of bytes that this
  // range covers. The range is closed at the start and open at the end:
  // [Start, End).
  int64_t Start, End;

  /// StartPtr - The getelementptr instruction that points to the start of the
  /// range.
  Value *StartPtr;

  /// Alignment - The known alignment of the first store.
  MaybeAlign Alignment;

  /// TheStores - The actual stores that make up this range.
  SmallVector<Instruction *, 16> TheStores;

  bool isProfitableToUseMemset(const DataLayout &DL) const;
};

} // end anonymous namespace

bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
  // If we found at least 4 stores to merge, or the range covers at least 16
  // bytes, use memset.
  if (TheStores.size() >= 4 || End - Start >= 16)
    return true;

  // If there is nothing to merge, don't do anything.
  if (TheStores.size() < 2)
    return false;

  // If any of the stores are a memset, then it is always good to extend the
  // memset.
  for (Instruction *SI : TheStores)
    if (!isa<StoreInst>(SI))
      return true;

  // Assume that the code generator is capable of merging pairs of stores
  // together if it wants to.
  if (TheStores.size() == 2)
    return false;

  // If we have fewer than 8 stores, it can still be worthwhile to do this.
  // For example, merging 4 i8 stores into an i32 store is useful almost always.
  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
  // memset will be split into 2 32-bit stores anyway) and doing so can
  // pessimize the llvm optimizer.
  //
  // Since we don't have perfect knowledge here, make some assumptions: assume
  // the maximum GPR width is the same size as the largest legal integer
  // size. If so, check to see whether we will end up actually reducing the
  // number of stores used.
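  // For example (illustrative, assuming the largest legal integer is 64
  // bits): at this point exactly three plain stores remain under
  // consideration. Three stores covering 8 contiguous bytes lower to a single
  // i64 store (3 > 1, so profitable), whereas three i32 stores covering 12
  // bytes would need an i64 plus four i8 stores (3 > 5 fails, so not
  // profitable).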
  unsigned Bytes = unsigned(End - Start);
  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
  if (MaxIntSize == 0)
    MaxIntSize = 1;
  unsigned NumPointerStores = Bytes / MaxIntSize;

  // Assume the remaining bytes if any are done a byte at a time.
  unsigned NumByteStores = Bytes % MaxIntSize;

  // If we will reduce the # stores (according to this heuristic), do the
  // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
  // etc.
  return TheStores.size() > NumPointerStores + NumByteStores;
}

namespace {

class MemsetRanges {
  using range_iterator = SmallVectorImpl<MemsetRange>::iterator;

  /// A sorted list of the memset ranges.
  SmallVector<MemsetRange, 8> Ranges;

  const DataLayout &DL;

public:
  MemsetRanges(const DataLayout &DL) : DL(DL) {}

  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;

  const_iterator begin() const { return Ranges.begin(); }
  const_iterator end() const { return Ranges.end(); }
  bool empty() const { return Ranges.empty(); }

  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
    if (auto *SI = dyn_cast<StoreInst>(Inst))
      addStore(OffsetFromFirst, SI);
    else
      addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
  }

  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
    TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
    assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
    addRange(OffsetFromFirst, StoreSize.getFixedValue(),
             SI->getPointerOperand(), SI->getAlign(), SI);
  }

  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
    int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
  }

  void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
                Instruction *Inst);
};

} // end anonymous namespace

/// Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
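/// For example, if the existing ranges are [0, 4) and [8, 12), adding a store
/// that covers [4, 8) first extends the former range to [0, 8) and then folds
/// the latter in, leaving a single range [0, 12).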
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
                            MaybeAlign Alignment, Instruction *Inst) {
  int64_t End = Start + Size;

  range_iterator I = partition_point(
      Ranges, [=](const MemsetRange &O) { return O.End < Start; });

  // We now know that I == E, in which case we didn't find anything to merge
  // with, or that Start <= I->End. If End < I->Start or I == E, then we need
  // to insert a new range. Handle this now.
  if (I == Ranges.end() || End < I->Start) {
    MemsetRange &R = *Ranges.insert(I, MemsetRange());
    R.Start = Start;
    R.End = End;
    R.StartPtr = Ptr;
    R.Alignment = Alignment;
    R.TheStores.push_back(Inst);
    return;
  }

  // This store overlaps with I, add it.
  I->TheStores.push_back(Inst);

  // At this point, we may have an interval that completely contains our store.
  // If so, just add it to the interval and return.
  if (I->Start <= Start && I->End >= End)
    return;

  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
  // but is not entirely contained within the range.

  // See if this store extends the start of the range. In this case, it
  // couldn't possibly cause it to join the prior range, because otherwise we
  // would have stopped on *it*.
  if (Start < I->Start) {
    I->Start = Start;
    I->StartPtr = Ptr;
    I->Alignment = Alignment;
  }

  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
  // is in or right at the end of I), and that End >= I->Start. Extend I out to
  // End.
  if (End > I->End) {
    I->End = End;
    range_iterator NextI = I;
    while (++NextI != Ranges.end() && End >= NextI->Start) {
      // Merge the range in.
      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
      if (NextI->End > I->End)
        I->End = NextI->End;
      Ranges.erase(NextI);
      NextI = I;
    }
  }
}

//===----------------------------------------------------------------------===//
//                         MemCpyOptLegacyPass Pass
//===----------------------------------------------------------------------===//

// Check that V is either not accessible by the caller, or unwinding cannot
// occur between Start and End.
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
                                         Instruction *End) {
  assert(Start->getParent() == End->getParent() && "Must be in same block");
  // Function can't unwind, so it also can't be visible through unwinding.
  if (Start->getFunction()->doesNotThrow())
    return false;

  // Object is not visible on unwind.
  // TODO: Support RequiresNoCaptureBeforeUnwind case.
  bool RequiresNoCaptureBeforeUnwind;
  if (isNotVisibleOnUnwind(getUnderlyingObject(V),
                           RequiresNoCaptureBeforeUnwind) &&
      !RequiresNoCaptureBeforeUnwind)
    return false;

  // Check whether there are any unwinding instructions in the range.
  return any_of(make_range(Start->getIterator(), End->getIterator()),
                [](const Instruction &I) { return I.mayThrow(); });
}

void MemCpyOptPass::eraseInstruction(Instruction *I) {
  MSSAU->removeMemoryAccess(I);
  I->eraseFromParent();
}

// Check for mod or ref of Loc between Start and End, excluding both
// boundaries. Start and End must be in the same block.
// If SkippedLifetimeStart is provided, skip over one clobbering
// lifetime.start intrinsic and store it inside SkippedLifetimeStart.
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
                            const MemoryUseOrDef *Start,
                            const MemoryUseOrDef *End,
                            Instruction **SkippedLifetimeStart = nullptr) {
  assert(Start->getBlock() == End->getBlock() && "Only local supported");
  for (const MemoryAccess &MA :
       make_range(++Start->getIterator(), End->getIterator())) {
    Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
    if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
      auto *II = dyn_cast<IntrinsicInst>(I);
      if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
          SkippedLifetimeStart && !*SkippedLifetimeStart) {
        *SkippedLifetimeStart = I;
        continue;
      }

      return true;
    }
  }
  return false;
}

// Check for mod of Loc between Start and End, excluding both boundaries.
// Start and End can be in different blocks.
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
                           MemoryLocation Loc, const MemoryUseOrDef *Start,
                           const MemoryUseOrDef *End) {
  if (isa<MemoryUse>(End)) {
    // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering
    // writes. Manually check read accesses between Start and End, if they are
    // in the same block, for clobbers. Otherwise assume Loc is clobbered.
    return Start->getBlock() != End->getBlock() ||
           any_of(
               make_range(std::next(Start->getIterator()), End->getIterator()),
               [&AA, Loc](const MemoryAccess &Acc) {
                 if (isa<MemoryUse>(&Acc))
                   return false;
                 Instruction *AccInst =
                     cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
                 return isModSet(AA.getModRefInfo(AccInst, Loc));
               });
  }

  // TODO: Only walk until we hit Start.
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      End->getDefiningAccess(), Loc, AA);
  return !MSSA->dominates(Clobber, Start);
}

// Update AA metadata.
static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
  // handled here, but combineMetadata doesn't support them yet.
  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                         LLVMContext::MD_noalias,
                         LLVMContext::MD_invariant_group,
                         LLVMContext::MD_access_group};
  combineMetadata(ReplInst, I, KnownIDs, true);
}

/// When scanning forward over instructions, we look for some other patterns to
/// fold away. In particular, this looks for stores to neighboring locations of
/// memory. If it sees enough consecutive ones, it attempts to merge them
/// together into a memcpy/memset.
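/// For example (illustrative IR), four adjacent byte stores such as
/// \code
///   store i8 0, ptr %p
///   %p1 = getelementptr i8, ptr %p, i64 1
///   store i8 0, ptr %p1
///   %p2 = getelementptr i8, ptr %p, i64 2
///   store i8 0, ptr %p2
///   %p3 = getelementptr i8, ptr %p, i64 3
///   store i8 0, ptr %p3
/// \endcode
/// can be rewritten as a single llvm.memset of the four bytes starting at %p.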
Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
                                                 Value *StartPtr,
                                                 Value *ByteVal) {
  const DataLayout &DL = StartInst->getDataLayout();

  // We can't track scalable types.
  if (auto *SI = dyn_cast<StoreInst>(StartInst))
    if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
      return nullptr;

  // Okay, so we now have a single store with a splatable value. Scan to find
  // all subsequent stores of the same value to offsets from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous
  // blocks are stored.
  MemsetRanges Ranges(DL);

  BasicBlock::iterator BI(StartInst);

  // Keeps track of the last memory use or def before the insertion point for
  // the new memset. The new MemoryDef for the inserted memsets will be
  // inserted after MemInsertPoint.
  MemoryUseOrDef *MemInsertPoint = nullptr;
  for (++BI; !BI->isTerminator(); ++BI) {
    auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
        MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
    if (CurrentAcc)
      MemInsertPoint = CurrentAcc;

    // Calls that only access inaccessible memory do not block merging
    // accessible stores.
    if (auto *CB = dyn_cast<CallBase>(BI)) {
      if (CB->onlyAccessesInaccessibleMemory())
        continue;
    }

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out. We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple())
        break;

      Value *StoredVal = NextStore->getValueOperand();

      // Don't convert stores of non-integral pointer types to memsets (which
      // stores integers).
      if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
        break;

      // We can't track ranges involving scalable types.
      if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
        break;

      // Check to see if this stored value is of the same byte-splattable value.
      Value *StoredByte = isBytewiseValue(StoredVal, DL);
      if (isa<UndefValue>(ByteVal) && StoredByte)
        ByteVal = StoredByte;
      if (ByteVal != StoredByte)
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
      if (!Offset)
        break;

      Ranges.addStore(*Offset, NextStore);
    } else {
      auto *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
          !isa<ConstantInt>(MSI->getLength()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
      if (!Offset)
        break;

      Ranges.addMemSet(*Offset, MSI);
    }
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in. This is a very common case of course.
  if (Ranges.empty())
    return nullptr;

  // If we had at least one store that could be merged in, add the starting
  // store as well. We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addInst(0, StartInst);

  // If we create any memsets, we put them right before the first instruction
  // that isn't part of the memset block. This ensures that the memset is
  // dominated by any addressing instruction needed by the start of the block.
  IRBuilder<> Builder(&*BI);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  Instruction *AMemSet = nullptr;
  for (const MemsetRange &Range : Ranges) {
    if (Range.TheStores.size() == 1)
      continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(DL))
      continue;

    // Otherwise, we do want to transform this! Create a new memset.
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
                                   Range.Alignment);
    AMemSet->mergeDIAssignID(Range.TheStores);

    LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
                                                   : Range.TheStores) dbgs()
                                              << *SI << '\n';
               dbgs() << "With: " << *AMemSet << '\n');
    if (!Range.TheStores.empty())
      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    auto *NewDef = cast<MemoryDef>(
        MemInsertPoint->getMemoryInst() == &*BI
            ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
            : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
    MSSAU->insertDef(NewDef, /*RenameUses=*/true);
    MemInsertPoint = NewDef;

    // Zap all the stores.
    for (Instruction *SI : Range.TheStores)
      eraseInstruction(SI);

    ++NumMemSetInfer;
  }

  return AMemSet;
}

// This method tries to lift a store instruction before position P.
// It will lift the store and its operands, plus anything else that
// may alias with these.
// The method returns true if it was successful.
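// For example (illustrative), when processStoreOfLoad wants to place a memcpy
// at the position of an intervening clobber P:
//   %v = load %T, ptr %src
//   call void @f()        ; P, a call that may write to %src (hypothetical)
//   store %T %v, ptr %dst
// the store, together with any of its operands computed after P, must first
// be lifted above P.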
bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
  // If the store aliases this position, early bail out.
  MemoryLocation StoreLoc = MemoryLocation::get(SI);
  if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
    return false;

  // Keep track of the arguments of all instructions we plan to lift
  // so we can make sure to lift them as well if appropriate.
  DenseSet<Instruction *> Args;
  auto AddArg = [&](Value *Arg) {
    auto *I = dyn_cast<Instruction>(Arg);
    if (I && I->getParent() == SI->getParent()) {
      // Cannot hoist user of P above P.
      if (I == P)
        return false;
      Args.insert(I);
    }
    return true;
  };
  if (!AddArg(SI->getPointerOperand()))
    return false;

  // Instructions to lift before P.
  SmallVector<Instruction *, 8> ToLift{SI};

  // Memory locations of lifted instructions.
  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};

  // Lifted calls.
  SmallVector<const CallBase *, 8> Calls;

  const MemoryLocation LoadLoc = MemoryLocation::get(LI);

  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
    auto *C = &*I;

    // Make sure hoisting does not perform a store that was not guaranteed to
    // happen.
    if (!isGuaranteedToTransferExecutionToSuccessor(C))
      return false;

    bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));

    bool NeedLift = false;
    if (Args.erase(C))
      NeedLift = true;
    else if (MayAlias) {
      NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
        return isModOrRefSet(AA->getModRefInfo(C, ML));
      });

      if (!NeedLift)
        NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
          return isModOrRefSet(AA->getModRefInfo(C, Call));
        });
    }

    if (!NeedLift)
      continue;

    if (MayAlias) {
      // Since LI is implicitly moved downwards past the lifted instructions,
      // none of them may modify its source.
      if (isModSet(AA->getModRefInfo(C, LoadLoc)))
        return false;
      else if (const auto *Call = dyn_cast<CallBase>(C)) {
        // If we can't lift this before P, it's game over.
        if (isModOrRefSet(AA->getModRefInfo(P, Call)))
          return false;

        Calls.push_back(Call);
      } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
        // If we can't lift this before P, it's game over.
        auto ML = MemoryLocation::get(C);
        if (isModOrRefSet(AA->getModRefInfo(P, ML)))
          return false;

        MemLocs.push_back(ML);
      } else
        // We don't know how to lift this instruction.
        return false;
    }

    ToLift.push_back(C);
    for (Value *Op : C->operands())
      if (!AddArg(Op))
        return false;
  }

  // Find MSSA insertion point. Normally P will always have a corresponding
  // memory access before which we can insert. However, with non-standard AA
  // pipelines, there may be a mismatch between AA and MSSA, in which case we
  // will scan for a memory access before P. In either case, we know for sure
  // that at least the load will have a memory access.
  // TODO: Simplify this once P will be determined by MSSA, in which case the
  // discrepancy can no longer occur.
  MemoryUseOrDef *MemInsertPoint = nullptr;
  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
    MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
  } else {
    const Instruction *ConstP = P;
    for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
                                           ++LI->getReverseIterator())) {
      if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
        MemInsertPoint = MA;
        break;
      }
    }
  }

  // We made it, we need to lift.
  for (auto *I : llvm::reverse(ToLift)) {
    LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
    I->moveBefore(P);
    assert(MemInsertPoint && "Must have found insert point");
    if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
      MSSAU->moveAfter(MA, MemInsertPoint);
      MemInsertPoint = MA;
    }
  }

  return true;
}

bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
                                       const DataLayout &DL,
                                       BasicBlock::iterator &BBI) {
  if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
    return false;

  auto *T = LI->getType();
  // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
  // the corresponding libcalls are not available.
  // TODO: We should really distinguish between libcall availability and
  // our ability to introduce intrinsics.
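  // For example (illustrative IR), an aggregate copy written as
  //   %v = load %struct.S, ptr %src
  //   store %struct.S %v, ptr %dst
  // can be rewritten as a memcpy (or a memmove if the two locations may
  // alias) of the store size of %struct.S.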
  if (T->isAggregateType() &&
      (EnableMemCpyOptWithoutLibcalls ||
       (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
    MemoryLocation LoadLoc = MemoryLocation::get(LI);

    // We use alias analysis to check if an instruction may store to
    // the memory we load from in between the load and the store. If
    // such an instruction is found, we try to promote there instead
    // of at the store position.
    // TODO: Can use MSSA for this.
    Instruction *P = SI;
    for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
      if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
        P = &I;
        break;
      }
    }

    // We found an instruction that may write to the loaded memory.
    // We can try to promote at this position instead of the store
    // position if nothing aliases the store memory after this and the store
    // destination is not in the range.
    if (P && P != SI) {
      if (!moveUp(SI, P, LI))
        P = nullptr;
    }

    // If a valid insertion position is found, then we can promote
    // the load/store pair to a memcpy.
    if (P) {
      // If we load from memory that may alias the memory we store to,
      // memmove must be used to preserve semantics. If not, memcpy can
      // be used. Also, if we load from constant memory, memcpy can be used
      // as the constant memory won't be modified.
      bool UseMemMove = false;
      if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
        UseMemMove = true;

      IRBuilder<> Builder(P);
      Value *Size =
          Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
      Instruction *M;
      if (UseMemMove)
        M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
                                  LI->getPointerOperand(), LI->getAlign(),
                                  Size);
      else
        M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
                                 LI->getPointerOperand(), LI->getAlign(), Size);
      M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

      LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
                        << "\n");

      auto *LastDef =
          cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
      auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

      eraseInstruction(SI);
      eraseInstruction(LI);
      ++NumMemCpyInstr;

      // Make sure we do not invalidate the iterator.
      BBI = M->getIterator();
      return true;
    }
  }

  // Detect cases where we're performing call slot forwarding, but
  // happen to be using a load-store pair to implement it, rather than
  // a memcpy.
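  // For example (illustrative IR):
  //   call void @f(ptr sret(%T) %tmp)
  //   %v = load %T, ptr %tmp
  //   store %T %v, ptr %dst
  // may allow @f to write its result directly into %dst, making the
  // temporary and the load/store pair dead.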
  BatchAAResults BAA(*AA);
  auto GetCall = [&]() -> CallInst * {
    // We defer this expensive clobber walk until the cheap checks
    // have been done on the source inside performCallSlotOptzn.
    if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
            MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
      return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
    return nullptr;
  };

  bool Changed = performCallSlotOptzn(
      LI, SI, SI->getPointerOperand()->stripPointerCasts(),
      LI->getPointerOperand()->stripPointerCasts(),
      DL.getTypeStoreSize(SI->getOperand(0)->getType()),
      std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
  if (Changed) {
    eraseInstruction(SI);
    eraseInstruction(LI);
    ++NumMemCpyInstr;
    return true;
  }

  // If this is a load-store pair from a stack slot to a stack slot, we
  // might be able to perform the stack-move optimization just as we do for
  // memcpys from an alloca to an alloca.
  if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
    if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
      if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
                                DL.getTypeStoreSize(T), BAA)) {
        // Avoid invalidating the iterator.
        BBI = SI->getNextNonDebugInstruction()->getIterator();
        eraseInstruction(SI);
        eraseInstruction(LI);
        ++NumMemCpyInstr;
        return true;
      }
    }
  }

  return false;
}

bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (!SI->isSimple())
    return false;

  // Avoid merging nontemporal stores since the resulting
  // memcpy/memset would not be able to preserve the nontemporal hint.
  // In theory we could teach how to propagate the !nontemporal metadata to
  // memset calls. However, that change would force the backend to
  // conservatively expand !nontemporal memset calls back to sequences of
  // store instructions (effectively undoing the merging).
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return false;

  const DataLayout &DL = SI->getDataLayout();

  Value *StoredVal = SI->getValueOperand();

  // Not all the transforms below are correct for non-integral pointers, bail
  // until we've audited the individual pieces.
  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
    return false;

  // Load-to-store forwarding can be interpreted as a memcpy.
  if (auto *LI = dyn_cast<LoadInst>(StoredVal))
    return processStoreOfLoad(SI, LI, DL, BBI);

  // The following code creates memset intrinsics out of thin air. Don't do
  // this if the corresponding libfunc is not available.
  // TODO: We should really distinguish between libcall availability and
  // our ability to introduce intrinsics.
  if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
    return false;

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset. Right now we only handle memset.

  // Ensure that the value being stored is something that is memset'able a
  // byte at a time, like "0" or "-1" of any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  auto *V = SI->getOperand(0);
  if (Value *ByteVal = isBytewiseValue(V, DL)) {
    if (Instruction *I =
            tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }

    // If we have an aggregate, we try to promote it to memset regardless
    // of opportunity for merging as it can expose optimization opportunities
    // in subsequent passes.
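    // For example (illustrative IR), storing a zero-initialized aggregate:
    //   store %struct.S zeroinitializer, ptr %p
    // becomes an llvm.memset of value 0 over the store size of %struct.S.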
    auto *T = V->getType();
    if (T->isAggregateType()) {
      uint64_t Size = DL.getTypeStoreSize(T);
      IRBuilder<> Builder(SI);
      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
                                     SI->getAlign());
      M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

      LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");

      // The newly inserted memset is immediately overwritten by the original
      // store, so we do not need to rename uses.
      auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
      auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);

      eraseInstruction(SI);
      NumMemSetInfer++;

      // Make sure we do not invalidate the iterator.
      BBI = M->getIterator();
      return true;
    }
  }

  return false;
}

bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
  // See if there is another memset or store neighboring this memset which
  // allows us to widen out the memset to do a single larger store.
  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
    if (Instruction *I =
            tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }
  return false;
}

/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
                                         Instruction *cpyStore, Value *cpyDest,
                                         Value *cpySrc, TypeSize cpySize,
                                         Align cpyDestAlign,
                                         BatchAAResults &BAA,
                                         std::function<CallInst *()> GetC) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning
  // that the memcpy can be discarded rather than moved.

  // We can't optimize scalable types.
  if (cpySize.isScalable())
    return false;

  // Require that src be an alloca. This simplifies the reasoning considerably.
  auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  const DataLayout &DL = cpyLoad->getDataLayout();
  TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
  // We can't optimize scalable types.
  if (SrcAllocaSize.isScalable())
    return false;
  uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();

  if (cpySize < srcSize)
    return false;

  CallInst *C = GetC();
  if (!C)
    return false;

  // Lifetime marks shouldn't be operated on.
  if (Function *F = C->getCalledFunction())
    if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
      return false;

  if (C->getParent() != cpyStore->getParent()) {
    LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
    return false;
  }

  MemoryLocation DestLoc =
      isa<StoreInst>(cpyStore)
          ? MemoryLocation::get(cpyStore)
          : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));

  // Check that nothing touches the dest of the copy between
  // the call and the store/memcpy.
  Instruction *SkippedLifetimeStart = nullptr;
  if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
                      MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
    return false;
  }

  // If we need to move a lifetime.start above the call, make sure that we can
  // actually do so. If the argument is bitcasted for example, we would have to
  // move the bitcast as well, which we don't handle.
  if (SkippedLifetimeStart) {
    auto *LifetimeArg =
        dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
    if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
        C->comesBefore(LifetimeArg))
      return false;
  }

  // Check that storing to the first srcSize bytes of dest will not cause a
  // trap or data race.
  bool ExplicitlyDereferenceableOnly;
  if (!isWritableObject(getUnderlyingObject(cpyDest),
                        ExplicitlyDereferenceableOnly) ||
      !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
                                          DL, C, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
    return false;
  }

  // Make sure that nothing can observe cpyDest being written early. There are
  // a number of cases to consider:
  //  1. cpyDest cannot be accessed between C and cpyStore as a precondition of
  //     the transform.
  //  2. C itself may not access cpyDest (prior to the transform). This is
  //     checked further below.
  //  3. If cpyDest is accessible to the caller of this function (potentially
  //     captured and not based on an alloca), we need to ensure that we cannot
  //     unwind between C and cpyStore. This is checked here.
  //  4. If cpyDest is potentially captured, there may be accesses to it from
  //     another thread. In this case, we need to check that cpyStore is
  //     guaranteed to be executed if C is. As it is a non-atomic access, it
  //     renders accesses from other threads undefined.
  //     TODO: This is currently not checked.
  if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
    return false;
  }

  // Check that dest points to memory that is at least as aligned as src.
  Align srcAlign = srcAlloca->getAlign();
  bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
  // If dest is not aligned enough and we can't increase its alignment then
  // bail out.
  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy. This
  // guarantees that it holds only undefined values when passed in (so the
  // final memcpy can be dropped), that it is not read or written between the
  // call and the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User *, 8> srcUseList(srcAlloca->users());
  while (!srcUseList.empty()) {
    User *U = srcUseList.pop_back_val();

    if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
      append_range(srcUseList, U->users());
      continue;
    }
    if (const auto *G = dyn_cast<GetElementPtrInst>(U);
        G && G->hasAllZeroIndices()) {
      append_range(srcUseList, U->users());
      continue;
    }
    if (const auto *IT = dyn_cast<IntrinsicInst>(U))
      if (IT->isLifetimeStartOrEnd())
        continue;

    if (U != C && U != cpyLoad) {
      LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
      return false;
    }
  }

  // Check whether src is captured by the called function, in which case there
  // may be further indirect uses of src.
  bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
    return U->stripPointerCasts() == cpySrc &&
           !C->doesNotCapture(C->getArgOperandNo(&U));
  });

  // If src is captured, then check whether there are any potential uses of
  // src through the captured pointer before the lifetime of src ends, either
  // due to a lifetime.end or a return from the function.
  if (SrcIsCaptured) {
    // Check that dest is not captured before/at the call. We have already
    // checked that src is not captured before it. If either had been captured,
    // then the call might be comparing the argument against the captured dest
    // or src pointer.
    Value *DestObj = getUnderlyingObject(cpyDest);
    if (!isIdentifiedFunctionLocal(DestObj) ||
        PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
                                   /* StoreCaptures */ true, C, DT,
                                   /* IncludeI */ true))
      return false;

    MemoryLocation SrcLoc =
        MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
    for (Instruction &I :
         make_range(++C->getIterator(), C->getParent()->end())) {
      // Lifetime of srcAlloca ends at lifetime.end.
      if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
            II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
            cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
          break;
      }

      // Lifetime of srcAlloca ends at return.
      if (isa<ReturnInst>(&I))
        break;

      // Ignore the direct read of src in the load.
      if (&I == cpyLoad)
        continue;

      // Check whether this instruction may mod/ref src through the captured
      // pointer (we have already handled any direct mod/refs in the loop
      // above). Also bail if we hit a terminator, as we don't want to scan
      // into other blocks.
      if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
        return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  bool NeedMoveGEP = false;
  if (!DT->dominates(cpyDest, C)) {
    // Support moving a constant index GEP before the call.
    auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
    if (GEP && GEP->hasAllConstantIndices() &&
        DT->dominates(GEP->getPointerOperand(), C))
      NeedMoveGEP = true;
    else
      return false;
  }

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest. We rely on AA to figure this out for us.
  MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
  ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
  // If necessary, perform additional analysis.
  if (isModOrRefSet(MR))
    MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
  if (isModOrRefSet(MR))
    return false;

  // We can't create address space casts here because we don't know if they're
  // safe for the target.
  if (cpySrc->getType() != cpyDest->getType())
    return false;
  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
    if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
        cpySrc->getType() != C->getArgOperand(ArgI)->getType())
      return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
    if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
      changedArgument = true;
      C->setArgOperand(ArgI, cpyDest);
    }

  if (!changedArgument)
    return false;

  // If the destination wasn't sufficiently aligned then increase its
  // alignment.
  if (!isDestSufficientlyAligned) {
    assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
    cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
  }

  if (NeedMoveGEP) {
    auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
    GEP->moveBefore(C);
  }

  if (SkippedLifetimeStart) {
    SkippedLifetimeStart->moveBefore(C);
    MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
                      MSSA->getMemoryAccess(C));
  }

  combineAAMetadata(C, cpyLoad);
  if (cpyLoad != cpyStore)
    combineAAMetadata(C, cpyStore);

  ++NumCallSlot;
  return true;
}

/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
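/// For example:
/// \code
///   memcpy(b <- a)
///   memcpy(c <- b)
/// \endcode
/// can, when nothing clobbers 'a' between the two transfers, be rewritten as
/// \code
///   memcpy(b <- a)
///   memcpy(c <- a)
/// \endcode
/// which may allow the first memcpy to be removed later if 'b' has no other
/// uses.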
1125 | bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, |
1126 | MemCpyInst *MDep, |
1127 | BatchAAResults &BAA) { |
1128 | // If dep instruction is reading from our current input, then it is a noop |
1129 | // transfer and substituting the input won't change this instruction. Just |
1130 | // ignore the input and let someone else zap MDep. This handles cases like: |
1131 | // memcpy(a <- a) |
1132 | // memcpy(b <- a) |
1133 | if (M->getSource() == MDep->getSource()) |
1134 | return false; |
1135 | |
1136 | // We can only optimize non-volatile memcpy's. |
1137 | if (MDep->isVolatile()) |
1138 | return false; |
1139 | |
1140 | int64_t MForwardOffset = 0; |
1141 | const DataLayout &DL = M->getModule()->getDataLayout(); |
1142 | // We can only transforms memcpy's where the dest of one is the source of the |
1143 | // other, or they have an offset in a range. |
1144 | if (M->getSource() != MDep->getDest()) { |
1145 | std::optional<int64_t> Offset = |
1146 | M->getSource()->getPointerOffsetFrom(Other: MDep->getDest(), DL); |
1147 | if (!Offset || *Offset < 0) |
1148 | return false; |
1149 | MForwardOffset = *Offset; |
1150 | } |
1151 | |
1152 | // The length of the memcpy's must be the same, or the preceding one |
1153 | // must be larger than the following one. |
1154 | if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) { |
1155 | auto *MDepLen = dyn_cast<ConstantInt>(Val: MDep->getLength()); |
1156 | auto *MLen = dyn_cast<ConstantInt>(Val: M->getLength()); |
1157 | if (!MDepLen || !MLen || |
1158 | MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) |
1159 | return false; |
1160 | } |
1161 | |
1162 | IRBuilder<> Builder(M); |
1163 | auto *CopySource = MDep->getSource(); |
1164 | Instruction *NewCopySource = nullptr; |
1165 | auto CleanupOnRet = llvm::make_scope_exit(F: [&NewCopySource] { |
1166 | if (NewCopySource && NewCopySource->use_empty()) |
1167 | // Safety: It's safe here because we will only allocate more instructions |
1168 | // after finishing all BatchAA queries, but we have to be careful if we |
1169 | // want to do something like this in another place. Then we'd probably |
1170 | // have to delay instruction removal until all transforms on an |
1171 | // instruction finished. |
1172 | NewCopySource->eraseFromParent(); |
1173 | }); |
1174 | MaybeAlign CopySourceAlign = MDep->getSourceAlign(); |
1175 | // We just need to calculate the actual size of the copy. |
1176 | auto MCopyLoc = MemoryLocation::getForSource(MTI: MDep).getWithNewSize( |
1177 | NewSize: MemoryLocation::getForSource(MTI: M).Size); |
1178 | |
1179 | // When the forwarding offset is greater than 0, we transform |
1180 | // memcpy(d1 <- s1) |
1181 | // memcpy(d2 <- d1+o) |
1182 | // to |
1183 | // memcpy(d2 <- s1+o) |
1184 | if (MForwardOffset > 0) { |
1185 | // The copy destination of `M` maybe can serve as the source of copying. |
1186 | std::optional<int64_t> MDestOffset = |
1187 | M->getRawDest()->getPointerOffsetFrom(Other: MDep->getRawSource(), DL); |
1188 | if (MDestOffset == MForwardOffset) |
1189 | CopySource = M->getDest(); |
1190 | else { |
1191 | CopySource = Builder.CreateInBoundsPtrAdd( |
1192 | Ptr: CopySource, Offset: Builder.getInt64(C: MForwardOffset)); |
1193 | NewCopySource = dyn_cast<Instruction>(Val: CopySource); |
1194 | } |
1195 | // We need to update `MCopyLoc` if an offset exists. |
1196 | MCopyLoc = MCopyLoc.getWithNewPtr(NewPtr: CopySource); |
1197 | if (CopySourceAlign) |
1198 | CopySourceAlign = commonAlignment(A: *CopySourceAlign, Offset: MForwardOffset); |
1199 | } |
1200 | |
1201 | // Verify that the copied-from memory doesn't change in between the two |
1202 | // transfers. For example, in: |
1203 | // memcpy(a <- b) |
1204 | // *b = 42; |
1205 | // memcpy(c <- a) |
1206 | // It would be invalid to transform the second memcpy into memcpy(c <- b). |
1207 | // |
1208 | // TODO: If the code between M and MDep is transparent to the destination "c", |
1209 | // then we could still perform the xform by moving M up to the first memcpy. |
1210 | if (writtenBetween(MSSA, AA&: BAA, Loc: MCopyLoc, Start: MSSA->getMemoryAccess(I: MDep), |
1211 | End: MSSA->getMemoryAccess(I: M))) |
1212 | return false; |
1213 | |
1214 | // No need to create `memcpy(a <- a)`. |
1215 | if (BAA.isMustAlias(V1: M->getDest(), V2: CopySource)) { |
1216 | // Remove the instruction we're replacing. |
1217 | eraseInstruction(I: M); |
1218 | ++NumMemCpyInstr; |
1219 | return true; |
1220 | } |
1221 | |
1222 | // If the dest of the second might alias the source of the first, then the |
1223 | // source and dest might overlap. In addition, if the source of the first |
1224 | // points to constant memory, they won't overlap by definition. Otherwise, we |
1225 | // still want to eliminate the intermediate value, but we have to generate a |
1226 | // memmove instead of memcpy. |
1227 | bool UseMemMove = false; |
1228 | if (isModSet(MRI: BAA.getModRefInfo(I: M, OptLoc: MemoryLocation::getForSource(MTI: MDep)))) { |
1229 | // Don't convert llvm.memcpy.inline into memmove because memmove can be |
1230 | // lowered as a call, and that is not allowed for llvm.memcpy.inline (and |
1231 | // there is no inline version of llvm.memmove) |
1232 | if (isa<MemCpyInlineInst>(Val: M)) |
1233 | return false; |
1234 | UseMemMove = true; |
1235 | } |
1236 | |
1237 | // If all checks passed, then we can transform M. |
1238 | LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n" |
1239 | << *MDep << '\n' |
1240 | << *M << '\n'); |
1241 | |
1242 | // TODO: Is this worth it if we're creating a less aligned memcpy? For |
1243 | // example we could be moving from movaps -> movq on x86. |
1244 | Instruction *NewM; |
1245 | if (UseMemMove) |
1246 | NewM = |
1247 | Builder.CreateMemMove(Dst: M->getDest(), DstAlign: M->getDestAlign(), Src: CopySource, |
1248 | SrcAlign: CopySourceAlign, Size: M->getLength(), isVolatile: M->isVolatile()); |
1249 | else if (isa<MemCpyInlineInst>(Val: M)) { |
1250 | // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is |
1251 | // never allowed since that would allow the latter to be lowered as a call |
1252 | // to an external function. |
1253 | NewM = Builder.CreateMemCpyInline(Dst: M->getDest(), DstAlign: M->getDestAlign(), |
1254 | Src: CopySource, SrcAlign: CopySourceAlign, |
1255 | Size: M->getLength(), isVolatile: M->isVolatile()); |
1256 | } else |
1257 | NewM = |
1258 | Builder.CreateMemCpy(Dst: M->getDest(), DstAlign: M->getDestAlign(), Src: CopySource, |
1259 | SrcAlign: CopySourceAlign, Size: M->getLength(), isVolatile: M->isVolatile()); |
1260 | NewM->copyMetadata(SrcInst: *M, WL: LLVMContext::MD_DIAssignID); |
1261 | |
1262 | assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M))); |
1263 | auto *LastDef = cast<MemoryDef>(Val: MSSAU->getMemorySSA()->getMemoryAccess(I: M)); |
1264 | auto *NewAccess = MSSAU->createMemoryAccessAfter(I: NewM, Definition: nullptr, InsertPt: LastDef); |
1265 | MSSAU->insertDef(Def: cast<MemoryDef>(Val: NewAccess), /*RenameUses=*/true); |
1266 | |
1267 | // Remove the instruction we're replacing. |
1268 | eraseInstruction(I: M); |
1269 | ++NumMemCpyInstr; |
1270 | return true; |
1271 | } |
1272 | |
1273 | /// We've found that the (upward scanning) memory dependence of \p MemCpy is |
1274 | /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that |
1275 | /// weren't copied over by \p MemCpy. |
1276 | /// |
1277 | /// In other words, transform: |
1278 | /// \code |
1279 | /// memset(dst, c, dst_size); |
1280 | /// ... |
1281 | /// memcpy(dst, src, src_size); |
1282 | /// \endcode |
1283 | /// into: |
1284 | /// \code |
1285 | /// ... |
1286 | /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size); |
1287 | /// memcpy(dst, src, src_size); |
1288 | /// \endcode |
1289 | /// |
1290 | /// The memset is sunk to just before the memcpy to ensure that src_size is |
1291 | /// present when emitting the simplified memset. |
1292 | bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, |
1293 | MemSetInst *MemSet, |
1294 | BatchAAResults &BAA) { |
1295 | // We can only transform memset/memcpy with the same destination. |
if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1297 | return false; |
1298 | |
1299 | // Don't perform the transform if src_size may be zero. In that case, the |
1300 | // transform is essentially a complex no-op and may lead to an infinite |
1301 | // loop if BasicAA is smart enough to understand that dst and dst + src_size |
1302 | // are still MustAlias after the transform. |
1303 | Value *SrcSize = MemCpy->getLength(); |
if (!isKnownNonZero(SrcSize,
                    SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
1306 | return false; |
1307 | |
1308 | // Check that src and dst of the memcpy aren't the same. While memcpy |
1309 | // operands cannot partially overlap, exact equality is allowed. |
if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1311 | return false; |
1312 | |
1313 | // We know that dst up to src_size is not written. We now need to make sure |
1314 | // that dst up to dst_size is not accessed. (If we did not move the memset, |
1315 | // checking for reads would be sufficient.) |
if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
                    MSSA->getMemoryAccess(MemSet),
                    MSSA->getMemoryAccess(MemCpy)))
1319 | return false; |
1320 | |
1321 | // Use the same i8* dest as the memcpy, killing the memset dest if different. |
1322 | Value *Dest = MemCpy->getRawDest(); |
1323 | Value *DestSize = MemSet->getLength(); |
1324 | |
if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1326 | return false; |
1327 | |
1328 | // If the sizes are the same, simply drop the memset instead of generating |
1329 | // a replacement with zero size. |
1330 | if (DestSize == SrcSize) { |
eraseInstruction(MemSet);
1332 | return true; |
1333 | } |
1334 | |
1335 | // By default, create an unaligned memset. |
1336 | Align Alignment = Align(1); |
1337 | // If Dest is aligned, and SrcSize is constant, use the minimum alignment |
1338 | // of the sum. |
const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
                                 MemCpy->getDestAlign().valueOrOne());
if (DestAlign > 1)
if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
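// For instance (illustrative numbers only): with a common destination
// alignment of 8 and a constant SrcSize of 4, the trailing memset starts at
// Dest + 4 and gets commonAlignment(8, 4) == 4.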
1344 | |
1345 | IRBuilder<> Builder(MemCpy); |
1346 | |
1347 | // Preserve the debug location of the old memset for the code emitted here |
1348 | // related to the new memset. This is correct according to the rules in |
1349 | // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an |
1350 | // instruction location", given that we move the memset within the basic |
1351 | // block. |
1352 | assert(MemSet->getParent() == MemCpy->getParent() && |
1353 | "Preserving debug location based on moving memset within BB." ); |
1354 | Builder.SetCurrentDebugLocation(MemSet->getDebugLoc()); |
1355 | |
1356 | // If the sizes have different types, zext the smaller one. |
1357 | if (DestSize->getType() != SrcSize->getType()) { |
1358 | if (DestSize->getType()->getIntegerBitWidth() > |
1359 | SrcSize->getType()->getIntegerBitWidth()) |
SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
else
DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1363 | } |
1364 | |
Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
Value *MemsetLen = Builder.CreateSelect(
    Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
Instruction *NewMemSet =
    Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
                         MemSet->getOperand(1), MemsetLen, Alignment);
1372 | |
1373 | assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && |
1374 | "MemCpy must be a MemoryDef" ); |
1375 | // The new memset is inserted before the memcpy, and it is known that the |
1376 | // memcpy's defining access is the memset about to be removed. |
auto *LastDef =
    cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
auto *NewAccess =
    MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1382 | |
eraseInstruction(MemSet);
1384 | return true; |
1385 | } |
1386 | |
1387 | /// Determine whether the instruction has undefined content for the given Size, |
1388 | /// either because it was freshly alloca'd or started its lifetime. |
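///
/// As an illustrative sketch (the IR below is hypothetical, not taken from a
/// test), a query against %a here would report undefined contents:
/// \code
///   %a = alloca [16 x i8]
///   call void @llvm.lifetime.start.p0(i64 16, ptr %a)
///   ; any read of up to 16 bytes from %a sees undefined content
/// \endcode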
1389 | static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V, |
1390 | MemoryDef *Def, Value *Size) { |
if (MSSA->isLiveOnEntryDef(Def))
return isa<AllocaInst>(getUnderlyingObject(V));
1393 | |
if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));

if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1400 | LTSize->getZExtValue() >= CSize->getZExtValue()) |
1401 | return true; |
1402 | } |
1403 | |
1404 | // If the lifetime.start covers a whole alloca (as it almost always |
1405 | // does) and we're querying a pointer based on that alloca, then we know |
1406 | // the memory is definitely undef, regardless of how exactly we alias. |
1407 | // The size also doesn't matter, as an out-of-bounds access would be UB. |
if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1410 | const DataLayout &DL = Alloca->getDataLayout(); |
1411 | if (std::optional<TypeSize> AllocaSize = |
1412 | Alloca->getAllocationSize(DL)) |
1413 | if (*AllocaSize == LTSize->getValue()) |
1414 | return true; |
1415 | } |
1416 | } |
1417 | } |
1418 | } |
1419 | |
1420 | return false; |
1421 | } |
1422 | |
1423 | /// Transform memcpy to memset when its source was just memset. |
1424 | /// In other words, turn: |
1425 | /// \code |
1426 | /// memset(dst1, c, dst1_size); |
1427 | /// memcpy(dst2, dst1, dst2_size); |
1428 | /// \endcode |
1429 | /// into: |
1430 | /// \code |
1431 | /// memset(dst1, c, dst1_size); |
1432 | /// memset(dst2, c, dst2_size); |
1433 | /// \endcode |
1434 | /// When dst2_size <= dst1_size. |
1435 | bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, |
1436 | MemSetInst *MemSet, |
1437 | BatchAAResults &BAA) { |
// Make sure this is a memcpy(..., memset(...), ...) pattern, i.e. we are
// memsetting and memcpying from the same address. Otherwise it is hard to
// reason about.
if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
return false;
1442 | |
1443 | Value *MemSetSize = MemSet->getLength(); |
1444 | Value *CopySize = MemCpy->getLength(); |
1445 | |
1446 | if (MemSetSize != CopySize) { |
1447 | // Make sure the memcpy doesn't read any more than what the memset wrote. |
1448 | // Don't worry about sizes larger than i64. |
1449 | |
1450 | // A known memset size is required. |
auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1452 | if (!CMemSetSize) |
1453 | return false; |
1454 | |
1455 | // A known memcpy size is also required. |
auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1457 | if (!CCopySize) |
1458 | return false; |
1459 | if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) { |
1460 | // If the memcpy is larger than the memset, but the memory was undef prior |
1461 | // to the memset, we can just ignore the tail. Technically we're only |
1462 | // interested in the bytes from MemSetSize..CopySize here, but as we can't |
1463 | // easily represent this location, we use the full 0..CopySize range. |
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
bool CanReduceSize = false;
MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
    MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
if (auto *MD = dyn_cast<MemoryDef>(Clobber))
if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1471 | CanReduceSize = true; |
1472 | |
1473 | if (!CanReduceSize) |
1474 | return false; |
1475 | CopySize = MemSetSize; |
1476 | } |
1477 | } |
1478 | |
1479 | IRBuilder<> Builder(MemCpy); |
Instruction *NewM =
    Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
                         CopySize, MemCpy->getDestAlign());
auto *LastDef =
    cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1487 | |
1488 | return true; |
1489 | } |
1490 | |
1491 | // Attempts to optimize the pattern whereby memory is copied from an alloca to |
1492 | // another alloca, where the two allocas don't have conflicting mod/ref. If |
1493 | // successful, the two allocas can be merged into one and the transfer can be |
1494 | // deleted. This pattern is generated frequently in Rust, due to the ubiquity of |
1495 | // move operations in that language. |
1496 | // |
1497 | // Once we determine that the optimization is safe to perform, we replace all |
1498 | // uses of the destination alloca with the source alloca. We also "shrink wrap" |
1499 | // the lifetime markers of the single merged alloca to before the first use |
1500 | // and after the last use. Note that the "shrink wrapping" procedure is a safe |
1501 | // transformation only because we restrict the scope of this optimization to |
1502 | // allocas that aren't captured. |
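//
// As a rough illustration (the buffers and size below are hypothetical):
//
//   %src  = alloca [32 x i8]
//   %dest = alloca [32 x i8]
//   ... writes into %src ...
//   call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 32, i1 false)
//   ... only uses of %dest from here on ...
//
// becomes a single %src alloca: every use of %dest is rewritten to %src, and
// both the memcpy and the %dest alloca can then be removed.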
1503 | bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, |
1504 | AllocaInst *DestAlloca, |
1505 | AllocaInst *SrcAlloca, TypeSize Size, |
1506 | BatchAAResults &BAA) { |
1507 | LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n" |
1508 | << *Store << "\n" ); |
1509 | |
1510 | // Make sure the two allocas are in the same address space. |
1511 | if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) { |
1512 | LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n" ); |
1513 | return false; |
1514 | } |
1515 | |
// Check that the copy covers the full allocation and has a static size.
1517 | const DataLayout &DL = DestAlloca->getDataLayout(); |
1518 | std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL); |
1519 | if (!SrcSize || Size != *SrcSize) { |
1520 | LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n" ); |
1521 | return false; |
1522 | } |
1523 | std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL); |
1524 | if (!DestSize || Size != *DestSize) { |
1525 | LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n" ); |
1526 | return false; |
1527 | } |
1528 | |
1529 | if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca()) |
1530 | return false; |
1531 | |
1532 | // Check that src and dest are never captured, unescaped allocas. Also |
1533 | // find the nearest common dominator and postdominator for all users in |
1534 | // order to shrink wrap the lifetimes, and instructions with noalias metadata |
1535 | // to remove them. |
1536 | |
1537 | SmallVector<Instruction *, 4> LifetimeMarkers; |
1538 | SmallSet<Instruction *, 4> NoAliasInstrs; |
1539 | bool SrcNotDom = false; |
1540 | |
// Recursively track the users and check whether any modifying accesses exist.
1542 | auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool { |
1543 | bool CanBeNull, CanBeFreed; |
1544 | return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); |
1545 | }; |
1546 | |
1547 | auto CaptureTrackingWithModRef = |
1548 | [&](Instruction *AI, |
1549 | function_ref<bool(Instruction *)> ModRefCallback) -> bool { |
1550 | SmallVector<Instruction *, 8> Worklist; |
Worklist.push_back(AI);
unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
Worklist.reserve(MaxUsesToExplore);
1554 | SmallSet<const Use *, 20> Visited; |
1555 | while (!Worklist.empty()) { |
1556 | Instruction *I = Worklist.back(); |
1557 | Worklist.pop_back(); |
1558 | for (const Use &U : I->uses()) { |
auto *UI = cast<Instruction>(U.getUser());
1560 | // If any use that isn't dominated by SrcAlloca exists, we move src |
1561 | // alloca to the entry before the transformation. |
if (!DT->dominates(SrcAlloca, UI))
1563 | SrcNotDom = true; |
1564 | |
1565 | if (Visited.size() >= MaxUsesToExplore) { |
1566 | LLVM_DEBUG( |
1567 | dbgs() |
1568 | << "Stack Move: Exceeded max uses to see ModRef, bailing\n" ); |
1569 | return false; |
1570 | } |
if (!Visited.insert(&U).second)
1572 | continue; |
1573 | switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) { |
1574 | case UseCaptureKind::MAY_CAPTURE: |
1575 | return false; |
1576 | case UseCaptureKind::PASSTHROUGH: |
1577 | // Instructions cannot have non-instruction users. |
Worklist.push_back(UI);
1579 | continue; |
1580 | case UseCaptureKind::NO_CAPTURE: { |
1581 | if (UI->isLifetimeStartOrEnd()) { |
1582 | // We note the locations of these intrinsic calls so that we can |
1583 | // delete them later if the optimization succeeds, this is safe |
1584 | // since both llvm.lifetime.start and llvm.lifetime.end intrinsics |
1585 | // practically fill all the bytes of the alloca with an undefined |
1586 | // value, although conceptually marked as alive/dead. |
int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
if (Size < 0 || Size == DestSize) {
LifetimeMarkers.push_back(UI);
1590 | continue; |
1591 | } |
1592 | } |
if (UI->hasMetadata(LLVMContext::MD_noalias))
NoAliasInstrs.insert(UI);
1595 | if (!ModRefCallback(UI)) |
1596 | return false; |
1597 | } |
1598 | } |
1599 | } |
1600 | } |
1601 | return true; |
1602 | }; |
1603 | |
// Check that Dest has no Mod/Ref from the alloca to the Store, except for
// full-sized lifetime intrinsics. Also collect the Mod/Ref instructions for
// the reachability check.
1607 | ModRefInfo DestModRef = ModRefInfo::NoModRef; |
MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1609 | SmallVector<BasicBlock *, 8> ReachabilityWorklist; |
1610 | auto DestModRefCallback = [&](Instruction *UI) -> bool { |
1611 | // We don't care about the store itself. |
1612 | if (UI == Store) |
1613 | return true; |
ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
DestModRef |= Res;
if (isModOrRefSet(Res)) {
1617 | // Instructions reachability checks. |
1618 | // FIXME: adding the Instruction version isPotentiallyReachableFromMany on |
1619 | // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful. |
1620 | if (UI->getParent() == Store->getParent()) { |
1621 | // The same block case is special because it's the only time we're |
1622 | // looking within a single block to see which instruction comes first. |
1623 | // Once we start looking at multiple blocks, the first instruction of |
1624 | // the block is reachable, so we only need to determine reachability |
1625 | // between whole blocks. |
1626 | BasicBlock *BB = UI->getParent(); |
1627 | |
1628 | // If A comes before B, then B is definitively reachable from A. |
if (UI->comesBefore(Store))
1630 | return false; |
1631 | |
1632 | // If the user's parent block is entry, no predecessor exists. |
1633 | if (BB->isEntryBlock()) |
1634 | return true; |
1635 | |
1636 | // Otherwise, continue doing the normal per-BB CFG walk. |
ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
} else {
ReachabilityWorklist.push_back(UI->getParent());
1640 | } |
1641 | } |
1642 | return true; |
1643 | }; |
1644 | |
1645 | if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback)) |
1646 | return false; |
1647 | // Bailout if Dest may have any ModRef before Store. |
if (!ReachabilityWorklist.empty() &&
    isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
                                   nullptr, DT, nullptr))
1651 | return false; |
1652 | |
1653 | // Check that, from after the Load to the end of the BB, |
1654 | // - if the dest has any Mod, src has no Ref, and |
1655 | // - if the dest has any Ref, src has no Mod except full-sized lifetimes. |
MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
1657 | |
1658 | auto SrcModRefCallback = [&](Instruction *UI) -> bool { |
1659 | // Any ModRef post-dominated by Load doesn't matter, also Load and Store |
1660 | // themselves can be ignored. |
if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
return true;
ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
if ((isModSet(DestModRef) && isRefSet(Res)) ||
    (isRefSet(DestModRef) && isModSet(Res)))
1666 | return false; |
1667 | |
1668 | return true; |
1669 | }; |
1670 | |
1671 | if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback)) |
1672 | return false; |
1673 | |
1674 | // We can do the transformation. First, move the SrcAlloca to the start of the |
1675 | // BB. |
1676 | if (SrcNotDom) |
SrcAlloca->moveBefore(*SrcAlloca->getParent(),
                      SrcAlloca->getParent()->getFirstInsertionPt());
// Align the allocas appropriately.
SrcAlloca->setAlignment(
    std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1682 | |
1683 | // Merge the two allocas. |
DestAlloca->replaceAllUsesWith(SrcAlloca);
eraseInstruction(DestAlloca);
1686 | |
1687 | // Drop metadata on the source alloca. |
1688 | SrcAlloca->dropUnknownNonDebugMetadata(); |
1689 | |
1690 | // TODO: Reconstruct merged lifetime markers. |
// Remove all other lifetime markers, if the original lifetime intrinsics
// exist.
1693 | if (!LifetimeMarkers.empty()) { |
1694 | for (Instruction *I : LifetimeMarkers) |
1695 | eraseInstruction(I); |
1696 | } |
1697 | |
1698 | // As this transformation can cause memory accesses that didn't previously |
1699 | // alias to begin to alias one another, we remove !noalias metadata from any |
1700 | // uses of either alloca. This is conservative, but more precision doesn't |
1701 | // seem worthwhile right now. |
1702 | for (Instruction *I : NoAliasInstrs) |
I->setMetadata(LLVMContext::MD_noalias, nullptr);
1704 | |
1705 | LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n" ); |
1706 | NumStackMove++; |
1707 | return true; |
1708 | } |
1709 | |
1710 | static bool isZeroSize(Value *Size) { |
if (auto *I = dyn_cast<Instruction>(Size))
if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
Size = Res;
// Treat undef/poison size like zero.
if (auto *C = dyn_cast<Constant>(Size))
return isa<UndefValue>(C) || C->isNullValue();
1717 | return false; |
1718 | } |
1719 | |
1720 | /// Perform simplification of memcpy's. If we have memcpy A |
1721 | /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite |
1722 | /// B to be a memcpy from X to Z (or potentially a memmove, depending on |
1723 | /// circumstances). This allows later passes to remove the first memcpy |
1724 | /// altogether. |
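///
/// A condensed sketch of the memcpy-memcpy case (pseudocode, names
/// illustrative):
/// \code
///   memcpy(Y, X, N);   // memcpy A
///   ...
///   memcpy(Z, Y, N);   // memcpy B
/// \endcode
/// becomes, when Y is not clobbered in between:
/// \code
///   memcpy(Y, X, N);   // now a candidate for removal by DSE
///   ...
///   memcpy(Z, X, N);   // or a memmove, if Z may overlap X
/// \endcode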
1725 | bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { |
1726 | // We can only optimize non-volatile memcpy's. |
1727 | if (M->isVolatile()) |
1728 | return false; |
1729 | |
1730 | // If the source and destination of the memcpy are the same, then zap it. |
1731 | if (M->getSource() == M->getDest()) { |
1732 | ++BBI; |
eraseInstruction(M);
1734 | return true; |
1735 | } |
1736 | |
1737 | // If the size is zero, remove the memcpy. |
if (isZeroSize(M->getLength())) {
++BBI;
eraseInstruction(M);
1741 | return true; |
1742 | } |
1743 | |
MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1745 | if (!MA) |
1746 | // Degenerate case: memcpy marked as not accessing memory. |
1747 | return false; |
1748 | |
1749 | // If copying from a constant, try to turn the memcpy into a memset. |
if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
if (GV->isConstant() && GV->hasDefinitiveInitializer())
if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
                                     M->getDataLayout())) {
IRBuilder<> Builder(M);
Instruction *NewM = Builder.CreateMemSet(
    M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
auto *LastDef = cast<MemoryDef>(MA);
auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

eraseInstruction(M);
1763 | ++NumCpyToSet; |
1764 | return true; |
1765 | } |
1766 | |
1767 | BatchAAResults BAA(*AA); |
1768 | // FIXME: Not using getClobberingMemoryAccess() here due to PR54682. |
1769 | MemoryAccess *AnyClobber = MA->getDefiningAccess(); |
MemoryLocation DestLoc = MemoryLocation::getForDest(M);
const MemoryAccess *DestClobber =
    MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1773 | |
1774 | // Try to turn a partially redundant memset + memcpy into |
1775 | // smaller memset + memcpy. We don't need the memcpy size for this. |
1776 | // The memcpy must post-dom the memset, so limit this to the same basic |
1777 | // block. A non-local generalization is likely not worthwhile. |
if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
if (DestClobber->getBlock() == M->getParent())
if (processMemSetMemCpyDependence(M, MDep, BAA))
return true;

MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
    AnyClobber, MemoryLocation::getForSource(M), BAA);
1786 | |
1787 | // There are five possible optimizations we can do for memcpy: |
// a) memcpy-memcpy xform which exposes redundancy for DSE.
1789 | // b) call-memcpy xform for return slot optimization. |
1790 | // c) memcpy from freshly alloca'd space or space that has just started |
1791 | // its lifetime copies undefined data, and we can therefore eliminate |
1792 | // the memcpy in favor of the data that was already at the destination. |
1793 | // d) memcpy from a just-memset'd source can be turned into memset. |
1794 | // e) elimination of memcpy via stack-move optimization. |
if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
if (Instruction *MI = MD->getMemoryInst()) {
if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
if (auto *C = dyn_cast<CallInst>(MI)) {
if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
                         TypeSize::getFixed(CopySize->getZExtValue()),
                         M->getDestAlign().valueOrOne(), BAA,
                         [C]() -> CallInst * { return C; })) {
LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
                  << " call: " << *C << "\n"
                  << " memcpy: " << *M << "\n");
eraseInstruction(M);
1807 | ++NumMemCpyInstr; |
1808 | return true; |
1809 | } |
1810 | } |
1811 | } |
if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1813 | if (processMemCpyMemCpyDependence(M, MDep, BAA)) |
1814 | return true; |
if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
eraseInstruction(M);
1819 | ++NumCpyToSet; |
1820 | return true; |
1821 | } |
1822 | } |
1823 | } |
1824 | |
if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
eraseInstruction(M);
1828 | ++NumMemCpyInstr; |
1829 | return true; |
1830 | } |
1831 | } |
1832 | |
1833 | // If the transfer is from a stack slot to a stack slot, then we may be able |
1834 | // to perform the stack-move optimization. See the comments in |
1835 | // performStackMoveOptzn() for more details. |
auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
if (!DestAlloca)
return false;
auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
if (!SrcAlloca)
return false;
ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
if (Len == nullptr)
return false;
if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
                          TypeSize::getFixed(Len->getZExtValue()), BAA)) {
// Avoid invalidating the iterator.
BBI = M->getNextNonDebugInstruction()->getIterator();
eraseInstruction(M);
1850 | ++NumMemCpyInstr; |
1851 | return true; |
1852 | } |
1853 | |
1854 | return false; |
1855 | } |
1856 | |
1857 | /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed |
1858 | /// not to alias. |
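///
/// For example (pseudocode sketch; dst/src/n are illustrative), when alias
/// analysis shows the memmove cannot modify its own source:
/// \code
///   memmove(dst, src, n);
/// \endcode
/// is rewritten in place into:
/// \code
///   memcpy(dst, src, n);
/// \endcode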
1859 | bool MemCpyOptPass::processMemMove(MemMoveInst *M) { |
1860 | // See if the source could be modified by this memmove potentially. |
if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1862 | return false; |
1863 | |
1864 | LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M |
1865 | << "\n" ); |
1866 | |
1867 | // If not, then we know we can transform this. |
1868 | Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(), |
1869 | M->getLength()->getType()}; |
M->setCalledFunction(
    Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys));
1872 | |
1873 | // For MemorySSA nothing really changes (except that memcpy may imply stricter |
1874 | // aliasing guarantees). |
1875 | |
1876 | ++NumMoveToCpy; |
1877 | return true; |
1878 | } |
1879 | |
1880 | /// This is called on every byval argument in call sites. |
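///
/// A rough sketch of the rewrite (pseudocode; tmp/src/foo are illustrative):
/// \code
///   memcpy(tmp, src, size);
///   call foo(byval tmp)
/// \endcode
/// can, when src is not written in between, become:
/// \code
///   memcpy(tmp, src, size);   // may now be dead
///   call foo(byval src)
/// \endcode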
1881 | bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { |
1882 | const DataLayout &DL = CB.getDataLayout(); |
1883 | // Find out what feeds this byval argument. |
Value *ByValArg = CB.getArgOperand(ArgNo);
Type *ByValTy = CB.getParamByValType(ArgNo);
TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
if (!CallAccess)
return false;
MemCpyInst *MDep = nullptr;
BatchAAResults BAA(*AA);
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
    CallAccess->getDefiningAccess(), Loc, BAA);
if (auto *MD = dyn_cast<MemoryDef>(Clobber))
MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1897 | |
1898 | // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by |
1899 | // a memcpy, see if we can byval from the source of the memcpy instead of the |
1900 | // result. |
1901 | if (!MDep || MDep->isVolatile() || |
1902 | ByValArg->stripPointerCasts() != MDep->getDest()) |
1903 | return false; |
1904 | |
// The length of the memcpy must be larger than or equal to the size of the
// byval.
auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
if (!C1 || !TypeSize::isKnownGE(
               TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1909 | return false; |
1910 | |
1911 | // Get the alignment of the byval. If the call doesn't specify the alignment, |
1912 | // then it is some target specific value that we can't know. |
1913 | MaybeAlign ByValAlign = CB.getParamAlign(ArgNo); |
1914 | if (!ByValAlign) |
1915 | return false; |
1916 | |
1917 | // If it is greater than the memcpy, then we check to see if we can force the |
1918 | // source of the memcpy to the alignment we need. If we fail, we bail out. |
1919 | MaybeAlign MemDepAlign = MDep->getSourceAlign(); |
if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
    getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
                               DT) < *ByValAlign)
1923 | return false; |
1924 | |
1925 | // The type of the memcpy source must match the byval argument |
1926 | if (MDep->getSource()->getType() != ByValArg->getType()) |
1927 | return false; |
1928 | |
1929 | // Verify that the copied-from memory doesn't change in between the memcpy and |
1930 | // the byval call. |
1931 | // memcpy(a <- b) |
1932 | // *b = 42; |
1933 | // foo(*a) |
1934 | // It would be invalid to transform the second memcpy into foo(*b). |
if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                   MSSA->getMemoryAccess(MDep), CallAccess))
1937 | return false; |
1938 | |
1939 | LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n" |
1940 | << " " << *MDep << "\n" |
1941 | << " " << CB << "\n" ); |
1942 | |
1943 | // Otherwise we're good! Update the byval argument. |
combineAAMetadata(&CB, MDep);
CB.setArgOperand(ArgNo, MDep->getSource());
1946 | ++NumMemCpyInstr; |
1947 | return true; |
1948 | } |
1949 | |
1950 | /// This is called on memcpy dest pointer arguments attributed as immutable |
1951 | /// during call. Try to use memcpy source directly if all of the following |
1952 | /// conditions are satisfied. |
1953 | /// 1. The memcpy dst is neither modified during the call nor captured by the |
1954 | /// call. (if readonly, noalias, nocapture attributes on call-site.) |
1955 | /// 2. The memcpy dst is an alloca with known alignment & size. |
1956 | /// 2-1. The memcpy length == the alloca size which ensures that the new |
1957 | /// pointer is dereferenceable for the required range |
1958 | /// 2-2. The src pointer has alignment >= the alloca alignment or can be |
1959 | /// enforced so. |
/// 3. The memcpy dst and src are not modified between the memcpy and the call.
///    (if MSSA clobber check is safe.)
1962 | /// 4. The memcpy src is not modified during the call. (ModRef check shows no |
1963 | /// Mod.) |
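///
/// A condensed sketch (pseudocode; %buf, src and foo are illustrative):
/// \code
///   %buf = alloca ...
///   memcpy(%buf, src, sizeof(%buf));
///   call foo(noalias nocapture readonly %buf)
/// \endcode
/// When all of the conditions above hold, the call can read src directly:
/// \code
///   memcpy(%buf, src, sizeof(%buf));   // may now be dead
///   call foo(noalias nocapture readonly src)
/// \endcode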
1964 | bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) { |
1965 | // 1. Ensure passed argument is immutable during call. |
if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
      CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
return false;
const DataLayout &DL = CB.getDataLayout();
Value *ImmutArg = CB.getArgOperand(ArgNo);
1971 | |
1972 | // 2. Check that arg is alloca |
1973 | // TODO: Even if the arg gets back to branches, we can remove memcpy if all |
1974 | // the alloca alignments can be enforced to source alignment. |
auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
1976 | if (!AI) |
1977 | return false; |
1978 | |
1979 | std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL); |
1980 | // Can't handle unknown size alloca. |
1981 | // (e.g. Variable Length Array, Scalable Vector) |
1982 | if (!AllocaSize || AllocaSize->isScalable()) |
1983 | return false; |
MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1986 | if (!CallAccess) |
1987 | return false; |
1988 | |
1989 | MemCpyInst *MDep = nullptr; |
1990 | BatchAAResults BAA(*AA); |
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
    CallAccess->getDefiningAccess(), Loc, BAA);
if (auto *MD = dyn_cast<MemoryDef>(Clobber))
MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1995 | |
1996 | // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by |
1997 | // a memcpy, check that the arg equals the memcpy dest. |
1998 | if (!MDep || MDep->isVolatile() || AI != MDep->getDest()) |
1999 | return false; |
2000 | |
2001 | // The type of the memcpy source must match the immut argument |
2002 | if (MDep->getSource()->getType() != ImmutArg->getType()) |
2003 | return false; |
2004 | |
2005 | // 2-1. The length of the memcpy must be equal to the size of the alloca. |
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
2007 | if (!MDepLen || AllocaSize != MDepLen->getValue()) |
2008 | return false; |
2009 | |
// 2-2. The memcpy source alignment must be larger than or equal to the
// alloca's alignment. If not, we check to see if we can force the source of
// the memcpy to the alignment we need. If we fail, we bail out.
2013 | Align MemDepAlign = MDep->getSourceAlign().valueOrOne(); |
2014 | Align AllocaAlign = AI->getAlign(); |
2015 | if (MemDepAlign < AllocaAlign && |
    getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
                               DT) < AllocaAlign)
2018 | return false; |
2019 | |
2020 | // 3. Verify that the source doesn't change in between the memcpy and |
2021 | // the call. |
2022 | // memcpy(a <- b) |
2023 | // *b = 42; |
2024 | // foo(*a) |
2025 | // It would be invalid to transform the second memcpy into foo(*b). |
if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                   MSSA->getMemoryAccess(MDep), CallAccess))
2028 | return false; |
2029 | |
2030 | // 4. The memcpy src must not be modified during the call. |
if (isModSet(AA->getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
2032 | return false; |
2033 | |
2034 | LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n" |
2035 | << " " << *MDep << "\n" |
2036 | << " " << CB << "\n" ); |
2037 | |
2038 | // Otherwise we're good! Update the immut argument. |
combineAAMetadata(&CB, MDep);
CB.setArgOperand(ArgNo, MDep->getSource());
2041 | ++NumMemCpyInstr; |
2042 | return true; |
2043 | } |
2044 | |
2045 | /// Executes one iteration of MemCpyOptPass. |
2046 | bool MemCpyOptPass::iterateOnFunction(Function &F) { |
2047 | bool MadeChange = false; |
2048 | |
// Walk all instructions in the function.
2050 | for (BasicBlock &BB : F) { |
2051 | // Skip unreachable blocks. For example processStore assumes that an |
2052 | // instruction in a BB can't be dominated by a later instruction in the |
2053 | // same BB (which is a scenario that can happen for an unreachable BB that |
2054 | // has itself as a predecessor). |
if (!DT->isReachableFromEntry(&BB))
2056 | continue; |
2057 | |
2058 | for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { |
2059 | // Avoid invalidating the iterator. |
2060 | Instruction *I = &*BI++; |
2061 | |
2062 | bool RepeatInstruction = false; |
2063 | |
if (auto *SI = dyn_cast<StoreInst>(I))
MadeChange |= processStore(SI, BI);
else if (auto *M = dyn_cast<MemSetInst>(I))
RepeatInstruction = processMemSet(M, BI);
else if (auto *M = dyn_cast<MemCpyInst>(I))
RepeatInstruction = processMemCpy(M, BI);
else if (auto *M = dyn_cast<MemMoveInst>(I))
RepeatInstruction = processMemMove(M);
else if (auto *CB = dyn_cast<CallBase>(I)) {
for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
if (CB->isByValArgument(i))
MadeChange |= processByValArgument(*CB, i);
else if (CB->onlyReadsMemory(i))
MadeChange |= processImmutArgument(*CB, i);
2078 | } |
2079 | } |
2080 | |
2081 | // Reprocess the instruction if desired. |
2082 | if (RepeatInstruction) { |
2083 | if (BI != BB.begin()) |
2084 | --BI; |
2085 | MadeChange = true; |
2086 | } |
2087 | } |
2088 | } |
2089 | |
2090 | return MadeChange; |
2091 | } |
2092 | |
2093 | PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { |
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto *AA = &AM.getResult<AAManager>(F);
auto *AC = &AM.getResult<AssumptionAnalysis>(F);
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
2100 | |
bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2102 | if (!MadeChange) |
2103 | return PreservedAnalyses::all(); |
2104 | |
2105 | PreservedAnalyses PA; |
2106 | PA.preserveSet<CFGAnalyses>(); |
2107 | PA.preserve<MemorySSAAnalysis>(); |
2108 | return PA; |
2109 | } |
2110 | |
2111 | bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_, |
2112 | AliasAnalysis *AA_, AssumptionCache *AC_, |
2113 | DominatorTree *DT_, PostDominatorTree *PDT_, |
2114 | MemorySSA *MSSA_) { |
2115 | bool MadeChange = false; |
2116 | TLI = TLI_; |
2117 | AA = AA_; |
2118 | AC = AC_; |
2119 | DT = DT_; |
2120 | PDT = PDT_; |
2121 | MSSA = MSSA_; |
2122 | MemorySSAUpdater MSSAU_(MSSA_); |
2123 | MSSAU = &MSSAU_; |
2124 | |
2125 | while (true) { |
2126 | if (!iterateOnFunction(F)) |
2127 | break; |
2128 | MadeChange = true; |
2129 | } |
2130 | |
2131 | if (VerifyMemorySSA) |
2132 | MSSA_->verifyMemorySSA(); |
2133 | |
2134 | return MadeChange; |
2135 | } |
2136 | |