InstCombineLoadStoreAlloca.cpp source code [llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp]

1	//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the visit functions for load, store and alloca.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "InstCombineInternal.h"
14	#include "llvm/ADT/MapVector.h"
15	#include "llvm/ADT/SetOperations.h"
16	#include "llvm/ADT/SmallString.h"
17	#include "llvm/ADT/Statistic.h"
18	#include "llvm/Analysis/AliasAnalysis.h"
19	#include "llvm/Analysis/Loads.h"
20	#include "llvm/IR/DataLayout.h"
21	#include "llvm/IR/IntrinsicInst.h"
22	#include "llvm/IR/LLVMContext.h"
23	#include "llvm/IR/PatternMatch.h"
24	#include "llvm/Transforms/InstCombine/InstCombiner.h"
25	#include "llvm/Transforms/Utils/Local.h"
26	using namespace llvm;
27	using namespace PatternMatch;
28
29	#define DEBUG_TYPE "instcombine"
30
31	STATISTIC(NumDeadStore, "Number of dead stores eliminated");
32	STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
33
34	static cl::opt<unsigned> MaxCopiedFromConstantUsers(
35	"instcombine-max-copied-from-constant-users", cl::init(Val: `300`),
36	cl::desc ("Maximum users to visit in copy from constant transform"),
37	cl::Hidden);
38
39	/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
40	/// pointer to an alloca. Ignore any reads of the pointer, return false if we
41	/// see any stores or other unknown uses. If we see pointer arithmetic, keep
42	/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
43	/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
44	/// the alloca, and if the source pointer is a pointer to a constant memory
45	/// location, we can optimize this.
46	static bool
47	isOnlyCopiedFromConstantMemory(AAResults AA, AllocaInst V,
48	MemTransferInst *&TheCopy,
49	SmallVectorImpl<Instruction *> &ToDelete) {
50	// We track lifetime intrinsics as we encounter them. If we decide to go
51	// ahead and replace the value with the memory location, this lets the caller
52	// quickly eliminate the markers.
53
54	using ValueAndIsOffset = PointerIntPair<Value , `1`, bool*>;
55	SmallVector<ValueAndIsOffset, `32`> Worklist;
56	SmallPtrSet<ValueAndIsOffset, `32`> Visited;
57	Worklist.emplace_back(Args&: V, Args: false);
58	while (!Worklist.empty()) {
59	ValueAndIsOffset Elem = Worklist.pop_back_val();
60	if (!Visited.insert(Ptr: Elem).second)
61	continue;
62	if (Visited.size() > MaxCopiedFromConstantUsers)
63	return false;
64
65	const auto [Value, IsOffset] = Elem;
66	for (auto &U : Value->uses()) {
67	auto *I = cast<Instruction>(Val: U.getUser());
68
69	if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
70	// Ignore non-volatile loads, they are always ok.
71	if (!LI->isSimple()) return false;
72	continue;
73	}
74
75	if (isa<PHINode, SelectInst>(Val: I)) {
76	// We set IsOffset=true, to forbid the memcpy from occurring after the
77	// phi: If one of the phi operands is not based on the alloca, we
78	// would incorrectly omit a write.
79	Worklist.emplace_back(Args&: I, Args: true);
80	continue;
81	}
82	if (isa<BitCastInst, AddrSpaceCastInst>(Val: I)) {
83	// If uses of the bitcast are ok, we are ok.
84	Worklist.emplace_back(Args&: I, Args: IsOffset);
85	continue;
86	}
87	if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I)) {
88	// If the GEP has all zero indices, it doesn't offset the pointer. If it
89	// doesn't, it does.
90	Worklist.emplace_back(Args&: I, Args: IsOffset \|\| !GEP->hasAllZeroIndices());
91	continue;
92	}
93
94	if (auto *Call = dyn_cast<CallBase>(Val: I)) {
95	// If this is the function being called then we treat it like a load and
96	// ignore it.
97	if (Call->isCallee(U: &U))
98	continue;
99
100	unsigned DataOpNo = Call->getDataOperandNo(U: &U);
101	bool IsArgOperand = Call->isArgOperand(U: &U);
102
103	// Inalloca arguments are clobbered by the call.
104	if (IsArgOperand && Call->isInAllocaArgument(ArgNo: DataOpNo))
105	return false;
106
107	// If this call site doesn't modify the memory, then we know it is just
108	// a load (but one that potentially returns the value itself), so we can
109	// ignore it if we know that the value isn't captured.
110	bool NoCapture = Call->doesNotCapture(OpNo: DataOpNo);
111	if ((Call->onlyReadsMemory() && (Call->use_empty() \|\| NoCapture)) \|\|
112	(Call->onlyReadsMemory(OpNo: DataOpNo) && NoCapture))
113	continue;
114	}
115
116	// Lifetime intrinsics can be handled by the caller.
117	if (I->isLifetimeStartOrEnd()) {
118	assert(I->use_empty() && "Lifetime markers have no result to use!");
119	ToDelete.push_back(Elt: I);
120	continue;
121	}
122
123	// If this is isn't our memcpy/memmove, reject it as something we can't
124	// handle.
125	MemTransferInst *MI = dyn_cast<MemTransferInst>(Val: I);
126	if (!MI)
127	return false;
128
129	// If the transfer is volatile, reject it.
130	if (MI->isVolatile())
131	return false;
132
133	// If the transfer is using the alloca as a source of the transfer, then
134	// ignore it since it is a load (unless the transfer is volatile).
135	if (U.getOperandNo() == `1`)
136	continue;
137
138	// If we already have seen a copy, reject the second one.
139	if (TheCopy) return false;
140
141	// If the pointer has been offset from the start of the alloca, we can't
142	// safely handle this.
143	if (IsOffset) return false;
144
145	// If the memintrinsic isn't using the alloca as the dest, reject it.
146	if (U.getOperandNo() != `0`) return false;
147
148	// If the source of the memcpy/move is not constant, reject it.
149	if (isModSet(MRI: AA->getModRefInfoMask(P: MI->getSource())))
150	return false;
151
152	// Otherwise, the transform is safe. Remember the copy instruction.
153	TheCopy = MI;
154	}
155	}
156	return true;
157	}
158
159	/// isOnlyCopiedFromConstantMemory - Return true if the specified alloca is only
160	/// modified by a copy from a constant memory location. If we can prove this, we
161	/// can replace any uses of the alloca with uses of the memory location
162	/// directly.
163	static MemTransferInst *
164	isOnlyCopiedFromConstantMemory(AAResults *AA,
165	AllocaInst *AI,
166	SmallVectorImpl<Instruction *> &ToDelete) {
167	MemTransferInst TheCopy = nullptr*;
168	if (isOnlyCopiedFromConstantMemory(AA, V: AI, TheCopy, ToDelete))
169	return TheCopy;
170	return nullptr;
171	}
172
173	/// Returns true if V is dereferenceable for size of alloca.
174	static bool isDereferenceableForAllocaSize(const Value V, const* AllocaInst *AI,
175	const DataLayout &DL) {
176	if (AI->isArrayAllocation())
177	return false;
178	uint64_t AllocaSize = DL.getTypeStoreSize(Ty: AI->getAllocatedType());
179	if (!AllocaSize)
180	return false;
181	return isDereferenceableAndAlignedPointer(V, Alignment: AI->getAlign(),
182	Size: APInt (`64`, AllocaSize), DL);
183	}
184
185	static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
186	AllocaInst &AI, DominatorTree &DT) {
187	// Check for array size of 1 (scalar allocation).
188	if (!AI.isArrayAllocation()) {
189	// i32 1 is the canonical array size for scalar allocations.
190	if (AI.getArraySize()->getType()->isIntegerTy(Bitwidth: `32`))
191	return nullptr;
192
193	// Canonicalize it.
194	return IC.replaceOperand(I&: AI, OpNum: `0`, V: IC.Builder.getInt32(C: `1`));
195	}
196
197	// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
198	if (const ConstantInt *C = dyn_cast<ConstantInt>(Val: AI.getArraySize())) {
199	if (C->getValue().getActiveBits() <= `64`) {
200	Type *NewTy = ArrayType::get(ElementType: AI.getAllocatedType(), NumElements: C->getZExtValue());
201	AllocaInst *New = IC.Builder.CreateAlloca(Ty: NewTy, AddrSpace: AI.getAddressSpace(),
202	ArraySize: nullptr, Name: AI.getName());
203	New->setAlignment(AI.getAlign());
204	New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
205
206	replaceAllDbgUsesWith(From&: AI, To&: New, DomPoint&: New, DT);
207	return IC.replaceInstUsesWith(I&: AI, V: New);
208	}
209	}
210
211	if (isa<UndefValue>(Val: AI.getArraySize()))
212	return IC.replaceInstUsesWith(I&: AI, V: Constant::getNullValue(Ty: AI.getType()));
213
214	// Ensure that the alloca array size argument has type equal to the offset
215	// size of the alloca() pointer, which, in the tyical case, is intptr_t,
216	// so that any casting is exposed early.
217	Type *PtrIdxTy = IC.getDataLayout().getIndexType(PtrTy: AI.getType());
218	if (AI.getArraySize()->getType() != PtrIdxTy) {
219	Value V = IC.Builder.CreateIntCast(V: AI.getArraySize(), DestTy: PtrIdxTy, isSigned: false*);
220	return IC.replaceOperand(I&: AI, OpNum: `0`, V);
221	}
222
223	return nullptr;
224	}
225
226	namespace {
227	// If I and V are pointers in different address space, it is not allowed to
228	// use replaceAllUsesWith since I and V have different types. A
229	// non-target-specific transformation should not use addrspacecast on V since
230	// the two address space may be disjoint depending on target.
231	//
232	// This class chases down uses of the old pointer until reaching the load
233	// instructions, then replaces the old pointer in the load instructions with
234	// the new pointer. If during the chasing it sees bitcast or GEP, it will
235	// create new bitcast or GEP with the new pointer and use them in the load
236	// instruction.
237	class PointerReplacer {
238	public:
239	PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
240	: IC(IC), Root(Root), FromAS(SrcAS) {}
241
242	bool collectUsers();
243	void replacePointer(Value *V);
244
245	private:
246	void replace(Instruction *I);
247	Value getReplacement(Value V) const { return WorkMap.lookup(Key: V); }
248	bool isAvailable(Instruction I) const* {
249	return I == &Root \|\| UsersToReplace.contains(key: I);
250	}
251
252	bool isEqualOrValidAddrSpaceCast(const Instruction *I,
253	unsigned FromAS) const {
254	const auto *ASC = dyn_cast<AddrSpaceCastInst>(Val: I);
255	if (!ASC)
256	return false;
257	unsigned ToAS = ASC->getDestAddressSpace();
258	return (FromAS == ToAS) \|\| IC.isValidAddrSpaceCast(FromAS, ToAS);
259	}
260
261	SmallSetVector<Instruction *, `32`> UsersToReplace;
262	MapVector<Value , Value > WorkMap;
263	InstCombinerImpl &IC;
264	Instruction &Root;
265	unsigned FromAS;
266	};
267	} // end anonymous namespace
268
269	bool PointerReplacer::collectUsers() {
270	SmallVector<Instruction *> Worklist;
271	SmallSetVector<Instruction *, `32`> ValuesToRevisit;
272
273	auto PushUsersToWorklist = [&](Instruction *Inst) {
274	for (auto *U : Inst->users())
275	if (auto *I = dyn_cast<Instruction>(Val: U))
276	if (!isAvailable(I) && !ValuesToRevisit.contains(key: I))
277	Worklist.emplace_back(Args&: I);
278	};
279
280	PushUsersToWorklist (&Root);
281	while (!Worklist.empty()) {
282	Instruction *Inst = Worklist.pop_back_val();
283	if (auto *Load = dyn_cast<LoadInst>(Val: Inst)) {
284	if (Load->isVolatile())
285	return false;
286	UsersToReplace.insert(X: Load);
287	} else if (auto *PHI = dyn_cast<PHINode>(Val: Inst)) {
288	/// TODO: Handle poison and null pointers for PHI and select.
289	// If all incoming values are available, mark this PHI as
290	// replacable and push it's users into the worklist.
291	bool IsReplaceable = true;
292	if (all_of(Range: PHI->incoming_values(), P: [&](Value *V) {
293	if (!isa<Instruction>(Val: V))
294	return IsReplaceable = false;
295	return isAvailable(I: cast<Instruction>(Val: V));
296	})) {
297	UsersToReplace.insert(X: PHI);
298	PushUsersToWorklist (PHI);
299	continue;
300	}
301
302	// Either an incoming value is not an instruction or not all
303	// incoming values are available. If this PHI was already
304	// visited prior to this iteration, return false.
305	if (!IsReplaceable \|\| !ValuesToRevisit.insert(X: PHI))
306	return false;
307
308	// Push PHI back into the stack, followed by unavailable
309	// incoming values.
310	Worklist.emplace_back(Args&: PHI);
311	for (unsigned Idx = `0`; Idx < PHI->getNumIncomingValues(); ++Idx) {
312	auto *IncomingValue = cast<Instruction>(Val: PHI->getIncomingValue(i: Idx));
313	if (UsersToReplace.contains(key: IncomingValue))
314	continue;
315	if (!ValuesToRevisit.insert(X: IncomingValue))
316	return false;
317	Worklist.emplace_back(Args&: IncomingValue);
318	}
319	} else if (auto *SI = dyn_cast<SelectInst>(Val: Inst)) {
320	auto *TrueInst = dyn_cast<Instruction>(Val: SI->getTrueValue());
321	auto *FalseInst = dyn_cast<Instruction>(Val: SI->getFalseValue());
322	if (!TrueInst \|\| !FalseInst)
323	return false;
324
325	UsersToReplace.insert(X: SI);
326	PushUsersToWorklist (SI);
327	} else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: Inst)) {
328	UsersToReplace.insert(X: GEP);
329	PushUsersToWorklist (GEP);
330	} else if (auto *MI = dyn_cast<MemTransferInst>(Val: Inst)) {
331	if (MI->isVolatile())
332	return false;
333	UsersToReplace.insert(X: Inst);
334	} else if (isEqualOrValidAddrSpaceCast(I: Inst, FromAS)) {
335	UsersToReplace.insert(X: Inst);
336	PushUsersToWorklist (Inst);
337	} else if (Inst->isLifetimeStartOrEnd()) {
338	continue;
339	} else {
340	// TODO: For arbitrary uses with address space mismatches, should we check
341	// if we can introduce a valid addrspacecast?
342	LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *Inst << `'\n'`);
343	return false;
344	}
345	}
346
347	return true;
348	}
349
350	void PointerReplacer::replacePointer(Value *V) {
351	assert(cast<PointerType>(Root.getType()) != cast<PointerType>(V->getType()) &&
352	"Invalid usage");
353	WorkMap [&Root] = V;
354	SmallVector<Instruction *> Worklist;
355	SetVector<Instruction *> PostOrderWorklist;
356	SmallPtrSet<Instruction *, `32`> Visited;
357
358	// Perform a postorder traversal of the users of Root.
359	Worklist.push_back(Elt: &Root);
360	while (!Worklist.empty()) {
361	Instruction *I = Worklist.back();
362
363	// If I has not been processed before, push each of its
364	// replacable users into the worklist.
365	if (Visited.insert(Ptr: I).second) {
366	for (auto *U : I->users()) {
367	auto *UserInst = cast<Instruction>(Val: U);
368	if (UsersToReplace.contains(key: UserInst) && !Visited.contains(Ptr: UserInst))
369	Worklist.push_back(Elt: UserInst);
370	}
371	// Otherwise, users of I have already been pushed into
372	// the PostOrderWorklist. Push I as well.
373	} else {
374	PostOrderWorklist.insert(X: I);
375	Worklist.pop_back();
376	}
377	}
378
379	// Replace pointers in reverse-postorder.
380	for (Instruction *I : reverse(C&: PostOrderWorklist))
381	replace(I);
382	}
383
384	void PointerReplacer::replace(Instruction *I) {
385	if (getReplacement(V: I))
386	return;
387
388	if (auto *LT = dyn_cast<LoadInst>(Val: I)) {
389	auto *V = getReplacement(V: LT->getPointerOperand());
390	assert(V && "Operand not replaced");
391	auto NewI = new* LoadInst (LT->getType(), V, "", LT->isVolatile(),
392	LT->getAlign(), LT->getOrdering(),
393	LT->getSyncScopeID());
394	NewI->takeName(V: LT);
395	copyMetadataForLoad(Dest&: NewI, Source: LT);
396
397	IC.InsertNewInstWith(New: NewI, Old: LT->getIterator());
398	IC.replaceInstUsesWith(I&: *LT, V: NewI);
399	// LT has actually been replaced by NewI. It is useless to insert LT into
400	// the map. Instead, we insert NewI into the map to indicate this is the
401	// replacement (new value).
402	WorkMap [NewI] = NewI;
403	} else if (auto *PHI = dyn_cast<PHINode>(Val: I)) {
404	// Create a new PHI by replacing any incoming value that is a user of the
405	// root pointer and has a replacement.
406	Value *V = WorkMap.lookup(Key: PHI->getIncomingValue(i: `0`));
407	PHI->mutateType(Ty: V ? V->getType() : PHI->getIncomingValue(i: `0`)->getType());
408	for (unsigned int I = `0`; I < PHI->getNumIncomingValues(); ++I) {
409	Value *V = WorkMap.lookup(Key: PHI->getIncomingValue(i: I));
410	PHI->setIncomingValue(i: I, V: V ? V : PHI->getIncomingValue(i: I));
411	}
412	WorkMap [PHI] = PHI;
413	} else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I)) {
414	auto *V = getReplacement(V: GEP->getPointerOperand());
415	assert(V && "Operand not replaced");
416	SmallVector<Value *, `8`> Indices(GEP->indices());
417	auto *NewI =
418	GetElementPtrInst::Create(PointeeType: GEP->getSourceElementType(), Ptr: V, IdxList: Indices);
419	IC.InsertNewInstWith(New: NewI, Old: GEP->getIterator());
420	NewI->takeName(V: GEP);
421	NewI->setNoWrapFlags(GEP->getNoWrapFlags());
422	WorkMap [GEP] = NewI;
423	} else if (auto *SI = dyn_cast<SelectInst>(Val: I)) {
424	Value *TrueValue = SI->getTrueValue();
425	Value *FalseValue = SI->getFalseValue();
426	if (Value *Replacement = getReplacement(V: TrueValue))
427	TrueValue = Replacement;
428	if (Value *Replacement = getReplacement(V: FalseValue))
429	FalseValue = Replacement;
430	auto *NewSI = SelectInst::Create(C: SI->getCondition(), S1: TrueValue, S2: FalseValue,
431	NameStr: SI->getName(), InsertBefore: nullptr, MDFrom: SI);
432	IC.InsertNewInstWith(New: NewSI, Old: SI->getIterator());
433	NewSI->takeName(V: SI);
434	WorkMap [SI] = NewSI;
435	} else if (auto *MemCpy = dyn_cast<MemTransferInst>(Val: I)) {
436	auto *DestV = MemCpy->getRawDest();
437	auto *SrcV = MemCpy->getRawSource();
438
439	if (auto *DestReplace = getReplacement(V: DestV))
440	DestV = DestReplace;
441	if (auto *SrcReplace = getReplacement(V: SrcV))
442	SrcV = SrcReplace;
443
444	IC.Builder.SetInsertPoint(MemCpy);
445	auto *NewI = IC.Builder.CreateMemTransferInst(
446	IntrID: MemCpy->getIntrinsicID(), Dst: DestV, DstAlign: MemCpy->getDestAlign(), Src: SrcV,
447	SrcAlign: MemCpy->getSourceAlign(), Size: MemCpy->getLength(), isVolatile: MemCpy->isVolatile());
448	AAMDNodes AAMD = MemCpy->getAAMetadata();
449	if (AAMD)
450	NewI->setAAMetadata(AAMD);
451
452	IC.eraseInstFromFunction(I&: *MemCpy);
453	WorkMap [MemCpy] = NewI;
454	} else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Val: I)) {
455	auto *V = getReplacement(V: ASC->getPointerOperand());
456	assert(V && "Operand not replaced");
457	assert(isEqualOrValidAddrSpaceCast(
458	ASC, V->getType()->getPointerAddressSpace()) &&
459	"Invalid address space cast!");
460
461	if (V->getType()->getPointerAddressSpace() !=
462	ASC->getType()->getPointerAddressSpace()) {
463	auto NewI = new* AddrSpaceCastInst (V, ASC->getType(), "");
464	NewI->takeName(V: ASC);
465	IC.InsertNewInstWith(New: NewI, Old: ASC->getIterator());
466	WorkMap [ASC] = NewI;
467	} else {
468	WorkMap [ASC] = V;
469	}
470
471	} else {
472	llvm_unreachable("should never reach here");
473	}
474	}
475
476	Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
477	if (auto I = simplifyAllocaArraySize(IC&: this, AI, DT))
478	return I;
479
480	if (AI.getAllocatedType()->isSized()) {
481	// Move all alloca's of zero byte objects to the entry block and merge them
482	// together. Note that we only do this for alloca's, because malloc should
483	// allocate and return a unique pointer, even for a zero byte allocation.
484	if (DL.getTypeAllocSize(Ty: AI.getAllocatedType()).getKnownMinValue() == `0`) {
485	// For a zero sized alloca there is no point in doing an array allocation.
486	// This is helpful if the array size is a complicated expression not used
487	// elsewhere.
488	if (AI.isArrayAllocation())
489	return replaceOperand(I&: AI, OpNum: `0`,
490	V: ConstantInt::get(Ty: AI.getArraySize()->getType(), V: `1`));
491
492	// Get the first instruction in the entry block.
493	BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
494	BasicBlock::iterator FirstInst = EntryBlock.getFirstNonPHIOrDbg();
495	if (&*FirstInst != &AI) {
496	// If the entry block doesn't start with a zero-size alloca then move
497	// this one to the start of the entry block. There is no problem with
498	// dominance as the array size was forced to a constant earlier already.
499	AllocaInst *EntryAI = dyn_cast<AllocaInst>(Val&: FirstInst);
500	if (!EntryAI \|\| !EntryAI->getAllocatedType()->isSized() \|\|
501	DL.getTypeAllocSize(Ty: EntryAI->getAllocatedType())
502	.getKnownMinValue() != `0`) {
503	AI.moveBefore(InsertPos: FirstInst);
504	return &AI;
505	}
506
507	// Replace this zero-sized alloca with the one at the start of the entry
508	// block after ensuring that the address will be aligned enough for both
509	// types.
510	const Align MaxAlign = std::max(a: EntryAI->getAlign(), b: AI.getAlign());
511	EntryAI->setAlignment(MaxAlign);
512	return replaceInstUsesWith(I&: AI, V: EntryAI);
513	}
514	}
515	}
516
517	// Check to see if this allocation is only modified by a memcpy/memmove from
518	// a memory location whose alignment is equal to or exceeds that of the
519	// allocation. If this is the case, we can change all users to use the
520	// constant memory location instead. This is commonly produced by the CFE by
521	// constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
522	// is only subsequently read.
523	SmallVector<Instruction *, `4`> ToDelete;
524	if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, AI: &AI, ToDelete)) {
525	Value *TheSrc = Copy->getSource();
526	Align AllocaAlign = AI.getAlign();
527	Align SourceAlign = getOrEnforceKnownAlignment(
528	V: TheSrc, PrefAlign: AllocaAlign, DL, CxtI: &AI, AC: &AC, DT: &DT);
529	if (AllocaAlign <= SourceAlign &&
530	isDereferenceableForAllocaSize(V: TheSrc, AI: &AI, DL) &&
531	!isa<Instruction>(Val: TheSrc)) {
532	// FIXME: Can we sink instructions without violating dominance when TheSrc
533	// is an instruction instead of a constant or argument?
534	LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << `'\n'`);
535	LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << `'\n'`);
536	unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
537	if (AI.getAddressSpace() == SrcAddrSpace) {
538	for (Instruction *Delete : ToDelete)
539	eraseInstFromFunction(I&: *Delete);
540
541	Instruction *NewI = replaceInstUsesWith(I&: AI, V: TheSrc);
542	eraseInstFromFunction(I&: *Copy);
543	++NumGlobalCopies;
544	return NewI;
545	}
546
547	PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
548	if (PtrReplacer.collectUsers()) {
549	for (Instruction *Delete : ToDelete)
550	eraseInstFromFunction(I&: *Delete);
551
552	PtrReplacer.replacePointer(V: TheSrc);
553	++NumGlobalCopies;
554	}
555	}
556	}
557
558	// At last, use the generic allocation site handler to aggressively remove
559	// unused allocas.
560	return visitAllocSite(FI&: AI);
561	}
562
563	// Are we allowed to form a atomic load or store of this type?
564	static bool isSupportedAtomicType(Type *Ty) {
565	return Ty->isIntOrPtrTy() \|\| Ty->isFloatingPointTy();
566	}
567
568	/// Helper to combine a load to a new type.
569	///
570	/// This just does the work of combining a load to a new type. It handles
571	/// metadata, etc., and returns the new instruction. The \c NewTy should be the
572	/// loaded value* type. This will convert it to a pointer, cast the operand to*
573	/// that pointer type, load it, etc.
574	///
575	/// Note that this will create all of the instructions with whatever insert
576	/// point the \c InstCombinerImpl currently is using.
577	LoadInst InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type NewTy,
578	const Twine &Suffix) {
579	assert((!LI.isAtomic() \|\| isSupportedAtomicType(NewTy)) &&
580	"can't fold an atomic load to requested type");
581
582	LoadInst *NewLoad =
583	Builder.CreateAlignedLoad(Ty: NewTy, Ptr: LI.getPointerOperand(), Align: LI.getAlign(),
584	isVolatile: LI.isVolatile(), Name: LI.getName() + Suffix);
585	NewLoad->setAtomic(Ordering: LI.getOrdering(), SSID: LI.getSyncScopeID());
586	copyMetadataForLoad(Dest&: *NewLoad, Source: LI);
587	return NewLoad;
588	}
589
590	/// Combine a store to a new type.
591	///
592	/// Returns the newly created store instruction.
593	static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
594	Value *V) {
595	assert((!SI.isAtomic() \|\| isSupportedAtomicType(V->getType())) &&
596	"can't fold an atomic store of requested type");
597
598	Value *Ptr = SI.getPointerOperand();
599	SmallVector<std::pair<unsigned, MDNode *>, `8`> MD;
600	SI.getAllMetadata(MDs&: MD);
601
602	StoreInst *NewStore =
603	IC.Builder.CreateAlignedStore(Val: V, Ptr, Align: SI.getAlign(), isVolatile: SI.isVolatile());
604	NewStore->setAtomic(Ordering: SI.getOrdering(), SSID: SI.getSyncScopeID());
605	for (const auto &MDPair : MD) {
606	unsigned ID = MDPair.first;
607	MDNode *N = MDPair.second;
608	// Note, essentially every kind of metadata should be preserved here! This
609	// routine is supposed to clone a store instruction changing only its*
610	// type. The only metadata it makes sense to drop is metadata which is*
611	// invalidated when the pointer type changes. This should essentially
612	// never be the case in LLVM, but we explicitly switch over only known
613	// metadata to be conservatively correct. If you are adding metadata to
614	// LLVM which pertains to stores, you almost certainly want to add it
615	// here.
616	switch (ID) {
617	case LLVMContext::MD_dbg:
618	case LLVMContext::MD_DIAssignID:
619	case LLVMContext::MD_tbaa:
620	case LLVMContext::MD_prof:
621	case LLVMContext::MD_fpmath:
622	case LLVMContext::MD_tbaa_struct:
623	case LLVMContext::MD_alias_scope:
624	case LLVMContext::MD_noalias:
625	case LLVMContext::MD_nontemporal:
626	case LLVMContext::MD_mem_parallel_loop_access:
627	case LLVMContext::MD_access_group:
628	// All of these directly apply.
629	NewStore->setMetadata(KindID: ID, Node: N);
630	break;
631	case LLVMContext::MD_invariant_load:
632	case LLVMContext::MD_nonnull:
633	case LLVMContext::MD_noundef:
634	case LLVMContext::MD_range:
635	case LLVMContext::MD_align:
636	case LLVMContext::MD_dereferenceable:
637	case LLVMContext::MD_dereferenceable_or_null:
638	// These don't apply for stores.
639	break;
640	}
641	}
642
643	return NewStore;
644	}
645
646	/// Combine loads to match the type of their uses' value after looking
647	/// through intervening bitcasts.
648	///
649	/// The core idea here is that if the result of a load is used in an operation,
650	/// we should load the type most conducive to that operation. For example, when
651	/// loading an integer and converting that immediately to a pointer, we should
652	/// instead directly load a pointer.
653	///
654	/// However, this routine must never change the width of a load or the number of
655	/// loads as that would introduce a semantic change. This combine is expected to
656	/// be a semantic no-op which just allows loads to more closely model the types
657	/// of their consuming operations.
658	///
659	/// Currently, we also refuse to change the precise type used for an atomic load
660	/// or a volatile load. This is debatable, and might be reasonable to change
661	/// later. However, it is risky in case some backend or other part of LLVM is
662	/// relying on the exact type loaded to select appropriate atomic operations.
663	static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
664	LoadInst &Load) {
665	// FIXME: We could probably with some care handle both volatile and ordered
666	// atomic loads here but it isn't clear that this is important.
667	if (!Load.isUnordered())
668	return nullptr;
669
670	if (Load.use_empty())
671	return nullptr;
672
673	// swifterror values can't be bitcasted.
674	if (Load.getPointerOperand()->isSwiftError())
675	return nullptr;
676
677	// Fold away bit casts of the loaded value by loading the desired type.
678	// Note that we should not do this for pointer<->integer casts,
679	// because that would result in type punning.
680	if (Load.hasOneUse()) {
681	// Don't transform when the type is x86_amx, it makes the pass that lower
682	// x86_amx type happy.
683	Type *LoadTy = Load.getType();
684	if (auto *BC = dyn_cast<BitCastInst>(Val: Load.user_back())) {
685	assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
686	if (BC->getType()->isX86_AMXTy())
687	return nullptr;
688	}
689
690	if (auto *CastUser = dyn_cast<CastInst>(Val: Load.user_back())) {
691	Type *DestTy = CastUser->getDestTy();
692	if (CastUser->isNoopCast(DL: IC.getDataLayout()) &&
693	LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
694	(!Load.isAtomic() \|\| isSupportedAtomicType(Ty: DestTy))) {
695	LoadInst *NewLoad = IC.combineLoadToNewType(LI&: Load, NewTy: DestTy);
696	CastUser->replaceAllUsesWith(V: NewLoad);
697	IC.eraseInstFromFunction(I&: *CastUser);
698	return &Load;
699	}
700	}
701	}
702
703	// FIXME: We should also canonicalize loads of vectors when their elements are
704	// cast to other types.
705	return nullptr;
706	}
707
708	static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
709	// FIXME: We could probably with some care handle both volatile and atomic
710	// stores here but it isn't clear that this is important.
711	if (!LI.isSimple())
712	return nullptr;
713
714	Type *T = LI.getType();
715	if (!T->isAggregateType())
716	return nullptr;
717
718	StringRef Name = LI.getName();
719
720	if (auto *ST = dyn_cast<StructType>(Val: T)) {
721	// If the struct only have one element, we unpack.
722	auto NumElements = ST->getNumElements();
723	if (NumElements == `1`) {
724	LoadInst *NewLoad = IC.combineLoadToNewType(LI, NewTy: ST->getTypeAtIndex(N: `0U`),
725	Suffix: ".unpack");
726	NewLoad->setAAMetadata(LI.getAAMetadata());
727	return IC.replaceInstUsesWith(I&: LI, V: IC.Builder.CreateInsertValue(
728	Agg: PoisonValue::get(T), Val: NewLoad, Idxs: `0`, Name));
729	}
730
731	// We don't want to break loads with padding here as we'd loose
732	// the knowledge that padding exists for the rest of the pipeline.
733	const DataLayout &DL = IC.getDataLayout();
734	auto *SL = DL.getStructLayout(Ty: ST);
735
736	if (SL->hasPadding())
737	return nullptr;
738
739	const auto Align = LI.getAlign();
740	auto *Addr = LI.getPointerOperand();
741	auto *IdxType = DL.getIndexType(PtrTy: Addr->getType());
742
743	Value *V = PoisonValue::get(T);
744	for (unsigned i = `0`; i < NumElements; i++) {
745	auto *Ptr = IC.Builder.CreateInBoundsPtrAdd(
746	Ptr: Addr, Offset: IC.Builder.CreateTypeSize(Ty: IdxType, Size: SL->getElementOffset(Idx: i)),
747	Name: Name + ".elt");
748	auto *L = IC.Builder.CreateAlignedLoad(
749	Ty: ST->getElementType(N: i), Ptr,
750	Align: commonAlignment(A: Align, Offset: SL->getElementOffset(Idx: i).getKnownMinValue()),
751	Name: Name + ".unpack");
752	// Propagate AA metadata. It'll still be valid on the narrowed load.
753	L->setAAMetadata(LI.getAAMetadata());
754	V = IC.Builder.CreateInsertValue(Agg: V, Val: L, Idxs: i);
755	}
756
757	V->setName(Name);
758	return IC.replaceInstUsesWith(I&: LI, V);
759	}
760
761	if (auto *AT = dyn_cast<ArrayType>(Val: T)) {
762	auto *ET = AT->getElementType();
763	auto NumElements = AT->getNumElements();
764	if (NumElements == `1`) {
765	LoadInst *NewLoad = IC.combineLoadToNewType(LI, NewTy: ET, Suffix: ".unpack");
766	NewLoad->setAAMetadata(LI.getAAMetadata());
767	return IC.replaceInstUsesWith(I&: LI, V: IC.Builder.CreateInsertValue(
768	Agg: PoisonValue::get(T), Val: NewLoad, Idxs: `0`, Name));
769	}
770
771	// Bail out if the array is too large. Ideally we would like to optimize
772	// arrays of arbitrary size but this has a terrible impact on compile time.
773	// The threshold here is chosen arbitrarily, maybe needs a little bit of
774	// tuning.
775	if (NumElements > IC.MaxArraySizeForCombine)
776	return nullptr;
777
778	const DataLayout &DL = IC.getDataLayout();
779	TypeSize EltSize = DL.getTypeAllocSize(Ty: ET);
780	const auto Align = LI.getAlign();
781
782	auto *Addr = LI.getPointerOperand();
783	auto *IdxType = Type::getInt64Ty(C&: T->getContext());
784	auto *Zero = ConstantInt::get(Ty: IdxType, V: `0`);
785
786	Value *V = PoisonValue::get(T);
787	TypeSize Offset = TypeSize::getZero();
788	for (uint64_t i = `0`; i < NumElements; i++) {
789	Value *Indices[`2`] = {
790	Zero,
791	ConstantInt::get(Ty: IdxType, V: i),
792	};
793	auto *Ptr = IC.Builder.CreateInBoundsGEP(Ty: AT, Ptr: Addr, IdxList: ArrayRef(Indices),
794	Name: Name + ".elt");
795	auto EltAlign = commonAlignment(A: Align, Offset: Offset.getKnownMinValue());
796	auto *L = IC.Builder.CreateAlignedLoad(Ty: AT->getElementType(), Ptr,
797	Align: EltAlign, Name: Name + ".unpack");
798	L->setAAMetadata(LI.getAAMetadata());
799	V = IC.Builder.CreateInsertValue(Agg: V, Val: L, Idxs: i);
800	Offset += EltSize;
801	}
802
803	V->setName(Name);
804	return IC.replaceInstUsesWith(I&: LI, V);
805	}
806
807	return nullptr;
808	}
809
810	// If we can determine that all possible objects pointed to by the provided
811	// pointer value are, not only dereferenceable, but also definitively less than
812	// or equal to the provided maximum size, then return true. Otherwise, return
813	// false (constant global values and allocas fall into this category).
814	//
815	// FIXME: This should probably live in ValueTracking (or similar).
816	static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
817	const DataLayout &DL) {
818	SmallPtrSet<Value *, `4`> Visited;
819	SmallVector<Value *, `4`> Worklist(`1`, V);
820
821	do {
822	Value *P = Worklist.pop_back_val();
823	P = P->stripPointerCasts();
824
825	if (!Visited.insert(Ptr: P).second)
826	continue;
827
828	if (SelectInst *SI = dyn_cast<SelectInst>(Val: P)) {
829	Worklist.push_back(Elt: SI->getTrueValue());
830	Worklist.push_back(Elt: SI->getFalseValue());
831	continue;
832	}
833
834	if (PHINode *PN = dyn_cast<PHINode>(Val: P)) {
835	append_range(C&: Worklist, R: PN->incoming_values());
836	continue;
837	}
838
839	if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Val: P)) {
840	if (GA->isInterposable())
841	return false;
842	Worklist.push_back(Elt: GA->getAliasee());
843	continue;
844	}
845
846	// If we know how big this object is, and it is less than MaxSize, continue
847	// searching. Otherwise, return false.
848	if (AllocaInst *AI = dyn_cast<AllocaInst>(Val: P)) {
849	if (!AI->getAllocatedType()->isSized())
850	return false;
851
852	ConstantInt *CS = dyn_cast<ConstantInt>(Val: AI->getArraySize());
853	if (!CS)
854	return false;
855
856	TypeSize TS = DL.getTypeAllocSize(Ty: AI->getAllocatedType());
857	if (TS.isScalable())
858	return false;
859	// Make sure that, even if the multiplication below would wrap as an
860	// uint64_t, we still do the right thing.
861	if ((CS->getValue().zext(width: `128`) * APInt (`128`, TS.getFixedValue()))
862	.ugt(RHS: MaxSize))
863	return false;
864	continue;
865	}
866
867	if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Val: P)) {
868	if (!GV->hasDefinitiveInitializer() \|\| !GV->isConstant())
869	return false;
870
871	uint64_t InitSize = DL.getTypeAllocSize(Ty: GV->getValueType());
872	if (InitSize > MaxSize)
873	return false;
874	continue;
875	}
876
877	return false;
878	} while (!Worklist.empty());
879
880	return true;
881	}
882
883	// If we're indexing into an object of a known size, and the outer index is
884	// not a constant, but having any value but zero would lead to undefined
885	// behavior, replace it with zero.
886	//
887	// For example, if we have:
888	// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
889	// ...
890	// %arrayidx = getelementptr inbounds [1 x i32] @f.a, i64 0, i64 %x*
891	// ... = load i32 %arrayidx, align 4*
892	// Then we know that we can replace %x in the GEP with i64 0.
893	//
894	// FIXME: We could fold any GEP index to zero that would cause UB if it were
895	// not zero. Currently, we only handle the first such index. Also, we could
896	// also search through non-zero constant indices if we kept track of the
897	// offsets those indices implied.
898	static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
899	GetElementPtrInst GEPI, Instruction MemI,
900	unsigned &Idx) {
901	if (GEPI->getNumOperands() < `2`)
902	return false;
903
904	// Find the first non-zero index of a GEP. If all indices are zero, return
905	// one past the last index.
906	auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
907	unsigned I = `1`;
908	for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
909	Value *V = GEPI->getOperand(i_nocapture: I);
910	if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: V))
911	if (CI->isZero())
912	continue;
913
914	break;
915	}
916
917	return I;
918	};
919
920	// Skip through initial 'zero' indices, and find the corresponding pointer
921	// type. See if the next index is not a constant.
922	Idx = FirstNZIdx (GEPI);
923	if (Idx == GEPI->getNumOperands())
924	return false;
925	if (isa<Constant>(Val: GEPI->getOperand(i_nocapture: Idx)))
926	return false;
927
928	SmallVector<Value *, `4`> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
929	Type *SourceElementType = GEPI->getSourceElementType();
930	// Size information about scalable vectors is not available, so we cannot
931	// deduce whether indexing at n is undefined behaviour or not. Bail out.
932	if (SourceElementType->isScalableTy())
933	return false;
934
935	Type *AllocTy = GetElementPtrInst::getIndexedType(Ty: SourceElementType, IdxList: Ops);
936	if (!AllocTy \|\| !AllocTy->isSized())
937	return false;
938	const DataLayout &DL = IC.getDataLayout();
939	uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: AllocTy).getFixedValue();
940
941	// If there are more indices after the one we might replace with a zero, make
942	// sure they're all non-negative. If any of them are negative, the overall
943	// address being computed might be before the base address determined by the
944	// first non-zero index.
945	auto IsAllNonNegative = [&]() {
946	for (unsigned i = Idx+`1`, e = GEPI->getNumOperands(); i != e; ++i) {
947	KnownBits Known = IC.computeKnownBits(V: GEPI->getOperand(i_nocapture: i), CxtI: MemI);
948	if (Known.isNonNegative())
949	continue;
950	return false;
951	}
952
953	return true;
954	};
955
956	// FIXME: If the GEP is not inbounds, and there are extra indices after the
957	// one we'll replace, those could cause the address computation to wrap
958	// (rendering the IsAllNonNegative() check below insufficient). We can do
959	// better, ignoring zero indices (and other indices we can prove small
960	// enough not to wrap).
961	if (Idx+`1` != GEPI->getNumOperands() && !GEPI->isInBounds())
962	return false;
963
964	// Note that isObjectSizeLessThanOrEq will return true only if the pointer is
965	// also known to be dereferenceable.
966	return isObjectSizeLessThanOrEq(V: GEPI->getOperand(i_nocapture: `0`), MaxSize: TyAllocSize, DL) &&
967	IsAllNonNegative ();
968	}
969
970	// If we're indexing into an object with a variable index for the memory
971	// access, but the object has only one element, we can assume that the index
972	// will always be zero. If we replace the GEP, return it.
973	static Instruction replaceGEPIdxWithZero(InstCombinerImpl &IC, Value Ptr,
974	Instruction &MemI) {
975	if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Val: Ptr)) {
976	unsigned Idx;
977	if (canReplaceGEPIdxWithZero(IC, GEPI, MemI: &MemI, Idx)) {
978	Instruction *NewGEPI = GEPI->clone();
979	NewGEPI->setOperand(i: Idx,
980	Val: ConstantInt::get(Ty: GEPI->getOperand(i_nocapture: Idx)->getType(), V: `0`));
981	IC.InsertNewInstBefore(New: NewGEPI, Old: GEPI->getIterator());
982	return NewGEPI;
983	}
984	}
985
986	return nullptr;
987	}
988
989	static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
990	if (NullPointerIsDefined(F: SI.getFunction(), AS: SI.getPointerAddressSpace()))
991	return false;
992
993	auto *Ptr = SI.getPointerOperand();
994	if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Val: Ptr))
995	Ptr = GEPI->getOperand(i_nocapture: `0`);
996	return (isa<ConstantPointerNull>(Val: Ptr) &&
997	!NullPointerIsDefined(F: SI.getFunction(), AS: SI.getPointerAddressSpace()));
998	}
999
1000	static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
1001	if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Val: Op)) {
1002	const Value *GEPI0 = GEPI->getOperand(i_nocapture: `0`);
1003	if (isa<ConstantPointerNull>(Val: GEPI0) &&
1004	!NullPointerIsDefined(F: LI.getFunction(), AS: GEPI->getPointerAddressSpace()))
1005	return true;
1006	}
1007	if (isa<UndefValue>(Val: Op) \|\|
1008	(isa<ConstantPointerNull>(Val: Op) &&
1009	!NullPointerIsDefined(F: LI.getFunction(), AS: LI.getPointerAddressSpace())))
1010	return true;
1011	return false;
1012	}
1013
1014	Value InstCombinerImpl::simplifyNonNullOperand(Value V,
1015	bool HasDereferenceable,
1016	unsigned Depth) {
1017	if (auto *Sel = dyn_cast<SelectInst>(Val: V)) {
1018	if (isa<ConstantPointerNull>(Val: Sel->getOperand(i_nocapture: `1`)))
1019	return Sel->getOperand(i_nocapture: `2`);
1020
1021	if (isa<ConstantPointerNull>(Val: Sel->getOperand(i_nocapture: `2`)))
1022	return Sel->getOperand(i_nocapture: `1`);
1023	}
1024
1025	if (!V->hasOneUse())
1026	return nullptr;
1027
1028	constexpr unsigned RecursionLimit = `3`;
1029	if (Depth == RecursionLimit)
1030	return nullptr;
1031
1032	if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: V)) {
1033	if (HasDereferenceable \|\| GEP->isInBounds()) {
1034	if (auto *Res = simplifyNonNullOperand(V: GEP->getPointerOperand(),
1035	HasDereferenceable, Depth: Depth + `1`)) {
1036	replaceOperand(I&: *GEP, OpNum: `0`, V: Res);
1037	addToWorklist(I: GEP);
1038	return nullptr;
1039	}
1040	}
1041	}
1042
1043	if (auto *PHI = dyn_cast<PHINode>(Val: V)) {
1044	bool Changed = false;
1045	for (Use &U : PHI->incoming_values()) {
1046	// We set Depth to RecursionLimit to avoid expensive recursion.
1047	if (auto *Res = simplifyNonNullOperand(V: U.get(), HasDereferenceable,
1048	Depth: RecursionLimit)) {
1049	replaceUse(U, NewValue: Res);
1050	Changed = true;
1051	}
1052	}
1053	if (Changed)
1054	addToWorklist(I: PHI);
1055	return nullptr;
1056	}
1057
1058	return nullptr;
1059	}
1060
1061	Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
1062	Value *Op = LI.getOperand(i_nocapture: `0`);
1063	if (Value *Res = simplifyLoadInst(LI: &LI, PtrOp: Op, Q: SQ.getWithInstruction(I: &LI)))
1064	return replaceInstUsesWith(I&: LI, V: Res);
1065
1066	// Try to canonicalize the loaded type.
1067	if (Instruction Res = combineLoadToOperationType(IC&: this, Load&: LI))
1068	return Res;
1069
1070	// Replace GEP indices if possible.
1071	if (Instruction NewGEPI = replaceGEPIdxWithZero(IC&: this, Ptr: Op, MemI&: LI))
1072	return replaceOperand(I&: LI, OpNum: `0`, V: NewGEPI);
1073
1074	if (Instruction Res = unpackLoadToAggregate(IC&: this, LI))
1075	return Res;
1076
1077	// Do really simple store-to-load forwarding and load CSE, to catch cases
1078	// where there are several consecutive memory accesses to the same location,
1079	// separated by a few arithmetic operations.
1080	bool IsLoadCSE = false;
1081	BatchAAResults BatchAA(*AA);
1082	if (Value *AvailableVal = FindAvailableLoadedValue(Load: &LI, AA&: BatchAA, IsLoadCSE: &IsLoadCSE)) {
1083	if (IsLoadCSE)
1084	combineMetadataForCSE(K: cast<LoadInst>(Val: AvailableVal), J: &LI, DoesKMove: false);
1085
1086	return replaceInstUsesWith(
1087	I&: LI, V: Builder.CreateBitOrPointerCast(V: AvailableVal, DestTy: LI.getType(),
1088	Name: LI.getName() + ".cast"));
1089	}
1090
1091	// None of the following transforms are legal for volatile/ordered atomic
1092	// loads. Most of them do apply for unordered atomics.
1093	if (!LI.isUnordered()) return nullptr;
1094
1095	// load(gep null, ...) -> unreachable
1096	// load null/undef -> unreachable
1097	// TODO: Consider a target hook for valid address spaces for this xforms.
1098	if (canSimplifyNullLoadOrGEP(LI, Op)) {
1099	CreateNonTerminatorUnreachable(InsertAt: &LI);
1100	return replaceInstUsesWith(I&: LI, V: PoisonValue::get(T: LI.getType()));
1101	}
1102
1103	if (Op->hasOneUse()) {
1104	// Change select and PHI nodes to select values instead of addresses: this
1105	// helps alias analysis out a lot, allows many others simplifications, and
1106	// exposes redundancy in the code.
1107	//
1108	// Note that we cannot do the transformation unless we know that the
1109	// introduced loads cannot trap! Something like this is valid as long as
1110	// the condition is always false: load (select bool %C, int null, int* %G),*
1111	// but it would not be valid if we transformed it to load from null
1112	// unconditionally.
1113	//
1114	if (SelectInst *SI = dyn_cast<SelectInst>(Val: Op)) {
1115	// load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1116	Align Alignment = LI.getAlign();
1117	if (isSafeToLoadUnconditionally(V: SI->getOperand(i_nocapture: `1`), Ty: LI.getType(),
1118	Alignment, DL, ScanFrom: SI) &&
1119	isSafeToLoadUnconditionally(V: SI->getOperand(i_nocapture: `2`), Ty: LI.getType(),
1120	Alignment, DL, ScanFrom: SI)) {
1121	LoadInst *V1 =
1122	Builder.CreateLoad(Ty: LI.getType(), Ptr: SI->getOperand(i_nocapture: `1`),
1123	Name: SI->getOperand(i_nocapture: `1`)->getName() + ".val");
1124	LoadInst *V2 =
1125	Builder.CreateLoad(Ty: LI.getType(), Ptr: SI->getOperand(i_nocapture: `2`),
1126	Name: SI->getOperand(i_nocapture: `2`)->getName() + ".val");
1127	assert(LI.isUnordered() && "implied by above");
1128	V1->setAlignment(Alignment);
1129	V1->setAtomic(Ordering: LI.getOrdering(), SSID: LI.getSyncScopeID());
1130	V2->setAlignment(Alignment);
1131	V2->setAtomic(Ordering: LI.getOrdering(), SSID: LI.getSyncScopeID());
1132	// It is safe to copy any metadata that does not trigger UB. Copy any
1133	// poison-generating metadata.
1134	V1->copyMetadata(SrcInst: LI, WL: Metadata::PoisonGeneratingIDs);
1135	V2->copyMetadata(SrcInst: LI, WL: Metadata::PoisonGeneratingIDs);
1136	return SelectInst::Create(C: SI->getCondition(), S1: V1, S2: V2);
1137	}
1138	}
1139	}
1140
1141	if (!NullPointerIsDefined(F: LI.getFunction(), AS: LI.getPointerAddressSpace()))
1142	if (Value V = simplifyNonNullOperand(V: Op, /HasDereferenceable=/*true))
1143	return replaceOperand(I&: LI, OpNum: `0`, V);
1144
1145	return nullptr;
1146	}
1147
1148	/// Look for extractelement/insertvalue sequence that acts like a bitcast.
1149	///
1150	/// \returns underlying value that was "cast", or nullptr otherwise.
1151	///
1152	/// For example, if we have:
1153	///
1154	/// %E0 = extractelement <2 x double> %U, i32 0
1155	/// %V0 = insertvalue [2 x double] undef, double %E0, 0
1156	/// %E1 = extractelement <2 x double> %U, i32 1
1157	/// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1158	///
1159	/// and the layout of a <2 x double> is isomorphic to a [2 x double],
1160	/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1161	/// Note that %U may contain non-undef values where %V1 has undef.
1162	static Value likeBitCastFromVector(InstCombinerImpl &IC, Value V) {
1163	Value U = nullptr*;
1164	while (auto *IV = dyn_cast<InsertValueInst>(Val: V)) {
1165	auto *E = dyn_cast<ExtractElementInst>(Val: IV->getInsertedValueOperand());
1166	if (!E)
1167	return nullptr;
1168	auto *W = E->getVectorOperand();
1169	if (!U)
1170	U = W;
1171	else if (U != W)
1172	return nullptr;
1173	auto *CI = dyn_cast<ConstantInt>(Val: E->getIndexOperand());
1174	if (!CI \|\| IV->getNumIndices() != `1` \|\| CI->getZExtValue() != *IV->idx_begin())
1175	return nullptr;
1176	V = IV->getAggregateOperand();
1177	}
1178	if (!match(V, P: m_Undef()) \|\| !U)
1179	return nullptr;
1180
1181	auto *UT = cast<VectorType>(Val: U->getType());
1182	auto *VT = V->getType();
1183	// Check that types UT and VT are bitwise isomorphic.
1184	const auto &DL = IC.getDataLayout();
1185	if (DL.getTypeStoreSizeInBits(Ty: UT) != DL.getTypeStoreSizeInBits(Ty: VT)) {
1186	return nullptr;
1187	}
1188	if (auto *AT = dyn_cast<ArrayType>(Val: VT)) {
1189	if (AT->getNumElements() != cast<FixedVectorType>(Val: UT)->getNumElements())
1190	return nullptr;
1191	} else {
1192	auto *ST = cast<StructType>(Val: VT);
1193	if (ST->getNumElements() != cast<FixedVectorType>(Val: UT)->getNumElements())
1194	return nullptr;
1195	for (const auto *EltT : ST->elements()) {
1196	if (EltT != UT->getElementType())
1197	return nullptr;
1198	}
1199	}
1200	return U;
1201	}
1202
1203	/// Combine stores to match the type of value being stored.
1204	///
1205	/// The core idea here is that the memory does not have any intrinsic type and
1206	/// where we can we should match the type of a store to the type of value being
1207	/// stored.
1208	///
1209	/// However, this routine must never change the width of a store or the number of
1210	/// stores as that would introduce a semantic change. This combine is expected to
1211	/// be a semantic no-op which just allows stores to more closely model the types
1212	/// of their incoming values.
1213	///
1214	/// Currently, we also refuse to change the precise type used for an atomic or
1215	/// volatile store. This is debatable, and might be reasonable to change later.
1216	/// However, it is risky in case some backend or other part of LLVM is relying
1217	/// on the exact type stored to select appropriate atomic operations.
1218	///
1219	/// \returns true if the store was successfully combined away. This indicates
1220	/// the caller must erase the store instruction. We have to let the caller erase
1221	/// the store instruction as otherwise there is no way to signal whether it was
1222	/// combined or not: IC.EraseInstFromFunction returns a null pointer.
1223	static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
1224	// FIXME: We could probably with some care handle both volatile and ordered
1225	// atomic stores here but it isn't clear that this is important.
1226	if (!SI.isUnordered())
1227	return false;
1228
1229	// swifterror values can't be bitcasted.
1230	if (SI.getPointerOperand()->isSwiftError())
1231	return false;
1232
1233	Value *V = SI.getValueOperand();
1234
1235	// Fold away bit casts of the stored value by storing the original type.
1236	if (auto *BC = dyn_cast<BitCastInst>(Val: V)) {
1237	assert(!BC->getType()->isX86_AMXTy() &&
1238	"store to x86_amx* should not happen!");
1239	V = BC->getOperand(i_nocapture: `0`);
1240	// Don't transform when the type is x86_amx, it makes the pass that lower
1241	// x86_amx type happy.
1242	if (V->getType()->isX86_AMXTy())
1243	return false;
1244	if (!SI.isAtomic() \|\| isSupportedAtomicType(Ty: V->getType())) {
1245	combineStoreToNewValue(IC, SI, V);
1246	return true;
1247	}
1248	}
1249
1250	if (Value *U = likeBitCastFromVector(IC, V))
1251	if (!SI.isAtomic() \|\| isSupportedAtomicType(Ty: U->getType())) {
1252	combineStoreToNewValue(IC, SI, V: U);
1253	return true;
1254	}
1255
1256	// FIXME: We should also canonicalize stores of vectors when their elements
1257	// are cast to other types.
1258	return false;
1259	}
1260
1261	static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
1262	// FIXME: We could probably with some care handle both volatile and atomic
1263	// stores here but it isn't clear that this is important.
1264	if (!SI.isSimple())
1265	return false;
1266
1267	Value *V = SI.getValueOperand();
1268	Type *T = V->getType();
1269
1270	if (!T->isAggregateType())
1271	return false;
1272
1273	if (auto *ST = dyn_cast<StructType>(Val: T)) {
1274	// If the struct only have one element, we unpack.
1275	unsigned Count = ST->getNumElements();
1276	if (Count == `1`) {
1277	V = IC.Builder.CreateExtractValue(Agg: V, Idxs: `0`);
1278	combineStoreToNewValue(IC, SI, V);
1279	return true;
1280	}
1281
1282	// We don't want to break loads with padding here as we'd loose
1283	// the knowledge that padding exists for the rest of the pipeline.
1284	const DataLayout &DL = IC.getDataLayout();
1285	auto *SL = DL.getStructLayout(Ty: ST);
1286
1287	if (SL->hasPadding())
1288	return false;
1289
1290	const auto Align = SI.getAlign();
1291
1292	SmallString<`16`> EltName = V->getName();
1293	EltName += ".elt";
1294	auto *Addr = SI.getPointerOperand();
1295	SmallString<`16`> AddrName = Addr->getName();
1296	AddrName += ".repack";
1297
1298	auto *IdxType = DL.getIndexType(PtrTy: Addr->getType());
1299	for (unsigned i = `0`; i < Count; i++) {
1300	auto *Ptr = IC.Builder.CreateInBoundsPtrAdd(
1301	Ptr: Addr, Offset: IC.Builder.CreateTypeSize(Ty: IdxType, Size: SL->getElementOffset(Idx: i)),
1302	Name: AddrName);
1303	auto *Val = IC.Builder.CreateExtractValue(Agg: V, Idxs: i, Name: EltName);
1304	auto EltAlign =
1305	commonAlignment(A: Align, Offset: SL->getElementOffset(Idx: i).getKnownMinValue());
1306	llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, Align: EltAlign);
1307	NS->setAAMetadata(SI.getAAMetadata());
1308	}
1309
1310	return true;
1311	}
1312
1313	if (auto *AT = dyn_cast<ArrayType>(Val: T)) {
1314	// If the array only have one element, we unpack.
1315	auto NumElements = AT->getNumElements();
1316	if (NumElements == `1`) {
1317	V = IC.Builder.CreateExtractValue(Agg: V, Idxs: `0`);
1318	combineStoreToNewValue(IC, SI, V);
1319	return true;
1320	}
1321
1322	// Bail out if the array is too large. Ideally we would like to optimize
1323	// arrays of arbitrary size but this has a terrible impact on compile time.
1324	// The threshold here is chosen arbitrarily, maybe needs a little bit of
1325	// tuning.
1326	if (NumElements > IC.MaxArraySizeForCombine)
1327	return false;
1328
1329	const DataLayout &DL = IC.getDataLayout();
1330	TypeSize EltSize = DL.getTypeAllocSize(Ty: AT->getElementType());
1331	const auto Align = SI.getAlign();
1332
1333	SmallString<`16`> EltName = V->getName();
1334	EltName += ".elt";
1335	auto *Addr = SI.getPointerOperand();
1336	SmallString<`16`> AddrName = Addr->getName();
1337	AddrName += ".repack";
1338
1339	auto *IdxType = Type::getInt64Ty(C&: T->getContext());
1340	auto *Zero = ConstantInt::get(Ty: IdxType, V: `0`);
1341
1342	TypeSize Offset = TypeSize::getZero();
1343	for (uint64_t i = `0`; i < NumElements; i++) {
1344	Value *Indices[`2`] = {
1345	Zero,
1346	ConstantInt::get(Ty: IdxType, V: i),
1347	};
1348	auto *Ptr =
1349	IC.Builder.CreateInBoundsGEP(Ty: AT, Ptr: Addr, IdxList: ArrayRef(Indices), Name: AddrName);
1350	auto *Val = IC.Builder.CreateExtractValue(Agg: V, Idxs: i, Name: EltName);
1351	auto EltAlign = commonAlignment(A: Align, Offset: Offset.getKnownMinValue());
1352	Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, Align: EltAlign);
1353	NS->setAAMetadata(SI.getAAMetadata());
1354	Offset += EltSize;
1355	}
1356
1357	return true;
1358	}
1359
1360	return false;
1361	}
1362
1363	/// equivalentAddressValues - Test if A and B will obviously have the same
1364	/// value. This includes recognizing that %t0 and %t1 will have the same
1365	/// value in code like this:
1366	/// %t0 = getelementptr \@a, 0, 3
1367	/// store i32 0, i32 %t0*
1368	/// %t1 = getelementptr \@a, 0, 3
1369	/// %t2 = load i32 %t1*
1370	///
1371	static bool equivalentAddressValues(Value A, Value B) {
1372	// Test if the values are trivially equivalent.
1373	if (A == B) return true;
1374
1375	// Test if the values come form identical arithmetic instructions.
1376	// This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1377	// its only used to compare two uses within the same basic block, which
1378	// means that they'll always either have the same value or one of them
1379	// will have an undefined value.
1380	if (isa<BinaryOperator>(Val: A) \|\|
1381	isa<CastInst>(Val: A) \|\|
1382	isa<PHINode>(Val: A) \|\|
1383	isa<GetElementPtrInst>(Val: A))
1384	if (Instruction *BI = dyn_cast<Instruction>(Val: B))
1385	if (cast<Instruction>(Val: A)->isIdenticalToWhenDefined(I: BI))
1386	return true;
1387
1388	// Otherwise they may not be equivalent.
1389	return false;
1390	}
1391
1392	Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
1393	Value *Val = SI.getOperand(i_nocapture: `0`);
1394	Value *Ptr = SI.getOperand(i_nocapture: `1`);
1395
1396	// Try to canonicalize the stored type.
1397	if (combineStoreToValueType(IC&: *this, SI))
1398	return eraseInstFromFunction(I&: SI);
1399
1400	// Try to canonicalize the stored type.
1401	if (unpackStoreToAggregate(IC&: *this, SI))
1402	return eraseInstFromFunction(I&: SI);
1403
1404	// Replace GEP indices if possible.
1405	if (Instruction NewGEPI = replaceGEPIdxWithZero(IC&: this, Ptr, MemI&: SI))
1406	return replaceOperand(I&: SI, OpNum: `1`, V: NewGEPI);
1407
1408	// Don't hack volatile/ordered stores.
1409	// FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1410	if (!SI.isUnordered()) return nullptr;
1411
1412	// If the RHS is an alloca with a single use, zapify the store, making the
1413	// alloca dead.
1414	if (Ptr->hasOneUse()) {
1415	if (isa<AllocaInst>(Val: Ptr))
1416	return eraseInstFromFunction(I&: SI);
1417	if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: Ptr)) {
1418	if (isa<AllocaInst>(Val: GEP->getOperand(i_nocapture: `0`))) {
1419	if (GEP->getOperand(i_nocapture: `0`)->hasOneUse())
1420	return eraseInstFromFunction(I&: SI);
1421	}
1422	}
1423	}
1424
1425	// If we have a store to a location which is known constant, we can conclude
1426	// that the store must be storing the constant value (else the memory
1427	// wouldn't be constant), and this must be a noop.
1428	if (!isModSet(MRI: AA->getModRefInfoMask(P: Ptr)))
1429	return eraseInstFromFunction(I&: SI);
1430
1431	// Do really simple DSE, to catch cases where there are several consecutive
1432	// stores to the same location, separated by a few arithmetic operations. This
1433	// situation often occurs with bitfield accesses.
1434	BasicBlock::iterator BBI(SI);
1435	for (unsigned ScanInsts = `6`; BBI != SI.getParent()->begin() && ScanInsts;
1436	--ScanInsts) {
1437	--BBI;
1438	// Don't count debug info directives, lest they affect codegen,
1439	// and we skip pointer-to-pointer bitcasts, which are NOPs.
1440	if (BBI ->isDebugOrPseudoInst()) {
1441	ScanInsts++;
1442	continue;
1443	}
1444
1445	if (StoreInst *PrevSI = dyn_cast<StoreInst>(Val&: BBI)) {
1446	// Prev store isn't volatile, and stores to the same location?
1447	if (PrevSI->isUnordered() &&
1448	equivalentAddressValues(A: PrevSI->getOperand(i_nocapture: `1`), B: SI.getOperand(i_nocapture: `1`)) &&
1449	PrevSI->getValueOperand()->getType() ==
1450	SI.getValueOperand()->getType()) {
1451	++NumDeadStore;
1452	// Manually add back the original store to the worklist now, so it will
1453	// be processed after the operands of the removed store, as this may
1454	// expose additional DSE opportunities.
1455	Worklist.push(I: &SI);
1456	eraseInstFromFunction(I&: *PrevSI);
1457	return nullptr;
1458	}
1459	break;
1460	}
1461
1462	// If this is a load, we have to stop. However, if the loaded value is from
1463	// the pointer we're loading and is producing the pointer we're storing,
1464	// then this* store is dead (X = load P; store X -> P).*
1465	if (LoadInst *LI = dyn_cast<LoadInst>(Val&: BBI)) {
1466	if (LI == Val && equivalentAddressValues(A: LI->getOperand(i_nocapture: `0`), B: Ptr)) {
1467	assert(SI.isUnordered() && "can't eliminate ordering operation");
1468	return eraseInstFromFunction(I&: SI);
1469	}
1470
1471	// Otherwise, this is a load from some other location. Stores before it
1472	// may not be dead.
1473	break;
1474	}
1475
1476	// Don't skip over loads, throws or things that can modify memory.
1477	if (BBI ->mayWriteToMemory() \|\| BBI ->mayReadFromMemory() \|\| BBI ->mayThrow())
1478	break;
1479	}
1480
1481	// store X, null -> turns into 'unreachable' in SimplifyCFG
1482	// store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1483	if (canSimplifyNullStoreOrGEP(SI)) {
1484	if (!isa<PoisonValue>(Val))
1485	return replaceOperand(I&: SI, OpNum: `0`, V: PoisonValue::get(T: Val->getType()));
1486	return nullptr; // Do not modify these!
1487	}
1488
1489	// This is a non-terminator unreachable marker. Don't remove it.
1490	if (isa<UndefValue>(Val: Ptr)) {
1491	// Remove guaranteed-to-transfer instructions before the marker.
1492	if (removeInstructionsBeforeUnreachable(I&: SI))
1493	return &SI;
1494
1495	// Remove all instructions after the marker and handle dead blocks this
1496	// implies.
1497	SmallVector<BasicBlock *> Worklist;
1498	handleUnreachableFrom(I: SI.getNextNode(), Worklist);
1499	handlePotentiallyDeadBlocks(Worklist);
1500	return nullptr;
1501	}
1502
1503	// store undef, Ptr -> noop
1504	// FIXME: This is technically incorrect because it might overwrite a poison
1505	// value. Change to PoisonValue once #52930 is resolved.
1506	if (isa<UndefValue>(Val))
1507	return eraseInstFromFunction(I&: SI);
1508
1509	if (!NullPointerIsDefined(F: SI.getFunction(), AS: SI.getPointerAddressSpace()))
1510	if (Value V = simplifyNonNullOperand(V: Ptr, /HasDereferenceable=/*true))
1511	return replaceOperand(I&: SI, OpNum: `1`, V);
1512
1513	return nullptr;
1514	}
1515
1516	/// Try to transform:
1517	/// if () { P = v1; } else { P = v2 }
1518	/// or:
1519	/// P = v1; if () { P = v2; }
1520	/// into a phi node with a store in the successor.
1521	bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
1522	if (!SI.isUnordered())
1523	return false; // This code has not been audited for volatile/ordered case.
1524
1525	// Check if the successor block has exactly 2 incoming edges.
1526	BasicBlock *StoreBB = SI.getParent();
1527	BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(Idx: `0`);
1528	if (!DestBB->hasNPredecessors(N: `2`))
1529	return false;
1530
1531	// Capture the other block (the block that doesn't contain our store).
1532	pred_iterator PredIter = pred_begin(BB: DestBB);
1533	if (*PredIter == StoreBB)
1534	++PredIter;
1535	BasicBlock OtherBB = PredIter;
1536
1537	// Bail out if all of the relevant blocks aren't distinct. This can happen,
1538	// for example, if SI is in an infinite loop.
1539	if (StoreBB == DestBB \|\| OtherBB == DestBB)
1540	return false;
1541
1542	// Verify that the other block ends in a branch and is not otherwise empty.
1543	BasicBlock::iterator BBI(OtherBB->getTerminator());
1544	BranchInst *OtherBr = dyn_cast<BranchInst>(Val&: BBI);
1545	if (!OtherBr \|\| BBI == OtherBB->begin())
1546	return false;
1547
1548	auto OtherStoreIsMergeable = [&](StoreInst OtherStore) -> bool* {
1549	if (!OtherStore \|\|
1550	OtherStore->getPointerOperand() != SI.getPointerOperand())
1551	return false;
1552
1553	auto *SIVTy = SI.getValueOperand()->getType();
1554	auto *OSVTy = OtherStore->getValueOperand()->getType();
1555	return CastInst::isBitOrNoopPointerCastable(SrcTy: OSVTy, DestTy: SIVTy, DL) &&
1556	SI.hasSameSpecialState(I2: OtherStore);
1557	};
1558
1559	// If the other block ends in an unconditional branch, check for the 'if then
1560	// else' case. There is an instruction before the branch.
1561	StoreInst OtherStore = nullptr*;
1562	if (OtherBr->isUnconditional()) {
1563	--BBI;
1564	// Skip over debugging info and pseudo probes.
1565	while (BBI ->isDebugOrPseudoInst()) {
1566	if (BBI ==OtherBB->begin())
1567	return false;
1568	--BBI;
1569	}
1570	// If this isn't a store, isn't a store to the same location, or is not the
1571	// right kind of store, bail out.
1572	OtherStore = dyn_cast<StoreInst>(Val&: BBI);
1573	if (!OtherStoreIsMergeable (OtherStore))
1574	return false;
1575	} else {
1576	// Otherwise, the other block ended with a conditional branch. If one of the
1577	// destinations is StoreBB, then we have the if/then case.
1578	if (OtherBr->getSuccessor(i: `0`) != StoreBB &&
1579	OtherBr->getSuccessor(i: `1`) != StoreBB)
1580	return false;
1581
1582	// Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1583	// if/then triangle. See if there is a store to the same ptr as SI that
1584	// lives in OtherBB.
1585	for (;; --BBI) {
1586	// Check to see if we find the matching store.
1587	OtherStore = dyn_cast<StoreInst>(Val&: BBI);
1588	if (OtherStoreIsMergeable (OtherStore))
1589	break;
1590
1591	// If we find something that may be using or overwriting the stored
1592	// value, or if we run out of instructions, we can't do the transform.
1593	if (BBI ->mayReadFromMemory() \|\| BBI ->mayThrow() \|\|
1594	BBI ->mayWriteToMemory() \|\| BBI == OtherBB->begin())
1595	return false;
1596	}
1597
1598	// In order to eliminate the store in OtherBr, we have to make sure nothing
1599	// reads or overwrites the stored value in StoreBB.
1600	for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1601	// FIXME: This should really be AA driven.
1602	if (I ->mayReadFromMemory() \|\| I ->mayThrow() \|\| I ->mayWriteToMemory())
1603	return false;
1604	}
1605	}
1606
1607	// Insert a PHI node now if we need it.
1608	Value *MergedVal = OtherStore->getValueOperand();
1609	// The debug locations of the original instructions might differ. Merge them.
1610	DebugLoc MergedLoc =
1611	DebugLoc::getMergedLocation(LocA: SI.getDebugLoc(), LocB: OtherStore->getDebugLoc());
1612	if (MergedVal != SI.getValueOperand()) {
1613	PHINode *PN =
1614	PHINode::Create(Ty: SI.getValueOperand()->getType(), NumReservedValues: `2`, NameStr: "storemerge");
1615	PN->addIncoming(V: SI.getValueOperand(), BB: SI.getParent());
1616	Builder.SetInsertPoint(OtherStore);
1617	PN->addIncoming(V: Builder.CreateBitOrPointerCast(V: MergedVal, DestTy: PN->getType()),
1618	BB: OtherBB);
1619	MergedVal = InsertNewInstBefore(New: PN, Old: DestBB->begin());
1620	PN->setDebugLoc(MergedLoc);
1621	}
1622
1623	// Advance to a place where it is safe to insert the new store and insert it.
1624	BBI = DestBB->getFirstInsertionPt();
1625	StoreInst *NewSI =
1626	new StoreInst (MergedVal, SI.getOperand(i_nocapture: `1`), SI.isVolatile(), SI.getAlign(),
1627	SI.getOrdering(), SI.getSyncScopeID());
1628	InsertNewInstBefore(New: NewSI, Old: BBI);
1629	NewSI->setDebugLoc(MergedLoc);
1630	NewSI->mergeDIAssignID(SourceInstructions: {&SI, OtherStore});
1631
1632	// If the two stores had AA tags, merge them.
1633	AAMDNodes AATags = SI.getAAMetadata();
1634	if (AATags)
1635	NewSI->setAAMetadata(AATags.merge(Other: OtherStore->getAAMetadata()));
1636
1637	// Nuke the old stores.
1638	eraseInstFromFunction(I&: SI);
1639	eraseInstFromFunction(I&: *OtherStore);
1640	return true;
1641	}
1642

Browse the source code of llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp