LoopLoadElimination.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp]

1	//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements a loop-aware load elimination pass.
10	//
11	// It uses LoopAccessAnalysis to identify loop-carried dependences with a
12	// distance of one between stores and loads. These form the candidates for the
13	// transformation. The source value of each store is then propagated to the
14	// users of the corresponding load. This makes the load dead.
15	//
16	// The pass can also version the loop and add memchecks in order to prove that
17	// may-aliasing stores can't change the value in memory before it's read by the
18	// load.
19	//
20	//===----------------------------------------------------------------------===//
21
22	#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
23	#include "llvm/ADT/APInt.h"
24	#include "llvm/ADT/DenseMap.h"
25	#include "llvm/ADT/DepthFirstIterator.h"
26	#include "llvm/ADT/STLExtras.h"
27	#include "llvm/ADT/SmallPtrSet.h"
28	#include "llvm/ADT/SmallVector.h"
29	#include "llvm/ADT/Statistic.h"
30	#include "llvm/Analysis/AssumptionCache.h"
31	#include "llvm/Analysis/BlockFrequencyInfo.h"
32	#include "llvm/Analysis/GlobalsModRef.h"
33	#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
34	#include "llvm/Analysis/LoopAccessAnalysis.h"
35	#include "llvm/Analysis/LoopAnalysisManager.h"
36	#include "llvm/Analysis/LoopInfo.h"
37	#include "llvm/Analysis/ProfileSummaryInfo.h"
38	#include "llvm/Analysis/ScalarEvolution.h"
39	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
40	#include "llvm/Analysis/TargetLibraryInfo.h"
41	#include "llvm/Analysis/TargetTransformInfo.h"
42	#include "llvm/IR/DataLayout.h"
43	#include "llvm/IR/Dominators.h"
44	#include "llvm/IR/Instructions.h"
45	#include "llvm/IR/PassManager.h"
46	#include "llvm/IR/Type.h"
47	#include "llvm/IR/Value.h"
48	#include "llvm/Support/Casting.h"
49	#include "llvm/Support/CommandLine.h"
50	#include "llvm/Support/Debug.h"
51	#include "llvm/Support/raw_ostream.h"
52	#include "llvm/Transforms/Utils/LoopSimplify.h"
53	#include "llvm/Transforms/Utils/LoopVersioning.h"
54	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
55	#include "llvm/Transforms/Utils/SizeOpts.h"
56	#include <algorithm>
57	#include <cassert>
58	#include <forward_list>
59	#include <tuple>
60	#include <utility>
61
62	using namespace llvm;
63
64	#define LLE_OPTION "loop-load-elim"
65	#define DEBUG_TYPE LLE_OPTION
66
67	static cl::opt<unsigned> CheckPerElim(
68	"runtime-check-per-loop-load-elim", cl::Hidden,
69	cl::desc ("Max number of memchecks allowed per eliminated load on average"),
70	cl::init(Val: `1`));
71
72	static cl::opt<unsigned> LoadElimSCEVCheckThreshold(
73	"loop-load-elimination-scev-check-threshold", cl::init(Val: `8`), cl::Hidden,
74	cl::desc ("The maximum number of SCEV checks allowed for Loop "
75	"Load Elimination"));
76
77	STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
78
79	namespace {
80
81	/// Represent a store-to-forwarding candidate.
82	struct StoreToLoadForwardingCandidate {
83	LoadInst *Load;
84	StoreInst *Store;
85
86	StoreToLoadForwardingCandidate(LoadInst Load, StoreInst Store)
87	: Load(Load), Store(Store) {}
88
89	/// Return true if the dependence from the store to the load has an
90	/// absolute distance of one.
91	/// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
92	bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
93	Loop L) const* {
94	Value *LoadPtr = Load->getPointerOperand();
95	Value *StorePtr = Store->getPointerOperand();
96	Type *LoadType = getLoadStoreType(I: Load);
97	auto &DL = Load->getDataLayout();
98
99	assert(LoadPtr->getType()->getPointerAddressSpace() ==
100	StorePtr->getType()->getPointerAddressSpace() &&
101	DL.getTypeSizeInBits(LoadType) ==
102	DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
103	"Should be a known dependence");
104
105	int64_t StrideLoad = getPtrStride(PSE, AccessTy: LoadType, Ptr: LoadPtr, Lp: L).value_or(u: `0`);
106	int64_t StrideStore = getPtrStride(PSE, AccessTy: LoadType, Ptr: StorePtr, Lp: L).value_or(u: `0`);
107	if (!StrideLoad \|\| !StrideStore \|\| StrideLoad != StrideStore)
108	return false;
109
110	// TODO: This check for stride values other than 1 and -1 can be eliminated.
111	// However, doing so may cause the LoopAccessAnalysis to overcompensate,
112	// generating numerous non-wrap runtime checks that may undermine the
113	// benefits of load elimination. To safely implement support for non-unit
114	// strides, we would need to ensure either that the processed case does not
115	// require these additional checks, or improve the LAA to handle them more
116	// efficiently, or potentially both.
117	if (std::abs(i: StrideLoad) != `1`)
118	return false;
119
120	unsigned TypeByteSize = DL.getTypeAllocSize(Ty: const_cast<Type *>(LoadType));
121
122	auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(Val: PSE.getSCEV(V: LoadPtr));
123	auto *StorePtrSCEV = cast<SCEVAddRecExpr>(Val: PSE.getSCEV(V: StorePtr));
124
125	// We don't need to check non-wrapping here because forward/backward
126	// dependence wouldn't be valid if these weren't monotonic accesses.
127	auto *Dist = dyn_cast<SCEVConstant>(
128	Val: PSE.getSE()->getMinusSCEV(LHS: StorePtrSCEV, RHS: LoadPtrSCEV));
129	if (!Dist)
130	return false;
131	const APInt &Val = Dist->getAPInt();
132	return Val == TypeByteSize * StrideLoad;
133	}
134
135	Value getLoadPtr() const* { return Load->getPointerOperand(); }
136
137	#ifndef NDEBUG
138	friend raw_ostream &operator<<(raw_ostream &OS,
139	const StoreToLoadForwardingCandidate &Cand) {
140	OS << *Cand.Store << " -->\n";
141	OS.indent(`2`) << *Cand.Load << "\n";
142	return OS;
143	}
144	#endif
145	};
146
147	} // end anonymous namespace
148
149	/// Check if the store dominates all latches, so as long as there is no
150	/// intervening store this value will be loaded in the next iteration.
151	static bool doesStoreDominatesAllLatches(BasicBlock StoreBlock, Loop L,
152	DominatorTree *DT) {
153	SmallVector<BasicBlock *, `8`> Latches;
154	L->getLoopLatches(LoopLatches&: Latches);
155	return llvm::all_of(Range&: Latches, P: [&](const BasicBlock *Latch) {
156	return DT->dominates(A: StoreBlock, B: Latch);
157	});
158	}
159
160	/// Return true if the load is not executed on all paths in the loop.
161	static bool isLoadConditional(LoadInst Load, Loop L) {
162	return Load->getParent() != L->getHeader();
163	}
164
165	namespace {
166
167	/// The per-loop class that does most of the work.
168	class LoadEliminationForLoop {
169	public:
170	LoadEliminationForLoop(Loop L, LoopInfo LI, const LoopAccessInfo &LAI,
171	DominatorTree DT, BlockFrequencyInfo BFI,
172	ProfileSummaryInfo* PSI)
173	: L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE (LAI.getPSE()) {}
174
175	/// Look through the loop-carried and loop-independent dependences in
176	/// this loop and find store->load dependences.
177	///
178	/// Note that no candidate is returned if LAA has failed to analyze the loop
179	/// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
180	std::forward_list<StoreToLoadForwardingCandidate>
181	findStoreToLoadDependences(const LoopAccessInfo &LAI) {
182	std::forward_list<StoreToLoadForwardingCandidate> Candidates;
183
184	const auto &DepChecker = LAI.getDepChecker();
185	const auto *Deps = DepChecker.getDependences();
186	if (!Deps)
187	return Candidates;
188
189	// Find store->load dependences (consequently true dep). Both lexically
190	// forward and backward dependences qualify. Disqualify loads that have
191	// other unknown dependences.
192
193	SmallPtrSet<Instruction *, `4`> LoadsWithUnknownDependence;
194
195	for (const auto &Dep : *Deps) {
196	Instruction *Source = Dep.getSource(DepChecker);
197	Instruction *Destination = Dep.getDestination(DepChecker);
198
199	if (Dep.Type == MemoryDepChecker::Dependence::Unknown \|\|
200	Dep.Type == MemoryDepChecker::Dependence::IndirectUnsafe) {
201	if (isa<LoadInst>(Val: Source))
202	LoadsWithUnknownDependence.insert(Ptr: Source);
203	if (isa<LoadInst>(Val: Destination))
204	LoadsWithUnknownDependence.insert(Ptr: Destination);
205	continue;
206	}
207
208	if (Dep.isBackward())
209	// Note that the designations source and destination follow the program
210	// order, i.e. source is always first. (The direction is given by the
211	// DepType.)
212	std::swap(a&: Source, b&: Destination);
213	else
214	assert(Dep.isForward() && "Needs to be a forward dependence");
215
216	auto *Store = dyn_cast<StoreInst>(Val: Source);
217	if (!Store)
218	continue;
219	auto *Load = dyn_cast<LoadInst>(Val: Destination);
220	if (!Load)
221	continue;
222
223	// Only propagate if the stored values are bit/pointer castable.
224	if (!CastInst::isBitOrNoopPointerCastable(
225	SrcTy: getLoadStoreType(I: Store), DestTy: getLoadStoreType(I: Load),
226	DL: Store->getDataLayout()))
227	continue;
228
229	Candidates.emplace_front(args&: Load, args&: Store);
230	}
231
232	if (!LoadsWithUnknownDependence.empty())
233	Candidates.remove_if(pred: [&](const StoreToLoadForwardingCandidate &C) {
234	return LoadsWithUnknownDependence.count(Ptr: C.Load);
235	});
236
237	return Candidates;
238	}
239
240	/// Return the index of the instruction according to program order.
241	unsigned getInstrIndex(Instruction *Inst) {
242	auto I = InstOrder.find(Val: Inst);
243	assert(I != InstOrder.end() && "No index for instruction");
244	return I ->second;
245	}
246
247	/// If a load has multiple candidates associated (i.e. different
248	/// stores), it means that it could be forwarding from multiple stores
249	/// depending on control flow. Remove these candidates.
250	///
251	/// Here, we rely on LAA to include the relevant loop-independent dependences.
252	/// LAA is known to omit these in the very simple case when the read and the
253	/// write within an alias set always takes place using the same* pointer.*
254	///
255	/// However, we know that this is not the case here, i.e. we can rely on LAA
256	/// to provide us with loop-independent dependences for the cases we're
257	/// interested. Consider the case for example where a loop-independent
258	/// dependece S1->S2 invalidates the forwarding S3->S2.
259	///
260	/// A[i] = ... (S1)
261	/// ... = A[i] (S2)
262	/// A[i+1] = ... (S3)
263	///
264	/// LAA will perform dependence analysis here because there are two
265	/// different* pointers involved in the same alias set (&A[i] and &A[i+1]).*
266	void removeDependencesFromMultipleStores(
267	std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
268	// If Store is nullptr it means that we have multiple stores forwarding to
269	// this store.
270	using LoadToSingleCandT =
271	DenseMap<LoadInst , const* StoreToLoadForwardingCandidate *>;
272	LoadToSingleCandT LoadToSingleCand;
273
274	for (const auto &Cand : Candidates) {
275	bool NewElt;
276	LoadToSingleCandT::iterator Iter;
277
278	std::tie(args&: Iter, args&: NewElt) =
279	LoadToSingleCand.insert(KV: std::make_pair(x: Cand.Load, y: &Cand));
280	if (!NewElt) {
281	const StoreToLoadForwardingCandidate *&OtherCand = Iter ->second;
282	// Already multiple stores forward to this load.
283	if (OtherCand == nullptr)
284	continue;
285
286	// Handle the very basic case when the two stores are in the same block
287	// so deciding which one forwards is easy. The later one forwards as
288	// long as they both have a dependence distance of one to the load.
289	if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
290	Cand.isDependenceDistanceOfOne(PSE, L) &&
291	OtherCand->isDependenceDistanceOfOne(PSE, L)) {
292	// They are in the same block, the later one will forward to the load.
293	if (getInstrIndex(Inst: OtherCand->Store) < getInstrIndex(Inst: Cand.Store))
294	OtherCand = &Cand;
295	} else
296	OtherCand = nullptr;
297	}
298	}
299
300	Candidates.remove_if(pred: [&](const StoreToLoadForwardingCandidate &Cand) {
301	if (LoadToSingleCand [Cand.Load] != &Cand) {
302	LLVM_DEBUG(
303	dbgs() << "Removing from candidates: \n"
304	<< Cand
305	<< " The load may have multiple stores forwarding to "
306	<< "it\n");
307	return true;
308	}
309	return false;
310	});
311	}
312
313	/// Given two pointers operations by their RuntimePointerChecking
314	/// indices, return true if they require an alias check.
315	///
316	/// We need a check if one is a pointer for a candidate load and the other is
317	/// a pointer for a possibly intervening store.
318	bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
319	const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath,
320	const SmallPtrSetImpl<Value *> &CandLoadPtrs) {
321	Value *Ptr1 =
322	LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx: PtrIdx1).PointerValue;
323	Value *Ptr2 =
324	LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx: PtrIdx2).PointerValue;
325	return ((PtrsWrittenOnFwdingPath.count(Ptr: Ptr1) && CandLoadPtrs.count(Ptr: Ptr2)) \|\|
326	(PtrsWrittenOnFwdingPath.count(Ptr: Ptr2) && CandLoadPtrs.count(Ptr: Ptr1)));
327	}
328
329	/// Return pointers that are possibly written to on the path from a
330	/// forwarding store to a load.
331	///
332	/// These pointers need to be alias-checked against the forwarding candidates.
333	SmallPtrSet<Value *, `4`> findPointersWrittenOnForwardingPath(
334	const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
335	// From FirstStore to LastLoad neither of the elimination candidate loads
336	// should overlap with any of the stores.
337	//
338	// E.g.:
339	//
340	// st1 C[i]
341	// ld1 B[i] <-------,
342	// ld0 A[i] <----, \| LastLoad*
343	// ... \| \|
344	// st2 E[i] \| \|
345	// st3 B[i+1] -- \| -' FirstStore*
346	// st0 A[i+1] ---'
347	// st4 D[i]
348	//
349	// st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
350	// ld0.
351
352	LoadInst *LastLoad =
353	llvm::max_element(Range: Candidates,
354	C: [&](const StoreToLoadForwardingCandidate &A,
355	const StoreToLoadForwardingCandidate &B) {
356	return getInstrIndex(Inst: A.Load) <
357	getInstrIndex(Inst: B.Load);
358	})
359	->Load;
360	StoreInst *FirstStore =
361	llvm::min_element(Range: Candidates,
362	C: [&](const StoreToLoadForwardingCandidate &A,
363	const StoreToLoadForwardingCandidate &B) {
364	return getInstrIndex(Inst: A.Store) <
365	getInstrIndex(Inst: B.Store);
366	})
367	->Store;
368
369	// We're looking for stores after the first forwarding store until the end
370	// of the loop, then from the beginning of the loop until the last
371	// forwarded-to load. Collect the pointer for the stores.
372	SmallPtrSet<Value *, `4`> PtrsWrittenOnFwdingPath;
373
374	auto InsertStorePtr = [&](Instruction *I) {
375	if (auto *S = dyn_cast<StoreInst>(Val: I))
376	PtrsWrittenOnFwdingPath.insert(Ptr: S->getPointerOperand());
377	};
378	const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
379	std::for_each(first: MemInstrs.begin() + getInstrIndex(Inst: FirstStore) + `1`,
380	last: MemInstrs.end(), f: InsertStorePtr);
381	std::for_each(first: MemInstrs.begin(), last: &MemInstrs [getInstrIndex(Inst: LastLoad)],
382	f: InsertStorePtr);
383
384	return PtrsWrittenOnFwdingPath;
385	}
386
387	/// Determine the pointer alias checks to prove that there are no
388	/// intervening stores.
389	SmallVector<RuntimePointerCheck, `4`> collectMemchecks(
390	const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
391
392	SmallPtrSet<Value *, `4`> PtrsWrittenOnFwdingPath =
393	findPointersWrittenOnForwardingPath(Candidates);
394
395	// Collect the pointers of the candidate loads.
396	SmallPtrSet<Value *, `4`> CandLoadPtrs;
397	for (const auto &Candidate : Candidates)
398	CandLoadPtrs.insert(Ptr: Candidate.getLoadPtr());
399
400	const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
401	SmallVector<RuntimePointerCheck, `4`> Checks;
402
403	copy_if(Range: AllChecks, Out: std::back_inserter(x&: Checks),
404	P: [&](const RuntimePointerCheck &Check) {
405	for (auto PtrIdx1 : Check.first->Members)
406	for (auto PtrIdx2 : Check.second->Members)
407	if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
408	CandLoadPtrs))
409	return true;
410	return false;
411	});
412
413	LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
414	<< "):\n");
415	LLVM_DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
416
417	return Checks;
418	}
419
420	/// Perform the transformation for a candidate.
421	void
422	propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
423	SCEVExpander &SEE) {
424	// loop:
425	// %x = load %gep_i
426	// = ... %x
427	// store %y, %gep_i_plus_1
428	//
429	// =>
430	//
431	// ph:
432	// %x.initial = load %gep_0
433	// loop:
434	// %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
435	// %x = load %gep_i <---- now dead
436	// = ... %x.storeforward
437	// store %y, %gep_i_plus_1
438
439	Value *Ptr = Cand.Load->getPointerOperand();
440	auto *PtrSCEV = cast<SCEVAddRecExpr>(Val: PSE.getSCEV(V: Ptr));
441	auto *PH = L->getLoopPreheader();
442	assert(PH && "Preheader should exist!");
443	Value *InitialPtr = SEE.expandCodeFor(SH: PtrSCEV->getStart(), Ty: Ptr->getType(),
444	I: PH->getTerminator());
445	Instruction *Initial =
446	new LoadInst (Cand.Load->getType(), InitialPtr, "load_initial",
447	/ isVolatile / false, Cand.Load->getAlign(),
448	PH->getTerminator()->getIterator());
449	// We don't give any debug location to Initial, because it is inserted
450	// into the loop's preheader. A debug location inside the loop will cause
451	// a misleading stepping when debugging. The test update-debugloc-store
452	// -forwarded.ll checks this.
453	Initial->setDebugLoc(DebugLoc::getDropped());
454
455	PHINode *PHI = PHINode::Create(Ty: Initial->getType(), NumReservedValues: `2`, NameStr: "store_forwarded");
456	PHI->insertBefore(InsertPos: L->getHeader()->begin());
457	PHI->addIncoming(V: Initial, BB: PH);
458
459	Type *LoadType = Initial->getType();
460	Type *StoreType = Cand.Store->getValueOperand()->getType();
461	auto &DL = Cand.Load->getDataLayout();
462	(void)DL;
463
464	assert(DL.getTypeSizeInBits(LoadType) == DL.getTypeSizeInBits(StoreType) &&
465	"The type sizes should match!");
466
467	Value *StoreValue = Cand.Store->getValueOperand();
468	if (LoadType != StoreType) {
469	StoreValue = CastInst::CreateBitOrPointerCast(S: StoreValue, Ty: LoadType,
470	Name: "store_forward_cast",
471	InsertBefore: Cand.Store->getIterator());
472	// Because it casts the old `load` value and is used by the new `phi`
473	// which replaces the old `load`, we give the `load`'s debug location
474	// to it.
475	cast<Instruction>(Val: StoreValue)->setDebugLoc(Cand.Load->getDebugLoc());
476	}
477
478	PHI->addIncoming(V: StoreValue, BB: L->getLoopLatch());
479
480	Cand.Load->replaceAllUsesWith(V: PHI);
481	PHI->setDebugLoc(Cand.Load->getDebugLoc());
482	}
483
484	/// Top-level driver for each loop: find store->load forwarding
485	/// candidates, add run-time checks and perform transformation.
486	bool processLoop() {
487	LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
488	<< "\" checking " << *L << "\n");
489
490	// Look for store-to-load forwarding cases across the
491	// backedge. E.g.:
492	//
493	// loop:
494	// %x = load %gep_i
495	// = ... %x
496	// store %y, %gep_i_plus_1
497	//
498	// =>
499	//
500	// ph:
501	// %x.initial = load %gep_0
502	// loop:
503	// %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
504	// %x = load %gep_i <---- now dead
505	// = ... %x.storeforward
506	// store %y, %gep_i_plus_1
507
508	// First start with store->load dependences.
509	auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
510	if (StoreToLoadDependences.empty())
511	return false;
512
513	// Generate an index for each load and store according to the original
514	// program order. This will be used later.
515	InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
516
517	// To keep things simple for now, remove those where the load is potentially
518	// fed by multiple stores.
519	removeDependencesFromMultipleStores(Candidates&: StoreToLoadDependences);
520	if (StoreToLoadDependences.empty())
521	return false;
522
523	// Filter the candidates further.
524	SmallVector<StoreToLoadForwardingCandidate, `4`> Candidates;
525	for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
526	LLVM_DEBUG(dbgs() << "Candidate " << Cand);
527
528	// Make sure that the stored values is available everywhere in the loop in
529	// the next iteration.
530	if (!doesStoreDominatesAllLatches(StoreBlock: Cand.Store->getParent(), L, DT))
531	continue;
532
533	// If the load is conditional we can't hoist its 0-iteration instance to
534	// the preheader because that would make it unconditional. Thus we would
535	// access a memory location that the original loop did not access.
536	if (isLoadConditional(Load: Cand.Load, L))
537	continue;
538
539	// Check whether the SCEV difference is the same as the induction step,
540	// thus we load the value in the next iteration.
541	if (!Cand.isDependenceDistanceOfOne(PSE, L))
542	continue;
543
544	assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
545	"Loading from something other than indvar?");
546	assert(
547	isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
548	"Storing to something other than indvar?");
549
550	Candidates.push_back(Elt: Cand);
551	LLVM_DEBUG(
552	dbgs()
553	<< Candidates.size()
554	<< ". Valid store-to-load forwarding across the loop backedge\n");
555	}
556	if (Candidates.empty())
557	return false;
558
559	// Check intervening may-alias stores. These need runtime checks for alias
560	// disambiguation.
561	SmallVector<RuntimePointerCheck, `4`> Checks = collectMemchecks(Candidates);
562
563	// Too many checks are likely to outweigh the benefits of forwarding.
564	if (Checks.size() > Candidates.size() * CheckPerElim) {
565	LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
566	return false;
567	}
568
569	if (LAI.getPSE().getPredicate().getComplexity() >
570	LoadElimSCEVCheckThreshold) {
571	LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
572	return false;
573	}
574
575	if (!L->isLoopSimplifyForm()) {
576	LLVM_DEBUG(dbgs() << "Loop is not is loop-simplify form");
577	return false;
578	}
579
580	if (!Checks.empty() \|\| !LAI.getPSE().getPredicate().isAlwaysTrue()) {
581	if (LAI.hasConvergentOp()) {
582	LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
583	"convergent calls\n");
584	return false;
585	}
586
587	auto *HeaderBB = L->getHeader();
588	if (llvm::shouldOptimizeForSize(BB: HeaderBB, PSI, BFI,
589	QueryType: PGSOQueryType::IRPass)) {
590	LLVM_DEBUG(
591	dbgs() << "Versioning is needed but not allowed when optimizing "
592	"for size.\n");
593	return false;
594	}
595
596	// Point of no-return, start the transformation. First, version the loop
597	// if necessary.
598
599	LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
600	LV.versionLoop();
601
602	// After versioning, some of the candidates' pointers could stop being
603	// SCEVAddRecs. We need to filter them out.
604	auto NoLongerGoodCandidate = [this](
605	const StoreToLoadForwardingCandidate &Cand) {
606	return !isa<SCEVAddRecExpr>(
607	Val: PSE.getSCEV(V: Cand.Load->getPointerOperand())) \|\|
608	!isa<SCEVAddRecExpr>(
609	Val: PSE.getSCEV(V: Cand.Store->getPointerOperand()));
610	};
611	llvm::erase_if(C&: Candidates, P: NoLongerGoodCandidate);
612	}
613
614	// Next, propagate the value stored by the store to the users of the load.
615	// Also for the first iteration, generate the initial value of the load.
616	SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getDataLayout(),
617	"storeforward");
618	for (const auto &Cand : Candidates)
619	propagateStoredValueToLoadUsers(Cand, SEE);
620	NumLoopLoadEliminted += Candidates.size();
621
622	return true;
623	}
624
625	private:
626	Loop *L;
627
628	/// Maps the load/store instructions to their index according to
629	/// program order.
630	DenseMap<Instruction , unsigned*> InstOrder;
631
632	// Analyses used.
633	LoopInfo *LI;
634	const LoopAccessInfo &LAI;
635	DominatorTree *DT;
636	BlockFrequencyInfo *BFI;
637	ProfileSummaryInfo *PSI;
638	PredicatedScalarEvolution PSE;
639	};
640
641	} // end anonymous namespace
642
643	static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI,
644	DominatorTree &DT,
645	BlockFrequencyInfo *BFI,
646	ProfileSummaryInfo *PSI,
647	ScalarEvolution SE, AssumptionCache AC,
648	LoopAccessInfoManager &LAIs) {
649	// Build up a worklist of inner-loops to transform to avoid iterator
650	// invalidation.
651	// FIXME: This logic comes from other passes that actually change the loop
652	// nest structure. It isn't clear this is necessary (or useful) for a pass
653	// which merely optimizes the use of loads in a loop.
654	SmallVector<Loop *, `8`> Worklist;
655
656	bool Changed = false;
657
658	for (Loop *TopLevelLoop : LI)
659	for (Loop *L : depth_first(G: TopLevelLoop)) {
660	Changed \|= simplifyLoop(L, DT: &DT, LI: &LI, SE, AC, /MSSAU/ nullptr, PreserveLCSSA: false);
661	// We only handle inner-most loops.
662	if (L->isInnermost())
663	Worklist.push_back(Elt: L);
664	}
665
666	// Now walk the identified inner loops.
667	for (Loop *L : Worklist) {
668	// Match historical behavior
669	if (!L->isRotatedForm() \|\| !L->getExitingBlock())
670	continue;
671	// The actual work is performed by LoadEliminationForLoop.
672	LoadEliminationForLoop LEL(L, &LI, LAIs.getInfo(L&: *L), &DT, BFI, PSI);
673	Changed \|= LEL.processLoop();
674	if (Changed)
675	LAIs.clear();
676	}
677	return Changed;
678	}
679
680	PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
681	FunctionAnalysisManager &AM) {
682	auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
683	// There are no loops in the function. Return before computing other expensive
684	// analyses.
685	if (LI.empty())
686	return PreservedAnalyses::all();
687	auto &SE = AM.getResult<ScalarEvolutionAnalysis>(IR&: F);
688	auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
689	auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
690	auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
691	auto PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: F.getParent());
692	auto *BFI = (PSI && PSI->hasProfileSummary()) ?
693	&AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr;
694	LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(IR&: F);
695
696	bool Changed = eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, SE: &SE, AC: &AC, LAIs);
697
698	if (!Changed)
699	return PreservedAnalyses::all();
700
701	PreservedAnalyses PA;
702	PA.preserve<DominatorTreeAnalysis>();
703	PA.preserve<LoopAnalysis>();
704	return PA;
705	}
706

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp