LoopLoadElimination.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp]

1	//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements a loop-aware load elimination pass.
10	//
11	// It uses LoopAccessAnalysis to identify loop-carried dependences with a
12	// distance of one between stores and loads. These form the candidates for the
13	// transformation. The source value of each store is then propagated to the
14	// user of the corresponding load. This makes the load dead.
15	//
16	// The pass can also version the loop and add memchecks in order to prove that
17	// may-aliasing stores can't change the value in memory before it's read by the
18	// load.
19	//
20	//===----------------------------------------------------------------------===//
21
22	#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
23	#include "llvm/ADT/APInt.h"
24	#include "llvm/ADT/DenseMap.h"
25	#include "llvm/ADT/DepthFirstIterator.h"
26	#include "llvm/ADT/STLExtras.h"
27	#include "llvm/ADT/SmallPtrSet.h"
28	#include "llvm/ADT/SmallVector.h"
29	#include "llvm/ADT/Statistic.h"
30	#include "llvm/Analysis/AssumptionCache.h"
31	#include "llvm/Analysis/BlockFrequencyInfo.h"
32	#include "llvm/Analysis/GlobalsModRef.h"
33	#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
34	#include "llvm/Analysis/LoopAccessAnalysis.h"
35	#include "llvm/Analysis/LoopAnalysisManager.h"
36	#include "llvm/Analysis/LoopInfo.h"
37	#include "llvm/Analysis/ProfileSummaryInfo.h"
38	#include "llvm/Analysis/ScalarEvolution.h"
39	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
40	#include "llvm/Analysis/TargetLibraryInfo.h"
41	#include "llvm/Analysis/TargetTransformInfo.h"
42	#include "llvm/IR/DataLayout.h"
43	#include "llvm/IR/Dominators.h"
44	#include "llvm/IR/Instructions.h"
45	#include "llvm/IR/Module.h"
46	#include "llvm/IR/PassManager.h"
47	#include "llvm/IR/Type.h"
48	#include "llvm/IR/Value.h"
49	#include "llvm/Support/Casting.h"
50	#include "llvm/Support/CommandLine.h"
51	#include "llvm/Support/Debug.h"
52	#include "llvm/Support/raw_ostream.h"
53	#include "llvm/Transforms/Utils.h"
54	#include "llvm/Transforms/Utils/LoopSimplify.h"
55	#include "llvm/Transforms/Utils/LoopVersioning.h"
56	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
57	#include "llvm/Transforms/Utils/SizeOpts.h"
58	#include <algorithm>
59	#include <cassert>
60	#include <forward_list>
61	#include <tuple>
62	#include <utility>
63
64	using namespace llvm;
65
66	#define LLE_OPTION "loop-load-elim"
67	#define DEBUG_TYPE LLE_OPTION
68
69	static cl::opt<unsigned> CheckPerElim(
70	"runtime-check-per-loop-load-elim", cl::Hidden,
71	cl::desc ("Max number of memchecks allowed per eliminated load on average"),
72	cl::init(Val: `1`));
73
74	static cl::opt<unsigned> LoadElimSCEVCheckThreshold(
75	"loop-load-elimination-scev-check-threshold", cl::init(Val: `8`), cl::Hidden,
76	cl::desc ("The maximum number of SCEV checks allowed for Loop "
77	"Load Elimination"));
78
79	STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
80
81	namespace {
82
83	/// Represent a store-to-forwarding candidate.
84	struct StoreToLoadForwardingCandidate {
85	LoadInst *Load;
86	StoreInst *Store;
87
88	StoreToLoadForwardingCandidate(LoadInst Load, StoreInst Store)
89	: Load(Load), Store(Store) {}
90
91	/// Return true if the dependence from the store to the load has an
92	/// absolute distance of one.
93	/// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
94	bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
95	Loop L) const* {
96	Value *LoadPtr = Load->getPointerOperand();
97	Value *StorePtr = Store->getPointerOperand();
98	Type *LoadType = getLoadStoreType(I: Load);
99	auto &DL = Load->getDataLayout();
100
101	assert(LoadPtr->getType()->getPointerAddressSpace() ==
102	StorePtr->getType()->getPointerAddressSpace() &&
103	DL.getTypeSizeInBits(LoadType) ==
104	DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
105	"Should be a known dependence");
106
107	int64_t StrideLoad = getPtrStride(PSE, AccessTy: LoadType, Ptr: LoadPtr, Lp: L).value_or(u: `0`);
108	int64_t StrideStore = getPtrStride(PSE, AccessTy: LoadType, Ptr: StorePtr, Lp: L).value_or(u: `0`);
109	if (!StrideLoad \|\| !StrideStore \|\| StrideLoad != StrideStore)
110	return false;
111
112	// TODO: This check for stride values other than 1 and -1 can be eliminated.
113	// However, doing so may cause the LoopAccessAnalysis to overcompensate,
114	// generating numerous non-wrap runtime checks that may undermine the
115	// benefits of load elimination. To safely implement support for non-unit
116	// strides, we would need to ensure either that the processed case does not
117	// require these additional checks, or improve the LAA to handle them more
118	// efficiently, or potentially both.
119	if (std::abs(i: StrideLoad) != `1`)
120	return false;
121
122	unsigned TypeByteSize = DL.getTypeAllocSize(Ty: const_cast<Type *>(LoadType));
123
124	auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(Val: PSE.getSCEV(V: LoadPtr));
125	auto *StorePtrSCEV = cast<SCEVAddRecExpr>(Val: PSE.getSCEV(V: StorePtr));
126
127	// We don't need to check non-wrapping here because forward/backward
128	// dependence wouldn't be valid if these weren't monotonic accesses.
129	auto *Dist = dyn_cast<SCEVConstant>(
130	Val: PSE.getSE()->getMinusSCEV(LHS: StorePtrSCEV, RHS: LoadPtrSCEV));
131	if (!Dist)
132	return false;
133	const APInt &Val = Dist->getAPInt();
134	return Val == TypeByteSize * StrideLoad;
135	}
136
137	Value getLoadPtr() const* { return Load->getPointerOperand(); }
138
139	#ifndef NDEBUG
140	friend raw_ostream &operator<<(raw_ostream &OS,
141	const StoreToLoadForwardingCandidate &Cand) {
142	OS << *Cand.Store << " -->\n";
143	OS.indent(`2`) << *Cand.Load << "\n";
144	return OS;
145	}
146	#endif
147	};
148
149	} // end anonymous namespace
150
151	/// Check if the store dominates all latches, so as long as there is no
152	/// intervening store this value will be loaded in the next iteration.
153	static bool doesStoreDominatesAllLatches(BasicBlock StoreBlock, Loop L,
154	DominatorTree *DT) {
155	SmallVector<BasicBlock *, `8`> Latches;
156	L->getLoopLatches(LoopLatches&: Latches);
157	return llvm::all_of(Range&: Latches, P: [&](const BasicBlock *Latch) {
158	return DT->dominates(A: StoreBlock, B: Latch);
159	});
160	}
161
162	/// Return true if the load is not executed on all paths in the loop.
163	static bool isLoadConditional(LoadInst Load, Loop L) {
164	return Load->getParent() != L->getHeader();
165	}
166
167	namespace {
168
169	/// The per-loop class that does most of the work.
170	class LoadEliminationForLoop {
171	public:
172	LoadEliminationForLoop(Loop L, LoopInfo LI, const LoopAccessInfo &LAI,
173	DominatorTree DT, BlockFrequencyInfo BFI,
174	ProfileSummaryInfo* PSI)
175	: L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE (LAI.getPSE()) {}
176
177	/// Look through the loop-carried and loop-independent dependences in
178	/// this loop and find store->load dependences.
179	///
180	/// Note that no candidate is returned if LAA has failed to analyze the loop
181	/// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
182	std::forward_list<StoreToLoadForwardingCandidate>
183	findStoreToLoadDependences(const LoopAccessInfo &LAI) {
184	std::forward_list<StoreToLoadForwardingCandidate> Candidates;
185
186	const auto &DepChecker = LAI.getDepChecker();
187	const auto *Deps = DepChecker.getDependences();
188	if (!Deps)
189	return Candidates;
190
191	// Find store->load dependences (consequently true dep). Both lexically
192	// forward and backward dependences qualify. Disqualify loads that have
193	// other unknown dependences.
194
195	SmallPtrSet<Instruction *, `4`> LoadsWithUnknownDepedence;
196
197	for (const auto &Dep : *Deps) {
198	Instruction *Source = Dep.getSource(DepChecker);
199	Instruction *Destination = Dep.getDestination(DepChecker);
200
201	if (Dep.Type == MemoryDepChecker::Dependence::Unknown \|\|
202	Dep.Type == MemoryDepChecker::Dependence::IndirectUnsafe) {
203	if (isa<LoadInst>(Val: Source))
204	LoadsWithUnknownDepedence.insert(Ptr: Source);
205	if (isa<LoadInst>(Val: Destination))
206	LoadsWithUnknownDepedence.insert(Ptr: Destination);
207	continue;
208	}
209
210	if (Dep.isBackward())
211	// Note that the designations source and destination follow the program
212	// order, i.e. source is always first. (The direction is given by the
213	// DepType.)
214	std::swap(a&: Source, b&: Destination);
215	else
216	assert(Dep.isForward() && "Needs to be a forward dependence");
217
218	auto *Store = dyn_cast<StoreInst>(Val: Source);
219	if (!Store)
220	continue;
221	auto *Load = dyn_cast<LoadInst>(Val: Destination);
222	if (!Load)
223	continue;
224
225	// Only propagate if the stored values are bit/pointer castable.
226	if (!CastInst::isBitOrNoopPointerCastable(
227	SrcTy: getLoadStoreType(I: Store), DestTy: getLoadStoreType(I: Load),
228	DL: Store->getDataLayout()))
229	continue;
230
231	Candidates.emplace_front(args&: Load, args&: Store);
232	}
233
234	if (!LoadsWithUnknownDepedence.empty())
235	Candidates.remove_if(pred: [&](const StoreToLoadForwardingCandidate &C) {
236	return LoadsWithUnknownDepedence.count(Ptr: C.Load);
237	});
238
239	return Candidates;
240	}
241
242	/// Return the index of the instruction according to program order.
243	unsigned getInstrIndex(Instruction *Inst) {
244	auto I = InstOrder.find(Val: Inst);
245	assert(I != InstOrder.end() && "No index for instruction");
246	return I ->second;
247	}
248
249	/// If a load has multiple candidates associated (i.e. different
250	/// stores), it means that it could be forwarding from multiple stores
251	/// depending on control flow. Remove these candidates.
252	///
253	/// Here, we rely on LAA to include the relevant loop-independent dependences.
254	/// LAA is known to omit these in the very simple case when the read and the
255	/// write within an alias set always takes place using the same* pointer.*
256	///
257	/// However, we know that this is not the case here, i.e. we can rely on LAA
258	/// to provide us with loop-independent dependences for the cases we're
259	/// interested. Consider the case for example where a loop-independent
260	/// dependece S1->S2 invalidates the forwarding S3->S2.
261	///
262	/// A[i] = ... (S1)
263	/// ... = A[i] (S2)
264	/// A[i+1] = ... (S3)
265	///
266	/// LAA will perform dependence analysis here because there are two
267	/// different* pointers involved in the same alias set (&A[i] and &A[i+1]).*
268	void removeDependencesFromMultipleStores(
269	std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
270	// If Store is nullptr it means that we have multiple stores forwarding to
271	// this store.
272	using LoadToSingleCandT =
273	DenseMap<LoadInst , const* StoreToLoadForwardingCandidate *>;
274	LoadToSingleCandT LoadToSingleCand;
275
276	for (const auto &Cand : Candidates) {
277	bool NewElt;
278	LoadToSingleCandT::iterator Iter;
279
280	std::tie(args&: Iter, args&: NewElt) =
281	LoadToSingleCand.insert(KV: std::make_pair(x: Cand.Load, y: &Cand));
282	if (!NewElt) {
283	const StoreToLoadForwardingCandidate *&OtherCand = Iter ->second;
284	// Already multiple stores forward to this load.
285	if (OtherCand == nullptr)
286	continue;
287
288	// Handle the very basic case when the two stores are in the same block
289	// so deciding which one forwards is easy. The later one forwards as
290	// long as they both have a dependence distance of one to the load.
291	if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
292	Cand.isDependenceDistanceOfOne(PSE, L) &&
293	OtherCand->isDependenceDistanceOfOne(PSE, L)) {
294	// They are in the same block, the later one will forward to the load.
295	if (getInstrIndex(Inst: OtherCand->Store) < getInstrIndex(Inst: Cand.Store))
296	OtherCand = &Cand;
297	} else
298	OtherCand = nullptr;
299	}
300	}
301
302	Candidates.remove_if(pred: [&](const StoreToLoadForwardingCandidate &Cand) {
303	if (LoadToSingleCand [Cand.Load] != &Cand) {
304	LLVM_DEBUG(
305	dbgs() << "Removing from candidates: \n"
306	<< Cand
307	<< " The load may have multiple stores forwarding to "
308	<< "it\n");
309	return true;
310	}
311	return false;
312	});
313	}
314
315	/// Given two pointers operations by their RuntimePointerChecking
316	/// indices, return true if they require an alias check.
317	///
318	/// We need a check if one is a pointer for a candidate load and the other is
319	/// a pointer for a possibly intervening store.
320	bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
321	const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath,
322	const SmallPtrSetImpl<Value *> &CandLoadPtrs) {
323	Value *Ptr1 =
324	LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx: PtrIdx1).PointerValue;
325	Value *Ptr2 =
326	LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx: PtrIdx2).PointerValue;
327	return ((PtrsWrittenOnFwdingPath.count(Ptr: Ptr1) && CandLoadPtrs.count(Ptr: Ptr2)) \|\|
328	(PtrsWrittenOnFwdingPath.count(Ptr: Ptr2) && CandLoadPtrs.count(Ptr: Ptr1)));
329	}
330
331	/// Return pointers that are possibly written to on the path from a
332	/// forwarding store to a load.
333	///
334	/// These pointers need to be alias-checked against the forwarding candidates.
335	SmallPtrSet<Value *, `4`> findPointersWrittenOnForwardingPath(
336	const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
337	// From FirstStore to LastLoad neither of the elimination candidate loads
338	// should overlap with any of the stores.
339	//
340	// E.g.:
341	//
342	// st1 C[i]
343	// ld1 B[i] <-------,
344	// ld0 A[i] <----, \| LastLoad*
345	// ... \| \|
346	// st2 E[i] \| \|
347	// st3 B[i+1] -- \| -' FirstStore*
348	// st0 A[i+1] ---'
349	// st4 D[i]
350	//
351	// st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
352	// ld0.
353
354	LoadInst *LastLoad =
355	llvm::max_element(Range: Candidates,
356	C: [&](const StoreToLoadForwardingCandidate &A,
357	const StoreToLoadForwardingCandidate &B) {
358	return getInstrIndex(Inst: A.Load) <
359	getInstrIndex(Inst: B.Load);
360	})
361	->Load;
362	StoreInst *FirstStore =
363	llvm::min_element(Range: Candidates,
364	C: [&](const StoreToLoadForwardingCandidate &A,
365	const StoreToLoadForwardingCandidate &B) {
366	return getInstrIndex(Inst: A.Store) <
367	getInstrIndex(Inst: B.Store);
368	})
369	->Store;
370
371	// We're looking for stores after the first forwarding store until the end
372	// of the loop, then from the beginning of the loop until the last
373	// forwarded-to load. Collect the pointer for the stores.
374	SmallPtrSet<Value *, `4`> PtrsWrittenOnFwdingPath;
375
376	auto InsertStorePtr = [&](Instruction *I) {
377	if (auto *S = dyn_cast<StoreInst>(Val: I))
378	PtrsWrittenOnFwdingPath.insert(Ptr: S->getPointerOperand());
379	};
380	const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
381	std::for_each(first: MemInstrs.begin() + getInstrIndex(Inst: FirstStore) + `1`,
382	last: MemInstrs.end(), f: InsertStorePtr);
383	std::for_each(first: MemInstrs.begin(), last: &MemInstrs [getInstrIndex(Inst: LastLoad)],
384	f: InsertStorePtr);
385
386	return PtrsWrittenOnFwdingPath;
387	}
388
389	/// Determine the pointer alias checks to prove that there are no
390	/// intervening stores.
391	SmallVector<RuntimePointerCheck, `4`> collectMemchecks(
392	const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
393
394	SmallPtrSet<Value *, `4`> PtrsWrittenOnFwdingPath =
395	findPointersWrittenOnForwardingPath(Candidates);
396
397	// Collect the pointers of the candidate loads.
398	SmallPtrSet<Value *, `4`> CandLoadPtrs;
399	for (const auto &Candidate : Candidates)
400	CandLoadPtrs.insert(Ptr: Candidate.getLoadPtr());
401
402	const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
403	SmallVector<RuntimePointerCheck, `4`> Checks;
404
405	copy_if(Range: AllChecks, Out: std::back_inserter(x&: Checks),
406	P: [&](const RuntimePointerCheck &Check) {
407	for (auto PtrIdx1 : Check.first->Members)
408	for (auto PtrIdx2 : Check.second->Members)
409	if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
410	CandLoadPtrs))
411	return true;
412	return false;
413	});
414
415	LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
416	<< "):\n");
417	LLVM_DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
418
419	return Checks;
420	}
421
422	/// Perform the transformation for a candidate.
423	void
424	propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
425	SCEVExpander &SEE) {
426	// loop:
427	// %x = load %gep_i
428	// = ... %x
429	// store %y, %gep_i_plus_1
430	//
431	// =>
432	//
433	// ph:
434	// %x.initial = load %gep_0
435	// loop:
436	// %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
437	// %x = load %gep_i <---- now dead
438	// = ... %x.storeforward
439	// store %y, %gep_i_plus_1
440
441	Value *Ptr = Cand.Load->getPointerOperand();
442	auto *PtrSCEV = cast<SCEVAddRecExpr>(Val: PSE.getSCEV(V: Ptr));
443	auto *PH = L->getLoopPreheader();
444	assert(PH && "Preheader should exist!");
445	Value *InitialPtr = SEE.expandCodeFor(SH: PtrSCEV->getStart(), Ty: Ptr->getType(),
446	I: PH->getTerminator());
447	Value *Initial =
448	new LoadInst (Cand.Load->getType(), InitialPtr, "load_initial",
449	/ isVolatile / false, Cand.Load->getAlign(),
450	PH->getTerminator()->getIterator());
451	// We don't give any debug location to Initial, because it is inserted
452	// into the loop's preheader. A debug location inside the loop will cause
453	// a misleading stepping when debugging. The test update-debugloc-store
454	// -forwarded.ll checks this.
455
456	PHINode *PHI = PHINode::Create(Ty: Initial->getType(), NumReservedValues: `2`, NameStr: "store_forwarded");
457	PHI->insertBefore(InsertPos: L->getHeader()->begin());
458	PHI->addIncoming(V: Initial, BB: PH);
459
460	Type *LoadType = Initial->getType();
461	Type *StoreType = Cand.Store->getValueOperand()->getType();
462	auto &DL = Cand.Load->getDataLayout();
463	(void)DL;
464
465	assert(DL.getTypeSizeInBits(LoadType) == DL.getTypeSizeInBits(StoreType) &&
466	"The type sizes should match!");
467
468	Value *StoreValue = Cand.Store->getValueOperand();
469	if (LoadType != StoreType) {
470	StoreValue = CastInst::CreateBitOrPointerCast(S: StoreValue, Ty: LoadType,
471	Name: "store_forward_cast",
472	InsertBefore: Cand.Store->getIterator());
473	// Because it casts the old `load` value and is used by the new `phi`
474	// which replaces the old `load`, we give the `load`'s debug location
475	// to it.
476	cast<Instruction>(Val: StoreValue)->setDebugLoc(Cand.Load->getDebugLoc());
477	}
478
479	PHI->addIncoming(V: StoreValue, BB: L->getLoopLatch());
480
481	Cand.Load->replaceAllUsesWith(V: PHI);
482	PHI->setDebugLoc(Cand.Load->getDebugLoc());
483	}
484
485	/// Top-level driver for each loop: find store->load forwarding
486	/// candidates, add run-time checks and perform transformation.
487	bool processLoop() {
488	LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
489	<< "\" checking " << *L << "\n");
490
491	// Look for store-to-load forwarding cases across the
492	// backedge. E.g.:
493	//
494	// loop:
495	// %x = load %gep_i
496	// = ... %x
497	// store %y, %gep_i_plus_1
498	//
499	// =>
500	//
501	// ph:
502	// %x.initial = load %gep_0
503	// loop:
504	// %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
505	// %x = load %gep_i <---- now dead
506	// = ... %x.storeforward
507	// store %y, %gep_i_plus_1
508
509	// First start with store->load dependences.
510	auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
511	if (StoreToLoadDependences.empty())
512	return false;
513
514	// Generate an index for each load and store according to the original
515	// program order. This will be used later.
516	InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
517
518	// To keep things simple for now, remove those where the load is potentially
519	// fed by multiple stores.
520	removeDependencesFromMultipleStores(Candidates&: StoreToLoadDependences);
521	if (StoreToLoadDependences.empty())
522	return false;
523
524	// Filter the candidates further.
525	SmallVector<StoreToLoadForwardingCandidate, `4`> Candidates;
526	for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
527	LLVM_DEBUG(dbgs() << "Candidate " << Cand);
528
529	// Make sure that the stored values is available everywhere in the loop in
530	// the next iteration.
531	if (!doesStoreDominatesAllLatches(StoreBlock: Cand.Store->getParent(), L, DT))
532	continue;
533
534	// If the load is conditional we can't hoist its 0-iteration instance to
535	// the preheader because that would make it unconditional. Thus we would
536	// access a memory location that the original loop did not access.
537	if (isLoadConditional(Load: Cand.Load, L))
538	continue;
539
540	// Check whether the SCEV difference is the same as the induction step,
541	// thus we load the value in the next iteration.
542	if (!Cand.isDependenceDistanceOfOne(PSE, L))
543	continue;
544
545	assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
546	"Loading from something other than indvar?");
547	assert(
548	isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
549	"Storing to something other than indvar?");
550
551	Candidates.push_back(Elt: Cand);
552	LLVM_DEBUG(
553	dbgs()
554	<< Candidates.size()
555	<< ". Valid store-to-load forwarding across the loop backedge\n");
556	}
557	if (Candidates.empty())
558	return false;
559
560	// Check intervening may-alias stores. These need runtime checks for alias
561	// disambiguation.
562	SmallVector<RuntimePointerCheck, `4`> Checks = collectMemchecks(Candidates);
563
564	// Too many checks are likely to outweigh the benefits of forwarding.
565	if (Checks.size() > Candidates.size() * CheckPerElim) {
566	LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
567	return false;
568	}
569
570	if (LAI.getPSE().getPredicate().getComplexity() >
571	LoadElimSCEVCheckThreshold) {
572	LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
573	return false;
574	}
575
576	if (!L->isLoopSimplifyForm()) {
577	LLVM_DEBUG(dbgs() << "Loop is not is loop-simplify form");
578	return false;
579	}
580
581	if (!Checks.empty() \|\| !LAI.getPSE().getPredicate().isAlwaysTrue()) {
582	if (LAI.hasConvergentOp()) {
583	LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
584	"convergent calls\n");
585	return false;
586	}
587
588	auto *HeaderBB = L->getHeader();
589	auto *F = HeaderBB->getParent();
590	bool OptForSize = F->hasOptSize() \|\|
591	llvm::shouldOptimizeForSize(BB: HeaderBB, PSI, BFI,
592	QueryType: PGSOQueryType::IRPass);
593	if (OptForSize) {
594	LLVM_DEBUG(
595	dbgs() << "Versioning is needed but not allowed when optimizing "
596	"for size.\n");
597	return false;
598	}
599
600	// Point of no-return, start the transformation. First, version the loop
601	// if necessary.
602
603	LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
604	LV.versionLoop();
605
606	// After versioning, some of the candidates' pointers could stop being
607	// SCEVAddRecs. We need to filter them out.
608	auto NoLongerGoodCandidate = [this](
609	const StoreToLoadForwardingCandidate &Cand) {
610	return !isa<SCEVAddRecExpr>(
611	Val: PSE.getSCEV(V: Cand.Load->getPointerOperand())) \|\|
612	!isa<SCEVAddRecExpr>(
613	Val: PSE.getSCEV(V: Cand.Store->getPointerOperand()));
614	};
615	llvm::erase_if(C&: Candidates, P: NoLongerGoodCandidate);
616	}
617
618	// Next, propagate the value stored by the store to the users of the load.
619	// Also for the first iteration, generate the initial value of the load.
620	SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getDataLayout(),
621	"storeforward");
622	for (const auto &Cand : Candidates)
623	propagateStoredValueToLoadUsers(Cand, SEE);
624	NumLoopLoadEliminted += Candidates.size();
625
626	return true;
627	}
628
629	private:
630	Loop *L;
631
632	/// Maps the load/store instructions to their index according to
633	/// program order.
634	DenseMap<Instruction , unsigned*> InstOrder;
635
636	// Analyses used.
637	LoopInfo *LI;
638	const LoopAccessInfo &LAI;
639	DominatorTree *DT;
640	BlockFrequencyInfo *BFI;
641	ProfileSummaryInfo *PSI;
642	PredicatedScalarEvolution PSE;
643	};
644
645	} // end anonymous namespace
646
647	static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI,
648	DominatorTree &DT,
649	BlockFrequencyInfo *BFI,
650	ProfileSummaryInfo *PSI,
651	ScalarEvolution SE, AssumptionCache AC,
652	LoopAccessInfoManager &LAIs) {
653	// Build up a worklist of inner-loops to transform to avoid iterator
654	// invalidation.
655	// FIXME: This logic comes from other passes that actually change the loop
656	// nest structure. It isn't clear this is necessary (or useful) for a pass
657	// which merely optimizes the use of loads in a loop.
658	SmallVector<Loop *, `8`> Worklist;
659
660	bool Changed = false;
661
662	for (Loop *TopLevelLoop : LI)
663	for (Loop *L : depth_first(G: TopLevelLoop)) {
664	Changed \|= simplifyLoop(L, DT: &DT, LI: &LI, SE, AC, /MSSAU/ nullptr, PreserveLCSSA: false);
665	// We only handle inner-most loops.
666	if (L->isInnermost())
667	Worklist.push_back(Elt: L);
668	}
669
670	// Now walk the identified inner loops.
671	for (Loop *L : Worklist) {
672	// Match historical behavior
673	if (!L->isRotatedForm() \|\| !L->getExitingBlock())
674	continue;
675	// The actual work is performed by LoadEliminationForLoop.
676	LoadEliminationForLoop LEL(L, &LI, LAIs.getInfo(L&: *L), &DT, BFI, PSI);
677	Changed \|= LEL.processLoop();
678	if (Changed)
679	LAIs.clear();
680	}
681	return Changed;
682	}
683
684	PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
685	FunctionAnalysisManager &AM) {
686	auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
687	// There are no loops in the function. Return before computing other expensive
688	// analyses.
689	if (LI.empty())
690	return PreservedAnalyses::all();
691	auto &SE = AM.getResult<ScalarEvolutionAnalysis>(IR&: F);
692	auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
693	auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
694	auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
695	auto PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: F.getParent());
696	auto *BFI = (PSI && PSI->hasProfileSummary()) ?
697	&AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr;
698	LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(IR&: F);
699
700	bool Changed = eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, SE: &SE, AC: &AC, LAIs);
701
702	if (!Changed)
703	return PreservedAnalyses::all();
704
705	PreservedAnalyses PA;
706	PA.preserve<DominatorTreeAnalysis>();
707	PA.preserve<LoopAnalysis>();
708	return PA;
709	}
710

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp