LICM.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/LICM.cpp]

1	//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass performs loop invariant code motion, attempting to remove as much
10	// code from the body of a loop as possible. It does this by either hoisting
11	// code into the preheader block, or by sinking code to the exit blocks if it is
12	// safe. This pass also promotes must-aliased memory locations in the loop to
13	// live in registers, thus hoisting and sinking "invariant" loads and stores.
14	//
15	// Hoisting operations out of loops is a canonicalization transform. It
16	// enables and simplifies subsequent optimizations in the middle-end.
17	// Rematerialization of hoisted instructions to reduce register pressure is the
18	// responsibility of the back-end, which has more accurate information about
19	// register pressure and also handles other optimizations than LICM that
20	// increase live-ranges.
21	//
22	// This pass uses alias analysis for two purposes:
23	//
24	// 1. Moving loop invariant loads and calls out of loops. If we can determine
25	// that a load or call inside of a loop never aliases anything stored to,
26	// we can hoist it or sink it like any other instruction.
27	// 2. Scalar Promotion of Memory - If there is a store instruction inside of
28	// the loop, we try to move the store to happen AFTER the loop instead of
29	// inside of the loop. This can only happen if a few conditions are true:
30	// A. The pointer stored through is loop invariant
31	// B. There are no stores or loads in the loop which _may_ alias the
32	// pointer. There are no calls in the loop which mod/ref the pointer.
33	// If these conditions are true, we can promote the loads and stores in the
34	// loop of the pointer to use a temporary alloca'd variable. We then use
35	// the SSAUpdater to construct the appropriate SSA form for the value.
36	//
37	//===----------------------------------------------------------------------===//
38
39	#include "llvm/Transforms/Scalar/LICM.h"
40	#include "llvm/ADT/PriorityWorklist.h"
41	#include "llvm/ADT/SetOperations.h"
42	#include "llvm/ADT/Statistic.h"
43	#include "llvm/Analysis/AliasAnalysis.h"
44	#include "llvm/Analysis/AliasSetTracker.h"
45	#include "llvm/Analysis/AssumptionCache.h"
46	#include "llvm/Analysis/CaptureTracking.h"
47	#include "llvm/Analysis/DomTreeUpdater.h"
48	#include "llvm/Analysis/GuardUtils.h"
49	#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
50	#include "llvm/Analysis/Loads.h"
51	#include "llvm/Analysis/LoopInfo.h"
52	#include "llvm/Analysis/LoopIterator.h"
53	#include "llvm/Analysis/LoopNestAnalysis.h"
54	#include "llvm/Analysis/LoopPass.h"
55	#include "llvm/Analysis/MemorySSA.h"
56	#include "llvm/Analysis/MemorySSAUpdater.h"
57	#include "llvm/Analysis/MustExecute.h"
58	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
59	#include "llvm/Analysis/ScalarEvolution.h"
60	#include "llvm/Analysis/TargetLibraryInfo.h"
61	#include "llvm/Analysis/TargetTransformInfo.h"
62	#include "llvm/Analysis/ValueTracking.h"
63	#include "llvm/IR/CFG.h"
64	#include "llvm/IR/Constants.h"
65	#include "llvm/IR/DataLayout.h"
66	#include "llvm/IR/DebugInfoMetadata.h"
67	#include "llvm/IR/DerivedTypes.h"
68	#include "llvm/IR/Dominators.h"
69	#include "llvm/IR/IRBuilder.h"
70	#include "llvm/IR/Instructions.h"
71	#include "llvm/IR/IntrinsicInst.h"
72	#include "llvm/IR/LLVMContext.h"
73	#include "llvm/IR/Metadata.h"
74	#include "llvm/IR/PatternMatch.h"
75	#include "llvm/IR/PredIteratorCache.h"
76	#include "llvm/InitializePasses.h"
77	#include "llvm/Support/CommandLine.h"
78	#include "llvm/Support/Debug.h"
79	#include "llvm/Support/raw_ostream.h"
80	#include "llvm/Transforms/Scalar.h"
81	#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
82	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
83	#include "llvm/Transforms/Utils/Local.h"
84	#include "llvm/Transforms/Utils/LoopUtils.h"
85	#include "llvm/Transforms/Utils/SSAUpdater.h"
86	#include <algorithm>
87	#include <utility>
88	using namespace llvm;
89
90	namespace llvm {
91	class LPMUpdater;
92	} // namespace llvm
93
94	#define DEBUG_TYPE "licm"
95
96	STATISTIC(NumCreatedBlocks, "Number of blocks created");
97	STATISTIC(NumClonedBranches, "Number of branches cloned");
98	STATISTIC(NumSunk, "Number of instructions sunk out of loop");
99	STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
100	STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
101	STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
102	STATISTIC(NumPromotionCandidates, "Number of promotion candidates");
103	STATISTIC(NumLoadPromoted, "Number of load-only promotions");
104	STATISTIC(NumLoadStorePromoted, "Number of load and store promotions");
105	STATISTIC(NumMinMaxHoisted,
106	"Number of min/max expressions hoisted out of the loop");
107	STATISTIC(NumGEPsHoisted,
108	"Number of geps reassociated and hoisted out of the loop");
109	STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
110	"and hoisted out of the loop");
111	STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
112	"reassociated and hoisted out of the loop");
113	STATISTIC(NumIntAssociationsHoisted,
114	"Number of invariant int expressions "
115	"reassociated and hoisted out of the loop");
116	STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
117	"reassociated and hoisted out of the loop");
118
119	/// Memory promotion is enabled by default.
120	static cl::opt<bool>
121	DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(Val: false),
122	cl::desc ("Disable memory promotion in LICM pass"));
123
124	static cl::opt<bool> ControlFlowHoisting(
125	"licm-control-flow-hoisting", cl::Hidden, cl::init(Val: false),
126	cl::desc ("Enable control flow (and PHI) hoisting in LICM"));
127
128	static cl::opt<bool>
129	SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(Val: false),
130	cl::desc ("Force thread model single in LICM pass"));
131
132	static cl::opt<uint32_t> MaxNumUsesTraversed(
133	"licm-max-num-uses-traversed", cl::Hidden, cl::init(Val: `8`),
134	cl::desc ("Max num uses visited for identifying load "
135	"invariance in loop using invariant start (default = 8)"));
136
137	static cl::opt<unsigned> FPAssociationUpperLimit(
138	"licm-max-num-fp-reassociations", cl::init(Val: `5U`), cl::Hidden,
139	cl::desc (
140	"Set upper limit for the number of transformations performed "
141	"during a single round of hoisting the reassociated expressions."));
142
143	static cl::opt<unsigned> IntAssociationUpperLimit(
144	"licm-max-num-int-reassociations", cl::init(Val: `5U`), cl::Hidden,
145	cl::desc (
146	"Set upper limit for the number of transformations performed "
147	"during a single round of hoisting the reassociated expressions."));
148
149	// Experimental option to allow imprecision in LICM in pathological cases, in
150	// exchange for faster compile. This is to be removed if MemorySSA starts to
151	// address the same issue. LICM calls MemorySSAWalker's
152	// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
153	// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
154	// which may not be precise, since optimizeUses is capped. The result is
155	// correct, but we may not get as "far up" as possible to get which access is
156	// clobbering the one queried.
157	cl::opt<unsigned> llvm::SetLicmMssaOptCap(
158	"licm-mssa-optimization-cap", cl::init(Val: `100`), cl::Hidden,
159	cl::desc ("Enable imprecision in LICM in pathological cases, in exchange "
160	"for faster compile. Caps the MemorySSA clobbering calls."));
161
162	// Experimentally, memory promotion carries less importance than sinking and
163	// hoisting. Limit when we do promotion when using MemorySSA, in order to save
164	// compile time.
165	cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
166	"licm-mssa-max-acc-promotion", cl::init(Val: `250`), cl::Hidden,
167	cl::desc ("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
168	"effect. When MSSA in LICM is enabled, then this is the maximum "
169	"number of accesses allowed to be present in a loop in order to "
170	"enable memory promotion."));
171
172	namespace llvm {
173	extern cl::opt<bool> ProfcheckDisableMetadataFixes;
174	} // end namespace llvm
175
176	static bool inSubLoop(BasicBlock BB, Loop CurLoop, LoopInfo *LI);
177	static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
178	const LoopSafetyInfo *SafetyInfo,
179	TargetTransformInfo *TTI,
180	bool &FoldableInLoop, bool LoopNestMode);
181	static void hoist(Instruction &I, const DominatorTree DT, const* Loop *CurLoop,
182	BasicBlock Dest, ICFLoopSafetyInfo SafetyInfo,
183	MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
184	OptimizationRemarkEmitter *ORE);
185	static bool sink(Instruction &I, LoopInfo LI, DominatorTree DT,
186	const Loop CurLoop, ICFLoopSafetyInfo SafetyInfo,
187	MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE);
188	static bool isSafeToExecuteUnconditionally(
189	Instruction &Inst, const DominatorTree DT, const* TargetLibraryInfo *TLI,
190	const Loop CurLoop, const* LoopSafetyInfo *SafetyInfo,
191	OptimizationRemarkEmitter ORE, const* Instruction *CtxI,
192	AssumptionCache AC, bool* AllowSpeculation);
193	static bool noConflictingReadWrites(Instruction I, MemorySSA MSSA,
194	AAResults AA, Loop CurLoop,
195	SinkAndHoistLICMFlags &Flags);
196	static bool pointerInvalidatedByLoop(MemorySSA MSSA, MemoryUse MU,
197	Loop *CurLoop, Instruction &I,
198	SinkAndHoistLICMFlags &Flags,
199	bool InvariantGroup);
200	static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
201	MemoryUse &MU);
202	/// Aggregates various functions for hoisting computations out of loop.
203	static bool hoistArithmetics(Instruction &I, Loop &L,
204	ICFLoopSafetyInfo &SafetyInfo,
205	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
206	DominatorTree *DT);
207	static Instruction *cloneInstructionInExitBlock(
208	Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
209	const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
210
211	static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
212	MemorySSAUpdater &MSSAU);
213
214	static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
215	ICFLoopSafetyInfo &SafetyInfo,
216	MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
217
218	static void foreachMemoryAccess(MemorySSA MSSA, Loop L,
219	function_ref<void(Instruction *)> Fn);
220	using PointersAndHasReadsOutsideSet =
221	std::pair<SmallSetVector<Value , `8`>, bool*>;
222	static SmallVector<PointersAndHasReadsOutsideSet, `0`>
223	collectPromotionCandidates(MemorySSA MSSA, AliasAnalysis AA, Loop *L);
224
225	namespace {
226	struct LoopInvariantCodeMotion {
227	bool runOnLoop(Loop L, AAResults AA, LoopInfo LI, DominatorTree DT,
228	AssumptionCache AC, TargetLibraryInfo TLI,
229	TargetTransformInfo TTI, ScalarEvolution SE, MemorySSA *MSSA,
230	OptimizationRemarkEmitter ORE, bool* LoopNestMode = false);
231
232	LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
233	unsigned LicmMssaNoAccForPromotionCap,
234	bool LicmAllowSpeculation)
235	: LicmMssaOptCap(LicmMssaOptCap),
236	LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
237	LicmAllowSpeculation(LicmAllowSpeculation) {}
238
239	private:
240	unsigned LicmMssaOptCap;
241	unsigned LicmMssaNoAccForPromotionCap;
242	bool LicmAllowSpeculation;
243	};
244
245	struct LegacyLICMPass : public LoopPass {
246	static char ID; // Pass identification, replacement for typeid
247	LegacyLICMPass(
248	unsigned LicmMssaOptCap = SetLicmMssaOptCap,
249	unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
250	bool LicmAllowSpeculation = true)
251	: LoopPass (ID), LICM (LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
252	LicmAllowSpeculation) {
253	initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
254	}
255
256	bool runOnLoop(Loop *L, LPPassManager &LPM) override {
257	if (skipLoop(L))
258	return false;
259
260	LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
261	<< L->getHeader()->getNameOrAsOperand() << "\n");
262
263	Function *F = L->getHeader()->getParent();
264
265	auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
266	MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
267	// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
268	// pass. Function analyses need to be preserved across loop transformations
269	// but ORE cannot be preserved (see comment before the pass definition).
270	OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
271	return LICM.runOnLoop(
272	L, AA: &getAnalysis<AAResultsWrapperPass>().getAAResults(),
273	LI: &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
274	DT: &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
275	AC: &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F&: *F),
276	TLI: &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F: *F),
277	TTI: &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F: *F),
278	SE: SE ? &SE->getSE() : nullptr, MSSA, ORE: &ORE);
279	}
280
281	/// This transformation requires natural loop information & requires that
282	/// loop preheaders be inserted into the CFG...
283	///
284	void getAnalysisUsage(AnalysisUsage &AU) const override {
285	AU.addPreserved<DominatorTreeWrapperPass>();
286	AU.addPreserved<LoopInfoWrapperPass>();
287	AU.addRequired<TargetLibraryInfoWrapperPass>();
288	AU.addRequired<MemorySSAWrapperPass>();
289	AU.addPreserved<MemorySSAWrapperPass>();
290	AU.addRequired<TargetTransformInfoWrapperPass>();
291	AU.addRequired<AssumptionCacheTracker>();
292	getLoopAnalysisUsage(AU);
293	LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
294	AU.addPreserved<LazyBlockFrequencyInfoPass>();
295	AU.addPreserved<LazyBranchProbabilityInfoPass>();
296	}
297
298	private:
299	LoopInvariantCodeMotion LICM;
300	};
301	} // namespace
302
303	PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
304	LoopStandardAnalysisResults &AR, LPMUpdater &) {
305	if (!AR.MSSA)
306	reportFatalUsageError(reason: "LICM requires MemorySSA (loop-mssa)");
307
308	// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
309	// pass. Function analyses need to be preserved across loop transformations
310	// but ORE cannot be preserved (see comment before the pass definition).
311	OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
312
313	LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
314	Opts.AllowSpeculation);
315	if (!LICM.runOnLoop(L: &L, AA: &AR.AA, LI: &AR.LI, DT: &AR.DT, AC: &AR.AC, TLI: &AR.TLI, TTI: &AR.TTI,
316	SE: &AR.SE, MSSA: AR.MSSA, ORE: &ORE))
317	return PreservedAnalyses::all();
318
319	auto PA = getLoopPassPreservedAnalyses();
320	PA.preserve<MemorySSAAnalysis>();
321
322	return PA;
323	}
324
325	void LICMPass::printPipeline(
326	raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
327	static_cast<PassInfoMixin<LICMPass> >(this*)->printPipeline(
328	OS, MapClassName2PassName);
329
330	OS << `'<'`;
331	OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
332	OS << `'>'`;
333	}
334
335	PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
336	LoopStandardAnalysisResults &AR,
337	LPMUpdater &) {
338	if (!AR.MSSA)
339	reportFatalUsageError(reason: "LNICM requires MemorySSA (loop-mssa)");
340
341	// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
342	// pass. Function analyses need to be preserved across loop transformations
343	// but ORE cannot be preserved (see comment before the pass definition).
344	OptimizationRemarkEmitter ORE(LN.getParent());
345
346	LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
347	Opts.AllowSpeculation);
348
349	Loop &OutermostLoop = LN.getOutermostLoop();
350	bool Changed = LICM.runOnLoop(L: &OutermostLoop, AA: &AR.AA, LI: &AR.LI, DT: &AR.DT, AC: &AR.AC,
351	TLI: &AR.TLI, TTI: &AR.TTI, SE: &AR.SE, MSSA: AR.MSSA, ORE: &ORE, LoopNestMode: true);
352
353	if (!Changed)
354	return PreservedAnalyses::all();
355
356	auto PA = getLoopPassPreservedAnalyses();
357
358	PA.preserve<DominatorTreeAnalysis>();
359	PA.preserve<LoopAnalysis>();
360	PA.preserve<MemorySSAAnalysis>();
361
362	return PA;
363	}
364
365	void LNICMPass::printPipeline(
366	raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
367	static_cast<PassInfoMixin<LNICMPass> >(this*)->printPipeline(
368	OS, MapClassName2PassName);
369
370	OS << `'<'`;
371	OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
372	OS << `'>'`;
373	}
374
375	char LegacyLICMPass::ID = `0`;
376	INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
377	false, false)
378	INITIALIZE_PASS_DEPENDENCY(LoopPass)
379	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
380	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
381	INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
382	INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
383	INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
384	false)
385
386	Pass llvm::createLICMPass() { return* new LegacyLICMPass (); }
387
388	llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop &L,
389	MemorySSA &MSSA)
390	: SinkAndHoistLICMFlags (SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
391	IsSink, L, MSSA) {}
392
393	llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
394	unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
395	Loop &L, MemorySSA &MSSA)
396	: LicmMssaOptCap(LicmMssaOptCap),
397	LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
398	IsSink(IsSink) {
399	unsigned AccessCapCount = `0`;
400	for (auto *BB : L.getBlocks())
401	if (const auto *Accesses = MSSA.getBlockAccesses(BB))
402	for (const auto &MA : *Accesses) {
403	(void)MA;
404	++AccessCapCount;
405	if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
406	NoOfMemAccTooLarge = true;
407	return;
408	}
409	}
410	}
411
412	/// Hoist expressions out of the specified loop. Note, alias info for inner
413	/// loop is not preserved so it is not a good idea to run LICM multiple
414	/// times on one loop.
415	bool LoopInvariantCodeMotion::runOnLoop(Loop L, AAResults AA, LoopInfo *LI,
416	DominatorTree DT, AssumptionCache AC,
417	TargetLibraryInfo *TLI,
418	TargetTransformInfo *TTI,
419	ScalarEvolution SE, MemorySSA MSSA,
420	OptimizationRemarkEmitter *ORE,
421	bool LoopNestMode) {
422	bool Changed = false;
423
424	assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
425
426	// If this loop has metadata indicating that LICM is not to be performed then
427	// just exit.
428	if (hasDisableLICMTransformsHint(L)) {
429	return false;
430	}
431
432	// Don't sink stores from loops with coroutine suspend instructions.
433	// LICM would sink instructions into the default destination of
434	// the coroutine switch. The default destination of the switch is to
435	// handle the case where the coroutine is suspended, by which point the
436	// coroutine frame may have been destroyed. No instruction can be sunk there.
437	// FIXME: This would unfortunately hurt the performance of coroutines, however
438	// there is currently no general solution for this. Similar issues could also
439	// potentially happen in other passes where instructions are being moved
440	// across that edge.
441	bool HasCoroSuspendInst = llvm::any_of(Range: L->getBlocks(), P: [](BasicBlock *BB) {
442	using namespace PatternMatch;
443	return any_of(Range: make_pointer_range(Range&: *BB),
444	P: match_fn(P: m_Intrinsic<Intrinsic::coro_suspend>()));
445	});
446
447	MemorySSAUpdater MSSAU(MSSA);
448	SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
449	/IsSink=/true, L, MSSA);
450
451	// Get the preheader block to move instructions into...
452	BasicBlock *Preheader = L->getLoopPreheader();
453
454	// Compute loop safety information.
455	ICFLoopSafetyInfo SafetyInfo;
456	SafetyInfo.computeLoopSafetyInfo(CurLoop: L);
457
458	// We want to visit all of the instructions in this loop... that are not parts
459	// of our subloops (they have already had their invariants hoisted out of
460	// their loop, into this loop, so there is no need to process the BODIES of
461	// the subloops).
462	//
463	// Traverse the body of the loop in depth first order on the dominator tree so
464	// that we are guaranteed to see definitions before we see uses. This allows
465	// us to sink instructions in one pass, without iteration. After sinking
466	// instructions, we perform another pass to hoist them out of the loop.
467	if (L->hasDedicatedExits())
468	Changed \|=
469	LoopNestMode
470	? sinkRegionForLoopNest(DT->getNode(BB: L->getHeader()), AA, LI, DT,
471	TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
472	: sinkRegion(DT->getNode(BB: L->getHeader()), AA, LI, DT, TLI, TTI, CurLoop: L,
473	MSSAU, &SafetyInfo, Flags, ORE);
474	Flags.setIsSink(false);
475	if (Preheader)
476	Changed \|= hoistRegion(DT->getNode(BB: L->getHeader()), AA, LI, DT, AC, TLI, L,
477	MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
478	AllowSpeculation: LicmAllowSpeculation);
479
480	// Now that all loop invariants have been removed from the loop, promote any
481	// memory references to scalars that we can.
482	// Don't sink stores from loops without dedicated block exits. Exits
483	// containing indirect branches are not transformed by loop simplify,
484	// make sure we catch that. An additional load may be generated in the
485	// preheader for SSA updater, so also avoid sinking when no preheader
486	// is available.
487	if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
488	!Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
489	// Figure out the loop exits and their insertion points
490	SmallVector<BasicBlock *, `8`> ExitBlocks;
491	L->getUniqueExitBlocks(ExitBlocks);
492
493	// We can't insert into a catchswitch.
494	bool HasCatchSwitch = llvm::any_of(Range&: ExitBlocks, P: [](BasicBlock *Exit) {
495	return isa<CatchSwitchInst>(Val: Exit->getTerminator());
496	});
497
498	if (!HasCatchSwitch) {
499	SmallVector<BasicBlock::iterator, `8`> InsertPts;
500	SmallVector<MemoryAccess *, `8`> MSSAInsertPts;
501	InsertPts.reserve(N: ExitBlocks.size());
502	MSSAInsertPts.reserve(N: ExitBlocks.size());
503	for (BasicBlock *ExitBlock : ExitBlocks) {
504	InsertPts.push_back(Elt: ExitBlock->getFirstInsertionPt());
505	MSSAInsertPts.push_back(Elt: nullptr);
506	}
507
508	PredIteratorCache PIC;
509
510	// Promoting one set of accesses may make the pointers for another set
511	// loop invariant, so run this in a loop.
512	bool Promoted = false;
513	bool LocalPromoted;
514	do {
515	LocalPromoted = false;
516	for (auto [PointerMustAliases, HasReadsOutsideSet] :
517	collectPromotionCandidates(MSSA, AA, L)) {
518	LocalPromoted \|= promoteLoopAccessesToScalars(
519	PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
520	DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
521	AllowSpeculation: LicmAllowSpeculation, HasReadsOutsideSet);
522	}
523	Promoted \|= LocalPromoted;
524	} while (LocalPromoted);
525
526	// Once we have promoted values across the loop body we have to
527	// recursively reform LCSSA as any nested loop may now have values defined
528	// within the loop used in the outer loop.
529	// FIXME: This is really heavy handed. It would be a bit better to use an
530	// SSAUpdater strategy during promotion that was LCSSA aware and reformed
531	// it as it went.
532	if (Promoted)
533	formLCSSARecursively(L&: L, DT: DT, LI, SE);
534
535	Changed \|= Promoted;
536	}
537	}
538
539	// Check that neither this loop nor its parent have had LCSSA broken. LICM is
540	// specifically moving instructions across the loop boundary and so it is
541	// especially in need of basic functional correctness checking here.
542	assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
543	assert((L->isOutermost() \|\| L->getParentLoop()->isLCSSAForm(*DT)) &&
544	"Parent loop not left in LCSSA form after LICM!");
545
546	if (VerifyMemorySSA)
547	MSSA->verifyMemorySSA();
548
549	if (Changed && SE)
550	SE->forgetLoopDispositions();
551	return Changed;
552	}
553
554	/// Walk the specified region of the CFG (defined by all blocks dominated by
555	/// the specified block, and that are in the current loop) in reverse depth
556	/// first order w.r.t the DominatorTree. This allows us to visit uses before
557	/// definitions, allowing us to sink a loop body in one pass without iteration.
558	///
559	bool llvm::sinkRegion(DomTreeNode N, AAResults AA, LoopInfo *LI,
560	DominatorTree DT, TargetLibraryInfo TLI,
561	TargetTransformInfo TTI, Loop CurLoop,
562	MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
563	SinkAndHoistLICMFlags &Flags,
564	OptimizationRemarkEmitter ORE, Loop OutermostLoop) {
565
566	// Verify inputs.
567	assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
568	CurLoop != nullptr && SafetyInfo != nullptr &&
569	"Unexpected input to sinkRegion.");
570
571	// We want to visit children before parents. We will enqueue all the parents
572	// before their children in the worklist and process the worklist in reverse
573	// order.
574	SmallVector<BasicBlock *, `16`> Worklist =
575	collectChildrenInLoop(DT, N, CurLoop);
576
577	bool Changed = false;
578	for (BasicBlock *BB : reverse(C&: Worklist)) {
579	// subloop (which would already have been processed).
580	if (inSubLoop(BB, CurLoop, LI))
581	continue;
582
583	for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
584	Instruction &I = *--II;
585
586	// The instruction is not used in the loop if it is dead. In this case,
587	// we just delete it instead of sinking it.
588	if (isInstructionTriviallyDead(I: &I, TLI)) {
589	LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << `'\n'`);
590	salvageKnowledge(I: &I);
591	salvageDebugInfo(I);
592	++II;
593	eraseInstruction(I, SafetyInfo&: *SafetyInfo, MSSAU);
594	Changed = true;
595	continue;
596	}
597
598	// Check to see if we can sink this instruction to the exit blocks
599	// of the loop. We can do this if the all users of the instruction are
600	// outside of the loop. In this case, it doesn't even matter if the
601	// operands of the instruction are loop invariant.
602	//
603	bool FoldableInLoop = false;
604	bool LoopNestMode = OutermostLoop != nullptr;
605	if (!I.mayHaveSideEffects() &&
606	isNotUsedOrFoldableInLoop(I, CurLoop: LoopNestMode ? OutermostLoop : CurLoop,
607	SafetyInfo, TTI, FoldableInLoop,
608	LoopNestMode) &&
609	canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, TargetExecutesOncePerLoop: true, LICMFlags&: Flags, ORE)) {
610	if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
611	if (!FoldableInLoop) {
612	++II;
613	salvageDebugInfo(I);
614	eraseInstruction(I, SafetyInfo&: *SafetyInfo, MSSAU);
615	}
616	Changed = true;
617	}
618	}
619	}
620	}
621	if (VerifyMemorySSA)
622	MSSAU.getMemorySSA()->verifyMemorySSA();
623	return Changed;
624	}
625
626	bool llvm::sinkRegionForLoopNest(DomTreeNode N, AAResults AA, LoopInfo *LI,
627	DominatorTree DT, TargetLibraryInfo TLI,
628	TargetTransformInfo TTI, Loop CurLoop,
629	MemorySSAUpdater &MSSAU,
630	ICFLoopSafetyInfo *SafetyInfo,
631	SinkAndHoistLICMFlags &Flags,
632	OptimizationRemarkEmitter *ORE) {
633
634	bool Changed = false;
635	SmallPriorityWorklist<Loop *, `4`> Worklist;
636	Worklist.insert(X: CurLoop);
637	appendLoopsToWorklist(*CurLoop, Worklist);
638	while (!Worklist.empty()) {
639	Loop *L = Worklist.pop_back_val();
640	Changed \|= sinkRegion(N: DT->getNode(BB: L->getHeader()), AA, LI, DT, TLI, TTI, CurLoop: L,
641	MSSAU, SafetyInfo, Flags, ORE, OutermostLoop: CurLoop);
642	}
643	return Changed;
644	}
645
646	namespace {
647	// This is a helper class for hoistRegion to make it able to hoist control flow
648	// in order to be able to hoist phis. The way this works is that we initially
649	// start hoisting to the loop preheader, and when we see a loop invariant branch
650	// we make note of this. When we then come to hoist an instruction that's
651	// conditional on such a branch we duplicate the branch and the relevant control
652	// flow, then hoist the instruction into the block corresponding to its original
653	// block in the duplicated control flow.
654	class ControlFlowHoister {
655	private:
656	// Information about the loop we are hoisting from
657	LoopInfo *LI;
658	DominatorTree *DT;
659	Loop *CurLoop;
660	MemorySSAUpdater &MSSAU;
661
662	// A map of blocks in the loop to the block their instructions will be hoisted
663	// to.
664	DenseMap<BasicBlock , BasicBlock > HoistDestinationMap;
665
666	// The branches that we can hoist, mapped to the block that marks a
667	// convergence point of their control flow.
668	DenseMap<CondBrInst , BasicBlock > HoistableBranches;
669
670	public:
671	ControlFlowHoister(LoopInfo LI, DominatorTree DT, Loop *CurLoop,
672	MemorySSAUpdater &MSSAU)
673	: LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
674
675	void registerPossiblyHoistableBranch(CondBrInst *BI) {
676	// We can only hoist conditional branches with loop invariant operands.
677	if (!ControlFlowHoisting \|\| !CurLoop->hasLoopInvariantOperands(I: BI))
678	return;
679
680	// The branch destinations need to be in the loop, and we don't gain
681	// anything by duplicating conditional branches with duplicate successors,
682	// as it's essentially the same as an unconditional branch.
683	BasicBlock *TrueDest = BI->getSuccessor(i: `0`);
684	BasicBlock *FalseDest = BI->getSuccessor(i: `1`);
685	if (!CurLoop->contains(BB: TrueDest) \|\| !CurLoop->contains(BB: FalseDest) \|\|
686	TrueDest == FalseDest)
687	return;
688
689	// We can hoist BI if one branch destination is the successor of the other,
690	// or both have common successor which we check by seeing if the
691	// intersection of their successors is non-empty.
692	// TODO: This could be expanded to allowing branches where both ends
693	// eventually converge to a single block.
694	SmallPtrSet<BasicBlock *, `4`> TrueDestSucc(llvm::from_range,
695	successors(BB: TrueDest));
696	SmallPtrSet<BasicBlock *, `4`> FalseDestSucc(llvm::from_range,
697	successors(BB: FalseDest));
698	BasicBlock CommonSucc = nullptr*;
699	if (TrueDestSucc.count(Ptr: FalseDest)) {
700	CommonSucc = FalseDest;
701	} else if (FalseDestSucc.count(Ptr: TrueDest)) {
702	CommonSucc = TrueDest;
703	} else {
704	set_intersect(S1&: TrueDestSucc, S2: FalseDestSucc);
705	// If there's one common successor use that.
706	if (TrueDestSucc.size() == `1`)
707	CommonSucc = *TrueDestSucc.begin();
708	// If there's more than one pick whichever appears first in the block list
709	// (we can't use the value returned by TrueDestSucc.begin() as it's
710	// unpredicatable which element gets returned).
711	else if (!TrueDestSucc.empty()) {
712	Function *F = TrueDest->getParent();
713	auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(Ptr: &BB); };
714	auto It = llvm::find_if(Range&: *F, P: IsSucc);
715	assert(It != F->end() && "Could not find successor in function");
716	CommonSucc = &*It;
717	}
718	}
719	// The common successor has to be dominated by the branch, as otherwise
720	// there will be some other path to the successor that will not be
721	// controlled by this branch so any phi we hoist would be controlled by the
722	// wrong condition. This also takes care of avoiding hoisting of loop back
723	// edges.
724	// TODO: In some cases this could be relaxed if the successor is dominated
725	// by another block that's been hoisted and we can guarantee that the
726	// control flow has been replicated exactly.
727	if (CommonSucc && DT->dominates(Def: BI, BB: CommonSucc))
728	HoistableBranches [BI] = CommonSucc;
729	}
730
731	bool canHoistPHI(PHINode *PN) {
732	// The phi must have loop invariant operands.
733	if (!ControlFlowHoisting \|\| !CurLoop->hasLoopInvariantOperands(I: PN))
734	return false;
735	// We can hoist phis if the block they are in is the target of hoistable
736	// branches which cover all of the predecessors of the block.
737	BasicBlock *BB = PN->getParent();
738	SmallPtrSet<BasicBlock *, `8`> PredecessorBlocks(llvm::from_range,
739	predecessors(BB));
740	// If we have less predecessor blocks than predecessors then the phi will
741	// have more than one incoming value for the same block which we can't
742	// handle.
743	// TODO: This could be handled be erasing some of the duplicate incoming
744	// values.
745	if (PredecessorBlocks.size() != pred_size(BB))
746	return false;
747	for (auto &Pair : HoistableBranches) {
748	if (Pair.second == BB) {
749	// Which blocks are predecessors via this branch depends on if the
750	// branch is triangle-like or diamond-like.
751	if (Pair.first->getSuccessor(i: `0`) == BB) {
752	PredecessorBlocks.erase(Ptr: Pair.first->getParent());
753	PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: `1`));
754	} else if (Pair.first->getSuccessor(i: `1`) == BB) {
755	PredecessorBlocks.erase(Ptr: Pair.first->getParent());
756	PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: `0`));
757	} else {
758	PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: `0`));
759	PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: `1`));
760	}
761	}
762	}
763	// PredecessorBlocks will now be empty if for every predecessor of BB we
764	// found a hoistable branch source.
765	return PredecessorBlocks.empty();
766	}
767
768	BasicBlock getOrCreateHoistedBlock(BasicBlock BB) {
769	if (!ControlFlowHoisting)
770	return CurLoop->getLoopPreheader();
771	// If BB has already been hoisted, return that
772	if (auto It = HoistDestinationMap.find(Val: BB); It != HoistDestinationMap.end())
773	return It ->second;
774
775	// Check if this block is conditional based on a pending branch
776	auto HasBBAsSuccessor =
777	[&](DenseMap<CondBrInst , BasicBlock >::value_type &Pair) {
778	return BB != Pair.second && (Pair.first->getSuccessor(i: `0`) == BB \|\|
779	Pair.first->getSuccessor(i: `1`) == BB);
780	};
781	auto It = llvm::find_if(Range&: HoistableBranches, P: HasBBAsSuccessor);
782
783	// If not involved in a pending branch, hoist to preheader
784	BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
785	if (It == HoistableBranches.end()) {
786	LLVM_DEBUG(dbgs() << "LICM using "
787	<< InitialPreheader->getNameOrAsOperand()
788	<< " as hoist destination for "
789	<< BB->getNameOrAsOperand() << "\n");
790	HoistDestinationMap [BB] = InitialPreheader;
791	return InitialPreheader;
792	}
793	CondBrInst *BI = It ->first;
794	assert(std::none_of(std::next(It), HoistableBranches.end(),
795	HasBBAsSuccessor) &&
796	"BB is expected to be the target of at most one branch");
797
798	LLVMContext &C = BB->getContext();
799	BasicBlock *TrueDest = BI->getSuccessor(i: `0`);
800	BasicBlock *FalseDest = BI->getSuccessor(i: `1`);
801	BasicBlock *CommonSucc = HoistableBranches [BI];
802	BasicBlock *HoistTarget = getOrCreateHoistedBlock(BB: BI->getParent());
803
804	// Create hoisted versions of blocks that currently don't have them
805	auto CreateHoistedBlock = [&](BasicBlock *Orig) {
806	auto [It, Inserted] = HoistDestinationMap.try_emplace(Key: Orig);
807	if (!Inserted)
808	return It ->second;
809	BasicBlock *New =
810	BasicBlock::Create(Context&: C, Name: Orig->getName() + ".licm", Parent: Orig->getParent());
811	It ->second = New;
812	DT->addNewBlock(BB: New, DomBB: HoistTarget);
813	if (CurLoop->getParentLoop())
814	CurLoop->getParentLoop()->addBasicBlockToLoop(NewBB: New, LI&: *LI);
815	++NumCreatedBlocks;
816	LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
817	<< " as hoist destination for " << Orig->getName()
818	<< "\n");
819	return New;
820	};
821	BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
822	BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
823	BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
824
825	// Link up these blocks with branches.
826	if (!HoistCommonSucc->getTerminator()) {
827	// The new common successor we've generated will branch to whatever that
828	// hoist target branched to.
829	BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
830	assert(TargetSucc && "Expected hoist target to have a single successor");
831	HoistCommonSucc->moveBefore(MovePos: TargetSucc);
832	UncondBrInst::Create(IfTrue: TargetSucc, InsertBefore: HoistCommonSucc);
833	}
834	if (!HoistTrueDest->getTerminator()) {
835	HoistTrueDest->moveBefore(MovePos: HoistCommonSucc);
836	UncondBrInst::Create(IfTrue: HoistCommonSucc, InsertBefore: HoistTrueDest);
837	}
838	if (!HoistFalseDest->getTerminator()) {
839	HoistFalseDest->moveBefore(MovePos: HoistCommonSucc);
840	UncondBrInst::Create(IfTrue: HoistCommonSucc, InsertBefore: HoistFalseDest);
841	}
842
843	// If BI is being cloned to what was originally the preheader then
844	// HoistCommonSucc will now be the new preheader.
845	if (HoistTarget == InitialPreheader) {
846	// Phis in the loop header now need to use the new preheader.
847	InitialPreheader->replaceSuccessorsPhiUsesWith(New: HoistCommonSucc);
848	MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
849	Old: HoistTarget->getSingleSuccessor(), New: HoistCommonSucc, Preds: {HoistTarget});
850	// The new preheader dominates the loop header.
851	DomTreeNode *PreheaderNode = DT->getNode(BB: HoistCommonSucc);
852	DomTreeNode *HeaderNode = DT->getNode(BB: CurLoop->getHeader());
853	DT->changeImmediateDominator(N: HeaderNode, NewIDom: PreheaderNode);
854	// The preheader hoist destination is now the new preheader, with the
855	// exception of the hoist destination of this branch.
856	for (auto &Pair : HoistDestinationMap)
857	if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
858	Pair.second = HoistCommonSucc;
859	}
860
861	// Now finally clone BI.
862	auto *NewBI =
863	CondBrInst::Create(Cond: BI->getCondition(), IfTrue: HoistTrueDest, IfFalse: HoistFalseDest,
864	InsertBefore: HoistTarget->getTerminator()->getIterator());
865	HoistTarget->getTerminator()->eraseFromParent();
866	// md_prof should also come from the original branch - since the
867	// condition was hoisted, the branch probabilities shouldn't change.
868	if (!ProfcheckDisableMetadataFixes)
869	NewBI->copyMetadata(SrcInst: *BI, WL: {LLVMContext::MD_prof});
870	// FIXME: Issue #152767: debug info should also be the same as the
871	// original branch, if* the user explicitly indicated that.*
872	NewBI->setDebugLoc(HoistTarget->getTerminator()->getDebugLoc());
873
874	++NumClonedBranches;
875
876	assert(CurLoop->getLoopPreheader() &&
877	"Hoisting blocks should not have destroyed preheader");
878	return HoistDestinationMap [BB];
879	}
880	};
881	} // namespace
882
883	/// Walk the specified region of the CFG (defined by all blocks dominated by
884	/// the specified block, and that are in the current loop) in depth first
885	/// order w.r.t the DominatorTree. This allows us to visit definitions before
886	/// uses, allowing us to hoist a loop body in one pass without iteration.
887	///
888	bool llvm::hoistRegion(DomTreeNode N, AAResults AA, LoopInfo *LI,
889	DominatorTree DT, AssumptionCache AC,
890	TargetLibraryInfo TLI, Loop CurLoop,
891	MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
892	ICFLoopSafetyInfo *SafetyInfo,
893	SinkAndHoistLICMFlags &Flags,
894	OptimizationRemarkEmitter ORE, bool* LoopNestMode,
895	bool AllowSpeculation) {
896	// Verify inputs.
897	assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
898	CurLoop != nullptr && SafetyInfo != nullptr &&
899	"Unexpected input to hoistRegion.");
900
901	ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
902
903	// Keep track of instructions that have been hoisted, as they may need to be
904	// re-hoisted if they end up not dominating all of their uses.
905	SmallVector<Instruction *, `16`> HoistedInstructions;
906
907	// For PHI hoisting to work we need to hoist blocks before their successors.
908	// We can do this by iterating through the blocks in the loop in reverse
909	// post-order.
910	LoopBlocksRPO Worklist(CurLoop);
911	Worklist.perform(LI);
912	bool Changed = false;
913	BasicBlock *Preheader = CurLoop->getLoopPreheader();
914	for (BasicBlock *BB : Worklist) {
915	// Only need to process the contents of this block if it is not part of a
916	// subloop (which would already have been processed).
917	if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
918	continue;
919
920	for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
921	// Try hoisting the instruction out to the preheader. We can only do
922	// this if all of the operands of the instruction are loop invariant and
923	// if it is safe to hoist the instruction.
924	// TODO: It may be safe to hoist if we are hoisting to a conditional block
925	// and we have accurately duplicated the control flow from the loop header
926	// to that block.
927	if (CurLoop->hasLoopInvariantOperands(I: &I) &&
928	canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, TargetExecutesOncePerLoop: true, LICMFlags&: Flags, ORE) &&
929	isSafeToExecuteUnconditionally(Inst&: I, DT, TLI, CurLoop, SafetyInfo, ORE,
930	CtxI: Preheader->getTerminator(), AC,
931	AllowSpeculation)) {
932	hoist(I, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
933	MSSAU, SE, ORE);
934	HoistedInstructions.push_back(Elt: &I);
935	Changed = true;
936	continue;
937	}
938
939	// Attempt to remove floating point division out of the loop by
940	// converting it to a reciprocal multiplication.
941	if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
942	CurLoop->isLoopInvariant(V: I.getOperand(i: `1`))) {
943	auto Divisor = I.getOperand(i: `1`);
944	auto One = llvm::ConstantFP::get(Ty: Divisor->getType(), V: `1.0`);
945	auto ReciprocalDivisor = BinaryOperator::CreateFDiv(V1: One, V2: Divisor);
946	ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
947	SafetyInfo->insertInstructionTo(Inst: ReciprocalDivisor, BB: I.getParent());
948	ReciprocalDivisor->insertBefore(InsertPos: I.getIterator());
949	ReciprocalDivisor->setDebugLoc(I.getDebugLoc());
950
951	auto Product =
952	BinaryOperator::CreateFMul(V1: I.getOperand(i: `0`), V2: ReciprocalDivisor);
953	Product->setFastMathFlags(I.getFastMathFlags());
954	SafetyInfo->insertInstructionTo(Inst: Product, BB: I.getParent());
955	Product->insertAfter(InsertPos: I.getIterator());
956	Product->setDebugLoc(I.getDebugLoc());
957	I.replaceAllUsesWith(V: Product);
958	eraseInstruction(I, SafetyInfo&: *SafetyInfo, MSSAU);
959
960	hoist(I&: *ReciprocalDivisor, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB),
961	SafetyInfo, MSSAU, SE, ORE);
962	HoistedInstructions.push_back(Elt: ReciprocalDivisor);
963	Changed = true;
964	continue;
965	}
966
967	auto IsInvariantStart = [&](Instruction &I) {
968	using namespace PatternMatch;
969	return I.use_empty() &&
970	match(V: &I, P: m_Intrinsic<Intrinsic::invariant_start>());
971	};
972	auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
973	return SafetyInfo->isGuaranteedToExecute(Inst: I, DT, CurLoop) &&
974	SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
975	};
976	if ((IsInvariantStart (I) \|\| isGuard(U: &I)) &&
977	CurLoop->hasLoopInvariantOperands(I: &I) &&
978	MustExecuteWithoutWritesBefore (I)) {
979	hoist(I, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
980	MSSAU, SE, ORE);
981	HoistedInstructions.push_back(Elt: &I);
982	Changed = true;
983	continue;
984	}
985
986	if (PHINode *PN = dyn_cast<PHINode>(Val: &I)) {
987	if (CFH.canHoistPHI(PN)) {
988	// Redirect incoming blocks first to ensure that we create hoisted
989	// versions of those blocks before we hoist the phi.
990	for (unsigned int i = `0`; i < PN->getNumIncomingValues(); ++i)
991	PN->setIncomingBlock(
992	i, BB: CFH.getOrCreateHoistedBlock(BB: PN->getIncomingBlock(i)));
993	hoist(I&: *PN, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
994	MSSAU, SE, ORE);
995	assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
996	Changed = true;
997	continue;
998	}
999	}
1000
1001	// Try to reassociate instructions so that part of computations can be
1002	// done out of loop.
1003	if (hoistArithmetics(I, L&: CurLoop, SafetyInfo&: SafetyInfo, MSSAU, AC, DT)) {
1004	Changed = true;
1005	continue;
1006	}
1007
1008	// Remember possibly hoistable branches so we can actually hoist them
1009	// later if needed.
1010	if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: &I))
1011	CFH.registerPossiblyHoistableBranch(BI);
1012	}
1013	}
1014
1015	// If we hoisted instructions to a conditional block they may not dominate
1016	// their uses that weren't hoisted (such as phis where some operands are not
1017	// loop invariant). If so make them unconditional by moving them to their
1018	// immediate dominator. We iterate through the instructions in reverse order
1019	// which ensures that when we rehoist an instruction we rehoist its operands,
1020	// and also keep track of where in the block we are rehoisting to make sure
1021	// that we rehoist instructions before the instructions that use them.
1022	Instruction HoistPoint = nullptr*;
1023	if (ControlFlowHoisting) {
1024	for (Instruction *I : reverse(C&: HoistedInstructions)) {
1025	if (!llvm::all_of(Range: I->uses(),
1026	P: [&](Use &U) { return DT->dominates(Def: I, U); })) {
1027	BasicBlock *Dominator =
1028	DT->getNode(BB: I->getParent())->getIDom()->getBlock();
1029	if (!HoistPoint \|\| !DT->dominates(A: HoistPoint->getParent(), B: Dominator)) {
1030	if (HoistPoint)
1031	assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
1032	"New hoist point expected to dominate old hoist point");
1033	HoistPoint = Dominator->getTerminator();
1034	}
1035	LLVM_DEBUG(dbgs() << "LICM rehoisting to "
1036	<< HoistPoint->getParent()->getNameOrAsOperand()
1037	<< ": " << *I << "\n");
1038	moveInstructionBefore(I&: I, Dest: HoistPoint->getIterator(), SafetyInfo&: SafetyInfo, MSSAU,
1039	SE);
1040	HoistPoint = I;
1041	Changed = true;
1042	}
1043	}
1044	}
1045	if (VerifyMemorySSA)
1046	MSSAU.getMemorySSA()->verifyMemorySSA();
1047
1048	// Now that we've finished hoisting make sure that LI and DT are still
1049	// valid.
1050	#ifdef EXPENSIVE_CHECKS
1051	if (Changed) {
1052	assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1053	"Dominator tree verification failed");
1054	LI->verify(*DT);
1055	}
1056	#endif
1057
1058	return Changed;
1059	}
1060
1061	// Return true if LI is invariant within scope of the loop. LI is invariant if
1062	// CurLoop is dominated by an invariant.start representing the same memory
1063	// location and size as the memory location LI loads from, and also the
1064	// invariant.start has no uses.
1065	static bool isLoadInvariantInLoop(LoadInst LI, DominatorTree DT,
1066	Loop *CurLoop) {
1067	Value *Addr = LI->getPointerOperand();
1068	const DataLayout &DL = LI->getDataLayout();
1069	const TypeSize LocSizeInBits = DL.getTypeSizeInBits(Ty: LI->getType());
1070
1071	// It is not currently possible for clang to generate an invariant.start
1072	// intrinsic with scalable vector types because we don't support thread local
1073	// sizeless types and we don't permit sizeless types in structs or classes.
1074	// Furthermore, even if support is added for this in future the intrinsic
1075	// itself is defined to have a size of -1 for variable sized objects. This
1076	// makes it impossible to verify if the intrinsic envelops our region of
1077	// interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
1078	// types would have a -1 parameter, but the former is clearly double the size
1079	// of the latter.
1080	if (LocSizeInBits.isScalable())
1081	return false;
1082
1083	// If we've ended up at a global/constant, bail. We shouldn't be looking at
1084	// uselists for non-local Values in a loop pass.
1085	if (isa<Constant>(Val: Addr))
1086	return false;
1087
1088	unsigned UsesVisited = `0`;
1089	// Traverse all uses of the load operand value, to see if invariant.start is
1090	// one of the uses, and whether it dominates the load instruction.
1091	for (auto *U : Addr->users()) {
1092	// Avoid traversing for Load operand with high number of users.
1093	if (++UsesVisited > MaxNumUsesTraversed)
1094	return false;
1095	IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U);
1096	// If there are escaping uses of invariant.start instruction, the load maybe
1097	// non-invariant.
1098	if (!II \|\| II->getIntrinsicID() != Intrinsic::invariant_start \|\|
1099	!II->use_empty())
1100	continue;
1101	ConstantInt *InvariantSize = cast<ConstantInt>(Val: II->getArgOperand(i: `0`));
1102	// The intrinsic supports having a -1 argument for variable sized objects
1103	// so we should check for that here.
1104	if (InvariantSize->isNegative())
1105	continue;
1106	uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * `8`;
1107	// Confirm the invariant.start location size contains the load operand size
1108	// in bits. Also, the invariant.start should dominate the load, and we
1109	// should not hoist the load out of a loop that contains this dominating
1110	// invariant.start.
1111	if (LocSizeInBits.getFixedValue() <= InvariantSizeInBits &&
1112	DT->properlyDominates(A: II->getParent(), B: CurLoop->getHeader()))
1113	return true;
1114	}
1115
1116	return false;
1117	}
1118
1119	/// Return true if-and-only-if we know how to (mechanically) both hoist and
1120	/// sink a given instruction out of a loop. Does not address legality
1121	/// concerns such as aliasing or speculation safety.
1122	static bool isHoistableAndSinkableInst(Instruction &I) {
1123	// Only these instructions are hoistable/sinkable.
1124	return (isa<LoadInst>(Val: I) \|\| isa<StoreInst>(Val: I) \|\| isa<CallInst>(Val: I) \|\|
1125	isa<FenceInst>(Val: I) \|\| isa<CastInst>(Val: I) \|\| isa<UnaryOperator>(Val: I) \|\|
1126	isa<BinaryOperator>(Val: I) \|\| isa<SelectInst>(Val: I) \|\|
1127	isa<GetElementPtrInst>(Val: I) \|\| isa<CmpInst>(Val: I) \|\|
1128	isa<InsertElementInst>(Val: I) \|\| isa<ExtractElementInst>(Val: I) \|\|
1129	isa<ShuffleVectorInst>(Val: I) \|\| isa<ExtractValueInst>(Val: I) \|\|
1130	isa<InsertValueInst>(Val: I) \|\| isa<FreezeInst>(Val: I));
1131	}
1132
1133	/// Return true if I is the only Instruction with a MemoryAccess in L.
1134	static bool isOnlyMemoryAccess(const Instruction I, const* Loop *L,
1135	const MemorySSAUpdater &MSSAU) {
1136	for (auto *BB : L->getBlocks())
1137	if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
1138	int NotAPhi = `0`;
1139	for (const auto &Acc : *Accs) {
1140	if (isa<MemoryPhi>(Val: &Acc))
1141	continue;
1142	const auto *MUD = cast<MemoryUseOrDef>(Val: &Acc);
1143	if (MUD->getMemoryInst() != I \|\| NotAPhi++ == `1`)
1144	return false;
1145	}
1146	}
1147	return true;
1148	}
1149
1150	static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
1151	BatchAAResults &BAA,
1152	SinkAndHoistLICMFlags &Flags,
1153	MemoryUseOrDef *MA) {
1154	// See declaration of SetLicmMssaOptCap for usage details.
1155	if (Flags.tooManyClobberingCalls())
1156	return MA->getDefiningAccess();
1157
1158	MemoryAccess *Source =
1159	MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MA, AA&: BAA);
1160	Flags.incrementClobberingCalls();
1161	return Source;
1162	}
1163
1164	bool llvm::canSinkOrHoistInst(Instruction &I, AAResults AA, DominatorTree DT,
1165	Loop *CurLoop, MemorySSAUpdater &MSSAU,
1166	bool TargetExecutesOncePerLoop,
1167	SinkAndHoistLICMFlags &Flags,
1168	OptimizationRemarkEmitter *ORE) {
1169	// If we don't understand the instruction, bail early.
1170	if (!isHoistableAndSinkableInst(I))
1171	return false;
1172
1173	MemorySSA *MSSA = MSSAU.getMemorySSA();
1174	// Loads have extra constraints we have to verify before we can hoist them.
1175	if (LoadInst *LI = dyn_cast<LoadInst>(Val: &I)) {
1176	if (!LI->isUnordered())
1177	return false; // Don't sink/hoist volatile or ordered atomic loads!
1178
1179	// Loads from constant memory are always safe to move, even if they end up
1180	// in the same alias set as something that ends up being modified.
1181	if (!isModSet(MRI: AA->getModRefInfoMask(P: LI->getOperand(i_nocapture: `0`))))
1182	return true;
1183	if (LI->hasMetadata(KindID: LLVMContext::MD_invariant_load))
1184	return true;
1185
1186	if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1187	return false; // Don't risk duplicating unordered loads
1188
1189	// This checks for an invariant.start dominating the load.
1190	if (isLoadInvariantInLoop(LI, DT, CurLoop))
1191	return true;
1192
1193	auto MU = cast<MemoryUse>(Val: MSSA->getMemoryAccess(I: LI));
1194
1195	bool InvariantGroup = LI->hasMetadata(KindID: LLVMContext::MD_invariant_group);
1196
1197	bool Invalidated = pointerInvalidatedByLoop(
1198	MSSA, MU, CurLoop, I, Flags, InvariantGroup);
1199	// Check loop-invariant address because this may also be a sinkable load
1200	// whose address is not necessarily loop-invariant.
1201	if (ORE && Invalidated && CurLoop->isLoopInvariant(V: LI->getPointerOperand()))
1202	ORE->emit(RemarkBuilder: [&]() {
1203	return OptimizationRemarkMissed (
1204	DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1205	<< "failed to move load with loop-invariant address "
1206	"because the loop may invalidate its value";
1207	});
1208
1209	return !Invalidated;
1210	} else if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
1211	// Don't sink calls which can throw.
1212	if (CI->mayThrow())
1213	return false;
1214
1215	// Convergent attribute has been used on operations that involve
1216	// inter-thread communication which results are implicitly affected by the
1217	// enclosing control flows. It is not safe to hoist or sink such operations
1218	// across control flow.
1219	if (CI->isConvergent())
1220	return false;
1221
1222	// FIXME: Current LLVM IR semantics don't work well with coroutines and
1223	// thread local globals. We currently treat getting the address of a thread
1224	// local global as not accessing memory, even though it may not be a
1225	// constant throughout a function with coroutines. Remove this check after
1226	// we better model semantics of thread local globals.
1227	if (CI->getFunction()->isPresplitCoroutine())
1228	return false;
1229
1230	using namespace PatternMatch;
1231	if (match(V: CI, P: m_Intrinsic<Intrinsic::assume>()))
1232	// Assumes don't actually alias anything or throw
1233	return true;
1234
1235	// Handle simple cases by querying alias analysis.
1236	MemoryEffects Behavior = AA->getMemoryEffects(Call: CI);
1237
1238	if (Behavior.doesNotAccessMemory())
1239	return true;
1240	if (Behavior.onlyReadsMemory()) {
1241	// Might have stale MemoryDef for call that was later inferred to be
1242	// read-only.
1243	auto *MU = dyn_cast<MemoryUse>(Val: MSSA->getMemoryAccess(I: CI));
1244	if (!MU)
1245	return false;
1246
1247	// If we can prove there are no writes to the memory read by the call, we
1248	// can hoist or sink.
1249	return !pointerInvalidatedByLoop(
1250	MSSA, MU, CurLoop, I, Flags, /InvariantGroup=/false);
1251	}
1252
1253	if (Behavior.onlyWritesMemory()) {
1254	// can hoist or sink if there are no conflicting read/writes to the
1255	// memory location written to by the call.
1256	return noConflictingReadWrites(I: CI, MSSA, AA, CurLoop, Flags);
1257	}
1258
1259	return false;
1260	} else if (auto *FI = dyn_cast<FenceInst>(Val: &I)) {
1261	// Fences alias (most) everything to provide ordering. For the moment,
1262	// just give up if there are any other memory operations in the loop.
1263	return isOnlyMemoryAccess(I: FI, L: CurLoop, MSSAU);
1264	} else if (auto *SI = dyn_cast<StoreInst>(Val: &I)) {
1265	if (!SI->isUnordered())
1266	return false; // Don't sink/hoist volatile or ordered atomic store!
1267
1268	// We can only hoist a store that we can prove writes a value which is not
1269	// read or overwritten within the loop. For those cases, we fallback to
1270	// load store promotion instead. TODO: We can extend this to cases where
1271	// there is exactly one write to the location and that write dominates an
1272	// arbitrary number of reads in the loop.
1273	if (isOnlyMemoryAccess(I: SI, L: CurLoop, MSSAU))
1274	return true;
1275	return noConflictingReadWrites(I: SI, MSSA, AA, CurLoop, Flags);
1276	}
1277
1278	assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1279
1280	// We've established mechanical ability and aliasing, it's up to the caller
1281	// to check fault safety
1282	return true;
1283	}
1284
1285	/// Returns true if a PHINode is a trivially replaceable with an
1286	/// Instruction.
1287	/// This is true when all incoming values are that instruction.
1288	/// This pattern occurs most often with LCSSA PHI nodes.
1289	///
1290	static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1291	for (const Value *IncValue : PN.incoming_values())
1292	if (IncValue != &I)
1293	return false;
1294
1295	return true;
1296	}
1297
1298	/// Return true if the instruction is foldable in the loop.
1299	static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1300	const TargetTransformInfo *TTI) {
1301	if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) {
1302	InstructionCost CostI =
1303	TTI->getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
1304	if (CostI != TargetTransformInfo::TCC_Free)
1305	return false;
1306	// For a GEP, we cannot simply use getInstructionCost because currently
1307	// it optimistically assumes that a GEP will fold into addressing mode
1308	// regardless of its users.
1309	const BasicBlock *BB = GEP->getParent();
1310	for (const User *U : GEP->users()) {
1311	const Instruction *UI = cast<Instruction>(Val: U);
1312	if (CurLoop->contains(Inst: UI) &&
1313	(BB != UI->getParent() \|\|
1314	(!isa<StoreInst>(Val: UI) && !isa<LoadInst>(Val: UI))))
1315	return false;
1316	}
1317	return true;
1318	}
1319
1320	return false;
1321	}
1322
1323	/// Return true if the only users of this instruction are outside of
1324	/// the loop. If this is true, we can sink the instruction to the exit
1325	/// blocks of the loop.
1326	///
1327	/// We also return true if the instruction could be folded away in lowering.
1328	/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1329	static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1330	const LoopSafetyInfo *SafetyInfo,
1331	TargetTransformInfo *TTI,
1332	bool &FoldableInLoop, bool LoopNestMode) {
1333	const auto &BlockColors = SafetyInfo->getBlockColors();
1334	bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
1335	for (const User *U : I.users()) {
1336	const Instruction *UI = cast<Instruction>(Val: U);
1337	if (const PHINode *PN = dyn_cast<PHINode>(Val: UI)) {
1338	const BasicBlock *BB = PN->getParent();
1339	// We cannot sink uses in catchswitches.
1340	if (isa<CatchSwitchInst>(Val: BB->getTerminator()))
1341	return false;
1342
1343	// We need to sink a callsite to a unique funclet. Avoid sinking if the
1344	// phi use is too muddled.
1345	if (isa<CallInst>(Val: I))
1346	if (!BlockColors.empty() &&
1347	BlockColors.find(Val: const_cast<BasicBlock *>(BB))->second.size() != `1`)
1348	return false;
1349
1350	if (LoopNestMode) {
1351	while (isa<PHINode>(Val: UI) && UI->hasOneUser() &&
1352	UI->getNumOperands() == `1`) {
1353	if (!CurLoop->contains(Inst: UI))
1354	break;
1355	UI = cast<Instruction>(Val: UI->user_back());
1356	}
1357	}
1358	}
1359
1360	if (CurLoop->contains(Inst: UI)) {
1361	if (IsFoldable) {
1362	FoldableInLoop = true;
1363	continue;
1364	}
1365	return false;
1366	}
1367	}
1368	return true;
1369	}
1370
1371	static Instruction *cloneInstructionInExitBlock(
1372	Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1373	const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
1374	Instruction *New;
1375	if (auto *CI = dyn_cast<CallInst>(Val: &I)) {
1376	const auto &BlockColors = SafetyInfo->getBlockColors();
1377
1378	// Sinking call-sites need to be handled differently from other
1379	// instructions. The cloned call-site needs a funclet bundle operand
1380	// appropriate for its location in the CFG.
1381	SmallVector<OperandBundleDef, `1`> OpBundles;
1382	for (unsigned BundleIdx = `0`, BundleEnd = CI->getNumOperandBundles();
1383	BundleIdx != BundleEnd; ++BundleIdx) {
1384	OperandBundleUse Bundle = CI->getOperandBundleAt(Index: BundleIdx);
1385	if (Bundle.getTagID() == LLVMContext::OB_funclet)
1386	continue;
1387
1388	OpBundles.emplace_back(Args&: Bundle);
1389	}
1390
1391	if (!BlockColors.empty()) {
1392	const ColorVector &CV = BlockColors.find(Val: &ExitBlock)->second;
1393	assert(CV.size() == `1` && "non-unique color for exit block!");
1394	BasicBlock *BBColor = CV.front();
1395	BasicBlock::iterator EHPad = BBColor->getFirstNonPHIIt();
1396	if (EHPad ->isEHPad())
1397	OpBundles.emplace_back(Args: "funclet", Args: &*EHPad);
1398	}
1399
1400	New = CallInst::Create(CI, Bundles: OpBundles);
1401	New->copyMetadata(SrcInst: *CI);
1402	} else {
1403	New = I.clone();
1404	}
1405
1406	New->insertInto(ParentBB: &ExitBlock, It: ExitBlock.getFirstInsertionPt());
1407	if (!I.getName().empty())
1408	New->setName(I.getName() + ".le");
1409
1410	if (MSSAU.getMemorySSA()->getMemoryAccess(I: &I)) {
1411	// Create a new MemoryAccess and let MemorySSA set its defining access.
1412	// After running some passes, MemorySSA might be outdated, and the
1413	// instruction `I` may have become a non-memory touching instruction.
1414	MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
1415	I: New, Definition: nullptr, BB: New->getParent(), Point: MemorySSA::Beginning,
1416	/CreationMustSucceed=/false);
1417	if (NewMemAcc) {
1418	if (auto *MemDef = dyn_cast<MemoryDef>(Val: NewMemAcc))
1419	MSSAU.insertDef(Def: MemDef, /RenameUses=/true);
1420	else {
1421	auto *MemUse = cast<MemoryUse>(Val: NewMemAcc);
1422	MSSAU.insertUse(Use: MemUse, /RenameUses=/true);
1423	}
1424	}
1425	}
1426
1427	// Build LCSSA PHI nodes for any in-loop operands (if legal). Note that
1428	// this is particularly cheap because we can rip off the PHI node that we're
1429	// replacing for the number and blocks of the predecessors.
1430	// OPT: If this shows up in a profile, we can instead finish sinking all
1431	// invariant instructions, and then walk their operands to re-establish
1432	// LCSSA. That will eliminate creating PHI nodes just to nuke them when
1433	// sinking bottom-up.
1434	for (Use &Op : New->operands())
1435	if (LI->wouldBeOutOfLoopUseRequiringLCSSA(V: Op.get(), ExitBB: PN.getParent())) {
1436	auto *OInst = cast<Instruction>(Val: Op.get());
1437	PHINode *OpPN =
1438	PHINode::Create(Ty: OInst->getType(), NumReservedValues: PN.getNumIncomingValues(),
1439	NameStr: OInst->getName() + ".lcssa");
1440	OpPN->insertBefore(InsertPos: ExitBlock.begin());
1441	for (unsigned i = `0`, e = PN.getNumIncomingValues(); i != e; ++i)
1442	OpPN->addIncoming(V: OInst, BB: PN.getIncomingBlock(i));
1443	Op = OpPN;
1444	}
1445	return New;
1446	}
1447
1448	static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1449	MemorySSAUpdater &MSSAU) {
1450	MSSAU.removeMemoryAccess(I: &I);
1451	SafetyInfo.removeInstruction(Inst: &I);
1452	I.eraseFromParent();
1453	}
1454
1455	static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
1456	ICFLoopSafetyInfo &SafetyInfo,
1457	MemorySSAUpdater &MSSAU,
1458	ScalarEvolution *SE) {
1459	SafetyInfo.removeInstruction(Inst: &I);
1460	SafetyInfo.insertInstructionTo(Inst: &I, BB: Dest ->getParent());
1461	I.moveBefore(BB&: *Dest ->getParent(), I: Dest);
1462	if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1463	Val: MSSAU.getMemorySSA()->getMemoryAccess(I: &I)))
1464	MSSAU.moveToPlace(What: OldMemAcc, BB: Dest ->getParent(),
1465	Where: MemorySSA::BeforeTerminator);
1466	if (SE)
1467	SE->forgetBlockAndLoopDispositions(V: &I);
1468	}
1469
1470	static Instruction *sinkThroughTriviallyReplaceablePHI(
1471	PHINode TPN, Instruction I, LoopInfo *LI,
1472	SmallDenseMap<BasicBlock , Instruction , `32`> &SunkCopies,
1473	const LoopSafetyInfo SafetyInfo, const* Loop *CurLoop,
1474	MemorySSAUpdater &MSSAU) {
1475	assert(isTriviallyReplaceablePHI(TPN, I) &&
1476	"Expect only trivially replaceable PHI");
1477	BasicBlock *ExitBlock = TPN->getParent();
1478	auto [It, Inserted] = SunkCopies.try_emplace(Key: ExitBlock);
1479	if (Inserted)
1480	It ->second = cloneInstructionInExitBlock(I&: I, ExitBlock&: ExitBlock, PN&: *TPN, LI,
1481	SafetyInfo, MSSAU);
1482	return It ->second;
1483	}
1484
1485	static bool canSplitPredecessors(PHINode PN, LoopSafetyInfo SafetyInfo) {
1486	BasicBlock *BB = PN->getParent();
1487	if (!BB->canSplitPredecessors())
1488	return false;
1489	// It's not impossible to split EHPad blocks, but if BlockColors already exist
1490	// it require updating BlockColors for all offspring blocks accordingly. By
1491	// skipping such corner case, we can make updating BlockColors after splitting
1492	// predecessor fairly simple.
1493	if (!SafetyInfo->getBlockColors().empty() &&
1494	BB->getFirstNonPHIIt()->isEHPad())
1495	return false;
1496	for (BasicBlock *BBPred : predecessors(BB)) {
1497	if (isa<IndirectBrInst>(Val: BBPred->getTerminator()))
1498	return false;
1499	}
1500	return true;
1501	}
1502
1503	static void splitPredecessorsOfLoopExit(PHINode PN, DominatorTree DT,
1504	LoopInfo LI, const* Loop *CurLoop,
1505	LoopSafetyInfo *SafetyInfo,
1506	MemorySSAUpdater *MSSAU) {
1507	#ifndef NDEBUG
1508	SmallVector<BasicBlock *, `32`> ExitBlocks;
1509	CurLoop->getUniqueExitBlocks(ExitBlocks);
1510	SmallPtrSet<BasicBlock *, `32`> ExitBlockSet(llvm::from_range, ExitBlocks);
1511	#endif
1512	BasicBlock *ExitBB = PN->getParent();
1513	assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1514
1515	// Split predecessors of the loop exit to make instructions in the loop are
1516	// exposed to exit blocks through trivially replaceable PHIs while keeping the
1517	// loop in the canonical form where each predecessor of each exit block should
1518	// be contained within the loop. For example, this will convert the loop below
1519	// from
1520	//
1521	// LB1:
1522	// %v1 =
1523	// br %LE, %LB2
1524	// LB2:
1525	// %v2 =
1526	// br %LE, %LB1
1527	// LE:
1528	// %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1529	//
1530	// to
1531	//
1532	// LB1:
1533	// %v1 =
1534	// br %LE.split, %LB2
1535	// LB2:
1536	// %v2 =
1537	// br %LE.split2, %LB1
1538	// LE.split:
1539	// %p1 = phi [%v1, %LB1] <-- trivially replaceable
1540	// br %LE
1541	// LE.split2:
1542	// %p2 = phi [%v2, %LB2] <-- trivially replaceable
1543	// br %LE
1544	// LE:
1545	// %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1546	//
1547	const auto &BlockColors = SafetyInfo->getBlockColors();
1548	SmallSetVector<BasicBlock *, `8`> PredBBs(pred_begin(BB: ExitBB), pred_end(BB: ExitBB));
1549	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
1550	while (!PredBBs.empty()) {
1551	BasicBlock PredBB = PredBBs.begin();
1552	assert(CurLoop->contains(PredBB) &&
1553	"Expect all predecessors are in the loop");
1554	if (PN->getBasicBlockIndex(BB: PredBB) >= `0`) {
1555	BasicBlock *NewPred = SplitBlockPredecessors(
1556	BB: ExitBB, Preds: PredBB, Suffix: ".split.loop.exit", DTU: &DTU, LI, MSSAU, PreserveLCSSA: true);
1557	// Since we do not allow splitting EH-block with BlockColors in
1558	// canSplitPredecessors(), we can simply assign predecessor's color to
1559	// the new block.
1560	if (!BlockColors.empty())
1561	// Grab a reference to the ColorVector to be inserted before getting the
1562	// reference to the vector we are copying because inserting the new
1563	// element in BlockColors might cause the map to be reallocated.
1564	SafetyInfo->copyColors(New: NewPred, Old: PredBB);
1565	}
1566	PredBBs.remove(X: PredBB);
1567	}
1568	}
1569
1570	/// When an instruction is found to only be used outside of the loop, this
1571	/// function moves it to the exit blocks and patches up SSA form as needed.
1572	/// This method is guaranteed to remove the original instruction from its
1573	/// position, and may either delete it or move it to outside of the loop.
1574	///
1575	static bool sink(Instruction &I, LoopInfo LI, DominatorTree DT,
1576	const Loop CurLoop, ICFLoopSafetyInfo SafetyInfo,
1577	MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE) {
1578	bool Changed = false;
1579	LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1580
1581	// Iterate over users to be ready for actual sinking. Replace users via
1582	// unreachable blocks with undef and make all user PHIs trivially replaceable.
1583	SmallPtrSet<Instruction *, `8`> VisitedUsers;
1584	for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1585	auto User = cast<Instruction>(Val: UI);
1586	Use &U = UI.getUse();
1587	++UI;
1588
1589	if (VisitedUsers.count(Ptr: User) \|\| CurLoop->contains(Inst: User))
1590	continue;
1591
1592	if (!DT->isReachableFromEntry(A: User->getParent())) {
1593	U = PoisonValue::get(T: I.getType());
1594	Changed = true;
1595	continue;
1596	}
1597
1598	// The user must be a PHI node.
1599	PHINode *PN = cast<PHINode>(Val: User);
1600
1601	// Surprisingly, instructions can be used outside of loops without any
1602	// exits. This can only happen in PHI nodes if the incoming block is
1603	// unreachable.
1604	BasicBlock *BB = PN->getIncomingBlock(U);
1605	if (!DT->isReachableFromEntry(A: BB)) {
1606	U = PoisonValue::get(T: I.getType());
1607	Changed = true;
1608	continue;
1609	}
1610
1611	VisitedUsers.insert(Ptr: PN);
1612	if (isTriviallyReplaceablePHI(PN: *PN, I))
1613	continue;
1614
1615	if (!canSplitPredecessors(PN, SafetyInfo))
1616	return Changed;
1617
1618	// Split predecessors of the PHI so that we can make users trivially
1619	// replaceable.
1620	splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU: &MSSAU);
1621
1622	// Should rebuild the iterators, as they may be invalidated by
1623	// splitPredecessorsOfLoopExit().
1624	UI = I.user_begin();
1625	UE = I.user_end();
1626	}
1627
1628	if (VisitedUsers.empty())
1629	return Changed;
1630
1631	ORE->emit(RemarkBuilder: [&]() {
1632	return OptimizationRemark (DEBUG_TYPE, "InstSunk", &I)
1633	<< "sinking " << ore::NV ("Inst", &I);
1634	});
1635	if (isa<LoadInst>(Val: I))
1636	++NumMovedLoads;
1637	else if (isa<CallInst>(Val: I))
1638	++NumMovedCalls;
1639	++NumSunk;
1640
1641	#ifndef NDEBUG
1642	SmallVector<BasicBlock *, `32`> ExitBlocks;
1643	CurLoop->getUniqueExitBlocks(ExitBlocks);
1644	SmallPtrSet<BasicBlock *, `32`> ExitBlockSet(llvm::from_range, ExitBlocks);
1645	#endif
1646
1647	// Clones of this instruction. Don't create more than one per exit block!
1648	SmallDenseMap<BasicBlock , Instruction , `32`> SunkCopies;
1649
1650	// If this instruction is only used outside of the loop, then all users are
1651	// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1652	// the instruction.
1653	// First check if I is worth sinking for all uses. Sink only when it is worth
1654	// across all uses.
1655	SmallSetVector<User*, `8`> Users(I.user_begin(), I.user_end());
1656	for (auto *UI : Users) {
1657	auto *User = cast<Instruction>(Val: UI);
1658
1659	if (CurLoop->contains(Inst: User))
1660	continue;
1661
1662	PHINode *PN = cast<PHINode>(Val: User);
1663	assert(ExitBlockSet.count(PN->getParent()) &&
1664	"The LCSSA PHI is not in an exit block!");
1665
1666	// The PHI must be trivially replaceable.
1667	Instruction *New = sinkThroughTriviallyReplaceablePHI(
1668	TPN: PN, I: &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1669	// As we sink the instruction out of the BB, drop its debug location.
1670	New->dropLocation();
1671	PN->replaceAllUsesWith(V: New);
1672	eraseInstruction(I&: PN, SafetyInfo&: SafetyInfo, MSSAU);
1673	Changed = true;
1674	}
1675	return Changed;
1676	}
1677
1678	/// When an instruction is found to only use loop invariant operands that
1679	/// is safe to hoist, this instruction is called to do the dirty work.
1680	///
1681	static void hoist(Instruction &I, const DominatorTree DT, const* Loop *CurLoop,
1682	BasicBlock Dest, ICFLoopSafetyInfo SafetyInfo,
1683	MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1684	OptimizationRemarkEmitter *ORE) {
1685	LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1686	<< I << "\n");
1687	ORE->emit(RemarkBuilder: [&]() {
1688	return OptimizationRemark (DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1689	<< ore::NV ("Inst", &I);
1690	});
1691
1692	// Metadata can be dependent on conditions we are hoisting above.
1693	// Conservatively strip all metadata on the instruction unless we were
1694	// guaranteed to execute I if we entered the loop, in which case the metadata
1695	// is valid in the loop preheader.
1696	// Similarly, If I is a call and it is not guaranteed to execute in the loop,
1697	// then moving to the preheader means we should strip attributes on the call
1698	// that can cause UB since we may be hoisting above conditions that allowed
1699	// inferring those attributes. They may not be valid at the preheader.
1700	if ((I.hasMetadataOtherThanDebugLoc() \|\| isa<CallInst>(Val: I)) &&
1701	// The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1702	// time in isGuaranteedToExecute if we don't actually have anything to
1703	// drop. It is a compile time optimization, not required for correctness.
1704	!SafetyInfo->isGuaranteedToExecute(Inst: I, DT, CurLoop)) {
1705	I.dropUBImplyingAttrsAndMetadata();
1706	}
1707
1708	if (isa<PHINode>(Val: I))
1709	// Move the new node to the end of the phi list in the destination block.
1710	moveInstructionBefore(I, Dest: Dest->getFirstNonPHIIt(), SafetyInfo&: *SafetyInfo, MSSAU, SE);
1711	else
1712	// Move the new node to the destination block, before its terminator.
1713	moveInstructionBefore(I, Dest: Dest->getTerminator()->getIterator(), SafetyInfo&: *SafetyInfo,
1714	MSSAU, SE);
1715
1716	I.updateLocationAfterHoist();
1717
1718	if (isa<LoadInst>(Val: I))
1719	++NumMovedLoads;
1720	else if (isa<CallInst>(Val: I))
1721	++NumMovedCalls;
1722	++NumHoisted;
1723	}
1724
1725	/// Only sink or hoist an instruction if it is not a trapping instruction,
1726	/// or if the instruction is known not to trap when moved to the preheader.
1727	/// or if it is a trapping instruction and is guaranteed to execute.
1728	static bool isSafeToExecuteUnconditionally(
1729	Instruction &Inst, const DominatorTree DT, const* TargetLibraryInfo *TLI,
1730	const Loop CurLoop, const* LoopSafetyInfo *SafetyInfo,
1731	OptimizationRemarkEmitter ORE, const* Instruction *CtxI,
1732	AssumptionCache AC, bool* AllowSpeculation) {
1733	if (AllowSpeculation &&
1734	isSafeToSpeculativelyExecute(I: &Inst, CtxI, AC, DT, TLI))
1735	return true;
1736
1737	bool GuaranteedToExecute =
1738	SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1739
1740	if (!GuaranteedToExecute) {
1741	auto *LI = dyn_cast<LoadInst>(Val: &Inst);
1742	if (LI && CurLoop->isLoopInvariant(V: LI->getPointerOperand()))
1743	ORE->emit(RemarkBuilder: [&]() {
1744	return OptimizationRemarkMissed (
1745	DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1746	<< "failed to hoist load with loop-invariant address "
1747	"because load is conditionally executed";
1748	});
1749	}
1750
1751	return GuaranteedToExecute;
1752	}
1753
1754	namespace {
1755	class LoopPromoter : public LoadAndStorePromoter {
1756	Value SomePtr; // Designated pointer to store to.*
1757	SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1758	SmallVectorImpl<BasicBlock::iterator> &LoopInsertPts;
1759	SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1760	PredIteratorCache &PredCache;
1761	MemorySSAUpdater &MSSAU;
1762	LoopInfo &LI;
1763	DebugLoc DL;
1764	Align Alignment;
1765	bool UnorderedAtomic;
1766	AAMDNodes AATags;
1767	ICFLoopSafetyInfo &SafetyInfo;
1768	bool CanInsertStoresInExitBlocks;
1769	ArrayRef<const Instruction *> Uses;
1770
1771	// We're about to add a use of V in a loop exit block. Insert an LCSSA phi
1772	// (if legal) if doing so would add an out-of-loop use to an instruction
1773	// defined in-loop.
1774	Value maybeInsertLCSSAPHI(Value V, BasicBlock BB) const* {
1775	if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, ExitBB: BB))
1776	return V;
1777
1778	Instruction *I = cast<Instruction>(Val: V);
1779	// We need to create an LCSSA PHI node for the incoming value and
1780	// store that.
1781	PHINode *PN = PHINode::Create(Ty: I->getType(), NumReservedValues: PredCache.size(BB),
1782	NameStr: I->getName() + ".lcssa");
1783	PN->insertBefore(InsertPos: BB->begin());
1784	for (BasicBlock *Pred : PredCache.get(BB))
1785	PN->addIncoming(V: I, BB: Pred);
1786	return PN;
1787	}
1788
1789	public:
1790	LoopPromoter(Value SP, ArrayRef<const* Instruction *> Insts, SSAUpdater &S,
1791	SmallVectorImpl<BasicBlock *> &LEB,
1792	SmallVectorImpl<BasicBlock::iterator> &LIP,
1793	SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1794	MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
1795	Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1796	ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1797	: LoadAndStorePromoter (Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
1798	LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
1799	LI(li), DL (std::move(dl)), Alignment (Alignment),
1800	UnorderedAtomic(UnorderedAtomic), AATags (AATags),
1801	SafetyInfo(SafetyInfo),
1802	CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses (Insts) {}
1803
1804	void insertStoresInLoopExitBlocks() {
1805	// Insert stores after in the loop exit blocks. Each exit block gets a
1806	// store of the live-out values that feed them. Since we've already told
1807	// the SSA updater about the defs in the loop and the preheader
1808	// definition, it is all set and we can start using it.
1809	DIAssignID NewID = nullptr*;
1810	for (unsigned i = `0`, e = LoopExitBlocks.size(); i != e; ++i) {
1811	BasicBlock *ExitBlock = LoopExitBlocks [i];
1812	Value *LiveInValue = SSA.GetValueInMiddleOfBlock(BB: ExitBlock);
1813	LiveInValue = maybeInsertLCSSAPHI(V: LiveInValue, BB: ExitBlock);
1814	Value *Ptr = maybeInsertLCSSAPHI(V: SomePtr, BB: ExitBlock);
1815	BasicBlock::iterator InsertPos = LoopInsertPts [i];
1816	StoreInst NewSI = new* StoreInst (LiveInValue, Ptr, InsertPos);
1817	if (UnorderedAtomic)
1818	NewSI->setOrdering(AtomicOrdering::Unordered);
1819	NewSI->setAlignment(Alignment);
1820	NewSI->setDebugLoc(DL);
1821	// Attach DIAssignID metadata to the new store, generating it on the
1822	// first loop iteration.
1823	if (i == `0`) {
1824	// NewSI will have its DIAssignID set here if there are any stores in
1825	// Uses with a DIAssignID attachment. This merged ID will then be
1826	// attached to the other inserted stores (in the branch below).
1827	NewSI->mergeDIAssignID(SourceInstructions: Uses);
1828	NewID = cast_or_null<DIAssignID>(
1829	Val: NewSI->getMetadata(KindID: LLVMContext::MD_DIAssignID));
1830	} else {
1831	// Attach the DIAssignID (or nullptr) merged from Uses in the branch
1832	// above.
1833	NewSI->setMetadata(KindID: LLVMContext::MD_DIAssignID, Node: NewID);
1834	}
1835
1836	if (AATags)
1837	NewSI->setAAMetadata(AATags);
1838
1839	MemoryAccess *MSSAInsertPoint = MSSAInsertPts [i];
1840	MemoryAccess *NewMemAcc;
1841	if (!MSSAInsertPoint) {
1842	NewMemAcc = MSSAU.createMemoryAccessInBB(
1843	I: NewSI, Definition: nullptr, BB: NewSI->getParent(), Point: MemorySSA::Beginning);
1844	} else {
1845	NewMemAcc =
1846	MSSAU.createMemoryAccessAfter(I: NewSI, Definition: nullptr, InsertPt: MSSAInsertPoint);
1847	}
1848	MSSAInsertPts [i] = NewMemAcc;
1849	MSSAU.insertDef(Def: cast<MemoryDef>(Val: NewMemAcc), RenameUses: true);
1850	// FIXME: true for safety, false may still be correct.
1851	}
1852	}
1853
1854	void doExtraRewritesBeforeFinalDeletion() override {
1855	if (CanInsertStoresInExitBlocks)
1856	insertStoresInLoopExitBlocks();
1857	}
1858
1859	void instructionDeleted(Instruction I) const* override {
1860	SafetyInfo.removeInstruction(Inst: I);
1861	MSSAU.removeMemoryAccess(I);
1862	}
1863
1864	bool shouldDelete(Instruction I) const* override {
1865	if (isa<StoreInst>(Val: I))
1866	return CanInsertStoresInExitBlocks;
1867	return true;
1868	}
1869	};
1870
1871	bool isNotCapturedBeforeOrInLoop(const Value V, const* Loop *L,
1872	DominatorTree *DT) {
1873	// We can perform the captured-before check against any instruction in the
1874	// loop header, as the loop header is reachable from any instruction inside
1875	// the loop.
1876	// TODO: ReturnCaptures=true shouldn't be necessary here.
1877	return capturesNothing(CC: PointerMayBeCapturedBefore(
1878	V, /ReturnCaptures=/true, I: L->getHeader()->getTerminator(), DT,
1879	/IncludeI=/false, Mask: CaptureComponents::Provenance));
1880	}
1881
1882	/// Return true if we can prove that a caller cannot inspect the object if an
1883	/// unwind occurs inside the loop.
1884	bool isNotVisibleOnUnwindInLoop(const Value Object, const* Loop *L,
1885	DominatorTree *DT) {
1886	bool RequiresNoCaptureBeforeUnwind;
1887	if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
1888	return false;
1889
1890	return !RequiresNoCaptureBeforeUnwind \|\|
1891	isNotCapturedBeforeOrInLoop(V: Object, L, DT);
1892	}
1893
1894	bool isThreadLocalObject(const Value Object, const* Loop L, DominatorTree DT,
1895	TargetTransformInfo *TTI) {
1896	// The object must be function-local to start with, and then not captured
1897	// before/in the loop.
1898	return (isIdentifiedFunctionLocal(V: Object) &&
1899	isNotCapturedBeforeOrInLoop(V: Object, L, DT)) \|\|
1900	(TTI->isSingleThreaded() \|\| SingleThread);
1901	}
1902
1903	} // namespace
1904
1905	/// Try to promote memory values to scalars by sinking stores out of the
1906	/// loop and moving loads to before the loop. We do this by looping over
1907	/// the stores in the loop, looking for stores to Must pointers which are
1908	/// loop invariant.
1909	///
1910	bool llvm::promoteLoopAccessesToScalars(
1911	const SmallSetVector<Value *, `8`> &PointerMustAliases,
1912	SmallVectorImpl<BasicBlock *> &ExitBlocks,
1913	SmallVectorImpl<BasicBlock::iterator> &InsertPts,
1914	SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1915	LoopInfo LI, DominatorTree DT, AssumptionCache *AC,
1916	const TargetLibraryInfo TLI, TargetTransformInfo TTI, Loop *CurLoop,
1917	MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1918	OptimizationRemarkEmitter ORE, bool* AllowSpeculation,
1919	bool HasReadsOutsideSet) {
1920	// Verify inputs.
1921	assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1922	SafetyInfo != nullptr &&
1923	"Unexpected Input to promoteLoopAccessesToScalars");
1924
1925	LLVM_DEBUG({
1926	dbgs() << "Trying to promote set of must-aliased pointers:\n";
1927	for (Value *Ptr : PointerMustAliases)
1928	dbgs() << " " << *Ptr << "\n";
1929	});
1930	++NumPromotionCandidates;
1931
1932	Value SomePtr = PointerMustAliases.begin();
1933	BasicBlock *Preheader = CurLoop->getLoopPreheader();
1934
1935	// It is not safe to promote a load/store from the loop if the load/store is
1936	// conditional. For example, turning:
1937	//
1938	// for () { if (c) P += 1; }*
1939	//
1940	// into:
1941	//
1942	// tmp = P; for () { if (c) tmp +=1; } P = tmp;
1943	//
1944	// is not safe, because P may only be valid to access if 'c' is true.*
1945	//
1946	// The safety property divides into two parts:
1947	// p1) The memory may not be dereferenceable on entry to the loop. In this
1948	// case, we can't insert the required load in the preheader.
1949	// p2) The memory model does not allow us to insert a store along any dynamic
1950	// path which did not originally have one.
1951	//
1952	// If at least one store is guaranteed to execute, both properties are
1953	// satisfied, and promotion is legal.
1954	//
1955	// This, however, is not a necessary condition. Even if no store/load is
1956	// guaranteed to execute, we can still establish these properties.
1957	// We can establish (p1) by proving that hoisting the load into the preheader
1958	// is safe (i.e. proving dereferenceability on all paths through the loop). We
1959	// can use any access within the alias set to prove dereferenceability,
1960	// since they're all must alias.
1961	//
1962	// There are two ways establish (p2):
1963	// a) Prove the location is thread-local. In this case the memory model
1964	// requirement does not apply, and stores are safe to insert.
1965	// b) Prove a store dominates every exit block. In this case, if an exit
1966	// blocks is reached, the original dynamic path would have taken us through
1967	// the store, so inserting a store into the exit block is safe. Note that this
1968	// is different from the store being guaranteed to execute. For instance,
1969	// if an exception is thrown on the first iteration of the loop, the original
1970	// store is never executed, but the exit blocks are not executed either.
1971
1972	bool DereferenceableInPH = false;
1973	bool StoreIsGuanteedToExecute = false;
1974	bool LoadIsGuaranteedToExecute = false;
1975	bool FoundLoadToPromote = false;
1976
1977	// Goes from Unknown to either Safe or Unsafe, but can't switch between them.
1978	enum {
1979	StoreSafe,
1980	StoreUnsafe,
1981	StoreSafetyUnknown,
1982	} StoreSafety = StoreSafetyUnknown;
1983
1984	SmallVector<Instruction *, `64`> LoopUses;
1985
1986	// We start with an alignment of one and try to find instructions that allow
1987	// us to prove better alignment.
1988	Align Alignment;
1989	// Keep track of which types of access we see
1990	bool SawUnorderedAtomic = false;
1991	bool SawNotAtomic = false;
1992	AAMDNodes AATags;
1993
1994	const DataLayout &MDL = Preheader->getDataLayout();
1995
1996	// If there are reads outside the promoted set, then promoting stores is
1997	// definitely not safe.
1998	if (HasReadsOutsideSet)
1999	StoreSafety = StoreUnsafe;
2000
2001	if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) {
2002	// If a loop can throw, we have to insert a store along each unwind edge.
2003	// That said, we can't actually make the unwind edge explicit. Therefore,
2004	// we have to prove that the store is dead along the unwind edge. We do
2005	// this by proving that the caller can't have a reference to the object
2006	// after return and thus can't possibly load from the object.
2007	Value *Object = getUnderlyingObject(V: SomePtr);
2008	if (!isNotVisibleOnUnwindInLoop(Object, L: CurLoop, DT))
2009	StoreSafety = StoreUnsafe;
2010	}
2011
2012	// Check that all accesses to pointers in the alias set use the same type.
2013	// We cannot (yet) promote a memory location that is loaded and stored in
2014	// different sizes. While we are at it, collect alignment and AA info.
2015	Type AccessTy = nullptr*;
2016	for (Value *ASIV : PointerMustAliases) {
2017	for (Use &U : ASIV->uses()) {
2018	// Ignore instructions that are outside the loop.
2019	Instruction *UI = dyn_cast<Instruction>(Val: U.getUser());
2020	if (!UI \|\| !CurLoop->contains(Inst: UI))
2021	continue;
2022
2023	// If there is an non-load/store instruction in the loop, we can't promote
2024	// it.
2025	if (LoadInst *Load = dyn_cast<LoadInst>(Val: UI)) {
2026	if (!Load->isUnordered())
2027	return false;
2028
2029	SawUnorderedAtomic \|= Load->isAtomic();
2030	SawNotAtomic \|= !Load->isAtomic();
2031	FoundLoadToPromote = true;
2032
2033	Align InstAlignment = Load->getAlign();
2034
2035	if (!LoadIsGuaranteedToExecute)
2036	LoadIsGuaranteedToExecute =
2037	SafetyInfo->isGuaranteedToExecute(Inst: *UI, DT, CurLoop);
2038
2039	// Note that proving a load safe to speculate requires proving
2040	// sufficient alignment at the target location. Proving it guaranteed
2041	// to execute does as well. Thus we can increase our guaranteed
2042	// alignment as well.
2043	if (!DereferenceableInPH \|\| (InstAlignment > Alignment))
2044	if (isSafeToExecuteUnconditionally(
2045	Inst&: *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2046	CtxI: Preheader->getTerminator(), AC, AllowSpeculation)) {
2047	DereferenceableInPH = true;
2048	Alignment = std::max(a: Alignment, b: InstAlignment);
2049	}
2050	} else if (const StoreInst *Store = dyn_cast<StoreInst>(Val: UI)) {
2051	// Stores of* the pointer are not interesting, only stores to the*
2052	// pointer.
2053	if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
2054	continue;
2055	if (!Store->isUnordered())
2056	return false;
2057
2058	SawUnorderedAtomic \|= Store->isAtomic();
2059	SawNotAtomic \|= !Store->isAtomic();
2060
2061	// If the store is guaranteed to execute, both properties are satisfied.
2062	// We may want to check if a store is guaranteed to execute even if we
2063	// already know that promotion is safe, since it may have higher
2064	// alignment than any other guaranteed stores, in which case we can
2065	// raise the alignment on the promoted store.
2066	Align InstAlignment = Store->getAlign();
2067	bool GuaranteedToExecute =
2068	SafetyInfo->isGuaranteedToExecute(Inst: *UI, DT, CurLoop);
2069	StoreIsGuanteedToExecute \|= GuaranteedToExecute;
2070	if (GuaranteedToExecute) {
2071	DereferenceableInPH = true;
2072	if (StoreSafety == StoreSafetyUnknown)
2073	StoreSafety = StoreSafe;
2074	Alignment = std::max(a: Alignment, b: InstAlignment);
2075	}
2076
2077	// If a store dominates all exit blocks, it is safe to sink.
2078	// As explained above, if an exit block was executed, a dominating
2079	// store must have been executed at least once, so we are not
2080	// introducing stores on paths that did not have them.
2081	// Note that this only looks at explicit exit blocks. If we ever
2082	// start sinking stores into unwind edges (see above), this will break.
2083	if (StoreSafety == StoreSafetyUnknown &&
2084	llvm::all_of(Range&: ExitBlocks, P: [&](BasicBlock *Exit) {
2085	return DT->dominates(A: Store->getParent(), B: Exit);
2086	}))
2087	StoreSafety = StoreSafe;
2088
2089	// If the store is not guaranteed to execute, we may still get
2090	// deref info through it.
2091	if (!DereferenceableInPH) {
2092	DereferenceableInPH = isDereferenceableAndAlignedPointer(
2093	V: Store->getPointerOperand(), Ty: Store->getValueOperand()->getType(),
2094	Alignment: Store->getAlign(), DL: MDL, CtxI: Preheader->getTerminator(), AC, DT, TLI);
2095	}
2096	} else
2097	continue; // Not a load or store.
2098
2099	if (!AccessTy)
2100	AccessTy = getLoadStoreType(I: UI);
2101	else if (AccessTy != getLoadStoreType(I: UI))
2102	return false;
2103
2104	// Merge the AA tags.
2105	if (LoopUses.empty()) {
2106	// On the first load/store, just take its AA tags.
2107	AATags = UI->getAAMetadata();
2108	} else if (AATags) {
2109	AATags = AATags.merge(Other: UI->getAAMetadata());
2110	}
2111
2112	LoopUses.push_back(Elt: UI);
2113	}
2114	}
2115
2116	// If we found both an unordered atomic instruction and a non-atomic memory
2117	// access, bail. We can't blindly promote non-atomic to atomic since we
2118	// might not be able to lower the result. We can't downgrade since that
2119	// would violate memory model. Also, align 0 is an error for atomics.
2120	if (SawUnorderedAtomic && SawNotAtomic)
2121	return false;
2122
2123	// If we're inserting an atomic load in the preheader, we must be able to
2124	// lower it. We're only guaranteed to be able to lower naturally aligned
2125	// atomics.
2126	if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(Ty: AccessTy))
2127	return false;
2128
2129	// If we couldn't prove we can hoist the load, bail.
2130	if (!DereferenceableInPH) {
2131	LLVM_DEBUG(dbgs() << "Not promoting: Not dereferenceable in preheader\n");
2132	return false;
2133	}
2134
2135	// We know we can hoist the load, but don't have a guaranteed store.
2136	// Check whether the location is writable and thread-local. If it is, then we
2137	// can insert stores along paths which originally didn't have them without
2138	// violating the memory model.
2139	if (StoreSafety == StoreSafetyUnknown) {
2140	Value *Object = getUnderlyingObject(V: SomePtr);
2141	bool ExplicitlyDereferenceableOnly;
2142	if (isWritableObject(Object, ExplicitlyDereferenceableOnly) &&
2143	(!ExplicitlyDereferenceableOnly \|\|
2144	isDereferenceablePointer(V: SomePtr, Ty: AccessTy, DL: MDL)) &&
2145	isThreadLocalObject(Object, L: CurLoop, DT, TTI))
2146	StoreSafety = StoreSafe;
2147	}
2148
2149	// If we've still failed to prove we can sink the store, hoist the load
2150	// only, if possible.
2151	if (StoreSafety != StoreSafe && !FoundLoadToPromote)
2152	// If we cannot hoist the load either, give up.
2153	return false;
2154
2155	// Lets do the promotion!
2156	if (StoreSafety == StoreSafe) {
2157	LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2158	<< `'\n'`);
2159	++NumLoadStorePromoted;
2160	} else {
2161	LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2162	<< `'\n'`);
2163	++NumLoadPromoted;
2164	}
2165
2166	ORE->emit(RemarkBuilder: [&]() {
2167	return OptimizationRemark (DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2168	LoopUses [`0`])
2169	<< "Moving accesses to memory location out of the loop";
2170	});
2171
2172	// Look at all the loop uses, and try to merge their locations.
2173	std::vector<DebugLoc> LoopUsesLocs;
2174	for (auto U : LoopUses)
2175	LoopUsesLocs.push_back(x: U->getDebugLoc());
2176	auto DL = DebugLoc::getMergedLocations(Locs: LoopUsesLocs);
2177
2178	// We use the SSAUpdater interface to insert phi nodes as required.
2179	SmallVector<PHINode *, `16`> NewPHIs;
2180	SSAUpdater SSA(&NewPHIs);
2181	LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
2182	MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment,
2183	SawUnorderedAtomic,
2184	StoreIsGuanteedToExecute ? AATags : AAMDNodes (),
2185	*SafetyInfo, StoreSafety == StoreSafe);
2186
2187	// Set up the preheader to have a definition of the value. It is the live-out
2188	// value from the preheader that uses in the loop will use.
2189	LoadInst PreheaderLoad = nullptr*;
2190	if (FoundLoadToPromote \|\| !StoreIsGuanteedToExecute) {
2191	PreheaderLoad =
2192	new LoadInst (AccessTy, SomePtr, SomePtr->getName() + ".promoted",
2193	Preheader->getTerminator()->getIterator());
2194	if (SawUnorderedAtomic)
2195	PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2196	PreheaderLoad->setAlignment(Alignment);
2197	PreheaderLoad->setDebugLoc(DebugLoc::getDropped());
2198	if (AATags && LoadIsGuaranteedToExecute)
2199	PreheaderLoad->setAAMetadata(AATags);
2200
2201	MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
2202	I: PreheaderLoad, Definition: nullptr, BB: PreheaderLoad->getParent(), Point: MemorySSA::End);
2203	MemoryUse *NewMemUse = cast<MemoryUse>(Val: PreheaderLoadMemoryAccess);
2204	MSSAU.insertUse(Use: NewMemUse, /RenameUses=/true);
2205	SSA.AddAvailableValue(BB: Preheader, V: PreheaderLoad);
2206	} else {
2207	SSA.AddAvailableValue(BB: Preheader, V: PoisonValue::get(T: AccessTy));
2208	}
2209
2210	if (VerifyMemorySSA)
2211	MSSAU.getMemorySSA()->verifyMemorySSA();
2212	// Rewrite all the loads in the loop and remember all the definitions from
2213	// stores in the loop.
2214	Promoter.run(Insts: LoopUses);
2215
2216	if (VerifyMemorySSA)
2217	MSSAU.getMemorySSA()->verifyMemorySSA();
2218	// If the SSAUpdater didn't use the load in the preheader, just zap it now.
2219	if (PreheaderLoad && PreheaderLoad->use_empty())
2220	eraseInstruction(I&: PreheaderLoad, SafetyInfo&: SafetyInfo, MSSAU);
2221
2222	return true;
2223	}
2224
2225	static void foreachMemoryAccess(MemorySSA MSSA, Loop L,
2226	function_ref<void(Instruction *)> Fn) {
2227	for (const BasicBlock *BB : L->blocks())
2228	if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2229	for (const auto &Access : *Accesses)
2230	if (const auto *MUD = dyn_cast<MemoryUseOrDef>(Val: &Access))
2231	Fn (MUD->getMemoryInst());
2232	}
2233
2234	// The bool indicates whether there might be reads outside the set, in which
2235	// case only loads may be promoted.
2236	static SmallVector<PointersAndHasReadsOutsideSet, `0`>
2237	collectPromotionCandidates(MemorySSA MSSA, AliasAnalysis AA, Loop *L) {
2238	BatchAAResults BatchAA(*AA);
2239	AliasSetTracker AST(BatchAA);
2240
2241	auto IsPotentiallyPromotable = [L](const Instruction *I) {
2242	if (const auto *SI = dyn_cast<StoreInst>(Val: I)) {
2243	const Value *PtrOp = SI->getPointerOperand();
2244	return !isa<ConstantData>(Val: PtrOp) && L->isLoopInvariant(V: PtrOp);
2245	}
2246	if (const auto *LI = dyn_cast<LoadInst>(Val: I)) {
2247	const Value *PtrOp = LI->getPointerOperand();
2248	return !isa<ConstantData>(Val: PtrOp) && L->isLoopInvariant(V: PtrOp);
2249	}
2250	return false;
2251	};
2252
2253	// Populate AST with potentially promotable accesses.
2254	SmallPtrSet<Value *, `16`> AttemptingPromotion;
2255	foreachMemoryAccess(MSSA, L, Fn: [&](Instruction *I) {
2256	if (IsPotentiallyPromotable (I)) {
2257	AttemptingPromotion.insert(Ptr: I);
2258	AST.add(I);
2259	}
2260	});
2261
2262	// We're only interested in must-alias sets that contain a mod.
2263	SmallVector<PointerIntPair<const AliasSet , `1`, bool*>, `8`> Sets;
2264	for (AliasSet &AS : AST)
2265	if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2266	Sets.push_back(Elt: {&AS, false});
2267
2268	if (Sets.empty())
2269	return {}; // Nothing to promote...
2270
2271	// Discard any sets for which there is an aliasing non-promotable access.
2272	foreachMemoryAccess(MSSA, L, Fn: [&](Instruction *I) {
2273	if (AttemptingPromotion.contains(Ptr: I))
2274	return;
2275
2276	llvm::erase_if(C&: Sets, P: [&](PointerIntPair<const AliasSet , `1`, bool*> &Pair) {
2277	ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(Inst: I, AA&: BatchAA);
2278	// Cannot promote if there are writes outside the set.
2279	if (isModSet(MRI: MR))
2280	return true;
2281	if (isRefSet(MRI: MR)) {
2282	// Remember reads outside the set.
2283	Pair.setInt(true);
2284	// If this is a mod-only set and there are reads outside the set,
2285	// we will not be able to promote, so bail out early.
2286	return !Pair.getPointer()->isRef();
2287	}
2288	return false;
2289	});
2290	});
2291
2292	SmallVector<std::pair<SmallSetVector<Value , `8`>, bool*>, `0`> Result;
2293	for (auto [Set, HasReadsOutsideSet] : Sets) {
2294	SmallSetVector<Value *, `8`> PointerMustAliases;
2295	for (const auto &MemLoc : *Set)
2296	PointerMustAliases.insert(X: const_cast<Value *>(MemLoc.Ptr));
2297	Result.emplace_back(Args: std::move(PointerMustAliases), Args&: HasReadsOutsideSet);
2298	}
2299
2300	return Result;
2301	}
2302
2303	// For a given store instruction or writeonly call instruction, this function
2304	// checks that there are no read or writes that conflict with the memory
2305	// access in the instruction
2306	static bool noConflictingReadWrites(Instruction I, MemorySSA MSSA,
2307	AAResults AA, Loop CurLoop,
2308	SinkAndHoistLICMFlags &Flags) {
2309	assert(isa<CallInst>(I) \|\| isa<StoreInst>(I));
2310	// If there are more accesses than the Promotion cap, then give up as we're
2311	// not walking a list that long.
2312	if (Flags.tooManyMemoryAccesses())
2313	return false;
2314
2315	auto *IMD = MSSA->getMemoryAccess(I);
2316	BatchAAResults BAA(*AA);
2317	auto Source = getClobberingMemoryAccess(MSSA&: MSSA, BAA, Flags, MA: IMD);
2318	// Make sure there are no clobbers inside the loop.
2319	if (!MSSA->isLiveOnEntryDef(MA: Source) && CurLoop->contains(BB: Source->getBlock()))
2320	return false;
2321
2322	// If there are interfering Uses (i.e. their defining access is in the
2323	// loop), or ordered loads (stored as Defs!), don't move this store.
2324	// Could do better here, but this is conservatively correct.
2325	// TODO: Cache set of Uses on the first walk in runOnLoop, update when
2326	// moving accesses. Can also extend to dominating uses.
2327	for (auto *BB : CurLoop->getBlocks()) {
2328	auto *Accesses = MSSA->getBlockAccesses(BB);
2329	if (!Accesses)
2330	continue;
2331	for (const auto &MA : *Accesses)
2332	if (const auto *MU = dyn_cast<MemoryUse>(Val: &MA)) {
2333	auto MD = getClobberingMemoryAccess(MSSA&: MSSA, BAA, Flags,
2334	MA: const_cast<MemoryUse *>(MU));
2335	if (!MSSA->isLiveOnEntryDef(MA: MD) && CurLoop->contains(BB: MD->getBlock()))
2336	return false;
2337	// Disable hoisting past potentially interfering loads. Optimized
2338	// Uses may point to an access outside the loop, as getClobbering
2339	// checks the previous iteration when walking the backedge.
2340	// FIXME: More precise: no Uses that alias I.
2341	if (!Flags.getIsSink() && !MSSA->dominates(A: IMD, B: MU))
2342	return false;
2343	} else if (const auto *MD = dyn_cast<MemoryDef>(Val: &MA)) {
2344	if (auto *LI = dyn_cast<LoadInst>(Val: MD->getMemoryInst())) {
2345	(void)LI; // Silence warning.
2346	assert(!LI->isUnordered() && "Expected unordered load");
2347	return false;
2348	}
2349	// Any call, while it may not be clobbering I, it may be a use.
2350	if (auto *CI = dyn_cast<CallInst>(Val: MD->getMemoryInst())) {
2351	// Check if the call may read from the memory location written
2352	// to by I. Check CI's attributes and arguments; the number of
2353	// such checks performed is limited above by NoOfMemAccTooLarge.
2354	if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
2355	ModRefInfo MRI = BAA.getModRefInfo(I: CI, OptLoc: MemoryLocation::get(SI));
2356	if (isModOrRefSet(MRI))
2357	return false;
2358	} else {
2359	auto *SCI = cast<CallInst>(Val: I);
2360	// If the instruction we are wanting to hoist is also a call
2361	// instruction then we need not check mod/ref info with itself
2362	if (SCI == CI)
2363	continue;
2364	ModRefInfo MRI = BAA.getModRefInfo(I: CI, Call2: SCI);
2365	if (isModOrRefSet(MRI))
2366	return false;
2367	}
2368	}
2369	}
2370	}
2371	return true;
2372	}
2373
2374	static bool pointerInvalidatedByLoop(MemorySSA MSSA, MemoryUse MU,
2375	Loop *CurLoop, Instruction &I,
2376	SinkAndHoistLICMFlags &Flags,
2377	bool InvariantGroup) {
2378	// For hoisting, use the walker to determine safety
2379	if (!Flags.getIsSink()) {
2380	// If hoisting an invariant group, we only need to check that there
2381	// is no store to the loaded pointer between the start of the loop,
2382	// and the load (since all values must be the same).
2383
2384	// This can be checked in two conditions:
2385	// 1) if the memoryaccess is outside the loop
2386	// 2) the earliest access is at the loop header,
2387	// if the memory loaded is the phi node
2388
2389	BatchAAResults BAA(MSSA->getAA());
2390	MemoryAccess Source = getClobberingMemoryAccess(MSSA&: MSSA, BAA, Flags, MA: MU);
2391	return !MSSA->isLiveOnEntryDef(MA: Source) &&
2392	CurLoop->contains(BB: Source->getBlock()) &&
2393	!(InvariantGroup && Source->getBlock() == CurLoop->getHeader() && isa<MemoryPhi>(Val: Source));
2394	}
2395
2396	// For sinking, we'd need to check all Defs below this use. The getClobbering
2397	// call will look on the backedge of the loop, but will check aliasing with
2398	// the instructions on the previous iteration.
2399	// For example:
2400	// for (i ... )
2401	// load a[i] ( Use (LoE)
2402	// store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2403	// i++;
2404	// The load sees no clobbering inside the loop, as the backedge alias check
2405	// does phi translation, and will check aliasing against store a[i-1].
2406	// However sinking the load outside the loop, below the store is incorrect.
2407
2408	// For now, only sink if there are no Defs in the loop, and the existing ones
2409	// precede the use and are in the same block.
2410	// FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2411	// needs PostDominatorTreeAnalysis.
2412	// FIXME: More precise: no Defs that alias this Use.
2413	if (Flags.tooManyMemoryAccesses())
2414	return true;
2415	for (auto *BB : CurLoop->getBlocks())
2416	if (pointerInvalidatedByBlock(BB&: BB, MSSA&: MSSA, MU&: *MU))
2417	return true;
2418	// When sinking, the source block may not be part of the loop so check it.
2419	if (!CurLoop->contains(Inst: &I))
2420	return pointerInvalidatedByBlock(BB&: I.getParent(), MSSA&: MSSA, MU&: *MU);
2421
2422	return false;
2423	}
2424
2425	bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
2426	if (const auto *Accesses = MSSA.getBlockDefs(BB: &BB))
2427	for (const auto &MA : *Accesses)
2428	if (const auto *MD = dyn_cast<MemoryDef>(Val: &MA))
2429	if (MU.getBlock() != MD->getBlock() \|\| !MSSA.locallyDominates(A: MD, B: &MU))
2430	return true;
2431	return false;
2432	}
2433
2434	/// Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A <
2435	/// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their
2436	/// minimun can be computed outside of loop, and X is not a loop-invariant.
2437	static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2438	MemorySSAUpdater &MSSAU) {
2439	bool Inverse = false;
2440	using namespace PatternMatch;
2441	Value Cond1, Cond2;
2442	if (match(V: &I, P: m_LogicalOr(L: m_Value(V&: Cond1), R: m_Value(V&: Cond2)))) {
2443	Inverse = true;
2444	} else if (match(V: &I, P: m_LogicalAnd(L: m_Value(V&: Cond1), R: m_Value(V&: Cond2)))) {
2445	// Do nothing
2446	} else
2447	return false;
2448
2449	auto MatchICmpAgainstInvariant = [&](Value C, CmpPredicate &P, Value &LHS,
2450	Value *&RHS) {
2451	if (!match(V: C, P: m_OneUse(SubPattern: m_ICmp(Pred&: P, L: m_Value(V&: LHS), R: m_Value(V&: RHS)))))
2452	return false;
2453	if (!LHS->getType()->isIntegerTy())
2454	return false;
2455	if (!ICmpInst::isRelational(P))
2456	return false;
2457	if (L.isLoopInvariant(V: LHS)) {
2458	std::swap(a&: LHS, b&: RHS);
2459	P = ICmpInst::getSwappedPredicate(pred: P);
2460	}
2461	if (L.isLoopInvariant(V: LHS) \|\| !L.isLoopInvariant(V: RHS))
2462	return false;
2463	if (Inverse)
2464	P = ICmpInst::getInversePredicate(pred: P);
2465	return true;
2466	};
2467	CmpPredicate P1, P2;
2468	Value LHS1, LHS2, RHS1, RHS2;
2469	if (!MatchICmpAgainstInvariant (Cond1, P1, LHS1, RHS1) \|\|
2470	!MatchICmpAgainstInvariant (Cond2, P2, LHS2, RHS2))
2471	return false;
2472	auto MatchingPred = CmpPredicate::getMatching(A: P1, B: P2);
2473	if (!MatchingPred \|\| LHS1 != LHS2)
2474	return false;
2475
2476	// Everything is fine, we can do the transform.
2477	bool UseMin = ICmpInst::isLT(P: MatchingPred) \|\| ICmpInst::isLE(P: MatchingPred);
2478	assert(
2479	(UseMin \|\| ICmpInst::isGT(*MatchingPred) \|\|
2480	ICmpInst::isGE(*MatchingPred)) &&
2481	"Relational predicate is either less (or equal) or greater (or equal)!");
2482	Intrinsic::ID id = ICmpInst::isSigned(Pred: *MatchingPred)
2483	? (UseMin ? Intrinsic::smin : Intrinsic::smax)
2484	: (UseMin ? Intrinsic::umin : Intrinsic::umax);
2485	auto *Preheader = L.getLoopPreheader();
2486	assert(Preheader && "Loop is not in simplify form?");
2487	IRBuilder<> Builder(Preheader->getTerminator());
2488	// We are about to create a new guaranteed use for RHS2 which might not exist
2489	// before (if it was a non-taken input of logical and/or instruction). If it
2490	// was poison, we need to freeze it. Note that no new use for LHS and RHS1 are
2491	// introduced, so they don't need this.
2492	if (isa<SelectInst>(Val: I))
2493	RHS2 = Builder.CreateFreeze(V: RHS2, Name: RHS2->getName() + ".fr");
2494	Value *NewRHS = Builder.CreateBinaryIntrinsic(
2495	ID: id, LHS: RHS1, RHS: RHS2, FMFSource: nullptr,
2496	Name: StringRef ("invariant.") +
2497	(ICmpInst::isSigned(Pred: *MatchingPred) ? "s" : "u") +
2498	(UseMin ? "min" : "max"));
2499	Builder.SetInsertPoint(&I);
2500	ICmpInst::Predicate P = *MatchingPred;
2501	if (Inverse)
2502	P = ICmpInst::getInversePredicate(pred: P);
2503	Value *NewCond = Builder.CreateICmp(P, LHS: LHS1, RHS: NewRHS);
2504	NewCond->takeName(V: &I);
2505	I.replaceAllUsesWith(V: NewCond);
2506	eraseInstruction(I, SafetyInfo, MSSAU);
2507	Instruction &CondI1 = *cast<Instruction>(Val: Cond1);
2508	Instruction &CondI2 = *cast<Instruction>(Val: Cond2);
2509	salvageDebugInfo(I&: CondI1);
2510	salvageDebugInfo(I&: CondI2);
2511	eraseInstruction(I&: CondI1, SafetyInfo, MSSAU);
2512	eraseInstruction(I&: CondI2, SafetyInfo, MSSAU);
2513	return true;
2514	}
2515
2516	/// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if
2517	/// this allows hoisting the inner GEP.
2518	static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2519	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2520	DominatorTree *DT) {
2521	auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I);
2522	if (!GEP)
2523	return false;
2524
2525	// Do not try to hoist a constant GEP out of the loop via reassociation.
2526	// Constant GEPs can often be folded into addressing modes, and reassociating
2527	// them may inhibit CSE of a common base.
2528	if (GEP->hasAllConstantIndices())
2529	return false;
2530
2531	auto *Src = dyn_cast<GetElementPtrInst>(Val: GEP->getPointerOperand());
2532	if (!Src \|\| !Src->hasOneUse() \|\| !L.contains(Inst: Src))
2533	return false;
2534
2535	Value *SrcPtr = Src->getPointerOperand();
2536	auto LoopInvariant = [&](Value V) { return* L.isLoopInvariant(V); };
2537	if (!L.isLoopInvariant(V: SrcPtr) \|\| !all_of(Range: GEP->indices(), P: LoopInvariant))
2538	return false;
2539
2540	// This can only happen if !AllowSpeculation, otherwise this would already be
2541	// handled.
2542	// FIXME: Should we respect AllowSpeculation in these reassociation folds?
2543	// The flag exists to prevent metadata dropping, which is not relevant here.
2544	if (all_of(Range: Src->indices(), P: LoopInvariant))
2545	return false;
2546
2547	// The swapped GEPs are inbounds if both original GEPs are inbounds
2548	// and the sign of the offsets is the same. For simplicity, only
2549	// handle both offsets being non-negative.
2550	const DataLayout &DL = GEP->getDataLayout();
2551	auto NonNegative = [&](Value *V) {
2552	return isKnownNonNegative(V, SQ: SimplifyQuery (DL, DT, AC, GEP));
2553	};
2554	bool IsInBounds = Src->isInBounds() && GEP->isInBounds() &&
2555	all_of(Range: Src->indices(), P: NonNegative) &&
2556	all_of(Range: GEP->indices(), P: NonNegative);
2557
2558	BasicBlock *Preheader = L.getLoopPreheader();
2559	IRBuilder<> Builder(Preheader->getTerminator());
2560	Value *NewSrc = Builder.CreateGEP(Ty: GEP->getSourceElementType(), Ptr: SrcPtr,
2561	IdxList: SmallVector<Value *>(GEP->indices()),
2562	Name: "invariant.gep", NW: IsInBounds);
2563	Builder.SetInsertPoint(GEP);
2564	Value *NewGEP = Builder.CreateGEP(Ty: Src->getSourceElementType(), Ptr: NewSrc,
2565	IdxList: SmallVector<Value *>(Src->indices()), Name: "gep",
2566	NW: IsInBounds);
2567	GEP->replaceAllUsesWith(V: NewGEP);
2568	eraseInstruction(I&: *GEP, SafetyInfo, MSSAU);
2569	salvageDebugInfo(I&: *Src);
2570	eraseInstruction(I&: *Src, SafetyInfo, MSSAU);
2571	return true;
2572	}
2573
2574	/// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here
2575	/// C1 and C2 are loop invariants and LV is a loop-variant.
2576	static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
2577	Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2578	ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2579	AssumptionCache AC, DominatorTree DT) {
2580	assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2581	assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2582
2583	bool IsSigned = ICmpInst::isSigned(Pred);
2584
2585	// Try to represent VariantLHS as sum of invariant and variant operands.
2586	using namespace PatternMatch;
2587	Value VariantOp, InvariantOp;
2588	if (IsSigned && !match(V: VariantLHS, P: m_NSWAddLike(L: m_Value(V&: VariantOp),
2589	R: m_Value(V&: InvariantOp))))
2590	return false;
2591	if (!IsSigned && !match(V: VariantLHS, P: m_NUWAddLike(L: m_Value(V&: VariantOp),
2592	R: m_Value(V&: InvariantOp))))
2593	return false;
2594
2595	// LHS itself is a loop-variant, try to represent it in the form:
2596	// "VariantOp + InvariantOp". If it is possible, then we can reassociate.
2597	if (L.isLoopInvariant(V: VariantOp))
2598	std::swap(a&: VariantOp, b&: InvariantOp);
2599	if (L.isLoopInvariant(V: VariantOp) \|\| !L.isLoopInvariant(V: InvariantOp))
2600	return false;
2601
2602	// In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to
2603	// freely move values from left side of inequality to right side (just as in
2604	// normal linear arithmetics). Overflows make things much more complicated, so
2605	// we want to avoid this.
2606	auto &DL = L.getHeader()->getDataLayout();
2607	SimplifyQuery SQ(DL, DT, AC, &ICmp);
2608	if (IsSigned && computeOverflowForSignedSub(LHS: InvariantRHS, RHS: InvariantOp, SQ) !=
2609	llvm::OverflowResult::NeverOverflows)
2610	return false;
2611	if (!IsSigned &&
2612	computeOverflowForUnsignedSub(LHS: InvariantRHS, RHS: InvariantOp, SQ) !=
2613	llvm::OverflowResult::NeverOverflows)
2614	return false;
2615	auto *Preheader = L.getLoopPreheader();
2616	assert(Preheader && "Loop is not in simplify form?");
2617	IRBuilder<> Builder(Preheader->getTerminator());
2618	Value *NewCmpOp =
2619	Builder.CreateSub(LHS: InvariantRHS, RHS: InvariantOp, Name: "invariant.op",
2620	/HasNUW/ !IsSigned, /HasNSW/ IsSigned);
2621	ICmp.setPredicate(Pred);
2622	ICmp.setOperand(i_nocapture: `0`, Val_nocapture: VariantOp);
2623	ICmp.setOperand(i_nocapture: `1`, Val_nocapture: NewCmpOp);
2624
2625	Instruction &DeadI = cast<Instruction>(Val&: *VariantLHS);
2626	salvageDebugInfo(I&: DeadI);
2627	eraseInstruction(I&: DeadI, SafetyInfo, MSSAU);
2628	return true;
2629	}
2630
2631	/// Try to reassociate and hoist the following two patterns:
2632	/// LV - C1 < C2 --> LV < C1 + C2,
2633	/// C1 - LV < C2 --> LV > C1 - C2.
2634	static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
2635	Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2636	ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2637	AssumptionCache AC, DominatorTree DT) {
2638	assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2639	assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2640
2641	bool IsSigned = ICmpInst::isSigned(Pred);
2642
2643	// Try to represent VariantLHS as sum of invariant and variant operands.
2644	using namespace PatternMatch;
2645	Value VariantOp, InvariantOp;
2646	if (IsSigned &&
2647	!match(V: VariantLHS, P: m_NSWSub(L: m_Value(V&: VariantOp), R: m_Value(V&: InvariantOp))))
2648	return false;
2649	if (!IsSigned &&
2650	!match(V: VariantLHS, P: m_NUWSub(L: m_Value(V&: VariantOp), R: m_Value(V&: InvariantOp))))
2651	return false;
2652
2653	bool VariantSubtracted = false;
2654	// LHS itself is a loop-variant, try to represent it in the form:
2655	// "VariantOp + InvariantOp". If it is possible, then we can reassociate. If
2656	// the variant operand goes with minus, we use a slightly different scheme.
2657	if (L.isLoopInvariant(V: VariantOp)) {
2658	std::swap(a&: VariantOp, b&: InvariantOp);
2659	VariantSubtracted = true;
2660	Pred = ICmpInst::getSwappedPredicate(pred: Pred);
2661	}
2662	if (L.isLoopInvariant(V: VariantOp) \|\| !L.isLoopInvariant(V: InvariantOp))
2663	return false;
2664
2665	// In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
2666	// freely move values from left side of inequality to right side (just as in
2667	// normal linear arithmetics). Overflows make things much more complicated, so
2668	// we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
2669	// "C1 - C2" does not overflow.
2670	auto &DL = L.getHeader()->getDataLayout();
2671	SimplifyQuery SQ(DL, DT, AC, &ICmp);
2672	if (VariantSubtracted && IsSigned) {
2673	// C1 - LV < C2 --> LV > C1 - C2
2674	if (computeOverflowForSignedSub(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2675	llvm::OverflowResult::NeverOverflows)
2676	return false;
2677	} else if (VariantSubtracted && !IsSigned) {
2678	// C1 - LV < C2 --> LV > C1 - C2
2679	if (computeOverflowForUnsignedSub(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2680	llvm::OverflowResult::NeverOverflows)
2681	return false;
2682	} else if (!VariantSubtracted && IsSigned) {
2683	// LV - C1 < C2 --> LV < C1 + C2
2684	if (computeOverflowForSignedAdd(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2685	llvm::OverflowResult::NeverOverflows)
2686	return false;
2687	} else { // !VariantSubtracted && !IsSigned
2688	// LV - C1 < C2 --> LV < C1 + C2
2689	if (computeOverflowForUnsignedAdd(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2690	llvm::OverflowResult::NeverOverflows)
2691	return false;
2692	}
2693	auto *Preheader = L.getLoopPreheader();
2694	assert(Preheader && "Loop is not in simplify form?");
2695	IRBuilder<> Builder(Preheader->getTerminator());
2696	Value *NewCmpOp =
2697	VariantSubtracted
2698	? Builder.CreateSub(LHS: InvariantOp, RHS: InvariantRHS, Name: "invariant.op",
2699	/HasNUW/ !IsSigned, /HasNSW/ IsSigned)
2700	: Builder.CreateAdd(LHS: InvariantOp, RHS: InvariantRHS, Name: "invariant.op",
2701	/HasNUW/ !IsSigned, /HasNSW/ IsSigned);
2702	ICmp.setPredicate(Pred);
2703	ICmp.setOperand(i_nocapture: `0`, Val_nocapture: VariantOp);
2704	ICmp.setOperand(i_nocapture: `1`, Val_nocapture: NewCmpOp);
2705
2706	Instruction &DeadI = cast<Instruction>(Val&: *VariantLHS);
2707	salvageDebugInfo(I&: DeadI);
2708	eraseInstruction(I&: DeadI, SafetyInfo, MSSAU);
2709	return true;
2710	}
2711
2712	/// Reassociate and hoist add/sub expressions.
2713	static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2714	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2715	DominatorTree *DT) {
2716	using namespace PatternMatch;
2717	CmpPredicate Pred;
2718	Value LHS, RHS;
2719	if (!match(V: &I, P: m_ICmp(Pred, L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
2720	return false;
2721
2722	// Put variant operand to LHS position.
2723	if (L.isLoopInvariant(V: LHS)) {
2724	std::swap(a&: LHS, b&: RHS);
2725	Pred = ICmpInst::getSwappedPredicate(pred: Pred);
2726	}
2727	// We want to delete the initial operation after reassociation, so only do it
2728	// if it has no other uses.
2729	if (L.isLoopInvariant(V: LHS) \|\| !L.isLoopInvariant(V: RHS) \|\| !LHS->hasOneUse())
2730	return false;
2731
2732	// TODO: We could go with smarter context, taking common dominator of all I's
2733	// users instead of I itself.
2734	if (hoistAdd(Pred, VariantLHS: LHS, InvariantRHS: RHS, ICmp&: cast<ICmpInst>(Val&: I), L, SafetyInfo, MSSAU, AC, DT))
2735	return true;
2736
2737	if (hoistSub(Pred, VariantLHS: LHS, InvariantRHS: RHS, ICmp&: cast<ICmpInst>(Val&: I), L, SafetyInfo, MSSAU, AC, DT))
2738	return true;
2739
2740	return false;
2741	}
2742
2743	static bool isReassociableOp(Instruction I, unsigned* IntOpcode,
2744	unsigned FPOpcode) {
2745	if (I->getOpcode() == IntOpcode)
2746	return true;
2747	if (I->getOpcode() == FPOpcode && I->hasAllowReassoc() &&
2748	I->hasNoSignedZeros())
2749	return true;
2750	return false;
2751	}
2752
2753	/// Try to reassociate expressions like ((A1 B1) + (A2 * B2) + ...) * C where*
2754	/// A1, A2, ... and C are loop invariants into expressions like
2755	/// ((A1 C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ...*
2756	/// invariant expressions. This functions returns true only if any hoisting has
2757	/// actually occurred.
2758	static bool hoistMulAddAssociation(Instruction &I, Loop &L,
2759	ICFLoopSafetyInfo &SafetyInfo,
2760	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2761	DominatorTree *DT) {
2762	if (!isReassociableOp(I: &I, IntOpcode: Instruction::Mul, FPOpcode: Instruction::FMul))
2763	return false;
2764	Value *VariantOp = I.getOperand(i: `0`);
2765	Value *InvariantOp = I.getOperand(i: `1`);
2766	if (L.isLoopInvariant(V: VariantOp))
2767	std::swap(a&: VariantOp, b&: InvariantOp);
2768	if (L.isLoopInvariant(V: VariantOp) \|\| !L.isLoopInvariant(V: InvariantOp))
2769	return false;
2770	Value *Factor = InvariantOp;
2771
2772	// First, we need to make sure we should do the transformation.
2773	SmallVector<Use *> Changes;
2774	SmallVector<BinaryOperator *> Adds;
2775	SmallVector<BinaryOperator *> Worklist;
2776	if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(Val: VariantOp))
2777	Worklist.push_back(Elt: VariantBinOp);
2778	while (!Worklist.empty()) {
2779	BinaryOperator *BO = Worklist.pop_back_val();
2780	if (!BO->hasOneUse())
2781	return false;
2782	if (isReassociableOp(I: BO, IntOpcode: Instruction::Add, FPOpcode: Instruction::FAdd) &&
2783	isa<BinaryOperator>(Val: BO->getOperand(i_nocapture: `0`)) &&
2784	isa<BinaryOperator>(Val: BO->getOperand(i_nocapture: `1`))) {
2785	Worklist.push_back(Elt: cast<BinaryOperator>(Val: BO->getOperand(i_nocapture: `0`)));
2786	Worklist.push_back(Elt: cast<BinaryOperator>(Val: BO->getOperand(i_nocapture: `1`)));
2787	Adds.push_back(Elt: BO);
2788	continue;
2789	}
2790	if (!isReassociableOp(I: BO, IntOpcode: Instruction::Mul, FPOpcode: Instruction::FMul) \|\|
2791	L.isLoopInvariant(V: BO))
2792	return false;
2793	Use &U0 = BO->getOperandUse(i: `0`);
2794	Use &U1 = BO->getOperandUse(i: `1`);
2795	if (L.isLoopInvariant(V: U0))
2796	Changes.push_back(Elt: &U0);
2797	else if (L.isLoopInvariant(V: U1))
2798	Changes.push_back(Elt: &U1);
2799	else
2800	return false;
2801	unsigned Limit = I.getType()->isIntOrIntVectorTy()
2802	? IntAssociationUpperLimit
2803	: FPAssociationUpperLimit;
2804	if (Changes.size() > Limit)
2805	return false;
2806	}
2807	if (Changes.empty())
2808	return false;
2809
2810	// Drop the poison flags for any adds we looked through.
2811	if (I.getType()->isIntOrIntVectorTy()) {
2812	for (auto *Add : Adds)
2813	Add->dropPoisonGeneratingFlags();
2814	}
2815
2816	// We know we should do it so let's do the transformation.
2817	auto *Preheader = L.getLoopPreheader();
2818	assert(Preheader && "Loop is not in simplify form?");
2819	IRBuilder<> Builder(Preheader->getTerminator());
2820	for (auto *U : Changes) {
2821	assert(L.isLoopInvariant(U->get()));
2822	auto *Ins = cast<BinaryOperator>(Val: U->getUser());
2823	Value *Mul;
2824	if (I.getType()->isIntOrIntVectorTy()) {
2825	Mul = Builder.CreateMul(LHS: U->get(), RHS: Factor, Name: "factor.op.mul");
2826	// Drop the poison flags on the original multiply.
2827	Ins->dropPoisonGeneratingFlags();
2828	} else
2829	Mul = Builder.CreateFMulFMF(L: U->get(), R: Factor, FMFSource: Ins, Name: "factor.op.fmul");
2830
2831	// Rewrite the reassociable instruction.
2832	unsigned OpIdx = U->getOperandNo();
2833	auto *LHS = OpIdx == `0` ? Mul : Ins->getOperand(i_nocapture: `0`);
2834	auto *RHS = OpIdx == `1` ? Mul : Ins->getOperand(i_nocapture: `1`);
2835	auto *NewBO =
2836	BinaryOperator::Create(Op: Ins->getOpcode(), S1: LHS, S2: RHS,
2837	Name: Ins->getName() + ".reass", InsertBefore: Ins->getIterator());
2838	NewBO->setDebugLoc(DebugLoc::getDropped());
2839	NewBO->copyIRFlags(V: Ins);
2840	if (VariantOp == Ins)
2841	VariantOp = NewBO;
2842	Ins->replaceAllUsesWith(V: NewBO);
2843	eraseInstruction(I&: *Ins, SafetyInfo, MSSAU);
2844	}
2845
2846	I.replaceAllUsesWith(V: VariantOp);
2847	eraseInstruction(I, SafetyInfo, MSSAU);
2848	return true;
2849	}
2850
2851	/// Reassociate associative binary expressions of the form
2852	///
2853	/// 1. "(LV op C1) op C2" ==> "LV op (C1 op C2)"
2854	/// 2. "(C1 op LV) op C2" ==> "LV op (C1 op C2)"
2855	/// 3. "C2 op (C1 op LV)" ==> "LV op (C1 op C2)"
2856	/// 4. "C2 op (LV op C1)" ==> "LV op (C1 op C2)"
2857	///
2858	/// where op is an associative BinOp, LV is a loop variant, and C1 and C2 are
2859	/// loop invariants that we want to hoist, noting that associativity implies
2860	/// commutativity.
2861	static bool hoistBOAssociation(Instruction &I, Loop &L,
2862	ICFLoopSafetyInfo &SafetyInfo,
2863	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2864	DominatorTree *DT) {
2865	auto *BO = dyn_cast<BinaryOperator>(Val: &I);
2866	if (!BO \|\| !BO->isAssociative())
2867	return false;
2868
2869	Instruction::BinaryOps Opcode = BO->getOpcode();
2870	bool LVInRHS = L.isLoopInvariant(V: BO->getOperand(i_nocapture: `0`));
2871	auto *BO0 = dyn_cast<BinaryOperator>(Val: BO->getOperand(i_nocapture: LVInRHS));
2872	if (!BO0 \|\| BO0->getOpcode() != Opcode \|\| !BO0->isAssociative() \|\|
2873	BO0->hasNUsesOrMore(N: BO0->getType()->isIntegerTy() ? `2` : `3`))
2874	return false;
2875
2876	Value *LV = BO0->getOperand(i_nocapture: `0`);
2877	Value *C1 = BO0->getOperand(i_nocapture: `1`);
2878	Value *C2 = BO->getOperand(i_nocapture: !LVInRHS);
2879
2880	assert(BO->isCommutative() && BO0->isCommutative() &&
2881	"Associativity implies commutativity");
2882	if (L.isLoopInvariant(V: LV) && !L.isLoopInvariant(V: C1))
2883	std::swap(a&: LV, b&: C1);
2884	if (L.isLoopInvariant(V: LV) \|\| !L.isLoopInvariant(V: C1) \|\| !L.isLoopInvariant(V: C2))
2885	return false;
2886
2887	auto *Preheader = L.getLoopPreheader();
2888	assert(Preheader && "Loop is not in simplify form?");
2889
2890	IRBuilder<> Builder(Preheader->getTerminator());
2891	auto *Inv = Builder.CreateBinOp(Opc: Opcode, LHS: C1, RHS: C2, Name: "invariant.op");
2892
2893	auto *NewBO = BinaryOperator::Create(
2894	Op: Opcode, S1: LV, S2: Inv, Name: BO->getName() + ".reass", InsertBefore: BO->getIterator());
2895	NewBO->setDebugLoc(DebugLoc::getDropped());
2896
2897	if (Opcode == Instruction::FAdd \|\| Opcode == Instruction::FMul) {
2898	// Intersect FMF flags for FADD and FMUL.
2899	FastMathFlags Intersect = BO->getFastMathFlags() & BO0->getFastMathFlags();
2900	if (auto *I = dyn_cast<Instruction>(Val: Inv))
2901	I->setFastMathFlags(Intersect);
2902	NewBO->setFastMathFlags(Intersect);
2903	} else {
2904	OverflowTracking Flags;
2905	Flags.AllKnownNonNegative = false;
2906	Flags.AllKnownNonZero = false;
2907	Flags.mergeFlags(I&: *BO);
2908	Flags.mergeFlags(I&: *BO0);
2909	// If `Inv` was not constant-folded, a new Instruction has been created.
2910	if (auto *I = dyn_cast<Instruction>(Val: Inv))
2911	Flags.applyFlags(I&: *I);
2912	Flags.applyFlags(I&: *NewBO);
2913	}
2914
2915	BO->replaceAllUsesWith(V: NewBO);
2916	eraseInstruction(I&: *BO, SafetyInfo, MSSAU);
2917
2918	// (LV op C1) might not be erased if it has more uses than the one we just
2919	// replaced.
2920	if (BO0->use_empty()) {
2921	salvageDebugInfo(I&: *BO0);
2922	eraseInstruction(I&: *BO0, SafetyInfo, MSSAU);
2923	}
2924
2925	return true;
2926	}
2927
2928	static bool hoistArithmetics(Instruction &I, Loop &L,
2929	ICFLoopSafetyInfo &SafetyInfo,
2930	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2931	DominatorTree *DT) {
2932	// Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
2933	// into (x < min(INV1, INV2)), and hoisting the invariant part of this
2934	// expression out of the loop.
2935	if (hoistMinMax(I, L, SafetyInfo, MSSAU)) {
2936	++NumHoisted;
2937	++NumMinMaxHoisted;
2938	return true;
2939	}
2940
2941	// Try to hoist GEPs by reassociation.
2942	if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) {
2943	++NumHoisted;
2944	++NumGEPsHoisted;
2945	return true;
2946	}
2947
2948	// Try to hoist add/sub's by reassociation.
2949	if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) {
2950	++NumHoisted;
2951	++NumAddSubHoisted;
2952	return true;
2953	}
2954
2955	bool IsInt = I.getType()->isIntOrIntVectorTy();
2956	if (hoistMulAddAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
2957	++NumHoisted;
2958	if (IsInt)
2959	++NumIntAssociationsHoisted;
2960	else
2961	++NumFPAssociationsHoisted;
2962	return true;
2963	}
2964
2965	if (hoistBOAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
2966	++NumHoisted;
2967	++NumBOAssociationsHoisted;
2968	return true;
2969	}
2970
2971	return false;
2972	}
2973
2974	/// Little predicate that returns true if the specified basic block is in
2975	/// a subloop of the current one, not the current one itself.
2976	///
2977	static bool inSubLoop(BasicBlock BB, Loop CurLoop, LoopInfo *LI) {
2978	assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2979	return LI->getLoopFor(BB) != CurLoop;
2980	}
2981

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/LICM.cpp