LICM.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/LICM.cpp]

1	//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass performs loop invariant code motion, attempting to remove as much
10	// code from the body of a loop as possible. It does this by either hoisting
11	// code into the preheader block, or by sinking code to the exit blocks if it is
12	// safe. This pass also promotes must-aliased memory locations in the loop to
13	// live in registers, thus hoisting and sinking "invariant" loads and stores.
14	//
15	// Hoisting operations out of loops is a canonicalization transform. It
16	// enables and simplifies subsequent optimizations in the middle-end.
17	// Rematerialization of hoisted instructions to reduce register pressure is the
18	// responsibility of the back-end, which has more accurate information about
19	// register pressure and also handles other optimizations than LICM that
20	// increase live-ranges.
21	//
22	// This pass uses alias analysis for two purposes:
23	//
24	// 1. Moving loop invariant loads and calls out of loops. If we can determine
25	// that a load or call inside of a loop never aliases anything stored to,
26	// we can hoist it or sink it like any other instruction.
27	// 2. Scalar Promotion of Memory - If there is a store instruction inside of
28	// the loop, we try to move the store to happen AFTER the loop instead of
29	// inside of the loop. This can only happen if a few conditions are true:
30	// A. The pointer stored through is loop invariant
31	// B. There are no stores or loads in the loop which _may_ alias the
32	// pointer. There are no calls in the loop which mod/ref the pointer.
33	// If these conditions are true, we can promote the loads and stores in the
34	// loop of the pointer to use a temporary alloca'd variable. We then use
35	// the SSAUpdater to construct the appropriate SSA form for the value.
36	//
37	//===----------------------------------------------------------------------===//
38
39	#include "llvm/Transforms/Scalar/LICM.h"
40	#include "llvm/ADT/PriorityWorklist.h"
41	#include "llvm/ADT/SetOperations.h"
42	#include "llvm/ADT/Statistic.h"
43	#include "llvm/Analysis/AliasAnalysis.h"
44	#include "llvm/Analysis/AliasSetTracker.h"
45	#include "llvm/Analysis/AssumptionCache.h"
46	#include "llvm/Analysis/CaptureTracking.h"
47	#include "llvm/Analysis/DomTreeUpdater.h"
48	#include "llvm/Analysis/GuardUtils.h"
49	#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
50	#include "llvm/Analysis/Loads.h"
51	#include "llvm/Analysis/LoopInfo.h"
52	#include "llvm/Analysis/LoopIterator.h"
53	#include "llvm/Analysis/LoopNestAnalysis.h"
54	#include "llvm/Analysis/LoopPass.h"
55	#include "llvm/Analysis/MemorySSA.h"
56	#include "llvm/Analysis/MemorySSAUpdater.h"
57	#include "llvm/Analysis/MustExecute.h"
58	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
59	#include "llvm/Analysis/ScalarEvolution.h"
60	#include "llvm/Analysis/TargetLibraryInfo.h"
61	#include "llvm/Analysis/TargetTransformInfo.h"
62	#include "llvm/Analysis/ValueTracking.h"
63	#include "llvm/IR/CFG.h"
64	#include "llvm/IR/Constants.h"
65	#include "llvm/IR/DataLayout.h"
66	#include "llvm/IR/DebugInfoMetadata.h"
67	#include "llvm/IR/DerivedTypes.h"
68	#include "llvm/IR/Dominators.h"
69	#include "llvm/IR/IRBuilder.h"
70	#include "llvm/IR/Instructions.h"
71	#include "llvm/IR/IntrinsicInst.h"
72	#include "llvm/IR/LLVMContext.h"
73	#include "llvm/IR/Metadata.h"
74	#include "llvm/IR/PatternMatch.h"
75	#include "llvm/IR/PredIteratorCache.h"
76	#include "llvm/InitializePasses.h"
77	#include "llvm/Support/CommandLine.h"
78	#include "llvm/Support/Debug.h"
79	#include "llvm/Support/raw_ostream.h"
80	#include "llvm/Transforms/Scalar.h"
81	#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
82	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
83	#include "llvm/Transforms/Utils/Local.h"
84	#include "llvm/Transforms/Utils/LoopUtils.h"
85	#include "llvm/Transforms/Utils/SSAUpdater.h"
86	#include <algorithm>
87	#include <utility>
88	using namespace llvm;
89
90	namespace llvm {
91	class LPMUpdater;
92	} // namespace llvm
93
94	#define DEBUG_TYPE "licm"
95
96	STATISTIC(NumCreatedBlocks, "Number of blocks created");
97	STATISTIC(NumClonedBranches, "Number of branches cloned");
98	STATISTIC(NumSunk, "Number of instructions sunk out of loop");
99	STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
100	STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
101	STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
102	STATISTIC(NumPromotionCandidates, "Number of promotion candidates");
103	STATISTIC(NumLoadPromoted, "Number of load-only promotions");
104	STATISTIC(NumLoadStorePromoted, "Number of load and store promotions");
105	STATISTIC(NumMinMaxHoisted,
106	"Number of min/max expressions hoisted out of the loop");
107	STATISTIC(NumGEPsHoisted,
108	"Number of geps reassociated and hoisted out of the loop");
109	STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
110	"and hoisted out of the loop");
111	STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
112	"reassociated and hoisted out of the loop");
113	STATISTIC(NumIntAssociationsHoisted,
114	"Number of invariant int expressions "
115	"reassociated and hoisted out of the loop");
116	STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
117	"reassociated and hoisted out of the loop");
118
119	/// Memory promotion is enabled by default.
120	static cl::opt<bool>
121	DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(Val: false),
122	cl::desc ("Disable memory promotion in LICM pass"));
123
124	static cl::opt<bool> ControlFlowHoisting(
125	"licm-control-flow-hoisting", cl::Hidden, cl::init(Val: false),
126	cl::desc ("Enable control flow (and PHI) hoisting in LICM"));
127
128	static cl::opt<bool>
129	SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(Val: false),
130	cl::desc ("Force thread model single in LICM pass"));
131
132	static cl::opt<uint32_t> MaxNumUsesTraversed(
133	"licm-max-num-uses-traversed", cl::Hidden, cl::init(Val: `8`),
134	cl::desc ("Max num uses visited for identifying load "
135	"invariance in loop using invariant start (default = 8)"));
136
137	static cl::opt<unsigned> FPAssociationUpperLimit(
138	"licm-max-num-fp-reassociations", cl::init(Val: `5U`), cl::Hidden,
139	cl::desc (
140	"Set upper limit for the number of transformations performed "
141	"during a single round of hoisting the reassociated expressions."));
142
143	static cl::opt<unsigned> IntAssociationUpperLimit(
144	"licm-max-num-int-reassociations", cl::init(Val: `5U`), cl::Hidden,
145	cl::desc (
146	"Set upper limit for the number of transformations performed "
147	"during a single round of hoisting the reassociated expressions."));
148
149	// Experimental option to allow imprecision in LICM in pathological cases, in
150	// exchange for faster compile. This is to be removed if MemorySSA starts to
151	// address the same issue. LICM calls MemorySSAWalker's
152	// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
153	// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
154	// which may not be precise, since optimizeUses is capped. The result is
155	// correct, but we may not get as "far up" as possible to get which access is
156	// clobbering the one queried.
157	cl::opt<unsigned> llvm::SetLicmMssaOptCap(
158	"licm-mssa-optimization-cap", cl::init(Val: `100`), cl::Hidden,
159	cl::desc ("Enable imprecision in LICM in pathological cases, in exchange "
160	"for faster compile. Caps the MemorySSA clobbering calls."));
161
162	// Experimentally, memory promotion carries less importance than sinking and
163	// hoisting. Limit when we do promotion when using MemorySSA, in order to save
164	// compile time.
165	cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
166	"licm-mssa-max-acc-promotion", cl::init(Val: `250`), cl::Hidden,
167	cl::desc ("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
168	"effect. When MSSA in LICM is enabled, then this is the maximum "
169	"number of accesses allowed to be present in a loop in order to "
170	"enable memory promotion."));
171
172	static bool inSubLoop(BasicBlock BB, Loop CurLoop, LoopInfo *LI);
173	static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
174	const LoopSafetyInfo *SafetyInfo,
175	TargetTransformInfo *TTI,
176	bool &FoldableInLoop, bool LoopNestMode);
177	static void hoist(Instruction &I, const DominatorTree DT, const* Loop *CurLoop,
178	BasicBlock Dest, ICFLoopSafetyInfo SafetyInfo,
179	MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
180	OptimizationRemarkEmitter *ORE);
181	static bool sink(Instruction &I, LoopInfo LI, DominatorTree DT,
182	const Loop CurLoop, ICFLoopSafetyInfo SafetyInfo,
183	MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE);
184	static bool isSafeToExecuteUnconditionally(
185	Instruction &Inst, const DominatorTree DT, const* TargetLibraryInfo *TLI,
186	const Loop CurLoop, const* LoopSafetyInfo *SafetyInfo,
187	OptimizationRemarkEmitter ORE, const* Instruction *CtxI,
188	AssumptionCache AC, bool* AllowSpeculation);
189	static bool noConflictingReadWrites(Instruction I, MemorySSA MSSA,
190	AAResults AA, Loop CurLoop,
191	SinkAndHoistLICMFlags &Flags);
192	static bool pointerInvalidatedByLoop(MemorySSA MSSA, MemoryUse MU,
193	Loop *CurLoop, Instruction &I,
194	SinkAndHoistLICMFlags &Flags,
195	bool InvariantGroup);
196	static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
197	MemoryUse &MU);
198	/// Aggregates various functions for hoisting computations out of loop.
199	static bool hoistArithmetics(Instruction &I, Loop &L,
200	ICFLoopSafetyInfo &SafetyInfo,
201	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
202	DominatorTree *DT);
203	static Instruction *cloneInstructionInExitBlock(
204	Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
205	const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
206
207	static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
208	MemorySSAUpdater &MSSAU);
209
210	static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
211	ICFLoopSafetyInfo &SafetyInfo,
212	MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
213
214	static void foreachMemoryAccess(MemorySSA MSSA, Loop L,
215	function_ref<void(Instruction *)> Fn);
216	using PointersAndHasReadsOutsideSet =
217	std::pair<SmallSetVector<Value , `8`>, bool*>;
218	static SmallVector<PointersAndHasReadsOutsideSet, `0`>
219	collectPromotionCandidates(MemorySSA MSSA, AliasAnalysis AA, Loop *L);
220
221	namespace {
222	struct LoopInvariantCodeMotion {
223	bool runOnLoop(Loop L, AAResults AA, LoopInfo LI, DominatorTree DT,
224	AssumptionCache AC, TargetLibraryInfo TLI,
225	TargetTransformInfo TTI, ScalarEvolution SE, MemorySSA *MSSA,
226	OptimizationRemarkEmitter ORE, bool* LoopNestMode = false);
227
228	LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
229	unsigned LicmMssaNoAccForPromotionCap,
230	bool LicmAllowSpeculation)
231	: LicmMssaOptCap(LicmMssaOptCap),
232	LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
233	LicmAllowSpeculation(LicmAllowSpeculation) {}
234
235	private:
236	unsigned LicmMssaOptCap;
237	unsigned LicmMssaNoAccForPromotionCap;
238	bool LicmAllowSpeculation;
239	};
240
241	struct LegacyLICMPass : public LoopPass {
242	static char ID; // Pass identification, replacement for typeid
243	LegacyLICMPass(
244	unsigned LicmMssaOptCap = SetLicmMssaOptCap,
245	unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
246	bool LicmAllowSpeculation = true)
247	: LoopPass (ID), LICM (LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
248	LicmAllowSpeculation) {
249	initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
250	}
251
252	bool runOnLoop(Loop *L, LPPassManager &LPM) override {
253	if (skipLoop(L))
254	return false;
255
256	LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
257	<< L->getHeader()->getNameOrAsOperand() << "\n");
258
259	Function *F = L->getHeader()->getParent();
260
261	auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
262	MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
263	// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
264	// pass. Function analyses need to be preserved across loop transformations
265	// but ORE cannot be preserved (see comment before the pass definition).
266	OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
267	return LICM.runOnLoop(
268	L, AA: &getAnalysis<AAResultsWrapperPass>().getAAResults(),
269	LI: &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
270	DT: &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
271	AC: &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F&: *F),
272	TLI: &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F: *F),
273	TTI: &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F: *F),
274	SE: SE ? &SE->getSE() : nullptr, MSSA, ORE: &ORE);
275	}
276
277	/// This transformation requires natural loop information & requires that
278	/// loop preheaders be inserted into the CFG...
279	///
280	void getAnalysisUsage(AnalysisUsage &AU) const override {
281	AU.addPreserved<DominatorTreeWrapperPass>();
282	AU.addPreserved<LoopInfoWrapperPass>();
283	AU.addRequired<TargetLibraryInfoWrapperPass>();
284	AU.addRequired<MemorySSAWrapperPass>();
285	AU.addPreserved<MemorySSAWrapperPass>();
286	AU.addRequired<TargetTransformInfoWrapperPass>();
287	AU.addRequired<AssumptionCacheTracker>();
288	getLoopAnalysisUsage(AU);
289	LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
290	AU.addPreserved<LazyBlockFrequencyInfoPass>();
291	AU.addPreserved<LazyBranchProbabilityInfoPass>();
292	}
293
294	private:
295	LoopInvariantCodeMotion LICM;
296	};
297	} // namespace
298
299	PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
300	LoopStandardAnalysisResults &AR, LPMUpdater &) {
301	if (!AR.MSSA)
302	reportFatalUsageError(reason: "LICM requires MemorySSA (loop-mssa)");
303
304	// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
305	// pass. Function analyses need to be preserved across loop transformations
306	// but ORE cannot be preserved (see comment before the pass definition).
307	OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
308
309	LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
310	Opts.AllowSpeculation);
311	if (!LICM.runOnLoop(L: &L, AA: &AR.AA, LI: &AR.LI, DT: &AR.DT, AC: &AR.AC, TLI: &AR.TLI, TTI: &AR.TTI,
312	SE: &AR.SE, MSSA: AR.MSSA, ORE: &ORE))
313	return PreservedAnalyses::all();
314
315	auto PA = getLoopPassPreservedAnalyses();
316	PA.preserve<MemorySSAAnalysis>();
317
318	return PA;
319	}
320
321	void LICMPass::printPipeline(
322	raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
323	static_cast<PassInfoMixin<LICMPass> >(this*)->printPipeline(
324	OS, MapClassName2PassName);
325
326	OS << `'<'`;
327	OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
328	OS << `'>'`;
329	}
330
331	PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
332	LoopStandardAnalysisResults &AR,
333	LPMUpdater &) {
334	if (!AR.MSSA)
335	reportFatalUsageError(reason: "LNICM requires MemorySSA (loop-mssa)");
336
337	// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
338	// pass. Function analyses need to be preserved across loop transformations
339	// but ORE cannot be preserved (see comment before the pass definition).
340	OptimizationRemarkEmitter ORE(LN.getParent());
341
342	LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
343	Opts.AllowSpeculation);
344
345	Loop &OutermostLoop = LN.getOutermostLoop();
346	bool Changed = LICM.runOnLoop(L: &OutermostLoop, AA: &AR.AA, LI: &AR.LI, DT: &AR.DT, AC: &AR.AC,
347	TLI: &AR.TLI, TTI: &AR.TTI, SE: &AR.SE, MSSA: AR.MSSA, ORE: &ORE, LoopNestMode: true);
348
349	if (!Changed)
350	return PreservedAnalyses::all();
351
352	auto PA = getLoopPassPreservedAnalyses();
353
354	PA.preserve<DominatorTreeAnalysis>();
355	PA.preserve<LoopAnalysis>();
356	PA.preserve<MemorySSAAnalysis>();
357
358	return PA;
359	}
360
361	void LNICMPass::printPipeline(
362	raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
363	static_cast<PassInfoMixin<LNICMPass> >(this*)->printPipeline(
364	OS, MapClassName2PassName);
365
366	OS << `'<'`;
367	OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
368	OS << `'>'`;
369	}
370
371	char LegacyLICMPass::ID = `0`;
372	INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
373	false, false)
374	INITIALIZE_PASS_DEPENDENCY(LoopPass)
375	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
376	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
377	INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
378	INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
379	INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
380	false)
381
382	Pass llvm::createLICMPass() { return* new LegacyLICMPass (); }
383
384	llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop &L,
385	MemorySSA &MSSA)
386	: SinkAndHoistLICMFlags (SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
387	IsSink, L, MSSA) {}
388
389	llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
390	unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
391	Loop &L, MemorySSA &MSSA)
392	: LicmMssaOptCap(LicmMssaOptCap),
393	LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
394	IsSink(IsSink) {
395	unsigned AccessCapCount = `0`;
396	for (auto *BB : L.getBlocks())
397	if (const auto *Accesses = MSSA.getBlockAccesses(BB))
398	for (const auto &MA : *Accesses) {
399	(void)MA;
400	++AccessCapCount;
401	if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
402	NoOfMemAccTooLarge = true;
403	return;
404	}
405	}
406	}
407
408	/// Hoist expressions out of the specified loop. Note, alias info for inner
409	/// loop is not preserved so it is not a good idea to run LICM multiple
410	/// times on one loop.
411	bool LoopInvariantCodeMotion::runOnLoop(Loop L, AAResults AA, LoopInfo *LI,
412	DominatorTree DT, AssumptionCache AC,
413	TargetLibraryInfo *TLI,
414	TargetTransformInfo *TTI,
415	ScalarEvolution SE, MemorySSA MSSA,
416	OptimizationRemarkEmitter *ORE,
417	bool LoopNestMode) {
418	bool Changed = false;
419
420	assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
421
422	// If this loop has metadata indicating that LICM is not to be performed then
423	// just exit.
424	if (hasDisableLICMTransformsHint(L)) {
425	return false;
426	}
427
428	// Don't sink stores from loops with coroutine suspend instructions.
429	// LICM would sink instructions into the default destination of
430	// the coroutine switch. The default destination of the switch is to
431	// handle the case where the coroutine is suspended, by which point the
432	// coroutine frame may have been destroyed. No instruction can be sunk there.
433	// FIXME: This would unfortunately hurt the performance of coroutines, however
434	// there is currently no general solution for this. Similar issues could also
435	// potentially happen in other passes where instructions are being moved
436	// across that edge.
437	bool HasCoroSuspendInst = llvm::any_of(Range: L->getBlocks(), P: [](BasicBlock *BB) {
438	return llvm::any_of(Range&: *BB, P: [](Instruction &I) {
439	IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &I);
440	return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
441	});
442	});
443
444	MemorySSAUpdater MSSAU(MSSA);
445	SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
446	/IsSink=/true, L, MSSA);
447
448	// Get the preheader block to move instructions into...
449	BasicBlock *Preheader = L->getLoopPreheader();
450
451	// Compute loop safety information.
452	ICFLoopSafetyInfo SafetyInfo;
453	SafetyInfo.computeLoopSafetyInfo(CurLoop: L);
454
455	// We want to visit all of the instructions in this loop... that are not parts
456	// of our subloops (they have already had their invariants hoisted out of
457	// their loop, into this loop, so there is no need to process the BODIES of
458	// the subloops).
459	//
460	// Traverse the body of the loop in depth first order on the dominator tree so
461	// that we are guaranteed to see definitions before we see uses. This allows
462	// us to sink instructions in one pass, without iteration. After sinking
463	// instructions, we perform another pass to hoist them out of the loop.
464	if (L->hasDedicatedExits())
465	Changed \|=
466	LoopNestMode
467	? sinkRegionForLoopNest(DT->getNode(BB: L->getHeader()), AA, LI, DT,
468	TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
469	: sinkRegion(DT->getNode(BB: L->getHeader()), AA, LI, DT, TLI, TTI, CurLoop: L,
470	MSSAU, &SafetyInfo, Flags, ORE);
471	Flags.setIsSink(false);
472	if (Preheader)
473	Changed \|= hoistRegion(DT->getNode(BB: L->getHeader()), AA, LI, DT, AC, TLI, L,
474	MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
475	AllowSpeculation: LicmAllowSpeculation);
476
477	// Now that all loop invariants have been removed from the loop, promote any
478	// memory references to scalars that we can.
479	// Don't sink stores from loops without dedicated block exits. Exits
480	// containing indirect branches are not transformed by loop simplify,
481	// make sure we catch that. An additional load may be generated in the
482	// preheader for SSA updater, so also avoid sinking when no preheader
483	// is available.
484	if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
485	!Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
486	// Figure out the loop exits and their insertion points
487	SmallVector<BasicBlock *, `8`> ExitBlocks;
488	L->getUniqueExitBlocks(ExitBlocks);
489
490	// We can't insert into a catchswitch.
491	bool HasCatchSwitch = llvm::any_of(Range&: ExitBlocks, P: [](BasicBlock *Exit) {
492	return isa<CatchSwitchInst>(Val: Exit->getTerminator());
493	});
494
495	if (!HasCatchSwitch) {
496	SmallVector<BasicBlock::iterator, `8`> InsertPts;
497	SmallVector<MemoryAccess *, `8`> MSSAInsertPts;
498	InsertPts.reserve(N: ExitBlocks.size());
499	MSSAInsertPts.reserve(N: ExitBlocks.size());
500	for (BasicBlock *ExitBlock : ExitBlocks) {
501	InsertPts.push_back(Elt: ExitBlock->getFirstInsertionPt());
502	MSSAInsertPts.push_back(Elt: nullptr);
503	}
504
505	PredIteratorCache PIC;
506
507	// Promoting one set of accesses may make the pointers for another set
508	// loop invariant, so run this in a loop.
509	bool Promoted = false;
510	bool LocalPromoted;
511	do {
512	LocalPromoted = false;
513	for (auto [PointerMustAliases, HasReadsOutsideSet] :
514	collectPromotionCandidates(MSSA, AA, L)) {
515	LocalPromoted \|= promoteLoopAccessesToScalars(
516	PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
517	DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
518	AllowSpeculation: LicmAllowSpeculation, HasReadsOutsideSet);
519	}
520	Promoted \|= LocalPromoted;
521	} while (LocalPromoted);
522
523	// Once we have promoted values across the loop body we have to
524	// recursively reform LCSSA as any nested loop may now have values defined
525	// within the loop used in the outer loop.
526	// FIXME: This is really heavy handed. It would be a bit better to use an
527	// SSAUpdater strategy during promotion that was LCSSA aware and reformed
528	// it as it went.
529	if (Promoted)
530	formLCSSARecursively(L&: L, DT: DT, LI, SE);
531
532	Changed \|= Promoted;
533	}
534	}
535
536	// Check that neither this loop nor its parent have had LCSSA broken. LICM is
537	// specifically moving instructions across the loop boundary and so it is
538	// especially in need of basic functional correctness checking here.
539	assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
540	assert((L->isOutermost() \|\| L->getParentLoop()->isLCSSAForm(*DT)) &&
541	"Parent loop not left in LCSSA form after LICM!");
542
543	if (VerifyMemorySSA)
544	MSSA->verifyMemorySSA();
545
546	if (Changed && SE)
547	SE->forgetLoopDispositions();
548	return Changed;
549	}
550
551	/// Walk the specified region of the CFG (defined by all blocks dominated by
552	/// the specified block, and that are in the current loop) in reverse depth
553	/// first order w.r.t the DominatorTree. This allows us to visit uses before
554	/// definitions, allowing us to sink a loop body in one pass without iteration.
555	///
556	bool llvm::sinkRegion(DomTreeNode N, AAResults AA, LoopInfo *LI,
557	DominatorTree DT, TargetLibraryInfo TLI,
558	TargetTransformInfo TTI, Loop CurLoop,
559	MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
560	SinkAndHoistLICMFlags &Flags,
561	OptimizationRemarkEmitter ORE, Loop OutermostLoop) {
562
563	// Verify inputs.
564	assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
565	CurLoop != nullptr && SafetyInfo != nullptr &&
566	"Unexpected input to sinkRegion.");
567
568	// We want to visit children before parents. We will enqueue all the parents
569	// before their children in the worklist and process the worklist in reverse
570	// order.
571	SmallVector<BasicBlock *, `16`> Worklist =
572	collectChildrenInLoop(DT, N, CurLoop);
573
574	bool Changed = false;
575	for (BasicBlock *BB : reverse(C&: Worklist)) {
576	// subloop (which would already have been processed).
577	if (inSubLoop(BB, CurLoop, LI))
578	continue;
579
580	for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
581	Instruction &I = *--II;
582
583	// The instruction is not used in the loop if it is dead. In this case,
584	// we just delete it instead of sinking it.
585	if (isInstructionTriviallyDead(I: &I, TLI)) {
586	LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << `'\n'`);
587	salvageKnowledge(I: &I);
588	salvageDebugInfo(I);
589	++II;
590	eraseInstruction(I, SafetyInfo&: *SafetyInfo, MSSAU);
591	Changed = true;
592	continue;
593	}
594
595	// Check to see if we can sink this instruction to the exit blocks
596	// of the loop. We can do this if the all users of the instruction are
597	// outside of the loop. In this case, it doesn't even matter if the
598	// operands of the instruction are loop invariant.
599	//
600	bool FoldableInLoop = false;
601	bool LoopNestMode = OutermostLoop != nullptr;
602	if (!I.mayHaveSideEffects() &&
603	isNotUsedOrFoldableInLoop(I, CurLoop: LoopNestMode ? OutermostLoop : CurLoop,
604	SafetyInfo, TTI, FoldableInLoop,
605	LoopNestMode) &&
606	canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, TargetExecutesOncePerLoop: true, LICMFlags&: Flags, ORE)) {
607	if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
608	if (!FoldableInLoop) {
609	++II;
610	salvageDebugInfo(I);
611	eraseInstruction(I, SafetyInfo&: *SafetyInfo, MSSAU);
612	}
613	Changed = true;
614	}
615	}
616	}
617	}
618	if (VerifyMemorySSA)
619	MSSAU.getMemorySSA()->verifyMemorySSA();
620	return Changed;
621	}
622
623	bool llvm::sinkRegionForLoopNest(DomTreeNode N, AAResults AA, LoopInfo *LI,
624	DominatorTree DT, TargetLibraryInfo TLI,
625	TargetTransformInfo TTI, Loop CurLoop,
626	MemorySSAUpdater &MSSAU,
627	ICFLoopSafetyInfo *SafetyInfo,
628	SinkAndHoistLICMFlags &Flags,
629	OptimizationRemarkEmitter *ORE) {
630
631	bool Changed = false;
632	SmallPriorityWorklist<Loop *, `4`> Worklist;
633	Worklist.insert(X: CurLoop);
634	appendLoopsToWorklist(*CurLoop, Worklist);
635	while (!Worklist.empty()) {
636	Loop *L = Worklist.pop_back_val();
637	Changed \|= sinkRegion(N: DT->getNode(BB: L->getHeader()), AA, LI, DT, TLI, TTI, CurLoop: L,
638	MSSAU, SafetyInfo, Flags, ORE, OutermostLoop: CurLoop);
639	}
640	return Changed;
641	}
642
643	namespace {
644	// This is a helper class for hoistRegion to make it able to hoist control flow
645	// in order to be able to hoist phis. The way this works is that we initially
646	// start hoisting to the loop preheader, and when we see a loop invariant branch
647	// we make note of this. When we then come to hoist an instruction that's
648	// conditional on such a branch we duplicate the branch and the relevant control
649	// flow, then hoist the instruction into the block corresponding to its original
650	// block in the duplicated control flow.
651	class ControlFlowHoister {
652	private:
653	// Information about the loop we are hoisting from
654	LoopInfo *LI;
655	DominatorTree *DT;
656	Loop *CurLoop;
657	MemorySSAUpdater &MSSAU;
658
659	// A map of blocks in the loop to the block their instructions will be hoisted
660	// to.
661	DenseMap<BasicBlock , BasicBlock > HoistDestinationMap;
662
663	// The branches that we can hoist, mapped to the block that marks a
664	// convergence point of their control flow.
665	DenseMap<BranchInst , BasicBlock > HoistableBranches;
666
667	public:
668	ControlFlowHoister(LoopInfo LI, DominatorTree DT, Loop *CurLoop,
669	MemorySSAUpdater &MSSAU)
670	: LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
671
672	void registerPossiblyHoistableBranch(BranchInst *BI) {
673	// We can only hoist conditional branches with loop invariant operands.
674	if (!ControlFlowHoisting \|\| !BI->isConditional() \|\|
675	!CurLoop->hasLoopInvariantOperands(I: BI))
676	return;
677
678	// The branch destinations need to be in the loop, and we don't gain
679	// anything by duplicating conditional branches with duplicate successors,
680	// as it's essentially the same as an unconditional branch.
681	BasicBlock *TrueDest = BI->getSuccessor(i: `0`);
682	BasicBlock *FalseDest = BI->getSuccessor(i: `1`);
683	if (!CurLoop->contains(BB: TrueDest) \|\| !CurLoop->contains(BB: FalseDest) \|\|
684	TrueDest == FalseDest)
685	return;
686
687	// We can hoist BI if one branch destination is the successor of the other,
688	// or both have common successor which we check by seeing if the
689	// intersection of their successors is non-empty.
690	// TODO: This could be expanded to allowing branches where both ends
691	// eventually converge to a single block.
692	SmallPtrSet<BasicBlock *, `4`> TrueDestSucc(llvm::from_range,
693	successors(BB: TrueDest));
694	SmallPtrSet<BasicBlock *, `4`> FalseDestSucc(llvm::from_range,
695	successors(BB: FalseDest));
696	BasicBlock CommonSucc = nullptr*;
697	if (TrueDestSucc.count(Ptr: FalseDest)) {
698	CommonSucc = FalseDest;
699	} else if (FalseDestSucc.count(Ptr: TrueDest)) {
700	CommonSucc = TrueDest;
701	} else {
702	set_intersect(S1&: TrueDestSucc, S2: FalseDestSucc);
703	// If there's one common successor use that.
704	if (TrueDestSucc.size() == `1`)
705	CommonSucc = *TrueDestSucc.begin();
706	// If there's more than one pick whichever appears first in the block list
707	// (we can't use the value returned by TrueDestSucc.begin() as it's
708	// unpredicatable which element gets returned).
709	else if (!TrueDestSucc.empty()) {
710	Function *F = TrueDest->getParent();
711	auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(Ptr: &BB); };
712	auto It = llvm::find_if(Range&: *F, P: IsSucc);
713	assert(It != F->end() && "Could not find successor in function");
714	CommonSucc = &*It;
715	}
716	}
717	// The common successor has to be dominated by the branch, as otherwise
718	// there will be some other path to the successor that will not be
719	// controlled by this branch so any phi we hoist would be controlled by the
720	// wrong condition. This also takes care of avoiding hoisting of loop back
721	// edges.
722	// TODO: In some cases this could be relaxed if the successor is dominated
723	// by another block that's been hoisted and we can guarantee that the
724	// control flow has been replicated exactly.
725	if (CommonSucc && DT->dominates(Def: BI, BB: CommonSucc))
726	HoistableBranches [BI] = CommonSucc;
727	}
728
729	bool canHoistPHI(PHINode *PN) {
730	// The phi must have loop invariant operands.
731	if (!ControlFlowHoisting \|\| !CurLoop->hasLoopInvariantOperands(I: PN))
732	return false;
733	// We can hoist phis if the block they are in is the target of hoistable
734	// branches which cover all of the predecessors of the block.
735	BasicBlock *BB = PN->getParent();
736	SmallPtrSet<BasicBlock *, `8`> PredecessorBlocks(llvm::from_range,
737	predecessors(BB));
738	// If we have less predecessor blocks than predecessors then the phi will
739	// have more than one incoming value for the same block which we can't
740	// handle.
741	// TODO: This could be handled be erasing some of the duplicate incoming
742	// values.
743	if (PredecessorBlocks.size() != pred_size(BB))
744	return false;
745	for (auto &Pair : HoistableBranches) {
746	if (Pair.second == BB) {
747	// Which blocks are predecessors via this branch depends on if the
748	// branch is triangle-like or diamond-like.
749	if (Pair.first->getSuccessor(i: `0`) == BB) {
750	PredecessorBlocks.erase(Ptr: Pair.first->getParent());
751	PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: `1`));
752	} else if (Pair.first->getSuccessor(i: `1`) == BB) {
753	PredecessorBlocks.erase(Ptr: Pair.first->getParent());
754	PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: `0`));
755	} else {
756	PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: `0`));
757	PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: `1`));
758	}
759	}
760	}
761	// PredecessorBlocks will now be empty if for every predecessor of BB we
762	// found a hoistable branch source.
763	return PredecessorBlocks.empty();
764	}
765
766	BasicBlock getOrCreateHoistedBlock(BasicBlock BB) {
767	if (!ControlFlowHoisting)
768	return CurLoop->getLoopPreheader();
769	// If BB has already been hoisted, return that
770	if (auto It = HoistDestinationMap.find(Val: BB); It != HoistDestinationMap.end())
771	return It ->second;
772
773	// Check if this block is conditional based on a pending branch
774	auto HasBBAsSuccessor =
775	[&](DenseMap<BranchInst , BasicBlock >::value_type &Pair) {
776	return BB != Pair.second && (Pair.first->getSuccessor(i: `0`) == BB \|\|
777	Pair.first->getSuccessor(i: `1`) == BB);
778	};
779	auto It = llvm::find_if(Range&: HoistableBranches, P: HasBBAsSuccessor);
780
781	// If not involved in a pending branch, hoist to preheader
782	BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
783	if (It == HoistableBranches.end()) {
784	LLVM_DEBUG(dbgs() << "LICM using "
785	<< InitialPreheader->getNameOrAsOperand()
786	<< " as hoist destination for "
787	<< BB->getNameOrAsOperand() << "\n");
788	HoistDestinationMap [BB] = InitialPreheader;
789	return InitialPreheader;
790	}
791	BranchInst *BI = It ->first;
792	assert(std::none_of(std::next(It), HoistableBranches.end(),
793	HasBBAsSuccessor) &&
794	"BB is expected to be the target of at most one branch");
795
796	LLVMContext &C = BB->getContext();
797	BasicBlock *TrueDest = BI->getSuccessor(i: `0`);
798	BasicBlock *FalseDest = BI->getSuccessor(i: `1`);
799	BasicBlock *CommonSucc = HoistableBranches [BI];
800	BasicBlock *HoistTarget = getOrCreateHoistedBlock(BB: BI->getParent());
801
802	// Create hoisted versions of blocks that currently don't have them
803	auto CreateHoistedBlock = [&](BasicBlock *Orig) {
804	auto [It, Inserted] = HoistDestinationMap.try_emplace(Key: Orig);
805	if (!Inserted)
806	return It ->second;
807	BasicBlock *New =
808	BasicBlock::Create(Context&: C, Name: Orig->getName() + ".licm", Parent: Orig->getParent());
809	It ->second = New;
810	DT->addNewBlock(BB: New, DomBB: HoistTarget);
811	if (CurLoop->getParentLoop())
812	CurLoop->getParentLoop()->addBasicBlockToLoop(NewBB: New, LI&: *LI);
813	++NumCreatedBlocks;
814	LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
815	<< " as hoist destination for " << Orig->getName()
816	<< "\n");
817	return New;
818	};
819	BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
820	BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
821	BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
822
823	// Link up these blocks with branches.
824	if (!HoistCommonSucc->getTerminator()) {
825	// The new common successor we've generated will branch to whatever that
826	// hoist target branched to.
827	BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
828	assert(TargetSucc && "Expected hoist target to have a single successor");
829	HoistCommonSucc->moveBefore(MovePos: TargetSucc);
830	BranchInst::Create(IfTrue: TargetSucc, InsertBefore: HoistCommonSucc);
831	}
832	if (!HoistTrueDest->getTerminator()) {
833	HoistTrueDest->moveBefore(MovePos: HoistCommonSucc);
834	BranchInst::Create(IfTrue: HoistCommonSucc, InsertBefore: HoistTrueDest);
835	}
836	if (!HoistFalseDest->getTerminator()) {
837	HoistFalseDest->moveBefore(MovePos: HoistCommonSucc);
838	BranchInst::Create(IfTrue: HoistCommonSucc, InsertBefore: HoistFalseDest);
839	}
840
841	// If BI is being cloned to what was originally the preheader then
842	// HoistCommonSucc will now be the new preheader.
843	if (HoistTarget == InitialPreheader) {
844	// Phis in the loop header now need to use the new preheader.
845	InitialPreheader->replaceSuccessorsPhiUsesWith(New: HoistCommonSucc);
846	MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
847	Old: HoistTarget->getSingleSuccessor(), New: HoistCommonSucc, Preds: {HoistTarget});
848	// The new preheader dominates the loop header.
849	DomTreeNode *PreheaderNode = DT->getNode(BB: HoistCommonSucc);
850	DomTreeNode *HeaderNode = DT->getNode(BB: CurLoop->getHeader());
851	DT->changeImmediateDominator(N: HeaderNode, NewIDom: PreheaderNode);
852	// The preheader hoist destination is now the new preheader, with the
853	// exception of the hoist destination of this branch.
854	for (auto &Pair : HoistDestinationMap)
855	if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
856	Pair.second = HoistCommonSucc;
857	}
858
859	// Now finally clone BI.
860	ReplaceInstWithInst(
861	From: HoistTarget->getTerminator(),
862	To: BranchInst::Create(IfTrue: HoistTrueDest, IfFalse: HoistFalseDest, Cond: BI->getCondition()));
863	++NumClonedBranches;
864
865	assert(CurLoop->getLoopPreheader() &&
866	"Hoisting blocks should not have destroyed preheader");
867	return HoistDestinationMap [BB];
868	}
869	};
870	} // namespace
871
872	/// Walk the specified region of the CFG (defined by all blocks dominated by
873	/// the specified block, and that are in the current loop) in depth first
874	/// order w.r.t the DominatorTree. This allows us to visit definitions before
875	/// uses, allowing us to hoist a loop body in one pass without iteration.
876	///
877	bool llvm::hoistRegion(DomTreeNode N, AAResults AA, LoopInfo *LI,
878	DominatorTree DT, AssumptionCache AC,
879	TargetLibraryInfo TLI, Loop CurLoop,
880	MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
881	ICFLoopSafetyInfo *SafetyInfo,
882	SinkAndHoistLICMFlags &Flags,
883	OptimizationRemarkEmitter ORE, bool* LoopNestMode,
884	bool AllowSpeculation) {
885	// Verify inputs.
886	assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
887	CurLoop != nullptr && SafetyInfo != nullptr &&
888	"Unexpected input to hoistRegion.");
889
890	ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
891
892	// Keep track of instructions that have been hoisted, as they may need to be
893	// re-hoisted if they end up not dominating all of their uses.
894	SmallVector<Instruction *, `16`> HoistedInstructions;
895
896	// For PHI hoisting to work we need to hoist blocks before their successors.
897	// We can do this by iterating through the blocks in the loop in reverse
898	// post-order.
899	LoopBlocksRPO Worklist(CurLoop);
900	Worklist.perform(LI);
901	bool Changed = false;
902	BasicBlock *Preheader = CurLoop->getLoopPreheader();
903	for (BasicBlock *BB : Worklist) {
904	// Only need to process the contents of this block if it is not part of a
905	// subloop (which would already have been processed).
906	if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
907	continue;
908
909	for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
910	// Try hoisting the instruction out to the preheader. We can only do
911	// this if all of the operands of the instruction are loop invariant and
912	// if it is safe to hoist the instruction. We also check block frequency
913	// to make sure instruction only gets hoisted into colder blocks.
914	// TODO: It may be safe to hoist if we are hoisting to a conditional block
915	// and we have accurately duplicated the control flow from the loop header
916	// to that block.
917	if (CurLoop->hasLoopInvariantOperands(I: &I) &&
918	canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, TargetExecutesOncePerLoop: true, LICMFlags&: Flags, ORE) &&
919	isSafeToExecuteUnconditionally(
920	Inst&: I, DT, TLI, CurLoop, SafetyInfo, ORE,
921	CtxI: Preheader->getTerminator(), AC, AllowSpeculation)) {
922	hoist(I, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
923	MSSAU, SE, ORE);
924	HoistedInstructions.push_back(Elt: &I);
925	Changed = true;
926	continue;
927	}
928
929	// Attempt to remove floating point division out of the loop by
930	// converting it to a reciprocal multiplication.
931	if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
932	CurLoop->isLoopInvariant(V: I.getOperand(i: `1`))) {
933	auto Divisor = I.getOperand(i: `1`);
934	auto One = llvm::ConstantFP::get(Ty: Divisor->getType(), V: `1.0`);
935	auto ReciprocalDivisor = BinaryOperator::CreateFDiv(V1: One, V2: Divisor);
936	ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
937	SafetyInfo->insertInstructionTo(Inst: ReciprocalDivisor, BB: I.getParent());
938	ReciprocalDivisor->insertBefore(InsertPos: I.getIterator());
939	ReciprocalDivisor->setDebugLoc(I.getDebugLoc());
940
941	auto Product =
942	BinaryOperator::CreateFMul(V1: I.getOperand(i: `0`), V2: ReciprocalDivisor);
943	Product->setFastMathFlags(I.getFastMathFlags());
944	SafetyInfo->insertInstructionTo(Inst: Product, BB: I.getParent());
945	Product->insertAfter(InsertPos: I.getIterator());
946	Product->setDebugLoc(I.getDebugLoc());
947	I.replaceAllUsesWith(V: Product);
948	eraseInstruction(I, SafetyInfo&: *SafetyInfo, MSSAU);
949
950	hoist(I&: *ReciprocalDivisor, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB),
951	SafetyInfo, MSSAU, SE, ORE);
952	HoistedInstructions.push_back(Elt: ReciprocalDivisor);
953	Changed = true;
954	continue;
955	}
956
957	auto IsInvariantStart = [&](Instruction &I) {
958	using namespace PatternMatch;
959	return I.use_empty() &&
960	match(V: &I, P: m_Intrinsic<Intrinsic::invariant_start>());
961	};
962	auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
963	return SafetyInfo->isGuaranteedToExecute(Inst: I, DT, CurLoop) &&
964	SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
965	};
966	if ((IsInvariantStart (I) \|\| isGuard(U: &I)) &&
967	CurLoop->hasLoopInvariantOperands(I: &I) &&
968	MustExecuteWithoutWritesBefore (I)) {
969	hoist(I, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
970	MSSAU, SE, ORE);
971	HoistedInstructions.push_back(Elt: &I);
972	Changed = true;
973	continue;
974	}
975
976	if (PHINode *PN = dyn_cast<PHINode>(Val: &I)) {
977	if (CFH.canHoistPHI(PN)) {
978	// Redirect incoming blocks first to ensure that we create hoisted
979	// versions of those blocks before we hoist the phi.
980	for (unsigned int i = `0`; i < PN->getNumIncomingValues(); ++i)
981	PN->setIncomingBlock(
982	i, BB: CFH.getOrCreateHoistedBlock(BB: PN->getIncomingBlock(i)));
983	hoist(I&: *PN, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
984	MSSAU, SE, ORE);
985	assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
986	Changed = true;
987	continue;
988	}
989	}
990
991	// Try to reassociate instructions so that part of computations can be
992	// done out of loop.
993	if (hoistArithmetics(I, L&: CurLoop, SafetyInfo&: SafetyInfo, MSSAU, AC, DT)) {
994	Changed = true;
995	continue;
996	}
997
998	// Remember possibly hoistable branches so we can actually hoist them
999	// later if needed.
1000	if (BranchInst *BI = dyn_cast<BranchInst>(Val: &I))
1001	CFH.registerPossiblyHoistableBranch(BI);
1002	}
1003	}
1004
1005	// If we hoisted instructions to a conditional block they may not dominate
1006	// their uses that weren't hoisted (such as phis where some operands are not
1007	// loop invariant). If so make them unconditional by moving them to their
1008	// immediate dominator. We iterate through the instructions in reverse order
1009	// which ensures that when we rehoist an instruction we rehoist its operands,
1010	// and also keep track of where in the block we are rehoisting to make sure
1011	// that we rehoist instructions before the instructions that use them.
1012	Instruction HoistPoint = nullptr*;
1013	if (ControlFlowHoisting) {
1014	for (Instruction *I : reverse(C&: HoistedInstructions)) {
1015	if (!llvm::all_of(Range: I->uses(),
1016	P: [&](Use &U) { return DT->dominates(Def: I, U); })) {
1017	BasicBlock *Dominator =
1018	DT->getNode(BB: I->getParent())->getIDom()->getBlock();
1019	if (!HoistPoint \|\| !DT->dominates(A: HoistPoint->getParent(), B: Dominator)) {
1020	if (HoistPoint)
1021	assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
1022	"New hoist point expected to dominate old hoist point");
1023	HoistPoint = Dominator->getTerminator();
1024	}
1025	LLVM_DEBUG(dbgs() << "LICM rehoisting to "
1026	<< HoistPoint->getParent()->getNameOrAsOperand()
1027	<< ": " << *I << "\n");
1028	moveInstructionBefore(I&: I, Dest: HoistPoint->getIterator(), SafetyInfo&: SafetyInfo, MSSAU,
1029	SE);
1030	HoistPoint = I;
1031	Changed = true;
1032	}
1033	}
1034	}
1035	if (VerifyMemorySSA)
1036	MSSAU.getMemorySSA()->verifyMemorySSA();
1037
1038	// Now that we've finished hoisting make sure that LI and DT are still
1039	// valid.
1040	#ifdef EXPENSIVE_CHECKS
1041	if (Changed) {
1042	assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1043	"Dominator tree verification failed");
1044	LI->verify(*DT);
1045	}
1046	#endif
1047
1048	return Changed;
1049	}
1050
1051	// Return true if LI is invariant within scope of the loop. LI is invariant if
1052	// CurLoop is dominated by an invariant.start representing the same memory
1053	// location and size as the memory location LI loads from, and also the
1054	// invariant.start has no uses.
1055	static bool isLoadInvariantInLoop(LoadInst LI, DominatorTree DT,
1056	Loop *CurLoop) {
1057	Value *Addr = LI->getPointerOperand();
1058	const DataLayout &DL = LI->getDataLayout();
1059	const TypeSize LocSizeInBits = DL.getTypeSizeInBits(Ty: LI->getType());
1060
1061	// It is not currently possible for clang to generate an invariant.start
1062	// intrinsic with scalable vector types because we don't support thread local
1063	// sizeless types and we don't permit sizeless types in structs or classes.
1064	// Furthermore, even if support is added for this in future the intrinsic
1065	// itself is defined to have a size of -1 for variable sized objects. This
1066	// makes it impossible to verify if the intrinsic envelops our region of
1067	// interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
1068	// types would have a -1 parameter, but the former is clearly double the size
1069	// of the latter.
1070	if (LocSizeInBits.isScalable())
1071	return false;
1072
1073	// If we've ended up at a global/constant, bail. We shouldn't be looking at
1074	// uselists for non-local Values in a loop pass.
1075	if (isa<Constant>(Val: Addr))
1076	return false;
1077
1078	unsigned UsesVisited = `0`;
1079	// Traverse all uses of the load operand value, to see if invariant.start is
1080	// one of the uses, and whether it dominates the load instruction.
1081	for (auto *U : Addr->users()) {
1082	// Avoid traversing for Load operand with high number of users.
1083	if (++UsesVisited > MaxNumUsesTraversed)
1084	return false;
1085	IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U);
1086	// If there are escaping uses of invariant.start instruction, the load maybe
1087	// non-invariant.
1088	if (!II \|\| II->getIntrinsicID() != Intrinsic::invariant_start \|\|
1089	!II->use_empty())
1090	continue;
1091	ConstantInt *InvariantSize = cast<ConstantInt>(Val: II->getArgOperand(i: `0`));
1092	// The intrinsic supports having a -1 argument for variable sized objects
1093	// so we should check for that here.
1094	if (InvariantSize->isNegative())
1095	continue;
1096	uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * `8`;
1097	// Confirm the invariant.start location size contains the load operand size
1098	// in bits. Also, the invariant.start should dominate the load, and we
1099	// should not hoist the load out of a loop that contains this dominating
1100	// invariant.start.
1101	if (LocSizeInBits.getFixedValue() <= InvariantSizeInBits &&
1102	DT->properlyDominates(A: II->getParent(), B: CurLoop->getHeader()))
1103	return true;
1104	}
1105
1106	return false;
1107	}
1108
1109	namespace {
1110	/// Return true if-and-only-if we know how to (mechanically) both hoist and
1111	/// sink a given instruction out of a loop. Does not address legality
1112	/// concerns such as aliasing or speculation safety.
1113	bool isHoistableAndSinkableInst(Instruction &I) {
1114	// Only these instructions are hoistable/sinkable.
1115	return (isa<LoadInst>(Val: I) \|\| isa<StoreInst>(Val: I) \|\| isa<CallInst>(Val: I) \|\|
1116	isa<FenceInst>(Val: I) \|\| isa<CastInst>(Val: I) \|\| isa<UnaryOperator>(Val: I) \|\|
1117	isa<BinaryOperator>(Val: I) \|\| isa<SelectInst>(Val: I) \|\|
1118	isa<GetElementPtrInst>(Val: I) \|\| isa<CmpInst>(Val: I) \|\|
1119	isa<InsertElementInst>(Val: I) \|\| isa<ExtractElementInst>(Val: I) \|\|
1120	isa<ShuffleVectorInst>(Val: I) \|\| isa<ExtractValueInst>(Val: I) \|\|
1121	isa<InsertValueInst>(Val: I) \|\| isa<FreezeInst>(Val: I));
1122	}
1123
1124	/// Return true if I is the only Instruction with a MemoryAccess in L.
1125	bool isOnlyMemoryAccess(const Instruction I, const* Loop *L,
1126	const MemorySSAUpdater &MSSAU) {
1127	for (auto *BB : L->getBlocks())
1128	if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
1129	int NotAPhi = `0`;
1130	for (const auto &Acc : *Accs) {
1131	if (isa<MemoryPhi>(Val: &Acc))
1132	continue;
1133	const auto *MUD = cast<MemoryUseOrDef>(Val: &Acc);
1134	if (MUD->getMemoryInst() != I \|\| NotAPhi++ == `1`)
1135	return false;
1136	}
1137	}
1138	return true;
1139	}
1140	}
1141
1142	static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
1143	BatchAAResults &BAA,
1144	SinkAndHoistLICMFlags &Flags,
1145	MemoryUseOrDef *MA) {
1146	// See declaration of SetLicmMssaOptCap for usage details.
1147	if (Flags.tooManyClobberingCalls())
1148	return MA->getDefiningAccess();
1149
1150	MemoryAccess *Source =
1151	MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MA, AA&: BAA);
1152	Flags.incrementClobberingCalls();
1153	return Source;
1154	}
1155
1156	bool llvm::canSinkOrHoistInst(Instruction &I, AAResults AA, DominatorTree DT,
1157	Loop *CurLoop, MemorySSAUpdater &MSSAU,
1158	bool TargetExecutesOncePerLoop,
1159	SinkAndHoistLICMFlags &Flags,
1160	OptimizationRemarkEmitter *ORE) {
1161	// If we don't understand the instruction, bail early.
1162	if (!isHoistableAndSinkableInst(I))
1163	return false;
1164
1165	MemorySSA *MSSA = MSSAU.getMemorySSA();
1166	// Loads have extra constraints we have to verify before we can hoist them.
1167	if (LoadInst *LI = dyn_cast<LoadInst>(Val: &I)) {
1168	if (!LI->isUnordered())
1169	return false; // Don't sink/hoist volatile or ordered atomic loads!
1170
1171	// Loads from constant memory are always safe to move, even if they end up
1172	// in the same alias set as something that ends up being modified.
1173	if (!isModSet(MRI: AA->getModRefInfoMask(P: LI->getOperand(i_nocapture: `0`))))
1174	return true;
1175	if (LI->hasMetadata(KindID: LLVMContext::MD_invariant_load))
1176	return true;
1177
1178	if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1179	return false; // Don't risk duplicating unordered loads
1180
1181	// This checks for an invariant.start dominating the load.
1182	if (isLoadInvariantInLoop(LI, DT, CurLoop))
1183	return true;
1184
1185	auto MU = cast<MemoryUse>(Val: MSSA->getMemoryAccess(I: LI));
1186
1187	bool InvariantGroup = LI->hasMetadata(KindID: LLVMContext::MD_invariant_group);
1188
1189	bool Invalidated = pointerInvalidatedByLoop(
1190	MSSA, MU, CurLoop, I, Flags, InvariantGroup);
1191	// Check loop-invariant address because this may also be a sinkable load
1192	// whose address is not necessarily loop-invariant.
1193	if (ORE && Invalidated && CurLoop->isLoopInvariant(V: LI->getPointerOperand()))
1194	ORE->emit(RemarkBuilder: [&]() {
1195	return OptimizationRemarkMissed (
1196	DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1197	<< "failed to move load with loop-invariant address "
1198	"because the loop may invalidate its value";
1199	});
1200
1201	return !Invalidated;
1202	} else if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
1203	// Don't sink calls which can throw.
1204	if (CI->mayThrow())
1205	return false;
1206
1207	// Convergent attribute has been used on operations that involve
1208	// inter-thread communication which results are implicitly affected by the
1209	// enclosing control flows. It is not safe to hoist or sink such operations
1210	// across control flow.
1211	if (CI->isConvergent())
1212	return false;
1213
1214	// FIXME: Current LLVM IR semantics don't work well with coroutines and
1215	// thread local globals. We currently treat getting the address of a thread
1216	// local global as not accessing memory, even though it may not be a
1217	// constant throughout a function with coroutines. Remove this check after
1218	// we better model semantics of thread local globals.
1219	if (CI->getFunction()->isPresplitCoroutine())
1220	return false;
1221
1222	using namespace PatternMatch;
1223	if (match(V: CI, P: m_Intrinsic<Intrinsic::assume>()))
1224	// Assumes don't actually alias anything or throw
1225	return true;
1226
1227	// Handle simple cases by querying alias analysis.
1228	MemoryEffects Behavior = AA->getMemoryEffects(Call: CI);
1229
1230	if (Behavior.doesNotAccessMemory())
1231	return true;
1232	if (Behavior.onlyReadsMemory()) {
1233	// If we can prove there are no writes to the memory read by the call, we
1234	// can hoist or sink.
1235	return !pointerInvalidatedByLoop(
1236	MSSA, MU: cast<MemoryUse>(Val: MSSA->getMemoryAccess(I: CI)), CurLoop, I, Flags,
1237	/InvariantGroup=/false);
1238	}
1239
1240	if (Behavior.onlyWritesMemory()) {
1241	// can hoist or sink if there are no conflicting read/writes to the
1242	// memory location written to by the call.
1243	return noConflictingReadWrites(I: CI, MSSA, AA, CurLoop, Flags);
1244	}
1245
1246	return false;
1247	} else if (auto *FI = dyn_cast<FenceInst>(Val: &I)) {
1248	// Fences alias (most) everything to provide ordering. For the moment,
1249	// just give up if there are any other memory operations in the loop.
1250	return isOnlyMemoryAccess(I: FI, L: CurLoop, MSSAU);
1251	} else if (auto *SI = dyn_cast<StoreInst>(Val: &I)) {
1252	if (!SI->isUnordered())
1253	return false; // Don't sink/hoist volatile or ordered atomic store!
1254
1255	// We can only hoist a store that we can prove writes a value which is not
1256	// read or overwritten within the loop. For those cases, we fallback to
1257	// load store promotion instead. TODO: We can extend this to cases where
1258	// there is exactly one write to the location and that write dominates an
1259	// arbitrary number of reads in the loop.
1260	if (isOnlyMemoryAccess(I: SI, L: CurLoop, MSSAU))
1261	return true;
1262	return noConflictingReadWrites(I: SI, MSSA, AA, CurLoop, Flags);
1263	}
1264
1265	assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1266
1267	// We've established mechanical ability and aliasing, it's up to the caller
1268	// to check fault safety
1269	return true;
1270	}
1271
1272	/// Returns true if a PHINode is a trivially replaceable with an
1273	/// Instruction.
1274	/// This is true when all incoming values are that instruction.
1275	/// This pattern occurs most often with LCSSA PHI nodes.
1276	///
1277	static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1278	for (const Value *IncValue : PN.incoming_values())
1279	if (IncValue != &I)
1280	return false;
1281
1282	return true;
1283	}
1284
1285	/// Return true if the instruction is foldable in the loop.
1286	static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1287	const TargetTransformInfo *TTI) {
1288	if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) {
1289	InstructionCost CostI =
1290	TTI->getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
1291	if (CostI != TargetTransformInfo::TCC_Free)
1292	return false;
1293	// For a GEP, we cannot simply use getInstructionCost because currently
1294	// it optimistically assumes that a GEP will fold into addressing mode
1295	// regardless of its users.
1296	const BasicBlock *BB = GEP->getParent();
1297	for (const User *U : GEP->users()) {
1298	const Instruction *UI = cast<Instruction>(Val: U);
1299	if (CurLoop->contains(Inst: UI) &&
1300	(BB != UI->getParent() \|\|
1301	(!isa<StoreInst>(Val: UI) && !isa<LoadInst>(Val: UI))))
1302	return false;
1303	}
1304	return true;
1305	}
1306
1307	return false;
1308	}
1309
1310	/// Return true if the only users of this instruction are outside of
1311	/// the loop. If this is true, we can sink the instruction to the exit
1312	/// blocks of the loop.
1313	///
1314	/// We also return true if the instruction could be folded away in lowering.
1315	/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1316	static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1317	const LoopSafetyInfo *SafetyInfo,
1318	TargetTransformInfo *TTI,
1319	bool &FoldableInLoop, bool LoopNestMode) {
1320	const auto &BlockColors = SafetyInfo->getBlockColors();
1321	bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
1322	for (const User *U : I.users()) {
1323	const Instruction *UI = cast<Instruction>(Val: U);
1324	if (const PHINode *PN = dyn_cast<PHINode>(Val: UI)) {
1325	const BasicBlock *BB = PN->getParent();
1326	// We cannot sink uses in catchswitches.
1327	if (isa<CatchSwitchInst>(Val: BB->getTerminator()))
1328	return false;
1329
1330	// We need to sink a callsite to a unique funclet. Avoid sinking if the
1331	// phi use is too muddled.
1332	if (isa<CallInst>(Val: I))
1333	if (!BlockColors.empty() &&
1334	BlockColors.find(Val: const_cast<BasicBlock *>(BB))->second.size() != `1`)
1335	return false;
1336
1337	if (LoopNestMode) {
1338	while (isa<PHINode>(Val: UI) && UI->hasOneUser() &&
1339	UI->getNumOperands() == `1`) {
1340	if (!CurLoop->contains(Inst: UI))
1341	break;
1342	UI = cast<Instruction>(Val: UI->user_back());
1343	}
1344	}
1345	}
1346
1347	if (CurLoop->contains(Inst: UI)) {
1348	if (IsFoldable) {
1349	FoldableInLoop = true;
1350	continue;
1351	}
1352	return false;
1353	}
1354	}
1355	return true;
1356	}
1357
1358	static Instruction *cloneInstructionInExitBlock(
1359	Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1360	const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
1361	Instruction *New;
1362	if (auto *CI = dyn_cast<CallInst>(Val: &I)) {
1363	const auto &BlockColors = SafetyInfo->getBlockColors();
1364
1365	// Sinking call-sites need to be handled differently from other
1366	// instructions. The cloned call-site needs a funclet bundle operand
1367	// appropriate for its location in the CFG.
1368	SmallVector<OperandBundleDef, `1`> OpBundles;
1369	for (unsigned BundleIdx = `0`, BundleEnd = CI->getNumOperandBundles();
1370	BundleIdx != BundleEnd; ++BundleIdx) {
1371	OperandBundleUse Bundle = CI->getOperandBundleAt(Index: BundleIdx);
1372	if (Bundle.getTagID() == LLVMContext::OB_funclet)
1373	continue;
1374
1375	OpBundles.emplace_back(Args&: Bundle);
1376	}
1377
1378	if (!BlockColors.empty()) {
1379	const ColorVector &CV = BlockColors.find(Val: &ExitBlock)->second;
1380	assert(CV.size() == `1` && "non-unique color for exit block!");
1381	BasicBlock *BBColor = CV.front();
1382	BasicBlock::iterator EHPad = BBColor->getFirstNonPHIIt();
1383	if (EHPad ->isEHPad())
1384	OpBundles.emplace_back(Args: "funclet", Args: &*EHPad);
1385	}
1386
1387	New = CallInst::Create(CI, Bundles: OpBundles);
1388	New->copyMetadata(SrcInst: *CI);
1389	} else {
1390	New = I.clone();
1391	}
1392
1393	New->insertInto(ParentBB: &ExitBlock, It: ExitBlock.getFirstInsertionPt());
1394	if (!I.getName().empty())
1395	New->setName(I.getName() + ".le");
1396
1397	if (MSSAU.getMemorySSA()->getMemoryAccess(I: &I)) {
1398	// Create a new MemoryAccess and let MemorySSA set its defining access.
1399	// After running some passes, MemorySSA might be outdated, and the
1400	// instruction `I` may have become a non-memory touching instruction.
1401	MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
1402	I: New, Definition: nullptr, BB: New->getParent(), Point: MemorySSA::Beginning,
1403	/CreationMustSucceed=/false);
1404	if (NewMemAcc) {
1405	if (auto *MemDef = dyn_cast<MemoryDef>(Val: NewMemAcc))
1406	MSSAU.insertDef(Def: MemDef, /RenameUses=/true);
1407	else {
1408	auto *MemUse = cast<MemoryUse>(Val: NewMemAcc);
1409	MSSAU.insertUse(Use: MemUse, /RenameUses=/true);
1410	}
1411	}
1412	}
1413
1414	// Build LCSSA PHI nodes for any in-loop operands (if legal). Note that
1415	// this is particularly cheap because we can rip off the PHI node that we're
1416	// replacing for the number and blocks of the predecessors.
1417	// OPT: If this shows up in a profile, we can instead finish sinking all
1418	// invariant instructions, and then walk their operands to re-establish
1419	// LCSSA. That will eliminate creating PHI nodes just to nuke them when
1420	// sinking bottom-up.
1421	for (Use &Op : New->operands())
1422	if (LI->wouldBeOutOfLoopUseRequiringLCSSA(V: Op.get(), ExitBB: PN.getParent())) {
1423	auto *OInst = cast<Instruction>(Val: Op.get());
1424	PHINode *OpPN =
1425	PHINode::Create(Ty: OInst->getType(), NumReservedValues: PN.getNumIncomingValues(),
1426	NameStr: OInst->getName() + ".lcssa");
1427	OpPN->insertBefore(InsertPos: ExitBlock.begin());
1428	for (unsigned i = `0`, e = PN.getNumIncomingValues(); i != e; ++i)
1429	OpPN->addIncoming(V: OInst, BB: PN.getIncomingBlock(i));
1430	Op = OpPN;
1431	}
1432	return New;
1433	}
1434
1435	static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1436	MemorySSAUpdater &MSSAU) {
1437	MSSAU.removeMemoryAccess(I: &I);
1438	SafetyInfo.removeInstruction(Inst: &I);
1439	I.eraseFromParent();
1440	}
1441
1442	static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
1443	ICFLoopSafetyInfo &SafetyInfo,
1444	MemorySSAUpdater &MSSAU,
1445	ScalarEvolution *SE) {
1446	SafetyInfo.removeInstruction(Inst: &I);
1447	SafetyInfo.insertInstructionTo(Inst: &I, BB: Dest ->getParent());
1448	I.moveBefore(BB&: *Dest ->getParent(), I: Dest);
1449	if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1450	Val: MSSAU.getMemorySSA()->getMemoryAccess(I: &I)))
1451	MSSAU.moveToPlace(What: OldMemAcc, BB: Dest ->getParent(),
1452	Where: MemorySSA::BeforeTerminator);
1453	if (SE)
1454	SE->forgetBlockAndLoopDispositions(V: &I);
1455	}
1456
1457	static Instruction *sinkThroughTriviallyReplaceablePHI(
1458	PHINode TPN, Instruction I, LoopInfo *LI,
1459	SmallDenseMap<BasicBlock , Instruction , `32`> &SunkCopies,
1460	const LoopSafetyInfo SafetyInfo, const* Loop *CurLoop,
1461	MemorySSAUpdater &MSSAU) {
1462	assert(isTriviallyReplaceablePHI(TPN, I) &&
1463	"Expect only trivially replaceable PHI");
1464	BasicBlock *ExitBlock = TPN->getParent();
1465	auto [It, Inserted] = SunkCopies.try_emplace(Key: ExitBlock);
1466	if (Inserted)
1467	It ->second = cloneInstructionInExitBlock(I&: I, ExitBlock&: ExitBlock, PN&: *TPN, LI,
1468	SafetyInfo, MSSAU);
1469	return It ->second;
1470	}
1471
1472	static bool canSplitPredecessors(PHINode PN, LoopSafetyInfo SafetyInfo) {
1473	BasicBlock *BB = PN->getParent();
1474	if (!BB->canSplitPredecessors())
1475	return false;
1476	// It's not impossible to split EHPad blocks, but if BlockColors already exist
1477	// it require updating BlockColors for all offspring blocks accordingly. By
1478	// skipping such corner case, we can make updating BlockColors after splitting
1479	// predecessor fairly simple.
1480	if (!SafetyInfo->getBlockColors().empty() &&
1481	BB->getFirstNonPHIIt()->isEHPad())
1482	return false;
1483	for (BasicBlock *BBPred : predecessors(BB)) {
1484	if (isa<IndirectBrInst>(Val: BBPred->getTerminator()))
1485	return false;
1486	}
1487	return true;
1488	}
1489
1490	static void splitPredecessorsOfLoopExit(PHINode PN, DominatorTree DT,
1491	LoopInfo LI, const* Loop *CurLoop,
1492	LoopSafetyInfo *SafetyInfo,
1493	MemorySSAUpdater *MSSAU) {
1494	#ifndef NDEBUG
1495	SmallVector<BasicBlock *, `32`> ExitBlocks;
1496	CurLoop->getUniqueExitBlocks(ExitBlocks);
1497	SmallPtrSet<BasicBlock *, `32`> ExitBlockSet(llvm::from_range, ExitBlocks);
1498	#endif
1499	BasicBlock *ExitBB = PN->getParent();
1500	assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1501
1502	// Split predecessors of the loop exit to make instructions in the loop are
1503	// exposed to exit blocks through trivially replaceable PHIs while keeping the
1504	// loop in the canonical form where each predecessor of each exit block should
1505	// be contained within the loop. For example, this will convert the loop below
1506	// from
1507	//
1508	// LB1:
1509	// %v1 =
1510	// br %LE, %LB2
1511	// LB2:
1512	// %v2 =
1513	// br %LE, %LB1
1514	// LE:
1515	// %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1516	//
1517	// to
1518	//
1519	// LB1:
1520	// %v1 =
1521	// br %LE.split, %LB2
1522	// LB2:
1523	// %v2 =
1524	// br %LE.split2, %LB1
1525	// LE.split:
1526	// %p1 = phi [%v1, %LB1] <-- trivially replaceable
1527	// br %LE
1528	// LE.split2:
1529	// %p2 = phi [%v2, %LB2] <-- trivially replaceable
1530	// br %LE
1531	// LE:
1532	// %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1533	//
1534	const auto &BlockColors = SafetyInfo->getBlockColors();
1535	SmallSetVector<BasicBlock *, `8`> PredBBs(pred_begin(BB: ExitBB), pred_end(BB: ExitBB));
1536	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
1537	while (!PredBBs.empty()) {
1538	BasicBlock PredBB = PredBBs.begin();
1539	assert(CurLoop->contains(PredBB) &&
1540	"Expect all predecessors are in the loop");
1541	if (PN->getBasicBlockIndex(BB: PredBB) >= `0`) {
1542	BasicBlock *NewPred = SplitBlockPredecessors(
1543	BB: ExitBB, Preds: PredBB, Suffix: ".split.loop.exit", DTU: &DTU, LI, MSSAU, PreserveLCSSA: true);
1544	// Since we do not allow splitting EH-block with BlockColors in
1545	// canSplitPredecessors(), we can simply assign predecessor's color to
1546	// the new block.
1547	if (!BlockColors.empty())
1548	// Grab a reference to the ColorVector to be inserted before getting the
1549	// reference to the vector we are copying because inserting the new
1550	// element in BlockColors might cause the map to be reallocated.
1551	SafetyInfo->copyColors(New: NewPred, Old: PredBB);
1552	}
1553	PredBBs.remove(X: PredBB);
1554	}
1555	}
1556
1557	/// When an instruction is found to only be used outside of the loop, this
1558	/// function moves it to the exit blocks and patches up SSA form as needed.
1559	/// This method is guaranteed to remove the original instruction from its
1560	/// position, and may either delete it or move it to outside of the loop.
1561	///
1562	static bool sink(Instruction &I, LoopInfo LI, DominatorTree DT,
1563	const Loop CurLoop, ICFLoopSafetyInfo SafetyInfo,
1564	MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE) {
1565	bool Changed = false;
1566	LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1567
1568	// Iterate over users to be ready for actual sinking. Replace users via
1569	// unreachable blocks with undef and make all user PHIs trivially replaceable.
1570	SmallPtrSet<Instruction *, `8`> VisitedUsers;
1571	for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1572	auto User = cast<Instruction>(Val: UI);
1573	Use &U = UI.getUse();
1574	++UI;
1575
1576	if (VisitedUsers.count(Ptr: User) \|\| CurLoop->contains(Inst: User))
1577	continue;
1578
1579	if (!DT->isReachableFromEntry(A: User->getParent())) {
1580	U = PoisonValue::get(T: I.getType());
1581	Changed = true;
1582	continue;
1583	}
1584
1585	// The user must be a PHI node.
1586	PHINode *PN = cast<PHINode>(Val: User);
1587
1588	// Surprisingly, instructions can be used outside of loops without any
1589	// exits. This can only happen in PHI nodes if the incoming block is
1590	// unreachable.
1591	BasicBlock *BB = PN->getIncomingBlock(U);
1592	if (!DT->isReachableFromEntry(A: BB)) {
1593	U = PoisonValue::get(T: I.getType());
1594	Changed = true;
1595	continue;
1596	}
1597
1598	VisitedUsers.insert(Ptr: PN);
1599	if (isTriviallyReplaceablePHI(PN: *PN, I))
1600	continue;
1601
1602	if (!canSplitPredecessors(PN, SafetyInfo))
1603	return Changed;
1604
1605	// Split predecessors of the PHI so that we can make users trivially
1606	// replaceable.
1607	splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU: &MSSAU);
1608
1609	// Should rebuild the iterators, as they may be invalidated by
1610	// splitPredecessorsOfLoopExit().
1611	UI = I.user_begin();
1612	UE = I.user_end();
1613	}
1614
1615	if (VisitedUsers.empty())
1616	return Changed;
1617
1618	ORE->emit(RemarkBuilder: [&]() {
1619	return OptimizationRemark (DEBUG_TYPE, "InstSunk", &I)
1620	<< "sinking " << ore::NV ("Inst", &I);
1621	});
1622	if (isa<LoadInst>(Val: I))
1623	++NumMovedLoads;
1624	else if (isa<CallInst>(Val: I))
1625	++NumMovedCalls;
1626	++NumSunk;
1627
1628	#ifndef NDEBUG
1629	SmallVector<BasicBlock *, `32`> ExitBlocks;
1630	CurLoop->getUniqueExitBlocks(ExitBlocks);
1631	SmallPtrSet<BasicBlock *, `32`> ExitBlockSet(llvm::from_range, ExitBlocks);
1632	#endif
1633
1634	// Clones of this instruction. Don't create more than one per exit block!
1635	SmallDenseMap<BasicBlock , Instruction , `32`> SunkCopies;
1636
1637	// If this instruction is only used outside of the loop, then all users are
1638	// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1639	// the instruction.
1640	// First check if I is worth sinking for all uses. Sink only when it is worth
1641	// across all uses.
1642	SmallSetVector<User*, `8`> Users(I.user_begin(), I.user_end());
1643	for (auto *UI : Users) {
1644	auto *User = cast<Instruction>(Val: UI);
1645
1646	if (CurLoop->contains(Inst: User))
1647	continue;
1648
1649	PHINode *PN = cast<PHINode>(Val: User);
1650	assert(ExitBlockSet.count(PN->getParent()) &&
1651	"The LCSSA PHI is not in an exit block!");
1652
1653	// The PHI must be trivially replaceable.
1654	Instruction *New = sinkThroughTriviallyReplaceablePHI(
1655	TPN: PN, I: &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1656	// As we sink the instruction out of the BB, drop its debug location.
1657	New->dropLocation();
1658	PN->replaceAllUsesWith(V: New);
1659	eraseInstruction(I&: PN, SafetyInfo&: SafetyInfo, MSSAU);
1660	Changed = true;
1661	}
1662	return Changed;
1663	}
1664
1665	/// When an instruction is found to only use loop invariant operands that
1666	/// is safe to hoist, this instruction is called to do the dirty work.
1667	///
1668	static void hoist(Instruction &I, const DominatorTree DT, const* Loop *CurLoop,
1669	BasicBlock Dest, ICFLoopSafetyInfo SafetyInfo,
1670	MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1671	OptimizationRemarkEmitter *ORE) {
1672	LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1673	<< I << "\n");
1674	ORE->emit(RemarkBuilder: [&]() {
1675	return OptimizationRemark (DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1676	<< ore::NV ("Inst", &I);
1677	});
1678
1679	// Metadata can be dependent on conditions we are hoisting above.
1680	// Conservatively strip all metadata on the instruction unless we were
1681	// guaranteed to execute I if we entered the loop, in which case the metadata
1682	// is valid in the loop preheader.
1683	// Similarly, If I is a call and it is not guaranteed to execute in the loop,
1684	// then moving to the preheader means we should strip attributes on the call
1685	// that can cause UB since we may be hoisting above conditions that allowed
1686	// inferring those attributes. They may not be valid at the preheader.
1687	if ((I.hasMetadataOtherThanDebugLoc() \|\| isa<CallInst>(Val: I)) &&
1688	// The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1689	// time in isGuaranteedToExecute if we don't actually have anything to
1690	// drop. It is a compile time optimization, not required for correctness.
1691	!SafetyInfo->isGuaranteedToExecute(Inst: I, DT, CurLoop))
1692	I.dropUBImplyingAttrsAndMetadata();
1693
1694	if (isa<PHINode>(Val: I))
1695	// Move the new node to the end of the phi list in the destination block.
1696	moveInstructionBefore(I, Dest: Dest->getFirstNonPHIIt(), SafetyInfo&: *SafetyInfo, MSSAU, SE);
1697	else
1698	// Move the new node to the destination block, before its terminator.
1699	moveInstructionBefore(I, Dest: Dest->getTerminator()->getIterator(), SafetyInfo&: *SafetyInfo,
1700	MSSAU, SE);
1701
1702	I.updateLocationAfterHoist();
1703
1704	if (isa<LoadInst>(Val: I))
1705	++NumMovedLoads;
1706	else if (isa<CallInst>(Val: I))
1707	++NumMovedCalls;
1708	++NumHoisted;
1709	}
1710
1711	/// Only sink or hoist an instruction if it is not a trapping instruction,
1712	/// or if the instruction is known not to trap when moved to the preheader.
1713	/// or if it is a trapping instruction and is guaranteed to execute.
1714	static bool isSafeToExecuteUnconditionally(
1715	Instruction &Inst, const DominatorTree DT, const* TargetLibraryInfo *TLI,
1716	const Loop CurLoop, const* LoopSafetyInfo *SafetyInfo,
1717	OptimizationRemarkEmitter ORE, const* Instruction *CtxI,
1718	AssumptionCache AC, bool* AllowSpeculation) {
1719	if (AllowSpeculation &&
1720	isSafeToSpeculativelyExecute(I: &Inst, CtxI, AC, DT, TLI))
1721	return true;
1722
1723	bool GuaranteedToExecute =
1724	SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1725
1726	if (!GuaranteedToExecute) {
1727	auto *LI = dyn_cast<LoadInst>(Val: &Inst);
1728	if (LI && CurLoop->isLoopInvariant(V: LI->getPointerOperand()))
1729	ORE->emit(RemarkBuilder: [&]() {
1730	return OptimizationRemarkMissed (
1731	DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1732	<< "failed to hoist load with loop-invariant address "
1733	"because load is conditionally executed";
1734	});
1735	}
1736
1737	return GuaranteedToExecute;
1738	}
1739
1740	namespace {
1741	class LoopPromoter : public LoadAndStorePromoter {
1742	Value SomePtr; // Designated pointer to store to.*
1743	SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1744	SmallVectorImpl<BasicBlock::iterator> &LoopInsertPts;
1745	SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1746	PredIteratorCache &PredCache;
1747	MemorySSAUpdater &MSSAU;
1748	LoopInfo &LI;
1749	DebugLoc DL;
1750	Align Alignment;
1751	bool UnorderedAtomic;
1752	AAMDNodes AATags;
1753	ICFLoopSafetyInfo &SafetyInfo;
1754	bool CanInsertStoresInExitBlocks;
1755	ArrayRef<const Instruction *> Uses;
1756
1757	// We're about to add a use of V in a loop exit block. Insert an LCSSA phi
1758	// (if legal) if doing so would add an out-of-loop use to an instruction
1759	// defined in-loop.
1760	Value maybeInsertLCSSAPHI(Value V, BasicBlock BB) const* {
1761	if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, ExitBB: BB))
1762	return V;
1763
1764	Instruction *I = cast<Instruction>(Val: V);
1765	// We need to create an LCSSA PHI node for the incoming value and
1766	// store that.
1767	PHINode *PN = PHINode::Create(Ty: I->getType(), NumReservedValues: PredCache.size(BB),
1768	NameStr: I->getName() + ".lcssa");
1769	PN->insertBefore(InsertPos: BB->begin());
1770	for (BasicBlock *Pred : PredCache.get(BB))
1771	PN->addIncoming(V: I, BB: Pred);
1772	return PN;
1773	}
1774
1775	public:
1776	LoopPromoter(Value SP, ArrayRef<const* Instruction *> Insts, SSAUpdater &S,
1777	SmallVectorImpl<BasicBlock *> &LEB,
1778	SmallVectorImpl<BasicBlock::iterator> &LIP,
1779	SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1780	MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
1781	Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1782	ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1783	: LoadAndStorePromoter (Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
1784	LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
1785	LI(li), DL (std::move(dl)), Alignment (Alignment),
1786	UnorderedAtomic(UnorderedAtomic), AATags (AATags),
1787	SafetyInfo(SafetyInfo),
1788	CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses (Insts) {}
1789
1790	void insertStoresInLoopExitBlocks() {
1791	// Insert stores after in the loop exit blocks. Each exit block gets a
1792	// store of the live-out values that feed them. Since we've already told
1793	// the SSA updater about the defs in the loop and the preheader
1794	// definition, it is all set and we can start using it.
1795	DIAssignID NewID = nullptr*;
1796	for (unsigned i = `0`, e = LoopExitBlocks.size(); i != e; ++i) {
1797	BasicBlock *ExitBlock = LoopExitBlocks [i];
1798	Value *LiveInValue = SSA.GetValueInMiddleOfBlock(BB: ExitBlock);
1799	LiveInValue = maybeInsertLCSSAPHI(V: LiveInValue, BB: ExitBlock);
1800	Value *Ptr = maybeInsertLCSSAPHI(V: SomePtr, BB: ExitBlock);
1801	BasicBlock::iterator InsertPos = LoopInsertPts [i];
1802	StoreInst NewSI = new* StoreInst (LiveInValue, Ptr, InsertPos);
1803	if (UnorderedAtomic)
1804	NewSI->setOrdering(AtomicOrdering::Unordered);
1805	NewSI->setAlignment(Alignment);
1806	NewSI->setDebugLoc(DL);
1807	// Attach DIAssignID metadata to the new store, generating it on the
1808	// first loop iteration.
1809	if (i == `0`) {
1810	// NewSI will have its DIAssignID set here if there are any stores in
1811	// Uses with a DIAssignID attachment. This merged ID will then be
1812	// attached to the other inserted stores (in the branch below).
1813	NewSI->mergeDIAssignID(SourceInstructions: Uses);
1814	NewID = cast_or_null<DIAssignID>(
1815	Val: NewSI->getMetadata(KindID: LLVMContext::MD_DIAssignID));
1816	} else {
1817	// Attach the DIAssignID (or nullptr) merged from Uses in the branch
1818	// above.
1819	NewSI->setMetadata(KindID: LLVMContext::MD_DIAssignID, Node: NewID);
1820	}
1821
1822	if (AATags)
1823	NewSI->setAAMetadata(AATags);
1824
1825	MemoryAccess *MSSAInsertPoint = MSSAInsertPts [i];
1826	MemoryAccess *NewMemAcc;
1827	if (!MSSAInsertPoint) {
1828	NewMemAcc = MSSAU.createMemoryAccessInBB(
1829	I: NewSI, Definition: nullptr, BB: NewSI->getParent(), Point: MemorySSA::Beginning);
1830	} else {
1831	NewMemAcc =
1832	MSSAU.createMemoryAccessAfter(I: NewSI, Definition: nullptr, InsertPt: MSSAInsertPoint);
1833	}
1834	MSSAInsertPts [i] = NewMemAcc;
1835	MSSAU.insertDef(Def: cast<MemoryDef>(Val: NewMemAcc), RenameUses: true);
1836	// FIXME: true for safety, false may still be correct.
1837	}
1838	}
1839
1840	void doExtraRewritesBeforeFinalDeletion() override {
1841	if (CanInsertStoresInExitBlocks)
1842	insertStoresInLoopExitBlocks();
1843	}
1844
1845	void instructionDeleted(Instruction I) const* override {
1846	SafetyInfo.removeInstruction(Inst: I);
1847	MSSAU.removeMemoryAccess(I);
1848	}
1849
1850	bool shouldDelete(Instruction I) const* override {
1851	if (isa<StoreInst>(Val: I))
1852	return CanInsertStoresInExitBlocks;
1853	return true;
1854	}
1855	};
1856
1857	bool isNotCapturedBeforeOrInLoop(const Value V, const* Loop *L,
1858	DominatorTree *DT) {
1859	// We can perform the captured-before check against any instruction in the
1860	// loop header, as the loop header is reachable from any instruction inside
1861	// the loop.
1862	// TODO: ReturnCaptures=true shouldn't be necessary here.
1863	return capturesNothing(CC: PointerMayBeCapturedBefore(
1864	V, /ReturnCaptures=/true, I: L->getHeader()->getTerminator(), DT,
1865	/IncludeI=/false, Mask: CaptureComponents::Provenance));
1866	}
1867
1868	/// Return true if we can prove that a caller cannot inspect the object if an
1869	/// unwind occurs inside the loop.
1870	bool isNotVisibleOnUnwindInLoop(const Value Object, const* Loop *L,
1871	DominatorTree *DT) {
1872	bool RequiresNoCaptureBeforeUnwind;
1873	if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
1874	return false;
1875
1876	return !RequiresNoCaptureBeforeUnwind \|\|
1877	isNotCapturedBeforeOrInLoop(V: Object, L, DT);
1878	}
1879
1880	bool isThreadLocalObject(const Value Object, const* Loop L, DominatorTree DT,
1881	TargetTransformInfo *TTI) {
1882	// The object must be function-local to start with, and then not captured
1883	// before/in the loop.
1884	return (isIdentifiedFunctionLocal(V: Object) &&
1885	isNotCapturedBeforeOrInLoop(V: Object, L, DT)) \|\|
1886	(TTI->isSingleThreaded() \|\| SingleThread);
1887	}
1888
1889	} // namespace
1890
1891	/// Try to promote memory values to scalars by sinking stores out of the
1892	/// loop and moving loads to before the loop. We do this by looping over
1893	/// the stores in the loop, looking for stores to Must pointers which are
1894	/// loop invariant.
1895	///
1896	bool llvm::promoteLoopAccessesToScalars(
1897	const SmallSetVector<Value *, `8`> &PointerMustAliases,
1898	SmallVectorImpl<BasicBlock *> &ExitBlocks,
1899	SmallVectorImpl<BasicBlock::iterator> &InsertPts,
1900	SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1901	LoopInfo LI, DominatorTree DT, AssumptionCache *AC,
1902	const TargetLibraryInfo TLI, TargetTransformInfo TTI, Loop *CurLoop,
1903	MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1904	OptimizationRemarkEmitter ORE, bool* AllowSpeculation,
1905	bool HasReadsOutsideSet) {
1906	// Verify inputs.
1907	assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1908	SafetyInfo != nullptr &&
1909	"Unexpected Input to promoteLoopAccessesToScalars");
1910
1911	LLVM_DEBUG({
1912	dbgs() << "Trying to promote set of must-aliased pointers:\n";
1913	for (Value *Ptr : PointerMustAliases)
1914	dbgs() << " " << *Ptr << "\n";
1915	});
1916	++NumPromotionCandidates;
1917
1918	Value SomePtr = PointerMustAliases.begin();
1919	BasicBlock *Preheader = CurLoop->getLoopPreheader();
1920
1921	// It is not safe to promote a load/store from the loop if the load/store is
1922	// conditional. For example, turning:
1923	//
1924	// for () { if (c) P += 1; }*
1925	//
1926	// into:
1927	//
1928	// tmp = P; for () { if (c) tmp +=1; } P = tmp;
1929	//
1930	// is not safe, because P may only be valid to access if 'c' is true.*
1931	//
1932	// The safety property divides into two parts:
1933	// p1) The memory may not be dereferenceable on entry to the loop. In this
1934	// case, we can't insert the required load in the preheader.
1935	// p2) The memory model does not allow us to insert a store along any dynamic
1936	// path which did not originally have one.
1937	//
1938	// If at least one store is guaranteed to execute, both properties are
1939	// satisfied, and promotion is legal.
1940	//
1941	// This, however, is not a necessary condition. Even if no store/load is
1942	// guaranteed to execute, we can still establish these properties.
1943	// We can establish (p1) by proving that hoisting the load into the preheader
1944	// is safe (i.e. proving dereferenceability on all paths through the loop). We
1945	// can use any access within the alias set to prove dereferenceability,
1946	// since they're all must alias.
1947	//
1948	// There are two ways establish (p2):
1949	// a) Prove the location is thread-local. In this case the memory model
1950	// requirement does not apply, and stores are safe to insert.
1951	// b) Prove a store dominates every exit block. In this case, if an exit
1952	// blocks is reached, the original dynamic path would have taken us through
1953	// the store, so inserting a store into the exit block is safe. Note that this
1954	// is different from the store being guaranteed to execute. For instance,
1955	// if an exception is thrown on the first iteration of the loop, the original
1956	// store is never executed, but the exit blocks are not executed either.
1957
1958	bool DereferenceableInPH = false;
1959	bool StoreIsGuanteedToExecute = false;
1960	bool LoadIsGuaranteedToExecute = false;
1961	bool FoundLoadToPromote = false;
1962
1963	// Goes from Unknown to either Safe or Unsafe, but can't switch between them.
1964	enum {
1965	StoreSafe,
1966	StoreUnsafe,
1967	StoreSafetyUnknown,
1968	} StoreSafety = StoreSafetyUnknown;
1969
1970	SmallVector<Instruction *, `64`> LoopUses;
1971
1972	// We start with an alignment of one and try to find instructions that allow
1973	// us to prove better alignment.
1974	Align Alignment;
1975	// Keep track of which types of access we see
1976	bool SawUnorderedAtomic = false;
1977	bool SawNotAtomic = false;
1978	AAMDNodes AATags;
1979
1980	const DataLayout &MDL = Preheader->getDataLayout();
1981
1982	// If there are reads outside the promoted set, then promoting stores is
1983	// definitely not safe.
1984	if (HasReadsOutsideSet)
1985	StoreSafety = StoreUnsafe;
1986
1987	if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) {
1988	// If a loop can throw, we have to insert a store along each unwind edge.
1989	// That said, we can't actually make the unwind edge explicit. Therefore,
1990	// we have to prove that the store is dead along the unwind edge. We do
1991	// this by proving that the caller can't have a reference to the object
1992	// after return and thus can't possibly load from the object.
1993	Value *Object = getUnderlyingObject(V: SomePtr);
1994	if (!isNotVisibleOnUnwindInLoop(Object, L: CurLoop, DT))
1995	StoreSafety = StoreUnsafe;
1996	}
1997
1998	// Check that all accesses to pointers in the alias set use the same type.
1999	// We cannot (yet) promote a memory location that is loaded and stored in
2000	// different sizes. While we are at it, collect alignment and AA info.
2001	Type AccessTy = nullptr*;
2002	for (Value *ASIV : PointerMustAliases) {
2003	for (Use &U : ASIV->uses()) {
2004	// Ignore instructions that are outside the loop.
2005	Instruction *UI = dyn_cast<Instruction>(Val: U.getUser());
2006	if (!UI \|\| !CurLoop->contains(Inst: UI))
2007	continue;
2008
2009	// If there is an non-load/store instruction in the loop, we can't promote
2010	// it.
2011	if (LoadInst *Load = dyn_cast<LoadInst>(Val: UI)) {
2012	if (!Load->isUnordered())
2013	return false;
2014
2015	SawUnorderedAtomic \|= Load->isAtomic();
2016	SawNotAtomic \|= !Load->isAtomic();
2017	FoundLoadToPromote = true;
2018
2019	Align InstAlignment = Load->getAlign();
2020
2021	if (!LoadIsGuaranteedToExecute)
2022	LoadIsGuaranteedToExecute =
2023	SafetyInfo->isGuaranteedToExecute(Inst: *UI, DT, CurLoop);
2024
2025	// Note that proving a load safe to speculate requires proving
2026	// sufficient alignment at the target location. Proving it guaranteed
2027	// to execute does as well. Thus we can increase our guaranteed
2028	// alignment as well.
2029	if (!DereferenceableInPH \|\| (InstAlignment > Alignment))
2030	if (isSafeToExecuteUnconditionally(
2031	Inst&: *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2032	CtxI: Preheader->getTerminator(), AC, AllowSpeculation)) {
2033	DereferenceableInPH = true;
2034	Alignment = std::max(a: Alignment, b: InstAlignment);
2035	}
2036	} else if (const StoreInst *Store = dyn_cast<StoreInst>(Val: UI)) {
2037	// Stores of* the pointer are not interesting, only stores to the*
2038	// pointer.
2039	if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
2040	continue;
2041	if (!Store->isUnordered())
2042	return false;
2043
2044	SawUnorderedAtomic \|= Store->isAtomic();
2045	SawNotAtomic \|= !Store->isAtomic();
2046
2047	// If the store is guaranteed to execute, both properties are satisfied.
2048	// We may want to check if a store is guaranteed to execute even if we
2049	// already know that promotion is safe, since it may have higher
2050	// alignment than any other guaranteed stores, in which case we can
2051	// raise the alignment on the promoted store.
2052	Align InstAlignment = Store->getAlign();
2053	bool GuaranteedToExecute =
2054	SafetyInfo->isGuaranteedToExecute(Inst: *UI, DT, CurLoop);
2055	StoreIsGuanteedToExecute \|= GuaranteedToExecute;
2056	if (GuaranteedToExecute) {
2057	DereferenceableInPH = true;
2058	if (StoreSafety == StoreSafetyUnknown)
2059	StoreSafety = StoreSafe;
2060	Alignment = std::max(a: Alignment, b: InstAlignment);
2061	}
2062
2063	// If a store dominates all exit blocks, it is safe to sink.
2064	// As explained above, if an exit block was executed, a dominating
2065	// store must have been executed at least once, so we are not
2066	// introducing stores on paths that did not have them.
2067	// Note that this only looks at explicit exit blocks. If we ever
2068	// start sinking stores into unwind edges (see above), this will break.
2069	if (StoreSafety == StoreSafetyUnknown &&
2070	llvm::all_of(Range&: ExitBlocks, P: [&](BasicBlock *Exit) {
2071	return DT->dominates(A: Store->getParent(), B: Exit);
2072	}))
2073	StoreSafety = StoreSafe;
2074
2075	// If the store is not guaranteed to execute, we may still get
2076	// deref info through it.
2077	if (!DereferenceableInPH) {
2078	DereferenceableInPH = isDereferenceableAndAlignedPointer(
2079	V: Store->getPointerOperand(), Ty: Store->getValueOperand()->getType(),
2080	Alignment: Store->getAlign(), DL: MDL, CtxI: Preheader->getTerminator(), AC, DT, TLI);
2081	}
2082	} else
2083	continue; // Not a load or store.
2084
2085	if (!AccessTy)
2086	AccessTy = getLoadStoreType(I: UI);
2087	else if (AccessTy != getLoadStoreType(I: UI))
2088	return false;
2089
2090	// Merge the AA tags.
2091	if (LoopUses.empty()) {
2092	// On the first load/store, just take its AA tags.
2093	AATags = UI->getAAMetadata();
2094	} else if (AATags) {
2095	AATags = AATags.merge(Other: UI->getAAMetadata());
2096	}
2097
2098	LoopUses.push_back(Elt: UI);
2099	}
2100	}
2101
2102	// If we found both an unordered atomic instruction and a non-atomic memory
2103	// access, bail. We can't blindly promote non-atomic to atomic since we
2104	// might not be able to lower the result. We can't downgrade since that
2105	// would violate memory model. Also, align 0 is an error for atomics.
2106	if (SawUnorderedAtomic && SawNotAtomic)
2107	return false;
2108
2109	// If we're inserting an atomic load in the preheader, we must be able to
2110	// lower it. We're only guaranteed to be able to lower naturally aligned
2111	// atomics.
2112	if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(Ty: AccessTy))
2113	return false;
2114
2115	// If we couldn't prove we can hoist the load, bail.
2116	if (!DereferenceableInPH) {
2117	LLVM_DEBUG(dbgs() << "Not promoting: Not dereferenceable in preheader\n");
2118	return false;
2119	}
2120
2121	// We know we can hoist the load, but don't have a guaranteed store.
2122	// Check whether the location is writable and thread-local. If it is, then we
2123	// can insert stores along paths which originally didn't have them without
2124	// violating the memory model.
2125	if (StoreSafety == StoreSafetyUnknown) {
2126	Value *Object = getUnderlyingObject(V: SomePtr);
2127	bool ExplicitlyDereferenceableOnly;
2128	if (isWritableObject(Object, ExplicitlyDereferenceableOnly) &&
2129	(!ExplicitlyDereferenceableOnly \|\|
2130	isDereferenceablePointer(V: SomePtr, Ty: AccessTy, DL: MDL)) &&
2131	isThreadLocalObject(Object, L: CurLoop, DT, TTI))
2132	StoreSafety = StoreSafe;
2133	}
2134
2135	// If we've still failed to prove we can sink the store, hoist the load
2136	// only, if possible.
2137	if (StoreSafety != StoreSafe && !FoundLoadToPromote)
2138	// If we cannot hoist the load either, give up.
2139	return false;
2140
2141	// Lets do the promotion!
2142	if (StoreSafety == StoreSafe) {
2143	LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2144	<< `'\n'`);
2145	++NumLoadStorePromoted;
2146	} else {
2147	LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2148	<< `'\n'`);
2149	++NumLoadPromoted;
2150	}
2151
2152	ORE->emit(RemarkBuilder: [&]() {
2153	return OptimizationRemark (DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2154	LoopUses [`0`])
2155	<< "Moving accesses to memory location out of the loop";
2156	});
2157
2158	// Look at all the loop uses, and try to merge their locations.
2159	std::vector<DebugLoc> LoopUsesLocs;
2160	for (auto U : LoopUses)
2161	LoopUsesLocs.push_back(x: U->getDebugLoc());
2162	auto DL = DebugLoc::getMergedLocations(Locs: LoopUsesLocs);
2163
2164	// We use the SSAUpdater interface to insert phi nodes as required.
2165	SmallVector<PHINode *, `16`> NewPHIs;
2166	SSAUpdater SSA(&NewPHIs);
2167	LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
2168	MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment,
2169	SawUnorderedAtomic,
2170	StoreIsGuanteedToExecute ? AATags : AAMDNodes (),
2171	*SafetyInfo, StoreSafety == StoreSafe);
2172
2173	// Set up the preheader to have a definition of the value. It is the live-out
2174	// value from the preheader that uses in the loop will use.
2175	LoadInst PreheaderLoad = nullptr*;
2176	if (FoundLoadToPromote \|\| !StoreIsGuanteedToExecute) {
2177	PreheaderLoad =
2178	new LoadInst (AccessTy, SomePtr, SomePtr->getName() + ".promoted",
2179	Preheader->getTerminator()->getIterator());
2180	if (SawUnorderedAtomic)
2181	PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2182	PreheaderLoad->setAlignment(Alignment);
2183	PreheaderLoad->setDebugLoc(DebugLoc::getDropped());
2184	if (AATags && LoadIsGuaranteedToExecute)
2185	PreheaderLoad->setAAMetadata(AATags);
2186
2187	MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
2188	I: PreheaderLoad, Definition: nullptr, BB: PreheaderLoad->getParent(), Point: MemorySSA::End);
2189	MemoryUse *NewMemUse = cast<MemoryUse>(Val: PreheaderLoadMemoryAccess);
2190	MSSAU.insertUse(Use: NewMemUse, /RenameUses=/true);
2191	SSA.AddAvailableValue(BB: Preheader, V: PreheaderLoad);
2192	} else {
2193	SSA.AddAvailableValue(BB: Preheader, V: PoisonValue::get(T: AccessTy));
2194	}
2195
2196	if (VerifyMemorySSA)
2197	MSSAU.getMemorySSA()->verifyMemorySSA();
2198	// Rewrite all the loads in the loop and remember all the definitions from
2199	// stores in the loop.
2200	Promoter.run(Insts: LoopUses);
2201
2202	if (VerifyMemorySSA)
2203	MSSAU.getMemorySSA()->verifyMemorySSA();
2204	// If the SSAUpdater didn't use the load in the preheader, just zap it now.
2205	if (PreheaderLoad && PreheaderLoad->use_empty())
2206	eraseInstruction(I&: PreheaderLoad, SafetyInfo&: SafetyInfo, MSSAU);
2207
2208	return true;
2209	}
2210
2211	static void foreachMemoryAccess(MemorySSA MSSA, Loop L,
2212	function_ref<void(Instruction *)> Fn) {
2213	for (const BasicBlock *BB : L->blocks())
2214	if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2215	for (const auto &Access : *Accesses)
2216	if (const auto *MUD = dyn_cast<MemoryUseOrDef>(Val: &Access))
2217	Fn (MUD->getMemoryInst());
2218	}
2219
2220	// The bool indicates whether there might be reads outside the set, in which
2221	// case only loads may be promoted.
2222	static SmallVector<PointersAndHasReadsOutsideSet, `0`>
2223	collectPromotionCandidates(MemorySSA MSSA, AliasAnalysis AA, Loop *L) {
2224	BatchAAResults BatchAA(*AA);
2225	AliasSetTracker AST(BatchAA);
2226
2227	auto IsPotentiallyPromotable = [L](const Instruction *I) {
2228	if (const auto *SI = dyn_cast<StoreInst>(Val: I)) {
2229	const Value *PtrOp = SI->getPointerOperand();
2230	return !isa<ConstantData>(Val: PtrOp) && L->isLoopInvariant(V: PtrOp);
2231	}
2232	if (const auto *LI = dyn_cast<LoadInst>(Val: I)) {
2233	const Value *PtrOp = LI->getPointerOperand();
2234	return !isa<ConstantData>(Val: PtrOp) && L->isLoopInvariant(V: PtrOp);
2235	}
2236	return false;
2237	};
2238
2239	// Populate AST with potentially promotable accesses.
2240	SmallPtrSet<Value *, `16`> AttemptingPromotion;
2241	foreachMemoryAccess(MSSA, L, Fn: [&](Instruction *I) {
2242	if (IsPotentiallyPromotable (I)) {
2243	AttemptingPromotion.insert(Ptr: I);
2244	AST.add(I);
2245	}
2246	});
2247
2248	// We're only interested in must-alias sets that contain a mod.
2249	SmallVector<PointerIntPair<const AliasSet , `1`, bool*>, `8`> Sets;
2250	for (AliasSet &AS : AST)
2251	if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2252	Sets.push_back(Elt: {&AS, false});
2253
2254	if (Sets.empty())
2255	return {}; // Nothing to promote...
2256
2257	// Discard any sets for which there is an aliasing non-promotable access.
2258	foreachMemoryAccess(MSSA, L, Fn: [&](Instruction *I) {
2259	if (AttemptingPromotion.contains(Ptr: I))
2260	return;
2261
2262	llvm::erase_if(C&: Sets, P: [&](PointerIntPair<const AliasSet , `1`, bool*> &Pair) {
2263	ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(Inst: I, AA&: BatchAA);
2264	// Cannot promote if there are writes outside the set.
2265	if (isModSet(MRI: MR))
2266	return true;
2267	if (isRefSet(MRI: MR)) {
2268	// Remember reads outside the set.
2269	Pair.setInt(true);
2270	// If this is a mod-only set and there are reads outside the set,
2271	// we will not be able to promote, so bail out early.
2272	return !Pair.getPointer()->isRef();
2273	}
2274	return false;
2275	});
2276	});
2277
2278	SmallVector<std::pair<SmallSetVector<Value , `8`>, bool*>, `0`> Result;
2279	for (auto [Set, HasReadsOutsideSet] : Sets) {
2280	SmallSetVector<Value *, `8`> PointerMustAliases;
2281	for (const auto &MemLoc : *Set)
2282	PointerMustAliases.insert(X: const_cast<Value *>(MemLoc.Ptr));
2283	Result.emplace_back(Args: std::move(PointerMustAliases), Args&: HasReadsOutsideSet);
2284	}
2285
2286	return Result;
2287	}
2288
2289	// For a given store instruction or writeonly call instruction, this function
2290	// checks that there are no read or writes that conflict with the memory
2291	// access in the instruction
2292	static bool noConflictingReadWrites(Instruction I, MemorySSA MSSA,
2293	AAResults AA, Loop CurLoop,
2294	SinkAndHoistLICMFlags &Flags) {
2295	assert(isa<CallInst>(I) \|\| isa<StoreInst>(I));
2296	// If there are more accesses than the Promotion cap, then give up as we're
2297	// not walking a list that long.
2298	if (Flags.tooManyMemoryAccesses())
2299	return false;
2300
2301	auto *IMD = MSSA->getMemoryAccess(I);
2302	BatchAAResults BAA(*AA);
2303	auto Source = getClobberingMemoryAccess(MSSA&: MSSA, BAA, Flags, MA: IMD);
2304	// Make sure there are no clobbers inside the loop.
2305	if (!MSSA->isLiveOnEntryDef(MA: Source) && CurLoop->contains(BB: Source->getBlock()))
2306	return false;
2307
2308	// If there are interfering Uses (i.e. their defining access is in the
2309	// loop), or ordered loads (stored as Defs!), don't move this store.
2310	// Could do better here, but this is conservatively correct.
2311	// TODO: Cache set of Uses on the first walk in runOnLoop, update when
2312	// moving accesses. Can also extend to dominating uses.
2313	for (auto *BB : CurLoop->getBlocks()) {
2314	auto *Accesses = MSSA->getBlockAccesses(BB);
2315	if (!Accesses)
2316	continue;
2317	for (const auto &MA : *Accesses)
2318	if (const auto *MU = dyn_cast<MemoryUse>(Val: &MA)) {
2319	auto MD = getClobberingMemoryAccess(MSSA&: MSSA, BAA, Flags,
2320	MA: const_cast<MemoryUse *>(MU));
2321	if (!MSSA->isLiveOnEntryDef(MA: MD) && CurLoop->contains(BB: MD->getBlock()))
2322	return false;
2323	// Disable hoisting past potentially interfering loads. Optimized
2324	// Uses may point to an access outside the loop, as getClobbering
2325	// checks the previous iteration when walking the backedge.
2326	// FIXME: More precise: no Uses that alias I.
2327	if (!Flags.getIsSink() && !MSSA->dominates(A: IMD, B: MU))
2328	return false;
2329	} else if (const auto *MD = dyn_cast<MemoryDef>(Val: &MA)) {
2330	if (auto *LI = dyn_cast<LoadInst>(Val: MD->getMemoryInst())) {
2331	(void)LI; // Silence warning.
2332	assert(!LI->isUnordered() && "Expected unordered load");
2333	return false;
2334	}
2335	// Any call, while it may not be clobbering I, it may be a use.
2336	if (auto *CI = dyn_cast<CallInst>(Val: MD->getMemoryInst())) {
2337	// Check if the call may read from the memory location written
2338	// to by I. Check CI's attributes and arguments; the number of
2339	// such checks performed is limited above by NoOfMemAccTooLarge.
2340	if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
2341	ModRefInfo MRI = BAA.getModRefInfo(I: CI, OptLoc: MemoryLocation::get(SI));
2342	if (isModOrRefSet(MRI))
2343	return false;
2344	} else {
2345	auto *SCI = cast<CallInst>(Val: I);
2346	// If the instruction we are wanting to hoist is also a call
2347	// instruction then we need not check mod/ref info with itself
2348	if (SCI == CI)
2349	continue;
2350	ModRefInfo MRI = BAA.getModRefInfo(I: CI, Call2: SCI);
2351	if (isModOrRefSet(MRI))
2352	return false;
2353	}
2354	}
2355	}
2356	}
2357	return true;
2358	}
2359
2360	static bool pointerInvalidatedByLoop(MemorySSA MSSA, MemoryUse MU,
2361	Loop *CurLoop, Instruction &I,
2362	SinkAndHoistLICMFlags &Flags,
2363	bool InvariantGroup) {
2364	// For hoisting, use the walker to determine safety
2365	if (!Flags.getIsSink()) {
2366	// If hoisting an invariant group, we only need to check that there
2367	// is no store to the loaded pointer between the start of the loop,
2368	// and the load (since all values must be the same).
2369
2370	// This can be checked in two conditions:
2371	// 1) if the memoryaccess is outside the loop
2372	// 2) the earliest access is at the loop header,
2373	// if the memory loaded is the phi node
2374
2375	BatchAAResults BAA(MSSA->getAA());
2376	MemoryAccess Source = getClobberingMemoryAccess(MSSA&: MSSA, BAA, Flags, MA: MU);
2377	return !MSSA->isLiveOnEntryDef(MA: Source) &&
2378	CurLoop->contains(BB: Source->getBlock()) &&
2379	!(InvariantGroup && Source->getBlock() == CurLoop->getHeader() && isa<MemoryPhi>(Val: Source));
2380	}
2381
2382	// For sinking, we'd need to check all Defs below this use. The getClobbering
2383	// call will look on the backedge of the loop, but will check aliasing with
2384	// the instructions on the previous iteration.
2385	// For example:
2386	// for (i ... )
2387	// load a[i] ( Use (LoE)
2388	// store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2389	// i++;
2390	// The load sees no clobbering inside the loop, as the backedge alias check
2391	// does phi translation, and will check aliasing against store a[i-1].
2392	// However sinking the load outside the loop, below the store is incorrect.
2393
2394	// For now, only sink if there are no Defs in the loop, and the existing ones
2395	// precede the use and are in the same block.
2396	// FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2397	// needs PostDominatorTreeAnalysis.
2398	// FIXME: More precise: no Defs that alias this Use.
2399	if (Flags.tooManyMemoryAccesses())
2400	return true;
2401	for (auto *BB : CurLoop->getBlocks())
2402	if (pointerInvalidatedByBlock(BB&: BB, MSSA&: MSSA, MU&: *MU))
2403	return true;
2404	// When sinking, the source block may not be part of the loop so check it.
2405	if (!CurLoop->contains(Inst: &I))
2406	return pointerInvalidatedByBlock(BB&: I.getParent(), MSSA&: MSSA, MU&: *MU);
2407
2408	return false;
2409	}
2410
2411	bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
2412	if (const auto *Accesses = MSSA.getBlockDefs(BB: &BB))
2413	for (const auto &MA : *Accesses)
2414	if (const auto *MD = dyn_cast<MemoryDef>(Val: &MA))
2415	if (MU.getBlock() != MD->getBlock() \|\| !MSSA.locallyDominates(A: MD, B: &MU))
2416	return true;
2417	return false;
2418	}
2419
2420	/// Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A <
2421	/// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their
2422	/// minimun can be computed outside of loop, and X is not a loop-invariant.
2423	static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2424	MemorySSAUpdater &MSSAU) {
2425	bool Inverse = false;
2426	using namespace PatternMatch;
2427	Value Cond1, Cond2;
2428	if (match(V: &I, P: m_LogicalOr(L: m_Value(V&: Cond1), R: m_Value(V&: Cond2)))) {
2429	Inverse = true;
2430	} else if (match(V: &I, P: m_LogicalAnd(L: m_Value(V&: Cond1), R: m_Value(V&: Cond2)))) {
2431	// Do nothing
2432	} else
2433	return false;
2434
2435	auto MatchICmpAgainstInvariant = [&](Value C, CmpPredicate &P, Value &LHS,
2436	Value *&RHS) {
2437	if (!match(V: C, P: m_OneUse(SubPattern: m_ICmp(Pred&: P, L: m_Value(V&: LHS), R: m_Value(V&: RHS)))))
2438	return false;
2439	if (!LHS->getType()->isIntegerTy())
2440	return false;
2441	if (!ICmpInst::isRelational(P))
2442	return false;
2443	if (L.isLoopInvariant(V: LHS)) {
2444	std::swap(a&: LHS, b&: RHS);
2445	P = ICmpInst::getSwappedPredicate(pred: P);
2446	}
2447	if (L.isLoopInvariant(V: LHS) \|\| !L.isLoopInvariant(V: RHS))
2448	return false;
2449	if (Inverse)
2450	P = ICmpInst::getInversePredicate(pred: P);
2451	return true;
2452	};
2453	CmpPredicate P1, P2;
2454	Value LHS1, LHS2, RHS1, RHS2;
2455	if (!MatchICmpAgainstInvariant (Cond1, P1, LHS1, RHS1) \|\|
2456	!MatchICmpAgainstInvariant (Cond2, P2, LHS2, RHS2))
2457	return false;
2458	auto MatchingPred = CmpPredicate::getMatching(A: P1, B: P2);
2459	if (!MatchingPred \|\| LHS1 != LHS2)
2460	return false;
2461
2462	// Everything is fine, we can do the transform.
2463	bool UseMin = ICmpInst::isLT(P: MatchingPred) \|\| ICmpInst::isLE(P: MatchingPred);
2464	assert(
2465	(UseMin \|\| ICmpInst::isGT(*MatchingPred) \|\|
2466	ICmpInst::isGE(*MatchingPred)) &&
2467	"Relational predicate is either less (or equal) or greater (or equal)!");
2468	Intrinsic::ID id = ICmpInst::isSigned(predicate: *MatchingPred)
2469	? (UseMin ? Intrinsic::smin : Intrinsic::smax)
2470	: (UseMin ? Intrinsic::umin : Intrinsic::umax);
2471	auto *Preheader = L.getLoopPreheader();
2472	assert(Preheader && "Loop is not in simplify form?");
2473	IRBuilder<> Builder(Preheader->getTerminator());
2474	// We are about to create a new guaranteed use for RHS2 which might not exist
2475	// before (if it was a non-taken input of logical and/or instruction). If it
2476	// was poison, we need to freeze it. Note that no new use for LHS and RHS1 are
2477	// introduced, so they don't need this.
2478	if (isa<SelectInst>(Val: I))
2479	RHS2 = Builder.CreateFreeze(V: RHS2, Name: RHS2->getName() + ".fr");
2480	Value *NewRHS = Builder.CreateBinaryIntrinsic(
2481	ID: id, LHS: RHS1, RHS: RHS2, FMFSource: nullptr,
2482	Name: StringRef ("invariant.") +
2483	(ICmpInst::isSigned(predicate: *MatchingPred) ? "s" : "u") +
2484	(UseMin ? "min" : "max"));
2485	Builder.SetInsertPoint(&I);
2486	ICmpInst::Predicate P = *MatchingPred;
2487	if (Inverse)
2488	P = ICmpInst::getInversePredicate(pred: P);
2489	Value *NewCond = Builder.CreateICmp(P, LHS: LHS1, RHS: NewRHS);
2490	NewCond->takeName(V: &I);
2491	I.replaceAllUsesWith(V: NewCond);
2492	eraseInstruction(I, SafetyInfo, MSSAU);
2493	eraseInstruction(I&: *cast<Instruction>(Val: Cond1), SafetyInfo, MSSAU);
2494	eraseInstruction(I&: *cast<Instruction>(Val: Cond2), SafetyInfo, MSSAU);
2495	return true;
2496	}
2497
2498	/// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if
2499	/// this allows hoisting the inner GEP.
2500	static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2501	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2502	DominatorTree *DT) {
2503	auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I);
2504	if (!GEP)
2505	return false;
2506
2507	auto *Src = dyn_cast<GetElementPtrInst>(Val: GEP->getPointerOperand());
2508	if (!Src \|\| !Src->hasOneUse() \|\| !L.contains(Inst: Src))
2509	return false;
2510
2511	Value *SrcPtr = Src->getPointerOperand();
2512	auto LoopInvariant = [&](Value V) { return* L.isLoopInvariant(V); };
2513	if (!L.isLoopInvariant(V: SrcPtr) \|\| !all_of(Range: GEP->indices(), P: LoopInvariant))
2514	return false;
2515
2516	// This can only happen if !AllowSpeculation, otherwise this would already be
2517	// handled.
2518	// FIXME: Should we respect AllowSpeculation in these reassociation folds?
2519	// The flag exists to prevent metadata dropping, which is not relevant here.
2520	if (all_of(Range: Src->indices(), P: LoopInvariant))
2521	return false;
2522
2523	// The swapped GEPs are inbounds if both original GEPs are inbounds
2524	// and the sign of the offsets is the same. For simplicity, only
2525	// handle both offsets being non-negative.
2526	const DataLayout &DL = GEP->getDataLayout();
2527	auto NonNegative = [&](Value *V) {
2528	return isKnownNonNegative(V, SQ: SimplifyQuery (DL, DT, AC, GEP));
2529	};
2530	bool IsInBounds = Src->isInBounds() && GEP->isInBounds() &&
2531	all_of(Range: Src->indices(), P: NonNegative) &&
2532	all_of(Range: GEP->indices(), P: NonNegative);
2533
2534	BasicBlock *Preheader = L.getLoopPreheader();
2535	IRBuilder<> Builder(Preheader->getTerminator());
2536	Value *NewSrc = Builder.CreateGEP(Ty: GEP->getSourceElementType(), Ptr: SrcPtr,
2537	IdxList: SmallVector<Value *>(GEP->indices()),
2538	Name: "invariant.gep", NW: IsInBounds);
2539	Builder.SetInsertPoint(GEP);
2540	Value *NewGEP = Builder.CreateGEP(Ty: Src->getSourceElementType(), Ptr: NewSrc,
2541	IdxList: SmallVector<Value *>(Src->indices()), Name: "gep",
2542	NW: IsInBounds);
2543	GEP->replaceAllUsesWith(V: NewGEP);
2544	eraseInstruction(I&: *GEP, SafetyInfo, MSSAU);
2545	salvageDebugInfo(I&: *Src);
2546	eraseInstruction(I&: *Src, SafetyInfo, MSSAU);
2547	return true;
2548	}
2549
2550	/// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here
2551	/// C1 and C2 are loop invariants and LV is a loop-variant.
2552	static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
2553	Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2554	ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2555	AssumptionCache AC, DominatorTree DT) {
2556	assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2557	assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2558
2559	bool IsSigned = ICmpInst::isSigned(predicate: Pred);
2560
2561	// Try to represent VariantLHS as sum of invariant and variant operands.
2562	using namespace PatternMatch;
2563	Value VariantOp, InvariantOp;
2564	if (IsSigned &&
2565	!match(V: VariantLHS, P: m_NSWAdd(L: m_Value(V&: VariantOp), R: m_Value(V&: InvariantOp))))
2566	return false;
2567	if (!IsSigned &&
2568	!match(V: VariantLHS, P: m_NUWAdd(L: m_Value(V&: VariantOp), R: m_Value(V&: InvariantOp))))
2569	return false;
2570
2571	// LHS itself is a loop-variant, try to represent it in the form:
2572	// "VariantOp + InvariantOp". If it is possible, then we can reassociate.
2573	if (L.isLoopInvariant(V: VariantOp))
2574	std::swap(a&: VariantOp, b&: InvariantOp);
2575	if (L.isLoopInvariant(V: VariantOp) \|\| !L.isLoopInvariant(V: InvariantOp))
2576	return false;
2577
2578	// In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to
2579	// freely move values from left side of inequality to right side (just as in
2580	// normal linear arithmetics). Overflows make things much more complicated, so
2581	// we want to avoid this.
2582	auto &DL = L.getHeader()->getDataLayout();
2583	SimplifyQuery SQ(DL, DT, AC, &ICmp);
2584	if (IsSigned && computeOverflowForSignedSub(LHS: InvariantRHS, RHS: InvariantOp, SQ) !=
2585	llvm::OverflowResult::NeverOverflows)
2586	return false;
2587	if (!IsSigned &&
2588	computeOverflowForUnsignedSub(LHS: InvariantRHS, RHS: InvariantOp, SQ) !=
2589	llvm::OverflowResult::NeverOverflows)
2590	return false;
2591	auto *Preheader = L.getLoopPreheader();
2592	assert(Preheader && "Loop is not in simplify form?");
2593	IRBuilder<> Builder(Preheader->getTerminator());
2594	Value *NewCmpOp =
2595	Builder.CreateSub(LHS: InvariantRHS, RHS: InvariantOp, Name: "invariant.op",
2596	/HasNUW/ !IsSigned, /HasNSW/ IsSigned);
2597	ICmp.setPredicate(Pred);
2598	ICmp.setOperand(i_nocapture: `0`, Val_nocapture: VariantOp);
2599	ICmp.setOperand(i_nocapture: `1`, Val_nocapture: NewCmpOp);
2600
2601	Instruction &DeadI = cast<Instruction>(Val&: *VariantLHS);
2602	salvageDebugInfo(I&: DeadI);
2603	eraseInstruction(I&: DeadI, SafetyInfo, MSSAU);
2604	return true;
2605	}
2606
2607	/// Try to reassociate and hoist the following two patterns:
2608	/// LV - C1 < C2 --> LV < C1 + C2,
2609	/// C1 - LV < C2 --> LV > C1 - C2.
2610	static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
2611	Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2612	ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2613	AssumptionCache AC, DominatorTree DT) {
2614	assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2615	assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2616
2617	bool IsSigned = ICmpInst::isSigned(predicate: Pred);
2618
2619	// Try to represent VariantLHS as sum of invariant and variant operands.
2620	using namespace PatternMatch;
2621	Value VariantOp, InvariantOp;
2622	if (IsSigned &&
2623	!match(V: VariantLHS, P: m_NSWSub(L: m_Value(V&: VariantOp), R: m_Value(V&: InvariantOp))))
2624	return false;
2625	if (!IsSigned &&
2626	!match(V: VariantLHS, P: m_NUWSub(L: m_Value(V&: VariantOp), R: m_Value(V&: InvariantOp))))
2627	return false;
2628
2629	bool VariantSubtracted = false;
2630	// LHS itself is a loop-variant, try to represent it in the form:
2631	// "VariantOp + InvariantOp". If it is possible, then we can reassociate. If
2632	// the variant operand goes with minus, we use a slightly different scheme.
2633	if (L.isLoopInvariant(V: VariantOp)) {
2634	std::swap(a&: VariantOp, b&: InvariantOp);
2635	VariantSubtracted = true;
2636	Pred = ICmpInst::getSwappedPredicate(pred: Pred);
2637	}
2638	if (L.isLoopInvariant(V: VariantOp) \|\| !L.isLoopInvariant(V: InvariantOp))
2639	return false;
2640
2641	// In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
2642	// freely move values from left side of inequality to right side (just as in
2643	// normal linear arithmetics). Overflows make things much more complicated, so
2644	// we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
2645	// "C1 - C2" does not overflow.
2646	auto &DL = L.getHeader()->getDataLayout();
2647	SimplifyQuery SQ(DL, DT, AC, &ICmp);
2648	if (VariantSubtracted && IsSigned) {
2649	// C1 - LV < C2 --> LV > C1 - C2
2650	if (computeOverflowForSignedSub(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2651	llvm::OverflowResult::NeverOverflows)
2652	return false;
2653	} else if (VariantSubtracted && !IsSigned) {
2654	// C1 - LV < C2 --> LV > C1 - C2
2655	if (computeOverflowForUnsignedSub(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2656	llvm::OverflowResult::NeverOverflows)
2657	return false;
2658	} else if (!VariantSubtracted && IsSigned) {
2659	// LV - C1 < C2 --> LV < C1 + C2
2660	if (computeOverflowForSignedAdd(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2661	llvm::OverflowResult::NeverOverflows)
2662	return false;
2663	} else { // !VariantSubtracted && !IsSigned
2664	// LV - C1 < C2 --> LV < C1 + C2
2665	if (computeOverflowForUnsignedAdd(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2666	llvm::OverflowResult::NeverOverflows)
2667	return false;
2668	}
2669	auto *Preheader = L.getLoopPreheader();
2670	assert(Preheader && "Loop is not in simplify form?");
2671	IRBuilder<> Builder(Preheader->getTerminator());
2672	Value *NewCmpOp =
2673	VariantSubtracted
2674	? Builder.CreateSub(LHS: InvariantOp, RHS: InvariantRHS, Name: "invariant.op",
2675	/HasNUW/ !IsSigned, /HasNSW/ IsSigned)
2676	: Builder.CreateAdd(LHS: InvariantOp, RHS: InvariantRHS, Name: "invariant.op",
2677	/HasNUW/ !IsSigned, /HasNSW/ IsSigned);
2678	ICmp.setPredicate(Pred);
2679	ICmp.setOperand(i_nocapture: `0`, Val_nocapture: VariantOp);
2680	ICmp.setOperand(i_nocapture: `1`, Val_nocapture: NewCmpOp);
2681
2682	Instruction &DeadI = cast<Instruction>(Val&: *VariantLHS);
2683	salvageDebugInfo(I&: DeadI);
2684	eraseInstruction(I&: DeadI, SafetyInfo, MSSAU);
2685	return true;
2686	}
2687
2688	/// Reassociate and hoist add/sub expressions.
2689	static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2690	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2691	DominatorTree *DT) {
2692	using namespace PatternMatch;
2693	CmpPredicate Pred;
2694	Value LHS, RHS;
2695	if (!match(V: &I, P: m_ICmp(Pred, L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
2696	return false;
2697
2698	// Put variant operand to LHS position.
2699	if (L.isLoopInvariant(V: LHS)) {
2700	std::swap(a&: LHS, b&: RHS);
2701	Pred = ICmpInst::getSwappedPredicate(pred: Pred);
2702	}
2703	// We want to delete the initial operation after reassociation, so only do it
2704	// if it has no other uses.
2705	if (L.isLoopInvariant(V: LHS) \|\| !L.isLoopInvariant(V: RHS) \|\| !LHS->hasOneUse())
2706	return false;
2707
2708	// TODO: We could go with smarter context, taking common dominator of all I's
2709	// users instead of I itself.
2710	if (hoistAdd(Pred, VariantLHS: LHS, InvariantRHS: RHS, ICmp&: cast<ICmpInst>(Val&: I), L, SafetyInfo, MSSAU, AC, DT))
2711	return true;
2712
2713	if (hoistSub(Pred, VariantLHS: LHS, InvariantRHS: RHS, ICmp&: cast<ICmpInst>(Val&: I), L, SafetyInfo, MSSAU, AC, DT))
2714	return true;
2715
2716	return false;
2717	}
2718
2719	static bool isReassociableOp(Instruction I, unsigned* IntOpcode,
2720	unsigned FPOpcode) {
2721	if (I->getOpcode() == IntOpcode)
2722	return true;
2723	if (I->getOpcode() == FPOpcode && I->hasAllowReassoc() &&
2724	I->hasNoSignedZeros())
2725	return true;
2726	return false;
2727	}
2728
2729	/// Try to reassociate expressions like ((A1 B1) + (A2 * B2) + ...) * C where*
2730	/// A1, A2, ... and C are loop invariants into expressions like
2731	/// ((A1 C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ...*
2732	/// invariant expressions. This functions returns true only if any hoisting has
2733	/// actually occured.
2734	static bool hoistMulAddAssociation(Instruction &I, Loop &L,
2735	ICFLoopSafetyInfo &SafetyInfo,
2736	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2737	DominatorTree *DT) {
2738	if (!isReassociableOp(I: &I, IntOpcode: Instruction::Mul, FPOpcode: Instruction::FMul))
2739	return false;
2740	Value *VariantOp = I.getOperand(i: `0`);
2741	Value *InvariantOp = I.getOperand(i: `1`);
2742	if (L.isLoopInvariant(V: VariantOp))
2743	std::swap(a&: VariantOp, b&: InvariantOp);
2744	if (L.isLoopInvariant(V: VariantOp) \|\| !L.isLoopInvariant(V: InvariantOp))
2745	return false;
2746	Value *Factor = InvariantOp;
2747
2748	// First, we need to make sure we should do the transformation.
2749	SmallVector<Use *> Changes;
2750	SmallVector<BinaryOperator *> Adds;
2751	SmallVector<BinaryOperator *> Worklist;
2752	if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(Val: VariantOp))
2753	Worklist.push_back(Elt: VariantBinOp);
2754	while (!Worklist.empty()) {
2755	BinaryOperator *BO = Worklist.pop_back_val();
2756	if (!BO->hasOneUse())
2757	return false;
2758	if (isReassociableOp(I: BO, IntOpcode: Instruction::Add, FPOpcode: Instruction::FAdd) &&
2759	isa<BinaryOperator>(Val: BO->getOperand(i_nocapture: `0`)) &&
2760	isa<BinaryOperator>(Val: BO->getOperand(i_nocapture: `1`))) {
2761	Worklist.push_back(Elt: cast<BinaryOperator>(Val: BO->getOperand(i_nocapture: `0`)));
2762	Worklist.push_back(Elt: cast<BinaryOperator>(Val: BO->getOperand(i_nocapture: `1`)));
2763	Adds.push_back(Elt: BO);
2764	continue;
2765	}
2766	if (!isReassociableOp(I: BO, IntOpcode: Instruction::Mul, FPOpcode: Instruction::FMul) \|\|
2767	L.isLoopInvariant(V: BO))
2768	return false;
2769	Use &U0 = BO->getOperandUse(i: `0`);
2770	Use &U1 = BO->getOperandUse(i: `1`);
2771	if (L.isLoopInvariant(V: U0))
2772	Changes.push_back(Elt: &U0);
2773	else if (L.isLoopInvariant(V: U1))
2774	Changes.push_back(Elt: &U1);
2775	else
2776	return false;
2777	unsigned Limit = I.getType()->isIntOrIntVectorTy()
2778	? IntAssociationUpperLimit
2779	: FPAssociationUpperLimit;
2780	if (Changes.size() > Limit)
2781	return false;
2782	}
2783	if (Changes.empty())
2784	return false;
2785
2786	// Drop the poison flags for any adds we looked through.
2787	if (I.getType()->isIntOrIntVectorTy()) {
2788	for (auto *Add : Adds)
2789	Add->dropPoisonGeneratingFlags();
2790	}
2791
2792	// We know we should do it so let's do the transformation.
2793	auto *Preheader = L.getLoopPreheader();
2794	assert(Preheader && "Loop is not in simplify form?");
2795	IRBuilder<> Builder(Preheader->getTerminator());
2796	for (auto *U : Changes) {
2797	assert(L.isLoopInvariant(U->get()));
2798	auto *Ins = cast<BinaryOperator>(Val: U->getUser());
2799	Value *Mul;
2800	if (I.getType()->isIntOrIntVectorTy()) {
2801	Mul = Builder.CreateMul(LHS: U->get(), RHS: Factor, Name: "factor.op.mul");
2802	// Drop the poison flags on the original multiply.
2803	Ins->dropPoisonGeneratingFlags();
2804	} else
2805	Mul = Builder.CreateFMulFMF(L: U->get(), R: Factor, FMFSource: Ins, Name: "factor.op.fmul");
2806
2807	// Rewrite the reassociable instruction.
2808	unsigned OpIdx = U->getOperandNo();
2809	auto *LHS = OpIdx == `0` ? Mul : Ins->getOperand(i_nocapture: `0`);
2810	auto *RHS = OpIdx == `1` ? Mul : Ins->getOperand(i_nocapture: `1`);
2811	auto *NewBO =
2812	BinaryOperator::Create(Op: Ins->getOpcode(), S1: LHS, S2: RHS,
2813	Name: Ins->getName() + ".reass", InsertBefore: Ins->getIterator());
2814	NewBO->setDebugLoc(DebugLoc::getDropped());
2815	NewBO->copyIRFlags(V: Ins);
2816	if (VariantOp == Ins)
2817	VariantOp = NewBO;
2818	Ins->replaceAllUsesWith(V: NewBO);
2819	eraseInstruction(I&: *Ins, SafetyInfo, MSSAU);
2820	}
2821
2822	I.replaceAllUsesWith(V: VariantOp);
2823	eraseInstruction(I, SafetyInfo, MSSAU);
2824	return true;
2825	}
2826
2827	/// Reassociate associative binary expressions of the form
2828	///
2829	/// 1. "(LV op C1) op C2" ==> "LV op (C1 op C2)"
2830	/// 2. "(C1 op LV) op C2" ==> "LV op (C1 op C2)"
2831	/// 3. "C2 op (C1 op LV)" ==> "LV op (C1 op C2)"
2832	/// 4. "C2 op (LV op C1)" ==> "LV op (C1 op C2)"
2833	///
2834	/// where op is an associative BinOp, LV is a loop variant, and C1 and C2 are
2835	/// loop invariants that we want to hoist, noting that associativity implies
2836	/// commutativity.
2837	static bool hoistBOAssociation(Instruction &I, Loop &L,
2838	ICFLoopSafetyInfo &SafetyInfo,
2839	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2840	DominatorTree *DT) {
2841	auto *BO = dyn_cast<BinaryOperator>(Val: &I);
2842	if (!BO \|\| !BO->isAssociative())
2843	return false;
2844
2845	Instruction::BinaryOps Opcode = BO->getOpcode();
2846	bool LVInRHS = L.isLoopInvariant(V: BO->getOperand(i_nocapture: `0`));
2847	auto *BO0 = dyn_cast<BinaryOperator>(Val: BO->getOperand(i_nocapture: LVInRHS));
2848	if (!BO0 \|\| BO0->getOpcode() != Opcode \|\| !BO0->isAssociative() \|\|
2849	BO0->hasNUsesOrMore(N: `3`))
2850	return false;
2851
2852	Value *LV = BO0->getOperand(i_nocapture: `0`);
2853	Value *C1 = BO0->getOperand(i_nocapture: `1`);
2854	Value *C2 = BO->getOperand(i_nocapture: !LVInRHS);
2855
2856	assert(BO->isCommutative() && BO0->isCommutative() &&
2857	"Associativity implies commutativity");
2858	if (L.isLoopInvariant(V: LV) && !L.isLoopInvariant(V: C1))
2859	std::swap(a&: LV, b&: C1);
2860	if (L.isLoopInvariant(V: LV) \|\| !L.isLoopInvariant(V: C1) \|\| !L.isLoopInvariant(V: C2))
2861	return false;
2862
2863	auto *Preheader = L.getLoopPreheader();
2864	assert(Preheader && "Loop is not in simplify form?");
2865
2866	IRBuilder<> Builder(Preheader->getTerminator());
2867	auto *Inv = Builder.CreateBinOp(Opc: Opcode, LHS: C1, RHS: C2, Name: "invariant.op");
2868
2869	auto *NewBO = BinaryOperator::Create(
2870	Op: Opcode, S1: LV, S2: Inv, Name: BO->getName() + ".reass", InsertBefore: BO->getIterator());
2871	NewBO->setDebugLoc(DebugLoc::getDropped());
2872
2873	if (Opcode == Instruction::FAdd \|\| Opcode == Instruction::FMul) {
2874	// Intersect FMF flags for FADD and FMUL.
2875	FastMathFlags Intersect = BO->getFastMathFlags() & BO0->getFastMathFlags();
2876	if (auto *I = dyn_cast<Instruction>(Val: Inv))
2877	I->setFastMathFlags(Intersect);
2878	NewBO->setFastMathFlags(Intersect);
2879	} else {
2880	OverflowTracking Flags;
2881	Flags.AllKnownNonNegative = false;
2882	Flags.AllKnownNonZero = false;
2883	Flags.mergeFlags(I&: *BO);
2884	Flags.mergeFlags(I&: *BO0);
2885	// If `Inv` was not constant-folded, a new Instruction has been created.
2886	if (auto *I = dyn_cast<Instruction>(Val: Inv))
2887	Flags.applyFlags(I&: *I);
2888	Flags.applyFlags(I&: *NewBO);
2889	}
2890
2891	BO->replaceAllUsesWith(V: NewBO);
2892	eraseInstruction(I&: *BO, SafetyInfo, MSSAU);
2893
2894	// (LV op C1) might not be erased if it has more uses than the one we just
2895	// replaced.
2896	if (BO0->use_empty()) {
2897	salvageDebugInfo(I&: *BO0);
2898	eraseInstruction(I&: *BO0, SafetyInfo, MSSAU);
2899	}
2900
2901	return true;
2902	}
2903
2904	static bool hoistArithmetics(Instruction &I, Loop &L,
2905	ICFLoopSafetyInfo &SafetyInfo,
2906	MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2907	DominatorTree *DT) {
2908	// Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
2909	// into (x < min(INV1, INV2)), and hoisting the invariant part of this
2910	// expression out of the loop.
2911	if (hoistMinMax(I, L, SafetyInfo, MSSAU)) {
2912	++NumHoisted;
2913	++NumMinMaxHoisted;
2914	return true;
2915	}
2916
2917	// Try to hoist GEPs by reassociation.
2918	if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) {
2919	++NumHoisted;
2920	++NumGEPsHoisted;
2921	return true;
2922	}
2923
2924	// Try to hoist add/sub's by reassociation.
2925	if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) {
2926	++NumHoisted;
2927	++NumAddSubHoisted;
2928	return true;
2929	}
2930
2931	bool IsInt = I.getType()->isIntOrIntVectorTy();
2932	if (hoistMulAddAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
2933	++NumHoisted;
2934	if (IsInt)
2935	++NumIntAssociationsHoisted;
2936	else
2937	++NumFPAssociationsHoisted;
2938	return true;
2939	}
2940
2941	if (hoistBOAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
2942	++NumHoisted;
2943	++NumBOAssociationsHoisted;
2944	return true;
2945	}
2946
2947	return false;
2948	}
2949
2950	/// Little predicate that returns true if the specified basic block is in
2951	/// a subloop of the current one, not the current one itself.
2952	///
2953	static bool inSubLoop(BasicBlock BB, Loop CurLoop, LoopInfo *LI) {
2954	assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2955	return LI->getLoopFor(BB) != CurLoop;
2956	}
2957

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/LICM.cpp