LoopUtils.cpp source code [llvm_projects/llvm/lib/Transforms/Utils/LoopUtils.cpp]

1	//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines common loop utility functions.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/Transforms/Utils/LoopUtils.h"
14	#include "llvm/ADT/DenseSet.h"
15	#include "llvm/ADT/PriorityWorklist.h"
16	#include "llvm/ADT/ScopeExit.h"
17	#include "llvm/ADT/SetVector.h"
18	#include "llvm/ADT/SmallPtrSet.h"
19	#include "llvm/ADT/SmallVector.h"
20	#include "llvm/Analysis/AliasAnalysis.h"
21	#include "llvm/Analysis/BasicAliasAnalysis.h"
22	#include "llvm/Analysis/DomTreeUpdater.h"
23	#include "llvm/Analysis/GlobalsModRef.h"
24	#include "llvm/Analysis/InstSimplifyFolder.h"
25	#include "llvm/Analysis/LoopAccessAnalysis.h"
26	#include "llvm/Analysis/LoopInfo.h"
27	#include "llvm/Analysis/LoopPass.h"
28	#include "llvm/Analysis/MemorySSA.h"
29	#include "llvm/Analysis/MemorySSAUpdater.h"
30	#include "llvm/Analysis/ScalarEvolution.h"
31	#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
32	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
33	#include "llvm/IR/DIBuilder.h"
34	#include "llvm/IR/Dominators.h"
35	#include "llvm/IR/Instructions.h"
36	#include "llvm/IR/IntrinsicInst.h"
37	#include "llvm/IR/MDBuilder.h"
38	#include "llvm/IR/Module.h"
39	#include "llvm/IR/PatternMatch.h"
40	#include "llvm/IR/ProfDataUtils.h"
41	#include "llvm/IR/ValueHandle.h"
42	#include "llvm/InitializePasses.h"
43	#include "llvm/Pass.h"
44	#include "llvm/Support/Debug.h"
45	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
46	#include "llvm/Transforms/Utils/Local.h"
47	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
48
49	using namespace llvm;
50	using namespace llvm::PatternMatch;
51
52	#define DEBUG_TYPE "loop-utils"
53
// Names of the loop-metadata attributes queried by the hint helpers in this
// file. The pointers are themselves const so they cannot be re-seated.
static const char *const LLVMLoopDisableNonforced =
    "llvm.loop.disable_nonforced";
static const char *const LLVMLoopDisableLICM = "llvm.licm.disable";
56
57	bool llvm::formDedicatedExitBlocks(Loop L, DominatorTree DT, LoopInfo *LI,
58	MemorySSAUpdater *MSSAU,
59	bool PreserveLCSSA) {
60	bool Changed = false;
61
62	// We re-use a vector for the in-loop predecesosrs.
63	SmallVector<BasicBlock *, `4`> InLoopPredecessors;
64
65	auto RewriteExit = [&](BasicBlock *BB) {
66	assert(InLoopPredecessors.empty() &&
67	"Must start with an empty predecessors list!");
68	auto Cleanup = make_scope_exit(F: [&] { InLoopPredecessors.clear(); });
69
70	// See if there are any non-loop predecessors of this exit block and
71	// keep track of the in-loop predecessors.
72	bool IsDedicatedExit = true;
73	for (auto *PredBB : predecessors(BB))
74	if (L->contains(BB: PredBB)) {
75	if (isa<IndirectBrInst>(Val: PredBB->getTerminator()))
76	// We cannot rewrite exiting edges from an indirectbr.
77	return false;
78
79	InLoopPredecessors.push_back(Elt: PredBB);
80	} else {
81	IsDedicatedExit = false;
82	}
83
84	assert(!InLoopPredecessors.empty() && "Must have some loop predecessor!");
85
86	// Nothing to do if this is already a dedicated exit.
87	if (IsDedicatedExit)
88	return false;
89
90	auto *NewExitBB = SplitBlockPredecessors(
91	BB, Preds: InLoopPredecessors, Suffix: ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
92
93	if (!NewExitBB)
94	LLVM_DEBUG(
95	dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
96	<< *L << "\n");
97	else
98	LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
99	<< NewExitBB->getName() << "\n");
100	return true;
101	};
102
103	// Walk the exit blocks directly rather than building up a data structure for
104	// them, but only visit each one once.
105	SmallPtrSet<BasicBlock *, `4`> Visited;
106	for (auto *BB : L->blocks())
107	for (auto *SuccBB : successors(BB)) {
108	// We're looking for exit blocks so skip in-loop successors.
109	if (L->contains(BB: SuccBB))
110	continue;
111
112	// Visit each exit block exactly once.
113	if (!Visited.insert(Ptr: SuccBB).second)
114	continue;
115
116	Changed \|= RewriteExit (SuccBB);
117	}
118
119	return Changed;
120	}
121
122	/// Returns the instructions that use values defined in the loop.
123	SmallVector<Instruction , `8`> llvm::findDefsUsedOutsideOfLoop(Loop L) {
124	SmallVector<Instruction *, `8`> UsedOutside;
125
126	for (auto *Block : L->getBlocks())
127	// FIXME: I believe that this could use copy_if if the Inst reference could
128	// be adapted into a pointer.
129	for (auto &Inst : *Block) {
130	auto Users = Inst.users();
131	if (any_of(Range&: Users, P: [&](User *U) {
132	auto *Use = cast<Instruction>(Val: U);
133	return !L->contains(BB: Use->getParent());
134	}))
135	UsedOutside.push_back(Elt: &Inst);
136	}
137
138	return UsedOutside;
139	}
140
141	void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
142	// By definition, all loop passes need the LoopInfo analysis and the
143	// Dominator tree it depends on. Because they all participate in the loop
144	// pass manager, they must also preserve these.
145	AU.addRequired<DominatorTreeWrapperPass>();
146	AU.addPreserved<DominatorTreeWrapperPass>();
147	AU.addRequired<LoopInfoWrapperPass>();
148	AU.addPreserved<LoopInfoWrapperPass>();
149
150	// We must also preserve LoopSimplify and LCSSA. We locally access their IDs
151	// here because users shouldn't directly get them from this header.
152	extern char &LoopSimplifyID;
153	extern char &LCSSAID;
154	AU.addRequiredID(ID&: LoopSimplifyID);
155	AU.addPreservedID(ID&: LoopSimplifyID);
156	AU.addRequiredID(ID&: LCSSAID);
157	AU.addPreservedID(ID&: LCSSAID);
158	// This is used in the LPPassManager to perform LCSSA verification on passes
159	// which preserve lcssa form
160	AU.addRequired<LCSSAVerificationPass>();
161	AU.addPreserved<LCSSAVerificationPass>();
162
163	// Loop passes are designed to run inside of a loop pass manager which means
164	// that any function analyses they require must be required by the first loop
165	// pass in the manager (so that it is computed before the loop pass manager
166	// runs) and preserved by all loop pasess in the manager. To make this
167	// reasonably robust, the set needed for most loop passes is maintained here.
168	// If your loop pass requires an analysis not listed here, you will need to
169	// carefully audit the loop pass manager nesting structure that results.
170	AU.addRequired<AAResultsWrapperPass>();
171	AU.addPreserved<AAResultsWrapperPass>();
172	AU.addPreserved<BasicAAWrapperPass>();
173	AU.addPreserved<GlobalsAAWrapperPass>();
174	AU.addPreserved<SCEVAAWrapperPass>();
175	AU.addRequired<ScalarEvolutionWrapperPass>();
176	AU.addPreserved<ScalarEvolutionWrapperPass>();
177	// FIXME: When all loop passes preserve MemorySSA, it can be required and
178	// preserved here instead of the individual handling in each pass.
179	}
180
181	/// Manually defined generic "LoopPass" dependency initialization. This is used
182	/// to initialize the exact set of passes from above in \c
183	/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
184	/// with:
185	///
186	/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
187	///
188	/// As-if "LoopPass" were a pass.
189	void llvm::initializeLoopPassPass(PassRegistry &Registry) {
190	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
191	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
192	INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
193	INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
194	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
195	INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
196	INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
197	INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
198	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
199	INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
200	}
201
202	/// Create MDNode for input string.
203	static MDNode createStringMetadata(Loop TheLoop, StringRef Name, unsigned V) {
204	LLVMContext &Context = TheLoop->getHeader()->getContext();
205	Metadata *MDs[] = {
206	MDString::get(Context, Str: Name),
207	ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C&: Context), V))};
208	return MDNode::get(Context, MDs);
209	}
210
211	/// Set input string into loop metadata by keeping other values intact.
212	/// If the string is already in loop metadata update value if it is
213	/// different.
214	void llvm::addStringMetadataToLoop(Loop TheLoop, const* char *StringMD,
215	unsigned V) {
216	SmallVector<Metadata *, `4`> MDs(`1`);
217	// If the loop already has metadata, retain it.
218	MDNode *LoopID = TheLoop->getLoopID();
219	if (LoopID) {
220	for (unsigned i = `1`, ie = LoopID->getNumOperands(); i < ie; ++i) {
221	MDNode *Node = cast<MDNode>(Val: LoopID->getOperand(I: i));
222	// If it is of form key = value, try to parse it.
223	if (Node->getNumOperands() == `2`) {
224	MDString *S = dyn_cast<MDString>(Val: Node->getOperand(I: `0`));
225	if (S && S->getString() == StringMD) {
226	ConstantInt *IntMD =
227	mdconst::extract_or_null<ConstantInt>(MD: Node->getOperand(I: `1`));
228	if (IntMD && IntMD->getSExtValue() == V)
229	// It is already in place. Do nothing.
230	return;
231	// We need to update the value, so just skip it here and it will
232	// be added after copying other existed nodes.
233	continue;
234	}
235	}
236	MDs.push_back(Elt: Node);
237	}
238	}
239	// Add new metadata.
240	MDs.push_back(Elt: createStringMetadata(TheLoop, Name: StringMD, V));
241	// Replace current metadata node with new one.
242	LLVMContext &Context = TheLoop->getHeader()->getContext();
243	MDNode *NewLoopID = MDNode::get(Context, MDs);
244	// Set operand 0 to refer to the loop id itself.
245	NewLoopID->replaceOperandWith(I: `0`, New: NewLoopID);
246	TheLoop->setLoopID(NewLoopID);
247	}
248
249	std::optional<ElementCount>
250	llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
251	std::optional<int> Width =
252	getOptionalIntLoopAttribute(TheLoop, Name: "llvm.loop.vectorize.width");
253
254	if (Width) {
255	std::optional<int> IsScalable = getOptionalIntLoopAttribute(
256	TheLoop, Name: "llvm.loop.vectorize.scalable.enable");
257	return ElementCount::get(MinVal: Width, Scalable: IsScalable.value_or(u: false*));
258	}
259
260	return std::nullopt;
261	}
262
263	std::optional<MDNode *> llvm::makeFollowupLoopID(
264	MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
265	const char InheritOptionsExceptPrefix, bool* AlwaysNew) {
266	if (!OrigLoopID) {
267	if (AlwaysNew)
268	return nullptr;
269	return std::nullopt;
270	}
271
272	assert(OrigLoopID->getOperand(`0`) == OrigLoopID);
273
274	bool InheritAllAttrs = !InheritOptionsExceptPrefix;
275	bool InheritSomeAttrs =
276	InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[`0`] != `'\0'`;
277	SmallVector<Metadata *, `8`> MDs;
278	MDs.push_back(Elt: nullptr);
279
280	bool Changed = false;
281	if (InheritAllAttrs \|\| InheritSomeAttrs) {
282	for (const MDOperand &Existing : drop_begin(RangeOrContainer: OrigLoopID->operands())) {
283	MDNode *Op = cast<MDNode>(Val: Existing.get());
284
285	auto InheritThisAttribute = [InheritSomeAttrs,
286	InheritOptionsExceptPrefix](MDNode *Op) {
287	if (!InheritSomeAttrs)
288	return false;
289
290	// Skip malformatted attribute metadata nodes.
291	if (Op->getNumOperands() == `0`)
292	return true;
293	Metadata *NameMD = Op->getOperand(I: `0`).get();
294	if (!isa<MDString>(Val: NameMD))
295	return true;
296	StringRef AttrName = cast<MDString>(Val: NameMD)->getString();
297
298	// Do not inherit excluded attributes.
299	return !AttrName.starts_with(Prefix: InheritOptionsExceptPrefix);
300	};
301
302	if (InheritThisAttribute (Op))
303	MDs.push_back(Elt: Op);
304	else
305	Changed = true;
306	}
307	} else {
308	// Modified if we dropped at least one attribute.
309	Changed = OrigLoopID->getNumOperands() > `1`;
310	}
311
312	bool HasAnyFollowup = false;
313	for (StringRef OptionName : FollowupOptions) {
314	MDNode *FollowupNode = findOptionMDForLoopID(LoopID: OrigLoopID, Name: OptionName);
315	if (!FollowupNode)
316	continue;
317
318	HasAnyFollowup = true;
319	for (const MDOperand &Option : drop_begin(RangeOrContainer: FollowupNode->operands())) {
320	MDs.push_back(Elt: Option.get());
321	Changed = true;
322	}
323	}
324
325	// Attributes of the followup loop not specified explicity, so signal to the
326	// transformation pass to add suitable attributes.
327	if (!AlwaysNew && !HasAnyFollowup)
328	return std::nullopt;
329
330	// If no attributes were added or remove, the previous loop Id can be reused.
331	if (!AlwaysNew && !Changed)
332	return OrigLoopID;
333
334	// No attributes is equivalent to having no !llvm.loop metadata at all.
335	if (MDs.size() == `1`)
336	return nullptr;
337
338	// Build the new loop ID.
339	MDTuple *FollowupLoopID = MDNode::get(Context&: OrigLoopID->getContext(), MDs);
340	FollowupLoopID->replaceOperandWith(I: `0`, New: FollowupLoopID);
341	return FollowupLoopID;
342	}
343
344	bool llvm::hasDisableAllTransformsHint(const Loop *L) {
345	return getBooleanLoopAttribute(TheLoop: L, Name: LLVMLoopDisableNonforced);
346	}
347
348	bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
349	return getBooleanLoopAttribute(TheLoop: L, Name: LLVMLoopDisableLICM);
350	}
351
352	TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
353	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.disable"))
354	return TM_SuppressedByUser;
355
356	std::optional<int> Count =
357	getOptionalIntLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.count");
358	if (Count)
359	return *Count == `1` ? TM_SuppressedByUser : TM_ForcedByUser;
360
361	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.enable"))
362	return TM_ForcedByUser;
363
364	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.full"))
365	return TM_ForcedByUser;
366
367	if (hasDisableAllTransformsHint(L))
368	return TM_Disable;
369
370	return TM_Unspecified;
371	}
372
373	TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
374	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll_and_jam.disable"))
375	return TM_SuppressedByUser;
376
377	std::optional<int> Count =
378	getOptionalIntLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll_and_jam.count");
379	if (Count)
380	return *Count == `1` ? TM_SuppressedByUser : TM_ForcedByUser;
381
382	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll_and_jam.enable"))
383	return TM_ForcedByUser;
384
385	if (hasDisableAllTransformsHint(L))
386	return TM_Disable;
387
388	return TM_Unspecified;
389	}
390
391	TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
392	std::optional<bool> Enable =
393	getOptionalBoolLoopAttribute(TheLoop: L, Name: "llvm.loop.vectorize.enable");
394
395	if (Enable == false)
396	return TM_SuppressedByUser;
397
398	std::optional<ElementCount> VectorizeWidth =
399	getOptionalElementCountLoopAttribute(TheLoop: L);
400	std::optional<int> InterleaveCount =
401	getOptionalIntLoopAttribute(TheLoop: L, Name: "llvm.loop.interleave.count");
402
403	// 'Forcing' vector width and interleave count to one effectively disables
404	// this tranformation.
405	if (Enable == true && VectorizeWidth && VectorizeWidth ->isScalar() &&
406	InterleaveCount == `1`)
407	return TM_SuppressedByUser;
408
409	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.isvectorized"))
410	return TM_Disable;
411
412	if (Enable == true)
413	return TM_ForcedByUser;
414
415	if ((VectorizeWidth && VectorizeWidth ->isScalar()) && InterleaveCount == `1`)
416	return TM_Disable;
417
418	if ((VectorizeWidth && VectorizeWidth ->isVector()) \|\| InterleaveCount > `1`)
419	return TM_Enable;
420
421	if (hasDisableAllTransformsHint(L))
422	return TM_Disable;
423
424	return TM_Unspecified;
425	}
426
427	TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
428	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.distribute.enable"))
429	return TM_ForcedByUser;
430
431	if (hasDisableAllTransformsHint(L))
432	return TM_Disable;
433
434	return TM_Unspecified;
435	}
436
437	TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
438	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.licm_versioning.disable"))
439	return TM_SuppressedByUser;
440
441	if (hasDisableAllTransformsHint(L))
442	return TM_Disable;
443
444	return TM_Unspecified;
445	}
446
447	/// Does a BFS from a given node to all of its children inside a given loop.
448	/// The returned vector of nodes includes the starting point.
449	SmallVector<DomTreeNode *, `16`>
450	llvm::collectChildrenInLoop(DomTreeNode N, const* Loop *CurLoop) {
451	SmallVector<DomTreeNode *, `16`> Worklist;
452	auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
453	// Only include subregions in the top level loop.
454	BasicBlock *BB = DTN->getBlock();
455	if (CurLoop->contains(BB))
456	Worklist.push_back(Elt: DTN);
457	};
458
459	AddRegionToWorklist (N);
460
461	for (size_t I = `0`; I < Worklist.size(); I++) {
462	for (DomTreeNode *Child : Worklist [I]->children())
463	AddRegionToWorklist (Child);
464	}
465
466	return Worklist;
467	}
468
469	bool llvm::isAlmostDeadIV(PHINode PN, BasicBlock LatchBlock, Value *Cond) {
470	int LatchIdx = PN->getBasicBlockIndex(BB: LatchBlock);
471	assert(LatchIdx != -`1` && "LatchBlock is not a case in this PHINode");
472	Value *IncV = PN->getIncomingValue(i: LatchIdx);
473
474	for (User *U : PN->users())
475	if (U != Cond && U != IncV) return false;
476
477	for (User *U : IncV->users())
478	if (U != Cond && U != PN) return false;
479	return true;
480	}
481
482
483	void llvm::deleteDeadLoop(Loop L, DominatorTree DT, ScalarEvolution *SE,
484	LoopInfo LI, MemorySSA MSSA) {
485	assert((!DT \|\| L->isLCSSAForm(*DT)) && "Expected LCSSA!");
486	auto *Preheader = L->getLoopPreheader();
487	assert(Preheader && "Preheader should exist!");
488
489	std::unique_ptr<MemorySSAUpdater> MSSAU;
490	if (MSSA)
491	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
492
493	// Now that we know the removal is safe, remove the loop by changing the
494	// branch from the preheader to go to the single exit block.
495	//
496	// Because we're deleting a large chunk of code at once, the sequence in which
497	// we remove things is very important to avoid invalidation issues.
498
499	// Tell ScalarEvolution that the loop is deleted. Do this before
500	// deleting the loop so that ScalarEvolution can look at the loop
501	// to determine what it needs to clean up.
502	if (SE) {
503	SE->forgetLoop(L);
504	SE->forgetBlockAndLoopDispositions();
505	}
506
507	Instruction *OldTerm = Preheader->getTerminator();
508	assert(!OldTerm->mayHaveSideEffects() &&
509	"Preheader must end with a side-effect-free terminator");
510	assert(OldTerm->getNumSuccessors() == `1` &&
511	"Preheader must have a single successor");
512	// Connect the preheader to the exit block. Keep the old edge to the header
513	// around to perform the dominator tree update in two separate steps
514	// -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
515	// preheader -> header.
516	//
517	//
518	// 0. Preheader 1. Preheader 2. Preheader
519	// \| \| \| \|
520	// V \| V \|
521	// Header <--\ \| Header <--\ \| Header <--\
522	// \| \| \| \| \| \| \| \| \| \| \|
523	// \| V \| \| \| V \| \| \| V \|
524	// \| Body --/ \| \| Body --/ \| \| Body --/
525	// V V V V V
526	// Exit Exit Exit
527	//
528	// By doing this is two separate steps we can perform the dominator tree
529	// update without using the batch update API.
530	//
531	// Even when the loop is never executed, we cannot remove the edge from the
532	// source block to the exit block. Consider the case where the unexecuted loop
533	// branches back to an outer loop. If we deleted the loop and removed the edge
534	// coming to this inner loop, this will break the outer loop structure (by
535	// deleting the backedge of the outer loop). If the outer loop is indeed a
536	// non-loop, it will be deleted in a future iteration of loop deletion pass.
537	IRBuilder<> Builder(OldTerm);
538
539	auto *ExitBlock = L->getUniqueExitBlock();
540	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
541	if (ExitBlock) {
542	assert(ExitBlock && "Should have a unique exit block!");
543	assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
544
545	Builder.CreateCondBr(Cond: Builder.getFalse(), True: L->getHeader(), False: ExitBlock);
546	// Remove the old branch. The conditional branch becomes a new terminator.
547	OldTerm->eraseFromParent();
548
549	// Rewrite phis in the exit block to get their inputs from the Preheader
550	// instead of the exiting block.
551	for (PHINode &P : ExitBlock->phis()) {
552	// Set the zero'th element of Phi to be from the preheader and remove all
553	// other incoming values. Given the loop has dedicated exits, all other
554	// incoming values must be from the exiting blocks.
555	int PredIndex = `0`;
556	P.setIncomingBlock(i: PredIndex, BB: Preheader);
557	// Removes all incoming values from all other exiting blocks (including
558	// duplicate values from an exiting block).
559	// Nuke all entries except the zero'th entry which is the preheader entry.
560	P.removeIncomingValueIf(Predicate: [](unsigned Idx) { return Idx != `0`; },
561	/ DeletePHIIfEmpty / false);
562
563	assert((P.getNumIncomingValues() == `1` &&
564	P.getIncomingBlock(PredIndex) == Preheader) &&
565	"Should have exactly one value and that's from the preheader!");
566	}
567
568	if (DT) {
569	DTU.applyUpdates(Updates: {{DominatorTree::Insert, Preheader, ExitBlock}});
570	if (MSSA) {
571	MSSAU ->applyUpdates(Updates: {{DominatorTree::Insert, Preheader, ExitBlock}},
572	DT&: *DT);
573	if (VerifyMemorySSA)
574	MSSA->verifyMemorySSA();
575	}
576	}
577
578	// Disconnect the loop body by branching directly to its exit.
579	Builder.SetInsertPoint(Preheader->getTerminator());
580	Builder.CreateBr(Dest: ExitBlock);
581	// Remove the old branch.
582	Preheader->getTerminator()->eraseFromParent();
583	} else {
584	assert(L->hasNoExitBlocks() &&
585	"Loop should have either zero or one exit blocks.");
586
587	Builder.SetInsertPoint(OldTerm);
588	Builder.CreateUnreachable();
589	Preheader->getTerminator()->eraseFromParent();
590	}
591
592	if (DT) {
593	DTU.applyUpdates(Updates: {{DominatorTree::Delete, Preheader, L->getHeader()}});
594	if (MSSA) {
595	MSSAU ->applyUpdates(Updates: {{DominatorTree::Delete, Preheader, L->getHeader()}},
596	DT&: *DT);
597	SmallSetVector<BasicBlock *, `8`> DeadBlockSet(L->block_begin(),
598	L->block_end());
599	MSSAU ->removeBlocks(DeadBlocks: DeadBlockSet);
600	if (VerifyMemorySSA)
601	MSSA->verifyMemorySSA();
602	}
603	}
604
605	// Use a map to unique and a vector to guarantee deterministic ordering.
606	llvm::SmallDenseSet<DebugVariable, `4`> DeadDebugSet;
607	llvm::SmallVector<DbgVariableIntrinsic *, `4`> DeadDebugInst;
608	llvm::SmallVector<DbgVariableRecord *, `4`> DeadDbgVariableRecords;
609
610	if (ExitBlock) {
611	// Given LCSSA form is satisfied, we should not have users of instructions
612	// within the dead loop outside of the loop. However, LCSSA doesn't take
613	// unreachable uses into account. We handle them here.
614	// We could do it after drop all references (in this case all users in the
615	// loop will be already eliminated and we have less work to do but according
616	// to API doc of User::dropAllReferences only valid operation after dropping
617	// references, is deletion. So let's substitute all usages of
618	// instruction from the loop with poison value of corresponding type first.
619	for (auto *Block : L->blocks())
620	for (Instruction &I : *Block) {
621	auto *Poison = PoisonValue::get(T: I.getType());
622	for (Use &U : llvm::make_early_inc_range(Range: I.uses())) {
623	if (auto *Usr = dyn_cast<Instruction>(Val: U.getUser()))
624	if (L->contains(BB: Usr->getParent()))
625	continue;
626	// If we have a DT then we can check that uses outside a loop only in
627	// unreachable block.
628	if (DT)
629	assert(!DT->isReachableFromEntry(U) &&
630	"Unexpected user in reachable block");
631	U.set(Poison);
632	}
633
634	// RemoveDIs: do the same as below for DbgVariableRecords.
635	if (Block->IsNewDbgInfoFormat) {
636	for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
637	Range: filterDbgVars(R: I.getDbgRecordRange()))) {
638	DebugVariable Key(DVR.getVariable(), DVR.getExpression(),
639	DVR.getDebugLoc().get());
640	if (!DeadDebugSet.insert(V: Key).second)
641	continue;
642	// Unlinks the DVR from it's container, for later insertion.
643	DVR.removeFromParent();
644	DeadDbgVariableRecords.push_back(Elt: &DVR);
645	}
646	}
647
648	// For one of each variable encountered, preserve a debug intrinsic (set
649	// to Poison) and transfer it to the loop exit. This terminates any
650	// variable locations that were set during the loop.
651	auto *DVI = dyn_cast<DbgVariableIntrinsic>(Val: &I);
652	if (!DVI)
653	continue;
654	if (!DeadDebugSet.insert(V: DebugVariable (DVI)).second)
655	continue;
656	DeadDebugInst.push_back(Elt: DVI);
657	}
658
659	// After the loop has been deleted all the values defined and modified
660	// inside the loop are going to be unavailable. Values computed in the
661	// loop will have been deleted, automatically causing their debug uses
662	// be be replaced with undef. Loop invariant values will still be available.
663	// Move dbg.values out the loop so that earlier location ranges are still
664	// terminated and loop invariant assignments are preserved.
665	DIBuilder DIB(*ExitBlock->getModule());
666	BasicBlock::iterator InsertDbgValueBefore =
667	ExitBlock->getFirstInsertionPt();
668	assert(InsertDbgValueBefore != ExitBlock->end() &&
669	"There should be a non-PHI instruction in exit block, else these "
670	"instructions will have no parent.");
671
672	for (auto *DVI : DeadDebugInst)
673	DVI->moveBefore(BB&: *ExitBlock, I: InsertDbgValueBefore);
674
675	// Due to the "head" bit in BasicBlock::iterator, we're going to insert
676	// each DbgVariableRecord right at the start of the block, wheras dbg.values
677	// would be repeatedly inserted before the first instruction. To replicate
678	// this behaviour, do it backwards.
679	for (DbgVariableRecord *DVR : llvm::reverse(C&: DeadDbgVariableRecords))
680	ExitBlock->insertDbgRecordBefore(DR: DVR, Here: InsertDbgValueBefore);
681	}
682
683	// Remove the block from the reference counting scheme, so that we can
684	// delete it freely later.
685	for (auto *Block : L->blocks())
686	Block->dropAllReferences();
687
688	if (MSSA && VerifyMemorySSA)
689	MSSA->verifyMemorySSA();
690
691	if (LI) {
692	// Erase the instructions and the blocks without having to worry
693	// about ordering because we already dropped the references.
694	// NOTE: This iteration is safe because erasing the block does not remove
695	// its entry from the loop's block list. We do that in the next section.
696	for (BasicBlock *BB : L->blocks())
697	BB->eraseFromParent();
698
699	// Finally, the blocks from loopinfo. This has to happen late because
700	// otherwise our loop iterators won't work.
701
702	SmallPtrSet<BasicBlock *, `8`> blocks;
703	blocks.insert(I: L->block_begin(), E: L->block_end());
704	for (BasicBlock *BB : blocks)
705	LI->removeBlock(BB);
706
707	// The last step is to update LoopInfo now that we've eliminated this loop.
708	// Note: LoopInfo::erase remove the given loop and relink its subloops with
709	// its parent. While removeLoop/removeChildLoop remove the given loop but
710	// not relink its subloops, which is what we want.
711	if (Loop *ParentLoop = L->getParentLoop()) {
712	Loop::iterator I = find(Range&: *ParentLoop, Val: L);
713	assert(I != ParentLoop->end() && "Couldn't find loop");
714	ParentLoop->removeChildLoop(I);
715	} else {
716	Loop::iterator I = find(Range&: *LI, Val: L);
717	assert(I != LI->end() && "Couldn't find loop");
718	LI->removeLoop(I);
719	}
720	LI->destroy(L);
721	}
722	}
723
724	void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
725	LoopInfo &LI, MemorySSA *MSSA) {
726	auto *Latch = L->getLoopLatch();
727	assert(Latch && "multiple latches not yet supported");
728	auto *Header = L->getHeader();
729	Loop *OutermostLoop = L->getOutermostLoop();
730
731	SE.forgetLoop(L);
732	SE.forgetBlockAndLoopDispositions();
733
734	std::unique_ptr<MemorySSAUpdater> MSSAU;
735	if (MSSA)
736	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
737
738	// Update the CFG and domtree. We chose to special case a couple of
739	// of common cases for code quality and test readability reasons.
740	[&]() -> void {
741	if (auto *BI = dyn_cast<BranchInst>(Val: Latch->getTerminator())) {
742	if (!BI->isConditional()) {
743	DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
744	(void)changeToUnreachable(I: BI, /PreserveLCSSA/ true, DTU: &DTU,
745	MSSAU: MSSAU.get());
746	return;
747	}
748
749	// Conditional latch/exit - note that latch can be shared by inner
750	// and outer loop so the other target doesn't need to an exit
751	if (L->isLoopExiting(BB: Latch)) {
752	// TODO: Generalize ConstantFoldTerminator so that it can be used
753	// here without invalidating LCSSA or MemorySSA. (Tricky case for
754	// LCSSA: header is an exit block of a preceeding sibling loop w/o
755	// dedicated exits.)
756	const unsigned ExitIdx = L->contains(BB: BI->getSuccessor(i: `0`)) ? `1` : `0`;
757	BasicBlock *ExitBB = BI->getSuccessor(i: ExitIdx);
758
759	DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
760	Header->removePredecessor(Pred: Latch, KeepOneInputPHIs: true);
761
762	IRBuilder<> Builder(BI);
763	auto *NewBI = Builder.CreateBr(Dest: ExitBB);
764	// Transfer the metadata to the new branch instruction (minus the
765	// loop info since this is no longer a loop)
766	NewBI->copyMetadata(SrcInst: *BI, WL: {LLVMContext::MD_dbg,
767	LLVMContext::MD_annotation});
768
769	BI->eraseFromParent();
770	DTU.applyUpdates(Updates: {{DominatorTree::Delete, Latch, Header}});
771	if (MSSA)
772	MSSAU ->applyUpdates(Updates: {{DominatorTree::Delete, Latch, Header}}, DT);
773	return;
774	}
775	}
776
777	// General case. By splitting the backedge, and then explicitly making it
778	// unreachable we gracefully handle corner cases such as switch and invoke
779	// termiantors.
780	auto *BackedgeBB = SplitEdge(From: Latch, To: Header, DT: &DT, LI: &LI, MSSAU: MSSAU.get());
781
782	DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
783	(void)changeToUnreachable(I: BackedgeBB->getTerminator(),
784	/PreserveLCSSA/ true, DTU: &DTU, MSSAU: MSSAU.get());
785	}();
786
787	// Erase (and destroy) this loop instance. Handles relinking sub-loops
788	// and blocks within the loop as needed.
789	LI.erase(L);
790
791	// If the loop we broke had a parent, then changeToUnreachable might have
792	// caused a block to be removed from the parent loop (see loop_nest_lcssa
793	// test case in zero-btc.ll for an example), thus changing the parent's
794	// exit blocks. If that happened, we need to rebuild LCSSA on the outermost
795	// loop which might have a had a block removed.
796	if (OutermostLoop != L)
797	formLCSSARecursively(L&: *OutermostLoop, DT, LI: &LI, SE: &SE);
798	}
799
800
801	/// Checks if \p L has an exiting latch branch. There may also be other
802	/// exiting blocks. Returns branch instruction terminating the loop
803	/// latch if above check is successful, nullptr otherwise.
804	static BranchInst getExpectedExitLoopLatchBranch(Loop L) {
805	BasicBlock *Latch = L->getLoopLatch();
806	if (!Latch)
807	return nullptr;
808
809	BranchInst *LatchBR = dyn_cast<BranchInst>(Val: Latch->getTerminator());
810	if (!LatchBR \|\| LatchBR->getNumSuccessors() != `2` \|\| !L->isLoopExiting(BB: Latch))
811	return nullptr;
812
813	assert((LatchBR->getSuccessor(`0`) == L->getHeader() \|\|
814	LatchBR->getSuccessor(`1`) == L->getHeader()) &&
815	"At least one edge out of the latch must go to the header");
816
817	return LatchBR;
818	}
819
820	/// Return the estimated trip count for any exiting branch which dominates
821	/// the loop latch.
822	static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch,
823	Loop *L,
824	uint64_t &OrigExitWeight) {
825	// To estimate the number of times the loop body was executed, we want to
826	// know the number of times the backedge was taken, vs. the number of times
827	// we exited the loop.
828	uint64_t LoopWeight, ExitWeight;
829	if (!extractBranchWeights(I: *ExitingBranch, TrueVal&: LoopWeight, FalseVal&: ExitWeight))
830	return std::nullopt;
831
832	if (L->contains(BB: ExitingBranch->getSuccessor(i: `1`)))
833	std::swap(a&: LoopWeight, b&: ExitWeight);
834
835	if (!ExitWeight)
836	// Don't have a way to return predicated infinite
837	return std::nullopt;
838
839	OrigExitWeight = ExitWeight;
840
841	// Estimated exit count is a ratio of the loop weight by the weight of the
842	// edge exiting the loop, rounded to nearest.
843	uint64_t ExitCount = llvm::divideNearest(Numerator: LoopWeight, Denominator: ExitWeight);
844	// Estimated trip count is one plus estimated exit count.
845	return ExitCount + `1`;
846	}
847
848	std::optional<unsigned>
849	llvm::getLoopEstimatedTripCount(Loop *L,
850	unsigned *EstimatedLoopInvocationWeight) {
851	// Currently we take the estimate exit count only from the loop latch,
852	// ignoring other exiting blocks. This can overestimate the trip count
853	// if we exit through another exit, but can never underestimate it.
854	// TODO: incorporate information from other exits
855	if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
856	uint64_t ExitWeight;
857	if (std::optional<uint64_t> EstTripCount =
858	getEstimatedTripCount(ExitingBranch: LatchBranch, L, OrigExitWeight&: ExitWeight)) {
859	if (EstimatedLoopInvocationWeight)
860	*EstimatedLoopInvocationWeight = ExitWeight;
861	return *EstTripCount;
862	}
863	}
864	return std::nullopt;
865	}
866
867	bool llvm::setLoopEstimatedTripCount(Loop L, unsigned* EstimatedTripCount,
868	unsigned EstimatedloopInvocationWeight) {
869	// At the moment, we currently support changing the estimate trip count of
870	// the latch branch only. We could extend this API to manipulate estimated
871	// trip counts for any exit.
872	BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
873	if (!LatchBranch)
874	return false;
875
876	// Calculate taken and exit weights.
877	unsigned LatchExitWeight = `0`;
878	unsigned BackedgeTakenWeight = `0`;
879
880	if (EstimatedTripCount > `0`) {
881	LatchExitWeight = EstimatedloopInvocationWeight;
882	BackedgeTakenWeight = (EstimatedTripCount - `1`) * LatchExitWeight;
883	}
884
885	// Make a swap if back edge is taken when condition is "false".
886	if (LatchBranch->getSuccessor(i: `0`) != L->getHeader())
887	std::swap(a&: BackedgeTakenWeight, b&: LatchExitWeight);
888
889	MDBuilder MDB(LatchBranch->getContext());
890
891	// Set/Update profile metadata.
892	LatchBranch->setMetadata(
893	KindID: LLVMContext::MD_prof,
894	Node: MDB.createBranchWeights(TrueWeight: BackedgeTakenWeight, FalseWeight: LatchExitWeight));
895
896	return true;
897	}
898
899	bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
900	ScalarEvolution &SE) {
901	Loop *OuterL = InnerLoop->getParentLoop();
902	if (!OuterL)
903	return true;
904
905	// Get the backedge taken count for the inner loop
906	BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
907	const SCEV *InnerLoopBECountSC = SE.getExitCount(L: InnerLoop, ExitingBlock: InnerLoopLatch);
908	if (isa<SCEVCouldNotCompute>(Val: InnerLoopBECountSC) \|\|
909	!InnerLoopBECountSC->getType()->isIntegerTy())
910	return false;
911
912	// Get whether count is invariant to the outer loop
913	ScalarEvolution::LoopDisposition LD =
914	SE.getLoopDisposition(S: InnerLoopBECountSC, L: OuterL);
915	if (LD != ScalarEvolution::LoopInvariant)
916	return false;
917
918	return true;
919	}
920
921	constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
922	switch (RK) {
923	default:
924	llvm_unreachable("Unexpected recurrence kind");
925	case RecurKind::Add:
926	return Intrinsic::vector_reduce_add;
927	case RecurKind::Mul:
928	return Intrinsic::vector_reduce_mul;
929	case RecurKind::And:
930	return Intrinsic::vector_reduce_and;
931	case RecurKind::Or:
932	return Intrinsic::vector_reduce_or;
933	case RecurKind::Xor:
934	return Intrinsic::vector_reduce_xor;
935	case RecurKind::FMulAdd:
936	case RecurKind::FAdd:
937	return Intrinsic::vector_reduce_fadd;
938	case RecurKind::FMul:
939	return Intrinsic::vector_reduce_fmul;
940	case RecurKind::SMax:
941	return Intrinsic::vector_reduce_smax;
942	case RecurKind::SMin:
943	return Intrinsic::vector_reduce_smin;
944	case RecurKind::UMax:
945	return Intrinsic::vector_reduce_umax;
946	case RecurKind::UMin:
947	return Intrinsic::vector_reduce_umin;
948	case RecurKind::FMax:
949	return Intrinsic::vector_reduce_fmax;
950	case RecurKind::FMin:
951	return Intrinsic::vector_reduce_fmin;
952	case RecurKind::FMaximum:
953	return Intrinsic::vector_reduce_fmaximum;
954	case RecurKind::FMinimum:
955	return Intrinsic::vector_reduce_fminimum;
956	}
957	}
958
959	unsigned llvm::getArithmeticReductionInstruction(Intrinsic::ID RdxID) {
960	switch (RdxID) {
961	case Intrinsic::vector_reduce_fadd:
962	return Instruction::FAdd;
963	case Intrinsic::vector_reduce_fmul:
964	return Instruction::FMul;
965	case Intrinsic::vector_reduce_add:
966	return Instruction::Add;
967	case Intrinsic::vector_reduce_mul:
968	return Instruction::Mul;
969	case Intrinsic::vector_reduce_and:
970	return Instruction::And;
971	case Intrinsic::vector_reduce_or:
972	return Instruction::Or;
973	case Intrinsic::vector_reduce_xor:
974	return Instruction::Xor;
975	case Intrinsic::vector_reduce_smax:
976	case Intrinsic::vector_reduce_smin:
977	case Intrinsic::vector_reduce_umax:
978	case Intrinsic::vector_reduce_umin:
979	return Instruction::ICmp;
980	case Intrinsic::vector_reduce_fmax:
981	case Intrinsic::vector_reduce_fmin:
982	return Instruction::FCmp;
983	default:
984	llvm_unreachable("Unexpected ID");
985	}
986	}
987
988	Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID) {
989	switch (RdxID) {
990	default:
991	llvm_unreachable("Unknown min/max recurrence kind");
992	case Intrinsic::vector_reduce_umin:
993	return Intrinsic::umin;
994	case Intrinsic::vector_reduce_umax:
995	return Intrinsic::umax;
996	case Intrinsic::vector_reduce_smin:
997	return Intrinsic::smin;
998	case Intrinsic::vector_reduce_smax:
999	return Intrinsic::smax;
1000	case Intrinsic::vector_reduce_fmin:
1001	return Intrinsic::minnum;
1002	case Intrinsic::vector_reduce_fmax:
1003	return Intrinsic::maxnum;
1004	case Intrinsic::vector_reduce_fminimum:
1005	return Intrinsic::minimum;
1006	case Intrinsic::vector_reduce_fmaximum:
1007	return Intrinsic::maximum;
1008	}
1009	}
1010
1011	Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
1012	switch (RK) {
1013	default:
1014	llvm_unreachable("Unknown min/max recurrence kind");
1015	case RecurKind::UMin:
1016	return Intrinsic::umin;
1017	case RecurKind::UMax:
1018	return Intrinsic::umax;
1019	case RecurKind::SMin:
1020	return Intrinsic::smin;
1021	case RecurKind::SMax:
1022	return Intrinsic::smax;
1023	case RecurKind::FMin:
1024	return Intrinsic::minnum;
1025	case RecurKind::FMax:
1026	return Intrinsic::maxnum;
1027	case RecurKind::FMinimum:
1028	return Intrinsic::minimum;
1029	case RecurKind::FMaximum:
1030	return Intrinsic::maximum;
1031	}
1032	}
1033
1034	RecurKind llvm::getMinMaxReductionRecurKind(Intrinsic::ID RdxID) {
1035	switch (RdxID) {
1036	case Intrinsic::vector_reduce_smax:
1037	return RecurKind::SMax;
1038	case Intrinsic::vector_reduce_smin:
1039	return RecurKind::SMin;
1040	case Intrinsic::vector_reduce_umax:
1041	return RecurKind::UMax;
1042	case Intrinsic::vector_reduce_umin:
1043	return RecurKind::UMin;
1044	case Intrinsic::vector_reduce_fmax:
1045	return RecurKind::FMax;
1046	case Intrinsic::vector_reduce_fmin:
1047	return RecurKind::FMin;
1048	default:
1049	return RecurKind::None;
1050	}
1051	}
1052
1053	CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
1054	switch (RK) {
1055	default:
1056	llvm_unreachable("Unknown min/max recurrence kind");
1057	case RecurKind::UMin:
1058	return CmpInst::ICMP_ULT;
1059	case RecurKind::UMax:
1060	return CmpInst::ICMP_UGT;
1061	case RecurKind::SMin:
1062	return CmpInst::ICMP_SLT;
1063	case RecurKind::SMax:
1064	return CmpInst::ICMP_SGT;
1065	case RecurKind::FMin:
1066	return CmpInst::FCMP_OLT;
1067	case RecurKind::FMax:
1068	return CmpInst::FCMP_OGT;
1069	// We do not add FMinimum/FMaximum recurrence kind here since there is no
1070	// equivalent predicate which compares signed zeroes according to the
1071	// semantics of the intrinsics (llvm.minimum/maximum).
1072	}
1073	}
1074
1075	Value llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value Left,
1076	Value *Right) {
1077	Type *Ty = Left->getType();
1078	if (Ty->isIntOrIntVectorTy() \|\|
1079	(RK == RecurKind::FMinimum \|\| RK == RecurKind::FMaximum)) {
1080	// TODO: Add float minnum/maxnum support when FMF nnan is set.
1081	Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
1082	return Builder.CreateIntrinsic(RetTy: Ty, ID: Id, Args: {Left, Right}, FMFSource: nullptr,
1083	Name: "rdx.minmax");
1084	}
1085	CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
1086	Value *Cmp = Builder.CreateCmp(Pred, LHS: Left, RHS: Right, Name: "rdx.minmax.cmp");
1087	Value *Select = Builder.CreateSelect(C: Cmp, True: Left, False: Right, Name: "rdx.minmax.select");
1088	return Select;
1089	}
1090
1091	// Helper to generate an ordered reduction.
1092	Value llvm::getOrderedReduction(IRBuilderBase &Builder, Value Acc, Value *Src,
1093	unsigned Op, RecurKind RdxKind) {
1094	unsigned VF = cast<FixedVectorType>(Val: Src->getType())->getNumElements();
1095
1096	// Extract and apply reduction ops in ascending order:
1097	// e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
1098	Value *Result = Acc;
1099	for (unsigned ExtractIdx = `0`; ExtractIdx != VF; ++ExtractIdx) {
1100	Value *Ext =
1101	Builder.CreateExtractElement(Vec: Src, Idx: Builder.getInt32(C: ExtractIdx));
1102
1103	if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1104	Result = Builder.CreateBinOp(Opc: (Instruction::BinaryOps)Op, LHS: Result, RHS: Ext,
1105	Name: "bin.rdx");
1106	} else {
1107	assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
1108	"Invalid min/max");
1109	Result = createMinMaxOp(Builder, RK: RdxKind, Left: Result, Right: Ext);
1110	}
1111	}
1112
1113	return Result;
1114	}
1115
1116	// Helper to generate a log2 shuffle reduction.
1117	Value llvm::getShuffleReduction(IRBuilderBase &Builder, Value Src,
1118	unsigned Op,
1119	TargetTransformInfo::ReductionShuffle RS,
1120	RecurKind RdxKind) {
1121	unsigned VF = cast<FixedVectorType>(Val: Src->getType())->getNumElements();
1122	// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
1123	// and vector ops, reducing the set of values being computed by half each
1124	// round.
1125	assert(isPowerOf2_32(VF) &&
1126	"Reduction emission only supported for pow2 vectors!");
1127	// Note: fast-math-flags flags are controlled by the builder configuration
1128	// and are assumed to apply to all generated arithmetic instructions. Other
1129	// poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
1130	// of the builder configuration, and since they're not passed explicitly,
1131	// will never be relevant here. Note that it would be generally unsound to
1132	// propagate these from an intrinsic call to the expansion anyways as we/
1133	// change the order of operations.
1134	auto BuildShuffledOp = [&Builder, &Op,
1135	&RdxKind](SmallVectorImpl<int> &ShuffleMask,
1136	Value &TmpVec) -> void* {
1137	Value *Shuf = Builder.CreateShuffleVector(V: TmpVec, Mask: ShuffleMask, Name: "rdx.shuf");
1138	if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1139	TmpVec = Builder.CreateBinOp(Opc: (Instruction::BinaryOps)Op, LHS: TmpVec, RHS: Shuf,
1140	Name: "bin.rdx");
1141	} else {
1142	assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
1143	"Invalid min/max");
1144	TmpVec = createMinMaxOp(Builder, RK: RdxKind, Left: TmpVec, Right: Shuf);
1145	}
1146	};
1147
1148	Value *TmpVec = Src;
1149	if (TargetTransformInfo::ReductionShuffle::Pairwise == RS) {
1150	SmallVector<int, `32`> ShuffleMask(VF);
1151	for (unsigned stride = `1`; stride < VF; stride <<= `1`) {
1152	// Initialise the mask with undef.
1153	std::fill(ShuffleMask.begin(), ShuffleMask.end(), -`1`);
1154	for (unsigned j = `0`; j < VF; j += stride << `1`) {
1155	ShuffleMask [j] = j + stride;
1156	}
1157	BuildShuffledOp (ShuffleMask, TmpVec);
1158	}
1159	} else {
1160	SmallVector<int, `32`> ShuffleMask(VF);
1161	for (unsigned i = VF; i != `1`; i >>= `1`) {
1162	// Move the upper half of the vector to the lower half.
1163	for (unsigned j = `0`; j != i / `2`; ++j)
1164	ShuffleMask [j] = i / `2` + j;
1165
1166	// Fill the rest of the mask with undef.
1167	std::fill(&ShuffleMask [i / `2`], ShuffleMask.end(), -`1`);
1168	BuildShuffledOp (ShuffleMask, TmpVec);
1169	}
1170	}
1171	// The result is in the first element of the vector.
1172	return Builder.CreateExtractElement(Vec: TmpVec, Idx: Builder.getInt32(C: `0`));
1173	}
1174
1175	Value llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value Src,
1176	const RecurrenceDescriptor &Desc,
1177	PHINode *OrigPhi) {
1178	assert(
1179	RecurrenceDescriptor::isAnyOfRecurrenceKind(Desc.getRecurrenceKind()) &&
1180	"Unexpected reduction kind");
1181	Value *InitVal = Desc.getRecurrenceStartValue();
1182	Value NewVal = nullptr*;
1183
1184	// First use the original phi to determine the new value we're trying to
1185	// select from in the loop.
1186	SelectInst SI = nullptr*;
1187	for (auto *U : OrigPhi->users()) {
1188	if ((SI = dyn_cast<SelectInst>(Val: U)))
1189	break;
1190	}
1191	assert(SI && "One user of the original phi should be a select");
1192
1193	if (SI->getTrueValue() == OrigPhi)
1194	NewVal = SI->getFalseValue();
1195	else {
1196	assert(SI->getFalseValue() == OrigPhi &&
1197	"At least one input to the select should be the original Phi");
1198	NewVal = SI->getTrueValue();
1199	}
1200
1201	// If any predicate is true it means that we want to select the new value.
1202	Value *AnyOf =
1203	Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
1204	// The compares in the loop may yield poison, which propagates through the
1205	// bitwise ORs. Freeze it here before the condition is used.
1206	AnyOf = Builder.CreateFreeze(V: AnyOf);
1207	return Builder.CreateSelect(C: AnyOf, True: NewVal, False: InitVal, Name: "rdx.select");
1208	}
1209
1210	Value llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value Src,
1211	RecurKind RdxKind) {
1212	auto *SrcVecEltTy = cast<VectorType>(Val: Src->getType())->getElementType();
1213	switch (RdxKind) {
1214	case RecurKind::Add:
1215	return Builder.CreateAddReduce(Src);
1216	case RecurKind::Mul:
1217	return Builder.CreateMulReduce(Src);
1218	case RecurKind::And:
1219	return Builder.CreateAndReduce(Src);
1220	case RecurKind::Or:
1221	return Builder.CreateOrReduce(Src);
1222	case RecurKind::Xor:
1223	return Builder.CreateXorReduce(Src);
1224	case RecurKind::FMulAdd:
1225	case RecurKind::FAdd:
1226	return Builder.CreateFAddReduce(Acc: ConstantFP::getNegativeZero(Ty: SrcVecEltTy),
1227	Src);
1228	case RecurKind::FMul:
1229	return Builder.CreateFMulReduce(Acc: ConstantFP::get(Ty: SrcVecEltTy, V: `1.0`), Src);
1230	case RecurKind::SMax:
1231	return Builder.CreateIntMaxReduce(Src, IsSigned: true);
1232	case RecurKind::SMin:
1233	return Builder.CreateIntMinReduce(Src, IsSigned: true);
1234	case RecurKind::UMax:
1235	return Builder.CreateIntMaxReduce(Src, IsSigned: false);
1236	case RecurKind::UMin:
1237	return Builder.CreateIntMinReduce(Src, IsSigned: false);
1238	case RecurKind::FMax:
1239	return Builder.CreateFPMaxReduce(Src);
1240	case RecurKind::FMin:
1241	return Builder.CreateFPMinReduce(Src);
1242	case RecurKind::FMinimum:
1243	return Builder.CreateFPMinimumReduce(Src);
1244	case RecurKind::FMaximum:
1245	return Builder.CreateFPMaximumReduce(Src);
1246	default:
1247	llvm_unreachable("Unhandled opcode");
1248	}
1249	}
1250
1251	Value llvm::createSimpleTargetReduction(VectorBuilder &VBuilder, Value Src,
1252	const RecurrenceDescriptor &Desc) {
1253	RecurKind Kind = Desc.getRecurrenceKind();
1254	assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
1255	"AnyOf reduction is not supported.");
1256	Intrinsic::ID Id = getReductionIntrinsicID(RK: Kind);
1257	auto *SrcTy = cast<VectorType>(Val: Src->getType());
1258	Type *SrcEltTy = SrcTy->getElementType();
1259	Value *Iden =
1260	Desc.getRecurrenceIdentity(K: Kind, Tp: SrcEltTy, FMF: Desc.getFastMathFlags());
1261	Value *Ops[] = {Iden, Src};
1262	return VBuilder.createSimpleTargetReduction(RdxID: Id, ValTy: SrcTy, VecOpArray: Ops);
1263	}
1264
1265	Value *llvm::createTargetReduction(IRBuilderBase &B,
1266	const RecurrenceDescriptor &Desc, Value *Src,
1267	PHINode *OrigPhi) {
1268	// TODO: Support in-order reductions based on the recurrence descriptor.
1269	// All ops in the reduction inherit fast-math-flags from the recurrence
1270	// descriptor.
1271	IRBuilderBase::FastMathFlagGuard FMFGuard(B);
1272	B.setFastMathFlags(Desc.getFastMathFlags());
1273
1274	RecurKind RK = Desc.getRecurrenceKind();
1275	if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind: RK))
1276	return createAnyOfTargetReduction(Builder&: B, Src, Desc, OrigPhi);
1277
1278	return createSimpleTargetReduction(Builder&: B, Src, RdxKind: RK);
1279	}
1280
1281	Value *llvm::createOrderedReduction(IRBuilderBase &B,
1282	const RecurrenceDescriptor &Desc,
1283	Value Src, Value Start) {
1284	assert((Desc.getRecurrenceKind() == RecurKind::FAdd \|\|
1285	Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1286	"Unexpected reduction kind");
1287	assert(Src->getType()->isVectorTy() && "Expected a vector type");
1288	assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1289
1290	return B.CreateFAddReduce(Acc: Start, Src);
1291	}
1292
1293	Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
1294	const RecurrenceDescriptor &Desc,
1295	Value Src, Value Start) {
1296	assert((Desc.getRecurrenceKind() == RecurKind::FAdd \|\|
1297	Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1298	"Unexpected reduction kind");
1299	assert(Src->getType()->isVectorTy() && "Expected a vector type");
1300	assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1301
1302	Intrinsic::ID Id = getReductionIntrinsicID(RK: RecurKind::FAdd);
1303	auto *SrcTy = cast<VectorType>(Val: Src->getType());
1304	Value *Ops[] = {Start, Src};
1305	return VBuilder.createSimpleTargetReduction(RdxID: Id, ValTy: SrcTy, VecOpArray: Ops);
1306	}
1307
1308	void llvm::propagateIRFlags(Value I, ArrayRef<Value > VL, Value *OpValue,
1309	bool IncludeWrapFlags) {
1310	auto *VecOp = dyn_cast<Instruction>(Val: I);
1311	if (!VecOp)
1312	return;
1313	auto Intersection = (OpValue == nullptr*) ? dyn_cast<Instruction>(Val: VL [`0`])
1314	: dyn_cast<Instruction>(Val: OpValue);
1315	if (!Intersection)
1316	return;
1317	const unsigned Opcode = Intersection->getOpcode();
1318	VecOp->copyIRFlags(V: Intersection, IncludeWrapFlags);
1319	for (auto *V : VL) {
1320	auto *Instr = dyn_cast<Instruction>(Val: V);
1321	if (!Instr)
1322	continue;
1323	if (OpValue == nullptr \|\| Opcode == Instr->getOpcode())
1324	VecOp->andIRFlags(V);
1325	}
1326	}
1327
1328	bool llvm::isKnownNegativeInLoop(const SCEV S, const* Loop *L,
1329	ScalarEvolution &SE) {
1330	const SCEV *Zero = SE.getZero(Ty: S->getType());
1331	return SE.isAvailableAtLoopEntry(S, L) &&
1332	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SLT, LHS: S, RHS: Zero);
1333	}
1334
1335	bool llvm::isKnownNonNegativeInLoop(const SCEV S, const* Loop *L,
1336	ScalarEvolution &SE) {
1337	const SCEV *Zero = SE.getZero(Ty: S->getType());
1338	return SE.isAvailableAtLoopEntry(S, L) &&
1339	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SGE, LHS: S, RHS: Zero);
1340	}
1341
1342	bool llvm::isKnownPositiveInLoop(const SCEV S, const* Loop *L,
1343	ScalarEvolution &SE) {
1344	const SCEV *Zero = SE.getZero(Ty: S->getType());
1345	return SE.isAvailableAtLoopEntry(S, L) &&
1346	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SGT, LHS: S, RHS: Zero);
1347	}
1348
1349	bool llvm::isKnownNonPositiveInLoop(const SCEV S, const* Loop *L,
1350	ScalarEvolution &SE) {
1351	const SCEV *Zero = SE.getZero(Ty: S->getType());
1352	return SE.isAvailableAtLoopEntry(S, L) &&
1353	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SLE, LHS: S, RHS: Zero);
1354	}
1355
1356	bool llvm::cannotBeMinInLoop(const SCEV S, const* Loop *L, ScalarEvolution &SE,
1357	bool Signed) {
1358	unsigned BitWidth = cast<IntegerType>(Val: S->getType())->getBitWidth();
1359	APInt Min = Signed ? APInt::getSignedMinValue(numBits: BitWidth) :
1360	APInt::getMinValue(numBits: BitWidth);
1361	auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1362	return SE.isAvailableAtLoopEntry(S, L) &&
1363	SE.isLoopEntryGuardedByCond(L, Pred: Predicate, LHS: S,
1364	RHS: SE.getConstant(Val: Min));
1365	}
1366
1367	bool llvm::cannotBeMaxInLoop(const SCEV S, const* Loop *L, ScalarEvolution &SE,
1368	bool Signed) {
1369	unsigned BitWidth = cast<IntegerType>(Val: S->getType())->getBitWidth();
1370	APInt Max = Signed ? APInt::getSignedMaxValue(numBits: BitWidth) :
1371	APInt::getMaxValue(numBits: BitWidth);
1372	auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1373	return SE.isAvailableAtLoopEntry(S, L) &&
1374	SE.isLoopEntryGuardedByCond(L, Pred: Predicate, LHS: S,
1375	RHS: SE.getConstant(Val: Max));
1376	}
1377
1378	//===----------------------------------------------------------------------===//
1379	// rewriteLoopExitValues - Optimize IV users outside the loop.
1380	// As a side effect, reduces the amount of IV processing within the loop.
1381	//===----------------------------------------------------------------------===//
1382
1383	static bool hasHardUserWithinLoop(const Loop L, const* Instruction *I) {
1384	SmallPtrSet<const Instruction *, `8`> Visited;
1385	SmallVector<const Instruction *, `8`> WorkList;
1386	Visited.insert(Ptr: I);
1387	WorkList.push_back(Elt: I);
1388	while (!WorkList.empty()) {
1389	const Instruction *Curr = WorkList.pop_back_val();
1390	// This use is outside the loop, nothing to do.
1391	if (!L->contains(Inst: Curr))
1392	continue;
1393	// Do we assume it is a "hard" use which will not be eliminated easily?
1394	if (Curr->mayHaveSideEffects())
1395	return true;
1396	// Otherwise, add all its users to worklist.
1397	for (const auto *U : Curr->users()) {
1398	auto *UI = cast<Instruction>(Val: U);
1399	if (Visited.insert(Ptr: UI).second)
1400	WorkList.push_back(Elt: UI);
1401	}
1402	}
1403	return false;
1404	}
1405
1406	// Collect information about PHI nodes which can be transformed in
1407	// rewriteLoopExitValues.
1408	struct RewritePhi {
1409	PHINode PN; // For which PHI node is this replacement?*
1410	unsigned Ith; // For which incoming value?
1411	const SCEV ExpansionSCEV; // The SCEV of the incoming value we are rewriting.*
1412	Instruction ExpansionPoint; // Where we'd like to expand that SCEV?*
1413	bool HighCost; // Is this expansion a high-cost?
1414
1415	RewritePhi(PHINode P, unsigned* I, const SCEV Val, Instruction ExpansionPt,
1416	bool H)
1417	: PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
1418	HighCost(H) {}
1419	};
1420
1421	// Check whether it is possible to delete the loop after rewriting exit
1422	// value. If it is possible, ignore ReplaceExitValue and do rewriting
1423	// aggressively.
1424	static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, `8`> &RewritePhiSet) {
1425	BasicBlock *Preheader = L->getLoopPreheader();
1426	// If there is no preheader, the loop will not be deleted.
1427	if (!Preheader)
1428	return false;
1429
1430	// In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
1431	// We obviate multiple ExitingBlocks case for simplicity.
1432	// TODO: If we see testcase with multiple ExitingBlocks can be deleted
1433	// after exit value rewriting, we can enhance the logic here.
1434	SmallVector<BasicBlock *, `4`> ExitingBlocks;
1435	L->getExitingBlocks(ExitingBlocks);
1436	SmallVector<BasicBlock *, `8`> ExitBlocks;
1437	L->getUniqueExitBlocks(ExitBlocks);
1438	if (ExitBlocks.size() != `1` \|\| ExitingBlocks.size() != `1`)
1439	return false;
1440
1441	BasicBlock *ExitBlock = ExitBlocks [`0`];
1442	BasicBlock::iterator BI = ExitBlock->begin();
1443	while (PHINode *P = dyn_cast<PHINode>(Val&: BI)) {
1444	Value *Incoming = P->getIncomingValueForBlock(BB: ExitingBlocks [`0`]);
1445
1446	// If the Incoming value of P is found in RewritePhiSet, we know it
1447	// could be rewritten to use a loop invariant value in transformation
1448	// phase later. Skip it in the loop invariant check below.
1449	bool found = false;
1450	for (const RewritePhi &Phi : RewritePhiSet) {
1451	unsigned i = Phi.Ith;
1452	if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
1453	found = true;
1454	break;
1455	}
1456	}
1457
1458	Instruction *I;
1459	if (!found && (I = dyn_cast<Instruction>(Val: Incoming)))
1460	if (!L->hasLoopInvariantOperands(I))
1461	return false;
1462
1463	++BI;
1464	}
1465
1466	for (auto *BB : L->blocks())
1467	if (llvm::any_of(Range&: *BB, P: [](Instruction &I) {
1468	return I.mayHaveSideEffects();
1469	}))
1470	return false;
1471
1472	return true;
1473	}
1474
1475	/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
1476	/// and returns true if this Phi is an induction phi in the loop. When
1477	/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI.
1478	static bool checkIsIndPhi(PHINode Phi, Loop L, ScalarEvolution *SE,
1479	InductionDescriptor &ID) {
1480	if (!Phi)
1481	return false;
1482	if (!L->getLoopPreheader())
1483	return false;
1484	if (Phi->getParent() != L->getHeader())
1485	return false;
1486	return InductionDescriptor::isInductionPHI(Phi, L, SE, D&: ID);
1487	}
1488
1489	int llvm::rewriteLoopExitValues(Loop L, LoopInfo LI, TargetLibraryInfo *TLI,
1490	ScalarEvolution *SE,
1491	const TargetTransformInfo *TTI,
1492	SCEVExpander &Rewriter, DominatorTree *DT,
1493	ReplaceExitVal ReplaceExitValue,
1494	SmallVector<WeakTrackingVH, `16`> &DeadInsts) {
1495	// Check a pre-condition.
1496	assert(L->isRecursivelyLCSSAForm(DT, LI) &&
1497	"Indvars did not preserve LCSSA!");
1498
1499	SmallVector<BasicBlock*, `8`> ExitBlocks;
1500	L->getUniqueExitBlocks(ExitBlocks);
1501
1502	SmallVector<RewritePhi, `8`> RewritePhiSet;
1503	// Find all values that are computed inside the loop, but used outside of it.
1504	// Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
1505	// the exit blocks of the loop to find them.
1506	for (BasicBlock *ExitBB : ExitBlocks) {
1507	// If there are no PHI nodes in this exit block, then no values defined
1508	// inside the loop are used on this path, skip it.
1509	PHINode *PN = dyn_cast<PHINode>(Val: ExitBB->begin());
1510	if (!PN) continue;
1511
1512	unsigned NumPreds = PN->getNumIncomingValues();
1513
1514	// Iterate over all of the PHI nodes.
1515	BasicBlock::iterator BBI = ExitBB->begin();
1516	while ((PN = dyn_cast<PHINode>(Val: BBI ++))) {
1517	if (PN->use_empty())
1518	continue; // dead use, don't replace it
1519
1520	if (!SE->isSCEVable(Ty: PN->getType()))
1521	continue;
1522
1523	// Iterate over all of the values in all the PHI nodes.
1524	for (unsigned i = `0`; i != NumPreds; ++i) {
1525	// If the value being merged in is not integer or is not defined
1526	// in the loop, skip it.
1527	Value *InVal = PN->getIncomingValue(i);
1528	if (!isa<Instruction>(Val: InVal))
1529	continue;
1530
1531	// If this pred is for a subloop, not L itself, skip it.
1532	if (LI->getLoopFor(BB: PN->getIncomingBlock(i)) != L)
1533	continue; // The Block is in a subloop, skip it.
1534
1535	// Check that InVal is defined in the loop.
1536	Instruction *Inst = cast<Instruction>(Val: InVal);
1537	if (!L->contains(Inst))
1538	continue;
1539
1540	// Find exit values which are induction variables in the loop, and are
1541	// unused in the loop, with the only use being the exit block PhiNode,
1542	// and the induction variable update binary operator.
1543	// The exit value can be replaced with the final value when it is cheap
1544	// to do so.
1545	if (ReplaceExitValue == UnusedIndVarInLoop) {
1546	InductionDescriptor ID;
1547	PHINode *IndPhi = dyn_cast<PHINode>(Val: Inst);
1548	if (IndPhi) {
1549	if (!checkIsIndPhi(Phi: IndPhi, L, SE, ID))
1550	continue;
1551	// This is an induction PHI. Check that the only users are PHI
1552	// nodes, and induction variable update binary operators.
1553	if (llvm::any_of(Range: Inst->users(), P: [&](User *U) {
1554	if (!isa<PHINode>(Val: U) && !isa<BinaryOperator>(Val: U))
1555	return true;
1556	BinaryOperator *B = dyn_cast<BinaryOperator>(Val: U);
1557	if (B && B != ID.getInductionBinOp())
1558	return true;
1559	return false;
1560	}))
1561	continue;
1562	} else {
1563	// If it is not an induction phi, it must be an induction update
1564	// binary operator with an induction phi user.
1565	BinaryOperator *B = dyn_cast<BinaryOperator>(Val: Inst);
1566	if (!B)
1567	continue;
1568	if (llvm::any_of(Range: Inst->users(), P: [&](User *U) {
1569	PHINode *Phi = dyn_cast<PHINode>(Val: U);
1570	if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
1571	return true;
1572	return false;
1573	}))
1574	continue;
1575	if (B != ID.getInductionBinOp())
1576	continue;
1577	}
1578	}
1579
        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
1586	const SCEV *ExitValue = SE->getSCEVAtScope(V: Inst, L: L->getParentLoop());
1587	if (isa<SCEVCouldNotCompute>(Val: ExitValue) \|\|
1588	!SE->isLoopInvariant(S: ExitValue, L) \|\|
1589	!Rewriter.isSafeToExpand(S: ExitValue)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
1594	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: PN->getIncomingBlock(i));
1595	if (isa<SCEVCouldNotCompute>(Val: ExitCount))
1596	continue;
1597	if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: Inst)))
1598	if (AddRec->getLoop() == L)
1599	ExitValue = AddRec->evaluateAtIteration(It: ExitCount, SE&: *SE);
1600	if (isa<SCEVCouldNotCompute>(Val: ExitValue) \|\|
1601	!SE->isLoopInvariant(S: ExitValue, L) \|\|
1602	!Rewriter.isSafeToExpand(S: ExitValue))
1603	continue;
1604	}
1605
1606	// Computing the value outside of the loop brings no benefit if it is
1607	// definitely used inside the loop in a way which can not be optimized
1608	// away. Avoid doing so unless we know we have a value which computes
1609	// the ExitValue already. TODO: This should be merged into SCEV
1610	// expander to leverage its knowledge of existing expressions.
1611	if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(Val: ExitValue) &&
1612	!isa<SCEVUnknown>(Val: ExitValue) && hasHardUserWithinLoop(L, I: Inst))
1613	continue;
1614
1615	// Check if expansions of this SCEV would count as being high cost.
1616	bool HighCost = Rewriter.isHighCostExpansion(
1617	Exprs: ExitValue, L, Budget: SCEVCheapExpansionBudget, TTI, At: Inst);
1618
        // Note that we must not perform expansions until after
        // we query *all* the costs, because if we perform temporary expansion
        // in between, one that we might not intend to keep, said expansion
        // *may* affect cost calculation of the next SCEVs we'll query,
        // and the next SCEV may erroneously get a smaller cost.
1624
1625	// Collect all the candidate PHINodes to be rewritten.
1626	Instruction *InsertPt =
1627	(isa<PHINode>(Val: Inst) \|\| isa<LandingPadInst>(Val: Inst)) ?
1628	&*Inst->getParent()->getFirstInsertionPt() : Inst;
1629	RewritePhiSet.emplace_back(Args&: PN, Args&: i, Args&: ExitValue, Args&: InsertPt, Args&: HighCost);
1630	}
1631	}
1632	}
1633
1634	// TODO: evaluate whether it is beneficial to change how we calculate
1635	// high-cost: if we have SCEV 'A' which we know we will expand, should we
1636	// calculate the cost of other SCEV's after expanding SCEV 'A', thus
1637	// potentially giving cost bonus to those other SCEV's?
1638
1639	bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
1640	int NumReplaced = `0`;
1641
1642	// Transformation.
1643	for (const RewritePhi &Phi : RewritePhiSet) {
1644	PHINode *PN = Phi.PN;
1645
1646	// Only do the rewrite when the ExitValue can be expanded cheaply.
1647	// If LoopCanBeDel is true, rewrite exit value aggressively.
1648	if ((ReplaceExitValue == OnlyCheapRepl \|\|
1649	ReplaceExitValue == UnusedIndVarInLoop) &&
1650	!LoopCanBeDel && Phi.HighCost)
1651	continue;
1652
1653	Value *ExitVal = Rewriter.expandCodeFor(
1654	SH: Phi.ExpansionSCEV, Ty: Phi.PN->getType(), I: Phi.ExpansionPoint);
1655
1656	LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
1657	<< `'\n'`
1658	<< " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
1659
1660	#ifndef NDEBUG
1661	// If we reuse an instruction from a loop which is neither L nor one of
1662	// its containing loops, we end up breaking LCSSA form for this loop by
1663	// creating a new use of its instruction.
1664	if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
1665	if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
1666	if (EVL != L)
1667	assert(EVL->contains(L) && "LCSSA breach detected!");
1668	#endif
1669
1670	NumReplaced++;
1671	Instruction *Inst = cast<Instruction>(Val: PN->getIncomingValue(i: Phi.Ith));
1672	PN->setIncomingValue(i: Phi.Ith, V: ExitVal);
1673	// It's necessary to tell ScalarEvolution about this explicitly so that
1674	// it can walk the def-use list and forget all SCEVs, as it may not be
1675	// watching the PHI itself. Once the new exit value is in place, there
1676	// may not be a def-use connection between the loop and every instruction
1677	// which got a SCEVAddRecExpr for that loop.
1678	SE->forgetValue(V: PN);
1679
1680	// If this instruction is dead now, delete it. Don't do it now to avoid
1681	// invalidating iterators.
1682	if (isInstructionTriviallyDead(I: Inst, TLI))
1683	DeadInsts.push_back(Elt: Inst);
1684
1685	// Replace PN with ExitVal if that is legal and does not break LCSSA.
1686	if (PN->getNumIncomingValues() == `1` &&
1687	LI->replacementPreservesLCSSAForm(From: PN, To: ExitVal)) {
1688	PN->replaceAllUsesWith(V: ExitVal);
1689	PN->eraseFromParent();
1690	}
1691	}
1692
1693	// The insertion point instruction may have been deleted; clear it out
1694	// so that the rewriter doesn't trip over it later.
1695	Rewriter.clearInsertPoint();
1696	return NumReplaced;
1697	}
1698
1699	/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
1700	/// \p OrigLoop.
1701	void llvm::setProfileInfoAfterUnrolling(Loop OrigLoop, Loop UnrolledLoop,
1702	Loop *RemainderLoop, uint64_t UF) {
1703	assert(UF > `0` && "Zero unrolled factor is not supported");
1704	assert(UnrolledLoop != RemainderLoop &&
1705	"Unrolled and Remainder loops are expected to distinct");
1706
1707	// Get number of iterations in the original scalar loop.
1708	unsigned OrigLoopInvocationWeight = `0`;
1709	std::optional<unsigned> OrigAverageTripCount =
1710	getLoopEstimatedTripCount(L: OrigLoop, EstimatedLoopInvocationWeight: &OrigLoopInvocationWeight);
1711	if (!OrigAverageTripCount)
1712	return;
1713
1714	// Calculate number of iterations in unrolled loop.
1715	unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
1716	// Calculate number of iterations for remainder loop.
1717	unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
1718
1719	setLoopEstimatedTripCount(L: UnrolledLoop, EstimatedTripCount: UnrolledAverageTripCount,
1720	EstimatedloopInvocationWeight: OrigLoopInvocationWeight);
1721	setLoopEstimatedTripCount(L: RemainderLoop, EstimatedTripCount: RemainderAverageTripCount,
1722	EstimatedloopInvocationWeight: OrigLoopInvocationWeight);
1723	}
1724
1725	/// Utility that implements appending of loops onto a worklist.
1726	/// Loops are added in preorder (analogous for reverse postorder for trees),
1727	/// and the worklist is processed LIFO.
1728	template <typename RangeT>
1729	void llvm::appendReversedLoopsToWorklist(
1730	RangeT &&Loops, SmallPriorityWorklist<Loop *, `4`> &Worklist) {
1731	// We use an internal worklist to build up the preorder traversal without
1732	// recursion.
1733	SmallVector<Loop *, `4`> PreOrderLoops, PreOrderWorklist;
1734
1735	// We walk the initial sequence of loops in reverse because we generally want
1736	// to visit defs before uses and the worklist is LIFO.
1737	for (Loop *RootL : Loops) {
1738	assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1739	assert(PreOrderWorklist.empty() &&
1740	"Must start with an empty preorder walk worklist.");
1741	PreOrderWorklist.push_back(Elt: RootL);
1742	do {
1743	Loop *L = PreOrderWorklist.pop_back_val();
1744	PreOrderWorklist.append(in_start: L->begin(), in_end: L->end());
1745	PreOrderLoops.push_back(Elt: L);
1746	} while (!PreOrderWorklist.empty());
1747
1748	Worklist.insert(Input: std::move(PreOrderLoops));
1749	PreOrderLoops.clear();
1750	}
1751	}
1752
1753	template <typename RangeT>
1754	void llvm::appendLoopsToWorklist(RangeT &&Loops,
1755	SmallPriorityWorklist<Loop *, `4`> &Worklist) {
1756	appendReversedLoopsToWorklist(reverse(Loops), Worklist);
1757	}
1758
1759	template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
1760	ArrayRef<Loop > &Loops, SmallPriorityWorklist<Loop , `4`> &Worklist);
1761
1762	template void
1763	llvm::appendLoopsToWorklist<Loop &>(Loop &L,
1764	SmallPriorityWorklist<Loop *, `4`> &Worklist);
1765
1766	void llvm::appendLoopsToWorklist(LoopInfo &LI,
1767	SmallPriorityWorklist<Loop *, `4`> &Worklist) {
1768	appendReversedLoopsToWorklist(Loops&: LI, Worklist);
1769	}
1770
1771	Loop llvm::cloneLoop(Loop L, Loop *PL, ValueToValueMapTy &VM,
1772	LoopInfo LI, LPPassManager LPM) {
1773	Loop &New = *LI->AllocateLoop();
1774	if (PL)
1775	PL->addChildLoop(NewChild: &New);
1776	else
1777	LI->addTopLevelLoop(New: &New);
1778
1779	if (LPM)
1780	LPM->addLoop(L&: New);
1781
1782	// Add all of the blocks in L to the new loop.
1783	for (BasicBlock *BB : L->blocks())
1784	if (LI->getLoopFor(BB) == L)
1785	New.addBasicBlockToLoop(NewBB: cast<BasicBlock>(Val&: VM [BB]), LI&: *LI);
1786
1787	// Add all of the subloops to the new loop.
1788	for (Loop I : L)
1789	cloneLoop(L: I, PL: &New, VM, LI, LPM);
1790
1791	return &New;
1792	}
1793
1794	/// IR Values for the lower and upper bounds of a pointer evolution. We
1795	/// need to use value-handles because SCEV expansion can invalidate previously
1796	/// expanded values. Thus expansion of a pointer can invalidate the bounds for
1797	/// a previous one.
1798	struct PointerBounds {
1799	TrackingVH<Value> Start;
1800	TrackingVH<Value> End;
1801	Value *StrideToCheck;
1802	};
1803
1804	/// Expand code for the lower and upper bound of the pointer group \p CG
1805	/// in \p TheLoop. \return the values for the bounds.
1806	static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
1807	Loop TheLoop, Instruction Loc,
1808	SCEVExpander &Exp, bool HoistRuntimeChecks) {
1809	LLVMContext &Ctx = Loc->getContext();
1810	Type *PtrArithTy = PointerType::get(C&: Ctx, AddressSpace: CG->AddressSpace);
1811
1812	Value Start = nullptr, End = nullptr;
1813	LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1814	const SCEV Low = CG->Low, High = CG->High, Stride = nullptr*;
1815
1816	// If the Low and High values are themselves loop-variant, then we may want
1817	// to expand the range to include those covered by the outer loop as well.
1818	// There is a trade-off here with the advantage being that creating checks
1819	// using the expanded range permits the runtime memory checks to be hoisted
1820	// out of the outer loop. This reduces the cost of entering the inner loop,
1821	// which can be significant for low trip counts. The disadvantage is that
1822	// there is a chance we may now never enter the vectorized inner loop,
1823	// whereas using a restricted range check could have allowed us to enter at
1824	// least once. This is why the behaviour is not currently the default and is
1825	// controlled by the parameter 'HoistRuntimeChecks'.
1826	if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
1827	isa<SCEVAddRecExpr>(Val: High) && isa<SCEVAddRecExpr>(Val: Low)) {
1828	auto *HighAR = cast<SCEVAddRecExpr>(Val: High);
1829	auto *LowAR = cast<SCEVAddRecExpr>(Val: Low);
1830	const Loop *OuterLoop = TheLoop->getParentLoop();
1831	ScalarEvolution &SE = *Exp.getSE();
1832	const SCEV *Recur = LowAR->getStepRecurrence(SE);
1833	if (Recur == HighAR->getStepRecurrence(SE) &&
1834	HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
1835	BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1836	const SCEV *OuterExitCount = SE.getExitCount(L: OuterLoop, ExitingBlock: OuterLoopLatch);
1837	if (!isa<SCEVCouldNotCompute>(Val: OuterExitCount) &&
1838	OuterExitCount->getType()->isIntegerTy()) {
1839	const SCEV *NewHigh =
1840	cast<SCEVAddRecExpr>(Val: High)->evaluateAtIteration(It: OuterExitCount, SE);
1841	if (!isa<SCEVCouldNotCompute>(Val: NewHigh)) {
1842	LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
1843	"outer loop in order to permit hoisting\n");
1844	High = NewHigh;
1845	Low = cast<SCEVAddRecExpr>(Val: Low)->getStart();
1846	// If there is a possibility that the stride is negative then we have
1847	// to generate extra checks to ensure the stride is positive.
1848	if (!SE.isKnownNonNegative(
1849	S: SE.applyLoopGuards(Expr: Recur, L: HighAR->getLoop()))) {
1850	Stride = Recur;
1851	LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
1852	"positive: "
1853	<< *Stride << `'\n'`);
1854	}
1855	}
1856	}
1857	}
1858	}
1859
1860	Start = Exp.expandCodeFor(SH: Low, Ty: PtrArithTy, I: Loc);
1861	End = Exp.expandCodeFor(SH: High, Ty: PtrArithTy, I: Loc);
1862	if (CG->NeedsFreeze) {
1863	IRBuilder<> Builder(Loc);
1864	Start = Builder.CreateFreeze(V: Start, Name: Start->getName() + ".fr");
1865	End = Builder.CreateFreeze(V: End, Name: End->getName() + ".fr");
1866	}
1867	Value *StrideVal =
1868	Stride ? Exp.expandCodeFor(SH: Stride, Ty: Stride->getType(), I: Loc) : nullptr;
1869	LLVM_DEBUG(dbgs() << "Start: " << Low << " End: " << High << "\n");
1870	return {.Start: Start, .End: End, .StrideToCheck: StrideVal};
1871	}
1872
1873	/// Turns a collection of checks into a collection of expanded upper and
1874	/// lower bounds for both pointers in the check.
1875	static SmallVector<std::pair<PointerBounds, PointerBounds>, `4`>
1876	expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
1877	Instruction Loc, SCEVExpander &Exp, bool* HoistRuntimeChecks) {
1878	SmallVector<std::pair<PointerBounds, PointerBounds>, `4`> ChecksWithBounds;
1879
1880	// Here we're relying on the SCEV Expander's cache to only emit code for the
1881	// same bounds once.
1882	transform(Range: PointerChecks, d_first: std::back_inserter(x&: ChecksWithBounds),
1883	F: [&](const RuntimePointerCheck &Check) {
1884	PointerBounds First = expandBounds(CG: Check.first, TheLoop: L, Loc, Exp,
1885	HoistRuntimeChecks),
1886	Second = expandBounds(CG: Check.second, TheLoop: L, Loc, Exp,
1887	HoistRuntimeChecks);
1888	return std::make_pair(x&: First, y&: Second);
1889	});
1890
1891	return ChecksWithBounds;
1892	}
1893
1894	Value *llvm::addRuntimeChecks(
1895	Instruction Loc, Loop TheLoop,
1896	const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
1897	SCEVExpander &Exp, bool HoistRuntimeChecks) {
1898	// TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
1899	// TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
1900	auto ExpandedChecks =
1901	expandBounds(PointerChecks, L: TheLoop, Loc, Exp, HoistRuntimeChecks);
1902
1903	LLVMContext &Ctx = Loc->getContext();
1904	IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1905	Loc->getDataLayout());
1906	ChkBuilder.SetInsertPoint(Loc);
1907	// Our instructions might fold to a constant.
1908	Value MemoryRuntimeCheck = nullptr*;
1909
1910	for (const auto &[A, B] : ExpandedChecks) {
1911	// Check if two pointers (A and B) conflict where conflict is computed as:
1912	// start(A) <= end(B) && start(B) <= end(A)
1913
1914	assert((A.Start->getType()->getPointerAddressSpace() ==
1915	B.End->getType()->getPointerAddressSpace()) &&
1916	(B.Start->getType()->getPointerAddressSpace() ==
1917	A.End->getType()->getPointerAddressSpace()) &&
1918	"Trying to bounds check pointers with different address spaces");
1919
1920	// [A\|B].Start points to the first accessed byte under base [A\|B].
1921	// [A\|B].End points to the last accessed byte, plus one.
1922	// There is no conflict when the intervals are disjoint:
1923	// NoConflict = (B.Start >= A.End) \|\| (A.Start >= B.End)
1924	//
1925	// bound0 = (B.Start < A.End)
1926	// bound1 = (A.Start < B.End)
1927	// IsConflict = bound0 & bound1
1928	Value *Cmp0 = ChkBuilder.CreateICmpULT(LHS: A.Start, RHS: B.End, Name: "bound0");
1929	Value *Cmp1 = ChkBuilder.CreateICmpULT(LHS: B.Start, RHS: A.End, Name: "bound1");
1930	Value *IsConflict = ChkBuilder.CreateAnd(LHS: Cmp0, RHS: Cmp1, Name: "found.conflict");
1931	if (A.StrideToCheck) {
1932	Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1933	LHS: A.StrideToCheck, RHS: ConstantInt::get(Ty: A.StrideToCheck->getType(), V: `0`),
1934	Name: "stride.check");
1935	IsConflict = ChkBuilder.CreateOr(LHS: IsConflict, RHS: IsNegativeStride);
1936	}
1937	if (B.StrideToCheck) {
1938	Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1939	LHS: B.StrideToCheck, RHS: ConstantInt::get(Ty: B.StrideToCheck->getType(), V: `0`),
1940	Name: "stride.check");
1941	IsConflict = ChkBuilder.CreateOr(LHS: IsConflict, RHS: IsNegativeStride);
1942	}
1943	if (MemoryRuntimeCheck) {
1944	IsConflict =
1945	ChkBuilder.CreateOr(LHS: MemoryRuntimeCheck, RHS: IsConflict, Name: "conflict.rdx");
1946	}
1947	MemoryRuntimeCheck = IsConflict;
1948	}
1949
1950	return MemoryRuntimeCheck;
1951	}
1952
1953	Value *llvm::addDiffRuntimeChecks(
1954	Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
1955	function_ref<Value (IRBuilderBase &, unsigned)> GetVF, unsigned* IC) {
1956
1957	LLVMContext &Ctx = Loc->getContext();
1958	IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1959	Loc->getDataLayout());
1960	ChkBuilder.SetInsertPoint(Loc);
1961	// Our instructions might fold to a constant.
1962	Value MemoryRuntimeCheck = nullptr*;
1963
1964	auto &SE = *Expander.getSE();
1965	// Map to keep track of created compares, The key is the pair of operands for
1966	// the compare, to allow detecting and re-using redundant compares.
1967	DenseMap<std::pair<Value , Value >, Value *> SeenCompares;
1968	for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
1969	Type *Ty = SinkStart->getType();
1970	// Compute VF IC * AccessSize.*
1971	auto *VFTimesUFTimesSize =
1972	ChkBuilder.CreateMul(LHS: GetVF (ChkBuilder, Ty->getScalarSizeInBits()),
1973	RHS: ConstantInt::get(Ty, V: IC * AccessSize));
1974	Value *Diff =
1975	Expander.expandCodeFor(SH: SE.getMinusSCEV(LHS: SinkStart, RHS: SrcStart), Ty, I: Loc);
1976
1977	// Check if the same compare has already been created earlier. In that case,
1978	// there is no need to check it again.
1979	Value *IsConflict = SeenCompares.lookup(Val: {Diff, VFTimesUFTimesSize});
1980	if (IsConflict)
1981	continue;
1982
1983	IsConflict =
1984	ChkBuilder.CreateICmpULT(LHS: Diff, RHS: VFTimesUFTimesSize, Name: "diff.check");
1985	SeenCompares.insert(KV: {{Diff, VFTimesUFTimesSize}, IsConflict});
1986	if (NeedsFreeze)
1987	IsConflict =
1988	ChkBuilder.CreateFreeze(V: IsConflict, Name: IsConflict->getName() + ".fr");
1989	if (MemoryRuntimeCheck) {
1990	IsConflict =
1991	ChkBuilder.CreateOr(LHS: MemoryRuntimeCheck, RHS: IsConflict, Name: "conflict.rdx");
1992	}
1993	MemoryRuntimeCheck = IsConflict;
1994	}
1995
1996	return MemoryRuntimeCheck;
1997	}
1998
1999	std::optional<IVConditionInfo>
2000	llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,
2001	const MemorySSA &MSSA, AAResults &AA) {
2002	auto *TI = dyn_cast<BranchInst>(Val: L.getHeader()->getTerminator());
2003	if (!TI \|\| !TI->isConditional())
2004	return {};
2005
2006	auto *CondI = dyn_cast<Instruction>(Val: TI->getCondition());
2007	// The case with the condition outside the loop should already be handled
2008	// earlier.
2009	// Allow CmpInst and TruncInsts as they may be users of load instructions
2010	// and have potential for partial unswitching
2011	if (!CondI \|\| !isa<CmpInst, TruncInst>(Val: CondI) \|\| !L.contains(Inst: CondI))
2012	return {};
2013
2014	SmallVector<Instruction *> InstToDuplicate;
2015	InstToDuplicate.push_back(Elt: CondI);
2016
2017	SmallVector<Value *, `4`> WorkList;
2018	WorkList.append(in_start: CondI->op_begin(), in_end: CondI->op_end());
2019
2020	SmallVector<MemoryAccess *, `4`> AccessesToCheck;
2021	SmallVector<MemoryLocation, `4`> AccessedLocs;
2022	while (!WorkList.empty()) {
2023	Instruction *I = dyn_cast<Instruction>(Val: WorkList.pop_back_val());
2024	if (!I \|\| !L.contains(Inst: I))
2025	continue;
2026
2027	// TODO: support additional instructions.
2028	if (!isa<LoadInst>(Val: I) && !isa<GetElementPtrInst>(Val: I))
2029	return {};
2030
2031	// Do not duplicate volatile and atomic loads.
2032	if (auto *LI = dyn_cast<LoadInst>(Val: I))
2033	if (LI->isVolatile() \|\| LI->isAtomic())
2034	return {};
2035
2036	InstToDuplicate.push_back(Elt: I);
2037	if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
2038	if (auto *MemUse = dyn_cast_or_null<MemoryUse>(Val: MA)) {
2039	// Queue the defining access to check for alias checks.
2040	AccessesToCheck.push_back(Elt: MemUse->getDefiningAccess());
2041	AccessedLocs.push_back(Elt: MemoryLocation::get(Inst: I));
2042	} else {
2043	// MemoryDefs may clobber the location or may be atomic memory
2044	// operations. Bail out.
2045	return {};
2046	}
2047	}
2048	WorkList.append(in_start: I->op_begin(), in_end: I->op_end());
2049	}
2050
2051	if (InstToDuplicate.empty())
2052	return {};
2053
2054	SmallVector<BasicBlock *, `4`> ExitingBlocks;
2055	L.getExitingBlocks(ExitingBlocks);
2056	auto HasNoClobbersOnPath =
2057	[&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
2058	MSSAThreshold](BasicBlock Succ, BasicBlock Header,
2059	SmallVector<MemoryAccess *, `4`> AccessesToCheck)
2060	-> std::optional<IVConditionInfo> {
2061	IVConditionInfo Info;
2062	// First, collect all blocks in the loop that are on a patch from Succ
2063	// to the header.
2064	SmallVector<BasicBlock *, `4`> WorkList;
2065	WorkList.push_back(Elt: Succ);
2066	WorkList.push_back(Elt: Header);
2067	SmallPtrSet<BasicBlock *, `4`> Seen;
2068	Seen.insert(Ptr: Header);
2069	Info.PathIsNoop &=
2070	all_of(Range&: Header, P: [](Instruction &I) { return* !I.mayHaveSideEffects(); });
2071
2072	while (!WorkList.empty()) {
2073	BasicBlock *Current = WorkList.pop_back_val();
2074	if (!L.contains(BB: Current))
2075	continue;
2076	const auto &SeenIns = Seen.insert(Ptr: Current);
2077	if (!SeenIns.second)
2078	continue;
2079
2080	Info.PathIsNoop &= all_of(
2081	Range&: Current, P: [](Instruction &I) { return* !I.mayHaveSideEffects(); });
2082	WorkList.append(in_start: succ_begin(BB: Current), in_end: succ_end(BB: Current));
2083	}
2084
2085	// Require at least 2 blocks on a path through the loop. This skips
2086	// paths that directly exit the loop.
2087	if (Seen.size() < `2`)
2088	return {};
2089
2090	// Next, check if there are any MemoryDefs that are on the path through
2091	// the loop (in the Seen set) and they may-alias any of the locations in
2092	// AccessedLocs. If that is the case, they may modify the condition and
2093	// partial unswitching is not possible.
2094	SmallPtrSet<MemoryAccess *, `4`> SeenAccesses;
2095	while (!AccessesToCheck.empty()) {
2096	MemoryAccess *Current = AccessesToCheck.pop_back_val();
2097	auto SeenI = SeenAccesses.insert(Ptr: Current);
2098	if (!SeenI.second \|\| !Seen.contains(Ptr: Current->getBlock()))
2099	continue;
2100
2101	// Bail out if exceeded the threshold.
2102	if (SeenAccesses.size() >= MSSAThreshold)
2103	return {};
2104
2105	// MemoryUse are read-only accesses.
2106	if (isa<MemoryUse>(Val: Current))
2107	continue;
2108
2109	// For a MemoryDef, check if is aliases any of the location feeding
2110	// the original condition.
2111	if (auto *CurrentDef = dyn_cast<MemoryDef>(Val: Current)) {
2112	if (any_of(Range&: AccessedLocs, P: [&AA, CurrentDef](MemoryLocation &Loc) {
2113	return isModSet(
2114	MRI: AA.getModRefInfo(I: CurrentDef->getMemoryInst(), OptLoc: Loc));
2115	}))
2116	return {};
2117	}
2118
2119	for (Use &U : Current->uses())
2120	AccessesToCheck.push_back(Elt: cast<MemoryAccess>(Val: U.getUser()));
2121	}
2122
2123	// We could also allow loops with known trip counts without mustprogress,
2124	// but ScalarEvolution may not be available.
2125	Info.PathIsNoop &= isMustProgress(L: &L);
2126
2127	// If the path is considered a no-op so far, check if it reaches a
2128	// single exit block without any phis. This ensures no values from the
2129	// loop are used outside of the loop.
2130	if (Info.PathIsNoop) {
2131	for (auto *Exiting : ExitingBlocks) {
2132	if (!Seen.contains(Ptr: Exiting))
2133	continue;
2134	for (auto *Succ : successors(BB: Exiting)) {
2135	if (L.contains(BB: Succ))
2136	continue;
2137
2138	Info.PathIsNoop &= Succ->phis().empty() &&
2139	(!Info.ExitForPath \|\| Info.ExitForPath == Succ);
2140	if (!Info.PathIsNoop)
2141	break;
2142	assert((!Info.ExitForPath \|\| Info.ExitForPath == Succ) &&
2143	"cannot have multiple exit blocks");
2144	Info.ExitForPath = Succ;
2145	}
2146	}
2147	}
2148	if (!Info.ExitForPath)
2149	Info.PathIsNoop = false;
2150
2151	Info.InstToDuplicate = InstToDuplicate;
2152	return Info;
2153	};
2154
2155	// If we branch to the same successor, partial unswitching will not be
2156	// beneficial.
2157	if (TI->getSuccessor(i: `0`) == TI->getSuccessor(i: `1`))
2158	return {};
2159
2160	if (auto Info = HasNoClobbersOnPath (TI->getSuccessor(i: `0`), L.getHeader(),
2161	AccessesToCheck)) {
2162	Info ->KnownValue = ConstantInt::getTrue(Context&: TI->getContext());
2163	return Info;
2164	}
2165	if (auto Info = HasNoClobbersOnPath (TI->getSuccessor(i: `1`), L.getHeader(),
2166	AccessesToCheck)) {
2167	Info ->KnownValue = ConstantInt::getFalse(Context&: TI->getContext());
2168	return Info;
2169	}
2170
2171	return {};
2172	}
2173

Browse the source code of llvm_projects/llvm/lib/Transforms/Utils/LoopUtils.cpp