LoopUtils.cpp source code [llvm_projects/llvm/lib/Transforms/Utils/LoopUtils.cpp]

1	//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines common loop utility functions.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/Transforms/Utils/LoopUtils.h"
14	#include "llvm/ADT/DenseSet.h"
15	#include "llvm/ADT/PriorityWorklist.h"
16	#include "llvm/ADT/ScopeExit.h"
17	#include "llvm/ADT/SetVector.h"
18	#include "llvm/ADT/SmallPtrSet.h"
19	#include "llvm/ADT/SmallVector.h"
20	#include "llvm/Analysis/AliasAnalysis.h"
21	#include "llvm/Analysis/BasicAliasAnalysis.h"
22	#include "llvm/Analysis/DomTreeUpdater.h"
23	#include "llvm/Analysis/GlobalsModRef.h"
24	#include "llvm/Analysis/InstSimplifyFolder.h"
25	#include "llvm/Analysis/LoopAccessAnalysis.h"
26	#include "llvm/Analysis/LoopInfo.h"
27	#include "llvm/Analysis/LoopPass.h"
28	#include "llvm/Analysis/MemorySSA.h"
29	#include "llvm/Analysis/MemorySSAUpdater.h"
30	#include "llvm/Analysis/ScalarEvolution.h"
31	#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
32	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
33	#include "llvm/IR/DIBuilder.h"
34	#include "llvm/IR/Dominators.h"
35	#include "llvm/IR/Instructions.h"
36	#include "llvm/IR/IntrinsicInst.h"
37	#include "llvm/IR/MDBuilder.h"
38	#include "llvm/IR/Module.h"
39	#include "llvm/IR/PatternMatch.h"
40	#include "llvm/IR/ProfDataUtils.h"
41	#include "llvm/IR/ValueHandle.h"
42	#include "llvm/InitializePasses.h"
43	#include "llvm/Pass.h"
44	#include "llvm/Support/Compiler.h"
45	#include "llvm/Support/Debug.h"
46	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
47	#include "llvm/Transforms/Utils/Local.h"
48	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
49
50	using namespace llvm;
51	using namespace llvm::PatternMatch;
52
53	#define DEBUG_TYPE "loop-utils"
54
// Names of the loop attributes queried by hasDisableAllTransformsHint() and
// hasDisableLICMTransformsHint(). The pointers themselves are const so the
// names cannot be re-pointed at run time.
static const char *const LLVMLoopDisableNonforced =
    "llvm.loop.disable_nonforced";
static const char *const LLVMLoopDisableLICM = "llvm.licm.disable";
57
58	bool llvm::formDedicatedExitBlocks(Loop L, DominatorTree DT, LoopInfo *LI,
59	MemorySSAUpdater *MSSAU,
60	bool PreserveLCSSA) {
61	bool Changed = false;
62
63	// We re-use a vector for the in-loop predecesosrs.
64	SmallVector<BasicBlock *, `4`> InLoopPredecessors;
65
66	auto RewriteExit = [&](BasicBlock *BB) {
67	assert(InLoopPredecessors.empty() &&
68	"Must start with an empty predecessors list!");
69	auto Cleanup = make_scope_exit(F: [&] { InLoopPredecessors.clear(); });
70
71	// See if there are any non-loop predecessors of this exit block and
72	// keep track of the in-loop predecessors.
73	bool IsDedicatedExit = true;
74	for (auto *PredBB : predecessors(BB))
75	if (L->contains(BB: PredBB)) {
76	if (isa<IndirectBrInst>(Val: PredBB->getTerminator()))
77	// We cannot rewrite exiting edges from an indirectbr.
78	return false;
79
80	InLoopPredecessors.push_back(Elt: PredBB);
81	} else {
82	IsDedicatedExit = false;
83	}
84
85	assert(!InLoopPredecessors.empty() && "Must have some loop predecessor!");
86
87	// Nothing to do if this is already a dedicated exit.
88	if (IsDedicatedExit)
89	return false;
90
91	auto *NewExitBB = SplitBlockPredecessors(
92	BB, Preds: InLoopPredecessors, Suffix: ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
93
94	if (!NewExitBB)
95	LLVM_DEBUG(
96	dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
97	<< *L << "\n");
98	else
99	LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
100	<< NewExitBB->getName() << "\n");
101	return true;
102	};
103
104	// Walk the exit blocks directly rather than building up a data structure for
105	// them, but only visit each one once.
106	SmallPtrSet<BasicBlock *, `4`> Visited;
107	for (auto *BB : L->blocks())
108	for (auto *SuccBB : successors(BB)) {
109	// We're looking for exit blocks so skip in-loop successors.
110	if (L->contains(BB: SuccBB))
111	continue;
112
113	// Visit each exit block exactly once.
114	if (!Visited.insert(Ptr: SuccBB).second)
115	continue;
116
117	Changed \|= RewriteExit (SuccBB);
118	}
119
120	return Changed;
121	}
122
123	/// Returns the instructions that use values defined in the loop.
124	SmallVector<Instruction , `8`> llvm::findDefsUsedOutsideOfLoop(Loop L) {
125	SmallVector<Instruction *, `8`> UsedOutside;
126
127	for (auto *Block : L->getBlocks())
128	// FIXME: I believe that this could use copy_if if the Inst reference could
129	// be adapted into a pointer.
130	for (auto &Inst : *Block) {
131	auto Users = Inst.users();
132	if (any_of(Range&: Users, P: [&](User *U) {
133	auto *Use = cast<Instruction>(Val: U);
134	return !L->contains(BB: Use->getParent());
135	}))
136	UsedOutside.push_back(Elt: &Inst);
137	}
138
139	return UsedOutside;
140	}
141
142	void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
143	// By definition, all loop passes need the LoopInfo analysis and the
144	// Dominator tree it depends on. Because they all participate in the loop
145	// pass manager, they must also preserve these.
146	AU.addRequired<DominatorTreeWrapperPass>();
147	AU.addPreserved<DominatorTreeWrapperPass>();
148	AU.addRequired<LoopInfoWrapperPass>();
149	AU.addPreserved<LoopInfoWrapperPass>();
150
151	// We must also preserve LoopSimplify and LCSSA. We locally access their IDs
152	// here because users shouldn't directly get them from this header.
153	extern char &LoopSimplifyID;
154	extern char &LCSSAID;
155	AU.addRequiredID(ID&: LoopSimplifyID);
156	AU.addPreservedID(ID&: LoopSimplifyID);
157	AU.addRequiredID(ID&: LCSSAID);
158	AU.addPreservedID(ID&: LCSSAID);
159	// This is used in the LPPassManager to perform LCSSA verification on passes
160	// which preserve lcssa form
161	AU.addRequired<LCSSAVerificationPass>();
162	AU.addPreserved<LCSSAVerificationPass>();
163
164	// Loop passes are designed to run inside of a loop pass manager which means
165	// that any function analyses they require must be required by the first loop
166	// pass in the manager (so that it is computed before the loop pass manager
167	// runs) and preserved by all loop pasess in the manager. To make this
168	// reasonably robust, the set needed for most loop passes is maintained here.
169	// If your loop pass requires an analysis not listed here, you will need to
170	// carefully audit the loop pass manager nesting structure that results.
171	AU.addRequired<AAResultsWrapperPass>();
172	AU.addPreserved<AAResultsWrapperPass>();
173	AU.addPreserved<BasicAAWrapperPass>();
174	AU.addPreserved<GlobalsAAWrapperPass>();
175	AU.addPreserved<SCEVAAWrapperPass>();
176	AU.addRequired<ScalarEvolutionWrapperPass>();
177	AU.addPreserved<ScalarEvolutionWrapperPass>();
178	// FIXME: When all loop passes preserve MemorySSA, it can be required and
179	// preserved here instead of the individual handling in each pass.
180	}
181
182	/// Manually defined generic "LoopPass" dependency initialization. This is used
183	/// to initialize the exact set of passes from above in \c
184	/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
185	/// with:
186	///
187	/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
188	///
189	/// As-if "LoopPass" were a pass.
190	void llvm::initializeLoopPassPass(PassRegistry &Registry) {
191	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
192	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
193	INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
194	INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
195	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
196	INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
197	INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
198	INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
199	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
200	INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
201	}
202
203	/// Create MDNode for input string.
204	static MDNode createStringMetadata(Loop TheLoop, StringRef Name, unsigned V) {
205	LLVMContext &Context = TheLoop->getHeader()->getContext();
206	Metadata *MDs[] = {
207	MDString::get(Context, Str: Name),
208	ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C&: Context), V))};
209	return MDNode::get(Context, MDs);
210	}
211
212	/// Set input string into loop metadata by keeping other values intact.
213	/// If the string is already in loop metadata update value if it is
214	/// different.
215	void llvm::addStringMetadataToLoop(Loop TheLoop, const* char *StringMD,
216	unsigned V) {
217	SmallVector<Metadata *, `4`> MDs(`1`);
218	// If the loop already has metadata, retain it.
219	MDNode *LoopID = TheLoop->getLoopID();
220	if (LoopID) {
221	for (unsigned i = `1`, ie = LoopID->getNumOperands(); i < ie; ++i) {
222	MDNode *Node = cast<MDNode>(Val: LoopID->getOperand(I: i));
223	// If it is of form key = value, try to parse it.
224	if (Node->getNumOperands() == `2`) {
225	MDString *S = dyn_cast<MDString>(Val: Node->getOperand(I: `0`));
226	if (S && S->getString() == StringMD) {
227	ConstantInt *IntMD =
228	mdconst::extract_or_null<ConstantInt>(MD: Node->getOperand(I: `1`));
229	if (IntMD && IntMD->getSExtValue() == V)
230	// It is already in place. Do nothing.
231	return;
232	// We need to update the value, so just skip it here and it will
233	// be added after copying other existed nodes.
234	continue;
235	}
236	}
237	MDs.push_back(Elt: Node);
238	}
239	}
240	// Add new metadata.
241	MDs.push_back(Elt: createStringMetadata(TheLoop, Name: StringMD, V));
242	// Replace current metadata node with new one.
243	LLVMContext &Context = TheLoop->getHeader()->getContext();
244	MDNode *NewLoopID = MDNode::get(Context, MDs);
245	// Set operand 0 to refer to the loop id itself.
246	NewLoopID->replaceOperandWith(I: `0`, New: NewLoopID);
247	TheLoop->setLoopID(NewLoopID);
248	}
249
250	std::optional<ElementCount>
251	llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
252	std::optional<int> Width =
253	getOptionalIntLoopAttribute(TheLoop, Name: "llvm.loop.vectorize.width");
254
255	if (Width) {
256	std::optional<int> IsScalable = getOptionalIntLoopAttribute(
257	TheLoop, Name: "llvm.loop.vectorize.scalable.enable");
258	return ElementCount::get(MinVal: Width, Scalable: IsScalable.value_or(u: false*));
259	}
260
261	return std::nullopt;
262	}
263
264	std::optional<MDNode *> llvm::makeFollowupLoopID(
265	MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
266	const char InheritOptionsExceptPrefix, bool* AlwaysNew) {
267	if (!OrigLoopID) {
268	if (AlwaysNew)
269	return nullptr;
270	return std::nullopt;
271	}
272
273	assert(OrigLoopID->getOperand(`0`) == OrigLoopID);
274
275	bool InheritAllAttrs = !InheritOptionsExceptPrefix;
276	bool InheritSomeAttrs =
277	InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[`0`] != `'\0'`;
278	SmallVector<Metadata *, `8`> MDs;
279	MDs.push_back(Elt: nullptr);
280
281	bool Changed = false;
282	if (InheritAllAttrs \|\| InheritSomeAttrs) {
283	for (const MDOperand &Existing : drop_begin(RangeOrContainer: OrigLoopID->operands())) {
284	MDNode *Op = cast<MDNode>(Val: Existing.get());
285
286	auto InheritThisAttribute = [InheritSomeAttrs,
287	InheritOptionsExceptPrefix](MDNode *Op) {
288	if (!InheritSomeAttrs)
289	return false;
290
291	// Skip malformatted attribute metadata nodes.
292	if (Op->getNumOperands() == `0`)
293	return true;
294	Metadata *NameMD = Op->getOperand(I: `0`).get();
295	if (!isa<MDString>(Val: NameMD))
296	return true;
297	StringRef AttrName = cast<MDString>(Val: NameMD)->getString();
298
299	// Do not inherit excluded attributes.
300	return !AttrName.starts_with(Prefix: InheritOptionsExceptPrefix);
301	};
302
303	if (InheritThisAttribute (Op))
304	MDs.push_back(Elt: Op);
305	else
306	Changed = true;
307	}
308	} else {
309	// Modified if we dropped at least one attribute.
310	Changed = OrigLoopID->getNumOperands() > `1`;
311	}
312
313	bool HasAnyFollowup = false;
314	for (StringRef OptionName : FollowupOptions) {
315	MDNode *FollowupNode = findOptionMDForLoopID(LoopID: OrigLoopID, Name: OptionName);
316	if (!FollowupNode)
317	continue;
318
319	HasAnyFollowup = true;
320	for (const MDOperand &Option : drop_begin(RangeOrContainer: FollowupNode->operands())) {
321	MDs.push_back(Elt: Option.get());
322	Changed = true;
323	}
324	}
325
326	// Attributes of the followup loop not specified explicity, so signal to the
327	// transformation pass to add suitable attributes.
328	if (!AlwaysNew && !HasAnyFollowup)
329	return std::nullopt;
330
331	// If no attributes were added or remove, the previous loop Id can be reused.
332	if (!AlwaysNew && !Changed)
333	return OrigLoopID;
334
335	// No attributes is equivalent to having no !llvm.loop metadata at all.
336	if (MDs.size() == `1`)
337	return nullptr;
338
339	// Build the new loop ID.
340	MDTuple *FollowupLoopID = MDNode::get(Context&: OrigLoopID->getContext(), MDs);
341	FollowupLoopID->replaceOperandWith(I: `0`, New: FollowupLoopID);
342	return FollowupLoopID;
343	}
344
345	bool llvm::hasDisableAllTransformsHint(const Loop *L) {
346	return getBooleanLoopAttribute(TheLoop: L, Name: LLVMLoopDisableNonforced);
347	}
348
349	bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
350	return getBooleanLoopAttribute(TheLoop: L, Name: LLVMLoopDisableLICM);
351	}
352
353	TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
354	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.disable"))
355	return TM_SuppressedByUser;
356
357	std::optional<int> Count =
358	getOptionalIntLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.count");
359	if (Count)
360	return *Count == `1` ? TM_SuppressedByUser : TM_ForcedByUser;
361
362	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.enable"))
363	return TM_ForcedByUser;
364
365	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.full"))
366	return TM_ForcedByUser;
367
368	if (hasDisableAllTransformsHint(L))
369	return TM_Disable;
370
371	return TM_Unspecified;
372	}
373
374	TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
375	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll_and_jam.disable"))
376	return TM_SuppressedByUser;
377
378	std::optional<int> Count =
379	getOptionalIntLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll_and_jam.count");
380	if (Count)
381	return *Count == `1` ? TM_SuppressedByUser : TM_ForcedByUser;
382
383	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll_and_jam.enable"))
384	return TM_ForcedByUser;
385
386	if (hasDisableAllTransformsHint(L))
387	return TM_Disable;
388
389	return TM_Unspecified;
390	}
391
392	TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
393	std::optional<bool> Enable =
394	getOptionalBoolLoopAttribute(TheLoop: L, Name: "llvm.loop.vectorize.enable");
395
396	if (Enable == false)
397	return TM_SuppressedByUser;
398
399	std::optional<ElementCount> VectorizeWidth =
400	getOptionalElementCountLoopAttribute(TheLoop: L);
401	std::optional<int> InterleaveCount =
402	getOptionalIntLoopAttribute(TheLoop: L, Name: "llvm.loop.interleave.count");
403
404	// 'Forcing' vector width and interleave count to one effectively disables
405	// this tranformation.
406	if (Enable == true && VectorizeWidth && VectorizeWidth ->isScalar() &&
407	InterleaveCount == `1`)
408	return TM_SuppressedByUser;
409
410	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.isvectorized"))
411	return TM_Disable;
412
413	if (Enable == true)
414	return TM_ForcedByUser;
415
416	if ((VectorizeWidth && VectorizeWidth ->isScalar()) && InterleaveCount == `1`)
417	return TM_Disable;
418
419	if ((VectorizeWidth && VectorizeWidth ->isVector()) \|\| InterleaveCount > `1`)
420	return TM_Enable;
421
422	if (hasDisableAllTransformsHint(L))
423	return TM_Disable;
424
425	return TM_Unspecified;
426	}
427
428	TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
429	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.distribute.enable"))
430	return TM_ForcedByUser;
431
432	if (hasDisableAllTransformsHint(L))
433	return TM_Disable;
434
435	return TM_Unspecified;
436	}
437
438	TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
439	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.licm_versioning.disable"))
440	return TM_SuppressedByUser;
441
442	if (hasDisableAllTransformsHint(L))
443	return TM_Disable;
444
445	return TM_Unspecified;
446	}
447
448	/// Does a BFS from a given node to all of its children inside a given loop.
449	/// The returned vector of basic blocks includes the starting point.
450	SmallVector<BasicBlock , `16`> llvm::collectChildrenInLoop(DominatorTree DT,
451	DomTreeNode *N,
452	const Loop *CurLoop) {
453	SmallVector<BasicBlock *, `16`> Worklist;
454	auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
455	// Only include subregions in the top level loop.
456	BasicBlock *BB = DTN->getBlock();
457	if (CurLoop->contains(BB))
458	Worklist.push_back(Elt: DTN->getBlock());
459	};
460
461	AddRegionToWorklist (N);
462
463	for (size_t I = `0`; I < Worklist.size(); I++) {
464	for (DomTreeNode *Child : DT->getNode(BB: Worklist [I])->children())
465	AddRegionToWorklist (Child);
466	}
467
468	return Worklist;
469	}
470
471	bool llvm::isAlmostDeadIV(PHINode PN, BasicBlock LatchBlock, Value *Cond) {
472	int LatchIdx = PN->getBasicBlockIndex(BB: LatchBlock);
473	assert(LatchIdx != -`1` && "LatchBlock is not a case in this PHINode");
474	Value *IncV = PN->getIncomingValue(i: LatchIdx);
475
476	for (User *U : PN->users())
477	if (U != Cond && U != IncV) return false;
478
479	for (User *U : IncV->users())
480	if (U != Cond && U != PN) return false;
481	return true;
482	}
483
484
485	void llvm::deleteDeadLoop(Loop L, DominatorTree DT, ScalarEvolution *SE,
486	LoopInfo LI, MemorySSA MSSA) {
487	assert((!DT \|\| L->isLCSSAForm(*DT)) && "Expected LCSSA!");
488	auto *Preheader = L->getLoopPreheader();
489	assert(Preheader && "Preheader should exist!");
490
491	std::unique_ptr<MemorySSAUpdater> MSSAU;
492	if (MSSA)
493	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
494
495	// Now that we know the removal is safe, remove the loop by changing the
496	// branch from the preheader to go to the single exit block.
497	//
498	// Because we're deleting a large chunk of code at once, the sequence in which
499	// we remove things is very important to avoid invalidation issues.
500
501	// Tell ScalarEvolution that the loop is deleted. Do this before
502	// deleting the loop so that ScalarEvolution can look at the loop
503	// to determine what it needs to clean up.
504	if (SE) {
505	SE->forgetLoop(L);
506	SE->forgetBlockAndLoopDispositions();
507	}
508
509	Instruction *OldTerm = Preheader->getTerminator();
510	assert(!OldTerm->mayHaveSideEffects() &&
511	"Preheader must end with a side-effect-free terminator");
512	assert(OldTerm->getNumSuccessors() == `1` &&
513	"Preheader must have a single successor");
514	// Connect the preheader to the exit block. Keep the old edge to the header
515	// around to perform the dominator tree update in two separate steps
516	// -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
517	// preheader -> header.
518	//
519	//
520	// 0. Preheader 1. Preheader 2. Preheader
521	// \| \| \| \|
522	// V \| V \|
523	// Header <--\ \| Header <--\ \| Header <--\
524	// \| \| \| \| \| \| \| \| \| \| \|
525	// \| V \| \| \| V \| \| \| V \|
526	// \| Body --/ \| \| Body --/ \| \| Body --/
527	// V V V V V
528	// Exit Exit Exit
529	//
530	// By doing this is two separate steps we can perform the dominator tree
531	// update without using the batch update API.
532	//
533	// Even when the loop is never executed, we cannot remove the edge from the
534	// source block to the exit block. Consider the case where the unexecuted loop
535	// branches back to an outer loop. If we deleted the loop and removed the edge
536	// coming to this inner loop, this will break the outer loop structure (by
537	// deleting the backedge of the outer loop). If the outer loop is indeed a
538	// non-loop, it will be deleted in a future iteration of loop deletion pass.
539	IRBuilder<> Builder(OldTerm);
540
541	auto *ExitBlock = L->getUniqueExitBlock();
542	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
543	if (ExitBlock) {
544	assert(ExitBlock && "Should have a unique exit block!");
545	assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
546
547	Builder.CreateCondBr(Cond: Builder.getFalse(), True: L->getHeader(), False: ExitBlock);
548	// Remove the old branch. The conditional branch becomes a new terminator.
549	OldTerm->eraseFromParent();
550
551	// Rewrite phis in the exit block to get their inputs from the Preheader
552	// instead of the exiting block.
553	for (PHINode &P : ExitBlock->phis()) {
554	// Set the zero'th element of Phi to be from the preheader and remove all
555	// other incoming values. Given the loop has dedicated exits, all other
556	// incoming values must be from the exiting blocks.
557	int PredIndex = `0`;
558	P.setIncomingBlock(i: PredIndex, BB: Preheader);
559	// Removes all incoming values from all other exiting blocks (including
560	// duplicate values from an exiting block).
561	// Nuke all entries except the zero'th entry which is the preheader entry.
562	P.removeIncomingValueIf(Predicate: [](unsigned Idx) { return Idx != `0`; },
563	/ DeletePHIIfEmpty / false);
564
565	assert((P.getNumIncomingValues() == `1` &&
566	P.getIncomingBlock(PredIndex) == Preheader) &&
567	"Should have exactly one value and that's from the preheader!");
568	}
569
570	if (DT) {
571	DTU.applyUpdates(Updates: {{DominatorTree::Insert, Preheader, ExitBlock}});
572	if (MSSA) {
573	MSSAU ->applyUpdates(Updates: {{DominatorTree::Insert, Preheader, ExitBlock}},
574	DT&: *DT);
575	if (VerifyMemorySSA)
576	MSSA->verifyMemorySSA();
577	}
578	}
579
580	// Disconnect the loop body by branching directly to its exit.
581	Builder.SetInsertPoint(Preheader->getTerminator());
582	Builder.CreateBr(Dest: ExitBlock);
583	// Remove the old branch.
584	Preheader->getTerminator()->eraseFromParent();
585	} else {
586	assert(L->hasNoExitBlocks() &&
587	"Loop should have either zero or one exit blocks.");
588
589	Builder.SetInsertPoint(OldTerm);
590	Builder.CreateUnreachable();
591	Preheader->getTerminator()->eraseFromParent();
592	}
593
594	if (DT) {
595	DTU.applyUpdates(Updates: {{DominatorTree::Delete, Preheader, L->getHeader()}});
596	if (MSSA) {
597	MSSAU ->applyUpdates(Updates: {{DominatorTree::Delete, Preheader, L->getHeader()}},
598	DT&: *DT);
599	SmallSetVector<BasicBlock *, `8`> DeadBlockSet(L->block_begin(),
600	L->block_end());
601	MSSAU ->removeBlocks(DeadBlocks: DeadBlockSet);
602	if (VerifyMemorySSA)
603	MSSA->verifyMemorySSA();
604	}
605	}
606
607	// Use a map to unique and a vector to guarantee deterministic ordering.
608	llvm::SmallDenseSet<DebugVariable, `4`> DeadDebugSet;
609	llvm::SmallVector<DbgVariableRecord *, `4`> DeadDbgVariableRecords;
610
611	if (ExitBlock) {
612	// Given LCSSA form is satisfied, we should not have users of instructions
613	// within the dead loop outside of the loop. However, LCSSA doesn't take
614	// unreachable uses into account. We handle them here.
615	// We could do it after drop all references (in this case all users in the
616	// loop will be already eliminated and we have less work to do but according
617	// to API doc of User::dropAllReferences only valid operation after dropping
618	// references, is deletion. So let's substitute all usages of
619	// instruction from the loop with poison value of corresponding type first.
620	for (auto *Block : L->blocks())
621	for (Instruction &I : *Block) {
622	auto *Poison = PoisonValue::get(T: I.getType());
623	for (Use &U : llvm::make_early_inc_range(Range: I.uses())) {
624	if (auto *Usr = dyn_cast<Instruction>(Val: U.getUser()))
625	if (L->contains(BB: Usr->getParent()))
626	continue;
627	// If we have a DT then we can check that uses outside a loop only in
628	// unreachable block.
629	if (DT)
630	assert(!DT->isReachableFromEntry(U) &&
631	"Unexpected user in reachable block");
632	U.set(Poison);
633	}
634
635	// For one of each variable encountered, preserve a debug record (set
636	// to Poison) and transfer it to the loop exit. This terminates any
637	// variable locations that were set during the loop.
638	for (DbgVariableRecord &DVR :
639	llvm::make_early_inc_range(Range: filterDbgVars(R: I.getDbgRecordRange()))) {
640	DebugVariable Key(DVR.getVariable(), DVR.getExpression(),
641	DVR.getDebugLoc().get());
642	if (!DeadDebugSet.insert(V: Key).second)
643	continue;
644	// Unlinks the DVR from it's container, for later insertion.
645	DVR.removeFromParent();
646	DeadDbgVariableRecords.push_back(Elt: &DVR);
647	}
648	}
649
650	// After the loop has been deleted all the values defined and modified
651	// inside the loop are going to be unavailable. Values computed in the
652	// loop will have been deleted, automatically causing their debug uses
653	// be be replaced with undef. Loop invariant values will still be available.
654	// Move dbg.values out the loop so that earlier location ranges are still
655	// terminated and loop invariant assignments are preserved.
656	DIBuilder DIB(*ExitBlock->getModule());
657	BasicBlock::iterator InsertDbgValueBefore =
658	ExitBlock->getFirstInsertionPt();
659	assert(InsertDbgValueBefore != ExitBlock->end() &&
660	"There should be a non-PHI instruction in exit block, else these "
661	"instructions will have no parent.");
662
663	// Due to the "head" bit in BasicBlock::iterator, we're going to insert
664	// each DbgVariableRecord right at the start of the block, wheras dbg.values
665	// would be repeatedly inserted before the first instruction. To replicate
666	// this behaviour, do it backwards.
667	for (DbgVariableRecord *DVR : llvm::reverse(C&: DeadDbgVariableRecords))
668	ExitBlock->insertDbgRecordBefore(DR: DVR, Here: InsertDbgValueBefore);
669	}
670
671	// Remove the block from the reference counting scheme, so that we can
672	// delete it freely later.
673	for (auto *Block : L->blocks())
674	Block->dropAllReferences();
675
676	if (MSSA && VerifyMemorySSA)
677	MSSA->verifyMemorySSA();
678
679	if (LI) {
680	// Erase the instructions and the blocks without having to worry
681	// about ordering because we already dropped the references.
682	// NOTE: This iteration is safe because erasing the block does not remove
683	// its entry from the loop's block list. We do that in the next section.
684	for (BasicBlock *BB : L->blocks())
685	BB->eraseFromParent();
686
687	// Finally, the blocks from loopinfo. This has to happen late because
688	// otherwise our loop iterators won't work.
689
690	SmallPtrSet<BasicBlock *, `8`> blocks(llvm::from_range, L->blocks());
691	for (BasicBlock *BB : blocks)
692	LI->removeBlock(BB);
693
694	// The last step is to update LoopInfo now that we've eliminated this loop.
695	// Note: LoopInfo::erase remove the given loop and relink its subloops with
696	// its parent. While removeLoop/removeChildLoop remove the given loop but
697	// not relink its subloops, which is what we want.
698	if (Loop *ParentLoop = L->getParentLoop()) {
699	Loop::iterator I = find(Range&: *ParentLoop, Val: L);
700	assert(I != ParentLoop->end() && "Couldn't find loop");
701	ParentLoop->removeChildLoop(I);
702	} else {
703	Loop::iterator I = find(Range&: *LI, Val: L);
704	assert(I != LI->end() && "Couldn't find loop");
705	LI->removeLoop(I);
706	}
707	LI->destroy(L);
708	}
709	}
710
711	void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
712	LoopInfo &LI, MemorySSA *MSSA) {
713	auto *Latch = L->getLoopLatch();
714	assert(Latch && "multiple latches not yet supported");
715	auto *Header = L->getHeader();
716	Loop *OutermostLoop = L->getOutermostLoop();
717
718	SE.forgetLoop(L);
719	SE.forgetBlockAndLoopDispositions();
720
721	std::unique_ptr<MemorySSAUpdater> MSSAU;
722	if (MSSA)
723	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
724
725	// Update the CFG and domtree. We chose to special case a couple of
726	// of common cases for code quality and test readability reasons.
727	[&]() -> void {
728	if (auto *BI = dyn_cast<BranchInst>(Val: Latch->getTerminator())) {
729	if (!BI->isConditional()) {
730	DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
731	(void)changeToUnreachable(I: BI, /PreserveLCSSA/ true, DTU: &DTU,
732	MSSAU: MSSAU.get());
733	return;
734	}
735
736	// Conditional latch/exit - note that latch can be shared by inner
737	// and outer loop so the other target doesn't need to an exit
738	if (L->isLoopExiting(BB: Latch)) {
739	// TODO: Generalize ConstantFoldTerminator so that it can be used
740	// here without invalidating LCSSA or MemorySSA. (Tricky case for
741	// LCSSA: header is an exit block of a preceeding sibling loop w/o
742	// dedicated exits.)
743	const unsigned ExitIdx = L->contains(BB: BI->getSuccessor(i: `0`)) ? `1` : `0`;
744	BasicBlock *ExitBB = BI->getSuccessor(i: ExitIdx);
745
746	DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
747	Header->removePredecessor(Pred: Latch, KeepOneInputPHIs: true);
748
749	IRBuilder<> Builder(BI);
750	auto *NewBI = Builder.CreateBr(Dest: ExitBB);
751	// Transfer the metadata to the new branch instruction (minus the
752	// loop info since this is no longer a loop)
753	NewBI->copyMetadata(SrcInst: *BI, WL: {LLVMContext::MD_dbg,
754	LLVMContext::MD_annotation});
755
756	BI->eraseFromParent();
757	DTU.applyUpdates(Updates: {{DominatorTree::Delete, Latch, Header}});
758	if (MSSA)
759	MSSAU ->applyUpdates(Updates: {{DominatorTree::Delete, Latch, Header}}, DT);
760	return;
761	}
762	}
763
764	// General case. By splitting the backedge, and then explicitly making it
765	// unreachable we gracefully handle corner cases such as switch and invoke
766	// termiantors.
767	auto *BackedgeBB = SplitEdge(From: Latch, To: Header, DT: &DT, LI: &LI, MSSAU: MSSAU.get());
768
769	DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
770	(void)changeToUnreachable(I: BackedgeBB->getTerminator(),
771	/PreserveLCSSA/ true, DTU: &DTU, MSSAU: MSSAU.get());
772	}();
773
774	// Erase (and destroy) this loop instance. Handles relinking sub-loops
775	// and blocks within the loop as needed.
776	LI.erase(L);
777
778	// If the loop we broke had a parent, then changeToUnreachable might have
779	// caused a block to be removed from the parent loop (see loop_nest_lcssa
780	// test case in zero-btc.ll for an example), thus changing the parent's
781	// exit blocks. If that happened, we need to rebuild LCSSA on the outermost
782	// loop which might have a had a block removed.
783	if (OutermostLoop != L)
784	formLCSSARecursively(L&: *OutermostLoop, DT, LI: &LI, SE: &SE);
785	}
786
787
788	/// Checks if \p L has an exiting latch branch. There may also be other
789	/// exiting blocks. Returns branch instruction terminating the loop
790	/// latch if above check is successful, nullptr otherwise.
791	static BranchInst getExpectedExitLoopLatchBranch(Loop L) {
792	BasicBlock *Latch = L->getLoopLatch();
793	if (!Latch)
794	return nullptr;
795
796	BranchInst *LatchBR = dyn_cast<BranchInst>(Val: Latch->getTerminator());
797	if (!LatchBR \|\| LatchBR->getNumSuccessors() != `2` \|\| !L->isLoopExiting(BB: Latch))
798	return nullptr;
799
800	assert((LatchBR->getSuccessor(`0`) == L->getHeader() \|\|
801	LatchBR->getSuccessor(`1`) == L->getHeader()) &&
802	"At least one edge out of the latch must go to the header");
803
804	return LatchBR;
805	}
806
807	/// Return the estimated trip count for any exiting branch which dominates
808	/// the loop latch.
809	static std::optional<unsigned> getEstimatedTripCount(BranchInst *ExitingBranch,
810	Loop *L,
811	uint64_t &OrigExitWeight) {
812	// To estimate the number of times the loop body was executed, we want to
813	// know the number of times the backedge was taken, vs. the number of times
814	// we exited the loop.
815	uint64_t LoopWeight, ExitWeight;
816	if (!extractBranchWeights(I: *ExitingBranch, TrueVal&: LoopWeight, FalseVal&: ExitWeight))
817	return std::nullopt;
818
819	if (L->contains(BB: ExitingBranch->getSuccessor(i: `1`)))
820	std::swap(a&: LoopWeight, b&: ExitWeight);
821
822	if (!ExitWeight)
823	// Don't have a way to return predicated infinite
824	return std::nullopt;
825
826	OrigExitWeight = ExitWeight;
827
828	// Estimated exit count is a ratio of the loop weight by the weight of the
829	// edge exiting the loop, rounded to nearest.
830	uint64_t ExitCount = llvm::divideNearest(Numerator: LoopWeight, Denominator: ExitWeight);
831
832	// When ExitCount + 1 would wrap in unsigned, saturate at UINT_MAX.
833	if (ExitCount >= std::numeric_limits<unsigned>::max())
834	return std::numeric_limits<unsigned>::max();
835
836	// Estimated trip count is one plus estimated exit count.
837	return ExitCount + `1`;
838	}
839
840	std::optional<unsigned>
841	llvm::getLoopEstimatedTripCount(Loop *L,
842	unsigned *EstimatedLoopInvocationWeight) {
843	// Currently we take the estimate exit count only from the loop latch,
844	// ignoring other exiting blocks. This can overestimate the trip count
845	// if we exit through another exit, but can never underestimate it.
846	// TODO: incorporate information from other exits
847	if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
848	uint64_t ExitWeight;
849	if (std::optional<uint64_t> EstTripCount =
850	getEstimatedTripCount(ExitingBranch: LatchBranch, L, OrigExitWeight&: ExitWeight)) {
851	if (EstimatedLoopInvocationWeight)
852	*EstimatedLoopInvocationWeight = ExitWeight;
853	return *EstTripCount;
854	}
855	}
856	return std::nullopt;
857	}
858
859	bool llvm::setLoopEstimatedTripCount(Loop L, unsigned* EstimatedTripCount,
860	unsigned EstimatedloopInvocationWeight) {
861	// At the moment, we currently support changing the estimate trip count of
862	// the latch branch only. We could extend this API to manipulate estimated
863	// trip counts for any exit.
864	BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
865	if (!LatchBranch)
866	return false;
867
868	// Calculate taken and exit weights.
869	unsigned LatchExitWeight = `0`;
870	unsigned BackedgeTakenWeight = `0`;
871
872	if (EstimatedTripCount > `0`) {
873	LatchExitWeight = EstimatedloopInvocationWeight;
874	BackedgeTakenWeight = (EstimatedTripCount - `1`) * LatchExitWeight;
875	}
876
877	// Make a swap if back edge is taken when condition is "false".
878	if (LatchBranch->getSuccessor(i: `0`) != L->getHeader())
879	std::swap(a&: BackedgeTakenWeight, b&: LatchExitWeight);
880
881	MDBuilder MDB(LatchBranch->getContext());
882
883	// Set/Update profile metadata.
884	LatchBranch->setMetadata(
885	KindID: LLVMContext::MD_prof,
886	Node: MDB.createBranchWeights(TrueWeight: BackedgeTakenWeight, FalseWeight: LatchExitWeight));
887
888	return true;
889	}
890
891	bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
892	ScalarEvolution &SE) {
893	Loop *OuterL = InnerLoop->getParentLoop();
894	if (!OuterL)
895	return true;
896
897	// Get the backedge taken count for the inner loop
898	BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
899	const SCEV *InnerLoopBECountSC = SE.getExitCount(L: InnerLoop, ExitingBlock: InnerLoopLatch);
900	if (isa<SCEVCouldNotCompute>(Val: InnerLoopBECountSC) \|\|
901	!InnerLoopBECountSC->getType()->isIntegerTy())
902	return false;
903
904	// Get whether count is invariant to the outer loop
905	ScalarEvolution::LoopDisposition LD =
906	SE.getLoopDisposition(S: InnerLoopBECountSC, L: OuterL);
907	if (LD != ScalarEvolution::LoopInvariant)
908	return false;
909
910	return true;
911	}
912
913	constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
914	switch (RK) {
915	default:
916	llvm_unreachable("Unexpected recurrence kind");
917	case RecurKind::Add:
918	return Intrinsic::vector_reduce_add;
919	case RecurKind::Mul:
920	return Intrinsic::vector_reduce_mul;
921	case RecurKind::And:
922	return Intrinsic::vector_reduce_and;
923	case RecurKind::Or:
924	return Intrinsic::vector_reduce_or;
925	case RecurKind::Xor:
926	return Intrinsic::vector_reduce_xor;
927	case RecurKind::FMulAdd:
928	case RecurKind::FAdd:
929	return Intrinsic::vector_reduce_fadd;
930	case RecurKind::FMul:
931	return Intrinsic::vector_reduce_fmul;
932	case RecurKind::SMax:
933	return Intrinsic::vector_reduce_smax;
934	case RecurKind::SMin:
935	return Intrinsic::vector_reduce_smin;
936	case RecurKind::UMax:
937	return Intrinsic::vector_reduce_umax;
938	case RecurKind::UMin:
939	return Intrinsic::vector_reduce_umin;
940	case RecurKind::FMax:
941	return Intrinsic::vector_reduce_fmax;
942	case RecurKind::FMin:
943	return Intrinsic::vector_reduce_fmin;
944	case RecurKind::FMaximum:
945	return Intrinsic::vector_reduce_fmaximum;
946	case RecurKind::FMinimum:
947	return Intrinsic::vector_reduce_fminimum;
948	case RecurKind::FMaximumNum:
949	return Intrinsic::vector_reduce_fmax;
950	case RecurKind::FMinimumNum:
951	return Intrinsic::vector_reduce_fmin;
952	}
953	}
954
955	// This is the inverse to getReductionForBinop
956	unsigned llvm::getArithmeticReductionInstruction(Intrinsic::ID RdxID) {
957	switch (RdxID) {
958	case Intrinsic::vector_reduce_fadd:
959	return Instruction::FAdd;
960	case Intrinsic::vector_reduce_fmul:
961	return Instruction::FMul;
962	case Intrinsic::vector_reduce_add:
963	return Instruction::Add;
964	case Intrinsic::vector_reduce_mul:
965	return Instruction::Mul;
966	case Intrinsic::vector_reduce_and:
967	return Instruction::And;
968	case Intrinsic::vector_reduce_or:
969	return Instruction::Or;
970	case Intrinsic::vector_reduce_xor:
971	return Instruction::Xor;
972	case Intrinsic::vector_reduce_smax:
973	case Intrinsic::vector_reduce_smin:
974	case Intrinsic::vector_reduce_umax:
975	case Intrinsic::vector_reduce_umin:
976	return Instruction::ICmp;
977	case Intrinsic::vector_reduce_fmax:
978	case Intrinsic::vector_reduce_fmin:
979	return Instruction::FCmp;
980	default:
981	llvm_unreachable("Unexpected ID");
982	}
983	}
984
985	// This is the inverse to getArithmeticReductionInstruction
986	Intrinsic::ID llvm::getReductionForBinop(Instruction::BinaryOps Opc) {
987	switch (Opc) {
988	default:
989	break;
990	case Instruction::Add:
991	return Intrinsic::vector_reduce_add;
992	case Instruction::Mul:
993	return Intrinsic::vector_reduce_mul;
994	case Instruction::And:
995	return Intrinsic::vector_reduce_and;
996	case Instruction::Or:
997	return Intrinsic::vector_reduce_or;
998	case Instruction::Xor:
999	return Intrinsic::vector_reduce_xor;
1000	}
1001	return Intrinsic::not_intrinsic;
1002	}
1003
1004	Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID) {
1005	switch (RdxID) {
1006	default:
1007	llvm_unreachable("Unknown min/max recurrence kind");
1008	case Intrinsic::vector_reduce_umin:
1009	return Intrinsic::umin;
1010	case Intrinsic::vector_reduce_umax:
1011	return Intrinsic::umax;
1012	case Intrinsic::vector_reduce_smin:
1013	return Intrinsic::smin;
1014	case Intrinsic::vector_reduce_smax:
1015	return Intrinsic::smax;
1016	case Intrinsic::vector_reduce_fmin:
1017	return Intrinsic::minnum;
1018	case Intrinsic::vector_reduce_fmax:
1019	return Intrinsic::maxnum;
1020	case Intrinsic::vector_reduce_fminimum:
1021	return Intrinsic::minimum;
1022	case Intrinsic::vector_reduce_fmaximum:
1023	return Intrinsic::maximum;
1024	}
1025	}
1026
1027	Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
1028	switch (RK) {
1029	default:
1030	llvm_unreachable("Unknown min/max recurrence kind");
1031	case RecurKind::UMin:
1032	return Intrinsic::umin;
1033	case RecurKind::UMax:
1034	return Intrinsic::umax;
1035	case RecurKind::SMin:
1036	return Intrinsic::smin;
1037	case RecurKind::SMax:
1038	return Intrinsic::smax;
1039	case RecurKind::FMin:
1040	return Intrinsic::minnum;
1041	case RecurKind::FMax:
1042	return Intrinsic::maxnum;
1043	case RecurKind::FMinimum:
1044	return Intrinsic::minimum;
1045	case RecurKind::FMaximum:
1046	return Intrinsic::maximum;
1047	case RecurKind::FMinimumNum:
1048	return Intrinsic::minimumnum;
1049	case RecurKind::FMaximumNum:
1050	return Intrinsic::maximumnum;
1051	}
1052	}
1053
1054	RecurKind llvm::getMinMaxReductionRecurKind(Intrinsic::ID RdxID) {
1055	switch (RdxID) {
1056	case Intrinsic::vector_reduce_smax:
1057	return RecurKind::SMax;
1058	case Intrinsic::vector_reduce_smin:
1059	return RecurKind::SMin;
1060	case Intrinsic::vector_reduce_umax:
1061	return RecurKind::UMax;
1062	case Intrinsic::vector_reduce_umin:
1063	return RecurKind::UMin;
1064	case Intrinsic::vector_reduce_fmax:
1065	return RecurKind::FMax;
1066	case Intrinsic::vector_reduce_fmin:
1067	return RecurKind::FMin;
1068	default:
1069	return RecurKind::None;
1070	}
1071	}
1072
1073	CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
1074	switch (RK) {
1075	default:
1076	llvm_unreachable("Unknown min/max recurrence kind");
1077	case RecurKind::UMin:
1078	return CmpInst::ICMP_ULT;
1079	case RecurKind::UMax:
1080	return CmpInst::ICMP_UGT;
1081	case RecurKind::SMin:
1082	return CmpInst::ICMP_SLT;
1083	case RecurKind::SMax:
1084	return CmpInst::ICMP_SGT;
1085	case RecurKind::FMin:
1086	return CmpInst::FCMP_OLT;
1087	case RecurKind::FMax:
1088	return CmpInst::FCMP_OGT;
1089	// We do not add FMinimum/FMaximum recurrence kind here since there is no
1090	// equivalent predicate which compares signed zeroes according to the
1091	// semantics of the intrinsics (llvm.minimum/maximum).
1092	}
1093	}
1094
1095	Value llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value Left,
1096	Value *Right) {
1097	Type *Ty = Left->getType();
1098	if (Ty->isIntOrIntVectorTy() \|\|
1099	(RK == RecurKind::FMinimum \|\| RK == RecurKind::FMaximum \|\|
1100	RK == RecurKind::FMinimumNum \|\| RK == RecurKind::FMaximumNum)) {
1101	// TODO: Add float minnum/maxnum support when FMF nnan is set.
1102	Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
1103	return Builder.CreateIntrinsic(RetTy: Ty, ID: Id, Args: {Left, Right}, FMFSource: nullptr,
1104	Name: "rdx.minmax");
1105	}
1106	CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
1107	Value *Cmp = Builder.CreateCmp(Pred, LHS: Left, RHS: Right, Name: "rdx.minmax.cmp");
1108	Value *Select = Builder.CreateSelect(C: Cmp, True: Left, False: Right, Name: "rdx.minmax.select");
1109	return Select;
1110	}
1111
1112	// Helper to generate an ordered reduction.
1113	Value llvm::getOrderedReduction(IRBuilderBase &Builder, Value Acc, Value *Src,
1114	unsigned Op, RecurKind RdxKind) {
1115	unsigned VF = cast<FixedVectorType>(Val: Src->getType())->getNumElements();
1116
1117	// Extract and apply reduction ops in ascending order:
1118	// e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
1119	Value *Result = Acc;
1120	for (unsigned ExtractIdx = `0`; ExtractIdx != VF; ++ExtractIdx) {
1121	Value *Ext =
1122	Builder.CreateExtractElement(Vec: Src, Idx: Builder.getInt32(C: ExtractIdx));
1123
1124	if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1125	Result = Builder.CreateBinOp(Opc: (Instruction::BinaryOps)Op, LHS: Result, RHS: Ext,
1126	Name: "bin.rdx");
1127	} else {
1128	assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
1129	"Invalid min/max");
1130	Result = createMinMaxOp(Builder, RK: RdxKind, Left: Result, Right: Ext);
1131	}
1132	}
1133
1134	return Result;
1135	}
1136
1137	// Helper to generate a log2 shuffle reduction.
1138	Value llvm::getShuffleReduction(IRBuilderBase &Builder, Value Src,
1139	unsigned Op,
1140	TargetTransformInfo::ReductionShuffle RS,
1141	RecurKind RdxKind) {
1142	unsigned VF = cast<FixedVectorType>(Val: Src->getType())->getNumElements();
1143	// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
1144	// and vector ops, reducing the set of values being computed by half each
1145	// round.
1146	assert(isPowerOf2_32(VF) &&
1147	"Reduction emission only supported for pow2 vectors!");
1148	// Note: fast-math-flags flags are controlled by the builder configuration
1149	// and are assumed to apply to all generated arithmetic instructions. Other
1150	// poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
1151	// of the builder configuration, and since they're not passed explicitly,
1152	// will never be relevant here. Note that it would be generally unsound to
1153	// propagate these from an intrinsic call to the expansion anyways as we/
1154	// change the order of operations.
1155	auto BuildShuffledOp = [&Builder, &Op,
1156	&RdxKind](SmallVectorImpl<int> &ShuffleMask,
1157	Value &TmpVec) -> void* {
1158	Value *Shuf = Builder.CreateShuffleVector(V: TmpVec, Mask: ShuffleMask, Name: "rdx.shuf");
1159	if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1160	TmpVec = Builder.CreateBinOp(Opc: (Instruction::BinaryOps)Op, LHS: TmpVec, RHS: Shuf,
1161	Name: "bin.rdx");
1162	} else {
1163	assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
1164	"Invalid min/max");
1165	TmpVec = createMinMaxOp(Builder, RK: RdxKind, Left: TmpVec, Right: Shuf);
1166	}
1167	};
1168
1169	Value *TmpVec = Src;
1170	if (TargetTransformInfo::ReductionShuffle::Pairwise == RS) {
1171	SmallVector<int, `32`> ShuffleMask(VF);
1172	for (unsigned stride = `1`; stride < VF; stride <<= `1`) {
1173	// Initialise the mask with undef.
1174	llvm::fill(Range&: ShuffleMask, Value: -`1`);
1175	for (unsigned j = `0`; j < VF; j += stride << `1`) {
1176	ShuffleMask [j] = j + stride;
1177	}
1178	BuildShuffledOp (ShuffleMask, TmpVec);
1179	}
1180	} else {
1181	SmallVector<int, `32`> ShuffleMask(VF);
1182	for (unsigned i = VF; i != `1`; i >>= `1`) {
1183	// Move the upper half of the vector to the lower half.
1184	for (unsigned j = `0`; j != i / `2`; ++j)
1185	ShuffleMask [j] = i / `2` + j;
1186
1187	// Fill the rest of the mask with undef.
1188	std::fill(first: &ShuffleMask [i / `2`], last: ShuffleMask.end(), value: -`1`);
1189	BuildShuffledOp (ShuffleMask, TmpVec);
1190	}
1191	}
1192	// The result is in the first element of the vector.
1193	return Builder.CreateExtractElement(Vec: TmpVec, Idx: Builder.getInt32(C: `0`));
1194	}
1195
1196	Value llvm::createAnyOfReduction(IRBuilderBase &Builder, Value Src,
1197	Value InitVal, PHINode OrigPhi) {
1198	Value NewVal = nullptr*;
1199
1200	// First use the original phi to determine the new value we're trying to
1201	// select from in the loop.
1202	SelectInst SI = nullptr*;
1203	for (auto *U : OrigPhi->users()) {
1204	if ((SI = dyn_cast<SelectInst>(Val: U)))
1205	break;
1206	}
1207	assert(SI && "One user of the original phi should be a select");
1208
1209	if (SI->getTrueValue() == OrigPhi)
1210	NewVal = SI->getFalseValue();
1211	else {
1212	assert(SI->getFalseValue() == OrigPhi &&
1213	"At least one input to the select should be the original Phi");
1214	NewVal = SI->getTrueValue();
1215	}
1216
1217	// If any predicate is true it means that we want to select the new value.
1218	Value *AnyOf =
1219	Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
1220	// The compares in the loop may yield poison, which propagates through the
1221	// bitwise ORs. Freeze it here before the condition is used.
1222	AnyOf = Builder.CreateFreeze(V: AnyOf);
1223	return Builder.CreateSelect(C: AnyOf, True: NewVal, False: InitVal, Name: "rdx.select");
1224	}
1225
1226	Value llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value Src,
1227	RecurKind RdxKind, Value *Start,
1228	Value *Sentinel) {
1229	bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(Kind: RdxKind);
1230	bool IsMaxRdx = RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind: RdxKind);
1231	Value *MaxRdx = Src->getType()->isVectorTy()
1232	? (IsMaxRdx ? Builder.CreateIntMaxReduce(Src, IsSigned)
1233	: Builder.CreateIntMinReduce(Src, IsSigned))
1234	: Src;
1235	// Correct the final reduction result back to the start value if the maximum
1236	// reduction is sentinel value.
1237	Value *Cmp =
1238	Builder.CreateCmp(Pred: CmpInst::ICMP_NE, LHS: MaxRdx, RHS: Sentinel, Name: "rdx.select.cmp");
1239	return Builder.CreateSelect(C: Cmp, True: MaxRdx, False: Start, Name: "rdx.select");
1240	}
1241
1242	Value llvm::getReductionIdentity(Intrinsic::ID RdxID, Type Ty,
1243	FastMathFlags Flags) {
1244	bool Negative = false;
1245	switch (RdxID) {
1246	default:
1247	llvm_unreachable("Expecting a reduction intrinsic");
1248	case Intrinsic::vector_reduce_add:
1249	case Intrinsic::vector_reduce_mul:
1250	case Intrinsic::vector_reduce_or:
1251	case Intrinsic::vector_reduce_xor:
1252	case Intrinsic::vector_reduce_and:
1253	case Intrinsic::vector_reduce_fadd:
1254	case Intrinsic::vector_reduce_fmul: {
1255	unsigned Opc = getArithmeticReductionInstruction(RdxID);
1256	return ConstantExpr::getBinOpIdentity(Opcode: Opc, Ty, AllowRHSConstant: false,
1257	NSZ: Flags.noSignedZeros());
1258	}
1259	case Intrinsic::vector_reduce_umax:
1260	case Intrinsic::vector_reduce_umin:
1261	case Intrinsic::vector_reduce_smin:
1262	case Intrinsic::vector_reduce_smax: {
1263	Intrinsic::ID ScalarID = getMinMaxReductionIntrinsicOp(RdxID);
1264	return ConstantExpr::getIntrinsicIdentity(ScalarID, Ty);
1265	}
1266	case Intrinsic::vector_reduce_fmax:
1267	case Intrinsic::vector_reduce_fmaximum:
1268	Negative = true;
1269	[[fallthrough]];
1270	case Intrinsic::vector_reduce_fmin:
1271	case Intrinsic::vector_reduce_fminimum: {
1272	bool PropagatesNaN = RdxID == Intrinsic::vector_reduce_fminimum \|\|
1273	RdxID == Intrinsic::vector_reduce_fmaximum;
1274	const fltSemantics &Semantics = Ty->getFltSemantics();
1275	return (!Flags.noNaNs() && !PropagatesNaN)
1276	? ConstantFP::getQNaN(Ty, Negative)
1277	: !Flags.noInfs()
1278	? ConstantFP::getInfinity(Ty, Negative)
1279	: ConstantFP::get(Ty, V: APFloat::getLargest(Sem: Semantics, Negative));
1280	}
1281	}
1282	}
1283
1284	Value llvm::getRecurrenceIdentity(RecurKind K, Type Tp, FastMathFlags FMF) {
1285	assert((!(K == RecurKind::FMin \|\| K == RecurKind::FMax) \|\|
1286	(FMF.noNaNs() && FMF.noSignedZeros())) &&
1287	"nnan, nsz is expected to be set for FP min/max reduction.");
1288	Intrinsic::ID RdxID = getReductionIntrinsicID(RK: K);
1289	return getReductionIdentity(RdxID, Ty: Tp, Flags: FMF);
1290	}
1291
1292	Value llvm::createSimpleReduction(IRBuilderBase &Builder, Value Src,
1293	RecurKind RdxKind) {
1294	auto *SrcVecEltTy = cast<VectorType>(Val: Src->getType())->getElementType();
1295	auto getIdentity = [&]() {
1296	return getRecurrenceIdentity(K: RdxKind, Tp: SrcVecEltTy,
1297	FMF: Builder.getFastMathFlags());
1298	};
1299	switch (RdxKind) {
1300	case RecurKind::Add:
1301	case RecurKind::Mul:
1302	case RecurKind::And:
1303	case RecurKind::Or:
1304	case RecurKind::Xor:
1305	case RecurKind::SMax:
1306	case RecurKind::SMin:
1307	case RecurKind::UMax:
1308	case RecurKind::UMin:
1309	case RecurKind::FMax:
1310	case RecurKind::FMin:
1311	case RecurKind::FMinimum:
1312	case RecurKind::FMaximum:
1313	case RecurKind::FMinimumNum:
1314	case RecurKind::FMaximumNum:
1315	return Builder.CreateUnaryIntrinsic(ID: getReductionIntrinsicID(RK: RdxKind), V: Src);
1316	case RecurKind::FMulAdd:
1317	case RecurKind::FAdd:
1318	return Builder.CreateFAddReduce(Acc: getIdentity (), Src);
1319	case RecurKind::FMul:
1320	return Builder.CreateFMulReduce(Acc: getIdentity (), Src);
1321	default:
1322	llvm_unreachable("Unhandled opcode");
1323	}
1324	}
1325
1326	Value llvm::createSimpleReduction(IRBuilderBase &Builder, Value Src,
1327	RecurKind Kind, Value Mask, Value EVL) {
1328	assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
1329	!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
1330	"AnyOf and FindIV reductions are not supported.");
1331	Intrinsic::ID Id = getReductionIntrinsicID(RK: Kind);
1332	auto VPID = VPIntrinsic::getForIntrinsic(Id);
1333	assert(VPReductionIntrinsic::isVPReduction(VPID) &&
1334	"No VPIntrinsic for this reduction");
1335	auto *EltTy = cast<VectorType>(Val: Src->getType())->getElementType();
1336	Value *Iden = getRecurrenceIdentity(K: Kind, Tp: EltTy, FMF: Builder.getFastMathFlags());
1337	Value *Ops[] = {Iden, Src, Mask, EVL};
1338	return Builder.CreateIntrinsic(RetTy: EltTy, ID: VPID, Args: Ops);
1339	}
1340
1341	Value *llvm::createOrderedReduction(IRBuilderBase &B, RecurKind Kind,
1342	Value Src, Value Start) {
1343	assert((Kind == RecurKind::FAdd \|\| Kind == RecurKind::FMulAdd) &&
1344	"Unexpected reduction kind");
1345	assert(Src->getType()->isVectorTy() && "Expected a vector type");
1346	assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1347
1348	return B.CreateFAddReduce(Acc: Start, Src);
1349	}
1350
1351	Value *llvm::createOrderedReduction(IRBuilderBase &Builder, RecurKind Kind,
1352	Value Src, Value Start, Value *Mask,
1353	Value *EVL) {
1354	assert((Kind == RecurKind::FAdd \|\| Kind == RecurKind::FMulAdd) &&
1355	"Unexpected reduction kind");
1356	assert(Src->getType()->isVectorTy() && "Expected a vector type");
1357	assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1358
1359	Intrinsic::ID Id = getReductionIntrinsicID(RK: RecurKind::FAdd);
1360	auto VPID = VPIntrinsic::getForIntrinsic(Id);
1361	assert(VPReductionIntrinsic::isVPReduction(VPID) &&
1362	"No VPIntrinsic for this reduction");
1363	auto *EltTy = cast<VectorType>(Val: Src->getType())->getElementType();
1364	Value *Ops[] = {Start, Src, Mask, EVL};
1365	return Builder.CreateIntrinsic(RetTy: EltTy, ID: VPID, Args: Ops);
1366	}
1367
1368	void llvm::propagateIRFlags(Value I, ArrayRef<Value > VL, Value *OpValue,
1369	bool IncludeWrapFlags) {
1370	auto *VecOp = dyn_cast<Instruction>(Val: I);
1371	if (!VecOp)
1372	return;
1373	auto Intersection = (OpValue == nullptr*) ? dyn_cast<Instruction>(Val: VL [`0`])
1374	: dyn_cast<Instruction>(Val: OpValue);
1375	if (!Intersection)
1376	return;
1377	const unsigned Opcode = Intersection->getOpcode();
1378	VecOp->copyIRFlags(V: Intersection, IncludeWrapFlags);
1379	for (auto *V : VL) {
1380	auto *Instr = dyn_cast<Instruction>(Val: V);
1381	if (!Instr)
1382	continue;
1383	if (OpValue == nullptr \|\| Opcode == Instr->getOpcode())
1384	VecOp->andIRFlags(V);
1385	}
1386	}
1387
1388	bool llvm::isKnownNegativeInLoop(const SCEV S, const* Loop *L,
1389	ScalarEvolution &SE) {
1390	const SCEV *Zero = SE.getZero(Ty: S->getType());
1391	return SE.isAvailableAtLoopEntry(S, L) &&
1392	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SLT, LHS: S, RHS: Zero);
1393	}
1394
1395	bool llvm::isKnownNonNegativeInLoop(const SCEV S, const* Loop *L,
1396	ScalarEvolution &SE) {
1397	const SCEV *Zero = SE.getZero(Ty: S->getType());
1398	return SE.isAvailableAtLoopEntry(S, L) &&
1399	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SGE, LHS: S, RHS: Zero);
1400	}
1401
1402	bool llvm::isKnownPositiveInLoop(const SCEV S, const* Loop *L,
1403	ScalarEvolution &SE) {
1404	const SCEV *Zero = SE.getZero(Ty: S->getType());
1405	return SE.isAvailableAtLoopEntry(S, L) &&
1406	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SGT, LHS: S, RHS: Zero);
1407	}
1408
1409	bool llvm::isKnownNonPositiveInLoop(const SCEV S, const* Loop *L,
1410	ScalarEvolution &SE) {
1411	const SCEV *Zero = SE.getZero(Ty: S->getType());
1412	return SE.isAvailableAtLoopEntry(S, L) &&
1413	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SLE, LHS: S, RHS: Zero);
1414	}
1415
1416	bool llvm::cannotBeMinInLoop(const SCEV S, const* Loop *L, ScalarEvolution &SE,
1417	bool Signed) {
1418	unsigned BitWidth = cast<IntegerType>(Val: S->getType())->getBitWidth();
1419	APInt Min = Signed ? APInt::getSignedMinValue(numBits: BitWidth) :
1420	APInt::getMinValue(numBits: BitWidth);
1421	auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1422	return SE.isAvailableAtLoopEntry(S, L) &&
1423	SE.isLoopEntryGuardedByCond(L, Pred: Predicate, LHS: S,
1424	RHS: SE.getConstant(Val: Min));
1425	}
1426
1427	bool llvm::cannotBeMaxInLoop(const SCEV S, const* Loop *L, ScalarEvolution &SE,
1428	bool Signed) {
1429	unsigned BitWidth = cast<IntegerType>(Val: S->getType())->getBitWidth();
1430	APInt Max = Signed ? APInt::getSignedMaxValue(numBits: BitWidth) :
1431	APInt::getMaxValue(numBits: BitWidth);
1432	auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1433	return SE.isAvailableAtLoopEntry(S, L) &&
1434	SE.isLoopEntryGuardedByCond(L, Pred: Predicate, LHS: S,
1435	RHS: SE.getConstant(Val: Max));
1436	}
1437
1438	//===----------------------------------------------------------------------===//
1439	// rewriteLoopExitValues - Optimize IV users outside the loop.
1440	// As a side effect, reduces the amount of IV processing within the loop.
1441	//===----------------------------------------------------------------------===//
1442
1443	static bool hasHardUserWithinLoop(const Loop L, const* Instruction *I) {
1444	SmallPtrSet<const Instruction *, `8`> Visited;
1445	SmallVector<const Instruction *, `8`> WorkList;
1446	Visited.insert(Ptr: I);
1447	WorkList.push_back(Elt: I);
1448	while (!WorkList.empty()) {
1449	const Instruction *Curr = WorkList.pop_back_val();
1450	// This use is outside the loop, nothing to do.
1451	if (!L->contains(Inst: Curr))
1452	continue;
1453	// Do we assume it is a "hard" use which will not be eliminated easily?
1454	if (Curr->mayHaveSideEffects())
1455	return true;
1456	// Otherwise, add all its users to worklist.
1457	for (const auto *U : Curr->users()) {
1458	auto *UI = cast<Instruction>(Val: U);
1459	if (Visited.insert(Ptr: UI).second)
1460	WorkList.push_back(Elt: UI);
1461	}
1462	}
1463	return false;
1464	}
1465
1466	// Collect information about PHI nodes which can be transformed in
1467	// rewriteLoopExitValues.
1468	struct RewritePhi {
1469	PHINode PN; // For which PHI node is this replacement?*
1470	unsigned Ith; // For which incoming value?
1471	const SCEV ExpansionSCEV; // The SCEV of the incoming value we are rewriting.*
1472	Instruction ExpansionPoint; // Where we'd like to expand that SCEV?*
1473	bool HighCost; // Is this expansion a high-cost?
1474
1475	RewritePhi(PHINode P, unsigned* I, const SCEV Val, Instruction ExpansionPt,
1476	bool H)
1477	: PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
1478	HighCost(H) {}
1479	};
1480
1481	// Check whether it is possible to delete the loop after rewriting exit
1482	// value. If it is possible, ignore ReplaceExitValue and do rewriting
1483	// aggressively.
1484	static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, `8`> &RewritePhiSet) {
1485	BasicBlock *Preheader = L->getLoopPreheader();
1486	// If there is no preheader, the loop will not be deleted.
1487	if (!Preheader)
1488	return false;
1489
1490	// In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
1491	// We obviate multiple ExitingBlocks case for simplicity.
1492	// TODO: If we see testcase with multiple ExitingBlocks can be deleted
1493	// after exit value rewriting, we can enhance the logic here.
1494	SmallVector<BasicBlock *, `4`> ExitingBlocks;
1495	L->getExitingBlocks(ExitingBlocks);
1496	SmallVector<BasicBlock *, `8`> ExitBlocks;
1497	L->getUniqueExitBlocks(ExitBlocks);
1498	if (ExitBlocks.size() != `1` \|\| ExitingBlocks.size() != `1`)
1499	return false;
1500
1501	BasicBlock *ExitBlock = ExitBlocks [`0`];
1502	BasicBlock::iterator BI = ExitBlock->begin();
1503	while (PHINode *P = dyn_cast<PHINode>(Val&: BI)) {
1504	Value *Incoming = P->getIncomingValueForBlock(BB: ExitingBlocks [`0`]);
1505
1506	// If the Incoming value of P is found in RewritePhiSet, we know it
1507	// could be rewritten to use a loop invariant value in transformation
1508	// phase later. Skip it in the loop invariant check below.
1509	bool found = false;
1510	for (const RewritePhi &Phi : RewritePhiSet) {
1511	unsigned i = Phi.Ith;
1512	if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
1513	found = true;
1514	break;
1515	}
1516	}
1517
1518	Instruction *I;
1519	if (!found && (I = dyn_cast<Instruction>(Val: Incoming)))
1520	if (!L->hasLoopInvariantOperands(I))
1521	return false;
1522
1523	++BI;
1524	}
1525
1526	for (auto *BB : L->blocks())
1527	if (llvm::any_of(Range&: *BB, P: [](Instruction &I) {
1528	return I.mayHaveSideEffects();
1529	}))
1530	return false;
1531
1532	return true;
1533	}
1534
1535	/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
1536	/// and returns true if this Phi is an induction phi in the loop. When
1537	/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI.
1538	static bool checkIsIndPhi(PHINode Phi, Loop L, ScalarEvolution *SE,
1539	InductionDescriptor &ID) {
1540	if (!Phi)
1541	return false;
1542	if (!L->getLoopPreheader())
1543	return false;
1544	if (Phi->getParent() != L->getHeader())
1545	return false;
1546	return InductionDescriptor::isInductionPHI(Phi, L, SE, D&: ID);
1547	}
1548
1549	int llvm::rewriteLoopExitValues(Loop L, LoopInfo LI, TargetLibraryInfo *TLI,
1550	ScalarEvolution *SE,
1551	const TargetTransformInfo *TTI,
1552	SCEVExpander &Rewriter, DominatorTree *DT,
1553	ReplaceExitVal ReplaceExitValue,
1554	SmallVector<WeakTrackingVH, `16`> &DeadInsts) {
1555	// Check a pre-condition.
1556	assert(L->isRecursivelyLCSSAForm(DT, LI) &&
1557	"Indvars did not preserve LCSSA!");
1558
1559	SmallVector<BasicBlock*, `8`> ExitBlocks;
1560	L->getUniqueExitBlocks(ExitBlocks);
1561
1562	SmallVector<RewritePhi, `8`> RewritePhiSet;
1563	// Find all values that are computed inside the loop, but used outside of it.
1564	// Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
1565	// the exit blocks of the loop to find them.
1566	for (BasicBlock *ExitBB : ExitBlocks) {
1567	// If there are no PHI nodes in this exit block, then no values defined
1568	// inside the loop are used on this path, skip it.
1569	PHINode *PN = dyn_cast<PHINode>(Val: ExitBB->begin());
1570	if (!PN) continue;
1571
1572	unsigned NumPreds = PN->getNumIncomingValues();
1573
1574	// Iterate over all of the PHI nodes.
1575	BasicBlock::iterator BBI = ExitBB->begin();
1576	while ((PN = dyn_cast<PHINode>(Val: BBI ++))) {
1577	if (PN->use_empty())
1578	continue; // dead use, don't replace it
1579
1580	if (!SE->isSCEVable(Ty: PN->getType()))
1581	continue;
1582
1583	// Iterate over all of the values in all the PHI nodes.
1584	for (unsigned i = `0`; i != NumPreds; ++i) {
1585	// If the value being merged in is not integer or is not defined
1586	// in the loop, skip it.
1587	Value *InVal = PN->getIncomingValue(i);
1588	if (!isa<Instruction>(Val: InVal))
1589	continue;
1590
1591	// If this pred is for a subloop, not L itself, skip it.
1592	if (LI->getLoopFor(BB: PN->getIncomingBlock(i)) != L)
1593	continue; // The Block is in a subloop, skip it.
1594
1595	// Check that InVal is defined in the loop.
1596	Instruction *Inst = cast<Instruction>(Val: InVal);
1597	if (!L->contains(Inst))
1598	continue;
1599
1600	// Find exit values which are induction variables in the loop, and are
1601	// unused in the loop, with the only use being the exit block PhiNode,
1602	// and the induction variable update binary operator.
1603	// The exit value can be replaced with the final value when it is cheap
1604	// to do so.
1605	if (ReplaceExitValue == UnusedIndVarInLoop) {
1606	InductionDescriptor ID;
1607	PHINode *IndPhi = dyn_cast<PHINode>(Val: Inst);
1608	if (IndPhi) {
1609	if (!checkIsIndPhi(Phi: IndPhi, L, SE, ID))
1610	continue;
1611	// This is an induction PHI. Check that the only users are PHI
1612	// nodes, and induction variable update binary operators.
1613	if (llvm::any_of(Range: Inst->users(), P: [&](User *U) {
1614	if (!isa<PHINode>(Val: U) && !isa<BinaryOperator>(Val: U))
1615	return true;
1616	BinaryOperator *B = dyn_cast<BinaryOperator>(Val: U);
1617	if (B && B != ID.getInductionBinOp())
1618	return true;
1619	return false;
1620	}))
1621	continue;
1622	} else {
1623	// If it is not an induction phi, it must be an induction update
1624	// binary operator with an induction phi user.
1625	BinaryOperator *B = dyn_cast<BinaryOperator>(Val: Inst);
1626	if (!B)
1627	continue;
1628	if (llvm::any_of(Range: Inst->users(), P: [&](User *U) {
1629	PHINode *Phi = dyn_cast<PHINode>(Val: U);
1630	if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
1631	return true;
1632	return false;
1633	}))
1634	continue;
1635	if (B != ID.getInductionBinOp())
1636	continue;
1637	}
1638	}
1639
1640	// Okay, this instruction has a user outside of the current loop
1641	// and varies predictably *inside* the loop. Evaluate the value it
1642	// contains when the loop exits, if possible. We prefer to start with
1643	// expressions which are true for all exits (so as to maximize
1644	// expression reuse by the SCEVExpander), but resort to per-exit
1645	// evaluation if that fails.
1646	const SCEV *ExitValue = SE->getSCEVAtScope(V: Inst, L: L->getParentLoop());
1647	if (isa<SCEVCouldNotCompute>(Val: ExitValue) \|\|
1648	!SE->isLoopInvariant(S: ExitValue, L) \|\|
1649	!Rewriter.isSafeToExpand(S: ExitValue)) {
1650	// TODO: This should probably be sunk into SCEV in some way; maybe a
1651	// getSCEVForExit(SCEV, L, ExitingBB)? It can be generalized for*
1652	// most SCEV expressions and other recurrence types (e.g. shift
1653	// recurrences). Is there existing code we can reuse?
1654	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: PN->getIncomingBlock(i));
1655	if (isa<SCEVCouldNotCompute>(Val: ExitCount))
1656	continue;
1657	if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: Inst)))
1658	if (AddRec->getLoop() == L)
1659	ExitValue = AddRec->evaluateAtIteration(It: ExitCount, SE&: *SE);
1660	if (isa<SCEVCouldNotCompute>(Val: ExitValue) \|\|
1661	!SE->isLoopInvariant(S: ExitValue, L) \|\|
1662	!Rewriter.isSafeToExpand(S: ExitValue))
1663	continue;
1664	}
1665
1666	// Computing the value outside of the loop brings no benefit if it is
1667	// definitely used inside the loop in a way which can not be optimized
1668	// away. Avoid doing so unless we know we have a value which computes
1669	// the ExitValue already. TODO: This should be merged into SCEV
1670	// expander to leverage its knowledge of existing expressions.
1671	if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(Val: ExitValue) &&
1672	!isa<SCEVUnknown>(Val: ExitValue) && hasHardUserWithinLoop(L, I: Inst))
1673	continue;
1674
1675	// Check if expansions of this SCEV would count as being high cost.
1676	bool HighCost = Rewriter.isHighCostExpansion(
1677	Exprs: ExitValue, L, Budget: SCEVCheapExpansionBudget, TTI, At: Inst);
1678
1679	// Note that we must not perform expansions until after
1680	// we query *all* the costs, because if we perform temporary expansion
1681	// in between, one that we might not intend to keep, said expansion
1682	// *may* affect cost calculation of the next SCEVs we'll query,
1683	// and the next SCEV may erroneously get a smaller cost.
1684
1685	// Collect all the candidate PHINodes to be rewritten.
1686	Instruction *InsertPt =
1687	(isa<PHINode>(Val: Inst) \|\| isa<LandingPadInst>(Val: Inst)) ?
1688	&*Inst->getParent()->getFirstInsertionPt() : Inst;
1689	RewritePhiSet.emplace_back(Args&: PN, Args&: i, Args&: ExitValue, Args&: InsertPt, Args&: HighCost);
1690	}
1691	}
1692	}
1693
1694	// TODO: evaluate whether it is beneficial to change how we calculate
1695	// high-cost: if we have SCEV 'A' which we know we will expand, should we
1696	// calculate the cost of other SCEV's after expanding SCEV 'A', thus
1697	// potentially giving cost bonus to those other SCEV's?
1698
1699	bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
1700	int NumReplaced = `0`;
1701
1702	// Transformation.
1703	for (const RewritePhi &Phi : RewritePhiSet) {
1704	PHINode *PN = Phi.PN;
1705
1706	// Only do the rewrite when the ExitValue can be expanded cheaply.
1707	// If LoopCanBeDel is true, rewrite exit value aggressively.
1708	if ((ReplaceExitValue == OnlyCheapRepl \|\|
1709	ReplaceExitValue == UnusedIndVarInLoop) &&
1710	!LoopCanBeDel && Phi.HighCost)
1711	continue;
1712
1713	Value *ExitVal = Rewriter.expandCodeFor(
1714	SH: Phi.ExpansionSCEV, Ty: Phi.PN->getType(), I: Phi.ExpansionPoint);
1715
1716	LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
1717	<< `'\n'`
1718	<< " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
1719
1720	#ifndef NDEBUG
1721	// If we reuse an instruction from a loop which is neither L nor one of
1722	// its containing loops, we end up breaking LCSSA form for this loop by
1723	// creating a new use of its instruction.
1724	if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
1725	if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
1726	if (EVL != L)
1727	assert(EVL->contains(L) && "LCSSA breach detected!");
1728	#endif
1729
1730	NumReplaced++;
1731	Instruction *Inst = cast<Instruction>(Val: PN->getIncomingValue(i: Phi.Ith));
1732	PN->setIncomingValue(i: Phi.Ith, V: ExitVal);
1733	// It's necessary to tell ScalarEvolution about this explicitly so that
1734	// it can walk the def-use list and forget all SCEVs, as it may not be
1735	// watching the PHI itself. Once the new exit value is in place, there
1736	// may not be a def-use connection between the loop and every instruction
1737	// which got a SCEVAddRecExpr for that loop.
1738	SE->forgetValue(V: PN);
1739
1740	// If this instruction is dead now, delete it. Don't do it now to avoid
1741	// invalidating iterators.
1742	if (isInstructionTriviallyDead(I: Inst, TLI))
1743	DeadInsts.push_back(Elt: Inst);
1744
1745	// Replace PN with ExitVal if that is legal and does not break LCSSA.
1746	if (PN->getNumIncomingValues() == `1` &&
1747	LI->replacementPreservesLCSSAForm(From: PN, To: ExitVal)) {
1748	PN->replaceAllUsesWith(V: ExitVal);
1749	PN->eraseFromParent();
1750	}
1751	}
1752
1753	// The insertion point instruction may have been deleted; clear it out
1754	// so that the rewriter doesn't trip over it later.
1755	Rewriter.clearInsertPoint();
1756	return NumReplaced;
1757	}
1758
1759	/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
1760	/// \p OrigLoop.
1761	void llvm::setProfileInfoAfterUnrolling(Loop OrigLoop, Loop UnrolledLoop,
1762	Loop *RemainderLoop, uint64_t UF) {
1763	assert(UF > `0` && "Zero unrolled factor is not supported");
1764	assert(UnrolledLoop != RemainderLoop &&
1765	"Unrolled and Remainder loops are expected to distinct");
1766
1767	// Get number of iterations in the original scalar loop.
1768	unsigned OrigLoopInvocationWeight = `0`;
1769	std::optional<unsigned> OrigAverageTripCount =
1770	getLoopEstimatedTripCount(L: OrigLoop, EstimatedLoopInvocationWeight: &OrigLoopInvocationWeight);
1771	if (!OrigAverageTripCount)
1772	return;
1773
1774	// Calculate number of iterations in unrolled loop.
1775	unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
1776	// Calculate number of iterations for remainder loop.
1777	unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
1778
1779	setLoopEstimatedTripCount(L: UnrolledLoop, EstimatedTripCount: UnrolledAverageTripCount,
1780	EstimatedloopInvocationWeight: OrigLoopInvocationWeight);
1781	setLoopEstimatedTripCount(L: RemainderLoop, EstimatedTripCount: RemainderAverageTripCount,
1782	EstimatedloopInvocationWeight: OrigLoopInvocationWeight);
1783	}
1784
1785	/// Utility that implements appending of loops onto a worklist.
1786	/// Loops are added in preorder (analogous for reverse postorder for trees),
1787	/// and the worklist is processed LIFO.
1788	template <typename RangeT>
1789	void llvm::appendReversedLoopsToWorklist(
1790	RangeT &&Loops, SmallPriorityWorklist<Loop *, `4`> &Worklist) {
1791	// We use an internal worklist to build up the preorder traversal without
1792	// recursion.
1793	SmallVector<Loop *, `4`> PreOrderLoops, PreOrderWorklist;
1794
1795	// We walk the initial sequence of loops in reverse because we generally want
1796	// to visit defs before uses and the worklist is LIFO.
1797	for (Loop *RootL : Loops) {
1798	assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1799	assert(PreOrderWorklist.empty() &&
1800	"Must start with an empty preorder walk worklist.");
1801	PreOrderWorklist.push_back(Elt: RootL);
1802	do {
1803	Loop *L = PreOrderWorklist.pop_back_val();
1804	PreOrderWorklist.append(in_start: L->begin(), in_end: L->end());
1805	PreOrderLoops.push_back(Elt: L);
1806	} while (!PreOrderWorklist.empty());
1807
1808	Worklist.insert(Input: std::move(PreOrderLoops));
1809	PreOrderLoops.clear();
1810	}
1811	}
1812
1813	template <typename RangeT>
1814	void llvm::appendLoopsToWorklist(RangeT &&Loops,
1815	SmallPriorityWorklist<Loop *, `4`> &Worklist) {
1816	appendReversedLoopsToWorklist(reverse(Loops), Worklist);
1817	}
1818
1819	template LLVM_EXPORT_TEMPLATE void
1820	llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
1821	ArrayRef<Loop > &Loops, SmallPriorityWorklist<Loop , `4`> &Worklist);
1822
1823	template LLVM_EXPORT_TEMPLATE void
1824	llvm::appendLoopsToWorklist<Loop &>(Loop &L,
1825	SmallPriorityWorklist<Loop *, `4`> &Worklist);
1826
1827	void llvm::appendLoopsToWorklist(LoopInfo &LI,
1828	SmallPriorityWorklist<Loop *, `4`> &Worklist) {
1829	appendReversedLoopsToWorklist(Loops&: LI, Worklist);
1830	}
1831
1832	Loop llvm::cloneLoop(Loop L, Loop *PL, ValueToValueMapTy &VM,
1833	LoopInfo LI, LPPassManager LPM) {
1834	Loop &New = *LI->AllocateLoop();
1835	if (PL)
1836	PL->addChildLoop(NewChild: &New);
1837	else
1838	LI->addTopLevelLoop(New: &New);
1839
1840	if (LPM)
1841	LPM->addLoop(L&: New);
1842
1843	// Add all of the blocks in L to the new loop.
1844	for (BasicBlock *BB : L->blocks())
1845	if (LI->getLoopFor(BB) == L)
1846	New.addBasicBlockToLoop(NewBB: cast<BasicBlock>(Val&: VM [BB]), LI&: *LI);
1847
1848	// Add all of the subloops to the new loop.
1849	for (Loop I : L)
1850	cloneLoop(L: I, PL: &New, VM, LI, LPM);
1851
1852	return &New;
1853	}
1854
1855	/// IR Values for the lower and upper bounds of a pointer evolution. We
1856	/// need to use value-handles because SCEV expansion can invalidate previously
1857	/// expanded values. Thus expansion of a pointer can invalidate the bounds for
1858	/// a previous one.
1859	struct PointerBounds {
1860	TrackingVH<Value> Start;
1861	TrackingVH<Value> End;
1862	Value *StrideToCheck;
1863	};
1864
1865	/// Expand code for the lower and upper bound of the pointer group \p CG
1866	/// in \p TheLoop. \return the values for the bounds.
1867	static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
1868	Loop TheLoop, Instruction Loc,
1869	SCEVExpander &Exp, bool HoistRuntimeChecks) {
1870	LLVMContext &Ctx = Loc->getContext();
1871	Type *PtrArithTy = PointerType::get(C&: Ctx, AddressSpace: CG->AddressSpace);
1872
1873	Value Start = nullptr, End = nullptr;
1874	LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1875	const SCEV Low = CG->Low, High = CG->High, Stride = nullptr*;
1876
1877	// If the Low and High values are themselves loop-variant, then we may want
1878	// to expand the range to include those covered by the outer loop as well.
1879	// There is a trade-off here with the advantage being that creating checks
1880	// using the expanded range permits the runtime memory checks to be hoisted
1881	// out of the outer loop. This reduces the cost of entering the inner loop,
1882	// which can be significant for low trip counts. The disadvantage is that
1883	// there is a chance we may now never enter the vectorized inner loop,
1884	// whereas using a restricted range check could have allowed us to enter at
1885	// least once. This is why the behaviour is not currently the default and is
1886	// controlled by the parameter 'HoistRuntimeChecks'.
1887	if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
1888	isa<SCEVAddRecExpr>(Val: High) && isa<SCEVAddRecExpr>(Val: Low)) {
1889	auto *HighAR = cast<SCEVAddRecExpr>(Val: High);
1890	auto *LowAR = cast<SCEVAddRecExpr>(Val: Low);
1891	const Loop *OuterLoop = TheLoop->getParentLoop();
1892	ScalarEvolution &SE = *Exp.getSE();
1893	const SCEV *Recur = LowAR->getStepRecurrence(SE);
1894	if (Recur == HighAR->getStepRecurrence(SE) &&
1895	HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
1896	BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1897	const SCEV *OuterExitCount = SE.getExitCount(L: OuterLoop, ExitingBlock: OuterLoopLatch);
1898	if (!isa<SCEVCouldNotCompute>(Val: OuterExitCount) &&
1899	OuterExitCount->getType()->isIntegerTy()) {
1900	const SCEV *NewHigh =
1901	cast<SCEVAddRecExpr>(Val: High)->evaluateAtIteration(It: OuterExitCount, SE);
1902	if (!isa<SCEVCouldNotCompute>(Val: NewHigh)) {
1903	LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
1904	"outer loop in order to permit hoisting\n");
1905	High = NewHigh;
1906	Low = cast<SCEVAddRecExpr>(Val: Low)->getStart();
1907	// If there is a possibility that the stride is negative then we have
1908	// to generate extra checks to ensure the stride is positive.
1909	if (!SE.isKnownNonNegative(
1910	S: SE.applyLoopGuards(Expr: Recur, L: HighAR->getLoop()))) {
1911	Stride = Recur;
1912	LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
1913	"positive: "
1914	<< *Stride << `'\n'`);
1915	}
1916	}
1917	}
1918	}
1919	}
1920
1921	Start = Exp.expandCodeFor(SH: Low, Ty: PtrArithTy, I: Loc);
1922	End = Exp.expandCodeFor(SH: High, Ty: PtrArithTy, I: Loc);
1923	if (CG->NeedsFreeze) {
1924	IRBuilder<> Builder(Loc);
1925	Start = Builder.CreateFreeze(V: Start, Name: Start->getName() + ".fr");
1926	End = Builder.CreateFreeze(V: End, Name: End->getName() + ".fr");
1927	}
1928	Value *StrideVal =
1929	Stride ? Exp.expandCodeFor(SH: Stride, Ty: Stride->getType(), I: Loc) : nullptr;
1930	LLVM_DEBUG(dbgs() << "Start: " << Low << " End: " << High << "\n");
1931	return {.Start: Start, .End: End, .StrideToCheck: StrideVal};
1932	}
1933
1934	/// Turns a collection of checks into a collection of expanded upper and
1935	/// lower bounds for both pointers in the check.
1936	static SmallVector<std::pair<PointerBounds, PointerBounds>, `4`>
1937	expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
1938	Instruction Loc, SCEVExpander &Exp, bool* HoistRuntimeChecks) {
1939	SmallVector<std::pair<PointerBounds, PointerBounds>, `4`> ChecksWithBounds;
1940
1941	// Here we're relying on the SCEV Expander's cache to only emit code for the
1942	// same bounds once.
1943	transform(Range: PointerChecks, d_first: std::back_inserter(x&: ChecksWithBounds),
1944	F: [&](const RuntimePointerCheck &Check) {
1945	PointerBounds First = expandBounds(CG: Check.first, TheLoop: L, Loc, Exp,
1946	HoistRuntimeChecks),
1947	Second = expandBounds(CG: Check.second, TheLoop: L, Loc, Exp,
1948	HoistRuntimeChecks);
1949	return std::make_pair(x&: First, y&: Second);
1950	});
1951
1952	return ChecksWithBounds;
1953	}
1954
1955	Value *llvm::addRuntimeChecks(
1956	Instruction Loc, Loop TheLoop,
1957	const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
1958	SCEVExpander &Exp, bool HoistRuntimeChecks) {
1959	// TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
1960	// TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
1961	auto ExpandedChecks =
1962	expandBounds(PointerChecks, L: TheLoop, Loc, Exp, HoistRuntimeChecks);
1963
1964	LLVMContext &Ctx = Loc->getContext();
1965	IRBuilder ChkBuilder(Ctx, InstSimplifyFolder (Loc->getDataLayout()));
1966	ChkBuilder.SetInsertPoint(Loc);
1967	// Our instructions might fold to a constant.
1968	Value MemoryRuntimeCheck = nullptr*;
1969
1970	for (const auto &[A, B] : ExpandedChecks) {
1971	// Check if two pointers (A and B) conflict where conflict is computed as:
1972	// start(A) <= end(B) && start(B) <= end(A)
1973
1974	assert((A.Start->getType()->getPointerAddressSpace() ==
1975	B.End->getType()->getPointerAddressSpace()) &&
1976	(B.Start->getType()->getPointerAddressSpace() ==
1977	A.End->getType()->getPointerAddressSpace()) &&
1978	"Trying to bounds check pointers with different address spaces");
1979
1980	// [A\|B].Start points to the first accessed byte under base [A\|B].
1981	// [A\|B].End points to the last accessed byte, plus one.
1982	// There is no conflict when the intervals are disjoint:
1983	// NoConflict = (B.Start >= A.End) \|\| (A.Start >= B.End)
1984	//
1985	// bound0 = (B.Start < A.End)
1986	// bound1 = (A.Start < B.End)
1987	// IsConflict = bound0 & bound1
1988	Value *Cmp0 = ChkBuilder.CreateICmpULT(LHS: A.Start, RHS: B.End, Name: "bound0");
1989	Value *Cmp1 = ChkBuilder.CreateICmpULT(LHS: B.Start, RHS: A.End, Name: "bound1");
1990	Value *IsConflict = ChkBuilder.CreateAnd(LHS: Cmp0, RHS: Cmp1, Name: "found.conflict");
1991	if (A.StrideToCheck) {
1992	Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1993	LHS: A.StrideToCheck, RHS: ConstantInt::get(Ty: A.StrideToCheck->getType(), V: `0`),
1994	Name: "stride.check");
1995	IsConflict = ChkBuilder.CreateOr(LHS: IsConflict, RHS: IsNegativeStride);
1996	}
1997	if (B.StrideToCheck) {
1998	Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1999	LHS: B.StrideToCheck, RHS: ConstantInt::get(Ty: B.StrideToCheck->getType(), V: `0`),
2000	Name: "stride.check");
2001	IsConflict = ChkBuilder.CreateOr(LHS: IsConflict, RHS: IsNegativeStride);
2002	}
2003	if (MemoryRuntimeCheck) {
2004	IsConflict =
2005	ChkBuilder.CreateOr(LHS: MemoryRuntimeCheck, RHS: IsConflict, Name: "conflict.rdx");
2006	}
2007	MemoryRuntimeCheck = IsConflict;
2008	}
2009
2010	return MemoryRuntimeCheck;
2011	}
2012
2013	Value *llvm::addDiffRuntimeChecks(
2014	Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
2015	function_ref<Value (IRBuilderBase &, unsigned)> GetVF, unsigned* IC) {
2016
2017	LLVMContext &Ctx = Loc->getContext();
2018	IRBuilder ChkBuilder(Ctx, InstSimplifyFolder (Loc->getDataLayout()));
2019	ChkBuilder.SetInsertPoint(Loc);
2020	// Our instructions might fold to a constant.
2021	Value MemoryRuntimeCheck = nullptr*;
2022
2023	auto &SE = *Expander.getSE();
2024	// Map to keep track of created compares, The key is the pair of operands for
2025	// the compare, to allow detecting and re-using redundant compares.
2026	DenseMap<std::pair<Value , Value >, Value *> SeenCompares;
2027	for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
2028	Type *Ty = SinkStart->getType();
2029	// Compute VF IC * AccessSize.*
2030	auto *VFTimesICTimesSize =
2031	ChkBuilder.CreateMul(LHS: GetVF (ChkBuilder, Ty->getScalarSizeInBits()),
2032	RHS: ConstantInt::get(Ty, V: IC * AccessSize));
2033	Value *Diff =
2034	Expander.expandCodeFor(SH: SE.getMinusSCEV(LHS: SinkStart, RHS: SrcStart), Ty, I: Loc);
2035
2036	// Check if the same compare has already been created earlier. In that case,
2037	// there is no need to check it again.
2038	Value *IsConflict = SeenCompares.lookup(Val: {Diff, VFTimesICTimesSize});
2039	if (IsConflict)
2040	continue;
2041
2042	IsConflict =
2043	ChkBuilder.CreateICmpULT(LHS: Diff, RHS: VFTimesICTimesSize, Name: "diff.check");
2044	SeenCompares.insert(KV: {{Diff, VFTimesICTimesSize}, IsConflict});
2045	if (NeedsFreeze)
2046	IsConflict =
2047	ChkBuilder.CreateFreeze(V: IsConflict, Name: IsConflict->getName() + ".fr");
2048	if (MemoryRuntimeCheck) {
2049	IsConflict =
2050	ChkBuilder.CreateOr(LHS: MemoryRuntimeCheck, RHS: IsConflict, Name: "conflict.rdx");
2051	}
2052	MemoryRuntimeCheck = IsConflict;
2053	}
2054
2055	return MemoryRuntimeCheck;
2056	}
2057
2058	std::optional<IVConditionInfo>
2059	llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,
2060	const MemorySSA &MSSA, AAResults &AA) {
2061	auto *TI = dyn_cast<BranchInst>(Val: L.getHeader()->getTerminator());
2062	if (!TI \|\| !TI->isConditional())
2063	return {};
2064
2065	auto *CondI = dyn_cast<Instruction>(Val: TI->getCondition());
2066	// The case with the condition outside the loop should already be handled
2067	// earlier.
2068	// Allow CmpInst and TruncInsts as they may be users of load instructions
2069	// and have potential for partial unswitching
2070	if (!CondI \|\| !isa<CmpInst, TruncInst>(Val: CondI) \|\| !L.contains(Inst: CondI))
2071	return {};
2072
2073	SmallVector<Instruction *> InstToDuplicate;
2074	InstToDuplicate.push_back(Elt: CondI);
2075
2076	SmallVector<Value *, `4`> WorkList;
2077	WorkList.append(in_start: CondI->op_begin(), in_end: CondI->op_end());
2078
2079	SmallVector<MemoryAccess *, `4`> AccessesToCheck;
2080	SmallVector<MemoryLocation, `4`> AccessedLocs;
2081	while (!WorkList.empty()) {
2082	Instruction *I = dyn_cast<Instruction>(Val: WorkList.pop_back_val());
2083	if (!I \|\| !L.contains(Inst: I))
2084	continue;
2085
2086	// TODO: support additional instructions.
2087	if (!isa<LoadInst>(Val: I) && !isa<GetElementPtrInst>(Val: I))
2088	return {};
2089
2090	// Do not duplicate volatile and atomic loads.
2091	if (auto *LI = dyn_cast<LoadInst>(Val: I))
2092	if (LI->isVolatile() \|\| LI->isAtomic())
2093	return {};
2094
2095	InstToDuplicate.push_back(Elt: I);
2096	if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
2097	if (auto *MemUse = dyn_cast_or_null<MemoryUse>(Val: MA)) {
2098	// Queue the defining access to check for alias checks.
2099	AccessesToCheck.push_back(Elt: MemUse->getDefiningAccess());
2100	AccessedLocs.push_back(Elt: MemoryLocation::get(Inst: I));
2101	} else {
2102	// MemoryDefs may clobber the location or may be atomic memory
2103	// operations. Bail out.
2104	return {};
2105	}
2106	}
2107	WorkList.append(in_start: I->op_begin(), in_end: I->op_end());
2108	}
2109
2110	if (InstToDuplicate.empty())
2111	return {};
2112
2113	SmallVector<BasicBlock *, `4`> ExitingBlocks;
2114	L.getExitingBlocks(ExitingBlocks);
2115	auto HasNoClobbersOnPath =
2116	[&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
2117	MSSAThreshold](BasicBlock Succ, BasicBlock Header,
2118	SmallVector<MemoryAccess *, `4`> AccessesToCheck)
2119	-> std::optional<IVConditionInfo> {
2120	IVConditionInfo Info;
2121	// First, collect all blocks in the loop that are on a patch from Succ
2122	// to the header.
2123	SmallVector<BasicBlock *, `4`> WorkList;
2124	WorkList.push_back(Elt: Succ);
2125	WorkList.push_back(Elt: Header);
2126	SmallPtrSet<BasicBlock *, `4`> Seen;
2127	Seen.insert(Ptr: Header);
2128	Info.PathIsNoop &=
2129	all_of(Range&: Header, P: [](Instruction &I) { return* !I.mayHaveSideEffects(); });
2130
2131	while (!WorkList.empty()) {
2132	BasicBlock *Current = WorkList.pop_back_val();
2133	if (!L.contains(BB: Current))
2134	continue;
2135	const auto &SeenIns = Seen.insert(Ptr: Current);
2136	if (!SeenIns.second)
2137	continue;
2138
2139	Info.PathIsNoop &= all_of(
2140	Range&: Current, P: [](Instruction &I) { return* !I.mayHaveSideEffects(); });
2141	WorkList.append(in_start: succ_begin(BB: Current), in_end: succ_end(BB: Current));
2142	}
2143
2144	// Require at least 2 blocks on a path through the loop. This skips
2145	// paths that directly exit the loop.
2146	if (Seen.size() < `2`)
2147	return {};
2148
2149	// Next, check if there are any MemoryDefs that are on the path through
2150	// the loop (in the Seen set) and they may-alias any of the locations in
2151	// AccessedLocs. If that is the case, they may modify the condition and
2152	// partial unswitching is not possible.
2153	SmallPtrSet<MemoryAccess *, `4`> SeenAccesses;
2154	while (!AccessesToCheck.empty()) {
2155	MemoryAccess *Current = AccessesToCheck.pop_back_val();
2156	auto SeenI = SeenAccesses.insert(Ptr: Current);
2157	if (!SeenI.second \|\| !Seen.contains(Ptr: Current->getBlock()))
2158	continue;
2159
2160	// Bail out if exceeded the threshold.
2161	if (SeenAccesses.size() >= MSSAThreshold)
2162	return {};
2163
2164	// MemoryUse are read-only accesses.
2165	if (isa<MemoryUse>(Val: Current))
2166	continue;
2167
2168	// For a MemoryDef, check if is aliases any of the location feeding
2169	// the original condition.
2170	if (auto *CurrentDef = dyn_cast<MemoryDef>(Val: Current)) {
2171	if (any_of(Range&: AccessedLocs, P: [&AA, CurrentDef](MemoryLocation &Loc) {
2172	return isModSet(
2173	MRI: AA.getModRefInfo(I: CurrentDef->getMemoryInst(), OptLoc: Loc));
2174	}))
2175	return {};
2176	}
2177
2178	for (Use &U : Current->uses())
2179	AccessesToCheck.push_back(Elt: cast<MemoryAccess>(Val: U.getUser()));
2180	}
2181
2182	// We could also allow loops with known trip counts without mustprogress,
2183	// but ScalarEvolution may not be available.
2184	Info.PathIsNoop &= isMustProgress(L: &L);
2185
2186	// If the path is considered a no-op so far, check if it reaches a
2187	// single exit block without any phis. This ensures no values from the
2188	// loop are used outside of the loop.
2189	if (Info.PathIsNoop) {
2190	for (auto *Exiting : ExitingBlocks) {
2191	if (!Seen.contains(Ptr: Exiting))
2192	continue;
2193	for (auto *Succ : successors(BB: Exiting)) {
2194	if (L.contains(BB: Succ))
2195	continue;
2196
2197	Info.PathIsNoop &= Succ->phis().empty() &&
2198	(!Info.ExitForPath \|\| Info.ExitForPath == Succ);
2199	if (!Info.PathIsNoop)
2200	break;
2201	assert((!Info.ExitForPath \|\| Info.ExitForPath == Succ) &&
2202	"cannot have multiple exit blocks");
2203	Info.ExitForPath = Succ;
2204	}
2205	}
2206	}
2207	if (!Info.ExitForPath)
2208	Info.PathIsNoop = false;
2209
2210	Info.InstToDuplicate = InstToDuplicate;
2211	return Info;
2212	};
2213
2214	// If we branch to the same successor, partial unswitching will not be
2215	// beneficial.
2216	if (TI->getSuccessor(i: `0`) == TI->getSuccessor(i: `1`))
2217	return {};
2218
2219	if (auto Info = HasNoClobbersOnPath (TI->getSuccessor(i: `0`), L.getHeader(),
2220	AccessesToCheck)) {
2221	Info ->KnownValue = ConstantInt::getTrue(Context&: TI->getContext());
2222	return Info;
2223	}
2224	if (auto Info = HasNoClobbersOnPath (TI->getSuccessor(i: `1`), L.getHeader(),
2225	AccessesToCheck)) {
2226	Info ->KnownValue = ConstantInt::getFalse(Context&: TI->getContext());
2227	return Info;
2228	}
2229
2230	return {};
2231	}
2232

Browse the source code of llvm_projects/llvm/lib/Transforms/Utils/LoopUtils.cpp